diff options
author | bapt <bapt@FreeBSD.org> | 2015-10-27 21:19:11 +0000 |
---|---|---|
committer | bapt <bapt@FreeBSD.org> | 2015-10-27 21:19:11 +0000 |
commit | bf66c97c4a64e64410bf0223d221a54ca9555f52 (patch) | |
tree | 776471e6ddbb557aac5a48779cb813f63c28b4d5 | |
parent | 7678f812c1d8bea1cf36871abe8ab1e7ec6912fd (diff) | |
download | FreeBSD-src-bf66c97c4a64e64410bf0223d221a54ca9555f52.zip FreeBSD-src-bf66c97c4a64e64410bf0223d221a54ca9555f52.tar.gz |
Update libucl to git snapshot 20151027 (77d9d33)
61 files changed, 7111 insertions, 617 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..ae28a89 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,260 @@ +PROJECT(libucl C) +CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR) + +SET(LIBUCL_VERSION_MAJOR 0) +SET(LIBUCL_VERSION_MINOR 5) +SET(LIBUCL_VERSION_PATCH 0) + +SET(LIBUCL_VERSION + "${LIBUCL_VERSION_MAJOR}.${LIBUCL_VERSION_MINOR}.${LIBUCL_VERSION_PATCH}") + +INCLUDE(CheckCCompilerFlag) +INCLUDE(FindOpenSSL) + +OPTION(ENABLE_URL_INCLUDE "Enable urls in ucl includes (requires libcurl or libfetch) [default: OFF]" OFF) +OPTION(ENABLE_URL_SIGN "Enable signatures check in ucl includes (requires openssl) [default: OFF]" OFF) +OPTION(BUILD_SHARED_LIBS "Build Shared Libraries [default: OFF]" OFF) +OPTION(ENABLE_LUA "Enable lua support [default: OFF]" OFF) +OPTION(ENABLE_LUAJIT "Enable luajit support [default: OFF]" OFF) + +# Find lua installation +MACRO(FindLua) + # Find lua libraries + UNSET(LUA_INCLUDE_DIR CACHE) + UNSET(LUA_LIBRARY CACHE) + CMAKE_PARSE_ARGUMENTS(LUA "" "VERSION_MAJOR;VERSION_MINOR;ROOT" "" ${ARGN}) + + IF(NOT LUA_VERSION_MAJOR OR NOT LUA_VERSION_MINOR) + MESSAGE(FATAL_ERROR "Invalid FindLua invocation: ${ARGN}") + ENDIF() + + IF(ENABLE_LUAJIT MATCHES "ON") + MESSAGE(STATUS "Check for luajit ${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}") + FIND_PATH(LUA_INCLUDE_DIR luajit.h + HINTS + "${RSPAMD_SEARCH_PATH}" "${LUA_ROOT}" + $ENV{LUA_DIR} + PATH_SUFFIXES "include/luajit-2.0" + "include/luajit${LUA_VERSION_MAJOR}${LUA_VERSION_MINOR}" + "include/luajit${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + "include/luajit-${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + "include/luajit" + "include/lua${LUA_VERSION_MAJOR}${LUA_VERSION_MINOR}" + "include/lua${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + "include/lua-${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + include/lua include + PATHS ${RSPAMD_DEFAULT_INCLUDE_PATHS} + ) + FIND_LIBRARY(LUA_LIBRARY + NAMES luajit + "luajit-2.0" + "luajit2.0" + 
"luajit${LUA_VERSION_MAJOR}${LUA_VERSION_MINOR}" + "luajit${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + "luajit-${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + HINTS + "${RSPAMD_SEARCH_PATH}" "${LUA_ROOT}" + $ENV{LUA_DIR} + PATH_SUFFIXES lib64 lib + PATHS ${RSPAMD_DEFAULT_LIBRARY_PATHS} + DOC "Lua library" + ) + + IF(NOT LUA_LIBRARY OR NOT LUA_INCLUDE_DIR) + MESSAGE(STATUS "Fallback from luajit to plain lua") + SET(ENABLE_LUAJIT "OFF") + MESSAGE(STATUS "Check for lua ${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}") + FIND_PATH(LUA_INCLUDE_DIR lua.h + HINTS + "${RSPAMD_SEARCH_PATH}" "${LUA_ROOT}" + $ENV{LUA_DIR} + PATH_SUFFIXES "include/lua${LUA_VERSION_MAJOR}${LUA_VERSION_MINOR}" + "include/lua${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + "include/lua-${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + include/lua include + PATHS ${RSPAMD_DEFAULT_INCLUDE_PATHS} + ) + FIND_LIBRARY(LUA_LIBRARY + NAMES lua + "lua${LUA_VERSION_MAJOR}${LUA_VERSION_MINOR}" + "lua${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + "lua-${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + HINTS + "${RSPAMD_SEARCH_PATH}" "${LUA_ROOT}" + $ENV{LUA_DIR} + PATH_SUFFIXES lib64 lib + PATHS ${RSPAMD_DEFAULT_LIBRARY_PATHS} + DOC "Lua library" + ) + ENDIF() + ELSE(ENABLE_LUAJIT MATCHES "ON") + MESSAGE(STATUS "Check for lua ${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}") + FIND_PATH(LUA_INCLUDE_DIR lua.h + HINTS + "${RSPAMD_SEARCH_PATH}" "${LUA_ROOT}" + $ENV{LUA_DIR} + PATH_SUFFIXES "include/lua${LUA_VERSION_MAJOR}${LUA_VERSION_MINOR}" + "include/lua${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + "include/lua-${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + include/lua include + PATHS ${RSPAMD_DEFAULT_INCLUDE_PATHS} + ) + FIND_LIBRARY(LUA_LIBRARY + NAMES lua + "lua${LUA_VERSION_MAJOR}${LUA_VERSION_MINOR}" + "lua${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + "lua-${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}" + HINTS + "${RSPAMD_SEARCH_PATH}" "${LUA_ROOT}" + $ENV{LUA_DIR} + PATH_SUFFIXES lib64 lib + PATHS ${RSPAMD_DEFAULT_LIBRARY_PATHS} + DOC "Lua 
library" + ) + ENDIF(ENABLE_LUAJIT MATCHES "ON") + + IF(LUA_LIBRARY AND LUA_INCLUDE_DIR) + SET(LUA_FOUND 1) + IF(NOT LUA_VERSION_MAJOR OR NOT LUA_VERSION_MINOR) + SET(LUA_VERSION_MAJOR ${LUA_VERSION_MAJOR}) + SET(LUA_VERSION_MINOR ${LUA_VERSION_MINOR}) + ENDIF(NOT LUA_VERSION_MAJOR OR NOT LUA_VERSION_MINOR) + IF(ENABLE_LUAJIT MATCHES "ON") + MESSAGE(STATUS "Found luajit ${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}") + ELSE(ENABLE_LUAJIT MATCHES "ON") + MESSAGE(STATUS "Found lua ${LUA_VERSION_MAJOR}.${LUA_VERSION_MINOR}") + ENDIF(ENABLE_LUAJIT MATCHES "ON") + ENDIF(LUA_LIBRARY AND LUA_INCLUDE_DIR) +ENDMACRO() + +IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + LIST(APPEND CMAKE_REQUIRED_LIBRARIES rt) +ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + +IF(ENABLE_URL_INCLUDE MATCHES "ON") + FIND_LIBRARY(LIBFETCH_LIBRARY NAMES fetch PATHS PATH_SUFFIXES lib64 lib + PATHS + ~/Library/Frameworks + /Library/Frameworks + /usr/local + /usr + /sw + /opt/local + /opt/csw + /opt + DOC "Path where the libfetch library can be found") + IF(LIBFETCH_LIBRARY) + FIND_FILE(HAVE_FETCH_H NAMES fetch.h PATHS /usr/include + /opt/include + /usr/local/include + DOC "Path to libfetch header") + ELSE(LIBFETCH_LIBRARY) + # Try to find libcurl + ProcessPackage(CURL libcurl) + IF(NOT CURL_FOUND) + MESSAGE(WARNING "Neither libcurl nor libfetch were found, no support of URL includes in configuration") + ENDIF(NOT CURL_FOUND) + ENDIF(LIBFETCH_LIBRARY) +ENDIF(ENABLE_URL_INCLUDE MATCHES "ON") + +SET(CMAKE_C_WARN_FLAGS "") +CHECK_C_COMPILER_FLAG(-Wall SUPPORT_WALL) +CHECK_C_COMPILER_FLAG(-W SUPPORT_W) +CHECK_C_COMPILER_FLAG(-Wno-unused-parameter SUPPORT_WPARAM) +CHECK_C_COMPILER_FLAG(-Wno-pointer-sign SUPPORT_WPOINTER_SIGN) +CHECK_C_COMPILER_FLAG(-Wstrict-prototypes SUPPORT_WSTRICT_PROTOTYPES) +IF(NOT "${CMAKE_C_COMPILER_ID}" MATCHES SunPro) + CHECK_C_COMPILER_FLAG("-std=c99" SUPPORT_STD_FLAG) +ENDIF(NOT "${CMAKE_C_COMPILER_ID}" MATCHES SunPro) +IF(SUPPORT_W) + SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -W") 
+ENDIF(SUPPORT_W) +IF(SUPPORT_WALL) + SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -Wall") +ENDIF(SUPPORT_WALL) +IF(SUPPORT_WPARAM) + SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -Wno-unused-parameter") +ENDIF(SUPPORT_WPARAM) +IF(SUPPORT_WPOINTER_SIGN) + SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -Wno-pointer-sign") +ENDIF(SUPPORT_WPOINTER_SIGN) +IF(SUPPORT_WSTRICT_PROTOTYPES) + SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -Wstrict-prototypes") +ENDIF(SUPPORT_WSTRICT_PROTOTYPES) +IF(SUPPORT_STD_FLAG) + SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -std=c99") +ENDIF(SUPPORT_STD_FLAG) + +IF(ENABLE_URL_SIGN MATCHES "ON") + IF(OPENSSL_FOUND) + SET(HAVE_OPENSSL 1) + INCLUDE_DIRECTORIES("${OPENSSL_INCLUDE_DIR}") + ENDIF(OPENSSL_FOUND) +ENDIF(ENABLE_URL_SIGN MATCHES "ON") + +INCLUDE_DIRECTORIES("src") +INCLUDE_DIRECTORIES("include") +INCLUDE_DIRECTORIES("uthash") +INCLUDE_DIRECTORIES("klib") + +SET(UCLSRC src/ucl_util.c + src/ucl_parser.c + src/ucl_emitter.c + src/ucl_emitter_streamline.c + src/ucl_emitter_utils.c + src/ucl_hash.c + src/ucl_schema.c + src/ucl_msgpack.c + src/ucl_sexp.c + src/xxhash.c) + + +SET (LIB_TYPE STATIC) +IF (BUILD_SHARED_LIBS) + SET (LIB_TYPE SHARED) +ENDIF (BUILD_SHARED_LIBS) +ADD_LIBRARY(ucl ${LIB_TYPE} ${UCLSRC}) +SET_TARGET_PROPERTIES(ucl PROPERTIES VERSION ${LIBUCL_VERSION} SOVERSION ${LIBUCL_VERSION_MAJOR}) + +IF(ENABLE_LUA MATCHES "ON") + IF(ENABLE_LUAJIT MATCHES "ON") + FindLua(VERSION_MAJOR "5" VERSION_MINOR "1" ROOT "${LUA_ROOT}") + IF(NOT LUA_FOUND) + MESSAGE(FATAL_ERROR "Lua not found, lua support is required") + ELSE(NOT LUA_FOUND) + INCLUDE_DIRECTORIES("${LUA_INCLUDE_DIR}") + ENDIF(NOT LUA_FOUND) + ELSE(ENABLE_LUAJIT MATCHES "ON") + FindLua(VERSION_MAJOR "5" VERSION_MINOR "2" ROOT "${LUA_ROOT}") + IF(NOT LUA_FOUND) + FindLua(VERSION_MAJOR "5" VERSION_MINOR "1" ROOT "${LUA_ROOT}") + ENDIF(NOT LUA_FOUND) + IF(NOT LUA_FOUND) + MESSAGE(FATAL_ERROR "Lua not found, lua support is required") + ELSE(NOT LUA_FOUND) + 
INCLUDE_DIRECTORIES("${LUA_INCLUDE_DIR}") + ENDIF(NOT LUA_FOUND) + ENDIF(ENABLE_LUAJIT MATCHES "ON") + SET(UCL_LUA_SRC lua/lua_ucl.c) + ADD_LIBRARY(lua-ucl ${LIB_TYPE} ${UCL_LUA_SRC}) + IF(ENABLE_LUAJIT MATCHES "ON") + TARGET_LINK_LIBRARIES(lua-ucl "${LUAJIT_LIBRARY}") + ELSE(ENABLE_LUAJIT MATCHES "ON") + TARGET_LINK_LIBRARIES(lua-ucl "${LUA_LIBRARY}") + ENDIF(ENABLE_LUAJIT MATCHES "ON") + TARGET_LINK_LIBRARIES(lua-ucl ucl) + SET_TARGET_PROPERTIES(lua-ucl PROPERTIES VERSION ${LIBUCL_VERSION} SOVERSION ${LIBUCL_VERSION_MAJOR}) +ENDIF() + +IF(HAVE_FETCH_H) + TARGET_LINK_LIBRARIES(ucl fetch) +ELSE(HAVE_FETCH_H) + IF(CURL_FOUND) + TARGET_LINK_LIBRARIES(ucl ${CURL_LIBRARIES}) + ENDIF(CURL_FOUND) +ENDIF(HAVE_FETCH_H) +IF(ENABLE_URL_SIGN MATCHES "ON") + IF(OPENSSL_FOUND) + TARGET_LINK_LIBRARIES(ucl ${OPENSSL_LIBRARIES}) + ENDIF(OPENSSL_FOUND) +ENDIF(ENABLE_URL_SIGN MATCHES "ON") diff --git a/ChangeLog.md b/ChangeLog.md index 093ff97..f714f40 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -32,3 +32,8 @@ ### Libucl 0.7.2 - Fixed serious bugs in schema and arrays iteration + +### Libucl 0.7.3 + +- Fixed a bug with macroes that come after an empty object +- Fixed a bug in include processing when an incorrect variable has been destroyed (use-after-free) @@ -21,7 +21,7 @@ - [Performance](#performance) - [Conclusion](#conclusion) -## Introduction +## Introduction This document describes the main features and principles of the configuration language called `UCL` - universal configuration language. @@ -262,7 +262,20 @@ parser is created but before any configurations are parsed. all files that matches the specified pattern (normally the format of patterns is defined in `glob` manual page for your operating system). This option is meaningless for URL includes. * `url` (default: **true**) - allow URL includes. +* `path` (default: empty) - A UCL_ARRAY of directories to search for the include file. 
+Search ends after the first patch, unless `glob` is true, then all matches are included. +* `prefix` (default false) - Put included contents inside an object, instead +of loading them into the root. If no `key` is provided, one is automatically generated based on each files basename() +* `key` (default: <empty string>) - Key to load contents of include into. If +the key already exists, it must be the correct type +* `target` (default: object) - Specify if the `prefix` `key` should be an +object or an array. * `priority` (default: 0) - specify priority for the include (see below). +* `duplicate` (default: 'append') - specify policy of duplicates resolving: + - `append` - default strategy, if we have new object of higher priority then it replaces old one, if we have new object with less priority it is ignored completely, and if we have two duplicate objects with the same priority then we have a multi-value key (implicit array) + - `merge` - if we have object or array, then new keys are merged inside, if we have a plain object then an implicit array is formed (regardeless of priorities) + - `error` - create error on duplicate keys and stop parsing + - `rewrite` - always rewrite an old value with new one (ignoring priorities) Priorities are used by UCL parser to manage the policy of objects rewriting during including other files as following: diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt deleted file mode 100644 index e84f619..0000000 --- a/cmake/CMakeLists.txt +++ /dev/null @@ -1,127 +0,0 @@ -PROJECT(libucl C) - -SET(LIBUCL_VERSION_MAJOR 0) -SET(LIBUCL_VERSION_MINOR 5) -SET(LIBUCL_VERSION_PATCH 0) - -SET(LIBUCL_VERSION "${LIBUCL_VERSION_MAJOR}.${LIBUCL_VERSION_MINOR}.${LIBUCL_VERSION_PATCH}") - -INCLUDE(CheckCCompilerFlag) -INCLUDE(FindOpenSSL) -CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR) - -OPTION(ENABLE_URL_INCLUDE "Enable urls in ucl includes (requires libcurl or libfetch) [default: OFF]" OFF) -OPTION(ENABLE_URL_SIGN "Enable signatures check in 
ucl includes (requires openssl) [default: OFF]" OFF) -OPTION(BUILD_SHARED_LIBS "Build Shared Libraries [default: OFF]" OFF) - -IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") - LIST(APPEND CMAKE_REQUIRED_LIBRARIES rt) -ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") - -IF(ENABLE_URL_INCLUDE MATCHES "ON") - FIND_LIBRARY(LIBFETCH_LIBRARY NAMES fetch PATHS PATH_SUFFIXES lib64 lib - PATHS - ~/Library/Frameworks - /Library/Frameworks - /usr/local - /usr - /sw - /opt/local - /opt/csw - /opt - DOC "Path where the libfetch library can be found") - IF(LIBFETCH_LIBRARY) - FIND_FILE(HAVE_FETCH_H NAMES fetch.h PATHS /usr/include - /opt/include - /usr/local/include - DOC "Path to libfetch header") - ELSE(LIBFETCH_LIBRARY) - # Try to find libcurl - ProcessPackage(CURL libcurl) - IF(NOT CURL_FOUND) - MESSAGE(WARNING "Neither libcurl nor libfetch were found, no support of URL includes in configuration") - ENDIF(NOT CURL_FOUND) - ENDIF(LIBFETCH_LIBRARY) -ENDIF(ENABLE_URL_INCLUDE MATCHES "ON") - -SET(CMAKE_C_WARN_FLAGS "") -CHECK_C_COMPILER_FLAG(-Wall SUPPORT_WALL) -CHECK_C_COMPILER_FLAG(-W SUPPORT_W) -CHECK_C_COMPILER_FLAG(-Wno-unused-parameter SUPPORT_WPARAM) -CHECK_C_COMPILER_FLAG(-Wno-pointer-sign SUPPORT_WPOINTER_SIGN) -CHECK_C_COMPILER_FLAG(-Wstrict-prototypes SUPPORT_WSTRICT_PROTOTYPES) -IF(NOT "${CMAKE_C_COMPILER_ID}" MATCHES SunPro) - CHECK_C_COMPILER_FLAG("-std=c99" SUPPORT_STD_FLAG) -ENDIF(NOT "${CMAKE_C_COMPILER_ID}" MATCHES SunPro) -IF(SUPPORT_W) - SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -W") -ENDIF(SUPPORT_W) -IF(SUPPORT_WALL) - SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -Wall") -ENDIF(SUPPORT_WALL) -IF(SUPPORT_WPARAM) - SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -Wno-unused-parameter") -ENDIF(SUPPORT_WPARAM) -IF(SUPPORT_WPOINTER_SIGN) - SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -Wno-pointer-sign") -ENDIF(SUPPORT_WPOINTER_SIGN) -IF(SUPPORT_WSTRICT_PROTOTYPES) - SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -Wstrict-prototypes") -ENDIF(SUPPORT_WSTRICT_PROTOTYPES) 
-IF(SUPPORT_STD_FLAG) - SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -std=c99") -ENDIF(SUPPORT_STD_FLAG) - -IF(ENABLE_URL_SIGN MATCHES "ON") - IF(OPENSSL_FOUND) - SET(HAVE_OPENSSL 1) - INCLUDE_DIRECTORIES("${OPENSSL_INCLUDE_DIR}") - ENDIF(OPENSSL_FOUND) -ENDIF(ENABLE_URL_SIGN MATCHES "ON") - -INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../src") -INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../include") -INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../uthash") -INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../klib") - -SET(UCLSRC ../src/ucl_util.c - ../src/ucl_parser.c - ../src/ucl_emitter.c - ../src/ucl_emitter_streamline.c - ../src/ucl_emitter_utils.c - ../src/ucl_hash.c - ../src/ucl_schema.c - ../src/xxhash.c) - - -SET (LIB_TYPE STATIC) -IF (BUILD_SHARED_LIBS) - SET (LIB_TYPE SHARED) -ENDIF (BUILD_SHARED_LIBS) -ADD_LIBRARY(ucl ${LIB_TYPE} ${UCLSRC}) -SET_TARGET_PROPERTIES(ucl PROPERTIES VERSION ${LIBUCL_VERSION} SOVERSION ${LIBUCL_VERSION_MAJOR}) - -IF(WITH_LUA) - SET(UCL_LUA_SRC ../lua/lua_ucl.c) - ADD_LIBRARY(lua-ucl ${LIB_TYPE} ${UCL_LUA_SRC}) - IF(ENABLE_LUAJIT MATCHES "ON") - TARGET_LINK_LIBRARIES(lua-ucl "${LUAJIT_LIBRARY}") - ELSE(ENABLE_LUAJIT MATCHES "ON") - TARGET_LINK_LIBRARIES(lua-ucl "${LUA_LIBRARY}") - ENDIF(ENABLE_LUAJIT MATCHES "ON") - TARGET_LINK_LIBRARIES(lua-ucl ucl) - SET_TARGET_PROPERTIES(lua-ucl PROPERTIES VERSION ${LIBUCL_VERSION} SOVERSION ${LIBUCL_VERSION_MAJOR}) -ENDIF(WITH_LUA) - -IF(HAVE_FETCH_H) - TARGET_LINK_LIBRARIES(ucl fetch) -ELSE(HAVE_FETCH_H) - IF(CURL_FOUND) - TARGET_LINK_LIBRARIES(ucl ${CURL_LIBRARIES}) - ENDIF(CURL_FOUND) -ENDIF(HAVE_FETCH_H) -IF(ENABLE_URL_SIGN MATCHES "ON") - IF(OPENSSL_FOUND) - TARGET_LINK_LIBRARIES(ucl ${OPENSSL_LIBRARIES}) - ENDIF(OPENSSL_FOUND) -ENDIF(ENABLE_URL_SIGN MATCHES "ON") diff --git a/configure.ac b/configure.ac index be6f652..73d3227 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ m4_define([maj_ver], [0]) m4_define([med_ver], [7]) -m4_define([min_ver], [2]) 
-m4_define([so_version], [5:0:1]) +m4_define([min_ver], [3]) +m4_define([so_version], [5:0:2]) m4_define([ucl_version], [maj_ver.med_ver.min_ver]) AC_INIT([libucl],[ucl_version],[https://github.com/vstakhov/libucl],[libucl]) @@ -47,6 +47,7 @@ AC_CHECK_HEADERS_ONCE([libgen.h]) AC_CHECK_HEADERS_ONCE([stdio.h]) AC_CHECK_HEADERS_ONCE([float.h]) AC_CHECK_HEADERS_ONCE([math.h]) +AC_CHECK_HEADERS_ONCE([endian.h sys/endian.h machine/endian.h]) dnl Example of default-disabled feature AC_ARG_ENABLE([urls], AS_HELP_STRING([--enable-urls], diff --git a/doc/Makefile.am b/doc/Makefile.am index a90a6fa..dcfacf6 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -4,6 +4,6 @@ dist_man_MANS = libucl.3 gen-man: @PANDOC@ tail -n +$$(grep -n '# Synopsis' api.md | cut -d':' -f1) api.md | \ - cat pandoc.template - | sed -e 's/^# \(.*\)/# \U\1/' \ + cat pandoc.template - | sed -e 's/^# \(.*\)/# \U\1/' \ -e "s/%%date%%/$$(LANG=C date +'%d %B, %Y')/" | \ - @PANDOC@ -s -f markdown -t man -o libucl.3
\ No newline at end of file + @PANDOC@ -s -f markdown -t man -o libucl.3 diff --git a/doc/lua_api.md b/doc/lua_api.md index a53353b..f7af3ca 100644 --- a/doc/lua_api.md +++ b/doc/lua_api.md @@ -16,7 +16,7 @@ if not res then else local obj = parser:get_object() local got = ucl.to_format(obj, 'json') -endif +end local table = { str = 'value', @@ -25,6 +25,7 @@ local table = { func = function () return 'huh' end +} print(ucl.to_format(table, 'ucl')) @@ -115,6 +116,7 @@ local table = { func = function () return 'huh' end +} print(ucl.to_format(table, 'ucl')) diff --git a/examples/ucl_cpp.cc b/examples/ucl_cpp.cc new file mode 100644 index 0000000..2d15d84 --- /dev/null +++ b/examples/ucl_cpp.cc @@ -0,0 +1,26 @@ +#include <iostream> +#include <string> +#include "ucl++.h" + +int main(int argc, char **argv) +{ + std::string input, err; + + input.assign((std::istreambuf_iterator<char>(std::cin)), + std::istreambuf_iterator<char>()); + + auto obj = ucl::Ucl::parse(input, err); + + if (obj) { + std::cout << obj.dump(UCL_EMIT_CONFIG) << std::endl; + + for (const auto &o : obj) { + std::cout << o.dump(UCL_EMIT_CONFIG) << std::endl; + } + } + else { + std::cerr << "Error: " << err << std::endl; + + return 1; + } +} diff --git a/include/ucl++.h b/include/ucl++.h new file mode 100644 index 0000000..87d2041 --- /dev/null +++ b/include/ucl++.h @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once +#include <string> +#include <memory> +#include <iostream> +#include <strstream> + +#include "ucl.h" + +// C++11 API inspired by json11: https://github.com/dropbox/json11/ + +namespace ucl { + +struct ucl_map_construct_t { }; +constexpr ucl_map_construct_t ucl_map_construct = ucl_map_construct_t(); +struct ucl_array_construct_t { }; +constexpr ucl_array_construct_t ucl_array_construct = ucl_array_construct_t(); + +class Ucl final { +private: + + struct ucl_deleter { + void operator() (ucl_object_t *obj) { + ucl_object_unref (obj); + } + }; + + static int + append_char (unsigned char c, size_t nchars, void *ud) + { + std::string *out = reinterpret_cast<std::string *>(ud); + + out->append (nchars, (char)c); + + return nchars; + } + static int + append_len (unsigned const char *str, size_t len, void *ud) + { + std::string *out = reinterpret_cast<std::string *>(ud); + + out->append ((const char *)str, len); + + return len; + } + static int + append_int (int64_t elt, void *ud) + { + std::string *out = reinterpret_cast<std::string *>(ud); + auto nstr = std::to_string (elt); + + out->append (nstr); + + return nstr.size (); + } + static int + append_double (double elt, void *ud) + { + std::string *out = 
reinterpret_cast<std::string *>(ud); + auto nstr = std::to_string (elt); + + out->append (nstr); + + return nstr.size (); + } + + static struct ucl_emitter_functions default_emit_funcs() + { + struct ucl_emitter_functions func = { + Ucl::append_char, + Ucl::append_len, + Ucl::append_int, + Ucl::append_double, + nullptr, + nullptr + }; + + return func; + }; + + std::unique_ptr<ucl_object_t, ucl_deleter> obj; + +public: + class const_iterator { + private: + struct ucl_iter_deleter { + void operator() (ucl_object_iter_t it) { + ucl_object_iterate_free (it); + } + }; + std::shared_ptr<void> it; + std::unique_ptr<Ucl> cur; + public: + typedef std::forward_iterator_tag iterator_category; + + const_iterator(const Ucl &obj) { + it = std::shared_ptr<void>(ucl_object_iterate_new (obj.obj.get()), + ucl_iter_deleter()); + cur.reset (new Ucl(ucl_object_iterate_safe (it.get(), true))); + } + + const_iterator() {} + const_iterator(const const_iterator &other) { + it = other.it; + } + ~const_iterator() {} + + const_iterator& operator=(const const_iterator &other) { + it = other.it; + return *this; + } + + bool operator==(const const_iterator &other) const + { + if (cur && other.cur) { + return cur->obj.get() == other.cur->obj.get(); + } + + return !cur && !other.cur; + } + + bool operator!=(const const_iterator &other) const + { + return !(*this == other); + } + + const_iterator& operator++() + { + if (it) { + cur.reset (new Ucl(ucl_object_iterate_safe (it.get(), true))); + } + + if (!*cur) { + it.reset (); + cur.reset (); + } + + return *this; + } + + const Ucl& operator*() const + { + return *cur; + } + const Ucl* operator->() const + { + return cur.get(); + } + }; + + // We grab ownership if get non-const ucl_object_t + Ucl(ucl_object_t *other) { + obj.reset (other); + } + + // Shared ownership + Ucl(const ucl_object_t *other) { + obj.reset (ucl_object_ref (other)); + } + + Ucl(const Ucl &other) { + obj.reset (ucl_object_ref (other.obj.get())); + } + + Ucl(Ucl &&other) { + 
obj.swap (other.obj); + } + + Ucl() noexcept { + obj.reset (ucl_object_typed_new (UCL_NULL)); + } + Ucl(std::nullptr_t) noexcept { + obj.reset (ucl_object_typed_new (UCL_NULL)); + } + Ucl(double value) { + obj.reset (ucl_object_typed_new (UCL_FLOAT)); + obj->value.dv = value; + } + Ucl(int64_t value) { + obj.reset (ucl_object_typed_new (UCL_INT)); + obj->value.iv = value; + } + Ucl(bool value) { + obj.reset (ucl_object_typed_new (UCL_BOOLEAN)); + obj->value.iv = static_cast<int64_t>(value); + } + Ucl(const std::string &value) { + obj.reset (ucl_object_fromstring_common (value.data (), value.size (), + UCL_STRING_RAW)); + } + Ucl(const char * value) { + obj.reset (ucl_object_fromstring_common (value, 0, UCL_STRING_RAW)); + } + + // Implicit constructor: anything with a to_json() function. + template <class T, class = decltype(&T::to_ucl)> + Ucl(const T & t) : Ucl(t.to_ucl()) {} + + // Implicit constructor: map-like objects (std::map, std::unordered_map, etc) + template <class M, typename std::enable_if< + std::is_constructible<std::string, typename M::key_type>::value + && std::is_constructible<Ucl, typename M::mapped_type>::value, + int>::type = 0> + Ucl(const M & m) { + obj.reset (ucl_object_typed_new (UCL_OBJECT)); + auto cobj = obj.get (); + + for (const auto &e : m) { + ucl_object_insert_key (cobj, ucl_object_ref (e.second.obj.get()), + e.first.data (), e.first.size (), true); + } + } + + // Implicit constructor: vector-like objects (std::list, std::vector, std::set, etc) + template <class V, typename std::enable_if< + std::is_constructible<Ucl, typename V::value_type>::value, + int>::type = 0> + Ucl(const V & v) { + obj.reset (ucl_object_typed_new (UCL_ARRAY)); + auto cobj = obj.get (); + + for (const auto &e : v) { + ucl_array_append (cobj, ucl_object_ref (e.obj.get())); + } + } + + ucl_type_t type () const { + if (obj) { + return ucl_object_type (obj.get ()); + } + return UCL_NULL; + } + + const std::string key () const { + std::string res; + + if (obj->key) 
{ + res.assign (obj->key, obj->keylen); + } + + return res; + } + + double number_value () const + { + if (obj) { + return ucl_object_todouble (obj.get()); + } + + return 0.0; + } + + int64_t int_value () const + { + if (obj) { + return ucl_object_toint (obj.get()); + } + + return 0; + } + + bool bool_value () const + { + if (obj) { + return ucl_object_toboolean (obj.get()); + } + + return false; + } + + const std::string string_value () const + { + std::string res; + + if (obj) { + res.assign (ucl_object_tostring (obj.get())); + } + + return res; + } + + const Ucl operator[] (size_t i) const + { + if (type () == UCL_ARRAY) { + return Ucl (ucl_array_find_index (obj.get(), i)); + } + + return Ucl (nullptr); + } + + const Ucl operator[](const std::string &key) const + { + if (type () == UCL_OBJECT) { + return Ucl (ucl_object_find_keyl (obj.get(), + key.data (), key.size ())); + } + + return Ucl (nullptr); + } + // Serialize. + void dump (std::string &out, ucl_emitter_t type = UCL_EMIT_JSON) const + { + struct ucl_emitter_functions cbdata; + + cbdata = Ucl::default_emit_funcs(); + cbdata.ud = reinterpret_cast<void *>(&out); + + ucl_object_emit_full (obj.get(), type, &cbdata); + } + + std::string dump (ucl_emitter_t type = UCL_EMIT_JSON) const + { + std::string out; + + dump (out, type); + + return out; + } + + static Ucl parse (const std::string & in, std::string & err) + { + auto parser = ucl_parser_new (UCL_PARSER_DEFAULT); + + if (!ucl_parser_add_chunk (parser, (const unsigned char *)in.data (), + in.size ())) { + err.assign (ucl_parser_get_error (parser)); + ucl_parser_free (parser); + + return nullptr; + } + + auto obj = ucl_parser_get_object (parser); + ucl_parser_free (parser); + + // Obj will handle ownership + return Ucl (obj); + } + + static Ucl parse (const char * in, std::string & err) + { + if (in) { + return parse (std::string(in), err); + } else { + err = "null input"; + return nullptr; + } + } + + static Ucl parse (std::istream &ifs, std::string &err) 
+ { + return Ucl::parse (std::string(std::istreambuf_iterator<char>(ifs), + std::istreambuf_iterator<char>()), err); + } + + bool operator== (const Ucl &rhs) const + { + return ucl_object_compare (obj.get(), rhs.obj.get ()) == 0; + } + bool operator< (const Ucl &rhs) const + { + return ucl_object_compare (obj.get(), rhs.obj.get ()) < 0; + } + bool operator!= (const Ucl &rhs) const { return !(*this == rhs); } + bool operator<= (const Ucl &rhs) const { return !(rhs < *this); } + bool operator> (const Ucl &rhs) const { return (rhs < *this); } + bool operator>= (const Ucl &rhs) const { return !(*this < rhs); } + + operator bool () const + { + if (!obj || type() == UCL_NULL) { + return false; + } + + if (type () == UCL_BOOLEAN) { + return bool_value (); + } + + return true; + } + + const_iterator begin() const + { + return const_iterator(*this); + } + const_iterator cbegin() const + { + return const_iterator(*this); + } + const_iterator end() const + { + return const_iterator(); + } + const_iterator cend() const + { + return const_iterator(); + } +}; + +}; diff --git a/include/ucl.h b/include/ucl.h index 823ac8d..82a1fd1 100644 --- a/include/ucl.h +++ b/include/ucl.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2013, Vsevolod Stakhov +/* Copyright (c) 2013-2015, Vsevolod Stakhov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -135,7 +135,9 @@ typedef enum ucl_emitter { UCL_EMIT_JSON = 0, /**< Emit fine formatted JSON */ UCL_EMIT_JSON_COMPACT, /**< Emit compacted JSON */ UCL_EMIT_CONFIG, /**< Emit human readable config format */ - UCL_EMIT_YAML /**< Emit embedded YAML format */ + UCL_EMIT_YAML, /**< Emit embedded YAML format */ + UCL_EMIT_MSGPACK, /**< Emit msgpack output */ + UCL_EMIT_MAX /**< Unsupported emitter type */ } ucl_emitter_t; /** @@ -145,6 +147,7 @@ typedef enum ucl_emitter { * UCL still has to perform copying implicitly. 
*/ typedef enum ucl_parser_flags { + UCL_PARSER_DEFAULT = 0x0, /**< No special flags */ UCL_PARSER_KEY_LOWERCASE = 0x1, /**< Convert all keys to lower case */ UCL_PARSER_ZEROCOPY = 0x2, /**< Parse input in zero-copy mode if possible */ UCL_PARSER_NO_TIME = 0x4, /**< Do not parse time and treat time values as strings */ @@ -155,6 +158,7 @@ typedef enum ucl_parser_flags { * String conversion flags, that are used in #ucl_object_fromstring_common function. */ typedef enum ucl_string_flags { + UCL_STRING_RAW = 0x0, /**< Treat string as is */ UCL_STRING_ESCAPE = 0x1, /**< Perform JSON escape */ UCL_STRING_TRIM = 0x2, /**< Trim leading and trailing whitespaces */ UCL_STRING_PARSE_BOOLEAN = 0x4, /**< Parse passed string and detect boolean */ @@ -172,15 +176,36 @@ typedef enum ucl_string_flags { * Basic flags for an object */ typedef enum ucl_object_flags { - UCL_OBJECT_ALLOCATED_KEY = 0x1, /**< An object has key allocated internally */ - UCL_OBJECT_ALLOCATED_VALUE = 0x2, /**< An object has a string value allocated internally */ - UCL_OBJECT_NEED_KEY_ESCAPE = 0x4, /**< The key of an object need to be escaped on output */ - UCL_OBJECT_EPHEMERAL = 0x8, /**< Temporary object that does not need to be freed really */ - UCL_OBJECT_MULTILINE = 0x10, /**< String should be displayed as multiline string */ - UCL_OBJECT_MULTIVALUE = 0x20 /**< Object is a key with multiple values */ + UCL_OBJECT_ALLOCATED_KEY = (1 << 0), /**< An object has key allocated internally */ + UCL_OBJECT_ALLOCATED_VALUE = (1 << 1), /**< An object has a string value allocated internally */ + UCL_OBJECT_NEED_KEY_ESCAPE = (1 << 2), /**< The key of an object need to be escaped on output */ + UCL_OBJECT_EPHEMERAL = (1 << 3), /**< Temporary object that does not need to be freed really */ + UCL_OBJECT_MULTILINE = (1 << 4), /**< String should be displayed as multiline string */ + UCL_OBJECT_MULTIVALUE = (1 << 5), /**< Object is a key with multiple values */ + UCL_OBJECT_INHERITED = (1 << 6), /**< Object has been 
inherited from another */ + UCL_OBJECT_BINARY = (1 << 7) /**< Object contains raw binary data */ } ucl_object_flags_t; /** + * Duplicate policy types + */ +enum ucl_duplicate_strategy { + UCL_DUPLICATE_APPEND = 0, /**< Default policy to merge based on priorities */ + UCL_DUPLICATE_MERGE, /**< Merge new object with old one */ + UCL_DUPLICATE_REWRITE, /**< Rewrite old keys */ + UCL_DUPLICATE_ERROR /**< Stop parsing on duplicate found */ +}; + +/** + * Input format type + */ +enum ucl_parse_type { + UCL_PARSE_UCL = 0, /**< Default ucl format */ + UCL_PARSE_MSGPACK, /**< Message pack input format */ + UCL_PARSE_CSEXP /**< Canonical S-expressions */ +}; + +/** * UCL object structure. Please mention that the most of fields should not be touched by * UCL users. In future, this structure may be converted to private one. */ @@ -190,7 +215,7 @@ typedef struct ucl_object_s { */ union { int64_t iv; /**< Int value of an object */ - const char *sv; /**< String value of an object */ + const char *sv; /**< String value of an object */ double dv; /**< Double value of an object */ void *av; /**< Array */ void *ov; /**< Object */ @@ -496,6 +521,15 @@ UCL_EXTERN const ucl_object_t* ucl_array_find_index (const ucl_object_t *top, unsigned int index); /** + * Return the index of `elt` in the array `top` + * @param top object to get a key from (must be of type UCL_ARRAY) + * @param elt element to find index of (must NOT be NULL) + * @return index of `elt` in the array `top or (unsigned int)-1 if `elt` is not found + */ +UCL_EXTERN unsigned int ucl_array_index_of (ucl_object_t *top, + ucl_object_t *elt); + +/** * Replace an element in an array with a different element, returning the object * that was replaced. This object is not released, caller must unref the * returned object when it is no longer needed. 
@@ -612,6 +646,19 @@ UCL_EXTERN const ucl_object_t* ucl_object_find_key (const ucl_object_t *obj, const char *key); /** + * Return object identified by a key in the specified object, if the first key is + * not found then look for the next one. This process is repeated unless + * the next argument in the list is not NULL. So, `ucl_object_find_any_key(obj, key, NULL)` + * is equal to `ucl_object_find_key(obj, key)` + * @param obj object to get a key from (must be of type UCL_OBJECT) + * @param key key to search + * @param ... list of alternative keys to search (NULL terminated) + * @return object matching the specified key or NULL if key was not found + */ +UCL_EXTERN const ucl_object_t* ucl_object_find_any_key (const ucl_object_t *obj, + const char *key, ...); + +/** * Return object identified by a fixed size key in the specified object * @param obj object to get a key from (must be of type UCL_OBJECT) * @param key key to search @@ -631,6 +678,16 @@ UCL_EXTERN const ucl_object_t *ucl_lookup_path (const ucl_object_t *obj, const char *path); /** + * Return object identified by object notation string using arbitrary delimiter + * @param obj object to search in + * @param path dot.notation.path to the path to lookup. May use numeric .index on arrays + * @param sep the sepatorator to use in place of . (incase keys have . 
in them) + * @return object matched the specified path or NULL if path is not found + */ +UCL_EXTERN const ucl_object_t *ucl_lookup_path_char (const ucl_object_t *obj, + const char *path, char sep); + +/** * Returns a key of an object as a NULL terminated string * @param obj CL object * @return key or NULL if there is no key @@ -683,7 +740,7 @@ UCL_EXTERN int ucl_object_compare (const ucl_object_t *o1, * @param cmp */ UCL_EXTERN void ucl_object_array_sort (ucl_object_t *ar, - int (*cmp)(const ucl_object_t *o1, const ucl_object_t *o2)); + int (*cmp)(const ucl_object_t **o1, const ucl_object_t **o2)); /** * Get the priority for specific UCL object @@ -769,6 +826,21 @@ typedef bool (*ucl_macro_handler) (const unsigned char *data, size_t len, const ucl_object_t *arguments, void* ud); +/** + * Context dependent macro handler for a parser + * @param data the content of macro + * @param len the length of content + * @param arguments arguments object + * @param context previously parsed context + * @param ud opaque user data + * @param err error pointer + * @return true if macro has been parsed + */ +typedef bool (*ucl_context_macro_handler) (const unsigned char *data, size_t len, + const ucl_object_t *arguments, + const ucl_object_t *context, + void* ud); + /* Opaque parser */ struct ucl_parser; @@ -780,16 +852,38 @@ struct ucl_parser; UCL_EXTERN struct ucl_parser* ucl_parser_new (int flags); /** + * Sets the default priority for the parser applied to chunks that does not + * specify priority explicitly + * @param parser parser object + * @param prio default priority (0 .. 
16) + * @return true if parser's default priority was set + */ +UCL_EXTERN bool ucl_parser_set_default_priority (struct ucl_parser *parser, + unsigned prio); +/** * Register new handler for a macro * @param parser parser object * @param macro macro name (without leading dot) * @param handler handler (it is called immediately after macro is parsed) * @param ud opaque user data for a handler */ -UCL_EXTERN void ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, +UCL_EXTERN void ucl_parser_register_macro (struct ucl_parser *parser, + const char *macro, ucl_macro_handler handler, void* ud); /** + * Register new context dependent handler for a macro + * @param parser parser object + * @param macro macro name (without leading dot) + * @param handler handler (it is called immediately after macro is parsed) + * @param ud opaque user data for a handler + */ +UCL_EXTERN void ucl_parser_register_context_macro (struct ucl_parser *parser, + const char *macro, + ucl_context_macro_handler handler, + void* ud); + +/** * Handler to detect unregistered variables * @param data variable data * @param len length of variable @@ -843,6 +937,21 @@ UCL_EXTERN bool ucl_parser_add_chunk_priority (struct ucl_parser *parser, const unsigned char *data, size_t len, unsigned priority); /** + * Full version of ucl_add_chunk with priority and duplicate strategy + * @param parser parser structure + * @param data the pointer to the beginning of a chunk + * @param len the length of a chunk + * @param priority the desired priority of a chunk (only 4 least significant bits + * are considered for this parameter) + * @param strat duplicates merging strategy + * @param parse_type input format + * @return true if chunk has been added and false in case of error + */ +UCL_EXTERN bool ucl_parser_add_chunk_full (struct ucl_parser *parser, + const unsigned char *data, size_t len, unsigned priority, + enum ucl_duplicate_strategy strat, enum ucl_parse_type parse_type); + +/** * Load ucl 
object from a string * @param parser parser structure * @param data the pointer to the string @@ -853,6 +962,18 @@ UCL_EXTERN bool ucl_parser_add_string (struct ucl_parser *parser, const char *data,size_t len); /** + * Load ucl object from a string + * @param parser parser structure + * @param data the pointer to the string + * @param len the length of the string, if `len` is 0 then `data` must be zero-terminated string + * @param priority the desired priority of a chunk (only 4 least significant bits + * are considered for this parameter) + * @return true if string has been added and false in case of error + */ +UCL_EXTERN bool ucl_parser_add_string_priority (struct ucl_parser *parser, + const char *data, size_t len, unsigned priority); + +/** * Load and add data from a file * @param parser parser structure * @param filename the name of file @@ -863,6 +984,18 @@ UCL_EXTERN bool ucl_parser_add_file (struct ucl_parser *parser, const char *filename); /** + * Load and add data from a file + * @param parser parser structure + * @param filename the name of file + * @param err if *err is NULL it is set to parser error + * @param priority the desired priority of a chunk (only 4 least significant bits + * are considered for this parameter) + * @return true if chunk has been added and false in case of error + */ +UCL_EXTERN bool ucl_parser_add_file_priority (struct ucl_parser *parser, + const char *filename, unsigned priority); + +/** * Load and add data from a file descriptor * @param parser parser structure * @param filename the name of file @@ -873,6 +1006,28 @@ UCL_EXTERN bool ucl_parser_add_fd (struct ucl_parser *parser, int fd); /** + * Load and add data from a file descriptor + * @param parser parser structure + * @param filename the name of file + * @param err if *err is NULL it is set to parser error + * @param priority the desired priority of a chunk (only 4 least significant bits + * are considered for this parameter) + * @return true if chunk has been added and 
false in case of error + */ +UCL_EXTERN bool ucl_parser_add_fd_priority (struct ucl_parser *parser, + int fd, unsigned priority); + +/** + * Provide a UCL_ARRAY of paths to search for include files. The object is + * copied so caller must unref the object. + * @param parser parser structure + * @param paths UCL_ARRAY of paths to search + * @return true if the path search array was replaced in the parser + */ +UCL_EXTERN bool ucl_set_include_path (struct ucl_parser *parser, + ucl_object_t *paths); + +/** * Get a top object for a parser (refcount is increased) * @param parser parser structure * @param err if *err is NULL it is set to parser error @@ -881,12 +1036,34 @@ UCL_EXTERN bool ucl_parser_add_fd (struct ucl_parser *parser, UCL_EXTERN ucl_object_t* ucl_parser_get_object (struct ucl_parser *parser); /** - * Get the error string if failing + * Get the error string if parsing has been failed * @param parser parser object + * @return error description */ UCL_EXTERN const char *ucl_parser_get_error(struct ucl_parser *parser); /** + * Get the code of the last error + * @param parser parser object + * @return error code + */ +UCL_EXTERN int ucl_parser_get_error_code(struct ucl_parser *parser); + +/** + * Get the current column number within parser + * @param parser parser object + * @return current column number + */ +UCL_EXTERN unsigned ucl_parser_get_column(struct ucl_parser *parser); + +/** + * Get the current line number within parser + * @param parser parser object + * @return current line number + */ +UCL_EXTERN unsigned ucl_parser_get_linenum(struct ucl_parser *parser); + +/** * Clear the error in the parser * @param parser parser object */ @@ -994,6 +1171,17 @@ UCL_EXTERN unsigned char *ucl_object_emit (const ucl_object_t *obj, enum ucl_emitter emit_type); /** + * Emit object to a string that can contain `\0` inside + * @param obj object + * @param emit_type if type is #UCL_EMIT_JSON then emit json, if type is + * #UCL_EMIT_CONFIG then emit config like object 
+ * @param len the resulting length + * @return dump of an object (must be freed after using) or NULL in case of error + */ +UCL_EXTERN unsigned char *ucl_object_emit_len (const ucl_object_t *obj, + enum ucl_emitter emit_type, size_t *len); + +/** * Emit object to a string * @param obj object * @param emit_type if type is #UCL_EMIT_JSON then emit json, if type is diff --git a/klib/kvec.h b/klib/kvec.h index b5cce85..b0a7504 100644 --- a/klib/kvec.h +++ b/klib/kvec.h @@ -89,7 +89,7 @@ int main() { #define kv_concat(type, v1, v0) do { \ if ((v1).m < (v0).n + (v1).n) kv_resize(type, v1, (v0).n + (v1).n); \ - memcpy((v1).a + (v1).n, (v0).a, sizeof(type) * ((v0).n + (v1).n)); \ + memcpy((v1).a + (v1).n, (v0).a, sizeof(type) * (v0).n); \ (v1).n = (v0).n + (v1).n; \ } while (0) diff --git a/lua/lua_ucl.c b/lua/lua_ucl.c index 682b0b5..b6162b4 100644 --- a/lua/lua_ucl.c +++ b/lua/lua_ucl.c @@ -69,6 +69,7 @@ func = "huh"; #define PARSER_META "ucl.parser.meta" #define EMITTER_META "ucl.emitter.meta" #define NULL_META "null.emitter.meta" +#define OBJECT_META "ucl.object.meta" static int ucl_object_lua_push_array (lua_State *L, const ucl_object_t *obj); static int ucl_object_lua_push_scalar (lua_State *L, const ucl_object_t *obj, bool allow_array); @@ -172,19 +173,33 @@ static int ucl_object_lua_push_array (lua_State *L, const ucl_object_t *obj) { const ucl_object_t *cur; + ucl_object_iter_t it; int i = 1, nelt = 0; - /* Optimize allocation by preallocation of table */ - LL_FOREACH (obj, cur) { - nelt ++; + if (obj->type == UCL_ARRAY) { + nelt = obj->len; + it = ucl_object_iterate_new (obj); + lua_createtable (L, nelt, 0); + + while ((cur = ucl_object_iterate_safe (it, true))) { + ucl_object_push_lua (L, cur, false); + lua_rawseti (L, -2, i); + i ++; + } } + else { + /* Optimize allocation by preallocation of table */ + LL_FOREACH (obj, cur) { + nelt ++; + } - lua_createtable (L, nelt, 0); + lua_createtable (L, nelt, 0); - LL_FOREACH (obj, cur) { - ucl_object_push_lua (L, cur, 
false); - lua_rawseti (L, -2, i); - i ++; + LL_FOREACH (obj, cur) { + ucl_object_push_lua (L, cur, false); + lua_rawseti (L, -2, i); + i ++; + } } return 1; @@ -259,7 +274,7 @@ ucl_object_push_lua (lua_State *L, const ucl_object_t *obj, bool allow_array) case UCL_OBJECT: return ucl_object_lua_push_object (L, obj, allow_array); case UCL_ARRAY: - return ucl_object_lua_push_array (L, obj->value.av); + return ucl_object_lua_push_array (L, obj); default: return ucl_object_lua_push_scalar (L, obj, allow_array); } @@ -322,6 +337,7 @@ ucl_object_lua_fromtable (lua_State *L, int idx) if (obj != NULL) { ucl_array_append (top, obj); } + lua_pop (L, 1); } } else { @@ -446,6 +462,24 @@ ucl_object_lua_import (lua_State *L, int idx) } static int +lua_ucl_to_string (lua_State *L, const ucl_object_t *obj, enum ucl_emitter type) +{ + unsigned char *result; + + result = ucl_object_emit (obj, type); + + if (result != NULL) { + lua_pushstring (L, (const char *)result); + free (result); + } + else { + lua_pushnil (L); + } + + return 1; +} + +static int lua_ucl_parser_init (lua_State *L) { struct ucl_parser *parser, **pparser; @@ -474,6 +508,12 @@ lua_ucl_parser_get (lua_State *L, int index) return *((struct ucl_parser **) luaL_checkudata(L, index, PARSER_META)); } +static ucl_object_t * +lua_ucl_object_get (lua_State *L, int index) +{ + return *((ucl_object_t **) luaL_checkudata(L, index, OBJECT_META)); +} + /*** * @method parser:parse_file(name) * Parse UCL object from file. @@ -579,6 +619,108 @@ lua_ucl_parser_get_object (lua_State *L) return ret; } +/*** + * @method parser:get_object_wrapped() + * Get top object from parser and export it to userdata object without + * unwrapping to lua. 
+ * @return {ucl.object or nil} ucl object wrapped variable + */ +static int +lua_ucl_parser_get_object_wrapped (lua_State *L) +{ + struct ucl_parser *parser; + ucl_object_t *obj, **pobj; + int ret = 1; + + parser = lua_ucl_parser_get (L, 1); + obj = ucl_parser_get_object (parser); + + if (obj != NULL) { + pobj = lua_newuserdata (L, sizeof (*pobj)); + *pobj = obj; + luaL_getmetatable (L, OBJECT_META); + lua_setmetatable (L, -2); + } + else { + lua_pushnil (L); + } + + return ret; +} + +/*** + * @method parser:validate(schema) + * Validates the top object in the parser against schema. Schema might be + * another object or a string that represents file to load schema from. + * + * @param {string/table} schema input schema + * @return {result,err} two values: boolean result and the corresponding error + * + */ +static int +lua_ucl_parser_validate (lua_State *L) +{ + struct ucl_parser *parser, *schema_parser; + ucl_object_t *schema; + const char *schema_file; + struct ucl_schema_error err; + + parser = lua_ucl_parser_get (L, 1); + + if (parser && parser->top_obj) { + if (lua_type (L, 2) == LUA_TTABLE) { + schema = ucl_object_lua_import (L, 2); + + if (schema == NULL) { + lua_pushboolean (L, false); + lua_pushstring (L, "cannot load schema from lua table"); + + return 2; + } + } + else if (lua_type (L, 2) == LUA_TSTRING) { + schema_parser = ucl_parser_new (0); + schema_file = luaL_checkstring (L, 2); + + if (!ucl_parser_add_file (schema_parser, schema_file)) { + lua_pushboolean (L, false); + lua_pushfstring (L, "cannot parse schema file \"%s\": " + "%s", schema_file, ucl_parser_get_error (parser)); + ucl_parser_free (schema_parser); + + return 2; + } + + schema = ucl_parser_get_object (schema_parser); + ucl_parser_free (schema_parser); + } + else { + lua_pushboolean (L, false); + lua_pushstring (L, "invalid schema argument"); + + return 2; + } + + if (!ucl_object_validate (schema, parser->top_obj, &err)) { + lua_pushboolean (L, false); + lua_pushfstring (L, "validation 
error: " + "%s", err.msg); + } + else { + lua_pushboolean (L, true); + lua_pushnil (L); + } + + ucl_object_unref (schema); + } + else { + lua_pushboolean (L, false); + lua_pushstring (L, "invalid parser or empty top object"); + } + + return 2; +} + static int lua_ucl_parser_gc (lua_State *L) { @@ -590,6 +732,161 @@ lua_ucl_parser_gc (lua_State *L) return 0; } +/*** + * @method object:unwrap() + * Unwraps opaque ucl object to the native lua object (performing copying) + * @return {variant} any lua object + */ +static int +lua_ucl_object_unwrap (lua_State *L) +{ + ucl_object_t *obj; + + obj = lua_ucl_object_get (L, 1); + + if (obj) { + ucl_object_push_lua (L, obj, true); + } + else { + lua_pushnil (L); + } + + return 1; +} + +/*** + * @method object:tostring(type) + * Unwraps opaque ucl object to string (json by default). Optionally you can + * specify output format: + * + * - `json` - fine printed json + * - `json-compact` - compacted json + * - `config` - fine printed configuration + * - `ucl` - same as `config` + * - `yaml` - embedded yaml + * @param {string} type optional + * @return {string} string representation of the opaque ucl object + */ +static int +lua_ucl_object_tostring (lua_State *L) +{ + ucl_object_t *obj; + enum ucl_emitter format = UCL_EMIT_JSON_COMPACT; + + obj = lua_ucl_object_get (L, 1); + + if (obj) { + if (lua_gettop (L) > 1) { + if (lua_type (L, 2) == LUA_TSTRING) { + const char *strtype = lua_tostring (L, 2); + + if (strcasecmp (strtype, "json") == 0) { + format = UCL_EMIT_JSON; + } + else if (strcasecmp (strtype, "json-compact") == 0) { + format = UCL_EMIT_JSON_COMPACT; + } + else if (strcasecmp (strtype, "yaml") == 0) { + format = UCL_EMIT_YAML; + } + else if (strcasecmp (strtype, "config") == 0 || + strcasecmp (strtype, "ucl") == 0) { + format = UCL_EMIT_CONFIG; + } + } + } + + return lua_ucl_to_string (L, obj, format); + } + else { + lua_pushnil (L); + } + + return 1; +} + +/*** + * @method object:validate(schema, path) + * Validates the 
given ucl object using schema object represented as another + * opaque ucl object. You can also specify path in the form `#/path/def` to + * specify the specific schema element to perform validation. + * + * @param {ucl.object} schema schema object + * @param {string} path optional path for validation procedure + * @return {result,err} two values: boolean result and the corresponding error + */ +static int +lua_ucl_object_validate (lua_State *L) +{ + ucl_object_t *obj, *schema; + const ucl_object_t *schema_elt; + bool res = false; + struct ucl_schema_error err; + const char *path = NULL; + + obj = lua_ucl_object_get (L, 1); + schema = lua_ucl_object_get (L, 2); + + if (schema && obj && ucl_object_type (schema) == UCL_OBJECT) { + if (lua_gettop (L) > 2 && lua_type (L, 3) == LUA_TSTRING) { + path = lua_tostring (L, 3); + if (path[0] == '#') { + path ++; + } + } + + if (path) { + schema_elt = ucl_lookup_path_char (schema, path, '/'); + } + else { + /* Use the top object */ + schema_elt = schema; + } + + if (schema_elt) { + res = ucl_object_validate (schema_elt, obj, &err); + + if (res) { + lua_pushboolean (L, res); + lua_pushnil (L); + } + else { + lua_pushboolean (L, res); + lua_pushfstring (L, "validation error: %s", err.msg); + } + } + else { + lua_pushboolean (L, res); + + if (path) { + lua_pushfstring (L, "cannot find the requested path: %s", path); + } + else { + /* Should not be reached */ + lua_pushstring (L, "unknown error"); + } + } + } + else { + lua_pushboolean (L, res); + lua_pushstring (L, "invalid object or schema"); + } + + return 2; +} + +static int +lua_ucl_object_gc (lua_State *L) +{ + ucl_object_t *obj; + + obj = lua_ucl_object_get (L, 1); + + ucl_object_unref (obj); + + return 0; +} + static void lua_ucl_parser_mt (lua_State *L) { @@ -610,25 +907,45 @@ lua_ucl_parser_mt (lua_State *L) lua_pushcfunction (L, lua_ucl_parser_get_object); lua_setfield (L, -2, "get_object"); + lua_pushcfunction (L, lua_ucl_parser_get_object_wrapped); + lua_setfield (L, 
-2, "get_object_wrapped"); + + lua_pushcfunction (L, lua_ucl_parser_validate); + lua_setfield (L, -2, "validate"); + lua_pop (L, 1); } -static int -lua_ucl_to_string (lua_State *L, const ucl_object_t *obj, enum ucl_emitter type) +static void +lua_ucl_object_mt (lua_State *L) { - unsigned char *result; + luaL_newmetatable (L, OBJECT_META); - result = ucl_object_emit (obj, type); + lua_pushvalue(L, -1); + lua_setfield(L, -2, "__index"); - if (result != NULL) { - lua_pushstring (L, (const char *)result); - free (result); - } - else { - lua_pushnil (L); - } + lua_pushcfunction (L, lua_ucl_object_gc); + lua_setfield (L, -2, "__gc"); - return 1; + lua_pushcfunction (L, lua_ucl_object_tostring); + lua_setfield (L, -2, "__tostring"); + + lua_pushcfunction (L, lua_ucl_object_tostring); + lua_setfield (L, -2, "tostring"); + + lua_pushcfunction (L, lua_ucl_object_unwrap); + lua_setfield (L, -2, "unwrap"); + + lua_pushcfunction (L, lua_ucl_object_unwrap); + lua_setfield (L, -2, "tolua"); + + lua_pushcfunction (L, lua_ucl_object_validate); + lua_setfield (L, -2, "validate"); + + lua_pushstring (L, OBJECT_META); + lua_setfield (L, -2, "class"); + + lua_pop (L, 1); } static int @@ -774,6 +1091,7 @@ luaopen_ucl (lua_State *L) { lua_ucl_parser_mt (L); lua_ucl_null_mt (L); + lua_ucl_object_mt (L); /* Create the refs weak table: */ lua_createtable (L, 0, 2); diff --git a/m4/ax_lua.m4 b/m4/ax_lua.m4 new file mode 100644 index 0000000..f8e2fd4 --- /dev/null +++ b/m4/ax_lua.m4 @@ -0,0 +1,664 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_lua.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PROG_LUA[([MINIMUM-VERSION], [TOO-BIG-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])] +# AX_LUA_HEADERS[([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])] +# AX_LUA_LIBS[([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])] +# AX_LUA_READLINE[([ACTION-IF-FOUND], 
 [ACTION-IF-NOT-FOUND])] +# +# DESCRIPTION +# +# Detect a Lua interpreter, optionally specifying a minimum and maximum +# version number. Set up important Lua paths, such as the directories in +# which to install scripts and modules (shared libraries). +# +# Also detect Lua headers and libraries. The Lua version contained in the +# header is checked to match the Lua interpreter version exactly. When +# searching for Lua libraries, the version number is used as a suffix. +# This is done with the goal of supporting multiple Lua installs (5.1, +# 5.2, and 5.3 side-by-side). +# +# A note on compatibility with previous versions: This file has been +# mostly rewritten for serial 18. Most developers should be able to use +# these macros without needing to modify configure.ac. Care has been taken +# to preserve each macro's behavior, but there are some differences: +# +# 1) AX_WITH_LUA is deprecated; it now expands to the exact same thing as +# AX_PROG_LUA with no arguments. +# +# 2) AX_LUA_HEADERS now checks that the version number defined in lua.h +# matches the interpreter version. AX_LUA_HEADERS_VERSION is therefore +# unnecessary, so it is deprecated and does not expand to anything. +# +# 3) The configure flag --with-lua-suffix no longer exists; the user +# should instead specify the LUA precious variable on the command line. +# See the AX_PROG_LUA description for details. +# +# Please read the macro descriptions below for more information. +# +# This file was inspired by Andrew Dalke's and James Henstridge's +# python.m4 and Tom Payne's, Matthieu Moy's, and Reuben Thomas's ax_lua.m4 +# (serial 17). Basically, this file is a mash-up of those two files. I +# like to think it combines the best of the two! +# +# AX_PROG_LUA: Search for the Lua interpreter, and set up important Lua +# paths. Adds precious variable LUA, which may contain the path of the Lua +# interpreter. If LUA is blank, the user's path is searched for a +# suitable interpreter. 
+# +# If MINIMUM-VERSION is supplied, then only Lua interpreters with a +# version number greater or equal to MINIMUM-VERSION will be accepted. If +# TOO-BIG-VERSION is also supplied, then only Lua interpreters with a +# version number greater or equal to MINIMUM-VERSION and less than +# TOO-BIG-VERSION will be accepted. +# +# The Lua version number, LUA_VERSION, is found from the interpreter, and +# substituted. LUA_PLATFORM is also found, but not currently supported (no +# standard representation). +# +# Finally, the macro finds four paths: +# +# luadir Directory to install Lua scripts. +# pkgluadir $luadir/$PACKAGE +# luaexecdir Directory to install Lua modules. +# pkgluaexecdir $luaexecdir/$PACKAGE +# +# These paths are found based on $prefix, $exec_prefix, Lua's +# package.path, and package.cpath. The first path of package.path +# beginning with $prefix is selected as luadir. The first path of +# package.cpath beginning with $exec_prefix is used as luaexecdir. This +# should work on all reasonable Lua installations. If a path cannot be +# determined, a default path is used. Of course, the user can override +# these later when invoking make. +# +# luadir Default: $prefix/share/lua/$LUA_VERSION +# luaexecdir Default: $exec_prefix/lib/lua/$LUA_VERSION +# +# These directories can be used by Automake as install destinations. The +# variable name minus 'dir' needs to be used as a prefix to the +# appropriate Automake primary, e.g. lua_SCRIPTS or luaexec_LIBRARIES. +# +# If an acceptable Lua interpreter is found, then ACTION-IF-FOUND is +# performed, otherwise ACTION-IF-NOT-FOUND is performed. If ACTION-IF-NOT- +# FOUND is blank, then it will default to printing an error. To prevent +# the default behavior, give ':' as an action. +# +# AX_LUA_HEADERS: Search for Lua headers. Requires that AX_PROG_LUA be +# expanded before this macro. Adds precious variable LUA_INCLUDE, which +# may contain Lua specific include flags, e.g. -I/usr/include/lua5.1. 
If +# LUA_INCLUDE is blank, then this macro will attempt to find suitable +# flags. +# +# LUA_INCLUDE can be used by Automake to compile Lua modules or +# executables with embedded interpreters. The *_CPPFLAGS variables should +# be used for this purpose, e.g. myprog_CPPFLAGS = $(LUA_INCLUDE). +# +# This macro searches for the header lua.h (and others). The search is +# performed with a combination of CPPFLAGS, CPATH, etc, and LUA_INCLUDE. +# If the search is unsuccessful, then some common directories are tried. +# If the headers are then found, then LUA_INCLUDE is set accordingly. +# +# The paths automatically searched are: +# +# * /usr/include/luaX.Y +# * /usr/include/lua/X.Y +# * /usr/include/luaXY +# * /usr/local/include/luaX.Y +# * /usr/local/include/lua-X.Y +# * /usr/local/include/lua/X.Y +# * /usr/local/include/luaXY +# +# (Where X.Y is the Lua version number, e.g. 5.1.) +# +# The Lua version number found in the headers is always checked to match +# the Lua interpreter's version number. Lua headers with mismatched +# version numbers are not accepted. +# +# If headers are found, then ACTION-IF-FOUND is performed, otherwise +# ACTION-IF-NOT-FOUND is performed. If ACTION-IF-NOT-FOUND is blank, then +# it will default to printing an error. To prevent the default behavior, +# set the action to ':'. +# +# AX_LUA_LIBS: Search for Lua libraries. Requires that AX_PROG_LUA be +# expanded before this macro. Adds precious variable LUA_LIB, which may +# contain Lua specific linker flags, e.g. -llua5.1. If LUA_LIB is blank, +# then this macro will attempt to find suitable flags. +# +# LUA_LIB can be used by Automake to link Lua modules or executables with +# embedded interpreters. The *_LIBADD and *_LDADD variables should be used +# for this purpose, e.g. mymod_LIBADD = $(LUA_LIB). +# +# This macro searches for the Lua library. More technically, it searches +# for a library containing the function lua_load. 
The search is performed +# with a combination of LIBS, LIBRARY_PATH, and LUA_LIB. +# +# If the search determines that some linker flags are missing, then those +# flags will be added to LUA_LIB. +# +# If libraries are found, then ACTION-IF-FOUND is performed, otherwise +# ACTION-IF-NOT-FOUND is performed. If ACTION-IF-NOT-FOUND is blank, then +# it will default to printing an error. To prevent the default behavior, +# set the action to ':'. +# +# AX_LUA_READLINE: Search for readline headers and libraries. Requires the +# AX_LIB_READLINE macro, which is provided by ax_lib_readline.m4 from the +# Autoconf Archive. +# +# If a readline compatible library is found, then ACTION-IF-FOUND is +# performed, otherwise ACTION-IF-NOT-FOUND is performed. +# +# LICENSE +# +# Copyright (c) 2015 Reuben Thomas <rrt@sc3d.org> +# Copyright (c) 2014 Tim Perkins <tprk77@gmail.com> +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. 
The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 39 + +dnl ========================================================================= +dnl AX_PROG_LUA([MINIMUM-VERSION], [TOO-BIG-VERSION], +dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ========================================================================= +AC_DEFUN([AX_PROG_LUA], +[ + dnl Check for required tools. + AC_REQUIRE([AC_PROG_GREP]) + AC_REQUIRE([AC_PROG_SED]) + + dnl Make LUA a precious variable. + AC_ARG_VAR([LUA], [The Lua interpreter, e.g. /usr/bin/lua5.1]) + + dnl Find a Lua interpreter. + m4_define_default([_AX_LUA_INTERPRETER_LIST], + [lua lua5.3 lua53 lua5.2 lua52 lua5.1 lua51 lua50]) + + m4_if([$1], [], + [ dnl No version check is needed. Find any Lua interpreter. + AS_IF([test "x$LUA" = 'x'], + [AC_PATH_PROGS([LUA], [_AX_LUA_INTERPRETER_LIST], [:])]) + ax_display_LUA='lua' + + AS_IF([test "x$LUA" != 'x:'], + [ dnl At least check if this is a Lua interpreter. + AC_MSG_CHECKING([if $LUA is a Lua interpreter]) + _AX_LUA_CHK_IS_INTRP([$LUA], + [AC_MSG_RESULT([yes])], + [ AC_MSG_RESULT([no]) + AC_MSG_ERROR([not a Lua interpreter]) + ]) + ]) + ], + [ dnl A version check is needed. + AS_IF([test "x$LUA" != 'x'], + [ dnl Check if this is a Lua interpreter. + AC_MSG_CHECKING([if $LUA is a Lua interpreter]) + _AX_LUA_CHK_IS_INTRP([$LUA], + [AC_MSG_RESULT([yes])], + [ AC_MSG_RESULT([no]) + AC_MSG_ERROR([not a Lua interpreter]) + ]) + dnl Check the version. 
+ m4_if([$2], [], + [_ax_check_text="whether $LUA version >= $1"], + [_ax_check_text="whether $LUA version >= $1, < $2"]) + AC_MSG_CHECKING([$_ax_check_text]) + _AX_LUA_CHK_VER([$LUA], [$1], [$2], + [AC_MSG_RESULT([yes])], + [ AC_MSG_RESULT([no]) + AC_MSG_ERROR([version is out of range for specified LUA])]) + ax_display_LUA=$LUA + ], + [ dnl Try each interpreter until we find one that satisfies VERSION. + m4_if([$2], [], + [_ax_check_text="for a Lua interpreter with version >= $1"], + [_ax_check_text="for a Lua interpreter with version >= $1, < $2"]) + AC_CACHE_CHECK([$_ax_check_text], + [ax_cv_pathless_LUA], + [ for ax_cv_pathless_LUA in _AX_LUA_INTERPRETER_LIST none; do + test "x$ax_cv_pathless_LUA" = 'xnone' && break + _AX_LUA_CHK_IS_INTRP([$ax_cv_pathless_LUA], [], [continue]) + _AX_LUA_CHK_VER([$ax_cv_pathless_LUA], [$1], [$2], [break]) + done + ]) + dnl Set $LUA to the absolute path of $ax_cv_pathless_LUA. + AS_IF([test "x$ax_cv_pathless_LUA" = 'xnone'], + [LUA=':'], + [AC_PATH_PROG([LUA], [$ax_cv_pathless_LUA])]) + ax_display_LUA=$ax_cv_pathless_LUA + ]) + ]) + + AS_IF([test "x$LUA" = 'x:'], + [ dnl Run any user-specified action, or abort. + m4_default([$4], [AC_MSG_ERROR([cannot find suitable Lua interpreter])]) + ], + [ dnl Query Lua for its version number. + AC_CACHE_CHECK([for $ax_display_LUA version], + [ax_cv_lua_version], + [ dnl Get the interpreter version in X.Y format. This should work for + dnl interpreters version 5.0 and beyond. 
+ ax_cv_lua_version=[`$LUA -e ' + -- return a version number in X.Y format + local _, _, ver = string.find(_VERSION, "^Lua (%d+%.%d+)") + print(ver)'`] + ]) + AS_IF([test "x$ax_cv_lua_version" = 'x'], + [AC_MSG_ERROR([invalid Lua version number])]) + AC_SUBST([LUA_VERSION], [$ax_cv_lua_version]) + AC_SUBST([LUA_SHORT_VERSION], [`echo "$LUA_VERSION" | $SED 's|\.||'`]) + + dnl The following check is not supported: + dnl At times (like when building shared libraries) you may want to know + dnl which OS platform Lua thinks this is. + AC_CACHE_CHECK([for $ax_display_LUA platform], + [ax_cv_lua_platform], + [ax_cv_lua_platform=[`$LUA -e 'print("unknown")'`]]) + AC_SUBST([LUA_PLATFORM], [$ax_cv_lua_platform]) + + dnl Use the values of $prefix and $exec_prefix for the corresponding + dnl values of LUA_PREFIX and LUA_EXEC_PREFIX. These are made distinct + dnl variables so they can be overridden if need be. However, the general + dnl consensus is that you shouldn't need this ability. + AC_SUBST([LUA_PREFIX], ['${prefix}']) + AC_SUBST([LUA_EXEC_PREFIX], ['${exec_prefix}']) + + dnl Lua provides no way to query the script directory, and instead + dnl provides LUA_PATH. However, we should be able to make a safe educated + dnl guess. If the built-in search path contains a directory which is + dnl prefixed by $prefix, then we can store scripts there. The first + dnl matching path will be used. + AC_CACHE_CHECK([for $ax_display_LUA script directory], + [ax_cv_lua_luadir], + [ AS_IF([test "x$prefix" = 'xNONE'], + [ax_lua_prefix=$ac_default_prefix], + [ax_lua_prefix=$prefix]) + + dnl Initialize to the default path. + ax_cv_lua_luadir="$LUA_PREFIX/share/lua/$LUA_VERSION" + + dnl Try to find a path with the prefix. + _AX_LUA_FND_PRFX_PTH([$LUA], [$ax_lua_prefix], [script]) + AS_IF([test "x$ax_lua_prefixed_path" != 'x'], + [ dnl Fix the prefix. 
+ _ax_strip_prefix=`echo "$ax_lua_prefix" | $SED 's|.|.|g'` + ax_cv_lua_luadir=`echo "$ax_lua_prefixed_path" | \ + $SED "s|^$_ax_strip_prefix|$LUA_PREFIX|"` + ]) + ]) + AC_SUBST([luadir], [$ax_cv_lua_luadir]) + AC_SUBST([pkgluadir], [\${luadir}/$PACKAGE]) + + dnl Lua provides no way to query the module directory, and instead + dnl provides LUA_PATH. However, we should be able to make a safe educated + dnl guess. If the built-in search path contains a directory which is + dnl prefixed by $exec_prefix, then we can store modules there. The first + dnl matching path will be used. + AC_CACHE_CHECK([for $ax_display_LUA module directory], + [ax_cv_lua_luaexecdir], + [ AS_IF([test "x$exec_prefix" = 'xNONE'], + [ax_lua_exec_prefix=$ax_lua_prefix], + [ax_lua_exec_prefix=$exec_prefix]) + + dnl Initialize to the default path. + ax_cv_lua_luaexecdir="$LUA_EXEC_PREFIX/lib/lua/$LUA_VERSION" + + dnl Try to find a path with the prefix. + _AX_LUA_FND_PRFX_PTH([$LUA], + [$ax_lua_exec_prefix], [module]) + AS_IF([test "x$ax_lua_prefixed_path" != 'x'], + [ dnl Fix the prefix. + _ax_strip_prefix=`echo "$ax_lua_exec_prefix" | $SED 's|.|.|g'` + ax_cv_lua_luaexecdir=`echo "$ax_lua_prefixed_path" | \ + $SED "s|^$_ax_strip_prefix|$LUA_EXEC_PREFIX|"` + ]) + ]) + AC_SUBST([luaexecdir], [$ax_cv_lua_luaexecdir]) + AC_SUBST([pkgluaexecdir], [\${luaexecdir}/$PACKAGE]) + + dnl Run any user specified action. + $3 + ]) +]) + +dnl AX_WITH_LUA is now the same thing as AX_PROG_LUA. +AC_DEFUN([AX_WITH_LUA], +[ + AC_MSG_WARN([[$0 is deprecated, please use AX_PROG_LUA instead]]) + AX_PROG_LUA +]) + + +dnl ========================================================================= +dnl _AX_LUA_CHK_IS_INTRP(PROG, [ACTION-IF-TRUE], [ACTION-IF-FALSE]) +dnl ========================================================================= +AC_DEFUN([_AX_LUA_CHK_IS_INTRP], +[ + dnl A minimal Lua factorial to prove this is an interpreter. This should work + dnl for Lua interpreters version 5.0 and beyond. 
+ _ax_lua_factorial=[`$1 2>/dev/null -e ' + -- a simple factorial + function fact (n) + if n == 0 then + return 1 + else + return n * fact(n-1) + end + end + print("fact(5) is " .. fact(5))'`] + AS_IF([test "$_ax_lua_factorial" = 'fact(5) is 120'], + [$2], [$3]) +]) + + +dnl ========================================================================= +dnl _AX_LUA_CHK_VER(PROG, MINIMUM-VERSION, [TOO-BIG-VERSION], +dnl [ACTION-IF-TRUE], [ACTION-IF-FALSE]) +dnl ========================================================================= +AC_DEFUN([_AX_LUA_CHK_VER], +[ + dnl Check that the Lua version is within the bounds. Only the major and minor + dnl version numbers are considered. This should work for Lua interpreters + dnl version 5.0 and beyond. + _ax_lua_good_version=[`$1 -e ' + -- a script to compare versions + function verstr2num(verstr) + local _, _, majorver, minorver = string.find(verstr, "^(%d+)%.(%d+)") + if majorver and minorver then + return tonumber(majorver) * 100 + tonumber(minorver) + end + end + local minver = verstr2num("$2") + local _, _, trimver = string.find(_VERSION, "^Lua (.*)") + local ver = verstr2num(trimver) + local maxver = verstr2num("$3") or 1e9 + if minver <= ver and ver < maxver then + print("yes") + else + print("no") + end'`] + AS_IF([test "x$_ax_lua_good_version" = "xyes"], + [$4], [$5]) +]) + + +dnl ========================================================================= +dnl _AX_LUA_FND_PRFX_PTH(PROG, PREFIX, SCRIPT-OR-MODULE-DIR) +dnl ========================================================================= +AC_DEFUN([_AX_LUA_FND_PRFX_PTH], +[ + dnl Get the script or module directory by querying the Lua interpreter, + dnl filtering on the given prefix, and selecting the shallowest path. If no + dnl path is found matching the prefix, the result will be an empty string. + dnl The third argument determines the type of search, it can be 'script' or + dnl 'module'. 
Supplying 'script' will perform the search with package.path + dnl and LUA_PATH, and supplying 'module' will search with package.cpath and + dnl LUA_CPATH. This is done for compatibility with Lua 5.0. + + ax_lua_prefixed_path=[`$1 -e ' + -- get the path based on search type + local searchtype = "$3" + local paths = "" + if searchtype == "script" then + paths = (package and package.path) or LUA_PATH + elseif searchtype == "module" then + paths = (package and package.cpath) or LUA_CPATH + end + -- search for the prefix + local prefix = "'$2'" + local minpath = "" + local mindepth = 1e9 + string.gsub(paths, "(@<:@^;@:>@+)", + function (path) + path = string.gsub(path, "%?.*$", "") + path = string.gsub(path, "/@<:@^/@:>@*$", "") + if string.find(path, prefix) then + local depth = string.len(string.gsub(path, "@<:@^/@:>@", "")) + if depth < mindepth then + minpath = path + mindepth = depth + end + end + end) + print(minpath)'`] +]) + + +dnl ========================================================================= +dnl AX_LUA_HEADERS([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ========================================================================= +AC_DEFUN([AX_LUA_HEADERS], +[ + dnl Check for LUA_VERSION. + AC_MSG_CHECKING([if LUA_VERSION is defined]) + AS_IF([test "x$LUA_VERSION" != 'x'], + [AC_MSG_RESULT([yes])], + [ AC_MSG_RESULT([no]) + AC_MSG_ERROR([cannot check Lua headers without knowing LUA_VERSION]) + ]) + + dnl Make LUA_INCLUDE a precious variable. + AC_ARG_VAR([LUA_INCLUDE], [The Lua includes, e.g. -I/usr/include/lua5.1]) + + dnl Some default directories to search. 
+ LUA_SHORT_VERSION=`echo "$LUA_VERSION" | $SED 's|\.||'` + m4_define_default([_AX_LUA_INCLUDE_LIST], + [ /usr/include/lua$LUA_VERSION \ + /usr/include/lua-$LUA_VERSION \ + /usr/include/lua/$LUA_VERSION \ + /usr/include/lua$LUA_SHORT_VERSION \ + /usr/local/include/lua$LUA_VERSION \ + /usr/local/include/lua-$LUA_VERSION \ + /usr/local/include/lua/$LUA_VERSION \ + /usr/local/include/lua$LUA_SHORT_VERSION \ + ]) + + dnl Try to find the headers. + _ax_lua_saved_cppflags=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $LUA_INCLUDE" + AC_CHECK_HEADERS([lua.h lualib.h lauxlib.h luaconf.h]) + CPPFLAGS=$_ax_lua_saved_cppflags + + dnl Try some other directories if LUA_INCLUDE was not set. + AS_IF([test "x$LUA_INCLUDE" = 'x' && + test "x$ac_cv_header_lua_h" != 'xyes'], + [ dnl Try some common include paths. + for _ax_include_path in _AX_LUA_INCLUDE_LIST; do + test ! -d "$_ax_include_path" && continue + + AC_MSG_CHECKING([for Lua headers in]) + AC_MSG_RESULT([$_ax_include_path]) + + AS_UNSET([ac_cv_header_lua_h]) + AS_UNSET([ac_cv_header_lualib_h]) + AS_UNSET([ac_cv_header_lauxlib_h]) + AS_UNSET([ac_cv_header_luaconf_h]) + + _ax_lua_saved_cppflags=$CPPFLAGS + CPPFLAGS="$CPPFLAGS -I$_ax_include_path" + AC_CHECK_HEADERS([lua.h lualib.h lauxlib.h luaconf.h]) + CPPFLAGS=$_ax_lua_saved_cppflags + + AS_IF([test "x$ac_cv_header_lua_h" = 'xyes'], + [ LUA_INCLUDE="-I$_ax_include_path" + break + ]) + done + ]) + + AS_IF([test "x$ac_cv_header_lua_h" = 'xyes'], + [ dnl Make a program to print LUA_VERSION defined in the header. + dnl TODO It would be really nice if we could do this without compiling a + dnl program, then it would work when cross compiling. But I'm not sure how + dnl to do this reliably. For now, assume versions match when cross compiling. 
+ + AS_IF([test "x$cross_compiling" != 'xyes'], + [ AC_CACHE_CHECK([for Lua header version], + [ax_cv_lua_header_version], + [ _ax_lua_saved_cppflags=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $LUA_INCLUDE" + AC_RUN_IFELSE( + [ AC_LANG_SOURCE([[ +#include <lua.h> +#include <stdlib.h> +#include <stdio.h> +int main(int argc, char ** argv) +{ + if(argc > 1) printf("%s", LUA_VERSION); + exit(EXIT_SUCCESS); +} +]]) + ], + [ ax_cv_lua_header_version=`./conftest$EXEEXT p | \ + $SED -n "s|^Lua \(@<:@0-9@:>@\{1,\}\.@<:@0-9@:>@\{1,\}\).\{0,\}|\1|p"` + ], + [ax_cv_lua_header_version='unknown']) + CPPFLAGS=$_ax_lua_saved_cppflags + ]) + + dnl Compare this to the previously found LUA_VERSION. + AC_MSG_CHECKING([if Lua header version matches $LUA_VERSION]) + AS_IF([test "x$ax_cv_lua_header_version" = "x$LUA_VERSION"], + [ AC_MSG_RESULT([yes]) + ax_header_version_match='yes' + ], + [ AC_MSG_RESULT([no]) + ax_header_version_match='no' + ]) + ], + [ AC_MSG_WARN([cross compiling so assuming header version number matches]) + ax_header_version_match='yes' + ]) + ]) + + dnl Was LUA_INCLUDE specified? + AS_IF([test "x$ax_header_version_match" != 'xyes' && + test "x$LUA_INCLUDE" != 'x'], + [AC_MSG_ERROR([cannot find headers for specified LUA_INCLUDE])]) + + dnl Test the final result and run user code. + AS_IF([test "x$ax_header_version_match" = 'xyes'], [$1], + [m4_default([$2], [AC_MSG_ERROR([cannot find Lua includes])])]) +]) + +dnl AX_LUA_HEADERS_VERSION no longer exists, use AX_LUA_HEADERS. +AC_DEFUN([AX_LUA_HEADERS_VERSION], +[ + AC_MSG_WARN([[$0 is deprecated, please use AX_LUA_HEADERS instead]]) +]) + + +dnl ========================================================================= +dnl AX_LUA_LIBS([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ========================================================================= +AC_DEFUN([AX_LUA_LIBS], +[ + dnl TODO Should this macro also check various -L flags? + + dnl Check for LUA_VERSION. 
+ AC_MSG_CHECKING([if LUA_VERSION is defined]) + AS_IF([test "x$LUA_VERSION" != 'x'], + [AC_MSG_RESULT([yes])], + [ AC_MSG_RESULT([no]) + AC_MSG_ERROR([cannot check Lua libs without knowing LUA_VERSION]) + ]) + + dnl Make LUA_LIB a precious variable. + AC_ARG_VAR([LUA_LIB], [The Lua library, e.g. -llua5.1]) + + AS_IF([test "x$LUA_LIB" != 'x'], + [ dnl Check that LUA_LIBS works. + _ax_lua_saved_libs=$LIBS + LIBS="$LUA_LIB $LIBS" + AC_SEARCH_LIBS([lua_load], [], + [_ax_found_lua_libs='yes'], + [_ax_found_lua_libs='no']) + LIBS=$_ax_lua_saved_libs + + dnl Check the result. + AS_IF([test "x$_ax_found_lua_libs" != 'xyes'], + [AC_MSG_ERROR([cannot find libs for specified LUA_LIB])]) + ], + [ dnl First search for extra libs. + _ax_lua_extra_libs='' + + _ax_lua_saved_libs=$LIBS + LIBS="$LUA_LIB $LIBS" + AC_SEARCH_LIBS([exp], [m]) + AC_SEARCH_LIBS([dlopen], [dl]) + LIBS=$_ax_lua_saved_libs + + AS_IF([test "x$ac_cv_search_exp" != 'xno' && + test "x$ac_cv_search_exp" != 'xnone required'], + [_ax_lua_extra_libs="$_ax_lua_extra_libs $ac_cv_search_exp"]) + + AS_IF([test "x$ac_cv_search_dlopen" != 'xno' && + test "x$ac_cv_search_dlopen" != 'xnone required'], + [_ax_lua_extra_libs="$_ax_lua_extra_libs $ac_cv_search_dlopen"]) + + dnl Try to find the Lua libs. + _ax_lua_saved_libs=$LIBS + LIBS="$LUA_LIB $LIBS" + AC_SEARCH_LIBS([lua_load], + [ lua$LUA_VERSION \ + lua$LUA_SHORT_VERSION \ + lua-$LUA_VERSION \ + lua-$LUA_SHORT_VERSION \ + lua \ + ], + [_ax_found_lua_libs='yes'], + [_ax_found_lua_libs='no'], + [$_ax_lua_extra_libs]) + LIBS=$_ax_lua_saved_libs + + AS_IF([test "x$ac_cv_search_lua_load" != 'xno' && + test "x$ac_cv_search_lua_load" != 'xnone required'], + [LUA_LIB="$ac_cv_search_lua_load $_ax_lua_extra_libs"]) + ]) + + dnl Test the result and run user code. 
+ AS_IF([test "x$_ax_found_lua_libs" = 'xyes'], [$1], + [m4_default([$2], [AC_MSG_ERROR([cannot find Lua libs])])]) +]) + + +dnl ========================================================================= +dnl AX_LUA_READLINE([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ========================================================================= +AC_DEFUN([AX_LUA_READLINE], +[ + AX_LIB_READLINE + AS_IF([test "x$ac_cv_header_readline_readline_h" != 'x' && + test "x$ac_cv_header_readline_history_h" != 'x'], + [ LUA_LIBS_CFLAGS="-DLUA_USE_READLINE $LUA_LIBS_CFLAGS" + $1 + ], + [$2]) +]) diff --git a/python/setup.py b/python/setup.py new file mode 100644 index 0000000..b2b8981 --- /dev/null +++ b/python/setup.py @@ -0,0 +1,37 @@ +import distutils.ccompiler +import distutils.sysconfig +from distutils.core import setup, Extension +import os + + +compiler = distutils.ccompiler.new_compiler() +search_paths=[os.path.expanduser('~/{}'), '/opt/local/{}', '/usr/local/{}', '/usr/{}'] +lib_paths = [ a.format("lib") for a in search_paths] +inc_paths = [ a.format("include") for a in search_paths] + +uclmodule = Extension('ucl', + include_dirs = inc_paths, + library_dirs = lib_paths, + libraries = ['ucl'], + sources = ['src/uclmodule.c'], + runtime_library_dirs = lib_paths, + language='c') + +setup(name='ucl', + version='1.0', + description='ucl parser and emmitter', + ext_modules = [uclmodule], + author="Eitan Adler", + author_email="lists@eitanadler.com", + url="https://github.com/vstakhov/libucl/", + license="MIT", + classifiers=["Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: DFSG approved", + "License :: OSI Approved :: MIT License", + "Programming Language :: C", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: Implementation :: CPython", + "Topic :: Software Development :: Libraries", + ] + ) diff --git a/python/src/uclmodule.c b/python/src/uclmodule.c new file mode 100644 index 0000000..d63b73a --- /dev/null 
+++ b/python/src/uclmodule.c @@ -0,0 +1,156 @@ +// Attempts to load a UCL structure from a string +#include <ucl.h> +#include <Python.h> + +static PyObject* +_basic_ucl_type(ucl_object_t const * const obj) { + if (obj->type == UCL_INT) { + return Py_BuildValue("L", (long long)ucl_object_toint (obj)); + } + else if (obj->type == UCL_FLOAT) { + return Py_BuildValue("d", ucl_object_todouble (obj)); + } + else if (obj->type == UCL_STRING) { + return Py_BuildValue("s", ucl_object_tostring (obj)); + } + else if (obj->type == UCL_BOOLEAN) { + // maybe used 'p' here? + return Py_BuildValue("s", ucl_object_tostring_forced (obj)); + } + else if (obj->type == UCL_TIME) { + return Py_BuildValue("d", ucl_object_todouble (obj)); + } + return NULL; +} + +static PyObject* +_iterate_valid_ucl(ucl_object_t const * obj) { + const ucl_object_t *tmp; + ucl_object_iter_t it = NULL; + + tmp = obj; + + while ((obj = ucl_iterate_object (tmp, &it, false))) { + + PyObject* val; + + val = _basic_ucl_type(obj); + if (!val) { + PyObject* key = NULL; + if (obj->key != NULL) { + key = Py_BuildValue("s", ucl_object_key(obj)); + } + + PyObject* ret; + ret = PyDict_New(); + if (obj->type == UCL_OBJECT) { + val = PyDict_New(); + const ucl_object_t *cur; + ucl_object_iter_t it_obj = NULL; + while ((cur = ucl_iterate_object (obj, &it_obj, true))) { + PyObject* keyobj = Py_BuildValue("s",ucl_object_key(cur)); + PyDict_SetItem(val, keyobj, _iterate_valid_ucl(cur)); + } + } + else if (obj->type == UCL_ARRAY) { + val = PyList_New(0); + const ucl_object_t *cur; + ucl_object_iter_t it_obj = NULL; + while ((cur = ucl_iterate_object (obj, &it_obj, true))) { + PyList_Append(val, _iterate_valid_ucl(cur)); + } + } + else if (obj->type == UCL_USERDATA) { + // XXX: this should be + // PyBytes_FromStringAndSize; where is the + // length from? 
+ val = PyBytes_FromString(obj->value.ud); + } + } + return val; + } + + PyErr_SetString(PyExc_SystemError, "unhandled type"); + return NULL; +} + +static PyObject* +_internal_load_ucl(char* uclstr) { + PyObject* ret; + + struct ucl_parser *parser = ucl_parser_new (UCL_PARSER_NO_TIME); + + bool r = ucl_parser_add_string(parser, uclstr, 0); + if (r) { + if (ucl_parser_get_error (parser)) { + PyErr_SetString(PyExc_ValueError, ucl_parser_get_error(parser)); + ucl_parser_free(parser); + ret = NULL; + goto return_with_parser; + } else { + ucl_object_t* uclobj = ucl_parser_get_object(parser); + ret = _iterate_valid_ucl(uclobj); + ucl_object_unref(uclobj); + goto return_with_parser; + } + + } else { + PyErr_SetString(PyExc_ValueError, ucl_parser_get_error (parser)); + ret = NULL; + goto return_with_parser; + } + +return_with_parser: + ucl_parser_free(parser); + return ret; +} + +static PyObject* +ucl_load(PyObject *self, PyObject *args) { + char* uclstr; + if (PyArg_ParseTuple(args, "z", &uclstr)) { + if (!uclstr) { + Py_RETURN_NONE; + } + return _internal_load_ucl(uclstr); + } + return NULL; +} + +static PyObject* +ucl_validate(PyObject *self, PyObject *args) { + char *uclstr, *schema; + if (PyArg_ParseTuple(args, "zz", &uclstr, &schema)) { + if (!uclstr || !schema) { + Py_RETURN_NONE; + } + PyErr_SetString(PyExc_NotImplementedError, "schema validation is not yet supported"); + return NULL; + } + return NULL; +} + +static PyMethodDef uclMethods[] = { + {"load", ucl_load, METH_VARARGS, "Load UCL from stream"}, + {"validate", ucl_validate, METH_VARARGS, "Validate ucl stream against schema"}, + {NULL, NULL, 0, NULL} +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef uclmodule = { + PyModuleDef_HEAD_INIT, + "ucl", + NULL, + -1, + uclMethods +}; + +PyMODINIT_FUNC +PyInit_ucl(void) { + return PyModule_Create(&uclmodule); +} +#else +void initucl(void) { + Py_InitModule("ucl", uclMethods); +} +#endif diff --git a/python/test.sh b/python/test.sh new file mode 100755 
index 0000000..53af6a3 --- /dev/null +++ b/python/test.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -xe +python3.4 setup.py build_ext --inplace +./test_uclmodule.py -v +rm -rfv build +rm ucl.so diff --git a/python/test_uclmodule.py b/python/test_uclmodule.py new file mode 100755 index 0000000..de295dc --- /dev/null +++ b/python/test_uclmodule.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +import json +import unittest +import ucl +import sys + +if sys.version_info[:2] == (2, 7): + unittest.TestCase.assertRaisesRegex = unittest.TestCase.assertRaisesRegexp + + +class TestUcl(unittest.TestCase): + def test_no_args(self): + with self.assertRaises(TypeError): + ucl.load() + + def test_multi_args(self): + with self.assertRaises(TypeError): + ucl.load(0,0) + + def test_none(self): + r = ucl.load(None) + self.assertEqual(r, None) + + def test_int(self): + r = ucl.load("a : 1") + self.assertEqual(ucl.load("a : 1"), { "a" : 1 } ) + + def test_braced_int(self): + self.assertEqual(ucl.load("{a : 1}"), { "a" : 1 } ) + + def test_nested_int(self): + self.assertEqual(ucl.load("a : { b : 1 }"), { "a" : { "b" : 1 } }) + + def test_str(self): + self.assertEqual(ucl.load("a : b"), {"a" : "b"}) + + def test_float(self): + self.assertEqual(ucl.load("a : 1.1"), {"a" : 1.1}) + + def test_empty_ucl(self): + r = ucl.load("{}") + self.assertEqual(r, {}) + + def test_single_brace(self): + self.assertEqual(ucl.load("{"), {}) + + def test_single_back_brace(self): + ucl.load("}") + + def test_single_square_forward(self): + self.assertEqual(ucl.load("["), []) + + def test_invalid_ucl(self): + with self.assertRaisesRegex(ValueError, "unfinished key$"): + ucl.load('{ "var"') + + def test_comment_ignored(self): + self.assertEqual(ucl.load("{/*1*/}"), {}) + + def test_1_in(self): + with open("../tests/basic/1.in", "r") as in1: + self.assertEqual(ucl.load(in1.read()), {'key1': 'value'}) + + def test_every_type(self): + totest="""{ + "key1": value; + "key2": value2; + "key3": "value;" + "key4": 1.0, + "key5": 
-0xdeadbeef + "key6": 0xdeadbeef.1 + "key7": 0xreadbeef + "key8": -1e-10, + "key9": 1 + "key10": true + "key11": no + "key12": yes + }""" + correct = { + 'key1': 'value', + 'key2': 'value2', + 'key3': 'value;', + 'key4': 1.0, + 'key5': -3735928559, + 'key6': '0xdeadbeef.1', + 'key7': '0xreadbeef', + 'key8': -1e-10, + 'key9': 1, + 'key10': 'true', + 'key11': 'false', + 'key12': 'true', + } + self.assertEqual(ucl.load(totest), correct) + + def test_validation_useless(self): + with self.assertRaises(NotImplementedError): + ucl.validate("","") + +if __name__ == '__main__': + unittest.main() diff --git a/src/Makefile.am b/src/Makefile.am index c3f0c9f..c2f3a4b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -11,6 +11,8 @@ libucl_la_SOURCES= ucl_emitter.c \ ucl_parser.c \ ucl_schema.c \ ucl_util.c \ + ucl_msgpack.c \ + ucl_sexp.c \ xxhash.c libucl_la_CFLAGS= $(libucl_common_cflags) \ @CURL_CFLAGS@ @@ -20,7 +22,8 @@ libucl_la_LIBADD= @LIBFETCH_LIBS@ \ @LIBREGEX_LIB@ \ @CURL_LIBS@ -include_HEADERS= $(top_srcdir)/include/ucl.h +include_HEADERS= $(top_srcdir)/include/ucl.h \ + $(top_srcdir)/include/ucl++.h noinst_HEADERS= ucl_internal.h \ xxhash.h \ ucl_hash.h \ @@ -44,6 +44,13 @@ #define TREE_DELTA_MAX 1 +#ifndef _HU_FUNCTION +# if defined(__GNUC__) || defined(__clang__) +# define _HU_FUNCTION(x) __attribute__((__unused__)) x +# else +# define _HU_FUNCTION(x) x +# endif +#endif #define TREE_ENTRY(type) \ struct { \ @@ -68,9 +75,9 @@ #define TREE_DEFINE(node, field) \ \ - struct node *TREE_BALANCE_##node##_##field(struct node *); \ + static struct node *_HU_FUNCTION(TREE_BALANCE_##node##_##field)(struct node *); \ \ - struct node *TREE_ROTL_##node##_##field(struct node *self) \ + static struct node *_HU_FUNCTION(TREE_ROTL_##node##_##field)(struct node *self) \ { \ struct node *r= self->field.avl_right; \ self->field.avl_right= r->field.avl_left; \ @@ -78,7 +85,7 @@ return TREE_BALANCE_##node##_##field(r); \ } \ \ - struct node *TREE_ROTR_##node##_##field(struct node 
*self) \ + static struct node *_HU_FUNCTION(TREE_ROTR_##node##_##field)(struct node *self) \ { \ struct node *l= self->field.avl_left; \ self->field.avl_left= l->field.avl_right; \ @@ -86,7 +93,7 @@ return TREE_BALANCE_##node##_##field(l); \ } \ \ - struct node *TREE_BALANCE_##node##_##field(struct node *self) \ + static struct node *_HU_FUNCTION(TREE_BALANCE_##node##_##field)(struct node *self) \ { \ int delta= TREE_DELTA(self, field); \ \ @@ -111,7 +118,7 @@ return self; \ } \ \ - struct node *TREE_INSERT_##node##_##field \ + static struct node *_HU_FUNCTION(TREE_INSERT_##node##_##field) \ (struct node *self, struct node *elm, int (*compare)(struct node *lhs, struct node *rhs)) \ { \ if (!self) \ @@ -123,7 +130,7 @@ return TREE_BALANCE_##node##_##field(self); \ } \ \ - struct node *TREE_FIND_##node##_##field \ + static struct node *_HU_FUNCTION(TREE_FIND_##node##_##field) \ (struct node *self, struct node *elm, int (*compare)(struct node *lhs, struct node *rhs)) \ { \ if (!self) \ @@ -136,7 +143,7 @@ return TREE_FIND_##node##_##field(self->field.avl_right, elm, compare); \ } \ \ - struct node *TREE_MOVE_RIGHT(struct node *self, struct node *rhs) \ + static struct node *_HU_FUNCTION(TREE_MOVE_RIGHT)(struct node *self, struct node *rhs) \ { \ if (!self) \ return rhs; \ @@ -144,7 +151,7 @@ return TREE_BALANCE_##node##_##field(self); \ } \ \ - struct node *TREE_REMOVE_##node##_##field \ + static struct node *_HU_FUNCTION(TREE_REMOVE_##node##_##field) \ (struct node *self, struct node *elm, int (*compare)(struct node *lhs, struct node *rhs)) \ { \ if (!self) return 0; \ @@ -163,7 +170,7 @@ return TREE_BALANCE_##node##_##field(self); \ } \ \ - void TREE_FORWARD_APPLY_ALL_##node##_##field \ + static void _HU_FUNCTION(TREE_FORWARD_APPLY_ALL_##node##_##field) \ (struct node *self, void (*function)(struct node *node, void *data), void *data) \ { \ if (self) \ @@ -174,7 +181,7 @@ } \ } \ \ - void TREE_REVERSE_APPLY_ALL_##node##_##field \ + static void 
_HU_FUNCTION(TREE_REVERSE_APPLY_ALL_##node##_##field) \ (struct node *self, void (*function)(struct node *node, void *data), void *data) \ { \ if (self) \ diff --git a/src/ucl_chartable.h b/src/ucl_chartable.h index 5248e11..db9f029 100644 --- a/src/ucl_chartable.h +++ b/src/ucl_chartable.h @@ -26,7 +26,7 @@ #include "ucl_internal.h" -static const unsigned int ucl_chartable[255] = { +static const unsigned int ucl_chartable[256] = { UCL_CHARACTER_VALUE_END, UCL_CHARACTER_DENIED, UCL_CHARACTER_DENIED, UCL_CHARACTER_DENIED, UCL_CHARACTER_DENIED, UCL_CHARACTER_DENIED, UCL_CHARACTER_DENIED, UCL_CHARACTER_DENIED, @@ -255,6 +255,7 @@ UCL_CHARACTER_KEY_START|UCL_CHARACTER_KEY|UCL_CHARACTER_VALUE_STR, UCL_CHARACTER_KEY_START|UCL_CHARACTER_KEY|UCL_CHARACTER_VALUE_STR, UCL_CHARACTER_KEY_START|UCL_CHARACTER_KEY|UCL_CHARACTER_VALUE_STR, UCL_CHARACTER_KEY_START|UCL_CHARACTER_KEY|UCL_CHARACTER_VALUE_STR, +UCL_CHARACTER_KEY_START|UCL_CHARACTER_KEY|UCL_CHARACTER_VALUE_STR, UCL_CHARACTER_KEY_START|UCL_CHARACTER_KEY|UCL_CHARACTER_VALUE_STR }; diff --git a/src/ucl_emitter.c b/src/ucl_emitter.c index 9ddf358..8bfbf09 100644 --- a/src/ucl_emitter.c +++ b/src/ucl_emitter.c @@ -62,6 +62,7 @@ UCL_EMIT_TYPE_OPS(json); UCL_EMIT_TYPE_OPS(json_compact); UCL_EMIT_TYPE_OPS(config); UCL_EMIT_TYPE_OPS(yaml); +UCL_EMIT_TYPE_OPS(msgpack); #define UCL_EMIT_TYPE_CONTENT(type) { \ .ucl_emitter_write_elt = ucl_emit_ ## type ## _elt, \ @@ -71,12 +72,12 @@ UCL_EMIT_TYPE_OPS(yaml); .ucl_emitter_end_array = ucl_emit_ ## type ##_end_array \ } - const struct ucl_emitter_operations ucl_standartd_emitter_ops[] = { [UCL_EMIT_JSON] = UCL_EMIT_TYPE_CONTENT(json), [UCL_EMIT_JSON_COMPACT] = UCL_EMIT_TYPE_CONTENT(json_compact), [UCL_EMIT_CONFIG] = UCL_EMIT_TYPE_CONTENT(config), - [UCL_EMIT_YAML] = UCL_EMIT_TYPE_CONTENT(yaml) + [UCL_EMIT_YAML] = UCL_EMIT_TYPE_CONTENT(yaml), + [UCL_EMIT_MSGPACK] = UCL_EMIT_TYPE_CONTENT(msgpack) }; /* @@ -469,19 +470,150 @@ UCL_EMIT_TYPE_IMPL(json_compact, true) 
UCL_EMIT_TYPE_IMPL(config, false) UCL_EMIT_TYPE_IMPL(yaml, false) +static void +ucl_emit_msgpack_elt (struct ucl_emitter_context *ctx, + const ucl_object_t *obj, bool first, bool print_key) +{ + ucl_object_iter_t it; + struct ucl_object_userdata *ud; + const char *ud_out; + const ucl_object_t *cur, *celt; + + switch (obj->type) { + case UCL_INT: + ucl_emitter_print_key_msgpack (print_key, ctx, obj); + ucl_emitter_print_int_msgpack (ctx, ucl_object_toint (obj)); + break; + + case UCL_FLOAT: + case UCL_TIME: + ucl_emitter_print_key_msgpack (print_key, ctx, obj); + ucl_emitter_print_double_msgpack (ctx, ucl_object_todouble (obj)); + break; + + case UCL_BOOLEAN: + ucl_emitter_print_key_msgpack (print_key, ctx, obj); + ucl_emitter_print_bool_msgpack (ctx, ucl_object_toboolean (obj)); + break; + + case UCL_STRING: + ucl_emitter_print_key_msgpack (print_key, ctx, obj); + + if (obj->flags & UCL_OBJECT_BINARY) { + ucl_emitter_print_binary_string_msgpack (ctx, obj->value.sv, + obj->len); + } + else { + ucl_emitter_print_string_msgpack (ctx, obj->value.sv, obj->len); + } + break; + + case UCL_NULL: + ucl_emitter_print_key_msgpack (print_key, ctx, obj); + ucl_emitter_print_null_msgpack (ctx); + break; + + case UCL_OBJECT: + ucl_emitter_print_key_msgpack (print_key, ctx, obj); + ucl_emit_msgpack_start_obj (ctx, obj, print_key); + it = NULL; + + while ((cur = ucl_iterate_object (obj, &it, true)) != NULL) { + LL_FOREACH (cur, celt) { + ucl_emit_msgpack_elt (ctx, celt, false, true); + /* XXX: + * in msgpack the length of objects is encoded within a single elt + * so in case of multi-value keys we are using merely the first + * element ignoring others + */ + break; + } + } + + break; + + case UCL_ARRAY: + ucl_emitter_print_key_msgpack (print_key, ctx, obj); + ucl_emit_msgpack_start_array (ctx, obj, print_key); + it = NULL; + + while ((cur = ucl_iterate_object (obj, &it, true)) != NULL) { + ucl_emit_msgpack_elt (ctx, cur, false, false); + } + + break; + + case UCL_USERDATA: + ud = 
(struct ucl_object_userdata *)obj; + ucl_emitter_print_key_msgpack (print_key, ctx, obj); + + if (ud->emitter) { + ud_out = ud->emitter (obj->value.ud); + if (ud_out == NULL) { + ud_out = "null"; + } + } + ucl_emitter_print_string_msgpack (ctx, obj->value.sv, obj->len); + break; + } +} + +static void +ucl_emit_msgpack_start_obj (struct ucl_emitter_context *ctx, + const ucl_object_t *obj, bool print_key) +{ + ucl_emitter_print_object_msgpack (ctx, obj->len); +} + +static void +ucl_emit_msgpack_start_array (struct ucl_emitter_context *ctx, + const ucl_object_t *obj, bool print_key) +{ + ucl_emitter_print_array_msgpack (ctx, obj->len); +} + +static void +ucl_emit_msgpack_end_object (struct ucl_emitter_context *ctx, + const ucl_object_t *obj) +{ + +} + +static void +ucl_emit_msgpack_end_array (struct ucl_emitter_context *ctx, + const ucl_object_t *obj) +{ + +} + unsigned char * ucl_object_emit (const ucl_object_t *obj, enum ucl_emitter emit_type) { + return ucl_object_emit_len (obj, emit_type, NULL); +} + +unsigned char * +ucl_object_emit_len (const ucl_object_t *obj, enum ucl_emitter emit_type, + size_t *outlen) +{ unsigned char *res = NULL; struct ucl_emitter_functions *func; + UT_string *s; + if (obj == NULL) { return NULL; } func = ucl_object_emit_memory_funcs ((void **)&res); + s = func->ud; if (func != NULL) { ucl_object_emit_full (obj, emit_type, func); + + if (outlen != NULL) { + *outlen = s->i; + } + ucl_object_emit_funcs_free (func); } diff --git a/src/ucl_emitter_utils.c b/src/ucl_emitter_utils.c index 91cad78..95ac9a5 100644 --- a/src/ucl_emitter_utils.c +++ b/src/ucl_emitter_utils.c @@ -62,6 +62,12 @@ static const struct ucl_emitter_context ucl_standard_emitters[] = { .id = UCL_EMIT_YAML, .func = NULL, .ops = &ucl_standartd_emitter_ops[UCL_EMIT_YAML] + }, + [UCL_EMIT_MSGPACK] = { + .name = "msgpack", + .id = UCL_EMIT_MSGPACK, + .func = NULL, + .ops = &ucl_standartd_emitter_ops[UCL_EMIT_MSGPACK] } }; @@ -73,7 +79,7 @@ static const struct ucl_emitter_context 
ucl_standard_emitters[] = { const struct ucl_emitter_context * ucl_emit_get_standard_context (enum ucl_emitter emit_type) { - if (emit_type >= UCL_EMIT_JSON && emit_type <= UCL_EMIT_YAML) { + if (emit_type >= UCL_EMIT_JSON && emit_type < UCL_EMIT_MAX) { return &ucl_standard_emitters[emit_type]; } diff --git a/src/ucl_hash.c b/src/ucl_hash.c index 275e84d..7de8196 100644 --- a/src/ucl_hash.c +++ b/src/ucl_hash.c @@ -26,6 +26,9 @@ #include "khash.h" #include "kvec.h" +#include <time.h> +#include <limits.h> + struct ucl_hash_elt { const ucl_object_t *obj; size_t ar_idx; @@ -37,11 +40,78 @@ struct ucl_hash_struct { bool caseless; }; +static uint64_t +ucl_hash_seed (void) +{ + static uint64_t seed; + + if (seed == 0) { +#ifdef UCL_RANDOM_FUNCTION + seed = UCL_RANDOM_FUNCTION; +#else + /* Not very random but can be useful for our purposes */ + seed = time (NULL); +#endif + } + + return seed; +} + +static const unsigned char lc_map[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff +}; + +#if (defined(WORD_BIT) && WORD_BIT == 64) || \ + (defined(__WORDSIZE) && __WORDSIZE == 64) || \ + defined(__x86_64__) || \ + defined(__amd64__) +#define UCL64_BIT_HASH 1 +#endif + +#ifdef UCL64_BIT_HASH +static inline uint32_t +ucl_hash_func (const ucl_object_t *o) +{ + return XXH64 (o->key, o->keylen, ucl_hash_seed ()); +} +#else static inline uint32_t ucl_hash_func (const ucl_object_t *o) { - return XXH32 (o->key, o->keylen, 0xdeadbeef); + return XXH32 (o->key, o->keylen, ucl_hash_seed ()); } +#endif static inline int ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2) @@ -56,33 +126,91 @@ ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2) KHASH_INIT (ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt, 1, ucl_hash_func, ucl_hash_equal) +#ifdef UCL64_BIT_HASH static inline uint32_t ucl_hash_caseless_func (const ucl_object_t *o) { - void *xxh = XXH32_init (0xdeadbeef); - char hash_buf[64], *c; - const char *p; - ssize_t remain = o->keylen; - - p = o->key; - c = &hash_buf[0]; + unsigned len = o->keylen; + unsigned leftover = o->keylen % 4; + unsigned fp, i; + const uint8_t* s = (const uint8_t*)o->key; + union { + struct { + unsigned char c1, c2, c3, c4; + } c; + uint32_t pp; + } u; + XXH64_state_t st; + + fp = len - leftover; + XXH64_reset (&st, ucl_hash_seed ()); + + for (i = 0; i != fp; i += 4) { + u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; + u.c.c1 = 
lc_map[u.c.c1]; + u.c.c2 = lc_map[u.c.c2]; + u.c.c3 = lc_map[u.c.c3]; + u.c.c4 = lc_map[u.c.c4]; + XXH64_update (&st, &u.pp, sizeof (u)); + } - while (remain > 0) { - *c++ = tolower (*p++); + u.pp = 0; + switch (leftover) { + case 3: + u.c.c3 = lc_map[(unsigned char)s[i++]]; + case 2: + u.c.c2 = lc_map[(unsigned char)s[i++]]; + case 1: + u.c.c1 = lc_map[(unsigned char)s[i]]; + XXH64_update (&st, &u.pp, leftover); + break; + } - if (c - &hash_buf[0] == sizeof (hash_buf)) { - XXH32_update (xxh, hash_buf, sizeof (hash_buf)); - c = &hash_buf[0]; - } - remain --; + return XXH64_digest (&st); +} +#else +static inline uint32_t +ucl_hash_caseless_func (const ucl_object_t *o) +{ + unsigned len = o->keylen; + unsigned leftover = o->keylen % 4; + unsigned fp, i; + const uint8_t* s = (const uint8_t*)o->key; + union { + struct { + unsigned char c1, c2, c3, c4; + } c; + uint32_t pp; + } u; + XXH32_state_t st; + + fp = len - leftover; + XXH32_reset (&st, ucl_hash_seed ()); + + for (i = 0; i != fp; i += 4) { + u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; + u.c.c1 = lc_map[u.c.c1]; + u.c.c2 = lc_map[u.c.c2]; + u.c.c3 = lc_map[u.c.c3]; + u.c.c4 = lc_map[u.c.c4]; + XXH32_update (&st, &u.pp, sizeof (u)); } - if (c - &hash_buf[0] != 0) { - XXH32_update (xxh, hash_buf, c - &hash_buf[0]); + u.pp = 0; + switch (leftover) { + case 3: + u.c.c3 = lc_map[(unsigned char)s[i++]]; + case 2: + u.c.c2 = lc_map[(unsigned char)s[i++]]; + case 1: + u.c.c1 = lc_map[(unsigned char)s[i]]; + XXH32_update (&st, &u.pp, leftover); + break; } - return XXH32_digest (xxh); + return XXH32_digest (&st); } +#endif static inline int ucl_hash_caseless_equal (const ucl_object_t *k1, const ucl_object_t *k2) @@ -254,6 +382,11 @@ ucl_hash_iterate (ucl_hash_t *hashlin, ucl_hash_iter_t *iter) if (it == NULL) { it = UCL_ALLOC (sizeof (*it)); + + if (it == NULL) { + return NULL; + } + it->cur = &hashlin->ar.a[0]; it->end = it->cur + hashlin->ar.n; } @@ -336,7 +469,7 @@ ucl_hash_delete 
(ucl_hash_t* hashlin, const ucl_object_t *obj) k = kh_get (ucl_hash_caseless_node, h, obj); if (k != kh_end (h)) { elt = &kh_value (h, k); - kv_A (hashlin->ar, elt->ar_idx) = NULL; + kv_del (const ucl_object_t *, hashlin->ar, elt->ar_idx); kh_del (ucl_hash_caseless_node, h, k); } } @@ -346,7 +479,7 @@ ucl_hash_delete (ucl_hash_t* hashlin, const ucl_object_t *obj) k = kh_get (ucl_hash_node, h, obj); if (k != kh_end (h)) { elt = &kh_value (h, k); - kv_A (hashlin->ar, elt->ar_idx) = NULL; + kv_del (const ucl_object_t *, hashlin->ar, elt->ar_idx); kh_del (ucl_hash_node, h, k); } } diff --git a/src/ucl_internal.h b/src/ucl_internal.h index bdbe691..31d6b13 100644 --- a/src/ucl_internal.h +++ b/src/ucl_internal.h @@ -145,15 +145,19 @@ enum ucl_character_type { struct ucl_macro { char *name; - ucl_macro_handler handler; + union { + ucl_macro_handler handler; + ucl_context_macro_handler context_handler; + } h; void* ud; + bool is_context; UT_hash_handle hh; }; struct ucl_stack { ucl_object_t *obj; struct ucl_stack *next; - int level; + uint64_t level; }; struct ucl_chunk { @@ -164,6 +168,8 @@ struct ucl_chunk { unsigned int line; unsigned int column; unsigned priority; + enum ucl_duplicate_strategy strategy; + enum ucl_parse_type parse_type; struct ucl_chunk *next; }; @@ -191,8 +197,12 @@ struct ucl_parser { enum ucl_parser_state prev_state; unsigned int recursion; int flags; + unsigned default_priority; + int err_code; ucl_object_t *top_obj; ucl_object_t *cur_obj; + ucl_object_t *trash_objs; + ucl_object_t *includepaths; char *cur_file; struct ucl_macro *macroes; struct ucl_stack *stack; @@ -220,13 +230,21 @@ size_t ucl_unescape_json_string (char *str, size_t len); * Handle include macro * @param data include data * @param len length of data + * @param args UCL object representing arguments to the macro * @param ud user data - * @param err error ptr * @return */ bool ucl_include_handler (const unsigned char *data, size_t len, const ucl_object_t *args, void* ud); +/** + * 
Handle tryinclude macro + * @param data include data + * @param len length of data + * @param args UCL object representing arguments to the macro + * @param ud user data + * @return + */ bool ucl_try_include_handler (const unsigned char *data, size_t len, const ucl_object_t *args, void* ud); @@ -234,17 +252,52 @@ bool ucl_try_include_handler (const unsigned char *data, size_t len, * Handle includes macro * @param data include data * @param len length of data + * @param args UCL object representing arguments to the macro * @param ud user data - * @param err error ptr * @return */ bool ucl_includes_handler (const unsigned char *data, size_t len, const ucl_object_t *args, void* ud); +/** + * Handle priority macro + * @param data include data + * @param len length of data + * @param args UCL object representing arguments to the macro + * @param ud user data + * @return + */ +bool ucl_priority_handler (const unsigned char *data, size_t len, + const ucl_object_t *args, void* ud); + +/** + * Handle load macro + * @param data include data + * @param len length of data + * @param args UCL object representing arguments to the macro + * @param ud user data + * @return + */ +bool ucl_load_handler (const unsigned char *data, size_t len, + const ucl_object_t *args, void* ud); +/** + * Handle inherit macro + * @param data include data + * @param len length of data + * @param args UCL object representing arguments to the macro + * @param ctx the current context object + * @param ud user data + * @return + */ +bool ucl_inherit_handler (const unsigned char *data, size_t len, + const ucl_object_t *args, const ucl_object_t *ctx, void* ud); + size_t ucl_strlcpy (char *dst, const char *src, size_t siz); size_t ucl_strlcpy_unsafe (char *dst, const char *src, size_t siz); size_t ucl_strlcpy_tolower (char *dst, const char *src, size_t siz); +char *ucl_strnstr (const char *s, const char *find, int len); +char *ucl_strncasestr (const char *s, const char *find, int len); #ifdef __GNUC__ 
static inline void @@ -252,9 +305,10 @@ ucl_create_err (UT_string **err, const char *fmt, ...) __attribute__ (( format( printf, 2, 3) )); #endif +#undef UCL_FATAL_ERRORS + static inline void ucl_create_err (UT_string **err, const char *fmt, ...) - { if (*err == NULL) { utstring_new (*err); @@ -263,6 +317,10 @@ ucl_create_err (UT_string **err, const char *fmt, ...) utstring_printf_va (*err, fmt, ap); va_end (ap); } + +#ifdef UCL_FATAL_ERRORS + assert (0); +#endif } /** @@ -311,7 +369,7 @@ ucl_maybe_parse_boolean (ucl_object_t *obj, const unsigned char *start, size_t l } } - if (ret) { + if (ret && obj != NULL) { obj->type = UCL_BOOLEAN; obj->value.iv = val; } @@ -396,4 +454,89 @@ unsigned char * ucl_object_emit_single_json (const ucl_object_t *obj); */ bool ucl_maybe_long_string (const ucl_object_t *obj); +/** + * Print integer to the msgpack output + * @param ctx + * @param val + */ +void ucl_emitter_print_int_msgpack (struct ucl_emitter_context *ctx, + int64_t val); +/** + * Print double to the msgpack output + * @param ctx + * @param val + */ +void ucl_emitter_print_double_msgpack (struct ucl_emitter_context *ctx, + double val); +/** + * Print boolean to the msgpack output + * @param ctx + * @param val + */ +void ucl_emitter_print_bool_msgpack (struct ucl_emitter_context *ctx, + bool val); +/** + * Print string to the msgpack output + * @param ctx + * @param s + * @param len + */ +void ucl_emitter_print_string_msgpack (struct ucl_emitter_context *ctx, + const char *s, size_t len); + +/** + * Print binary string to the msgpack output + * @param ctx + * @param s + * @param len + */ +void ucl_emitter_print_binary_string_msgpack (struct ucl_emitter_context *ctx, + const char *s, size_t len); + +/** + * Print array preamble for msgpack + * @param ctx + * @param len + */ +void ucl_emitter_print_array_msgpack (struct ucl_emitter_context *ctx, + size_t len); + +/** + * Print object preamble for msgpack + * @param ctx + * @param len + */ +void 
ucl_emitter_print_object_msgpack (struct ucl_emitter_context *ctx, + size_t len); +/** + * Print NULL to the msgpack output + * @param ctx + */ +void ucl_emitter_print_null_msgpack (struct ucl_emitter_context *ctx); +/** + * Print object's key if needed to the msgpack output + * @param print_key + * @param ctx + * @param obj + */ +void ucl_emitter_print_key_msgpack (bool print_key, + struct ucl_emitter_context *ctx, + const ucl_object_t *obj); + +/** + * Add new element to an object using the current merge strategy and priority + * @param parser + * @param nobj + * @return + */ +bool ucl_parser_process_object_element (struct ucl_parser *parser, + ucl_object_t *nobj); + +/** + * Parse msgpack chunk + * @param parser + * @return + */ +bool ucl_parse_msgpack (struct ucl_parser *parser); + #endif /* UCL_INTERNAL_H_ */ diff --git a/src/ucl_msgpack.c b/src/ucl_msgpack.c new file mode 100644 index 0000000..e8ebfba --- /dev/null +++ b/src/ucl_msgpack.c @@ -0,0 +1,1600 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "ucl.h" +#include "ucl_internal.h" + +#ifdef HAVE_ENDIAN_H +#include <endian.h> +#elif defined(HAVE_SYS_ENDIAN_H) +#include <sys/endian.h> +#elif defined(HAVE_MACHINE_ENDIAN_H) +#include <machine/endian.h> +#endif + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) + #if __BYTE_ORDER == __LITTLE_ENDIAN + #define __LITTLE_ENDIAN__ + #elif __BYTE_ORDER == __BIG_ENDIAN + #define __BIG_ENDIAN__ + #elif _WIN32 + #define __LITTLE_ENDIAN__ + #endif +#endif + +#define SWAP_LE_BE16(val) ((uint16_t) ( \ + (uint16_t) ((uint16_t) (val) >> 8) | \ + (uint16_t) ((uint16_t) (val) << 8))) + +#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 4 && defined (__GNUC_MINOR__) && __GNUC_MINOR__ >= 3) +# define SWAP_LE_BE32(val) ((uint32_t)__builtin_bswap32 ((uint32_t)(val))) +# define SWAP_LE_BE64(val) ((uint64_t)__builtin_bswap64 ((uint64_t)(val))) +#else + #define SWAP_LE_BE32(val) ((uint32_t)( \ + (((uint32_t)(val) & (uint32_t)0x000000ffU) << 24) | \ + (((uint32_t)(val) & (uint32_t)0x0000ff00U) << 8) | \ + (((uint32_t)(val) & (uint32_t)0x00ff0000U) >> 8) | \ + (((uint32_t)(val) & (uint32_t)0xff000000U) >> 24))) + + #define SWAP_LE_BE64(val) ((uint64_t)( \ + (((uint64_t)(val) & \ + (uint64_t)(0x00000000000000ffULL)) << 56) | \ + (((uint64_t)(val) & \ + (uint64_t)(0x000000000000ff00ULL)) << 40) | \ + (((uint64_t)(val) & \ + (uint64_t)(0x0000000000ff0000ULL)) << 24) | \ + (((uint64_t)(val) & \ + 
(uint64_t) (0x00000000ff000000ULL)) << 8) | \ + (((uint64_t)(val) & \ + (uint64_t)(0x000000ff00000000ULL)) >> 8) | \ + (((uint64_t)(val) & \ + (uint64_t)(0x0000ff0000000000ULL)) >> 24) | \ + (((uint64_t)(val) & \ + (uint64_t)(0x00ff000000000000ULL)) >> 40) | \ + (((uint64_t)(val) & \ + (uint64_t)(0xff00000000000000ULL)) >> 56))) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define TO_BE16 SWAP_LE_BE16 +#define TO_BE32 SWAP_LE_BE32 +#define TO_BE64 SWAP_LE_BE64 +#define FROM_BE16 SWAP_LE_BE16 +#define FROM_BE32 SWAP_LE_BE32 +#define FROM_BE64 SWAP_LE_BE64 +#else +#define TO_BE16(val) (uint16_t)(val) +#define TO_BE32(val) (uint32_t)(val) +#define TO_BE64(val) (uint64_t)(val) +#define FROM_BE16(val) (uint16_t)(val) +#define FROM_BE32(val) (uint32_t)(val) +#define FROM_BE64(val) (uint64_t)(val) +#endif + +void +ucl_emitter_print_int_msgpack (struct ucl_emitter_context *ctx, int64_t val) +{ + const struct ucl_emitter_functions *func = ctx->func; + unsigned char buf[sizeof(uint64_t) + 1]; + const unsigned char mask_positive = 0x7f, mask_negative = 0xe0, + uint8_ch = 0xcc, uint16_ch = 0xcd, uint32_ch = 0xce, uint64_ch = 0xcf, + int8_ch = 0xd0, int16_ch = 0xd1, int32_ch = 0xd2, int64_ch = 0xd3; + unsigned len; + + if (val >= 0) { + if (val <= 0x7f) { + /* Fixed num 7 bits */ + len = 1; + buf[0] = mask_positive & val; + } + else if (val <= 0xff) { + len = 2; + buf[0] = uint8_ch; + buf[1] = val & 0xff; + } + else if (val <= 0xffff) { + uint16_t v = TO_BE16 (val); + + len = 3; + buf[0] = uint16_ch; + memcpy (&buf[1], &v, sizeof (v)); + } + else if (val <= 0xffffffff) { + uint32_t v = TO_BE32 (val); + + len = 5; + buf[0] = uint32_ch; + memcpy (&buf[1], &v, sizeof (v)); + } + else { + uint64_t v = TO_BE64 (val); + + len = 9; + buf[0] = uint64_ch; + memcpy (&buf[1], &v, sizeof (v)); + } + } + else { + uint64_t uval; + /* Bithack abs */ + uval = ((val ^ (val >> 63)) - (val >> 63)); + + if (val > -(1 << 5)) { + len = 1; + buf[0] = (mask_negative | uval) & 0xff; + } + else if (uval <= 
0xff) { + len = 2; + buf[0] = int8_ch; + buf[1] = (unsigned char)val; + } + else if (uval <= 0xffff) { + uint16_t v = TO_BE16 (val); + + len = 3; + buf[0] = int16_ch; + memcpy (&buf[1], &v, sizeof (v)); + } + else if (uval <= 0xffffffff) { + uint32_t v = TO_BE32 (val); + + len = 5; + buf[0] = int32_ch; + memcpy (&buf[1], &v, sizeof (v)); + } + else { + uint64_t v = TO_BE64 (val); + + len = 9; + buf[0] = int64_ch; + memcpy (&buf[1], &v, sizeof (v)); + } + } + + func->ucl_emitter_append_len (buf, len, func->ud); +} + +void +ucl_emitter_print_double_msgpack (struct ucl_emitter_context *ctx, double val) +{ + const struct ucl_emitter_functions *func = ctx->func; + union { + double d; + uint64_t i; + } u; + const unsigned char dbl_ch = 0xcb; + unsigned char buf[sizeof(double) + 1]; + + /* Convert to big endian */ + u.d = val; + u.i = TO_BE64 (u.i); + + buf[0] = dbl_ch; + memcpy (&buf[1], &u.d, sizeof (double)); + func->ucl_emitter_append_len (buf, sizeof (buf), func->ud); +} + +void +ucl_emitter_print_bool_msgpack (struct ucl_emitter_context *ctx, bool val) +{ + const struct ucl_emitter_functions *func = ctx->func; + const unsigned char true_ch = 0xc3, false_ch = 0xc2; + + func->ucl_emitter_append_character (val ? 
true_ch : false_ch, 1, func->ud); +} + +void +ucl_emitter_print_string_msgpack (struct ucl_emitter_context *ctx, + const char *s, size_t len) +{ + const struct ucl_emitter_functions *func = ctx->func; + const unsigned char fix_mask = 0xA0, l8_ch = 0xd9, l16_ch = 0xda, l32_ch = 0xdb; + unsigned char buf[5]; + unsigned blen; + + if (len <= 0x1F) { + blen = 1; + buf[0] = (len | fix_mask) & 0xff; + } + else if (len <= 0xff) { + blen = 2; + buf[0] = l8_ch; + buf[1] = len & 0xff; + } + else if (len <= 0xffff) { + uint16_t bl = TO_BE16 (len); + + blen = 3; + buf[0] = l16_ch; + memcpy (&buf[1], &bl, sizeof (bl)); + } + else { + uint32_t bl = TO_BE32 (len); + + blen = 5; + buf[0] = l32_ch; + memcpy (&buf[1], &bl, sizeof (bl)); + } + + func->ucl_emitter_append_len (buf, blen, func->ud); + func->ucl_emitter_append_len (s, len, func->ud); +} + +void +ucl_emitter_print_binary_string_msgpack (struct ucl_emitter_context *ctx, + const char *s, size_t len) +{ + const struct ucl_emitter_functions *func = ctx->func; + const unsigned char l8_ch = 0xc4, l16_ch = 0xc5, l32_ch = 0xc6; + unsigned char buf[5]; + unsigned blen; + + if (len <= 0xff) { + blen = 2; + buf[0] = l8_ch; + buf[1] = len & 0xff; + } + else if (len <= 0xffff) { + uint16_t bl = TO_BE16 (len); + + blen = 3; + buf[0] = l16_ch; + memcpy (&buf[1], &bl, sizeof (bl)); + } + else { + uint32_t bl = TO_BE32 (len); + + blen = 5; + buf[0] = l32_ch; + memcpy (&buf[1], &bl, sizeof (bl)); + } + + func->ucl_emitter_append_len (buf, blen, func->ud); + func->ucl_emitter_append_len (s, len, func->ud); +} + +void +ucl_emitter_print_null_msgpack (struct ucl_emitter_context *ctx) +{ + const struct ucl_emitter_functions *func = ctx->func; + const unsigned char nil = 0xc0; + + func->ucl_emitter_append_character (nil, 1, func->ud); +} + +void +ucl_emitter_print_key_msgpack (bool print_key, struct ucl_emitter_context *ctx, + const ucl_object_t *obj) +{ + if (print_key) { + ucl_emitter_print_string_msgpack (ctx, obj->key, obj->keylen); + } +} + 
+void +ucl_emitter_print_array_msgpack (struct ucl_emitter_context *ctx, size_t len) +{ + const struct ucl_emitter_functions *func = ctx->func; + const unsigned char fix_mask = 0x90, l16_ch = 0xdc, l32_ch = 0xdd; + unsigned char buf[5]; + unsigned blen; + + if (len <= 0xF) { + blen = 1; + buf[0] = (len | fix_mask) & 0xff; + } + else if (len <= 0xffff) { + uint16_t bl = TO_BE16 (len); + + blen = 3; + buf[0] = l16_ch; + memcpy (&buf[1], &bl, sizeof (bl)); + } + else { + uint32_t bl = TO_BE32 (len); + + blen = 5; + buf[0] = l32_ch; + memcpy (&buf[1], &bl, sizeof (bl)); + } + + func->ucl_emitter_append_len (buf, blen, func->ud); +} + +void +ucl_emitter_print_object_msgpack (struct ucl_emitter_context *ctx, size_t len) +{ + const struct ucl_emitter_functions *func = ctx->func; + const unsigned char fix_mask = 0x80, l16_ch = 0xde, l32_ch = 0xdf; + unsigned char buf[5]; + unsigned blen; + + if (len <= 0xF) { + blen = 1; + buf[0] = (len | fix_mask) & 0xff; + } + else if (len <= 0xffff) { + uint16_t bl = TO_BE16 (len); + + blen = 3; + buf[0] = l16_ch; + memcpy (&buf[1], &bl, sizeof (bl)); + } + else { + uint32_t bl = TO_BE32 (len); + + blen = 5; + buf[0] = l32_ch; + memcpy (&buf[1], &bl, sizeof (bl)); + } + + func->ucl_emitter_append_len (buf, blen, func->ud); +} + + +enum ucl_msgpack_format { + msgpack_positive_fixint = 0, + msgpack_fixmap, + msgpack_fixarray, + msgpack_fixstr, + msgpack_nil, + msgpack_false, + msgpack_true, + msgpack_bin8, + msgpack_bin16, + msgpack_bin32, + msgpack_ext8, + msgpack_ext16, + msgpack_ext32, + msgpack_float32, + msgpack_float64, + msgpack_uint8, + msgpack_uint16, + msgpack_uint32, + msgpack_uint64, + msgpack_int8, + msgpack_int16, + msgpack_int32, + msgpack_int64, + msgpack_fixext1, + msgpack_fixext2, + msgpack_fixext4, + msgpack_fixext8, + msgpack_fixext16, + msgpack_str8, + msgpack_str16, + msgpack_str32, + msgpack_array16, + msgpack_array32, + msgpack_map16, + msgpack_map32, + msgpack_negative_fixint, + msgpack_invalid +}; + +typedef 
ssize_t (*ucl_msgpack_parse_function)(struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain); + +static ssize_t ucl_msgpack_parse_map (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain); +static ssize_t ucl_msgpack_parse_array (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain); +static ssize_t ucl_msgpack_parse_string (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain); +static ssize_t ucl_msgpack_parse_int (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain); +static ssize_t ucl_msgpack_parse_float (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain); +static ssize_t ucl_msgpack_parse_bool (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain); +static ssize_t ucl_msgpack_parse_null (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain); +static ssize_t ucl_msgpack_parse_ignore (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain); + +#define MSGPACK_FLAG_FIXED (1 << 0) +#define MSGPACK_FLAG_CONTAINER (1 << 1) +#define MSGPACK_FLAG_TYPEVALUE (1 << 2) +#define MSGPACK_FLAG_EXT (1 << 3) +#define MSGPACK_FLAG_ASSOC (1 << 4) +#define MSGPACK_FLAG_KEY (1 << 5) +#define MSGPACK_CONTAINER_BIT (1ULL << 62) + +/* + * Search tree packed in array + */ +struct ucl_msgpack_parser { + 
uint8_t prefix; /* Prefix byte */ + uint8_t prefixlen; /* Length of prefix in bits */ + uint8_t fmt; /* The desired format */ + uint8_t len; /* Length of the object + (either length bytes + or length of value in case + of fixed objects */ + uint8_t flags; /* Flags of the specified type */ + ucl_msgpack_parse_function func; /* Parser function */ +} parsers[] = { + { + 0xa0, + 3, + msgpack_fixstr, + 0, + MSGPACK_FLAG_FIXED|MSGPACK_FLAG_KEY, + ucl_msgpack_parse_string + }, + { + 0x0, + 1, + msgpack_positive_fixint, + 0, + MSGPACK_FLAG_FIXED|MSGPACK_FLAG_TYPEVALUE, + ucl_msgpack_parse_int + }, + { + 0xe0, + 3, + msgpack_negative_fixint, + 0, + MSGPACK_FLAG_FIXED|MSGPACK_FLAG_TYPEVALUE, + ucl_msgpack_parse_int + }, + { + 0x80, + 4, + msgpack_fixmap, + 0, + MSGPACK_FLAG_FIXED|MSGPACK_FLAG_CONTAINER|MSGPACK_FLAG_ASSOC, + ucl_msgpack_parse_map + }, + { + 0x90, + 4, + msgpack_fixarray, + 0, + MSGPACK_FLAG_FIXED|MSGPACK_FLAG_CONTAINER, + ucl_msgpack_parse_array + }, + { + 0xd9, + 8, + msgpack_str8, + 1, + MSGPACK_FLAG_KEY, + ucl_msgpack_parse_string + }, + { + 0xc4, + 8, + msgpack_bin8, + 1, + MSGPACK_FLAG_KEY, + ucl_msgpack_parse_string + }, + { + 0xcf, + 8, + msgpack_uint64, + 8, + MSGPACK_FLAG_FIXED, + ucl_msgpack_parse_int + }, + { + 0xd3, + 8, + msgpack_int64, + 8, + MSGPACK_FLAG_FIXED, + ucl_msgpack_parse_int + }, + { + 0xce, + 8, + msgpack_uint32, + 4, + MSGPACK_FLAG_FIXED, + ucl_msgpack_parse_int + }, + { + 0xd2, + 8, + msgpack_int32, + 4, + MSGPACK_FLAG_FIXED, + ucl_msgpack_parse_int + }, + { + 0xcb, + 8, + msgpack_float64, + 8, + MSGPACK_FLAG_FIXED, + ucl_msgpack_parse_float + }, + { + 0xca, + 8, + msgpack_float32, + 4, + MSGPACK_FLAG_FIXED, + ucl_msgpack_parse_float + }, + { + 0xc2, + 8, + msgpack_false, + 1, + MSGPACK_FLAG_FIXED | MSGPACK_FLAG_TYPEVALUE, + ucl_msgpack_parse_bool + }, + { + 0xc3, + 8, + msgpack_true, + 1, + MSGPACK_FLAG_FIXED | MSGPACK_FLAG_TYPEVALUE, + ucl_msgpack_parse_bool + }, + { + 0xcc, + 8, + msgpack_uint8, + 1, + MSGPACK_FLAG_FIXED, + 
ucl_msgpack_parse_int + }, + { + 0xcd, + 8, + msgpack_uint16, + 2, + MSGPACK_FLAG_FIXED, + ucl_msgpack_parse_int + }, + { + 0xd0, + 8, + msgpack_int8, + 1, + MSGPACK_FLAG_FIXED, + ucl_msgpack_parse_int + }, + { + 0xd1, + 8, + msgpack_int16, + 2, + MSGPACK_FLAG_FIXED, + ucl_msgpack_parse_int + }, + { + 0xc0, + 8, + msgpack_nil, + 0, + MSGPACK_FLAG_FIXED | MSGPACK_FLAG_TYPEVALUE, + ucl_msgpack_parse_null + }, + { + 0xda, + 8, + msgpack_str16, + 2, + MSGPACK_FLAG_KEY, + ucl_msgpack_parse_string + }, + { + 0xdb, + 8, + msgpack_str32, + 4, + MSGPACK_FLAG_KEY, + ucl_msgpack_parse_string + }, + { + 0xc5, + 8, + msgpack_bin16, + 2, + MSGPACK_FLAG_KEY, + ucl_msgpack_parse_string + }, + { + 0xc6, + 8, + msgpack_bin32, + 4, + MSGPACK_FLAG_KEY, + ucl_msgpack_parse_string + }, + { + 0xdc, + 8, + msgpack_array16, + 2, + MSGPACK_FLAG_CONTAINER, + ucl_msgpack_parse_array + }, + { + 0xdd, + 8, + msgpack_array32, + 4, + MSGPACK_FLAG_CONTAINER, + ucl_msgpack_parse_array + }, + { + 0xde, + 8, + msgpack_map16, + 2, + MSGPACK_FLAG_CONTAINER|MSGPACK_FLAG_ASSOC, + ucl_msgpack_parse_map + }, + { + 0xdf, + 8, + msgpack_map32, + 4, + MSGPACK_FLAG_CONTAINER|MSGPACK_FLAG_ASSOC, + ucl_msgpack_parse_map + }, + { + 0xc7, + 8, + msgpack_ext8, + 1, + MSGPACK_FLAG_EXT, + ucl_msgpack_parse_ignore + }, + { + 0xc8, + 8, + msgpack_ext16, + 2, + MSGPACK_FLAG_EXT, + ucl_msgpack_parse_ignore + }, + { + 0xc9, + 8, + msgpack_ext32, + 4, + MSGPACK_FLAG_EXT, + ucl_msgpack_parse_ignore + }, + { + 0xd4, + 8, + msgpack_fixext1, + 1, + MSGPACK_FLAG_FIXED | MSGPACK_FLAG_EXT, + ucl_msgpack_parse_ignore + }, + { + 0xd5, + 8, + msgpack_fixext2, + 2, + MSGPACK_FLAG_FIXED | MSGPACK_FLAG_EXT, + ucl_msgpack_parse_ignore + }, + { + 0xd6, + 8, + msgpack_fixext4, + 4, + MSGPACK_FLAG_FIXED | MSGPACK_FLAG_EXT, + ucl_msgpack_parse_ignore + }, + { + 0xd7, + 8, + msgpack_fixext8, + 8, + MSGPACK_FLAG_FIXED | MSGPACK_FLAG_EXT, + ucl_msgpack_parse_ignore + }, + { + 0xd8, + 8, + msgpack_fixext16, + 16, + MSGPACK_FLAG_FIXED | 
MSGPACK_FLAG_EXT, + ucl_msgpack_parse_ignore + } +}; + +#undef MSGPACK_DEBUG_PARSER + +static inline struct ucl_msgpack_parser * +ucl_msgpack_get_parser_from_type (unsigned char t) +{ + unsigned int i, shift, mask; + + for (i = 0; i < sizeof (parsers) / sizeof (parsers[0]); i ++) { + shift = CHAR_BIT - parsers[i].prefixlen; + mask = parsers[i].prefix >> shift; + + if (mask == (t >> shift)) { + return &parsers[i]; + } + } + + return NULL; +} + +static inline struct ucl_stack * +ucl_msgpack_get_container (struct ucl_parser *parser, + struct ucl_msgpack_parser *obj_parser, uint64_t len) +{ + struct ucl_stack *stack; + + assert (obj_parser != NULL); + + if (obj_parser->flags & MSGPACK_FLAG_CONTAINER) { + assert ((len & MSGPACK_CONTAINER_BIT) == 0); + /* + * Insert new container to the stack + */ + if (parser->stack == NULL) { + parser->stack = calloc (1, sizeof (struct ucl_stack)); + + if (parser->stack == NULL) { + ucl_create_err (&parser->err, "no memory"); + return NULL; + } + } + else { + stack = calloc (1, sizeof (struct ucl_stack)); + + if (stack == NULL) { + ucl_create_err (&parser->err, "no memory"); + return NULL; + } + + stack->next = parser->stack; + parser->stack = stack; + } + + parser->stack->level = len | MSGPACK_CONTAINER_BIT; + +#ifdef MSGPACK_DEBUG_PARSER + stack = parser->stack; + while (stack) { + fprintf(stderr, "+"); + stack = stack->next; + } + + fprintf(stderr, "%s -> %d\n", obj_parser->flags & MSGPACK_FLAG_ASSOC ? 
"object" : "array", (int)len); +#endif + } + else { + /* + * Get the current stack top + */ + if (parser->stack) { + return parser->stack; + } + else { + ucl_create_err (&parser->err, "bad top level object for msgpack"); + return NULL; + } + } + + return parser->stack; +} + +static bool +ucl_msgpack_is_container_finished (struct ucl_stack *container) +{ + uint64_t level; + + assert (container != NULL); + + if (container->level & MSGPACK_CONTAINER_BIT) { + level = container->level & ~MSGPACK_CONTAINER_BIT; + + if (level == 0) { + return true; + } + } + + return false; +} + +static bool +ucl_msgpack_insert_object (struct ucl_parser *parser, + const unsigned char *key, + size_t keylen, ucl_object_t *obj) +{ + uint64_t level; + struct ucl_stack *container; + + container = parser->stack; + assert (container != NULL); + assert (container->level > 0); + assert (obj != NULL); + assert (container->obj != NULL); + + if (container->obj->type == UCL_ARRAY) { + ucl_array_append (container->obj, obj); + } + else if (container->obj->type == UCL_OBJECT) { + if (key == NULL || keylen == 0) { + ucl_create_err (&parser->err, "cannot insert object with no key"); + return false; + } + + obj->key = key; + obj->keylen = keylen; + + if (!(parser->flags & UCL_PARSER_ZEROCOPY)) { + ucl_copy_key_trash (obj); + } + + ucl_parser_process_object_element (parser, obj); + } + else { + ucl_create_err (&parser->err, "bad container type"); + return false; + } + + if (container->level & MSGPACK_CONTAINER_BIT) { + level = container->level & ~MSGPACK_CONTAINER_BIT; + container->level = (level - 1) | MSGPACK_CONTAINER_BIT; + } + + return true; +} + +static struct ucl_stack * +ucl_msgpack_get_next_container (struct ucl_parser *parser) +{ + struct ucl_stack *cur = NULL; + uint64_t level; + + cur = parser->stack; + + if (cur == NULL) { + return NULL; + } + + if (cur->level & MSGPACK_CONTAINER_BIT) { + level = cur->level & ~MSGPACK_CONTAINER_BIT; + + if (level == 0) { + /* We need to switch to the previous 
container */ + parser->stack = cur->next; + parser->cur_obj = cur->obj; + free (cur); + +#ifdef MSGPACK_DEBUG_PARSER + cur = parser->stack; + while (cur) { + fprintf(stderr, "-"); + cur = cur->next; + } + fprintf(stderr, "-%s -> %d\n", parser->cur_obj->type == UCL_OBJECT ? "object" : "array", (int)parser->cur_obj->len); +#endif + + return ucl_msgpack_get_next_container (parser); + } + } + + /* + * For UCL containers we don't know length, so we just insert the whole + * message pack blob into the top level container + */ + + assert (cur->obj != NULL); + + return cur; +} + +#define CONSUME_RET do { \ + if (ret != -1) { \ + p += ret; \ + remain -= ret; \ + obj_parser = NULL; \ + assert (remain >= 0); \ + } \ + else { \ + ucl_create_err (&parser->err, \ + "cannot parse type %d of len %u", \ + (int)obj_parser->fmt, \ + (unsigned)len); \ + return false; \ + } \ +} while(0) + +#define GET_NEXT_STATE do { \ + container = ucl_msgpack_get_next_container (parser); \ + if (container == NULL) { \ + ucl_create_err (&parser->err, \ + "empty container"); \ + return false; \ + } \ + next_state = container->obj->type == UCL_OBJECT ? 
\ + read_assoc_key : read_array_value; \ +} while(0) + +static bool +ucl_msgpack_consume (struct ucl_parser *parser) +{ + const unsigned char *p, *end, *key = NULL; + struct ucl_stack *container; + enum e_msgpack_parser_state { + read_type, + start_assoc, + start_array, + read_assoc_key, + read_assoc_value, + finish_assoc_value, + read_array_value, + finish_array_value, + error_state + } state = read_type, next_state = error_state; + struct ucl_msgpack_parser *obj_parser; + uint64_t len; + ssize_t ret, remain, keylen = 0; +#ifdef MSGPACK_DEBUG_PARSER + uint64_t i; + enum e_msgpack_parser_state hist[256]; +#endif + + p = parser->chunks->begin; + remain = parser->chunks->remain; + end = p + remain; + + + while (p < end) { +#ifdef MSGPACK_DEBUG_PARSER + hist[i++ % 256] = state; +#endif + switch (state) { + case read_type: + obj_parser = ucl_msgpack_get_parser_from_type (*p); + + if (obj_parser == NULL) { + ucl_create_err (&parser->err, "unknown msgpack format: %x", + (unsigned int)*p); + + return false; + } + /* Now check length sanity */ + if (obj_parser->flags & MSGPACK_FLAG_FIXED) { + if (obj_parser->len == 0) { + /* We have an embedded size */ + len = *p & ~obj_parser->prefix; + } + else { + if (remain < obj_parser->len) { + ucl_create_err (&parser->err, "not enough data remain to " + "read object's length: %u remain, %u needed", + (unsigned)remain, obj_parser->len); + + return false; + } + + len = obj_parser->len; + } + + if (!(obj_parser->flags & MSGPACK_FLAG_TYPEVALUE)) { + /* We must pass value as the second byte */ + if (remain > 0) { + p ++; + remain --; + } + } + else { + /* Len is irrelevant now */ + len = 0; + } + } + else { + /* Length is not embedded */ + if (remain < obj_parser->len) { + ucl_create_err (&parser->err, "not enough data remain to " + "read object's length: %u remain, %u needed", + (unsigned)remain, obj_parser->len); + + return false; + } + + p ++; + remain --; + + switch (obj_parser->len) { + case 1: + len = *p; + break; + case 2: + len = 
FROM_BE16 (*(uint16_t *)p); + break; + case 4: + len = FROM_BE32 (*(uint32_t *)p); + break; + case 8: + len = FROM_BE64 (*(uint64_t *)p); + break; + default: + assert (0); + break; + } + + p += obj_parser->len; + remain -= obj_parser->len; + } + + if (obj_parser->flags & MSGPACK_FLAG_ASSOC) { + /* We have just read the new associative map */ + state = start_assoc; + } + else if (obj_parser->flags & MSGPACK_FLAG_CONTAINER){ + state = start_array; + } + else { + state = next_state; + } + + break; + case start_assoc: + parser->cur_obj = ucl_object_new_full (UCL_OBJECT, + parser->chunks->priority); + /* Insert to the previous level container */ + if (parser->stack && !ucl_msgpack_insert_object (parser, + key, keylen, parser->cur_obj)) { + return false; + } + /* Get new container */ + container = ucl_msgpack_get_container (parser, obj_parser, len); + + if (container == NULL) { + return false; + } + + ret = obj_parser->func (parser, container, len, obj_parser->fmt, + p, remain); + CONSUME_RET; + key = NULL; + keylen = 0; + + if (len > 0) { + state = read_type; + next_state = read_assoc_key; + } + else { + /* Empty object */ + state = finish_assoc_value; + } + break; + + case start_array: + parser->cur_obj = ucl_object_new_full (UCL_ARRAY, + parser->chunks->priority); + /* Insert to the previous level container */ + if (parser->stack && !ucl_msgpack_insert_object (parser, + key, keylen, parser->cur_obj)) { + return false; + } + /* Get new container */ + container = ucl_msgpack_get_container (parser, obj_parser, len); + + if (container == NULL) { + return false; + } + + ret = obj_parser->func (parser, container, len, obj_parser->fmt, + p, remain); + CONSUME_RET; + + if (len > 0) { + state = read_type; + next_state = read_array_value; + } + else { + /* Empty array */ + state = finish_array_value; + } + break; + + case read_array_value: + /* + * p is now at the value start, len now contains length read and + * obj_parser contains the corresponding specific parser + */ + 
container = parser->stack; + + if (container == NULL) { + return false; + } + + ret = obj_parser->func (parser, container, len, obj_parser->fmt, + p, remain); + CONSUME_RET; + + + /* Insert value to the container and check if we have finished array */ + if (!ucl_msgpack_insert_object (parser, NULL, 0, + parser->cur_obj)) { + return false; + } + + if (ucl_msgpack_is_container_finished (container)) { + state = finish_array_value; + } + else { + /* Read more elements */ + state = read_type; + next_state = read_array_value; + } + + break; + + case read_assoc_key: + /* + * Keys must have string type for ucl msgpack + */ + if (!(obj_parser->flags & MSGPACK_FLAG_KEY)) { + ucl_create_err (&parser->err, "bad type for key: %u, expected " + "string", (unsigned)obj_parser->fmt); + + return false; + } + + key = p; + keylen = len; + + if (keylen > remain || keylen == 0) { + ucl_create_err (&parser->err, "too long or empty key"); + return false; + } + + p += len; + remain -= len; + + state = read_type; + next_state = read_assoc_value; + break; + + case read_assoc_value: + /* + * p is now at the value start, len now contains length read and + * obj_parser contains the corresponding specific parser + */ + container = parser->stack; + + if (container == NULL) { + return false; + } + + ret = obj_parser->func (parser, container, len, obj_parser->fmt, + p, remain); + CONSUME_RET; + + assert (key != NULL && keylen > 0); + + if (!ucl_msgpack_insert_object (parser, key, keylen, + parser->cur_obj)) { + return false; + } + + key = NULL; + keylen = 0; + + if (ucl_msgpack_is_container_finished (container)) { + state = finish_assoc_value; + } + else { + /* Read more elements */ + state = read_type; + next_state = read_assoc_key; + } + break; + + case finish_array_value: + case finish_assoc_value: + GET_NEXT_STATE; + state = read_type; + break; + + case error_state: + ucl_create_err (&parser->err, "invalid state machine state"); + + return false; + } + } + + /* Check the finishing state */ + 
switch (state) { + case start_array: + case start_assoc: + /* Empty container at the end */ + if (len != 0) { + ucl_create_err (&parser->err, "invalid non-empty container at the end"); + + return false; + } + + parser->cur_obj = ucl_object_new_full ( + state == start_array ? UCL_ARRAY : UCL_OBJECT, + parser->chunks->priority); + /* Insert to the previous level container */ + if (!ucl_msgpack_insert_object (parser, + key, keylen, parser->cur_obj)) { + return false; + } + /* Get new container */ + container = ucl_msgpack_get_container (parser, obj_parser, len); + + if (container == NULL) { + return false; + } + + ret = obj_parser->func (parser, container, len, obj_parser->fmt, + p, remain); + break; + + case read_array_value: + case read_assoc_value: + if (len != 0) { + ucl_create_err (&parser->err, "unfinished value at the end"); + + return false; + } + + container = parser->stack; + + if (container == NULL) { + return false; + } + + ret = obj_parser->func (parser, container, len, obj_parser->fmt, + p, remain); + CONSUME_RET; + + + /* Insert value to the container and check if we have finished array */ + if (!ucl_msgpack_insert_object (parser, NULL, 0, + parser->cur_obj)) { + return false; + } + break; + case finish_array_value: + case finish_assoc_value: + case read_type: + /* Valid finishing state */ + break; + default: + /* Invalid finishing state */ + ucl_create_err (&parser->err, "invalid state machine finishing state: %d", + state); + + return false; + } + + /* Rewind to the top level container */ + ucl_msgpack_get_next_container (parser); + assert (parser->stack == NULL || + (parser->stack->level & MSGPACK_CONTAINER_BIT) == 0); + + return true; +} + +bool +ucl_parse_msgpack (struct ucl_parser *parser) +{ + ucl_object_t *container = NULL; + const unsigned char *p; + bool ret; + + assert (parser != NULL); + assert (parser->chunks != NULL); + assert (parser->chunks->begin != NULL); + assert (parser->chunks->remain != 0); + + p = parser->chunks->begin; + + if 
(parser->stack) { + container = parser->stack->obj; + } + + /* + * When we start parsing message pack chunk, we must ensure that we + * have either a valid container or the top object inside message pack is + * of container type + */ + if (container == NULL) { + if ((*p & 0x80) != 0x80 && !(*p >= 0xdc && *p <= 0xdf)) { + ucl_create_err (&parser->err, "bad top level object for msgpack"); + return false; + } + } + + ret = ucl_msgpack_consume (parser); + + if (ret && parser->top_obj == NULL) { + parser->top_obj = parser->cur_obj; + } + + return ret; +} + +static ssize_t +ucl_msgpack_parse_map (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain) +{ + container->obj = parser->cur_obj; + + return 0; +} + +static ssize_t +ucl_msgpack_parse_array (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain) +{ + container->obj = parser->cur_obj; + + return 0; +} + +static ssize_t +ucl_msgpack_parse_string (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain) +{ + ucl_object_t *obj; + + if (len > remain) { + return -1; + } + + obj = ucl_object_new_full (UCL_STRING, parser->chunks->priority); + obj->value.sv = pos; + obj->len = len; + + if (fmt >= msgpack_bin8 && fmt <= msgpack_bin32) { + obj->flags |= UCL_OBJECT_BINARY; + } + + if (!(parser->flags & UCL_PARSER_ZEROCOPY)) { + if (obj->flags & UCL_OBJECT_BINARY) { + obj->trash_stack[UCL_TRASH_VALUE] = malloc (len); + + if (obj->trash_stack[UCL_TRASH_VALUE] != NULL) { + memcpy (obj->trash_stack[UCL_TRASH_VALUE], pos, len); + } + } + else { + ucl_copy_value_trash (obj); + } + } + + parser->cur_obj = obj; + + return len; +} + +static ssize_t +ucl_msgpack_parse_int (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const 
unsigned char *pos, size_t remain) +{ + ucl_object_t *obj; + + if (len > remain) { + return -1; + } + + obj = ucl_object_new_full (UCL_INT, parser->chunks->priority); + + switch (fmt) { + case msgpack_positive_fixint: + obj->value.iv = (*pos & 0x7f); + len = 1; + break; + case msgpack_negative_fixint: + obj->value.iv = - (*pos & 0x1f); + len = 1; + break; + case msgpack_uint8: + obj->value.iv = (unsigned char)*pos; + len = 1; + break; + case msgpack_int8: + obj->value.iv = (signed char)*pos; + len = 1; + break; + case msgpack_int16: + obj->value.iv = FROM_BE16 (*(int16_t *)pos); + len = 2; + break; + case msgpack_uint16: + obj->value.iv = FROM_BE16 (*(uint16_t *)pos); + len = 2; + break; + case msgpack_int32: + obj->value.iv = FROM_BE32 (*(int32_t *)pos); + len = 4; + break; + case msgpack_uint32: + obj->value.iv = FROM_BE32 (*(uint32_t *)pos); + len = 4; + break; + case msgpack_int64: + obj->value.iv = FROM_BE64 (*(int64_t *)pos); + len = 8; + break; + case msgpack_uint64: + obj->value.iv = FROM_BE64 (*(uint64_t *)pos); + len = 8; + break; + default: + assert (0); + break; + } + + parser->cur_obj = obj; + + return len; +} + +static ssize_t +ucl_msgpack_parse_float (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain) +{ + ucl_object_t *obj; + union { + uint32_t i; + float f; + } d; + + if (len > remain) { + return -1; + } + + obj = ucl_object_new_full (UCL_FLOAT, parser->chunks->priority); + + switch (fmt) { + case msgpack_float32: + d.i = FROM_BE32 (*(uint32_t *)pos); + /* XXX: can be slow */ + obj->value.dv = d.f; + len = 4; + break; + case msgpack_float64: + obj->value.iv = FROM_BE64 (*(uint64_t *)pos); + len = 8; + break; + default: + assert (0); + break; + } + + parser->cur_obj = obj; + + return len; +} + +static ssize_t +ucl_msgpack_parse_bool (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, 
size_t remain) +{ + ucl_object_t *obj; + + if (len > remain) { + return -1; + } + + obj = ucl_object_new_full (UCL_BOOLEAN, parser->chunks->priority); + + switch (fmt) { + case msgpack_true: + obj->value.iv = true; + break; + case msgpack_false: + obj->value.iv = false; + break; + default: + assert (0); + break; + } + + parser->cur_obj = obj; + + return 1; +} + +static ssize_t +ucl_msgpack_parse_null (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain) +{ + ucl_object_t *obj; + + if (len > remain) { + return -1; + } + + obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); + parser->cur_obj = obj; + + return 1; +} + +static ssize_t +ucl_msgpack_parse_ignore (struct ucl_parser *parser, + struct ucl_stack *container, size_t len, enum ucl_msgpack_format fmt, + const unsigned char *pos, size_t remain) +{ + if (len > remain) { + return -1; + } + + switch (fmt) { + case msgpack_fixext1: + len = 2; + break; + case msgpack_fixext2: + len = 3; + break; + case msgpack_fixext4: + len = 5; + break; + case msgpack_fixext8: + len = 9; + break; + case msgpack_fixext16: + len = 17; + break; + case msgpack_ext8: + case msgpack_ext16: + case msgpack_ext32: + len = len + 1; + break; + default: + ucl_create_err (&parser->err, "bad type: %x", (unsigned)fmt); + return -1; + } + + return len; +} diff --git a/src/ucl_parser.c b/src/ucl_parser.c index 75acba8..9bd4139 100644 --- a/src/ucl_parser.c +++ b/src/ucl_parser.c @@ -67,6 +67,7 @@ ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **e else { filename = "<unknown>"; } + if (chunk->pos < chunk->end) { if (isgraph (*chunk->pos)) { fmt_string = "error while parsing %s: " @@ -84,6 +85,8 @@ ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **e ucl_create_err (err, "error while parsing %s: at the end of chunk: %s", filename, str); } + + parser->err_code = code; } /** @@ -513,7 +516,7 @@ 
ucl_copy_or_store_ptr (struct ucl_parser *parser, /* Copy string */ *dst = UCL_ALLOC (in_len + 1); if (*dst == NULL) { - ucl_set_err (parser, 0, "cannot allocate memory for a string", + ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for a string", &parser->err); return false; } @@ -559,7 +562,8 @@ ucl_copy_or_store_ptr (struct ucl_parser *parser, * @return */ static inline ucl_object_t * -ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level) +ucl_parser_add_container (ucl_object_t *obj, struct ucl_parser *parser, + bool is_array, int level) { struct ucl_stack *st; @@ -570,7 +574,9 @@ ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_arra else { obj->type = UCL_OBJECT; } - obj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE); + if (obj->value.ov == NULL) { + obj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE); + } parser->state = UCL_STATE_KEY; } else { @@ -585,7 +591,7 @@ ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_arra st = UCL_ALLOC (sizeof (struct ucl_stack)); if (st == NULL) { - ucl_set_err (parser, 0, "cannot allocate memory for an object", + ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for an object", &parser->err); ucl_object_unref (obj); return NULL; @@ -834,19 +840,21 @@ ucl_maybe_parse_number (ucl_object_t *obj, *pos = c; return EINVAL; - set_obj: - if (allow_double && (need_double || is_time)) { - if (!is_time) { - obj->type = UCL_FLOAT; +set_obj: + if (obj != NULL) { + if (allow_double && (need_double || is_time)) { + if (!is_time) { + obj->type = UCL_FLOAT; + } + else { + obj->type = UCL_TIME; + } + obj->value.dv = is_neg ? (-dv) : dv; } else { - obj->type = UCL_TIME; + obj->type = UCL_INT; + obj->value.iv = is_neg ? (-lv) : lv; } - obj->value.dv = is_neg ? (-dv) : dv; - } - else { - obj->type = UCL_INT; - obj->value.iv = is_neg ? 
(-lv) : lv; } *pos = p; return 0; @@ -856,6 +864,7 @@ ucl_maybe_parse_number (ucl_object_t *obj, * Parse possible number * @param parser * @param chunk + * @param obj * @return true if a number has been parsed */ static bool @@ -875,7 +884,8 @@ ucl_lex_number (struct ucl_parser *parser, return true; } else if (ret == ERANGE) { - ucl_set_err (parser, ERANGE, "numeric value out of range", &parser->err); + ucl_set_err (parser, UCL_ESYNTAX, "numeric value out of range", + &parser->err); } return false; @@ -885,6 +895,9 @@ ucl_lex_number (struct ucl_parser *parser, * Parse quoted string with possible escapes * @param parser * @param chunk + * @param need_unescape + * @param ucl_escape + * @param var_expand * @return true if a string has been parsed */ static bool @@ -971,6 +984,7 @@ ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont, /* Implicit array */ top->flags |= UCL_OBJECT_MULTIVALUE; DL_APPEND (top, elt); + parser->stack->obj->len ++; } else { if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) { @@ -979,34 +993,127 @@ ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont, } else { /* Convert to an array */ - ucl_hash_delete (cont, top); nobj = ucl_object_typed_new (UCL_ARRAY); nobj->key = top->key; nobj->keylen = top->keylen; nobj->flags |= UCL_OBJECT_MULTIVALUE; ucl_array_append (nobj, top); ucl_array_append (nobj, elt); - ucl_hash_insert (cont, nobj, nobj->key, nobj->keylen); + ucl_hash_replace (cont, top, nobj); + } + } +} + +bool +ucl_parser_process_object_element (struct ucl_parser *parser, ucl_object_t *nobj) +{ + ucl_hash_t *container; + ucl_object_t *tobj; + + container = parser->stack->obj->value.ov; + + tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj)); + if (tobj == NULL) { + container = ucl_hash_insert_object (container, nobj, + parser->flags & UCL_PARSER_KEY_LOWERCASE); + nobj->prev = nobj; + nobj->next = NULL; + parser->stack->obj->len ++; + } + else { + unsigned priold = ucl_object_get_priority 
(tobj), + prinew = ucl_object_get_priority (nobj); + switch (parser->chunks->strategy) { + + case UCL_DUPLICATE_APPEND: + /* + * The logic here is the following: + * + * - if we have two objects with the same priority, then we form an + * implicit or explicit array + * - if a new object has bigger priority, then we overwrite an old one + * - if a new object has lower priority, then we ignore it + */ + + + /* Special case for inherited objects */ + if (tobj->flags & UCL_OBJECT_INHERITED) { + prinew = priold + 1; + } + + if (priold == prinew) { + ucl_parser_append_elt (parser, container, tobj, nobj); + } + else if (priold > prinew) { + /* + * We add this new object to a list of trash objects just to ensure + * that it won't come to any real object + * XXX: rather inefficient approach + */ + DL_APPEND (parser->trash_objs, nobj); + } + else { + ucl_hash_replace (container, tobj, nobj); + ucl_object_unref (tobj); + } + + break; + + case UCL_DUPLICATE_REWRITE: + /* We just rewrite old values regardless of priority */ + ucl_hash_replace (container, tobj, nobj); + ucl_object_unref (tobj); + + break; + + case UCL_DUPLICATE_ERROR: + ucl_create_err (&parser->err, "error while parsing %s: " + "line: %d, column: %d: duplicate element for key '%s' " + "has been found", + parser->cur_file ? 
parser->cur_file : "<unknown>", + parser->chunks->line, parser->chunks->column, nobj->key); + return false; + + case UCL_DUPLICATE_MERGE: + /* + * Here we do have some old object so we just push it on top of objects stack + */ + if (tobj->type == UCL_OBJECT || tobj->type == UCL_ARRAY) { + ucl_object_unref (nobj); + nobj = tobj; + } + else { + /* For other types we create implicit array as usual */ + ucl_parser_append_elt (parser, container, tobj, nobj); + } + break; } } + + parser->stack->obj->value.ov = container; + parser->cur_obj = nobj; + + return true; } /** * Parse a key in an object * @param parser * @param chunk + * @param next_key + * @param end_of_object * @return true if a key has been parsed */ static bool -ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object) +ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, + bool *next_key, bool *end_of_object) { const unsigned char *p, *c = NULL, *end, *t; const char *key = NULL; bool got_quote = false, got_eq = false, got_semicolon = false, need_unescape = false, ucl_escape = false, var_expand = false, got_content = false, got_sep = false; - ucl_object_t *nobj, *tobj; - ucl_hash_t *container; + ucl_object_t *nobj; ssize_t keylen; p = chunk->pos; @@ -1191,47 +1298,17 @@ ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_ke return false; } - container = parser->stack->obj->value.ov; nobj->key = key; nobj->keylen = keylen; - tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj)); - if (tobj == NULL) { - container = ucl_hash_insert_object (container, nobj, - parser->flags & UCL_PARSER_KEY_LOWERCASE); - nobj->prev = nobj; - nobj->next = NULL; - parser->stack->obj->len ++; - } - else { - /* - * The logic here is the following: - * - * - if we have two objects with the same priority, then we form an - * implicit or explicit array - * - if a new object has bigger priority, then we overwrite an old one - * - if a 
new object has lower priority, then we ignore it - */ - unsigned priold = ucl_object_get_priority (tobj), - prinew = ucl_object_get_priority (nobj); - if (priold == prinew) { - ucl_parser_append_elt (parser, container, tobj, nobj); - } - else if (priold > prinew) { - ucl_object_unref (nobj); - return true; - } - else { - ucl_hash_replace (container, tobj, nobj); - ucl_object_unref (tobj); - } + + if (!ucl_parser_process_object_element (parser, nobj)) { + return false; } if (ucl_escape) { nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; } - parser->stack->obj->value.ov = container; - parser->cur_obj = nobj; return true; } @@ -1240,6 +1317,8 @@ ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_ke * Parse a cl string * @param parser * @param chunk + * @param var_expand + * @param need_unescape * @return true if a key has been parsed */ static bool @@ -1309,6 +1388,8 @@ ucl_parse_string_value (struct ucl_parser *parser, * @param chunk * @param term * @param term_len + * @param beg + * @param var_expand * @return size of multiline string or 0 in case of error */ static int @@ -1360,8 +1441,8 @@ ucl_parse_multiline_string (struct ucl_parser *parser, return len; } -static ucl_object_t* -ucl_get_value_object (struct ucl_parser *parser) +static inline ucl_object_t* +ucl_parser_get_container (struct ucl_parser *parser) { ucl_object_t *t, *obj = NULL; @@ -1373,7 +1454,12 @@ ucl_get_value_object (struct ucl_parser *parser) /* Object must be allocated */ obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); t = parser->stack->obj; - ucl_array_append (t, obj); + + if (!ucl_array_append (t, obj)) { + ucl_object_unref (obj); + return NULL; + } + parser->cur_obj = obj; } else { @@ -1417,42 +1503,51 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) c = p; switch (*p) { case '"': - obj = ucl_get_value_object (parser); ucl_chunk_skipc (chunk, p); - if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) 
{ + + if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, + &var_expand)) { return false; } + + obj = ucl_parser_get_container (parser); str_len = chunk->pos - c - 2; obj->type = UCL_STRING; - if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE], - &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) { + if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, + &obj->trash_stack[UCL_TRASH_VALUE], + &obj->value.sv, str_len, need_unescape, false, + var_expand)) == -1) { return false; } obj->len = str_len; + parser->state = UCL_STATE_AFTER_VALUE; p = chunk->pos; + return true; break; case '{': - obj = ucl_get_value_object (parser); + obj = ucl_parser_get_container (parser); /* We have a new object */ - obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level); + obj = ucl_parser_add_container (obj, parser, false, parser->stack->level); if (obj == NULL) { return false; } ucl_chunk_skipc (chunk, p); + return true; break; case '[': - obj = ucl_get_value_object (parser); + obj = ucl_parser_get_container (parser); /* We have a new array */ - obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level); + obj = ucl_parser_add_container (obj, parser, true, parser->stack->level); if (obj == NULL) { return false; } ucl_chunk_skipc (chunk, p); + return true; break; case ']': @@ -1466,7 +1561,7 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) } break; case '<': - obj = ucl_get_value_object (parser); + obj = ucl_parser_get_container (parser); /* We have something like multiline value, which must be <<[A-Z]+\n */ if (chunk->end - p > 3) { if (memcmp (p, "<<", 2) == 0) { @@ -1488,13 +1583,19 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) "unterminated multiline value", &parser->err); return false; } + obj->type = UCL_STRING; - if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], - &obj->value.sv, str_len - 1, false, 
false, var_expand)) == -1) { + obj->flags |= UCL_OBJECT_MULTILINE; + if ((str_len = ucl_copy_or_store_ptr (parser, c, + &obj->trash_stack[UCL_TRASH_VALUE], + &obj->value.sv, str_len - 1, false, + false, var_expand)) == -1) { return false; } obj->len = str_len; + parser->state = UCL_STATE_AFTER_VALUE; + return true; } } @@ -1503,8 +1604,9 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) default: parse_string: if (obj == NULL) { - obj = ucl_get_value_object (parser); + obj = ucl_parser_get_container (parser); } + /* Parse atom */ if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { if (!ucl_lex_number (parser, chunk, obj)) { @@ -1519,7 +1621,8 @@ parse_string: /* Fallback to normal string */ } - if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) { + if (!ucl_parse_string_value (parser, chunk, &var_expand, + &need_unescape)) { return false; } /* Cut trailing spaces */ @@ -1530,7 +1633,7 @@ parse_string: } str_len = chunk->pos - c - stripped_spaces; if (str_len <= 0) { - ucl_set_err (parser, 0, "string value must not be empty", + ucl_set_err (parser, UCL_ESYNTAX, "string value must not be empty", &parser->err); return false; } @@ -1540,7 +1643,8 @@ parse_string: } else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { obj->type = UCL_STRING; - if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], + if ((str_len = ucl_copy_or_store_ptr (parser, c, + &obj->trash_stack[UCL_TRASH_VALUE], &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) { return false; @@ -1652,6 +1756,9 @@ ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) * Handle macro data * @param parser * @param chunk + * @param marco + * @param macro_start + * @param macro_len * @return */ static bool @@ -1790,6 +1897,7 @@ ucl_parse_macro_arguments (struct ucl_parser *parser, if (chunk->remain == 0) { goto restore_chunk; } + args_len ++; ucl_chunk_skipc (chunk, p); break; case 99: @@ 
-1842,8 +1950,6 @@ restore_chunk: /** * Handle the main states of rcl parser * @param parser parser structure - * @param data the pointer to the beginning of a chunk - * @param len the length of a chunk * @return true if chunk has been parsed and false in case of error */ static bool @@ -1858,17 +1964,6 @@ ucl_state_machine (struct ucl_parser *parser) bool next_key = false, end_of_object = false, ret; if (parser->top_obj == NULL) { - if (*chunk->pos == '[') { - obj = ucl_add_parser_stack (NULL, parser, true, 0); - } - else { - obj = ucl_add_parser_stack (NULL, parser, false, 0); - } - if (obj == NULL) { - return false; - } - parser->top_obj = obj; - parser->cur_obj = obj; parser->state = UCL_STATE_INIT; } @@ -1892,7 +1987,9 @@ ucl_state_machine (struct ucl_parser *parser) UCL_CHARACTER_WHITESPACE_UNSAFE)) { ucl_chunk_skipc (chunk, p); } + p = chunk->pos; + if (*p == '[') { parser->state = UCL_STATE_VALUE; ucl_chunk_skipc (chunk, p); @@ -1903,6 +2000,23 @@ ucl_state_machine (struct ucl_parser *parser) ucl_chunk_skipc (chunk, p); } } + + if (parser->top_obj == NULL) { + if (parser->state == UCL_STATE_VALUE) { + obj = ucl_parser_add_container (NULL, parser, true, 0); + } + else { + obj = ucl_parser_add_container (NULL, parser, false, 0); + } + + if (obj == NULL) { + return false; + } + + parser->top_obj = obj; + parser->cur_obj = obj; + } + } break; case UCL_STATE_KEY: @@ -1936,7 +2050,7 @@ ucl_state_machine (struct ucl_parser *parser) else if (parser->state != UCL_STATE_MACRO_NAME) { if (next_key && parser->stack->obj->type == UCL_OBJECT) { /* Parse more keys and nest objects accordingly */ - obj = ucl_add_parser_stack (parser->cur_obj, parser, false, + obj = ucl_parser_add_container (parser->cur_obj, parser, false, parser->stack->level + 1); if (obj == NULL) { return false; @@ -1967,6 +2081,7 @@ ucl_state_machine (struct ucl_parser *parser) parser->state = UCL_STATE_ERROR; return false; } + if (parser->stack != NULL) { if (parser->stack->obj->type == UCL_OBJECT) { 
parser->state = UCL_STATE_KEY; @@ -1987,20 +2102,36 @@ ucl_state_machine (struct ucl_parser *parser) *p != '(') { ucl_chunk_skipc (chunk, p); } - else if (p - c > 0) { - /* We got macro name */ - macro_len = (size_t)(p - c); - HASH_FIND (hh, parser->macroes, c, macro_len, macro); - if (macro == NULL) { - ucl_create_err (&parser->err, "error on line %d at column %d: " - "unknown macro: '%.*s', character: '%c'", - chunk->line, chunk->column, (int)(p - c), c, *chunk->pos); + else { + if (p - c > 0) { + /* We got macro name */ + macro_len = (size_t) (p - c); + HASH_FIND (hh, parser->macroes, c, macro_len, macro); + if (macro == NULL) { + ucl_create_err (&parser->err, + "error on line %d at column %d: " + "unknown macro: '%.*s', character: '%c'", + chunk->line, + chunk->column, + (int) (p - c), + c, + *chunk->pos); + parser->state = UCL_STATE_ERROR; + return false; + } + /* Now we need to skip all spaces */ + SKIP_SPACES_COMMENTS(parser, chunk, p); + parser->state = UCL_STATE_MACRO; + } + else { + /* We have invalid macro name */ + ucl_create_err (&parser->err, + "error on line %d at column %d: invalid macro name", + chunk->line, + chunk->column); parser->state = UCL_STATE_ERROR; return false; } - /* Now we need to skip all spaces */ - SKIP_SPACES_COMMENTS(parser, chunk, p); - parser->state = UCL_STATE_MACRO; } break; case UCL_STATE_MACRO: @@ -2024,14 +2155,36 @@ ucl_state_machine (struct ucl_parser *parser) macro_start, macro_len); parser->state = parser->prev_state; if (macro_escaped == NULL) { - ret = macro->handler (macro_start, macro_len, macro_args, - macro->ud); + if (macro->is_context) { + ret = macro->h.context_handler (macro_start, macro_len, + macro_args, + parser->top_obj, + macro->ud); + } + else { + ret = macro->h.handler (macro_start, macro_len, macro_args, + macro->ud); + } } else { - ret = macro->handler (macro_escaped, macro_len, macro_args, + if (macro->is_context) { + ret = macro->h.context_handler (macro_escaped, macro_len, + macro_args, + 
parser->top_obj, + macro->ud); + } + else { + ret = macro->h.handler (macro_escaped, macro_len, macro_args, macro->ud); + } + UCL_FREE (macro_len + 1, macro_escaped); } + + /* + * Chunk can be modified within macro handler + */ + chunk = parser->chunks; p = chunk->pos; if (macro_args) { ucl_object_unref (macro_args); @@ -2061,13 +2214,18 @@ ucl_parser_new (int flags) if (new == NULL) { return NULL; } + memset (new, 0, sizeof (struct ucl_parser)); ucl_parser_register_macro (new, "include", ucl_include_handler, new); ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new); ucl_parser_register_macro (new, "includes", ucl_includes_handler, new); + ucl_parser_register_macro (new, "priority", ucl_priority_handler, new); + ucl_parser_register_macro (new, "load", ucl_load_handler, new); + ucl_parser_register_context_macro (new, "inherit", ucl_inherit_handler, new); new->flags = flags; + new->includepaths = NULL; /* Initial assumption about filevars */ ucl_parser_set_filevars (new, NULL, false); @@ -2075,6 +2233,17 @@ ucl_parser_new (int flags) return new; } +bool +ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio) +{ + if (parser == NULL) { + return false; + } + + parser->default_priority = prio; + + return true; +} void ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, @@ -2085,14 +2254,39 @@ ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, if (macro == NULL || handler == NULL) { return; } + + new = UCL_ALLOC (sizeof (struct ucl_macro)); + if (new == NULL) { + return; + } + + memset (new, 0, sizeof (struct ucl_macro)); + new->h.handler = handler; + new->name = strdup (macro); + new->ud = ud; + HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); +} + +void +ucl_parser_register_context_macro (struct ucl_parser *parser, const char *macro, + ucl_context_macro_handler handler, void* ud) +{ + struct ucl_macro *new; + + if (macro == NULL || handler == NULL) { + 
return; + } + new = UCL_ALLOC (sizeof (struct ucl_macro)); if (new == NULL) { return; } + memset (new, 0, sizeof (struct ucl_macro)); - new->handler = handler; + new->h.context_handler = handler; new->name = strdup (macro); new->ud = ud; + new->is_context = true; HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); } @@ -2159,11 +2353,16 @@ ucl_parser_set_variables_handler (struct ucl_parser *parser, } bool -ucl_parser_add_chunk_priority (struct ucl_parser *parser, const unsigned char *data, - size_t len, unsigned priority) +ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data, + size_t len, unsigned priority, enum ucl_duplicate_strategy strat, + enum ucl_parse_type parse_type) { struct ucl_chunk *chunk; + if (parser == NULL) { + return false; + } + if (data == NULL) { ucl_create_err (&parser->err, "invalid chunk added"); return false; @@ -2185,14 +2384,24 @@ ucl_parser_add_chunk_priority (struct ucl_parser *parser, const unsigned char *d chunk->line = 1; chunk->column = 0; chunk->priority = priority; + chunk->strategy = strat; + chunk->parse_type = parse_type; LL_PREPEND (parser->chunks, chunk); parser->recursion ++; + if (parser->recursion > UCL_MAX_RECURSION) { ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", parser->recursion); return false; } - return ucl_state_machine (parser); + + switch (parse_type) { + default: + case UCL_PARSE_UCL: + return ucl_state_machine (parser); + case UCL_PARSE_MSGPACK: + return ucl_parse_msgpack (parser); + } } ucl_create_err (&parser->err, "a parser is in an invalid state"); @@ -2201,15 +2410,33 @@ ucl_parser_add_chunk_priority (struct ucl_parser *parser, const unsigned char *d } bool +ucl_parser_add_chunk_priority (struct ucl_parser *parser, + const unsigned char *data, size_t len, unsigned priority) +{ + /* We dereference parser, so this check is essential */ + if (parser == NULL) { + return false; + } + + return ucl_parser_add_chunk_full (parser, 
data, len, + priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); +} + +bool ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, size_t len) { - return ucl_parser_add_chunk_priority (parser, data, len, 0); + if (parser == NULL) { + return false; + } + + return ucl_parser_add_chunk_full (parser, data, len, + parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); } bool -ucl_parser_add_string (struct ucl_parser *parser, const char *data, - size_t len) +ucl_parser_add_string_priority (struct ucl_parser *parser, const char *data, + size_t len, unsigned priority) { if (data == NULL) { ucl_create_err (&parser->err, "invalid string added"); @@ -2219,5 +2446,40 @@ ucl_parser_add_string (struct ucl_parser *parser, const char *data, len = strlen (data); } - return ucl_parser_add_chunk (parser, (const unsigned char *)data, len); + return ucl_parser_add_chunk_priority (parser, + (const unsigned char *)data, len, priority); +} + +bool +ucl_parser_add_string (struct ucl_parser *parser, const char *data, + size_t len) +{ + if (parser == NULL) { + return false; + } + + return ucl_parser_add_string_priority (parser, + (const unsigned char *)data, len, parser->default_priority); +} + +bool +ucl_set_include_path (struct ucl_parser *parser, ucl_object_t *paths) +{ + if (parser == NULL || paths == NULL) { + return false; + } + + if (parser->includepaths == NULL) { + parser->includepaths = ucl_object_copy (paths); + } + else { + ucl_object_unref (parser->includepaths); + parser->includepaths = ucl_object_copy (paths); + } + + if (parser->includepaths == NULL) { + return false; + } + + return true; } diff --git a/src/ucl_sexp.c b/src/ucl_sexp.c new file mode 100644 index 0000000..3ca7eb1 --- /dev/null +++ b/src/ucl_sexp.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <ucl.h> +#include "ucl.h" +#include "ucl_internal.h" +#include "utlist.h" + +#define NEXT_STATE do { \ +if (p >= end) { \ + if (state != read_ebrace) { \ + ucl_create_err (&parser->err,\ + "extra data");\ + state = parse_err; \ + } \ +} \ +else { \ +switch (*p) { \ + case '(': \ + state = read_obrace; \ + break; \ + case ')': \ + state = read_ebrace; \ + break; \ + default: \ + len = 0; \ + mult = 1; \ + state = read_length; \ + break; \ + } \ +} \ +} while(0) + +bool +ucl_parse_csexp (struct ucl_parser *parser) +{ + const unsigned char *p, *end; + ucl_object_t *obj; + struct ucl_stack *st; + uint64_t len = 0, mult = 1; + enum { + start_parse, + read_obrace, + read_length, + read_value, + read_ebrace, + parse_err + } state = start_parse; + + assert (parser != NULL); + assert (parser->chunks != NULL); + assert (parser->chunks->begin != NULL); + assert (parser->chunks->remain != 0); + + p = parser->chunks->begin; + end = p + parser->chunks->remain; + + while (p < end) { + switch (state) { + case start_parse: + /* At this point we expect open brace */ + if (*p == '(') { + state = read_obrace; + } + else { + ucl_create_err (&parser->err, "bad starting character for " + "sexp block: %x", (int)*p); + state = parse_err; + } + break; + + case read_obrace: + st = calloc (1, sizeof (*st)); + + if (st == NULL) { + ucl_create_err (&parser->err, "no memory"); + state = parse_err; + continue; + } + + st->obj = ucl_object_typed_new (UCL_ARRAY); + + if (st->obj == NULL) { + ucl_create_err (&parser->err, "no memory"); + state = parse_err; + continue; + } + + if (parser->stack == NULL) { + /* We have no stack */ + parser->stack = st; + + if (parser->top_obj == NULL) { + parser->top_obj = st->obj; + } + } + else { + /* Prepend new element to the stack */ + LL_PREPEND (parser->stack, st); + } + + p ++; + NEXT_STATE; + + break; + + case read_length: + if (*p == ':') { + if (len == 0) { + ucl_create_err (&parser->err, 
"zero length element"); + state = parse_err; + continue; + } + + state = read_value; + } + else if (*p >= '0' && *p <= '9') { + len += (*p - '0') * mult; + mult *= 10; + + if (len > UINT32_MAX) { + ucl_create_err (&parser->err, "too big length of an " + "element"); + state = parse_err; + continue; + } + } + else { + ucl_create_err (&parser->err, "bad length character: %x", + (int)*p); + state = parse_err; + continue; + } + + p ++; + break; + + case read_value: + if ((uint64_t)(end - p) > len || len == 0) { + ucl_create_err (&parser->err, "invalid length: %llu, %ld " + "remain", (long long unsigned)len, (long)(end - p)); + state = parse_err; + continue; + } + obj = ucl_object_typed_new (UCL_STRING); + + obj->value.sv = (const char*)p; + obj->len = len; + obj->flags |= UCL_OBJECT_BINARY; + + if (!(parser->flags & UCL_PARSER_ZEROCOPY)) { + ucl_copy_value_trash (obj); + } + + ucl_array_append (parser->stack->obj, obj); + p += len; + NEXT_STATE; + break; + + case read_ebrace: + if (parser->stack == NULL) { + /* We have an extra end brace */ + ucl_create_err (&parser->err, "invalid length: %llu, %ld " + "remain", (long long unsigned)len, (long)(end - p)); + state = parse_err; + continue; + } + /* Pop the container */ + st = parser->stack; + parser->stack = st->next; + + if (parser->stack->obj->type == UCL_ARRAY) { + ucl_array_append (parser->stack->obj, st->obj); + } + else { + ucl_create_err (&parser->err, "bad container object, array " + "expected"); + state = parse_err; + continue; + } + + free (st); + p++; + NEXT_STATE; + break; + + case parse_err: + default: + return false; + } + } + + if (state != read_ebrace) { + ucl_create_err (&parser->err, "invalid finishing state: %d", state); + return false; + } + + return true; +}
\ No newline at end of file diff --git a/src/ucl_util.c b/src/ucl_util.c index 41e012b..730a5c4 100644 --- a/src/ucl_util.c +++ b/src/ucl_util.c @@ -1,4 +1,5 @@ /* Copyright (c) 2013, Vsevolod Stakhov + * Copyright (c) 2015 Allan Jude <allanjude@freebsd.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,6 +26,7 @@ #include "ucl_internal.h" #include "ucl_chartable.h" #include "kvec.h" +#include <stdarg.h> #ifndef _WIN32 #include <glob.h> @@ -230,11 +232,13 @@ ucl_object_free_internal (ucl_object_t *obj, bool allow_rec, ucl_object_dtor dto kv_destroy (*vec); UCL_FREE (sizeof (*vec), vec); } + obj->value.av = NULL; } else if (obj->type == UCL_OBJECT) { if (obj->value.ov != NULL) { ucl_hash_destroy (obj->value.ov, (ucl_hash_free_func *)dtor); } + obj->value.ov = NULL; } tmp = obj->next; dtor (obj); @@ -266,6 +270,17 @@ ucl_unescape_json_string (char *str, size_t len) while (len) { if (*h == '\\') { h ++; + + if (len == 1) { + /* + * If \ is last, then do not try to go further + * Issue: #74 + */ + len --; + *t++ = '\\'; + continue; + } + switch (*h) { case 'n': *t++ = '\n'; @@ -350,7 +365,10 @@ ucl_unescape_json_string (char *str, size_t len) else { *t++ = *h++; } - len --; + + if (len > 0) { + len --; + } } *t = '\0'; @@ -392,11 +410,24 @@ ucl_copy_value_trash (const ucl_object_t *obj) if (obj->type == UCL_STRING) { /* Special case for strings */ - deconst->trash_stack[UCL_TRASH_VALUE] = malloc (obj->len + 1); - if (deconst->trash_stack[UCL_TRASH_VALUE] != NULL) { - memcpy (deconst->trash_stack[UCL_TRASH_VALUE], obj->value.sv, obj->len); - deconst->trash_stack[UCL_TRASH_VALUE][obj->len] = '\0'; - deconst->value.sv = obj->trash_stack[UCL_TRASH_VALUE]; + if (obj->flags & UCL_OBJECT_BINARY) { + deconst->trash_stack[UCL_TRASH_VALUE] = malloc (obj->len); + if (deconst->trash_stack[UCL_TRASH_VALUE] != NULL) { + memcpy (deconst->trash_stack[UCL_TRASH_VALUE], + obj->value.sv, + obj->len); + deconst->value.sv = 
obj->trash_stack[UCL_TRASH_VALUE]; + } + } + else { + deconst->trash_stack[UCL_TRASH_VALUE] = malloc (obj->len + 1); + if (deconst->trash_stack[UCL_TRASH_VALUE] != NULL) { + memcpy (deconst->trash_stack[UCL_TRASH_VALUE], + obj->value.sv, + obj->len); + deconst->trash_stack[UCL_TRASH_VALUE][obj->len] = '\0'; + deconst->value.sv = obj->trash_stack[UCL_TRASH_VALUE]; + } } } else { @@ -406,10 +437,11 @@ ucl_copy_value_trash (const ucl_object_t *obj) } deconst->flags |= UCL_OBJECT_ALLOCATED_VALUE; } + return obj->trash_stack[UCL_TRASH_VALUE]; } -UCL_EXTERN ucl_object_t* +ucl_object_t* ucl_parser_get_object (struct ucl_parser *parser) { if (parser->state != UCL_STATE_ERROR && parser->top_obj != NULL) { @@ -419,7 +451,7 @@ ucl_parser_get_object (struct ucl_parser *parser) return NULL; } -UCL_EXTERN void +void ucl_parser_free (struct ucl_parser *parser) { struct ucl_stack *stack, *stmp; @@ -427,6 +459,7 @@ ucl_parser_free (struct ucl_parser *parser) struct ucl_chunk *chunk, *ctmp; struct ucl_pubkey *key, *ktmp; struct ucl_variable *var, *vtmp; + ucl_object_t *tr, *trtmp; if (parser == NULL) { return; @@ -436,6 +469,10 @@ ucl_parser_free (struct ucl_parser *parser) ucl_object_unref (parser->top_obj); } + if (parser->includepaths != NULL) { + ucl_object_unref (parser->includepaths); + } + LL_FOREACH_SAFE (parser->stack, stack, stmp) { free (stack); } @@ -455,6 +492,9 @@ ucl_parser_free (struct ucl_parser *parser) free (var->var); UCL_FREE (sizeof (struct ucl_variable), var); } + LL_FOREACH_SAFE (parser->trash_objs, tr, trtmp) { + ucl_object_free_internal (tr, false, ucl_object_dtor_free); + } if (parser->err != NULL) { utstring_free (parser->err); @@ -467,29 +507,61 @@ ucl_parser_free (struct ucl_parser *parser) UCL_FREE (sizeof (struct ucl_parser), parser); } -UCL_EXTERN const char * +const char * ucl_parser_get_error(struct ucl_parser *parser) { if (parser == NULL) { return NULL; } - if (parser->err == NULL) + if (parser->err == NULL) { return NULL; + } - return 
utstring_body(parser->err); + return utstring_body (parser->err); } -UCL_EXTERN void +int +ucl_parser_get_error_code(struct ucl_parser *parser) +{ + if (parser == NULL) { + return 0; + } + + return parser->err_code; +} + +unsigned +ucl_parser_get_column(struct ucl_parser *parser) +{ + if (parser == NULL || parser->chunks == NULL) { + return 0; + } + + return parser->chunks->column; +} + +unsigned +ucl_parser_get_linenum(struct ucl_parser *parser) +{ + if (parser == NULL || parser->chunks == NULL) { + return 0; + } + + return parser->chunks->line; +} + +void ucl_parser_clear_error(struct ucl_parser *parser) { if (parser != NULL && parser->err != NULL) { utstring_free(parser->err); parser->err = NULL; + parser->err_code = 0; } } -UCL_EXTERN bool +bool ucl_pubkey_add (struct ucl_parser *parser, const unsigned char *key, size_t len) { #ifndef HAVE_OPENSSL @@ -738,6 +810,20 @@ ucl_sig_check (const unsigned char *data, size_t datalen, } #endif +struct ucl_include_params { + bool check_signature; + bool must_exist; + bool use_glob; + bool use_prefix; + bool soft_fail; + bool allow_glob; + unsigned priority; + enum ucl_duplicate_strategy strat; + enum ucl_parse_type parse_type; + const char *prefix; + const char *target; +}; + /** * Include an url to configuration * @param data @@ -748,8 +834,8 @@ ucl_sig_check (const unsigned char *data, size_t datalen, */ static bool ucl_include_url (const unsigned char *data, size_t len, - struct ucl_parser *parser, bool check_signature, bool must_exist, - unsigned priority) + struct ucl_parser *parser, + struct ucl_include_params *params) { bool res; @@ -761,11 +847,11 @@ ucl_include_url (const unsigned char *data, size_t len, snprintf (urlbuf, sizeof (urlbuf), "%.*s", (int)len, data); - if (!ucl_fetch_url (urlbuf, &buf, &buflen, &parser->err, must_exist)) { - return (!must_exist || false); + if (!ucl_fetch_url (urlbuf, &buf, &buflen, &parser->err, params->must_exist)) { + return (!params->must_exist || false); } - if (check_signature) 
{ + if (params->check_signature) { #if (defined(HAVE_OPENSSL) && OPENSSL_VERSION_NUMBER >= 0x10000000L) unsigned char *sigbuf = NULL; size_t siglen = 0; @@ -792,7 +878,8 @@ ucl_include_url (const unsigned char *data, size_t len, prev_state = parser->state; parser->state = UCL_STATE_INIT; - res = ucl_parser_add_chunk_priority (parser, buf, buflen, priority); + res = ucl_parser_add_chunk_full (parser, buf, buflen, params->priority, + params->strat, params->parse_type); if (res == true) { /* Remove chunk from the stack */ chunk = parser->chunks; @@ -821,22 +908,27 @@ ucl_include_url (const unsigned char *data, size_t len, */ static bool ucl_include_file_single (const unsigned char *data, size_t len, - struct ucl_parser *parser, bool check_signature, bool must_exist, - unsigned priority) + struct ucl_parser *parser, struct ucl_include_params *params) { bool res; struct ucl_chunk *chunk; unsigned char *buf = NULL; - char *old_curfile; - size_t buflen; + char *old_curfile, *ext; + size_t buflen = 0; char filebuf[PATH_MAX], realbuf[PATH_MAX]; int prev_state; struct ucl_variable *cur_var, *tmp_var, *old_curdir = NULL, *old_filename = NULL; + ucl_object_t *nest_obj = NULL, *old_obj = NULL, *new_obj = NULL; + ucl_hash_t *container = NULL; + struct ucl_stack *st = NULL; snprintf (filebuf, sizeof (filebuf), "%.*s", (int)len, data); if (ucl_realpath (filebuf, realbuf) == NULL) { - if (!must_exist) { + if (params->soft_fail) { + return false; + } + if (!params->must_exist) { return true; } ucl_create_err (&parser->err, "cannot open file %s: %s", @@ -847,16 +939,23 @@ ucl_include_file_single (const unsigned char *data, size_t len, if (parser->cur_file && strcmp (realbuf, parser->cur_file) == 0) { /* We are likely including the file itself */ + if (params->soft_fail) { + return false; + } + ucl_create_err (&parser->err, "trying to include the file %s from itself", realbuf); return false; } - if (!ucl_fetch_file (realbuf, &buf, &buflen, &parser->err, must_exist)) { - return 
(!must_exist || false); + if (!ucl_fetch_file (realbuf, &buf, &buflen, &parser->err, params->must_exist)) { + if (params->soft_fail) { + return false; + } + return (!params->must_exist || false); } - if (check_signature) { + if (params->check_signature) { #if (defined(HAVE_OPENSSL) && OPENSSL_VERSION_NUMBER >= 0x10000000L) unsigned char *sigbuf = NULL; size_t siglen = 0; @@ -900,14 +999,128 @@ ucl_include_file_single (const unsigned char *data, size_t len, prev_state = parser->state; parser->state = UCL_STATE_INIT; - res = ucl_parser_add_chunk_priority (parser, buf, buflen, priority); - if (!res && !must_exist) { + if (params->use_prefix && params->prefix == NULL) { + /* Auto generate a key name based on the included filename */ + params->prefix = basename (realbuf); + ext = strrchr (params->prefix, '.'); + if (ext != NULL && (strcmp (ext, ".conf") == 0 || strcmp (ext, ".ucl") == 0)) { + /* Strip off .conf or .ucl */ + *ext = '\0'; + } + } + if (params->prefix != NULL) { + /* This is a prefixed include */ + container = parser->stack->obj->value.ov; + + old_obj = __DECONST (ucl_object_t *, ucl_hash_search (container, + params->prefix, strlen (params->prefix))); + + if (strcasecmp (params->target, "array") == 0 && old_obj == NULL) { + /* Create an array with key: prefix */ + old_obj = ucl_object_new_full (UCL_ARRAY, params->priority); + old_obj->key = params->prefix; + old_obj->keylen = strlen (params->prefix); + ucl_copy_key_trash(old_obj); + old_obj->prev = old_obj; + old_obj->next = NULL; + + container = ucl_hash_insert_object (container, old_obj, + parser->flags & UCL_PARSER_KEY_LOWERCASE); + parser->stack->obj->len ++; + + nest_obj = ucl_object_new_full (UCL_OBJECT, params->priority); + nest_obj->prev = nest_obj; + nest_obj->next = NULL; + + ucl_array_append (old_obj, nest_obj); + } + else if (old_obj == NULL) { + /* Create an object with key: prefix */ + nest_obj = ucl_object_new_full (UCL_OBJECT, params->priority); + nest_obj->key = params->prefix; + 
nest_obj->keylen = strlen (params->prefix); + ucl_copy_key_trash(nest_obj); + nest_obj->prev = nest_obj; + nest_obj->next = NULL; + + container = ucl_hash_insert_object (container, nest_obj, + parser->flags & UCL_PARSER_KEY_LOWERCASE); + parser->stack->obj->len ++; + } + else if (strcasecmp (params->target, "array") == 0 || + ucl_object_type(old_obj) == UCL_ARRAY) { + if (ucl_object_type(old_obj) == UCL_ARRAY) { + /* Append to the existing array */ + nest_obj = ucl_object_new_full (UCL_OBJECT, params->priority); + nest_obj->prev = nest_obj; + nest_obj->next = NULL; + + ucl_array_append (old_obj, nest_obj); + } + else { + /* Convert the object to an array */ + new_obj = ucl_object_typed_new (UCL_ARRAY); + new_obj->key = old_obj->key; + new_obj->keylen = old_obj->keylen; + new_obj->flags |= UCL_OBJECT_MULTIVALUE; + new_obj->prev = new_obj; + new_obj->next = NULL; + + nest_obj = ucl_object_new_full (UCL_OBJECT, params->priority); + nest_obj->prev = nest_obj; + nest_obj->next = NULL; + + ucl_array_append (new_obj, old_obj); + ucl_array_append (new_obj, nest_obj); + ucl_hash_replace (container, old_obj, new_obj); + } + } + else { + if (ucl_object_type (old_obj) == UCL_OBJECT) { + /* Append to existing Object*/ + nest_obj = old_obj; + } + else { + /* The key is not an object */ + ucl_create_err (&parser->err, + "Conflicting type for key: %s", + params->prefix); + return false; + } + } + + /* Put all of the content of the include inside that object */ + parser->stack->obj->value.ov = container; + + if (nest_obj != NULL) { + st = UCL_ALLOC (sizeof (struct ucl_stack)); + if (st == NULL) { + ucl_create_err (&parser->err, "cannot allocate memory for an object"); + ucl_object_unref (nest_obj); + return NULL; + } + st->obj = nest_obj; + st->level = parser->stack->level; + LL_PREPEND (parser->stack, st); + parser->cur_obj = nest_obj; + } + } + + res = ucl_parser_add_chunk_full (parser, buf, buflen, params->priority, + params->strat, params->parse_type); + if (!res && 
!params->must_exist) { /* Free error */ utstring_free (parser->err); parser->err = NULL; parser->state = UCL_STATE_AFTER_VALUE; } + /* Stop nesting the include, take 1 level off the stack */ + if (params->prefix != NULL && nest_obj != NULL) { + parser->stack = st->next; + UCL_FREE (sizeof (struct ucl_stack), st); + } + /* Remove chunk from the stack */ chunk = parser->chunks; if (chunk != NULL) { @@ -917,6 +1130,10 @@ ucl_include_file_single (const unsigned char *data, size_t len, } /* Restore old file vars */ + if (parser->cur_file) { + free (parser->cur_file); + } + parser->cur_file = old_curfile; DL_FOREACH_SAFE (parser->variables, cur_var, tmp_var) { if (strcmp (cur_var->var, "CURDIR") == 0 && old_curdir) { @@ -938,9 +1155,6 @@ ucl_include_file_single (const unsigned char *data, size_t len, if (old_curdir) { DL_APPEND (parser->variables, old_curdir); } - if (old_curfile) { - free (old_curfile); - } parser->state = prev_state; @@ -961,8 +1175,7 @@ ucl_include_file_single (const unsigned char *data, size_t len, */ static bool ucl_include_file (const unsigned char *data, size_t len, - struct ucl_parser *parser, bool check_signature, bool must_exist, - bool allow_glob, unsigned priority) + struct ucl_parser *parser, struct ucl_include_params *params) { const unsigned char *p = data, *end = data + len; bool need_glob = false; @@ -971,9 +1184,8 @@ ucl_include_file (const unsigned char *data, size_t len, size_t i; #ifndef _WIN32 - if (!allow_glob) { - return ucl_include_file_single (data, len, parser, check_signature, - must_exist, priority); + if (!params->allow_glob) { + return ucl_include_file_single (data, len, parser, params); } else { /* Check for special symbols in a filename */ @@ -987,14 +1199,17 @@ ucl_include_file (const unsigned char *data, size_t len, if (need_glob) { glob_t globbuf; memset (&globbuf, 0, sizeof (globbuf)); - ucl_strlcpy (glob_pattern, (const char *)data, sizeof (glob_pattern)); + ucl_strlcpy (glob_pattern, (const char *)data, + (len + 1 < 
sizeof (glob_pattern) ? len + 1 : sizeof (glob_pattern))); if (glob (glob_pattern, 0, NULL, &globbuf) != 0) { - return (!must_exist || false); + return (!params->must_exist || false); } for (i = 0; i < globbuf.gl_pathc; i ++) { if (!ucl_include_file_single ((unsigned char *)globbuf.gl_pathv[i], - strlen (globbuf.gl_pathv[i]), parser, check_signature, - must_exist, priority)) { + strlen (globbuf.gl_pathv[i]), parser, params)) { + if (params->soft_fail) { + continue; + } globfree (&globbuf); return false; } @@ -1002,22 +1217,20 @@ ucl_include_file (const unsigned char *data, size_t len, } globfree (&globbuf); - if (cnt == 0 && must_exist) { + if (cnt == 0 && params->must_exist) { ucl_create_err (&parser->err, "cannot match any files for pattern %s", glob_pattern); return false; } } else { - return ucl_include_file_single (data, len, parser, check_signature, - must_exist, priority); + return ucl_include_file_single (data, len, parser, params); } } #else /* Win32 compilers do not support globbing. 
Therefore, for Win32, treat allow_glob/need_glob as a NOOP and just return */ - return ucl_include_file_single (data, len, parser, check_signature, - must_exist, priority); + return ucl_include_file_single (data, len, parser, params); #endif return true; @@ -1039,52 +1252,123 @@ ucl_include_common (const unsigned char *data, size_t len, bool default_try, bool default_sign) { - bool try_load, allow_glob, allow_url, need_sign; - unsigned priority; + bool allow_url, search; + const char *duplicate; const ucl_object_t *param; - ucl_object_iter_t it = NULL; + ucl_object_iter_t it = NULL, ip = NULL; + char ipath[PATH_MAX]; + struct ucl_include_params params; /* Default values */ - try_load = default_try; - allow_glob = false; - allow_url = true; - need_sign = default_sign; - priority = 0; + params.soft_fail = default_try; + params.allow_glob = false; + params.check_signature = default_sign; + params.use_prefix = false; + params.target = "object"; + params.prefix = NULL; + params.priority = 0; + params.parse_type = UCL_PARSE_UCL; + params.strat = UCL_DUPLICATE_APPEND; + params.must_exist = !default_try; + + search = false; /* Process arguments */ if (args != NULL && args->type == UCL_OBJECT) { while ((param = ucl_iterate_object (args, &it, true)) != NULL) { if (param->type == UCL_BOOLEAN) { - if (strcmp (param->key, "try") == 0) { - try_load = ucl_object_toboolean (param); + if (strncmp (param->key, "try", param->keylen) == 0) { + params.must_exist = !ucl_object_toboolean (param); + } + else if (strncmp (param->key, "sign", param->keylen) == 0) { + params.check_signature = ucl_object_toboolean (param); + } + else if (strncmp (param->key, "glob", param->keylen) == 0) { + params.allow_glob = ucl_object_toboolean (param); + } + else if (strncmp (param->key, "url", param->keylen) == 0) { + allow_url = ucl_object_toboolean (param); + } + else if (strncmp (param->key, "prefix", param->keylen) == 0) { + params.use_prefix = ucl_object_toboolean (param); + } + } + else if 
(param->type == UCL_STRING) { + if (strncmp (param->key, "key", param->keylen) == 0) { + params.prefix = ucl_object_tostring (param); } - else if (strcmp (param->key, "sign") == 0) { - need_sign = ucl_object_toboolean (param); + else if (strncmp (param->key, "target", param->keylen) == 0) { + params.target = ucl_object_tostring (param); } - else if (strcmp (param->key, "glob") == 0) { - allow_glob = ucl_object_toboolean (param); + else if (strncmp (param->key, "duplicate", param->keylen) == 0) { + duplicate = ucl_object_tostring (param); + + if (strcmp (duplicate, "append") == 0) { + params.strat = UCL_DUPLICATE_APPEND; + } + else if (strcmp (duplicate, "merge") == 0) { + params.strat = UCL_DUPLICATE_MERGE; + } + else if (strcmp (duplicate, "rewrite") == 0) { + params.strat = UCL_DUPLICATE_REWRITE; + } + else if (strcmp (duplicate, "error") == 0) { + params.strat = UCL_DUPLICATE_ERROR; + } } - else if (strcmp (param->key, "url") == 0) { - allow_url = ucl_object_toboolean (param); + } + else if (param->type == UCL_ARRAY) { + if (strncmp (param->key, "path", param->keylen) == 0) { + ucl_set_include_path (parser, __DECONST(ucl_object_t *, param)); } } else if (param->type == UCL_INT) { - if (strcmp (param->key, "priority") == 0) { - priority = ucl_object_toint (param); + if (strncmp (param->key, "priority", param->keylen) == 0) { + params.priority = ucl_object_toint (param); } } } } - if (*data == '/' || *data == '.') { - /* Try to load a file */ - return ucl_include_file (data, len, parser, need_sign, !try_load, - allow_glob, priority); + if (parser->includepaths == NULL) { + if (allow_url && ucl_strnstr (data, "://", len) != NULL) { + /* Globbing is not used for URL's */ + return ucl_include_url (data, len, parser, ¶ms); + } + else if (data != NULL) { + /* Try to load a file */ + return ucl_include_file (data, len, parser, ¶ms); + } } - else if (allow_url) { - /* Globbing is not used for URL's */ - return ucl_include_url (data, len, parser, need_sign, !try_load, - 
priority); + else { + if (allow_url && ucl_strnstr (data, "://", len) != NULL) { + /* Globbing is not used for URL's */ + return ucl_include_url (data, len, parser, ¶ms); + } + + ip = ucl_object_iterate_new (parser->includepaths); + while ((param = ucl_object_iterate_safe (ip, true)) != NULL) { + if (ucl_object_type(param) == UCL_STRING) { + snprintf (ipath, sizeof (ipath), "%s/%.*s", ucl_object_tostring(param), + (int)len, data); + if ((search = ucl_include_file (ipath, strlen (ipath), + parser, ¶ms))) { + if (!params.allow_glob) { + break; + } + } + } + } + ucl_object_iterate_free (ip); + if (search == true) { + return true; + } + else { + ucl_create_err (&parser->err, + "cannot find file: %.*s in search path", + (int)len, data); + return false; + } } return false; @@ -1094,11 +1378,11 @@ ucl_include_common (const unsigned char *data, size_t len, * Handle include macro * @param data include data * @param len length of data + * @param args UCL object representing arguments to the macro * @param ud user data - * @param err error ptr * @return */ -UCL_EXTERN bool +bool ucl_include_handler (const unsigned char *data, size_t len, const ucl_object_t *args, void* ud) { @@ -1111,11 +1395,11 @@ ucl_include_handler (const unsigned char *data, size_t len, * Handle includes macro * @param data include data * @param len length of data + * @param args UCL object representing arguments to the macro * @param ud user data - * @param err error ptr * @return */ -UCL_EXTERN bool +bool ucl_includes_handler (const unsigned char *data, size_t len, const ucl_object_t *args, void* ud) { @@ -1124,8 +1408,15 @@ ucl_includes_handler (const unsigned char *data, size_t len, return ucl_include_common (data, len, args, parser, false, true); } - -UCL_EXTERN bool +/** + * Handle tryinclude macro + * @param data include data + * @param len length of data + * @param args UCL object representing arguments to the macro + * @param ud user data + * @return + */ +bool ucl_try_include_handler (const 
unsigned char *data, size_t len, const ucl_object_t *args, void* ud) { @@ -1134,7 +1425,251 @@ ucl_try_include_handler (const unsigned char *data, size_t len, return ucl_include_common (data, len, args, parser, true, false); } -UCL_EXTERN bool +/** + * Handle priority macro + * @param data include data + * @param len length of data + * @param args UCL object representing arguments to the macro + * @param ud user data + * @return + */ +bool +ucl_priority_handler (const unsigned char *data, size_t len, + const ucl_object_t *args, void* ud) +{ + struct ucl_parser *parser = ud; + unsigned priority = 255; + const ucl_object_t *param; + bool found = false; + char *value = NULL, *leftover = NULL; + ucl_object_iter_t it = NULL; + + if (parser == NULL) { + return false; + } + + /* Process arguments */ + if (args != NULL && args->type == UCL_OBJECT) { + while ((param = ucl_iterate_object (args, &it, true)) != NULL) { + if (param->type == UCL_INT) { + if (strncmp (param->key, "priority", param->keylen) == 0) { + priority = ucl_object_toint (param); + found = true; + } + } + } + } + + if (len > 0) { + value = malloc(len + 1); + ucl_strlcpy(value, (const char *)data, len + 1); + priority = strtol(value, &leftover, 10); + if (*leftover != '\0') { + ucl_create_err (&parser->err, "Invalid priority value in macro: %s", + value); + free(value); + return false; + } + free(value); + found = true; + } + + if (found == true) { + parser->chunks->priority = priority; + return true; + } + + ucl_create_err (&parser->err, "Unable to parse priority macro"); + return false; +} + +/** + * Handle load macro + * @param data include data + * @param len length of data + * @param args UCL object representing arguments to the macro + * @param ud user data + * @return + */ +bool +ucl_load_handler (const unsigned char *data, size_t len, + const ucl_object_t *args, void* ud) +{ + struct ucl_parser *parser = ud; + const ucl_object_t *param; + ucl_object_t *obj, *old_obj; + ucl_object_iter_t it = NULL; + 
bool try_load, multiline, test; + const char *target, *prefix; + char *load_file, *tmp; + unsigned char *buf; + size_t buflen; + unsigned priority; + int64_t iv; + ucl_hash_t *container = NULL; + enum ucl_string_flags flags; + + /* Default values */ + try_load = false; + multiline = false; + test = false; + target = "string"; + prefix = NULL; + load_file = NULL; + buf = NULL; + buflen = 0; + priority = 0; + obj = NULL; + old_obj = NULL; + flags = 0; + + if (parser == NULL) { + return false; + } + + /* Process arguments */ + if (args != NULL && args->type == UCL_OBJECT) { + while ((param = ucl_iterate_object (args, &it, true)) != NULL) { + if (param->type == UCL_BOOLEAN) { + if (strncmp (param->key, "try", param->keylen) == 0) { + try_load = ucl_object_toboolean (param); + } + else if (strncmp (param->key, "multiline", param->keylen) == 0) { + multiline = ucl_object_toboolean (param); + } + else if (strncmp (param->key, "escape", param->keylen) == 0) { + test = ucl_object_toboolean (param); + if (test) { + flags |= UCL_STRING_ESCAPE; + } + } + else if (strncmp (param->key, "trim", param->keylen) == 0) { + test = ucl_object_toboolean (param); + if (test) { + flags |= UCL_STRING_TRIM; + } + } + } + else if (param->type == UCL_STRING) { + if (strncmp (param->key, "key", param->keylen) == 0) { + prefix = ucl_object_tostring (param); + } + else if (strncmp (param->key, "target", param->keylen) == 0) { + target = ucl_object_tostring (param); + } + } + else if (param->type == UCL_INT) { + if (strncmp (param->key, "priority", param->keylen) == 0) { + priority = ucl_object_toint (param); + } + } + } + } + + if (prefix == NULL || strlen(prefix) == 0) { + ucl_create_err (&parser->err, "No Key specified in load macro"); + return false; + } + + if (len > 0) { + asprintf (&load_file, "%.*s", (int)len, data); + if (!ucl_fetch_file (load_file, &buf, &buflen, &parser->err, !try_load)) { + return (try_load || false); + } + + container = parser->stack->obj->value.ov; + old_obj = 
__DECONST (ucl_object_t *, ucl_hash_search (container, prefix, strlen (prefix))); + if (old_obj != NULL) { + ucl_create_err (&parser->err, "Key %s already exists", prefix); + return false; + } + + if (strcasecmp (target, "string") == 0) { + obj = ucl_object_fromstring_common (buf, buflen, flags); + ucl_copy_value_trash (obj); + if (multiline) { + obj->flags |= UCL_OBJECT_MULTILINE; + } + } + else if (strcasecmp (target, "int") == 0) { + asprintf(&tmp, "%.*s", (int)buflen, buf); + iv = strtoll(tmp, NULL, 10); + obj = ucl_object_fromint(iv); + } + + if (buflen > 0) { + ucl_munmap (buf, buflen); + } + + if (obj != NULL) { + obj->key = prefix; + obj->keylen = strlen (prefix); + ucl_copy_key_trash(obj); + obj->prev = obj; + obj->next = NULL; + ucl_object_set_priority (obj, priority); + container = ucl_hash_insert_object (container, obj, + parser->flags & UCL_PARSER_KEY_LOWERCASE); + parser->stack->obj->value.ov = container; + } + return true; + } + + ucl_create_err (&parser->err, "Unable to parse load macro"); + return false; +} + +bool +ucl_inherit_handler (const unsigned char *data, size_t len, + const ucl_object_t *args, const ucl_object_t *ctx, void* ud) +{ + const ucl_object_t *parent, *cur; + ucl_object_t *target, *copy; + ucl_object_iter_t it = NULL; + bool replace = false; + struct ucl_parser *parser = ud; + + parent = ucl_object_find_keyl (ctx, data, len); + + /* Some sanity checks */ + if (parent == NULL || ucl_object_type (parent) != UCL_OBJECT) { + ucl_create_err (&parser->err, "Unable to find inherited object %*.s", + (int)len, data); + return false; + } + + if (parser->stack == NULL || parser->stack->obj == NULL || + ucl_object_type (parser->stack->obj) != UCL_OBJECT) { + ucl_create_err (&parser->err, "Invalid inherit context"); + return false; + } + + target = parser->stack->obj; + + if (args && (cur = ucl_object_find_key (args, "replace")) != NULL) { + replace = ucl_object_toboolean (cur); + } + + while ((cur = ucl_iterate_object (parent, &it, true))) { 
+ /* We do not replace existing keys */ + if (!replace && ucl_object_find_keyl (target, cur->key, cur->keylen)) { + continue; + } + + copy = ucl_object_copy (cur); + + if (!replace) { + copy->flags |= UCL_OBJECT_INHERITED; + } + + ucl_object_insert_key (target, copy, copy->key, + copy->keylen, false); + } + + return true; +} + +bool ucl_parser_set_filevars (struct ucl_parser *parser, const char *filename, bool need_expand) { char realbuf[PATH_MAX], *curdir; @@ -1164,8 +1699,9 @@ ucl_parser_set_filevars (struct ucl_parser *parser, const char *filename, bool n return true; } -UCL_EXTERN bool -ucl_parser_add_file (struct ucl_parser *parser, const char *filename) +bool +ucl_parser_add_file_priority (struct ucl_parser *parser, const char *filename, + unsigned priority) { unsigned char *buf; size_t len; @@ -1188,7 +1724,7 @@ ucl_parser_add_file (struct ucl_parser *parser, const char *filename) } parser->cur_file = strdup (realbuf); ucl_parser_set_filevars (parser, realbuf, false); - ret = ucl_parser_add_chunk (parser, buf, len); + ret = ucl_parser_add_chunk_priority (parser, buf, len, priority); if (len > 0) { ucl_munmap (buf, len); @@ -1197,8 +1733,20 @@ ucl_parser_add_file (struct ucl_parser *parser, const char *filename) return ret; } -UCL_EXTERN bool -ucl_parser_add_fd (struct ucl_parser *parser, int fd) +bool +ucl_parser_add_file (struct ucl_parser *parser, const char *filename) +{ + if (parser == NULL) { + return false; + } + + return ucl_parser_add_file_priority(parser, filename, + parser->default_priority); +} + +bool +ucl_parser_add_fd_priority (struct ucl_parser *parser, int fd, + unsigned priority) { unsigned char *buf; size_t len; @@ -1221,7 +1769,7 @@ ucl_parser_add_fd (struct ucl_parser *parser, int fd) } parser->cur_file = NULL; len = st.st_size; - ret = ucl_parser_add_chunk (parser, buf, len); + ret = ucl_parser_add_chunk_priority (parser, buf, len, priority); if (len > 0) { ucl_munmap (buf, len); @@ -1230,6 +1778,16 @@ ucl_parser_add_fd (struct 
ucl_parser *parser, int fd) return ret; } +bool +ucl_parser_add_fd (struct ucl_parser *parser, int fd) +{ + if (parser == NULL) { + return false; + } + + return ucl_parser_add_fd_priority(parser, fd, parser->default_priority); +} + size_t ucl_strlcpy (char *dst, const char *src, size_t siz) { @@ -1285,6 +1843,51 @@ ucl_strlcpy_tolower (char *dst, const char *src, size_t siz) return (s - src); /* count does not include NUL */ } +/* + * Find the first occurrence of find in s + */ +char * +ucl_strnstr (const char *s, const char *find, int len) +{ + char c, sc; + int mlen; + + if ((c = *find++) != 0) { + mlen = strlen (find); + do { + do { + if ((sc = *s++) == 0 || len-- == 0) + return (NULL); + } while (sc != c); + } while (strncmp (s, find, mlen) != 0); + s--; + } + return ((char *)s); +} + +/* + * Find the first occurrence of find in s, ignore case. + */ +char * +ucl_strncasestr (const char *s, const char *find, int len) +{ + char c, sc; + int mlen; + + if ((c = *find++) != 0) { + c = tolower (c); + mlen = strlen (find); + do { + do { + if ((sc = *s++) == 0 || len-- == 0) + return (NULL); + } while (tolower (sc) != c); + } while (strncasecmp (s, find, mlen) != 0); + s--; + } + return ((char *)s); +} + ucl_object_t * ucl_object_fromstring_common (const char *str, size_t len, enum ucl_string_flags flags) { @@ -1539,7 +2142,7 @@ ucl_object_delete_keyl (ucl_object_t *top, const char *key, size_t keylen) bool ucl_object_delete_key (ucl_object_t *top, const char *key) { - return ucl_object_delete_keyl (top, key, strlen(key)); + return ucl_object_delete_keyl (top, key, strlen (key)); } ucl_object_t* @@ -1564,7 +2167,7 @@ ucl_object_pop_keyl (ucl_object_t *top, const char *key, size_t keylen) ucl_object_t* ucl_object_pop_key (ucl_object_t *top, const char *key) { - return ucl_object_pop_keyl (top, key, strlen(key)); + return ucl_object_pop_keyl (top, key, strlen (key)); } bool @@ -1642,10 +2245,45 @@ ucl_object_find_keyl (const ucl_object_t *obj, const char *key, size_t 
klen) const ucl_object_t * ucl_object_find_key (const ucl_object_t *obj, const char *key) { - if (key == NULL) + if (key == NULL) { return NULL; + } - return ucl_object_find_keyl (obj, key, strlen(key)); + return ucl_object_find_keyl (obj, key, strlen (key)); +} + +const ucl_object_t* +ucl_object_find_any_key (const ucl_object_t *obj, + const char *key, ...) +{ + va_list ap; + const ucl_object_t *ret = NULL; + const char *nk = NULL; + + if (obj == NULL || key == NULL) { + return NULL; + } + + ret = ucl_object_find_keyl (obj, key, strlen (key)); + + if (ret == NULL) { + va_start (ap, key); + + while (ret == NULL) { + nk = va_arg (ap, const char *); + + if (nk == NULL) { + break; + } + else { + ret = ucl_object_find_keyl (obj, nk, strlen (nk)); + } + } + + va_end (ap); + } + + return ret; } const ucl_object_t* @@ -1792,6 +2430,12 @@ ucl_object_iterate_free (ucl_object_iter_t it) const ucl_object_t * ucl_lookup_path (const ucl_object_t *top, const char *path_in) { + return ucl_lookup_path_char (top, path_in, '.'); +} + + +const ucl_object_t * +ucl_lookup_path_char (const ucl_object_t *top, const char *path_in, const char sep) { const ucl_object_t *o = NULL, *found; const char *p, *c; char *err_str; @@ -1805,20 +2449,20 @@ ucl_lookup_path (const ucl_object_t *top, const char *path_in) { p = path_in; /* Skip leading dots */ - while (*p == '.') { + while (*p == sep) { p ++; } c = p; while (*p != '\0') { p ++; - if (*p == '.' || *p == '\0') { + if (*p == sep || *p == '\0') { if (p > c) { switch (top->type) { case UCL_ARRAY: /* Key should be an int */ index = strtoul (c, &err_str, 10); - if (err_str != NULL && (*err_str != '.' 
&& *err_str != '\0')) { + if (err_str != NULL && (*err_str != sep && *err_str != '\0')) { return NULL; } o = ucl_array_find_index (top, index); @@ -1913,6 +2557,10 @@ ucl_object_new_userdata (ucl_userdata_dtor dtor, ucl_userdata_emitter emitter) ucl_type_t ucl_object_type (const ucl_object_t *obj) { + if (obj == NULL) { + return UCL_NULL; + } + return obj->type; } @@ -1981,6 +2629,11 @@ ucl_array_append (ucl_object_t *top, ucl_object_t *elt) if (vec == NULL) { vec = UCL_ALLOC (sizeof (*vec)); + + if (vec == NULL) { + return false; + } + kv_init (*vec); top->value.av = (void *)vec; } @@ -2021,14 +2674,23 @@ bool ucl_array_merge (ucl_object_t *top, ucl_object_t *elt, bool copy) { unsigned i; + ucl_object_t *cp = NULL; ucl_object_t **obj; - UCL_ARRAY_GET (v1, top); - UCL_ARRAY_GET (v2, elt); if (elt == NULL || top == NULL || top->type != UCL_ARRAY || elt->type != UCL_ARRAY) { return false; } + if (copy) { + cp = ucl_object_copy (elt); + } + else { + cp = ucl_object_ref (elt); + } + + UCL_ARRAY_GET (v1, top); + UCL_ARRAY_GET (v2, cp); + kv_concat (ucl_object_t *, *v1, *v2); for (i = v2->n; i < v1->n; i ++) { @@ -2036,14 +2698,7 @@ ucl_array_merge (ucl_object_t *top, ucl_object_t *elt, bool copy) if (*obj == NULL) { continue; } - top->len ++; - if (copy) { - *obj = ucl_object_copy (*obj); - } - else { - ucl_object_ref (*obj); - } } return true; @@ -2056,6 +2711,10 @@ ucl_array_delete (ucl_object_t *top, ucl_object_t *elt) ucl_object_t *ret = NULL; unsigned i; + if (vec == NULL) { + return NULL; + } + for (i = 0; i < vec->n; i ++) { if (kv_A (*vec, i) == elt) { kv_del (ucl_object_t *, *vec, i); @@ -2073,7 +2732,8 @@ ucl_array_head (const ucl_object_t *top) { UCL_ARRAY_GET (vec, top); - if (top == NULL || top->type != UCL_ARRAY || top->value.av == NULL) { + if (vec == NULL || top == NULL || top->type != UCL_ARRAY || + top->value.av == NULL) { return NULL; } @@ -2136,6 +2796,25 @@ ucl_array_find_index (const ucl_object_t *top, unsigned int index) return NULL; } +unsigned 
int +ucl_array_index_of (ucl_object_t *top, ucl_object_t *elt) +{ + UCL_ARRAY_GET (vec, top); + unsigned i; + + if (vec == NULL) { + return (unsigned int)(-1); + } + + for (i = 0; i < vec->n; i ++) { + if (kv_A (*vec, i) == elt) { + return i; + } + } + + return (unsigned int)(-1); +} + ucl_object_t * ucl_array_replace_index (ucl_object_t *top, ucl_object_t *elt, unsigned int index) @@ -2265,7 +2944,9 @@ ucl_object_tostring_safe (const ucl_object_t *obj, const char **target) switch (obj->type) { case UCL_STRING: - *target = ucl_copy_value_trash (obj); + if (!(obj->flags & UCL_OBJECT_BINARY)) { + *target = ucl_copy_value_trash (obj); + } break; default: return false; @@ -2286,7 +2967,12 @@ ucl_object_tostring (const ucl_object_t *obj) const char * ucl_object_tostring_forced (const ucl_object_t *obj) { - return ucl_copy_value_trash (obj); + /* TODO: For binary strings we might encode string here */ + if (!(obj->flags & UCL_OBJECT_BINARY)) { + return ucl_copy_value_trash (obj); + } + + return NULL; } bool @@ -2533,7 +3219,7 @@ ucl_object_compare (const ucl_object_t *o1, const ucl_object_t *o2) void ucl_object_array_sort (ucl_object_t *ar, - int (*cmp)(const ucl_object_t *o1, const ucl_object_t *o2)) + int (*cmp)(const ucl_object_t **o1, const ucl_object_t **o2)) { UCL_ARRAY_GET (vec, ar); @@ -2563,6 +3249,9 @@ ucl_object_set_priority (ucl_object_t *obj, { if (obj != NULL) { priority &= (0x1 << PRIOBITS) - 1; - obj->flags |= priority << ((sizeof (obj->flags) * NBBY) - PRIOBITS); + priority <<= ((sizeof (obj->flags) * NBBY) - PRIOBITS); + priority |= obj->flags & ((1 << ((sizeof (obj->flags) * NBBY) - + PRIOBITS)) - 1); + obj->flags = priority; } } diff --git a/src/xxhash.c b/src/xxhash.c index fc44633..3473eb4 100644 --- a/src/xxhash.c +++ b/src/xxhash.c @@ -1,6 +1,6 @@ /* xxHash - Fast Hash algorithm -Copyright (C) 2012-2013, Yann Collet. +Copyright (C) 2012-2014, Yann Collet. 
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without @@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - xxHash source repository : http://code.google.com/p/xxhash/ +- public discussion board : https://groups.google.com/forum/#!forum/lz4c */ @@ -47,7 +48,7 @@ You can contact the author at : // When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. // This option has a very small performance cost (only measurable on small inputs). // By default, this option is disabled. To enable it, uncomment below define : -//#define XXH_ACCEPT_NULL_INPUT_POINTER 1 +// #define XXH_ACCEPT_NULL_INPUT_POINTER 1 // XXH_FORCE_NATIVE_FORMAT : // By default, xxHash library provides endian-independant Hash values, based on little-endian convention. @@ -58,7 +59,6 @@ You can contact the author at : // This option has no impact on Little_Endian CPU. 
#define XXH_FORCE_NATIVE_FORMAT 0 - //************************************** // Compiler Specific Options //************************************** @@ -68,28 +68,30 @@ You can contact the author at : #endif #ifdef _MSC_VER // Visual Studio -# define forceinline static __forceinline -#else +# define FORCE_INLINE static __forceinline +#else # ifdef __GNUC__ -# define forceinline static inline __attribute__((always_inline)) +# define FORCE_INLINE static inline __attribute__((always_inline)) # else -# define forceinline static inline +# define FORCE_INLINE static inline # endif #endif - //************************************** // Includes & Memory related functions //************************************** #include "xxhash.h" -// Modify the local functions below should you wish to use some other memory related routines +// Modify the local functions below should you wish to use some other memory routines // for malloc(), free() #include <stdlib.h> -forceinline void* XXH_malloc(size_t s) { return malloc(s); } -forceinline void XXH_free (void* p) { free(p); } +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } // for memcpy() #include <string.h> -forceinline void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) +{ + return memcpy(dest,src,size); +} //************************************** @@ -97,17 +99,17 @@ forceinline void* XXH_memcpy(void* dest, const void* src, size_t size) { return //************************************** #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 # include <stdint.h> - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; #else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef 
unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; +typedef unsigned char BYTE; +typedef unsigned short U16; +typedef unsigned int U32; +typedef signed int S32; +typedef uint64_t U64; #endif #if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) @@ -124,13 +126,21 @@ forceinline void* XXH_memcpy(void* dest, const void* src, size_t size) { return # endif #endif -typedef struct _U32_S { U32 v; } _PACKED U32_S; +typedef struct _U32_S +{ + U32 v; +} _PACKED U32_S; +typedef struct _U64_S +{ + U64 v; +} _PACKED U64_S; #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) # pragma pack(pop) #endif #define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) //*************************************** @@ -141,20 +151,37 @@ typedef struct _U32_S { U32 v; } _PACKED U32_S; // Note : although _rotl exists for minGW (GCC under windows), performance seems poor #if defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) #else # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) #endif #if defined(_MSC_VER) // Visual Studio # define XXH_swap32 _byteswap_ulong -#elif GCC_VERSION >= 403 +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 || defined(__clang__) # define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 #else -static inline U32 XXH_swap32 (U32 x) { +static inline U32 XXH_swap32 (U32 x) +{ return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff );} + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static inline U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 
0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} #endif @@ -167,13 +194,18 @@ static inline U32 XXH_swap32 (U32 x) { #define PRIME32_4 668265263U #define PRIME32_5 374761393U +#define PRIME64_1 11400714785074694791ULL +#define PRIME64_2 14029467366897019727ULL +#define PRIME64_3 1609587929392839161ULL +#define PRIME64_4 9650029242287828579ULL +#define PRIME64_5 2870177450012600261ULL //************************************** // Architecture Macros //************************************** typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; #ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch - static const int one = 1; +static const int one = 1; # define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) #endif @@ -189,28 +221,49 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; //**************************** typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; -forceinline U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) -{ +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ if (align==XXH_unaligned) - return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); + return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); else - return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); + return endian==XXH_littleEndian ? *(U32*)ptr : XXH_swap32(*(U32*)ptr); } -forceinline U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } +FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? 
A64(ptr) : XXH_swap64(A64(ptr)); + else + return endian==XXH_littleEndian ? *(U64*)ptr : XXH_swap64(*(U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} //**************************** // Simple Hash Functions //**************************** -forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; + const BYTE* bEnd = p + len; U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; } + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } #endif if (len>=16) @@ -223,11 +276,24 @@ forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_end do { - v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); + v1 += XXH_get32bits(p) * PRIME32_2; + v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_get32bits(p) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_get32bits(p) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_get32bits(p) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + 
XXH_rotl32(v4, 18); } @@ -238,9 +304,9 @@ forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_end h32 += (U32) len; - while (p<=bEnd-4) + while (p+4<=bEnd) { - h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3; + h32 += XXH_get32bits(p) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; p+=4; } @@ -262,18 +328,19 @@ forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_end } -U32 XXH32(const void* input, int len, U32 seed) +unsigned int XXH32 (const void* input, size_t len, unsigned seed) { #if 0 // Simple version, good for code maintenance, but unfortunately slow for small inputs - void* state = XXH32_init(seed); - XXH32_update(state, input, len); - return XXH32_digest(state); + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, input, len); + return XXH32_digest(&state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; # if !defined(XXH_USE_UNALIGNED_ACCESS) - if (!(((size_t)input) & 3)) // Input is aligned, let's leverage the speed advantage + if ((((size_t)input) & 3) == 0) // Input is aligned, let's leverage the speed advantage { if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); @@ -289,12 +356,152 @@ U32 XXH32(const void* input, int len, U32 seed) #endif } +FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) -//**************************** -// Advanced Hash Functions -//**************************** +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 
v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do + { + v1 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + } + while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64 * PRIME64_1 + PRIME64_4; + } + else + { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) + { + U64 k1 = XXH_get64bits(p); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } -struct XXH_state32_t + if (p+4<=bEnd) + { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) + { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +uint64_t XXH64 (const void* input, size_t len, uint64_t seed) +{ +#if 0 + // Simple version, good for code maintenance, but unfortunately slow for small inputs + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, input, len); + return XXH64_digest(&state); +#else + XXH_endianess endian_detected = 
(XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 7)==0) // Input is aligned, let's leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +/**************************************************** + * Advanced Hash Functions +****************************************************/ + +/*** Allocation ***/ +typedef struct { U64 total_len; U32 seed; @@ -302,21 +509,60 @@ struct XXH_state32_t U32 v2; U32 v3; U32 v4; - int memsize; - char memory[16]; -}; + U32 mem32[4]; /* defined as U32 for alignment */ + U32 memsize; +} XXH_istate32_t; + +typedef struct +{ + U64 total_len; + U64 seed; + U64 v1; + U64 v2; + U64 v3; + U64 v4; + U64 mem64[4]; /* defined as U64 for alignment */ + U32 memsize; +} XXH_istate64_t; + +XXH32_state_t* XXH32_createState(void) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t)); // A compilation error here means XXH32_state_t is not large enough + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} -int XXH32_sizeofState(void) +void* XXH32_init (unsigned seed) { - XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough - return sizeof(struct XXH_state32_t); + XXH32_state_t *st = XXH32_createState(); + XXH32_reset(st, seed); + + return st; } +XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +}; + +XXH64_state_t* XXH64_createState(void) +{ + 
XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t)); // A compilation error here means XXH64_state_t is not large enough + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +}; -XXH_errorcode XXH32_resetState(void* state_in, U32 seed) -{ - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + +/*** Hash feed ***/ + +XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed) +{ + XXH_istate32_t* state = (XXH_istate32_t*) state_in; state->seed = seed; state->v1 = seed + PRIME32_1 + PRIME32_2; state->v2 = seed + PRIME32_2; @@ -327,18 +573,23 @@ XXH_errorcode XXH32_resetState(void* state_in, U32 seed) return XXH_OK; } - -void* XXH32_init (U32 seed) +XXH_errorcode XXH64_reset(XXH64_state_t* state_in, uint64_t seed) { - void* state = XXH_malloc (sizeof(struct XXH_state32_t)); - XXH32_resetState(state, seed); - return state; + XXH_istate64_t* state = (XXH_istate64_t*) state_in; + state->seed = seed; + state->v1 = seed + PRIME64_1 + PRIME64_2; + state->v2 = seed + PRIME64_2; + state->v3 = seed + 0; + state->v4 = seed - PRIME64_1; + state->total_len = 0; + state->memsize = 0; + return XXH_OK; } -forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) +FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian) { - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + XXH_istate32_t* state = (XXH_istate32_t *) state_in; const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -350,20 +601,32 @@ forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input if (state->memsize + len < 16) // fill in tmp buffer { - XXH_memcpy(state->memory + state->memsize, input, len); - state->memsize += len; + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + 
state->memsize += (U32)len; return XXH_OK; } if (state->memsize) // some data left from previous update { - XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); { - const U32* p32 = (const U32*)state->memory; - state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; - state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; - state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; - state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; + const U32* p32 = state->mem32; + state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v1 = XXH_rotl32(state->v1, 13); + state->v1 *= PRIME32_1; + p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v2 = XXH_rotl32(state->v2, 13); + state->v2 *= PRIME32_1; + p32++; + state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v3 = XXH_rotl32(state->v3, 13); + state->v3 *= PRIME32_1; + p32++; + state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v4 = XXH_rotl32(state->v4, 13); + state->v4 *= PRIME32_1; + p32++; } p += 16-state->memsize; state->memsize = 0; @@ -379,11 +642,24 @@ forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input do { - v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); + v1 += XXH_readLE32(p, endian) * PRIME32_2; + 
v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_readLE32(p, endian) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_readLE32(p, endian) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_readLE32(p, endian) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); state->v1 = v1; state->v2 = v2; @@ -393,17 +669,17 @@ forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input if (p < bEnd) { - XXH_memcpy(state->memory, p, bEnd-p); + XXH_memcpy(state->mem32, p, bEnd-p); state->memsize = (int)(bEnd-p); } return XXH_OK; } -XXH_errorcode XXH32_update (void* state_in, const void* input, int len) +XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_update_endian(state_in, input, len, XXH_littleEndian); else @@ -412,11 +688,11 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len) -forceinline U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian) +FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian) { - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - const BYTE * p = (const BYTE*)state->memory; - BYTE* bEnd = (BYTE*)state->memory + state->memsize; + XXH_istate32_t* state = (XXH_istate32_t*) state_in; + const BYTE * p = (const BYTE*)state->mem32; + BYTE* bEnd = (BYTE*)(state->mem32) + state->memsize; U32 h32; if (state->total_len >= 16) @@ -430,9 +706,9 @@ forceinline U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess e h32 += (U32) state->total_len; - while (p<=bEnd-4) + while (p+4<=bEnd) { - h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; + h32 += XXH_readLE32(p, endian) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4; p+=4; } @@ 
-449,27 +725,217 @@ forceinline U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess e h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; - +#if 0 + XXH32_freeState((XXH32_state_t *)state_in); +#endif return h32; } -U32 XXH32_intermediateDigest (void* state_in) +U32 XXH32_digest (const XXH32_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); + return XXH32_digest_endian(state_in, XXH_littleEndian); else - return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian); + return XXH32_digest_endian(state_in, XXH_bigEndian); } -U32 XXH32_digest (void* state_in) +FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian) { - U32 h32 = XXH32_intermediateDigest(state_in); + XXH_istate64_t * state = (XXH_istate64_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif - XXH_free(state_in); + state->total_len += len; - return h32; + if (state->memsize + len < 32) // fill in tmp buffer + { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + { + const U64* p64 = state->mem64; + state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v1 = XXH_rotl64(state->v1, 31); + state->v1 *= PRIME64_1; + p64++; + state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v2 = XXH_rotl64(state->v2, 31); + state->v2 *= PRIME64_1; + p64++; + state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v3 = XXH_rotl64(state->v3, 31); + state->v3 *= PRIME64_1; + p64++; + state->v4 += 
XXH_readLE64(p64, endian) * PRIME64_2; + state->v4 = XXH_rotl64(state->v4, 31); + state->v4 *= PRIME64_1; + p64++; + } + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do + { + v1 += XXH_readLE64(p, endian) * PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + p+=8; + v2 += XXH_readLE64(p, endian) * PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + p+=8; + v3 += XXH_readLE64(p, endian) * PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + p+=8; + v4 += XXH_readLE64(p, endian) * PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + p+=8; + } + while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->mem64, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian) +{ + XXH_istate64_t * state = (XXH_istate64_t *) state_in; + const BYTE * p = (const BYTE*)state->mem64; + BYTE* bEnd = (BYTE*)state->mem64 + state->memsize; + U64 h64; + + if (state->total_len >= 32) + { + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + h64 = h64*PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; 
+ h64 ^= v2; + h64 = h64*PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64*PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64*PRIME64_1 + PRIME64_4; + } + else + { + h64 = state->seed + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) + { + U64 k1 = XXH_readLE64(p, endian); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) + { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) + { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; +#if 0 + XXH64_freeState((XXH64_state_t *)state_in); +#endif + return h64; +} + + +uint64_t XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); } + + diff --git a/src/xxhash.h b/src/xxhash.h index b892388..49b0239 100644 --- a/src/xxhash.h +++ b/src/xxhash.h @@ -1,20 +1,20 @@ /* - xxHash - Fast Hash algorithm + xxHash - Extremely Fast Hash algorithm Header File - Copyright (C) 2012-2013, Yann Collet. + Copyright (C) 2012-2014, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -52,113 +52,114 @@ CRC32 0.43 GB/s 9 MD5-32 0.33 GB/s 10 Ronald L. Rivest SHA1-32 0.28 GB/s 10 -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. 10 is a perfect score. */ - -#pragma once +#ifndef LIBUCL_XXHASH_H +#define LIBUCL_XXHASH_H #if defined (__cplusplus) extern "C" { #endif -//**************************** -// Type -//**************************** +/***************************** + Includes +*****************************/ +#include <stddef.h> /* size_t */ +#include <stdint.h> + + +/***************************** + Type +*****************************/ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; -//**************************** -// Simple Hash Functions -//**************************** +/***************************** + Simple Hash Functions +*****************************/ -unsigned int XXH32 (const void* input, int len, unsigned int seed); +unsigned int XXH32 (const void* input, size_t length, unsigned seed); +uint64_t XXH64 (const void* input, size_t length, uint64_t seed); /* XXH32() : - Calculate the 32-bits hash of sequence of length "len" stored at memory address "input". - The memory between input & input+len must be valid (allocated and read-accessible). + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). 
"seed" can be used to alter the result predictably. This function successfully passes all SMHasher tests. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s - Note that "len" is type "int", which means it is limited to 2^31-1. - If your data is larger, use the advanced functions below. +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". */ -//**************************** -// Advanced Hash Functions -//**************************** - -void* XXH32_init (unsigned int seed); -XXH_errorcode XXH32_update (void* state, const void* input, int len); -unsigned int XXH32_digest (void* state); +/***************************** + Advanced Hash Functions +*****************************/ +typedef struct { int64_t ll[ 6]; } XXH32_state_t; +typedef struct { int64_t ll[11]; } XXH64_state_t; /* -These functions calculate the xxhash of an input provided in several small packets, -as opposed to an input provided as a single block. - -It must be started with : -void* XXH32_init() -The function returns a pointer which holds the state of calculation. +These structures allow static allocation of XXH states. +States must then be initialized using XXHnn_reset() before first use. -This pointer must be provided as "void* state" parameter for XXH32_update(). -XXH32_update() can be called as many times as necessary. -The user must provide a valid (allocated) input. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. -Note that "len" is type "int", which means it is limited to 2^31-1. -If your data is larger, it is recommended to chunk your data into blocks -of size for example 2^30 (1GB) to avoid any "int" overflow issue. - -Finally, you can end the calculation anytime, by using XXH32_digest(). -This function returns the final 32-bits hash. -You must provide the same "void* state" parameter created by XXH32_init(). -Memory will be freed by XXH32_digest(). 
+If you prefer dynamic allocation, please refer to functions below. */ +/* + * !!! + * Rspamd specific: we use the legacy method to free state when digest is obtained + * !!! + */ +void * XXH32_init (unsigned seed); +XXH32_state_t* XXH32_createState(void); +XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); -int XXH32_sizeofState(void); -XXH_errorcode XXH32_resetState(void* state, unsigned int seed); +XXH64_state_t* XXH64_createState(void); +XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); -#define XXH32_SIZEOFSTATE 48 -typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t; /* -These functions allow user application to make its own allocation for state. +These functions create and release memory for XXH state. +States must then be initialized using XXHnn_reset() before first use. +*/ -XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state. -Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer. -This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state. -For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()), -use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. 
-*/ +XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned seed); +XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +unsigned int XXH32_digest (const XXH32_state_t* statePtr); +XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, uint64_t seed); +XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +uint64_t XXH64_digest (const XXH64_state_t* statePtr); -unsigned int XXH32_intermediateDigest (void* state); /* -This function does the same as XXH32_digest(), generating a 32-bit hash, -but preserve memory context. -This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update(). -To free memory context, use XXH32_digest(), or free(). -*/ +These functions calculate the xxHash of an input provided in multiple smaller packets, +as opposed to an input provided as a single block. +XXH state space must first be allocated, using either static or dynamic method provided above. +Start a new hash by initializing state with a seed, using XXHnn_reset(). -//**************************** -// Deprecated function names -//**************************** -// The following translations are provided to ease code transition -// You are encouraged to no longer this function names -#define XXH32_feed XXH32_update -#define XXH32_result XXH32_digest -#define XXH32_getIntermediateResult XXH32_intermediateDigest +Then, feed the hash state by calling XXHnn_update() as many times as necessary. +Obviously, input must be valid, meaning allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. +Finally, you can produce a hash anytime, by using XXHnn_digest(). +This function returns the final nn-bits hash. +You can nonetheless continue feeding the hash state with more input, +and therefore get some new hashes, by calling again XXHnn_digest(). 
+ +When you are done, don't forget to free XXH state space, using typically XXHnn_freeState(). +*/ #if defined (__cplusplus) } #endif + +#endif diff --git a/tests/Makefile.am b/tests/Makefile.am index 5b17e1f..055eb8b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,10 +1,12 @@ -EXTRA_DIST = $(TESTS) basic schema generate.res streamline.res rcl_test.json.xz +EXTRA_DIST = $(TESTS) basic schema generate.res \ + streamline.res rcl_test.json.xz TESTS = basic.test \ generate.test \ schema.test \ + msgpack.test \ speed.test \ - streamline.test + msgpack.test TESTS_ENVIRONMENT = $(SH) \ TEST_DIR=$(top_srcdir)/tests \ TEST_OUT_DIR=$(top_builddir)/tests \ @@ -35,4 +37,9 @@ test_streamline_SOURCES = test_streamline.c test_streamline_LDADD = $(common_test_ldadd) test_streamline_CFLAGS = $(common_test_cflags) -check_PROGRAMS = test_basic test_speed test_generate test_schema test_streamline
\ No newline at end of file +test_msgpack_SOURCES = test_msgpack.c +test_msgpack_LDADD = $(common_test_ldadd) +test_msgpack_CFLAGS = $(common_test_cflags) + +check_PROGRAMS = test_basic test_speed test_generate test_schema test_streamline \ + test_msgpack
\ No newline at end of file diff --git a/tests/basic/13.in b/tests/basic/13.in index 81f03a6..6e31e9c 100644 --- a/tests/basic/13.in +++ b/tests/basic/13.in @@ -1,7 +1,7 @@ key = value_orig; # test glob -.include(glob=true) "${CURDIR}/include_dir/test*.conf" +.include(glob=true,something="test") "${CURDIR}/include_dir/test*.conf" .include(priority=1) "${CURDIR}/include_dir/pri1.conf" .include(priority=2) "${CURDIR}/include_dir/pri2.conf" diff --git a/tests/basic/15.in b/tests/basic/15.in new file mode 100644 index 0000000..1329b8c --- /dev/null +++ b/tests/basic/15.in @@ -0,0 +1,24 @@ +# In this test we test include override bug + +.include(priority = 1) "${CURDIR}/15.inc" + +section = { + value = "test"; +} + +overrided = { + value = "not-to-be-shown"; +} + +/* + BUGGED UCL: + overrided { + key = "overrided"; + } + !!! So overrided has actually rewritten the previous key + section { + value { + value = "not-to-be-shown"; + } + } +*/ diff --git a/tests/basic/15.inc b/tests/basic/15.inc new file mode 100644 index 0000000..a9ab027 --- /dev/null +++ b/tests/basic/15.inc @@ -0,0 +1,3 @@ +overrided { + key = "overrided"; +} diff --git a/tests/basic/15.res b/tests/basic/15.res new file mode 100644 index 0000000..b6b4e26 --- /dev/null +++ b/tests/basic/15.res @@ -0,0 +1,7 @@ +overrided { + key = "overrided"; +} +section { + value = "test"; +} + diff --git a/tests/basic/16.in b/tests/basic/16.in new file mode 100644 index 0000000..0712252 --- /dev/null +++ b/tests/basic/16.in @@ -0,0 +1,12 @@ +.include(priority = 1) "${CURDIR}/16.inc" + +section = { + value = "test"; +} + +overrided = { + value = "not-to-be-shown"; +} +overrided = { + value2 = "implicit-array"; +} diff --git a/tests/basic/16.inc b/tests/basic/16.inc new file mode 100644 index 0000000..34b6085 --- /dev/null +++ b/tests/basic/16.inc @@ -0,0 +1,3 @@ +overrided { + key = "overrided"; +} diff --git a/tests/basic/16.res b/tests/basic/16.res new file mode 100644 index 0000000..b6b4e26 --- /dev/null +++ 
b/tests/basic/16.res @@ -0,0 +1,7 @@ +overrided { + key = "overrided"; +} +section { + value = "test"; +} + diff --git a/tests/basic/17.in b/tests/basic/17.in new file mode 100644 index 0000000..bae66fb --- /dev/null +++ b/tests/basic/17.in @@ -0,0 +1,2 @@ +# issue 74 +string that ends in slash\
\ No newline at end of file diff --git a/tests/basic/17.res b/tests/basic/17.res new file mode 100644 index 0000000..9870b04 --- /dev/null +++ b/tests/basic/17.res @@ -0,0 +1,2 @@ +string = "that ends in slash\\"; + diff --git a/tests/basic/18.in b/tests/basic/18.in new file mode 100644 index 0000000..bd12c01 --- /dev/null +++ b/tests/basic/18.in @@ -0,0 +1,10 @@ +defaults { + key = "val" + foo = "bar" + many = "values here" +} + +mything { + .inherit "defaults" + key = "newval" +} diff --git a/tests/basic/18.res b/tests/basic/18.res new file mode 100644 index 0000000..a6272d5 --- /dev/null +++ b/tests/basic/18.res @@ -0,0 +1,11 @@ +defaults { + key = "val"; + foo = "bar"; + many = "values here"; +} +mything { + key = "newval"; + foo = "bar"; + many = "values here"; +} + diff --git a/tests/basic/19-append.inc b/tests/basic/19-append.inc new file mode 100644 index 0000000..909dfce --- /dev/null +++ b/tests/basic/19-append.inc @@ -0,0 +1,8 @@ +okey_append = { + key = value1; + key1 = value2 +} + +akey_append = ["value3"]; + +skey_append = "value4"; diff --git a/tests/basic/19-merge.inc b/tests/basic/19-merge.inc new file mode 100644 index 0000000..46deccb --- /dev/null +++ b/tests/basic/19-merge.inc @@ -0,0 +1,8 @@ +okey_merge = { + key = value1; + key1 = value2; +} + +akey_merge = ["value3"]; + +skey_merge = "value4"; diff --git a/tests/basic/19-rewrite.inc b/tests/basic/19-rewrite.inc new file mode 100644 index 0000000..5a7094b --- /dev/null +++ b/tests/basic/19-rewrite.inc @@ -0,0 +1,8 @@ +okey_rewrite = { + key = value1; + key1 = value2; +} + +akey_rewrite = ["value3"]; + +skey_rewrite = "value4"; diff --git a/tests/basic/19.in b/tests/basic/19.in new file mode 100644 index 0000000..717aed8 --- /dev/null +++ b/tests/basic/19.in @@ -0,0 +1,28 @@ +okey_append = { + key = value; +} + +akey_append = ["value"]; + +skey_append = "value"; + +okey_merge = { + key = value; + source = original; +} + +akey_merge = ["value"]; + +skey_merge = "value"; + +okey_rewrite = { + 
key = value; +} + +akey_rewrite = ["value"]; + +skey_rewrite = "value"; + +.include(duplicate="append") "${CURDIR}/19-append.inc" +.include(duplicate="merge") "${CURDIR}/19-merge.inc" +.include(duplicate="rewrite") "${CURDIR}/19-rewrite.inc" diff --git a/tests/basic/19.res b/tests/basic/19.res new file mode 100644 index 0000000..ca7bc41 --- /dev/null +++ b/tests/basic/19.res @@ -0,0 +1,36 @@ +okey_append { + key = "value"; +} +okey_append { + key = "value1"; + key1 = "value2"; +} +akey_append [ + "value", +] +akey_append [ + "value3", +] +skey_append = "value"; +skey_append = "value4"; +okey_merge { + key = "value"; + key = "value1"; + source = "original"; + key1 = "value2"; +} +akey_merge [ + "value", + "value3", +] +skey_merge = "value"; +skey_merge = "value4"; +okey_rewrite { + key = "value1"; + key1 = "value2"; +} +akey_rewrite [ + "value3", +] +skey_rewrite = "value4"; + diff --git a/tests/basic/20.in b/tests/basic/20.in new file mode 100644 index 0000000..f9d4088 --- /dev/null +++ b/tests/basic/20.in @@ -0,0 +1,2 @@ +# issue 112 +[[0
\ No newline at end of file diff --git a/tests/basic/20.res b/tests/basic/20.res new file mode 100644 index 0000000..abfbbf0 --- /dev/null +++ b/tests/basic/20.res @@ -0,0 +1,5 @@ +[ + [ + 0, + ] +] diff --git a/tests/basic/21.in b/tests/basic/21.in new file mode 100644 index 0000000..8f4b328 --- /dev/null +++ b/tests/basic/21.in @@ -0,0 +1,2 @@ + [9 +{0 [[0
\ No newline at end of file diff --git a/tests/basic/21.res b/tests/basic/21.res new file mode 100644 index 0000000..db091ce --- /dev/null +++ b/tests/basic/21.res @@ -0,0 +1,10 @@ +[ + 9, + { + 0 [ + [ + 0, + ] + ] + } +] diff --git a/tests/basic/22.in b/tests/basic/22.in new file mode 100644 index 0000000..244cea0 --- /dev/null +++ b/tests/basic/22.in @@ -0,0 +1,2 @@ +# issue 113 +ÿ=1
\ No newline at end of file diff --git a/tests/basic/22.res b/tests/basic/22.res new file mode 100644 index 0000000..d4bf94d --- /dev/null +++ b/tests/basic/22.res @@ -0,0 +1,2 @@ +ÿ = 1; + diff --git a/tests/msgpack.test b/tests/msgpack.test new file mode 100755 index 0000000..67a3c93 --- /dev/null +++ b/tests/msgpack.test @@ -0,0 +1,3 @@ +#!/bin/sh + +${TEST_BINARY_DIR}/test_msgpack
\ No newline at end of file diff --git a/tests/test_basic.c b/tests/test_basic.c index 5859c0b..45a9c8b 100644 --- a/tests/test_basic.c +++ b/tests/test_basic.c @@ -27,13 +27,14 @@ int main (int argc, char **argv) { - char inbuf[8192], *test_in = NULL; + char *inbuf; struct ucl_parser *parser = NULL, *parser2 = NULL; ucl_object_t *obj; + ssize_t bufsize, r; FILE *in, *out; unsigned char *emitted = NULL; const char *fname_in = NULL, *fname_out = NULL; - int ret = 0, inlen, opt, json = 0, compact = 0, yaml = 0; + int ret = 0, opt, json = 0, compact = 0, yaml = 0; while ((opt = getopt(argc, argv, "jcy")) != -1) { switch (opt) { @@ -82,16 +83,28 @@ main (int argc, char **argv) ucl_parser_set_filevars (parser, fname_in, true); } - while (!feof (in)) { - memset (inbuf, 0, sizeof (inbuf)); - if (fread (inbuf, 1, sizeof (inbuf) - 1, in) == 0) { - break; + inbuf = malloc (BUFSIZ); + bufsize = BUFSIZ; + r = 0; + + while (!feof (in) && !ferror (in)) { + if (r == bufsize) { + inbuf = realloc (inbuf, bufsize * 2); + bufsize *= 2; + if (inbuf == NULL) { + perror ("realloc"); + exit (EXIT_FAILURE); + } } - inlen = strlen (inbuf); - test_in = malloc (inlen); - memcpy (test_in, inbuf, inlen); - ucl_parser_add_chunk (parser, (const unsigned char *)test_in, inlen); + r += fread (inbuf + r, 1, bufsize - r, in); + } + + if (ferror (in)) { + fprintf (stderr, "Failed to read the input file.\n"); + exit (EXIT_FAILURE); } + + ucl_parser_add_chunk (parser, (const unsigned char *)inbuf, r); fclose (in); if (fname_out != NULL) { @@ -103,12 +116,15 @@ main (int argc, char **argv) else { out = stdout; } + if (ucl_parser_get_error (parser) != NULL) { fprintf (out, "Error occurred: %s\n", ucl_parser_get_error(parser)); ret = 1; goto end; } + obj = ucl_parser_get_object (parser); + if (json) { if (compact) { emitted = ucl_object_emit (obj, UCL_EMIT_JSON_COMPACT); @@ -123,6 +139,7 @@ main (int argc, char **argv) else { emitted = ucl_object_emit (obj, UCL_EMIT_CONFIG); } + ucl_parser_free (parser); 
ucl_object_unref (obj); parser2 = ucl_parser_new (UCL_PARSER_KEY_LOWERCASE); @@ -134,9 +151,11 @@ main (int argc, char **argv) ret = 1; goto end; } + if (emitted != NULL) { free (emitted); } + obj = ucl_parser_get_object (parser2); if (json) { if (compact) { @@ -163,8 +182,8 @@ end: if (parser2 != NULL) { ucl_parser_free (parser2); } - if (test_in != NULL) { - free (test_in); + if (inbuf != NULL) { + free (inbuf); } fclose (out); diff --git a/tests/test_msgpack.c b/tests/test_msgpack.c new file mode 100644 index 0000000..dd5860e --- /dev/null +++ b/tests/test_msgpack.c @@ -0,0 +1,340 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "ucl.h" +#include "ucl_internal.h" +#include <ctype.h> + +static const int niter = 1000; +static const int ntests = 100; +static const int nelt = 10; + +static int recursion = 0; + +typedef ucl_object_t* (*ucl_msgpack_test)(void); + +static ucl_object_t* ucl_test_integer (void); +static ucl_object_t* ucl_test_string (void); +static ucl_object_t* ucl_test_boolean (void); +static ucl_object_t* ucl_test_map (void); +static ucl_object_t* ucl_test_array (void); + +ucl_msgpack_test tests[] = { + ucl_test_integer, + ucl_test_string, + ucl_test_boolean, + ucl_test_map, + ucl_test_array, +}; + +#define NTESTS (sizeof(tests) / sizeof(tests[0])) + +typedef struct +{ + uint64_t state; + uint64_t inc; +} pcg32_random_t; + +pcg32_random_t rng; + +/* + * From http://www.pcg-random.org/ + */ +static uint32_t +pcg32_random (void) +{ + uint64_t oldstate = rng.state; + + rng.state = oldstate * 6364136223846793005ULL + (rng.inc | 1); + uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; + uint32_t rot = oldstate >> 59u; + return (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); +} + +static const char * +random_key (size_t *lenptr) +{ + static char keybuf[512]; + int keylen, i; + char c; + + keylen = pcg32_random () % (sizeof (keybuf) - 1) + 1; + + for (i = 0; i < keylen; i ++) { + do { + c = pcg32_random () & 0xFF; + } while (!isgraph (c)); + + keybuf[i] = c; + } + + *lenptr = keylen; + return keybuf; +} + +int +main (int argc, char **argv) +{ + int fd, i, j; + uint32_t sel; + ucl_object_t *obj, *elt; + struct ucl_parser *parser; + size_t klen, elen, elen2; + const char *key; + unsigned char *emitted, *emitted2; + FILE *out; + const char *fname_out = NULL; + + switch (argc) { + case 2: + fname_out = argv[1]; + break; + } + + /* Seed prng */ + fd = open ("/dev/urandom", O_RDONLY); + assert (fd != -1); + assert (read (fd, &rng, sizeof (rng)) == sizeof (rng)); + close (fd); + + for (i = 0; i < niter; i ++) { + if (fname_out != NULL) { + out = fopen 
(fname_out, "w"); + if (out == NULL) { + exit (-errno); + } + } + else { + out = NULL; + } + + /* Generate phase */ + obj = ucl_object_typed_new (UCL_OBJECT); + + for (j = 0; j < ntests; j ++) { + sel = pcg32_random () % NTESTS; + + key = random_key (&klen); + recursion = 0; + elt = tests[sel](); + assert (elt != NULL); + assert (klen != 0); + + ucl_object_insert_key (obj, elt, key, klen, true); + } + + emitted = ucl_object_emit_len (obj, UCL_EMIT_MSGPACK, &elen); + + assert (emitted != NULL); + + if (out) { + fprintf (out, "%*.s\n", (int)elen, emitted); + + fclose (out); + } + ucl_object_unref (obj); + + parser = ucl_parser_new (0); + + if (!ucl_parser_add_chunk_full (parser, emitted, elen, 0, + UCL_DUPLICATE_APPEND, UCL_PARSE_MSGPACK)) { + fprintf (stderr, "error parsing input: %s", + ucl_parser_get_error (parser)); + assert (0); + } + + obj = ucl_parser_get_object (parser); + assert (obj != NULL); + + emitted2 = ucl_object_emit_len (obj, UCL_EMIT_MSGPACK, &elen2); + + assert (emitted2 != NULL); + assert (elen2 == elen); + assert (memcmp (emitted, emitted2, elen) == 0); + + ucl_parser_free (parser); + ucl_object_unref (obj); + free (emitted); + free (emitted2); + } + + return 0; +}
+
+
+/*
+ * Build a UCL array holding `count` groups of five random integers, where
+ * count is a random value below `nelt`. Each group contains a small
+ * non-negative value, a small non-positive value, a 16-bit sized value,
+ * a value below INT32_MAX and a full 64-bit random value.
+ */
+static ucl_object_t*
+ucl_test_integer (void)
+{
+	ucl_object_t *res;
+	int count, i;
+	uint64_t cur;
+
+	res = ucl_object_typed_new (UCL_ARRAY);
+	count = pcg32_random () % nelt;
+
+	for (i = 0; i < count; i ++) {
+		cur = ((uint64_t)pcg32_random ()) << 32 | pcg32_random ();
+		ucl_array_append (res, ucl_object_fromint (cur % 128));
+		cur = ((uint64_t)pcg32_random ()) << 32 | pcg32_random ();
+		/*
+		 * `cur` is unsigned, so `-cur % 128` is computed modulo 2^64 and
+		 * always lands in [0, 127]. Reduce first and negate as a signed
+		 * value so that negative integers are really generated.
+		 */
+		ucl_array_append (res, ucl_object_fromint (-(int64_t)(cur % 128)));
+		cur = ((uint64_t)pcg32_random ()) << 32 | pcg32_random ();
+		ucl_array_append (res, ucl_object_fromint (cur % 65536));
+		cur = ((uint64_t)pcg32_random ()) << 32 | pcg32_random ();
+		ucl_array_append (res, ucl_object_fromint (cur % INT32_MAX));
+		cur = ((uint64_t)pcg32_random ()) << 32 | pcg32_random ();
+		ucl_array_append (res, ucl_object_fromint (cur));
+	}
+
+	return res;
+}
+
+/*
+ * Create one raw string object of random non-zero length below `max_len`,
+ * filled with bytes taken from the PRNG. When `binary` is true the object
+ * is additionally flagged with UCL_OBJECT_BINARY.
+ */
+static ucl_object_t*
+ucl_test_random_string (uint32_t max_len, bool binary)
+{
+	ucl_object_t *elt;
+	uint32_t len, i;
+	char *str;
+
+	/* Draw until the length is non-zero */
+	do {
+		len = pcg32_random () % max_len;
+	} while (len == 0);
+
+	str = malloc (len);
+	assert (str != NULL);
+
+	/* Give the payload defined contents instead of heap garbage */
+	for (i = 0; i < len; i ++) {
+		str[i] = (char)(pcg32_random () & 0xff);
+	}
+
+	elt = ucl_object_fromstring_common (str, len, UCL_STRING_RAW);
+	assert (elt != NULL);
+	free (str);
+
+	if (binary) {
+		elt->flags |= UCL_OBJECT_BINARY;
+	}
+
+	return elt;
+}
+
+/*
+ * Build a UCL array holding `count` groups of four random strings, where
+ * count is a random value below `nelt`: a short and a longer plain string
+ * followed by a short and a longer string flagged as binary.
+ */
+static ucl_object_t*
+ucl_test_string (void)
+{
+	ucl_object_t *res;
+	int count, i;
+
+	res = ucl_object_typed_new (UCL_ARRAY);
+	count = pcg32_random () % nelt;
+
+	for (i = 0; i < count; i ++) {
+		ucl_array_append (res, ucl_test_random_string (128, false));
+		ucl_array_append (res, ucl_test_random_string (512, false));
+		ucl_array_append (res, ucl_test_random_string (128, true));
+		ucl_array_append (res, ucl_test_random_string (512, true));
+	}
+
+	return res;
+}
+
+/*
+ * Build a UCL array of random booleans; the element count is a random
+ * value below `nelt`.
+ */
+static ucl_object_t*
+ucl_test_boolean (void)
+{
+	ucl_object_t *res;
+	int count, i;
+
+	res = ucl_object_typed_new (UCL_ARRAY);
+	count = pcg32_random () % nelt;
+
+	for (i = 0; i < count; i ++) {
+		ucl_array_append (res, ucl_object_frombool (pcg32_random () % 2));
+	}
+
+	return res;
+}
+
+/*
+ * Build a UCL object (map) whose values come from randomly selected
+ * generators in tests[]. Every key is inserted twice, so each key carries
+ * more than one value.
+ *
+ * `recursion` is incremented on entry and is not decremented here. Once
+ * it exceeds 10 the selection is limited to the first NTESTS - 2
+ * generators; presumably the last two entries of tests[] are the nested
+ * map/array generators, which would bound the nesting -- confirm against
+ * the tests[] table.
+ */
+static ucl_object_t*
+ucl_test_map (void)
+{
+	ucl_object_t *res, *cur;
+	int count, i;
+	uint32_t sel;
+	size_t klen;
+	const char *key;
+
+	res = ucl_object_typed_new (UCL_OBJECT);
+	count = pcg32_random () % nelt;
+
+	recursion ++;
+
+	for (i = 0; i < count; i ++) {
+
+		if (recursion > 10) {
+			sel = pcg32_random () % (NTESTS - 2);
+		}
+		else {
+			sel = pcg32_random () % NTESTS;
+		}
+
+		key = random_key (&klen);
+		cur = tests[sel]();
+		assert (cur != NULL);
+		assert (klen != 0);
+
+		ucl_object_insert_key (res, cur, key, klen, true);
+
+		/* Multi value key */
+		cur = tests[sel]();
+		assert (cur != NULL);
+
+		ucl_object_insert_key (res, cur, key, klen, true);
+	}
+
+	return res;
+}
+
+/*
+ * Build a UCL array whose elements come from randomly selected generators
+ * in tests[]. The generator choice is restricted in the same way as in
+ * ucl_test_map() once `recursion` exceeds 10.
+ */
+static ucl_object_t*
+ucl_test_array (void)
+{
+	ucl_object_t *res, *cur;
+	int count, i;
+	uint32_t sel;
+
+	res = ucl_object_typed_new (UCL_ARRAY);
+	count = pcg32_random () % nelt;
+
+	recursion ++;
+
+	for (i = 0; i < count; i ++) {
+		if (recursion > 10) {
+			sel = pcg32_random () % (NTESTS - 2);
+		}
+		else {
+			sel = pcg32_random () % NTESTS;
+		}
+
+		cur = tests[sel]();
+		assert (cur != NULL);
+
+		ucl_array_append (res, cur);
+	}
+
+	return res;
+}
diff --git a/tests/test_schema.c b/tests/test_schema.c index 4f075dae..39eb7f3 100644 --- a/tests/test_schema.c +++ b/tests/test_schema.c @@ -40,20 +40,24 @@ read_stdin (char **buf) p = *buf; remain = size; - while ((ret = read (STDIN_FILENO, p, remain)) > 0) { + while ((ret = read (STDIN_FILENO, p, remain - 1)) > 0) { remain -= ret; p += ret; - if (remain == 0) { + + if (remain <= 1) { *buf = realloc (*buf, size * 2); if (*buf == NULL) { return -1; } - p = *buf + size; - remain = size; + + p = *buf + size - 1; + remain = size + 1; size *= 2; } } + *p = '\0'; + return ret; } diff --git a/utils/Makefile.am b/utils/Makefile.am index 61e06a7..41a3930 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -11,8 +11,12 @@ ucl_objdump_SOURCES = objdump.c ucl_objdump_LDADD = $(common_utils_ldadd) ucl_objdump_CFLAGS = $(common_utils_cflags) +ucl_tool_SOURCES = ucl-tool.c +ucl_tool_LDADD = $(common_utils_ldadd) +ucl_tool_CFLAGS = $(common_utils_cflags) + if UTILS -UTL = ucl_chargen ucl_objdump +UTL = ucl_chargen ucl_objdump ucl_tool else UTL = endif diff --git a/utils/chargen.c b/utils/chargen.c index d6fa86a..3981340 100644 --- a/utils/chargen.c +++ b/utils/chargen.c @@ -54,9 +54,9 @@ main (int argc, char **argv) name = argv[1]; } - printf ("static const unsigned int %s[255] = {\n", name); + printf ("static const unsigned int %s[256] = 
{\n", name); - for (i = 0; i < 255; i ++) { + for (i = 0; i < 256; i ++) { need_or = false; r = 0; /* UCL_CHARACTER_VALUE_END */ @@ -110,7 +110,7 @@ main (int argc, char **argv) if (isprint (i)) { r += sprintf (valbuf + r, " /* %c */", i); } - if (i != 254) { + if (i != 255) { r += sprintf (valbuf + r, ", "); } col += r; diff --git a/utils/objdump.c b/utils/objdump.c index 74581ba..6fde2f4 100644 --- a/utils/objdump.c +++ b/utils/objdump.c @@ -99,9 +99,10 @@ int main(int argc, char **argv) { const char *fn = NULL; - unsigned char inbuf[8192]; + unsigned char *inbuf; struct ucl_parser *parser; int k, ret = 0, r = 0; + ssize_t bufsize; ucl_object_t *obj = NULL; const ucl_object_t *par; FILE *in; @@ -121,9 +122,27 @@ main(int argc, char **argv) } parser = ucl_parser_new (0); - while (!feof (in) && r < (int)sizeof (inbuf)) { - r += fread (inbuf + r, 1, sizeof (inbuf) - r, in); + inbuf = malloc (BUFSIZ); + bufsize = BUFSIZ; + r = 0; + + while (!feof (in) && !ferror (in)) { + if (r == bufsize) { + inbuf = realloc (inbuf, bufsize * 2); + bufsize *= 2; + if (inbuf == NULL) { + perror ("realloc"); + exit (EXIT_FAILURE); + } + } + r += fread (inbuf + r, 1, bufsize - r, in); } + + if (ferror (in)) { + fprintf (stderr, "Failed to read the input file.\n"); + exit (EXIT_FAILURE); + } + ucl_parser_add_chunk (parser, inbuf, r); fclose (in); if (ucl_parser_get_error(parser)) { diff --git a/utils/ucl-tool.c b/utils/ucl-tool.c new file mode 100644 index 0000000..feea9c2 --- /dev/null +++ b/utils/ucl-tool.c @@ -0,0 +1,168 @@ +/* Copyright (c) 2015, Cesanta Software + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ucl.h"
+
+/* Long options; each maps onto the short option given in the last field. */
+static struct option opts[] = {
+  {"help", no_argument, NULL, 'h'},
+  {"in", required_argument, NULL, 'i' },
+  {"out", required_argument, NULL, 'o' },
+  {"schema", required_argument, NULL, 's'},
+  {"format", required_argument, NULL, 'f'},
+  {0, 0, 0, 0}
+};
+
+/*
+ * Print the command line synopsis.
+ *
+ * name - program name shown in the synopsis (main passes argv[0])
+ * out  - stream the text is written to
+ */
+void usage(const char *name, FILE *out) {
+  fprintf(out, "Usage: %s [--help] [-i|--in file] [-o|--out file]\n", name);
+  fprintf(out, " [-s|--schema file] [-f|--format format]\n\n");
+  fprintf(out, " --help - print this message and exit\n");
+  fprintf(out, " --in - specify input filename "
+      "(default: standard input)\n");
+  fprintf(out, " --out - specify output filename "
+      "(default: standard output)\n");
+  fprintf(out, " --schema - specify schema file for validation\n");
+  fprintf(out, " --format - output format. Options: ucl (default), "
+      "json, compact_json, yaml, msgpack\n");
+}
+
+/*
+ * Read a UCL document from the input stream, optionally validate it
+ * against a schema file, and write it to the output stream in the
+ * requested format.
+ *
+ * Returns 0 on success. Every failure path prints a diagnostic to stderr
+ * and terminates the process with EXIT_FAILURE.
+ */
+int main(int argc, char **argv) {
+  /*
+   * getopt_long() returns int. Keeping the result in a plain char breaks
+   * the comparison with -1 where char is unsigned, so the option loop
+   * would never end.
+   */
+  int ch;
+  FILE *in = stdin, *out = stdout;
+  const char *schema = NULL;
+  unsigned char *buf = NULL;
+  size_t size = 0, r = 0;
+  struct ucl_parser *parser = NULL;
+  ucl_object_t *obj = NULL;
+  ucl_emitter_t emitter = UCL_EMIT_CONFIG;
+
+  while((ch = getopt_long(argc, argv, "hi:o:s:f:", opts, NULL)) != -1) {
+    switch (ch) {
+    case 'i':
+      in = fopen(optarg, "r");
+      if (in == NULL) {
+        perror("fopen on input file");
+        exit(EXIT_FAILURE);
+      }
+      break;
+    case 'o':
+      out = fopen(optarg, "w");
+      if (out == NULL) {
+        perror("fopen on output file");
+        exit(EXIT_FAILURE);
+      }
+      break;
+    case 's':
+      schema = optarg;
+      break;
+    case 'f':
+      if (strcmp(optarg, "ucl") == 0) {
+        emitter = UCL_EMIT_CONFIG;
+      } else if (strcmp(optarg, "json") == 0) {
+        emitter = UCL_EMIT_JSON;
+      } else if (strcmp(optarg, "yaml") == 0) {
+        emitter = UCL_EMIT_YAML;
+      } else if (strcmp(optarg, "compact_json") == 0) {
+        emitter = UCL_EMIT_JSON_COMPACT;
+      } else if (strcmp(optarg, "msgpack") == 0) {
+        emitter = UCL_EMIT_MSGPACK;
+      } else {
+        fprintf(stderr, "Unknown output format: %s\n", optarg);
+        exit(EXIT_FAILURE);
+      }
+      break;
+    case 'h':
+      usage(argv[0], stdout);
+      exit(0);
+    default:
+      usage(argv[0], stderr);
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  parser = ucl_parser_new(0);
+  buf = malloc(BUFSIZ);
+  if (buf == NULL) {
+    perror("malloc");
+    exit(EXIT_FAILURE);
+  }
+  size = BUFSIZ;
+
+  /* Slurp the whole input, doubling the buffer whenever it fills up */
+  while(!feof(in) && !ferror(in)) {
+    if (r == size) {
+      unsigned char *grown = realloc(buf, size * 2);
+
+      if (grown == NULL) {
+        perror("realloc");
+        exit(EXIT_FAILURE);
+      }
+      buf = grown;
+      size *= 2;
+    }
+    r += fread(buf + r, 1, size - r, in);
+  }
+  if (ferror(in)) {
+    fprintf(stderr, "Failed to read the input file.\n");
+    exit(EXIT_FAILURE);
+  }
+  fclose(in);
+  if (!ucl_parser_add_chunk(parser, buf, r)) {
+    fprintf(stderr, "Failed to parse input file: %s\n",
+        ucl_parser_get_error(parser));
+    exit(EXIT_FAILURE);
+  }
+  if ((obj = ucl_parser_get_object(parser)) == NULL) {
+    fprintf(stderr, "Failed to get root object: %s\n",
+        ucl_parser_get_error(parser));
+    exit(EXIT_FAILURE);
+  }
+  if (schema != NULL) {
+    struct ucl_parser *schema_parser = ucl_parser_new(0);
+    ucl_object_t *schema_obj = NULL;
+    struct ucl_schema_error error;
+
+    if (!ucl_parser_add_file(schema_parser, schema)) {
+      fprintf(stderr, "Failed to parse schema file: %s\n",
+          ucl_parser_get_error(schema_parser));
+      exit(EXIT_FAILURE);
+    }
+    if ((schema_obj = ucl_parser_get_object(schema_parser)) == NULL) {
+      fprintf(stderr, "Failed to get root object: %s\n",
+          ucl_parser_get_error(schema_parser));
+      exit(EXIT_FAILURE);
+    }
+    if (!ucl_object_validate(schema_obj, obj, &error)) {
+      fprintf(stderr, "Validation failed: %s\n", error.msg);
+      exit(EXIT_FAILURE);
+    }
+
+    ucl_object_unref(schema_obj);
+    ucl_parser_free(schema_parser);
+  }
+
+  if (emitter != UCL_EMIT_MSGPACK) {
+    unsigned char *res = ucl_object_emit(obj, emitter);
+
+    if (res == NULL) {
+      fprintf(stderr, "Failed to emit the object.\n");
+      exit(EXIT_FAILURE);
+    }
+    fprintf(out, "%s\n", (const char *)res);
+    free(res);
+  }
+  else {
+    /* msgpack output may contain NUL bytes, so write it by length */
+    size_t len;
+    unsigned char *res;
+
+    res = ucl_object_emit_len(obj, emitter, &len);
+    if (res == NULL) {
+      fprintf(stderr, "Failed to emit the object.\n");
+      exit(EXIT_FAILURE);
+    }
+    fwrite(res, 1, len, out);
+    free(res);
+  }
+
+  ucl_object_unref(obj);
+  ucl_parser_free(parser);
+  free(buf);
+
+  /* Buffered write errors only surface on close, so check it */
+  if (fclose(out) != 0) {
+    perror("fclose on output file");
+    exit(EXIT_FAILURE);
+  }
+
+  return 0;
+} |