From: Tomek Mrugalski Date: Fri, 28 Oct 2016 17:19:04 +0000 (+0200) Subject: [5014] Initial JSON parser written in bison/flex implemented (wip) X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=832c1e7ec07e816d43f7ca3d8734f6c1729e0e05;p=thirdparty%2Fkea.git [5014] Initial JSON parser written in bison/flex implemented (wip) --- diff --git a/src/bin/dhcp6/Makefile.am b/src/bin/dhcp6/Makefile.am index 47155aecf5..fb1f9f6174 100644 --- a/src/bin/dhcp6/Makefile.am +++ b/src/bin/dhcp6/Makefile.am @@ -64,6 +64,10 @@ libdhcp6_la_SOURCES += ctrl_dhcp6_srv.cc ctrl_dhcp6_srv.h libdhcp6_la_SOURCES += json_config_parser.cc json_config_parser.h libdhcp6_la_SOURCES += dhcp6to4_ipc.cc dhcp6to4_ipc.h +libdhcp6_la_SOURCES += dhcp6_lexer.ll location.hh position.hh stack.hh +libdhcp6_la_SOURCES += dhcp6_parser.cc dhcp6_parser.h +libdhcp6_la_SOURCES += parser_context.cc parser_context.h + libdhcp6_la_SOURCES += kea_controller.cc nodist_libdhcp6_la_SOURCES = dhcp6_messages.h dhcp6_messages.cc @@ -105,3 +109,27 @@ endif kea_dhcp6dir = $(pkgdatadir) kea_dhcp6_DATA = dhcp6.spec + +if GENERATE_PARSER + +parser: dhcp6_lexer.cc location.hh position.hh stack.hh dhcp6_parser.cc dhcp6_parser.h + @echo "Flex/bison files regenerated" + +# --- Flex/Bison stuff below -------------------------------------------------- +# When debugging grammar issues, it's useful to add -v to bison parameters. +# bison will generate parser.output file that explains the whole grammar. +# It can be used to manually follow what's going on in the parser. +# This is especially useful if yydebug_ is set to 1 as that variable +# will cause parser to print out its internal state. 
+location.hh position.hh stack.hh dhcp6_parser.cc dhcp6_parser.h: dhcp6_parser.yy
+	$(YACC) --defines=dhcp6_parser.h -o dhcp6_parser.cc dhcp6_parser.yy
+
+dhcp6_lexer.cc: dhcp6_lexer.ll
+	$(LEX) -o dhcp6_lexer.cc dhcp6_lexer.ll
+
+else
+
+parser location.hh position.hh stack.hh dhcp6_parser.cc dhcp6_parser.h dhcp6_lexer.cc:
+	@echo Parser generation disabled. Configure with --enable-generate-parser to enable it.
+
+endif
diff --git a/src/bin/dhcp6/dhcp6_lexer.ll b/src/bin/dhcp6/dhcp6_lexer.ll
new file mode 100644
index 0000000000..54e5c6e018
--- /dev/null
+++ b/src/bin/dhcp6/dhcp6_lexer.ll
@@ -0,0 +1,187 @@
+/* Copyright (C) 2015-2016 Internet Systems Consortium, Inc. ("ISC")
+
+   This Source Code Form is subject to the terms of the Mozilla Public
+   License, v. 2.0. If a copy of the MPL was not distributed with this
+   file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+%{ /* -*- C++ -*- */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// Work around an incompatibility in flex (at least versions
+// 2.5.31 through 2.5.33): it generates code that does
+// not conform to C89. See Debian bug 333231
+// .
+# undef yywrap
+# define yywrap() 1
+
+// The location of the current token. The lexer will keep updating it. This
+// variable will be useful for logging errors.
+static isc::dhcp::location loc;
+
+// To avoid the call to exit... oops!
+#define YY_FATAL_ERROR(msg) isc::dhcp::Parser6Context::fatal(msg)
+%}
+
+/* noyywrap disables automatic rewinding for the next file to parse. Since we
+   always parse only a single string, there's no need to do any wraps. And
+   using yywrap requires linking with -lfl, which provides the default yywrap
+   implementation that always returns 1 anyway. */
+%option noyywrap
+
+/* nounput simplifies the lexer, by removing support for putting a character
+   back into the input stream. We never use such capability anyway. */
+%option nounput
+
+/* batch means that we'll never use the generated lexer interactively. */
+%option batch
+
+/* Enables debug mode. To see the debug messages, one needs to also set
+   yy_flex_debug to 1, then the debug messages will be printed on stderr. */
+%option debug
+
+/* I have no idea what this option does, except it was specified in the bison
+   examples and Postgres folks added it to remove gcc 4.3 warnings. Let's
+   be on the safe side and keep it. */
+%option noinput
+
+/* This line tells flex to track the line numbers. It's not really that
+   useful for client classes, which typically are one-liners, but it may be
+   useful in more complex cases. */
+%option yylineno
+
+/* These are not token expressions yet, just convenience expressions that
+   can be used during actual token definitions. Note some can match
+   incorrect inputs (e.g., IP addresses) which must be checked. */
+int \-?[0-9]+
+blank [ \t]
+
+UnicodeEscapeSequence u[0-9A-Fa-f]{4}
+JSONEscapeCharacter ["\\/bfnrt]
+JSONEscapeSequence {JSONEscapeCharacter}|{UnicodeEscapeSequence}
+JSONStringCharacter [^"\\]|\\{JSONEscapeSequence}
+JSONString \"{JSONStringCharacter}*\"
+
+
+%{
+// This code runs each time a pattern is matched. It updates the location
+// by moving it ahead by yyleng bytes. yyleng specifies the length of the
+// currently matched token.
+#define YY_USER_ACTION loc.columns(yyleng);
+%}
+
+%%
+
+%{
+    // Code run each time yylex is called.
+    loc.step();
+%}
+
+{blank}+ {
+    // Ok, we found whitespace. Let's ignore it and update loc variable.
+    loc.step();
+}
+[\n]+ {
+    // Newline found. Let's update the location and continue.
+    loc.lines(yyleng);
+    loc.step();
+}
+
+
+{JSONString} {
+    // A string has been matched. It contains the actual string and double quotes.
+    // We need to get those quotes out of the way and just use its content, e.g.
+    // for "foo" we should get foo. Escape sequences are passed on undecoded.
+    std::string tmp(yytext+1);
+    tmp.resize(tmp.size() - 1);
+
+    return isc::dhcp::Dhcp6Parser::make_STRING(tmp, loc);
+}
+
+"[" { return isc::dhcp::Dhcp6Parser::make_LSQUARE_BRACKET(loc); }
+"]" { return isc::dhcp::Dhcp6Parser::make_RSQUARE_BRACKET(loc); }
+"{" { return isc::dhcp::Dhcp6Parser::make_LCURLY_BRACKET(loc); }
+"}" { return isc::dhcp::Dhcp6Parser::make_RCURLY_BRACKET(loc); }
+"," { return isc::dhcp::Dhcp6Parser::make_COMMA(loc); }
+":" { return isc::dhcp::Dhcp6Parser::make_COLON(loc); }
+
+{int} {
+    // An integer was found.
+    std::string tmp(yytext);
+    int64_t integer = 0;
+    try {
+        // The value may be negative (e.g. -1) or larger than 32 bits
+        // can hold (e.g. 0xffffffff and above). To cover both of those
+        // use cases, we need at least int64_t. A failed conversion is
+        // reported through driver.error(), which throws.
+        integer = boost::lexical_cast<int64_t>(tmp);
+    } catch (const boost::bad_lexical_cast &) {
+        driver.error(loc, "Failed to convert " + tmp + " to an integer.");
+    }
+
+    // Hand the converted value, together with its location, to the parser.
+    return isc::dhcp::Dhcp6Parser::make_INTEGER(integer, loc);
+}
+[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)? {
+    // A floating point was found.
+    std::string tmp(yytext);
+    double fp = 0.0;
+    try {
+        // Numbers the {int} rule does not take end up here: fractions
+        // (1.5), exponents (1e6) and values with an explicit plus (+1).
+        // The pattern requires at least one mantissa digit, so a lone
+        // sign or dot (or nothing at all) is never taken for a number.
+        fp = boost::lexical_cast<double>(tmp);
+    } catch (const boost::bad_lexical_cast &) {
+        driver.error(loc, "Failed to convert " + tmp + " to a floating point.");
+    }
+
+    return isc::dhcp::Dhcp6Parser::make_FLOAT(fp, loc);
+}
+
+true|false {
+    std::string tmp(yytext);
+    return isc::dhcp::Dhcp6Parser::make_BOOLEAN(tmp == "true", loc);
+}
+
+null {
+    return isc::dhcp::Dhcp6Parser::make_NULL_TYPE(loc);
+}
+
+. driver.error (loc, "Invalid character: " + std::string(yytext));
+<<EOF>> return isc::dhcp::Dhcp6Parser::make_END(loc);
+%%
+
+using namespace isc::dhcp;
+
+void
+Parser6Context::scanStringBegin()
+{
+    loc.initialize(&file_);
+    yy_flex_debug = trace_scanning_;
+    YY_BUFFER_STATE buffer;
+    buffer = yy_scan_bytes(string_.c_str(), string_.size());
+    if (!buffer) {
+        fatal("cannot scan string");
+        // fatal() throws an exception so this can't be reached
+    }
+}
+
+void
+Parser6Context::scanStringEnd()
+{
+    yy_delete_buffer(YY_CURRENT_BUFFER);
+}
+
+namespace {
+/// To avoid unused function error
+class Dummy {
+    // cppcheck-suppress unusedPrivateFunction
+    void dummy() { yy_fatal_error("Fix me: how to disable its definition?"); }
+};
+}
diff --git a/src/bin/dhcp6/dhcp6_parser.yy b/src/bin/dhcp6/dhcp6_parser.yy
new file mode 100644
index 0000000000..019d52b8a3
--- /dev/null
+++ b/src/bin/dhcp6/dhcp6_parser.yy
@@ -0,0 +1,113 @@
+/* Copyright (C) 2015-2016 Internet Systems Consortium, Inc. ("ISC")
+
+   This Source Code Form is subject to the terms of the Mozilla Public
+   License, v. 2.0. If a copy of the MPL was not distributed with this
+   file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+%skeleton "lalr1.cc" /* -*- C++ -*- */
+%require "3.0.0"
+%defines
+%define parser_class_name {Dhcp6Parser}
+%define api.token.constructor
+%define api.value.type variant
+%define api.namespace {isc::dhcp}
+%define parse.assert
+%code requires
+{
+#include
+#include
+#include
+#include
+#include
+
+using namespace isc::dhcp;
+using namespace isc::data;
+using namespace std;
+}
+// The parsing context.
+%param { isc::dhcp::Parser6Context& ctx }
+%locations
+%define parse.trace
+%define parse.error verbose
+%code
+{
+#include
+
+}
+
+%define api.token.prefix {TOKEN_}
+// Tokens in an order which makes sense and related to the intended use.
+%token
+  END  0  "end of file"
+  COMMA ","
+  COLON ":"
+  LSQUARE_BRACKET "["
+  RSQUARE_BRACKET "]"
+  LCURLY_BRACKET "{"
+  RCURLY_BRACKET "}"
+  NULL_TYPE "null"
+;
+
+%token <std::string> STRING "constant string"
+%token <int64_t> INTEGER "integer"
+%token <double> FLOAT "floating point"
+%token <bool> BOOLEAN "boolean"
+
+%type <ElementPtr> value
+
+
+%printer { yyoutput << $$; } <*>;
+
+%%
+// The whole grammar starts with a map, because the config file
+// consists of Dhcp, Logger and DhcpDdns entries in one big { }.
+%start map;
+
+// Values rule. A finished map or list sits on top of ctx.stack_; it is taken from there.
+value : INTEGER { $$ = ElementPtr(new IntElement($1)); }
+      | FLOAT { $$ = ElementPtr(new DoubleElement($1)); }
+      | BOOLEAN { $$ = ElementPtr(new BoolElement($1)); }
+      | STRING { $$ = ElementPtr(new StringElement($1)); }
+      | NULL_TYPE { $$ = ElementPtr(new NullElement()); }
+      | map { $$ = ctx.stack_.back(); ctx.stack_.pop_back(); }
+      | list { $$ = ctx.stack_.back(); ctx.stack_.pop_back(); }
+      ;
+
+// Containers: "{" or "[" pushes a fresh element onto ctx.stack_, the content
+// rules fill ctx.stack_.back(), and the finished element is left on the stack.
+map: LCURLY_BRACKET {
+    ctx.stack_.push_back(ElementPtr(new MapElement()));
+} map_content RCURLY_BRACKET {
+    // Nothing to do: "value" or parseString() picks the map up from the stack.
+};
+
+// Assignments rule: an empty map, or comma separated "name": value entries.
+map_content: { /* do nothing, it's an empty map */ }
+           | map_entries
+           ;
+map_entries: STRING COLON value { ctx.stack_.back()->set($1, $3); }
+           | map_entries COMMA STRING COLON value {
+               ctx.stack_.back()->set($3, $5);
+           }
+           ;
+
+list: LSQUARE_BRACKET { ctx.stack_.push_back(ElementPtr(new ListElement())); }
+      list_content RSQUARE_BRACKET { /* the finished list stays on the stack */ };
+
+list_content: { /* do nothing, it's an empty list */ }
+            | list_entries
+            ;
+list_entries: value { ctx.stack_.back()->add($1); }
+            | list_entries COMMA value {
+                // Shorter list followed by , and a value.
+                ctx.stack_.back()->add($3);
+            }
+            ;
+
+%%
+
+void
+isc::dhcp::Dhcp6Parser::error(const location_type& loc,
+                              const std::string& what)
+{
+    ctx.error(loc, what);
+}
diff --git a/src/bin/dhcp6/parser_context.cc b/src/bin/dhcp6/parser_context.cc
new file mode 100644
index 0000000000..fbf6d8b6dc
--- /dev/null
+++ b/src/bin/dhcp6/parser_context.cc
@@ -0,0 +1,73 @@
+// Copyright (C) 2016 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace isc {
+namespace dhcp {
+
+Parser6Context::Parser6Context()
+  : trace_scanning_(false), trace_parsing_(false)
+{
+}
+
+Parser6Context::~Parser6Context()
+{
+}
+
+isc::data::ConstElementPtr
+Parser6Context::parseString(const std::string& str)
+{
+    file_ = "";
+    string_ = str;
+    // Drop anything left over from a previous (possibly failed) run.
+    stack_.clear();
+    scanStringBegin();
+    int res = 0;
+    try {
+        isc::dhcp::Dhcp6Parser parser(*this);
+        parser.set_debug_level(trace_parsing_);
+        res = parser.parse();
+    } catch (...) {
+        // Syntax errors are reported by throwing; release the flex buffer
+        // before letting the exception propagate.
+        scanStringEnd();
+        throw;
+    }
+    scanStringEnd();
+    if ((res != 0) || stack_.empty()) {
+        isc_throw(EvalParseError, "parsing failed: no JSON structure produced");
+    }
+    // The grammar leaves the completed top-level map on top of the stack.
+    return (stack_.back());
+}
+
+void
+Parser6Context::error(const isc::dhcp::location& loc, const std::string& what)
+{
+    isc_throw(EvalParseError, loc << ": " << what);
+}
+
+void
+Parser6Context::error (const std::string& what)
+{
+    isc_throw(EvalParseError, what);
+}
+
+void
+Parser6Context::fatal (const std::string& what)
+{
+    isc_throw(Unexpected, what);
+}
+
+};
+};
diff --git a/src/bin/dhcp6/parser_context.h b/src/bin/dhcp6/parser_context.h
new file mode 100644
index 0000000000..efa8182f05
--- /dev/null
+++ b/src/bin/dhcp6/parser_context.h
@@ -0,0 +1,97 @@
+// Copyright (C) 2015-2016 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef PARSER_CONTEXT_H
+#define PARSER_CONTEXT_H
+#include
+#include
+#include
+#include
+#include
+#include
+
+// Tell Flex the lexer's prototype ...
+#define YY_DECL isc::dhcp::Dhcp6Parser::symbol_type yylex (Parser6Context& driver)
+
+// ... and declare it for the parser's sake.
+YY_DECL;
+
+namespace isc {
+namespace dhcp {
+
+/// @brief Exception raised when the text being parsed cannot be processed.
+class EvalParseError : public isc::Exception {
+public:
+    EvalParseError(const char* file, size_t line, const char* what) :
+        isc::Exception(file, line, what) { };
+};
+
+
+/// @brief Parser context, the interface between callers, the lexer and the parser.
+class Parser6Context
+{
+public:
+    /// @brief Default constructor.
+    ///
+    /// Takes no arguments. Scanner and parser tracing both start out
+    /// disabled (see trace_scanning_ and trace_parsing_); this class
+    /// offers no way to switch them on yet.
+    Parser6Context();
+
+    /// @brief destructor
+    virtual ~Parser6Context();
+
+    /// @brief JSON elements being parsed.
+    std::vector<isc::data::ElementPtr> stack_;
+
+    /// @brief Method called before scanning starts on a string.
+    void scanStringBegin();
+
+    /// @brief Method called after the last tokens are scanned from a string.
+    void scanStringEnd();
+
+    /// @brief Run the parser on the string specified.
+    ///
+    /// @param str string to be parsed
+    /// @return pointer to the parsed JSON structure.
+    isc::data::ConstElementPtr parseString(const std::string& str);
+
+    /// @brief The name of the file being parsed.
+    /// Used later to pass the file name to the location tracker.
+    std::string file_;
+
+    /// @brief The string being parsed.
+    std::string string_;
+
+    /// @brief Error handler
+    ///
+    /// @param loc location within the parsed file when experienced a problem.
+    /// @param what string explaining the nature of the error.
+    static void error(const isc::dhcp::location& loc, const std::string& what);
+
+    /// @brief Error handler
+    ///
+    /// This is a simplified variant for problems that cannot be tied to a
+    /// location in the parsed text. Like the one above, it throws EvalParseError.
+    static void error(const std::string& what);
+
+    /// @brief Fatal error handler
+    ///
+    /// This is for errors that should never happen; it throws Unexpected.
+    static void fatal(const std::string& what);
+
+ private:
+    /// @brief Flag determining scanner debugging.
+    bool trace_scanning_;
+
+    /// @brief Flag determining parser debugging.
+    bool trace_parsing_;
+};
+
+}; // end of isc::dhcp namespace
+}; // end of isc namespace
+
+#endif
diff --git a/src/bin/dhcp6/tests/Makefile.am b/src/bin/dhcp6/tests/Makefile.am
index 87c619761e..0fa0220f8d 100644
--- a/src/bin/dhcp6/tests/Makefile.am
+++ b/src/bin/dhcp6/tests/Makefile.am
@@ -93,6 +93,7 @@ dhcp6_unittests_SOURCES += dhcp6_message_test.cc dhcp6_message_test.h
 dhcp6_unittests_SOURCES += kea_controller_unittest.cc
 dhcp6_unittests_SOURCES += dhcp6to4_ipc_unittest.cc
 dhcp6_unittests_SOURCES += classify_unittests.cc
+dhcp6_unittests_SOURCES += parser_unittest.cc
 
 nodist_dhcp6_unittests_SOURCES = marker_file.h test_libraries.h
 
diff --git a/src/bin/dhcp6/tests/parser_unittest.cc b/src/bin/dhcp6/tests/parser_unittest.cc
new file mode 100644
index 0000000000..c506395808
--- /dev/null
+++ b/src/bin/dhcp6/tests/parser_unittest.cc
@@ -0,0 +1,27 @@
+// Copyright (C) 2016 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include
+#include
+#include
+
+using namespace isc::data;
+using namespace std;
+
+namespace {
+
+TEST(ParserTest, basic) {
+
+    isc::dhcp::Parser6Context ctx;
+
+    string txt = "{ \"Dhcp6\": { } }";
+
+    ConstElementPtr json = ctx.parseString(txt);
+
+    ASSERT_TRUE(json);
+}
+
+};