From 8a123d43428dfc4b9bccd1c883206d8e653b2ec8 Mon Sep 17 00:00:00 2001 From: Hugo Landau Date: Fri, 8 Sep 2023 11:13:39 +0100 Subject: [PATCH] QLOG: JSON Encoder: Design Reviewed-by: Matt Caswell Reviewed-by: Neil Horman (Merged from https://github.com/openssl/openssl/pull/22037) --- doc/designs/quic-design/json-encoder.md | 91 ++++++++++ include/internal/json_enc.h | 225 ++++++++++++++++++++++++ 2 files changed, 316 insertions(+) create mode 100644 doc/designs/quic-design/json-encoder.md create mode 100644 include/internal/json_enc.h diff --git a/doc/designs/quic-design/json-encoder.md b/doc/designs/quic-design/json-encoder.md new file mode 100644 index 00000000000..fed938499a3 --- /dev/null +++ b/doc/designs/quic-design/json-encoder.md @@ -0,0 +1,91 @@ +JSON Encoder +============ + +Approach +-------- + +The JSON encoder exists to support QLOG implementation. There is no intention to +implement a decoder at this time. The encoder is intended to support +zero-allocation automation using immediate calls without the use of an +intermediate syntax tree representation. This enables highly efficient +serialization when called from QUIC code without dynamic memory allocation. + +An example usage is as follows: + +```c +int generate_json(BIO *b) +{ + int ret = 1; + JSON_ENC z; + + if (!ossl_json_init(&z, b, 0)) + return 0; + + ossl_json_object_begin(&z); + { + ossl_json_key(&z, "key"); + ossl_json_str(&z, "value"); + + ossl_json_key(&z, "key2"); + ossl_json_u64(&z, 42); + + ossl_json_key(&z, "key3"); + ossl_json_array_begin(&z); + { + ossl_json_null(&z); + ossl_json_f64(&z, 42.0); + ossl_json_str(&z, "string"); + } + ossl_json_array_end(&z); + } + ossl_json_object_end(&z); + + if (ossl_json_in_error(&z)) + ret = 0; + + ossl_json_cleanup(&z); + return ret; +} +``` + +The zero-allocation, immediate-output design means that most API calls +correspond directly to immediately generated output; however there is some +minimal state tracking.
The API guarantees that it will never generate invalid +JSON, with two exceptions: + +- it is the caller's responsibility to avoid generating duplicate keys; +- it is the caller's responsibility to provide valid UTF-8 strings. + +Since the JSON encoder is for internal use only, its structure is defined in +headers and can be incorporated into other objects without a heap allocation. +The JSON encoder maintains an internal write buffer and a small state tracking +stack (1 bit per level of depth in a JSON hierarchy). + +JSON-SEQ +-------- + +The encoder supports JSON-SEQ (RFC 7464), as this is an optimal format for +outputting QLOG for our purposes. + +Number Handling +--------------- + +It is an unfortunate reality that many JSON implementations are not able to +handle integers outside `[-2**53 + 1, 2**53 - 1]`. This leads to the I-JSON +specification, RFC 7493, which recommends that values outside these ranges are +encoded as strings. + +An optional I-JSON mode is offered, in which case integers outside these ranges +are automatically serialized as strings instead. + +Error Handling +-------------- + +Error handling is deferred to improve ergonomics. If any call to a JSON encoder +fails, all future calls also fail and the caller is expected to ascertain whether +the encoding process failed by calling `ossl_json_in_error`. + +API +--- + +The API is documented in `include/internal/json_enc.h`. diff --git a/include/internal/json_enc.h b/include/internal/json_enc.h new file mode 100644 index 00000000000..b5ee8361e0a --- /dev/null +++ b/include/internal/json_enc.h @@ -0,0 +1,225 @@ +/* + * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License.
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef JSON_ENC_H +# define JSON_ENC_H + +# include <openssl/bio.h> + +/* + * JSON Encoder + * ============ + * + * This JSON encoder is used for QLOG. It supports ordinary JSON (RFC 7159), + * JSON-SEQ (RFC 7464) and I-JSON (RFC 7493). It supports only basic ASCII. + */ + +struct json_write_buf { + BIO *bio; + char *buf; + size_t alloc, cur; +}; + +typedef struct json_enc_st { + uint32_t flags; + /* error: 1 if an error has occurred. */ + /* state: current state. */ + /* stack stores a bitmap. 0=object, 1=array. */ + /* stack cur size: stack_end_byte bytes, stack_end_bit bits. */ + /* stack alloc size: stack_bytes bytes. */ + unsigned char error, stack_end_bit, state, *stack, defer_indent; + unsigned char stack_small[16]; + struct json_write_buf wbuf; + size_t stack_end_byte, stack_bytes; +} JSON_ENC; + +/* + * ossl_json_init + * -------------- + * + * Initialises a JSON encoder. + * + * If the flag JSON_FLAG_SEQ is passed, the output is in JSON-SEQ. The caller + * should use the encoder as though it is encoding members of a JSON array (but + * without calling ossl_json_array_begin() or ossl_json_array_end()). Each + * top-level JSON item (e.g. JSON object) encoded will be separated correctly as + * per the JSON-SEQ format. + * + * If the flag JSON_FLAG_SEQ is not passed, the output is in JSON format. + * Generally the caller should encode only a single output item (e.g. + * a JSON object). + * + * By default, JSON output is maximally compact. If JSON_FLAG_PRETTY is set, + * JSON/JSON-SEQ output is spaced for optimal human readability. + * + * If JSON_FLAG_IJSON is set, integers outside the range `[-2**53 + 1, 2**53 - + * 1]` are automatically converted to decimal strings before serialization.
+ */ +#define JSON_FLAG_NONE 0 +#define JSON_FLAG_SEQ (1U << 0) +#define JSON_FLAG_PRETTY (1U << 1) +#define JSON_FLAG_IJSON (1U << 2) + +int ossl_json_init(JSON_ENC *json, BIO *bio, uint32_t flags); + +/* + * ossl_json_cleanup + * ----------------- + * + * Destroys a JSON encoder. + */ +void ossl_json_cleanup(JSON_ENC *json); + +/* + * ossl_json_reset + * --------------- + * + * Resets a JSON encoder, as though it has just been initialised, allowing it + * to be used again for new output syntactically unrelated to any previous + * output. This is similar to calling ossl_json_cleanup followed by + * ossl_json_init but may allow internal buffers, etc. to be reused. + * + * If the JSON encoder has entered an error state, this function MAY allow + * recovery from this error state, in which case it will return 1. If this + * function returns 0, the JSON encoder is unrecoverable and + * ossl_json_cleanup() must be called. + * + * Automatically calls ossl_json_flush(). + */ +int ossl_json_reset(JSON_ENC *json); + +/* + * ossl_json_flush + * --------------- + * + * Flushes the JSON encoder, ensuring that any residual bytes in internal + * buffers are written to the provided sink BIO. Flushing may also happen + * autonomously as buffers are filled, but the caller must use this function + * to guarantee all data has been flushed. + */ +int ossl_json_flush(JSON_ENC *json); + +/* + * ossl_json_flush_cleanup + * ----------------------- + * + * Tries to flush as in a call to ossl_json_flush, and then calls + * ossl_json_cleanup regardless of the result. The result of the flush call is + * returned. + */ +int ossl_json_flush_cleanup(JSON_ENC *json); + +/* + * ossl_json_set_sink + * ------------------ + * + * Changes the sink used by the JSON encoder. + */ +int ossl_json_set_sink(JSON_ENC *json, BIO *bio); + +/* + * ossl_json_in_error + * ------------------ + * + * To enhance the ergonomics of the JSON API, the JSON object uses an implicit + * error tracking model. 
When a JSON API call fails (for example due to caller + * error, such as trying to close an array which was not opened), the JSON + * object enters an error state and all further calls are silently ignored. + * + * The caller can detect this condition after it is finished making builder + * calls to the JSON object by calling this function. This function returns 1 + * if an error occurred. At this point the caller's only recourse is to call + * ossl_json_reset() or ossl_json_cleanup(). + * + * Note that partial (i.e., invalid) output may still have been sent to the BIO + * in this case. Since the amount of output which can potentially be produced + * by a JSON object is unbounded, it is impractical to buffer it all before + * flushing. It is expected that errors will ordinarily be either caller errors + * (programming errors) or BIO errors. + */ +int ossl_json_in_error(JSON_ENC *json); + +/* + * JSON Builder Calls + * ================== + * + * These functions are used to build JSON output. The functions which have + * begin and end function pairs must be called in correctly nested sequence. + * When writing an object, ossl_json_key() must be called exactly once before + * each call to write a JSON item. + * + * The JSON library takes responsibility for enforcing correct usage patterns. + * If a call is made that does not correspond to the JSON syntax, the JSON + * object enters the error state and all subsequent calls are ignored. + * + * In JSON-SEQ mode, the caller should act as though the library implicitly + * places all calls between an ossl_json_array_begin() and + * ossl_json_array_end() pair; for example, the normal usage pattern would be + * to call ossl_json_object_begin() followed by ossl_json_object_end(), in + * repeated sequence. + * + * The library does not enforce non-generation of duplicate keys. Avoiding this + * is the caller's responsibility. It is also the caller's responsibility to + * pass valid UTF-8 strings. 
All other forms of invalid output will cause an + * error. Note that due to the immediate nature of the API, partial output may + * have already been generated in such a case. + */ + +/* Begin a new JSON object. */ +void ossl_json_object_begin(JSON_ENC *json); + +/* End a JSON object. Must be matched with a call to ossl_json_object_begin(). */ +void ossl_json_object_end(JSON_ENC *json); + +/* Begin a new JSON array. */ +void ossl_json_array_begin(JSON_ENC *json); + +/* End a JSON array. Must be matched with a call to ossl_json_array_begin(). */ +void ossl_json_array_end(JSON_ENC *json); + +/* + * Encode a JSON key within an object. Pass a zero-terminated string, which can + * be freed immediately following the call to this function. + */ +void ossl_json_key(JSON_ENC *json, const char *key); + +/* Encode a JSON 'null' value. */ +void ossl_json_null(JSON_ENC *json); + +/* Encode a JSON boolean value. */ +void ossl_json_bool(JSON_ENC *json, int value); + +/* Encode a JSON integer from a uint64_t. */ +void ossl_json_u64(JSON_ENC *json, uint64_t value); + +/* Encode a JSON integer from an int64_t. */ +void ossl_json_i64(JSON_ENC *json, int64_t value); + +/* Encode a JSON number from a 64-bit floating point value. */ +void ossl_json_f64(JSON_ENC *json, double value); + +/* + * Encode a JSON UTF-8 string from a zero-terminated string. The string passed + * can be freed immediately following the call to this function. + */ +void ossl_json_str(JSON_ENC *json, const char *str); + +/* + * Encode a JSON UTF-8 string from a string with the given length. The string + * passed can be freed immediately following the call to this function. + */ +void ossl_json_str_len(JSON_ENC *json, const char *str, size_t str_len); + +/* + * Encode binary data as a lowercase hex string. data_len is the data length in + * bytes. + */ +void ossl_json_str_hex(JSON_ENC *json, const void *data, size_t data_len); + +#endif -- 2.47.2