From: Alberto Leiva Popper Date: Thu, 23 Aug 2018 02:14:21 +0000 (-0500) Subject: Add string parsing to RTR X-Git-Tag: v0.0.2~52^2~75 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=19cc799a5ec21401d670fdbb172ba4de25bd05b6;p=thirdparty%2FFORT-validator.git Add string parsing to RTR Rob Austein, author of RFCs 6810 and 8210, clarified that the string length unit is octets. Dumping the corresponding (still untested) code. --- diff --git a/src/rtr/pdu.c b/src/rtr/pdu.c index 7adbb824..f8b7a143 100644 --- a/src/rtr/pdu.c +++ b/src/rtr/pdu.c @@ -7,7 +7,6 @@ #include "../common.h" #include "pdu_handler.h" -#include "primitive_reader.h" static int pdu_header_from_stream(int, struct pdu_header *); static int serial_notify_from_stream(struct pdu_header *, int, void *); @@ -54,6 +53,7 @@ pdu_load(int fd, void **pdu, struct pdu_metadata const **metadata) static int pdu_header_from_stream(int fd, struct pdu_header *header) { + /* TODO if the first read yields no bytes, the connection was terminated. */ return read_int8(fd, &header->protocol_version) || read_int8(fd, &header->pdu_type) || read_int16(fd, &header->session_id) diff --git a/src/rtr/pdu.h b/src/rtr/pdu.h index da338508..66a950ec 100644 --- a/src/rtr/pdu.h +++ b/src/rtr/pdu.h @@ -4,6 +4,7 @@ #include #include "../common.h" +#include "primitive_reader.h" struct pdu_header { u_int8_t protocol_version; @@ -66,7 +67,7 @@ struct cache_reset_pdu { struct error_report_pdu { struct pdu_header header; void *erroneous_pdu; - char *error_message; + rtr_char *error_message; }; struct pdu_metadata { diff --git a/src/rtr/primitive_reader.c b/src/rtr/primitive_reader.c index 74ea07db..4dea588b 100644 --- a/src/rtr/primitive_reader.c +++ b/src/rtr/primitive_reader.c @@ -2,34 +2,33 @@ #include #include +#include #include +#include + +static int read_exact(int, unsigned char *, size_t); +static int read_and_waste(int, unsigned char *, size_t, u_int64_t); +static int get_octets(rtr_char); +static void place_null_character(rtr_char *, 
size_t); static int -read_exact(int fd, unsigned char *buffer, size_t length) +read_exact(int fd, unsigned char *buffer, size_t buffer_len) { - int n, m; + ssize_t read_result; + size_t offset; int err; - for (n = 0; n < length;) { - m = read(fd, &buffer[n], length - n); - if (m < 0) { + for (offset = 0; offset < buffer_len; offset += read_result) { + read_result = read(fd, &buffer[offset], buffer_len - offset); + if (read_result == -1) { err = errno; warn("Client socket read interrupted"); return err; } - - if (m == 0 && n == 0) { - /* Stream ended gracefully. */ - return 0; - } - - if (m == 0) { - err = -EPIPE; + if (read_result == 0) { warn("Stream ended mid-PDU"); - return err; + return -EPIPE; } - - n += m; } return 0; @@ -89,20 +88,133 @@ read_in6_addr(int fd, struct in6_addr *result) || read_int32(fd, &result->s6_addr32[3]); } -int -read_string(int fd, char **result) +/* + * Consumes precisely @total_len bytes from @fd. + * The first @str_len bytes are stored in @str. + * + * It is required that @str_len <= @total_len. + */ +static int +read_and_waste(int fd, unsigned char *str, size_t str_len, u_int64_t total_len) { - u_int32_t length; +#define TLEN 1024 /* "Trash length" */ + unsigned char trash[TLEN]; + size_t offset; int err; - err = read_int32(fd, &length); + err = read_exact(fd, str, str_len); if (err) return err; + for (offset = str_len; (offset + TLEN) < total_len; offset += TLEN) { + err = read_exact(fd, trash, TLEN); + if (err) + return err; + } + + return read_exact(fd, trash, total_len - offset); +#undef TLEN +} + +#define EINVALID_UTF8 -0xFFFF + +/* + * Returns the length (in octets) of the UTF-8 code point that starts with octet + * @first_octet. 
+ */ +static int +get_octets(rtr_char first_octet) +{ + if ((first_octet & 0xC0) == 0) + return 1; + if ((first_octet >> 5) == 6) /* 0b110 */ + return 2; + if ((first_octet >> 4) == 14) /* 0b1110 */ + return 3; + if ((first_octet >> 3) == 30) /* 0b11110 */ + return 4; + return EINVALID_UTF8; +} + +/* + * This also sanitizes the string, BTW. + * (Because it places the null chara in the first invalid character. + * The rest is silently ignored.) + * + * TODO test the hell out of this. + */ +static void +place_null_character(rtr_char *str, size_t len) +{ + rtr_char *null_chara_pos; + rtr_char *cursor; + int octet; + int octets; + /* - * TODO the RFC doesn't say if the length is in bytes, code points or - * graphemes... + * This could be optimized by noticing that all byte continuations in + * UTF-8 start with 0b10. This means that we could start from the end + * of the string and move left until we find a valid character. + * But if we do that, we'd lose the sanitization. So this is better + * methinks. */ - *result = NULL; + + null_chara_pos = str; + cursor = str; + + while (cursor < str + len) { + octets = get_octets(*cursor); + if (octets == EINVALID_UTF8) + break; + for (octet = 1; octet < octets; octet++) { + if (cursor >= str + len - 1 || cursor[1] >> 6 != 0x10) + break; + cursor++; + } + + null_chara_pos = cursor; + } + + *null_chara_pos = '\0'; +} + +int +read_string(int fd, rtr_char **result) +{ + /* Actual string length claimed by the PDU, in octets. */ + u_int32_t full_length32; /* Excludes the null chara */ + u_int64_t full_length64; /* Includes the null chara */ + /* + * Actual length that we allocate. Octets. + * This exists because there might be value in truncating the string; + * full_length is a fucking 32-bit integer for some reason. 
+ * Note that, because this is UTF-8 we're dealing with, this might not + * necessarily end up being the actual octet length of the final string; + * since our truncation can land in the middle of a code point, the null + * character might need to be shifted left slightly. + */ + size_t alloc_length; /* Includes the null chara */ + rtr_char *str; + int err; + + err = read_int32(fd, &full_length32); + if (err) + return err; + full_length64 = ((u_int64_t) full_length32) + 1; + + alloc_length = (full_length64 > 4096) ? 4096 : full_length64; + str = malloc(alloc_length); + if (!str) + return -ENOMEM; + + err = read_and_waste(fd, str, alloc_length - 1, full_length64); + if (err) { + free(str); + return err; + } + + place_null_character(str, alloc_length); + + *result = str; return 0; } diff --git a/src/rtr/primitive_reader.h b/src/rtr/primitive_reader.h index 346f59fd..bea9913f 100644 --- a/src/rtr/primitive_reader.h +++ b/src/rtr/primitive_reader.h @@ -5,13 +5,15 @@ #include "../common.h" +typedef unsigned char rtr_char; + __BEGIN_DECLS int read_int8(int, u_int8_t *); int read_int16(int, u_int16_t *); int read_int32(int, u_int32_t *); int read_in_addr(int, struct in_addr *); int read_in6_addr(int, struct in6_addr *); -int read_string(int, char **); +int read_string(int, rtr_char **); __END_DECLS #endif /* RTR_PRIMITIVE_READER_H_ */