From dfd92948a0a88a9f245e71c1cfb63ae670e6e7c1 Mon Sep 17 00:00:00 2001 From: "Anders K. Pedersen" Date: Fri, 28 Oct 2016 05:56:32 +0000 Subject: [PATCH] rt: introduce routing expression Introduce rt expression for routing-related data with support for nexthop (i.e. the directly connected IP address that an outgoing packet is sent to), which can be used either for matching or accounting, e.g. # nft add rule filter postrouting \ ip daddr 192.168.1.0/24 rt nexthop != 192.168.0.1 drop This will drop any traffic to 192.168.1.0/24 that is not routed via 192.168.0.1. # nft add rule filter postrouting \ flow table acct { rt nexthop timeout 600s counter } # nft add rule ip6 filter postrouting \ flow table acct { rt nexthop timeout 600s counter } These rules count outgoing traffic per nexthop. Note that the timeout releases an entry if no traffic is seen for this nexthop within 10 minutes. # nft add rule inet filter postrouting \ ether type ip \ flow table acct { rt nexthop timeout 600s counter } # nft add rule inet filter postrouting \ ether type ip6 \ flow table acct { rt nexthop timeout 600s counter } Same as above, but via the inet family, where the ether type must be specified explicitly. "rt classid" is also implemented identically to "meta rtclassid", since it is more logical to have this match in the routing expression going forward. Signed-off-by: Anders K. 
Pedersen Signed-off-by: Pablo Neira Ayuso --- doc/nft.xml | 84 +++++++++++++++++ include/expression.h | 7 ++ include/linux/netfilter/nf_tables.h | 27 ++++++ include/rt.h | 33 +++++++ src/Makefile.am | 1 + src/evaluate.c | 34 +++++++ src/netlink_delinearize.c | 17 ++++ src/netlink_linearize.c | 14 +++ src/parser_bison.y | 18 ++++ src/rt.c | 141 ++++++++++++++++++++++++++++ src/scanner.l | 3 + tests/files/expr-rt | 21 +++++ tests/shell/run-tests.sh | 2 +- 13 files changed, 401 insertions(+), 1 deletion(-) create mode 100644 include/rt.h create mode 100644 src/rt.c create mode 100644 tests/files/expr-rt diff --git a/doc/nft.xml b/doc/nft.xml index 3b215f8c..e6b98ae7 100644 --- a/doc/nft.xml +++ b/doc/nft.xml @@ -1222,6 +1222,90 @@ filter output oif eth0 + + + Routing expressions + + + rt + + classid + nexthop + + + + + A routing expression refers to routing data associated with a packet. + + + + Routing expression types + + + + + + + Keyword + Description + Type + + + + + classid + Routing realm + realm + + + nexthop + Routing nexthop + ipv4_addr/ipv6_addr + + + +
+
+ + + Routing expression specific types + + + + + + Type + Description + + + + + realm + + Routing Realm (32 bit number). Can be specified numerically + or as symbolic name defined in /etc/iproute2/rt_realms. + + + + +
+
+ + + Using routing expressions + +# IP family independent rt expression +filter output rt classid 10 + +# IP family dependent rt expressions +ip filter output rt nexthop 192.168.0.1 +ip6 filter output rt nexthop fd00::1 +inet filter ether type ip output rt nexthop 192.168.0.1 +inet filter ether type ip6 output rt nexthop fd00::1 + + + +
diff --git a/include/expression.h b/include/expression.h index eda3d98f..3ae4e804 100644 --- a/include/expression.h +++ b/include/expression.h @@ -35,6 +35,7 @@ * @EXPR_RELATIONAL: equality and relational expressions * @EXPR_NUMGEN: number generation expression * @EXPR_HASH: hash expression + * @EXPR_RT: routing expression */ enum expr_types { EXPR_INVALID, @@ -59,6 +60,7 @@ enum expr_types { EXPR_RELATIONAL, EXPR_NUMGEN, EXPR_HASH, + EXPR_RT, }; enum ops { @@ -180,6 +182,7 @@ enum expr_flags { #include #include #include +#include #include #include @@ -282,6 +285,10 @@ struct expr { enum nft_meta_keys key; enum proto_bases base; } meta; + struct { + /* EXPR_RT */ + enum nft_rt_keys key; + } rt; struct { /* EXPR_CT */ enum nft_ct_keys key; diff --git a/include/linux/netfilter/nf_tables.h b/include/linux/netfilter/nf_tables.h index e84a9f5b..2d477847 100644 --- a/include/linux/netfilter/nf_tables.h +++ b/include/linux/netfilter/nf_tables.h @@ -752,6 +752,19 @@ enum nft_meta_keys { NFT_META_PRANDOM, }; +/** + * enum nft_rt_keys - nf_tables routing expression keys + * + * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 + * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 + */ +enum nft_rt_keys { + NFT_RT_CLASSID, + NFT_RT_NEXTHOP4, + NFT_RT_NEXTHOP6, +}; + /** * enum nft_hash_attributes - nf_tables hash expression netlink attributes * @@ -788,6 +801,20 @@ enum nft_meta_attributes { }; #define NFTA_META_MAX (__NFTA_META_MAX - 1) +/** + * enum nft_rt_attributes - nf_tables routing expression netlink attributes + * + * @NFTA_RT_DREG: destination register (NLA_U32) + * @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys) + */ +enum nft_rt_attributes { + NFTA_RT_UNSPEC, + NFTA_RT_DREG, + NFTA_RT_KEY, + __NFTA_RT_MAX +}; +#define NFTA_RT_MAX (__NFTA_RT_MAX - 1) + /** * enum nft_ct_keys - nf_tables ct expression keys * diff --git a/include/rt.h b/include/rt.h new file mode 100644 index 00000000..728cf5f0 
--- /dev/null +++ b/include/rt.h @@ -0,0 +1,33 @@ +#ifndef NFTABLES_RT_H +#define NFTABLES_RT_H + +/** + * struct rt_template - template for routing expressions + * + * @token: parser token for the expression + * @dtype: data type of the expression + * @len: length of the expression + * @byteorder: byteorder + * @invalid: invalidate datatype on allocation from parser + */ +struct rt_template { + const char *token; + const struct datatype *dtype; + unsigned int len; + enum byteorder byteorder; + bool invalid; +}; + +#define RT_TEMPLATE(__token, __dtype, __len, __byteorder, __invalid) { \ + .token = (__token), \ + .dtype = (__dtype), \ + .len = (__len), \ + .byteorder = (__byteorder), \ + .invalid = (__invalid), \ +} + +extern struct expr *rt_expr_alloc(const struct location *loc, + enum nft_rt_keys key, bool invalid); +extern void rt_expr_update_type(struct proto_ctx *ctx, struct expr *expr); + +#endif /* NFTABLES_RT_H */ diff --git a/src/Makefile.am b/src/Makefile.am index 63bbef2c..9a151bd4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -38,6 +38,7 @@ nft_SOURCES = main.c \ exthdr.c \ hash.c \ meta.c \ + rt.c \ numgen.c \ ct.c \ netlink.c \ diff --git a/src/evaluate.c b/src/evaluate.c index 45af3298..ab0dd9ef 100644 --- a/src/evaluate.c +++ b/src/evaluate.c @@ -611,6 +611,38 @@ static int expr_evaluate_payload(struct eval_ctx *ctx, struct expr **exprp) return 0; } +/* + * RT expression: validate protocol dependencies. 
+ */ +static int expr_evaluate_rt(struct eval_ctx *ctx, struct expr **expr) +{ + const struct proto_desc *base; + struct expr *rt = *expr; + + rt_expr_update_type(&ctx->pctx, rt); + + base = ctx->pctx.protocol[PROTO_BASE_NETWORK_HDR].desc; + switch (rt->rt.key) { + case NFT_RT_NEXTHOP4: + if (base != &proto_ip) + goto err; + break; + case NFT_RT_NEXTHOP6: + if (base != &proto_ip6) + goto err; + break; + default: + break; + } + + return expr_evaluate_primary(ctx, expr); + +err: + return expr_error(ctx->msgs, rt, + "ether type ip or ip6 must be specified before " + "routing expression"); +} + /* * CT expression: update the protocol dependant types bases on the protocol * context. @@ -1609,6 +1641,8 @@ static int expr_evaluate(struct eval_ctx *ctx, struct expr **expr) return expr_evaluate_primary(ctx, expr); case EXPR_PAYLOAD: return expr_evaluate_payload(ctx, expr); + case EXPR_RT: + return expr_evaluate_rt(ctx, expr); case EXPR_CT: return expr_evaluate_ct(ctx, expr); case EXPR_PREFIX: diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c index c3b0b278..dc9cc837 100644 --- a/src/netlink_delinearize.c +++ b/src/netlink_delinearize.c @@ -585,6 +585,21 @@ static void netlink_parse_meta(struct netlink_parse_ctx *ctx, netlink_parse_meta_stmt(ctx, loc, nle); } +static void netlink_parse_rt(struct netlink_parse_ctx *ctx, + const struct location *loc, + const struct nftnl_expr *nle) +{ + enum nft_registers dreg; + uint32_t key; + struct expr *expr; + + key = nftnl_expr_get_u32(nle, NFTNL_EXPR_RT_KEY); + expr = rt_expr_alloc(loc, key, false); + + dreg = netlink_parse_register(nle, NFTNL_EXPR_RT_DREG); + netlink_set_register(ctx, dreg, expr); +} + static void netlink_parse_numgen(struct netlink_parse_ctx *ctx, const struct location *loc, const struct nftnl_expr *nle) @@ -1086,6 +1101,7 @@ static const struct { { .name = "payload", .parse = netlink_parse_payload }, { .name = "exthdr", .parse = netlink_parse_exthdr }, { .name = "meta", .parse = netlink_parse_meta 
}, + { .name = "rt", .parse = netlink_parse_rt }, { .name = "ct", .parse = netlink_parse_ct }, { .name = "counter", .parse = netlink_parse_counter }, { .name = "log", .parse = netlink_parse_log }, @@ -1724,6 +1740,7 @@ static void expr_postprocess(struct rule_pp_ctx *ctx, struct expr **exprp) break; case EXPR_SET_REF: case EXPR_META: + case EXPR_RT: case EXPR_VERDICT: case EXPR_NUMGEN: break; diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c index b04625f1..a5d4502f 100644 --- a/src/netlink_linearize.c +++ b/src/netlink_linearize.c @@ -172,6 +172,18 @@ static void netlink_gen_meta(struct netlink_linearize_ctx *ctx, nftnl_rule_add_expr(ctx->nlr, nle); } +static void netlink_gen_rt(struct netlink_linearize_ctx *ctx, + const struct expr *expr, + enum nft_registers dreg) +{ + struct nftnl_expr *nle; + + nle = alloc_nft_expr("rt"); + netlink_put_register(nle, NFTNL_EXPR_RT_DREG, dreg); + nftnl_expr_set_u32(nle, NFTNL_EXPR_RT_KEY, expr->rt.key); + nftnl_rule_add_expr(ctx->nlr, nle); +} + static void netlink_gen_numgen(struct netlink_linearize_ctx *ctx, const struct expr *expr, enum nft_registers dreg) @@ -641,6 +653,8 @@ static void netlink_gen_expr(struct netlink_linearize_ctx *ctx, return netlink_gen_exthdr(ctx, expr, dreg); case EXPR_META: return netlink_gen_meta(ctx, expr, dreg); + case EXPR_RT: + return netlink_gen_rt(ctx, expr, dreg); case EXPR_CT: return netlink_gen_ct(ctx, expr, dreg); case EXPR_SET_ELEM: diff --git a/src/parser_bison.y b/src/parser_bison.y index ec9052af..dc02fd23 100644 --- a/src/parser_bison.y +++ b/src/parser_bison.y @@ -338,6 +338,9 @@ static void location_update(struct location *loc, struct location *rhs, int n) %token OIFGROUP "oifgroup" %token CGROUP "cgroup" +%token CLASSID "classid" +%token NEXTHOP "nexthop" + %token CT "ct" %token DIRECTION "direction" %token STATE "state" @@ -590,6 +593,10 @@ static void location_update(struct location *loc, struct location *rhs, int n) %destructor { expr_free($$); } meta_expr %type 
meta_key meta_key_qualified meta_key_unqualified numgen_type +%type rt_expr +%destructor { expr_free($$); } rt_expr +%type rt_key + %type ct_expr %destructor { expr_free($$); } ct_expr %type ct_key ct_key_dir ct_key_counters @@ -1995,6 +2002,7 @@ primary_expr : symbol_expr { $$ = $1; } | payload_expr { $$ = $1; } | exthdr_expr { $$ = $1; } | meta_expr { $$ = $1; } + | rt_expr { $$ = $1; } | ct_expr { $$ = $1; } | numgen_expr { $$ = $1; } | hash_expr { $$ = $1; } @@ -2529,6 +2537,16 @@ hash_expr : JHASH expr MOD NUM SEED NUM } ; +rt_expr : RT rt_key + { + $$ = rt_expr_alloc(&@$, $2, true); + } + ; + +rt_key : CLASSID { $$ = NFT_RT_CLASSID; } + | NEXTHOP { $$ = NFT_RT_NEXTHOP4; } + ; + ct_expr : CT ct_key { $$ = ct_expr_alloc(&@$, $2, -1); diff --git a/src/rt.c b/src/rt.c new file mode 100644 index 00000000..232c1dcb --- /dev/null +++ b/src/rt.c @@ -0,0 +1,141 @@ +/* + * Routing expression related definition and types. + * + * Copyright (c) 2016 Anders K. Pedersen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static struct symbol_table *realm_tbl; +static void __init realm_table_init(void) +{ + realm_tbl = rt_symbol_table_init("/etc/iproute2/rt_realms"); +} + +static void __exit realm_table_exit(void) +{ + rt_symbol_table_free(realm_tbl); +} + +static void realm_type_print(const struct expr *expr) +{ + return symbolic_constant_print(realm_tbl, expr, true); +} + +static struct error_record *realm_type_parse(const struct expr *sym, + struct expr **res) +{ + return symbolic_constant_parse(sym, realm_tbl, res); +} + +static const struct datatype realm_type = { + .type = TYPE_REALM, + .name = "realm", + .desc = "routing realm", + .byteorder = BYTEORDER_HOST_ENDIAN, + .size = 4 * BITS_PER_BYTE, + .basetype = &integer_type, + .print = realm_type_print, + .parse = realm_type_parse, + .flags = DTYPE_F_PREFIX, +}; + +static const struct rt_template rt_templates[] = { + [NFT_RT_CLASSID] = RT_TEMPLATE("classid", + &realm_type, + 4 * BITS_PER_BYTE, + BYTEORDER_HOST_ENDIAN, + false), + [NFT_RT_NEXTHOP4] = RT_TEMPLATE("nexthop", + &ipaddr_type, + 4 * BITS_PER_BYTE, + BYTEORDER_BIG_ENDIAN, + true), + [NFT_RT_NEXTHOP6] = RT_TEMPLATE("nexthop", + &ip6addr_type, + 16 * BITS_PER_BYTE, + BYTEORDER_BIG_ENDIAN, + true), +}; + +static void rt_expr_print(const struct expr *expr) +{ + printf("rt %s", rt_templates[expr->rt.key].token); +} + +static bool rt_expr_cmp(const struct expr *e1, const struct expr *e2) +{ + return e1->rt.key == e2->rt.key; +} + +static void rt_expr_clone(struct expr *new, const struct expr *expr) +{ + new->rt.key = expr->rt.key; +} + +static const struct expr_ops rt_expr_ops = { + .type = EXPR_RT, + .name = "rt", + .print = rt_expr_print, + .cmp = rt_expr_cmp, + .clone = rt_expr_clone, +}; + +struct expr *rt_expr_alloc(const struct location *loc, enum nft_rt_keys key, + bool invalid) +{ + const struct rt_template *tmpl = 
&rt_templates[key]; + struct expr *expr; + + if (invalid && tmpl->invalid) + expr = expr_alloc(loc, &rt_expr_ops, &invalid_type, + tmpl->byteorder, 0); + else + expr = expr_alloc(loc, &rt_expr_ops, tmpl->dtype, + tmpl->byteorder, tmpl->len); + expr->rt.key = key; + + return expr; +} + +void rt_expr_update_type(struct proto_ctx *ctx, struct expr *expr) +{ + const struct proto_desc *desc; + + switch (expr->rt.key) { + case NFT_RT_NEXTHOP4: + desc = ctx->protocol[PROTO_BASE_NETWORK_HDR].desc; + if (desc == &proto_ip) + expr->dtype = &ipaddr_type; + else if (desc == &proto_ip6) { + expr->rt.key++; + expr->dtype = &ip6addr_type; + } + expr->len = expr->dtype->size; + break; + default: + break; + } +} + +static void __init rt_init(void) +{ + datatype_register(&realm_type); +} diff --git a/src/scanner.l b/src/scanner.l index 8afddf15..c8352014 100644 --- a/src/scanner.l +++ b/src/scanner.l @@ -446,6 +446,9 @@ addrstring ({macaddr}|{ip4addr}|{ip6addr}) "oifgroup" { return OIFGROUP; } "cgroup" { return CGROUP; } +"classid" { return CLASSID; } +"nexthop" { return NEXTHOP; } + "ct" { return CT; } "l3proto" { return L3PROTOCOL; } "proto-src" { return PROTO_SRC; } diff --git a/tests/files/expr-rt b/tests/files/expr-rt new file mode 100644 index 00000000..4c154091 --- /dev/null +++ b/tests/files/expr-rt @@ -0,0 +1,21 @@ +#! 
nft -f + +add table ip filter +add chain ip filter output { type filter hook output priority 0 ; } + +add table ip6 filter +add chain ip6 filter output { type filter hook output priority 0 ; } + +add table inet filter +add chain inet filter output { type filter hook output priority 0 ; } + +# rt: classid (see /etc/iproute2/rt_realms) +add rule ip filter output rt classid cosmos counter +add rule ip6 filter output rt classid cosmos counter +add rule inet filter output rt classid cosmos counter + +# rt: nexthop +add rule ip filter output rt nexthop 192.168.0.1 counter +add rule ip6 filter output rt nexthop fd00::1 counter +add rule inet filter output ether type ip rt nexthop 192.168.0.1 counter +add rule inet filter output ether type ip6 rt nexthop fd00::1 counter diff --git a/tests/shell/run-tests.sh b/tests/shell/run-tests.sh index ff243444..d9c44c80 100755 --- a/tests/shell/run-tests.sh +++ b/tests/shell/run-tests.sh @@ -57,7 +57,7 @@ kernel_cleanup() { nft_exthdr nft_payload nft_cmp nft_range \ nft_quota nft_queue nft_numgen \ nft_meta nft_meta_bridge nft_counter nft_log nft_limit \ - nft_hash nft_ct nft_compat \ + nft_hash nft_ct nft_compat nft_rt \ nft_set_hash nft_set_rbtree \ nft_chain_nat_ipv4 nft_chain_nat_ipv6 \ nf_tables_inet nf_tables_bridge nf_tables_arp \ -- 2.47.3