]> git.ipfire.org Git - thirdparty/nftables.git/commitdiff
src: add TCP option matching
authorManuel Messner <mm@skelett.io>
Tue, 7 Feb 2017 02:14:12 +0000 (03:14 +0100)
committerFlorian Westphal <fw@strlen.de>
Sun, 12 Feb 2017 14:34:47 +0000 (15:34 +0100)
This patch enables nft to match against TCP options.

Currently these TCP options are supported:
* End of Option List (eol)
* No-Operation (noop)
* Maximum Segment Size (maxseg)
* Window Scale (window)
* SACK Permitted (sack_permitted)
* SACK (sack)
* Timestamps (timestamp)

Syntax: tcp option $option_name [$offset] $field_name
Example:

 # count all incoming packets with a specific maximum segment size `x`
 # nft add rule filter input tcp option maxseg size x counter

 # count all incoming packets with a SACK TCP option where the third
 # (counted from zero) left field is greater than `x`.
 # nft add rule filter input tcp option sack 2 left \> x counter

If the offset (the `2` in the example above) is zero, it can optionally
be omitted.
For all non-SACK TCP options it is always zero, thus can be left out.

Option names and field names are parsed from templates, similar to meta
and ct options rather than via keywords to prevent adding more keywords
than necessary.

Signed-off-by: Manuel Messner <mm@skelett.io>
Signed-off-by: Florian Westphal <fw@strlen.de>
12 files changed:
doc/nft.xml
include/expression.h
include/exthdr.h
include/tcpopt.h [new file with mode: 0644]
src/Makefile.am
src/evaluate.c
src/exthdr.c
src/netlink_delinearize.c
src/netlink_linearize.c
src/parser_bison.y
src/scanner.l
src/tcpopt.c [new file with mode: 0644]

index be729a89ad24065fb33aee42b817e4f9159d9e9f..28258104581549894ef7a2191112dd4be6a11089 100644 (file)
@@ -2363,14 +2363,182 @@ inet filter meta nfproto ipv6 output rt nexthop fd00::1
                                </table>
                        </para>
                </refsect2>
-       </refsect1>
 
-       <refsect1>
-               <title>bla</title>
                <refsect2>
-                       <title>IPv6 extension header expressions</title>
+                       <title>Extension header expressions</title>
+                       <para>
+                               Extension header expressions refer to data from variable-sized protocol headers, such as IPv6 extension headers and
+                               TCP options.
+                       </para>
+                       <para>
+                               nftables currently supports matching (finding) a given IPv6 extension header or TCP option.
+                       </para>
+                       <cmdsynopsis>
+                               <command>hbh</command>
+                               <group choice="req">
+                                       <arg>nexthdr</arg>
+                                       <arg>hdrlength</arg>
+                               </group>
+                       </cmdsynopsis>
+                       <cmdsynopsis>
+                               <command>frag</command>
+                               <group choice="req">
+                                       <arg>nexthdr</arg>
+                                       <arg>frag-off</arg>
+                                       <arg>more-fragments</arg>
+                                       <arg>id</arg>
+                               </group>
+                       </cmdsynopsis>
+
+                       <cmdsynopsis>
+                               <command>rt</command>
+                               <group choice="req">
+                                       <arg>nexthdr</arg>
+                                       <arg>hdrlength</arg>
+                                       <arg>type</arg>
+                                       <arg>seg-left</arg>
+                               </group>
+                       </cmdsynopsis>
+                       <cmdsynopsis>
+                               <command>dst</command>
+                               <group choice="req">
+                                       <arg>nexthdr</arg>
+                                       <arg>hdrlength</arg>
+                               </group>
+                       </cmdsynopsis>
+                       <cmdsynopsis>
+                               <command>mh</command>
+                               <group choice="req">
+                                       <arg>nexthdr</arg>
+                                       <arg>hdrlength</arg>
+                                       <arg>checksum</arg>
+                                       <arg>type</arg>
+                               </group>
+                       </cmdsynopsis>
+                       <cmdsynopsis>
+                               <command>tcp option</command>
+                               <group choice="req">
+                                       <arg>eol</arg>
+                                       <arg>noop</arg>
+                                       <arg>maxseg</arg>
+                                       <arg>window</arg>
+                                       <arg>sack_permitted</arg>
+                                       <arg>sack</arg>
+                                       <arg>timestamp</arg>
+                               </group>
+                <arg><replaceable>offset</replaceable></arg>
+                               <arg choice="none"><replaceable>tcp_option_field</replaceable></arg>
+                       </cmdsynopsis>
+                       <para>
+                               <table frame="all">
+                                       <title>IPv6 extension headers</title>
+                                       <tgroup cols='2' align='left' colsep='1' rowsep='1'>
+                                               <colspec colname='c1'/>
+                                               <colspec colname='c2'/>
+                                               <thead>
+                                                       <row>
+                                                               <entry>Keyword</entry>
+                                                               <entry>Description</entry>
+                                                       </row>
+                                               </thead>
+                                               <tbody>
+                                                       <row>
+                                                               <entry>hbh</entry>
+                                                               <entry>Hop by Hop</entry>
+                                                       </row>
+                                                       <row>
+                                                               <entry>rt</entry>
+                                                               <entry>Routing Header</entry>
+                                                       </row>
+                                                       <row>
+                                                               <entry>frag</entry>
+                                                               <entry>Fragmentation header</entry>
+                                                       </row>
+                                                       <row>
+                                                               <entry>dst</entry>
+                                                               <entry>dst options</entry>
+                                                       </row>
+                                                       <row>
+                                                               <entry>mh</entry>
+                                                               <entry>Mobility Header</entry>
+                                                       </row>
+                                               </tbody>
+                                       </tgroup>
+                               </table>
+
+                               <table frame="all">
+                                       <title>TCP Options</title>
+                                       <tgroup cols='3' align='left' colsep='1' rowsep='1'>
+                                               <colspec colname='c1'/>
+                                               <colspec colname='c2'/>
+                                               <colspec colname='c3'/>
+                                               <thead>
+                                                       <row>
+                                                               <entry>Keyword</entry>
+                                                               <entry>Description</entry>
+                                                               <entry>TCP option fields</entry>
+                                                       </row>
+                                               </thead>
+                                               <tbody>
+                                                       <row>
+                                                               <entry>eol</entry>
+                                                               <entry>End of option list</entry>
+                                                               <entry>kind</entry>
+                                                       </row>
+                                                       <row>
+                                                               <entry>noop</entry>
+                                                               <entry>1 Byte TCP No-op options</entry>
+                                                               <entry>kind</entry>
+                                                       </row>
+                                                       <row>
+                                                               <entry>maxseg</entry>
+                                                               <entry>TCP Maximum Segment Size</entry>
+                                                               <entry>kind, length, size</entry>
+                                                       </row>
+                                                       <row>
+                                                               <entry>window</entry>
+                                                               <entry>TCP Window Scaling</entry>
+                                                               <entry>kind, length, count</entry>
+                                                       </row>
+                                                       <row>
+                                                               <entry>sack_permitted</entry>
+                                                               <entry>TCP SACK permitted</entry>
+                                                               <entry>kind, length</entry>
+                                                       </row>
+                                                       <row>
+                                                               <entry>sack</entry>
+                                                               <entry>TCP Selective Acknowledgement</entry>
+                                                               <entry>kind, length, left, right</entry>
+                                                       </row>
+                                                       <row>
+                                                               <entry>timestamp</entry>
+                                                               <entry>TCP Timestamps</entry>
+                                                               <entry>kind, length, tsval, tsecr</entry>
+                                                       </row>
+                                               </tbody>
+                                       </tgroup>
+                               </table>
+                       </para>
+
+                       <para>
+                               The <replaceable>offset</replaceable> is only used for the SACK TCP option fields <command>left</command> and <command>right</command>.
+                               For all non-SACK TCP options it is always zero.
+                               An <replaceable>offset</replaceable> equal to zero can be omitted.
+                       </para>
+
                        <para>
-                               IPv6 extension header expressions refer to data from an IPv6 packet's extension headers.
+                               <example>
+                                       <title>finding TCP options</title>
+                                       <programlisting>
+filter input tcp option sack_permitted kind 1 counter
+                                       </programlisting>
+                               </example>
+                               <example>
+                               <title>matching IPv6 exthdr</title>
+                                       <programlisting>
+ip6 filter input frag more-fragments 1 counter
+                                       </programlisting>
+                               </example>
                        </para>
                </refsect2>
 
index ec90265b5f926bbcb87c66d3192c8e0be24565b5..83ecf1114bf14c472895985d9a58cf1c47133182 100644 (file)
@@ -281,6 +281,7 @@ struct expr {
                        const struct exthdr_desc        *desc;
                        const struct proto_hdr_template *tmpl;
                        unsigned int                    offset;
+                       enum nft_exthdr_op              op;
                } exthdr;
                struct {
                        /* EXPR_META */
index 93a53f307f553184d880f4e35169ab58d84bb0a8..cdcc2b9537972e0859b7aaebde86d0d7cde4af71 100644 (file)
@@ -2,6 +2,7 @@
 #define NFTABLES_EXTHDR_H
 
 #include <proto.h>
+#include <tcpopt.h>
 
 /**
  * struct exthdr_desc - extension header description
@@ -78,6 +79,7 @@ enum mh_hdr_fields {
        MHHDR_CHECKSUM,
 };
 
+extern const struct expr_ops exthdr_expr_ops;
 extern const struct exthdr_desc exthdr_hbh;
 extern const struct exthdr_desc exthdr_rt;
 extern const struct exthdr_desc exthdr_rt0;
diff --git a/include/tcpopt.h b/include/tcpopt.h
new file mode 100644 (file)
index 0000000..5b99008
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef NFTABLES_TCPOPT_H
+#define NFTABLES_TCPOPT_H
+
+#include <proto.h>
+#include <exthdr.h>
+
+/**
+ * tcpopt_expr_alloc - allocate an expression referring to a TCP option field
+ *
+ * @loc:          location of the expression in the input
+ * @option_str:   name of the TCP option, e.g. "maxseg" or "sack"
+ * @option_num:   index of the block inside the option; the parser passes 0
+ *                unless an explicit number was given for the "sack" option
+ * @option_field: name of the field inside the option, e.g. "size" or "left"
+ */
+extern struct expr *tcpopt_expr_alloc(const struct location *loc,
+                                     const char *option_str,
+                                     const unsigned int option_num,
+                                     const char *option_field);
+
+/**
+ * tcpopt_init_raw - initialize a TCP option expression from raw values
+ *
+ * @expr:   expression to initialize
+ * @type:   TCP option type
+ * @offset: offset of the referenced data
+ * @len:    length of the referenced data
+ *
+ * exthdr_init_raw() hands over to this function when the exthdr operation
+ * is NFT_EXTHDR_OP_TCPOPT.
+ */
+extern void tcpopt_init_raw(struct expr *expr, uint8_t type,
+                           unsigned int offset, unsigned int len);
+
+/**
+ * tcpopt_find_template - TCP option counterpart of exthdr_find_template()
+ *
+ * @expr:  TCP option expression
+ * @mask:  mask expression
+ * @shift: shift value, passed through from exthdr_find_template()
+ *
+ * exthdr_find_template() delegates to this function for expressions whose
+ * operation is NFT_EXTHDR_OP_TCPOPT and returns its result unchanged.
+ */
+extern bool tcpopt_find_template(struct expr *expr, const struct expr *mask,
+                                unsigned int *shift);
+
+extern const struct exthdr_desc tcpopt_eol;
+extern const struct exthdr_desc tcpopt_nop;
+extern const struct exthdr_desc tcpopt_maxseg;
+extern const struct exthdr_desc tcpopt_window;
+extern const struct exthdr_desc tcpopt_sack_permitted;
+extern const struct exthdr_desc tcpopt_sack;
+extern const struct exthdr_desc tcpopt_timestamp;
+
+#endif /* NFTABLES_TCPOPT_H */
index c6586f5f03c21339c6380b12553bb3b3da3df5d5..99eef7bb849b6489e8901bfeb9bd1dc83431f00c 100644 (file)
@@ -55,6 +55,7 @@ nft_SOURCES = main.c                          \
                services.c                      \
                mergesort.c                     \
                scanner.l                       \
+               tcpopt.c                        \
                parser_bison.y
 
 if BUILD_CLI
index 0e02548ceec320034aaded954c6a931c237d5dd2..4817a55c82ef8fb965b901afc3f069c7dc8ce750 100644 (file)
@@ -438,6 +438,26 @@ static int __expr_evaluate_exthdr(struct eval_ctx *ctx, struct expr **exprp)
            expr->len % BITS_PER_BYTE != 0)
                expr_evaluate_bits(ctx, exprp);
 
+       switch (expr->exthdr.op) {
+       case NFT_EXTHDR_OP_TCPOPT: {
+               static const uint8_t tcphdrlen = 20 * BITS_PER_BYTE;
+               static const unsigned int max_tcpoptlen = 15 * 4 * BITS_PER_BYTE - tcphdrlen;
+               unsigned int totlen = 0;
+
+               totlen += expr->exthdr.tmpl->offset;
+               totlen += expr->exthdr.tmpl->len;
+               totlen += expr->exthdr.offset;
+
+               if (totlen > max_tcpoptlen)
+                       return expr_error(ctx->msgs, expr,
+                                         "offset and size %u exceeds max tcp headerlen (%u)",
+                                         totlen, max_tcpoptlen);
+               break;
+       }
+       default:
+               break;
+       }
+
        return 0;
 }
 
@@ -448,11 +468,24 @@ static int __expr_evaluate_exthdr(struct eval_ctx *ctx, struct expr **exprp)
  */
 static int expr_evaluate_exthdr(struct eval_ctx *ctx, struct expr **exprp)
 {
-       const struct proto_desc *base, *dependency = &proto_ip6;
+       const struct proto_desc *base, *dependency = NULL;
        enum proto_bases pb = PROTO_BASE_NETWORK_HDR;
        struct expr *expr = *exprp;
        struct stmt *nstmt;
 
+       switch (expr->exthdr.op) {
+       case NFT_EXTHDR_OP_TCPOPT:
+               dependency = &proto_tcp;
+               pb = PROTO_BASE_TRANSPORT_HDR;
+               break;
+       case NFT_EXTHDR_OP_IPV6:
+       default:
+               dependency = &proto_ip6;
+               break;
+       }
+
+       assert(dependency);
+
        base = ctx->pctx.protocol[pb].desc;
        if (base == dependency)
                return __expr_evaluate_exthdr(ctx, exprp);
index 45b1b690766480358c2ee06bf7c793609f8a068d..cfc6bb604d2c96ae679f645df67ef440b58dd6d4 100644 (file)
 
+/* Print an extension header expression.
+ *
+ * TCP options are printed as "tcp option <name> [<block>] <field>", where the
+ * block index is left out when it is zero. Everything else is printed as
+ * "<header> <field>".
+ */
 static void exthdr_expr_print(const struct expr *expr)
 {
-       printf("%s %s", expr->exthdr.desc->name, expr->exthdr.tmpl->token);
+       if (expr->exthdr.op == NFT_EXTHDR_OP_TCPOPT) {
+               /* Offset calculation is a bit hacky at this point.
+                * There might be a tcp option one day with another
+                * multiplier.
+                */
+               unsigned int offset = expr->exthdr.offset / 64;
+
+               printf("tcp option %s", expr->exthdr.desc->name);
+               /* Print the index directly: it is unsigned and must not be
+                * truncated by a fixed-size scratch buffer.
+                */
+               if (offset)
+                       printf(" %u", offset);
+               printf(" %s", expr->exthdr.tmpl->token);
+       } else {
+               printf("%s %s", expr->exthdr.desc->name,
+                               expr->exthdr.tmpl->token);
+       }
 }
 
+/* Two exthdr expressions compare equal when they share the same description,
+ * the same field template and the same exthdr operation. The offset stored
+ * in the expression is not part of the comparison.
+ */
 static bool exthdr_expr_cmp(const struct expr *e1, const struct expr *e2)
 {
        return e1->exthdr.desc == e2->exthdr.desc &&
-              e1->exthdr.tmpl == e2->exthdr.tmpl;
+              e1->exthdr.tmpl == e2->exthdr.tmpl &&
+              e1->exthdr.op == e2->exthdr.op;
 }
 
 static void exthdr_expr_clone(struct expr *new, const struct expr *expr)
@@ -38,9 +54,10 @@ static void exthdr_expr_clone(struct expr *new, const struct expr *expr)
        new->exthdr.desc = expr->exthdr.desc;
        new->exthdr.tmpl = expr->exthdr.tmpl;
        new->exthdr.offset = expr->exthdr.offset;
+       new->exthdr.op = expr->exthdr.op;
 }
 
-static const struct expr_ops exthdr_expr_ops = {
+const struct expr_ops exthdr_expr_ops = {
        .type           = EXPR_EXTHDR,
        .name           = "exthdr",
        .print          = exthdr_expr_print,
@@ -86,6 +103,8 @@ void exthdr_init_raw(struct expr *expr, uint8_t type,
        unsigned int i;
 
        assert(expr->ops->type == EXPR_EXTHDR);
+       if (op == NFT_EXTHDR_OP_TCPOPT)
+               return tcpopt_init_raw(expr, type, offset, len);
 
        expr->len = len;
        expr->exthdr.offset = offset;
@@ -117,6 +136,12 @@ bool exthdr_find_template(struct expr *expr, const struct expr *mask, unsigned i
        if (expr->exthdr.tmpl != &exthdr_unknown_template)
                return false;
 
+       /* In case we are handling tcp options instead of the default ipv6
+        * extension headers.
+        */
+       if (expr->exthdr.op == NFT_EXTHDR_OP_TCPOPT)
+               return tcpopt_find_template(expr, mask, shift);
+
        mask_offset = mpz_scan1(mask->value, 0);
        mask_len = mask_length(mask);
 
index f21d2d56ef284ecdf5a1e2d8ba6201b3ff24dbd8..1e94af49013d0cffb9d1b7143ab218625c200cac 100644 (file)
@@ -507,7 +507,7 @@ static void netlink_parse_exthdr(struct netlink_parse_ctx *ctx,
        type   = nftnl_expr_get_u8(nle, NFTNL_EXPR_EXTHDR_TYPE);
        offset = nftnl_expr_get_u32(nle, NFTNL_EXPR_EXTHDR_OFFSET) * BITS_PER_BYTE;
        len    = nftnl_expr_get_u32(nle, NFTNL_EXPR_EXTHDR_LEN) * BITS_PER_BYTE;
-       op     = NFT_EXTHDR_OP_IPV6;
+       op     = nftnl_expr_get_u32(nle, NFTNL_EXPR_EXTHDR_OP);
 
        expr = exthdr_expr_alloc(loc, NULL, 0);
        exthdr_init_raw(expr, type, offset, len, op);
@@ -1221,6 +1221,7 @@ static const struct {
        { .name = "numgen",     .parse = netlink_parse_numgen },
        { .name = "hash",       .parse = netlink_parse_hash },
        { .name = "fib",        .parse = netlink_parse_fib },
+       { .name = "tcpopt",     .parse = netlink_parse_exthdr },
 };
 
 static int netlink_parse_expr(const struct nftnl_expr *nle,
index 056f11317298682b3508cce969e58f9979090f38..8849b0e472680dab0f17061c0f5bd772642a338a 100644 (file)
@@ -162,7 +162,7 @@ static void netlink_gen_exthdr(struct netlink_linearize_ctx *ctx,
                               const struct expr *expr,
                               enum nft_registers dreg)
 {
-       unsigned int offset = expr->exthdr.tmpl->offset;
+       unsigned int offset = expr->exthdr.tmpl->offset + expr->exthdr.offset;
        struct nftnl_expr *nle;
 
        nle = alloc_nft_expr("exthdr");
@@ -172,6 +172,7 @@ static void netlink_gen_exthdr(struct netlink_linearize_ctx *ctx,
        nftnl_expr_set_u32(nle, NFTNL_EXPR_EXTHDR_OFFSET, offset / BITS_PER_BYTE);
        nftnl_expr_set_u32(nle, NFTNL_EXPR_EXTHDR_LEN,
                           div_round_up(expr->len, BITS_PER_BYTE));
+       nftnl_expr_set_u8(nle, NFTNL_EXPR_EXTHDR_OP, expr->exthdr.op);
        nftnl_rule_add_expr(ctx->nlr, nle);
 }
 
index d543e3ea2515c2074131e1a7d64e7ce4303d30eb..b295bfde2ed3aaed2d4f7769db1d95737b623153 100644 (file)
@@ -308,6 +308,7 @@ static void location_update(struct location *loc, struct location *rhs, int n)
 %token DOFF                    "doff"
 %token WINDOW                  "window"
 %token URGPTR                  "urgptr"
+%token OPTION                  "option"
 
 %token DCCP                    "dccp"
 
@@ -428,8 +429,8 @@ static void location_update(struct location *loc, struct location *rhs, int n)
 
 %token NOTRACK                 "notrack"
 
-%type <string>                 identifier type_identifier string comment_spec
-%destructor { xfree($$); }     identifier type_identifier string comment_spec
+%type <string>                 identifier type_identifier string comment_spec tcp_option_name tcp_option_field
+%destructor { xfree($$); }     identifier type_identifier string comment_spec tcp_option_name tcp_option_field
 
 %type <val>                    time_spec quota_used
 
@@ -581,9 +582,9 @@ static void location_update(struct location *loc, struct location *rhs, int n)
 %type <expr>                   auth_hdr_expr   esp_hdr_expr            comp_hdr_expr
 %destructor { expr_free($$); } auth_hdr_expr   esp_hdr_expr            comp_hdr_expr
 %type <val>                    auth_hdr_field  esp_hdr_field           comp_hdr_field
-%type <expr>                   udp_hdr_expr    udplite_hdr_expr        tcp_hdr_expr
-%destructor { expr_free($$); } udp_hdr_expr    udplite_hdr_expr        tcp_hdr_expr
-%type <val>                    udp_hdr_field   udplite_hdr_field       tcp_hdr_field
+%type <expr>                   udp_hdr_expr    udplite_hdr_expr
+%destructor { expr_free($$); } udp_hdr_expr    udplite_hdr_expr
+%type <val>                    udp_hdr_field   udplite_hdr_field
 %type <expr>                   dccp_hdr_expr   sctp_hdr_expr
 %destructor { expr_free($$); } dccp_hdr_expr   sctp_hdr_expr
 %type <val>                    dccp_hdr_field  sctp_hdr_field
@@ -600,6 +601,9 @@ static void location_update(struct location *loc, struct location *rhs, int n)
 %destructor { expr_free($$); } mh_hdr_expr
 %type <val>                    mh_hdr_field
 
+%type <expr>                   tcp_hdr_optexpr
+%destructor { expr_free($$); } tcp_hdr_optexpr
+
 %type <expr>                   meta_expr
 %destructor { expr_free($$); } meta_expr
 %type <val>                    meta_key        meta_key_qualified      meta_key_unqualified    numgen_type
@@ -626,6 +630,10 @@ static void location_update(struct location *loc, struct location *rhs, int n)
 %type <quota>                  quota_config
 %destructor { xfree($$); }     quota_config
 
+%type <expr>                   tcp_hdr_expr
+%destructor { expr_free($$); } tcp_hdr_expr
+%type <val>                    tcp_hdr_field
+
 %%
 
 input                  :       /* empty */
@@ -3232,6 +3240,7 @@ exthdr_expr               :       hbh_hdr_expr
                        |       frag_hdr_expr
                        |       dst_hdr_expr
                        |       mh_hdr_expr
+                       |       tcp_hdr_optexpr
                        ;
 
 hbh_hdr_expr           :       HBH     hbh_hdr_field
@@ -3314,4 +3323,31 @@ mh_hdr_field             :       NEXTHDR         { $$ = MHHDR_NEXTHDR; }
                        |       CHECKSUM        { $$ = MHHDR_CHECKSUM; }
                        ;
 
+/* Name of a TCP option. "window" is matched by the scanner as the WINDOW
+ * keyword, so it is accepted explicitly and turned back into a string.
+ */
+tcp_option_name                :       STRING          { $$ = $1; }
+                       |       WINDOW          { $$ = xstrdup("window"); }
+                       ;
+
+/* Name of a field inside a TCP option. The LENGTH and SIZE tokens are
+ * turned back into their string form.
+ */
+tcp_option_field       :       STRING          { $$ = $1; }
+                       |       LENGTH          { $$ = xstrdup("length"); }
+                       |       SIZE            { $$ = xstrdup("size"); }
+                       ;
+
+/* "tcp option <name> <field>" and "tcp option sack <block> <field>".
+ * The block number is only accepted for the sack option and must be 0-3.
+ */
+tcp_hdr_optexpr                :       TCP     OPTION  tcp_option_name         tcp_option_field
+                       {
+                               $$ = tcpopt_expr_alloc(&@$, $3, 0, $4);
+                       }
+                       |       TCP     OPTION  STRING  NUM     tcp_option_field
+                       {
+                               /* YYERROR does not run the destructors of the
+                                * right-hand side symbols, so both strings
+                                * are released by hand on the error paths.
+                                */
+                               if (strcmp($3, "sack")) {
+                                       erec_queue(error(&@2, "tcp: number (%d) can only be used with sack option", $4), state->msgs);
+                                       xfree($3);
+                                       xfree($5);
+                                       YYERROR;
+                               }
+
+                               if ($4 > 3) {
+                                       erec_queue(error(&@2, "tcp: option block (%d) too large (0-3)", $4), state->msgs);
+                                       xfree($3);
+                                       xfree($5);
+                                       YYERROR;
+                               }
+                               $$ = tcpopt_expr_alloc(&@$, $3, $4, $5);
+                       }
+                       ;
 %%
index d0d25ea946009ee724b022af30fa9060a4057b3c..922d8ec82fc12158e60787fe52b33d18dd8f981a 100644 (file)
@@ -411,6 +411,7 @@ addrstring  ({macaddr}|{ip4addr}|{ip6addr})
 "doff"                 { return DOFF; }
 "window"               { return WINDOW; }
 "urgptr"               { return URGPTR; }
+"option"               { return OPTION; }
 
 "dccp"                 { return DCCP; }
 
diff --git a/src/tcpopt.c b/src/tcpopt.c
new file mode 100644 (file)
index 0000000..e6f92bc
--- /dev/null
@@ -0,0 +1,269 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+
+#include <utils.h>
+#include <headers.h>
+#include <expression.h>
+#include <tcpopt.h>
+
+/* We do not need to export these enums, because the tcpopts are parsed at
+ * runtime and not by bison.
+ */
+/* Indices into tcpopt_eol.templates. */
+enum tcpopt_eol_hdr_fields {
+       TCPOPT_EOLHDR_KIND,
+};
+
+/* Indices into tcpopt_nop.templates. */
+enum tcpopt_nop_hdr_fields {
+       TCPOPT_NOPHDR_KIND,
+};
+
+/* Indices into the templates of the "maxseg" option descriptor. */
+enum tcpopt_maxseg_hdr_fields {
+       TCPOPT_MAXSEGHDR_KIND,
+       TCPOPT_MAXSEGHDR_LENGTH,
+       TCPOPT_MAXSEGHDR_SIZE,
+};
+
+/* Indices into tcpopt_window.templates. */
+enum tcpopt_window_hdr_fields {
+       TCPOPT_WINDOWHDR_KIND,
+       TCPOPT_WINDOWHDR_LENGTH,
+       TCPOPT_WINDOWHDR_COUNT,
+};
+
+/* Indices into tcpopt_sack_permitted.templates. */
+enum tcpopt_sack_permitted_hdr_fields {
+       TCPOPT_SACKPERMHDR_KIND,
+       TCPOPT_SACKPERMHDR_LENGTH,
+};
+
+/* Indices into tcpopt_sack.templates. LEFT and RIGHT name the two edges of
+ * one SACK block.
+ */
+enum tcpopt_sack_hdr_fields {
+       TCPOPT_SACKHDR_KIND,
+       TCPOPT_SACKHDR_LENGTH,
+       TCPOPT_SACKHDR_LEFT,
+       TCPOPT_SACKHDR_RIGHT,
+};
+
+/* Indices into tcpopt_timestamp.templates. */
+enum tcpopt_timestamp_hdr_fields {
+       TCPOPT_TIMESTAMPSHDR_KIND,
+       TCPOPT_TIMESTAMPSHDR_LENGTH,
+       TCPOPT_TIMESTAMPSHDR_TSVAL,
+       TCPOPT_TIMESTAMPSHDR_TSECR,
+};
+
+/* Placeholder template for "no matching option/field". It is recognised by
+ * address comparison throughout this file, never by its contents.
+ */
+static const struct proto_hdr_template tcpopt_unknown_template =
+       PROTO_HDR_TEMPLATE("unknown", &invalid_type, BYTEORDER_INVALID, 0, 0);
+
+/* Shorthand for the option field templates below: every field is a
+ * big-endian integer, so only token, offset and length vary. Undefined again
+ * after the last descriptor.
+ */
+#define PHT(__token, __offset, __len) \
+       PROTO_HDR_TEMPLATE(__token, &integer_type, BYTEORDER_BIG_ENDIAN, \
+                          __offset, __len)
+/* "eol" option: only a kind field. */
+const struct exthdr_desc tcpopt_eol = {
+       .name           = "eol",
+       .type           = TCPOPT_EOL,
+       .templates      = {
+               [TCPOPT_EOLHDR_KIND]            = PHT("kind",  0,    8),
+       },
+};
+
+/* "noop" option: only a kind field. */
+const struct exthdr_desc tcpopt_nop = {
+       .name           = "noop",
+       .type           = TCPOPT_NOP,
+       .templates      = {
+               [TCPOPT_NOPHDR_KIND]            = PHT("kind",   0,   8),
+       },
+};
+
+/* "maxseg" option: kind, length and a 16 unit wide size field.
+ *
+ * NOTE(review): the identifier is spelled "tcptopt_" (extra 't'), unlike its
+ * siblings. It is referenced under this spelling by tcpopt_protocols, so a
+ * rename has to update both places (and any external declaration).
+ */
+const struct exthdr_desc tcptopt_maxseg = {
+       .name           = "maxseg",
+       .type           = TCPOPT_MAXSEG,
+       .templates      = {
+               [TCPOPT_MAXSEGHDR_KIND]         = PHT("kind",   0,  8),
+               [TCPOPT_MAXSEGHDR_LENGTH]       = PHT("length", 8,  8),
+               [TCPOPT_MAXSEGHDR_SIZE]         = PHT("size",  16, 16),
+       },
+};
+
+/* "window" option: kind, length and count fields. */
+const struct exthdr_desc tcpopt_window = {
+       .name           = "window",
+       .type           = TCPOPT_WINDOW,
+       .templates      = {
+               [TCPOPT_WINDOWHDR_KIND]         = PHT("kind",   0,  8),
+               [TCPOPT_WINDOWHDR_LENGTH]       = PHT("length", 8,  8),
+               [TCPOPT_WINDOWHDR_COUNT]        = PHT("count", 16,  8),
+       },
+};
+
+/* "sack_permitted" option: kind and length only, no payload fields. */
+const struct exthdr_desc tcpopt_sack_permitted = {
+       .name           = "sack_permitted",
+       .type           = TCPOPT_SACK_PERMITTED,
+       .templates      = {
+               [TCPOPT_SACKPERMHDR_KIND]       = PHT("kind",   0, 8),
+               [TCPOPT_SACKPERMHDR_LENGTH]     = PHT("length", 8, 8),
+       },
+};
+
+/* "sack" option. The left/right templates describe the first block only;
+ * calc_offset() and calc_offset_reverse() in this file translate between a
+ * block number and the additional offset of later blocks (64 per block,
+ * i.e. the combined width of left and right).
+ */
+const struct exthdr_desc tcpopt_sack = {
+       .name           = "sack",
+       .type           = TCPOPT_SACK,
+       .templates      = {
+               [TCPOPT_SACKHDR_KIND]           = PHT("kind",   0,   8),
+               [TCPOPT_SACKHDR_LENGTH]         = PHT("length", 8,   8),
+               [TCPOPT_SACKHDR_LEFT]           = PHT("left",  16,  32),
+               [TCPOPT_SACKHDR_RIGHT]          = PHT("right", 48,  32),
+       },
+};
+
+/* "timestamp" option: kind, length and the two 32 unit wide fields tsval and
+ * tsecr.
+ */
+const struct exthdr_desc tcpopt_timestamp = {
+       .name           = "timestamp",
+       .type           = TCPOPT_TIMESTAMP,
+       .templates      = {
+               [TCPOPT_TIMESTAMPSHDR_KIND]     = PHT("kind",   0,  8),
+               [TCPOPT_TIMESTAMPSHDR_LENGTH]   = PHT("length", 8,  8),
+               [TCPOPT_TIMESTAMPSHDR_TSVAL]    = PHT("tsval",  16, 32),
+               [TCPOPT_TIMESTAMPSHDR_TSECR]    = PHT("tsecr",  48, 32),
+       },
+};
+#undef PHT
+
+/* Table of option descriptors, indexed by the TCPOPT_* option type.
+ *
+ * Types 6 and 7 (echo / echo reply) get local defines and are mapped to
+ * TCPOPT_OBSOLETE, a typed NULL: there is no descriptor for them, so every
+ * user of this table has to check for that marker before dereferencing an
+ * entry.
+ */
+#define TCPOPT_OBSOLETE ((struct exthdr_desc *)NULL)
+#define TCPOPT_ECHO 6
+#define TCPOPT_ECHO_REPLY 7
+const struct exthdr_desc *tcpopt_protocols[] = {
+       [TCPOPT_EOL]            = &tcpopt_eol,
+       [TCPOPT_NOP]            = &tcpopt_nop,
+       [TCPOPT_MAXSEG]         = &tcptopt_maxseg,
+       [TCPOPT_WINDOW]         = &tcpopt_window,
+       [TCPOPT_SACK_PERMITTED] = &tcpopt_sack_permitted,
+       [TCPOPT_SACK]           = &tcpopt_sack,
+       [TCPOPT_ECHO]           = TCPOPT_OBSOLETE,
+       [TCPOPT_ECHO_REPLY]     = TCPOPT_OBSOLETE,
+       [TCPOPT_TIMESTAMP]      = &tcpopt_timestamp,
+};
+
+/* Additional offset needed to address block number @num of an option, in
+ * the same unit as the template offsets.
+ *
+ * Only the SACK option has repeated blocks. Its kind and length fields
+ * (template offset below 16) exist once, every further left/right pair lies
+ * 64 after the previous one. For a missing descriptor, the unknown template
+ * and all other options the result is 0.
+ */
+static unsigned int calc_offset(const struct exthdr_desc *desc,
+                               const struct proto_hdr_template *tmpl,
+                               unsigned int num)
+{
+       if (desc == NULL || tmpl == &tcpopt_unknown_template)
+               return 0;
+
+       if (desc->type != TCPOPT_SACK)
+               return 0;
+
+       /* kind and length are not part of a block */
+       if (tmpl->offset < 16)
+               return 0;
+
+       return num * 64;
+}
+
+
+/* Map a raw @offset back onto the offset of the first-block template it
+ * belongs to, so that it can be compared against template offsets.
+ *
+ * Only SACK offsets from the second block on (80 and above) are folded back
+ * with a modulo of 64; everything else, including a missing descriptor or
+ * the unknown template, is returned unchanged.
+ */
+static unsigned int calc_offset_reverse(const struct exthdr_desc *desc,
+                                       const struct proto_hdr_template *tmpl,
+                                       unsigned int offset)
+{
+       if (desc == NULL || tmpl == &tcpopt_unknown_template)
+               return offset;
+
+       /* Offsets below 80 already address the first left/right pair (or
+        * kind/length) and need no folding.
+        */
+       if (desc->type == TCPOPT_SACK && offset >= 80)
+               return offset % 64;
+
+       return offset;
+}
+
+
+/* Allocate an exthdr expression for the TCP option field named by
+ * @option_str / @option_field.
+ *
+ * @loc:          location passed on to expr_alloc()
+ * @option_str:   option name, compared against the descriptor names
+ * @option_num:   block number, only relevant for offset calculation
+ * @option_field: field name, compared against the template tokens
+ *
+ * If no descriptor/template pair matches, the expression is still allocated,
+ * but with a NULL descriptor and tcpopt_unknown_template as template. The
+ * strings are only compared, not stored.
+ */
+struct expr *tcpopt_expr_alloc(const struct location *loc,
+                              const char *option_str,
+                              const unsigned int option_num,
+                              const char *option_field)
+{
+       const struct proto_hdr_template *tmpl = &tcpopt_unknown_template;
+       const struct proto_hdr_template *field;
+       const struct exthdr_desc *desc = NULL;
+       const struct exthdr_desc *opt;
+       struct expr *expr;
+       unsigned int i, j;
+
+       /* The outer loop ends as soon as a descriptor has been selected. */
+       for (i = 0; desc == NULL && i < array_size(tcpopt_protocols); i++) {
+               opt = tcpopt_protocols[i];
+               if (opt == TCPOPT_OBSOLETE)
+                       continue;
+
+               if (opt->name == NULL || strcmp(option_str, opt->name) != 0)
+                       continue;
+
+               for (j = 0; j < array_size(opt->templates); j++) {
+                       field = &opt->templates[j];
+                       if (field->token != NULL &&
+                           strcmp(option_field, field->token) == 0) {
+                               desc = opt;
+                               tmpl = field;
+                               break;
+                       }
+               }
+       }
+
+       /* Without a match, desc is still NULL and tmpl still refers to
+        * tcpopt_unknown_template.
+        */
+       expr = expr_alloc(loc, &exthdr_expr_ops, tmpl->dtype,
+                         BYTEORDER_BIG_ENDIAN, tmpl->len);
+       expr->exthdr.desc   = desc;
+       expr->exthdr.tmpl   = tmpl;
+       expr->exthdr.op     = NFT_EXTHDR_OP_TCPOPT;
+       expr->exthdr.offset = calc_offset(desc, tmpl, option_num);
+
+       return expr;
+}
+
+/* Fill in an exthdr expression from raw values: option @type, @offset and
+ * @len.
+ *
+ * Length, offset and descriptor are always stored. Data type, template and
+ * operation are only set if one of the descriptor's templates matches
+ * @len and the (block-folded) @offset; otherwise they are left as they were.
+ *
+ * @type must be a valid index into tcpopt_protocols that is not marked
+ * TCPOPT_OBSOLETE; both conditions are asserted.
+ */
+void tcpopt_init_raw(struct expr *expr, uint8_t type, unsigned int offset,
+                    unsigned int len)
+{
+       const struct proto_hdr_template *cand;
+       const struct exthdr_desc *desc;
+       unsigned int idx;
+
+       assert(expr->ops->type == EXPR_EXTHDR);
+
+       expr->len = len;
+       expr->exthdr.offset = offset;
+
+       assert(type < array_size(tcpopt_protocols));
+       desc = tcpopt_protocols[type];
+       expr->exthdr.desc = desc;
+       assert(desc != TCPOPT_OBSOLETE);
+
+       for (idx = 0; idx < array_size(desc->templates); idx++) {
+               cand = &desc->templates[idx];
+               if (cand->len != len)
+                       continue;
+
+               /* For sack options the raw offset may point into a later
+                * block and has to be folded back before comparing.
+                */
+               if (cand->offset != calc_offset_reverse(desc, cand, offset))
+                       continue;
+
+               expr->dtype       = cand->dtype;
+               expr->exthdr.tmpl = cand;
+               expr->exthdr.op   = NFT_EXTHDR_OP_TCPOPT;
+               break;
+       }
+}
+
+/* Try to resolve the template of a TCP option expression that currently
+ * carries tcpopt_unknown_template, using its descriptor type, offset and
+ * length.
+ *
+ * @expr:  expression to update in place
+ * @mask:  not used by this function
+ * @shift: not used by this function
+ *
+ * Returns true if a template was found and installed. Returns false if the
+ * expression already had a known template, has no descriptor, or no template
+ * matches.
+ */
+bool tcpopt_find_template(struct expr *expr, const struct expr *mask,
+                         unsigned int *shift)
+{
+       if (expr->exthdr.tmpl != &tcpopt_unknown_template)
+               return false;
+
+       /* tcpopt_expr_alloc() pairs the unknown template with a NULL
+        * descriptor when the option name could not be resolved. There is no
+        * option type to look up in that case, and dereferencing the
+        * descriptor would crash.
+        */
+       if (expr->exthdr.desc == NULL)
+               return false;
+
+       tcpopt_init_raw(expr, expr->exthdr.desc->type, expr->exthdr.offset,
+                       expr->len);
+
+       /* tcpopt_init_raw() leaves the template untouched if nothing matches */
+       return expr->exthdr.tmpl != &tcpopt_unknown_template;
+}