// SPDX-License-Identifier: GPL-2.0-only

/* PIPAPO: PIle PAcket POlicies: AVX2 packet lookup routines
 *
 * Copyright (c) 2019-2020 Red Hat GmbH
 *
 * Author: Stefano Brivio <sbrivio@redhat.com>
 */

10 #include <linux/kernel.h>
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/netlink.h>
14 #include <linux/netfilter.h>
15 #include <linux/netfilter/nf_tables.h>
16 #include <net/netfilter/nf_tables_core.h>
17 #include <uapi/linux/netfilter/nf_tables.h>
18 #include <linux/bitmap.h>
19 #include <linux/bitops.h>
21 #include <linux/compiler.h>
22 #include <asm/fpu/api.h>
24 #include "nft_set_pipapo_avx2.h"
25 #include "nft_set_pipapo.h"
/* Number of unsigned longs covered by one 256-bit step of the lookup loops */
#define NFT_PIPAPO_LONGS_PER_M256	(XSAVE_YMM_SIZE / BITS_PER_LONG)

/* Load from memory into YMM register with non-temporal hint ("stream load"),
 * that is, don't fetch lines from memory into the cache. This avoids pushing
 * precious packet data out of the cache hierarchy, and is appropriate when:
 *
 * - loading buckets from lookup tables, as they are not going to be used
 *   again before packets are entirely classified
 *
 * - loading the result bitmap from the previous field, as it's never used
 *   again
 */
#define NFT_PIPAPO_AVX2_LOAD(reg, loc)					\
	asm volatile("vmovntdqa %0, %%ymm" #reg : : "m" (loc))

/* Stream a single lookup table bucket into YMM register given lookup table,
 * group index, value of packet bits, bucket size.
 */
#define NFT_PIPAPO_AVX2_BUCKET_LOAD4(reg, lt, group, v, bsize)		\
	NFT_PIPAPO_AVX2_LOAD(reg,					\
			     lt[((group) * NFT_PIPAPO_BUCKETS(4) +	\
				 (v)) * (bsize)])
#define NFT_PIPAPO_AVX2_BUCKET_LOAD8(reg, lt, group, v, bsize)		\
	NFT_PIPAPO_AVX2_LOAD(reg,					\
			     lt[((group) * NFT_PIPAPO_BUCKETS(8) +	\
				 (v)) * (bsize)])

/* Bitwise AND: the staple operation of this algorithm */
#define NFT_PIPAPO_AVX2_AND(dst, a, b)					\
	asm volatile("vpand %ymm" #a ", %ymm" #b ", %ymm" #dst)

/* Jump to label if @reg is zero */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label)			\
	asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";"	\
			  "je %l[" #label "]" : : : : label)

/* Store 256 bits from YMM register into memory. Contrary to bucket load
 * operation, we don't bypass the cache here, as stored matching results
 * are always used shortly after.
 */
#define NFT_PIPAPO_AVX2_STORE(loc, reg)					\
	asm volatile("vmovdqa %%ymm" #reg ", %0" : "=m" (loc))

/* Zero out a complete YMM register, @reg */
#define NFT_PIPAPO_AVX2_ZERO(reg)					\
	asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)

/* Current working bitmap index, toggled between field matches */
static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);

/**
 * nft_pipapo_avx2_prepare() - Prepare before main algorithm body
 *
 * This zeroes out ymm15, which is later used whenever we need to clear a
 * memory location, by storing its content into memory.
 */
static void nft_pipapo_avx2_prepare(void)
{
	NFT_PIPAPO_AVX2_ZERO(15);
}

/**
 * nft_pipapo_avx2_fill() - Fill a bitmap region with ones
 * @data:	Base memory area
 * @start:	First bit to set
 * @len:	Count of bits to fill
 *
 * This is nothing else than a version of bitmap_set(), as used e.g. by
 * pipapo_refill(), tailored for the microarchitectures using it and better
 * suited for the specific usage: it's very likely that we'll set a small number
 * of bits, not crossing a word boundary, and correct branch prediction is
 * critical here.
 *
 * This function doesn't actually use any AVX2 instruction.
 */
static void nft_pipapo_avx2_fill(unsigned long *data, int start, int len)
{
	int offset = start % BITS_PER_LONG;
	unsigned long mask;

	/* Move to the word holding the first bit to be set */
	data += start / BITS_PER_LONG;

	/* Most likely case: a single bit */
	if (likely(len == 1)) {
		*data |= BIT(offset);
		return;
	}

	if (likely(len < BITS_PER_LONG || offset)) {
		/* Region fits entirely in the first word */
		if (likely(len + offset <= BITS_PER_LONG)) {
			*data |= GENMASK(len - 1 + offset, offset);
			return;
		}

		/* Fill the first word from @offset up, then move on */
		*data |= ~0UL << offset;
		len -= BITS_PER_LONG - offset;
		data++;

		/* Remainder ends within the second word. @len is at least one
		 * here, so the shift count is always less than BITS_PER_LONG.
		 */
		if (len <= BITS_PER_LONG) {
			mask = ~0UL >> (BITS_PER_LONG - len);
			*data |= mask;
			return;
		}
	}

	/* Word-aligned from here on: set whole bytes at once... */
	memset(data, 0xff, len / BITS_PER_BYTE);
	data += len / BITS_PER_LONG;

	/* ...then the trailing bits of the last, partial word, if any. The
	 * check also avoids shifting by BITS_PER_LONG, which is undefined.
	 */
	len %= BITS_PER_LONG;
	if (len)
		*data |= ~0UL >> (BITS_PER_LONG - len);
}

140 * nft_pipapo_avx2_refill() - Scan bitmap, select mapping table item, set bits
141 * @offset: Start from given bitmap (equivalent to bucket) offset, in longs
142 * @map: Bitmap to be scanned for set bits
143 * @dst: Destination bitmap
144 * @mt: Mapping table containing bit set specifiers
145 * @last: Return index of first set bit, if this is the last field
147 * This is an alternative implementation of pipapo_refill() suitable for usage
148 * with AVX2 lookup routines: we know there are four words to be scanned, at
149 * a given offset inside the map, for each matching iteration.
151 * This function doesn't actually use any AVX2 instruction.
153 * Return: first set bit index if @last, index of first filled word otherwise.
155 static int nft_pipapo_avx2_refill(int offset
, unsigned long *map
,
157 union nft_pipapo_map_bucket
*mt
, bool last
)
161 #define NFT_PIPAPO_AVX2_REFILL_ONE_WORD(x) \
164 int r = __builtin_ctzl(map[(x)]); \
165 int i = (offset + (x)) * BITS_PER_LONG + r; \
170 nft_pipapo_avx2_fill(dst, mt[i].to, mt[i].n); \
175 map[(x)] &= ~(1UL << r); \
179 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(0);
180 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(1);
181 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(2);
182 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(3);
183 #undef NFT_PIPAPO_AVX2_REFILL_ONE_WORD
189 * nft_pipapo_avx2_lookup_4b_2() - AVX2-based lookup for 2 four-bit groups
190 * @map: Previous match result, used as initial bitmap
191 * @fill: Destination bitmap to be filled with current match result
192 * @f: Field, containing lookup and mapping tables
193 * @offset: Ignore buckets before the given index, no bits are filled there
194 * @pkt: Packet data, pointer to input nftables register
195 * @first: If this is the first field, don't source previous result
196 * @last: Last field: stop at the first match and return bit index
198 * Load buckets from lookup table corresponding to the values of each 4-bit
199 * group of packet bytes, and perform a bitwise intersection between them. If
200 * this is the first field in the set, simply AND the buckets together
201 * (equivalent to using an all-ones starting bitmap), use the provided starting
202 * bitmap otherwise. Then call nft_pipapo_avx2_refill() to generate the next
203 * working bitmap, @fill.
205 * This is used for 8-bit fields (i.e. protocol numbers).
207 * Out-of-order (and superscalar) execution is vital here, so it's critical to
208 * avoid false data dependencies. CPU and compiler could (mostly) take care of
209 * this on their own, but the operation ordering is explicitly given here with
210 * a likely execution order in mind, to highlight possible stalls. That's why
211 * a number of logically distinct operations (i.e. loading buckets, intersecting
212 * buckets) are interleaved.
214 * Return: -1 on no match, rule index of match if @last, otherwise first long
215 * word index to be checked next (i.e. first filled word).
217 static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map
, unsigned long *fill
,
218 struct nft_pipapo_field
*f
, int offset
,
219 const u8
*pkt
, bool first
, bool last
)
221 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
222 u8 pg
[2] = { pkt
[0] >> 4, pkt
[0] & 0xf };
223 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
225 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
226 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
227 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
230 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
231 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 1, pg
[1], bsize
);
232 NFT_PIPAPO_AVX2_AND(4, 0, 1);
234 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
235 NFT_PIPAPO_AVX2_LOAD(2, map
[i_ul
]);
236 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 1, pg
[1], bsize
);
237 NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nothing
);
238 NFT_PIPAPO_AVX2_AND(3, 0, 1);
239 NFT_PIPAPO_AVX2_AND(4, 2, 3);
242 NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch
);
243 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 4);
245 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
249 if (unlikely(ret
== -1))
250 ret
= b
/ XSAVE_YMM_SIZE
;
254 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
263 * nft_pipapo_avx2_lookup_4b_4() - AVX2-based lookup for 4 four-bit groups
264 * @map: Previous match result, used as initial bitmap
265 * @fill: Destination bitmap to be filled with current match result
266 * @f: Field, containing lookup and mapping tables
267 * @offset: Ignore buckets before the given index, no bits are filled there
268 * @pkt: Packet data, pointer to input nftables register
269 * @first: If this is the first field, don't source previous result
270 * @last: Last field: stop at the first match and return bit index
272 * See nft_pipapo_avx2_lookup_4b_2().
274 * This is used for 16-bit fields (i.e. ports).
276 * Return: -1 on no match, rule index of match if @last, otherwise first long
277 * word index to be checked next (i.e. first filled word).
279 static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map
, unsigned long *fill
,
280 struct nft_pipapo_field
*f
, int offset
,
281 const u8
*pkt
, bool first
, bool last
)
283 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
284 u8 pg
[4] = { pkt
[0] >> 4, pkt
[0] & 0xf, pkt
[1] >> 4, pkt
[1] & 0xf };
285 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
287 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
288 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
289 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
292 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
293 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 1, pg
[1], bsize
);
294 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 2, pg
[2], bsize
);
295 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 3, pg
[3], bsize
);
296 NFT_PIPAPO_AVX2_AND(4, 0, 1);
297 NFT_PIPAPO_AVX2_AND(5, 2, 3);
298 NFT_PIPAPO_AVX2_AND(7, 4, 5);
300 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
302 NFT_PIPAPO_AVX2_LOAD(1, map
[i_ul
]);
304 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 1, pg
[1], bsize
);
305 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 2, pg
[2], bsize
);
306 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt
, 3, pg
[3], bsize
);
307 NFT_PIPAPO_AVX2_AND(5, 0, 1);
309 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing
);
311 NFT_PIPAPO_AVX2_AND(6, 2, 3);
312 NFT_PIPAPO_AVX2_AND(7, 4, 5);
314 NFT_PIPAPO_AVX2_AND(7, 6, 7);
318 NFT_PIPAPO_AVX2_NOMATCH_GOTO(7, nomatch
);
319 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 7);
321 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
325 if (unlikely(ret
== -1))
326 ret
= b
/ XSAVE_YMM_SIZE
;
330 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
339 * nft_pipapo_avx2_lookup_4b_8() - AVX2-based lookup for 8 four-bit groups
340 * @map: Previous match result, used as initial bitmap
341 * @fill: Destination bitmap to be filled with current match result
342 * @f: Field, containing lookup and mapping tables
343 * @offset: Ignore buckets before the given index, no bits are filled there
344 * @pkt: Packet data, pointer to input nftables register
345 * @first: If this is the first field, don't source previous result
346 * @last: Last field: stop at the first match and return bit index
348 * See nft_pipapo_avx2_lookup_4b_2().
350 * This is used for 32-bit fields (i.e. IPv4 addresses).
352 * Return: -1 on no match, rule index of match if @last, otherwise first long
353 * word index to be checked next (i.e. first filled word).
355 static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map
, unsigned long *fill
,
356 struct nft_pipapo_field
*f
, int offset
,
357 const u8
*pkt
, bool first
, bool last
)
359 u8 pg
[8] = { pkt
[0] >> 4, pkt
[0] & 0xf, pkt
[1] >> 4, pkt
[1] & 0xf,
360 pkt
[2] >> 4, pkt
[2] & 0xf, pkt
[3] >> 4, pkt
[3] & 0xf,
362 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
363 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
365 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
366 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
367 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
370 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
371 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 1, pg
[1], bsize
);
372 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 2, pg
[2], bsize
);
373 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 3, pg
[3], bsize
);
374 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt
, 4, pg
[4], bsize
);
375 NFT_PIPAPO_AVX2_AND(5, 0, 1);
376 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt
, 5, pg
[5], bsize
);
377 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt
, 6, pg
[6], bsize
);
378 NFT_PIPAPO_AVX2_AND(8, 2, 3);
379 NFT_PIPAPO_AVX2_AND(9, 4, 5);
380 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt
, 7, pg
[7], bsize
);
381 NFT_PIPAPO_AVX2_AND(11, 6, 7);
382 NFT_PIPAPO_AVX2_AND(12, 8, 9);
383 NFT_PIPAPO_AVX2_AND(13, 10, 11);
386 NFT_PIPAPO_AVX2_AND(1, 12, 13);
388 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
389 NFT_PIPAPO_AVX2_LOAD(1, map
[i_ul
]);
390 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 1, pg
[1], bsize
);
391 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 2, pg
[2], bsize
);
392 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt
, 3, pg
[3], bsize
);
394 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing
);
396 NFT_PIPAPO_AVX2_AND(5, 0, 1);
397 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt
, 4, pg
[4], bsize
);
398 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt
, 5, pg
[5], bsize
);
399 NFT_PIPAPO_AVX2_AND(8, 2, 3);
400 NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt
, 6, pg
[6], bsize
);
401 NFT_PIPAPO_AVX2_AND(10, 4, 5);
402 NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt
, 7, pg
[7], bsize
);
403 NFT_PIPAPO_AVX2_AND(12, 6, 7);
404 NFT_PIPAPO_AVX2_AND(13, 8, 9);
405 NFT_PIPAPO_AVX2_AND(14, 10, 11);
408 NFT_PIPAPO_AVX2_AND(1, 12, 13);
409 NFT_PIPAPO_AVX2_AND(1, 1, 14);
412 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nomatch
);
413 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 1);
415 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
419 if (unlikely(ret
== -1))
420 ret
= b
/ XSAVE_YMM_SIZE
;
425 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
434 * nft_pipapo_avx2_lookup_4b_12() - AVX2-based lookup for 12 four-bit groups
435 * @map: Previous match result, used as initial bitmap
436 * @fill: Destination bitmap to be filled with current match result
437 * @f: Field, containing lookup and mapping tables
438 * @offset: Ignore buckets before the given index, no bits are filled there
439 * @pkt: Packet data, pointer to input nftables register
440 * @first: If this is the first field, don't source previous result
441 * @last: Last field: stop at the first match and return bit index
443 * See nft_pipapo_avx2_lookup_4b_2().
445 * This is used for 48-bit fields (i.e. MAC addresses/EUI-48).
447 * Return: -1 on no match, rule index of match if @last, otherwise first long
448 * word index to be checked next (i.e. first filled word).
450 static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map
, unsigned long *fill
,
451 struct nft_pipapo_field
*f
, int offset
,
452 const u8
*pkt
, bool first
, bool last
)
454 u8 pg
[12] = { pkt
[0] >> 4, pkt
[0] & 0xf, pkt
[1] >> 4, pkt
[1] & 0xf,
455 pkt
[2] >> 4, pkt
[2] & 0xf, pkt
[3] >> 4, pkt
[3] & 0xf,
456 pkt
[4] >> 4, pkt
[4] & 0xf, pkt
[5] >> 4, pkt
[5] & 0xf,
458 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
459 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
461 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
462 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
463 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
466 NFT_PIPAPO_AVX2_LOAD(0, map
[i_ul
]);
468 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 0, pg
[0], bsize
);
469 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 1, pg
[1], bsize
);
470 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 2, pg
[2], bsize
);
473 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing
);
474 NFT_PIPAPO_AVX2_AND(1, 1, 0);
477 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt
, 3, pg
[3], bsize
);
478 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt
, 4, pg
[4], bsize
);
479 NFT_PIPAPO_AVX2_AND(6, 2, 3);
480 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt
, 5, pg
[5], bsize
);
481 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt
, 6, pg
[6], bsize
);
482 NFT_PIPAPO_AVX2_AND(9, 1, 4);
483 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt
, 7, pg
[7], bsize
);
484 NFT_PIPAPO_AVX2_AND(11, 5, 6);
485 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt
, 8, pg
[8], bsize
);
486 NFT_PIPAPO_AVX2_AND(13, 7, 8);
487 NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt
, 9, pg
[9], bsize
);
489 NFT_PIPAPO_AVX2_AND(0, 9, 10);
490 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 10, pg
[10], bsize
);
491 NFT_PIPAPO_AVX2_AND(2, 11, 12);
492 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 11, pg
[11], bsize
);
493 NFT_PIPAPO_AVX2_AND(4, 13, 14);
494 NFT_PIPAPO_AVX2_AND(5, 0, 1);
496 NFT_PIPAPO_AVX2_AND(6, 2, 3);
499 NFT_PIPAPO_AVX2_AND(7, 4, 5);
500 NFT_PIPAPO_AVX2_AND(8, 6, 7);
502 NFT_PIPAPO_AVX2_NOMATCH_GOTO(8, nomatch
);
503 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 8);
505 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
509 if (unlikely(ret
== -1))
510 ret
= b
/ XSAVE_YMM_SIZE
;
514 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
523 * nft_pipapo_avx2_lookup_4b_32() - AVX2-based lookup for 32 four-bit groups
524 * @map: Previous match result, used as initial bitmap
525 * @fill: Destination bitmap to be filled with current match result
526 * @f: Field, containing lookup and mapping tables
527 * @offset: Ignore buckets before the given index, no bits are filled there
528 * @pkt: Packet data, pointer to input nftables register
529 * @first: If this is the first field, don't source previous result
530 * @last: Last field: stop at the first match and return bit index
532 * See nft_pipapo_avx2_lookup_4b_2().
534 * This is used for 128-bit fields (i.e. IPv6 addresses).
536 * Return: -1 on no match, rule index of match if @last, otherwise first long
537 * word index to be checked next (i.e. first filled word).
539 static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map
, unsigned long *fill
,
540 struct nft_pipapo_field
*f
, int offset
,
541 const u8
*pkt
, bool first
, bool last
)
543 u8 pg
[32] = { pkt
[0] >> 4, pkt
[0] & 0xf, pkt
[1] >> 4, pkt
[1] & 0xf,
544 pkt
[2] >> 4, pkt
[2] & 0xf, pkt
[3] >> 4, pkt
[3] & 0xf,
545 pkt
[4] >> 4, pkt
[4] & 0xf, pkt
[5] >> 4, pkt
[5] & 0xf,
546 pkt
[6] >> 4, pkt
[6] & 0xf, pkt
[7] >> 4, pkt
[7] & 0xf,
547 pkt
[8] >> 4, pkt
[8] & 0xf, pkt
[9] >> 4, pkt
[9] & 0xf,
548 pkt
[10] >> 4, pkt
[10] & 0xf, pkt
[11] >> 4, pkt
[11] & 0xf,
549 pkt
[12] >> 4, pkt
[12] & 0xf, pkt
[13] >> 4, pkt
[13] & 0xf,
550 pkt
[14] >> 4, pkt
[14] & 0xf, pkt
[15] >> 4, pkt
[15] & 0xf,
552 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
553 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
555 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
556 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
557 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
560 NFT_PIPAPO_AVX2_LOAD(0, map
[i_ul
]);
562 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 0, pg
[0], bsize
);
563 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 1, pg
[1], bsize
);
564 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 2, pg
[2], bsize
);
565 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt
, 3, pg
[3], bsize
);
567 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing
);
568 NFT_PIPAPO_AVX2_AND(1, 1, 0);
571 NFT_PIPAPO_AVX2_AND(5, 2, 3);
572 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt
, 4, pg
[4], bsize
);
573 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt
, 5, pg
[5], bsize
);
574 NFT_PIPAPO_AVX2_AND(8, 1, 4);
575 NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt
, 6, pg
[6], bsize
);
576 NFT_PIPAPO_AVX2_AND(10, 5, 6);
577 NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt
, 7, pg
[7], bsize
);
578 NFT_PIPAPO_AVX2_AND(12, 7, 8);
579 NFT_PIPAPO_AVX2_BUCKET_LOAD4(13, lt
, 8, pg
[8], bsize
);
580 NFT_PIPAPO_AVX2_AND(14, 9, 10);
582 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 9, pg
[9], bsize
);
583 NFT_PIPAPO_AVX2_AND(1, 11, 12);
584 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 10, pg
[10], bsize
);
585 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 11, pg
[11], bsize
);
586 NFT_PIPAPO_AVX2_AND(4, 13, 14);
587 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt
, 12, pg
[12], bsize
);
588 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt
, 13, pg
[13], bsize
);
589 NFT_PIPAPO_AVX2_AND(7, 0, 1);
590 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt
, 14, pg
[14], bsize
);
591 NFT_PIPAPO_AVX2_AND(9, 2, 3);
592 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt
, 15, pg
[15], bsize
);
593 NFT_PIPAPO_AVX2_AND(11, 4, 5);
594 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt
, 16, pg
[16], bsize
);
595 NFT_PIPAPO_AVX2_AND(13, 6, 7);
596 NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt
, 17, pg
[17], bsize
);
598 NFT_PIPAPO_AVX2_AND(0, 8, 9);
599 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 18, pg
[18], bsize
);
600 NFT_PIPAPO_AVX2_AND(2, 10, 11);
601 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 19, pg
[19], bsize
);
602 NFT_PIPAPO_AVX2_AND(4, 12, 13);
603 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt
, 20, pg
[20], bsize
);
604 NFT_PIPAPO_AVX2_AND(6, 14, 0);
605 NFT_PIPAPO_AVX2_AND(7, 1, 2);
606 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt
, 21, pg
[21], bsize
);
607 NFT_PIPAPO_AVX2_AND(9, 3, 4);
608 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt
, 22, pg
[22], bsize
);
609 NFT_PIPAPO_AVX2_AND(11, 5, 6);
610 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt
, 23, pg
[23], bsize
);
611 NFT_PIPAPO_AVX2_AND(13, 7, 8);
613 NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt
, 24, pg
[24], bsize
);
614 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 25, pg
[25], bsize
);
615 NFT_PIPAPO_AVX2_AND(1, 9, 10);
616 NFT_PIPAPO_AVX2_AND(2, 11, 12);
617 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 26, pg
[26], bsize
);
618 NFT_PIPAPO_AVX2_AND(4, 13, 14);
619 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt
, 27, pg
[27], bsize
);
620 NFT_PIPAPO_AVX2_AND(6, 0, 1);
621 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt
, 28, pg
[28], bsize
);
622 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt
, 29, pg
[29], bsize
);
623 NFT_PIPAPO_AVX2_AND(9, 2, 3);
624 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt
, 30, pg
[30], bsize
);
625 NFT_PIPAPO_AVX2_AND(11, 4, 5);
626 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt
, 31, pg
[31], bsize
);
628 NFT_PIPAPO_AVX2_AND(0, 6, 7);
629 NFT_PIPAPO_AVX2_AND(1, 8, 9);
630 NFT_PIPAPO_AVX2_AND(2, 10, 11);
631 NFT_PIPAPO_AVX2_AND(3, 12, 0);
634 NFT_PIPAPO_AVX2_AND(4, 1, 2);
635 NFT_PIPAPO_AVX2_AND(5, 3, 4);
637 NFT_PIPAPO_AVX2_NOMATCH_GOTO(5, nomatch
);
638 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 5);
640 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
644 if (unlikely(ret
== -1))
645 ret
= b
/ XSAVE_YMM_SIZE
;
649 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
658 * nft_pipapo_avx2_lookup_8b_1() - AVX2-based lookup for one eight-bit group
659 * @map: Previous match result, used as initial bitmap
660 * @fill: Destination bitmap to be filled with current match result
661 * @f: Field, containing lookup and mapping tables
662 * @offset: Ignore buckets before the given index, no bits are filled there
663 * @pkt: Packet data, pointer to input nftables register
664 * @first: If this is the first field, don't source previous result
665 * @last: Last field: stop at the first match and return bit index
667 * See nft_pipapo_avx2_lookup_4b_2().
669 * This is used for 8-bit fields (i.e. protocol numbers).
671 * Return: -1 on no match, rule index of match if @last, otherwise first long
672 * word index to be checked next (i.e. first filled word).
674 static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map
, unsigned long *fill
,
675 struct nft_pipapo_field
*f
, int offset
,
676 const u8
*pkt
, bool first
, bool last
)
678 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
679 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
681 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
682 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
683 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
686 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 0, pkt
[0], bsize
);
688 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
689 NFT_PIPAPO_AVX2_LOAD(1, map
[i_ul
]);
690 NFT_PIPAPO_AVX2_AND(2, 0, 1);
691 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing
);
694 NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nomatch
);
695 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 2);
697 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
701 if (unlikely(ret
== -1))
702 ret
= b
/ XSAVE_YMM_SIZE
;
706 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
715 * nft_pipapo_avx2_lookup_8b_2() - AVX2-based lookup for 2 eight-bit groups
716 * @map: Previous match result, used as initial bitmap
717 * @fill: Destination bitmap to be filled with current match result
718 * @f: Field, containing lookup and mapping tables
719 * @offset: Ignore buckets before the given index, no bits are filled there
720 * @pkt: Packet data, pointer to input nftables register
721 * @first: If this is the first field, don't source previous result
722 * @last: Last field: stop at the first match and return bit index
724 * See nft_pipapo_avx2_lookup_4b_2().
726 * This is used for 16-bit fields (i.e. ports).
728 * Return: -1 on no match, rule index of match if @last, otherwise first long
729 * word index to be checked next (i.e. first filled word).
731 static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map
, unsigned long *fill
,
732 struct nft_pipapo_field
*f
, int offset
,
733 const u8
*pkt
, bool first
, bool last
)
735 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
736 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
738 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
739 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
740 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
743 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
744 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 1, pkt
[1], bsize
);
745 NFT_PIPAPO_AVX2_AND(4, 0, 1);
747 NFT_PIPAPO_AVX2_LOAD(0, map
[i_ul
]);
748 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 0, pkt
[0], bsize
);
749 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 1, pkt
[1], bsize
);
752 NFT_PIPAPO_AVX2_AND(3, 0, 1);
753 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing
);
754 NFT_PIPAPO_AVX2_AND(4, 3, 2);
758 NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch
);
759 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 4);
761 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
765 if (unlikely(ret
== -1))
766 ret
= b
/ XSAVE_YMM_SIZE
;
770 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
779 * nft_pipapo_avx2_lookup_8b_4() - AVX2-based lookup for 4 eight-bit groups
780 * @map: Previous match result, used as initial bitmap
781 * @fill: Destination bitmap to be filled with current match result
782 * @f: Field, containing lookup and mapping tables
783 * @offset: Ignore buckets before the given index, no bits are filled there
784 * @pkt: Packet data, pointer to input nftables register
785 * @first: If this is the first field, don't source previous result
786 * @last: Last field: stop at the first match and return bit index
788 * See nft_pipapo_avx2_lookup_4b_2().
790 * This is used for 32-bit fields (i.e. IPv4 addresses).
792 * Return: -1 on no match, rule index of match if @last, otherwise first long
793 * word index to be checked next (i.e. first filled word).
795 static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map
, unsigned long *fill
,
796 struct nft_pipapo_field
*f
, int offset
,
797 const u8
*pkt
, bool first
, bool last
)
799 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
800 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
802 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
803 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
804 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
807 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
808 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 1, pkt
[1], bsize
);
809 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 2, pkt
[2], bsize
);
810 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 3, pkt
[3], bsize
);
813 NFT_PIPAPO_AVX2_AND(4, 0, 1);
814 NFT_PIPAPO_AVX2_AND(5, 2, 3);
815 NFT_PIPAPO_AVX2_AND(0, 4, 5);
817 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
818 NFT_PIPAPO_AVX2_LOAD(1, map
[i_ul
]);
819 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 1, pkt
[1], bsize
);
820 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 2, pkt
[2], bsize
);
821 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt
, 3, pkt
[3], bsize
);
823 NFT_PIPAPO_AVX2_AND(5, 0, 1);
824 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing
);
825 NFT_PIPAPO_AVX2_AND(6, 2, 3);
828 NFT_PIPAPO_AVX2_AND(7, 4, 5);
829 NFT_PIPAPO_AVX2_AND(0, 6, 7);
832 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nomatch
);
833 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 0);
835 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
839 if (unlikely(ret
== -1))
840 ret
= b
/ XSAVE_YMM_SIZE
;
845 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
854 * nft_pipapo_avx2_lookup_8b_6() - AVX2-based lookup for 6 eight-bit groups
855 * @map: Previous match result, used as initial bitmap
856 * @fill: Destination bitmap to be filled with current match result
857 * @f: Field, containing lookup and mapping tables
858 * @offset: Ignore buckets before the given index, no bits are filled there
859 * @pkt: Packet data, pointer to input nftables register
860 * @first: If this is the first field, don't source previous result
861 * @last: Last field: stop at the first match and return bit index
863 * See nft_pipapo_avx2_lookup_4b_2().
865 * This is used for 48-bit fields (i.e. MAC addresses/EUI-48).
867 * Return: -1 on no match, rule index of match if @last, otherwise first long
868 * word index to be checked next (i.e. first filled word).
870 static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map
, unsigned long *fill
,
871 struct nft_pipapo_field
*f
, int offset
,
872 const u8
*pkt
, bool first
, bool last
)
874 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
875 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
877 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
878 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
879 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
882 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
883 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 1, pkt
[1], bsize
);
884 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 2, pkt
[2], bsize
);
885 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 3, pkt
[3], bsize
);
886 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt
, 4, pkt
[4], bsize
);
888 NFT_PIPAPO_AVX2_AND(5, 0, 1);
889 NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt
, 5, pkt
[5], bsize
);
890 NFT_PIPAPO_AVX2_AND(7, 2, 3);
893 NFT_PIPAPO_AVX2_AND(0, 4, 5);
894 NFT_PIPAPO_AVX2_AND(1, 6, 7);
895 NFT_PIPAPO_AVX2_AND(4, 0, 1);
897 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
898 NFT_PIPAPO_AVX2_LOAD(1, map
[i_ul
]);
899 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 1, pkt
[1], bsize
);
900 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 2, pkt
[2], bsize
);
901 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt
, 3, pkt
[3], bsize
);
903 NFT_PIPAPO_AVX2_AND(5, 0, 1);
904 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing
);
906 NFT_PIPAPO_AVX2_AND(6, 2, 3);
907 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt
, 4, pkt
[4], bsize
);
908 NFT_PIPAPO_AVX2_AND(0, 4, 5);
909 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 5, pkt
[5], bsize
);
910 NFT_PIPAPO_AVX2_AND(2, 6, 7);
913 NFT_PIPAPO_AVX2_AND(3, 0, 1);
914 NFT_PIPAPO_AVX2_AND(4, 2, 3);
917 NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch
);
918 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 4);
920 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
924 if (unlikely(ret
== -1))
925 ret
= b
/ XSAVE_YMM_SIZE
;
930 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
939 * nft_pipapo_avx2_lookup_8b_16() - AVX2-based lookup for 16 eight-bit groups
940 * @map: Previous match result, used as initial bitmap
941 * @fill: Destination bitmap to be filled with current match result
942 * @f: Field, containing lookup and mapping tables
943 * @offset: Ignore buckets before the given index, no bits are filled there
944 * @pkt: Packet data, pointer to input nftables register
945 * @first: If this is the first field, don't source previous result
946 * @last: Last field: stop at the first match and return bit index
948 * See nft_pipapo_avx2_lookup_4b_2().
950 * This is used for 128-bit fields (i.e. IPv6 addresses).
952 * Return: -1 on no match, rule index of match if @last, otherwise first long
953 * word index to be checked next (i.e. first filled word).
955 static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map
, unsigned long *fill
,
956 struct nft_pipapo_field
*f
, int offset
,
957 const u8
*pkt
, bool first
, bool last
)
959 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
960 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
962 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
963 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
964 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
967 NFT_PIPAPO_AVX2_LOAD(0, map
[i_ul
]);
969 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 0, pkt
[0], bsize
);
970 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 1, pkt
[1], bsize
);
971 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 2, pkt
[2], bsize
);
973 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing
);
974 NFT_PIPAPO_AVX2_AND(1, 1, 0);
976 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt
, 3, pkt
[3], bsize
);
978 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt
, 4, pkt
[4], bsize
);
979 NFT_PIPAPO_AVX2_AND(6, 1, 2);
980 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt
, 5, pkt
[5], bsize
);
981 NFT_PIPAPO_AVX2_AND(0, 3, 4);
982 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 6, pkt
[6], bsize
);
984 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 7, pkt
[7], bsize
);
985 NFT_PIPAPO_AVX2_AND(3, 5, 6);
986 NFT_PIPAPO_AVX2_AND(4, 0, 1);
987 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt
, 8, pkt
[8], bsize
);
989 NFT_PIPAPO_AVX2_AND(6, 2, 3);
990 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt
, 9, pkt
[9], bsize
);
991 NFT_PIPAPO_AVX2_AND(0, 4, 5);
992 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 10, pkt
[10], bsize
);
993 NFT_PIPAPO_AVX2_AND(2, 6, 7);
994 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 11, pkt
[11], bsize
);
995 NFT_PIPAPO_AVX2_AND(4, 0, 1);
996 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt
, 12, pkt
[12], bsize
);
997 NFT_PIPAPO_AVX2_AND(6, 2, 3);
998 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt
, 13, pkt
[13], bsize
);
999 NFT_PIPAPO_AVX2_AND(0, 4, 5);
1000 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 14, pkt
[14], bsize
);
1001 NFT_PIPAPO_AVX2_AND(2, 6, 7);
1002 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 15, pkt
[15], bsize
);
1003 NFT_PIPAPO_AVX2_AND(4, 0, 1);
1006 NFT_PIPAPO_AVX2_AND(5, 2, 3);
1007 NFT_PIPAPO_AVX2_AND(6, 4, 5);
1009 NFT_PIPAPO_AVX2_NOMATCH_GOTO(6, nomatch
);
1010 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 6);
1012 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
1016 if (unlikely(ret
== -1))
1017 ret
= b
/ XSAVE_YMM_SIZE
;
1022 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
1031 * nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes
1032 * @map: Previous match result, used as initial bitmap
1033 * @fill: Destination bitmap to be filled with current match result
1034 * @f: Field, containing lookup and mapping tables
1035 * @offset: Ignore buckets before the given index, no bits are filled there
1036 * @pkt: Packet data, pointer to input nftables register
1037 * @first: If this is the first field, don't source previous result
1038 * @last: Last field: stop at the first match and return bit index
1040 * This function should never be called, but is provided for the case where the
1041 * field size doesn't match any of the known data types. The matching rate is
1042 * substantially lower than that of the AVX2 routines.
1044 * Return: -1 on no match, rule index of match if @last, otherwise first long
1045 * word index to be checked next (i.e. first filled word).
/* Fallback lookup built on the pipapo_and_field_buckets_*() and
 * pipapo_refill() helpers rather than on the inline AVX2 macros used by the
 * other lookup routines in this file.
 */
1047 static int nft_pipapo_avx2_lookup_slow(unsigned long *map
, unsigned long *fill
,
1048 struct nft_pipapo_field
*f
, int offset
,
1049 const u8
*pkt
, bool first
, bool last
)
/* Bitmap size taken from the field; used below as a count of longs */
1051 unsigned long bsize
= f
->bsize
;
/* Set every bit in the first bsize longs of @map.
 * NOTE(review): the condition guarding this (presumably @first) is not
 * visible in this extract - confirm.
 */
1055 memset(map
, 0xff, bsize
* sizeof(*map
));
/* Iterate from @offset up to (excluding) bsize */
1057 for (i
= offset
; i
< bsize
; i
++) {
/* AND the buckets selected by @pkt into @map, with one helper per group
 * width. NOTE(review): the selector between the two calls (presumably
 * f->bb) is not visible in this extract - confirm.
 */
1059 pipapo_and_field_buckets_8bit(f
, map
, pkt
);
1061 pipapo_and_field_buckets_4bit(f
, map
, pkt
);
/* NOTE(review): presumably a build-time reminder that only 4- and 8-bit
 * groups are handled - confirm against the macro definition.
 */
1062 NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4
;
/* Hand @map, the field's rule count and mapping table to pipapo_refill()
 * together with @fill and @last; b receives the helper's result.
 */
1064 b
= pipapo_refill(map
, bsize
, f
->rules
, fill
, f
->mt
, last
);
/* Scale b down by XSAVE_YMM_SIZE to form the return value; the AVX2
 * routines in this file apply the same conversion only while ret is
 * still -1.
 */
1070 ret
= b
/ XSAVE_YMM_SIZE
;
1077 * nft_pipapo_avx2_estimate() - Set size, space and lookup complexity
1078 * @desc: Set description, element count and field description used
1079 * @features: Flags: NFT_SET_INTERVAL needs to be there
1080 * @est: Storage for estimation data
1082 * Return: true if set is compatible and AVX2 available, false otherwise.
/* Checks set flags, field count and CPU features, then fills the size,
 * lookup class and space class members of @est.
 */
1084 bool nft_pipapo_avx2_estimate(const struct nft_set_desc
*desc
, u32 features
,
1085 struct nft_set_estimate
*est
)
/* The set must carry NFT_SET_INTERVAL and describe at least
 * NFT_PIPAPO_MIN_FIELDS fields.
 * NOTE(review): the statement executed when this test fails is not visible
 * in this extract; the Return note for this function documents "false".
 */
1087 if (!(features
& NFT_SET_INTERVAL
) ||
1088 desc
->field_count
< NFT_PIPAPO_MIN_FIELDS
)
/* Both the AVX2 and the AVX feature bits must be reported for the boot CPU */
1091 if (!boot_cpu_has(X86_FEATURE_AVX2
) || !boot_cpu_has(X86_FEATURE_AVX
))
/* Size estimate is delegated to pipapo_estimate_size().
 * NOTE(review): source line numbers jump from 1094 to 1098, so a check on
 * the returned size may follow here - confirm.
 */
1094 est
->size
= pipapo_estimate_size(desc
);
/* Advertise the O(log n) lookup class... */
1098 est
->lookup
= NFT_SET_CLASS_O_LOG_N
;
/* ...and the O(n) space class */
1100 est
->space
= NFT_SET_CLASS_O_N
;
1106 * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
1107 * @net: Network namespace
1108 * @set: nftables API set representation
1109 * @key: nftables API element representation containing key data
1110 * @ext: nftables API extension pointer, filled with matching reference
1112 * For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
1114 * This implementation exploits the repetitive characteristic of the algorithm
1115 * to provide a fast, vectorised version using the AVX2 SIMD instruction set.
1117 * Return: true on match, false otherwise.
1119 bool nft_pipapo_avx2_lookup(const struct net
*net
, const struct nft_set
*set
,
1120 const u32
*key
, const struct nft_set_ext
**ext
)
1122 struct nft_pipapo
*priv
= nft_set_priv(set
);
1123 unsigned long *res
, *fill
, *scratch
;
1124 u8 genmask
= nft_genmask_cur(net
);
1125 const u8
*rp
= (const u8
*)key
;
1126 struct nft_pipapo_match
*m
;
1127 struct nft_pipapo_field
*f
;
1131 if (unlikely(!irq_fpu_usable()))
1132 return nft_pipapo_lookup(net
, set
, key
, ext
);
1134 m
= rcu_dereference(priv
->match
);
1136 /* This also protects access to all data related to scratch maps.
1138 * Note that we don't need a valid MXCSR state for any of the
1139 * operations we use here, so pass 0 as mask and spare a LDMXCSR instruction. */
1142 kernel_fpu_begin_mask(0);
1144 scratch
= *raw_cpu_ptr(m
->scratch_aligned
);
1145 if (unlikely(!scratch
)) {
1149 map_index
= raw_cpu_read(nft_pipapo_avx2_scratch_index
);
1151 res
= scratch
+ (map_index
? m
->bsize_max
: 0);
1152 fill
= scratch
+ (map_index
? 0 : m
->bsize_max
);
1154 /* Starting map doesn't need to be set for this implementation */
1156 nft_pipapo_avx2_prepare();
1159 nft_pipapo_for_each_field(f
, i
, m
) {
1160 bool last
= i
== m
->field_count
- 1, first
= !i
;
1162 #define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \
1163 (ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \
1167 if (likely(f
->bb
== 8)) {
1168 if (f
->groups
== 1) {
1169 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 1);
1170 } else if (f
->groups
== 2) {
1171 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 2);
1172 } else if (f
->groups
== 4) {
1173 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 4);
1174 } else if (f
->groups
== 6) {
1175 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 6);
1176 } else if (f
->groups
== 16) {
1177 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
1179 ret
= nft_pipapo_avx2_lookup_slow(res
, fill
, f
,
1184 if (f
->groups
== 2) {
1185 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 2);
1186 } else if (f
->groups
== 4) {
1187 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 4);
1188 } else if (f
->groups
== 8) {
1189 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 8);
1190 } else if (f
->groups
== 12) {
1191 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 12);
1192 } else if (f
->groups
== 32) {
1193 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
1195 ret
= nft_pipapo_avx2_lookup_slow(res
, fill
, f
,
1200 NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4
;
1202 #undef NFT_SET_PIPAPO_AVX2_LOOKUP
1208 *ext
= &f
->mt
[ret
].e
->ext
;
1209 if (unlikely(nft_set_elem_expired(*ext
) ||
1210 !nft_set_elem_active(*ext
, genmask
))) {
1219 rp
+= NFT_PIPAPO_GROUPS_PADDED_SIZE(f
);
1224 raw_cpu_write(nft_pipapo_avx2_scratch_index
, !map_index
);