1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org>
5 #ifndef _IP_SET_HASH_GEN_H
6 #define _IP_SET_HASH_GEN_H
8 #include <linux/rcupdate.h>
9 #include <linux/jhash.h>
10 #include <linux/types.h>
11 #include <linux/netfilter/nfnetlink.h>
12 #include <linux/netfilter/ipset/ip_set.h>
14 #define __ipset_dereference(p) \
15 rcu_dereference_protected(p, 1)
16 #define ipset_dereference_nfnl(p) \
17 rcu_dereference_protected(p, \
18 lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
19 #define ipset_dereference_set(p, set) \
20 rcu_dereference_protected(p, \
21 lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
22 lockdep_is_held(&(set)->lock))
23 #define ipset_dereference_bh_nfnl(p) \
24 rcu_dereference_bh_check(p, \
25 lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
27 /* Hashing which uses arrays to resolve clashing. The hash table is resized
28 * (doubled) when searching becomes too long.
29 * Internally jhash is used with the assumption that the size of the
30 * stored data is a multiple of sizeof(u32).
32 * Readers and resizing
34 * Resizing can be triggered by userspace command only, and those
35 * are serialized by the nfnl mutex. During resizing the set is
36 * read-locked, so the only possible concurrent operations are
37 * the kernel side readers. Those must be protected by proper RCU locking.
40 /* Number of elements to store in an initial array block */
41 #define AHASH_INIT_SIZE 2
42 /* Max number of elements to store in an array block */
43 #define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE)
44 /* Max muber of elements in the array block when tuned */
45 #define AHASH_MAX_TUNED 64
46 #define AHASH_MAX(h) ((h)->bucketsize)
50 struct rcu_head rcu
; /* for call_rcu_bh */
51 /* Which positions are used in the array */
52 DECLARE_BITMAP(used
, AHASH_MAX_TUNED
);
53 u8 size
; /* size of the array */
54 u8 pos
; /* position of the first free entry */
55 unsigned char value
[] /* the array of the values */
56 __aligned(__alignof__(u64
));
59 /* Region size for locking == 2^HTABLE_REGION_BITS */
60 #define HTABLE_REGION_BITS 10
61 #define ahash_numof_locks(htable_bits) \
62 ((htable_bits) < HTABLE_REGION_BITS ? 1 \
63 : jhash_size((htable_bits) - HTABLE_REGION_BITS))
64 #define ahash_sizeof_regions(htable_bits) \
65 (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
66 #define ahash_region(n, htable_bits) \
67 ((n) % ahash_numof_locks(htable_bits))
68 #define ahash_bucket_start(h, htable_bits) \
69 ((htable_bits) < HTABLE_REGION_BITS ? 0 \
70 : (h) * jhash_size(HTABLE_REGION_BITS))
71 #define ahash_bucket_end(h, htable_bits) \
72 ((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits) \
73 : ((h) + 1) * jhash_size(HTABLE_REGION_BITS))
76 struct delayed_work dwork
;
77 struct ip_set
*set
; /* Set the gc belongs to */
78 u32 region
; /* Last gc run position */
81 /* The hash table: the table size stored here in order to make resizing easy */
83 atomic_t ref
; /* References for resizing */
84 atomic_t uref
; /* References for dumping and gc */
85 u8 htable_bits
; /* size of hash table == 2^htable_bits */
86 u32 maxelem
; /* Maxelem per region */
87 struct ip_set_region
*hregion
; /* Region locks and ext sizes */
88 struct hbucket __rcu
*bucket
[]; /* hashtable buckets */
91 #define hbucket(h, i) ((h)->bucket[i])
92 #define ext_size(n, dsize) \
93 (sizeof(struct hbucket) + (n) * (dsize))
95 #ifndef IPSET_NET_COUNT
96 #define IPSET_NET_COUNT 1
99 /* Book-keeping of the prefixes added to the set */
100 struct net_prefixes
{
101 u32 nets
[IPSET_NET_COUNT
]; /* number of elements for this cidr */
102 u8 cidr
[IPSET_NET_COUNT
]; /* the cidr value */
105 /* Compute the hash table size */
107 htable_size(u8 hbits
)
111 /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
114 hsize
= jhash_size(hbits
);
115 if ((INT_MAX
- sizeof(struct htable
)) / sizeof(struct hbucket
*)
119 return hsize
* sizeof(struct hbucket
*) + sizeof(struct htable
);
122 #ifdef IP_SET_HASH_WITH_NETS
123 #if IPSET_NET_COUNT > 1
124 #define __CIDR(cidr, i) (cidr[i])
126 #define __CIDR(cidr, i) (cidr)
129 /* cidr + 1 is stored in net_prefixes to support /0 */
130 #define NCIDR_PUT(cidr) ((cidr) + 1)
131 #define NCIDR_GET(cidr) ((cidr) - 1)
133 #ifdef IP_SET_HASH_WITH_NETS_PACKED
134 /* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
135 #define DCIDR_PUT(cidr) ((cidr) - 1)
136 #define DCIDR_GET(cidr, i) (__CIDR(cidr, i) + 1)
138 #define DCIDR_PUT(cidr) (cidr)
139 #define DCIDR_GET(cidr, i) __CIDR(cidr, i)
142 #define INIT_CIDR(cidr, host_mask) \
143 DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
145 #ifdef IP_SET_HASH_WITH_NET0
146 /* cidr from 0 to HOST_MASK value and c = cidr + 1 */
147 #define NLEN (HOST_MASK + 1)
148 #define CIDR_POS(c) ((c) - 1)
150 /* cidr from 1 to HOST_MASK value and c = cidr + 1 */
151 #define NLEN HOST_MASK
152 #define CIDR_POS(c) ((c) - 2)
157 #endif /* IP_SET_HASH_WITH_NETS */
159 #define SET_ELEM_EXPIRED(set, d) \
160 (SET_WITH_TIMEOUT(set) && \
161 ip_set_timeout_expired(ext_timeout(d, set)))
163 #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
164 static const union nf_inet_addr onesmask
= {
165 .all
[0] = 0xffffffff,
166 .all
[1] = 0xffffffff,
167 .all
[2] = 0xffffffff,
171 static const union nf_inet_addr zeromask
= {};
174 #endif /* _IP_SET_HASH_GEN_H */
177 #error "MTYPE is not defined!"
181 #error "HTYPE is not defined!"
185 #error "HOST_MASK is not defined!"
188 /* Family dependent templates */
191 #undef mtype_data_equal
192 #undef mtype_do_data_match
193 #undef mtype_data_set_flags
194 #undef mtype_data_reset_elem
195 #undef mtype_data_reset_flags
196 #undef mtype_data_netmask
197 #undef mtype_data_list
198 #undef mtype_data_next
201 #undef mtype_ahash_destroy
202 #undef mtype_ext_cleanup
203 #undef mtype_add_cidr
204 #undef mtype_del_cidr
205 #undef mtype_ahash_memsize
208 #undef mtype_same_set
214 #undef mtype_test_cidrs
218 #undef mtype_ext_size
219 #undef mtype_resize_ad
225 #undef mtype_cancel_gc
227 #undef mtype_data_match
232 #define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal)
233 #ifdef IP_SET_HASH_WITH_NETS
234 #define mtype_do_data_match IPSET_TOKEN(MTYPE, _do_data_match)
236 #define mtype_do_data_match(d) 1
238 #define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags)
239 #define mtype_data_reset_elem IPSET_TOKEN(MTYPE, _data_reset_elem)
240 #define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags)
241 #define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask)
242 #define mtype_data_list IPSET_TOKEN(MTYPE, _data_list)
243 #define mtype_data_next IPSET_TOKEN(MTYPE, _data_next)
244 #define mtype_elem IPSET_TOKEN(MTYPE, _elem)
246 #define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy)
247 #define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup)
248 #define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr)
249 #define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr)
250 #define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize)
251 #define mtype_flush IPSET_TOKEN(MTYPE, _flush)
252 #define mtype_destroy IPSET_TOKEN(MTYPE, _destroy)
253 #define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
254 #define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
255 #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
257 #define mtype_add IPSET_TOKEN(MTYPE, _add)
258 #define mtype_del IPSET_TOKEN(MTYPE, _del)
259 #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
260 #define mtype_test IPSET_TOKEN(MTYPE, _test)
261 #define mtype_uref IPSET_TOKEN(MTYPE, _uref)
262 #define mtype_resize IPSET_TOKEN(MTYPE, _resize)
263 #define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size)
264 #define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad)
265 #define mtype_head IPSET_TOKEN(MTYPE, _head)
266 #define mtype_list IPSET_TOKEN(MTYPE, _list)
267 #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
268 #define mtype_gc IPSET_TOKEN(MTYPE, _gc)
269 #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
270 #define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
271 #define mtype_variant IPSET_TOKEN(MTYPE, _variant)
272 #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
275 #define HKEY_DATALEN sizeof(struct mtype_elem)
280 #define HKEY(data, initval, htable_bits) \
282 const u32 *__k = (const u32 *)data; \
283 u32 __l = HKEY_DATALEN / sizeof(u32); \
285 BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0); \
287 jhash2(__k, __l, initval) & jhash_mask(htable_bits); \
290 /* The generic hash structure */
292 struct htable __rcu
*table
; /* the hash table */
293 struct htable_gc gc
; /* gc workqueue */
294 u32 maxelem
; /* max elements in the hash */
295 u32 initval
; /* random jhash init value */
296 #ifdef IP_SET_HASH_WITH_MARKMASK
297 u32 markmask
; /* markmask value for mark mask to store */
299 u8 bucketsize
; /* max elements in an array block */
300 #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
301 u8 netmask
; /* netmask value for subnets to store */
302 union nf_inet_addr bitmask
; /* stores bitmask */
304 struct list_head ad
; /* Resize add|del backlist */
305 struct mtype_elem next
; /* temporary storage for uadd */
306 #ifdef IP_SET_HASH_WITH_NETS
307 struct net_prefixes nets
[NLEN
]; /* book-keeping of prefixes */
311 /* ADD|DEL entries saved during resize */
312 struct mtype_resize_ad
{
313 struct list_head list
;
314 enum ipset_adt ad
; /* ADD|DEL element */
315 struct mtype_elem d
; /* Element value */
316 struct ip_set_ext ext
; /* Extensions for ADD */
317 struct ip_set_ext mext
; /* Target extensions for ADD */
318 u32 flags
; /* Flags for ADD */
321 #ifdef IP_SET_HASH_WITH_NETS
322 /* Network cidr size book keeping when the hash stores different
323 * sized networks. cidr == real cidr + 1 to support /0.
326 mtype_add_cidr(struct ip_set
*set
, struct htype
*h
, u8 cidr
, u8 n
)
330 spin_lock_bh(&set
->lock
);
331 /* Add in increasing prefix order, so larger cidr first */
332 for (i
= 0, j
= -1; i
< NLEN
&& h
->nets
[i
].cidr
[n
]; i
++) {
335 } else if (h
->nets
[i
].cidr
[n
] < cidr
) {
337 } else if (h
->nets
[i
].cidr
[n
] == cidr
) {
338 h
->nets
[CIDR_POS(cidr
)].nets
[n
]++;
344 h
->nets
[i
].cidr
[n
] = h
->nets
[i
- 1].cidr
[n
];
346 h
->nets
[i
].cidr
[n
] = cidr
;
347 h
->nets
[CIDR_POS(cidr
)].nets
[n
] = 1;
349 spin_unlock_bh(&set
->lock
);
353 mtype_del_cidr(struct ip_set
*set
, struct htype
*h
, u8 cidr
, u8 n
)
355 u8 i
, j
, net_end
= NLEN
- 1;
357 spin_lock_bh(&set
->lock
);
358 for (i
= 0; i
< NLEN
; i
++) {
359 if (h
->nets
[i
].cidr
[n
] != cidr
)
361 h
->nets
[CIDR_POS(cidr
)].nets
[n
]--;
362 if (h
->nets
[CIDR_POS(cidr
)].nets
[n
] > 0)
364 for (j
= i
; j
< net_end
&& h
->nets
[j
].cidr
[n
]; j
++)
365 h
->nets
[j
].cidr
[n
] = h
->nets
[j
+ 1].cidr
[n
];
366 h
->nets
[j
].cidr
[n
] = 0;
370 spin_unlock_bh(&set
->lock
);
374 /* Calculate the actual memory size of the set data */
376 mtype_ahash_memsize(const struct htype
*h
, const struct htable
*t
)
378 return sizeof(*h
) + sizeof(*t
) + ahash_sizeof_regions(t
->htable_bits
);
381 /* Get the ith element from the array block n */
382 #define ahash_data(n, i, dsize) \
383 ((struct mtype_elem *)((n)->value + ((i) * (dsize))))
386 mtype_ext_cleanup(struct ip_set
*set
, struct hbucket
*n
)
390 for (i
= 0; i
< n
->pos
; i
++)
391 if (test_bit(i
, n
->used
))
392 ip_set_ext_destroy(set
, ahash_data(n
, i
, set
->dsize
));
395 /* Flush a hash type of set: destroy all elements */
397 mtype_flush(struct ip_set
*set
)
399 struct htype
*h
= set
->data
;
404 t
= ipset_dereference_nfnl(h
->table
);
405 for (r
= 0; r
< ahash_numof_locks(t
->htable_bits
); r
++) {
406 spin_lock_bh(&t
->hregion
[r
].lock
);
407 for (i
= ahash_bucket_start(r
, t
->htable_bits
);
408 i
< ahash_bucket_end(r
, t
->htable_bits
); i
++) {
409 n
= __ipset_dereference(hbucket(t
, i
));
412 if (set
->extensions
& IPSET_EXT_DESTROY
)
413 mtype_ext_cleanup(set
, n
);
414 /* FIXME: use slab cache */
415 rcu_assign_pointer(hbucket(t
, i
), NULL
);
418 t
->hregion
[r
].ext_size
= 0;
419 t
->hregion
[r
].elements
= 0;
420 spin_unlock_bh(&t
->hregion
[r
].lock
);
422 #ifdef IP_SET_HASH_WITH_NETS
423 memset(h
->nets
, 0, sizeof(h
->nets
));
427 /* Destroy the hashtable part of the set */
429 mtype_ahash_destroy(struct ip_set
*set
, struct htable
*t
, bool ext_destroy
)
434 for (i
= 0; i
< jhash_size(t
->htable_bits
); i
++) {
438 if (set
->extensions
& IPSET_EXT_DESTROY
&& ext_destroy
)
439 mtype_ext_cleanup(set
, n
);
440 /* FIXME: use slab cache */
444 ip_set_free(t
->hregion
);
448 /* Destroy a hash type of set */
450 mtype_destroy(struct ip_set
*set
)
452 struct htype
*h
= set
->data
;
453 struct list_head
*l
, *lt
;
455 mtype_ahash_destroy(set
, h
->table
, true);
456 list_for_each_safe(l
, lt
, &h
->ad
) {
466 mtype_same_set(const struct ip_set
*a
, const struct ip_set
*b
)
468 const struct htype
*x
= a
->data
;
469 const struct htype
*y
= b
->data
;
471 /* Resizing changes htable_bits, so we ignore it */
472 return x
->maxelem
== y
->maxelem
&&
473 a
->timeout
== b
->timeout
&&
474 #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
475 nf_inet_addr_cmp(&x
->bitmask
, &y
->bitmask
) &&
477 #ifdef IP_SET_HASH_WITH_MARKMASK
478 x
->markmask
== y
->markmask
&&
480 a
->extensions
== b
->extensions
;
484 mtype_gc_do(struct ip_set
*set
, struct htype
*h
, struct htable
*t
, u32 r
)
486 struct hbucket
*n
, *tmp
;
487 struct mtype_elem
*data
;
489 size_t dsize
= set
->dsize
;
490 #ifdef IP_SET_HASH_WITH_NETS
493 u8 htable_bits
= t
->htable_bits
;
495 spin_lock_bh(&t
->hregion
[r
].lock
);
496 for (i
= ahash_bucket_start(r
, htable_bits
);
497 i
< ahash_bucket_end(r
, htable_bits
); i
++) {
498 n
= __ipset_dereference(hbucket(t
, i
));
501 for (j
= 0, d
= 0; j
< n
->pos
; j
++) {
502 if (!test_bit(j
, n
->used
)) {
506 data
= ahash_data(n
, j
, dsize
);
507 if (!ip_set_timeout_expired(ext_timeout(data
, set
)))
509 pr_debug("expired %u/%u\n", i
, j
);
510 clear_bit(j
, n
->used
);
511 smp_mb__after_atomic();
512 #ifdef IP_SET_HASH_WITH_NETS
513 for (k
= 0; k
< IPSET_NET_COUNT
; k
++)
514 mtype_del_cidr(set
, h
,
515 NCIDR_PUT(DCIDR_GET(data
->cidr
, k
)),
518 t
->hregion
[r
].elements
--;
519 ip_set_ext_destroy(set
, data
);
522 if (d
>= AHASH_INIT_SIZE
) {
524 t
->hregion
[r
].ext_size
-=
525 ext_size(n
->size
, dsize
);
526 rcu_assign_pointer(hbucket(t
, i
), NULL
);
530 tmp
= kzalloc(sizeof(*tmp
) +
531 (n
->size
- AHASH_INIT_SIZE
) * dsize
,
534 /* Still try to delete expired elements. */
536 tmp
->size
= n
->size
- AHASH_INIT_SIZE
;
537 for (j
= 0, d
= 0; j
< n
->pos
; j
++) {
538 if (!test_bit(j
, n
->used
))
540 data
= ahash_data(n
, j
, dsize
);
541 memcpy(tmp
->value
+ d
* dsize
,
543 set_bit(d
, tmp
->used
);
547 t
->hregion
[r
].ext_size
-=
548 ext_size(AHASH_INIT_SIZE
, dsize
);
549 rcu_assign_pointer(hbucket(t
, i
), tmp
);
553 spin_unlock_bh(&t
->hregion
[r
].lock
);
557 mtype_gc(struct work_struct
*work
)
559 struct htable_gc
*gc
;
564 unsigned int next_run
;
566 gc
= container_of(work
, struct htable_gc
, dwork
.work
);
570 spin_lock_bh(&set
->lock
);
571 t
= ipset_dereference_set(h
->table
, set
);
572 atomic_inc(&t
->uref
);
573 numof_locks
= ahash_numof_locks(t
->htable_bits
);
575 if (r
>= numof_locks
) {
578 next_run
= (IPSET_GC_PERIOD(set
->timeout
) * HZ
) / numof_locks
;
579 if (next_run
< HZ
/10)
581 spin_unlock_bh(&set
->lock
);
583 mtype_gc_do(set
, h
, t
, r
);
585 if (atomic_dec_and_test(&t
->uref
) && atomic_read(&t
->ref
)) {
586 pr_debug("Table destroy after resize by expire: %p\n", t
);
587 mtype_ahash_destroy(set
, t
, false);
590 queue_delayed_work(system_power_efficient_wq
, &gc
->dwork
, next_run
);
594 mtype_gc_init(struct htable_gc
*gc
)
596 INIT_DEFERRABLE_WORK(&gc
->dwork
, mtype_gc
);
597 queue_delayed_work(system_power_efficient_wq
, &gc
->dwork
, HZ
);
601 mtype_cancel_gc(struct ip_set
*set
)
603 struct htype
*h
= set
->data
;
605 if (SET_WITH_TIMEOUT(set
))
606 cancel_delayed_work_sync(&h
->gc
.dwork
);
610 mtype_add(struct ip_set
*set
, void *value
, const struct ip_set_ext
*ext
,
611 struct ip_set_ext
*mext
, u32 flags
);
613 mtype_del(struct ip_set
*set
, void *value
, const struct ip_set_ext
*ext
,
614 struct ip_set_ext
*mext
, u32 flags
);
616 /* Resize a hash: create a new hash table with doubling the hashsize
617 * and inserting the elements to it. Repeat until we succeed or
618 * fail due to memory pressures.
621 mtype_resize(struct ip_set
*set
, bool retried
)
623 struct htype
*h
= set
->data
;
624 struct htable
*t
, *orig
;
626 size_t hsize
, dsize
= set
->dsize
;
627 #ifdef IP_SET_HASH_WITH_NETS
629 struct mtype_elem
*tmp
;
631 struct mtype_elem
*data
;
632 struct mtype_elem
*d
;
633 struct hbucket
*n
, *m
;
634 struct list_head
*l
, *lt
;
635 struct mtype_resize_ad
*x
;
636 u32 i
, j
, r
, nr
, key
;
639 #ifdef IP_SET_HASH_WITH_NETS
640 tmp
= kmalloc(dsize
, GFP_KERNEL
);
644 orig
= ipset_dereference_bh_nfnl(h
->table
);
645 htable_bits
= orig
->htable_bits
;
652 hsize
= htable_size(htable_bits
);
655 t
= ip_set_alloc(hsize
);
660 t
->hregion
= ip_set_alloc(ahash_sizeof_regions(htable_bits
));
666 t
->htable_bits
= htable_bits
;
667 t
->maxelem
= h
->maxelem
/ ahash_numof_locks(htable_bits
);
668 for (i
= 0; i
< ahash_numof_locks(htable_bits
); i
++)
669 spin_lock_init(&t
->hregion
[i
].lock
);
671 /* There can't be another parallel resizing,
672 * but dumping, gc, kernel side add/del are possible
674 orig
= ipset_dereference_bh_nfnl(h
->table
);
675 atomic_set(&orig
->ref
, 1);
676 atomic_inc(&orig
->uref
);
677 pr_debug("attempt to resize set %s from %u to %u, t %p\n",
678 set
->name
, orig
->htable_bits
, htable_bits
, orig
);
679 for (r
= 0; r
< ahash_numof_locks(orig
->htable_bits
); r
++) {
680 /* Expire may replace a hbucket with another one */
682 for (i
= ahash_bucket_start(r
, orig
->htable_bits
);
683 i
< ahash_bucket_end(r
, orig
->htable_bits
); i
++) {
684 n
= __ipset_dereference(hbucket(orig
, i
));
687 for (j
= 0; j
< n
->pos
; j
++) {
688 if (!test_bit(j
, n
->used
))
690 data
= ahash_data(n
, j
, dsize
);
691 if (SET_ELEM_EXPIRED(set
, data
))
693 #ifdef IP_SET_HASH_WITH_NETS
694 /* We have readers running parallel with us,
695 * so the live data cannot be modified.
698 memcpy(tmp
, data
, dsize
);
700 mtype_data_reset_flags(data
, &flags
);
702 key
= HKEY(data
, h
->initval
, htable_bits
);
703 m
= __ipset_dereference(hbucket(t
, key
));
704 nr
= ahash_region(key
, htable_bits
);
706 m
= kzalloc(sizeof(*m
) +
707 AHASH_INIT_SIZE
* dsize
,
713 m
->size
= AHASH_INIT_SIZE
;
714 t
->hregion
[nr
].ext_size
+=
715 ext_size(AHASH_INIT_SIZE
,
717 RCU_INIT_POINTER(hbucket(t
, key
), m
);
718 } else if (m
->pos
>= m
->size
) {
721 if (m
->size
>= AHASH_MAX(h
)) {
724 ht
= kzalloc(sizeof(*ht
) +
725 (m
->size
+ AHASH_INIT_SIZE
)
733 memcpy(ht
, m
, sizeof(struct hbucket
) +
735 ht
->size
= m
->size
+ AHASH_INIT_SIZE
;
736 t
->hregion
[nr
].ext_size
+=
737 ext_size(AHASH_INIT_SIZE
,
741 RCU_INIT_POINTER(hbucket(t
, key
), ht
);
743 d
= ahash_data(m
, m
->pos
, dsize
);
744 memcpy(d
, data
, dsize
);
745 set_bit(m
->pos
++, m
->used
);
746 t
->hregion
[nr
].elements
++;
747 #ifdef IP_SET_HASH_WITH_NETS
748 mtype_data_reset_flags(d
, &flags
);
752 rcu_read_unlock_bh();
755 /* There can't be any other writer. */
756 rcu_assign_pointer(h
->table
, t
);
758 /* Give time to other readers of the set */
761 pr_debug("set %s resized from %u (%p) to %u (%p)\n", set
->name
,
762 orig
->htable_bits
, orig
, t
->htable_bits
, t
);
763 /* Add/delete elements processed by the SET target during resize.
764 * Kernel-side add cannot trigger a resize and userspace actions
765 * are serialized by the mutex.
767 list_for_each_safe(l
, lt
, &h
->ad
) {
768 x
= list_entry(l
, struct mtype_resize_ad
, list
);
769 if (x
->ad
== IPSET_ADD
) {
770 mtype_add(set
, &x
->d
, &x
->ext
, &x
->mext
, x
->flags
);
772 mtype_del(set
, &x
->d
, NULL
, NULL
, 0);
777 /* If there's nobody else using the table, destroy it */
778 if (atomic_dec_and_test(&orig
->uref
)) {
779 pr_debug("Table destroy by resize %p\n", orig
);
780 mtype_ahash_destroy(set
, orig
, false);
784 #ifdef IP_SET_HASH_WITH_NETS
790 rcu_read_unlock_bh();
791 atomic_set(&orig
->ref
, 0);
792 atomic_dec(&orig
->uref
);
793 mtype_ahash_destroy(set
, t
, false);
799 /* In case we have plenty of memory :-) */
800 pr_warn("Cannot increase the hashsize of set %s further\n", set
->name
);
801 ret
= -IPSET_ERR_HASH_FULL
;
805 /* Get the current number of elements and ext_size in the set */
807 mtype_ext_size(struct ip_set
*set
, u32
*elements
, size_t *ext_size
)
809 struct htype
*h
= set
->data
;
810 const struct htable
*t
;
813 struct mtype_elem
*data
;
816 t
= rcu_dereference_bh(h
->table
);
817 for (r
= 0; r
< ahash_numof_locks(t
->htable_bits
); r
++) {
818 for (i
= ahash_bucket_start(r
, t
->htable_bits
);
819 i
< ahash_bucket_end(r
, t
->htable_bits
); i
++) {
820 n
= rcu_dereference_bh(hbucket(t
, i
));
823 for (j
= 0; j
< n
->pos
; j
++) {
824 if (!test_bit(j
, n
->used
))
826 data
= ahash_data(n
, j
, set
->dsize
);
827 if (!SET_ELEM_EXPIRED(set
, data
))
831 *ext_size
+= t
->hregion
[r
].ext_size
;
835 /* Add an element to a hash and update the internal counters when succeeded,
836 * otherwise report the proper error code.
839 mtype_add(struct ip_set
*set
, void *value
, const struct ip_set_ext
*ext
,
840 struct ip_set_ext
*mext
, u32 flags
)
842 struct htype
*h
= set
->data
;
844 const struct mtype_elem
*d
= value
;
845 struct mtype_elem
*data
;
846 struct hbucket
*n
, *old
= ERR_PTR(-ENOENT
);
848 bool flag_exist
= flags
& IPSET_FLAG_EXIST
;
849 bool deleted
= false, forceadd
= false, reuse
= false;
850 u32 r
, key
, multi
= 0, elements
, maxelem
;
853 t
= rcu_dereference_bh(h
->table
);
854 key
= HKEY(value
, h
->initval
, t
->htable_bits
);
855 r
= ahash_region(key
, t
->htable_bits
);
856 atomic_inc(&t
->uref
);
857 elements
= t
->hregion
[r
].elements
;
858 maxelem
= t
->maxelem
;
859 if (elements
>= maxelem
) {
861 if (SET_WITH_TIMEOUT(set
)) {
862 rcu_read_unlock_bh();
863 mtype_gc_do(set
, h
, t
, r
);
866 maxelem
= h
->maxelem
;
868 for (e
= 0; e
< ahash_numof_locks(t
->htable_bits
); e
++)
869 elements
+= t
->hregion
[e
].elements
;
870 if (elements
>= maxelem
&& SET_WITH_FORCEADD(set
))
873 rcu_read_unlock_bh();
875 spin_lock_bh(&t
->hregion
[r
].lock
);
876 n
= rcu_dereference_bh(hbucket(t
, key
));
878 if (forceadd
|| elements
>= maxelem
)
881 n
= kzalloc(sizeof(*n
) + AHASH_INIT_SIZE
* set
->dsize
,
887 n
->size
= AHASH_INIT_SIZE
;
888 t
->hregion
[r
].ext_size
+=
889 ext_size(AHASH_INIT_SIZE
, set
->dsize
);
892 for (i
= 0; i
< n
->pos
; i
++) {
893 if (!test_bit(i
, n
->used
)) {
894 /* Reuse first deleted entry */
896 deleted
= reuse
= true;
901 data
= ahash_data(n
, i
, set
->dsize
);
902 if (mtype_data_equal(data
, d
, &multi
)) {
903 if (flag_exist
|| SET_ELEM_EXPIRED(set
, data
)) {
904 /* Just the extensions could be overwritten */
906 goto overwrite_extensions
;
908 ret
= -IPSET_ERR_EXIST
;
911 /* Reuse first timed out entry */
912 if (SET_ELEM_EXPIRED(set
, data
) && j
== -1) {
917 if (reuse
|| forceadd
) {
920 data
= ahash_data(n
, j
, set
->dsize
);
922 #ifdef IP_SET_HASH_WITH_NETS
923 for (i
= 0; i
< IPSET_NET_COUNT
; i
++)
924 mtype_del_cidr(set
, h
,
925 NCIDR_PUT(DCIDR_GET(data
->cidr
, i
)),
928 ip_set_ext_destroy(set
, data
);
929 t
->hregion
[r
].elements
--;
933 if (elements
>= maxelem
)
935 /* Create a new slot */
936 if (n
->pos
>= n
->size
) {
937 #ifdef IP_SET_HASH_WITH_MULTI
938 if (h
->bucketsize
>= AHASH_MAX_TUNED
)
940 else if (h
->bucketsize
<= multi
)
941 h
->bucketsize
+= AHASH_INIT_SIZE
;
943 if (n
->size
>= AHASH_MAX(h
)) {
944 /* Trigger rehashing */
945 mtype_data_next(&h
->next
, d
);
950 n
= kzalloc(sizeof(*n
) +
951 (old
->size
+ AHASH_INIT_SIZE
) * set
->dsize
,
957 memcpy(n
, old
, sizeof(struct hbucket
) +
958 old
->size
* set
->dsize
);
959 n
->size
= old
->size
+ AHASH_INIT_SIZE
;
960 t
->hregion
[r
].ext_size
+=
961 ext_size(AHASH_INIT_SIZE
, set
->dsize
);
966 data
= ahash_data(n
, j
, set
->dsize
);
968 t
->hregion
[r
].elements
++;
969 #ifdef IP_SET_HASH_WITH_NETS
970 for (i
= 0; i
< IPSET_NET_COUNT
; i
++)
971 mtype_add_cidr(set
, h
, NCIDR_PUT(DCIDR_GET(d
->cidr
, i
)), i
);
973 memcpy(data
, d
, sizeof(struct mtype_elem
));
974 overwrite_extensions
:
975 #ifdef IP_SET_HASH_WITH_NETS
976 mtype_data_set_flags(data
, flags
);
978 if (SET_WITH_COUNTER(set
))
979 ip_set_init_counter(ext_counter(data
, set
), ext
);
980 if (SET_WITH_COMMENT(set
))
981 ip_set_init_comment(set
, ext_comment(data
, set
), ext
);
982 if (SET_WITH_SKBINFO(set
))
983 ip_set_init_skbinfo(ext_skbinfo(data
, set
), ext
);
984 /* Must come last for the case when timed out entry is reused */
985 if (SET_WITH_TIMEOUT(set
))
986 ip_set_timeout_set(ext_timeout(data
, set
), ext
->timeout
);
987 smp_mb__before_atomic();
989 if (old
!= ERR_PTR(-ENOENT
)) {
990 rcu_assign_pointer(hbucket(t
, key
), n
);
996 spin_unlock_bh(&t
->hregion
[r
].lock
);
997 if (atomic_read(&t
->ref
) && ext
->target
) {
998 /* Resize is in process and kernel side add, save values */
999 struct mtype_resize_ad
*x
;
1001 x
= kzalloc(sizeof(struct mtype_resize_ad
), GFP_ATOMIC
);
1006 memcpy(&x
->d
, value
, sizeof(struct mtype_elem
));
1007 memcpy(&x
->ext
, ext
, sizeof(struct ip_set_ext
));
1008 memcpy(&x
->mext
, mext
, sizeof(struct ip_set_ext
));
1010 spin_lock_bh(&set
->lock
);
1011 list_add_tail(&x
->list
, &h
->ad
);
1012 spin_unlock_bh(&set
->lock
);
1017 if (net_ratelimit())
1018 pr_warn("Set %s is full, maxelem %u reached\n",
1019 set
->name
, maxelem
);
1020 ret
= -IPSET_ERR_HASH_FULL
;
1022 spin_unlock_bh(&t
->hregion
[r
].lock
);
1024 if (atomic_dec_and_test(&t
->uref
) && atomic_read(&t
->ref
)) {
1025 pr_debug("Table destroy after resize by add: %p\n", t
);
1026 mtype_ahash_destroy(set
, t
, false);
1031 /* Delete an element from the hash and free up space if possible.
1034 mtype_del(struct ip_set
*set
, void *value
, const struct ip_set_ext
*ext
,
1035 struct ip_set_ext
*mext
, u32 flags
)
1037 struct htype
*h
= set
->data
;
1039 const struct mtype_elem
*d
= value
;
1040 struct mtype_elem
*data
;
1042 struct mtype_resize_ad
*x
= NULL
;
1043 int i
, j
, k
, r
, ret
= -IPSET_ERR_EXIST
;
1045 size_t dsize
= set
->dsize
;
1047 /* Userspace add and resize is excluded by the mutex.
1048 * Kernespace add does not trigger resize.
1051 t
= rcu_dereference_bh(h
->table
);
1052 key
= HKEY(value
, h
->initval
, t
->htable_bits
);
1053 r
= ahash_region(key
, t
->htable_bits
);
1054 atomic_inc(&t
->uref
);
1055 rcu_read_unlock_bh();
1057 spin_lock_bh(&t
->hregion
[r
].lock
);
1058 n
= rcu_dereference_bh(hbucket(t
, key
));
1061 for (i
= 0, k
= 0; i
< n
->pos
; i
++) {
1062 if (!test_bit(i
, n
->used
)) {
1066 data
= ahash_data(n
, i
, dsize
);
1067 if (!mtype_data_equal(data
, d
, &multi
))
1069 if (SET_ELEM_EXPIRED(set
, data
))
1073 clear_bit(i
, n
->used
);
1074 smp_mb__after_atomic();
1075 if (i
+ 1 == n
->pos
)
1077 t
->hregion
[r
].elements
--;
1078 #ifdef IP_SET_HASH_WITH_NETS
1079 for (j
= 0; j
< IPSET_NET_COUNT
; j
++)
1080 mtype_del_cidr(set
, h
,
1081 NCIDR_PUT(DCIDR_GET(d
->cidr
, j
)), j
);
1083 ip_set_ext_destroy(set
, data
);
1085 if (atomic_read(&t
->ref
) && ext
->target
) {
1086 /* Resize is in process and kernel side del,
1089 x
= kzalloc(sizeof(struct mtype_resize_ad
),
1093 memcpy(&x
->d
, value
,
1094 sizeof(struct mtype_elem
));
1098 for (; i
< n
->pos
; i
++) {
1099 if (!test_bit(i
, n
->used
))
1102 if (n
->pos
== 0 && k
== 0) {
1103 t
->hregion
[r
].ext_size
-= ext_size(n
->size
, dsize
);
1104 rcu_assign_pointer(hbucket(t
, key
), NULL
);
1106 } else if (k
>= AHASH_INIT_SIZE
) {
1107 struct hbucket
*tmp
= kzalloc(sizeof(*tmp
) +
1108 (n
->size
- AHASH_INIT_SIZE
) * dsize
,
1112 tmp
->size
= n
->size
- AHASH_INIT_SIZE
;
1113 for (j
= 0, k
= 0; j
< n
->pos
; j
++) {
1114 if (!test_bit(j
, n
->used
))
1116 data
= ahash_data(n
, j
, dsize
);
1117 memcpy(tmp
->value
+ k
* dsize
, data
, dsize
);
1118 set_bit(k
, tmp
->used
);
1122 t
->hregion
[r
].ext_size
-=
1123 ext_size(AHASH_INIT_SIZE
, dsize
);
1124 rcu_assign_pointer(hbucket(t
, key
), tmp
);
1131 spin_unlock_bh(&t
->hregion
[r
].lock
);
1133 spin_lock_bh(&set
->lock
);
1134 list_add(&x
->list
, &h
->ad
);
1135 spin_unlock_bh(&set
->lock
);
1137 if (atomic_dec_and_test(&t
->uref
) && atomic_read(&t
->ref
)) {
1138 pr_debug("Table destroy after resize by del: %p\n", t
);
1139 mtype_ahash_destroy(set
, t
, false);
1145 mtype_data_match(struct mtype_elem
*data
, const struct ip_set_ext
*ext
,
1146 struct ip_set_ext
*mext
, struct ip_set
*set
, u32 flags
)
1148 if (!ip_set_match_extensions(set
, ext
, mext
, flags
, data
))
1150 /* nomatch entries return -ENOTEMPTY */
1151 return mtype_do_data_match(data
);
1154 #ifdef IP_SET_HASH_WITH_NETS
1155 /* Special test function which takes into account the different network
1156 * sizes added to the set
1159 mtype_test_cidrs(struct ip_set
*set
, struct mtype_elem
*d
,
1160 const struct ip_set_ext
*ext
,
1161 struct ip_set_ext
*mext
, u32 flags
)
1163 struct htype
*h
= set
->data
;
1164 struct htable
*t
= rcu_dereference_bh(h
->table
);
1166 struct mtype_elem
*data
;
1167 #if IPSET_NET_COUNT == 2
1168 struct mtype_elem orig
= *d
;
1169 int ret
, i
, j
= 0, k
;
1175 pr_debug("test by nets\n");
1176 for (; j
< NLEN
&& h
->nets
[j
].cidr
[0] && !multi
; j
++) {
1177 #if IPSET_NET_COUNT == 2
1178 mtype_data_reset_elem(d
, &orig
);
1179 mtype_data_netmask(d
, NCIDR_GET(h
->nets
[j
].cidr
[0]), false);
1180 for (k
= 0; k
< NLEN
&& h
->nets
[k
].cidr
[1] && !multi
;
1182 mtype_data_netmask(d
, NCIDR_GET(h
->nets
[k
].cidr
[1]),
1185 mtype_data_netmask(d
, NCIDR_GET(h
->nets
[j
].cidr
[0]));
1187 key
= HKEY(d
, h
->initval
, t
->htable_bits
);
1188 n
= rcu_dereference_bh(hbucket(t
, key
));
1191 for (i
= 0; i
< n
->pos
; i
++) {
1192 if (!test_bit(i
, n
->used
))
1194 data
= ahash_data(n
, i
, set
->dsize
);
1195 if (!mtype_data_equal(data
, d
, &multi
))
1197 ret
= mtype_data_match(data
, ext
, mext
, set
, flags
);
1200 #ifdef IP_SET_HASH_WITH_MULTI
1201 /* No match, reset multiple match flag */
1205 #if IPSET_NET_COUNT == 2
1213 /* Test whether the element is added to the set */
1215 mtype_test(struct ip_set
*set
, void *value
, const struct ip_set_ext
*ext
,
1216 struct ip_set_ext
*mext
, u32 flags
)
1218 struct htype
*h
= set
->data
;
1220 struct mtype_elem
*d
= value
;
1222 struct mtype_elem
*data
;
1227 t
= rcu_dereference_bh(h
->table
);
1228 #ifdef IP_SET_HASH_WITH_NETS
1229 /* If we test an IP address and not a network address,
1230 * try all possible network sizes
1232 for (i
= 0; i
< IPSET_NET_COUNT
; i
++)
1233 if (DCIDR_GET(d
->cidr
, i
) != HOST_MASK
)
1235 if (i
== IPSET_NET_COUNT
) {
1236 ret
= mtype_test_cidrs(set
, d
, ext
, mext
, flags
);
1241 key
= HKEY(d
, h
->initval
, t
->htable_bits
);
1242 n
= rcu_dereference_bh(hbucket(t
, key
));
1247 for (i
= 0; i
< n
->pos
; i
++) {
1248 if (!test_bit(i
, n
->used
))
1250 data
= ahash_data(n
, i
, set
->dsize
);
1251 if (!mtype_data_equal(data
, d
, &multi
))
1253 ret
= mtype_data_match(data
, ext
, mext
, set
, flags
);
1258 rcu_read_unlock_bh();
1262 /* Reply a HEADER request: fill out the header part of the set */
/*
 * mtype_head - emit the set's create-time parameters as a nested
 * IPSET_ATTR_DATA netlink attribute block on @skb.
 *
 * NOTE(review): lines are missing from this extract (gaps in the
 * embedded line numbers): the opening brace, the declarations of
 * memsize/htable_bits/elements, the rcu_read_lock_bh(), several
 * #endif lines and the nla_put_failure label are in elided portions.
 */
1264 mtype_head(struct ip_set
*set
, struct sk_buff
*skb
)
1266 struct htype
*h
= set
->data
;
1267 const struct htable
*t
;
1268 struct nlattr
*nested
;
1271 size_t ext_size
= 0;
/* Snapshot the sizing information under RCU: element count, extension
 * memory, total memory footprint and the current table size.
 */
1275 t
= rcu_dereference_bh(h
->table
);
1276 mtype_ext_size(set
, &elements
, &ext_size
);
1277 memsize
= mtype_ahash_memsize(h
, t
) + ext_size
+ set
->ext_size
;
1278 htable_bits
= t
->htable_bits
;
1279 rcu_read_unlock_bh();
/* Open the nested DATA attribute; the failure check for a NULL
 * return sits in an elided line before the goto below.
 */
1281 nested
= ipset_nest_start(skb
, IPSET_ATTR_DATA
);
1283 goto nla_put_failure
;
1284 if (nla_put_net32(skb
, IPSET_ATTR_HASHSIZE
,
1285 htonl(jhash_size(htable_bits
))) ||
1286 nla_put_net32(skb
, IPSET_ATTR_MAXELEM
, htonl(h
->maxelem
)))
1287 goto nla_put_failure
;
1288 #ifdef IP_SET_HASH_WITH_BITMASK
1289 /* if netmask is set to anything other than HOST_MASK we know that the user supplied netmask
1290 * and not bitmask. These two are mutually exclusive. */
/* Report the bitmask only when it is a real (non-all-ones) bitmask
 * and no plain netmask was given; pick the attribute type by family.
 */
1291 if (h
->netmask
== HOST_MASK
&& !nf_inet_addr_cmp(&onesmask
, &h
->bitmask
)) {
1292 if (set
->family
== NFPROTO_IPV4
) {
1293 if (nla_put_ipaddr4(skb
, IPSET_ATTR_BITMASK
, h
->bitmask
.ip
))
1294 goto nla_put_failure
;
1295 } else if (set
->family
== NFPROTO_IPV6
) {
1296 if (nla_put_ipaddr6(skb
, IPSET_ATTR_BITMASK
, &h
->bitmask
.in6
))
1297 goto nla_put_failure
;
1301 #ifdef IP_SET_HASH_WITH_NETMASK
/* Report a plain netmask (prefix length) only when it differs from
 * the host mask, i.e. the user actually supplied one at create time.
 */
1302 if (h
->netmask
!= HOST_MASK
&& nla_put_u8(skb
, IPSET_ATTR_NETMASK
, h
->netmask
))
1303 goto nla_put_failure
;
1305 #ifdef IP_SET_HASH_WITH_MARKMASK
1306 if (nla_put_u32(skb
, IPSET_ATTR_MARKMASK
, h
->markmask
))
1307 goto nla_put_failure
;
/* Bucketsize/initval are reported only for sets created with the
 * explicit bucketsize create-flag.
 */
1309 if (set
->flags
& IPSET_CREATE_FLAG_BUCKETSIZE
) {
1310 if (nla_put_u8(skb
, IPSET_ATTR_BUCKETSIZE
, h
->bucketsize
) ||
1311 nla_put_net32(skb
, IPSET_ATTR_INITVAL
, htonl(h
->initval
)))
1312 goto nla_put_failure
;
1314 if (nla_put_net32(skb
, IPSET_ATTR_REFERENCES
, htonl(set
->ref
)) ||
1315 nla_put_net32(skb
, IPSET_ATTR_MEMSIZE
, htonl(memsize
)) ||
1316 nla_put_net32(skb
, IPSET_ATTR_ELEMENTS
, htonl(elements
)))
1317 goto nla_put_failure
;
1318 if (unlikely(ip_set_put_flags(skb
, set
)))
1319 goto nla_put_failure
;
1320 ipset_nest_end(skb
, nested
);
1327 /* Make possible to run dumping parallel with resizing */
/*
 * mtype_uref - take/drop a dump reference (t->uref) on the hash table
 * so a netlink dump can proceed in parallel with a resize.  The table
 * pointer is parked in cb->args[IPSET_CB_PRIVATE] between dump calls.
 *
 * NOTE(review): elided lines hold the opening brace, the declaration
 * of t, the "if (start)" branch head with its rcu_read_lock_bh(), and
 * the closing braces — visible fragments start mid-function.
 */
1329 mtype_uref(struct ip_set
*set
, struct netlink_callback
*cb
, bool start
)
1331 struct htype
*h
= set
->data
;
/* Dump start: pin the current table and stash it for later calls. */
1336 t
= ipset_dereference_bh_nfnl(h
->table
);
1337 atomic_inc(&t
->uref
);
1338 cb
->args
[IPSET_CB_PRIVATE
] = (unsigned long)t
;
1339 rcu_read_unlock_bh();
/* Dump end: drop the reference taken at start. */
1340 } else if (cb
->args
[IPSET_CB_PRIVATE
]) {
1341 t
= (struct htable
*)cb
->args
[IPSET_CB_PRIVATE
];
/* If we were the last dump user and a resize already released its own
 * reference (t->ref set), the old table must be destroyed here.
 */
1342 if (atomic_dec_and_test(&t
->uref
) && atomic_read(&t
->ref
)) {
1343 pr_debug("Table destroy after resize "
1344 " by dump: %p\n", t
);
1345 mtype_ahash_destroy(set
, t
, false);
1347 cb
->args
[IPSET_CB_PRIVATE
] = 0;
1351 /* Reply a LIST/SAVE request: dump the elements of the specified set */
/*
 * mtype_list - netlink dump callback body: walk the hash table saved
 * in cb->args[IPSET_CB_PRIVATE] and emit one nested DATA attribute
 * per live element, resuming at cb->args[IPSET_CB_ARG0].
 *
 * NOTE(review): the extract is missing lines (gaps in the embedded
 * numbering): the opening brace, declarations of incomplete/i/ret,
 * NULL-bucket and NULL-nest checks, `continue` statements, the
 * function's return paths and the nla_put_failure label head.
 */
1353 mtype_list(const struct ip_set
*set
,
1354 struct sk_buff
*skb
, struct netlink_callback
*cb
)
1356 const struct htable
*t
;
1357 struct nlattr
*atd
, *nested
;
1358 const struct hbucket
*n
;
1359 const struct mtype_elem
*e
;
/* Remember where this dump round starts so we can detect the
 * "one bucket does not fit into a message" case below.
 */
1360 u32 first
= cb
->args
[IPSET_CB_ARG0
];
1361 /* We assume that one hash bucket fills into one page */
1365 atd
= ipset_nest_start(skb
, IPSET_ATTR_ADT
);
1369 pr_debug("list hash set %s\n", set
->name
);
/* Table was pinned by mtype_uref() at dump start. */
1370 t
= (const struct htable
*)cb
->args
[IPSET_CB_PRIVATE
];
1371 /* Expire may replace a hbucket with another one */
/* Outer loop: one hash bucket per iteration, resumable via ARG0. */
1373 for (; cb
->args
[IPSET_CB_ARG0
] < jhash_size(t
->htable_bits
);
1374 cb
->args
[IPSET_CB_ARG0
]++) {
/* Mark the rollback point in the skb in case this bucket overflows
 * the message and must be trimmed away.
 */
1376 incomplete
= skb_tail_pointer(skb
);
1377 n
= rcu_dereference(hbucket(t
, cb
->args
[IPSET_CB_ARG0
]));
1378 pr_debug("cb->arg bucket: %lu, t %p n %p\n",
1379 cb
->args
[IPSET_CB_ARG0
], t
, n
);
/* Inner loop: used, non-expired slots of this bucket. */
1382 for (i
= 0; i
< n
->pos
; i
++) {
1383 if (!test_bit(i
, n
->used
))
1385 e
= ahash_data(n
, i
, set
->dsize
);
1386 if (SET_ELEM_EXPIRED(set
, e
))
1388 pr_debug("list hash %lu hbucket %p i %u, data %p\n",
1389 cb
->args
[IPSET_CB_ARG0
], n
, i
, e
);
1390 nested
= ipset_nest_start(skb
, IPSET_ATTR_DATA
);
/* Nest open failed on the very first bucket of this round: cancel
 * the whole ADT container instead of reporting a partial message.
 */
1392 if (cb
->args
[IPSET_CB_ARG0
] == first
) {
1393 nla_nest_cancel(skb
, atd
);
1397 goto nla_put_failure
;
1399 if (mtype_data_list(skb
, e
))
1400 goto nla_put_failure
;
1401 if (ip_set_put_extensions(skb
, set
, e
, true))
1402 goto nla_put_failure
;
1403 ipset_nest_end(skb
, nested
);
1406 ipset_nest_end(skb
, atd
);
1407 /* Set listing finished */
1408 cb
->args
[IPSET_CB_ARG0
] = 0;
/* nla_put_failure path: trim the partially emitted bucket and retry
 * from the same bucket in the next dump round.
 */
1413 nlmsg_trim(skb
, incomplete
);
/* No progress since round start: a single bucket exceeds the message
 * size — give up on this set rather than loop forever.
 */
1414 if (unlikely(first
== cb
->args
[IPSET_CB_ARG0
])) {
1415 pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
1417 cb
->args
[IPSET_CB_ARG0
] = 0;
1420 ipset_nest_end(skb
, atd
);
/* Forward declaration: kernel-side add/del/test entry point for this
 * set type, invoked from packet-path match/target code (takes the
 * xt_action_param of the rule being evaluated).
 */
1428 IPSET_TOKEN(MTYPE
, _kadt
)(struct ip_set
*set
, const struct sk_buff
*skb
,
1429 const struct xt_action_param
*par
,
1430 enum ipset_adt adt
, struct ip_set_adt_opt
*opt
);
/* Forward declaration: userspace-side add/del/test entry point,
 * driven by parsed netlink attributes (tb[]); lineno reports the
 * failing line for batched restore operations.
 */
1433 IPSET_TOKEN(MTYPE
, _uadt
)(struct ip_set
*set
, struct nlattr
*tb
[],
1434 enum ipset_adt adt
, u32
*lineno
, u32 flags
,
/* Operations table wiring this macro-generated hash type's handlers
 * into the generic ip_set core.  The adt[] sub-array (elided header
 * line) maps the IPSET_ADD/DEL/TEST verbs to the mtype_* functions
 * defined above in this template.
 */
1437 static const struct ip_set_type_variant mtype_variant
= {
1441 [IPSET_ADD
] = mtype_add
,
1442 [IPSET_DEL
] = mtype_del
,
1443 [IPSET_TEST
] = mtype_test
,
1445 .destroy
= mtype_destroy
,
1446 .flush
= mtype_flush
,
1450 .resize
= mtype_resize
,
1451 .same_set
= mtype_same_set
,
1452 .cancel_gc
= mtype_cancel_gc
,
/* Hash sets use per-region spinlocks (t->hregion[].lock) rather than
 * the single set lock.
 */
1453 .region_lock
= true,
1456 #ifdef IP_SET_EMIT_CREATE
/*
 * HTYPE_create - create a new hash set from the netlink attributes in
 * tb[]: validate parameters, allocate the htype private data and the
 * initial hash table, and hook up the family-specific variant ops.
 *
 * NOTE(review): many original lines are elided from this extract
 * (return type, opening brace, declarations of h/t/hsize/hbits/i/
 * markmask, error-unwind paths, several closing braces/#endifs and
 * the final return).  Comments state only what the fragments show.
 */
1458 IPSET_TOKEN(HTYPE
, _create
)(struct net
*net
, struct ip_set
*set
,
1459 struct nlattr
*tb
[], u32 flags
)
1461 u32 hashsize
= IPSET_DEFAULT_HASHSIZE
, maxelem
= IPSET_DEFAULT_MAXELEM
;
1462 #ifdef IP_SET_HASH_WITH_MARKMASK
1466 #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
1467 int ret
__attribute__((unused
)) = 0;
/* Defaults: host-mask prefix length per family, all-ones bitmask. */
1468 u8 netmask
= set
->family
== NFPROTO_IPV4
? 32 : 128;
1469 union nf_inet_addr bitmask
= onesmask
;
1476 pr_debug("Create set %s with family %s\n",
1477 set
->name
, set
->family
== NFPROTO_IPV4
? "inet" : "inet6");
/* Family sanity check: protocol-undef types must be UNSPEC, all
 * others must be IPv4 or IPv6.
 */
1479 #ifdef IP_SET_PROTO_UNDEF
1480 if (set
->family
!= NFPROTO_UNSPEC
)
1481 return -IPSET_ERR_INVALID_FAMILY
;
1483 if (!(set
->family
== NFPROTO_IPV4
|| set
->family
== NFPROTO_IPV6
))
1484 return -IPSET_ERR_INVALID_FAMILY
;
/* All numeric attributes must be in network byte order. */
1487 if (unlikely(!ip_set_optattr_netorder(tb
, IPSET_ATTR_HASHSIZE
) ||
1488 !ip_set_optattr_netorder(tb
, IPSET_ATTR_MAXELEM
) ||
1489 !ip_set_optattr_netorder(tb
, IPSET_ATTR_TIMEOUT
) ||
1490 !ip_set_optattr_netorder(tb
, IPSET_ATTR_CADT_FLAGS
)))
1491 return -IPSET_ERR_PROTOCOL
;
1493 #ifdef IP_SET_HASH_WITH_MARKMASK
1494 /* Separated condition in order to avoid directive in argument list */
1495 if (unlikely(!ip_set_optattr_netorder(tb
, IPSET_ATTR_MARKMASK
)))
1496 return -IPSET_ERR_PROTOCOL
;
1498 markmask
= 0xffffffff;
1499 if (tb
[IPSET_ATTR_MARKMASK
]) {
1500 markmask
= ntohl(nla_get_be32(tb
[IPSET_ATTR_MARKMASK
]));
1502 return -IPSET_ERR_INVALID_MARKMASK
;
1506 #ifdef IP_SET_HASH_WITH_NETMASK
/* Optional prefix-length netmask: range-check per family (the third
 * condition of the check is in an elided line), then convert it to
 * the internally stored bitmask form.
 */
1507 if (tb
[IPSET_ATTR_NETMASK
]) {
1508 netmask
= nla_get_u8(tb
[IPSET_ATTR_NETMASK
]);
1510 if ((set
->family
== NFPROTO_IPV4
&& netmask
> 32) ||
1511 (set
->family
== NFPROTO_IPV6
&& netmask
> 128) ||
1513 return -IPSET_ERR_INVALID_NETMASK
;
1515 /* we convert netmask to bitmask and store it */
1516 if (set
->family
== NFPROTO_IPV4
)
1517 bitmask
.ip
= ip_set_netmask(netmask
);
1519 ip6_netmask(&bitmask
, netmask
);
1523 #ifdef IP_SET_HASH_WITH_BITMASK
1524 if (tb
[IPSET_ATTR_BITMASK
]) {
1525 /* bitmask and netmask do the same thing, allow only one of these options */
1526 if (tb
[IPSET_ATTR_NETMASK
])
1527 return -IPSET_ERR_BITMASK_NETMASK_EXCL
;
/* Parse the explicit bitmask per family; reject unparsable or
 * all-zero addresses.
 */
1529 if (set
->family
== NFPROTO_IPV4
) {
1530 ret
= ip_set_get_ipaddr4(tb
[IPSET_ATTR_BITMASK
], &bitmask
.ip
);
1531 if (ret
|| !bitmask
.ip
)
1532 return -IPSET_ERR_INVALID_NETMASK
;
1533 } else if (set
->family
== NFPROTO_IPV6
) {
1534 ret
= ip_set_get_ipaddr6(tb
[IPSET_ATTR_BITMASK
], &bitmask
);
1535 if (ret
|| ipv6_addr_any(&bitmask
.in6
))
1536 return -IPSET_ERR_INVALID_NETMASK
;
1539 if (nf_inet_addr_cmp(&bitmask
, &zeromask
))
1540 return -IPSET_ERR_INVALID_NETMASK
;
/* Clamp user-supplied hashsize to the minimum; note the identifier
 * IPSET_MIMINAL_HASHSIZE is spelled that way at its definition site.
 */
1544 if (tb
[IPSET_ATTR_HASHSIZE
]) {
1545 hashsize
= ip_set_get_h32(tb
[IPSET_ATTR_HASHSIZE
]);
1546 if (hashsize
< IPSET_MIMINAL_HASHSIZE
)
1547 hashsize
= IPSET_MIMINAL_HASHSIZE
;
1550 if (tb
[IPSET_ATTR_MAXELEM
])
1551 maxelem
= ip_set_get_h32(tb
[IPSET_ATTR_MAXELEM
]);
/* Allocate the htype private part (hsize computed in elided lines). */
1554 h
= kzalloc(hsize
, GFP_KERNEL
);
1558 /* Compute htable_bits from the user input parameter hashsize.
1559 * Assume that hashsize == 2^htable_bits,
1560 * otherwise round up to the first 2^n value.
1562 hbits
= fls(hashsize
- 1);
1563 hsize
= htable_size(hbits
);
/* Allocate the table itself plus its per-region lock/counter array. */
1568 t
= ip_set_alloc(hsize
);
1573 t
->hregion
= ip_set_alloc(ahash_sizeof_regions(hbits
));
1580 for (i
= 0; i
< ahash_numof_locks(hbits
); i
++)
1581 spin_lock_init(&t
->hregion
[i
].lock
);
1582 h
->maxelem
= maxelem
;
1583 #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK)
1584 h
->bitmask
= bitmask
;
1585 h
->netmask
= netmask
;
1587 #ifdef IP_SET_HASH_WITH_MARKMASK
1588 h
->markmask
= markmask
;
/* Hash seed: take the user-supplied initval (set restore) or pick a
 * fresh random one.
 */
1590 if (tb
[IPSET_ATTR_INITVAL
])
1591 h
->initval
= ntohl(nla_get_be32(tb
[IPSET_ATTR_INITVAL
]));
1593 get_random_bytes(&h
->initval
, sizeof(h
->initval
));
/* Bucket size: default AHASH_MAX_SIZE, clamped to
 * [AHASH_INIT_SIZE, AHASH_MAX_SIZE] when supplied; the odd-value
 * branch body is in an elided line.
 */
1594 h
->bucketsize
= AHASH_MAX_SIZE
;
1595 if (tb
[IPSET_ATTR_BUCKETSIZE
]) {
1596 h
->bucketsize
= nla_get_u8(tb
[IPSET_ATTR_BUCKETSIZE
]);
1597 if (h
->bucketsize
< AHASH_INIT_SIZE
)
1598 h
->bucketsize
= AHASH_INIT_SIZE
;
1599 else if (h
->bucketsize
> AHASH_MAX_SIZE
)
1600 h
->bucketsize
= AHASH_MAX_SIZE
;
1601 else if (h
->bucketsize
% 2)
/* Publish the table: per-region maxelem quota, then RCU pointer. */
1604 t
->htable_bits
= hbits
;
1605 t
->maxelem
= h
->maxelem
/ ahash_numof_locks(hbits
);
1606 RCU_INIT_POINTER(h
->table
, t
);
1608 INIT_LIST_HEAD(&h
->ad
);
/* Select the family-specific variant ops and element size. */
1610 #ifndef IP_SET_PROTO_UNDEF
1611 if (set
->family
== NFPROTO_IPV4
) {
1613 set
->variant
= &IPSET_TOKEN(HTYPE
, 4_variant
);
1614 set
->dsize
= ip_set_elem_len(set
, tb
,
1615 sizeof(struct IPSET_TOKEN(HTYPE
, 4_elem
)),
1616 __alignof__(struct IPSET_TOKEN(HTYPE
, 4_elem
)));
1617 #ifndef IP_SET_PROTO_UNDEF
1619 set
->variant
= &IPSET_TOKEN(HTYPE
, 6_variant
);
1620 set
->dsize
= ip_set_elem_len(set
, tb
,
1621 sizeof(struct IPSET_TOKEN(HTYPE
, 6_elem
)),
1622 __alignof__(struct IPSET_TOKEN(HTYPE
, 6_elem
)));
/* Timeout support: default no-timeout; when a timeout attribute is
 * present, start the family-specific garbage collector.
 */
1625 set
->timeout
= IPSET_NO_TIMEOUT
;
1626 if (tb
[IPSET_ATTR_TIMEOUT
]) {
1627 set
->timeout
= ip_set_timeout_uget(tb
[IPSET_ATTR_TIMEOUT
]);
1628 #ifndef IP_SET_PROTO_UNDEF
1629 if (set
->family
== NFPROTO_IPV4
)
1631 IPSET_TOKEN(HTYPE
, 4_gc_init
)(&h
->gc
);
1632 #ifndef IP_SET_PROTO_UNDEF
1634 IPSET_TOKEN(HTYPE
, 6_gc_init
)(&h
->gc
);
1637 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
1638 set
->name
, jhash_size(t
->htable_bits
),
1639 t
->htable_bits
, h
->maxelem
, set
->data
, t
);
1643 #endif /* IP_SET_EMIT_CREATE */