/*
 * Routines having to do with the 'struct sk_buff' memory handlers.
 *
 * Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 *		Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 * Fixes:
 *	Alan Cox	:	Fixed the worst of the load
 *				balancer bugs.
 *	Dave Platt	:	Interrupt stacking fix.
 *	Richard Kooijman:	Timestamp fixes.
 *	Alan Cox	:	Changed buffer format.
 *	Alan Cox	:	destructor hook for AF_UNIX etc.
 *	Linus Torvalds	:	Better skb_clone.
 *	Alan Cox	:	Added skb_copy.
 *	Alan Cox	:	Added all the changed routines Linus
 *				only put in the headers
 *	Ray VanTassle	:	Fixed --skb->lock in free
 *	Alan Cox	:	skb_copy copy arp field
 *	Andi Kleen	:	slabified it.
 *	Robert Olsson	:	Removed skb_head_pool
 *
 * NOTE:
 *	The __skb_ routines should be called with interrupts
 * disabled, or you better be *real* sure that the operation is atomic
 * with respect to whatever list is being frobbed (e.g. via lock_sock()
 * or via disabling bottom half handlers, etc).
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

/*
 * The functions in this file will not compile correctly with gcc 2.4.x
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/sctp.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
#endif
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/splice.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/scatterlist.h>
#include <linux/errqueue.h>
#include <linux/prefetch.h>
#include <linux/if_vlan.h>

#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>

#include <linux/uaccess.h>
#include <trace/events/skb.h>
#include <linux/highmem.h>
#include <linux/capability.h>
#include <linux/user_namespace.h>

struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
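
/* A minimal usage sketch of the locking NOTE in the file header above (the
 * helper is hypothetical, not part of this file): the __skb_ queue primitives
 * do no locking of their own, so the caller must hold the queue lock with
 * IRQs disabled, whereas the unprefixed variants such as skb_queue_tail()
 * take the lock themselves.
 */
static inline void example_enqueue_pair(struct sk_buff_head *list,
					struct sk_buff *a, struct sk_buff *b)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_queue_tail(list, a);	/* safe: lock held, IRQs off */
	__skb_queue_tail(list, b);	/* both land atomically on the list */
	spin_unlock_irqrestore(&list->lock, flags);
}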

/**
 * skb_panic - private function for out-of-line support
 * @skb: buffer
 * @sz: size
 * @addr: address
 * @msg: skb_over_panic or skb_under_panic
 *
 * Out-of-line support for skb_put() and skb_push().
 * Called via the wrapper skb_over_panic() or skb_under_panic().
 * Keep out of line to prevent kernel bloat.
 * __builtin_return_address is not used because it is not always reliable.
 */
static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
		      const char msg[])
{
	pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
		 msg, addr, skb->len, sz, skb->head, skb->data,
		 (unsigned long)skb->tail, (unsigned long)skb->end,
		 skb->dev ? skb->dev->name : "<NULL>");
	BUG();
}

static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	skb_panic(skb, sz, addr, __func__);
}

static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	skb_panic(skb, sz, addr, __func__);
}

/*
 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
 * the caller if emergency pfmemalloc reserves are being used. If they are
 * and the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
 * may be used. Otherwise, the packet data may be discarded until enough
 * memory is free.
 */
#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
	__kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)

static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
			       unsigned long ip, bool *pfmemalloc)
{
	void *obj;
	bool ret_pfmemalloc = false;

	/*
	 * Try a regular allocation; if that fails and we're not entitled
	 * to the reserves, fail.
	 */
	obj = kmalloc_node_track_caller(size,
					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
					node);
	if (obj || !(gfp_pfmemalloc_allowed(flags)))
		goto out;

	/* Try again, but now we are using pfmemalloc reserves */
	ret_pfmemalloc = true;
	obj = kmalloc_node_track_caller(size, flags, node);

out:
	if (pfmemalloc)
		*pfmemalloc = ret_pfmemalloc;

	return obj;
}

/*	Allocate a new skbuff. We do this ourselves so we can fill in a few
 *	'private' fields and also do memory statistics to find all the
 *	[BEEP] leaks.
 *
 */

/**
 * __alloc_skb - allocate a network buffer
 * @size: size to allocate
 * @gfp_mask: allocation mask
 * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
 *	instead of head cache and allocate a cloned (child) skb.
 *	If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
 *	allocations in case the data is required for writeback
 * @node: numa node to allocate memory on
 *
 * Allocate a new &sk_buff. The returned buffer has no headroom and a
 * tail room of at least size bytes. The object has a reference count
 * of one. The return is the buffer. On a failure the return is %NULL.
 *
 * Buffers may only be allocated from interrupts using a @gfp_mask of
 * %GFP_ATOMIC.
 */
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
			    int flags, int node)
{
	struct kmem_cache *cache;
	struct skb_shared_info *shinfo;
	struct sk_buff *skb;
	u8 *data;
	bool pfmemalloc;

	cache = (flags & SKB_ALLOC_FCLONE)
		? skbuff_fclone_cache : skbuff_head_cache;

	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
		gfp_mask |= __GFP_MEMALLOC;

	/* Get the HEAD */
	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
	if (!skb)
		goto out;
	prefetchw(skb);

	/* We do our best to align skb_shared_info on a separate cache
	 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
	 * Both skb->head and skb_shared_info are cache line aligned.
	 */
	size = SKB_DATA_ALIGN(size);
	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
	if (!data)
		goto nodata;
	/* kmalloc(size) might give us more room than requested.
	 * Put skb_shared_info exactly at the end of allocated zone,
	 * to allow max possible filling before reallocation.
	 */
	size = SKB_WITH_OVERHEAD(ksize(data));
	prefetchw(data + size);

	/*
	 * Only clear those fields we need to clear, not those that we will
	 * actually initialise below. Hence, don't put any more fields after
	 * the tail pointer in struct sk_buff!
	 */
	memset(skb, 0, offsetof(struct sk_buff, tail));
	/* Account for allocated memory : skb + skb->head */
	skb->truesize = SKB_TRUESIZE(size);
	skb->pfmemalloc = pfmemalloc;
	refcount_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb_reset_tail_pointer(skb);
	skb->end = skb->tail + size;
	skb->mac_header = (typeof(skb->mac_header))~0U;
	skb->transport_header = (typeof(skb->transport_header))~0U;

	/* make sure we initialize shinfo sequentially */
	shinfo = skb_shinfo(skb);
	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
	atomic_set(&shinfo->dataref, 1);

	if (flags & SKB_ALLOC_FCLONE) {
		struct sk_buff_fclones *fclones;

		fclones = container_of(skb, struct sk_buff_fclones, skb1);

		skb->fclone = SKB_FCLONE_ORIG;
		refcount_set(&fclones->fclone_ref, 1);

		fclones->skb2.fclone = SKB_FCLONE_CLONE;
	}
out:
	return skb;
nodata:
	kmem_cache_free(cache, skb);
	skb = NULL;
	goto out;
}
EXPORT_SYMBOL(__alloc_skb);
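
/* Sketch of the common call pattern (the helper and its parameters are
 * hypothetical): callers normally go through the alloc_skb() wrapper from
 * <linux/skbuff.h>, then partition the buffer with skb_reserve()/skb_put(),
 * since __alloc_skb() itself returns a buffer with no headroom.
 */
static inline struct sk_buff *example_build_packet(const void *payload,
						   unsigned int len,
						   unsigned int headroom)
{
	struct sk_buff *skb = alloc_skb(headroom + len, GFP_ATOMIC);

	if (!skb)
		return NULL;
	skb_reserve(skb, headroom);		/* leave room for headers */
	skb_put_data(skb, payload, len);	/* append the payload */
	return skb;
}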

/**
 * __build_skb - build a network buffer
 * @data: data buffer provided by caller
 * @frag_size: size of data, or 0 if head was kmalloced
 *
 * Allocate a new &sk_buff. Caller provides space holding head and
 * skb_shared_info. @data must have been allocated by kmalloc() only if
 * @frag_size is 0, otherwise data should come from the page allocator
 * or vmalloc().
 * The return is the new skb buffer.
 * On a failure the return is %NULL, and @data is not freed.
 * Notes:
 * Before IO, the driver allocates only the data buffer into which the NIC
 * writes the incoming frame. The driver should add room at the head
 * (NET_SKB_PAD) and MUST add room at the tail
 * (SKB_DATA_ALIGN(skb_shared_info)).
 * After IO, the driver calls build_skb() to allocate the sk_buff and
 * populate it before giving the packet to the stack.
 * RX rings only contain data buffers, not full skbs.
 */
struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
	struct skb_shared_info *shinfo;
	struct sk_buff *skb;
	unsigned int size = frag_size ? : ksize(data);

	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
	if (!skb)
		return NULL;

	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	memset(skb, 0, offsetof(struct sk_buff, tail));
	skb->truesize = SKB_TRUESIZE(size);
	refcount_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb_reset_tail_pointer(skb);
	skb->end = skb->tail + size;
	skb->mac_header = (typeof(skb->mac_header))~0U;
	skb->transport_header = (typeof(skb->transport_header))~0U;

	/* make sure we initialize shinfo sequentially */
	shinfo = skb_shinfo(skb);
	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
	atomic_set(&shinfo->dataref, 1);

	return skb;
}

/* build_skb() is a wrapper over __build_skb() that specifically
 * takes care of skb->head_frag and skb->pfmemalloc.
 * This means that if @frag_size is not zero, then @data must be backed
 * by a page fragment, not kmalloc() or vmalloc().
 */
struct sk_buff *build_skb(void *data, unsigned int frag_size)
{
	struct sk_buff *skb = __build_skb(data, frag_size);

	if (skb && frag_size) {
		skb->head_frag = 1;
		if (page_is_pfmemalloc(virt_to_head_page(data)))
			skb->pfmemalloc = 1;
	}
	return skb;
}
EXPORT_SYMBOL(build_skb);
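
/* An RX-ring sketch of the flow described in the notes above (the helper and
 * the elided DMA steps are hypothetical): the driver sizes a bare page
 * fragment for NET_SKB_PAD of headroom, the frame itself, and the tailroom
 * build_skb() needs for struct skb_shared_info; only after the NIC has
 * written a frame does it spend an sk_buff on it.
 */
static inline struct sk_buff *example_rx_to_skb(unsigned int frame_len)
{
	unsigned int truesize = SKB_DATA_ALIGN(NET_SKB_PAD + frame_len) +
				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	void *data = netdev_alloc_frag(truesize);
	struct sk_buff *skb;

	if (!data)
		return NULL;
	/* ... hand data + NET_SKB_PAD to the NIC, wait for a frame ... */
	skb = build_skb(data, truesize);	/* page-frag backed: nonzero frag_size */
	if (!skb) {
		skb_free_frag(data);
		return NULL;
	}
	skb_reserve(skb, NET_SKB_PAD);		/* headroom the driver left */
	return skb;				/* skb_put() then covers the frame */
}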

#define NAPI_SKB_CACHE_SIZE	64

struct napi_alloc_cache {
	struct page_frag_cache page;
	unsigned int skb_count;
	void *skb_cache[NAPI_SKB_CACHE_SIZE];
};

static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);

static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
	struct page_frag_cache *nc;
	unsigned long flags;
	void *data;

	local_irq_save(flags);
	nc = this_cpu_ptr(&netdev_alloc_cache);
	data = page_frag_alloc(nc, fragsz, gfp_mask);
	local_irq_restore(flags);
	return data;
}

/**
 * netdev_alloc_frag - allocate a page fragment
 * @fragsz: fragment size
 *
 * Allocates a fragment from a page for use as a receive buffer.
 * Uses GFP_ATOMIC allocations.
 */
void *netdev_alloc_frag(unsigned int fragsz)
{
	return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(netdev_alloc_frag);

static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);

	return page_frag_alloc(&nc->page, fragsz, gfp_mask);
}

void *napi_alloc_frag(unsigned int fragsz)
{
	return __napi_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(napi_alloc_frag);

/**
 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
 * @dev: network device to receive on
 * @len: length to allocate
 * @gfp_mask: get_free_pages mask, passed to alloc_skb
 *
 * Allocate a new &sk_buff and assign it a usage count of one. The
 * buffer has NET_SKB_PAD headroom built in. Users should allocate
 * the headroom they think they need without accounting for the
 * built in space. The built in space is used for optimisations.
 *
 * %NULL is returned if there is no free memory.
 */
struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
				   gfp_t gfp_mask)
{
	struct page_frag_cache *nc;
	unsigned long flags;
	struct sk_buff *skb;
	bool pfmemalloc;
	void *data;

	len += NET_SKB_PAD;

	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
		if (!skb)
			goto skb_fail;
		goto skb_success;
	}

	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	len = SKB_DATA_ALIGN(len);

	if (sk_memalloc_socks())
		gfp_mask |= __GFP_MEMALLOC;

	local_irq_save(flags);

	nc = this_cpu_ptr(&netdev_alloc_cache);
	data = page_frag_alloc(nc, len, gfp_mask);
	pfmemalloc = nc->pfmemalloc;

	local_irq_restore(flags);

	if (unlikely(!data))
		return NULL;

	skb = __build_skb(data, len);
	if (unlikely(!skb)) {
		skb_free_frag(data);
		return NULL;
	}

	/* use OR instead of assignment to avoid clearing of bits in mask */
	if (pfmemalloc)
		skb->pfmemalloc = 1;
	skb->head_frag = 1;

skb_success:
	skb_reserve(skb, NET_SKB_PAD);
	skb->dev = dev;

skb_fail:
	return skb;
}
EXPORT_SYMBOL(__netdev_alloc_skb);
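
/* Sketch of a typical call site (the helper is hypothetical): NET_SKB_PAD of
 * headroom is already built in, so a driver asks only for the frame itself;
 * netdev_alloc_skb() is the GFP_ATOMIC wrapper from <linux/skbuff.h>.
 */
static inline struct sk_buff *example_netdev_rx_alloc(struct net_device *dev,
						      unsigned int frame_len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, frame_len);

	if (skb)
		skb_put(skb, frame_len);	/* space the NIC will fill */
	return skb;
}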

/**
 * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
 * @napi: napi instance this buffer was allocated for
 * @len: length to allocate
 * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
 *
 * Allocate a new sk_buff for use in NAPI receive. This buffer will
 * attempt to allocate the head from a special reserved region used
 * only for NAPI Rx allocation. By doing this we can save several
 * CPU cycles by avoiding having to disable and re-enable IRQs.
 *
 * %NULL is returned if there is no free memory.
 */
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
				 gfp_t gfp_mask)
{
	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
	struct sk_buff *skb;
	void *data;

	len += NET_SKB_PAD + NET_IP_ALIGN;

	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
		if (!skb)
			goto skb_fail;
		goto skb_success;
	}

	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	len = SKB_DATA_ALIGN(len);

	if (sk_memalloc_socks())
		gfp_mask |= __GFP_MEMALLOC;

	data = page_frag_alloc(&nc->page, len, gfp_mask);
	if (unlikely(!data))
		return NULL;

	skb = __build_skb(data, len);
	if (unlikely(!skb)) {
		skb_free_frag(data);
		return NULL;
	}

	/* use OR instead of assignment to avoid clearing of bits in mask */
	if (nc->page.pfmemalloc)
		skb->pfmemalloc = 1;
	skb->head_frag = 1;

skb_success:
	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
	skb->dev = napi->dev;

skb_fail:
	return skb;
}
EXPORT_SYMBOL(__napi_alloc_skb);
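
/* Sketch of the intended call site (the helper and copy-break receive are
 * hypothetical): napi_alloc_skb(), the GFP_ATOMIC wrapper, may only be used
 * from NAPI/softirq context, which is what lets it skip the IRQ save/restore
 * that __netdev_alloc_skb() pays for.
 */
static inline struct sk_buff *example_napi_rx(struct napi_struct *napi,
					      const void *frame,
					      unsigned int len)
{
	struct sk_buff *skb = napi_alloc_skb(napi, len);

	if (!skb)
		return NULL;			/* a real driver would count a drop */
	skb_put_data(skb, frame, len);		/* copy the received frame in */
	return skb;
}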

void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
		     int size, unsigned int truesize)
{
	skb_fill_page_desc(skb, i, page, off, size);
	skb->len += size;
	skb->data_len += size;
	skb->truesize += truesize;
}
EXPORT_SYMBOL(skb_add_rx_frag);

void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
			  unsigned int truesize)
{
	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

	skb_frag_size_add(frag, size);
	skb->len += size;
	skb->data_len += size;
	skb->truesize += truesize;
}
EXPORT_SYMBOL(skb_coalesce_rx_frag);
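
/* Sketch (the helper and its page bookkeeping are hypothetical): a driver
 * appending a received page fragment to a paged skb. Note that truesize
 * accounts for the whole allocation backing the fragment, not just the
 * bytes actually used.
 */
static inline void example_append_frag(struct sk_buff *skb, struct page *page,
				       unsigned int offset, unsigned int size)
{
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
			size, PAGE_SIZE);	/* one full page per fragment here */
}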

static void skb_drop_list(struct sk_buff **listp)
{
	kfree_skb_list(*listp);
	*listp = NULL;
}

static inline void skb_drop_fraglist(struct sk_buff *skb)
{
	skb_drop_list(&skb_shinfo(skb)->frag_list);
}

static void skb_clone_fraglist(struct sk_buff *skb)
{
	struct sk_buff *list;

	skb_walk_frags(skb, list)
		skb_get(list);
}

static void skb_free_head(struct sk_buff *skb)
{
	unsigned char *head = skb->head;

	if (skb->head_frag)
		skb_free_frag(head);
	else
		kfree(head);
}

static void skb_release_data(struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	/* dataref is split in two: the lower 16 bits count references to
	 * the entire buffer, the bits above SKB_DATAREF_SHIFT count
	 * payload-only references (skb->nohdr). Dropping a nohdr skb
	 * releases one of each; if any reference remains, the data must
	 * not be freed yet.
	 */
	if (skb->cloned &&
	    atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
			      &shinfo->dataref))
		return;

	for (i = 0; i < shinfo->nr_frags; i++)
		__skb_frag_unref(&shinfo->frags[i]);

	if (shinfo->frag_list)
		kfree_skb_list(shinfo->frag_list);

	skb_zcopy_clear(skb, true);
	skb_free_head(skb);
}

/*
 * Free the memory of an skbuff without cleaning its state.
 */
static void kfree_skbmem(struct sk_buff *skb)
{
	struct sk_buff_fclones *fclones;

	switch (skb->fclone) {
	case SKB_FCLONE_UNAVAILABLE:
		kmem_cache_free(skbuff_head_cache, skb);
		return;

	case SKB_FCLONE_ORIG:
		fclones = container_of(skb, struct sk_buff_fclones, skb1);

		/* We usually free the clone (TX completion) before the
		 * original skb. This test would have no chance to be true
		 * for the clone, while here, branch prediction will be good.
		 */
		if (refcount_read(&fclones->fclone_ref) == 1)
			goto fastpath;
		break;

	default: /* SKB_FCLONE_CLONE */
		fclones = container_of(skb, struct sk_buff_fclones, skb2);
		break;
	}
	if (!refcount_dec_and_test(&fclones->fclone_ref))
		return;
fastpath:
	kmem_cache_free(skbuff_fclone_cache, fclones);
}

void skb_release_head_state(struct sk_buff *skb)
{
	skb_dst_drop(skb);
	secpath_reset(skb);
	if (skb->destructor) {
		WARN_ON(in_irq());
		skb->destructor(skb);
	}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	nf_conntrack_put(skb_nfct(skb));
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	nf_bridge_put(skb->nf_bridge);
#endif
}

/* Free everything but the sk_buff shell. */
static void skb_release_all(struct sk_buff *skb)
{
	skb_release_head_state(skb);
	if (likely(skb->head))
		skb_release_data(skb);
}

/**
 * __kfree_skb - private function
 * @skb: buffer
 *
 * Free an sk_buff. Release anything attached to the buffer.
 * Clean the state. This is an internal helper function. Users should
 * always call kfree_skb().
 */

void __kfree_skb(struct sk_buff *skb)
{
	skb_release_all(skb);
	kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);