]> git.ipfire.org Git - thirdparty/linux.git/blame - net/core/skbuff.c
net: add missing bh_unlock_sock() calls
[thirdparty/linux.git] / net / core / skbuff.c
CommitLineData
1da177e4
LT
1/*
2 * Routines having to do with the 'struct sk_buff' memory handlers.
3 *
113aa838 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 * Florian La Roche <rzsfl@rz.uni-sb.de>
6 *
1da177e4
LT
7 * Fixes:
8 * Alan Cox : Fixed the worst of the load
9 * balancer bugs.
10 * Dave Platt : Interrupt stacking fix.
11 * Richard Kooijman : Timestamp fixes.
12 * Alan Cox : Changed buffer format.
13 * Alan Cox : destructor hook for AF_UNIX etc.
14 * Linus Torvalds : Better skb_clone.
15 * Alan Cox : Added skb_copy.
16 * Alan Cox : Added all the changed routines Linus
17 * only put in the headers
18 * Ray VanTassle : Fixed --skb->lock in free
19 * Alan Cox : skb_copy copy arp field
20 * Andi Kleen : slabified it.
21 * Robert Olsson : Removed skb_head_pool
22 *
23 * NOTE:
24 * The __skb_ routines should be called with interrupts
25 * disabled, or you better be *real* sure that the operation is atomic
26 * with respect to whatever list is being frobbed (e.g. via lock_sock()
27 * or via disabling bottom half handlers, etc).
28 *
29 * This program is free software; you can redistribute it and/or
30 * modify it under the terms of the GNU General Public License
31 * as published by the Free Software Foundation; either version
32 * 2 of the License, or (at your option) any later version.
33 */
34
35/*
36 * The functions in this file will not compile correctly with gcc 2.4.x
37 */
38
1da177e4
LT
39#include <linux/module.h>
40#include <linux/types.h>
41#include <linux/kernel.h>
fe55f6d5 42#include <linux/kmemcheck.h>
1da177e4
LT
43#include <linux/mm.h>
44#include <linux/interrupt.h>
45#include <linux/in.h>
46#include <linux/inet.h>
47#include <linux/slab.h>
48#include <linux/netdevice.h>
49#ifdef CONFIG_NET_CLS_ACT
50#include <net/pkt_sched.h>
51#endif
52#include <linux/string.h>
53#include <linux/skbuff.h>
9c55e01c 54#include <linux/splice.h>
1da177e4
LT
55#include <linux/cache.h>
56#include <linux/rtnetlink.h>
57#include <linux/init.h>
716ea3a7 58#include <linux/scatterlist.h>
ac45f602 59#include <linux/errqueue.h>
268bb0ce 60#include <linux/prefetch.h>
1da177e4
LT
61
62#include <net/protocol.h>
63#include <net/dst.h>
64#include <net/sock.h>
65#include <net/checksum.h>
66#include <net/xfrm.h>
67
68#include <asm/uaccess.h>
69#include <asm/system.h>
ad8d75ff 70#include <trace/events/skb.h>
1da177e4 71
a1f8e7f7
AV
72#include "kmap_skb.h"
73
e18b890b
CL
74static struct kmem_cache *skbuff_head_cache __read_mostly;
75static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1da177e4 76
9c55e01c
JA
77static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
78 struct pipe_buffer *buf)
79{
8b9d3728 80 put_page(buf->page);
9c55e01c
JA
81}
82
83static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
84 struct pipe_buffer *buf)
85{
8b9d3728 86 get_page(buf->page);
9c55e01c
JA
87}
88
/* Pipe ->steal hook: unconditionally returns 1; neither argument is used. */
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
			       struct pipe_buffer *buf)
{
	const int ret = 1;

	return ret;
}
94
95
96/* Pipe buffer operations for a socket. */
28dfef8f 97static const struct pipe_buf_operations sock_pipe_buf_ops = {
9c55e01c
JA
98 .can_merge = 0,
99 .map = generic_pipe_buf_map,
100 .unmap = generic_pipe_buf_unmap,
101 .confirm = generic_pipe_buf_confirm,
102 .release = sock_pipe_buf_release,
103 .steal = sock_pipe_buf_steal,
104 .get = sock_pipe_buf_get,
105};
106
1da177e4
LT
107/*
108 * Keep out-of-line to prevent kernel bloat.
109 * __builtin_return_address is not used because it is not always
110 * reliable.
111 */
112
113/**
114 * skb_over_panic - private function
115 * @skb: buffer
116 * @sz: size
117 * @here: address
118 *
119 * Out of line support code for skb_put(). Not user callable.
120 */
ccb7c773 121static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
1da177e4 122{
26095455 123 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
4305b541 124 "data:%p tail:%#lx end:%#lx dev:%s\n",
27a884dc 125 here, skb->len, sz, skb->head, skb->data,
4305b541 126 (unsigned long)skb->tail, (unsigned long)skb->end,
26095455 127 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
128 BUG();
129}
130
131/**
132 * skb_under_panic - private function
133 * @skb: buffer
134 * @sz: size
135 * @here: address
136 *
137 * Out of line support code for skb_push(). Not user callable.
138 */
139
ccb7c773 140static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
1da177e4 141{
26095455 142 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
4305b541 143 "data:%p tail:%#lx end:%#lx dev:%s\n",
27a884dc 144 here, skb->len, sz, skb->head, skb->data,
4305b541 145 (unsigned long)skb->tail, (unsigned long)skb->end,
26095455 146 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
147 BUG();
148}
149
150/* Allocate a new skbuff. We do this ourselves so we can fill in a few
151 * 'private' fields and also do memory statistics to find all the
152 * [BEEP] leaks.
153 *
154 */
155
156/**
d179cd12 157 * __alloc_skb - allocate a network buffer
1da177e4
LT
158 * @size: size to allocate
159 * @gfp_mask: allocation mask
c83c2486
RD
160 * @fclone: allocate from fclone cache instead of head cache
161 * and allocate a cloned (child) skb
b30973f8 162 * @node: numa node to allocate memory on
1da177e4
LT
163 *
164 * Allocate a new &sk_buff. The returned buffer has no headroom and a
165 * tail room of size bytes. The object has a reference count of one.
166 * The return is the buffer. On a failure the return is %NULL.
167 *
168 * Buffers may only be allocated from interrupts using a @gfp_mask of
169 * %GFP_ATOMIC.
170 */
dd0fc66f 171struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
b30973f8 172 int fclone, int node)
1da177e4 173{
e18b890b 174 struct kmem_cache *cache;
4947d3ef 175 struct skb_shared_info *shinfo;
1da177e4
LT
176 struct sk_buff *skb;
177 u8 *data;
178
8798b3fb
HX
179 cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
180
1da177e4 181 /* Get the HEAD */
b30973f8 182 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
1da177e4
LT
183 if (!skb)
184 goto out;
ec7d2f2c 185 prefetchw(skb);
1da177e4 186
87fb4b7b
ED
187 /* We do our best to align skb_shared_info on a separate cache
188 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
189 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
190 * Both skb->head and skb_shared_info are cache line aligned.
191 */
192 size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
193 data = kmalloc_node_track_caller(size, gfp_mask, node);
1da177e4
LT
194 if (!data)
195 goto nodata;
87fb4b7b
ED
196 /* kmalloc(size) might give us more room than requested.
197 * Put skb_shared_info exactly at the end of allocated zone,
198 * to allow max possible filling before reallocation.
199 */
200 size = SKB_WITH_OVERHEAD(ksize(data));
ec7d2f2c 201 prefetchw(data + size);
1da177e4 202
ca0605a7 203 /*
c8005785
JB
204 * Only clear those fields we need to clear, not those that we will
205 * actually initialise below. Hence, don't put any more fields after
206 * the tail pointer in struct sk_buff!
ca0605a7
ACM
207 */
208 memset(skb, 0, offsetof(struct sk_buff, tail));
87fb4b7b
ED
209 /* Account for allocated memory : skb + skb->head */
210 skb->truesize = SKB_TRUESIZE(size);
1da177e4
LT
211 atomic_set(&skb->users, 1);
212 skb->head = data;
213 skb->data = data;
27a884dc 214 skb_reset_tail_pointer(skb);
4305b541 215 skb->end = skb->tail + size;
19633e12
SH
216#ifdef NET_SKBUFF_DATA_USES_OFFSET
217 skb->mac_header = ~0U;
218#endif
219
4947d3ef
BL
220 /* make sure we initialize shinfo sequentially */
221 shinfo = skb_shinfo(skb);
ec7d2f2c 222 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
4947d3ef 223 atomic_set(&shinfo->dataref, 1);
c2aa3665 224 kmemcheck_annotate_variable(shinfo->destructor_arg);
4947d3ef 225
d179cd12
DM
226 if (fclone) {
227 struct sk_buff *child = skb + 1;
228 atomic_t *fclone_ref = (atomic_t *) (child + 1);
1da177e4 229
fe55f6d5
VN
230 kmemcheck_annotate_bitfield(child, flags1);
231 kmemcheck_annotate_bitfield(child, flags2);
d179cd12
DM
232 skb->fclone = SKB_FCLONE_ORIG;
233 atomic_set(fclone_ref, 1);
234
235 child->fclone = SKB_FCLONE_UNAVAILABLE;
236 }
1da177e4
LT
237out:
238 return skb;
239nodata:
8798b3fb 240 kmem_cache_free(cache, skb);
1da177e4
LT
241 skb = NULL;
242 goto out;
1da177e4 243}
b4ac530f 244EXPORT_SYMBOL(__alloc_skb);
1da177e4 245
8af27456
CH
246/**
247 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
248 * @dev: network device to receive on
249 * @length: length to allocate
250 * @gfp_mask: get_free_pages mask, passed to alloc_skb
251 *
252 * Allocate a new &sk_buff and assign it a usage count of one. The
253 * buffer has unspecified headroom built in. Users should allocate
254 * the headroom they think they need without accounting for the
255 * built in space. The built in space is used for optimisations.
256 *
257 * %NULL is returned if there is no free memory.
258 */
259struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
260 unsigned int length, gfp_t gfp_mask)
261{
262 struct sk_buff *skb;
263
564824b0 264 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
7b2e497a 265 if (likely(skb)) {
8af27456 266 skb_reserve(skb, NET_SKB_PAD);
7b2e497a
CH
267 skb->dev = dev;
268 }
8af27456
CH
269 return skb;
270}
b4ac530f 271EXPORT_SYMBOL(__netdev_alloc_skb);
1da177e4 272
654bed16
PZ
273void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
274 int size)
275{
276 skb_fill_page_desc(skb, i, page, off, size);
277 skb->len += size;
278 skb->data_len += size;
279 skb->truesize += size;
280}
281EXPORT_SYMBOL(skb_add_rx_frag);
282
f58518e6
IJ
283/**
284 * dev_alloc_skb - allocate an skbuff for receiving
285 * @length: length to allocate
286 *
287 * Allocate a new &sk_buff and assign it a usage count of one. The
288 * buffer has unspecified headroom built in. Users should allocate
289 * the headroom they think they need without accounting for the
290 * built in space. The built in space is used for optimisations.
291 *
292 * %NULL is returned if there is no free memory. Although this function
293 * allocates memory it can be called from an interrupt.
294 */
295struct sk_buff *dev_alloc_skb(unsigned int length)
296{
1483b874
DV
297 /*
298 * There is more code here than it seems:
a0f55e0e 299 * __dev_alloc_skb is an inline
1483b874 300 */
f58518e6
IJ
301 return __dev_alloc_skb(length, GFP_ATOMIC);
302}
303EXPORT_SYMBOL(dev_alloc_skb);
304
27b437c8 305static void skb_drop_list(struct sk_buff **listp)
1da177e4 306{
27b437c8 307 struct sk_buff *list = *listp;
1da177e4 308
27b437c8 309 *listp = NULL;
1da177e4
LT
310
311 do {
312 struct sk_buff *this = list;
313 list = list->next;
314 kfree_skb(this);
315 } while (list);
316}
317
27b437c8
HX
318static inline void skb_drop_fraglist(struct sk_buff *skb)
319{
320 skb_drop_list(&skb_shinfo(skb)->frag_list);
321}
322
1da177e4
LT
323static void skb_clone_fraglist(struct sk_buff *skb)
324{
325 struct sk_buff *list;
326
fbb398a8 327 skb_walk_frags(skb, list)
1da177e4
LT
328 skb_get(list);
329}
330
5bba1712 331static void skb_release_data(struct sk_buff *skb)
1da177e4
LT
332{
333 if (!skb->cloned ||
334 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
335 &skb_shinfo(skb)->dataref)) {
336 if (skb_shinfo(skb)->nr_frags) {
337 int i;
338 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
ea2ab693 339 skb_frag_unref(skb, i);
1da177e4
LT
340 }
341
a6686f2f
SM
342 /*
343 * If skb buf is from userspace, we need to notify the caller
344 * the lower device DMA has done;
345 */
346 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
347 struct ubuf_info *uarg;
348
349 uarg = skb_shinfo(skb)->destructor_arg;
350 if (uarg->callback)
351 uarg->callback(uarg);
352 }
353
21dc3301 354 if (skb_has_frag_list(skb))
1da177e4
LT
355 skb_drop_fraglist(skb);
356
357 kfree(skb->head);
358 }
359}
360
361/*
362 * Free an skbuff by memory without cleaning the state.
363 */
2d4baff8 364static void kfree_skbmem(struct sk_buff *skb)
1da177e4 365{
d179cd12
DM
366 struct sk_buff *other;
367 atomic_t *fclone_ref;
368
d179cd12
DM
369 switch (skb->fclone) {
370 case SKB_FCLONE_UNAVAILABLE:
371 kmem_cache_free(skbuff_head_cache, skb);
372 break;
373
374 case SKB_FCLONE_ORIG:
375 fclone_ref = (atomic_t *) (skb + 2);
376 if (atomic_dec_and_test(fclone_ref))
377 kmem_cache_free(skbuff_fclone_cache, skb);
378 break;
379
380 case SKB_FCLONE_CLONE:
381 fclone_ref = (atomic_t *) (skb + 1);
382 other = skb - 1;
383
384 /* The clone portion is available for
385 * fast-cloning again.
386 */
387 skb->fclone = SKB_FCLONE_UNAVAILABLE;
388
389 if (atomic_dec_and_test(fclone_ref))
390 kmem_cache_free(skbuff_fclone_cache, other);
391 break;
3ff50b79 392 }
1da177e4
LT
393}
394
04a4bb55 395static void skb_release_head_state(struct sk_buff *skb)
1da177e4 396{
adf30907 397 skb_dst_drop(skb);
1da177e4
LT
398#ifdef CONFIG_XFRM
399 secpath_put(skb->sp);
400#endif
9c2b3328
SH
401 if (skb->destructor) {
402 WARN_ON(in_irq());
1da177e4
LT
403 skb->destructor(skb);
404 }
9fb9cbb1 405#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
5f79e0f9 406 nf_conntrack_put(skb->nfct);
2fc72c7b
KK
407#endif
408#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
9fb9cbb1
YK
409 nf_conntrack_put_reasm(skb->nfct_reasm);
410#endif
1da177e4
LT
411#ifdef CONFIG_BRIDGE_NETFILTER
412 nf_bridge_put(skb->nf_bridge);
413#endif
1da177e4
LT
414/* XXX: IS this still necessary? - JHS */
415#ifdef CONFIG_NET_SCHED
416 skb->tc_index = 0;
417#ifdef CONFIG_NET_CLS_ACT
418 skb->tc_verd = 0;
1da177e4
LT
419#endif
420#endif
04a4bb55
LB
421}
422
/*
 * Free everything but the sk_buff shell: first the head state, then the
 * data area.
 */
static void skb_release_all(struct sk_buff *skb)
{
	skb_release_head_state(skb);

	skb_release_data(skb);
}
429
/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer
 *	(head state and data) and then return the sk_buff memory itself.
 *	This is an internal helper function. Users should
 *	always call kfree_skb
 */
void __kfree_skb(struct sk_buff *skb)
{
	/* Attached state and data go first ... */
	skb_release_all(skb);

	/* ... then the sk_buff structure. */
	kfree_skbmem(skb);
}
b4ac530f 444EXPORT_SYMBOL(__kfree_skb);
1da177e4 445