From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: netvm: hook skb allocation to reserves
Patch-mainline: No
References: FATE#303834

Change the skb allocation API to indicate RX usage and use this to fall back to
the reserve when needed. SKBs allocated from the reserve are tagged in
skb->emergency.

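A rough usage sketch (not part of the patch; mydrv_alloc_rx() is a
made-up driver helper): an RX path requests the fallback by passing
SKB_ALLOC_RX and can afterwards tell from skb_emergency() whether the
buffer really came out of the reserve.

    /* Sketch only: hypothetical driver refill using the new RX hint. */
    static struct sk_buff *mydrv_alloc_rx(struct net_device *dev,
                                          unsigned int len, int node)
    {
        struct sk_buff *skb;

        /* With sk_memalloc_socks(), SKB_ALLOC_RX may dip into the
         * emergency reserve via __GFP_MEMALLOC. */
        skb = __alloc_skb(len + NET_SKB_PAD, GFP_ATOMIC, SKB_ALLOC_RX, node);
        if (unlikely(!skb))
            return NULL;        /* even the reserve is exhausted */
        skb_reserve(skb, NET_SKB_PAD);

        /* Set only when the memory came from the reserve. */
        if (skb_emergency(skb))
            pr_debug("%s: RX skb from emergency reserve\n", dev->name);

        return skb;
    }
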
Teach all other skb ops about emergency skbs and the reserve accounting.

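The pattern those ops follow is sketched below (illustration only, not
part of the diff; my_realloc_head() is a made-up name): any allocation
made on behalf of an emergency skb must itself be allowed to dip into
the reserve, and any skb or data it produces must inherit the tag.

    /* Sketch only: reallocating head data for an existing skb, using
     * the reserve helpers provided earlier in this series. */
    static u8 *my_realloc_head(struct sk_buff *skb, unsigned int size,
                               gfp_t gfp_mask)
    {
        /* Emergency skbs may use the memalloc reserves ... */
        if (skb_emergency(skb))
            gfp_mask |= __GFP_MEMALLOC;

        /* ... and kmalloc_reserve() keeps the reserve accounting
         * straight for the new head. */
        return kmalloc_reserve(size + sizeof(struct skb_shared_info),
                               gfp_mask, -1, &net_skb_reserve, NULL);
    }
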
Use the (new) packet split API to allocate and track fragment pages from the
emergency reserve. Do this using an atomic counter (page->frag_count, overlaid
on page->index). This is needed because the fragments have different sharing
semantics from those indicated by skb_shinfo()->dataref.

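For example (sketch only, not in the diff; the helpers it calls are the
ones added to net/core/skbuff.c below), sharing and releasing a fragment
pairs skb_get_page() with skb_put_page(), so the per-page frag_count,
not dataref, decides when the reserve charge is returned.

    /* Sketch only: copy one fragment from 'from' into 'to' (the caller
     * is assumed to update nr_frags). */
    static void example_share_frag(struct sk_buff *to, struct sk_buff *from,
                                   int i)
    {
        skb_frag_t *frag = &skb_shinfo(from)->frags[i];

        skb_shinfo(to)->frags[i] = *frag;
        /* get_page(); also bumps frag_count when 'to' is emergency */
        skb_get_page(to, frag->page);
    }

    /* Sketch only: the matching release; the final put of an emergency
     * page gives its charge back to net_skb_reserve. */
    static void example_release_frag(struct sk_buff *skb, int i)
    {
        skb_put_page(skb, skb_shinfo(skb)->frags[i].page);
    }
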
Note that the decision to distinguish between regular and emergency SKBs allows
the accounting overhead to be limited to the latter kind.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Neil Brown <neilb@suse.de>
Acked-by: Suresh Jayaraman <sjayaraman@suse.de>

---
 include/linux/mm_types.h |    1
 include/linux/skbuff.h   |   27 +++++++--
 net/core/skbuff.c        |  135 +++++++++++++++++++++++++++++++++++++----------
 3 files changed, 132 insertions(+), 31 deletions(-)

--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -75,6 +75,7 @@ struct page {
 pgoff_t index; /* Our offset within mapping. */
 void *freelist; /* SLUB: freelist req. slab lock */
 int reserve; /* page_alloc: page is a reserve page */
+ atomic_t frag_count; /* skb fragment use count */
 };
 struct list_head lru; /* Pageout list, eg. active_list
 * protected by zone->lru_lock !
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -320,7 +320,10 @@ struct sk_buff {
 #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
 __u8 do_not_encrypt:1;
 #endif
- /* 0/13/14 bit hole */
+#ifdef CONFIG_NETVM
+ __u8 emergency:1;
+#endif
+ /* 12-16 bit hole */

 #ifdef CONFIG_NET_DMA
 dma_cookie_t dma_cookie;
@@ -353,10 +356,22 @@ struct sk_buff {

 #include <asm/system.h>

+#define SKB_ALLOC_FCLONE 0x01
+#define SKB_ALLOC_RX 0x02
+
+static inline bool skb_emergency(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NETVM
+ return unlikely(skb->emergency);
+#else
+ return false;
+#endif
+}
+
 extern void kfree_skb(struct sk_buff *skb);
 extern void __kfree_skb(struct sk_buff *skb);
 extern struct sk_buff *__alloc_skb(unsigned int size,
- gfp_t priority, int fclone, int node);
+ gfp_t priority, int flags, int node);
 static inline struct sk_buff *alloc_skb(unsigned int size,
 gfp_t priority)
 {
@@ -366,7 +381,7 @@ static inline struct sk_buff *alloc_skb(
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 gfp_t priority)
 {
- return __alloc_skb(size, priority, 1, -1);
+ return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, -1);
 }

 extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
@@ -1216,7 +1231,8 @@ static inline void __skb_queue_purge(str
 static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 gfp_t gfp_mask)
 {
- struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
+ struct sk_buff *skb =
+ __alloc_skb(length + NET_SKB_PAD, gfp_mask, SKB_ALLOC_RX, -1);
 if (likely(skb))
 skb_reserve(skb, NET_SKB_PAD);
 return skb;
@@ -1247,6 +1263,7 @@ static inline struct sk_buff *netdev_all
 }

 extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask);
+extern void __netdev_free_page(struct net_device *dev, struct page *page);

 /**
 * netdev_alloc_page - allocate a page for ps-rx on a specific device
@@ -1263,7 +1280,7 @@ static inline struct page *netdev_alloc_

 static inline void netdev_free_page(struct net_device *dev, struct page *page)
 {
- __free_page(page);
+ __netdev_free_page(dev, page);
 }

 /**
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -173,23 +173,29 @@ EXPORT_SYMBOL(skb_truesize_bug);
 * %GFP_ATOMIC.
 */
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
- int fclone, int node)
+ int flags, int node)
 {
 struct kmem_cache *cache;
 struct skb_shared_info *shinfo;
 struct sk_buff *skb;
 u8 *data;
+ int emergency = 0;
+ int memalloc = sk_memalloc_socks();

- cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+ size = SKB_DATA_ALIGN(size);
+ cache = (flags & SKB_ALLOC_FCLONE)
+ ? skbuff_fclone_cache : skbuff_head_cache;
+
+ if (memalloc && (flags & SKB_ALLOC_RX))
+ gfp_mask |= __GFP_MEMALLOC;

 /* Get the HEAD */
 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
 if (!skb)
 goto out;

- size = SKB_DATA_ALIGN(size);
- data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
- gfp_mask, node);
+ data = kmalloc_reserve(size + sizeof(struct skb_shared_info),
+ gfp_mask, node, &net_skb_reserve, &emergency);
 if (!data)
 goto nodata;

@@ -199,6 +205,9 @@ struct sk_buff *__alloc_skb(unsigned int
 * the tail pointer in struct sk_buff!
 */
 memset(skb, 0, offsetof(struct sk_buff, tail));
+#ifdef CONFIG_NETVM
+ skb->emergency = emergency;
+#endif
 skb->truesize = size + sizeof(struct sk_buff);
 atomic_set(&skb->users, 1);
 skb->head = data;
@@ -215,7 +224,7 @@ struct sk_buff *__alloc_skb(unsigned int
 shinfo->ip6_frag_id = 0;
 shinfo->frag_list = NULL;

- if (fclone) {
+ if (flags & SKB_ALLOC_FCLONE) {
 struct sk_buff *child = skb + 1;
 atomic_t *fclone_ref = (atomic_t *) (child + 1);

@@ -223,6 +232,9 @@ struct sk_buff *__alloc_skb(unsigned int
 atomic_set(fclone_ref, 1);

 child->fclone = SKB_FCLONE_UNAVAILABLE;
+#ifdef CONFIG_NETVM
+ child->emergency = skb->emergency;
+#endif
 }
 out:
 return skb;
@@ -251,7 +263,7 @@ struct sk_buff *__netdev_alloc_skb(struc
 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 struct sk_buff *skb;

- skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
+ skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, SKB_ALLOC_RX, node);
 if (likely(skb)) {
 skb_reserve(skb, NET_SKB_PAD);
 skb->dev = dev;
@@ -264,11 +276,19 @@ struct page *__netdev_alloc_page(struct
 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 struct page *page;

- page = alloc_pages_node(node, gfp_mask, 0);
+ page = alloc_pages_reserve(node, gfp_mask | __GFP_MEMALLOC, 0,
+ &net_skb_reserve, NULL);
+
 return page;
 }
 EXPORT_SYMBOL(__netdev_alloc_page);

+void __netdev_free_page(struct net_device *dev, struct page *page)
+{
+ free_pages_reserve(page, 0, &net_skb_reserve, page->reserve);
+}
+EXPORT_SYMBOL(__netdev_free_page);
+
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 int size)
 {
@@ -276,6 +296,27 @@ void skb_add_rx_frag(struct sk_buff *skb
 skb->len += size;
 skb->data_len += size;
 skb->truesize += size;
+
+#ifdef CONFIG_NETVM
+ /*
+ * In the rare case that skb_emergency() != page->reserved we'll
+ * skew the accounting slightly, but since its only a 'small' constant
+ * shift its ok.
+ */
+ if (skb_emergency(skb)) {
+ /*
+ * We need to track fragment pages so that we properly
+ * release their reserve in skb_put_page().
+ */
+ atomic_set(&page->frag_count, 1);
+ } else if (unlikely(page->reserve)) {
+ /*
+ * Release the reserve now, because normal skbs don't
+ * do the emergency accounting.
+ */
+ mem_reserve_pages_charge(&net_skb_reserve, -1);
+ }
+#endif
 }
 EXPORT_SYMBOL(skb_add_rx_frag);

@@ -327,21 +368,38 @@ static void skb_clone_fraglist(struct sk
 skb_get(list);
 }

+static void skb_get_page(struct sk_buff *skb, struct page *page)
+{
+ get_page(page);
+ if (skb_emergency(skb))
+ atomic_inc(&page->frag_count);
+}
+
+static void skb_put_page(struct sk_buff *skb, struct page *page)
+{
+ if (skb_emergency(skb) && atomic_dec_and_test(&page->frag_count))
+ mem_reserve_pages_charge(&net_skb_reserve, -1);
+ put_page(page);
+}
+
 static void skb_release_data(struct sk_buff *skb)
 {
 if (!skb->cloned ||
 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
 &skb_shinfo(skb)->dataref)) {
+
 if (skb_shinfo(skb)->nr_frags) {
 int i;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_put_page(skb,
+ skb_shinfo(skb)->frags[i].page);
+ }
 }

 if (skb_shinfo(skb)->frag_list)
 skb_drop_fraglist(skb);

- kfree(skb->head);
+ kfree_reserve(skb->head, &net_skb_reserve, skb_emergency(skb));
 }
 }

@@ -462,6 +520,9 @@ static void __copy_skb_header(struct sk_
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 new->ipvs_property = old->ipvs_property;
 #endif
+#ifdef CONFIG_NETVM
+ new->emergency = old->emergency;
+#endif
 new->protocol = old->protocol;
 new->mark = old->mark;
 __nf_copy(new, old);
@@ -555,6 +616,9 @@ struct sk_buff *skb_clone(struct sk_buff
 n->fclone = SKB_FCLONE_CLONE;
 atomic_inc(fclone_ref);
 } else {
+ if (skb_emergency(skb))
+ gfp_mask |= __GFP_MEMALLOC;
+
 n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 if (!n)
 return NULL;
@@ -586,6 +650,14 @@ static void copy_skb_header(struct sk_bu
 skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }

+static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
+{
+ if (skb_emergency(skb))
+ return SKB_ALLOC_RX;
+
+ return 0;
+}
+
 /**
 * skb_copy - create private copy of an sk_buff
 * @skb: buffer to copy
@@ -606,15 +678,17 @@ static void copy_skb_header(struct sk_bu
 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
 int headerlen = skb->data - skb->head;
+ int size;
 /*
 * Allocate the copy buffer
 */
 struct sk_buff *n;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
- n = alloc_skb(skb->end + skb->data_len, gfp_mask);
+ size = skb->end + skb->data_len;
 #else
- n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+ size = skb->end - skb->head + skb->data_len;
 #endif
+ n = __alloc_skb(size, gfp_mask, skb_alloc_rx_flag(skb), -1);
 if (!n)
 return NULL;

@@ -649,12 +723,14 @@ struct sk_buff *pskb_copy(struct sk_buff
 /*
 * Allocate the copy buffer
 */
+ int size;
 struct sk_buff *n;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
- n = alloc_skb(skb->end, gfp_mask);
+ size = skb->end;
 #else
- n = alloc_skb(skb->end - skb->head, gfp_mask);
+ size = skb->end - skb->head;
 #endif
+ n = __alloc_skb(size, gfp_mask, skb_alloc_rx_flag(skb), -1);
 if (!n)
 goto out;

@@ -673,8 +749,9 @@ struct sk_buff *pskb_copy(struct sk_buff
 int i;

 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ skb_shinfo(n)->frags[i] = *frag;
+ skb_get_page(n, frag->page);
 }
 skb_shinfo(n)->nr_frags = i;
 }
@@ -722,7 +799,11 @@ int pskb_expand_head(struct sk_buff *skb

 size = SKB_DATA_ALIGN(size);

- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ if (skb_emergency(skb))
+ gfp_mask |= __GFP_MEMALLOC;
+
+ data = kmalloc_reserve(size + sizeof(struct skb_shared_info),
+ gfp_mask, -1, &net_skb_reserve, NULL);
 if (!data)
 goto nodata;

@@ -737,7 +818,7 @@ int pskb_expand_head(struct sk_buff *skb
 sizeof(struct skb_shared_info));

 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ skb_get_page(skb, skb_shinfo(skb)->frags[i].page);

 if (skb_shinfo(skb)->frag_list)
 skb_clone_fraglist(skb);
@@ -816,8 +897,8 @@ struct sk_buff *skb_copy_expand(const st
 /*
 * Allocate the copy buffer
 */
- struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
- gfp_mask);
+ struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
+ gfp_mask, skb_alloc_rx_flag(skb), -1);
 int oldheadroom = skb_headroom(skb);
 int head_copy_len, head_copy_off;
 int off;
@@ -1006,7 +1087,7 @@ drop_pages:
 skb_shinfo(skb)->nr_frags = i;

 for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_put_page(skb, skb_shinfo(skb)->frags[i].page);

 if (skb_shinfo(skb)->frag_list)
 skb_drop_fraglist(skb);
@@ -1175,7 +1256,7 @@ pull_pages:
 k = 0;
 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_put_page(skb, skb_shinfo(skb)->frags[i].page);
 eat -= skb_shinfo(skb)->frags[i].size;
 } else {
 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1925,6 +2006,7 @@ static inline void skb_split_no_header(s
 skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];

 if (pos < len) {
+ struct page *page = skb_shinfo(skb)->frags[i].page;
 /* Split frag.
 * We have two variants in this case:
 * 1. Move all the frag to the second
 * part, if it is possible. F.e.
 * where splitting is expensive.
 * 2. Split is accurately. We make this.
 */
- get_page(skb_shinfo(skb)->frags[i].page);
+ skb_get_page(skb1, page);
 skb_shinfo(skb1)->frags[0].page_offset += len - pos;
 skb_shinfo(skb1)->frags[0].size -= len - pos;
 skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2264,7 +2346,8 @@ struct sk_buff *skb_segment(struct sk_bu
 if (hsize > len || !sg)
 hsize = len;

- nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
+ nskb = __alloc_skb(hsize + doffset + headroom, GFP_ATOMIC,
+ skb_alloc_rx_flag(skb), -1);
 if (unlikely(!nskb))
 goto err;

@@ -2302,7 +2385,7 @@ struct sk_buff *skb_segment(struct sk_bu
 BUG_ON(i >= nfrags);

 *frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ skb_get_page(nskb, frag->page);
 size = frag->size;

 if (pos < offset) {