]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Peter Zijlstra <a.p.zijlstra@chello.nl> |
2 | Subject: netvm: skb processing | |
3 | Patch-mainline: No | |
4 | References: FATE#303834 | |
5 | ||
6 | In order to make sure emergency packets receive all the memory they need to | |
7 | proceed, ensure that processing of emergency SKBs happens under PF_MEMALLOC. | |
8 | ||
9 | Use the (new) sk_backlog_rcv() wrapper to ensure this for backlog processing. | |
10 | ||
11 | Skip taps, since those are user-space again. | |
12 | ||
13 | Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> | |
14 | Acked-by: Neil Brown <neilb@suse.de> | |
15 | Acked-by: Suresh Jayaraman <sjayaraman@suse.de> | |
16 | ||
17 | --- | |
18 | include/net/sock.h | 5 ++++ | |
19 | net/core/dev.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++----- | |
20 | net/core/sock.c | 16 ++++++++++++++ | |
21 | 3 files changed, 73 insertions(+), 5 deletions(-) | |
22 | ||
23 | --- a/include/net/sock.h | |
24 | +++ b/include/net/sock.h | |
25 | @@ -528,8 +528,13 @@ static inline void sk_add_backlog(struct | |
26 | skb->next = NULL; | |
27 | } | |
28 | ||
29 | +extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); | |
30 | + | |
31 | static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) | |
32 | { | |
33 | + if (skb_emergency(skb)) | |
34 | + return __sk_backlog_rcv(sk, skb); | |
35 | + | |
36 | return sk->sk_backlog_rcv(sk, skb); | |
37 | } | |
38 | ||
39 | --- a/net/core/dev.c | |
40 | +++ b/net/core/dev.c | |
41 | @@ -2170,6 +2170,30 @@ void netif_nit_deliver(struct sk_buff *s | |
42 | rcu_read_unlock(); | |
43 | } | |
44 | ||
45 | +/* | |
46 | + * Filter the protocols for which the reserves are adequate. | |
47 | + * | |
48 | + * Before adding a protocol make sure that it is either covered by the existing | |
49 | + * reserves, or add reserves covering the memory need of the new protocol's | |
50 | + * packet processing. | |
51 | + */ | |
52 | +static int skb_emergency_protocol(struct sk_buff *skb) | |
53 | +{ | |
54 | + if (skb_emergency(skb)) | |
55 | + switch (skb->protocol) { | |
56 | + case __constant_htons(ETH_P_ARP): | |
57 | + case __constant_htons(ETH_P_IP): | |
58 | + case __constant_htons(ETH_P_IPV6): | |
59 | + case __constant_htons(ETH_P_8021Q): | |
60 | + break; | |
61 | + | |
62 | + default: | |
63 | + return 0; | |
64 | + } | |
65 | + | |
66 | + return 1; | |
67 | +} | |
68 | + | |
69 | /** | |
70 | * netif_receive_skb - process receive buffer from network | |
71 | * @skb: buffer to process | |
72 | @@ -2192,13 +2216,26 @@ int netif_receive_skb(struct sk_buff *sk | |
73 | struct net_device *null_or_orig; | |
74 | int ret = NET_RX_DROP; | |
75 | __be16 type; | |
76 | + unsigned long pflags = current->flags; | |
77 | ||
78 | if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) | |
79 | return NET_RX_SUCCESS; | |
80 | ||
81 | + /* Emergency skbs are special; they should | |
82 | + * - be delivered to SOCK_MEMALLOC sockets only | |
83 | + * - stay away from userspace | |
84 | + * - have bounded memory usage | |
85 | + * | |
86 | + * Use PF_MEMALLOC as a poor man's memory pool - the grouping kind. | |
87 | + * This saves us from propagating the allocation context down to all | |
88 | + * allocation sites. | |
89 | + */ | |
90 | + if (skb_emergency(skb)) | |
91 | + current->flags |= PF_MEMALLOC; | |
92 | + | |
93 | /* if we've gotten here through NAPI, check netpoll */ | |
94 | if (netpoll_receive_skb(skb)) | |
95 | - return NET_RX_DROP; | |
96 | + goto out; | |
97 | ||
98 | if (!skb->tstamp.tv64) | |
99 | net_timestamp(skb); | |
100 | @@ -2237,6 +2274,9 @@ int netif_receive_skb(struct sk_buff *sk | |
101 | } | |
102 | #endif | |
103 | ||
104 | + if (skb_emergency(skb)) | |
105 | + goto skip_taps; | |
106 | + | |
107 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | |
108 | if (ptype->dev == null_or_orig || ptype->dev == skb->dev || | |
109 | ptype->dev == orig_dev) { | |
110 | @@ -2246,19 +2286,23 @@ int netif_receive_skb(struct sk_buff *sk | |
111 | } | |
112 | } | |
113 | ||
114 | +skip_taps: | |
115 | #ifdef CONFIG_NET_CLS_ACT | |
116 | skb = handle_ing(skb, &pt_prev, &ret, orig_dev); | |
117 | if (!skb) | |
118 | - goto out; | |
119 | + goto unlock; | |
120 | ncls: | |
121 | #endif | |
122 | ||
123 | + if (!skb_emergency_protocol(skb)) | |
124 | + goto drop; | |
125 | + | |
126 | skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); | |
127 | if (!skb) | |
128 | - goto out; | |
129 | + goto unlock; | |
130 | skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); | |
131 | if (!skb) | |
132 | - goto out; | |
133 | + goto unlock; | |
134 | ||
135 | type = skb->protocol; | |
136 | list_for_each_entry_rcu(ptype, | |
137 | @@ -2275,6 +2319,7 @@ ncls: | |
138 | if (pt_prev) { | |
139 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | |
140 | } else { | |
141 | +drop: | |
142 | kfree_skb(skb); | |
143 | /* Jamal, now you will not able to escape explaining | |
144 | * me how you were going to use this. :-) | |
145 | @@ -2282,8 +2327,10 @@ ncls: | |
146 | ret = NET_RX_DROP; | |
147 | } | |
148 | ||
149 | -out: | |
150 | +unlock: | |
151 | rcu_read_unlock(); | |
152 | +out: | |
153 | + tsk_restore_flags(current, pflags, PF_MEMALLOC); | |
154 | return ret; | |
155 | } | |
156 | ||
157 | --- a/net/core/sock.c | |
158 | +++ b/net/core/sock.c | |
159 | @@ -309,6 +309,22 @@ int sk_clear_memalloc(struct sock *sk) | |
160 | return set; | |
161 | } | |
162 | EXPORT_SYMBOL_GPL(sk_clear_memalloc); | |
163 | + | |
164 | +int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) | |
165 | +{ | |
166 | + int ret; | |
167 | + unsigned long pflags = current->flags; | |
168 | + | |
169 | + /* these should have been dropped before queueing */ | |
170 | + BUG_ON(!sk_has_memalloc(sk)); | |
171 | + | |
172 | + current->flags |= PF_MEMALLOC; | |
173 | + ret = sk->sk_backlog_rcv(sk, skb); | |
174 | + tsk_restore_flags(current, pflags, PF_MEMALLOC); | |
175 | + | |
176 | + return ret; | |
177 | +} | |
178 | +EXPORT_SYMBOL(__sk_backlog_rcv); | |
179 | #endif | |
180 | ||
181 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) |