]>
Commit | Line | Data |
---|---|---|
58c5fc13 MT |
1 | diff -urp v2.6.31/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h |
2 | --- v2.6.31/linux/include/linux/rtnetlink.h 2009-06-13 10:53:56.000000000 +0300 | |
3 | +++ linux/include/linux/rtnetlink.h 2009-09-11 22:11:20.000000000 +0300 | |
4 | @@ -311,6 +311,8 @@ struct rtnexthop | |
5 | #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ | |
6 | #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ | |
7 | #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ | |
8 | +#define RTNH_F_SUSPECT 8 /* We don't know the real state */ | |
9 | +#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT) | |
10 | ||
11 | /* Macros to handle hexthops */ | |
12 | ||
13 | diff -urp v2.6.31/linux/include/net/flow.h linux/include/net/flow.h | |
14 | --- v2.6.31/linux/include/net/flow.h 2009-03-25 09:48:32.000000000 +0200 | |
15 | +++ linux/include/net/flow.h 2009-09-11 22:12:39.000000000 +0300 | |
16 | @@ -19,6 +19,8 @@ struct flowi { | |
17 | struct { | |
18 | __be32 daddr; | |
19 | __be32 saddr; | |
20 | + __be32 lsrc; | |
21 | + __be32 gw; | |
22 | __u8 tos; | |
23 | __u8 scope; | |
24 | } ip4_u; | |
25 | @@ -43,6 +45,8 @@ struct flowi { | |
26 | #define fl6_flowlabel nl_u.ip6_u.flowlabel | |
27 | #define fl4_dst nl_u.ip4_u.daddr | |
28 | #define fl4_src nl_u.ip4_u.saddr | |
29 | +#define fl4_lsrc nl_u.ip4_u.lsrc | |
30 | +#define fl4_gw nl_u.ip4_u.gw | |
31 | #define fl4_tos nl_u.ip4_u.tos | |
32 | #define fl4_scope nl_u.ip4_u.scope | |
33 | ||
34 | diff -urp v2.6.31/linux/include/net/ip_fib.h linux/include/net/ip_fib.h | |
35 | --- v2.6.31/linux/include/net/ip_fib.h 2009-09-11 10:27:15.000000000 +0300 | |
36 | +++ linux/include/net/ip_fib.h 2009-09-11 22:11:20.000000000 +0300 | |
37 | @@ -204,6 +204,8 @@ extern int fib_lookup(struct net *n, str | |
38 | extern struct fib_table *fib_new_table(struct net *net, u32 id); | |
39 | extern struct fib_table *fib_get_table(struct net *net, u32 id); | |
40 | ||
41 | +extern int fib_result_table(struct fib_result *res); | |
42 | + | |
43 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ | |
44 | ||
45 | /* Exported by fib_frontend.c */ | |
46 | @@ -273,4 +275,6 @@ static inline void fib_proc_exit(struct | |
47 | } | |
48 | #endif | |
49 | ||
50 | +extern rwlock_t fib_nhflags_lock; | |
51 | + | |
52 | #endif /* _NET_FIB_H */ | |
53 | diff -urp v2.6.31/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h | |
54 | --- v2.6.31/linux/include/net/netfilter/nf_nat.h 2009-06-13 10:53:57.000000000 +0300 | |
55 | +++ linux/include/net/netfilter/nf_nat.h 2009-09-11 22:12:39.000000000 +0300 | |
56 | @@ -78,6 +78,13 @@ struct nf_conn_nat | |
57 | #endif | |
58 | }; | |
59 | ||
60 | +/* Call input routing for SNAT-ed traffic */ | |
61 | +extern unsigned int ip_nat_route_input(unsigned int hooknum, | |
62 | + struct sk_buff *skb, | |
63 | + const struct net_device *in, | |
64 | + const struct net_device *out, | |
65 | + int (*okfn)(struct sk_buff *)); | |
66 | + | |
67 | /* Set up the info structure to map into this range. */ | |
68 | extern unsigned int nf_nat_setup_info(struct nf_conn *ct, | |
69 | const struct nf_nat_range *range, | |
70 | diff -urp v2.6.31/linux/include/net/route.h linux/include/net/route.h | |
71 | --- v2.6.31/linux/include/net/route.h 2009-09-11 10:27:15.000000000 +0300 | |
72 | +++ linux/include/net/route.h 2009-09-11 22:12:39.000000000 +0300 | |
73 | @@ -116,6 +116,7 @@ extern int __ip_route_output_key(struct | |
74 | extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp); | |
75 | extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); | |
76 | extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin); | |
77 | +extern int ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc); | |
78 | extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev); | |
79 | extern void ip_rt_send_redirect(struct sk_buff *skb); | |
80 | ||
81 | diff -urp v2.6.31/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c | |
82 | --- v2.6.31/linux/net/bridge/br_netfilter.c 2009-09-11 10:27:16.000000000 +0300 | |
83 | +++ linux/net/bridge/br_netfilter.c 2009-09-11 22:13:17.000000000 +0300 | |
84 | @@ -343,6 +343,9 @@ static int br_nf_pre_routing_finish(stru | |
85 | struct rtable *rt; | |
86 | int err; | |
87 | ||
88 | + /* Old skb->dst is not expected, it is lost in all cases */ | |
89 | + skb_dst_drop(skb); | |
90 | + | |
91 | if (nf_bridge->mask & BRNF_PKT_TYPE) { | |
92 | skb->pkt_type = PACKET_OTHERHOST; | |
93 | nf_bridge->mask ^= BRNF_PKT_TYPE; | |
94 | diff -urp v2.6.31/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c | |
95 | --- v2.6.31/linux/net/ipv4/fib_frontend.c 2009-09-11 10:27:17.000000000 +0300 | |
96 | +++ linux/net/ipv4/fib_frontend.c 2009-09-11 22:11:20.000000000 +0300 | |
97 | @@ -46,6 +46,8 @@ | |
98 | ||
99 | #ifndef CONFIG_IP_MULTIPLE_TABLES | |
100 | ||
101 | +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN) | |
102 | + | |
103 | static int __net_init fib4_rules_init(struct net *net) | |
104 | { | |
105 | struct fib_table *local_table, *main_table; | |
106 | @@ -70,6 +72,8 @@ fail: | |
107 | } | |
108 | #else | |
109 | ||
110 | +#define FIB_RES_TABLE(r) (fib_result_table(r)) | |
111 | + | |
112 | struct fib_table *fib_new_table(struct net *net, u32 id) | |
113 | { | |
114 | struct fib_table *tb; | |
115 | @@ -124,7 +128,8 @@ void fib_select_default(struct net *net, | |
116 | table = res->r->table; | |
117 | #endif | |
118 | tb = fib_get_table(net, table); | |
119 | - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | |
120 | + if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) || | |
121 | + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST) | |
122 | tb->tb_select_default(tb, flp, res); | |
123 | } | |
124 | ||
125 | @@ -238,6 +243,9 @@ int fib_validate_source(__be32 src, __be | |
126 | .tos = tos } }, | |
127 | .iif = oif }; | |
128 | struct fib_result res; | |
129 | + int table; | |
130 | + unsigned char prefixlen; | |
131 | + unsigned char scope; | |
132 | int no_addr, rpf; | |
133 | int ret; | |
134 | struct net *net; | |
135 | @@ -261,31 +269,35 @@ int fib_validate_source(__be32 src, __be | |
136 | goto e_inval_res; | |
137 | *spec_dst = FIB_RES_PREFSRC(res); | |
138 | fib_combine_itag(itag, &res); | |
139 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
140 | - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) | |
141 | -#else | |
142 | if (FIB_RES_DEV(res) == dev) | |
143 | -#endif | |
144 | { | |
145 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | |
146 | fib_res_put(&res); | |
147 | return ret; | |
148 | } | |
149 | + table = FIB_RES_TABLE(&res); | |
150 | + prefixlen = res.prefixlen; | |
151 | + scope = res.scope; | |
152 | fib_res_put(&res); | |
153 | if (no_addr) | |
154 | goto last_resort; | |
155 | - if (rpf == 1) | |
156 | - goto e_inval; | |
157 | fl.oif = dev->ifindex; | |
158 | ||
159 | ret = 0; | |
160 | if (fib_lookup(net, &fl, &res) == 0) { | |
161 | - if (res.type == RTN_UNICAST) { | |
162 | + if (res.type == RTN_UNICAST && | |
163 | + ((table == FIB_RES_TABLE(&res) && | |
164 | + res.prefixlen >= prefixlen && res.scope >= scope) || | |
165 | + !rpf)) { | |
166 | *spec_dst = FIB_RES_PREFSRC(res); | |
167 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | |
168 | + fib_res_put(&res); | |
169 | + return ret; | |
170 | } | |
171 | fib_res_put(&res); | |
172 | } | |
173 | + if (rpf == 1) | |
174 | + goto e_inval; | |
175 | return ret; | |
176 | ||
177 | last_resort: | |
178 | @@ -908,9 +920,7 @@ static int fib_inetaddr_event(struct not | |
179 | switch (event) { | |
180 | case NETDEV_UP: | |
181 | fib_add_ifaddr(ifa); | |
182 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
183 | fib_sync_up(dev); | |
184 | -#endif | |
185 | rt_cache_flush(dev_net(dev), -1); | |
186 | break; | |
187 | case NETDEV_DOWN: | |
188 | @@ -946,9 +956,7 @@ static int fib_netdev_event(struct notif | |
189 | for_ifa(in_dev) { | |
190 | fib_add_ifaddr(ifa); | |
191 | } endfor_ifa(in_dev); | |
192 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
193 | fib_sync_up(dev); | |
194 | -#endif | |
195 | rt_cache_flush(dev_net(dev), -1); | |
196 | break; | |
197 | case NETDEV_DOWN: | |
198 | diff -urp v2.6.31/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c | |
199 | --- v2.6.31/linux/net/ipv4/fib_hash.c 2009-09-11 10:27:17.000000000 +0300 | |
200 | +++ linux/net/ipv4/fib_hash.c 2009-09-11 22:11:20.000000000 +0300 | |
201 | @@ -277,25 +277,35 @@ out: | |
202 | static void | |
203 | fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) | |
204 | { | |
205 | - int order, last_idx; | |
206 | + int order, last_idx, last_dflt, last_nhsel; | |
207 | + struct fib_alias *first_fa = NULL; | |
208 | + struct hlist_head *head; | |
209 | struct hlist_node *node; | |
210 | struct fib_node *f; | |
211 | struct fib_info *fi = NULL; | |
212 | struct fib_info *last_resort; | |
213 | struct fn_hash *t = (struct fn_hash *)tb->tb_data; | |
214 | - struct fn_zone *fz = t->fn_zones[0]; | |
215 | + struct fn_zone *fz = t->fn_zones[res->prefixlen]; | |
216 | + __be32 k; | |
217 | ||
218 | if (fz == NULL) | |
219 | return; | |
220 | ||
221 | + k = fz_key(flp->fl4_dst, fz); | |
222 | + last_dflt = -2; | |
223 | + last_nhsel = 0; | |
224 | last_idx = -1; | |
225 | last_resort = NULL; | |
226 | order = -1; | |
227 | ||
228 | read_lock(&fib_hash_lock); | |
229 | - hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) { | |
230 | + head = &fz->fz_hash[fn_hash(k, fz)]; | |
231 | + hlist_for_each_entry(f, node, head, fn_hash) { | |
232 | struct fib_alias *fa; | |
233 | ||
234 | + if (f->fn_key != k) | |
235 | + continue; | |
236 | + | |
237 | list_for_each_entry(fa, &f->fn_alias, fa_list) { | |
238 | struct fib_info *next_fi = fa->fa_info; | |
239 | ||
240 | @@ -303,42 +313,56 @@ fn_hash_select_default(struct fib_table | |
241 | fa->fa_type != RTN_UNICAST) | |
242 | continue; | |
243 | ||
244 | + if (fa->fa_tos && | |
245 | + fa->fa_tos != flp->fl4_tos) | |
246 | + continue; | |
247 | if (next_fi->fib_priority > res->fi->fib_priority) | |
248 | break; | |
249 | - if (!next_fi->fib_nh[0].nh_gw || | |
250 | - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | |
251 | - continue; | |
252 | fa->fa_state |= FA_S_ACCESSED; | |
253 | ||
254 | - if (fi == NULL) { | |
255 | - if (next_fi != res->fi) | |
256 | - break; | |
257 | - } else if (!fib_detect_death(fi, order, &last_resort, | |
258 | - &last_idx, tb->tb_default)) { | |
259 | + if (!first_fa) { | |
260 | + last_dflt = fa->fa_last_dflt; | |
261 | + first_fa = fa; | |
262 | + } | |
263 | + if (fi && !fib_detect_death(fi, order, &last_resort, | |
264 | + &last_idx, &last_dflt, &last_nhsel, flp)) { | |
265 | fib_result_assign(res, fi); | |
266 | - tb->tb_default = order; | |
267 | + first_fa->fa_last_dflt = order; | |
268 | goto out; | |
269 | } | |
270 | fi = next_fi; | |
271 | order++; | |
272 | } | |
273 | + break; | |
274 | } | |
275 | ||
276 | if (order <= 0 || fi == NULL) { | |
277 | - tb->tb_default = -1; | |
278 | + if (fi && fi->fib_nhs > 1 && | |
279 | + fib_detect_death(fi, order, &last_resort, &last_idx, | |
280 | + &last_dflt, &last_nhsel, flp) && | |
281 | + last_resort == fi) { | |
282 | + read_lock_bh(&fib_nhflags_lock); | |
283 | + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
284 | + read_unlock_bh(&fib_nhflags_lock); | |
285 | + } | |
286 | + if (first_fa) first_fa->fa_last_dflt = -1; | |
287 | goto out; | |
288 | } | |
289 | ||
290 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | |
291 | - tb->tb_default)) { | |
292 | + &last_dflt, &last_nhsel, flp)) { | |
293 | fib_result_assign(res, fi); | |
294 | - tb->tb_default = order; | |
295 | + first_fa->fa_last_dflt = order; | |
296 | goto out; | |
297 | } | |
298 | ||
299 | - if (last_idx >= 0) | |
300 | + if (last_idx >= 0) { | |
301 | fib_result_assign(res, last_resort); | |
302 | - tb->tb_default = last_idx; | |
303 | + read_lock_bh(&fib_nhflags_lock); | |
304 | + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
305 | + read_unlock_bh(&fib_nhflags_lock); | |
306 | + first_fa->fa_last_dflt = last_idx; | |
307 | + } | |
308 | out: | |
309 | read_unlock(&fib_hash_lock); | |
310 | } | |
311 | @@ -462,6 +486,7 @@ static int fn_hash_insert(struct fib_tab | |
312 | write_lock_bh(&fib_hash_lock); | |
313 | fi_drop = fa->fa_info; | |
314 | fa->fa_info = fi; | |
315 | + fa->fa_last_dflt = -1; | |
316 | fa->fa_type = cfg->fc_type; | |
317 | fa->fa_scope = cfg->fc_scope; | |
318 | state = fa->fa_state; | |
319 | @@ -516,6 +541,7 @@ static int fn_hash_insert(struct fib_tab | |
320 | new_fa->fa_type = cfg->fc_type; | |
321 | new_fa->fa_scope = cfg->fc_scope; | |
322 | new_fa->fa_state = 0; | |
323 | + new_fa->fa_last_dflt = -1; | |
324 | ||
325 | /* | |
326 | * Insert new entry to the list. | |
327 | diff -urp v2.6.31/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h | |
328 | --- v2.6.31/linux/net/ipv4/fib_lookup.h 2009-09-11 10:27:17.000000000 +0300 | |
329 | +++ linux/net/ipv4/fib_lookup.h 2009-09-11 22:11:20.000000000 +0300 | |
330 | @@ -8,6 +8,7 @@ | |
331 | struct fib_alias { | |
332 | struct list_head fa_list; | |
333 | struct fib_info *fa_info; | |
334 | + int fa_last_dflt; | |
335 | u8 fa_tos; | |
336 | u8 fa_type; | |
337 | u8 fa_scope; | |
338 | @@ -37,7 +38,8 @@ extern struct fib_alias *fib_find_alias( | |
339 | u8 tos, u32 prio); | |
340 | extern int fib_detect_death(struct fib_info *fi, int order, | |
341 | struct fib_info **last_resort, | |
342 | - int *last_idx, int dflt); | |
343 | + int *last_idx, int *dflt, int *last_nhsel, | |
344 | + const struct flowi *flp); | |
345 | ||
346 | static inline void fib_result_assign(struct fib_result *res, | |
347 | struct fib_info *fi) | |
348 | diff -urp v2.6.31/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c | |
349 | --- v2.6.31/linux/net/ipv4/fib_rules.c 2009-09-11 10:27:17.000000000 +0300 | |
350 | +++ linux/net/ipv4/fib_rules.c 2009-09-11 22:11:20.000000000 +0300 | |
351 | @@ -54,6 +54,11 @@ u32 fib_rules_tclass(struct fib_result * | |
352 | } | |
353 | #endif | |
354 | ||
355 | +int fib_result_table(struct fib_result *res) /* out-of-line accessor behind FIB_RES_TABLE(): yields res->r->table */ | |
356 | +{ | |
357 | + return res->r->table; | |
358 | +} | |
359 | + | |
360 | int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) | |
361 | { | |
362 | struct fib_lookup_arg arg = { | |
363 | diff -urp v2.6.31/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c | |
364 | --- v2.6.31/linux/net/ipv4/fib_semantics.c 2009-09-11 10:27:17.000000000 +0300 | |
365 | +++ linux/net/ipv4/fib_semantics.c 2009-09-11 22:12:39.000000000 +0300 | |
366 | @@ -50,6 +50,7 @@ static struct hlist_head *fib_info_hash; | |
367 | static struct hlist_head *fib_info_laddrhash; | |
368 | static unsigned int fib_hash_size; | |
369 | static unsigned int fib_info_cnt; | |
370 | +DEFINE_RWLOCK(fib_nhflags_lock); /* taken around nh_flags updates (RTNH_F_SUSPECT marking, RTNH_F_DEAD on device down) */ | |
371 | ||
372 | #define DEVINDEX_HASHBITS 8 | |
373 | #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) | |
374 | @@ -186,7 +187,7 @@ static __inline__ int nh_comp(const stru | |
375 | #ifdef CONFIG_NET_CLS_ROUTE | |
376 | nh->nh_tclassid != onh->nh_tclassid || | |
377 | #endif | |
378 | - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) | |
379 | + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE)) | |
380 | return -1; | |
381 | onh++; | |
382 | } endfor_nexthops(fi); | |
383 | @@ -237,7 +238,7 @@ static struct fib_info *fib_find_info(co | |
384 | nfi->fib_priority == fi->fib_priority && | |
385 | memcmp(nfi->fib_metrics, fi->fib_metrics, | |
386 | sizeof(fi->fib_metrics)) == 0 && | |
387 | - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && | |
388 | + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 && | |
389 | (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) | |
390 | return fi; | |
391 | } | |
392 | @@ -349,26 +350,70 @@ struct fib_alias *fib_find_alias(struct | |
393 | } | |
394 | ||
395 | int fib_detect_death(struct fib_info *fi, int order, | |
396 | - struct fib_info **last_resort, int *last_idx, int dflt) | |
397 | + struct fib_info **last_resort, int *last_idx, int *dflt, | |
398 | + int *last_nhsel, const struct flowi *flp) | |
399 | { | |
400 | struct neighbour *n; | |
401 | - int state = NUD_NONE; | |
402 | + int nhsel; | |
403 | + int state; | |
404 | + struct fib_nh * nh; | |
405 | + __be32 dst; | |
406 | + int flag, dead = 1; | |
407 | + | |
408 | + /* change_nexthops(fi) { */ | |
409 | + for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) { | |
410 | + if (flp->oif && flp->oif != nh->nh_oif) | |
411 | + continue; | |
412 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw && | |
413 | + nh->nh_scope == RT_SCOPE_LINK) | |
414 | + continue; | |
415 | + if (nh->nh_flags & RTNH_F_DEAD) | |
416 | + continue; | |
417 | ||
418 | - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); | |
419 | - if (n) { | |
420 | - state = n->nud_state; | |
421 | - neigh_release(n); | |
422 | - } | |
423 | - if (state == NUD_REACHABLE) | |
424 | - return 0; | |
425 | - if ((state&NUD_VALID) && order != dflt) | |
426 | - return 0; | |
427 | - if ((state&NUD_VALID) || | |
428 | - (*last_idx<0 && order > dflt)) { | |
429 | - *last_resort = fi; | |
430 | - *last_idx = order; | |
431 | + flag = 0; | |
432 | + if (nh->nh_dev->flags & IFF_NOARP) { | |
433 | + dead = 0; | |
434 | + goto setfl; | |
435 | + } | |
436 | + | |
437 | + dst = nh->nh_gw; | |
438 | + if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK) | |
439 | + dst = flp->fl4_dst; | |
440 | + | |
441 | + state = NUD_NONE; | |
442 | + n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev); | |
443 | + if (n) { | |
444 | + state = n->nud_state; | |
445 | + neigh_release(n); | |
446 | + } | |
447 | + if (state==NUD_REACHABLE || | |
448 | + ((state&NUD_VALID) && order != *dflt)) { | |
449 | + dead = 0; | |
450 | + goto setfl; | |
451 | + } | |
452 | + if (!(state&NUD_VALID)) | |
453 | + flag = 1; | |
454 | + if (!dead) | |
455 | + goto setfl; | |
456 | + if ((state&NUD_VALID) || | |
457 | + (*last_idx<0 && order >= *dflt)) { | |
458 | + *last_resort = fi; | |
459 | + *last_idx = order; | |
460 | + *last_nhsel = nhsel; | |
461 | + } | |
462 | + | |
463 | + setfl: | |
464 | + | |
465 | + read_lock_bh(&fib_nhflags_lock); | |
466 | + if (flag) | |
467 | + nh->nh_flags |= RTNH_F_SUSPECT; | |
468 | + else | |
469 | + nh->nh_flags &= ~RTNH_F_SUSPECT; | |
470 | + read_unlock_bh(&fib_nhflags_lock); | |
471 | } | |
472 | - return 1; | |
473 | + /* } endfor_nexthops(fi) */ | |
474 | + | |
475 | + return dead; | |
476 | } | |
477 | ||
478 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
479 | @@ -540,8 +585,11 @@ static int fib_check_nh(struct fib_confi | |
480 | return -EINVAL; | |
481 | if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) | |
482 | return -ENODEV; | |
483 | - if (!(dev->flags&IFF_UP)) | |
484 | - return -ENETDOWN; | |
485 | + if (!(dev->flags&IFF_UP)) { | |
486 | + if (fi->fib_protocol != RTPROT_STATIC) | |
487 | + return -ENETDOWN; | |
488 | + nh->nh_flags |= RTNH_F_DEAD; | |
489 | + } | |
490 | nh->nh_dev = dev; | |
491 | dev_hold(dev); | |
492 | nh->nh_scope = RT_SCOPE_LINK; | |
493 | @@ -561,24 +609,48 @@ static int fib_check_nh(struct fib_confi | |
494 | /* It is not necessary, but requires a bit of thinking */ | |
495 | if (fl.fl4_scope < RT_SCOPE_LINK) | |
496 | fl.fl4_scope = RT_SCOPE_LINK; | |
497 | - if ((err = fib_lookup(net, &fl, &res)) != 0) | |
498 | - return err; | |
499 | + err = fib_lookup(net, &fl, &res); | |
500 | } | |
501 | - err = -EINVAL; | |
502 | - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) | |
503 | - goto out; | |
504 | - nh->nh_scope = res.scope; | |
505 | - nh->nh_oif = FIB_RES_OIF(res); | |
506 | - if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) | |
507 | - goto out; | |
508 | - dev_hold(nh->nh_dev); | |
509 | - err = -ENETDOWN; | |
510 | - if (!(nh->nh_dev->flags & IFF_UP)) | |
511 | - goto out; | |
512 | - err = 0; | |
513 | + if (err) { | |
514 | + struct in_device *in_dev; | |
515 | + | |
516 | + if (err != -ENETUNREACH || | |
517 | + fi->fib_protocol != RTPROT_STATIC) | |
518 | + return err; | |
519 | + | |
520 | + in_dev = inetdev_by_index(net, nh->nh_oif); | |
521 | + if (in_dev == NULL || | |
522 | + in_dev->dev->flags & IFF_UP) { | |
523 | + if (in_dev) | |
524 | + in_dev_put(in_dev); | |
525 | + return err; | |
526 | + } | |
527 | + nh->nh_flags |= RTNH_F_DEAD; | |
528 | + nh->nh_scope = RT_SCOPE_LINK; | |
529 | + nh->nh_dev = in_dev->dev; | |
530 | + dev_hold(nh->nh_dev); | |
531 | + in_dev_put(in_dev); | |
532 | + } else { | |
533 | + err = -EINVAL; | |
534 | + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) | |
535 | + goto out; | |
536 | + nh->nh_scope = res.scope; | |
537 | + nh->nh_oif = FIB_RES_OIF(res); | |
538 | + if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) | |
539 | + goto out; | |
540 | + dev_hold(nh->nh_dev); | |
541 | + if (!(nh->nh_dev->flags & IFF_UP)) { | |
542 | + if (fi->fib_protocol != RTPROT_STATIC) { | |
543 | + err = -ENETDOWN; | |
544 | + goto out; | |
545 | + } | |
546 | + nh->nh_flags |= RTNH_F_DEAD; | |
547 | + } | |
548 | + err = 0; | |
549 | out: | |
550 | - fib_res_put(&res); | |
551 | - return err; | |
552 | + fib_res_put(&res); | |
553 | + return err; | |
554 | + } | |
555 | } else { | |
556 | struct in_device *in_dev; | |
557 | ||
558 | @@ -589,8 +661,11 @@ out: | |
559 | if (in_dev == NULL) | |
560 | return -ENODEV; | |
561 | if (!(in_dev->dev->flags&IFF_UP)) { | |
562 | - in_dev_put(in_dev); | |
563 | - return -ENETDOWN; | |
564 | + if (fi->fib_protocol != RTPROT_STATIC) { | |
565 | + in_dev_put(in_dev); | |
566 | + return -ENETDOWN; | |
567 | + } | |
568 | + nh->nh_flags |= RTNH_F_DEAD; | |
569 | } | |
570 | nh->nh_dev = in_dev->dev; | |
571 | dev_hold(nh->nh_dev); | |
572 | @@ -899,8 +974,12 @@ int fib_semantic_match(struct list_head | |
573 | for_nexthops(fi) { | |
574 | if (nh->nh_flags&RTNH_F_DEAD) | |
575 | continue; | |
576 | - if (!flp->oif || flp->oif == nh->nh_oif) | |
577 | - break; | |
578 | + if (flp->oif && flp->oif != nh->nh_oif) | |
579 | + continue; | |
580 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && | |
581 | + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | |
582 | + continue; | |
583 | + break; | |
584 | } | |
585 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
586 | if (nhsel < fi->fib_nhs) { | |
587 | @@ -1080,18 +1159,29 @@ int fib_sync_down_dev(struct net_device | |
588 | prev_fi = fi; | |
589 | dead = 0; | |
590 | change_nexthops(fi) { | |
591 | - if (nh->nh_flags&RTNH_F_DEAD) | |
592 | - dead++; | |
593 | - else if (nh->nh_dev == dev && | |
594 | - nh->nh_scope != scope) { | |
595 | - nh->nh_flags |= RTNH_F_DEAD; | |
596 | + if (nh->nh_flags&RTNH_F_DEAD) { | |
597 | + if (fi->fib_protocol!=RTPROT_STATIC || | |
598 | + nh->nh_dev == NULL || | |
599 | + __in_dev_get_rtnl(nh->nh_dev) == NULL || | |
600 | + nh->nh_dev->flags&IFF_UP) | |
601 | + dead++; | |
602 | + } else if (nh->nh_dev == dev && | |
603 | + nh->nh_scope != scope) { | |
604 | + write_lock_bh(&fib_nhflags_lock); | |
605 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
606 | - spin_lock_bh(&fib_multipath_lock); | |
607 | + spin_lock(&fib_multipath_lock); | |
608 | + nh->nh_flags |= RTNH_F_DEAD; | |
609 | fi->fib_power -= nh->nh_power; | |
610 | nh->nh_power = 0; | |
611 | - spin_unlock_bh(&fib_multipath_lock); | |
612 | + spin_unlock(&fib_multipath_lock); | |
613 | +#else | |
614 | + nh->nh_flags |= RTNH_F_DEAD; | |
615 | #endif | |
616 | - dead++; | |
617 | + write_unlock_bh(&fib_nhflags_lock); | |
618 | + if (fi->fib_protocol!=RTPROT_STATIC || | |
619 | + force || | |
620 | + __in_dev_get_rtnl(dev) == NULL) | |
621 | + dead++; | |
622 | } | |
623 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
624 | if (force > 1 && nh->nh_dev == dev) { | |
625 | @@ -1109,11 +1199,8 @@ int fib_sync_down_dev(struct net_device | |
626 | return ret; | |
627 | } | |
628 | ||
629 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
630 | - | |
631 | /* | |
632 | - Dead device goes up. We wake up dead nexthops. | |
633 | - It takes sense only on multipath routes. | |
634 | + Dead device goes up or new address is added. We wake up dead nexthops. | |
635 | */ | |
636 | ||
637 | int fib_sync_up(struct net_device *dev) | |
638 | @@ -1123,8 +1210,10 @@ int fib_sync_up(struct net_device *dev) | |
639 | struct hlist_head *head; | |
640 | struct hlist_node *node; | |
641 | struct fib_nh *nh; | |
642 | - int ret; | |
643 | + struct fib_result res; | |
644 | + int ret, rep; | |
645 | ||
646 | +repeat: | |
647 | if (!(dev->flags&IFF_UP)) | |
648 | return 0; | |
649 | ||
650 | @@ -1132,6 +1221,7 @@ int fib_sync_up(struct net_device *dev) | |
651 | hash = fib_devindex_hashfn(dev->ifindex); | |
652 | head = &fib_info_devhash[hash]; | |
653 | ret = 0; | |
654 | + rep = 0; | |
655 | ||
656 | hlist_for_each_entry(nh, node, head, nh_hash) { | |
657 | struct fib_info *fi = nh->nh_parent; | |
658 | @@ -1144,19 +1234,39 @@ int fib_sync_up(struct net_device *dev) | |
659 | prev_fi = fi; | |
660 | alive = 0; | |
661 | change_nexthops(fi) { | |
662 | - if (!(nh->nh_flags&RTNH_F_DEAD)) { | |
663 | - alive++; | |
664 | + if (!(nh->nh_flags&RTNH_F_DEAD)) | |
665 | continue; | |
666 | - } | |
667 | if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) | |
668 | continue; | |
669 | if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) | |
670 | continue; | |
671 | + if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) { | |
672 | + struct flowi fl = { | |
673 | + .nl_u = { .ip4_u = | |
674 | + { .daddr = nh->nh_gw, | |
675 | + .scope = nh->nh_scope } }, | |
676 | + .oif = nh->nh_oif, | |
677 | + }; | |
678 | + if (fib_lookup(dev_net(dev), &fl, &res) != 0) | |
679 | + continue; | |
680 | + if (res.type != RTN_UNICAST && | |
681 | + res.type != RTN_LOCAL) { | |
682 | + fib_res_put(&res); | |
683 | + continue; | |
684 | + } | |
685 | + nh->nh_scope = res.scope; | |
686 | + fib_res_put(&res); | |
687 | + rep = 1; | |
688 | + } | |
689 | alive++; | |
690 | +#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
691 | spin_lock_bh(&fib_multipath_lock); | |
692 | nh->nh_power = 0; | |
693 | +#endif | |
694 | nh->nh_flags &= ~RTNH_F_DEAD; | |
695 | +#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
696 | spin_unlock_bh(&fib_multipath_lock); | |
697 | +#endif | |
698 | } endfor_nexthops(fi) | |
699 | ||
700 | if (alive > 0) { | |
701 | @@ -1164,10 +1274,14 @@ int fib_sync_up(struct net_device *dev) | |
702 | ret++; | |
703 | } | |
704 | } | |
705 | + if (rep) | |
706 | + goto repeat; | |
707 | ||
708 | return ret; | |
709 | } | |
710 | ||
711 | +#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
712 | + | |
713 | /* | |
714 | The algorithm is suboptimal, but it provides really | |
715 | fair weighted route distribution. | |
716 | @@ -1176,24 +1290,45 @@ int fib_sync_up(struct net_device *dev) | |
717 | void fib_select_multipath(const struct flowi *flp, struct fib_result *res) | |
718 | { | |
719 | struct fib_info *fi = res->fi; | |
720 | - int w; | |
721 | + int w, alive; | |
722 | ||
723 | spin_lock_bh(&fib_multipath_lock); | |
724 | + if (flp->oif) { | |
725 | + int sel = -1; | |
726 | + w = -1; | |
727 | + change_nexthops(fi) { | |
728 | + if (flp->oif != nh->nh_oif) | |
729 | + continue; | |
730 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && | |
731 | + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | |
732 | + continue; | |
733 | + if (!(nh->nh_flags&RTNH_F_BADSTATE)) { | |
734 | + if (nh->nh_power > w) { | |
735 | + w = nh->nh_power; | |
736 | + sel = nhsel; | |
737 | + } | |
738 | + } | |
739 | + } endfor_nexthops(fi); | |
740 | + if (sel >= 0) { | |
741 | + spin_unlock_bh(&fib_multipath_lock); | |
742 | + res->nh_sel = sel; | |
743 | + return; | |
744 | + } | |
745 | + goto last_resort; | |
746 | + } | |
747 | + | |
748 | +repeat: | |
749 | if (fi->fib_power <= 0) { | |
750 | int power = 0; | |
751 | change_nexthops(fi) { | |
752 | - if (!(nh->nh_flags&RTNH_F_DEAD)) { | |
753 | + if (!(nh->nh_flags&RTNH_F_BADSTATE)) { | |
754 | power += nh->nh_weight; | |
755 | nh->nh_power = nh->nh_weight; | |
756 | } | |
757 | } endfor_nexthops(fi); | |
758 | fi->fib_power = power; | |
759 | - if (power <= 0) { | |
760 | - spin_unlock_bh(&fib_multipath_lock); | |
761 | - /* Race condition: route has just become dead. */ | |
762 | - res->nh_sel = 0; | |
763 | - return; | |
764 | - } | |
765 | + if (power <= 0) | |
766 | + goto last_resort; | |
767 | } | |
768 | ||
769 | ||
770 | @@ -1203,20 +1338,40 @@ void fib_select_multipath(const struct f | |
771 | ||
772 | w = jiffies % fi->fib_power; | |
773 | ||
774 | + alive = 0; | |
775 | change_nexthops(fi) { | |
776 | - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { | |
777 | + if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) { | |
778 | if ((w -= nh->nh_power) <= 0) { | |
779 | nh->nh_power--; | |
780 | fi->fib_power--; | |
781 | - res->nh_sel = nhsel; | |
782 | spin_unlock_bh(&fib_multipath_lock); | |
783 | + res->nh_sel = nhsel; | |
784 | return; | |
785 | } | |
786 | + alive = 1; | |
787 | + } | |
788 | + } endfor_nexthops(fi); | |
789 | + if (alive) { | |
790 | + fi->fib_power = 0; | |
791 | + goto repeat; | |
792 | + } | |
793 | + | |
794 | +last_resort: | |
795 | + | |
796 | + for_nexthops(fi) { | |
797 | + if (!(nh->nh_flags&RTNH_F_DEAD)) { | |
798 | + if (flp->oif && flp->oif != nh->nh_oif) | |
799 | + continue; | |
800 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && | |
801 | + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | |
802 | + continue; | |
803 | + spin_unlock_bh(&fib_multipath_lock); | |
804 | + res->nh_sel = nhsel; | |
805 | + return; | |
806 | } | |
807 | } endfor_nexthops(fi); | |
808 | ||
809 | /* Race condition: route has just become dead. */ | |
810 | - res->nh_sel = 0; | |
811 | spin_unlock_bh(&fib_multipath_lock); | |
812 | } | |
813 | #endif | |
814 | diff -urp v2.6.31/linux/net/ipv4/fib_trie.c linux/net/ipv4/fib_trie.c | |
815 | --- v2.6.31/linux/net/ipv4/fib_trie.c 2009-09-11 10:27:17.000000000 +0300 | |
816 | +++ linux/net/ipv4/fib_trie.c 2009-09-11 22:11:20.000000000 +0300 | |
817 | @@ -1291,6 +1291,7 @@ static int fn_trie_insert(struct fib_tab | |
818 | fi_drop = fa->fa_info; | |
819 | new_fa->fa_tos = fa->fa_tos; | |
820 | new_fa->fa_info = fi; | |
821 | + new_fa->fa_last_dflt = -1; | |
822 | new_fa->fa_type = cfg->fc_type; | |
823 | new_fa->fa_scope = cfg->fc_scope; | |
824 | state = fa->fa_state; | |
825 | @@ -1331,6 +1332,7 @@ static int fn_trie_insert(struct fib_tab | |
826 | new_fa->fa_type = cfg->fc_type; | |
827 | new_fa->fa_scope = cfg->fc_scope; | |
828 | new_fa->fa_state = 0; | |
829 | + new_fa->fa_last_dflt = -1; | |
830 | /* | |
831 | * Insert new entry to the list. | |
832 | */ | |
833 | @@ -1831,24 +1833,31 @@ static void fn_trie_select_default(struc | |
834 | struct fib_result *res) | |
835 | { | |
836 | struct trie *t = (struct trie *) tb->tb_data; | |
837 | - int order, last_idx; | |
838 | + int order, last_idx, last_dflt, last_nhsel; | |
839 | + struct fib_alias *first_fa = NULL; | |
840 | struct fib_info *fi = NULL; | |
841 | struct fib_info *last_resort; | |
842 | struct fib_alias *fa = NULL; | |
843 | struct list_head *fa_head; | |
844 | struct leaf *l; | |
845 | + u32 key, mask; | |
846 | ||
847 | + last_dflt = -2; | |
848 | + last_nhsel = 0; | |
849 | last_idx = -1; | |
850 | last_resort = NULL; | |
851 | order = -1; | |
852 | ||
853 | + mask = inet_make_mask(res->prefixlen); | |
854 | + key = ntohl(flp->fl4_dst & mask); | |
855 | + | |
856 | rcu_read_lock(); | |
857 | ||
858 | - l = fib_find_node(t, 0); | |
859 | + l = fib_find_node(t, key); | |
860 | if (!l) | |
861 | goto out; | |
862 | ||
863 | - fa_head = get_fa_head(l, 0); | |
864 | + fa_head = get_fa_head(l, res->prefixlen); | |
865 | if (!fa_head) | |
866 | goto out; | |
867 | ||
868 | @@ -1862,39 +1871,52 @@ static void fn_trie_select_default(struc | |
869 | fa->fa_type != RTN_UNICAST) | |
870 | continue; | |
871 | ||
872 | + if (fa->fa_tos && | |
873 | + fa->fa_tos != flp->fl4_tos) | |
874 | + continue; | |
875 | if (next_fi->fib_priority > res->fi->fib_priority) | |
876 | break; | |
877 | - if (!next_fi->fib_nh[0].nh_gw || | |
878 | - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | |
879 | - continue; | |
880 | fa->fa_state |= FA_S_ACCESSED; | |
881 | ||
882 | - if (fi == NULL) { | |
883 | - if (next_fi != res->fi) | |
884 | - break; | |
885 | - } else if (!fib_detect_death(fi, order, &last_resort, | |
886 | - &last_idx, tb->tb_default)) { | |
887 | + if (!first_fa) { | |
888 | + last_dflt = fa->fa_last_dflt; | |
889 | + first_fa = fa; | |
890 | + } | |
891 | + if (fi && !fib_detect_death(fi, order, &last_resort, | |
892 | + &last_idx, &last_dflt, &last_nhsel, flp)) { | |
893 | fib_result_assign(res, fi); | |
894 | - tb->tb_default = order; | |
895 | + first_fa->fa_last_dflt = order; | |
896 | goto out; | |
897 | } | |
898 | fi = next_fi; | |
899 | order++; | |
900 | } | |
901 | if (order <= 0 || fi == NULL) { | |
902 | - tb->tb_default = -1; | |
903 | + if (fi && fi->fib_nhs > 1 && | |
904 | + fib_detect_death(fi, order, &last_resort, &last_idx, | |
905 | + &last_dflt, &last_nhsel, flp) && | |
906 | + last_resort == fi) { | |
907 | + read_lock_bh(&fib_nhflags_lock); | |
908 | + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
909 | + read_unlock_bh(&fib_nhflags_lock); | |
910 | + } | |
911 | + if (first_fa) first_fa->fa_last_dflt = -1; | |
912 | goto out; | |
913 | } | |
914 | ||
915 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | |
916 | - tb->tb_default)) { | |
917 | + &last_dflt, &last_nhsel, flp)) { | |
918 | fib_result_assign(res, fi); | |
919 | - tb->tb_default = order; | |
920 | + first_fa->fa_last_dflt = order; | |
921 | goto out; | |
922 | } | |
923 | - if (last_idx >= 0) | |
924 | + if (last_idx >= 0) { | |
925 | fib_result_assign(res, last_resort); | |
926 | - tb->tb_default = last_idx; | |
927 | + read_lock_bh(&fib_nhflags_lock); | |
928 | + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
929 | + read_unlock_bh(&fib_nhflags_lock); | |
930 | + first_fa->fa_last_dflt = last_idx; | |
931 | + } | |
932 | out: | |
933 | rcu_read_unlock(); | |
934 | } | |
935 | diff -urp v2.6.31/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c | |
936 | --- v2.6.31/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2009-09-11 10:27:17.000000000 +0300 | |
937 | +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2009-09-11 22:14:42.000000000 +0300 | |
938 | @@ -51,7 +51,7 @@ masquerade_tg(struct sk_buff *skb, const | |
939 | enum ip_conntrack_info ctinfo; | |
940 | struct nf_nat_range newrange; | |
941 | const struct nf_nat_multi_range_compat *mr; | |
942 | - const struct rtable *rt; | |
943 | + struct rtable *rt; | |
944 | __be32 newsrc; | |
945 | ||
946 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); | |
947 | @@ -69,13 +69,28 @@ masquerade_tg(struct sk_buff *skb, const | |
948 | return NF_ACCEPT; | |
949 | ||
950 | mr = par->targinfo; | |
951 | - rt = skb_rtable(skb); | |
952 | - newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); | |
953 | - if (!newsrc) { | |
954 | - printk("MASQUERADE: %s ate my IP address\n", par->out->name); | |
955 | - return NF_DROP; | |
956 | + | |
957 | + { | |
958 | + struct flowi fl = { .nl_u = { .ip4_u = | |
959 | + { .daddr = ip_hdr(skb)->daddr, | |
960 | + .tos = (RT_TOS(ip_hdr(skb)->tos) | | |
961 | + RTO_CONN), | |
962 | + .gw = skb_rtable(skb)->rt_gateway, | |
963 | + } }, | |
964 | + .mark = skb->mark, | |
965 | + .oif = par->out->ifindex }; | |
966 | + if (ip_route_output_key(dev_net(par->out), &rt, &fl) != 0) { | |
967 | + /* Funky routing can do this. */ | |
968 | + if (net_ratelimit()) | |
969 | + printk("MASQUERADE:" | |
970 | + " No route: Rusty's brain broke!\n"); | |
971 | + return NF_DROP; | |
972 | + } | |
973 | } | |
974 | ||
975 | + newsrc = rt->rt_src; | |
976 | + ip_rt_put(rt); | |
977 | + | |
978 | nat->masq_index = par->out->ifindex; | |
979 | ||
980 | /* Transfer from original range. */ | |
981 | diff -urp v2.6.31/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c | |
982 | --- v2.6.31/linux/net/ipv4/netfilter/nf_nat_core.c 2009-06-13 10:53:58.000000000 +0300 | |
983 | +++ linux/net/ipv4/netfilter/nf_nat_core.c 2009-09-11 22:13:59.000000000 +0300 | |
984 | @@ -711,6 +711,52 @@ static struct pernet_operations nf_nat_n | |
985 | .exit = nf_nat_net_exit, | |
986 | }; | |
987 | ||
988 | +unsigned int | |
989 | +ip_nat_route_input(unsigned int hooknum, | |
990 | + struct sk_buff *skb, | |
991 | + const struct net_device *in, | |
992 | + const struct net_device *out, | |
993 | + int (*okfn)(struct sk_buff *)) | |
994 | +{ | |
995 | + struct iphdr *iph; | |
996 | + struct nf_conn *conn; | |
997 | + enum ip_conntrack_info ctinfo; | |
998 | + enum ip_conntrack_dir dir; | |
999 | + unsigned long statusbit; | |
1000 | + __be32 saddr; | |
1001 | + | |
1002 | + if (!(conn = nf_ct_get(skb, &ctinfo))) | |
1003 | + return NF_ACCEPT; | |
1004 | + | |
1005 | + if (!(conn->status & IPS_NAT_DONE_MASK)) | |
1006 | + return NF_ACCEPT; | |
1007 | + dir = CTINFO2DIR(ctinfo); | |
1008 | + statusbit = IPS_SRC_NAT; | |
1009 | + if (dir == IP_CT_DIR_REPLY) | |
1010 | + statusbit ^= IPS_NAT_MASK; | |
1011 | + if (!(conn->status & statusbit)) | |
1012 | + return NF_ACCEPT; | |
1013 | + | |
1014 | + if (skb_dst(skb)) | |
1015 | + return NF_ACCEPT; | |
1016 | + | |
1017 | + if (skb->len < sizeof(struct iphdr)) | |
1018 | + return NF_ACCEPT; | |
1019 | + | |
1020 | + /* use daddr in other direction as masquerade address (lsrc) */ | |
1021 | + iph = ip_hdr(skb); | |
1022 | + saddr = conn->tuplehash[!dir].tuple.dst.u3.ip; | |
1023 | + if (saddr == iph->saddr) | |
1024 | + return NF_ACCEPT; | |
1025 | + | |
1026 | + if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos, | |
1027 | + skb->dev, saddr)) | |
1028 | + return NF_DROP; | |
1029 | + | |
1030 | + return NF_ACCEPT; | |
1031 | +} | |
1032 | +EXPORT_SYMBOL_GPL(ip_nat_route_input); | |
1033 | + | |
1034 | static int __init nf_nat_init(void) | |
1035 | { | |
1036 | size_t i; | |
1037 | diff -urp v2.6.31/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c | |
1038 | --- v2.6.31/linux/net/ipv4/netfilter/nf_nat_standalone.c 2009-09-11 10:27:17.000000000 +0300 | |
1039 | +++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2009-09-11 22:12:39.000000000 +0300 | |
1040 | @@ -255,6 +255,14 @@ static struct nf_hook_ops nf_nat_ops[] _ | |
1041 | .hooknum = NF_INET_PRE_ROUTING, | |
1042 | .priority = NF_IP_PRI_NAT_DST, | |
1043 | }, | |
1044 | + /* Before routing, route before mangling */ | |
1045 | + { | |
1046 | + .hook = ip_nat_route_input, | |
1047 | + .owner = THIS_MODULE, | |
1048 | + .pf = PF_INET, | |
1049 | + .hooknum = NF_INET_PRE_ROUTING, | |
1050 | + .priority = NF_IP_PRI_LAST-1, | |
1051 | + }, | |
1052 | /* After packet filtering, change source */ | |
1053 | { | |
1054 | .hook = nf_nat_out, | |
1055 | diff -urp v2.6.31/linux/net/ipv4/route.c linux/net/ipv4/route.c | |
1056 | --- v2.6.31/linux/net/ipv4/route.c 2009-09-11 10:27:17.000000000 +0300 | |
1057 | +++ linux/net/ipv4/route.c 2009-09-11 22:12:39.000000000 +0300 | |
1058 | @@ -695,6 +695,8 @@ static inline int compare_keys(struct fl | |
1059 | return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | | |
1060 | (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | | |
1061 | (fl1->mark ^ fl2->mark) | | |
1062 | + ((__force u32)(fl1->nl_u.ip4_u.lsrc ^ fl2->nl_u.ip4_u.lsrc)) | | |
1063 | + ((__force u32)(fl1->nl_u.ip4_u.gw ^ fl2->nl_u.ip4_u.gw)) | | |
1064 | (*(u16 *)&fl1->nl_u.ip4_u.tos ^ | |
1065 | *(u16 *)&fl2->nl_u.ip4_u.tos) | | |
1066 | (fl1->oif ^ fl2->oif) | | |
1067 | @@ -1424,6 +1426,7 @@ void ip_rt_redirect(__be32 old_gw, __be3 | |
1068 | ||
1069 | /* Gateway is different ... */ | |
1070 | rt->rt_gateway = new_gw; | |
1071 | + if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw; | |
1072 | ||
1073 | /* Redirect received -> path was valid */ | |
1074 | dst_confirm(&rth->u.dst); | |
1075 | @@ -1870,6 +1873,7 @@ static int ip_route_input_mc(struct sk_b | |
1076 | rth->fl.fl4_tos = tos; | |
1077 | rth->fl.mark = skb->mark; | |
1078 | rth->fl.fl4_src = saddr; | |
1079 | + rth->fl.fl4_lsrc = 0; | |
1080 | rth->rt_src = saddr; | |
1081 | #ifdef CONFIG_NET_CLS_ROUTE | |
1082 | rth->u.dst.tclassid = itag; | |
1083 | @@ -1880,6 +1884,7 @@ static int ip_route_input_mc(struct sk_b | |
1084 | dev_hold(rth->u.dst.dev); | |
1085 | rth->idev = in_dev_get(rth->u.dst.dev); | |
1086 | rth->fl.oif = 0; | |
1087 | + rth->fl.fl4_gw = 0; | |
1088 | rth->rt_gateway = daddr; | |
1089 | rth->rt_spec_dst= spec_dst; | |
1090 | rth->rt_genid = rt_genid(dev_net(dev)); | |
1091 | @@ -1944,7 +1949,7 @@ static int __mkroute_input(struct sk_buf | |
1092 | struct fib_result *res, | |
1093 | struct in_device *in_dev, | |
1094 | __be32 daddr, __be32 saddr, u32 tos, | |
1095 | - struct rtable **result) | |
1096 | + __be32 lsrc, struct rtable **result) | |
1097 | { | |
1098 | ||
1099 | struct rtable *rth; | |
1100 | @@ -1978,6 +1983,7 @@ static int __mkroute_input(struct sk_buf | |
1101 | flags |= RTCF_DIRECTSRC; | |
1102 | ||
1103 | if (out_dev == in_dev && err && | |
1104 | + !lsrc && | |
1105 | (IN_DEV_SHARED_MEDIA(out_dev) || | |
1106 | inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) | |
1107 | flags |= RTCF_DOREDIRECT; | |
1108 | @@ -2011,6 +2017,7 @@ static int __mkroute_input(struct sk_buf | |
1109 | rth->fl.mark = skb->mark; | |
1110 | rth->fl.fl4_src = saddr; | |
1111 | rth->rt_src = saddr; | |
1112 | + rth->fl.fl4_lsrc = lsrc; | |
1113 | rth->rt_gateway = daddr; | |
1114 | rth->rt_iif = | |
1115 | rth->fl.iif = in_dev->dev->ifindex; | |
1116 | @@ -2018,6 +2025,7 @@ static int __mkroute_input(struct sk_buf | |
1117 | dev_hold(rth->u.dst.dev); | |
1118 | rth->idev = in_dev_get(rth->u.dst.dev); | |
1119 | rth->fl.oif = 0; | |
1120 | + rth->fl.fl4_gw = 0; | |
1121 | rth->rt_spec_dst= spec_dst; | |
1122 | ||
1123 | rth->u.dst.input = ip_forward; | |
1124 | @@ -2038,21 +2046,23 @@ static int __mkroute_input(struct sk_buf | |
1125 | ||
1126 | static int ip_mkroute_input(struct sk_buff *skb, | |
1127 | struct fib_result *res, | |
1128 | + struct net *net, | |
1129 | const struct flowi *fl, | |
1130 | struct in_device *in_dev, | |
1131 | - __be32 daddr, __be32 saddr, u32 tos) | |
1132 | + __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc) | |
1133 | { | |
1134 | struct rtable* rth = NULL; | |
1135 | int err; | |
1136 | unsigned hash; | |
1137 | ||
1138 | + fib_select_default(net, fl, res); | |
1139 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
1140 | - if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) | |
1141 | + if (res->fi && res->fi->fib_nhs > 1) | |
1142 | fib_select_multipath(fl, res); | |
1143 | #endif | |
1144 | ||
1145 | /* create a routing cache entry */ | |
1146 | - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); | |
1147 | + err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth); | |
1148 | if (err) | |
1149 | return err; | |
1150 | ||
1151 | @@ -2073,18 +2083,19 @@ static int ip_mkroute_input(struct sk_bu | |
1152 | */ | |
1153 | ||
1154 | static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1155 | - u8 tos, struct net_device *dev) | |
1156 | + u8 tos, struct net_device *dev, __be32 lsrc) | |
1157 | { | |
1158 | struct fib_result res; | |
1159 | struct in_device *in_dev = in_dev_get(dev); | |
1160 | struct flowi fl = { .nl_u = { .ip4_u = | |
1161 | { .daddr = daddr, | |
1162 | - .saddr = saddr, | |
1163 | + .saddr = lsrc? : saddr, | |
1164 | .tos = tos, | |
1165 | .scope = RT_SCOPE_UNIVERSE, | |
1166 | } }, | |
1167 | .mark = skb->mark, | |
1168 | - .iif = dev->ifindex }; | |
1169 | + .iif = lsrc? | |
1170 | + dev_net(dev)->loopback_dev->ifindex : dev->ifindex }; | |
1171 | unsigned flags = 0; | |
1172 | u32 itag = 0; | |
1173 | struct rtable * rth; | |
1174 | @@ -2120,6 +2131,12 @@ static int ip_route_input_slow(struct sk | |
1175 | ipv4_is_loopback(daddr)) | |
1176 | goto martian_destination; | |
1177 | ||
1178 | + if (lsrc) { | |
1179 | + if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) || | |
1180 | + ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc)) | |
1181 | + goto e_inval; | |
1182 | + } | |
1183 | + | |
1184 | /* | |
1185 | * Now we are ready to route packet. | |
1186 | */ | |
1187 | @@ -2129,6 +2146,8 @@ static int ip_route_input_slow(struct sk | |
1188 | goto no_route; | |
1189 | } | |
1190 | free_res = 1; | |
1191 | + fl.iif = dev->ifindex; | |
1192 | + fl.fl4_src = saddr; | |
1193 | ||
1194 | RT_CACHE_STAT_INC(in_slow_tot); | |
1195 | ||
1196 | @@ -2153,7 +2172,7 @@ static int ip_route_input_slow(struct sk | |
1197 | if (res.type != RTN_UNICAST) | |
1198 | goto martian_destination; | |
1199 | ||
1200 | - err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | |
1201 | + err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc); | |
1202 | done: | |
1203 | in_dev_put(in_dev); | |
1204 | if (free_res) | |
1205 | @@ -2163,6 +2182,8 @@ out: return err; | |
1206 | brd_input: | |
1207 | if (skb->protocol != htons(ETH_P_IP)) | |
1208 | goto e_inval; | |
1209 | + if (lsrc) | |
1210 | + goto e_inval; | |
1211 | ||
1212 | if (ipv4_is_zeronet(saddr)) | |
1213 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | |
1214 | @@ -2204,6 +2225,7 @@ local_input: | |
1215 | rth->u.dst.dev = net->loopback_dev; | |
1216 | dev_hold(rth->u.dst.dev); | |
1217 | rth->idev = in_dev_get(rth->u.dst.dev); | |
1218 | + rth->fl.fl4_gw = 0; | |
1219 | rth->rt_gateway = daddr; | |
1220 | rth->rt_spec_dst= spec_dst; | |
1221 | rth->u.dst.input= ip_local_deliver; | |
1222 | @@ -2254,8 +2276,9 @@ martian_source: | |
1223 | goto e_inval; | |
1224 | } | |
1225 | ||
1226 | -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1227 | - u8 tos, struct net_device *dev) | |
1228 | +static inline int | |
1229 | +ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1230 | + u8 tos, struct net_device *dev, __be32 lsrc) | |
1231 | { | |
1232 | struct rtable * rth; | |
1233 | unsigned hash; | |
1234 | @@ -2276,6 +2299,7 @@ int ip_route_input(struct sk_buff *skb, | |
1235 | if (((rth->fl.fl4_dst ^ daddr) | | |
1236 | (rth->fl.fl4_src ^ saddr) | | |
1237 | (rth->fl.iif ^ iif) | | |
1238 | + (rth->fl.fl4_lsrc ^ lsrc) | | |
1239 | rth->fl.oif | | |
1240 | (rth->fl.fl4_tos ^ tos)) == 0 && | |
1241 | rth->fl.mark == skb->mark && | |
1242 | @@ -2324,7 +2348,19 @@ skip_cache: | |
1243 | rcu_read_unlock(); | |
1244 | return -EINVAL; | |
1245 | } | |
1246 | - return ip_route_input_slow(skb, daddr, saddr, tos, dev); | |
1247 | + return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc); | |
1248 | +} | |
1249 | + | |
1250 | +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1251 | + u8 tos, struct net_device *dev) | |
1252 | +{ | |
1253 | + return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0); | |
1254 | +} | |
1255 | + | |
1256 | +int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1257 | + u8 tos, struct net_device *dev, __be32 lsrc) | |
1258 | +{ | |
1259 | + return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc); | |
1260 | } | |
1261 | ||
1262 | static int __mkroute_output(struct rtable **result, | |
1263 | @@ -2396,6 +2432,7 @@ static int __mkroute_output(struct rtabl | |
1264 | rth->fl.fl4_tos = tos; | |
1265 | rth->fl.fl4_src = oldflp->fl4_src; | |
1266 | rth->fl.oif = oldflp->oif; | |
1267 | + rth->fl.fl4_gw = oldflp->fl4_gw; | |
1268 | rth->fl.mark = oldflp->mark; | |
1269 | rth->rt_dst = fl->fl4_dst; | |
1270 | rth->rt_src = fl->fl4_src; | |
1271 | @@ -2477,6 +2514,7 @@ static int ip_route_output_slow(struct n | |
1272 | struct flowi fl = { .nl_u = { .ip4_u = | |
1273 | { .daddr = oldflp->fl4_dst, | |
1274 | .saddr = oldflp->fl4_src, | |
1275 | + .gw = oldflp->fl4_gw, | |
1276 | .tos = tos & IPTOS_RT_MASK, | |
1277 | .scope = ((tos & RTO_ONLINK) ? | |
1278 | RT_SCOPE_LINK : | |
1279 | @@ -2588,6 +2626,7 @@ static int ip_route_output_slow(struct n | |
1280 | dev_out = net->loopback_dev; | |
1281 | dev_hold(dev_out); | |
1282 | fl.oif = net->loopback_dev->ifindex; | |
1283 | + fl.fl4_gw = 0; | |
1284 | res.type = RTN_LOCAL; | |
1285 | flags |= RTCF_LOCAL; | |
1286 | goto make_route; | |
1287 | @@ -2595,7 +2634,7 @@ static int ip_route_output_slow(struct n | |
1288 | ||
1289 | if (fib_lookup(net, &fl, &res)) { | |
1290 | res.fi = NULL; | |
1291 | - if (oldflp->oif) { | |
1292 | + if (oldflp->oif && dev_out->flags & IFF_UP) { | |
1293 | /* Apparently, routing tables are wrong. Assume, | |
1294 | that the destination is on link. | |
1295 | ||
1296 | @@ -2635,6 +2674,7 @@ static int ip_route_output_slow(struct n | |
1297 | dev_out = net->loopback_dev; | |
1298 | dev_hold(dev_out); | |
1299 | fl.oif = dev_out->ifindex; | |
1300 | + fl.fl4_gw = 0; | |
1301 | if (res.fi) | |
1302 | fib_info_put(res.fi); | |
1303 | res.fi = NULL; | |
1304 | @@ -2642,13 +2682,12 @@ static int ip_route_output_slow(struct n | |
1305 | goto make_route; | |
1306 | } | |
1307 | ||
1308 | + if (res.type == RTN_UNICAST) | |
1309 | + fib_select_default(net, &fl, &res); | |
1310 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
1311 | - if (res.fi->fib_nhs > 1 && fl.oif == 0) | |
1312 | + if (res.fi->fib_nhs > 1) | |
1313 | fib_select_multipath(&fl, &res); | |
1314 | - else | |
1315 | #endif | |
1316 | - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) | |
1317 | - fib_select_default(net, &fl, &res); | |
1318 | ||
1319 | if (!fl.fl4_src) | |
1320 | fl.fl4_src = FIB_RES_PREFSRC(res); | |
1321 | @@ -2689,6 +2728,7 @@ int __ip_route_output_key(struct net *ne | |
1322 | rth->fl.fl4_src == flp->fl4_src && | |
1323 | rth->fl.iif == 0 && | |
1324 | rth->fl.oif == flp->oif && | |
1325 | + rth->fl.fl4_gw == flp->fl4_gw && | |
1326 | rth->fl.mark == flp->mark && | |
1327 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | |
1328 | (IPTOS_RT_MASK | RTO_ONLINK)) && | |
1329 | @@ -3466,3 +3506,4 @@ void __init ip_static_sysctl_init(void) | |
1330 | EXPORT_SYMBOL(__ip_select_ident); | |
1331 | EXPORT_SYMBOL(ip_route_input); | |
1332 | EXPORT_SYMBOL(ip_route_output_key); | |
1333 | +EXPORT_SYMBOL(ip_route_input_lookup); |