]> git.ipfire.org Git - people/ms/ipfire-3.x.git/blame - pkgs/core/kernel/patches/routes-2.6.36-16.diff
kernel: Update to 2.6.36.
[people/ms/ipfire-3.x.git] / pkgs / core / kernel / patches / routes-2.6.36-16.diff
CommitLineData
6892158b
MT
1diff -urp v2.6.36/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
2--- v2.6.36/linux/include/linux/rtnetlink.h 2010-10-22 11:34:37.000000000 +0300
3+++ linux/include/linux/rtnetlink.h 2010-10-23 15:03:19.704274198 +0300
4@@ -312,6 +312,8 @@ struct rtnexthop {
58c5fc13
MT
5 #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
6 #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
7 #define RTNH_F_ONLINK 4 /* Gateway is forced on link */
8+#define RTNH_F_SUSPECT 8 /* We don't know the real state */
9+#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT)
10
11 /* Macros to handle hexthops */
12
6892158b
MT
13diff -urp v2.6.36/linux/include/net/flow.h linux/include/net/flow.h
14--- v2.6.36/linux/include/net/flow.h 2010-08-02 09:37:48.000000000 +0300
15+++ linux/include/net/flow.h 2010-10-23 15:04:36.408274819 +0300
58c5fc13
MT
16@@ -19,6 +19,8 @@ struct flowi {
17 struct {
18 __be32 daddr;
19 __be32 saddr;
20+ __be32 lsrc;
21+ __be32 gw;
22 __u8 tos;
23 __u8 scope;
24 } ip4_u;
25@@ -43,6 +45,8 @@ struct flowi {
26 #define fl6_flowlabel nl_u.ip6_u.flowlabel
27 #define fl4_dst nl_u.ip4_u.daddr
28 #define fl4_src nl_u.ip4_u.saddr
29+#define fl4_lsrc nl_u.ip4_u.lsrc
30+#define fl4_gw nl_u.ip4_u.gw
31 #define fl4_tos nl_u.ip4_u.tos
32 #define fl4_scope nl_u.ip4_u.scope
33
6892158b
MT
34diff -urp v2.6.36/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
35--- v2.6.36/linux/include/net/ip_fib.h 2010-02-25 09:01:36.000000000 +0200
36+++ linux/include/net/ip_fib.h 2010-10-23 15:03:19.704274198 +0300
ae4e228f 37@@ -207,6 +207,8 @@ extern int fib_lookup(struct net *n, str
58c5fc13
MT
38 extern struct fib_table *fib_new_table(struct net *net, u32 id);
39 extern struct fib_table *fib_get_table(struct net *net, u32 id);
40
41+extern int fib_result_table(struct fib_result *res);
42+
43 #endif /* CONFIG_IP_MULTIPLE_TABLES */
44
45 /* Exported by fib_frontend.c */
ae4e228f 46@@ -277,4 +279,6 @@ static inline void fib_proc_exit(struct
58c5fc13
MT
47 }
48 #endif
49
50+extern rwlock_t fib_nhflags_lock;
51+
52 #endif /* _NET_FIB_H */
6892158b
MT
53diff -urp v2.6.36/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h
54--- v2.6.36/linux/include/net/netfilter/nf_nat.h 2010-02-25 09:01:36.000000000 +0200
55+++ linux/include/net/netfilter/nf_nat.h 2010-10-23 15:04:36.408274819 +0300
ae4e228f 56@@ -73,6 +73,13 @@ struct nf_conn_nat {
58c5fc13
MT
57 #endif
58 };
59
60+/* Call input routing for SNAT-ed traffic */
61+extern unsigned int ip_nat_route_input(unsigned int hooknum,
62+ struct sk_buff *skb,
63+ const struct net_device *in,
64+ const struct net_device *out,
65+ int (*okfn)(struct sk_buff *));
66+
67 /* Set up the info structure to map into this range. */
68 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
69 const struct nf_nat_range *range,
6892158b
MT
70diff -urp v2.6.36/linux/include/net/route.h linux/include/net/route.h
71--- v2.6.36/linux/include/net/route.h 2010-10-22 11:34:37.000000000 +0300
72+++ linux/include/net/route.h 2010-10-23 15:04:36.409274028 +0300
73@@ -126,6 +126,7 @@ static inline int ip_route_input_noref(s
57199397
MT
74 return ip_route_input_common(skb, dst, src, tos, devin, true);
75 }
76
58c5fc13
MT
77+extern int ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc);
78 extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
79 extern void ip_rt_send_redirect(struct sk_buff *skb);
80
6892158b
MT
81diff -urp v2.6.36/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c
82--- v2.6.36/linux/net/bridge/br_netfilter.c 2010-10-22 11:34:37.000000000 +0300
83+++ linux/net/bridge/br_netfilter.c 2010-10-23 15:04:36.410274544 +0300
84@@ -337,6 +337,9 @@ static int br_nf_pre_routing_finish(stru
58c5fc13
MT
85 struct rtable *rt;
86 int err;
87
88+ /* Old skb->dst is not expected, it is lost in all cases */
89+ skb_dst_drop(skb);
90+
91 if (nf_bridge->mask & BRNF_PKT_TYPE) {
92 skb->pkt_type = PACKET_OTHERHOST;
93 nf_bridge->mask ^= BRNF_PKT_TYPE;
6892158b
MT
94diff -urp v2.6.36/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
95--- v2.6.36/linux/net/ipv4/fib_frontend.c 2010-10-22 11:34:38.000000000 +0300
96+++ linux/net/ipv4/fib_frontend.c 2010-10-23 15:03:19.706274107 +0300
df50ba0c 97@@ -47,6 +47,8 @@
58c5fc13
MT
98
99 #ifndef CONFIG_IP_MULTIPLE_TABLES
100
101+#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
102+
103 static int __net_init fib4_rules_init(struct net *net)
104 {
105 struct fib_table *local_table, *main_table;
df50ba0c 106@@ -71,6 +73,8 @@ fail:
58c5fc13
MT
107 }
108 #else
109
110+#define FIB_RES_TABLE(r) (fib_result_table(r))
111+
112 struct fib_table *fib_new_table(struct net *net, u32 id)
113 {
114 struct fib_table *tb;
df50ba0c 115@@ -125,7 +129,8 @@ void fib_select_default(struct net *net,
58c5fc13
MT
116 table = res->r->table;
117 #endif
118 tb = fib_get_table(net, table);
119- if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
120+ if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
121+ FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
ae4e228f 122 fib_table_select_default(tb, flp, res);
58c5fc13
MT
123 }
124
6892158b 125@@ -245,6 +250,9 @@ int fib_validate_source(__be32 src, __be
58c5fc13 126 .iif = oif };
ae4e228f 127
58c5fc13
MT
128 struct fib_result res;
129+ int table;
130+ unsigned char prefixlen;
131+ unsigned char scope;
ae4e228f 132 int no_addr, rpf, accept_local;
6892158b 133 bool dev_match;
58c5fc13 134 int ret;
6892158b 135@@ -294,21 +302,29 @@ int fib_validate_source(__be32 src, __be
58c5fc13
MT
136 fib_res_put(&res);
137 return ret;
138 }
139+ table = FIB_RES_TABLE(&res);
140+ prefixlen = res.prefixlen;
141+ scope = res.scope;
142 fib_res_put(&res);
143 if (no_addr)
144 goto last_resort;
145- if (rpf == 1)
6892158b 146- goto e_rpf;
58c5fc13
MT
147 fl.oif = dev->ifindex;
148
149 ret = 0;
150 if (fib_lookup(net, &fl, &res) == 0) {
151- if (res.type == RTN_UNICAST) {
152+ if (res.type == RTN_UNICAST &&
153+ ((table == FIB_RES_TABLE(&res) &&
154+ res.prefixlen >= prefixlen && res.scope >= scope) ||
155+ !rpf)) {
156 *spec_dst = FIB_RES_PREFSRC(res);
157 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
158+ fib_res_put(&res);
159+ return ret;
160 }
161 fib_res_put(&res);
162 }
163+ if (rpf == 1)
6892158b 164+ goto e_rpf;
58c5fc13
MT
165 return ret;
166
167 last_resort:
6892158b 168@@ -933,9 +949,7 @@ static int fib_inetaddr_event(struct not
58c5fc13
MT
169 switch (event) {
170 case NETDEV_UP:
171 fib_add_ifaddr(ifa);
172-#ifdef CONFIG_IP_ROUTE_MULTIPATH
173 fib_sync_up(dev);
174-#endif
175 rt_cache_flush(dev_net(dev), -1);
176 break;
177 case NETDEV_DOWN:
6892158b 178@@ -971,9 +985,7 @@ static int fib_netdev_event(struct notif
58c5fc13
MT
179 for_ifa(in_dev) {
180 fib_add_ifaddr(ifa);
181 } endfor_ifa(in_dev);
182-#ifdef CONFIG_IP_ROUTE_MULTIPATH
183 fib_sync_up(dev);
184-#endif
185 rt_cache_flush(dev_net(dev), -1);
186 break;
187 case NETDEV_DOWN:
6892158b
MT
188diff -urp v2.6.36/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
189--- v2.6.36/linux/net/ipv4/fib_hash.c 2010-05-17 10:49:01.000000000 +0300
190+++ linux/net/ipv4/fib_hash.c 2010-10-23 15:03:19.707274360 +0300
df50ba0c 191@@ -278,25 +278,35 @@ out:
ae4e228f
MT
192 void fib_table_select_default(struct fib_table *tb,
193 const struct flowi *flp, struct fib_result *res)
58c5fc13
MT
194 {
195- int order, last_idx;
196+ int order, last_idx, last_dflt, last_nhsel;
197+ struct fib_alias *first_fa = NULL;
198+ struct hlist_head *head;
199 struct hlist_node *node;
200 struct fib_node *f;
201 struct fib_info *fi = NULL;
202 struct fib_info *last_resort;
203 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
204- struct fn_zone *fz = t->fn_zones[0];
205+ struct fn_zone *fz = t->fn_zones[res->prefixlen];
206+ __be32 k;
207
208 if (fz == NULL)
209 return;
210
211+ k = fz_key(flp->fl4_dst, fz);
212+ last_dflt = -2;
213+ last_nhsel = 0;
214 last_idx = -1;
215 last_resort = NULL;
216 order = -1;
217
218 read_lock(&fib_hash_lock);
219- hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
220+ head = &fz->fz_hash[fn_hash(k, fz)];
221+ hlist_for_each_entry(f, node, head, fn_hash) {
222 struct fib_alias *fa;
223
224+ if (f->fn_key != k)
225+ continue;
226+
227 list_for_each_entry(fa, &f->fn_alias, fa_list) {
228 struct fib_info *next_fi = fa->fa_info;
229
df50ba0c 230@@ -304,42 +314,56 @@ void fib_table_select_default(struct fib
58c5fc13
MT
231 fa->fa_type != RTN_UNICAST)
232 continue;
233
234+ if (fa->fa_tos &&
235+ fa->fa_tos != flp->fl4_tos)
236+ continue;
237 if (next_fi->fib_priority > res->fi->fib_priority)
238 break;
239- if (!next_fi->fib_nh[0].nh_gw ||
240- next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
241- continue;
242 fa->fa_state |= FA_S_ACCESSED;
243
244- if (fi == NULL) {
245- if (next_fi != res->fi)
246- break;
247- } else if (!fib_detect_death(fi, order, &last_resort,
248- &last_idx, tb->tb_default)) {
249+ if (!first_fa) {
250+ last_dflt = fa->fa_last_dflt;
251+ first_fa = fa;
252+ }
253+ if (fi && !fib_detect_death(fi, order, &last_resort,
254+ &last_idx, &last_dflt, &last_nhsel, flp)) {
255 fib_result_assign(res, fi);
256- tb->tb_default = order;
257+ first_fa->fa_last_dflt = order;
258 goto out;
259 }
260 fi = next_fi;
261 order++;
262 }
263+ break;
264 }
265
266 if (order <= 0 || fi == NULL) {
267- tb->tb_default = -1;
268+ if (fi && fi->fib_nhs > 1 &&
269+ fib_detect_death(fi, order, &last_resort, &last_idx,
270+ &last_dflt, &last_nhsel, flp) &&
271+ last_resort == fi) {
272+ read_lock_bh(&fib_nhflags_lock);
273+ fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
274+ read_unlock_bh(&fib_nhflags_lock);
275+ }
276+ if (first_fa) first_fa->fa_last_dflt = -1;
277 goto out;
278 }
279
280 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
281- tb->tb_default)) {
282+ &last_dflt, &last_nhsel, flp)) {
283 fib_result_assign(res, fi);
284- tb->tb_default = order;
285+ first_fa->fa_last_dflt = order;
286 goto out;
287 }
288
289- if (last_idx >= 0)
290+ if (last_idx >= 0) {
291 fib_result_assign(res, last_resort);
292- tb->tb_default = last_idx;
293+ read_lock_bh(&fib_nhflags_lock);
294+ last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
295+ read_unlock_bh(&fib_nhflags_lock);
296+ first_fa->fa_last_dflt = last_idx;
297+ }
298 out:
299 read_unlock(&fib_hash_lock);
300 }
df50ba0c 301@@ -463,6 +487,7 @@ int fib_table_insert(struct fib_table *t
58c5fc13
MT
302 write_lock_bh(&fib_hash_lock);
303 fi_drop = fa->fa_info;
304 fa->fa_info = fi;
305+ fa->fa_last_dflt = -1;
306 fa->fa_type = cfg->fc_type;
307 fa->fa_scope = cfg->fc_scope;
308 state = fa->fa_state;
df50ba0c 309@@ -517,6 +542,7 @@ int fib_table_insert(struct fib_table *t
58c5fc13
MT
310 new_fa->fa_type = cfg->fc_type;
311 new_fa->fa_scope = cfg->fc_scope;
312 new_fa->fa_state = 0;
313+ new_fa->fa_last_dflt = -1;
314
315 /*
316 * Insert new entry to the list.
6892158b
MT
317diff -urp v2.6.36/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
318--- v2.6.36/linux/net/ipv4/fib_lookup.h 2009-09-11 10:27:17.000000000 +0300
319+++ linux/net/ipv4/fib_lookup.h 2010-10-23 15:03:19.707274360 +0300
58c5fc13
MT
320@@ -8,6 +8,7 @@
321 struct fib_alias {
322 struct list_head fa_list;
323 struct fib_info *fa_info;
324+ int fa_last_dflt;
325 u8 fa_tos;
326 u8 fa_type;
327 u8 fa_scope;
328@@ -37,7 +38,8 @@ extern struct fib_alias *fib_find_alias(
329 u8 tos, u32 prio);
330 extern int fib_detect_death(struct fib_info *fi, int order,
331 struct fib_info **last_resort,
332- int *last_idx, int dflt);
333+ int *last_idx, int *dflt, int *last_nhsel,
334+ const struct flowi *flp);
335
336 static inline void fib_result_assign(struct fib_result *res,
337 struct fib_info *fi)
6892158b
MT
338diff -urp v2.6.36/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
339--- v2.6.36/linux/net/ipv4/fib_rules.c 2010-08-02 09:37:49.000000000 +0300
340+++ linux/net/ipv4/fib_rules.c 2010-10-23 15:03:19.708274528 +0300
58c5fc13
MT
341@@ -54,6 +54,11 @@ u32 fib_rules_tclass(struct fib_result *
342 }
343 #endif
344
345+int fib_result_table(struct fib_result *res)
346+{
347+ return res->r->table;
348+}
349+
350 int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
351 {
352 struct fib_lookup_arg arg = {
6892158b
MT
353diff -urp v2.6.36/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
354--- v2.6.36/linux/net/ipv4/fib_semantics.c 2010-05-17 10:49:01.000000000 +0300
355+++ linux/net/ipv4/fib_semantics.c 2010-10-23 15:04:36.412272841 +0300
df50ba0c 356@@ -51,6 +51,7 @@ static struct hlist_head *fib_info_hash;
58c5fc13
MT
357 static struct hlist_head *fib_info_laddrhash;
358 static unsigned int fib_hash_size;
359 static unsigned int fib_info_cnt;
360+rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED;
361
362 #define DEVINDEX_HASHBITS 8
363 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
df50ba0c 364@@ -187,7 +188,7 @@ static __inline__ int nh_comp(const stru
58c5fc13
MT
365 #ifdef CONFIG_NET_CLS_ROUTE
366 nh->nh_tclassid != onh->nh_tclassid ||
367 #endif
368- ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
369+ ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
370 return -1;
371 onh++;
372 } endfor_nexthops(fi);
df50ba0c 373@@ -238,7 +239,7 @@ static struct fib_info *fib_find_info(co
58c5fc13
MT
374 nfi->fib_priority == fi->fib_priority &&
375 memcmp(nfi->fib_metrics, fi->fib_metrics,
376 sizeof(fi->fib_metrics)) == 0 &&
377- ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
378+ ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
379 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
380 return fi;
381 }
df50ba0c 382@@ -350,26 +351,70 @@ struct fib_alias *fib_find_alias(struct
58c5fc13
MT
383 }
384
385 int fib_detect_death(struct fib_info *fi, int order,
386- struct fib_info **last_resort, int *last_idx, int dflt)
387+ struct fib_info **last_resort, int *last_idx, int *dflt,
388+ int *last_nhsel, const struct flowi *flp)
389 {
390 struct neighbour *n;
391- int state = NUD_NONE;
392+ int nhsel;
393+ int state;
394+ struct fib_nh * nh;
395+ __be32 dst;
396+ int flag, dead = 1;
397+
398+ /* change_nexthops(fi) { */
399+ for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
400+ if (flp->oif && flp->oif != nh->nh_oif)
401+ continue;
402+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
403+ nh->nh_scope == RT_SCOPE_LINK)
404+ continue;
405+ if (nh->nh_flags & RTNH_F_DEAD)
406+ continue;
407
408- n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
409- if (n) {
410- state = n->nud_state;
411- neigh_release(n);
412- }
413- if (state == NUD_REACHABLE)
414- return 0;
415- if ((state&NUD_VALID) && order != dflt)
416- return 0;
417- if ((state&NUD_VALID) ||
418- (*last_idx<0 && order > dflt)) {
419- *last_resort = fi;
420- *last_idx = order;
421+ flag = 0;
422+ if (nh->nh_dev->flags & IFF_NOARP) {
423+ dead = 0;
424+ goto setfl;
425+ }
426+
427+ dst = nh->nh_gw;
428+ if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
429+ dst = flp->fl4_dst;
430+
431+ state = NUD_NONE;
432+ n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
433+ if (n) {
434+ state = n->nud_state;
435+ neigh_release(n);
436+ }
437+ if (state==NUD_REACHABLE ||
438+ ((state&NUD_VALID) && order != *dflt)) {
439+ dead = 0;
440+ goto setfl;
441+ }
442+ if (!(state&NUD_VALID))
443+ flag = 1;
444+ if (!dead)
445+ goto setfl;
446+ if ((state&NUD_VALID) ||
447+ (*last_idx<0 && order >= *dflt)) {
448+ *last_resort = fi;
449+ *last_idx = order;
450+ *last_nhsel = nhsel;
451+ }
452+
453+ setfl:
454+
455+ read_lock_bh(&fib_nhflags_lock);
456+ if (flag)
457+ nh->nh_flags |= RTNH_F_SUSPECT;
458+ else
459+ nh->nh_flags &= ~RTNH_F_SUSPECT;
460+ read_unlock_bh(&fib_nhflags_lock);
461 }
462- return 1;
463+ /* } endfor_nexthops(fi) */
464+
465+ return dead;
466 }
467
468 #ifdef CONFIG_IP_ROUTE_MULTIPATH
df50ba0c 469@@ -538,8 +583,11 @@ static int fib_check_nh(struct fib_confi
58c5fc13
MT
470 return -EINVAL;
471 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
472 return -ENODEV;
473- if (!(dev->flags&IFF_UP))
474- return -ENETDOWN;
475+ if (!(dev->flags&IFF_UP)) {
476+ if (fi->fib_protocol != RTPROT_STATIC)
477+ return -ENETDOWN;
478+ nh->nh_flags |= RTNH_F_DEAD;
479+ }
480 nh->nh_dev = dev;
481 dev_hold(dev);
482 nh->nh_scope = RT_SCOPE_LINK;
df50ba0c 483@@ -559,24 +607,48 @@ static int fib_check_nh(struct fib_confi
58c5fc13
MT
484 /* It is not necessary, but requires a bit of thinking */
485 if (fl.fl4_scope < RT_SCOPE_LINK)
486 fl.fl4_scope = RT_SCOPE_LINK;
487- if ((err = fib_lookup(net, &fl, &res)) != 0)
488- return err;
489+ err = fib_lookup(net, &fl, &res);
490 }
491- err = -EINVAL;
492- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
493- goto out;
494- nh->nh_scope = res.scope;
495- nh->nh_oif = FIB_RES_OIF(res);
496- if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
497- goto out;
498- dev_hold(nh->nh_dev);
499- err = -ENETDOWN;
500- if (!(nh->nh_dev->flags & IFF_UP))
501- goto out;
502- err = 0;
503+ if (err) {
504+ struct in_device *in_dev;
505+
506+ if (err != -ENETUNREACH ||
507+ fi->fib_protocol != RTPROT_STATIC)
508+ return err;
509+
510+ in_dev = inetdev_by_index(net, nh->nh_oif);
511+ if (in_dev == NULL ||
512+ in_dev->dev->flags & IFF_UP) {
513+ if (in_dev)
514+ in_dev_put(in_dev);
515+ return err;
516+ }
517+ nh->nh_flags |= RTNH_F_DEAD;
518+ nh->nh_scope = RT_SCOPE_LINK;
519+ nh->nh_dev = in_dev->dev;
520+ dev_hold(nh->nh_dev);
521+ in_dev_put(in_dev);
522+ } else {
523+ err = -EINVAL;
524+ if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
525+ goto out;
526+ nh->nh_scope = res.scope;
527+ nh->nh_oif = FIB_RES_OIF(res);
528+ if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
529+ goto out;
530+ dev_hold(nh->nh_dev);
531+ if (!(nh->nh_dev->flags & IFF_UP)) {
532+ if (fi->fib_protocol != RTPROT_STATIC) {
533+ err = -ENETDOWN;
534+ goto out;
535+ }
536+ nh->nh_flags |= RTNH_F_DEAD;
537+ }
538+ err = 0;
539 out:
540- fib_res_put(&res);
541- return err;
542+ fib_res_put(&res);
543+ return err;
544+ }
545 } else {
546 struct in_device *in_dev;
547
df50ba0c 548@@ -587,8 +659,11 @@ out:
58c5fc13
MT
549 if (in_dev == NULL)
550 return -ENODEV;
551 if (!(in_dev->dev->flags&IFF_UP)) {
552- in_dev_put(in_dev);
553- return -ENETDOWN;
554+ if (fi->fib_protocol != RTPROT_STATIC) {
555+ in_dev_put(in_dev);
556+ return -ENETDOWN;
557+ }
558+ nh->nh_flags |= RTNH_F_DEAD;
559 }
560 nh->nh_dev = in_dev->dev;
561 dev_hold(nh->nh_dev);
df50ba0c 562@@ -897,8 +972,12 @@ int fib_semantic_match(struct list_head
58c5fc13
MT
563 for_nexthops(fi) {
564 if (nh->nh_flags&RTNH_F_DEAD)
565 continue;
566- if (!flp->oif || flp->oif == nh->nh_oif)
567- break;
568+ if (flp->oif && flp->oif != nh->nh_oif)
569+ continue;
570+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
571+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
572+ continue;
573+ break;
574 }
575 #ifdef CONFIG_IP_ROUTE_MULTIPATH
576 if (nhsel < fi->fib_nhs) {
df50ba0c 577@@ -1078,18 +1157,29 @@ int fib_sync_down_dev(struct net_device
58c5fc13
MT
578 prev_fi = fi;
579 dead = 0;
580 change_nexthops(fi) {
df50ba0c 581- if (nexthop_nh->nh_flags&RTNH_F_DEAD)
58c5fc13 582- dead++;
df50ba0c
MT
583- else if (nexthop_nh->nh_dev == dev &&
584- nexthop_nh->nh_scope != scope) {
585- nexthop_nh->nh_flags |= RTNH_F_DEAD;
586+ if (nexthop_nh->nh_flags&RTNH_F_DEAD) {
58c5fc13 587+ if (fi->fib_protocol!=RTPROT_STATIC ||
df50ba0c
MT
588+ nexthop_nh->nh_dev == NULL ||
589+ __in_dev_get_rtnl(nexthop_nh->nh_dev) == NULL ||
590+ nexthop_nh->nh_dev->flags&IFF_UP)
58c5fc13 591+ dead++;
df50ba0c
MT
592+ } else if (nexthop_nh->nh_dev == dev &&
593+ nexthop_nh->nh_scope != scope) {
58c5fc13
MT
594+ write_lock_bh(&fib_nhflags_lock);
595 #ifdef CONFIG_IP_ROUTE_MULTIPATH
596- spin_lock_bh(&fib_multipath_lock);
597+ spin_lock(&fib_multipath_lock);
df50ba0c
MT
598+ nexthop_nh->nh_flags |= RTNH_F_DEAD;
599 fi->fib_power -= nexthop_nh->nh_power;
600 nexthop_nh->nh_power = 0;
58c5fc13
MT
601- spin_unlock_bh(&fib_multipath_lock);
602+ spin_unlock(&fib_multipath_lock);
603+#else
df50ba0c 604+ nexthop_nh->nh_flags |= RTNH_F_DEAD;
58c5fc13
MT
605 #endif
606- dead++;
607+ write_unlock_bh(&fib_nhflags_lock);
608+ if (fi->fib_protocol!=RTPROT_STATIC ||
609+ force ||
610+ __in_dev_get_rtnl(dev) == NULL)
611+ dead++;
612 }
613 #ifdef CONFIG_IP_ROUTE_MULTIPATH
df50ba0c
MT
614 if (force > 1 && nexthop_nh->nh_dev == dev) {
615@@ -1107,11 +1197,8 @@ int fib_sync_down_dev(struct net_device
58c5fc13
MT
616 return ret;
617 }
618
619-#ifdef CONFIG_IP_ROUTE_MULTIPATH
620-
621 /*
622- Dead device goes up. We wake up dead nexthops.
623- It takes sense only on multipath routes.
624+ Dead device goes up or new address is added. We wake up dead nexthops.
625 */
626
627 int fib_sync_up(struct net_device *dev)
df50ba0c 628@@ -1121,8 +1208,10 @@ int fib_sync_up(struct net_device *dev)
58c5fc13
MT
629 struct hlist_head *head;
630 struct hlist_node *node;
631 struct fib_nh *nh;
632- int ret;
633+ struct fib_result res;
634+ int ret, rep;
635
636+repeat:
637 if (!(dev->flags&IFF_UP))
638 return 0;
639
df50ba0c 640@@ -1130,6 +1219,7 @@ int fib_sync_up(struct net_device *dev)
58c5fc13
MT
641 hash = fib_devindex_hashfn(dev->ifindex);
642 head = &fib_info_devhash[hash];
643 ret = 0;
644+ rep = 0;
645
646 hlist_for_each_entry(nh, node, head, nh_hash) {
647 struct fib_info *fi = nh->nh_parent;
df50ba0c 648@@ -1142,21 +1232,41 @@ int fib_sync_up(struct net_device *dev)
58c5fc13
MT
649 prev_fi = fi;
650 alive = 0;
651 change_nexthops(fi) {
df50ba0c 652- if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
58c5fc13 653- alive++;
df50ba0c 654+ if (!(nexthop_nh->nh_flags&RTNH_F_DEAD))
58c5fc13
MT
655 continue;
656- }
df50ba0c
MT
657 if (nexthop_nh->nh_dev == NULL ||
658 !(nexthop_nh->nh_dev->flags&IFF_UP))
58c5fc13 659 continue;
df50ba0c
MT
660 if (nexthop_nh->nh_dev != dev ||
661 !__in_dev_get_rtnl(dev))
58c5fc13 662 continue;
df50ba0c 663+ if (nexthop_nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
58c5fc13
MT
664+ struct flowi fl = {
665+ .nl_u = { .ip4_u =
df50ba0c
MT
666+ { .daddr = nexthop_nh->nh_gw,
667+ .scope = nexthop_nh->nh_scope } },
668+ .oif = nexthop_nh->nh_oif,
58c5fc13
MT
669+ };
670+ if (fib_lookup(dev_net(dev), &fl, &res) != 0)
671+ continue;
672+ if (res.type != RTN_UNICAST &&
673+ res.type != RTN_LOCAL) {
674+ fib_res_put(&res);
675+ continue;
676+ }
df50ba0c 677+ nexthop_nh->nh_scope = res.scope;
58c5fc13
MT
678+ fib_res_put(&res);
679+ rep = 1;
680+ }
681 alive++;
682+#ifdef CONFIG_IP_ROUTE_MULTIPATH
683 spin_lock_bh(&fib_multipath_lock);
df50ba0c 684 nexthop_nh->nh_power = 0;
58c5fc13 685+#endif
df50ba0c 686 nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
58c5fc13
MT
687+#ifdef CONFIG_IP_ROUTE_MULTIPATH
688 spin_unlock_bh(&fib_multipath_lock);
689+#endif
690 } endfor_nexthops(fi)
691
692 if (alive > 0) {
693@@ -1164,10 +1274,14 @@ int fib_sync_up(struct net_device *dev)
694 ret++;
695 }
696 }
697+ if (rep)
698+ goto repeat;
699
700 return ret;
701 }
702
703+#ifdef CONFIG_IP_ROUTE_MULTIPATH
704+
705 /*
706 The algorithm is suboptimal, but it provides really
707 fair weighted route distribution.
708@@ -1176,24 +1290,45 @@ int fib_sync_up(struct net_device *dev)
709 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
710 {
711 struct fib_info *fi = res->fi;
712- int w;
713+ int w, alive;
714
715 spin_lock_bh(&fib_multipath_lock);
716+ if (flp->oif) {
717+ int sel = -1;
718+ w = -1;
719+ change_nexthops(fi) {
df50ba0c 720+ if (flp->oif != nexthop_nh->nh_oif)
58c5fc13 721+ continue;
df50ba0c
MT
722+ if (flp->fl4_gw && flp->fl4_gw != nexthop_nh->nh_gw &&
723+ nexthop_nh->nh_gw && nexthop_nh->nh_scope == RT_SCOPE_LINK)
58c5fc13 724+ continue;
df50ba0c
MT
725+ if (!(nexthop_nh->nh_flags&RTNH_F_BADSTATE)) {
726+ if (nexthop_nh->nh_power > w) {
727+ w = nexthop_nh->nh_power;
58c5fc13
MT
728+ sel = nhsel;
729+ }
730+ }
731+ } endfor_nexthops(fi);
732+ if (sel >= 0) {
733+ spin_unlock_bh(&fib_multipath_lock);
734+ res->nh_sel = sel;
735+ return;
736+ }
737+ goto last_resort;
738+ }
739+
740+repeat:
741 if (fi->fib_power <= 0) {
742 int power = 0;
743 change_nexthops(fi) {
df50ba0c
MT
744- if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
745+ if (!(nexthop_nh->nh_flags&RTNH_F_BADSTATE)) {
746 power += nexthop_nh->nh_weight;
747 nexthop_nh->nh_power = nexthop_nh->nh_weight;
58c5fc13
MT
748 }
749 } endfor_nexthops(fi);
750 fi->fib_power = power;
751- if (power <= 0) {
752- spin_unlock_bh(&fib_multipath_lock);
753- /* Race condition: route has just become dead. */
754- res->nh_sel = 0;
755- return;
756- }
757+ if (power <= 0)
758+ goto last_resort;
759 }
760
761
df50ba0c 762@@ -1203,21 +1338,41 @@ void fib_select_multipath(const struct f
58c5fc13
MT
763
764 w = jiffies % fi->fib_power;
765
766+ alive = 0;
767 change_nexthops(fi) {
df50ba0c
MT
768- if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) &&
769+ if (!(nexthop_nh->nh_flags&RTNH_F_BADSTATE) &&
770 nexthop_nh->nh_power) {
771 if ((w -= nexthop_nh->nh_power) <= 0) {
772 nexthop_nh->nh_power--;
58c5fc13
MT
773 fi->fib_power--;
774- res->nh_sel = nhsel;
775 spin_unlock_bh(&fib_multipath_lock);
776+ res->nh_sel = nhsel;
777 return;
778 }
779+ alive = 1;
780+ }
781+ } endfor_nexthops(fi);
782+ if (alive) {
783+ fi->fib_power = 0;
784+ goto repeat;
785+ }
786+
787+last_resort:
788+
789+ for_nexthops(fi) {
790+ if (!(nh->nh_flags&RTNH_F_DEAD)) {
791+ if (flp->oif && flp->oif != nh->nh_oif)
792+ continue;
793+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
794+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
795+ continue;
796+ spin_unlock_bh(&fib_multipath_lock);
797+ res->nh_sel = nhsel;
798+ return;
799 }
800 } endfor_nexthops(fi);
801
802 /* Race condition: route has just become dead. */
803- res->nh_sel = 0;
804 spin_unlock_bh(&fib_multipath_lock);
805 }
806 #endif
6892158b
MT
807diff -urp v2.6.36/linux/net/ipv4/fib_trie.c linux/net/ipv4/fib_trie.c
808--- v2.6.36/linux/net/ipv4/fib_trie.c 2010-10-22 11:34:38.000000000 +0300
809+++ linux/net/ipv4/fib_trie.c 2010-10-23 15:03:19.712272951 +0300
810@@ -1277,6 +1277,7 @@ int fib_table_insert(struct fib_table *t
58c5fc13
MT
811 fi_drop = fa->fa_info;
812 new_fa->fa_tos = fa->fa_tos;
813 new_fa->fa_info = fi;
814+ new_fa->fa_last_dflt = -1;
815 new_fa->fa_type = cfg->fc_type;
816 new_fa->fa_scope = cfg->fc_scope;
817 state = fa->fa_state;
6892158b 818@@ -1317,6 +1318,7 @@ int fib_table_insert(struct fib_table *t
58c5fc13
MT
819 new_fa->fa_type = cfg->fc_type;
820 new_fa->fa_scope = cfg->fc_scope;
821 new_fa->fa_state = 0;
822+ new_fa->fa_last_dflt = -1;
823 /*
824 * Insert new entry to the list.
825 */
6892158b 826@@ -1819,24 +1821,31 @@ void fib_table_select_default(struct fib
ae4e228f 827 struct fib_result *res)
58c5fc13
MT
828 {
829 struct trie *t = (struct trie *) tb->tb_data;
830- int order, last_idx;
831+ int order, last_idx, last_dflt, last_nhsel;
832+ struct fib_alias *first_fa = NULL;
833 struct fib_info *fi = NULL;
834 struct fib_info *last_resort;
835 struct fib_alias *fa = NULL;
836 struct list_head *fa_head;
837 struct leaf *l;
838+ u32 key, mask;
839
840+ last_dflt = -2;
841+ last_nhsel = 0;
842 last_idx = -1;
843 last_resort = NULL;
844 order = -1;
845
846+ mask = inet_make_mask(res->prefixlen);
847+ key = ntohl(flp->fl4_dst & mask);
848+
849 rcu_read_lock();
850
851- l = fib_find_node(t, 0);
852+ l = fib_find_node(t, key);
853 if (!l)
854 goto out;
855
856- fa_head = get_fa_head(l, 0);
857+ fa_head = get_fa_head(l, res->prefixlen);
858 if (!fa_head)
859 goto out;
860
6892158b 861@@ -1850,39 +1859,52 @@ void fib_table_select_default(struct fib
58c5fc13
MT
862 fa->fa_type != RTN_UNICAST)
863 continue;
864
865+ if (fa->fa_tos &&
866+ fa->fa_tos != flp->fl4_tos)
867+ continue;
868 if (next_fi->fib_priority > res->fi->fib_priority)
869 break;
870- if (!next_fi->fib_nh[0].nh_gw ||
871- next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
872- continue;
873 fa->fa_state |= FA_S_ACCESSED;
874
875- if (fi == NULL) {
876- if (next_fi != res->fi)
877- break;
878- } else if (!fib_detect_death(fi, order, &last_resort,
879- &last_idx, tb->tb_default)) {
880+ if (!first_fa) {
881+ last_dflt = fa->fa_last_dflt;
882+ first_fa = fa;
883+ }
884+ if (fi && !fib_detect_death(fi, order, &last_resort,
885+ &last_idx, &last_dflt, &last_nhsel, flp)) {
886 fib_result_assign(res, fi);
887- tb->tb_default = order;
888+ first_fa->fa_last_dflt = order;
889 goto out;
890 }
891 fi = next_fi;
892 order++;
893 }
894 if (order <= 0 || fi == NULL) {
895- tb->tb_default = -1;
896+ if (fi && fi->fib_nhs > 1 &&
897+ fib_detect_death(fi, order, &last_resort, &last_idx,
898+ &last_dflt, &last_nhsel, flp) &&
899+ last_resort == fi) {
900+ read_lock_bh(&fib_nhflags_lock);
901+ fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
902+ read_unlock_bh(&fib_nhflags_lock);
903+ }
904+ if (first_fa) first_fa->fa_last_dflt = -1;
905 goto out;
906 }
907
908 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
909- tb->tb_default)) {
910+ &last_dflt, &last_nhsel, flp)) {
911 fib_result_assign(res, fi);
912- tb->tb_default = order;
913+ first_fa->fa_last_dflt = order;
914 goto out;
915 }
916- if (last_idx >= 0)
917+ if (last_idx >= 0) {
918 fib_result_assign(res, last_resort);
919- tb->tb_default = last_idx;
920+ read_lock_bh(&fib_nhflags_lock);
921+ last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
922+ read_unlock_bh(&fib_nhflags_lock);
923+ first_fa->fa_last_dflt = last_idx;
924+ }
925 out:
926 rcu_read_unlock();
927 }
6892158b
MT
928diff -urp v2.6.36/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
929--- v2.6.36/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2010-08-02 09:37:49.000000000 +0300
930+++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2010-10-23 15:04:36.412272841 +0300
58c5fc13
MT
931@@ -51,7 +51,7 @@ masquerade_tg(struct sk_buff *skb, const
932 enum ip_conntrack_info ctinfo;
933 struct nf_nat_range newrange;
934 const struct nf_nat_multi_range_compat *mr;
935- const struct rtable *rt;
936+ struct rtable *rt;
937 __be32 newsrc;
938
939 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
57199397 940@@ -69,13 +69,29 @@ masquerade_tg(struct sk_buff *skb, const
58c5fc13
MT
941 return NF_ACCEPT;
942
943 mr = par->targinfo;
944- rt = skb_rtable(skb);
945- newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
946- if (!newsrc) {
57199397 947- pr_info("%s ate my IP address\n", par->out->name);
58c5fc13
MT
948- return NF_DROP;
949+
950+ {
951+ struct flowi fl = { .nl_u = { .ip4_u =
952+ { .daddr = ip_hdr(skb)->daddr,
953+ .tos = (RT_TOS(ip_hdr(skb)->tos) |
954+ RTO_CONN),
955+ .gw = skb_rtable(skb)->rt_gateway,
956+ } },
957+ .mark = skb->mark,
958+ .oif = par->out->ifindex };
959+ if (ip_route_output_key(dev_net(par->out), &rt, &fl) != 0) {
960+ /* Funky routing can do this. */
961+ if (net_ratelimit())
57199397
MT
962+ pr_info("%s:"
963+ " No route: Rusty's brain broke!\n",
964+ par->out->name);
58c5fc13
MT
965+ return NF_DROP;
966+ }
967 }
968
969+ newsrc = rt->rt_src;
970+ ip_rt_put(rt);
971+
972 nat->masq_index = par->out->ifindex;
973
974 /* Transfer from original range. */
6892158b
MT
975diff -urp v2.6.36/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c
976--- v2.6.36/linux/net/ipv4/netfilter/nf_nat_core.c 2010-10-22 11:34:38.000000000 +0300
977+++ linux/net/ipv4/netfilter/nf_nat_core.c 2010-10-23 15:04:36.413274353 +0300
978@@ -706,6 +706,52 @@ static struct pernet_operations nf_nat_n
58c5fc13
MT
979 .exit = nf_nat_net_exit,
980 };
981
982+unsigned int
983+ip_nat_route_input(unsigned int hooknum,
984+ struct sk_buff *skb,
985+ const struct net_device *in,
986+ const struct net_device *out,
987+ int (*okfn)(struct sk_buff *))
988+{
989+ struct iphdr *iph;
990+ struct nf_conn *conn;
991+ enum ip_conntrack_info ctinfo;
992+ enum ip_conntrack_dir dir;
993+ unsigned long statusbit;
994+ __be32 saddr;
995+
996+ if (!(conn = nf_ct_get(skb, &ctinfo)))
997+ return NF_ACCEPT;
998+
999+ if (!(conn->status & IPS_NAT_DONE_MASK))
1000+ return NF_ACCEPT;
1001+ dir = CTINFO2DIR(ctinfo);
1002+ statusbit = IPS_SRC_NAT;
1003+ if (dir == IP_CT_DIR_REPLY)
1004+ statusbit ^= IPS_NAT_MASK;
1005+ if (!(conn->status & statusbit))
1006+ return NF_ACCEPT;
1007+
1008+ if (skb_dst(skb))
1009+ return NF_ACCEPT;
1010+
1011+ if (skb->len < sizeof(struct iphdr))
1012+ return NF_ACCEPT;
1013+
1014+ /* use daddr in other direction as masquerade address (lsrc) */
1015+ iph = ip_hdr(skb);
1016+ saddr = conn->tuplehash[!dir].tuple.dst.u3.ip;
1017+ if (saddr == iph->saddr)
1018+ return NF_ACCEPT;
1019+
1020+ if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
1021+ skb->dev, saddr))
1022+ return NF_DROP;
1023+
1024+ return NF_ACCEPT;
1025+}
1026+EXPORT_SYMBOL_GPL(ip_nat_route_input);
1027+
1028 static int __init nf_nat_init(void)
1029 {
1030 size_t i;
6892158b
MT
1031diff -urp v2.6.36/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c
1032--- v2.6.36/linux/net/ipv4/netfilter/nf_nat_standalone.c 2010-10-22 11:34:38.000000000 +0300
1033+++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2010-10-23 15:04:36.414274319 +0300
1034@@ -249,6 +249,14 @@ static struct nf_hook_ops nf_nat_ops[] _
58c5fc13
MT
1035 .hooknum = NF_INET_PRE_ROUTING,
1036 .priority = NF_IP_PRI_NAT_DST,
1037 },
1038+ /* Before routing, route before mangling */
1039+ {
1040+ .hook = ip_nat_route_input,
1041+ .owner = THIS_MODULE,
ae4e228f 1042+ .pf = NFPROTO_IPV4,
58c5fc13
MT
1043+ .hooknum = NF_INET_PRE_ROUTING,
1044+ .priority = NF_IP_PRI_LAST-1,
1045+ },
1046 /* After packet filtering, change source */
1047 {
1048 .hook = nf_nat_out,
6892158b
MT
1049diff -urp v2.6.36/linux/net/ipv4/route.c linux/net/ipv4/route.c
1050--- v2.6.36/linux/net/ipv4/route.c 2010-10-22 11:34:38.000000000 +0300
1051+++ linux/net/ipv4/route.c 2010-10-23 15:08:07.188273891 +0300
1052@@ -693,6 +693,8 @@ static inline int compare_keys(struct fl
57199397
MT
1053 return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
1054 ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
58c5fc13 1055 (fl1->mark ^ fl2->mark) |
57199397
MT
1056+ ((__force u32)fl1->nl_u.ip4_u.lsrc ^ (__force u32)fl2->nl_u.ip4_u.lsrc) |
1057+ ((__force u32)fl1->nl_u.ip4_u.gw ^ (__force u32)fl2->nl_u.ip4_u.gw) |
1058 (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
58c5fc13 1059 (fl1->oif ^ fl2->oif) |
57199397 1060 (fl1->iif ^ fl2->iif)) == 0;
6892158b 1061@@ -1435,6 +1437,7 @@ void ip_rt_redirect(__be32 old_gw, __be3
58c5fc13
MT
1062
1063 /* Gateway is different ... */
1064 rt->rt_gateway = new_gw;
1065+ if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
1066
1067 /* Redirect received -> path was valid */
6892158b 1068 dst_confirm(&rth->dst);
57199397 1069@@ -1886,6 +1889,7 @@ static int ip_route_input_mc(struct sk_b
58c5fc13
MT
1070 rth->fl.fl4_tos = tos;
1071 rth->fl.mark = skb->mark;
1072 rth->fl.fl4_src = saddr;
1073+ rth->fl.fl4_lsrc = 0;
1074 rth->rt_src = saddr;
1075 #ifdef CONFIG_NET_CLS_ROUTE
6892158b 1076 rth->dst.tclassid = itag;
57199397 1077@@ -1896,6 +1900,7 @@ static int ip_route_input_mc(struct sk_b
6892158b
MT
1078 dev_hold(rth->dst.dev);
1079 rth->idev = in_dev_get(rth->dst.dev);
58c5fc13
MT
1080 rth->fl.oif = 0;
1081+ rth->fl.fl4_gw = 0;
1082 rth->rt_gateway = daddr;
1083 rth->rt_spec_dst= spec_dst;
1084 rth->rt_genid = rt_genid(dev_net(dev));
6892158b 1085@@ -1959,7 +1964,7 @@ static int __mkroute_input(struct sk_buf
58c5fc13
MT
1086 struct fib_result *res,
1087 struct in_device *in_dev,
1088 __be32 daddr, __be32 saddr, u32 tos,
1089- struct rtable **result)
1090+ __be32 lsrc, struct rtable **result)
1091 {
58c5fc13 1092 struct rtable *rth;
6892158b
MT
1093 int err;
1094@@ -1991,6 +1996,7 @@ static int __mkroute_input(struct sk_buf
58c5fc13
MT
1095 flags |= RTCF_DIRECTSRC;
1096
1097 if (out_dev == in_dev && err &&
1098+ !lsrc &&
1099 (IN_DEV_SHARED_MEDIA(out_dev) ||
1100 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1101 flags |= RTCF_DOREDIRECT;
6892158b 1102@@ -2029,6 +2035,7 @@ static int __mkroute_input(struct sk_buf
58c5fc13
MT
1103 rth->fl.mark = skb->mark;
1104 rth->fl.fl4_src = saddr;
1105 rth->rt_src = saddr;
1106+ rth->fl.fl4_lsrc = lsrc;
1107 rth->rt_gateway = daddr;
1108 rth->rt_iif =
1109 rth->fl.iif = in_dev->dev->ifindex;
6892158b
MT
1110@@ -2036,6 +2043,7 @@ static int __mkroute_input(struct sk_buf
1111 dev_hold(rth->dst.dev);
1112 rth->idev = in_dev_get(rth->dst.dev);
58c5fc13
MT
1113 rth->fl.oif = 0;
1114+ rth->fl.fl4_gw = 0;
1115 rth->rt_spec_dst= spec_dst;
1116
6892158b
MT
1117 rth->dst.obsolete = -1;
1118@@ -2055,21 +2063,23 @@ static int __mkroute_input(struct sk_buf
58c5fc13
MT
1119
1120 static int ip_mkroute_input(struct sk_buff *skb,
1121 struct fib_result *res,
1122+ struct net *net,
1123 const struct flowi *fl,
1124 struct in_device *in_dev,
1125- __be32 daddr, __be32 saddr, u32 tos)
1126+ __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
1127 {
1128 struct rtable* rth = NULL;
1129 int err;
1130 unsigned hash;
1131
1132+ fib_select_default(net, fl, res);
1133 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1134- if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1135+ if (res->fi && res->fi->fib_nhs > 1)
1136 fib_select_multipath(fl, res);
1137 #endif
1138
1139 /* create a routing cache entry */
1140- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1141+ err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
1142 if (err)
1143 return err;
1144
6892158b 1145@@ -2090,18 +2100,20 @@ static int ip_mkroute_input(struct sk_bu
58c5fc13
MT
1146 */
1147
1148 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1149- u8 tos, struct net_device *dev)
1150+ u8 tos, struct net_device *dev, __be32 lsrc)
1151 {
1152 struct fib_result res;
6892158b 1153 struct in_device *in_dev = __in_dev_get_rcu(dev);
58c5fc13
MT
1154 struct flowi fl = { .nl_u = { .ip4_u =
1155 { .daddr = daddr,
1156- .saddr = saddr,
1157+ .saddr = lsrc? : saddr,
1158 .tos = tos,
1159 .scope = RT_SCOPE_UNIVERSE,
1160 } },
1161 .mark = skb->mark,
1162- .iif = dev->ifindex };
1163+ .iif = lsrc?
6892158b
MT
1164+ dev_net(dev)->loopback_dev->ifindex :
1165+ dev->ifindex };
58c5fc13
MT
1166 unsigned flags = 0;
1167 u32 itag = 0;
1168 struct rtable * rth;
6892158b 1169@@ -2137,6 +2149,12 @@ static int ip_route_input_slow(struct sk
58c5fc13
MT
1170 ipv4_is_loopback(daddr))
1171 goto martian_destination;
1172
1173+ if (lsrc) {
1174+ if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) ||
1175+ ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc))
1176+ goto e_inval;
1177+ }
1178+
1179 /*
1180 * Now we are ready to route packet.
1181 */
6892158b 1182@@ -2146,6 +2164,8 @@ static int ip_route_input_slow(struct sk
58c5fc13
MT
1183 goto no_route;
1184 }
1185 free_res = 1;
1186+ fl.iif = dev->ifindex;
1187+ fl.fl4_src = saddr;
1188
1189 RT_CACHE_STAT_INC(in_slow_tot);
1190
6892158b 1191@@ -2169,7 +2189,7 @@ static int ip_route_input_slow(struct sk
58c5fc13
MT
1192 if (res.type != RTN_UNICAST)
1193 goto martian_destination;
1194
1195- err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1196+ err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc);
1197 done:
58c5fc13 1198 if (free_res)
6892158b
MT
1199 fib_res_put(&res);
1200@@ -2178,6 +2198,8 @@ out: return err;
58c5fc13
MT
1201 brd_input:
1202 if (skb->protocol != htons(ETH_P_IP))
1203 goto e_inval;
1204+ if (lsrc)
1205+ goto e_inval;
1206
1207 if (ipv4_is_zeronet(saddr))
1208 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
6892158b
MT
1209@@ -2220,6 +2242,7 @@ local_input:
1210 rth->dst.dev = net->loopback_dev;
1211 dev_hold(rth->dst.dev);
1212 rth->idev = in_dev_get(rth->dst.dev);
58c5fc13
MT
1213+ rth->fl.fl4_gw = 0;
1214 rth->rt_gateway = daddr;
1215 rth->rt_spec_dst= spec_dst;
6892158b
MT
1216 rth->dst.input= ip_local_deliver;
1217@@ -2272,8 +2295,9 @@ martian_source_keep_err:
1218 goto done;
58c5fc13
MT
1219 }
1220
57199397
MT
1221-int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1222- u8 tos, struct net_device *dev, bool noref)
1223+int ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1224+ u8 tos, struct net_device *dev, bool noref,
1225+ __be32 lsrc)
58c5fc13
MT
1226 {
1227 struct rtable * rth;
1228 unsigned hash;
6892158b 1229@@ -2296,6 +2320,7 @@ int ip_route_input_common(struct sk_buff
57199397
MT
1230 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
1231 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
58c5fc13
MT
1232 (rth->fl.iif ^ iif) |
1233+ (rth->fl.fl4_lsrc ^ lsrc) |
1234 rth->fl.oif |
1235 (rth->fl.fl4_tos ^ tos)) == 0 &&
1236 rth->fl.mark == skb->mark &&
6892158b 1237@@ -2349,12 +2374,25 @@ skip_cache:
58c5fc13
MT
1238 rcu_read_unlock();
1239 return -EINVAL;
1240 }
6892158b
MT
1241- res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
1242+ res = ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
1243 rcu_read_unlock();
1244 return res;
1245 }
58c5fc13 1246+
57199397
MT
1247+int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1248+ u8 tos, struct net_device *dev, bool noref)
58c5fc13 1249+{
57199397 1250+ return ip_route_input_cached(skb, daddr, saddr, tos, dev, noref, 0);
6892158b 1251+}
57199397
MT
1252 EXPORT_SYMBOL(ip_route_input_common);
1253
58c5fc13
MT
1254+int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1255+ u8 tos, struct net_device *dev, __be32 lsrc)
1256+{
57199397
MT
1257+ return ip_route_input_cached(skb, daddr, saddr, tos, dev, true, lsrc);
1258+}
6892158b 1259+EXPORT_SYMBOL(ip_route_input_lookup);
57199397 1260+
58c5fc13 1261 static int __mkroute_output(struct rtable **result,
57199397
MT
1262 struct fib_result *res,
1263 const struct flowi *fl,
6892158b 1264@@ -2424,6 +2462,7 @@ static int __mkroute_output(struct rtabl
58c5fc13
MT
1265 rth->fl.fl4_tos = tos;
1266 rth->fl.fl4_src = oldflp->fl4_src;
1267 rth->fl.oif = oldflp->oif;
1268+ rth->fl.fl4_gw = oldflp->fl4_gw;
1269 rth->fl.mark = oldflp->mark;
1270 rth->rt_dst = fl->fl4_dst;
1271 rth->rt_src = fl->fl4_src;
6892158b 1272@@ -2506,6 +2545,7 @@ static int ip_route_output_slow(struct n
58c5fc13
MT
1273 struct flowi fl = { .nl_u = { .ip4_u =
1274 { .daddr = oldflp->fl4_dst,
1275 .saddr = oldflp->fl4_src,
1276+ .gw = oldflp->fl4_gw,
1277 .tos = tos & IPTOS_RT_MASK,
1278 .scope = ((tos & RTO_ONLINK) ?
1279 RT_SCOPE_LINK :
6892158b 1280@@ -2617,6 +2657,7 @@ static int ip_route_output_slow(struct n
58c5fc13
MT
1281 dev_out = net->loopback_dev;
1282 dev_hold(dev_out);
1283 fl.oif = net->loopback_dev->ifindex;
1284+ fl.fl4_gw = 0;
1285 res.type = RTN_LOCAL;
1286 flags |= RTCF_LOCAL;
1287 goto make_route;
6892158b 1288@@ -2624,7 +2665,7 @@ static int ip_route_output_slow(struct n
58c5fc13
MT
1289
1290 if (fib_lookup(net, &fl, &res)) {
1291 res.fi = NULL;
1292- if (oldflp->oif) {
1293+ if (oldflp->oif && dev_out->flags & IFF_UP) {
1294 /* Apparently, routing tables are wrong. Assume,
1295 that the destination is on link.
1296
6892158b 1297@@ -2664,6 +2705,7 @@ static int ip_route_output_slow(struct n
58c5fc13
MT
1298 dev_out = net->loopback_dev;
1299 dev_hold(dev_out);
1300 fl.oif = dev_out->ifindex;
1301+ fl.fl4_gw = 0;
1302 if (res.fi)
1303 fib_info_put(res.fi);
1304 res.fi = NULL;
6892158b 1305@@ -2671,13 +2713,12 @@ static int ip_route_output_slow(struct n
58c5fc13
MT
1306 goto make_route;
1307 }
1308
1309+ if (res.type == RTN_UNICAST)
1310+ fib_select_default(net, &fl, &res);
1311 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1312- if (res.fi->fib_nhs > 1 && fl.oif == 0)
1313+ if (res.fi->fib_nhs > 1)
1314 fib_select_multipath(&fl, &res);
1315- else
1316 #endif
1317- if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
1318- fib_select_default(net, &fl, &res);
1319
1320 if (!fl.fl4_src)
1321 fl.fl4_src = FIB_RES_PREFSRC(res);
6892158b 1322@@ -2718,6 +2759,7 @@ int __ip_route_output_key(struct net *ne
58c5fc13
MT
1323 rth->fl.fl4_src == flp->fl4_src &&
1324 rth->fl.iif == 0 &&
1325 rth->fl.oif == flp->oif &&
1326+ rth->fl.fl4_gw == flp->fl4_gw &&
1327 rth->fl.mark == flp->mark &&
1328 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
1329 (IPTOS_RT_MASK | RTO_ONLINK)) &&