1 diff -urp v2.6.36/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
2 --- v2.6.36/linux/include/linux/rtnetlink.h 2010-10-22 11:34:37.000000000 +0300
3 +++ linux/include/linux/rtnetlink.h 2010-10-23 15:03:19.704274198 +0300
4 @@ -312,6 +312,8 @@ struct rtnexthop {
5 #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
6 #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
7 #define RTNH_F_ONLINK 4 /* Gateway is forced on link */
8 +#define RTNH_F_SUSPECT 8 /* We don't know the real state */
9 +#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT) /* Nexthop is dead or its state is unknown */
11 /* Macros to handle hexthops */
13 diff -urp v2.6.36/linux/include/net/flow.h linux/include/net/flow.h
14 --- v2.6.36/linux/include/net/flow.h 2010-08-02 09:37:48.000000000 +0300
15 +++ linux/include/net/flow.h 2010-10-23 15:04:36.408274819 +0300
16 @@ -19,6 +19,8 @@ struct flowi {
25 @@ -43,6 +45,8 @@ struct flowi {
26 #define fl6_flowlabel nl_u.ip6_u.flowlabel
27 #define fl4_dst nl_u.ip4_u.daddr
28 #define fl4_src nl_u.ip4_u.saddr
29 +#define fl4_lsrc nl_u.ip4_u.lsrc /* masquerade (local) source address used for input routing */
30 +#define fl4_gw nl_u.ip4_u.gw /* requested gateway, compared against the nexthop's nh_gw */
31 #define fl4_tos nl_u.ip4_u.tos
32 #define fl4_scope nl_u.ip4_u.scope
34 diff -urp v2.6.36/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
35 --- v2.6.36/linux/include/net/ip_fib.h 2010-02-25 09:01:36.000000000 +0200
36 +++ linux/include/net/ip_fib.h 2010-10-23 15:03:19.704274198 +0300
37 @@ -207,6 +207,8 @@ extern int fib_lookup(struct net *n, str
38 extern struct fib_table *fib_new_table(struct net *net, u32 id);
39 extern struct fib_table *fib_get_table(struct net *net, u32 id);
41 +extern int fib_result_table(struct fib_result *res); /* returns res->r->table */
43 #endif /* CONFIG_IP_MULTIPLE_TABLES */
45 /* Exported by fib_frontend.c */
46 @@ -277,4 +279,6 @@ static inline void fib_proc_exit(struct
50 +extern rwlock_t fib_nhflags_lock; /* taken around updates of nexthop nh_flags */
52 #endif /* _NET_FIB_H */
53 diff -urp v2.6.36/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h
54 --- v2.6.36/linux/include/net/netfilter/nf_nat.h 2010-02-25 09:01:36.000000000 +0200
55 +++ linux/include/net/netfilter/nf_nat.h 2010-10-23 15:04:36.408274819 +0300
56 @@ -73,6 +73,13 @@ struct nf_conn_nat {
60 +/* Call input routing for SNAT-ed traffic */
61 +extern unsigned int ip_nat_route_input(unsigned int hooknum,
62 + struct sk_buff *skb,
63 + const struct net_device *in,
64 + const struct net_device *out,
65 + int (*okfn)(struct sk_buff *));
67 /* Set up the info structure to map into this range. */
68 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
69 const struct nf_nat_range *range,
70 diff -urp v2.6.36/linux/include/net/route.h linux/include/net/route.h
71 --- v2.6.36/linux/include/net/route.h 2010-10-22 11:34:37.000000000 +0300
72 +++ linux/include/net/route.h 2010-10-23 15:04:36.409274028 +0300
73 @@ -126,6 +126,7 @@ static inline int ip_route_input_noref(s
74 return ip_route_input_common(skb, dst, src, tos, devin, true);
77 +extern int ip_route_input_lookup(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc); /* input route lookup that also takes the masquerade source (lsrc) */
78 extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
79 extern void ip_rt_send_redirect(struct sk_buff *skb);
81 diff -urp v2.6.36/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c
82 --- v2.6.36/linux/net/bridge/br_netfilter.c 2010-10-22 11:34:37.000000000 +0300
83 +++ linux/net/bridge/br_netfilter.c 2010-10-23 15:04:36.410274544 +0300
84 @@ -337,6 +337,9 @@ static int br_nf_pre_routing_finish(stru
88 + /* Old skb->dst is not expected, it is lost in all cases */
91 if (nf_bridge->mask & BRNF_PKT_TYPE) {
92 skb->pkt_type = PACKET_OTHERHOST;
93 nf_bridge->mask ^= BRNF_PKT_TYPE;
94 diff -urp v2.6.36/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
95 --- v2.6.36/linux/net/ipv4/fib_frontend.c 2010-10-22 11:34:38.000000000 +0300
96 +++ linux/net/ipv4/fib_frontend.c 2010-10-23 15:03:19.706274107 +0300
99 #ifndef CONFIG_IP_MULTIPLE_TABLES
101 +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN) /* without multiple tables every result is reported as the main table */
103 static int __net_init fib4_rules_init(struct net *net)
105 struct fib_table *local_table, *main_table;
106 @@ -71,6 +73,8 @@ fail:
110 +#define FIB_RES_TABLE(r) (fib_result_table(r)) /* table id taken from the lookup result via fib_result_table() */
112 struct fib_table *fib_new_table(struct net *net, u32 id)
114 struct fib_table *tb;
115 @@ -125,7 +129,8 @@ void fib_select_default(struct net *net,
116 table = res->r->table;
118 tb = fib_get_table(net, table);
119 - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
120 + if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
121 + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
122 fib_table_select_default(tb, flp, res);
125 @@ -245,6 +250,9 @@ int fib_validate_source(__be32 src, __be
128 struct fib_result res;
130 + unsigned char prefixlen;
131 + unsigned char scope;
132 int no_addr, rpf, accept_local;
135 @@ -294,21 +302,29 @@ int fib_validate_source(__be32 src, __be
139 + table = FIB_RES_TABLE(&res);
140 + prefixlen = res.prefixlen;
147 fl.oif = dev->ifindex;
150 if (fib_lookup(net, &fl, &res) == 0) {
151 - if (res.type == RTN_UNICAST) {
152 + if (res.type == RTN_UNICAST &&
153 + ((table == FIB_RES_TABLE(&res) &&
154 + res.prefixlen >= prefixlen && res.scope >= scope) ||
156 *spec_dst = FIB_RES_PREFSRC(res);
157 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
168 @@ -933,9 +949,7 @@ static int fib_inetaddr_event(struct not
172 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
175 rt_cache_flush(dev_net(dev), -1);
178 @@ -971,9 +985,7 @@ static int fib_netdev_event(struct notif
181 } endfor_ifa(in_dev);
182 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
185 rt_cache_flush(dev_net(dev), -1);
188 diff -urp v2.6.36/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
189 --- v2.6.36/linux/net/ipv4/fib_hash.c 2010-05-17 10:49:01.000000000 +0300
190 +++ linux/net/ipv4/fib_hash.c 2010-10-23 15:03:19.707274360 +0300
191 @@ -278,25 +278,35 @@ out:
192 void fib_table_select_default(struct fib_table *tb,
193 const struct flowi *flp, struct fib_result *res)
195 - int order, last_idx;
196 + int order, last_idx, last_dflt, last_nhsel;
197 + struct fib_alias *first_fa = NULL;
198 + struct hlist_head *head;
199 struct hlist_node *node;
201 struct fib_info *fi = NULL;
202 struct fib_info *last_resort;
203 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
204 - struct fn_zone *fz = t->fn_zones[0];
205 + struct fn_zone *fz = t->fn_zones[res->prefixlen];
211 + k = fz_key(flp->fl4_dst, fz);
218 read_lock(&fib_hash_lock);
219 - hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
220 + head = &fz->fz_hash[fn_hash(k, fz)];
221 + hlist_for_each_entry(f, node, head, fn_hash) {
222 struct fib_alias *fa;
224 + if (f->fn_key != k)
227 list_for_each_entry(fa, &f->fn_alias, fa_list) {
228 struct fib_info *next_fi = fa->fa_info;
230 @@ -304,42 +314,56 @@ void fib_table_select_default(struct fib
231 fa->fa_type != RTN_UNICAST)
235 + fa->fa_tos != flp->fl4_tos)
237 if (next_fi->fib_priority > res->fi->fib_priority)
239 - if (!next_fi->fib_nh[0].nh_gw ||
240 - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
242 fa->fa_state |= FA_S_ACCESSED;
245 - if (next_fi != res->fi)
247 - } else if (!fib_detect_death(fi, order, &last_resort,
248 - &last_idx, tb->tb_default)) {
250 + last_dflt = fa->fa_last_dflt;
253 + if (fi && !fib_detect_death(fi, order, &last_resort,
254 + &last_idx, &last_dflt, &last_nhsel, flp)) {
255 fib_result_assign(res, fi);
256 - tb->tb_default = order;
257 + first_fa->fa_last_dflt = order;
266 if (order <= 0 || fi == NULL) {
267 - tb->tb_default = -1;
268 + if (fi && fi->fib_nhs > 1 &&
269 + fib_detect_death(fi, order, &last_resort, &last_idx,
270 + &last_dflt, &last_nhsel, flp) &&
271 + last_resort == fi) {
272 + read_lock_bh(&fib_nhflags_lock);
273 + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
274 + read_unlock_bh(&fib_nhflags_lock);
276 + if (first_fa) first_fa->fa_last_dflt = -1;
280 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
282 + &last_dflt, &last_nhsel, flp)) {
283 fib_result_assign(res, fi);
284 - tb->tb_default = order;
285 + first_fa->fa_last_dflt = order;
290 + if (last_idx >= 0) {
291 fib_result_assign(res, last_resort);
292 - tb->tb_default = last_idx;
293 + read_lock_bh(&fib_nhflags_lock);
294 + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
295 + read_unlock_bh(&fib_nhflags_lock);
296 + first_fa->fa_last_dflt = last_idx;
299 read_unlock(&fib_hash_lock);
301 @@ -463,6 +487,7 @@ int fib_table_insert(struct fib_table *t
302 write_lock_bh(&fib_hash_lock);
303 fi_drop = fa->fa_info;
305 + fa->fa_last_dflt = -1;
306 fa->fa_type = cfg->fc_type;
307 fa->fa_scope = cfg->fc_scope;
308 state = fa->fa_state;
309 @@ -517,6 +542,7 @@ int fib_table_insert(struct fib_table *t
310 new_fa->fa_type = cfg->fc_type;
311 new_fa->fa_scope = cfg->fc_scope;
312 new_fa->fa_state = 0;
313 + new_fa->fa_last_dflt = -1;
316 * Insert new entry to the list.
317 diff -urp v2.6.36/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
318 --- v2.6.36/linux/net/ipv4/fib_lookup.h 2009-09-11 10:27:17.000000000 +0300
319 +++ linux/net/ipv4/fib_lookup.h 2010-10-23 15:03:19.707274360 +0300
322 struct list_head fa_list;
323 struct fib_info *fa_info;
328 @@ -37,7 +38,8 @@ extern struct fib_alias *fib_find_alias(
330 extern int fib_detect_death(struct fib_info *fi, int order,
331 struct fib_info **last_resort,
332 - int *last_idx, int dflt);
333 + int *last_idx, int *dflt, int *last_nhsel,
334 + const struct flowi *flp);
336 static inline void fib_result_assign(struct fib_result *res,
338 diff -urp v2.6.36/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
339 --- v2.6.36/linux/net/ipv4/fib_rules.c 2010-08-02 09:37:49.000000000 +0300
340 +++ linux/net/ipv4/fib_rules.c 2010-10-23 15:03:19.708274528 +0300
341 @@ -54,6 +54,11 @@ u32 fib_rules_tclass(struct fib_result *
345 +int fib_result_table(struct fib_result *res)
347 + return res->r->table;
350 int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
352 struct fib_lookup_arg arg = {
353 diff -urp v2.6.36/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
354 --- v2.6.36/linux/net/ipv4/fib_semantics.c 2010-05-17 10:49:01.000000000 +0300
355 +++ linux/net/ipv4/fib_semantics.c 2010-10-23 15:04:36.412272841 +0300
356 @@ -51,6 +51,7 @@ static struct hlist_head *fib_info_hash;
357 static struct hlist_head *fib_info_laddrhash;
358 static unsigned int fib_hash_size;
359 static unsigned int fib_info_cnt;
360 +DEFINE_RWLOCK(fib_nhflags_lock); /* taken around updates of nexthop nh_flags */
362 #define DEVINDEX_HASHBITS 8
363 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
364 @@ -187,7 +188,7 @@ static __inline__ int nh_comp(const stru
365 #ifdef CONFIG_NET_CLS_ROUTE
366 nh->nh_tclassid != onh->nh_tclassid ||
368 - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
369 + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
372 } endfor_nexthops(fi);
373 @@ -238,7 +239,7 @@ static struct fib_info *fib_find_info(co
374 nfi->fib_priority == fi->fib_priority &&
375 memcmp(nfi->fib_metrics, fi->fib_metrics,
376 sizeof(fi->fib_metrics)) == 0 &&
377 - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
378 + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
379 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
382 @@ -350,26 +351,70 @@ struct fib_alias *fib_find_alias(struct
385 int fib_detect_death(struct fib_info *fi, int order,
386 - struct fib_info **last_resort, int *last_idx, int dflt)
387 + struct fib_info **last_resort, int *last_idx, int *dflt,
388 + int *last_nhsel, const struct flowi *flp)
391 - int state = NUD_NONE;
394 + struct fib_nh * nh;
396 + int flag, dead = 1;
398 + /* change_nexthops(fi) { */
399 + for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
400 + if (flp->oif && flp->oif != nh->nh_oif)
402 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
403 + nh->nh_scope == RT_SCOPE_LINK)
405 + if (nh->nh_flags & RTNH_F_DEAD)
408 - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
410 - state = n->nud_state;
413 - if (state == NUD_REACHABLE)
415 - if ((state&NUD_VALID) && order != dflt)
417 - if ((state&NUD_VALID) ||
418 - (*last_idx<0 && order > dflt)) {
422 + if (nh->nh_dev->flags & IFF_NOARP) {
428 + if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
429 + dst = flp->fl4_dst;
432 + n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
434 + state = n->nud_state;
437 + if (state==NUD_REACHABLE ||
438 + ((state&NUD_VALID) && order != *dflt)) {
442 + if (!(state&NUD_VALID))
446 + if ((state&NUD_VALID) ||
447 + (*last_idx<0 && order >= *dflt)) {
450 + *last_nhsel = nhsel;
455 + read_lock_bh(&fib_nhflags_lock);
457 + nh->nh_flags |= RTNH_F_SUSPECT;
459 + nh->nh_flags &= ~RTNH_F_SUSPECT;
460 + read_unlock_bh(&fib_nhflags_lock);
463 + /* } endfor_nexthops(fi) */
468 #ifdef CONFIG_IP_ROUTE_MULTIPATH
469 @@ -538,8 +583,11 @@ static int fib_check_nh(struct fib_confi
471 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
473 - if (!(dev->flags&IFF_UP))
475 + if (!(dev->flags&IFF_UP)) {
476 + if (fi->fib_protocol != RTPROT_STATIC)
478 + nh->nh_flags |= RTNH_F_DEAD;
482 nh->nh_scope = RT_SCOPE_LINK;
483 @@ -559,24 +607,48 @@ static int fib_check_nh(struct fib_confi
484 /* It is not necessary, but requires a bit of thinking */
485 if (fl.fl4_scope < RT_SCOPE_LINK)
486 fl.fl4_scope = RT_SCOPE_LINK;
487 - if ((err = fib_lookup(net, &fl, &res)) != 0)
489 + err = fib_lookup(net, &fl, &res);
492 - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
494 - nh->nh_scope = res.scope;
495 - nh->nh_oif = FIB_RES_OIF(res);
496 - if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
498 - dev_hold(nh->nh_dev);
500 - if (!(nh->nh_dev->flags & IFF_UP))
504 + struct in_device *in_dev;
506 + if (err != -ENETUNREACH ||
507 + fi->fib_protocol != RTPROT_STATIC)
510 + in_dev = inetdev_by_index(net, nh->nh_oif);
511 + if (in_dev == NULL ||
512 + in_dev->dev->flags & IFF_UP) {
514 + in_dev_put(in_dev);
517 + nh->nh_flags |= RTNH_F_DEAD;
518 + nh->nh_scope = RT_SCOPE_LINK;
519 + nh->nh_dev = in_dev->dev;
520 + dev_hold(nh->nh_dev);
521 + in_dev_put(in_dev);
524 + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
526 + nh->nh_scope = res.scope;
527 + nh->nh_oif = FIB_RES_OIF(res);
528 + if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
530 + dev_hold(nh->nh_dev);
531 + if (!(nh->nh_dev->flags & IFF_UP)) {
532 + if (fi->fib_protocol != RTPROT_STATIC) {
536 + nh->nh_flags |= RTNH_F_DEAD;
546 struct in_device *in_dev;
548 @@ -587,8 +659,11 @@ out:
551 if (!(in_dev->dev->flags&IFF_UP)) {
552 - in_dev_put(in_dev);
554 + if (fi->fib_protocol != RTPROT_STATIC) {
555 + in_dev_put(in_dev);
558 + nh->nh_flags |= RTNH_F_DEAD;
560 nh->nh_dev = in_dev->dev;
561 dev_hold(nh->nh_dev);
562 @@ -897,8 +972,12 @@ int fib_semantic_match(struct list_head
564 if (nh->nh_flags&RTNH_F_DEAD)
566 - if (!flp->oif || flp->oif == nh->nh_oif)
568 + if (flp->oif && flp->oif != nh->nh_oif)
570 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
571 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
575 #ifdef CONFIG_IP_ROUTE_MULTIPATH
576 if (nhsel < fi->fib_nhs) {
577 @@ -1078,18 +1157,29 @@ int fib_sync_down_dev(struct net_device
580 change_nexthops(fi) {
581 - if (nexthop_nh->nh_flags&RTNH_F_DEAD)
583 - else if (nexthop_nh->nh_dev == dev &&
584 - nexthop_nh->nh_scope != scope) {
585 - nexthop_nh->nh_flags |= RTNH_F_DEAD;
586 + if (nexthop_nh->nh_flags&RTNH_F_DEAD) {
587 + if (fi->fib_protocol!=RTPROT_STATIC ||
588 + nexthop_nh->nh_dev == NULL ||
589 + __in_dev_get_rtnl(nexthop_nh->nh_dev) == NULL ||
590 + nexthop_nh->nh_dev->flags&IFF_UP)
592 + } else if (nexthop_nh->nh_dev == dev &&
593 + nexthop_nh->nh_scope != scope) {
594 + write_lock_bh(&fib_nhflags_lock);
595 #ifdef CONFIG_IP_ROUTE_MULTIPATH
596 - spin_lock_bh(&fib_multipath_lock);
597 + spin_lock(&fib_multipath_lock);
598 + nexthop_nh->nh_flags |= RTNH_F_DEAD;
599 fi->fib_power -= nexthop_nh->nh_power;
600 nexthop_nh->nh_power = 0;
601 - spin_unlock_bh(&fib_multipath_lock);
602 + spin_unlock(&fib_multipath_lock);
604 + nexthop_nh->nh_flags |= RTNH_F_DEAD;
607 + write_unlock_bh(&fib_nhflags_lock);
608 + if (fi->fib_protocol!=RTPROT_STATIC ||
610 + __in_dev_get_rtnl(dev) == NULL)
613 #ifdef CONFIG_IP_ROUTE_MULTIPATH
614 if (force > 1 && nexthop_nh->nh_dev == dev) {
615 @@ -1107,11 +1197,8 @@ int fib_sync_down_dev(struct net_device
619 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
622 - Dead device goes up. We wake up dead nexthops.
623 - It takes sense only on multipath routes.
624 + Dead device goes up or new address is added. We wake up dead nexthops.
627 int fib_sync_up(struct net_device *dev)
628 @@ -1121,8 +1208,10 @@ int fib_sync_up(struct net_device *dev)
629 struct hlist_head *head;
630 struct hlist_node *node;
633 + struct fib_result res;
637 if (!(dev->flags&IFF_UP))
640 @@ -1130,6 +1219,7 @@ int fib_sync_up(struct net_device *dev)
641 hash = fib_devindex_hashfn(dev->ifindex);
642 head = &fib_info_devhash[hash];
646 hlist_for_each_entry(nh, node, head, nh_hash) {
647 struct fib_info *fi = nh->nh_parent;
648 @@ -1142,21 +1232,41 @@ int fib_sync_up(struct net_device *dev)
651 change_nexthops(fi) {
652 - if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
654 + if (!(nexthop_nh->nh_flags&RTNH_F_DEAD))
657 if (nexthop_nh->nh_dev == NULL ||
658 !(nexthop_nh->nh_dev->flags&IFF_UP))
660 if (nexthop_nh->nh_dev != dev ||
661 !__in_dev_get_rtnl(dev))
663 + if (nexthop_nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
664 + struct flowi fl = {
666 + { .daddr = nexthop_nh->nh_gw,
667 + .scope = nexthop_nh->nh_scope } },
668 + .oif = nexthop_nh->nh_oif,
670 + if (fib_lookup(dev_net(dev), &fl, &res) != 0)
672 + if (res.type != RTN_UNICAST &&
673 + res.type != RTN_LOCAL) {
677 + nexthop_nh->nh_scope = res.scope;
682 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
683 spin_lock_bh(&fib_multipath_lock);
684 nexthop_nh->nh_power = 0;
686 nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
687 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
688 spin_unlock_bh(&fib_multipath_lock);
690 } endfor_nexthops(fi)
693 @@ -1164,10 +1274,14 @@ int fib_sync_up(struct net_device *dev)
703 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
706 The algorithm is suboptimal, but it provides really
707 fair weighted route distribution.
708 @@ -1176,24 +1290,45 @@ int fib_sync_up(struct net_device *dev)
709 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
711 struct fib_info *fi = res->fi;
715 spin_lock_bh(&fib_multipath_lock);
719 + change_nexthops(fi) {
720 + if (flp->oif != nexthop_nh->nh_oif)
722 + if (flp->fl4_gw && flp->fl4_gw != nexthop_nh->nh_gw &&
723 + nexthop_nh->nh_gw && nexthop_nh->nh_scope == RT_SCOPE_LINK)
725 + if (!(nexthop_nh->nh_flags&RTNH_F_BADSTATE)) {
726 + if (nexthop_nh->nh_power > w) {
727 + w = nexthop_nh->nh_power;
731 + } endfor_nexthops(fi);
733 + spin_unlock_bh(&fib_multipath_lock);
741 if (fi->fib_power <= 0) {
743 change_nexthops(fi) {
744 - if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
745 + if (!(nexthop_nh->nh_flags&RTNH_F_BADSTATE)) {
746 power += nexthop_nh->nh_weight;
747 nexthop_nh->nh_power = nexthop_nh->nh_weight;
749 } endfor_nexthops(fi);
750 fi->fib_power = power;
752 - spin_unlock_bh(&fib_multipath_lock);
753 - /* Race condition: route has just become dead. */
762 @@ -1203,21 +1338,41 @@ void fib_select_multipath(const struct f
764 w = jiffies % fi->fib_power;
767 change_nexthops(fi) {
768 - if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) &&
769 + if (!(nexthop_nh->nh_flags&RTNH_F_BADSTATE) &&
770 nexthop_nh->nh_power) {
771 if ((w -= nexthop_nh->nh_power) <= 0) {
772 nexthop_nh->nh_power--;
774 - res->nh_sel = nhsel;
775 spin_unlock_bh(&fib_multipath_lock);
776 + res->nh_sel = nhsel;
781 + } endfor_nexthops(fi);
790 + if (!(nh->nh_flags&RTNH_F_DEAD)) {
791 + if (flp->oif && flp->oif != nh->nh_oif)
793 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
794 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
796 + spin_unlock_bh(&fib_multipath_lock);
797 + res->nh_sel = nhsel;
800 } endfor_nexthops(fi);
802 /* Race condition: route has just become dead. */
804 spin_unlock_bh(&fib_multipath_lock);
807 diff -urp v2.6.36/linux/net/ipv4/fib_trie.c linux/net/ipv4/fib_trie.c
808 --- v2.6.36/linux/net/ipv4/fib_trie.c 2010-10-22 11:34:38.000000000 +0300
809 +++ linux/net/ipv4/fib_trie.c 2010-10-23 15:03:19.712272951 +0300
810 @@ -1277,6 +1277,7 @@ int fib_table_insert(struct fib_table *t
811 fi_drop = fa->fa_info;
812 new_fa->fa_tos = fa->fa_tos;
813 new_fa->fa_info = fi;
814 + new_fa->fa_last_dflt = -1;
815 new_fa->fa_type = cfg->fc_type;
816 new_fa->fa_scope = cfg->fc_scope;
817 state = fa->fa_state;
818 @@ -1317,6 +1318,7 @@ int fib_table_insert(struct fib_table *t
819 new_fa->fa_type = cfg->fc_type;
820 new_fa->fa_scope = cfg->fc_scope;
821 new_fa->fa_state = 0;
822 + new_fa->fa_last_dflt = -1;
824 * Insert new entry to the list.
826 @@ -1819,24 +1821,31 @@ void fib_table_select_default(struct fib
827 struct fib_result *res)
829 struct trie *t = (struct trie *) tb->tb_data;
830 - int order, last_idx;
831 + int order, last_idx, last_dflt, last_nhsel;
832 + struct fib_alias *first_fa = NULL;
833 struct fib_info *fi = NULL;
834 struct fib_info *last_resort;
835 struct fib_alias *fa = NULL;
836 struct list_head *fa_head;
846 + mask = inet_make_mask(res->prefixlen);
847 + key = ntohl(flp->fl4_dst & mask);
851 - l = fib_find_node(t, 0);
852 + l = fib_find_node(t, key);
856 - fa_head = get_fa_head(l, 0);
857 + fa_head = get_fa_head(l, res->prefixlen);
861 @@ -1850,39 +1859,52 @@ void fib_table_select_default(struct fib
862 fa->fa_type != RTN_UNICAST)
866 + fa->fa_tos != flp->fl4_tos)
868 if (next_fi->fib_priority > res->fi->fib_priority)
870 - if (!next_fi->fib_nh[0].nh_gw ||
871 - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
873 fa->fa_state |= FA_S_ACCESSED;
876 - if (next_fi != res->fi)
878 - } else if (!fib_detect_death(fi, order, &last_resort,
879 - &last_idx, tb->tb_default)) {
881 + last_dflt = fa->fa_last_dflt;
884 + if (fi && !fib_detect_death(fi, order, &last_resort,
885 + &last_idx, &last_dflt, &last_nhsel, flp)) {
886 fib_result_assign(res, fi);
887 - tb->tb_default = order;
888 + first_fa->fa_last_dflt = order;
894 if (order <= 0 || fi == NULL) {
895 - tb->tb_default = -1;
896 + if (fi && fi->fib_nhs > 1 &&
897 + fib_detect_death(fi, order, &last_resort, &last_idx,
898 + &last_dflt, &last_nhsel, flp) &&
899 + last_resort == fi) {
900 + read_lock_bh(&fib_nhflags_lock);
901 + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
902 + read_unlock_bh(&fib_nhflags_lock);
904 + if (first_fa) first_fa->fa_last_dflt = -1;
908 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
910 + &last_dflt, &last_nhsel, flp)) {
911 fib_result_assign(res, fi);
912 - tb->tb_default = order;
913 + first_fa->fa_last_dflt = order;
917 + if (last_idx >= 0) {
918 fib_result_assign(res, last_resort);
919 - tb->tb_default = last_idx;
920 + read_lock_bh(&fib_nhflags_lock);
921 + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
922 + read_unlock_bh(&fib_nhflags_lock);
923 + first_fa->fa_last_dflt = last_idx;
928 diff -urp v2.6.36/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
929 --- v2.6.36/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2010-08-02 09:37:49.000000000 +0300
930 +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2010-10-23 15:04:36.412272841 +0300
931 @@ -51,7 +51,7 @@ masquerade_tg(struct sk_buff *skb, const
932 enum ip_conntrack_info ctinfo;
933 struct nf_nat_range newrange;
934 const struct nf_nat_multi_range_compat *mr;
935 - const struct rtable *rt;
939 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
940 @@ -69,13 +69,29 @@ masquerade_tg(struct sk_buff *skb, const
944 - rt = skb_rtable(skb);
945 - newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
947 - pr_info("%s ate my IP address\n", par->out->name);
951 + struct flowi fl = { .nl_u = { .ip4_u =
952 + { .daddr = ip_hdr(skb)->daddr,
953 + .tos = (RT_TOS(ip_hdr(skb)->tos) |
955 + .gw = skb_rtable(skb)->rt_gateway,
958 + .oif = par->out->ifindex };
959 + if (ip_route_output_key(dev_net(par->out), &rt, &fl) != 0) {
960 + /* Funky routing can do this. */
961 + if (net_ratelimit())
963 + " No route: Rusty's brain broke!\n",
969 + newsrc = rt->rt_src;
972 nat->masq_index = par->out->ifindex;
974 /* Transfer from original range. */
975 diff -urp v2.6.36/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c
976 --- v2.6.36/linux/net/ipv4/netfilter/nf_nat_core.c 2010-10-22 11:34:38.000000000 +0300
977 +++ linux/net/ipv4/netfilter/nf_nat_core.c 2010-10-23 15:04:36.413274353 +0300
978 @@ -706,6 +706,52 @@ static struct pernet_operations nf_nat_n
979 .exit = nf_nat_net_exit,
983 +ip_nat_route_input(unsigned int hooknum,
984 + struct sk_buff *skb,
985 + const struct net_device *in,
986 + const struct net_device *out,
987 + int (*okfn)(struct sk_buff *))
990 + struct nf_conn *conn;
991 + enum ip_conntrack_info ctinfo;
992 + enum ip_conntrack_dir dir;
993 + unsigned long statusbit;
996 + if (!(conn = nf_ct_get(skb, &ctinfo)))
999 + if (!(conn->status & IPS_NAT_DONE_MASK))
1001 + dir = CTINFO2DIR(ctinfo);
1002 + statusbit = IPS_SRC_NAT;
1003 + if (dir == IP_CT_DIR_REPLY)
1004 + statusbit ^= IPS_NAT_MASK;
1005 + if (!(conn->status & statusbit))
1011 + if (skb->len < sizeof(struct iphdr))
1014 + /* use daddr in other direction as masquerade address (lsrc) */
1015 + iph = ip_hdr(skb);
1016 + saddr = conn->tuplehash[!dir].tuple.dst.u3.ip;
1017 + if (saddr == iph->saddr)
1020 + if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
1026 +EXPORT_SYMBOL_GPL(ip_nat_route_input);
1028 static int __init nf_nat_init(void)
1031 diff -urp v2.6.36/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c
1032 --- v2.6.36/linux/net/ipv4/netfilter/nf_nat_standalone.c 2010-10-22 11:34:38.000000000 +0300
1033 +++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2010-10-23 15:04:36.414274319 +0300
1034 @@ -249,6 +249,14 @@ static struct nf_hook_ops nf_nat_ops[] _
1035 .hooknum = NF_INET_PRE_ROUTING,
1036 .priority = NF_IP_PRI_NAT_DST,
1038 + /* Before routing, route before mangling */
1040 + .hook = ip_nat_route_input,
1041 + .owner = THIS_MODULE,
1042 + .pf = NFPROTO_IPV4,
1043 + .hooknum = NF_INET_PRE_ROUTING,
1044 + .priority = NF_IP_PRI_LAST-1,
1046 /* After packet filtering, change source */
1049 diff -urp v2.6.36/linux/net/ipv4/route.c linux/net/ipv4/route.c
1050 --- v2.6.36/linux/net/ipv4/route.c 2010-10-22 11:34:38.000000000 +0300
1051 +++ linux/net/ipv4/route.c 2010-10-23 15:08:07.188273891 +0300
1052 @@ -693,6 +693,8 @@ static inline int compare_keys(struct fl
1053 return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
1054 ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
1055 (fl1->mark ^ fl2->mark) |
1056 + ((__force u32)fl1->nl_u.ip4_u.lsrc ^ (__force u32)fl2->nl_u.ip4_u.lsrc) |
1057 + ((__force u32)fl1->nl_u.ip4_u.gw ^ (__force u32)fl2->nl_u.ip4_u.gw) |
1058 (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
1059 (fl1->oif ^ fl2->oif) |
1060 (fl1->iif ^ fl2->iif)) == 0;
1061 @@ -1435,6 +1437,7 @@ void ip_rt_redirect(__be32 old_gw, __be3
1063 /* Gateway is different ... */
1064 rt->rt_gateway = new_gw;
1065 + if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
1067 /* Redirect received -> path was valid */
1068 dst_confirm(&rth->dst);
1069 @@ -1886,6 +1889,7 @@ static int ip_route_input_mc(struct sk_b
1070 rth->fl.fl4_tos = tos;
1071 rth->fl.mark = skb->mark;
1072 rth->fl.fl4_src = saddr;
1073 + rth->fl.fl4_lsrc = 0;
1074 rth->rt_src = saddr;
1075 #ifdef CONFIG_NET_CLS_ROUTE
1076 rth->dst.tclassid = itag;
1077 @@ -1896,6 +1900,7 @@ static int ip_route_input_mc(struct sk_b
1078 dev_hold(rth->dst.dev);
1079 rth->idev = in_dev_get(rth->dst.dev);
1081 + rth->fl.fl4_gw = 0;
1082 rth->rt_gateway = daddr;
1083 rth->rt_spec_dst= spec_dst;
1084 rth->rt_genid = rt_genid(dev_net(dev));
1085 @@ -1959,7 +1964,7 @@ static int __mkroute_input(struct sk_buf
1086 struct fib_result *res,
1087 struct in_device *in_dev,
1088 __be32 daddr, __be32 saddr, u32 tos,
1089 - struct rtable **result)
1090 + __be32 lsrc, struct rtable **result)
1094 @@ -1991,6 +1996,7 @@ static int __mkroute_input(struct sk_buf
1095 flags |= RTCF_DIRECTSRC;
1097 if (out_dev == in_dev && err &&
1099 (IN_DEV_SHARED_MEDIA(out_dev) ||
1100 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1101 flags |= RTCF_DOREDIRECT;
1102 @@ -2029,6 +2035,7 @@ static int __mkroute_input(struct sk_buf
1103 rth->fl.mark = skb->mark;
1104 rth->fl.fl4_src = saddr;
1105 rth->rt_src = saddr;
1106 + rth->fl.fl4_lsrc = lsrc;
1107 rth->rt_gateway = daddr;
1109 rth->fl.iif = in_dev->dev->ifindex;
1110 @@ -2036,6 +2043,7 @@ static int __mkroute_input(struct sk_buf
1111 dev_hold(rth->dst.dev);
1112 rth->idev = in_dev_get(rth->dst.dev);
1114 + rth->fl.fl4_gw = 0;
1115 rth->rt_spec_dst= spec_dst;
1117 rth->dst.obsolete = -1;
1118 @@ -2055,21 +2063,23 @@ static int __mkroute_input(struct sk_buf
1120 static int ip_mkroute_input(struct sk_buff *skb,
1121 struct fib_result *res,
1123 const struct flowi *fl,
1124 struct in_device *in_dev,
1125 - __be32 daddr, __be32 saddr, u32 tos)
1126 + __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
1128 struct rtable* rth = NULL;
1132 + fib_select_default(net, fl, res);
1133 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1134 - if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1135 + if (res->fi && res->fi->fib_nhs > 1)
1136 fib_select_multipath(fl, res);
1139 /* create a routing cache entry */
1140 - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1141 + err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
1145 @@ -2090,18 +2100,20 @@ static int ip_mkroute_input(struct sk_bu
1148 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1149 - u8 tos, struct net_device *dev)
1150 + u8 tos, struct net_device *dev, __be32 lsrc)
1152 struct fib_result res;
1153 struct in_device *in_dev = __in_dev_get_rcu(dev);
1154 struct flowi fl = { .nl_u = { .ip4_u =
1157 + .saddr = lsrc? : saddr,
1159 .scope = RT_SCOPE_UNIVERSE,
1162 - .iif = dev->ifindex };
1164 + dev_net(dev)->loopback_dev->ifindex :
1168 struct rtable * rth;
1169 @@ -2137,6 +2149,12 @@ static int ip_route_input_slow(struct sk
1170 ipv4_is_loopback(daddr))
1171 goto martian_destination;
1174 + if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) ||
1175 + ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc))
1180 * Now we are ready to route packet.
1182 @@ -2146,6 +2164,8 @@ static int ip_route_input_slow(struct sk
1186 + fl.iif = dev->ifindex;
1187 + fl.fl4_src = saddr;
1189 RT_CACHE_STAT_INC(in_slow_tot);
1191 @@ -2169,7 +2189,7 @@ static int ip_route_input_slow(struct sk
1192 if (res.type != RTN_UNICAST)
1193 goto martian_destination;
1195 - err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1196 + err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc);
1200 @@ -2178,6 +2198,8 @@ out: return err;
1202 if (skb->protocol != htons(ETH_P_IP))
1207 if (ipv4_is_zeronet(saddr))
1208 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1209 @@ -2220,6 +2242,7 @@ local_input:
1210 rth->dst.dev = net->loopback_dev;
1211 dev_hold(rth->dst.dev);
1212 rth->idev = in_dev_get(rth->dst.dev);
1213 + rth->fl.fl4_gw = 0;
1214 rth->rt_gateway = daddr;
1215 rth->rt_spec_dst= spec_dst;
1216 rth->dst.input= ip_local_deliver;
1217 @@ -2272,8 +2295,9 @@ martian_source_keep_err:
1221 -int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1222 - u8 tos, struct net_device *dev, bool noref)
1223 +int ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1224 + u8 tos, struct net_device *dev, bool noref,
1227 struct rtable * rth;
1229 @@ -2296,6 +2320,7 @@ int ip_route_input_common(struct sk_buff
1230 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
1231 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
1232 (rth->fl.iif ^ iif) |
1233 + (rth->fl.fl4_lsrc ^ lsrc) |
1235 (rth->fl.fl4_tos ^ tos)) == 0 &&
1236 rth->fl.mark == skb->mark &&
1237 @@ -2349,12 +2374,25 @@ skip_cache:
1241 - res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
1242 + res = ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
1247 +int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1248 + u8 tos, struct net_device *dev, bool noref)
1250 + return ip_route_input_cached(skb, daddr, saddr, tos, dev, noref, 0);
1252 EXPORT_SYMBOL(ip_route_input_common);
1254 +int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1255 + u8 tos, struct net_device *dev, __be32 lsrc)
1257 + return ip_route_input_cached(skb, daddr, saddr, tos, dev, true, lsrc);
1259 +EXPORT_SYMBOL(ip_route_input_lookup);
1261 static int __mkroute_output(struct rtable **result,
1262 struct fib_result *res,
1263 const struct flowi *fl,
1264 @@ -2424,6 +2462,7 @@ static int __mkroute_output(struct rtabl
1265 rth->fl.fl4_tos = tos;
1266 rth->fl.fl4_src = oldflp->fl4_src;
1267 rth->fl.oif = oldflp->oif;
1268 + rth->fl.fl4_gw = oldflp->fl4_gw;
1269 rth->fl.mark = oldflp->mark;
1270 rth->rt_dst = fl->fl4_dst;
1271 rth->rt_src = fl->fl4_src;
1272 @@ -2506,6 +2545,7 @@ static int ip_route_output_slow(struct n
1273 struct flowi fl = { .nl_u = { .ip4_u =
1274 { .daddr = oldflp->fl4_dst,
1275 .saddr = oldflp->fl4_src,
1276 + .gw = oldflp->fl4_gw,
1277 .tos = tos & IPTOS_RT_MASK,
1278 .scope = ((tos & RTO_ONLINK) ?
1280 @@ -2617,6 +2657,7 @@ static int ip_route_output_slow(struct n
1281 dev_out = net->loopback_dev;
1283 fl.oif = net->loopback_dev->ifindex;
1285 res.type = RTN_LOCAL;
1286 flags |= RTCF_LOCAL;
1288 @@ -2624,7 +2665,7 @@ static int ip_route_output_slow(struct n
1290 if (fib_lookup(net, &fl, &res)) {
1292 - if (oldflp->oif) {
1293 + if (oldflp->oif && dev_out->flags & IFF_UP) {
1294 /* Apparently, routing tables are wrong. Assume,
1295 that the destination is on link.
1297 @@ -2664,6 +2705,7 @@ static int ip_route_output_slow(struct n
1298 dev_out = net->loopback_dev;
1300 fl.oif = dev_out->ifindex;
1303 fib_info_put(res.fi);
1305 @@ -2671,13 +2713,12 @@ static int ip_route_output_slow(struct n
1309 + if (res.type == RTN_UNICAST)
1310 + fib_select_default(net, &fl, &res);
1311 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1312 - if (res.fi->fib_nhs > 1 && fl.oif == 0)
1313 + if (res.fi->fib_nhs > 1)
1314 fib_select_multipath(&fl, &res);
1317 - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
1318 - fib_select_default(net, &fl, &res);
1321 fl.fl4_src = FIB_RES_PREFSRC(res);
1322 @@ -2718,6 +2759,7 @@ int __ip_route_output_key(struct net *ne
1323 rth->fl.fl4_src == flp->fl4_src &&
1325 rth->fl.oif == flp->oif &&
1326 + rth->fl.fl4_gw == flp->fl4_gw &&
1327 rth->fl.mark == flp->mark &&
1328 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
1329 (IPTOS_RT_MASK | RTO_ONLINK)) &&