git.ipfire.org Git - people/ms/ipfire-3.x.git/blame - pkgs/kernel/patches/routes-2.6.38-16.diff
kernel: Update to 2.6.38.1.
[people/ms/ipfire-3.x.git] / pkgs / kernel / patches / routes-2.6.38-16.diff
CommitLineData
16454cff
MT
1diff -urp v2.6.38/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
2--- v2.6.38/linux/include/linux/rtnetlink.h 2011-03-20 12:05:41.000000000 +0200
3+++ linux/include/linux/rtnetlink.h 2011-03-20 12:12:11.107248055 +0200
6892158b 4@@ -312,6 +312,8 @@ struct rtnexthop {
58c5fc13
MT
5 #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
6 #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
7 #define RTNH_F_ONLINK 4 /* Gateway is forced on link */
8+#define RTNH_F_SUSPECT 8 /* We don't know the real state */
9+#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT)
10
11 /* Macros to handle hexthops */
12
16454cff
MT
13diff -urp v2.6.38/linux/include/net/flow.h linux/include/net/flow.h
14--- v2.6.38/linux/include/net/flow.h 2011-03-20 12:01:11.000000000 +0200
15+++ linux/include/net/flow.h 2011-03-20 12:13:20.139247270 +0200
58c5fc13
MT
16@@ -19,6 +19,8 @@ struct flowi {
17 struct {
18 __be32 daddr;
19 __be32 saddr;
20+ __be32 lsrc;
21+ __be32 gw;
22 __u8 tos;
23 __u8 scope;
24 } ip4_u;
25@@ -43,6 +45,8 @@ struct flowi {
26 #define fl6_flowlabel nl_u.ip6_u.flowlabel
27 #define fl4_dst nl_u.ip4_u.daddr
28 #define fl4_src nl_u.ip4_u.saddr
29+#define fl4_lsrc nl_u.ip4_u.lsrc
30+#define fl4_gw nl_u.ip4_u.gw
31 #define fl4_tos nl_u.ip4_u.tos
32 #define fl4_scope nl_u.ip4_u.scope
33
16454cff
MT
34diff -urp v2.6.38/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
35--- v2.6.38/linux/include/net/ip_fib.h 2011-03-20 12:05:50.000000000 +0200
36+++ linux/include/net/ip_fib.h 2011-03-20 12:12:11.107248055 +0200
bc901d79 37@@ -210,6 +210,8 @@ extern int fib_lookup(struct net *n, str
58c5fc13
MT
38 extern struct fib_table *fib_new_table(struct net *net, u32 id);
39 extern struct fib_table *fib_get_table(struct net *net, u32 id);
40
41+extern int fib_result_table(struct fib_result *res);
42+
43 #endif /* CONFIG_IP_MULTIPLE_TABLES */
44
45 /* Exported by fib_frontend.c */
bc901d79 46@@ -270,4 +272,6 @@ static inline void fib_proc_exit(struct
58c5fc13
MT
47 }
48 #endif
49
50+extern rwlock_t fib_nhflags_lock;
51+
52 #endif /* _NET_FIB_H */
16454cff
MT
53diff -urp v2.6.38/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h
54--- v2.6.38/linux/include/net/netfilter/nf_nat.h 2011-03-20 12:01:11.000000000 +0200
55+++ linux/include/net/netfilter/nf_nat.h 2011-03-20 12:13:20.140246808 +0200
ae4e228f 56@@ -73,6 +73,13 @@ struct nf_conn_nat {
58c5fc13
MT
57 #endif
58 };
59
60+/* Call input routing for SNAT-ed traffic */
61+extern unsigned int ip_nat_route_input(unsigned int hooknum,
62+ struct sk_buff *skb,
63+ const struct net_device *in,
64+ const struct net_device *out,
65+ int (*okfn)(struct sk_buff *));
66+
67 /* Set up the info structure to map into this range. */
68 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
69 const struct nf_nat_range *range,
16454cff
MT
70diff -urp v2.6.38/linux/include/net/route.h linux/include/net/route.h
71--- v2.6.38/linux/include/net/route.h 2011-03-20 12:01:11.000000000 +0200
72+++ linux/include/net/route.h 2011-03-20 12:13:20.141248044 +0200
73@@ -134,6 +134,7 @@ static inline int ip_route_input_noref(s
57199397
MT
74 return ip_route_input_common(skb, dst, src, tos, devin, true);
75 }
76
58c5fc13
MT
77+extern int ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc);
78 extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
79 extern void ip_rt_send_redirect(struct sk_buff *skb);
80
16454cff
MT
81diff -urp v2.6.38/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c
82--- v2.6.38/linux/net/bridge/br_netfilter.c 2011-03-20 12:01:11.000000000 +0200
83+++ linux/net/bridge/br_netfilter.c 2011-03-20 12:13:20.142247890 +0200
84@@ -405,6 +405,9 @@ static int br_nf_pre_routing_finish(stru
58c5fc13
MT
85 struct rtable *rt;
86 int err;
87
88+ /* Old skb->dst is not expected, it is lost in all cases */
89+ skb_dst_drop(skb);
90+
91 if (nf_bridge->mask & BRNF_PKT_TYPE) {
92 skb->pkt_type = PACKET_OTHERHOST;
93 nf_bridge->mask ^= BRNF_PKT_TYPE;
16454cff
MT
94diff -urp v2.6.38/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
95--- v2.6.38/linux/net/ipv4/fib_frontend.c 2011-03-20 12:05:50.000000000 +0200
96+++ linux/net/ipv4/fib_frontend.c 2011-03-20 12:12:11.109247911 +0200
df50ba0c 97@@ -47,6 +47,8 @@
58c5fc13
MT
98
99 #ifndef CONFIG_IP_MULTIPLE_TABLES
100
101+#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
102+
103 static int __net_init fib4_rules_init(struct net *net)
104 {
105 struct fib_table *local_table, *main_table;
df50ba0c 106@@ -71,6 +73,8 @@ fail:
58c5fc13
MT
107 }
108 #else
109
110+#define FIB_RES_TABLE(r) (fib_result_table(r))
111+
112 struct fib_table *fib_new_table(struct net *net, u32 id)
113 {
114 struct fib_table *tb;
df50ba0c 115@@ -125,7 +129,8 @@ void fib_select_default(struct net *net,
58c5fc13
MT
116 table = res->r->table;
117 #endif
118 tb = fib_get_table(net, table);
119- if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
120+ if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
121+ FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
ae4e228f 122 fib_table_select_default(tb, flp, res);
58c5fc13
MT
123 }
124
16454cff 125@@ -256,6 +261,9 @@ int fib_validate_source(__be32 src, __be
bc901d79
MT
126 .iif = oif
127 };
58c5fc13
MT
128 struct fib_result res;
129+ int table;
130+ unsigned char prefixlen;
131+ unsigned char scope;
ae4e228f 132 int no_addr, rpf, accept_local;
6892158b 133 bool dev_match;
58c5fc13 134 int ret;
16454cff 135@@ -302,19 +310,26 @@ int fib_validate_source(__be32 src, __be
bc901d79 136 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
58c5fc13
MT
137 return ret;
138 }
139+ table = FIB_RES_TABLE(&res);
140+ prefixlen = res.prefixlen;
141+ scope = res.scope;
58c5fc13
MT
142 if (no_addr)
143 goto last_resort;
144- if (rpf == 1)
6892158b 145- goto e_rpf;
58c5fc13
MT
146 fl.oif = dev->ifindex;
147
148 ret = 0;
149 if (fib_lookup(net, &fl, &res) == 0) {
150- if (res.type == RTN_UNICAST) {
151+ if (res.type == RTN_UNICAST &&
152+ ((table == FIB_RES_TABLE(&res) &&
153+ res.prefixlen >= prefixlen && res.scope >= scope) ||
154+ !rpf)) {
155 *spec_dst = FIB_RES_PREFSRC(res);
156 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
58c5fc13
MT
157+ return ret;
158 }
58c5fc13
MT
159 }
160+ if (rpf == 1)
6892158b 161+ goto e_rpf;
58c5fc13
MT
162 return ret;
163
164 last_resort:
16454cff 165@@ -942,9 +957,7 @@ static int fib_inetaddr_event(struct not
58c5fc13
MT
166 switch (event) {
167 case NETDEV_UP:
168 fib_add_ifaddr(ifa);
169-#ifdef CONFIG_IP_ROUTE_MULTIPATH
170 fib_sync_up(dev);
171-#endif
172 rt_cache_flush(dev_net(dev), -1);
173 break;
174 case NETDEV_DOWN:
16454cff 175@@ -980,9 +993,7 @@ static int fib_netdev_event(struct notif
58c5fc13
MT
176 for_ifa(in_dev) {
177 fib_add_ifaddr(ifa);
178 } endfor_ifa(in_dev);
179-#ifdef CONFIG_IP_ROUTE_MULTIPATH
180 fib_sync_up(dev);
181-#endif
182 rt_cache_flush(dev_net(dev), -1);
183 break;
184 case NETDEV_DOWN:
16454cff
MT
185diff -urp v2.6.38/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
186--- v2.6.38/linux/net/ipv4/fib_hash.c 2011-03-20 12:05:41.000000000 +0200
187+++ linux/net/ipv4/fib_hash.c 2011-03-20 12:12:11.110247911 +0200
bc901d79 188@@ -305,27 +305,43 @@ out:
ae4e228f
MT
189 void fib_table_select_default(struct fib_table *tb,
190 const struct flowi *flp, struct fib_result *res)
58c5fc13
MT
191 {
192- int order, last_idx;
bc901d79
MT
193+ int order, last_idx, last_dflt, last_nhsel, good;
194+ struct fib_alias *first_fa;
58c5fc13
MT
195 struct hlist_node *node;
196 struct fib_node *f;
bc901d79
MT
197- struct fib_info *fi = NULL;
198+ struct fib_info *fi;
58c5fc13
MT
199 struct fib_info *last_resort;
200 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
201- struct fn_zone *fz = t->fn_zones[0];
202+ struct fn_zone *fz = t->fn_zones[res->prefixlen];
bc901d79 203 struct hlist_head *head;
58c5fc13 204+ __be32 k;
bc901d79 205+ unsigned int seq;
58c5fc13
MT
206
207 if (fz == NULL)
208 return;
209
210+ k = fz_key(flp->fl4_dst, fz);
bc901d79
MT
211+
212+ rcu_read_lock();
213+
214+retry:
58c5fc13
MT
215+ last_dflt = -2;
216+ last_nhsel = 0;
217 last_idx = -1;
218 last_resort = NULL;
219 order = -1;
bc901d79
MT
220+ fi = NULL;
221+ first_fa = NULL;
222+ good = 0;
223
224- rcu_read_lock();
225- head = rcu_dereference(fz->fz_hash);
226+ seq = read_seqbegin(&fz->fz_lock);
227+ head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
228 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
58c5fc13
MT
229 struct fib_alias *fa;
230
231+ if (f->fn_key != k)
232+ continue;
233+
bc901d79 234 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
58c5fc13
MT
235 struct fib_info *next_fi = fa->fa_info;
236
bc901d79 237@@ -333,43 +349,66 @@ void fib_table_select_default(struct fib
58c5fc13
MT
238 fa->fa_type != RTN_UNICAST)
239 continue;
240
241+ if (fa->fa_tos &&
242+ fa->fa_tos != flp->fl4_tos)
243+ continue;
244 if (next_fi->fib_priority > res->fi->fib_priority)
245 break;
246- if (!next_fi->fib_nh[0].nh_gw ||
247- next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
248- continue;
bc901d79
MT
249
250 fib_alias_accessed(fa);
58c5fc13
MT
251
252- if (fi == NULL) {
253- if (next_fi != res->fi)
254- break;
255- } else if (!fib_detect_death(fi, order, &last_resort,
256- &last_idx, tb->tb_default)) {
bc901d79
MT
257- fib_result_assign(res, fi);
258- tb->tb_default = order;
259- goto out;
58c5fc13
MT
260+ if (!first_fa) {
261+ last_dflt = fa->fa_last_dflt;
262+ first_fa = fa;
263+ }
264+ if (fi && !fib_detect_death(fi, order, &last_resort,
265+ &last_idx, &last_dflt, &last_nhsel, flp)) {
bc901d79
MT
266+ good = 1;
267+ goto done1;
58c5fc13
MT
268 }
269 fi = next_fi;
270 order++;
271 }
272+ break;
bc901d79
MT
273+ }
274+
275+done1:
276+ if (read_seqretry(&fz->fz_lock, seq))
277+ goto retry;
278+
279+ if (good) {
280+ fib_result_assign(res, fi);
281+ first_fa->fa_last_dflt = order;
282+ goto out;
58c5fc13
MT
283 }
284
285 if (order <= 0 || fi == NULL) {
286- tb->tb_default = -1;
287+ if (fi && fi->fib_nhs > 1 &&
288+ fib_detect_death(fi, order, &last_resort, &last_idx,
289+ &last_dflt, &last_nhsel, flp) &&
290+ last_resort == fi) {
291+ read_lock_bh(&fib_nhflags_lock);
292+ fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
293+ read_unlock_bh(&fib_nhflags_lock);
294+ }
295+ if (first_fa) first_fa->fa_last_dflt = -1;
296 goto out;
297 }
298
299 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
300- tb->tb_default)) {
301+ &last_dflt, &last_nhsel, flp)) {
302 fib_result_assign(res, fi);
303- tb->tb_default = order;
304+ first_fa->fa_last_dflt = order;
305 goto out;
306 }
307
308- if (last_idx >= 0)
309+ if (last_idx >= 0) {
310 fib_result_assign(res, last_resort);
311- tb->tb_default = last_idx;
312+ read_lock_bh(&fib_nhflags_lock);
313+ last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
314+ read_unlock_bh(&fib_nhflags_lock);
315+ first_fa->fa_last_dflt = last_idx;
316+ }
317 out:
bc901d79 318 rcu_read_unlock();
58c5fc13 319 }
bc901d79
MT
320@@ -507,6 +546,7 @@ int fib_table_insert(struct fib_table *t
321
322 new_fa->fa_tos = fa->fa_tos;
323 new_fa->fa_info = fi;
324+ new_fa->fa_last_dflt = -1;
325 new_fa->fa_type = cfg->fc_type;
326 new_fa->fa_scope = cfg->fc_scope;
58c5fc13 327 state = fa->fa_state;
bc901d79 328@@ -559,6 +599,7 @@ int fib_table_insert(struct fib_table *t
58c5fc13
MT
329 new_fa->fa_type = cfg->fc_type;
330 new_fa->fa_scope = cfg->fc_scope;
331 new_fa->fa_state = 0;
332+ new_fa->fa_last_dflt = -1;
333
334 /*
335 * Insert new entry to the list.
16454cff
MT
336diff -urp v2.6.38/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
337--- v2.6.38/linux/net/ipv4/fib_lookup.h 2011-03-20 12:05:41.000000000 +0200
338+++ linux/net/ipv4/fib_lookup.h 2011-03-20 12:12:11.111246945 +0200
58c5fc13
MT
339@@ -8,6 +8,7 @@
340 struct fib_alias {
341 struct list_head fa_list;
342 struct fib_info *fa_info;
343+ int fa_last_dflt;
344 u8 fa_tos;
345 u8 fa_type;
346 u8 fa_scope;
bc901d79 347@@ -42,7 +43,8 @@ extern struct fib_alias *fib_find_alias(
58c5fc13
MT
348 u8 tos, u32 prio);
349 extern int fib_detect_death(struct fib_info *fi, int order,
350 struct fib_info **last_resort,
351- int *last_idx, int dflt);
352+ int *last_idx, int *dflt, int *last_nhsel,
353+ const struct flowi *flp);
354
355 static inline void fib_result_assign(struct fib_result *res,
356 struct fib_info *fi)
16454cff
MT
357diff -urp v2.6.38/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
358--- v2.6.38/linux/net/ipv4/fib_rules.c 2011-03-20 12:05:41.000000000 +0200
359+++ linux/net/ipv4/fib_rules.c 2011-03-20 12:12:11.111246945 +0200
bc901d79 360@@ -53,6 +53,11 @@ u32 fib_rules_tclass(struct fib_result *
58c5fc13
MT
361 }
362 #endif
363
364+int fib_result_table(struct fib_result *res)
365+{
366+ return res->r->table;
367+}
368+
369 int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
370 {
371 struct fib_lookup_arg arg = {
16454cff
MT
372diff -urp v2.6.38/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
373--- v2.6.38/linux/net/ipv4/fib_semantics.c 2011-03-20 12:05:50.000000000 +0200
374+++ linux/net/ipv4/fib_semantics.c 2011-03-20 12:13:20.143248500 +0200
df50ba0c 375@@ -51,6 +51,7 @@ static struct hlist_head *fib_info_hash;
58c5fc13
MT
376 static struct hlist_head *fib_info_laddrhash;
377 static unsigned int fib_hash_size;
378 static unsigned int fib_info_cnt;
379+rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED;
380
381 #define DEVINDEX_HASHBITS 8
382 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
bc901d79 383@@ -203,7 +204,7 @@ static inline int nh_comp(const struct f
58c5fc13
MT
384 #ifdef CONFIG_NET_CLS_ROUTE
385 nh->nh_tclassid != onh->nh_tclassid ||
386 #endif
bc901d79
MT
387- ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
388+ ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_BADSTATE))
58c5fc13
MT
389 return -1;
390 onh++;
391 } endfor_nexthops(fi);
bc901d79 392@@ -254,7 +255,7 @@ static struct fib_info *fib_find_info(co
58c5fc13
MT
393 nfi->fib_priority == fi->fib_priority &&
394 memcmp(nfi->fib_metrics, fi->fib_metrics,
395 sizeof(fi->fib_metrics)) == 0 &&
bc901d79
MT
396- ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
397+ ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_BADSTATE) == 0 &&
58c5fc13
MT
398 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
399 return fi;
400 }
bc901d79 401@@ -365,26 +366,70 @@ struct fib_alias *fib_find_alias(struct
58c5fc13
MT
402 }
403
404 int fib_detect_death(struct fib_info *fi, int order,
405- struct fib_info **last_resort, int *last_idx, int dflt)
406+ struct fib_info **last_resort, int *last_idx, int *dflt,
407+ int *last_nhsel, const struct flowi *flp)
408 {
409 struct neighbour *n;
410- int state = NUD_NONE;
411+ int nhsel;
412+ int state;
413+ struct fib_nh * nh;
414+ __be32 dst;
415+ int flag, dead = 1;
416+
417+ /* change_nexthops(fi) { */
418+ for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
419+ if (flp->oif && flp->oif != nh->nh_oif)
420+ continue;
421+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
422+ nh->nh_scope == RT_SCOPE_LINK)
423+ continue;
424+ if (nh->nh_flags & RTNH_F_DEAD)
425+ continue;
426
427- n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
428- if (n) {
429- state = n->nud_state;
430- neigh_release(n);
431- }
432- if (state == NUD_REACHABLE)
433- return 0;
bc901d79 434- if ((state & NUD_VALID) && order != dflt)
58c5fc13 435- return 0;
bc901d79
MT
436- if ((state & NUD_VALID) ||
437- (*last_idx < 0 && order > dflt)) {
58c5fc13
MT
438- *last_resort = fi;
439- *last_idx = order;
440+ flag = 0;
441+ if (nh->nh_dev->flags & IFF_NOARP) {
442+ dead = 0;
443+ goto setfl;
444+ }
445+
446+ dst = nh->nh_gw;
447+ if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
448+ dst = flp->fl4_dst;
449+
450+ state = NUD_NONE;
451+ n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
452+ if (n) {
453+ state = n->nud_state;
454+ neigh_release(n);
455+ }
bc901d79
MT
456+ if (state == NUD_REACHABLE ||
457+ ((state & NUD_VALID) && order != *dflt)) {
58c5fc13
MT
458+ dead = 0;
459+ goto setfl;
460+ }
bc901d79 461+ if (!(state & NUD_VALID))
58c5fc13
MT
462+ flag = 1;
463+ if (!dead)
464+ goto setfl;
bc901d79
MT
465+ if ((state & NUD_VALID) ||
466+ (*last_idx < 0 && order >= *dflt)) {
58c5fc13
MT
467+ *last_resort = fi;
468+ *last_idx = order;
469+ *last_nhsel = nhsel;
470+ }
471+
472+ setfl:
473+
474+ read_lock_bh(&fib_nhflags_lock);
475+ if (flag)
476+ nh->nh_flags |= RTNH_F_SUSPECT;
477+ else
478+ nh->nh_flags &= ~RTNH_F_SUSPECT;
479+ read_unlock_bh(&fib_nhflags_lock);
480 }
481- return 1;
482+ /* } endfor_nexthops(fi) */
483+
484+ return dead;
485 }
486
487 #ifdef CONFIG_IP_ROUTE_MULTIPATH
bc901d79
MT
488@@ -553,8 +598,11 @@ static int fib_check_nh(struct fib_confi
489 dev = __dev_get_by_index(net, nh->nh_oif);
490 if (!dev)
58c5fc13 491 return -ENODEV;
bc901d79 492- if (!(dev->flags & IFF_UP))
58c5fc13 493- return -ENETDOWN;
bc901d79 494+ if (!(dev->flags & IFF_UP)) {
58c5fc13
MT
495+ if (fi->fib_protocol != RTPROT_STATIC)
496+ return -ENETDOWN;
497+ nh->nh_flags |= RTNH_F_DEAD;
498+ }
499 nh->nh_dev = dev;
500 dev_hold(dev);
501 nh->nh_scope = RT_SCOPE_LINK;
16454cff 502@@ -572,21 +620,41 @@ static int fib_check_nh(struct fib_confi
58c5fc13
MT
503 if (fl.fl4_scope < RT_SCOPE_LINK)
504 fl.fl4_scope = RT_SCOPE_LINK;
bc901d79
MT
505 err = fib_lookup(net, &fl, &res);
506- if (err) {
507- rcu_read_unlock();
58c5fc13 508- return err;
bc901d79 509+ }
58c5fc13
MT
510+ if (err) {
511+ struct in_device *in_dev;
512+
513+ if (err != -ENETUNREACH ||
514+ fi->fib_protocol != RTPROT_STATIC)
bc901d79 515+ goto out;
58c5fc13
MT
516+
517+ in_dev = inetdev_by_index(net, nh->nh_oif);
518+ if (in_dev == NULL ||
bc901d79
MT
519+ in_dev->dev->flags & IFF_UP)
520+ goto out;
58c5fc13
MT
521+ nh->nh_flags |= RTNH_F_DEAD;
522+ nh->nh_scope = RT_SCOPE_LINK;
523+ nh->nh_dev = in_dev->dev;
524+ dev_hold(nh->nh_dev);
58c5fc13
MT
525+ } else {
526+ err = -EINVAL;
527+ if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
528+ goto out;
529+ nh->nh_scope = res.scope;
530+ nh->nh_oif = FIB_RES_OIF(res);
bc901d79
MT
531+ nh->nh_dev = dev = FIB_RES_DEV(res);
532+ if (!dev)
58c5fc13 533+ goto out;
bc901d79 534+ dev_hold(dev);
58c5fc13
MT
535+ if (!(nh->nh_dev->flags & IFF_UP)) {
536+ if (fi->fib_protocol != RTPROT_STATIC) {
537+ err = -ENETDOWN;
538+ goto out;
539+ }
540+ nh->nh_flags |= RTNH_F_DEAD;
bc901d79 541 }
58c5fc13 542+ err = 0;
bc901d79
MT
543 }
544- err = -EINVAL;
545- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
546- goto out;
547- nh->nh_scope = res.scope;
548- nh->nh_oif = FIB_RES_OIF(res);
549- nh->nh_dev = dev = FIB_RES_DEV(res);
550- if (!dev)
551- goto out;
552- dev_hold(dev);
553- err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
58c5fc13
MT
554 } else {
555 struct in_device *in_dev;
556
16454cff 557@@ -599,8 +667,11 @@ static int fib_check_nh(struct fib_confi
58c5fc13 558 if (in_dev == NULL)
bc901d79
MT
559 goto out;
560 err = -ENETDOWN;
561- if (!(in_dev->dev->flags & IFF_UP))
562- goto out;
563+ if (!(in_dev->dev->flags & IFF_UP)) {
564+ if (fi->fib_protocol != RTPROT_STATIC)
565+ goto out;
58c5fc13 566+ nh->nh_flags |= RTNH_F_DEAD;
bc901d79 567+ }
58c5fc13
MT
568 nh->nh_dev = in_dev->dev;
569 dev_hold(nh->nh_dev);
bc901d79 570 nh->nh_scope = RT_SCOPE_HOST;
16454cff 571@@ -915,8 +986,12 @@ int fib_semantic_match(struct list_head
58c5fc13 572 for_nexthops(fi) {
bc901d79 573 if (nh->nh_flags & RTNH_F_DEAD)
58c5fc13
MT
574 continue;
575- if (!flp->oif || flp->oif == nh->nh_oif)
576- break;
577+ if (flp->oif && flp->oif != nh->nh_oif)
578+ continue;
579+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
580+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
581+ continue;
582+ break;
583 }
584 #ifdef CONFIG_IP_ROUTE_MULTIPATH
585 if (nhsel < fi->fib_nhs) {
16454cff 586@@ -1096,18 +1171,29 @@ int fib_sync_down_dev(struct net_device
58c5fc13
MT
587 prev_fi = fi;
588 dead = 0;
589 change_nexthops(fi) {
bc901d79 590- if (nexthop_nh->nh_flags & RTNH_F_DEAD)
58c5fc13 591- dead++;
df50ba0c
MT
592- else if (nexthop_nh->nh_dev == dev &&
593- nexthop_nh->nh_scope != scope) {
594- nexthop_nh->nh_flags |= RTNH_F_DEAD;
bc901d79
MT
595+ if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
596+ if (fi->fib_protocol != RTPROT_STATIC ||
df50ba0c
MT
597+ nexthop_nh->nh_dev == NULL ||
598+ __in_dev_get_rtnl(nexthop_nh->nh_dev) == NULL ||
599+ nexthop_nh->nh_dev->flags&IFF_UP)
58c5fc13 600+ dead++;
df50ba0c
MT
601+ } else if (nexthop_nh->nh_dev == dev &&
602+ nexthop_nh->nh_scope != scope) {
58c5fc13
MT
603+ write_lock_bh(&fib_nhflags_lock);
604 #ifdef CONFIG_IP_ROUTE_MULTIPATH
605- spin_lock_bh(&fib_multipath_lock);
606+ spin_lock(&fib_multipath_lock);
df50ba0c
MT
607+ nexthop_nh->nh_flags |= RTNH_F_DEAD;
608 fi->fib_power -= nexthop_nh->nh_power;
609 nexthop_nh->nh_power = 0;
58c5fc13
MT
610- spin_unlock_bh(&fib_multipath_lock);
611+ spin_unlock(&fib_multipath_lock);
612+#else
df50ba0c 613+ nexthop_nh->nh_flags |= RTNH_F_DEAD;
58c5fc13
MT
614 #endif
615- dead++;
616+ write_unlock_bh(&fib_nhflags_lock);
617+ if (fi->fib_protocol!=RTPROT_STATIC ||
618+ force ||
619+ __in_dev_get_rtnl(dev) == NULL)
620+ dead++;
621 }
622 #ifdef CONFIG_IP_ROUTE_MULTIPATH
df50ba0c 623 if (force > 1 && nexthop_nh->nh_dev == dev) {
16454cff 624@@ -1125,11 +1211,8 @@ int fib_sync_down_dev(struct net_device
58c5fc13
MT
625 return ret;
626 }
627
628-#ifdef CONFIG_IP_ROUTE_MULTIPATH
629-
630 /*
bc901d79
MT
631- * Dead device goes up. We wake up dead nexthops.
632- * It takes sense only on multipath routes.
633++ Dead device goes up or new address is added. We wake up dead nexthops.
58c5fc13 634 */
58c5fc13 635 int fib_sync_up(struct net_device *dev)
bc901d79 636 {
16454cff 637@@ -1138,8 +1221,10 @@ int fib_sync_up(struct net_device *dev)
58c5fc13
MT
638 struct hlist_head *head;
639 struct hlist_node *node;
640 struct fib_nh *nh;
641- int ret;
642+ struct fib_result res;
643+ int ret, rep;
644
645+repeat:
bc901d79 646 if (!(dev->flags & IFF_UP))
58c5fc13
MT
647 return 0;
648
16454cff 649@@ -1147,6 +1232,7 @@ int fib_sync_up(struct net_device *dev)
58c5fc13
MT
650 hash = fib_devindex_hashfn(dev->ifindex);
651 head = &fib_info_devhash[hash];
652 ret = 0;
653+ rep = 0;
654
655 hlist_for_each_entry(nh, node, head, nh_hash) {
656 struct fib_info *fi = nh->nh_parent;
16454cff 657@@ -1159,21 +1245,45 @@ int fib_sync_up(struct net_device *dev)
58c5fc13
MT
658 prev_fi = fi;
659 alive = 0;
660 change_nexthops(fi) {
bc901d79 661- if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
58c5fc13 662- alive++;
bc901d79 663+ if (!(nexthop_nh->nh_flags & RTNH_F_DEAD))
58c5fc13
MT
664 continue;
665- }
df50ba0c 666 if (nexthop_nh->nh_dev == NULL ||
bc901d79 667 !(nexthop_nh->nh_dev->flags & IFF_UP))
58c5fc13 668 continue;
df50ba0c
MT
669 if (nexthop_nh->nh_dev != dev ||
670 !__in_dev_get_rtnl(dev))
58c5fc13 671 continue;
df50ba0c 672+ if (nexthop_nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
58c5fc13
MT
673+ struct flowi fl = {
674+ .nl_u = { .ip4_u =
df50ba0c
MT
675+ { .daddr = nexthop_nh->nh_gw,
676+ .scope = nexthop_nh->nh_scope } },
677+ .oif = nexthop_nh->nh_oif,
58c5fc13 678+ };
bc901d79
MT
679+
680+ rcu_read_lock();
681+ if (fib_lookup(dev_net(dev), &fl, &res) != 0) {
682+ rcu_read_unlock();
58c5fc13 683+ continue;
bc901d79 684+ }
58c5fc13
MT
685+ if (res.type != RTN_UNICAST &&
686+ res.type != RTN_LOCAL) {
bc901d79 687+ rcu_read_unlock();
58c5fc13
MT
688+ continue;
689+ }
df50ba0c 690+ nexthop_nh->nh_scope = res.scope;
bc901d79 691+ rcu_read_unlock();
58c5fc13
MT
692+ rep = 1;
693+ }
694 alive++;
695+#ifdef CONFIG_IP_ROUTE_MULTIPATH
696 spin_lock_bh(&fib_multipath_lock);
df50ba0c 697 nexthop_nh->nh_power = 0;
58c5fc13 698+#endif
df50ba0c 699 nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
58c5fc13
MT
700+#ifdef CONFIG_IP_ROUTE_MULTIPATH
701 spin_unlock_bh(&fib_multipath_lock);
702+#endif
703 } endfor_nexthops(fi)
704
705 if (alive > 0) {
16454cff 706@@ -1181,10 +1291,14 @@ int fib_sync_up(struct net_device *dev)
58c5fc13
MT
707 ret++;
708 }
709 }
710+ if (rep)
711+ goto repeat;
712
713 return ret;
714 }
715
716+#ifdef CONFIG_IP_ROUTE_MULTIPATH
717+
718 /*
bc901d79
MT
719 * The algorithm is suboptimal, but it provides really
720 * fair weighted route distribution.
16454cff 721@@ -1192,24 +1306,46 @@ int fib_sync_up(struct net_device *dev)
58c5fc13
MT
722 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
723 {
724 struct fib_info *fi = res->fi;
725- int w;
726+ int w, alive;
727
728 spin_lock_bh(&fib_multipath_lock);
729+ if (flp->oif) {
730+ int sel = -1;
731+ w = -1;
732+ change_nexthops(fi) {
df50ba0c 733+ if (flp->oif != nexthop_nh->nh_oif)
58c5fc13 734+ continue;
df50ba0c 735+ if (flp->fl4_gw && flp->fl4_gw != nexthop_nh->nh_gw &&
bc901d79
MT
736+ nexthop_nh->nh_gw &&
737+ nexthop_nh->nh_scope == RT_SCOPE_LINK)
58c5fc13 738+ continue;
bc901d79 739+ if (!(nexthop_nh->nh_flags & RTNH_F_BADSTATE)) {
df50ba0c
MT
740+ if (nexthop_nh->nh_power > w) {
741+ w = nexthop_nh->nh_power;
58c5fc13
MT
742+ sel = nhsel;
743+ }
744+ }
745+ } endfor_nexthops(fi);
746+ if (sel >= 0) {
747+ spin_unlock_bh(&fib_multipath_lock);
748+ res->nh_sel = sel;
749+ return;
750+ }
751+ goto last_resort;
752+ }
753+
754+repeat:
755 if (fi->fib_power <= 0) {
756 int power = 0;
757 change_nexthops(fi) {
bc901d79
MT
758- if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
759+ if (!(nexthop_nh->nh_flags & RTNH_F_BADSTATE)) {
df50ba0c
MT
760 power += nexthop_nh->nh_weight;
761 nexthop_nh->nh_power = nexthop_nh->nh_weight;
58c5fc13
MT
762 }
763 } endfor_nexthops(fi);
764 fi->fib_power = power;
765- if (power <= 0) {
766- spin_unlock_bh(&fib_multipath_lock);
767- /* Race condition: route has just become dead. */
768- res->nh_sel = 0;
769- return;
770- }
771+ if (power <= 0)
772+ goto last_resort;
773 }
774
775
16454cff 776@@ -1219,8 +1355,9 @@ void fib_select_multipath(const struct f
58c5fc13
MT
777
778 w = jiffies % fi->fib_power;
779
780+ alive = 0;
781 change_nexthops(fi) {
bc901d79
MT
782- if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
783+ if (!(nexthop_nh->nh_flags & RTNH_F_BADSTATE) &&
df50ba0c 784 nexthop_nh->nh_power) {
bc901d79
MT
785 w -= nexthop_nh->nh_power;
786 if (w <= 0) {
16454cff 787@@ -1230,11 +1367,29 @@ void fib_select_multipath(const struct f
58c5fc13 788 spin_unlock_bh(&fib_multipath_lock);
58c5fc13
MT
789 return;
790 }
791+ alive = 1;
792+ }
793+ } endfor_nexthops(fi);
794+ if (alive) {
795+ fi->fib_power = 0;
796+ goto repeat;
797+ }
798+
799+last_resort:
58c5fc13 800+ for_nexthops(fi) {
bc901d79 801+ if (!(nh->nh_flags & RTNH_F_DEAD)) {
58c5fc13
MT
802+ if (flp->oif && flp->oif != nh->nh_oif)
803+ continue;
804+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
805+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
806+ continue;
807+ spin_unlock_bh(&fib_multipath_lock);
808+ res->nh_sel = nhsel;
809+ return;
810 }
811 } endfor_nexthops(fi);
812
813 /* Race condition: route has just become dead. */
814- res->nh_sel = 0;
815 spin_unlock_bh(&fib_multipath_lock);
816 }
817 #endif
16454cff
MT
818diff -urp v2.6.38/linux/net/ipv4/fib_trie.c linux/net/ipv4/fib_trie.c
819--- v2.6.38/linux/net/ipv4/fib_trie.c 2011-03-20 12:05:41.000000000 +0200
820+++ linux/net/ipv4/fib_trie.c 2011-03-20 12:12:11.115247884 +0200
bc901d79 821@@ -1270,6 +1270,7 @@ int fib_table_insert(struct fib_table *t
58c5fc13
MT
822 fi_drop = fa->fa_info;
823 new_fa->fa_tos = fa->fa_tos;
824 new_fa->fa_info = fi;
825+ new_fa->fa_last_dflt = -1;
826 new_fa->fa_type = cfg->fc_type;
827 new_fa->fa_scope = cfg->fc_scope;
828 state = fa->fa_state;
bc901d79 829@@ -1310,6 +1311,7 @@ int fib_table_insert(struct fib_table *t
58c5fc13
MT
830 new_fa->fa_type = cfg->fc_type;
831 new_fa->fa_scope = cfg->fc_scope;
832 new_fa->fa_state = 0;
833+ new_fa->fa_last_dflt = -1;
834 /*
835 * Insert new entry to the list.
836 */
bc901d79 837@@ -1807,24 +1809,31 @@ void fib_table_select_default(struct fib
ae4e228f 838 struct fib_result *res)
58c5fc13
MT
839 {
840 struct trie *t = (struct trie *) tb->tb_data;
841- int order, last_idx;
842+ int order, last_idx, last_dflt, last_nhsel;
843+ struct fib_alias *first_fa = NULL;
844 struct fib_info *fi = NULL;
845 struct fib_info *last_resort;
846 struct fib_alias *fa = NULL;
847 struct list_head *fa_head;
848 struct leaf *l;
849+ u32 key, mask;
850
851+ last_dflt = -2;
852+ last_nhsel = 0;
853 last_idx = -1;
854 last_resort = NULL;
855 order = -1;
856
857+ mask = inet_make_mask(res->prefixlen);
858+ key = ntohl(flp->fl4_dst & mask);
859+
860 rcu_read_lock();
861
862- l = fib_find_node(t, 0);
863+ l = fib_find_node(t, key);
864 if (!l)
865 goto out;
866
867- fa_head = get_fa_head(l, 0);
868+ fa_head = get_fa_head(l, res->prefixlen);
869 if (!fa_head)
870 goto out;
871
bc901d79 872@@ -1838,40 +1847,53 @@ void fib_table_select_default(struct fib
58c5fc13
MT
873 fa->fa_type != RTN_UNICAST)
874 continue;
875
876+ if (fa->fa_tos &&
877+ fa->fa_tos != flp->fl4_tos)
878+ continue;
879 if (next_fi->fib_priority > res->fi->fib_priority)
880 break;
881- if (!next_fi->fib_nh[0].nh_gw ||
882- next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
883- continue;
bc901d79
MT
884
885 fib_alias_accessed(fa);
58c5fc13
MT
886
887- if (fi == NULL) {
888- if (next_fi != res->fi)
889- break;
890- } else if (!fib_detect_death(fi, order, &last_resort,
891- &last_idx, tb->tb_default)) {
892+ if (!first_fa) {
893+ last_dflt = fa->fa_last_dflt;
894+ first_fa = fa;
895+ }
896+ if (fi && !fib_detect_death(fi, order, &last_resort,
897+ &last_idx, &last_dflt, &last_nhsel, flp)) {
898 fib_result_assign(res, fi);
899- tb->tb_default = order;
900+ first_fa->fa_last_dflt = order;
901 goto out;
902 }
903 fi = next_fi;
904 order++;
905 }
906 if (order <= 0 || fi == NULL) {
907- tb->tb_default = -1;
908+ if (fi && fi->fib_nhs > 1 &&
909+ fib_detect_death(fi, order, &last_resort, &last_idx,
910+ &last_dflt, &last_nhsel, flp) &&
911+ last_resort == fi) {
912+ read_lock_bh(&fib_nhflags_lock);
913+ fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
914+ read_unlock_bh(&fib_nhflags_lock);
915+ }
916+ if (first_fa) first_fa->fa_last_dflt = -1;
917 goto out;
918 }
919
920 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
921- tb->tb_default)) {
922+ &last_dflt, &last_nhsel, flp)) {
923 fib_result_assign(res, fi);
924- tb->tb_default = order;
925+ first_fa->fa_last_dflt = order;
926 goto out;
927 }
928- if (last_idx >= 0)
929+ if (last_idx >= 0) {
930 fib_result_assign(res, last_resort);
931- tb->tb_default = last_idx;
932+ read_lock_bh(&fib_nhflags_lock);
933+ last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
934+ read_unlock_bh(&fib_nhflags_lock);
935+ first_fa->fa_last_dflt = last_idx;
936+ }
937 out:
938 rcu_read_unlock();
939 }
16454cff
MT
940diff -urp v2.6.38/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
941--- v2.6.38/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2011-03-20 12:01:11.000000000 +0200
942+++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2011-03-20 12:16:02.963248753 +0200
58c5fc13
MT
943@@ -51,7 +51,7 @@ masquerade_tg(struct sk_buff *skb, const
944 enum ip_conntrack_info ctinfo;
945 struct nf_nat_range newrange;
946 const struct nf_nat_multi_range_compat *mr;
947- const struct rtable *rt;
948+ struct rtable *rt;
949 __be32 newsrc;
950
951 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
16454cff 952@@ -69,13 +69,27 @@ masquerade_tg(struct sk_buff *skb, const
58c5fc13
MT
953 return NF_ACCEPT;
954
955 mr = par->targinfo;
956- rt = skb_rtable(skb);
957- newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
958- if (!newsrc) {
57199397 959- pr_info("%s ate my IP address\n", par->out->name);
58c5fc13
MT
960- return NF_DROP;
961+
962+ {
16454cff
MT
963+ struct flowi fl = { .fl4_dst = ip_hdr(skb)->daddr,
964+ .fl4_tos = (RT_TOS(ip_hdr(skb)->tos) |
965+ RTO_CONN),
966+ .fl4_gw = skb_rtable(skb)->rt_gateway,
58c5fc13
MT
967+ .mark = skb->mark,
968+ .oif = par->out->ifindex };
969+ if (ip_route_output_key(dev_net(par->out), &rt, &fl) != 0) {
970+ /* Funky routing can do this. */
971+ if (net_ratelimit())
57199397
MT
972+ pr_info("%s:"
973+ " No route: Rusty's brain broke!\n",
974+ par->out->name);
58c5fc13
MT
975+ return NF_DROP;
976+ }
977 }
978
979+ newsrc = rt->rt_src;
980+ ip_rt_put(rt);
981+
982 nat->masq_index = par->out->ifindex;
983
984 /* Transfer from original range. */
16454cff
MT
985diff -urp v2.6.38/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c
986--- v2.6.38/linux/net/ipv4/netfilter/nf_nat_core.c 2011-03-20 12:01:11.000000000 +0200
987+++ linux/net/ipv4/netfilter/nf_nat_core.c 2011-03-20 12:13:20.145247903 +0200
bc901d79 988@@ -711,6 +711,52 @@ static struct pernet_operations nf_nat_n
58c5fc13
MT
989 .exit = nf_nat_net_exit,
990 };
991
992+unsigned int
993+ip_nat_route_input(unsigned int hooknum,
994+ struct sk_buff *skb,
995+ const struct net_device *in,
996+ const struct net_device *out,
997+ int (*okfn)(struct sk_buff *))
998+{
999+ struct iphdr *iph;
1000+ struct nf_conn *conn;
1001+ enum ip_conntrack_info ctinfo;
1002+ enum ip_conntrack_dir dir;
1003+ unsigned long statusbit;
1004+ __be32 saddr;
1005+
1006+ if (!(conn = nf_ct_get(skb, &ctinfo)))
1007+ return NF_ACCEPT;
1008+
1009+ if (!(conn->status & IPS_NAT_DONE_MASK))
1010+ return NF_ACCEPT;
1011+ dir = CTINFO2DIR(ctinfo);
1012+ statusbit = IPS_SRC_NAT;
1013+ if (dir == IP_CT_DIR_REPLY)
1014+ statusbit ^= IPS_NAT_MASK;
1015+ if (!(conn->status & statusbit))
1016+ return NF_ACCEPT;
1017+
1018+ if (skb_dst(skb))
1019+ return NF_ACCEPT;
1020+
1021+ if (skb->len < sizeof(struct iphdr))
1022+ return NF_ACCEPT;
1023+
1024+ /* use daddr in other direction as masquerade address (lsrc) */
1025+ iph = ip_hdr(skb);
1026+ saddr = conn->tuplehash[!dir].tuple.dst.u3.ip;
1027+ if (saddr == iph->saddr)
1028+ return NF_ACCEPT;
1029+
1030+ if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
1031+ skb->dev, saddr))
1032+ return NF_DROP;
1033+
1034+ return NF_ACCEPT;
1035+}
1036+EXPORT_SYMBOL_GPL(ip_nat_route_input);
1037+
1038 static int __init nf_nat_init(void)
1039 {
1040 size_t i;
16454cff
MT
1041diff -urp v2.6.38/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c
1042--- v2.6.38/linux/net/ipv4/netfilter/nf_nat_standalone.c 2011-03-20 12:01:11.000000000 +0200
1043+++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2011-03-20 12:13:20.145247903 +0200
6892158b 1044@@ -249,6 +249,14 @@ static struct nf_hook_ops nf_nat_ops[] _
58c5fc13
MT
1045 .hooknum = NF_INET_PRE_ROUTING,
1046 .priority = NF_IP_PRI_NAT_DST,
1047 },
1048+ /* Before routing, route before mangling */
1049+ {
1050+ .hook = ip_nat_route_input,
1051+ .owner = THIS_MODULE,
ae4e228f 1052+ .pf = NFPROTO_IPV4,
58c5fc13
MT
1053+ .hooknum = NF_INET_PRE_ROUTING,
1054+ .priority = NF_IP_PRI_LAST-1,
1055+ },
1056 /* After packet filtering, change source */
1057 {
1058 .hook = nf_nat_out,
16454cff
MT
1059diff -urp v2.6.38/linux/net/ipv4/route.c linux/net/ipv4/route.c
1060--- v2.6.38/linux/net/ipv4/route.c 2011-03-20 12:05:41.000000000 +0200
1061+++ linux/net/ipv4/route.c 2011-03-20 12:19:36.366249014 +0200
1062@@ -697,6 +697,8 @@ static inline int compare_keys(struct fl
1063 return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
1064 ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
58c5fc13 1065 (fl1->mark ^ fl2->mark) |
16454cff
MT
1066+ ((__force u32)fl1->fl4_lsrc ^ (__force u32)fl2->fl4_lsrc) |
1067+ ((__force u32)fl1->fl4_gw ^ (__force u32)fl2->fl4_gw) |
1068 (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
58c5fc13 1069 (fl1->oif ^ fl2->oif) |
57199397 1070 (fl1->iif ^ fl2->iif)) == 0;
16454cff 1071@@ -1435,6 +1437,7 @@ void ip_rt_redirect(__be32 old_gw, __be3
58c5fc13
MT
1072
1073 /* Gateway is different ... */
1074 rt->rt_gateway = new_gw;
1075+ if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
1076
1077 /* Redirect received -> path was valid */
6892158b 1078 dst_confirm(&rth->dst);
16454cff 1079@@ -1890,6 +1893,7 @@ static int ip_route_input_mc(struct sk_b
58c5fc13
MT
1080 rth->fl.fl4_tos = tos;
1081 rth->fl.mark = skb->mark;
1082 rth->fl.fl4_src = saddr;
1083+ rth->fl.fl4_lsrc = 0;
1084 rth->rt_src = saddr;
1085 #ifdef CONFIG_NET_CLS_ROUTE
6892158b 1086 rth->dst.tclassid = itag;
16454cff
MT
1087@@ -1899,6 +1903,7 @@ static int ip_route_input_mc(struct sk_b
1088 rth->dst.dev = init_net.loopback_dev;
6892158b 1089 dev_hold(rth->dst.dev);
58c5fc13
MT
1090 rth->fl.oif = 0;
1091+ rth->fl.fl4_gw = 0;
1092 rth->rt_gateway = daddr;
1093 rth->rt_spec_dst= spec_dst;
1094 rth->rt_genid = rt_genid(dev_net(dev));
16454cff 1095@@ -1962,7 +1967,7 @@ static int __mkroute_input(struct sk_buf
58c5fc13
MT
1096 struct fib_result *res,
1097 struct in_device *in_dev,
1098 __be32 daddr, __be32 saddr, u32 tos,
1099- struct rtable **result)
1100+ __be32 lsrc, struct rtable **result)
1101 {
58c5fc13 1102 struct rtable *rth;
6892158b 1103 int err;
16454cff 1104@@ -1994,6 +1999,7 @@ static int __mkroute_input(struct sk_buf
58c5fc13
MT
1105 flags |= RTCF_DIRECTSRC;
1106
1107 if (out_dev == in_dev && err &&
1108+ !lsrc &&
1109 (IN_DEV_SHARED_MEDIA(out_dev) ||
1110 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1111 flags |= RTCF_DOREDIRECT;
16454cff 1112@@ -2032,12 +2038,14 @@ static int __mkroute_input(struct sk_buf
58c5fc13
MT
1113 rth->fl.mark = skb->mark;
1114 rth->fl.fl4_src = saddr;
1115 rth->rt_src = saddr;
1116+ rth->fl.fl4_lsrc = lsrc;
1117 rth->rt_gateway = daddr;
1118 rth->rt_iif =
1119 rth->fl.iif = in_dev->dev->ifindex;
16454cff 1120 rth->dst.dev = (out_dev)->dev;
6892158b 1121 dev_hold(rth->dst.dev);
58c5fc13
MT
1122 rth->fl.oif = 0;
1123+ rth->fl.fl4_gw = 0;
1124 rth->rt_spec_dst= spec_dst;
1125
6892158b 1126 rth->dst.obsolete = -1;
16454cff 1127@@ -2057,21 +2065,23 @@ static int __mkroute_input(struct sk_buf
58c5fc13
MT
1128
1129 static int ip_mkroute_input(struct sk_buff *skb,
1130 struct fib_result *res,
1131+ struct net *net,
1132 const struct flowi *fl,
1133 struct in_device *in_dev,
1134- __be32 daddr, __be32 saddr, u32 tos)
1135+ __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
1136 {
1137 struct rtable* rth = NULL;
1138 int err;
1139 unsigned hash;
1140
1141+ fib_select_default(net, fl, res);
1142 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1143- if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1144+ if (res->fi && res->fi->fib_nhs > 1)
1145 fib_select_multipath(fl, res);
1146 #endif
1147
1148 /* create a routing cache entry */
1149- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1150+ err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
1151 if (err)
1152 return err;
1153
16454cff 1154@@ -2093,16 +2103,18 @@ static int ip_mkroute_input(struct sk_bu
58c5fc13
MT
1155 */
1156
1157 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1158- u8 tos, struct net_device *dev)
1159+ u8 tos, struct net_device *dev, __be32 lsrc)
1160 {
1161 struct fib_result res;
6892158b 1162 struct in_device *in_dev = __in_dev_get_rcu(dev);
16454cff
MT
1163 struct flowi fl = { .fl4_dst = daddr,
1164- .fl4_src = saddr,
1165+ .fl4_src = lsrc? : saddr,
1166 .fl4_tos = tos,
1167 .fl4_scope = RT_SCOPE_UNIVERSE,
58c5fc13
MT
1168 .mark = skb->mark,
1169- .iif = dev->ifindex };
1170+ .iif = lsrc?
6892158b
MT
1171+ dev_net(dev)->loopback_dev->ifindex :
1172+ dev->ifindex };
58c5fc13
MT
1173 unsigned flags = 0;
1174 u32 itag = 0;
1175 struct rtable * rth;
16454cff 1176@@ -2136,6 +2148,12 @@ static int ip_route_input_slow(struct sk
bc901d79 1177 if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
58c5fc13
MT
1178 goto martian_destination;
1179
1180+ if (lsrc) {
1181+ if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) ||
1182+ ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc))
1183+ goto e_inval;
1184+ }
1185+
1186 /*
1187 * Now we are ready to route packet.
1188 */
16454cff 1189@@ -2145,6 +2163,8 @@ static int ip_route_input_slow(struct sk
bc901d79 1190 goto e_hostunreach;
58c5fc13
MT
1191 goto no_route;
1192 }
58c5fc13
MT
1193+ fl.iif = dev->ifindex;
1194+ fl.fl4_src = saddr;
1195
1196 RT_CACHE_STAT_INC(in_slow_tot);
1197
16454cff 1198@@ -2168,12 +2188,14 @@ static int ip_route_input_slow(struct sk
58c5fc13
MT
1199 if (res.type != RTN_UNICAST)
1200 goto martian_destination;
1201
1202- err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1203+ err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc);
bc901d79
MT
1204 out: return err;
1205
58c5fc13
MT
1206 brd_input:
1207 if (skb->protocol != htons(ETH_P_IP))
1208 goto e_inval;
1209+ if (lsrc)
1210+ goto e_inval;
1211
1212 if (ipv4_is_zeronet(saddr))
1213 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
16454cff
MT
1214@@ -2215,6 +2237,7 @@ local_input:
1215 rth->fl.iif = dev->ifindex;
6892158b
MT
1216 rth->dst.dev = net->loopback_dev;
1217 dev_hold(rth->dst.dev);
58c5fc13
MT
1218+ rth->fl.fl4_gw = 0;
1219 rth->rt_gateway = daddr;
1220 rth->rt_spec_dst= spec_dst;
6892158b 1221 rth->dst.input= ip_local_deliver;
16454cff 1222@@ -2267,8 +2290,9 @@ martian_source_keep_err:
bc901d79 1223 goto out;
58c5fc13
MT
1224 }
1225
57199397
MT
1226-int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1227- u8 tos, struct net_device *dev, bool noref)
1228+int ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1229+ u8 tos, struct net_device *dev, bool noref,
1230+ __be32 lsrc)
58c5fc13
MT
1231 {
1232 struct rtable * rth;
1233 unsigned hash;
16454cff 1234@@ -2291,6 +2315,7 @@ int ip_route_input_common(struct sk_buff
57199397
MT
1235 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
1236 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
58c5fc13
MT
1237 (rth->fl.iif ^ iif) |
1238+ (rth->fl.fl4_lsrc ^ lsrc) |
1239 rth->fl.oif |
1240 (rth->fl.fl4_tos ^ tos)) == 0 &&
1241 rth->fl.mark == skb->mark &&
16454cff 1242@@ -2344,12 +2369,25 @@ skip_cache:
58c5fc13
MT
1243 rcu_read_unlock();
1244 return -EINVAL;
1245 }
6892158b
MT
1246- res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
1247+ res = ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
1248 rcu_read_unlock();
1249 return res;
1250 }
58c5fc13 1251+
57199397 1252+int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
bc901d79 1253+ u8 tos, struct net_device *dev, bool noref)
58c5fc13 1254+{ /* Keeps the pre-patch signature; forwards to ip_route_input_cached(). */
57199397 1255+ return ip_route_input_cached(skb, daddr, saddr, tos, dev, noref, 0); /* final argument is lsrc: 0 here */
6892158b 1256+}
57199397
MT
1257 EXPORT_SYMBOL(ip_route_input_common);
1258
58c5fc13
MT
1259+int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1260+ u8 tos, struct net_device *dev, __be32 lsrc)
1261+{ /* Input route lookup that passes a caller-supplied lsrc to ip_route_input_cached(). */
57199397
MT
1262+ return ip_route_input_cached(skb, daddr, saddr, tos, dev, true, lsrc); /* noref argument is fixed to true */
1263+}
6892158b 1264+EXPORT_SYMBOL(ip_route_input_lookup);
57199397 1265+
bc901d79 1266 /* called with rcu_read_lock() */
58c5fc13 1267 static int __mkroute_output(struct rtable **result,
57199397 1268 struct fib_result *res,
16454cff 1269@@ -2411,6 +2449,7 @@ static int __mkroute_output(struct rtabl
58c5fc13
MT
1270 rth->fl.fl4_tos = tos;
1271 rth->fl.fl4_src = oldflp->fl4_src;
1272 rth->fl.oif = oldflp->oif;
1273+ rth->fl.fl4_gw = oldflp->fl4_gw;
1274 rth->fl.mark = oldflp->mark;
1275 rth->rt_dst = fl->fl4_dst;
1276 rth->rt_src = fl->fl4_src;
16454cff
MT
1277@@ -2488,6 +2527,7 @@ static int ip_route_output_slow(struct n
1278 u32 tos = RT_FL_TOS(oldflp);
1279 struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
1280 .fl4_src = oldflp->fl4_src,
1281+ .fl4_gw = oldflp->fl4_gw,
1282 .fl4_tos = tos & IPTOS_RT_MASK,
1283 .fl4_scope = ((tos & RTO_ONLINK) ?
1284 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
1285@@ -2589,6 +2629,7 @@ static int ip_route_output_slow(struct n
bc901d79 1286 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
58c5fc13 1287 dev_out = net->loopback_dev;
58c5fc13
MT
1288 fl.oif = net->loopback_dev->ifindex;
1289+ fl.fl4_gw = 0;
1290 res.type = RTN_LOCAL;
1291 flags |= RTCF_LOCAL;
1292 goto make_route;
16454cff 1293@@ -2596,7 +2637,7 @@ static int ip_route_output_slow(struct n
58c5fc13
MT
1294
1295 if (fib_lookup(net, &fl, &res)) {
1296 res.fi = NULL;
1297- if (oldflp->oif) {
1298+ if (oldflp->oif && dev_out->flags & IFF_UP) {
1299 /* Apparently, routing tables are wrong. Assume,
1300 that the destination is on link.
1301
16454cff 1302@@ -2634,18 +2675,18 @@ static int ip_route_output_slow(struct n
bc901d79 1303 }
58c5fc13 1304 dev_out = net->loopback_dev;
58c5fc13
MT
1305 fl.oif = dev_out->ifindex;
1306+ fl.fl4_gw = 0;
58c5fc13 1307 res.fi = NULL;
bc901d79 1308 flags |= RTCF_LOCAL;
58c5fc13
MT
1309 goto make_route;
1310 }
1311
1312+ if (res.type == RTN_UNICAST)
1313+ fib_select_default(net, &fl, &res);
1314 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1315- if (res.fi->fib_nhs > 1 && fl.oif == 0)
1316+ if (res.fi->fib_nhs > 1)
1317 fib_select_multipath(&fl, &res);
1318- else
1319 #endif
1320- if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
1321- fib_select_default(net, &fl, &res);
1322
1323 if (!fl.fl4_src)
1324 fl.fl4_src = FIB_RES_PREFSRC(res);
16454cff 1325@@ -2679,6 +2720,7 @@ int __ip_route_output_key(struct net *ne
58c5fc13 1326 rth->fl.fl4_src == flp->fl4_src &&
16454cff 1327 rt_is_output_route(rth) &&
58c5fc13
MT
1328 rth->fl.oif == flp->oif &&
1329+ rth->fl.fl4_gw == flp->fl4_gw &&
1330 rth->fl.mark == flp->mark &&
1331 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
1332 (IPTOS_RT_MASK | RTO_ONLINK)) &&