From 2fff11756f9c81b3e8d861107e1b074695ff6dff Mon Sep 17 00:00:00 2001 From: Arne Fitzenreiter Date: Fri, 14 Feb 2014 22:24:05 +0100 Subject: [PATCH] kernel: fix imq patch for 3.10.30. kernel: fix imq patch for 3.10.30 and remove old patches. --- lfs/linux | 2 +- .../grsecurity-haswell-32bit-fix.patch | 53 - src/patches/linux-3.10.25-imq.patch | 6800 ----------------- src/patches/linux-3.10.30-imq.patch | 3304 ++++++++ 4 files changed, 3305 insertions(+), 6854 deletions(-) delete mode 100644 src/patches/grsecurity-haswell-32bit-fix.patch delete mode 100644 src/patches/linux-3.10.25-imq.patch create mode 100644 src/patches/linux-3.10.30-imq.patch diff --git a/lfs/linux b/lfs/linux index 9476320d7..6aaa60e78 100644 --- a/lfs/linux +++ b/lfs/linux @@ -112,7 +112,7 @@ $(TARGET) : $(patsubst %,$(DIR_DL)/%,$(objects)) ln -svf linux-$(VER) $(DIR_SRC)/linux # Linux Intermediate Queueing Device - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-3.10.25-imq.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-3.10.30-imq.patch # ipp2p 0.8.2-ipfire cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-3.10-ipp2p-0.8.2-ipfire.patch diff --git a/src/patches/grsecurity-haswell-32bit-fix.patch b/src/patches/grsecurity-haswell-32bit-fix.patch deleted file mode 100644 index abff2b02d..000000000 --- a/src/patches/grsecurity-haswell-32bit-fix.patch +++ /dev/null @@ -1,53 +0,0 @@ -diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h -index 7430027..2124e35 100644 ---- a/arch/x86/include/asm/mmu_context.h -+++ b/arch/x86/include/asm/mmu_context.h -@@ -80,7 +80,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - #if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) - if (static_cpu_has(X86_FEATURE_PCID)) { - if (static_cpu_has(X86_FEATURE_INVPCID)) { -- unsigned long descriptor[2]; -+ u64 descriptor[2]; - descriptor[0] = PCID_USER; - asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_SINGLE_CONTEXT) : "memory"); - } else { -@@ -144,7 +144,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - #if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) - if (static_cpu_has(X86_FEATURE_PCID)) { - if (static_cpu_has(X86_FEATURE_INVPCID)) { -- unsigned long descriptor[2]; -+ u64 descriptor[2]; - descriptor[0] = PCID_USER; - asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_SINGLE_CONTEXT) : "memory"); - } else { -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 45844c0..ada2172 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -18,7 +18,7 @@ - static inline void __native_flush_tlb(void) - { - if (static_cpu_has(X86_FEATURE_INVPCID)) { -- unsigned long descriptor[2]; -+ u64 descriptor[2]; - - descriptor[0] = PCID_KERNEL; - asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_ALL_MONGLOBAL) : "memory"); -@@ -42,7 +42,7 @@ static inline void __native_flush_tlb(void) - static inline void __native_flush_tlb_global_irq_disabled(void) - { - if (static_cpu_has(X86_FEATURE_INVPCID)) { -- unsigned long descriptor[2]; -+ u64 descriptor[2]; - - descriptor[0] = PCID_KERNEL; - asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_ALL_GLOBAL) : "memory"); -@@ -77,7 +77,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) - { - - if (static_cpu_has(X86_FEATURE_INVPCID)) { -- unsigned long descriptor[2]; -+ u64 descriptor[2]; - - descriptor[0] = PCID_KERNEL; - 
descriptor[1] = addr; diff --git a/src/patches/linux-3.10.25-imq.patch b/src/patches/linux-3.10.25-imq.patch deleted file mode 100644 index cb4a2d418..000000000 --- a/src/patches/linux-3.10.25-imq.patch +++ /dev/null @@ -1,6800 +0,0 @@ -diff -ruN linux-3.10.27/drivers/net/imq.c linux-3.10.27-imq/drivers/net/imq.c ---- linux-3.10.27/drivers/net/imq.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-3.10.27-imq/drivers/net/imq.c 2014-01-18 10:19:59.342342913 +0100 -@@ -0,0 +1,1001 @@ -+/* -+ * Pseudo-driver for the intermediate queue device. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ * -+ * Authors: Patrick McHardy, -+ * -+ * The first version was written by Martin Devera, -+ * -+ * Credits: Jan Rafaj -+ * - Update patch to 2.4.21 -+ * Sebastian Strollo -+ * - Fix "Dead-loop on netdevice imq"-issue -+ * Marcel Sebek -+ * - Update to 2.6.2-rc1 -+ * -+ * After some time of inactivity there is a group taking care -+ * of IMQ again: http://www.linuximq.net -+ * -+ * -+ * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7 -+ * including the following changes: -+ * -+ * - Correction of ipv6 support "+"s issue (Hasso Tepper) -+ * - Correction of imq_init_devs() issue that resulted in -+ * kernel OOPS unloading IMQ as module (Norbert Buchmuller) -+ * - Addition of functionality to choose number of IMQ devices -+ * during kernel config (Andre Correa) -+ * - Addition of functionality to choose how IMQ hooks on -+ * PRE and POSTROUTING (after or before NAT) (Andre Correa) -+ * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa) -+ * -+ * -+ * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were -+ * released with almost no problems. 2.6.14-x was released -+ * with some important changes: nfcache was removed; After -+ * some weeks of trouble we figured out that some IMQ fields -+ * in skb were missing in skbuff.c - skb_clone and copy_skb_header. -+ * These functions are correctly patched by this new patch version. -+ * -+ * Thanks for all who helped to figure out all the problems with -+ * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX, -+ * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully -+ * I didn't forget anybody). I apologize again for my lack of time. -+ * -+ * -+ * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead -+ * of qdisc_restart() and moved qdisc_run() to tasklet to avoid -+ * recursive locking. New initialization routines to fix 'rmmod' not -+ * working anymore. Used code from ifb.c. (Jussi Kivilinna) -+ * -+ * 2008/08/06 - 2.6.26 - (JK) -+ * - Replaced tasklet with 'netif_schedule()'. -+ * - Cleaned up and added comments for imq_nf_queue(). -+ * -+ * 2009/04/12 -+ * - Add skb_save_cb/skb_restore_cb helper functions for backuping -+ * control buffer. This is needed because qdisc-layer on kernels -+ * 2.6.27 and newer overwrite control buffer. (Jussi Kivilinna) -+ * - Add better locking for IMQ device. Hopefully this will solve -+ * SMP issues. (Jussi Kivilinna) -+ * - Port to 2.6.27 -+ * - Port to 2.6.28 -+ * - Port to 2.6.29 + fix rmmod not working -+ * -+ * 2009/04/20 - (Jussi Kivilinna) -+ * - Use netdevice feature flags to avoid extra packet handling -+ * by core networking layer and possibly increase performance. 
-+ * -+ * 2009/09/26 - (Jussi Kivilinna) -+ * - Add imq_nf_reinject_lockless to fix deadlock with -+ * imq_nf_queue/imq_nf_reinject. -+ * -+ * 2009/12/08 - (Jussi Kivilinna) -+ * - Port to 2.6.32 -+ * - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit() -+ * - Also add better error checking for skb->nf_queue_entry usage -+ * -+ * 2010/02/25 - (Jussi Kivilinna) -+ * - Port to 2.6.33 -+ * -+ * 2010/08/15 - (Jussi Kivilinna) -+ * - Port to 2.6.35 -+ * - Simplify hook registration by using nf_register_hooks. -+ * - nf_reinject doesn't need spinlock around it, therefore remove -+ * imq_nf_reinject function. Other nf_reinject users protect -+ * their own data with spinlock. With IMQ however all data is -+ * needed is stored per skbuff, so no locking is needed. -+ * - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of -+ * NF_QUEUE, this allows working coexistance of IMQ and other -+ * NF_QUEUE users. -+ * - Make IMQ multi-queue. Number of IMQ device queues can be -+ * increased with 'numqueues' module parameters. Default number -+ * of queues is 1, in other words by default IMQ works as -+ * single-queue device. Multi-queue selection is based on -+ * IFB multi-queue patch by Changli Gao . -+ * -+ * 2011/03/18 - (Jussi Kivilinna) -+ * - Port to 2.6.38 -+ * -+ * 2011/07/12 - (syoder89@gmail.com) -+ * - Crash fix that happens when the receiving interface has more -+ * than one queue (add missing skb_set_queue_mapping in -+ * imq_select_queue). -+ * -+ * 2011/07/26 - (Jussi Kivilinna) -+ * - Add queue mapping checks for packets exiting IMQ. -+ * - Port to 3.0 -+ * -+ * 2011/08/16 - (Jussi Kivilinna) -+ * - Clear IFF_TX_SKB_SHARING flag that was added for linux 3.0.2 -+ * -+ * 2011/11/03 - Germano Michel -+ * - Fix IMQ for net namespaces -+ * -+ * 2011/11/04 - Jussi Kivilinna -+ * - Port to 3.1 -+ * - Clean-up, move 'get imq device pointer by imqX name' to -+ * separate function from imq_nf_queue(). -+ * -+ * 2012/01/05 - Jussi Kivilinna -+ * - Port to 3.2 -+ * -+ * 2012/03/19 - Jussi Kivilinna -+ * - Port to 3.3 -+ * -+ * 2012/12/12 - Jussi Kivilinna -+ * - Port to 3.7 -+ * - Fix checkpatch.pl warnings -+ * -+ * 2013/09/10 - Jussi Kivilinna -+ * - Fixed GSO handling for 3.10, see imq_nf_queue() for comments. -+ * - Don't copy skb->cb_next when copying or cloning skbuffs. -+ * -+ * Also, many thanks to pablo Sebastian Greco for making the initial -+ * patch and to those who helped the testing. 
-+ * -+ * More info at: http://www.linuximq.net/ (Andre Correa) -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -+ #include -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num); -+ -+static nf_hookfn imq_nf_hook; -+ -+static struct nf_hook_ops imq_ops[] = { -+ { -+ /* imq_ingress_ipv4 */ -+ .hook = imq_nf_hook, -+ .owner = THIS_MODULE, -+ .pf = PF_INET, -+ .hooknum = NF_INET_PRE_ROUTING, -+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) -+ .priority = NF_IP_PRI_MANGLE + 1, -+#else -+ .priority = NF_IP_PRI_NAT_DST + 1, -+#endif -+ }, -+ { -+ /* imq_egress_ipv4 */ -+ .hook = imq_nf_hook, -+ .owner = THIS_MODULE, -+ .pf = PF_INET, -+ .hooknum = NF_INET_POST_ROUTING, -+#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA) -+ .priority = NF_IP_PRI_LAST, -+#else -+ .priority = NF_IP_PRI_NAT_SRC - 1, -+#endif -+ }, -+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -+ { -+ /* imq_ingress_ipv6 */ -+ .hook = imq_nf_hook, -+ .owner = THIS_MODULE, -+ .pf = PF_INET6, -+ .hooknum = NF_INET_PRE_ROUTING, -+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) -+ .priority = NF_IP6_PRI_MANGLE + 1, -+#else -+ .priority = NF_IP6_PRI_NAT_DST + 1, -+#endif -+ }, -+ { -+ /* imq_egress_ipv6 */ -+ .hook = imq_nf_hook, -+ .owner = THIS_MODULE, -+ .pf = PF_INET6, -+ .hooknum = NF_INET_POST_ROUTING, -+#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA) -+ .priority = NF_IP6_PRI_LAST, -+#else -+ .priority = NF_IP6_PRI_NAT_SRC - 1, -+#endif -+ }, -+#endif -+}; -+ -+#if defined(CONFIG_IMQ_NUM_DEVS) -+static int numdevs = CONFIG_IMQ_NUM_DEVS; -+#else -+static int numdevs = IMQ_MAX_DEVS; -+#endif -+ -+static struct net_device *imq_devs_cache[IMQ_MAX_DEVS]; -+ -+#define IMQ_MAX_QUEUES 32 -+static int numqueues = 1; -+static u32 imq_hashrnd; -+ -+static inline __be16 pppoe_proto(const struct sk_buff *skb) -+{ -+ return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + -+ sizeof(struct pppoe_hdr))); -+} -+ -+static u16 imq_hash(struct net_device *dev, struct sk_buff *skb) -+{ -+ unsigned int pull_len; -+ u16 protocol = skb->protocol; -+ u32 addr1, addr2; -+ u32 hash, ihl = 0; -+ union { -+ u16 in16[2]; -+ u32 in32; -+ } ports; -+ u8 ip_proto; -+ -+ pull_len = 0; -+ -+recheck: -+ switch (protocol) { -+ case htons(ETH_P_8021Q): { -+ if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL)) -+ goto other; -+ -+ pull_len += VLAN_HLEN; -+ skb->network_header += VLAN_HLEN; -+ -+ protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; -+ goto recheck; -+ } -+ -+ case htons(ETH_P_PPP_SES): { -+ if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL)) -+ goto other; -+ -+ pull_len += PPPOE_SES_HLEN; -+ skb->network_header += PPPOE_SES_HLEN; -+ -+ protocol = pppoe_proto(skb); -+ goto recheck; -+ } -+ -+ case htons(ETH_P_IP): { -+ const struct iphdr *iph = ip_hdr(skb); -+ -+ if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr)))) -+ goto other; -+ -+ addr1 = iph->daddr; -+ addr2 = iph->saddr; -+ -+ ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ? 
-+ iph->protocol : 0; -+ ihl = ip_hdrlen(skb); -+ -+ break; -+ } -+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -+ case htons(ETH_P_IPV6): { -+ const struct ipv6hdr *iph = ipv6_hdr(skb); -+ __be16 fo = 0; -+ -+ if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr)))) -+ goto other; -+ -+ addr1 = iph->daddr.s6_addr32[3]; -+ addr2 = iph->saddr.s6_addr32[3]; -+ ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto, -+ &fo); -+ if (unlikely(ihl < 0)) -+ goto other; -+ -+ break; -+ } -+#endif -+ default: -+other: -+ if (pull_len != 0) { -+ skb_push(skb, pull_len); -+ skb->network_header -= pull_len; -+ } -+ -+ return (u16)(ntohs(protocol) % dev->real_num_tx_queues); -+ } -+ -+ if (addr1 > addr2) -+ swap(addr1, addr2); -+ -+ switch (ip_proto) { -+ case IPPROTO_TCP: -+ case IPPROTO_UDP: -+ case IPPROTO_DCCP: -+ case IPPROTO_ESP: -+ case IPPROTO_AH: -+ case IPPROTO_SCTP: -+ case IPPROTO_UDPLITE: { -+ if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) { -+ if (ports.in16[0] > ports.in16[1]) -+ swap(ports.in16[0], ports.in16[1]); -+ break; -+ } -+ /* fall-through */ -+ } -+ default: -+ ports.in32 = 0; -+ break; -+ } -+ -+ if (pull_len != 0) { -+ skb_push(skb, pull_len); -+ skb->network_header -= pull_len; -+ } -+ -+ hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto); -+ -+ return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32); -+} -+ -+static inline bool sk_tx_queue_recorded(struct sock *sk) -+{ -+ return (sk_tx_queue_get(sk) >= 0); -+} -+ -+static struct netdev_queue *imq_select_queue(struct net_device *dev, -+ struct sk_buff *skb) -+{ -+ u16 queue_index = 0; -+ u32 hash; -+ -+ if (likely(dev->real_num_tx_queues == 1)) -+ goto out; -+ -+ /* IMQ can be receiving ingress or engress packets. */ -+ -+ /* Check first for if rx_queue is set */ -+ if (skb_rx_queue_recorded(skb)) { -+ queue_index = skb_get_rx_queue(skb); -+ goto out; -+ } -+ -+ /* Check if socket has tx_queue set */ -+ if (sk_tx_queue_recorded(skb->sk)) { -+ queue_index = sk_tx_queue_get(skb->sk); -+ goto out; -+ } -+ -+ /* Try use socket hash */ -+ if (skb->sk && skb->sk->sk_hash) { -+ hash = skb->sk->sk_hash; -+ queue_index = -+ (u16)(((u64)hash * dev->real_num_tx_queues) >> 32); -+ goto out; -+ } -+ -+ /* Generate hash from packet data */ -+ queue_index = imq_hash(dev, skb); -+ -+out: -+ if (unlikely(queue_index >= dev->real_num_tx_queues)) -+ queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues); -+ -+ skb_set_queue_mapping(skb, queue_index); -+ return netdev_get_tx_queue(dev, queue_index); -+} -+ -+static struct net_device_stats *imq_get_stats(struct net_device *dev) -+{ -+ return &dev->stats; -+} -+ -+/* called for packets kfree'd in qdiscs at places other than enqueue */ -+static void imq_skb_destructor(struct sk_buff *skb) -+{ -+ struct nf_queue_entry *entry = skb->nf_queue_entry; -+ -+ skb->nf_queue_entry = NULL; -+ -+ if (entry) { -+ nf_queue_entry_release_refs(entry); -+ kfree(entry); -+ } -+ -+ skb_restore_cb(skb); /* kfree backup */ -+} -+ -+static void imq_done_check_queue_mapping(struct sk_buff *skb, -+ struct net_device *dev) -+{ -+ unsigned int queue_index; -+ -+ /* Don't let queue_mapping be left too large after exiting IMQ */ -+ if (likely(skb->dev != dev && skb->dev != NULL)) { -+ queue_index = skb_get_queue_mapping(skb); -+ if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) { -+ queue_index = (u16)((u32)queue_index % -+ skb->dev->real_num_tx_queues); -+ skb_set_queue_mapping(skb, queue_index); -+ } -+ } else { -+ /* skb->dev was IMQ device itself or 
NULL, be on safe side and -+ * just clear queue mapping. -+ */ -+ skb_set_queue_mapping(skb, 0); -+ } -+} -+ -+static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev) -+{ -+ struct nf_queue_entry *entry = skb->nf_queue_entry; -+ -+ skb->nf_queue_entry = NULL; -+ dev->trans_start = jiffies; -+ -+ dev->stats.tx_bytes += skb->len; -+ dev->stats.tx_packets++; -+ -+ if (unlikely(entry == NULL)) { -+ /* We don't know what is going on here.. packet is queued for -+ * imq device, but (probably) not by us. -+ * -+ * If this packet was not send here by imq_nf_queue(), then -+ * skb_save_cb() was not used and skb_free() should not show: -+ * WARNING: IMQ: kfree_skb: skb->cb_next:.. -+ * and/or -+ * WARNING: IMQ: kfree_skb: skb->nf_queue_entry... -+ * -+ * However if this message is shown, then IMQ is somehow broken -+ * and you should report this to linuximq.net. -+ */ -+ -+ /* imq_dev_xmit is black hole that eats all packets, report that -+ * we eat this packet happily and increase dropped counters. -+ */ -+ -+ dev->stats.tx_dropped++; -+ dev_kfree_skb(skb); -+ -+ return NETDEV_TX_OK; -+ } -+ -+ skb_restore_cb(skb); /* restore skb->cb */ -+ -+ skb->imq_flags = 0; -+ skb->destructor = NULL; -+ -+ imq_done_check_queue_mapping(skb, dev); -+ -+ nf_reinject(entry, NF_ACCEPT); -+ -+ return NETDEV_TX_OK; -+} -+ -+static struct net_device *get_imq_device_by_index(int index) -+{ -+ struct net_device *dev = NULL; -+ struct net *net; -+ char buf[8]; -+ -+ /* get device by name and cache result */ -+ snprintf(buf, sizeof(buf), "imq%d", index); -+ -+ /* Search device from all namespaces. */ -+ for_each_net(net) { -+ dev = dev_get_by_name(net, buf); -+ if (dev) -+ break; -+ } -+ -+ if (WARN_ON_ONCE(dev == NULL)) { -+ /* IMQ device not found. Exotic config? */ -+ return ERR_PTR(-ENODEV); -+ } -+ -+ imq_devs_cache[index] = dev; -+ dev_put(dev); -+ -+ return dev; -+} -+ -+static struct nf_queue_entry *nf_queue_entry_dup(struct nf_queue_entry *e) -+{ -+ struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); -+ if (entry) { -+ if (nf_queue_entry_get_refs(entry)) -+ return entry; -+ kfree(entry); -+ } -+ return NULL; -+} -+ -+#ifdef CONFIG_BRIDGE_NETFILTER -+/* When called from bridge netfilter, skb->data must point to MAC header -+ * before calling skb_gso_segment(). Else, original MAC header is lost -+ * and segmented skbs will be sent to wrong destination. 
-+ */ -+static void nf_bridge_adjust_skb_data(struct sk_buff *skb) -+{ -+ if (skb->nf_bridge) -+ __skb_push(skb, skb->network_header - skb->mac_header); -+} -+ -+static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) -+{ -+ if (skb->nf_bridge) -+ __skb_pull(skb, skb->network_header - skb->mac_header); -+} -+#else -+#define nf_bridge_adjust_skb_data(s) do {} while (0) -+#define nf_bridge_adjust_segmented_data(s) do {} while (0) -+#endif -+ -+static void free_entry(struct nf_queue_entry *entry) -+{ -+ nf_queue_entry_release_refs(entry); -+ kfree(entry); -+} -+ -+static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev); -+ -+static int __imq_nf_queue_gso(struct nf_queue_entry *entry, -+ struct net_device *dev, struct sk_buff *skb) -+{ -+ int ret = -ENOMEM; -+ struct nf_queue_entry *entry_seg; -+ -+ nf_bridge_adjust_segmented_data(skb); -+ -+ if (skb->next == NULL) { /* last packet, no need to copy entry */ -+ struct sk_buff *gso_skb = entry->skb; -+ entry->skb = skb; -+ ret = __imq_nf_queue(entry, dev); -+ if (ret) -+ entry->skb = gso_skb; -+ return ret; -+ } -+ -+ skb->next = NULL; -+ -+ entry_seg = nf_queue_entry_dup(entry); -+ if (entry_seg) { -+ entry_seg->skb = skb; -+ ret = __imq_nf_queue(entry_seg, dev); -+ if (ret) -+ free_entry(entry_seg); -+ } -+ return ret; -+} -+ -+static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num) -+{ -+ struct sk_buff *skb, *segs; -+ struct net_device *dev; -+ unsigned int queued; -+ int index, retval, err; -+ -+ index = entry->skb->imq_flags & IMQ_F_IFMASK; -+ if (unlikely(index > numdevs - 1)) { -+ if (net_ratelimit()) -+ pr_warn("IMQ: invalid device specified, highest is %u\n", -+ numdevs - 1); -+ retval = -EINVAL; -+ goto out_no_dev; -+ } -+ -+ /* check for imq device by index from cache */ -+ dev = imq_devs_cache[index]; -+ if (unlikely(!dev)) { -+ dev = get_imq_device_by_index(index); -+ if (IS_ERR(dev)) { -+ retval = PTR_ERR(dev); -+ goto out_no_dev; -+ } -+ } -+ -+ if (unlikely(!(dev->flags & IFF_UP))) { -+ entry->skb->imq_flags = 0; -+ retval = -ECANCELED; -+ goto out_no_dev; -+ } -+ -+ if (!skb_is_gso(entry->skb)) -+ return __imq_nf_queue(entry, dev); -+ -+ /* Since 3.10.x, GSO handling moved here as result of upstream commit -+ * a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 (netfilter: move -+ * skb_gso_segment into nfnetlink_queue module). -+ * -+ * Following code replicates the gso handling from -+ * 'net/netfilter/nfnetlink_queue_core.c':nfqnl_enqueue_packet(). -+ */ -+ -+ skb = entry->skb; -+ -+ switch (entry->pf) { -+ case NFPROTO_IPV4: -+ skb->protocol = htons(ETH_P_IP); -+ break; -+ case NFPROTO_IPV6: -+ skb->protocol = htons(ETH_P_IPV6); -+ break; -+ } -+ -+ nf_bridge_adjust_skb_data(skb); -+ segs = skb_gso_segment(skb, 0); -+ /* Does not use PTR_ERR to limit the number of error codes that can be -+ * returned by nf_queue. For instance, callers rely on -ECANCELED to -+ * mean 'ignore this hook'. 
-+ */ -+ err = -ENOBUFS; -+ if (IS_ERR(segs)) -+ goto out_err; -+ queued = 0; -+ err = 0; -+ do { -+ struct sk_buff *nskb = segs->next; -+ if (nskb && nskb->next) -+ nskb->cb_next = NULL; -+ if (err == 0) -+ err = __imq_nf_queue_gso(entry, dev, segs); -+ if (err == 0) -+ queued++; -+ else -+ kfree_skb(segs); -+ segs = nskb; -+ } while (segs); -+ -+ if (queued) { -+ if (err) /* some segments are already queued */ -+ free_entry(entry); -+ kfree_skb(skb); -+ return 0; -+ } -+ -+out_err: -+ nf_bridge_adjust_segmented_data(skb); -+ retval = err; -+out_no_dev: -+ return retval; -+} -+ -+static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev) -+{ -+ struct sk_buff *skb_orig, *skb, *skb_shared; -+ struct Qdisc *q; -+ struct netdev_queue *txq; -+ spinlock_t *root_lock; -+ int users; -+ int retval = -EINVAL; -+ unsigned int orig_queue_index; -+ -+ dev->last_rx = jiffies; -+ -+ skb = entry->skb; -+ skb_orig = NULL; -+ -+ /* skb has owner? => make clone */ -+ if (unlikely(skb->destructor)) { -+ skb_orig = skb; -+ skb = skb_clone(skb, GFP_ATOMIC); -+ if (unlikely(!skb)) { -+ retval = -ENOMEM; -+ goto out; -+ } -+ skb->cb_next = NULL; -+ entry->skb = skb; -+ } -+ -+ skb->nf_queue_entry = entry; -+ -+ dev->stats.rx_bytes += skb->len; -+ dev->stats.rx_packets++; -+ -+ if (!skb->dev) { -+ /* skb->dev == NULL causes problems, try the find cause. */ -+ if (net_ratelimit()) { -+ dev_warn(&dev->dev, -+ "received packet with skb->dev == NULL\n"); -+ dump_stack(); -+ } -+ -+ skb->dev = dev; -+ } -+ -+ /* Disables softirqs for lock below */ -+ rcu_read_lock_bh(); -+ -+ /* Multi-queue selection */ -+ orig_queue_index = skb_get_queue_mapping(skb); -+ txq = imq_select_queue(dev, skb); -+ -+ q = rcu_dereference(txq->qdisc); -+ if (unlikely(!q->enqueue)) -+ goto packet_not_eaten_by_imq_dev; -+ -+ root_lock = qdisc_lock(q); -+ spin_lock(root_lock); -+ -+ users = atomic_read(&skb->users); -+ -+ skb_shared = skb_get(skb); /* increase reference count by one */ -+ -+ /* backup skb->cb, as qdisc layer will overwrite it */ -+ skb_save_cb(skb_shared); -+ qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */ -+ -+ if (likely(atomic_read(&skb_shared->users) == users + 1)) { -+ kfree_skb(skb_shared); /* decrease reference count by one */ -+ -+ skb->destructor = &imq_skb_destructor; -+ -+ /* cloned? */ -+ if (unlikely(skb_orig)) -+ kfree_skb(skb_orig); /* free original */ -+ -+ spin_unlock(root_lock); -+ rcu_read_unlock_bh(); -+ -+ /* schedule qdisc dequeue */ -+ __netif_schedule(q); -+ -+ retval = 0; -+ goto out; -+ } else { -+ skb_restore_cb(skb_shared); /* restore skb->cb */ -+ skb->nf_queue_entry = NULL; -+ /* -+ * qdisc dropped packet and decreased skb reference count of -+ * skb, so we don't really want to and try refree as that would -+ * actually destroy the skb. -+ */ -+ spin_unlock(root_lock); -+ goto packet_not_eaten_by_imq_dev; -+ } -+ -+packet_not_eaten_by_imq_dev: -+ skb_set_queue_mapping(skb, orig_queue_index); -+ rcu_read_unlock_bh(); -+ -+ /* cloned? restore original */ -+ if (unlikely(skb_orig)) { -+ kfree_skb(skb); -+ entry->skb = skb_orig; -+ } -+ retval = -1; -+out: -+ return retval; -+} -+ -+static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb, -+ const struct net_device *indev, -+ const struct net_device *outdev, -+ int (*okfn)(struct sk_buff *)) -+{ -+ return (pskb->imq_flags & IMQ_F_ENQUEUE) ? 
NF_IMQ_QUEUE : NF_ACCEPT; -+} -+ -+static int imq_close(struct net_device *dev) -+{ -+ netif_stop_queue(dev); -+ return 0; -+} -+ -+static int imq_open(struct net_device *dev) -+{ -+ netif_start_queue(dev); -+ return 0; -+} -+ -+static const struct net_device_ops imq_netdev_ops = { -+ .ndo_open = imq_open, -+ .ndo_stop = imq_close, -+ .ndo_start_xmit = imq_dev_xmit, -+ .ndo_get_stats = imq_get_stats, -+}; -+ -+static void imq_setup(struct net_device *dev) -+{ -+ dev->netdev_ops = &imq_netdev_ops; -+ dev->type = ARPHRD_VOID; -+ dev->mtu = 16000; /* too small? */ -+ dev->tx_queue_len = 11000; /* too big? */ -+ dev->flags = IFF_NOARP; -+ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | -+ NETIF_F_GSO | NETIF_F_HW_CSUM | -+ NETIF_F_HIGHDMA; -+ dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | -+ IFF_TX_SKB_SHARING); -+} -+ -+static int imq_validate(struct nlattr *tb[], struct nlattr *data[]) -+{ -+ int ret = 0; -+ -+ if (tb[IFLA_ADDRESS]) { -+ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { -+ ret = -EINVAL; -+ goto end; -+ } -+ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { -+ ret = -EADDRNOTAVAIL; -+ goto end; -+ } -+ } -+ return 0; -+end: -+ pr_warn("IMQ: imq_validate failed (%d)\n", ret); -+ return ret; -+} -+ -+static struct rtnl_link_ops imq_link_ops __read_mostly = { -+ .kind = "imq", -+ .priv_size = 0, -+ .setup = imq_setup, -+ .validate = imq_validate, -+}; -+ -+static const struct nf_queue_handler imq_nfqh = { -+ .outfn = imq_nf_queue, -+}; -+ -+static int __init imq_init_hooks(void) -+{ -+ int ret; -+ -+ nf_register_queue_imq_handler(&imq_nfqh); -+ -+ ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops)); -+ if (ret < 0) -+ nf_unregister_queue_imq_handler(); -+ -+ return ret; -+} -+ -+static int __init imq_init_one(int index) -+{ -+ struct net_device *dev; -+ int ret; -+ -+ dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues); -+ if (!dev) -+ return -ENOMEM; -+ -+ ret = dev_alloc_name(dev, dev->name); -+ if (ret < 0) -+ goto fail; -+ -+ dev->rtnl_link_ops = &imq_link_ops; -+ ret = register_netdevice(dev); -+ if (ret < 0) -+ goto fail; -+ -+ return 0; -+fail: -+ free_netdev(dev); -+ return ret; -+} -+ -+static int __init imq_init_devs(void) -+{ -+ int err, i; -+ -+ if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) { -+ pr_err("IMQ: numdevs has to be betweed 1 and %u\n", -+ IMQ_MAX_DEVS); -+ return -EINVAL; -+ } -+ -+ if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) { -+ pr_err("IMQ: numqueues has to be betweed 1 and %u\n", -+ IMQ_MAX_QUEUES); -+ return -EINVAL; -+ } -+ -+ get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd)); -+ -+ rtnl_lock(); -+ err = __rtnl_link_register(&imq_link_ops); -+ -+ for (i = 0; i < numdevs && !err; i++) -+ err = imq_init_one(i); -+ -+ if (err) { -+ __rtnl_link_unregister(&imq_link_ops); -+ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); -+ } -+ rtnl_unlock(); -+ -+ return err; -+} -+ -+static int __init imq_init_module(void) -+{ -+ int err; -+ -+#if defined(CONFIG_IMQ_NUM_DEVS) -+ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16); -+ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2); -+ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK); -+#endif -+ -+ err = imq_init_devs(); -+ if (err) { -+ pr_err("IMQ: Error trying imq_init_devs(net)\n"); -+ return err; -+ } -+ -+ err = imq_init_hooks(); -+ if (err) { -+ pr_err(KERN_ERR "IMQ: Error trying imq_init_hooks()\n"); -+ rtnl_link_unregister(&imq_link_ops); -+ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); -+ return err; -+ } -+ -+ pr_info("IMQ driver loaded successfully. 
(numdevs = %d, numqueues = %d)\n", -+ numdevs, numqueues); -+ -+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) -+ pr_info("\tHooking IMQ before NAT on PREROUTING.\n"); -+#else -+ pr_info("\tHooking IMQ after NAT on PREROUTING.\n"); -+#endif -+#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB) -+ pr_info("\tHooking IMQ before NAT on POSTROUTING.\n"); -+#else -+ pr_info("\tHooking IMQ after NAT on POSTROUTING.\n"); -+#endif -+ -+ return 0; -+} -+ -+static void __exit imq_unhook(void) -+{ -+ nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops)); -+ nf_unregister_queue_imq_handler(); -+} -+ -+static void __exit imq_cleanup_devs(void) -+{ -+ rtnl_link_unregister(&imq_link_ops); -+ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); -+} -+ -+static void __exit imq_exit_module(void) -+{ -+ imq_unhook(); -+ imq_cleanup_devs(); -+ pr_info("IMQ driver unloaded successfully.\n"); -+} -+ -+module_init(imq_init_module); -+module_exit(imq_exit_module); -+ -+module_param(numdevs, int, 0); -+module_param(numqueues, int, 0); -+MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will be created)"); -+MODULE_PARM_DESC(numqueues, "number of queues per IMQ device"); -+MODULE_AUTHOR("http://www.linuximq.net"); -+MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information."); -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS_RTNL_LINK("imq"); -+ -diff -ruN linux-3.10.27/drivers/net/Kconfig linux-3.10.27-imq/drivers/net/Kconfig ---- linux-3.10.27/drivers/net/Kconfig 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/drivers/net/Kconfig 2014-01-18 10:19:59.341342885 +0100 -@@ -207,6 +207,125 @@ - depends on RIONET - default "128" - -+config IMQ -+ tristate "IMQ (intermediate queueing device) support" -+ depends on NETDEVICES && NETFILTER -+ ---help--- -+ The IMQ device(s) is used as placeholder for QoS queueing -+ disciplines. Every packet entering/leaving the IP stack can be -+ directed through the IMQ device where it's enqueued/dequeued to the -+ attached qdisc. This allows you to treat network devices as classes -+ and distribute bandwidth among them. Iptables is used to specify -+ through which IMQ device, if any, packets travel. -+ -+ More information at: http://www.linuximq.net/ -+ -+ To compile this driver as a module, choose M here: the module -+ will be called imq. If unsure, say N. -+ -+choice -+ prompt "IMQ behavior (PRE/POSTROUTING)" -+ depends on IMQ -+ default IMQ_BEHAVIOR_AB -+ help -+ This setting defines how IMQ behaves in respect to its -+ hooking in PREROUTING and POSTROUTING. -+ -+ IMQ can work in any of the following ways: -+ -+ PREROUTING | POSTROUTING -+ -----------------|------------------- -+ #1 After NAT | After NAT -+ #2 After NAT | Before NAT -+ #3 Before NAT | After NAT -+ #4 Before NAT | Before NAT -+ -+ The default behavior is to hook before NAT on PREROUTING -+ and after NAT on POSTROUTING (#3). -+ -+ This settings are specially usefull when trying to use IMQ -+ to shape NATed clients. -+ -+ More information can be found at: www.linuximq.net -+ -+ If not sure leave the default settings alone. -+ -+config IMQ_BEHAVIOR_AA -+ bool "IMQ AA" -+ help -+ This setting defines how IMQ behaves in respect to its -+ hooking in PREROUTING and POSTROUTING. -+ -+ Choosing this option will make IMQ hook like this: -+ -+ PREROUTING: After NAT -+ POSTROUTING: After NAT -+ -+ More information can be found at: www.linuximq.net -+ -+ If not sure leave the default settings alone. 
-+ -+config IMQ_BEHAVIOR_AB -+ bool "IMQ AB" -+ help -+ This setting defines how IMQ behaves in respect to its -+ hooking in PREROUTING and POSTROUTING. -+ -+ Choosing this option will make IMQ hook like this: -+ -+ PREROUTING: After NAT -+ POSTROUTING: Before NAT -+ -+ More information can be found at: www.linuximq.net -+ -+ If not sure leave the default settings alone. -+ -+config IMQ_BEHAVIOR_BA -+ bool "IMQ BA" -+ help -+ This setting defines how IMQ behaves in respect to its -+ hooking in PREROUTING and POSTROUTING. -+ -+ Choosing this option will make IMQ hook like this: -+ -+ PREROUTING: Before NAT -+ POSTROUTING: After NAT -+ -+ More information can be found at: www.linuximq.net -+ -+ If not sure leave the default settings alone. -+ -+config IMQ_BEHAVIOR_BB -+ bool "IMQ BB" -+ help -+ This setting defines how IMQ behaves in respect to its -+ hooking in PREROUTING and POSTROUTING. -+ -+ Choosing this option will make IMQ hook like this: -+ -+ PREROUTING: Before NAT -+ POSTROUTING: Before NAT -+ -+ More information can be found at: www.linuximq.net -+ -+ If not sure leave the default settings alone. -+ -+endchoice -+ -+config IMQ_NUM_DEVS -+ int "Number of IMQ devices" -+ range 2 16 -+ depends on IMQ -+ default "16" -+ help -+ This setting defines how many IMQ devices will be created. -+ -+ The default value is 16. -+ -+ More information can be found at: www.linuximq.net -+ -+ If not sure leave the default settings alone. -+ - config TUN - tristate "Universal TUN/TAP device driver support" - select CRC32 -diff -ruN linux-3.10.27/drivers/net/Makefile linux-3.10.27-imq/drivers/net/Makefile ---- linux-3.10.27/drivers/net/Makefile 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/drivers/net/Makefile 2014-01-18 10:19:59.341342885 +0100 -@@ -9,6 +9,7 @@ - obj-$(CONFIG_DUMMY) += dummy.o - obj-$(CONFIG_EQUALIZER) += eql.o - obj-$(CONFIG_IFB) += ifb.o -+obj-$(CONFIG_IMQ) += imq.o - obj-$(CONFIG_MACVLAN) += macvlan.o - obj-$(CONFIG_MACVTAP) += macvtap.o - obj-$(CONFIG_MII) += mii.o -diff -ruN linux-3.10.27/include/linux/imq.h linux-3.10.27-imq/include/linux/imq.h ---- linux-3.10.27/include/linux/imq.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-3.10.27-imq/include/linux/imq.h 2014-01-18 10:19:59.342342913 +0100 -@@ -0,0 +1,13 @@ -+#ifndef _IMQ_H -+#define _IMQ_H -+ -+/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */ -+#define IMQ_F_BITS 5 -+ -+#define IMQ_F_IFMASK 0x0f -+#define IMQ_F_ENQUEUE 0x10 -+ -+#define IMQ_MAX_DEVS (IMQ_F_IFMASK + 1) -+ -+#endif /* _IMQ_H */ -+ -diff -ruN linux-3.10.27/include/linux/netfilter/xt_IMQ.h linux-3.10.27-imq/include/linux/netfilter/xt_IMQ.h ---- linux-3.10.27/include/linux/netfilter/xt_IMQ.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-3.10.27-imq/include/linux/netfilter/xt_IMQ.h 2014-01-18 10:19:59.342342913 +0100 -@@ -0,0 +1,9 @@ -+#ifndef _XT_IMQ_H -+#define _XT_IMQ_H -+ -+struct xt_imq_info { -+ unsigned int todev; /* target imq device */ -+}; -+ -+#endif /* _XT_IMQ_H */ -+ -diff -ruN linux-3.10.27/include/linux/netfilter_ipv4/ipt_IMQ.h linux-3.10.27-imq/include/linux/netfilter_ipv4/ipt_IMQ.h ---- linux-3.10.27/include/linux/netfilter_ipv4/ipt_IMQ.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-3.10.27-imq/include/linux/netfilter_ipv4/ipt_IMQ.h 2014-01-18 10:19:59.343342933 +0100 -@@ -0,0 +1,10 @@ -+#ifndef _IPT_IMQ_H -+#define _IPT_IMQ_H -+ -+/* Backwards compatibility for old userspace */ -+#include -+ -+#define ipt_imq_info xt_imq_info -+ -+#endif /* _IPT_IMQ_H */ -+ -diff -ruN 
linux-3.10.27/include/linux/netfilter_ipv6/ip6t_IMQ.h linux-3.10.27-imq/include/linux/netfilter_ipv6/ip6t_IMQ.h ---- linux-3.10.27/include/linux/netfilter_ipv6/ip6t_IMQ.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-3.10.27-imq/include/linux/netfilter_ipv6/ip6t_IMQ.h 2014-01-18 10:19:59.343342933 +0100 -@@ -0,0 +1,10 @@ -+#ifndef _IP6T_IMQ_H -+#define _IP6T_IMQ_H -+ -+/* Backwards compatibility for old userspace */ -+#include -+ -+#define ip6t_imq_info xt_imq_info -+ -+#endif /* _IP6T_IMQ_H */ -+ -diff -ruN linux-3.10.27/include/linux/skbuff.h linux-3.10.27-imq/include/linux/skbuff.h ---- linux-3.10.27/include/linux/skbuff.h 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/include/linux/skbuff.h 2014-01-18 10:18:22.220271201 +0100 -@@ -33,6 +33,9 @@ - #include - #include - #include -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+#include -+#endif - - /* Don't change this without changing skb_csum_unnecessary! */ - #define CHECKSUM_NONE 0 -@@ -414,6 +417,9 @@ - * first. This is owned by whoever has the skb queued ATM. - */ - char cb[48] __aligned(8); -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+ void *cb_next; -+#endif - - unsigned long _skb_refdst; - #ifdef CONFIG_XFRM -@@ -449,6 +455,9 @@ - #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - struct nf_conntrack *nfct; - #endif -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+ struct nf_queue_entry *nf_queue_entry; -+#endif - #ifdef CONFIG_BRIDGE_NETFILTER - struct nf_bridge_info *nf_bridge; - #endif -@@ -487,7 +496,9 @@ - __u8 encapsulation:1; - /* 7/9 bit hole (depending on ndisc_nodetype presence) */ - kmemcheck_bitfield_end(flags2); -- -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+ __u8 imq_flags:IMQ_F_BITS; -+#endif - #ifdef CONFIG_NET_DMA - dma_cookie_t dma_cookie; - #endif -@@ -616,7 +627,10 @@ - { - return (struct rtable *)skb_dst(skb); - } -- -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+extern int skb_save_cb(struct sk_buff *skb); -+extern int skb_restore_cb(struct sk_buff *skb); -+#endif - extern void kfree_skb(struct sk_buff *skb); - extern void kfree_skb_list(struct sk_buff *segs); - extern void skb_tx_error(struct sk_buff *skb); -@@ -2735,6 +2749,10 @@ - nf_conntrack_get(src->nfct); - dst->nfctinfo = src->nfctinfo; - #endif -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+ dst->imq_flags = src->imq_flags; -+ dst->nf_queue_entry = src->nf_queue_entry; -+#endif - #ifdef CONFIG_BRIDGE_NETFILTER - dst->nf_bridge = src->nf_bridge; - nf_bridge_get(src->nf_bridge); -diff -ruN linux-3.10.27/include/net/netfilter/nf_queue.h linux-3.10.27-imq/include/net/netfilter/nf_queue.h ---- linux-3.10.27/include/net/netfilter/nf_queue.h 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/include/net/netfilter/nf_queue.h 2014-01-18 10:19:59.345342949 +0100 -@@ -29,6 +29,12 @@ - void nf_register_queue_handler(const struct nf_queue_handler *qh); - void nf_unregister_queue_handler(void); - extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); -+extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry); -+ -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh); -+extern void nf_unregister_queue_imq_handler(void); -+#endif - - bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); - void nf_queue_entry_release_refs(struct nf_queue_entry *entry); -diff -ruN linux-3.10.27/include/uapi/linux/netfilter.h 
linux-3.10.27-imq/include/uapi/linux/netfilter.h ---- linux-3.10.27/include/uapi/linux/netfilter.h 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/include/uapi/linux/netfilter.h 2014-01-18 10:19:59.345342949 +0100 -@@ -13,7 +13,8 @@ - #define NF_QUEUE 3 - #define NF_REPEAT 4 - #define NF_STOP 5 --#define NF_MAX_VERDICT NF_STOP -+#define NF_IMQ_QUEUE 6 -+#define NF_MAX_VERDICT NF_IMQ_QUEUE - - /* we overload the higher bits for encoding auxiliary data such as the queue - * number or errno values. Not nice, but better than additional function -diff -ruN linux-3.10.27/net/core/dev.c linux-3.10.27-imq/net/core/dev.c ---- linux-3.10.27/net/core/dev.c 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/net/core/dev.c 2014-01-18 10:19:59.347342963 +0100 -@@ -129,6 +129,9 @@ - #include - #include - #include -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+#include -+#endif - - #include "net-sysfs.h" - -@@ -2573,7 +2576,12 @@ - } - } - -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+ if (!list_empty(&ptype_all) && -+ !(skb->imq_flags & IMQ_F_ENQUEUE)) -+#else - if (!list_empty(&ptype_all)) -+#endif - dev_queue_xmit_nit(skb, dev); - - skb_len = skb->len; -diff -ruN linux-3.10.27/net/core/skbuff.c linux-3.10.27-imq/net/core/skbuff.c ---- linux-3.10.27/net/core/skbuff.c 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/net/core/skbuff.c 2014-01-18 10:19:59.348342972 +0100 -@@ -73,6 +73,9 @@ - - struct kmem_cache *skbuff_head_cache __read_mostly; - static struct kmem_cache *skbuff_fclone_cache __read_mostly; -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+static struct kmem_cache *skbuff_cb_store_cache __read_mostly; -+#endif - - static void sock_pipe_buf_release(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -@@ -92,6 +95,82 @@ - return 1; - } - -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+/* Control buffer save/restore for IMQ devices */ -+struct skb_cb_table { -+ char cb[48] __aligned(8); -+ void *cb_next; -+ atomic_t refcnt; -+}; -+ -+static DEFINE_SPINLOCK(skb_cb_store_lock); -+ -+int skb_save_cb(struct sk_buff *skb) -+{ -+ struct skb_cb_table *next; -+ -+ next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC); -+ if (!next) -+ return -ENOMEM; -+ -+ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb)); -+ -+ memcpy(next->cb, skb->cb, sizeof(skb->cb)); -+ next->cb_next = skb->cb_next; -+ -+ atomic_set(&next->refcnt, 1); -+ -+ skb->cb_next = next; -+ return 0; -+} -+EXPORT_SYMBOL(skb_save_cb); -+ -+int skb_restore_cb(struct sk_buff *skb) -+{ -+ struct skb_cb_table *next; -+ -+ if (!skb->cb_next) -+ return 0; -+ -+ next = skb->cb_next; -+ -+ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb)); -+ -+ memcpy(skb->cb, next->cb, sizeof(skb->cb)); -+ skb->cb_next = next->cb_next; -+ -+ spin_lock(&skb_cb_store_lock); -+ -+ if (atomic_dec_and_test(&next->refcnt)) -+ kmem_cache_free(skbuff_cb_store_cache, next); -+ -+ spin_unlock(&skb_cb_store_lock); -+ -+ return 0; -+} -+EXPORT_SYMBOL(skb_restore_cb); -+ -+static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old) -+{ -+ struct skb_cb_table *next; -+ struct sk_buff *old; -+ -+ if (!__old->cb_next) { -+ new->cb_next = NULL; -+ return; -+ } -+ -+ spin_lock(&skb_cb_store_lock); -+ -+ old = (struct sk_buff *)__old; -+ -+ next = old->cb_next; -+ atomic_inc(&next->refcnt); -+ new->cb_next = next; -+ -+ spin_unlock(&skb_cb_store_lock); -+} -+#endif - - /* Pipe buffer operations for a socket. 
*/ - static const struct pipe_buf_operations sock_pipe_buf_ops = { -@@ -582,6 +661,28 @@ - WARN_ON(in_irq()); - skb->destructor(skb); - } -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+ /* -+ * This should not happen. When it does, avoid memleak by restoring -+ * the chain of cb-backups. -+ */ -+ while (skb->cb_next != NULL) { -+ if (net_ratelimit()) -+ pr_warn("IMQ: kfree_skb: skb->cb_next: %08x\n", -+ (unsigned int)skb->cb_next); -+ -+ skb_restore_cb(skb); -+ } -+ /* -+ * This should not happen either, nf_queue_entry is nullified in -+ * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are -+ * leaking entry pointers, maybe memory. We don't know if this is -+ * pointer to already freed memory, or should this be freed. -+ * If this happens we need to add refcounting, etc for nf_queue_entry. -+ */ -+ if (skb->nf_queue_entry && net_ratelimit()) -+ pr_warn("%s\n", "IMQ: kfree_skb: skb->nf_queue_entry != NULL"); -+#endif - #if IS_ENABLED(CONFIG_NF_CONNTRACK) - nf_conntrack_put(skb->nfct); - #endif -@@ -713,6 +814,10 @@ - new->sp = secpath_get(old->sp); - #endif - memcpy(new->cb, old->cb, sizeof(old->cb)); -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+ new->cb_next = NULL; -+ /*skb_copy_stored_cb(new, old);*/ -+#endif - new->csum = old->csum; - new->local_df = old->local_df; - new->pkt_type = old->pkt_type; -@@ -3093,6 +3198,13 @@ - 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+ skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache", -+ sizeof(struct skb_cb_table), -+ 0, -+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, -+ NULL); -+#endif - } - - /** -diff -ruN linux-3.10.27/net/core/skbuff.c.orig linux-3.10.27-imq/net/core/skbuff.c.orig ---- linux-3.10.27/net/core/skbuff.c.orig 1970-01-01 01:00:00.000000000 +0100 -+++ linux-3.10.27-imq/net/core/skbuff.c.orig 2014-01-16 00:29:14.000000000 +0100 -@@ -0,0 +1,3503 @@ -+/* -+ * Routines having to do with the 'struct sk_buff' memory handlers. -+ * -+ * Authors: Alan Cox -+ * Florian La Roche -+ * -+ * Fixes: -+ * Alan Cox : Fixed the worst of the load -+ * balancer bugs. -+ * Dave Platt : Interrupt stacking fix. -+ * Richard Kooijman : Timestamp fixes. -+ * Alan Cox : Changed buffer format. -+ * Alan Cox : destructor hook for AF_UNIX etc. -+ * Linus Torvalds : Better skb_clone. -+ * Alan Cox : Added skb_copy. -+ * Alan Cox : Added all the changed routines Linus -+ * only put in the headers -+ * Ray VanTassle : Fixed --skb->lock in free -+ * Alan Cox : skb_copy copy arp field -+ * Andi Kleen : slabified it. -+ * Robert Olsson : Removed skb_head_pool -+ * -+ * NOTE: -+ * The __skb_ routines should be called with interrupts -+ * disabled, or you better be *real* sure that the operation is atomic -+ * with respect to whatever list is being frobbed (e.g. via lock_sock() -+ * or via disabling bottom half handlers, etc). -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ */ -+ -+/* -+ * The functions in this file will not compile correctly with gcc 2.4.x -+ */ -+ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_NET_CLS_ACT -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+struct kmem_cache *skbuff_head_cache __read_mostly; -+static struct kmem_cache *skbuff_fclone_cache __read_mostly; -+ -+static void sock_pipe_buf_release(struct pipe_inode_info *pipe, -+ struct pipe_buffer *buf) -+{ -+ put_page(buf->page); -+} -+ -+static void sock_pipe_buf_get(struct pipe_inode_info *pipe, -+ struct pipe_buffer *buf) -+{ -+ get_page(buf->page); -+} -+ -+static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, -+ struct pipe_buffer *buf) -+{ -+ return 1; -+} -+ -+ -+/* Pipe buffer operations for a socket. */ -+static const struct pipe_buf_operations sock_pipe_buf_ops = { -+ .can_merge = 0, -+ .map = generic_pipe_buf_map, -+ .unmap = generic_pipe_buf_unmap, -+ .confirm = generic_pipe_buf_confirm, -+ .release = sock_pipe_buf_release, -+ .steal = sock_pipe_buf_steal, -+ .get = sock_pipe_buf_get, -+}; -+ -+/** -+ * skb_panic - private function for out-of-line support -+ * @skb: buffer -+ * @sz: size -+ * @addr: address -+ * @msg: skb_over_panic or skb_under_panic -+ * -+ * Out-of-line support for skb_put() and skb_push(). -+ * Called via the wrapper skb_over_panic() or skb_under_panic(). -+ * Keep out of line to prevent kernel bloat. -+ * __builtin_return_address is not used because it is not always reliable. -+ */ -+static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, -+ const char msg[]) -+{ -+ pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", -+ msg, addr, skb->len, sz, skb->head, skb->data, -+ (unsigned long)skb->tail, (unsigned long)skb->end, -+ skb->dev ? skb->dev->name : ""); -+ BUG(); -+} -+ -+static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) -+{ -+ skb_panic(skb, sz, addr, __func__); -+} -+ -+static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) -+{ -+ skb_panic(skb, sz, addr, __func__); -+} -+ -+/* -+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells -+ * the caller if emergency pfmemalloc reserves are being used. If it is and -+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves -+ * may be used. Otherwise, the packet data may be discarded until enough -+ * memory is free -+ */ -+#define kmalloc_reserve(size, gfp, node, pfmemalloc) \ -+ __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) -+ -+static void *__kmalloc_reserve(size_t size, gfp_t flags, int node, -+ unsigned long ip, bool *pfmemalloc) -+{ -+ void *obj; -+ bool ret_pfmemalloc = false; -+ -+ /* -+ * Try a regular allocation, when that fails and we're not entitled -+ * to the reserves, fail. -+ */ -+ obj = kmalloc_node_track_caller(size, -+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN, -+ node); -+ if (obj || !(gfp_pfmemalloc_allowed(flags))) -+ goto out; -+ -+ /* Try again but now we are using pfmemalloc reserves */ -+ ret_pfmemalloc = true; -+ obj = kmalloc_node_track_caller(size, flags, node); -+ -+out: -+ if (pfmemalloc) -+ *pfmemalloc = ret_pfmemalloc; -+ -+ return obj; -+} -+ -+/* Allocate a new skbuff. 
We do this ourselves so we can fill in a few -+ * 'private' fields and also do memory statistics to find all the -+ * [BEEP] leaks. -+ * -+ */ -+ -+struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) -+{ -+ struct sk_buff *skb; -+ -+ /* Get the HEAD */ -+ skb = kmem_cache_alloc_node(skbuff_head_cache, -+ gfp_mask & ~__GFP_DMA, node); -+ if (!skb) -+ goto out; -+ -+ /* -+ * Only clear those fields we need to clear, not those that we will -+ * actually initialise below. Hence, don't put any more fields after -+ * the tail pointer in struct sk_buff! -+ */ -+ memset(skb, 0, offsetof(struct sk_buff, tail)); -+ skb->head = NULL; -+ skb->truesize = sizeof(struct sk_buff); -+ atomic_set(&skb->users, 1); -+ -+#ifdef NET_SKBUFF_DATA_USES_OFFSET -+ skb->mac_header = ~0U; -+#endif -+out: -+ return skb; -+} -+ -+/** -+ * __alloc_skb - allocate a network buffer -+ * @size: size to allocate -+ * @gfp_mask: allocation mask -+ * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache -+ * instead of head cache and allocate a cloned (child) skb. -+ * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for -+ * allocations in case the data is required for writeback -+ * @node: numa node to allocate memory on -+ * -+ * Allocate a new &sk_buff. The returned buffer has no headroom and a -+ * tail room of at least size bytes. The object has a reference count -+ * of one. The return is the buffer. On a failure the return is %NULL. -+ * -+ * Buffers may only be allocated from interrupts using a @gfp_mask of -+ * %GFP_ATOMIC. -+ */ -+struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, -+ int flags, int node) -+{ -+ struct kmem_cache *cache; -+ struct skb_shared_info *shinfo; -+ struct sk_buff *skb; -+ u8 *data; -+ bool pfmemalloc; -+ -+ cache = (flags & SKB_ALLOC_FCLONE) -+ ? skbuff_fclone_cache : skbuff_head_cache; -+ -+ if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) -+ gfp_mask |= __GFP_MEMALLOC; -+ -+ /* Get the HEAD */ -+ skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); -+ if (!skb) -+ goto out; -+ prefetchw(skb); -+ -+ /* We do our best to align skb_shared_info on a separate cache -+ * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives -+ * aligned memory blocks, unless SLUB/SLAB debug is enabled. -+ * Both skb->head and skb_shared_info are cache line aligned. -+ */ -+ size = SKB_DATA_ALIGN(size); -+ size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); -+ data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); -+ if (!data) -+ goto nodata; -+ /* kmalloc(size) might give us more room than requested. -+ * Put skb_shared_info exactly at the end of allocated zone, -+ * to allow max possible filling before reallocation. -+ */ -+ size = SKB_WITH_OVERHEAD(ksize(data)); -+ prefetchw(data + size); -+ -+ /* -+ * Only clear those fields we need to clear, not those that we will -+ * actually initialise below. Hence, don't put any more fields after -+ * the tail pointer in struct sk_buff! 
-+ */ -+ memset(skb, 0, offsetof(struct sk_buff, tail)); -+ /* Account for allocated memory : skb + skb->head */ -+ skb->truesize = SKB_TRUESIZE(size); -+ skb->pfmemalloc = pfmemalloc; -+ atomic_set(&skb->users, 1); -+ skb->head = data; -+ skb->data = data; -+ skb_reset_tail_pointer(skb); -+ skb->end = skb->tail + size; -+#ifdef NET_SKBUFF_DATA_USES_OFFSET -+ skb->mac_header = ~0U; -+ skb->transport_header = ~0U; -+#endif -+ -+ /* make sure we initialize shinfo sequentially */ -+ shinfo = skb_shinfo(skb); -+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); -+ atomic_set(&shinfo->dataref, 1); -+ kmemcheck_annotate_variable(shinfo->destructor_arg); -+ -+ if (flags & SKB_ALLOC_FCLONE) { -+ struct sk_buff *child = skb + 1; -+ atomic_t *fclone_ref = (atomic_t *) (child + 1); -+ -+ kmemcheck_annotate_bitfield(child, flags1); -+ kmemcheck_annotate_bitfield(child, flags2); -+ skb->fclone = SKB_FCLONE_ORIG; -+ atomic_set(fclone_ref, 1); -+ -+ child->fclone = SKB_FCLONE_UNAVAILABLE; -+ child->pfmemalloc = pfmemalloc; -+ } -+out: -+ return skb; -+nodata: -+ kmem_cache_free(cache, skb); -+ skb = NULL; -+ goto out; -+} -+EXPORT_SYMBOL(__alloc_skb); -+ -+/** -+ * build_skb - build a network buffer -+ * @data: data buffer provided by caller -+ * @frag_size: size of fragment, or 0 if head was kmalloced -+ * -+ * Allocate a new &sk_buff. Caller provides space holding head and -+ * skb_shared_info. @data must have been allocated by kmalloc() -+ * The return is the new skb buffer. -+ * On a failure the return is %NULL, and @data is not freed. -+ * Notes : -+ * Before IO, driver allocates only data buffer where NIC put incoming frame -+ * Driver should add room at head (NET_SKB_PAD) and -+ * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)) -+ * After IO, driver calls build_skb(), to allocate sk_buff and populate it -+ * before giving packet to stack. -+ * RX rings only contains data buffers, not full skbs. -+ */ -+struct sk_buff *build_skb(void *data, unsigned int frag_size) -+{ -+ struct skb_shared_info *shinfo; -+ struct sk_buff *skb; -+ unsigned int size = frag_size ? : ksize(data); -+ -+ skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); -+ if (!skb) -+ return NULL; -+ -+ size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); -+ -+ memset(skb, 0, offsetof(struct sk_buff, tail)); -+ skb->truesize = SKB_TRUESIZE(size); -+ skb->head_frag = frag_size != 0; -+ atomic_set(&skb->users, 1); -+ skb->head = data; -+ skb->data = data; -+ skb_reset_tail_pointer(skb); -+ skb->end = skb->tail + size; -+#ifdef NET_SKBUFF_DATA_USES_OFFSET -+ skb->mac_header = ~0U; -+ skb->transport_header = ~0U; -+#endif -+ -+ /* make sure we initialize shinfo sequentially */ -+ shinfo = skb_shinfo(skb); -+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); -+ atomic_set(&shinfo->dataref, 1); -+ kmemcheck_annotate_variable(shinfo->destructor_arg); -+ -+ return skb; -+} -+EXPORT_SYMBOL(build_skb); -+ -+struct netdev_alloc_cache { -+ struct page_frag frag; -+ /* we maintain a pagecount bias, so that we dont dirty cache line -+ * containing page->_count every time we allocate a fragment. 
-+ */ -+ unsigned int pagecnt_bias; -+}; -+static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -+ -+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) -+{ -+ struct netdev_alloc_cache *nc; -+ void *data = NULL; -+ int order; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ nc = &__get_cpu_var(netdev_alloc_cache); -+ if (unlikely(!nc->frag.page)) { -+refill: -+ for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) { -+ gfp_t gfp = gfp_mask; -+ -+ if (order) -+ gfp |= __GFP_COMP | __GFP_NOWARN; -+ nc->frag.page = alloc_pages(gfp, order); -+ if (likely(nc->frag.page)) -+ break; -+ if (--order < 0) -+ goto end; -+ } -+ nc->frag.size = PAGE_SIZE << order; -+recycle: -+ atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS); -+ nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS; -+ nc->frag.offset = 0; -+ } -+ -+ if (nc->frag.offset + fragsz > nc->frag.size) { -+ /* avoid unnecessary locked operations if possible */ -+ if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) || -+ atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count)) -+ goto recycle; -+ goto refill; -+ } -+ -+ data = page_address(nc->frag.page) + nc->frag.offset; -+ nc->frag.offset += fragsz; -+ nc->pagecnt_bias--; -+end: -+ local_irq_restore(flags); -+ return data; -+} -+ -+/** -+ * netdev_alloc_frag - allocate a page fragment -+ * @fragsz: fragment size -+ * -+ * Allocates a frag from a page for receive buffer. -+ * Uses GFP_ATOMIC allocations. -+ */ -+void *netdev_alloc_frag(unsigned int fragsz) -+{ -+ return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); -+} -+EXPORT_SYMBOL(netdev_alloc_frag); -+ -+/** -+ * __netdev_alloc_skb - allocate an skbuff for rx on a specific device -+ * @dev: network device to receive on -+ * @length: length to allocate -+ * @gfp_mask: get_free_pages mask, passed to alloc_skb -+ * -+ * Allocate a new &sk_buff and assign it a usage count of one. The -+ * buffer has unspecified headroom built in. Users should allocate -+ * the headroom they think they need without accounting for the -+ * built in space. The built in space is used for optimisations. -+ * -+ * %NULL is returned if there is no free memory. 
-+ */ -+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, -+ unsigned int length, gfp_t gfp_mask) -+{ -+ struct sk_buff *skb = NULL; -+ unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) + -+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); -+ -+ if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { -+ void *data; -+ -+ if (sk_memalloc_socks()) -+ gfp_mask |= __GFP_MEMALLOC; -+ -+ data = __netdev_alloc_frag(fragsz, gfp_mask); -+ -+ if (likely(data)) { -+ skb = build_skb(data, fragsz); -+ if (unlikely(!skb)) -+ put_page(virt_to_head_page(data)); -+ } -+ } else { -+ skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, -+ SKB_ALLOC_RX, NUMA_NO_NODE); -+ } -+ if (likely(skb)) { -+ skb_reserve(skb, NET_SKB_PAD); -+ skb->dev = dev; -+ } -+ return skb; -+} -+EXPORT_SYMBOL(__netdev_alloc_skb); -+ -+void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, -+ int size, unsigned int truesize) -+{ -+ skb_fill_page_desc(skb, i, page, off, size); -+ skb->len += size; -+ skb->data_len += size; -+ skb->truesize += truesize; -+} -+EXPORT_SYMBOL(skb_add_rx_frag); -+ -+static void skb_drop_list(struct sk_buff **listp) -+{ -+ kfree_skb_list(*listp); -+ *listp = NULL; -+} -+ -+static inline void skb_drop_fraglist(struct sk_buff *skb) -+{ -+ skb_drop_list(&skb_shinfo(skb)->frag_list); -+} -+ -+static void skb_clone_fraglist(struct sk_buff *skb) -+{ -+ struct sk_buff *list; -+ -+ skb_walk_frags(skb, list) -+ skb_get(list); -+} -+ -+static void skb_free_head(struct sk_buff *skb) -+{ -+ if (skb->head_frag) -+ put_page(virt_to_head_page(skb->head)); -+ else -+ kfree(skb->head); -+} -+ -+static void skb_release_data(struct sk_buff *skb) -+{ -+ if (!skb->cloned || -+ !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, -+ &skb_shinfo(skb)->dataref)) { -+ if (skb_shinfo(skb)->nr_frags) { -+ int i; -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) -+ skb_frag_unref(skb, i); -+ } -+ -+ /* -+ * If skb buf is from userspace, we need to notify the caller -+ * the lower device DMA has done; -+ */ -+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { -+ struct ubuf_info *uarg; -+ -+ uarg = skb_shinfo(skb)->destructor_arg; -+ if (uarg->callback) -+ uarg->callback(uarg, true); -+ } -+ -+ if (skb_has_frag_list(skb)) -+ skb_drop_fraglist(skb); -+ -+ skb_free_head(skb); -+ } -+} -+ -+/* -+ * Free an skbuff by memory without cleaning the state. -+ */ -+static void kfree_skbmem(struct sk_buff *skb) -+{ -+ struct sk_buff *other; -+ atomic_t *fclone_ref; -+ -+ switch (skb->fclone) { -+ case SKB_FCLONE_UNAVAILABLE: -+ kmem_cache_free(skbuff_head_cache, skb); -+ break; -+ -+ case SKB_FCLONE_ORIG: -+ fclone_ref = (atomic_t *) (skb + 2); -+ if (atomic_dec_and_test(fclone_ref)) -+ kmem_cache_free(skbuff_fclone_cache, skb); -+ break; -+ -+ case SKB_FCLONE_CLONE: -+ fclone_ref = (atomic_t *) (skb + 1); -+ other = skb - 1; -+ -+ /* The clone portion is available for -+ * fast-cloning again. -+ */ -+ skb->fclone = SKB_FCLONE_UNAVAILABLE; -+ -+ if (atomic_dec_and_test(fclone_ref)) -+ kmem_cache_free(skbuff_fclone_cache, other); -+ break; -+ } -+} -+ -+static void skb_release_head_state(struct sk_buff *skb) -+{ -+ skb_dst_drop(skb); -+#ifdef CONFIG_XFRM -+ secpath_put(skb->sp); -+#endif -+ if (skb->destructor) { -+ WARN_ON(in_irq()); -+ skb->destructor(skb); -+ } -+#if IS_ENABLED(CONFIG_NF_CONNTRACK) -+ nf_conntrack_put(skb->nfct); -+#endif -+#ifdef CONFIG_BRIDGE_NETFILTER -+ nf_bridge_put(skb->nf_bridge); -+#endif -+/* XXX: IS this still necessary? 
- JHS */ -+#ifdef CONFIG_NET_SCHED -+ skb->tc_index = 0; -+#ifdef CONFIG_NET_CLS_ACT -+ skb->tc_verd = 0; -+#endif -+#endif -+} -+ -+/* Free everything but the sk_buff shell. */ -+static void skb_release_all(struct sk_buff *skb) -+{ -+ skb_release_head_state(skb); -+ if (likely(skb->head)) -+ skb_release_data(skb); -+} -+ -+/** -+ * __kfree_skb - private function -+ * @skb: buffer -+ * -+ * Free an sk_buff. Release anything attached to the buffer. -+ * Clean the state. This is an internal helper function. Users should -+ * always call kfree_skb -+ */ -+ -+void __kfree_skb(struct sk_buff *skb) -+{ -+ skb_release_all(skb); -+ kfree_skbmem(skb); -+} -+EXPORT_SYMBOL(__kfree_skb); -+ -+/** -+ * kfree_skb - free an sk_buff -+ * @skb: buffer to free -+ * -+ * Drop a reference to the buffer and free it if the usage count has -+ * hit zero. -+ */ -+void kfree_skb(struct sk_buff *skb) -+{ -+ if (unlikely(!skb)) -+ return; -+ if (likely(atomic_read(&skb->users) == 1)) -+ smp_rmb(); -+ else if (likely(!atomic_dec_and_test(&skb->users))) -+ return; -+ trace_kfree_skb(skb, __builtin_return_address(0)); -+ __kfree_skb(skb); -+} -+EXPORT_SYMBOL(kfree_skb); -+ -+void kfree_skb_list(struct sk_buff *segs) -+{ -+ while (segs) { -+ struct sk_buff *next = segs->next; -+ -+ kfree_skb(segs); -+ segs = next; -+ } -+} -+EXPORT_SYMBOL(kfree_skb_list); -+ -+/** -+ * skb_tx_error - report an sk_buff xmit error -+ * @skb: buffer that triggered an error -+ * -+ * Report xmit error if a device callback is tracking this skb. -+ * skb must be freed afterwards. -+ */ -+void skb_tx_error(struct sk_buff *skb) -+{ -+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { -+ struct ubuf_info *uarg; -+ -+ uarg = skb_shinfo(skb)->destructor_arg; -+ if (uarg->callback) -+ uarg->callback(uarg, false); -+ skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; -+ } -+} -+EXPORT_SYMBOL(skb_tx_error); -+ -+/** -+ * consume_skb - free an skbuff -+ * @skb: buffer to free -+ * -+ * Drop a ref to the buffer and free it if the usage count has hit zero -+ * Functions identically to kfree_skb, but kfree_skb assumes that the frame -+ * is being dropped after a failure and notes that -+ */ -+void consume_skb(struct sk_buff *skb) -+{ -+ if (unlikely(!skb)) -+ return; -+ if (likely(atomic_read(&skb->users) == 1)) -+ smp_rmb(); -+ else if (likely(!atomic_dec_and_test(&skb->users))) -+ return; -+ trace_consume_skb(skb); -+ __kfree_skb(skb); -+} -+EXPORT_SYMBOL(consume_skb); -+ -+static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -+{ -+ new->tstamp = old->tstamp; -+ new->dev = old->dev; -+ new->transport_header = old->transport_header; -+ new->network_header = old->network_header; -+ new->mac_header = old->mac_header; -+ new->inner_transport_header = old->inner_transport_header; -+ new->inner_network_header = old->inner_network_header; -+ new->inner_mac_header = old->inner_mac_header; -+ skb_dst_copy(new, old); -+ new->rxhash = old->rxhash; -+ new->ooo_okay = old->ooo_okay; -+ new->l4_rxhash = old->l4_rxhash; -+ new->no_fcs = old->no_fcs; -+ new->encapsulation = old->encapsulation; -+#ifdef CONFIG_XFRM -+ new->sp = secpath_get(old->sp); -+#endif -+ memcpy(new->cb, old->cb, sizeof(old->cb)); -+ new->csum = old->csum; -+ new->local_df = old->local_df; -+ new->pkt_type = old->pkt_type; -+ new->ip_summed = old->ip_summed; -+ skb_copy_queue_mapping(new, old); -+ new->priority = old->priority; -+#if IS_ENABLED(CONFIG_IP_VS) -+ new->ipvs_property = old->ipvs_property; -+#endif -+ new->pfmemalloc = old->pfmemalloc; -+ new->protocol = 
old->protocol; -+ new->mark = old->mark; -+ new->skb_iif = old->skb_iif; -+ __nf_copy(new, old); -+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) -+ new->nf_trace = old->nf_trace; -+#endif -+#ifdef CONFIG_NET_SCHED -+ new->tc_index = old->tc_index; -+#ifdef CONFIG_NET_CLS_ACT -+ new->tc_verd = old->tc_verd; -+#endif -+#endif -+ new->vlan_proto = old->vlan_proto; -+ new->vlan_tci = old->vlan_tci; -+ -+ skb_copy_secmark(new, old); -+} -+ -+/* -+ * You should not add any new code to this function. Add it to -+ * __copy_skb_header above instead. -+ */ -+static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) -+{ -+#define C(x) n->x = skb->x -+ -+ n->next = n->prev = NULL; -+ n->sk = NULL; -+ __copy_skb_header(n, skb); -+ -+ C(len); -+ C(data_len); -+ C(mac_len); -+ n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; -+ n->cloned = 1; -+ n->nohdr = 0; -+ n->destructor = NULL; -+ C(tail); -+ C(end); -+ C(head); -+ C(head_frag); -+ C(data); -+ C(truesize); -+ atomic_set(&n->users, 1); -+ -+ atomic_inc(&(skb_shinfo(skb)->dataref)); -+ skb->cloned = 1; -+ -+ return n; -+#undef C -+} -+ -+/** -+ * skb_morph - morph one skb into another -+ * @dst: the skb to receive the contents -+ * @src: the skb to supply the contents -+ * -+ * This is identical to skb_clone except that the target skb is -+ * supplied by the user. -+ * -+ * The target skb is returned upon exit. -+ */ -+struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) -+{ -+ skb_release_all(dst); -+ return __skb_clone(dst, src); -+} -+EXPORT_SYMBOL_GPL(skb_morph); -+ -+/** -+ * skb_copy_ubufs - copy userspace skb frags buffers to kernel -+ * @skb: the skb to modify -+ * @gfp_mask: allocation priority -+ * -+ * This must be called on SKBTX_DEV_ZEROCOPY skb. -+ * It will copy all frags into kernel and drop the reference -+ * to userspace pages. -+ * -+ * If this function is called from an interrupt gfp_mask() must be -+ * %GFP_ATOMIC. -+ * -+ * Returns 0 on success or a negative error code on failure -+ * to allocate kernel memory to copy to. -+ */ -+int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) -+{ -+ int i; -+ int num_frags = skb_shinfo(skb)->nr_frags; -+ struct page *page, *head = NULL; -+ struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg; -+ -+ for (i = 0; i < num_frags; i++) { -+ u8 *vaddr; -+ skb_frag_t *f = &skb_shinfo(skb)->frags[i]; -+ -+ page = alloc_page(gfp_mask); -+ if (!page) { -+ while (head) { -+ struct page *next = (struct page *)head->private; -+ put_page(head); -+ head = next; -+ } -+ return -ENOMEM; -+ } -+ vaddr = kmap_atomic(skb_frag_page(f)); -+ memcpy(page_address(page), -+ vaddr + f->page_offset, skb_frag_size(f)); -+ kunmap_atomic(vaddr); -+ page->private = (unsigned long)head; -+ head = page; -+ } -+ -+ /* skb frags release userspace buffers */ -+ for (i = 0; i < num_frags; i++) -+ skb_frag_unref(skb, i); -+ -+ uarg->callback(uarg, false); -+ -+ /* skb frags point to kernel buffers */ -+ for (i = num_frags - 1; i >= 0; i--) { -+ __skb_fill_page_desc(skb, i, head, 0, -+ skb_shinfo(skb)->frags[i].size); -+ head = (struct page *)head->private; -+ } -+ -+ skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; -+ return 0; -+} -+EXPORT_SYMBOL_GPL(skb_copy_ubufs); -+ -+/** -+ * skb_clone - duplicate an sk_buff -+ * @skb: buffer to clone -+ * @gfp_mask: allocation priority -+ * -+ * Duplicate an &sk_buff. The new one is not owned by a socket. Both -+ * copies share the same packet data but not structure. The new -+ * buffer has a reference count of 1. 
If the allocation fails the -+ * function returns %NULL otherwise the new buffer is returned. -+ * -+ * If this function is called from an interrupt gfp_mask() must be -+ * %GFP_ATOMIC. -+ */ -+ -+struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) -+{ -+ struct sk_buff *n; -+ -+ if (skb_orphan_frags(skb, gfp_mask)) -+ return NULL; -+ -+ n = skb + 1; -+ if (skb->fclone == SKB_FCLONE_ORIG && -+ n->fclone == SKB_FCLONE_UNAVAILABLE) { -+ atomic_t *fclone_ref = (atomic_t *) (n + 1); -+ n->fclone = SKB_FCLONE_CLONE; -+ atomic_inc(fclone_ref); -+ } else { -+ if (skb_pfmemalloc(skb)) -+ gfp_mask |= __GFP_MEMALLOC; -+ -+ n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); -+ if (!n) -+ return NULL; -+ -+ kmemcheck_annotate_bitfield(n, flags1); -+ kmemcheck_annotate_bitfield(n, flags2); -+ n->fclone = SKB_FCLONE_UNAVAILABLE; -+ } -+ -+ return __skb_clone(n, skb); -+} -+EXPORT_SYMBOL(skb_clone); -+ -+static void skb_headers_offset_update(struct sk_buff *skb, int off) -+{ -+ /* {transport,network,mac}_header and tail are relative to skb->head */ -+ skb->transport_header += off; -+ skb->network_header += off; -+ if (skb_mac_header_was_set(skb)) -+ skb->mac_header += off; -+ skb->inner_transport_header += off; -+ skb->inner_network_header += off; -+ skb->inner_mac_header += off; -+} -+ -+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -+{ -+#ifndef NET_SKBUFF_DATA_USES_OFFSET -+ /* -+ * Shift between the two data areas in bytes -+ */ -+ unsigned long offset = new->data - old->data; -+#endif -+ -+ __copy_skb_header(new, old); -+ -+#ifndef NET_SKBUFF_DATA_USES_OFFSET -+ skb_headers_offset_update(new, offset); -+#endif -+ skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; -+ skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; -+ skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; -+} -+ -+static inline int skb_alloc_rx_flag(const struct sk_buff *skb) -+{ -+ if (skb_pfmemalloc(skb)) -+ return SKB_ALLOC_RX; -+ return 0; -+} -+ -+/** -+ * skb_copy - create private copy of an sk_buff -+ * @skb: buffer to copy -+ * @gfp_mask: allocation priority -+ * -+ * Make a copy of both an &sk_buff and its data. This is used when the -+ * caller wishes to modify the data and needs a private copy of the -+ * data to alter. Returns %NULL on failure or the pointer to the buffer -+ * on success. The returned buffer has a reference count of 1. -+ * -+ * As by-product this function converts non-linear &sk_buff to linear -+ * one, so that &sk_buff becomes completely private and caller is allowed -+ * to modify all the data of returned buffer. This means that this -+ * function is not recommended for use in circumstances when only -+ * header is going to be modified. Use pskb_copy() instead. -+ */ -+ -+struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) -+{ -+ int headerlen = skb_headroom(skb); -+ unsigned int size = skb_end_offset(skb) + skb->data_len; -+ struct sk_buff *n = __alloc_skb(size, gfp_mask, -+ skb_alloc_rx_flag(skb), NUMA_NO_NODE); -+ -+ if (!n) -+ return NULL; -+ -+ /* Set the data pointer */ -+ skb_reserve(n, headerlen); -+ /* Set the tail pointer and length */ -+ skb_put(n, skb->len); -+ -+ if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) -+ BUG(); -+ -+ copy_skb_header(n, skb); -+ return n; -+} -+EXPORT_SYMBOL(skb_copy); -+ -+/** -+ * __pskb_copy - create copy of an sk_buff with private head. 
-+ * @skb: buffer to copy -+ * @headroom: headroom of new skb -+ * @gfp_mask: allocation priority -+ * -+ * Make a copy of both an &sk_buff and part of its data, located -+ * in header. Fragmented data remain shared. This is used when -+ * the caller wishes to modify only header of &sk_buff and needs -+ * private copy of the header to alter. Returns %NULL on failure -+ * or the pointer to the buffer on success. -+ * The returned buffer has a reference count of 1. -+ */ -+ -+struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) -+{ -+ unsigned int size = skb_headlen(skb) + headroom; -+ struct sk_buff *n = __alloc_skb(size, gfp_mask, -+ skb_alloc_rx_flag(skb), NUMA_NO_NODE); -+ -+ if (!n) -+ goto out; -+ -+ /* Set the data pointer */ -+ skb_reserve(n, headroom); -+ /* Set the tail pointer and length */ -+ skb_put(n, skb_headlen(skb)); -+ /* Copy the bytes */ -+ skb_copy_from_linear_data(skb, n->data, n->len); -+ -+ n->truesize += skb->data_len; -+ n->data_len = skb->data_len; -+ n->len = skb->len; -+ -+ if (skb_shinfo(skb)->nr_frags) { -+ int i; -+ -+ if (skb_orphan_frags(skb, gfp_mask)) { -+ kfree_skb(n); -+ n = NULL; -+ goto out; -+ } -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { -+ skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; -+ skb_frag_ref(skb, i); -+ } -+ skb_shinfo(n)->nr_frags = i; -+ } -+ -+ if (skb_has_frag_list(skb)) { -+ skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; -+ skb_clone_fraglist(n); -+ } -+ -+ copy_skb_header(n, skb); -+out: -+ return n; -+} -+EXPORT_SYMBOL(__pskb_copy); -+ -+/** -+ * pskb_expand_head - reallocate header of &sk_buff -+ * @skb: buffer to reallocate -+ * @nhead: room to add at head -+ * @ntail: room to add at tail -+ * @gfp_mask: allocation priority -+ * -+ * Expands (or creates identical copy, if &nhead and &ntail are zero) -+ * header of skb. &sk_buff itself is not changed. &sk_buff MUST have -+ * reference count of 1. Returns zero in the case of success or error, -+ * if expansion failed. In the last case, &sk_buff is not changed. -+ * -+ * All the pointers pointing into skb header may change and must be -+ * reloaded after call to this function. -+ */ -+ -+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, -+ gfp_t gfp_mask) -+{ -+ int i; -+ u8 *data; -+ int size = nhead + skb_end_offset(skb) + ntail; -+ long off; -+ -+ BUG_ON(nhead < 0); -+ -+ if (skb_shared(skb)) -+ BUG(); -+ -+ size = SKB_DATA_ALIGN(size); -+ -+ if (skb_pfmemalloc(skb)) -+ gfp_mask |= __GFP_MEMALLOC; -+ data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), -+ gfp_mask, NUMA_NO_NODE, NULL); -+ if (!data) -+ goto nodata; -+ size = SKB_WITH_OVERHEAD(ksize(data)); -+ -+ /* Copy only real data... and, alas, header. This should be -+ * optimized for the cases when header is void. 
-+ */ -+ memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head); -+ -+ memcpy((struct skb_shared_info *)(data + size), -+ skb_shinfo(skb), -+ offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); -+ -+ /* -+ * if shinfo is shared we must drop the old head gracefully, but if it -+ * is not we can just drop the old head and let the existing refcount -+ * be since all we did is relocate the values -+ */ -+ if (skb_cloned(skb)) { -+ /* copy this zero copy skb frags */ -+ if (skb_orphan_frags(skb, gfp_mask)) -+ goto nofrags; -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) -+ skb_frag_ref(skb, i); -+ -+ if (skb_has_frag_list(skb)) -+ skb_clone_fraglist(skb); -+ -+ skb_release_data(skb); -+ } else { -+ skb_free_head(skb); -+ } -+ off = (data + nhead) - skb->head; -+ -+ skb->head = data; -+ skb->head_frag = 0; -+ skb->data += off; -+#ifdef NET_SKBUFF_DATA_USES_OFFSET -+ skb->end = size; -+ off = nhead; -+#else -+ skb->end = skb->head + size; -+#endif -+ skb->tail += off; -+ skb_headers_offset_update(skb, off); -+ /* Only adjust this if it actually is csum_start rather than csum */ -+ if (skb->ip_summed == CHECKSUM_PARTIAL) -+ skb->csum_start += nhead; -+ skb->cloned = 0; -+ skb->hdr_len = 0; -+ skb->nohdr = 0; -+ atomic_set(&skb_shinfo(skb)->dataref, 1); -+ return 0; -+ -+nofrags: -+ kfree(data); -+nodata: -+ return -ENOMEM; -+} -+EXPORT_SYMBOL(pskb_expand_head); -+ -+/* Make private copy of skb with writable head and some headroom */ -+ -+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) -+{ -+ struct sk_buff *skb2; -+ int delta = headroom - skb_headroom(skb); -+ -+ if (delta <= 0) -+ skb2 = pskb_copy(skb, GFP_ATOMIC); -+ else { -+ skb2 = skb_clone(skb, GFP_ATOMIC); -+ if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, -+ GFP_ATOMIC)) { -+ kfree_skb(skb2); -+ skb2 = NULL; -+ } -+ } -+ return skb2; -+} -+EXPORT_SYMBOL(skb_realloc_headroom); -+ -+/** -+ * skb_copy_expand - copy and expand sk_buff -+ * @skb: buffer to copy -+ * @newheadroom: new free bytes at head -+ * @newtailroom: new free bytes at tail -+ * @gfp_mask: allocation priority -+ * -+ * Make a copy of both an &sk_buff and its data and while doing so -+ * allocate additional space. -+ * -+ * This is used when the caller wishes to modify the data and needs a -+ * private copy of the data to alter as well as more space for new fields. -+ * Returns %NULL on failure or the pointer to the buffer -+ * on success. The returned buffer has a reference count of 1. -+ * -+ * You must pass %GFP_ATOMIC as the allocation priority if this function -+ * is called from an interrupt. -+ */ -+struct sk_buff *skb_copy_expand(const struct sk_buff *skb, -+ int newheadroom, int newtailroom, -+ gfp_t gfp_mask) -+{ -+ /* -+ * Allocate the copy buffer -+ */ -+ struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, -+ gfp_mask, skb_alloc_rx_flag(skb), -+ NUMA_NO_NODE); -+ int oldheadroom = skb_headroom(skb); -+ int head_copy_len, head_copy_off; -+ int off; -+ -+ if (!n) -+ return NULL; -+ -+ skb_reserve(n, newheadroom); -+ -+ /* Set the tail pointer and length */ -+ skb_put(n, skb->len); -+ -+ head_copy_len = oldheadroom; -+ head_copy_off = 0; -+ if (newheadroom <= head_copy_len) -+ head_copy_len = newheadroom; -+ else -+ head_copy_off = newheadroom - head_copy_len; -+ -+ /* Copy the linear header and data. 
*/
-+	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
-+			  skb->len + head_copy_len))
-+		BUG();
-+
-+	copy_skb_header(n, skb);
-+
-+	off = newheadroom - oldheadroom;
-+	if (n->ip_summed == CHECKSUM_PARTIAL)
-+		n->csum_start += off;
-+#ifdef NET_SKBUFF_DATA_USES_OFFSET
-+	skb_headers_offset_update(n, off);
-+#endif
-+
-+	return n;
-+}
-+EXPORT_SYMBOL(skb_copy_expand);
-+
-+/**
-+ * skb_pad - zero pad the tail of an skb
-+ * @skb: buffer to pad
-+ * @pad: space to pad
-+ *
-+ * Ensure that a buffer is followed by a padding area that is zero
-+ * filled. Used by network drivers which may DMA or transfer data
-+ * beyond the buffer end onto the wire.
-+ *
-+ * May return error in out of memory cases. The skb is freed on error.
-+ */
-+
-+int skb_pad(struct sk_buff *skb, int pad)
-+{
-+	int err;
-+	int ntail;
-+
-+	/* If the skbuff is non linear tailroom is always zero.. */
-+	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
-+		memset(skb->data+skb->len, 0, pad);
-+		return 0;
-+	}
-+
-+	ntail = skb->data_len + pad - (skb->end - skb->tail);
-+	if (likely(skb_cloned(skb) || ntail > 0)) {
-+		err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
-+		if (unlikely(err))
-+			goto free_skb;
-+	}
-+
-+	/* FIXME: The use of this function with non-linear skb's really needs
-+	 * to be audited.
-+	 */
-+	err = skb_linearize(skb);
-+	if (unlikely(err))
-+		goto free_skb;
-+
-+	memset(skb->data + skb->len, 0, pad);
-+	return 0;
-+
-+free_skb:
-+	kfree_skb(skb);
-+	return err;
-+}
-+EXPORT_SYMBOL(skb_pad);
-+
-+/**
-+ * skb_put - add data to a buffer
-+ * @skb: buffer to use
-+ * @len: amount of data to add
-+ *
-+ * This function extends the used data area of the buffer. If this would
-+ * exceed the total buffer size the kernel will panic. A pointer to the
-+ * first byte of the extra data is returned.
-+ */
-+unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
-+{
-+	unsigned char *tmp = skb_tail_pointer(skb);
-+	SKB_LINEAR_ASSERT(skb);
-+	skb->tail += len;
-+	skb->len += len;
-+	if (unlikely(skb->tail > skb->end))
-+		skb_over_panic(skb, len, __builtin_return_address(0));
-+	return tmp;
-+}
-+EXPORT_SYMBOL(skb_put);
-+
-+/**
-+ * skb_push - add data to the start of a buffer
-+ * @skb: buffer to use
-+ * @len: amount of data to add
-+ *
-+ * This function extends the used data area of the buffer at the buffer
-+ * start. If this would exceed the total buffer headroom the kernel will
-+ * panic. A pointer to the first byte of the extra data is returned.
-+ */
-+unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
-+{
-+	skb->data -= len;
-+	skb->len += len;
-+	if (unlikely(skb->data < skb->head))
-+		skb_under_panic(skb, len, __builtin_return_address(0));
-+	return skb->data;
-+}
-+EXPORT_SYMBOL(skb_push);
-+
-+/**
-+ * skb_pull - remove data from the start of a buffer
-+ * @skb: buffer to use
-+ * @len: amount of data to remove
-+ *
-+ * This function removes data from the start of a buffer, returning
-+ * the memory to the headroom. A pointer to the next data in the buffer
-+ * is returned. Once the data has been pulled future pushes will overwrite
-+ * the old data.
-+ */
-+unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
-+{
-+	return skb_pull_inline(skb, len);
-+}
-+EXPORT_SYMBOL(skb_pull);
-+
-+/**
-+ * skb_trim - remove end from a buffer
-+ * @skb: buffer to alter
-+ * @len: new length
-+ *
-+ * Cut the length of a buffer down by removing data from the tail. If
-+ * the buffer is already under the length specified it is not modified.
-+ * The skb must be linear.
-+ */ -+void skb_trim(struct sk_buff *skb, unsigned int len) -+{ -+ if (skb->len > len) -+ __skb_trim(skb, len); -+} -+EXPORT_SYMBOL(skb_trim); -+ -+/* Trims skb to length len. It can change skb pointers. -+ */ -+ -+int ___pskb_trim(struct sk_buff *skb, unsigned int len) -+{ -+ struct sk_buff **fragp; -+ struct sk_buff *frag; -+ int offset = skb_headlen(skb); -+ int nfrags = skb_shinfo(skb)->nr_frags; -+ int i; -+ int err; -+ -+ if (skb_cloned(skb) && -+ unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) -+ return err; -+ -+ i = 0; -+ if (offset >= len) -+ goto drop_pages; -+ -+ for (; i < nfrags; i++) { -+ int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]); -+ -+ if (end < len) { -+ offset = end; -+ continue; -+ } -+ -+ skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset); -+ -+drop_pages: -+ skb_shinfo(skb)->nr_frags = i; -+ -+ for (; i < nfrags; i++) -+ skb_frag_unref(skb, i); -+ -+ if (skb_has_frag_list(skb)) -+ skb_drop_fraglist(skb); -+ goto done; -+ } -+ -+ for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); -+ fragp = &frag->next) { -+ int end = offset + frag->len; -+ -+ if (skb_shared(frag)) { -+ struct sk_buff *nfrag; -+ -+ nfrag = skb_clone(frag, GFP_ATOMIC); -+ if (unlikely(!nfrag)) -+ return -ENOMEM; -+ -+ nfrag->next = frag->next; -+ consume_skb(frag); -+ frag = nfrag; -+ *fragp = frag; -+ } -+ -+ if (end < len) { -+ offset = end; -+ continue; -+ } -+ -+ if (end > len && -+ unlikely((err = pskb_trim(frag, len - offset)))) -+ return err; -+ -+ if (frag->next) -+ skb_drop_list(&frag->next); -+ break; -+ } -+ -+done: -+ if (len > skb_headlen(skb)) { -+ skb->data_len -= skb->len - len; -+ skb->len = len; -+ } else { -+ skb->len = len; -+ skb->data_len = 0; -+ skb_set_tail_pointer(skb, len); -+ } -+ -+ return 0; -+} -+EXPORT_SYMBOL(___pskb_trim); -+ -+/** -+ * __pskb_pull_tail - advance tail of skb header -+ * @skb: buffer to reallocate -+ * @delta: number of bytes to advance tail -+ * -+ * The function makes a sense only on a fragmented &sk_buff, -+ * it expands header moving its tail forward and copying necessary -+ * data from fragmented part. -+ * -+ * &sk_buff MUST have reference count of 1. -+ * -+ * Returns %NULL (and &sk_buff does not change) if pull failed -+ * or value of new tail of skb in the case of success. -+ * -+ * All the pointers pointing into skb header may change and must be -+ * reloaded after call to this function. -+ */ -+ -+/* Moves tail of skb head forward, copying data from fragmented part, -+ * when it is necessary. -+ * 1. It may fail due to malloc failure. -+ * 2. It may change skb pointers. -+ * -+ * It is pretty complicated. Luckily, it is called only in exceptional cases. -+ */ -+unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) -+{ -+ /* If skb has not enough free space at tail, get new one -+ * plus 128 bytes for future expansions. If we have enough -+ * room at tail, reallocate without expansion only if skb is cloned. -+ */ -+ int i, k, eat = (skb->tail + delta) - skb->end; -+ -+ if (eat > 0 || skb_cloned(skb)) { -+ if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, -+ GFP_ATOMIC)) -+ return NULL; -+ } -+ -+ if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)) -+ BUG(); -+ -+ /* Optimization: no fragments, no reasons to preestimate -+ * size of pulled pages. Superb. -+ */ -+ if (!skb_has_frag_list(skb)) -+ goto pull_pages; -+ -+ /* Estimate size of pulled pages. 
*/ -+ eat = delta; -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { -+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); -+ -+ if (size >= eat) -+ goto pull_pages; -+ eat -= size; -+ } -+ -+ /* If we need update frag list, we are in troubles. -+ * Certainly, it possible to add an offset to skb data, -+ * but taking into account that pulling is expected to -+ * be very rare operation, it is worth to fight against -+ * further bloating skb head and crucify ourselves here instead. -+ * Pure masohism, indeed. 8)8) -+ */ -+ if (eat) { -+ struct sk_buff *list = skb_shinfo(skb)->frag_list; -+ struct sk_buff *clone = NULL; -+ struct sk_buff *insp = NULL; -+ -+ do { -+ BUG_ON(!list); -+ -+ if (list->len <= eat) { -+ /* Eaten as whole. */ -+ eat -= list->len; -+ list = list->next; -+ insp = list; -+ } else { -+ /* Eaten partially. */ -+ -+ if (skb_shared(list)) { -+ /* Sucks! We need to fork list. :-( */ -+ clone = skb_clone(list, GFP_ATOMIC); -+ if (!clone) -+ return NULL; -+ insp = list->next; -+ list = clone; -+ } else { -+ /* This may be pulled without -+ * problems. */ -+ insp = list; -+ } -+ if (!pskb_pull(list, eat)) { -+ kfree_skb(clone); -+ return NULL; -+ } -+ break; -+ } -+ } while (eat); -+ -+ /* Free pulled out fragments. */ -+ while ((list = skb_shinfo(skb)->frag_list) != insp) { -+ skb_shinfo(skb)->frag_list = list->next; -+ kfree_skb(list); -+ } -+ /* And insert new clone at head. */ -+ if (clone) { -+ clone->next = list; -+ skb_shinfo(skb)->frag_list = clone; -+ } -+ } -+ /* Success! Now we may commit changes to skb data. */ -+ -+pull_pages: -+ eat = delta; -+ k = 0; -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { -+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); -+ -+ if (size <= eat) { -+ skb_frag_unref(skb, i); -+ eat -= size; -+ } else { -+ skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; -+ if (eat) { -+ skb_shinfo(skb)->frags[k].page_offset += eat; -+ skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); -+ eat = 0; -+ } -+ k++; -+ } -+ } -+ skb_shinfo(skb)->nr_frags = k; -+ -+ skb->tail += delta; -+ skb->data_len -= delta; -+ -+ return skb_tail_pointer(skb); -+} -+EXPORT_SYMBOL(__pskb_pull_tail); -+ -+/** -+ * skb_copy_bits - copy bits from skb to kernel buffer -+ * @skb: source skb -+ * @offset: offset in source -+ * @to: destination buffer -+ * @len: number of bytes to copy -+ * -+ * Copy the specified number of bytes from the source skb to the -+ * destination buffer. -+ * -+ * CAUTION ! : -+ * If its prototype is ever changed, -+ * check arch/{*}/net/{*}.S files, -+ * since it is called from BPF assembly code. -+ */ -+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) -+{ -+ int start = skb_headlen(skb); -+ struct sk_buff *frag_iter; -+ int i, copy; -+ -+ if (offset > (int)skb->len - len) -+ goto fault; -+ -+ /* Copy header. 
*/ -+ if ((copy = start - offset) > 0) { -+ if (copy > len) -+ copy = len; -+ skb_copy_from_linear_data_offset(skb, offset, to, copy); -+ if ((len -= copy) == 0) -+ return 0; -+ offset += copy; -+ to += copy; -+ } -+ -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { -+ int end; -+ skb_frag_t *f = &skb_shinfo(skb)->frags[i]; -+ -+ WARN_ON(start > offset + len); -+ -+ end = start + skb_frag_size(f); -+ if ((copy = end - offset) > 0) { -+ u8 *vaddr; -+ -+ if (copy > len) -+ copy = len; -+ -+ vaddr = kmap_atomic(skb_frag_page(f)); -+ memcpy(to, -+ vaddr + f->page_offset + offset - start, -+ copy); -+ kunmap_atomic(vaddr); -+ -+ if ((len -= copy) == 0) -+ return 0; -+ offset += copy; -+ to += copy; -+ } -+ start = end; -+ } -+ -+ skb_walk_frags(skb, frag_iter) { -+ int end; -+ -+ WARN_ON(start > offset + len); -+ -+ end = start + frag_iter->len; -+ if ((copy = end - offset) > 0) { -+ if (copy > len) -+ copy = len; -+ if (skb_copy_bits(frag_iter, offset - start, to, copy)) -+ goto fault; -+ if ((len -= copy) == 0) -+ return 0; -+ offset += copy; -+ to += copy; -+ } -+ start = end; -+ } -+ -+ if (!len) -+ return 0; -+ -+fault: -+ return -EFAULT; -+} -+EXPORT_SYMBOL(skb_copy_bits); -+ -+/* -+ * Callback from splice_to_pipe(), if we need to release some pages -+ * at the end of the spd in case we error'ed out in filling the pipe. -+ */ -+static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) -+{ -+ put_page(spd->pages[i]); -+} -+ -+static struct page *linear_to_page(struct page *page, unsigned int *len, -+ unsigned int *offset, -+ struct sock *sk) -+{ -+ struct page_frag *pfrag = sk_page_frag(sk); -+ -+ if (!sk_page_frag_refill(sk, pfrag)) -+ return NULL; -+ -+ *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset); -+ -+ memcpy(page_address(pfrag->page) + pfrag->offset, -+ page_address(page) + *offset, *len); -+ *offset = pfrag->offset; -+ pfrag->offset += *len; -+ -+ return pfrag->page; -+} -+ -+static bool spd_can_coalesce(const struct splice_pipe_desc *spd, -+ struct page *page, -+ unsigned int offset) -+{ -+ return spd->nr_pages && -+ spd->pages[spd->nr_pages - 1] == page && -+ (spd->partial[spd->nr_pages - 1].offset + -+ spd->partial[spd->nr_pages - 1].len == offset); -+} -+ -+/* -+ * Fill page/offset/length into spd, if it can hold more pages. 
-+ */ -+static bool spd_fill_page(struct splice_pipe_desc *spd, -+ struct pipe_inode_info *pipe, struct page *page, -+ unsigned int *len, unsigned int offset, -+ bool linear, -+ struct sock *sk) -+{ -+ if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) -+ return true; -+ -+ if (linear) { -+ page = linear_to_page(page, len, &offset, sk); -+ if (!page) -+ return true; -+ } -+ if (spd_can_coalesce(spd, page, offset)) { -+ spd->partial[spd->nr_pages - 1].len += *len; -+ return false; -+ } -+ get_page(page); -+ spd->pages[spd->nr_pages] = page; -+ spd->partial[spd->nr_pages].len = *len; -+ spd->partial[spd->nr_pages].offset = offset; -+ spd->nr_pages++; -+ -+ return false; -+} -+ -+static bool __splice_segment(struct page *page, unsigned int poff, -+ unsigned int plen, unsigned int *off, -+ unsigned int *len, -+ struct splice_pipe_desc *spd, bool linear, -+ struct sock *sk, -+ struct pipe_inode_info *pipe) -+{ -+ if (!*len) -+ return true; -+ -+ /* skip this segment if already processed */ -+ if (*off >= plen) { -+ *off -= plen; -+ return false; -+ } -+ -+ /* ignore any bits we already processed */ -+ poff += *off; -+ plen -= *off; -+ *off = 0; -+ -+ do { -+ unsigned int flen = min(*len, plen); -+ -+ if (spd_fill_page(spd, pipe, page, &flen, poff, -+ linear, sk)) -+ return true; -+ poff += flen; -+ plen -= flen; -+ *len -= flen; -+ } while (*len && plen); -+ -+ return false; -+} -+ -+/* -+ * Map linear and fragment data from the skb to spd. It reports true if the -+ * pipe is full or if we already spliced the requested length. -+ */ -+static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, -+ unsigned int *offset, unsigned int *len, -+ struct splice_pipe_desc *spd, struct sock *sk) -+{ -+ int seg; -+ -+ /* map the linear part : -+ * If skb->head_frag is set, this 'linear' part is backed by a -+ * fragment, and if the head is not shared with any clones then -+ * we can avoid a copy since we own the head portion of this page. -+ */ -+ if (__splice_segment(virt_to_page(skb->data), -+ (unsigned long) skb->data & (PAGE_SIZE - 1), -+ skb_headlen(skb), -+ offset, len, spd, -+ skb_head_is_locked(skb), -+ sk, pipe)) -+ return true; -+ -+ /* -+ * then map the fragments -+ */ -+ for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { -+ const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; -+ -+ if (__splice_segment(skb_frag_page(f), -+ f->page_offset, skb_frag_size(f), -+ offset, len, spd, false, sk, pipe)) -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * Map data from the skb to a pipe. Should handle both the linear part, -+ * the fragments, and the frag list. It does NOT handle frag lists within -+ * the frag list, if such a thing exists. We'd probably need to recurse to -+ * handle that cleanly. -+ */ -+int skb_splice_bits(struct sk_buff *skb, unsigned int offset, -+ struct pipe_inode_info *pipe, unsigned int tlen, -+ unsigned int flags) -+{ -+ struct partial_page partial[MAX_SKB_FRAGS]; -+ struct page *pages[MAX_SKB_FRAGS]; -+ struct splice_pipe_desc spd = { -+ .pages = pages, -+ .partial = partial, -+ .nr_pages_max = MAX_SKB_FRAGS, -+ .flags = flags, -+ .ops = &sock_pipe_buf_ops, -+ .spd_release = sock_spd_release, -+ }; -+ struct sk_buff *frag_iter; -+ struct sock *sk = skb->sk; -+ int ret = 0; -+ -+ /* -+ * __skb_splice_bits() only fails if the output has no room left, -+ * so no point in going over the frag_list for the error case. 
-+ */ -+ if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk)) -+ goto done; -+ else if (!tlen) -+ goto done; -+ -+ /* -+ * now see if we have a frag_list to map -+ */ -+ skb_walk_frags(skb, frag_iter) { -+ if (!tlen) -+ break; -+ if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk)) -+ break; -+ } -+ -+done: -+ if (spd.nr_pages) { -+ /* -+ * Drop the socket lock, otherwise we have reverse -+ * locking dependencies between sk_lock and i_mutex -+ * here as compared to sendfile(). We enter here -+ * with the socket lock held, and splice_to_pipe() will -+ * grab the pipe inode lock. For sendfile() emulation, -+ * we call into ->sendpage() with the i_mutex lock held -+ * and networking will grab the socket lock. -+ */ -+ release_sock(sk); -+ ret = splice_to_pipe(pipe, &spd); -+ lock_sock(sk); -+ } -+ -+ return ret; -+} -+ -+/** -+ * skb_store_bits - store bits from kernel buffer to skb -+ * @skb: destination buffer -+ * @offset: offset in destination -+ * @from: source buffer -+ * @len: number of bytes to copy -+ * -+ * Copy the specified number of bytes from the source buffer to the -+ * destination skb. This function handles all the messy bits of -+ * traversing fragment lists and such. -+ */ -+ -+int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) -+{ -+ int start = skb_headlen(skb); -+ struct sk_buff *frag_iter; -+ int i, copy; -+ -+ if (offset > (int)skb->len - len) -+ goto fault; -+ -+ if ((copy = start - offset) > 0) { -+ if (copy > len) -+ copy = len; -+ skb_copy_to_linear_data_offset(skb, offset, from, copy); -+ if ((len -= copy) == 0) -+ return 0; -+ offset += copy; -+ from += copy; -+ } -+ -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { -+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; -+ int end; -+ -+ WARN_ON(start > offset + len); -+ -+ end = start + skb_frag_size(frag); -+ if ((copy = end - offset) > 0) { -+ u8 *vaddr; -+ -+ if (copy > len) -+ copy = len; -+ -+ vaddr = kmap_atomic(skb_frag_page(frag)); -+ memcpy(vaddr + frag->page_offset + offset - start, -+ from, copy); -+ kunmap_atomic(vaddr); -+ -+ if ((len -= copy) == 0) -+ return 0; -+ offset += copy; -+ from += copy; -+ } -+ start = end; -+ } -+ -+ skb_walk_frags(skb, frag_iter) { -+ int end; -+ -+ WARN_ON(start > offset + len); -+ -+ end = start + frag_iter->len; -+ if ((copy = end - offset) > 0) { -+ if (copy > len) -+ copy = len; -+ if (skb_store_bits(frag_iter, offset - start, -+ from, copy)) -+ goto fault; -+ if ((len -= copy) == 0) -+ return 0; -+ offset += copy; -+ from += copy; -+ } -+ start = end; -+ } -+ if (!len) -+ return 0; -+ -+fault: -+ return -EFAULT; -+} -+EXPORT_SYMBOL(skb_store_bits); -+ -+/* Checksum skb data. */ -+ -+__wsum skb_checksum(const struct sk_buff *skb, int offset, -+ int len, __wsum csum) -+{ -+ int start = skb_headlen(skb); -+ int i, copy = start - offset; -+ struct sk_buff *frag_iter; -+ int pos = 0; -+ -+ /* Checksum header. 
*/ -+ if (copy > 0) { -+ if (copy > len) -+ copy = len; -+ csum = csum_partial(skb->data + offset, copy, csum); -+ if ((len -= copy) == 0) -+ return csum; -+ offset += copy; -+ pos = copy; -+ } -+ -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { -+ int end; -+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; -+ -+ WARN_ON(start > offset + len); -+ -+ end = start + skb_frag_size(frag); -+ if ((copy = end - offset) > 0) { -+ __wsum csum2; -+ u8 *vaddr; -+ -+ if (copy > len) -+ copy = len; -+ vaddr = kmap_atomic(skb_frag_page(frag)); -+ csum2 = csum_partial(vaddr + frag->page_offset + -+ offset - start, copy, 0); -+ kunmap_atomic(vaddr); -+ csum = csum_block_add(csum, csum2, pos); -+ if (!(len -= copy)) -+ return csum; -+ offset += copy; -+ pos += copy; -+ } -+ start = end; -+ } -+ -+ skb_walk_frags(skb, frag_iter) { -+ int end; -+ -+ WARN_ON(start > offset + len); -+ -+ end = start + frag_iter->len; -+ if ((copy = end - offset) > 0) { -+ __wsum csum2; -+ if (copy > len) -+ copy = len; -+ csum2 = skb_checksum(frag_iter, offset - start, -+ copy, 0); -+ csum = csum_block_add(csum, csum2, pos); -+ if ((len -= copy) == 0) -+ return csum; -+ offset += copy; -+ pos += copy; -+ } -+ start = end; -+ } -+ BUG_ON(len); -+ -+ return csum; -+} -+EXPORT_SYMBOL(skb_checksum); -+ -+/* Both of above in one bottle. */ -+ -+__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, -+ u8 *to, int len, __wsum csum) -+{ -+ int start = skb_headlen(skb); -+ int i, copy = start - offset; -+ struct sk_buff *frag_iter; -+ int pos = 0; -+ -+ /* Copy header. */ -+ if (copy > 0) { -+ if (copy > len) -+ copy = len; -+ csum = csum_partial_copy_nocheck(skb->data + offset, to, -+ copy, csum); -+ if ((len -= copy) == 0) -+ return csum; -+ offset += copy; -+ to += copy; -+ pos = copy; -+ } -+ -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { -+ int end; -+ -+ WARN_ON(start > offset + len); -+ -+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); -+ if ((copy = end - offset) > 0) { -+ __wsum csum2; -+ u8 *vaddr; -+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; -+ -+ if (copy > len) -+ copy = len; -+ vaddr = kmap_atomic(skb_frag_page(frag)); -+ csum2 = csum_partial_copy_nocheck(vaddr + -+ frag->page_offset + -+ offset - start, to, -+ copy, 0); -+ kunmap_atomic(vaddr); -+ csum = csum_block_add(csum, csum2, pos); -+ if (!(len -= copy)) -+ return csum; -+ offset += copy; -+ to += copy; -+ pos += copy; -+ } -+ start = end; -+ } -+ -+ skb_walk_frags(skb, frag_iter) { -+ __wsum csum2; -+ int end; -+ -+ WARN_ON(start > offset + len); -+ -+ end = start + frag_iter->len; -+ if ((copy = end - offset) > 0) { -+ if (copy > len) -+ copy = len; -+ csum2 = skb_copy_and_csum_bits(frag_iter, -+ offset - start, -+ to, copy, 0); -+ csum = csum_block_add(csum, csum2, pos); -+ if ((len -= copy) == 0) -+ return csum; -+ offset += copy; -+ to += copy; -+ pos += copy; -+ } -+ start = end; -+ } -+ BUG_ON(len); -+ return csum; -+} -+EXPORT_SYMBOL(skb_copy_and_csum_bits); -+ -+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) -+{ -+ __wsum csum; -+ long csstart; -+ -+ if (skb->ip_summed == CHECKSUM_PARTIAL) -+ csstart = skb_checksum_start_offset(skb); -+ else -+ csstart = skb_headlen(skb); -+ -+ BUG_ON(csstart > skb_headlen(skb)); -+ -+ skb_copy_from_linear_data(skb, to, csstart); -+ -+ csum = 0; -+ if (csstart != skb->len) -+ csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, -+ skb->len - csstart, 0); -+ -+ if (skb->ip_summed == CHECKSUM_PARTIAL) { -+ long csstuff = csstart + skb->csum_offset; -+ -+ *((__sum16 
*)(to + csstuff)) = csum_fold(csum); -+ } -+} -+EXPORT_SYMBOL(skb_copy_and_csum_dev); -+ -+/** -+ * skb_dequeue - remove from the head of the queue -+ * @list: list to dequeue from -+ * -+ * Remove the head of the list. The list lock is taken so the function -+ * may be used safely with other locking list functions. The head item is -+ * returned or %NULL if the list is empty. -+ */ -+ -+struct sk_buff *skb_dequeue(struct sk_buff_head *list) -+{ -+ unsigned long flags; -+ struct sk_buff *result; -+ -+ spin_lock_irqsave(&list->lock, flags); -+ result = __skb_dequeue(list); -+ spin_unlock_irqrestore(&list->lock, flags); -+ return result; -+} -+EXPORT_SYMBOL(skb_dequeue); -+ -+/** -+ * skb_dequeue_tail - remove from the tail of the queue -+ * @list: list to dequeue from -+ * -+ * Remove the tail of the list. The list lock is taken so the function -+ * may be used safely with other locking list functions. The tail item is -+ * returned or %NULL if the list is empty. -+ */ -+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) -+{ -+ unsigned long flags; -+ struct sk_buff *result; -+ -+ spin_lock_irqsave(&list->lock, flags); -+ result = __skb_dequeue_tail(list); -+ spin_unlock_irqrestore(&list->lock, flags); -+ return result; -+} -+EXPORT_SYMBOL(skb_dequeue_tail); -+ -+/** -+ * skb_queue_purge - empty a list -+ * @list: list to empty -+ * -+ * Delete all buffers on an &sk_buff list. Each buffer is removed from -+ * the list and one reference dropped. This function takes the list -+ * lock and is atomic with respect to other list locking functions. -+ */ -+void skb_queue_purge(struct sk_buff_head *list) -+{ -+ struct sk_buff *skb; -+ while ((skb = skb_dequeue(list)) != NULL) -+ kfree_skb(skb); -+} -+EXPORT_SYMBOL(skb_queue_purge); -+ -+/** -+ * skb_queue_head - queue a buffer at the list head -+ * @list: list to use -+ * @newsk: buffer to queue -+ * -+ * Queue a buffer at the start of the list. This function takes the -+ * list lock and can be used safely with other locking &sk_buff functions -+ * safely. -+ * -+ * A buffer cannot be placed on two lists at the same time. -+ */ -+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&list->lock, flags); -+ __skb_queue_head(list, newsk); -+ spin_unlock_irqrestore(&list->lock, flags); -+} -+EXPORT_SYMBOL(skb_queue_head); -+ -+/** -+ * skb_queue_tail - queue a buffer at the list tail -+ * @list: list to use -+ * @newsk: buffer to queue -+ * -+ * Queue a buffer at the tail of the list. This function takes the -+ * list lock and can be used safely with other locking &sk_buff functions -+ * safely. -+ * -+ * A buffer cannot be placed on two lists at the same time. -+ */ -+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&list->lock, flags); -+ __skb_queue_tail(list, newsk); -+ spin_unlock_irqrestore(&list->lock, flags); -+} -+EXPORT_SYMBOL(skb_queue_tail); -+ -+/** -+ * skb_unlink - remove a buffer from a list -+ * @skb: buffer to remove -+ * @list: list to use -+ * -+ * Remove a packet from a list. The list locks are taken and this -+ * function is atomic with respect to other list locked calls -+ * -+ * You must know what list the SKB is on. 
-+ */ -+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&list->lock, flags); -+ __skb_unlink(skb, list); -+ spin_unlock_irqrestore(&list->lock, flags); -+} -+EXPORT_SYMBOL(skb_unlink); -+ -+/** -+ * skb_append - append a buffer -+ * @old: buffer to insert after -+ * @newsk: buffer to insert -+ * @list: list to use -+ * -+ * Place a packet after a given packet in a list. The list locks are taken -+ * and this function is atomic with respect to other list locked calls. -+ * A buffer cannot be placed on two lists at the same time. -+ */ -+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&list->lock, flags); -+ __skb_queue_after(list, old, newsk); -+ spin_unlock_irqrestore(&list->lock, flags); -+} -+EXPORT_SYMBOL(skb_append); -+ -+/** -+ * skb_insert - insert a buffer -+ * @old: buffer to insert before -+ * @newsk: buffer to insert -+ * @list: list to use -+ * -+ * Place a packet before a given packet in a list. The list locks are -+ * taken and this function is atomic with respect to other list locked -+ * calls. -+ * -+ * A buffer cannot be placed on two lists at the same time. -+ */ -+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&list->lock, flags); -+ __skb_insert(newsk, old->prev, old, list); -+ spin_unlock_irqrestore(&list->lock, flags); -+} -+EXPORT_SYMBOL(skb_insert); -+ -+static inline void skb_split_inside_header(struct sk_buff *skb, -+ struct sk_buff* skb1, -+ const u32 len, const int pos) -+{ -+ int i; -+ -+ skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len), -+ pos - len); -+ /* And move data appendix as is. */ -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) -+ skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; -+ -+ skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; -+ skb_shinfo(skb)->nr_frags = 0; -+ skb1->data_len = skb->data_len; -+ skb1->len += skb1->data_len; -+ skb->data_len = 0; -+ skb->len = len; -+ skb_set_tail_pointer(skb, len); -+} -+ -+static inline void skb_split_no_header(struct sk_buff *skb, -+ struct sk_buff* skb1, -+ const u32 len, int pos) -+{ -+ int i, k = 0; -+ const int nfrags = skb_shinfo(skb)->nr_frags; -+ -+ skb_shinfo(skb)->nr_frags = 0; -+ skb1->len = skb1->data_len = skb->len - len; -+ skb->len = len; -+ skb->data_len = len - pos; -+ -+ for (i = 0; i < nfrags; i++) { -+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); -+ -+ if (pos + size > len) { -+ skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; -+ -+ if (pos < len) { -+ /* Split frag. -+ * We have two variants in this case: -+ * 1. Move all the frag to the second -+ * part, if it is possible. F.e. -+ * this approach is mandatory for TUX, -+ * where splitting is expensive. -+ * 2. Split is accurately. We make this. -+ */ -+ skb_frag_ref(skb, i); -+ skb_shinfo(skb1)->frags[0].page_offset += len - pos; -+ skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); -+ skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); -+ skb_shinfo(skb)->nr_frags++; -+ } -+ k++; -+ } else -+ skb_shinfo(skb)->nr_frags++; -+ pos += size; -+ } -+ skb_shinfo(skb1)->nr_frags = k; -+} -+ -+/** -+ * skb_split - Split fragmented skb to two parts at length len. 
-+ * @skb: the buffer to split -+ * @skb1: the buffer to receive the second part -+ * @len: new length for skb -+ */ -+void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) -+{ -+ int pos = skb_headlen(skb); -+ -+ skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; -+ if (len < pos) /* Split line is inside header. */ -+ skb_split_inside_header(skb, skb1, len, pos); -+ else /* Second chunk has no header, nothing to copy. */ -+ skb_split_no_header(skb, skb1, len, pos); -+} -+EXPORT_SYMBOL(skb_split); -+ -+/* Shifting from/to a cloned skb is a no-go. -+ * -+ * Caller cannot keep skb_shinfo related pointers past calling here! -+ */ -+static int skb_prepare_for_shift(struct sk_buff *skb) -+{ -+ return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); -+} -+ -+/** -+ * skb_shift - Shifts paged data partially from skb to another -+ * @tgt: buffer into which tail data gets added -+ * @skb: buffer from which the paged data comes from -+ * @shiftlen: shift up to this many bytes -+ * -+ * Attempts to shift up to shiftlen worth of bytes, which may be less than -+ * the length of the skb, from skb to tgt. Returns number bytes shifted. -+ * It's up to caller to free skb if everything was shifted. -+ * -+ * If @tgt runs out of frags, the whole operation is aborted. -+ * -+ * Skb cannot include anything else but paged data while tgt is allowed -+ * to have non-paged data as well. -+ * -+ * TODO: full sized shift could be optimized but that would need -+ * specialized skb free'er to handle frags without up-to-date nr_frags. -+ */ -+int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) -+{ -+ int from, to, merge, todo; -+ struct skb_frag_struct *fragfrom, *fragto; -+ -+ BUG_ON(shiftlen > skb->len); -+ BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ -+ -+ todo = shiftlen; -+ from = 0; -+ to = skb_shinfo(tgt)->nr_frags; -+ fragfrom = &skb_shinfo(skb)->frags[from]; -+ -+ /* Actual merge is delayed until the point when we know we can -+ * commit all, so that we don't have to undo partial changes -+ */ -+ if (!to || -+ !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), -+ fragfrom->page_offset)) { -+ merge = -1; -+ } else { -+ merge = to - 1; -+ -+ todo -= skb_frag_size(fragfrom); -+ if (todo < 0) { -+ if (skb_prepare_for_shift(skb) || -+ skb_prepare_for_shift(tgt)) -+ return 0; -+ -+ /* All previous frag pointers might be stale! 
*/ -+ fragfrom = &skb_shinfo(skb)->frags[from]; -+ fragto = &skb_shinfo(tgt)->frags[merge]; -+ -+ skb_frag_size_add(fragto, shiftlen); -+ skb_frag_size_sub(fragfrom, shiftlen); -+ fragfrom->page_offset += shiftlen; -+ -+ goto onlymerged; -+ } -+ -+ from++; -+ } -+ -+ /* Skip full, not-fitting skb to avoid expensive operations */ -+ if ((shiftlen == skb->len) && -+ (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) -+ return 0; -+ -+ if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) -+ return 0; -+ -+ while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { -+ if (to == MAX_SKB_FRAGS) -+ return 0; -+ -+ fragfrom = &skb_shinfo(skb)->frags[from]; -+ fragto = &skb_shinfo(tgt)->frags[to]; -+ -+ if (todo >= skb_frag_size(fragfrom)) { -+ *fragto = *fragfrom; -+ todo -= skb_frag_size(fragfrom); -+ from++; -+ to++; -+ -+ } else { -+ __skb_frag_ref(fragfrom); -+ fragto->page = fragfrom->page; -+ fragto->page_offset = fragfrom->page_offset; -+ skb_frag_size_set(fragto, todo); -+ -+ fragfrom->page_offset += todo; -+ skb_frag_size_sub(fragfrom, todo); -+ todo = 0; -+ -+ to++; -+ break; -+ } -+ } -+ -+ /* Ready to "commit" this state change to tgt */ -+ skb_shinfo(tgt)->nr_frags = to; -+ -+ if (merge >= 0) { -+ fragfrom = &skb_shinfo(skb)->frags[0]; -+ fragto = &skb_shinfo(tgt)->frags[merge]; -+ -+ skb_frag_size_add(fragto, skb_frag_size(fragfrom)); -+ __skb_frag_unref(fragfrom); -+ } -+ -+ /* Reposition in the original skb */ -+ to = 0; -+ while (from < skb_shinfo(skb)->nr_frags) -+ skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; -+ skb_shinfo(skb)->nr_frags = to; -+ -+ BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); -+ -+onlymerged: -+ /* Most likely the tgt won't ever need its checksum anymore, skb on -+ * the other hand might need it if it needs to be resent -+ */ -+ tgt->ip_summed = CHECKSUM_PARTIAL; -+ skb->ip_summed = CHECKSUM_PARTIAL; -+ -+ /* Yak, is it really working this way? Some helper please? */ -+ skb->len -= shiftlen; -+ skb->data_len -= shiftlen; -+ skb->truesize -= shiftlen; -+ tgt->len += shiftlen; -+ tgt->data_len += shiftlen; -+ tgt->truesize += shiftlen; -+ -+ return shiftlen; -+} -+ -+/** -+ * skb_prepare_seq_read - Prepare a sequential read of skb data -+ * @skb: the buffer to read -+ * @from: lower offset of data to be read -+ * @to: upper offset of data to be read -+ * @st: state variable -+ * -+ * Initializes the specified state variable. Must be called before -+ * invoking skb_seq_read() for the first time. -+ */ -+void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, -+ unsigned int to, struct skb_seq_state *st) -+{ -+ st->lower_offset = from; -+ st->upper_offset = to; -+ st->root_skb = st->cur_skb = skb; -+ st->frag_idx = st->stepped_offset = 0; -+ st->frag_data = NULL; -+} -+EXPORT_SYMBOL(skb_prepare_seq_read); -+ -+/** -+ * skb_seq_read - Sequentially read skb data -+ * @consumed: number of bytes consumed by the caller so far -+ * @data: destination pointer for data to be returned -+ * @st: state variable -+ * -+ * Reads a block of skb data at &consumed relative to the -+ * lower offset specified to skb_prepare_seq_read(). Assigns -+ * the head of the data block to &data and returns the length -+ * of the block or 0 if the end of the skb data or the upper -+ * offset has been reached. -+ * -+ * The caller is not required to consume all of the data -+ * returned, i.e. &consumed is typically set to the number -+ * of bytes already consumed and the next call to -+ * skb_seq_read() will return the remaining part of the block. 
-+ * -+ * Note 1: The size of each block of data returned can be arbitrary, -+ * this limitation is the cost for zerocopy seqeuental -+ * reads of potentially non linear data. -+ * -+ * Note 2: Fragment lists within fragments are not implemented -+ * at the moment, state->root_skb could be replaced with -+ * a stack for this purpose. -+ */ -+unsigned int skb_seq_read(unsigned int consumed, const u8 **data, -+ struct skb_seq_state *st) -+{ -+ unsigned int block_limit, abs_offset = consumed + st->lower_offset; -+ skb_frag_t *frag; -+ -+ if (unlikely(abs_offset >= st->upper_offset)) -+ return 0; -+ -+next_skb: -+ block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; -+ -+ if (abs_offset < block_limit && !st->frag_data) { -+ *data = st->cur_skb->data + (abs_offset - st->stepped_offset); -+ return block_limit - abs_offset; -+ } -+ -+ if (st->frag_idx == 0 && !st->frag_data) -+ st->stepped_offset += skb_headlen(st->cur_skb); -+ -+ while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { -+ frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; -+ block_limit = skb_frag_size(frag) + st->stepped_offset; -+ -+ if (abs_offset < block_limit) { -+ if (!st->frag_data) -+ st->frag_data = kmap_atomic(skb_frag_page(frag)); -+ -+ *data = (u8 *) st->frag_data + frag->page_offset + -+ (abs_offset - st->stepped_offset); -+ -+ return block_limit - abs_offset; -+ } -+ -+ if (st->frag_data) { -+ kunmap_atomic(st->frag_data); -+ st->frag_data = NULL; -+ } -+ -+ st->frag_idx++; -+ st->stepped_offset += skb_frag_size(frag); -+ } -+ -+ if (st->frag_data) { -+ kunmap_atomic(st->frag_data); -+ st->frag_data = NULL; -+ } -+ -+ if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) { -+ st->cur_skb = skb_shinfo(st->root_skb)->frag_list; -+ st->frag_idx = 0; -+ goto next_skb; -+ } else if (st->cur_skb->next) { -+ st->cur_skb = st->cur_skb->next; -+ st->frag_idx = 0; -+ goto next_skb; -+ } -+ -+ return 0; -+} -+EXPORT_SYMBOL(skb_seq_read); -+ -+/** -+ * skb_abort_seq_read - Abort a sequential read of skb data -+ * @st: state variable -+ * -+ * Must be called if skb_seq_read() was not called until it -+ * returned 0. -+ */ -+void skb_abort_seq_read(struct skb_seq_state *st) -+{ -+ if (st->frag_data) -+ kunmap_atomic(st->frag_data); -+} -+EXPORT_SYMBOL(skb_abort_seq_read); -+ -+#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) -+ -+static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, -+ struct ts_config *conf, -+ struct ts_state *state) -+{ -+ return skb_seq_read(offset, text, TS_SKB_CB(state)); -+} -+ -+static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) -+{ -+ skb_abort_seq_read(TS_SKB_CB(state)); -+} -+ -+/** -+ * skb_find_text - Find a text pattern in skb data -+ * @skb: the buffer to look in -+ * @from: search offset -+ * @to: search limit -+ * @config: textsearch configuration -+ * @state: uninitialized textsearch state variable -+ * -+ * Finds a pattern in the skb data according to the specified -+ * textsearch configuration. Use textsearch_next() to retrieve -+ * subsequent occurrences of the pattern. Returns the offset -+ * to the first occurrence or UINT_MAX if no match was found. 
-+ */ -+unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, -+ unsigned int to, struct ts_config *config, -+ struct ts_state *state) -+{ -+ unsigned int ret; -+ -+ config->get_next_block = skb_ts_get_next_block; -+ config->finish = skb_ts_finish; -+ -+ skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); -+ -+ ret = textsearch_find(config, state); -+ return (ret <= to - from ? ret : UINT_MAX); -+} -+EXPORT_SYMBOL(skb_find_text); -+ -+/** -+ * skb_append_datato_frags - append the user data to a skb -+ * @sk: sock structure -+ * @skb: skb structure to be appened with user data. -+ * @getfrag: call back function to be used for getting the user data -+ * @from: pointer to user message iov -+ * @length: length of the iov message -+ * -+ * Description: This procedure append the user data in the fragment part -+ * of the skb if any page alloc fails user this procedure returns -ENOMEM -+ */ -+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, -+ int (*getfrag)(void *from, char *to, int offset, -+ int len, int odd, struct sk_buff *skb), -+ void *from, int length) -+{ -+ int frg_cnt = skb_shinfo(skb)->nr_frags; -+ int copy; -+ int offset = 0; -+ int ret; -+ struct page_frag *pfrag = ¤t->task_frag; -+ -+ do { -+ /* Return error if we don't have space for new frag */ -+ if (frg_cnt >= MAX_SKB_FRAGS) -+ return -EMSGSIZE; -+ -+ if (!sk_page_frag_refill(sk, pfrag)) -+ return -ENOMEM; -+ -+ /* copy the user data to page */ -+ copy = min_t(int, length, pfrag->size - pfrag->offset); -+ -+ ret = getfrag(from, page_address(pfrag->page) + pfrag->offset, -+ offset, copy, 0, skb); -+ if (ret < 0) -+ return -EFAULT; -+ -+ /* copy was successful so update the size parameters */ -+ skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset, -+ copy); -+ frg_cnt++; -+ pfrag->offset += copy; -+ get_page(pfrag->page); -+ -+ skb->truesize += copy; -+ atomic_add(copy, &sk->sk_wmem_alloc); -+ skb->len += copy; -+ skb->data_len += copy; -+ offset += copy; -+ length -= copy; -+ -+ } while (length > 0); -+ -+ return 0; -+} -+EXPORT_SYMBOL(skb_append_datato_frags); -+ -+/** -+ * skb_pull_rcsum - pull skb and update receive checksum -+ * @skb: buffer to update -+ * @len: length of data pulled -+ * -+ * This function performs an skb_pull on the packet and updates -+ * the CHECKSUM_COMPLETE checksum. It should be used on -+ * receive path processing instead of skb_pull unless you know -+ * that the checksum difference is zero (e.g., a valid IP header) -+ * or you are setting ip_summed to CHECKSUM_NONE. -+ */ -+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) -+{ -+ BUG_ON(len > skb->len); -+ skb->len -= len; -+ BUG_ON(skb->len < skb->data_len); -+ skb_postpull_rcsum(skb, skb->data, len); -+ return skb->data += len; -+} -+EXPORT_SYMBOL_GPL(skb_pull_rcsum); -+ -+/** -+ * skb_segment - Perform protocol segmentation on skb. -+ * @skb: buffer to segment -+ * @features: features for the output path (see dev->features) -+ * -+ * This function performs segmentation on the given skb. It returns -+ * a pointer to the first in a list of new skbs for the segments. -+ * In case of error it returns ERR_PTR(err). 
-+ */ -+struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) -+{ -+ struct sk_buff *segs = NULL; -+ struct sk_buff *tail = NULL; -+ struct sk_buff *fskb = skb_shinfo(skb)->frag_list; -+ unsigned int mss = skb_shinfo(skb)->gso_size; -+ unsigned int doffset = skb->data - skb_mac_header(skb); -+ unsigned int offset = doffset; -+ unsigned int tnl_hlen = skb_tnl_header_len(skb); -+ unsigned int headroom; -+ unsigned int len; -+ __be16 proto; -+ bool csum; -+ int sg = !!(features & NETIF_F_SG); -+ int nfrags = skb_shinfo(skb)->nr_frags; -+ int err = -ENOMEM; -+ int i = 0; -+ int pos; -+ -+ proto = skb_network_protocol(skb); -+ if (unlikely(!proto)) -+ return ERR_PTR(-EINVAL); -+ -+ csum = !!can_checksum_protocol(features, proto); -+ __skb_push(skb, doffset); -+ headroom = skb_headroom(skb); -+ pos = skb_headlen(skb); -+ -+ do { -+ struct sk_buff *nskb; -+ skb_frag_t *frag; -+ int hsize; -+ int size; -+ -+ len = skb->len - offset; -+ if (len > mss) -+ len = mss; -+ -+ hsize = skb_headlen(skb) - offset; -+ if (hsize < 0) -+ hsize = 0; -+ if (hsize > len || !sg) -+ hsize = len; -+ -+ if (!hsize && i >= nfrags) { -+ BUG_ON(fskb->len != len); -+ -+ pos += len; -+ nskb = skb_clone(fskb, GFP_ATOMIC); -+ fskb = fskb->next; -+ -+ if (unlikely(!nskb)) -+ goto err; -+ -+ hsize = skb_end_offset(nskb); -+ if (skb_cow_head(nskb, doffset + headroom)) { -+ kfree_skb(nskb); -+ goto err; -+ } -+ -+ nskb->truesize += skb_end_offset(nskb) - hsize; -+ skb_release_head_state(nskb); -+ __skb_push(nskb, doffset); -+ } else { -+ nskb = __alloc_skb(hsize + doffset + headroom, -+ GFP_ATOMIC, skb_alloc_rx_flag(skb), -+ NUMA_NO_NODE); -+ -+ if (unlikely(!nskb)) -+ goto err; -+ -+ skb_reserve(nskb, headroom); -+ __skb_put(nskb, doffset); -+ } -+ -+ if (segs) -+ tail->next = nskb; -+ else -+ segs = nskb; -+ tail = nskb; -+ -+ __copy_skb_header(nskb, skb); -+ nskb->mac_len = skb->mac_len; -+ -+ /* nskb and skb might have different headroom */ -+ if (nskb->ip_summed == CHECKSUM_PARTIAL) -+ nskb->csum_start += skb_headroom(nskb) - headroom; -+ -+ skb_reset_mac_header(nskb); -+ skb_set_network_header(nskb, skb->mac_len); -+ nskb->transport_header = (nskb->network_header + -+ skb_network_header_len(skb)); -+ -+ skb_copy_from_linear_data_offset(skb, -tnl_hlen, -+ nskb->data - tnl_hlen, -+ doffset + tnl_hlen); -+ -+ if (fskb != skb_shinfo(skb)->frag_list) -+ goto perform_csum_check; -+ -+ if (!sg) { -+ nskb->ip_summed = CHECKSUM_NONE; -+ nskb->csum = skb_copy_and_csum_bits(skb, offset, -+ skb_put(nskb, len), -+ len, 0); -+ continue; -+ } -+ -+ frag = skb_shinfo(nskb)->frags; -+ -+ skb_copy_from_linear_data_offset(skb, offset, -+ skb_put(nskb, hsize), hsize); -+ -+ skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; -+ -+ while (pos < offset + len && i < nfrags) { -+ *frag = skb_shinfo(skb)->frags[i]; -+ __skb_frag_ref(frag); -+ size = skb_frag_size(frag); -+ -+ if (pos < offset) { -+ frag->page_offset += offset - pos; -+ skb_frag_size_sub(frag, offset - pos); -+ } -+ -+ skb_shinfo(nskb)->nr_frags++; -+ -+ if (pos + size <= offset + len) { -+ i++; -+ pos += size; -+ } else { -+ skb_frag_size_sub(frag, pos + size - (offset + len)); -+ goto skip_fraglist; -+ } -+ -+ frag++; -+ } -+ -+ if (pos < offset + len) { -+ struct sk_buff *fskb2 = fskb; -+ -+ BUG_ON(pos + fskb->len != offset + len); -+ -+ pos += fskb->len; -+ fskb = fskb->next; -+ -+ if (fskb2->next) { -+ fskb2 = skb_clone(fskb2, GFP_ATOMIC); -+ if (!fskb2) -+ goto err; -+ } else -+ skb_get(fskb2); -+ -+ SKB_FRAG_ASSERT(nskb); -+ 
skb_shinfo(nskb)->frag_list = fskb2; -+ } -+ -+skip_fraglist: -+ nskb->data_len = len - hsize; -+ nskb->len += nskb->data_len; -+ nskb->truesize += nskb->data_len; -+ -+perform_csum_check: -+ if (!csum) { -+ nskb->csum = skb_checksum(nskb, doffset, -+ nskb->len - doffset, 0); -+ nskb->ip_summed = CHECKSUM_NONE; -+ } -+ } while ((offset += len) < skb->len); -+ -+ return segs; -+ -+err: -+ while ((skb = segs)) { -+ segs = skb->next; -+ kfree_skb(skb); -+ } -+ return ERR_PTR(err); -+} -+EXPORT_SYMBOL_GPL(skb_segment); -+ -+int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) -+{ -+ struct sk_buff *p = *head; -+ struct sk_buff *nskb; -+ struct skb_shared_info *skbinfo = skb_shinfo(skb); -+ struct skb_shared_info *pinfo = skb_shinfo(p); -+ unsigned int headroom; -+ unsigned int len = skb_gro_len(skb); -+ unsigned int offset = skb_gro_offset(skb); -+ unsigned int headlen = skb_headlen(skb); -+ unsigned int delta_truesize; -+ -+ if (p->len + len >= 65536) -+ return -E2BIG; -+ -+ if (pinfo->frag_list) -+ goto merge; -+ else if (headlen <= offset) { -+ skb_frag_t *frag; -+ skb_frag_t *frag2; -+ int i = skbinfo->nr_frags; -+ int nr_frags = pinfo->nr_frags + i; -+ -+ offset -= headlen; -+ -+ if (nr_frags > MAX_SKB_FRAGS) -+ return -E2BIG; -+ -+ pinfo->nr_frags = nr_frags; -+ skbinfo->nr_frags = 0; -+ -+ frag = pinfo->frags + nr_frags; -+ frag2 = skbinfo->frags + i; -+ do { -+ *--frag = *--frag2; -+ } while (--i); -+ -+ frag->page_offset += offset; -+ skb_frag_size_sub(frag, offset); -+ -+ /* all fragments truesize : remove (head size + sk_buff) */ -+ delta_truesize = skb->truesize - -+ SKB_TRUESIZE(skb_end_offset(skb)); -+ -+ skb->truesize -= skb->data_len; -+ skb->len -= skb->data_len; -+ skb->data_len = 0; -+ -+ NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE; -+ goto done; -+ } else if (skb->head_frag) { -+ int nr_frags = pinfo->nr_frags; -+ skb_frag_t *frag = pinfo->frags + nr_frags; -+ struct page *page = virt_to_head_page(skb->head); -+ unsigned int first_size = headlen - offset; -+ unsigned int first_offset; -+ -+ if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) -+ return -E2BIG; -+ -+ first_offset = skb->data - -+ (unsigned char *)page_address(page) + -+ offset; -+ -+ pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; -+ -+ frag->page.p = page; -+ frag->page_offset = first_offset; -+ skb_frag_size_set(frag, first_size); -+ -+ memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); -+ /* We dont need to clear skbinfo->nr_frags here */ -+ -+ delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); -+ NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; -+ goto done; -+ } else if (skb_gro_len(p) != pinfo->gso_size) -+ return -E2BIG; -+ -+ headroom = skb_headroom(p); -+ nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); -+ if (unlikely(!nskb)) -+ return -ENOMEM; -+ -+ __copy_skb_header(nskb, p); -+ nskb->mac_len = p->mac_len; -+ -+ skb_reserve(nskb, headroom); -+ __skb_put(nskb, skb_gro_offset(p)); -+ -+ skb_set_mac_header(nskb, skb_mac_header(p) - p->data); -+ skb_set_network_header(nskb, skb_network_offset(p)); -+ skb_set_transport_header(nskb, skb_transport_offset(p)); -+ -+ __skb_pull(p, skb_gro_offset(p)); -+ memcpy(skb_mac_header(nskb), skb_mac_header(p), -+ p->data - skb_mac_header(p)); -+ -+ skb_shinfo(nskb)->frag_list = p; -+ skb_shinfo(nskb)->gso_size = pinfo->gso_size; -+ pinfo->gso_size = 0; -+ skb_header_release(p); -+ NAPI_GRO_CB(nskb)->last = p; -+ -+ nskb->data_len += p->len; -+ nskb->truesize += p->truesize; -+ nskb->len += p->len; -+ 
-+ *head = nskb; -+ nskb->next = p->next; -+ p->next = NULL; -+ -+ p = nskb; -+ -+merge: -+ delta_truesize = skb->truesize; -+ if (offset > headlen) { -+ unsigned int eat = offset - headlen; -+ -+ skbinfo->frags[0].page_offset += eat; -+ skb_frag_size_sub(&skbinfo->frags[0], eat); -+ skb->data_len -= eat; -+ skb->len -= eat; -+ offset = headlen; -+ } -+ -+ __skb_pull(skb, offset); -+ -+ NAPI_GRO_CB(p)->last->next = skb; -+ NAPI_GRO_CB(p)->last = skb; -+ skb_header_release(skb); -+ -+done: -+ NAPI_GRO_CB(p)->count++; -+ p->data_len += len; -+ p->truesize += delta_truesize; -+ p->len += len; -+ -+ NAPI_GRO_CB(skb)->same_flow = 1; -+ return 0; -+} -+EXPORT_SYMBOL_GPL(skb_gro_receive); -+ -+void __init skb_init(void) -+{ -+ skbuff_head_cache = kmem_cache_create("skbuff_head_cache", -+ sizeof(struct sk_buff), -+ 0, -+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, -+ NULL); -+ skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", -+ (2*sizeof(struct sk_buff)) + -+ sizeof(atomic_t), -+ 0, -+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, -+ NULL); -+} -+ -+/** -+ * skb_to_sgvec - Fill a scatter-gather list from a socket buffer -+ * @skb: Socket buffer containing the buffers to be mapped -+ * @sg: The scatter-gather list to map into -+ * @offset: The offset into the buffer's contents to start mapping -+ * @len: Length of buffer space to be mapped -+ * -+ * Fill the specified scatter-gather list with mappings/pointers into a -+ * region of the buffer space attached to a socket buffer. -+ */ -+static int -+__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -+{ -+ int start = skb_headlen(skb); -+ int i, copy = start - offset; -+ struct sk_buff *frag_iter; -+ int elt = 0; -+ -+ if (copy > 0) { -+ if (copy > len) -+ copy = len; -+ sg_set_buf(sg, skb->data + offset, copy); -+ elt++; -+ if ((len -= copy) == 0) -+ return elt; -+ offset += copy; -+ } -+ -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { -+ int end; -+ -+ WARN_ON(start > offset + len); -+ -+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); -+ if ((copy = end - offset) > 0) { -+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; -+ -+ if (copy > len) -+ copy = len; -+ sg_set_page(&sg[elt], skb_frag_page(frag), copy, -+ frag->page_offset+offset-start); -+ elt++; -+ if (!(len -= copy)) -+ return elt; -+ offset += copy; -+ } -+ start = end; -+ } -+ -+ skb_walk_frags(skb, frag_iter) { -+ int end; -+ -+ WARN_ON(start > offset + len); -+ -+ end = start + frag_iter->len; -+ if ((copy = end - offset) > 0) { -+ if (copy > len) -+ copy = len; -+ elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start, -+ copy); -+ if ((len -= copy) == 0) -+ return elt; -+ offset += copy; -+ } -+ start = end; -+ } -+ BUG_ON(len); -+ return elt; -+} -+ -+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -+{ -+ int nsg = __skb_to_sgvec(skb, sg, offset, len); -+ -+ sg_mark_end(&sg[nsg - 1]); -+ -+ return nsg; -+} -+EXPORT_SYMBOL_GPL(skb_to_sgvec); -+ -+/** -+ * skb_cow_data - Check that a socket buffer's data buffers are writable -+ * @skb: The socket buffer to check. -+ * @tailbits: Amount of trailing space to be added -+ * @trailer: Returned pointer to the skb where the @tailbits space begins -+ * -+ * Make sure that the data buffers attached to a socket buffer are -+ * writable. If they are not, private copies are made of the data buffers -+ * and the socket buffer is set to use these instead. 
-+ * -+ * If @tailbits is given, make sure that there is space to write @tailbits -+ * bytes of data beyond current end of socket buffer. @trailer will be -+ * set to point to the skb in which this space begins. -+ * -+ * The number of scatterlist elements required to completely map the -+ * COW'd and extended socket buffer will be returned. -+ */ -+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) -+{ -+ int copyflag; -+ int elt; -+ struct sk_buff *skb1, **skb_p; -+ -+ /* If skb is cloned or its head is paged, reallocate -+ * head pulling out all the pages (pages are considered not writable -+ * at the moment even if they are anonymous). -+ */ -+ if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && -+ __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) -+ return -ENOMEM; -+ -+ /* Easy case. Most of packets will go this way. */ -+ if (!skb_has_frag_list(skb)) { -+ /* A little of trouble, not enough of space for trailer. -+ * This should not happen, when stack is tuned to generate -+ * good frames. OK, on miss we reallocate and reserve even more -+ * space, 128 bytes is fair. */ -+ -+ if (skb_tailroom(skb) < tailbits && -+ pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) -+ return -ENOMEM; -+ -+ /* Voila! */ -+ *trailer = skb; -+ return 1; -+ } -+ -+ /* Misery. We are in troubles, going to mincer fragments... */ -+ -+ elt = 1; -+ skb_p = &skb_shinfo(skb)->frag_list; -+ copyflag = 0; -+ -+ while ((skb1 = *skb_p) != NULL) { -+ int ntail = 0; -+ -+ /* The fragment is partially pulled by someone, -+ * this can happen on input. Copy it and everything -+ * after it. */ -+ -+ if (skb_shared(skb1)) -+ copyflag = 1; -+ -+ /* If the skb is the last, worry about trailer. */ -+ -+ if (skb1->next == NULL && tailbits) { -+ if (skb_shinfo(skb1)->nr_frags || -+ skb_has_frag_list(skb1) || -+ skb_tailroom(skb1) < tailbits) -+ ntail = tailbits + 128; -+ } -+ -+ if (copyflag || -+ skb_cloned(skb1) || -+ ntail || -+ skb_shinfo(skb1)->nr_frags || -+ skb_has_frag_list(skb1)) { -+ struct sk_buff *skb2; -+ -+ /* Fuck, we are miserable poor guys... */ -+ if (ntail == 0) -+ skb2 = skb_copy(skb1, GFP_ATOMIC); -+ else -+ skb2 = skb_copy_expand(skb1, -+ skb_headroom(skb1), -+ ntail, -+ GFP_ATOMIC); -+ if (unlikely(skb2 == NULL)) -+ return -ENOMEM; -+ -+ if (skb1->sk) -+ skb_set_owner_w(skb2, skb1->sk); -+ -+ /* Looking around. Are we still alive? 
-+ * OK, link new skb, drop old one */ -+ -+ skb2->next = skb1->next; -+ *skb_p = skb2; -+ kfree_skb(skb1); -+ skb1 = skb2; -+ } -+ elt++; -+ *trailer = skb1; -+ skb_p = &skb1->next; -+ } -+ -+ return elt; -+} -+EXPORT_SYMBOL_GPL(skb_cow_data); -+ -+static void sock_rmem_free(struct sk_buff *skb) -+{ -+ struct sock *sk = skb->sk; -+ -+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc); -+} -+ -+/* -+ * Note: We dont mem charge error packets (no sk_forward_alloc changes) -+ */ -+int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) -+{ -+ int len = skb->len; -+ -+ if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= -+ (unsigned int)sk->sk_rcvbuf) -+ return -ENOMEM; -+ -+ skb_orphan(skb); -+ skb->sk = sk; -+ skb->destructor = sock_rmem_free; -+ atomic_add(skb->truesize, &sk->sk_rmem_alloc); -+ -+ /* before exiting rcu section, make sure dst is refcounted */ -+ skb_dst_force(skb); -+ -+ skb_queue_tail(&sk->sk_error_queue, skb); -+ if (!sock_flag(sk, SOCK_DEAD)) -+ sk->sk_data_ready(sk, len); -+ return 0; -+} -+EXPORT_SYMBOL(sock_queue_err_skb); -+ -+void skb_tstamp_tx(struct sk_buff *orig_skb, -+ struct skb_shared_hwtstamps *hwtstamps) -+{ -+ struct sock *sk = orig_skb->sk; -+ struct sock_exterr_skb *serr; -+ struct sk_buff *skb; -+ int err; -+ -+ if (!sk) -+ return; -+ -+ if (hwtstamps) { -+ *skb_hwtstamps(orig_skb) = -+ *hwtstamps; -+ } else { -+ /* -+ * no hardware time stamps available, -+ * so keep the shared tx_flags and only -+ * store software time stamp -+ */ -+ orig_skb->tstamp = ktime_get_real(); -+ } -+ -+ skb = skb_clone(orig_skb, GFP_ATOMIC); -+ if (!skb) -+ return; -+ -+ serr = SKB_EXT_ERR(skb); -+ memset(serr, 0, sizeof(*serr)); -+ serr->ee.ee_errno = ENOMSG; -+ serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; -+ -+ err = sock_queue_err_skb(sk, skb); -+ -+ if (err) -+ kfree_skb(skb); -+} -+EXPORT_SYMBOL_GPL(skb_tstamp_tx); -+ -+void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) -+{ -+ struct sock *sk = skb->sk; -+ struct sock_exterr_skb *serr; -+ int err; -+ -+ skb->wifi_acked_valid = 1; -+ skb->wifi_acked = acked; -+ -+ serr = SKB_EXT_ERR(skb); -+ memset(serr, 0, sizeof(*serr)); -+ serr->ee.ee_errno = ENOMSG; -+ serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; -+ -+ err = sock_queue_err_skb(sk, skb); -+ if (err) -+ kfree_skb(skb); -+} -+EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); -+ -+ -+/** -+ * skb_partial_csum_set - set up and verify partial csum values for packet -+ * @skb: the skb to set -+ * @start: the number of bytes after skb->data to start checksumming. -+ * @off: the offset from start to place the checksum. -+ * -+ * For untrusted partially-checksummed packets, we need to make sure the values -+ * for skb->csum_start and skb->csum_offset are valid so we don't oops. -+ * -+ * This function checks and sets those values and skb->ip_summed: if this -+ * returns false you should drop the packet. 
-+ */ -+bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) -+{ -+ if (unlikely(start > skb_headlen(skb)) || -+ unlikely((int)start + off > skb_headlen(skb) - 2)) { -+ net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n", -+ start, off, skb_headlen(skb)); -+ return false; -+ } -+ skb->ip_summed = CHECKSUM_PARTIAL; -+ skb->csum_start = skb_headroom(skb) + start; -+ skb->csum_offset = off; -+ skb_set_transport_header(skb, start); -+ return true; -+} -+EXPORT_SYMBOL_GPL(skb_partial_csum_set); -+ -+void __skb_warn_lro_forwarding(const struct sk_buff *skb) -+{ -+ net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", -+ skb->dev->name); -+} -+EXPORT_SYMBOL(__skb_warn_lro_forwarding); -+ -+void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) -+{ -+ if (head_stolen) { -+ skb_release_head_state(skb); -+ kmem_cache_free(skbuff_head_cache, skb); -+ } else { -+ __kfree_skb(skb); -+ } -+} -+EXPORT_SYMBOL(kfree_skb_partial); -+ -+/** -+ * skb_try_coalesce - try to merge skb to prior one -+ * @to: prior buffer -+ * @from: buffer to add -+ * @fragstolen: pointer to boolean -+ * @delta_truesize: how much more was allocated than was requested -+ */ -+bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, -+ bool *fragstolen, int *delta_truesize) -+{ -+ int i, delta, len = from->len; -+ -+ *fragstolen = false; -+ -+ if (skb_cloned(to)) -+ return false; -+ -+ if (len <= skb_tailroom(to)) { -+ BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); -+ *delta_truesize = 0; -+ return true; -+ } -+ -+ if (skb_has_frag_list(to) || skb_has_frag_list(from)) -+ return false; -+ -+ if (skb_headlen(from) != 0) { -+ struct page *page; -+ unsigned int offset; -+ -+ if (skb_shinfo(to)->nr_frags + -+ skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) -+ return false; -+ -+ if (skb_head_is_locked(from)) -+ return false; -+ -+ delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); -+ -+ page = virt_to_head_page(from->head); -+ offset = from->data - (unsigned char *)page_address(page); -+ -+ skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, -+ page, offset, skb_headlen(from)); -+ *fragstolen = true; -+ } else { -+ if (skb_shinfo(to)->nr_frags + -+ skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) -+ return false; -+ -+ delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); -+ } -+ -+ WARN_ON_ONCE(delta < len); -+ -+ memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, -+ skb_shinfo(from)->frags, -+ skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); -+ skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; -+ -+ if (!skb_cloned(from)) -+ skb_shinfo(from)->nr_frags = 0; -+ -+ /* if the skb is not cloned this does nothing -+ * since we set nr_frags to 0. 
-+ */ -+ for (i = 0; i < skb_shinfo(from)->nr_frags; i++) -+ skb_frag_ref(from, i); -+ -+ to->truesize += delta; -+ to->len += len; -+ to->data_len += len; -+ -+ *delta_truesize = delta; -+ return true; -+} -+EXPORT_SYMBOL(skb_try_coalesce); -diff -ruN linux-3.10.27/net/ipv6/ip6_output.c linux-3.10.27-imq/net/ipv6/ip6_output.c ---- linux-3.10.27/net/ipv6/ip6_output.c 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/net/ipv6/ip6_output.c 2014-01-18 10:19:59.348342972 +0100 -@@ -89,9 +89,6 @@ - struct in6_addr *nexthop; - int ret; - -- skb->protocol = htons(ETH_P_IPV6); -- skb->dev = dev; -- - if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - -@@ -168,6 +165,13 @@ - return 0; - } - -+ /* -+ * IMQ-patch: moved setting skb->dev and skb->protocol from -+ * ip6_finish_output2 to fix crashing at netif_skb_features(). -+ */ -+ skb->protocol = htons(ETH_P_IPV6); -+ skb->dev = dev; -+ - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, - ip6_finish_output, - !(IP6CB(skb)->flags & IP6SKB_REROUTED)); -diff -ruN linux-3.10.27/net/ipv6/ip6_output.c.orig linux-3.10.27-imq/net/ipv6/ip6_output.c.orig ---- linux-3.10.27/net/ipv6/ip6_output.c.orig 1970-01-01 01:00:00.000000000 +0100 -+++ linux-3.10.27-imq/net/ipv6/ip6_output.c.orig 2014-01-16 00:29:14.000000000 +0100 -@@ -0,0 +1,1580 @@ -+/* -+ * IPv6 output functions -+ * Linux INET6 implementation -+ * -+ * Authors: -+ * Pedro Roque -+ * -+ * Based on linux/net/ipv4/ip_output.c -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ * -+ * Changes: -+ * A.N.Kuznetsov : airthmetics in fragmentation. -+ * extension headers are implemented. -+ * route changes now work. -+ * ip6_forward does not confuse sniffers. -+ * etc. -+ * -+ * H. 
von Brand : Added missing #include -+ * Imran Patel : frag id should be in NBO -+ * Kazunori MIYAZAWA @USAGI -+ * : add ip6_append_data and related functions -+ * for datagram xmit -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+int __ip6_local_out(struct sk_buff *skb) -+{ -+ int len; -+ -+ len = skb->len - sizeof(struct ipv6hdr); -+ if (len > IPV6_MAXPLEN) -+ len = 0; -+ ipv6_hdr(skb)->payload_len = htons(len); -+ -+ return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, -+ skb_dst(skb)->dev, dst_output); -+} -+ -+int ip6_local_out(struct sk_buff *skb) -+{ -+ int err; -+ -+ err = __ip6_local_out(skb); -+ if (likely(err == 1)) -+ err = dst_output(skb); -+ -+ return err; -+} -+EXPORT_SYMBOL_GPL(ip6_local_out); -+ -+static int ip6_finish_output2(struct sk_buff *skb) -+{ -+ struct dst_entry *dst = skb_dst(skb); -+ struct net_device *dev = dst->dev; -+ struct neighbour *neigh; -+ struct in6_addr *nexthop; -+ int ret; -+ -+ skb->protocol = htons(ETH_P_IPV6); -+ skb->dev = dev; -+ -+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { -+ struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); -+ -+ if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && -+ ((mroute6_socket(dev_net(dev), skb) && -+ !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || -+ ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, -+ &ipv6_hdr(skb)->saddr))) { -+ struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); -+ -+ /* Do not check for IFF_ALLMULTI; multicast routing -+ is not supported in any case. -+ */ -+ if (newskb) -+ NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, -+ newskb, NULL, newskb->dev, -+ dev_loopback_xmit); -+ -+ if (ipv6_hdr(skb)->hop_limit == 0) { -+ IP6_INC_STATS(dev_net(dev), idev, -+ IPSTATS_MIB_OUTDISCARDS); -+ kfree_skb(skb); -+ return 0; -+ } -+ } -+ -+ IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, -+ skb->len); -+ -+ if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= -+ IPV6_ADDR_SCOPE_NODELOCAL && -+ !(dev->flags & IFF_LOOPBACK)) { -+ kfree_skb(skb); -+ return 0; -+ } -+ } -+ -+ rcu_read_lock_bh(); -+ nexthop = rt6_nexthop((struct rt6_info *)dst); -+ neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); -+ if (unlikely(!neigh)) -+ neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); -+ if (!IS_ERR(neigh)) { -+ ret = dst_neigh_output(dst, neigh, skb); -+ rcu_read_unlock_bh(); -+ return ret; -+ } -+ rcu_read_unlock_bh(); -+ -+ IP6_INC_STATS(dev_net(dst->dev), -+ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); -+ kfree_skb(skb); -+ return -EINVAL; -+} -+ -+static int ip6_finish_output(struct sk_buff *skb) -+{ -+ if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || -+ dst_allfrag(skb_dst(skb)) || -+ (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) -+ return ip6_fragment(skb, ip6_finish_output2); -+ else -+ return ip6_finish_output2(skb); -+} -+ -+int ip6_output(struct sk_buff *skb) -+{ -+ struct net_device *dev = skb_dst(skb)->dev; -+ struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); -+ if (unlikely(idev->cnf.disable_ipv6)) { -+ IP6_INC_STATS(dev_net(dev), idev, -+ IPSTATS_MIB_OUTDISCARDS); -+ kfree_skb(skb); -+ return 0; -+ } -+ -+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, -+ ip6_finish_output, -+ !(IP6CB(skb)->flags & IP6SKB_REROUTED)); -+} -+ -+/* -+ * xmit an sk_buff (used by 
TCP, SCTP and DCCP) -+ */ -+ -+int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, -+ struct ipv6_txoptions *opt, int tclass) -+{ -+ struct net *net = sock_net(sk); -+ struct ipv6_pinfo *np = inet6_sk(sk); -+ struct in6_addr *first_hop = &fl6->daddr; -+ struct dst_entry *dst = skb_dst(skb); -+ struct ipv6hdr *hdr; -+ u8 proto = fl6->flowi6_proto; -+ int seg_len = skb->len; -+ int hlimit = -1; -+ u32 mtu; -+ -+ if (opt) { -+ unsigned int head_room; -+ -+ /* First: exthdrs may take lots of space (~8K for now) -+ MAX_HEADER is not enough. -+ */ -+ head_room = opt->opt_nflen + opt->opt_flen; -+ seg_len += head_room; -+ head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); -+ -+ if (skb_headroom(skb) < head_room) { -+ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); -+ if (skb2 == NULL) { -+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), -+ IPSTATS_MIB_OUTDISCARDS); -+ kfree_skb(skb); -+ return -ENOBUFS; -+ } -+ consume_skb(skb); -+ skb = skb2; -+ skb_set_owner_w(skb, sk); -+ } -+ if (opt->opt_flen) -+ ipv6_push_frag_opts(skb, opt, &proto); -+ if (opt->opt_nflen) -+ ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); -+ } -+ -+ skb_push(skb, sizeof(struct ipv6hdr)); -+ skb_reset_network_header(skb); -+ hdr = ipv6_hdr(skb); -+ -+ /* -+ * Fill in the IPv6 header -+ */ -+ if (np) -+ hlimit = np->hop_limit; -+ if (hlimit < 0) -+ hlimit = ip6_dst_hoplimit(dst); -+ -+ ip6_flow_hdr(hdr, tclass, fl6->flowlabel); -+ -+ hdr->payload_len = htons(seg_len); -+ hdr->nexthdr = proto; -+ hdr->hop_limit = hlimit; -+ -+ hdr->saddr = fl6->saddr; -+ hdr->daddr = *first_hop; -+ -+ skb->priority = sk->sk_priority; -+ skb->mark = sk->sk_mark; -+ -+ mtu = dst_mtu(dst); -+ if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { -+ IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), -+ IPSTATS_MIB_OUT, skb->len); -+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, -+ dst->dev, dst_output); -+ } -+ -+ skb->dev = dst->dev; -+ ipv6_local_error(sk, EMSGSIZE, fl6, mtu); -+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); -+ kfree_skb(skb); -+ return -EMSGSIZE; -+} -+ -+EXPORT_SYMBOL(ip6_xmit); -+ -+static int ip6_call_ra_chain(struct sk_buff *skb, int sel) -+{ -+ struct ip6_ra_chain *ra; -+ struct sock *last = NULL; -+ -+ read_lock(&ip6_ra_lock); -+ for (ra = ip6_ra_chain; ra; ra = ra->next) { -+ struct sock *sk = ra->sk; -+ if (sk && ra->sel == sel && -+ (!sk->sk_bound_dev_if || -+ sk->sk_bound_dev_if == skb->dev->ifindex)) { -+ if (last) { -+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); -+ if (skb2) -+ rawv6_rcv(last, skb2); -+ } -+ last = sk; -+ } -+ } -+ -+ if (last) { -+ rawv6_rcv(last, skb); -+ read_unlock(&ip6_ra_lock); -+ return 1; -+ } -+ read_unlock(&ip6_ra_lock); -+ return 0; -+} -+ -+static int ip6_forward_proxy_check(struct sk_buff *skb) -+{ -+ struct ipv6hdr *hdr = ipv6_hdr(skb); -+ u8 nexthdr = hdr->nexthdr; -+ __be16 frag_off; -+ int offset; -+ -+ if (ipv6_ext_hdr(nexthdr)) { -+ offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); -+ if (offset < 0) -+ return 0; -+ } else -+ offset = sizeof(struct ipv6hdr); -+ -+ if (nexthdr == IPPROTO_ICMPV6) { -+ struct icmp6hdr *icmp6; -+ -+ if (!pskb_may_pull(skb, (skb_network_header(skb) + -+ offset + 1 - skb->data))) -+ return 0; -+ -+ icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); -+ -+ switch (icmp6->icmp6_type) { -+ case NDISC_ROUTER_SOLICITATION: -+ case NDISC_ROUTER_ADVERTISEMENT: -+ case NDISC_NEIGHBOUR_SOLICITATION: -+ case 
NDISC_NEIGHBOUR_ADVERTISEMENT: -+ case NDISC_REDIRECT: -+ /* For reaction involving unicast neighbor discovery -+ * message destined to the proxied address, pass it to -+ * input function. -+ */ -+ return 1; -+ default: -+ break; -+ } -+ } -+ -+ /* -+ * The proxying router can't forward traffic sent to a link-local -+ * address, so signal the sender and discard the packet. This -+ * behavior is clarified by the MIPv6 specification. -+ */ -+ if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { -+ dst_link_failure(skb); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static inline int ip6_forward_finish(struct sk_buff *skb) -+{ -+ return dst_output(skb); -+} -+ -+int ip6_forward(struct sk_buff *skb) -+{ -+ struct dst_entry *dst = skb_dst(skb); -+ struct ipv6hdr *hdr = ipv6_hdr(skb); -+ struct inet6_skb_parm *opt = IP6CB(skb); -+ struct net *net = dev_net(dst->dev); -+ u32 mtu; -+ -+ if (net->ipv6.devconf_all->forwarding == 0) -+ goto error; -+ -+ if (skb_warn_if_lro(skb)) -+ goto drop; -+ -+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { -+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); -+ goto drop; -+ } -+ -+ if (skb->pkt_type != PACKET_HOST) -+ goto drop; -+ -+ skb_forward_csum(skb); -+ -+ /* -+ * We DO NOT make any processing on -+ * RA packets, pushing them to user level AS IS -+ * without ane WARRANTY that application will be able -+ * to interpret them. The reason is that we -+ * cannot make anything clever here. -+ * -+ * We are not end-node, so that if packet contains -+ * AH/ESP, we cannot make anything. -+ * Defragmentation also would be mistake, RA packets -+ * cannot be fragmented, because there is no warranty -+ * that different fragments will go along one path. --ANK -+ */ -+ if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { -+ if (ip6_call_ra_chain(skb, ntohs(opt->ra))) -+ return 0; -+ } -+ -+ /* -+ * check and decrement ttl -+ */ -+ if (hdr->hop_limit <= 1) { -+ /* Force OUTPUT device used as source address */ -+ skb->dev = dst->dev; -+ icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); -+ IP6_INC_STATS_BH(net, -+ ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); -+ -+ kfree_skb(skb); -+ return -ETIMEDOUT; -+ } -+ -+ /* XXX: idev->cnf.proxy_ndp? */ -+ if (net->ipv6.devconf_all->proxy_ndp && -+ pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { -+ int proxied = ip6_forward_proxy_check(skb); -+ if (proxied > 0) -+ return ip6_input(skb); -+ else if (proxied < 0) { -+ IP6_INC_STATS(net, ip6_dst_idev(dst), -+ IPSTATS_MIB_INDISCARDS); -+ goto drop; -+ } -+ } -+ -+ if (!xfrm6_route_forward(skb)) { -+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); -+ goto drop; -+ } -+ dst = skb_dst(skb); -+ -+ /* IPv6 specs say nothing about it, but it is clear that we cannot -+ send redirects to source routed frames. -+ We don't send redirects to frames decapsulated from IPsec. -+ */ -+ if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { -+ struct in6_addr *target = NULL; -+ struct inet_peer *peer; -+ struct rt6_info *rt; -+ -+ /* -+ * incoming and outgoing devices are the same -+ * send a redirect. 
-+ */ -+ -+ rt = (struct rt6_info *) dst; -+ if (rt->rt6i_flags & RTF_GATEWAY) -+ target = &rt->rt6i_gateway; -+ else -+ target = &hdr->daddr; -+ -+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); -+ -+ /* Limit redirects both by destination (here) -+ and by source (inside ndisc_send_redirect) -+ */ -+ if (inet_peer_xrlim_allow(peer, 1*HZ)) -+ ndisc_send_redirect(skb, target); -+ if (peer) -+ inet_putpeer(peer); -+ } else { -+ int addrtype = ipv6_addr_type(&hdr->saddr); -+ -+ /* This check is security critical. */ -+ if (addrtype == IPV6_ADDR_ANY || -+ addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) -+ goto error; -+ if (addrtype & IPV6_ADDR_LINKLOCAL) { -+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, -+ ICMPV6_NOT_NEIGHBOUR, 0); -+ goto error; -+ } -+ } -+ -+ mtu = dst_mtu(dst); -+ if (mtu < IPV6_MIN_MTU) -+ mtu = IPV6_MIN_MTU; -+ -+ if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || -+ (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { -+ /* Again, force OUTPUT device used as source address */ -+ skb->dev = dst->dev; -+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -+ IP6_INC_STATS_BH(net, -+ ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); -+ IP6_INC_STATS_BH(net, -+ ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); -+ kfree_skb(skb); -+ return -EMSGSIZE; -+ } -+ -+ if (skb_cow(skb, dst->dev->hard_header_len)) { -+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); -+ goto drop; -+ } -+ -+ hdr = ipv6_hdr(skb); -+ -+ /* Mangling hops number delayed to point after skb COW */ -+ -+ hdr->hop_limit--; -+ -+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); -+ IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); -+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, -+ ip6_forward_finish); -+ -+error: -+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); -+drop: -+ kfree_skb(skb); -+ return -EINVAL; -+} -+ -+static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) -+{ -+ to->pkt_type = from->pkt_type; -+ to->priority = from->priority; -+ to->protocol = from->protocol; -+ skb_dst_drop(to); -+ skb_dst_set(to, dst_clone(skb_dst(from))); -+ to->dev = from->dev; -+ to->mark = from->mark; -+ -+#ifdef CONFIG_NET_SCHED -+ to->tc_index = from->tc_index; -+#endif -+ nf_copy(to, from); -+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) -+ to->nf_trace = from->nf_trace; -+#endif -+ skb_copy_secmark(to, from); -+} -+ -+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) -+{ -+ struct sk_buff *frag; -+ struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); -+ struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; -+ struct ipv6hdr *tmp_hdr; -+ struct frag_hdr *fh; -+ unsigned int mtu, hlen, left, len; -+ int hroom, troom; -+ __be32 frag_id = 0; -+ int ptr, offset = 0, err=0; -+ u8 *prevhdr, nexthdr = 0; -+ struct net *net = dev_net(skb_dst(skb)->dev); -+ -+ hlen = ip6_find_1stfragopt(skb, &prevhdr); -+ nexthdr = *prevhdr; -+ -+ mtu = ip6_skb_dst_mtu(skb); -+ -+ /* We must not fragment if the socket is set to force MTU discovery -+ * or if the skb it not generated by a local socket. 
-+ */ -+ if (unlikely(!skb->local_df && skb->len > mtu) || -+ (IP6CB(skb)->frag_max_size && -+ IP6CB(skb)->frag_max_size > mtu)) { -+ if (skb->sk && dst_allfrag(skb_dst(skb))) -+ sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); -+ -+ skb->dev = skb_dst(skb)->dev; -+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), -+ IPSTATS_MIB_FRAGFAILS); -+ kfree_skb(skb); -+ return -EMSGSIZE; -+ } -+ -+ if (np && np->frag_size < mtu) { -+ if (np->frag_size) -+ mtu = np->frag_size; -+ } -+ mtu -= hlen + sizeof(struct frag_hdr); -+ -+ if (skb_has_frag_list(skb)) { -+ int first_len = skb_pagelen(skb); -+ struct sk_buff *frag2; -+ -+ if (first_len - hlen > mtu || -+ ((first_len - hlen) & 7) || -+ skb_cloned(skb)) -+ goto slow_path; -+ -+ skb_walk_frags(skb, frag) { -+ /* Correct geometry. */ -+ if (frag->len > mtu || -+ ((frag->len & 7) && frag->next) || -+ skb_headroom(frag) < hlen) -+ goto slow_path_clean; -+ -+ /* Partially cloned skb? */ -+ if (skb_shared(frag)) -+ goto slow_path_clean; -+ -+ BUG_ON(frag->sk); -+ if (skb->sk) { -+ frag->sk = skb->sk; -+ frag->destructor = sock_wfree; -+ } -+ skb->truesize -= frag->truesize; -+ } -+ -+ err = 0; -+ offset = 0; -+ frag = skb_shinfo(skb)->frag_list; -+ skb_frag_list_init(skb); -+ /* BUILD HEADER */ -+ -+ *prevhdr = NEXTHDR_FRAGMENT; -+ tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); -+ if (!tmp_hdr) { -+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), -+ IPSTATS_MIB_FRAGFAILS); -+ return -ENOMEM; -+ } -+ -+ __skb_pull(skb, hlen); -+ fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); -+ __skb_push(skb, hlen); -+ skb_reset_network_header(skb); -+ memcpy(skb_network_header(skb), tmp_hdr, hlen); -+ -+ ipv6_select_ident(fh, rt); -+ fh->nexthdr = nexthdr; -+ fh->reserved = 0; -+ fh->frag_off = htons(IP6_MF); -+ frag_id = fh->identification; -+ -+ first_len = skb_pagelen(skb); -+ skb->data_len = first_len - skb_headlen(skb); -+ skb->len = first_len; -+ ipv6_hdr(skb)->payload_len = htons(first_len - -+ sizeof(struct ipv6hdr)); -+ -+ dst_hold(&rt->dst); -+ -+ for (;;) { -+ /* Prepare header of the next frame, -+ * before previous one went down. 
*/ -+ if (frag) { -+ frag->ip_summed = CHECKSUM_NONE; -+ skb_reset_transport_header(frag); -+ fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); -+ __skb_push(frag, hlen); -+ skb_reset_network_header(frag); -+ memcpy(skb_network_header(frag), tmp_hdr, -+ hlen); -+ offset += skb->len - hlen - sizeof(struct frag_hdr); -+ fh->nexthdr = nexthdr; -+ fh->reserved = 0; -+ fh->frag_off = htons(offset); -+ if (frag->next != NULL) -+ fh->frag_off |= htons(IP6_MF); -+ fh->identification = frag_id; -+ ipv6_hdr(frag)->payload_len = -+ htons(frag->len - -+ sizeof(struct ipv6hdr)); -+ ip6_copy_metadata(frag, skb); -+ } -+ -+ err = output(skb); -+ if(!err) -+ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), -+ IPSTATS_MIB_FRAGCREATES); -+ -+ if (err || !frag) -+ break; -+ -+ skb = frag; -+ frag = skb->next; -+ skb->next = NULL; -+ } -+ -+ kfree(tmp_hdr); -+ -+ if (err == 0) { -+ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), -+ IPSTATS_MIB_FRAGOKS); -+ ip6_rt_put(rt); -+ return 0; -+ } -+ -+ while (frag) { -+ skb = frag->next; -+ kfree_skb(frag); -+ frag = skb; -+ } -+ -+ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), -+ IPSTATS_MIB_FRAGFAILS); -+ ip6_rt_put(rt); -+ return err; -+ -+slow_path_clean: -+ skb_walk_frags(skb, frag2) { -+ if (frag2 == frag) -+ break; -+ frag2->sk = NULL; -+ frag2->destructor = NULL; -+ skb->truesize += frag2->truesize; -+ } -+ } -+ -+slow_path: -+ if ((skb->ip_summed == CHECKSUM_PARTIAL) && -+ skb_checksum_help(skb)) -+ goto fail; -+ -+ left = skb->len - hlen; /* Space per frame */ -+ ptr = hlen; /* Where to start from */ -+ -+ /* -+ * Fragment the datagram. -+ */ -+ -+ *prevhdr = NEXTHDR_FRAGMENT; -+ hroom = LL_RESERVED_SPACE(rt->dst.dev); -+ troom = rt->dst.dev->needed_tailroom; -+ -+ /* -+ * Keep copying data until we run out. -+ */ -+ while(left > 0) { -+ len = left; -+ /* IF: it doesn't fit, use 'mtu' - the data space left */ -+ if (len > mtu) -+ len = mtu; -+ /* IF: we are not sending up to and including the packet end -+ then align the next start on an eight byte boundary */ -+ if (len < left) { -+ len &= ~7; -+ } -+ /* -+ * Allocate buffer. -+ */ -+ -+ if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + -+ hroom + troom, GFP_ATOMIC)) == NULL) { -+ NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); -+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), -+ IPSTATS_MIB_FRAGFAILS); -+ err = -ENOMEM; -+ goto fail; -+ } -+ -+ /* -+ * Set up data on packet -+ */ -+ -+ ip6_copy_metadata(frag, skb); -+ skb_reserve(frag, hroom); -+ skb_put(frag, len + hlen + sizeof(struct frag_hdr)); -+ skb_reset_network_header(frag); -+ fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); -+ frag->transport_header = (frag->network_header + hlen + -+ sizeof(struct frag_hdr)); -+ -+ /* -+ * Charge the memory for the fragment to any owner -+ * it might possess -+ */ -+ if (skb->sk) -+ skb_set_owner_w(frag, skb->sk); -+ -+ /* -+ * Copy the packet header into the new buffer. -+ */ -+ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); -+ -+ /* -+ * Build fragment header. -+ */ -+ fh->nexthdr = nexthdr; -+ fh->reserved = 0; -+ if (!frag_id) { -+ ipv6_select_ident(fh, rt); -+ frag_id = fh->identification; -+ } else -+ fh->identification = frag_id; -+ -+ /* -+ * Copy a block of the IP datagram. 
-+ */ -+ if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) -+ BUG(); -+ left -= len; -+ -+ fh->frag_off = htons(offset); -+ if (left > 0) -+ fh->frag_off |= htons(IP6_MF); -+ ipv6_hdr(frag)->payload_len = htons(frag->len - -+ sizeof(struct ipv6hdr)); -+ -+ ptr += len; -+ offset += len; -+ -+ /* -+ * Put this fragment into the sending queue. -+ */ -+ err = output(frag); -+ if (err) -+ goto fail; -+ -+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), -+ IPSTATS_MIB_FRAGCREATES); -+ } -+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), -+ IPSTATS_MIB_FRAGOKS); -+ consume_skb(skb); -+ return err; -+ -+fail: -+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), -+ IPSTATS_MIB_FRAGFAILS); -+ kfree_skb(skb); -+ return err; -+} -+ -+static inline int ip6_rt_check(const struct rt6key *rt_key, -+ const struct in6_addr *fl_addr, -+ const struct in6_addr *addr_cache) -+{ -+ return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && -+ (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); -+} -+ -+static struct dst_entry *ip6_sk_dst_check(struct sock *sk, -+ struct dst_entry *dst, -+ const struct flowi6 *fl6) -+{ -+ struct ipv6_pinfo *np = inet6_sk(sk); -+ struct rt6_info *rt; -+ -+ if (!dst) -+ goto out; -+ -+ if (dst->ops->family != AF_INET6) { -+ dst_release(dst); -+ return NULL; -+ } -+ -+ rt = (struct rt6_info *)dst; -+ /* Yes, checking route validity in not connected -+ * case is not very simple. Take into account, -+ * that we do not support routing by source, TOS, -+ * and MSG_DONTROUTE --ANK (980726) -+ * -+ * 1. ip6_rt_check(): If route was host route, -+ * check that cached destination is current. -+ * If it is network route, we still may -+ * check its validity using saved pointer -+ * to the last used address: daddr_cache. -+ * We do not want to save whole address now, -+ * (because main consumer of this service -+ * is tcp, which has not this problem), -+ * so that the last trick works only on connected -+ * sockets. -+ * 2. oif also should be the same. -+ */ -+ if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || -+#ifdef CONFIG_IPV6_SUBTREES -+ ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || -+#endif -+ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { -+ dst_release(dst); -+ dst = NULL; -+ } -+ -+out: -+ return dst; -+} -+ -+static int ip6_dst_lookup_tail(struct sock *sk, -+ struct dst_entry **dst, struct flowi6 *fl6) -+{ -+ struct net *net = sock_net(sk); -+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD -+ struct neighbour *n; -+ struct rt6_info *rt; -+#endif -+ int err; -+ -+ if (*dst == NULL) -+ *dst = ip6_route_output(net, sk, fl6); -+ -+ if ((err = (*dst)->error)) -+ goto out_err_release; -+ -+ if (ipv6_addr_any(&fl6->saddr)) { -+ struct rt6_info *rt = (struct rt6_info *) *dst; -+ err = ip6_route_get_saddr(net, rt, &fl6->daddr, -+ sk ? inet6_sk(sk)->srcprefs : 0, -+ &fl6->saddr); -+ if (err) -+ goto out_err_release; -+ } -+ -+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD -+ /* -+ * Here if the dst entry we've looked up -+ * has a neighbour entry that is in the INCOMPLETE -+ * state and the src address from the flow is -+ * marked as OPTIMISTIC, we release the found -+ * dst entry and replace it instead with the -+ * dst entry of the nexthop router -+ */ -+ rt = (struct rt6_info *) *dst; -+ rcu_read_lock_bh(); -+ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); -+ err = n && !(n->nud_state & NUD_VALID) ? 
-EINVAL : 0; -+ rcu_read_unlock_bh(); -+ -+ if (err) { -+ struct inet6_ifaddr *ifp; -+ struct flowi6 fl_gw6; -+ int redirect; -+ -+ ifp = ipv6_get_ifaddr(net, &fl6->saddr, -+ (*dst)->dev, 1); -+ -+ redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); -+ if (ifp) -+ in6_ifa_put(ifp); -+ -+ if (redirect) { -+ /* -+ * We need to get the dst entry for the -+ * default router instead -+ */ -+ dst_release(*dst); -+ memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); -+ memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); -+ *dst = ip6_route_output(net, sk, &fl_gw6); -+ if ((err = (*dst)->error)) -+ goto out_err_release; -+ } -+ } -+#endif -+ -+ return 0; -+ -+out_err_release: -+ if (err == -ENETUNREACH) -+ IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); -+ dst_release(*dst); -+ *dst = NULL; -+ return err; -+} -+ -+/** -+ * ip6_dst_lookup - perform route lookup on flow -+ * @sk: socket which provides route info -+ * @dst: pointer to dst_entry * for result -+ * @fl6: flow to lookup -+ * -+ * This function performs a route lookup on the given flow. -+ * -+ * It returns zero on success, or a standard errno code on error. -+ */ -+int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) -+{ -+ *dst = NULL; -+ return ip6_dst_lookup_tail(sk, dst, fl6); -+} -+EXPORT_SYMBOL_GPL(ip6_dst_lookup); -+ -+/** -+ * ip6_dst_lookup_flow - perform route lookup on flow with ipsec -+ * @sk: socket which provides route info -+ * @fl6: flow to lookup -+ * @final_dst: final destination address for ipsec lookup -+ * @can_sleep: we are in a sleepable context -+ * -+ * This function performs a route lookup on the given flow. -+ * -+ * It returns a valid dst pointer on success, or a pointer encoded -+ * error code. -+ */ -+struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, -+ const struct in6_addr *final_dst, -+ bool can_sleep) -+{ -+ struct dst_entry *dst = NULL; -+ int err; -+ -+ err = ip6_dst_lookup_tail(sk, &dst, fl6); -+ if (err) -+ return ERR_PTR(err); -+ if (final_dst) -+ fl6->daddr = *final_dst; -+ if (can_sleep) -+ fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; -+ -+ return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); -+} -+EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); -+ -+/** -+ * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow -+ * @sk: socket which provides the dst cache and route info -+ * @fl6: flow to lookup -+ * @final_dst: final destination address for ipsec lookup -+ * @can_sleep: we are in a sleepable context -+ * -+ * This function performs a route lookup on the given flow with the -+ * possibility of using the cached route in the socket if it is valid. -+ * It will take the socket dst lock when operating on the dst cache. -+ * As a result, this function can only be used in process context. -+ * -+ * It returns a valid dst pointer on success, or a pointer encoded -+ * error code. 
-+ */ -+struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, -+ const struct in6_addr *final_dst, -+ bool can_sleep) -+{ -+ struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); -+ int err; -+ -+ dst = ip6_sk_dst_check(sk, dst, fl6); -+ -+ err = ip6_dst_lookup_tail(sk, &dst, fl6); -+ if (err) -+ return ERR_PTR(err); -+ if (final_dst) -+ fl6->daddr = *final_dst; -+ if (can_sleep) -+ fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; -+ -+ return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); -+} -+EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -+ -+static inline int ip6_ufo_append_data(struct sock *sk, -+ int getfrag(void *from, char *to, int offset, int len, -+ int odd, struct sk_buff *skb), -+ void *from, int length, int hh_len, int fragheaderlen, -+ int transhdrlen, int mtu,unsigned int flags, -+ struct rt6_info *rt) -+ -+{ -+ struct sk_buff *skb; -+ int err; -+ -+ /* There is support for UDP large send offload by network -+ * device, so create one single skb packet containing complete -+ * udp datagram -+ */ -+ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { -+ struct frag_hdr fhdr; -+ -+ skb = sock_alloc_send_skb(sk, -+ hh_len + fragheaderlen + transhdrlen + 20, -+ (flags & MSG_DONTWAIT), &err); -+ if (skb == NULL) -+ return err; -+ -+ /* reserve space for Hardware header */ -+ skb_reserve(skb, hh_len); -+ -+ /* create space for UDP/IP header */ -+ skb_put(skb,fragheaderlen + transhdrlen); -+ -+ /* initialize network header pointer */ -+ skb_reset_network_header(skb); -+ -+ /* initialize protocol header pointer */ -+ skb->transport_header = skb->network_header + fragheaderlen; -+ -+ skb->ip_summed = CHECKSUM_PARTIAL; -+ skb->csum = 0; -+ -+ /* Specify the length of each IPv6 datagram fragment. -+ * It has to be a multiple of 8. -+ */ -+ skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - -+ sizeof(struct frag_hdr)) & ~7; -+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP; -+ ipv6_select_ident(&fhdr, rt); -+ skb_shinfo(skb)->ip6_frag_id = fhdr.identification; -+ __skb_queue_tail(&sk->sk_write_queue, skb); -+ } -+ -+ return skb_append_datato_frags(sk, skb, getfrag, from, -+ (length - transhdrlen)); -+} -+ -+static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, -+ gfp_t gfp) -+{ -+ return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; -+} -+ -+static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, -+ gfp_t gfp) -+{ -+ return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; -+} -+ -+static void ip6_append_data_mtu(unsigned int *mtu, -+ int *maxfraglen, -+ unsigned int fragheaderlen, -+ struct sk_buff *skb, -+ struct rt6_info *rt, -+ bool pmtuprobe) -+{ -+ if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { -+ if (skb == NULL) { -+ /* first fragment, reserve header_len */ -+ *mtu = *mtu - rt->dst.header_len; -+ -+ } else { -+ /* -+ * this fragment is not first, the headers -+ * space is regarded as data space. -+ */ -+ *mtu = min(*mtu, pmtuprobe ? 
-+ rt->dst.dev->mtu : -+ dst_mtu(rt->dst.path)); -+ } -+ *maxfraglen = ((*mtu - fragheaderlen) & ~7) -+ + fragheaderlen - sizeof(struct frag_hdr); -+ } -+} -+ -+int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, -+ int offset, int len, int odd, struct sk_buff *skb), -+ void *from, int length, int transhdrlen, -+ int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, -+ struct rt6_info *rt, unsigned int flags, int dontfrag) -+{ -+ struct inet_sock *inet = inet_sk(sk); -+ struct ipv6_pinfo *np = inet6_sk(sk); -+ struct inet_cork *cork; -+ struct sk_buff *skb, *skb_prev = NULL; -+ unsigned int maxfraglen, fragheaderlen, mtu; -+ int exthdrlen; -+ int dst_exthdrlen; -+ int hh_len; -+ int copy; -+ int err; -+ int offset = 0; -+ __u8 tx_flags = 0; -+ -+ if (flags&MSG_PROBE) -+ return 0; -+ cork = &inet->cork.base; -+ if (skb_queue_empty(&sk->sk_write_queue)) { -+ /* -+ * setup for corking -+ */ -+ if (opt) { -+ if (WARN_ON(np->cork.opt)) -+ return -EINVAL; -+ -+ np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); -+ if (unlikely(np->cork.opt == NULL)) -+ return -ENOBUFS; -+ -+ np->cork.opt->tot_len = opt->tot_len; -+ np->cork.opt->opt_flen = opt->opt_flen; -+ np->cork.opt->opt_nflen = opt->opt_nflen; -+ -+ np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, -+ sk->sk_allocation); -+ if (opt->dst0opt && !np->cork.opt->dst0opt) -+ return -ENOBUFS; -+ -+ np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, -+ sk->sk_allocation); -+ if (opt->dst1opt && !np->cork.opt->dst1opt) -+ return -ENOBUFS; -+ -+ np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, -+ sk->sk_allocation); -+ if (opt->hopopt && !np->cork.opt->hopopt) -+ return -ENOBUFS; -+ -+ np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, -+ sk->sk_allocation); -+ if (opt->srcrt && !np->cork.opt->srcrt) -+ return -ENOBUFS; -+ -+ /* need source address above miyazawa*/ -+ } -+ dst_hold(&rt->dst); -+ cork->dst = &rt->dst; -+ inet->cork.fl.u.ip6 = *fl6; -+ np->cork.hop_limit = hlimit; -+ np->cork.tclass = tclass; -+ if (rt->dst.flags & DST_XFRM_TUNNEL) -+ mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? -+ rt->dst.dev->mtu : dst_mtu(&rt->dst); -+ else -+ mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? -+ rt->dst.dev->mtu : dst_mtu(rt->dst.path); -+ if (np->frag_size < mtu) { -+ if (np->frag_size) -+ mtu = np->frag_size; -+ } -+ cork->fragsize = mtu; -+ if (dst_allfrag(rt->dst.path)) -+ cork->flags |= IPCORK_ALLFRAG; -+ cork->length = 0; -+ exthdrlen = (opt ? opt->opt_flen : 0); -+ length += exthdrlen; -+ transhdrlen += exthdrlen; -+ dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; -+ } else { -+ rt = (struct rt6_info *)cork->dst; -+ fl6 = &inet->cork.fl.u.ip6; -+ opt = np->cork.opt; -+ transhdrlen = 0; -+ exthdrlen = 0; -+ dst_exthdrlen = 0; -+ mtu = cork->fragsize; -+ } -+ -+ hh_len = LL_RESERVED_SPACE(rt->dst.dev); -+ -+ fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + -+ (opt ? opt->opt_nflen : 0); -+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); -+ -+ if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { -+ if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { -+ ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); -+ return -EMSGSIZE; -+ } -+ } -+ -+ /* For UDP, check if TX timestamp is enabled */ -+ if (sk->sk_type == SOCK_DGRAM) -+ sock_tx_timestamp(sk, &tx_flags); -+ -+ /* -+ * Let's try using as much space as possible. -+ * Use MTU if total length of the message fits into the MTU. 
-+ * Otherwise, we need to reserve fragment header and -+ * fragment alignment (= 8-15 octects, in total). -+ * -+ * Note that we may need to "move" the data from the tail of -+ * of the buffer to the new fragment when we split -+ * the message. -+ * -+ * FIXME: It may be fragmented into multiple chunks -+ * at once if non-fragmentable extension headers -+ * are too large. -+ * --yoshfuji -+ */ -+ -+ if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || -+ sk->sk_protocol == IPPROTO_RAW)) { -+ ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); -+ return -EMSGSIZE; -+ } -+ -+ skb = skb_peek_tail(&sk->sk_write_queue); -+ cork->length += length; -+ if (((length > mtu) || -+ (skb && skb_has_frags(skb))) && -+ (sk->sk_protocol == IPPROTO_UDP) && -+ (rt->dst.dev->features & NETIF_F_UFO)) { -+ err = ip6_ufo_append_data(sk, getfrag, from, length, -+ hh_len, fragheaderlen, -+ transhdrlen, mtu, flags, rt); -+ if (err) -+ goto error; -+ return 0; -+ } -+ -+ if (!skb) -+ goto alloc_new_skb; -+ -+ while (length > 0) { -+ /* Check if the remaining data fits into current packet. */ -+ copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; -+ if (copy < length) -+ copy = maxfraglen - skb->len; -+ -+ if (copy <= 0) { -+ char *data; -+ unsigned int datalen; -+ unsigned int fraglen; -+ unsigned int fraggap; -+ unsigned int alloclen; -+alloc_new_skb: -+ /* There's no room in the current skb */ -+ if (skb) -+ fraggap = skb->len - maxfraglen; -+ else -+ fraggap = 0; -+ /* update mtu and maxfraglen if necessary */ -+ if (skb == NULL || skb_prev == NULL) -+ ip6_append_data_mtu(&mtu, &maxfraglen, -+ fragheaderlen, skb, rt, -+ np->pmtudisc == -+ IPV6_PMTUDISC_PROBE); -+ -+ skb_prev = skb; -+ -+ /* -+ * If remaining data exceeds the mtu, -+ * we know we need more fragment(s). -+ */ -+ datalen = length + fraggap; -+ -+ if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) -+ datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; -+ if ((flags & MSG_MORE) && -+ !(rt->dst.dev->features&NETIF_F_SG)) -+ alloclen = mtu; -+ else -+ alloclen = datalen + fragheaderlen; -+ -+ alloclen += dst_exthdrlen; -+ -+ if (datalen != length + fraggap) { -+ /* -+ * this is not the last fragment, the trailer -+ * space is regarded as data space. -+ */ -+ datalen += rt->dst.trailer_len; -+ } -+ -+ alloclen += rt->dst.trailer_len; -+ fraglen = datalen + fragheaderlen; -+ -+ /* -+ * We just reserve space for fragment header. -+ * Note: this may be overallocation if the message -+ * (without MSG_MORE) fits into the MTU. -+ */ -+ alloclen += sizeof(struct frag_hdr); -+ -+ if (transhdrlen) { -+ skb = sock_alloc_send_skb(sk, -+ alloclen + hh_len, -+ (flags & MSG_DONTWAIT), &err); -+ } else { -+ skb = NULL; -+ if (atomic_read(&sk->sk_wmem_alloc) <= -+ 2 * sk->sk_sndbuf) -+ skb = sock_wmalloc(sk, -+ alloclen + hh_len, 1, -+ sk->sk_allocation); -+ if (unlikely(skb == NULL)) -+ err = -ENOBUFS; -+ else { -+ /* Only the initial fragment -+ * is time stamped. 
-+ */ -+ tx_flags = 0; -+ } -+ } -+ if (skb == NULL) -+ goto error; -+ /* -+ * Fill in the control structures -+ */ -+ skb->ip_summed = CHECKSUM_NONE; -+ skb->csum = 0; -+ /* reserve for fragmentation and ipsec header */ -+ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + -+ dst_exthdrlen); -+ -+ if (sk->sk_type == SOCK_DGRAM) -+ skb_shinfo(skb)->tx_flags = tx_flags; -+ -+ /* -+ * Find where to start putting bytes -+ */ -+ data = skb_put(skb, fraglen); -+ skb_set_network_header(skb, exthdrlen); -+ data += fragheaderlen; -+ skb->transport_header = (skb->network_header + -+ fragheaderlen); -+ if (fraggap) { -+ skb->csum = skb_copy_and_csum_bits( -+ skb_prev, maxfraglen, -+ data + transhdrlen, fraggap, 0); -+ skb_prev->csum = csum_sub(skb_prev->csum, -+ skb->csum); -+ data += fraggap; -+ pskb_trim_unique(skb_prev, maxfraglen); -+ } -+ copy = datalen - transhdrlen - fraggap; -+ -+ if (copy < 0) { -+ err = -EINVAL; -+ kfree_skb(skb); -+ goto error; -+ } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { -+ err = -EFAULT; -+ kfree_skb(skb); -+ goto error; -+ } -+ -+ offset += copy; -+ length -= datalen - fraggap; -+ transhdrlen = 0; -+ exthdrlen = 0; -+ dst_exthdrlen = 0; -+ -+ /* -+ * Put the packet on the pending queue -+ */ -+ __skb_queue_tail(&sk->sk_write_queue, skb); -+ continue; -+ } -+ -+ if (copy > length) -+ copy = length; -+ -+ if (!(rt->dst.dev->features&NETIF_F_SG)) { -+ unsigned int off; -+ -+ off = skb->len; -+ if (getfrag(from, skb_put(skb, copy), -+ offset, copy, off, skb) < 0) { -+ __skb_trim(skb, off); -+ err = -EFAULT; -+ goto error; -+ } -+ } else { -+ int i = skb_shinfo(skb)->nr_frags; -+ struct page_frag *pfrag = sk_page_frag(sk); -+ -+ err = -ENOMEM; -+ if (!sk_page_frag_refill(sk, pfrag)) -+ goto error; -+ -+ if (!skb_can_coalesce(skb, i, pfrag->page, -+ pfrag->offset)) { -+ err = -EMSGSIZE; -+ if (i == MAX_SKB_FRAGS) -+ goto error; -+ -+ __skb_fill_page_desc(skb, i, pfrag->page, -+ pfrag->offset, 0); -+ skb_shinfo(skb)->nr_frags = ++i; -+ get_page(pfrag->page); -+ } -+ copy = min_t(int, copy, pfrag->size - pfrag->offset); -+ if (getfrag(from, -+ page_address(pfrag->page) + pfrag->offset, -+ offset, copy, skb->len, skb) < 0) -+ goto error_efault; -+ -+ pfrag->offset += copy; -+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); -+ skb->len += copy; -+ skb->data_len += copy; -+ skb->truesize += copy; -+ atomic_add(copy, &sk->sk_wmem_alloc); -+ } -+ offset += copy; -+ length -= copy; -+ } -+ -+ return 0; -+ -+error_efault: -+ err = -EFAULT; -+error: -+ cork->length -= length; -+ IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); -+ return err; -+} -+EXPORT_SYMBOL_GPL(ip6_append_data); -+ -+static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) -+{ -+ if (np->cork.opt) { -+ kfree(np->cork.opt->dst0opt); -+ kfree(np->cork.opt->dst1opt); -+ kfree(np->cork.opt->hopopt); -+ kfree(np->cork.opt->srcrt); -+ kfree(np->cork.opt); -+ np->cork.opt = NULL; -+ } -+ -+ if (inet->cork.base.dst) { -+ dst_release(inet->cork.base.dst); -+ inet->cork.base.dst = NULL; -+ inet->cork.base.flags &= ~IPCORK_ALLFRAG; -+ } -+ memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); -+} -+ -+int ip6_push_pending_frames(struct sock *sk) -+{ -+ struct sk_buff *skb, *tmp_skb; -+ struct sk_buff **tail_skb; -+ struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; -+ struct inet_sock *inet = inet_sk(sk); -+ struct ipv6_pinfo *np = inet6_sk(sk); -+ struct net *net = sock_net(sk); -+ struct ipv6hdr *hdr; -+ struct 
ipv6_txoptions *opt = np->cork.opt; -+ struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; -+ struct flowi6 *fl6 = &inet->cork.fl.u.ip6; -+ unsigned char proto = fl6->flowi6_proto; -+ int err = 0; -+ -+ if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) -+ goto out; -+ tail_skb = &(skb_shinfo(skb)->frag_list); -+ -+ /* move skb->data to ip header from ext header */ -+ if (skb->data < skb_network_header(skb)) -+ __skb_pull(skb, skb_network_offset(skb)); -+ while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { -+ __skb_pull(tmp_skb, skb_network_header_len(skb)); -+ *tail_skb = tmp_skb; -+ tail_skb = &(tmp_skb->next); -+ skb->len += tmp_skb->len; -+ skb->data_len += tmp_skb->len; -+ skb->truesize += tmp_skb->truesize; -+ tmp_skb->destructor = NULL; -+ tmp_skb->sk = NULL; -+ } -+ -+ /* Allow local fragmentation. */ -+ if (np->pmtudisc < IPV6_PMTUDISC_DO) -+ skb->local_df = 1; -+ -+ *final_dst = fl6->daddr; -+ __skb_pull(skb, skb_network_header_len(skb)); -+ if (opt && opt->opt_flen) -+ ipv6_push_frag_opts(skb, opt, &proto); -+ if (opt && opt->opt_nflen) -+ ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); -+ -+ skb_push(skb, sizeof(struct ipv6hdr)); -+ skb_reset_network_header(skb); -+ hdr = ipv6_hdr(skb); -+ -+ ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); -+ hdr->hop_limit = np->cork.hop_limit; -+ hdr->nexthdr = proto; -+ hdr->saddr = fl6->saddr; -+ hdr->daddr = *final_dst; -+ -+ skb->priority = sk->sk_priority; -+ skb->mark = sk->sk_mark; -+ -+ skb_dst_set(skb, dst_clone(&rt->dst)); -+ IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); -+ if (proto == IPPROTO_ICMPV6) { -+ struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); -+ -+ ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); -+ ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); -+ } -+ -+ err = ip6_local_out(skb); -+ if (err) { -+ if (err > 0) -+ err = net_xmit_errno(err); -+ if (err) -+ goto error; -+ } -+ -+out: -+ ip6_cork_release(inet, np); -+ return err; -+error: -+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); -+ goto out; -+} -+EXPORT_SYMBOL_GPL(ip6_push_pending_frames); -+ -+void ip6_flush_pending_frames(struct sock *sk) -+{ -+ struct sk_buff *skb; -+ -+ while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { -+ if (skb_dst(skb)) -+ IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), -+ IPSTATS_MIB_OUTDISCARDS); -+ kfree_skb(skb); -+ } -+ -+ ip6_cork_release(inet_sk(sk), inet6_sk(sk)); -+} -+EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); -diff -ruN linux-3.10.27/net/netfilter/core.c linux-3.10.27-imq/net/netfilter/core.c ---- linux-3.10.27/net/netfilter/core.c 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/net/netfilter/core.c 2014-01-18 10:19:59.349342984 +0100 -@@ -191,9 +191,11 @@ - ret = NF_DROP_GETERR(verdict); - if (ret == 0) - ret = -EPERM; -- } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { -+ } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE || -+ (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) { - int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, -- verdict >> NF_VERDICT_QBITS); -+ verdict >> NF_VERDICT_QBITS, -+ verdict & NF_VERDICT_MASK); - if (err < 0) { - if (err == -ECANCELED) - goto next_hook; -diff -ruN linux-3.10.27/net/netfilter/Kconfig linux-3.10.27-imq/net/netfilter/Kconfig ---- linux-3.10.27/net/netfilter/Kconfig 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/net/netfilter/Kconfig 2014-01-18 10:19:59.349342984 +0100 -@@ -641,6 +641,18 @@ - - To compile it as a module, choose M here. 
If unsure, say N. - -+config NETFILTER_XT_TARGET_IMQ -+ tristate '"IMQ" target support' -+ depends on NETFILTER_XTABLES -+ depends on IP_NF_MANGLE || IP6_NF_MANGLE -+ select IMQ -+ default m if NETFILTER_ADVANCED=n -+ help -+ This option adds a `IMQ' target which is used to specify if and -+ to which imq device packets should get enqueued/dequeued. -+ -+ To compile it as a module, choose M here. If unsure, say N. -+ - config NETFILTER_XT_TARGET_MARK - tristate '"MARK" target support' - depends on NETFILTER_ADVANCED -diff -ruN linux-3.10.27/net/netfilter/Makefile linux-3.10.27-imq/net/netfilter/Makefile ---- linux-3.10.27/net/netfilter/Makefile 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/net/netfilter/Makefile 2014-01-18 10:19:59.349342984 +0100 -@@ -82,6 +82,7 @@ - obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o - obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o - obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o -+obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o - obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o - obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o - obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o -diff -ruN linux-3.10.27/net/netfilter/nf_internals.h linux-3.10.27-imq/net/netfilter/nf_internals.h ---- linux-3.10.27/net/netfilter/nf_internals.h 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/net/netfilter/nf_internals.h 2014-01-18 10:19:59.349342984 +0100 -@@ -29,7 +29,7 @@ - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), -- unsigned int queuenum); -+ unsigned int queuenum, unsigned int queuetype); - extern int __init netfilter_queue_init(void); - - /* nf_log.c */ -diff -ruN linux-3.10.27/net/netfilter/nf_queue.c linux-3.10.27-imq/net/netfilter/nf_queue.c ---- linux-3.10.27/net/netfilter/nf_queue.c 2014-01-16 00:29:14.000000000 +0100 -+++ linux-3.10.27-imq/net/netfilter/nf_queue.c 2014-01-18 10:19:59.350342998 +0100 -@@ -27,6 +27,23 @@ - */ - static const struct nf_queue_handler __rcu *queue_handler __read_mostly; - -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+static const struct nf_queue_handler __rcu *queue_imq_handler __read_mostly; -+ -+void nf_register_queue_imq_handler(const struct nf_queue_handler *qh) -+{ -+ rcu_assign_pointer(queue_imq_handler, qh); -+} -+EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler); -+ -+void nf_unregister_queue_imq_handler(void) -+{ -+ RCU_INIT_POINTER(queue_imq_handler, NULL); -+ synchronize_rcu(); -+} -+EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler); -+#endif -+ - /* return EBUSY when somebody else is registered, return EEXIST if the - * same handler is registered, return 0 in case of success. */ - void nf_register_queue_handler(const struct nf_queue_handler *qh) -@@ -105,7 +122,8 @@ - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), -- unsigned int queuenum) -+ unsigned int queuenum, -+ unsigned int queuetype) - { - int status = -ENOENT; - struct nf_queue_entry *entry = NULL; -@@ -115,7 +133,17 @@ - /* QUEUE == DROP if no one is waiting, to be safe. 
*/ - rcu_read_lock(); - -- qh = rcu_dereference(queue_handler); -+ if (queuetype == NF_IMQ_QUEUE) { -+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) -+ qh = rcu_dereference(queue_imq_handler); -+#else -+ BUG(); -+ goto err_unlock; -+#endif -+ } else { -+ qh = rcu_dereference(queue_handler); -+ } -+ - if (!qh) { - status = -ESRCH; - goto err_unlock; -@@ -205,9 +233,11 @@ - local_bh_enable(); - break; - case NF_QUEUE: -+ case NF_IMQ_QUEUE: - err = nf_queue(skb, elem, entry->pf, entry->hook, - entry->indev, entry->outdev, entry->okfn, -- verdict >> NF_VERDICT_QBITS); -+ verdict >> NF_VERDICT_QBITS, -+ verdict & NF_VERDICT_MASK); - if (err < 0) { - if (err == -ECANCELED) - goto next_hook; -diff -ruN linux-3.10.27/net/netfilter/xt_IMQ.c linux-3.10.27-imq/net/netfilter/xt_IMQ.c ---- linux-3.10.27/net/netfilter/xt_IMQ.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-3.10.27-imq/net/netfilter/xt_IMQ.c 2014-01-18 10:19:59.350342998 +0100 -@@ -0,0 +1,72 @@ -+/* -+ * This target marks packets to be enqueued to an imq device -+ */ -+#include -+#include -+#include -+#include -+#include -+ -+static unsigned int imq_target(struct sk_buff *pskb, -+ const struct xt_action_param *par) -+{ -+ const struct xt_imq_info *mr = par->targinfo; -+ -+ pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE; -+ -+ return XT_CONTINUE; -+} -+ -+static int imq_checkentry(const struct xt_tgchk_param *par) -+{ -+ struct xt_imq_info *mr = par->targinfo; -+ -+ if (mr->todev > IMQ_MAX_DEVS - 1) { -+ pr_warn("IMQ: invalid device specified, highest is %u\n", -+ IMQ_MAX_DEVS - 1); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static struct xt_target xt_imq_reg[] __read_mostly = { -+ { -+ .name = "IMQ", -+ .family = AF_INET, -+ .checkentry = imq_checkentry, -+ .target = imq_target, -+ .targetsize = sizeof(struct xt_imq_info), -+ .table = "mangle", -+ .me = THIS_MODULE -+ }, -+ { -+ .name = "IMQ", -+ .family = AF_INET6, -+ .checkentry = imq_checkentry, -+ .target = imq_target, -+ .targetsize = sizeof(struct xt_imq_info), -+ .table = "mangle", -+ .me = THIS_MODULE -+ }, -+}; -+ -+static int __init imq_init(void) -+{ -+ return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg)); -+} -+ -+static void __exit imq_fini(void) -+{ -+ xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg)); -+} -+ -+module_init(imq_init); -+module_exit(imq_fini); -+ -+MODULE_AUTHOR("http://www.linuximq.net"); -+MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information."); -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS("ipt_IMQ"); -+MODULE_ALIAS("ip6t_IMQ"); -+ diff --git a/src/patches/linux-3.10.30-imq.patch b/src/patches/linux-3.10.30-imq.patch new file mode 100644 index 000000000..7eebd693e --- /dev/null +++ b/src/patches/linux-3.10.30-imq.patch @@ -0,0 +1,3304 @@ +diff -Naur linux-3.10.30.org/drivers/net/imq.c linux-3.10.30/drivers/net/imq.c +--- linux-3.10.30.org/drivers/net/imq.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.10.30/drivers/net/imq.c 2014-02-14 20:29:05.379402305 +0100 +@@ -0,0 +1,1001 @@ ++/* ++ * Pseudo-driver for the intermediate queue device. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. 
++ * ++ * Authors: Patrick McHardy, ++ * ++ * The first version was written by Martin Devera, ++ * ++ * Credits: Jan Rafaj ++ * - Update patch to 2.4.21 ++ * Sebastian Strollo ++ * - Fix "Dead-loop on netdevice imq"-issue ++ * Marcel Sebek ++ * - Update to 2.6.2-rc1 ++ * ++ * After some time of inactivity there is a group taking care ++ * of IMQ again: http://www.linuximq.net ++ * ++ * ++ * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7 ++ * including the following changes: ++ * ++ * - Correction of ipv6 support "+"s issue (Hasso Tepper) ++ * - Correction of imq_init_devs() issue that resulted in ++ * kernel OOPS unloading IMQ as module (Norbert Buchmuller) ++ * - Addition of functionality to choose number of IMQ devices ++ * during kernel config (Andre Correa) ++ * - Addition of functionality to choose how IMQ hooks on ++ * PRE and POSTROUTING (after or before NAT) (Andre Correa) ++ * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa) ++ * ++ * ++ * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were ++ * released with almost no problems. 2.6.14-x was released ++ * with some important changes: nfcache was removed; After ++ * some weeks of trouble we figured out that some IMQ fields ++ * in skb were missing in skbuff.c - skb_clone and copy_skb_header. ++ * These functions are correctly patched by this new patch version. ++ * ++ * Thanks for all who helped to figure out all the problems with ++ * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX, ++ * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully ++ * I didn't forget anybody). I apologize again for my lack of time. ++ * ++ * ++ * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead ++ * of qdisc_restart() and moved qdisc_run() to tasklet to avoid ++ * recursive locking. New initialization routines to fix 'rmmod' not ++ * working anymore. Used code from ifb.c. (Jussi Kivilinna) ++ * ++ * 2008/08/06 - 2.6.26 - (JK) ++ * - Replaced tasklet with 'netif_schedule()'. ++ * - Cleaned up and added comments for imq_nf_queue(). ++ * ++ * 2009/04/12 ++ * - Add skb_save_cb/skb_restore_cb helper functions for backuping ++ * control buffer. This is needed because qdisc-layer on kernels ++ * 2.6.27 and newer overwrite control buffer. (Jussi Kivilinna) ++ * - Add better locking for IMQ device. Hopefully this will solve ++ * SMP issues. (Jussi Kivilinna) ++ * - Port to 2.6.27 ++ * - Port to 2.6.28 ++ * - Port to 2.6.29 + fix rmmod not working ++ * ++ * 2009/04/20 - (Jussi Kivilinna) ++ * - Use netdevice feature flags to avoid extra packet handling ++ * by core networking layer and possibly increase performance. ++ * ++ * 2009/09/26 - (Jussi Kivilinna) ++ * - Add imq_nf_reinject_lockless to fix deadlock with ++ * imq_nf_queue/imq_nf_reinject. ++ * ++ * 2009/12/08 - (Jussi Kivilinna) ++ * - Port to 2.6.32 ++ * - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit() ++ * - Also add better error checking for skb->nf_queue_entry usage ++ * ++ * 2010/02/25 - (Jussi Kivilinna) ++ * - Port to 2.6.33 ++ * ++ * 2010/08/15 - (Jussi Kivilinna) ++ * - Port to 2.6.35 ++ * - Simplify hook registration by using nf_register_hooks. ++ * - nf_reinject doesn't need spinlock around it, therefore remove ++ * imq_nf_reinject function. Other nf_reinject users protect ++ * their own data with spinlock. With IMQ however all data is ++ * needed is stored per skbuff, so no locking is needed. 
++ * - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of ++ * NF_QUEUE, this allows working coexistance of IMQ and other ++ * NF_QUEUE users. ++ * - Make IMQ multi-queue. Number of IMQ device queues can be ++ * increased with 'numqueues' module parameters. Default number ++ * of queues is 1, in other words by default IMQ works as ++ * single-queue device. Multi-queue selection is based on ++ * IFB multi-queue patch by Changli Gao . ++ * ++ * 2011/03/18 - (Jussi Kivilinna) ++ * - Port to 2.6.38 ++ * ++ * 2011/07/12 - (syoder89@gmail.com) ++ * - Crash fix that happens when the receiving interface has more ++ * than one queue (add missing skb_set_queue_mapping in ++ * imq_select_queue). ++ * ++ * 2011/07/26 - (Jussi Kivilinna) ++ * - Add queue mapping checks for packets exiting IMQ. ++ * - Port to 3.0 ++ * ++ * 2011/08/16 - (Jussi Kivilinna) ++ * - Clear IFF_TX_SKB_SHARING flag that was added for linux 3.0.2 ++ * ++ * 2011/11/03 - Germano Michel ++ * - Fix IMQ for net namespaces ++ * ++ * 2011/11/04 - Jussi Kivilinna ++ * - Port to 3.1 ++ * - Clean-up, move 'get imq device pointer by imqX name' to ++ * separate function from imq_nf_queue(). ++ * ++ * 2012/01/05 - Jussi Kivilinna ++ * - Port to 3.2 ++ * ++ * 2012/03/19 - Jussi Kivilinna ++ * - Port to 3.3 ++ * ++ * 2012/12/12 - Jussi Kivilinna ++ * - Port to 3.7 ++ * - Fix checkpatch.pl warnings ++ * ++ * 2013/09/10 - Jussi Kivilinna ++ * - Fixed GSO handling for 3.10, see imq_nf_queue() for comments. ++ * - Don't copy skb->cb_next when copying or cloning skbuffs. ++ * ++ * Also, many thanks to pablo Sebastian Greco for making the initial ++ * patch and to those who helped the testing. ++ * ++ * More info at: http://www.linuximq.net/ (Andre Correa) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ #include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num); ++ ++static nf_hookfn imq_nf_hook; ++ ++static struct nf_hook_ops imq_ops[] = { ++ { ++ /* imq_ingress_ipv4 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET, ++ .hooknum = NF_INET_PRE_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ .priority = NF_IP_PRI_MANGLE + 1, ++#else ++ .priority = NF_IP_PRI_NAT_DST + 1, ++#endif ++ }, ++ { ++ /* imq_egress_ipv4 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET, ++ .hooknum = NF_INET_POST_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA) ++ .priority = NF_IP_PRI_LAST, ++#else ++ .priority = NF_IP_PRI_NAT_SRC - 1, ++#endif ++ }, ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ { ++ /* imq_ingress_ipv6 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET6, ++ .hooknum = NF_INET_PRE_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ .priority = NF_IP6_PRI_MANGLE + 1, ++#else ++ .priority = NF_IP6_PRI_NAT_DST + 1, ++#endif ++ }, ++ { ++ /* imq_egress_ipv6 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET6, ++ .hooknum = NF_INET_POST_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA) ++ .priority = NF_IP6_PRI_LAST, ++#else ++ .priority = NF_IP6_PRI_NAT_SRC - 1, ++#endif ++ }, ++#endif ++}; ++ ++#if defined(CONFIG_IMQ_NUM_DEVS) ++static int numdevs = 
CONFIG_IMQ_NUM_DEVS; ++#else ++static int numdevs = IMQ_MAX_DEVS; ++#endif ++ ++static struct net_device *imq_devs_cache[IMQ_MAX_DEVS]; ++ ++#define IMQ_MAX_QUEUES 32 ++static int numqueues = 1; ++static u32 imq_hashrnd; ++ ++static inline __be16 pppoe_proto(const struct sk_buff *skb) ++{ ++ return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + ++ sizeof(struct pppoe_hdr))); ++} ++ ++static u16 imq_hash(struct net_device *dev, struct sk_buff *skb) ++{ ++ unsigned int pull_len; ++ u16 protocol = skb->protocol; ++ u32 addr1, addr2; ++ u32 hash, ihl = 0; ++ union { ++ u16 in16[2]; ++ u32 in32; ++ } ports; ++ u8 ip_proto; ++ ++ pull_len = 0; ++ ++recheck: ++ switch (protocol) { ++ case htons(ETH_P_8021Q): { ++ if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL)) ++ goto other; ++ ++ pull_len += VLAN_HLEN; ++ skb->network_header += VLAN_HLEN; ++ ++ protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; ++ goto recheck; ++ } ++ ++ case htons(ETH_P_PPP_SES): { ++ if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL)) ++ goto other; ++ ++ pull_len += PPPOE_SES_HLEN; ++ skb->network_header += PPPOE_SES_HLEN; ++ ++ protocol = pppoe_proto(skb); ++ goto recheck; ++ } ++ ++ case htons(ETH_P_IP): { ++ const struct iphdr *iph = ip_hdr(skb); ++ ++ if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr)))) ++ goto other; ++ ++ addr1 = iph->daddr; ++ addr2 = iph->saddr; ++ ++ ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ? ++ iph->protocol : 0; ++ ihl = ip_hdrlen(skb); ++ ++ break; ++ } ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ case htons(ETH_P_IPV6): { ++ const struct ipv6hdr *iph = ipv6_hdr(skb); ++ __be16 fo = 0; ++ ++ if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr)))) ++ goto other; ++ ++ addr1 = iph->daddr.s6_addr32[3]; ++ addr2 = iph->saddr.s6_addr32[3]; ++ ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto, ++ &fo); ++ if (unlikely(ihl < 0)) ++ goto other; ++ ++ break; ++ } ++#endif ++ default: ++other: ++ if (pull_len != 0) { ++ skb_push(skb, pull_len); ++ skb->network_header -= pull_len; ++ } ++ ++ return (u16)(ntohs(protocol) % dev->real_num_tx_queues); ++ } ++ ++ if (addr1 > addr2) ++ swap(addr1, addr2); ++ ++ switch (ip_proto) { ++ case IPPROTO_TCP: ++ case IPPROTO_UDP: ++ case IPPROTO_DCCP: ++ case IPPROTO_ESP: ++ case IPPROTO_AH: ++ case IPPROTO_SCTP: ++ case IPPROTO_UDPLITE: { ++ if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) { ++ if (ports.in16[0] > ports.in16[1]) ++ swap(ports.in16[0], ports.in16[1]); ++ break; ++ } ++ /* fall-through */ ++ } ++ default: ++ ports.in32 = 0; ++ break; ++ } ++ ++ if (pull_len != 0) { ++ skb_push(skb, pull_len); ++ skb->network_header -= pull_len; ++ } ++ ++ hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto); ++ ++ return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32); ++} ++ ++static inline bool sk_tx_queue_recorded(struct sock *sk) ++{ ++ return (sk_tx_queue_get(sk) >= 0); ++} ++ ++static struct netdev_queue *imq_select_queue(struct net_device *dev, ++ struct sk_buff *skb) ++{ ++ u16 queue_index = 0; ++ u32 hash; ++ ++ if (likely(dev->real_num_tx_queues == 1)) ++ goto out; ++ ++ /* IMQ can be receiving ingress or engress packets. 
*/ ++ ++ /* Check first for if rx_queue is set */ ++ if (skb_rx_queue_recorded(skb)) { ++ queue_index = skb_get_rx_queue(skb); ++ goto out; ++ } ++ ++ /* Check if socket has tx_queue set */ ++ if (sk_tx_queue_recorded(skb->sk)) { ++ queue_index = sk_tx_queue_get(skb->sk); ++ goto out; ++ } ++ ++ /* Try use socket hash */ ++ if (skb->sk && skb->sk->sk_hash) { ++ hash = skb->sk->sk_hash; ++ queue_index = ++ (u16)(((u64)hash * dev->real_num_tx_queues) >> 32); ++ goto out; ++ } ++ ++ /* Generate hash from packet data */ ++ queue_index = imq_hash(dev, skb); ++ ++out: ++ if (unlikely(queue_index >= dev->real_num_tx_queues)) ++ queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues); ++ ++ skb_set_queue_mapping(skb, queue_index); ++ return netdev_get_tx_queue(dev, queue_index); ++} ++ ++static struct net_device_stats *imq_get_stats(struct net_device *dev) ++{ ++ return &dev->stats; ++} ++ ++/* called for packets kfree'd in qdiscs at places other than enqueue */ ++static void imq_skb_destructor(struct sk_buff *skb) ++{ ++ struct nf_queue_entry *entry = skb->nf_queue_entry; ++ ++ skb->nf_queue_entry = NULL; ++ ++ if (entry) { ++ nf_queue_entry_release_refs(entry); ++ kfree(entry); ++ } ++ ++ skb_restore_cb(skb); /* kfree backup */ ++} ++ ++static void imq_done_check_queue_mapping(struct sk_buff *skb, ++ struct net_device *dev) ++{ ++ unsigned int queue_index; ++ ++ /* Don't let queue_mapping be left too large after exiting IMQ */ ++ if (likely(skb->dev != dev && skb->dev != NULL)) { ++ queue_index = skb_get_queue_mapping(skb); ++ if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) { ++ queue_index = (u16)((u32)queue_index % ++ skb->dev->real_num_tx_queues); ++ skb_set_queue_mapping(skb, queue_index); ++ } ++ } else { ++ /* skb->dev was IMQ device itself or NULL, be on safe side and ++ * just clear queue mapping. ++ */ ++ skb_set_queue_mapping(skb, 0); ++ } ++} ++ ++static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct nf_queue_entry *entry = skb->nf_queue_entry; ++ ++ skb->nf_queue_entry = NULL; ++ dev->trans_start = jiffies; ++ ++ dev->stats.tx_bytes += skb->len; ++ dev->stats.tx_packets++; ++ ++ if (unlikely(entry == NULL)) { ++ /* We don't know what is going on here.. packet is queued for ++ * imq device, but (probably) not by us. ++ * ++ * If this packet was not send here by imq_nf_queue(), then ++ * skb_save_cb() was not used and skb_free() should not show: ++ * WARNING: IMQ: kfree_skb: skb->cb_next:.. ++ * and/or ++ * WARNING: IMQ: kfree_skb: skb->nf_queue_entry... ++ * ++ * However if this message is shown, then IMQ is somehow broken ++ * and you should report this to linuximq.net. ++ */ ++ ++ /* imq_dev_xmit is black hole that eats all packets, report that ++ * we eat this packet happily and increase dropped counters. ++ */ ++ ++ dev->stats.tx_dropped++; ++ dev_kfree_skb(skb); ++ ++ return NETDEV_TX_OK; ++ } ++ ++ skb_restore_cb(skb); /* restore skb->cb */ ++ ++ skb->imq_flags = 0; ++ skb->destructor = NULL; ++ ++ imq_done_check_queue_mapping(skb, dev); ++ ++ nf_reinject(entry, NF_ACCEPT); ++ ++ return NETDEV_TX_OK; ++} ++ ++static struct net_device *get_imq_device_by_index(int index) ++{ ++ struct net_device *dev = NULL; ++ struct net *net; ++ char buf[8]; ++ ++ /* get device by name and cache result */ ++ snprintf(buf, sizeof(buf), "imq%d", index); ++ ++ /* Search device from all namespaces. 
*/ ++ for_each_net(net) { ++ dev = dev_get_by_name(net, buf); ++ if (dev) ++ break; ++ } ++ ++ if (WARN_ON_ONCE(dev == NULL)) { ++ /* IMQ device not found. Exotic config? */ ++ return ERR_PTR(-ENODEV); ++ } ++ ++ imq_devs_cache[index] = dev; ++ dev_put(dev); ++ ++ return dev; ++} ++ ++static struct nf_queue_entry *nf_queue_entry_dup(struct nf_queue_entry *e) ++{ ++ struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); ++ if (entry) { ++ if (nf_queue_entry_get_refs(entry)) ++ return entry; ++ kfree(entry); ++ } ++ return NULL; ++} ++ ++#ifdef CONFIG_BRIDGE_NETFILTER ++/* When called from bridge netfilter, skb->data must point to MAC header ++ * before calling skb_gso_segment(). Else, original MAC header is lost ++ * and segmented skbs will be sent to wrong destination. ++ */ ++static void nf_bridge_adjust_skb_data(struct sk_buff *skb) ++{ ++ if (skb->nf_bridge) ++ __skb_push(skb, skb->network_header - skb->mac_header); ++} ++ ++static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) ++{ ++ if (skb->nf_bridge) ++ __skb_pull(skb, skb->network_header - skb->mac_header); ++} ++#else ++#define nf_bridge_adjust_skb_data(s) do {} while (0) ++#define nf_bridge_adjust_segmented_data(s) do {} while (0) ++#endif ++ ++static void free_entry(struct nf_queue_entry *entry) ++{ ++ nf_queue_entry_release_refs(entry); ++ kfree(entry); ++} ++ ++static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev); ++ ++static int __imq_nf_queue_gso(struct nf_queue_entry *entry, ++ struct net_device *dev, struct sk_buff *skb) ++{ ++ int ret = -ENOMEM; ++ struct nf_queue_entry *entry_seg; ++ ++ nf_bridge_adjust_segmented_data(skb); ++ ++ if (skb->next == NULL) { /* last packet, no need to copy entry */ ++ struct sk_buff *gso_skb = entry->skb; ++ entry->skb = skb; ++ ret = __imq_nf_queue(entry, dev); ++ if (ret) ++ entry->skb = gso_skb; ++ return ret; ++ } ++ ++ skb->next = NULL; ++ ++ entry_seg = nf_queue_entry_dup(entry); ++ if (entry_seg) { ++ entry_seg->skb = skb; ++ ret = __imq_nf_queue(entry_seg, dev); ++ if (ret) ++ free_entry(entry_seg); ++ } ++ return ret; ++} ++ ++static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num) ++{ ++ struct sk_buff *skb, *segs; ++ struct net_device *dev; ++ unsigned int queued; ++ int index, retval, err; ++ ++ index = entry->skb->imq_flags & IMQ_F_IFMASK; ++ if (unlikely(index > numdevs - 1)) { ++ if (net_ratelimit()) ++ pr_warn("IMQ: invalid device specified, highest is %u\n", ++ numdevs - 1); ++ retval = -EINVAL; ++ goto out_no_dev; ++ } ++ ++ /* check for imq device by index from cache */ ++ dev = imq_devs_cache[index]; ++ if (unlikely(!dev)) { ++ dev = get_imq_device_by_index(index); ++ if (IS_ERR(dev)) { ++ retval = PTR_ERR(dev); ++ goto out_no_dev; ++ } ++ } ++ ++ if (unlikely(!(dev->flags & IFF_UP))) { ++ entry->skb->imq_flags = 0; ++ retval = -ECANCELED; ++ goto out_no_dev; ++ } ++ ++ if (!skb_is_gso(entry->skb)) ++ return __imq_nf_queue(entry, dev); ++ ++ /* Since 3.10.x, GSO handling moved here as result of upstream commit ++ * a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 (netfilter: move ++ * skb_gso_segment into nfnetlink_queue module). ++ * ++ * Following code replicates the gso handling from ++ * 'net/netfilter/nfnetlink_queue_core.c':nfqnl_enqueue_packet(). 
++ */ ++ ++ skb = entry->skb; ++ ++ switch (entry->pf) { ++ case NFPROTO_IPV4: ++ skb->protocol = htons(ETH_P_IP); ++ break; ++ case NFPROTO_IPV6: ++ skb->protocol = htons(ETH_P_IPV6); ++ break; ++ } ++ ++ nf_bridge_adjust_skb_data(skb); ++ segs = skb_gso_segment(skb, 0); ++ /* Does not use PTR_ERR to limit the number of error codes that can be ++ * returned by nf_queue. For instance, callers rely on -ECANCELED to ++ * mean 'ignore this hook'. ++ */ ++ err = -ENOBUFS; ++ if (IS_ERR(segs)) ++ goto out_err; ++ queued = 0; ++ err = 0; ++ do { ++ struct sk_buff *nskb = segs->next; ++ if (nskb && nskb->next) ++ nskb->cb_next = NULL; ++ if (err == 0) ++ err = __imq_nf_queue_gso(entry, dev, segs); ++ if (err == 0) ++ queued++; ++ else ++ kfree_skb(segs); ++ segs = nskb; ++ } while (segs); ++ ++ if (queued) { ++ if (err) /* some segments are already queued */ ++ free_entry(entry); ++ kfree_skb(skb); ++ return 0; ++ } ++ ++out_err: ++ nf_bridge_adjust_segmented_data(skb); ++ retval = err; ++out_no_dev: ++ return retval; ++} ++ ++static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev) ++{ ++ struct sk_buff *skb_orig, *skb, *skb_shared; ++ struct Qdisc *q; ++ struct netdev_queue *txq; ++ spinlock_t *root_lock; ++ int users; ++ int retval = -EINVAL; ++ unsigned int orig_queue_index; ++ ++ dev->last_rx = jiffies; ++ ++ skb = entry->skb; ++ skb_orig = NULL; ++ ++ /* skb has owner? => make clone */ ++ if (unlikely(skb->destructor)) { ++ skb_orig = skb; ++ skb = skb_clone(skb, GFP_ATOMIC); ++ if (unlikely(!skb)) { ++ retval = -ENOMEM; ++ goto out; ++ } ++ skb->cb_next = NULL; ++ entry->skb = skb; ++ } ++ ++ skb->nf_queue_entry = entry; ++ ++ dev->stats.rx_bytes += skb->len; ++ dev->stats.rx_packets++; ++ ++ if (!skb->dev) { ++ /* skb->dev == NULL causes problems, try the find cause. */ ++ if (net_ratelimit()) { ++ dev_warn(&dev->dev, ++ "received packet with skb->dev == NULL\n"); ++ dump_stack(); ++ } ++ ++ skb->dev = dev; ++ } ++ ++ /* Disables softirqs for lock below */ ++ rcu_read_lock_bh(); ++ ++ /* Multi-queue selection */ ++ orig_queue_index = skb_get_queue_mapping(skb); ++ txq = imq_select_queue(dev, skb); ++ ++ q = rcu_dereference(txq->qdisc); ++ if (unlikely(!q->enqueue)) ++ goto packet_not_eaten_by_imq_dev; ++ ++ root_lock = qdisc_lock(q); ++ spin_lock(root_lock); ++ ++ users = atomic_read(&skb->users); ++ ++ skb_shared = skb_get(skb); /* increase reference count by one */ ++ ++ /* backup skb->cb, as qdisc layer will overwrite it */ ++ skb_save_cb(skb_shared); ++ qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */ ++ ++ if (likely(atomic_read(&skb_shared->users) == users + 1)) { ++ kfree_skb(skb_shared); /* decrease reference count by one */ ++ ++ skb->destructor = &imq_skb_destructor; ++ ++ /* cloned? */ ++ if (unlikely(skb_orig)) ++ kfree_skb(skb_orig); /* free original */ ++ ++ spin_unlock(root_lock); ++ rcu_read_unlock_bh(); ++ ++ /* schedule qdisc dequeue */ ++ __netif_schedule(q); ++ ++ retval = 0; ++ goto out; ++ } else { ++ skb_restore_cb(skb_shared); /* restore skb->cb */ ++ skb->nf_queue_entry = NULL; ++ /* ++ * qdisc dropped packet and decreased skb reference count of ++ * skb, so we don't really want to and try refree as that would ++ * actually destroy the skb. ++ */ ++ spin_unlock(root_lock); ++ goto packet_not_eaten_by_imq_dev; ++ } ++ ++packet_not_eaten_by_imq_dev: ++ skb_set_queue_mapping(skb, orig_queue_index); ++ rcu_read_unlock_bh(); ++ ++ /* cloned? 
restore original */ ++ if (unlikely(skb_orig)) { ++ kfree_skb(skb); ++ entry->skb = skb_orig; ++ } ++ retval = -1; ++out: ++ return retval; ++} ++ ++static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb, ++ const struct net_device *indev, ++ const struct net_device *outdev, ++ int (*okfn)(struct sk_buff *)) ++{ ++ return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT; ++} ++ ++static int imq_close(struct net_device *dev) ++{ ++ netif_stop_queue(dev); ++ return 0; ++} ++ ++static int imq_open(struct net_device *dev) ++{ ++ netif_start_queue(dev); ++ return 0; ++} ++ ++static const struct net_device_ops imq_netdev_ops = { ++ .ndo_open = imq_open, ++ .ndo_stop = imq_close, ++ .ndo_start_xmit = imq_dev_xmit, ++ .ndo_get_stats = imq_get_stats, ++}; ++ ++static void imq_setup(struct net_device *dev) ++{ ++ dev->netdev_ops = &imq_netdev_ops; ++ dev->type = ARPHRD_VOID; ++ dev->mtu = 16000; /* too small? */ ++ dev->tx_queue_len = 11000; /* too big? */ ++ dev->flags = IFF_NOARP; ++ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | ++ NETIF_F_GSO | NETIF_F_HW_CSUM | ++ NETIF_F_HIGHDMA; ++ dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | ++ IFF_TX_SKB_SHARING); ++} ++ ++static int imq_validate(struct nlattr *tb[], struct nlattr *data[]) ++{ ++ int ret = 0; ++ ++ if (tb[IFLA_ADDRESS]) { ++ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { ++ ret = -EINVAL; ++ goto end; ++ } ++ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { ++ ret = -EADDRNOTAVAIL; ++ goto end; ++ } ++ } ++ return 0; ++end: ++ pr_warn("IMQ: imq_validate failed (%d)\n", ret); ++ return ret; ++} ++ ++static struct rtnl_link_ops imq_link_ops __read_mostly = { ++ .kind = "imq", ++ .priv_size = 0, ++ .setup = imq_setup, ++ .validate = imq_validate, ++}; ++ ++static const struct nf_queue_handler imq_nfqh = { ++ .outfn = imq_nf_queue, ++}; ++ ++static int __init imq_init_hooks(void) ++{ ++ int ret; ++ ++ nf_register_queue_imq_handler(&imq_nfqh); ++ ++ ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops)); ++ if (ret < 0) ++ nf_unregister_queue_imq_handler(); ++ ++ return ret; ++} ++ ++static int __init imq_init_one(int index) ++{ ++ struct net_device *dev; ++ int ret; ++ ++ dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues); ++ if (!dev) ++ return -ENOMEM; ++ ++ ret = dev_alloc_name(dev, dev->name); ++ if (ret < 0) ++ goto fail; ++ ++ dev->rtnl_link_ops = &imq_link_ops; ++ ret = register_netdevice(dev); ++ if (ret < 0) ++ goto fail; ++ ++ return 0; ++fail: ++ free_netdev(dev); ++ return ret; ++} ++ ++static int __init imq_init_devs(void) ++{ ++ int err, i; ++ ++ if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) { ++ pr_err("IMQ: numdevs has to be betweed 1 and %u\n", ++ IMQ_MAX_DEVS); ++ return -EINVAL; ++ } ++ ++ if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) { ++ pr_err("IMQ: numqueues has to be betweed 1 and %u\n", ++ IMQ_MAX_QUEUES); ++ return -EINVAL; ++ } ++ ++ get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd)); ++ ++ rtnl_lock(); ++ err = __rtnl_link_register(&imq_link_ops); ++ ++ for (i = 0; i < numdevs && !err; i++) ++ err = imq_init_one(i); ++ ++ if (err) { ++ __rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++ } ++ rtnl_unlock(); ++ ++ return err; ++} ++ ++static int __init imq_init_module(void) ++{ ++ int err; ++ ++#if defined(CONFIG_IMQ_NUM_DEVS) ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16); ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2); ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK); ++#endif ++ ++ err = imq_init_devs(); ++ if (err) { ++ pr_err("IMQ: 
Error trying imq_init_devs(net)\n"); ++ return err; ++ } ++ ++ err = imq_init_hooks(); ++ if (err) { ++ pr_err(KERN_ERR "IMQ: Error trying imq_init_hooks()\n"); ++ rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++ return err; ++ } ++ ++ pr_info("IMQ driver loaded successfully. (numdevs = %d, numqueues = %d)\n", ++ numdevs, numqueues); ++ ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ pr_info("\tHooking IMQ before NAT on PREROUTING.\n"); ++#else ++ pr_info("\tHooking IMQ after NAT on PREROUTING.\n"); ++#endif ++#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ pr_info("\tHooking IMQ before NAT on POSTROUTING.\n"); ++#else ++ pr_info("\tHooking IMQ after NAT on POSTROUTING.\n"); ++#endif ++ ++ return 0; ++} ++ ++static void __exit imq_unhook(void) ++{ ++ nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops)); ++ nf_unregister_queue_imq_handler(); ++} ++ ++static void __exit imq_cleanup_devs(void) ++{ ++ rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++} ++ ++static void __exit imq_exit_module(void) ++{ ++ imq_unhook(); ++ imq_cleanup_devs(); ++ pr_info("IMQ driver unloaded successfully.\n"); ++} ++ ++module_init(imq_init_module); ++module_exit(imq_exit_module); ++ ++module_param(numdevs, int, 0); ++module_param(numqueues, int, 0); ++MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will be created)"); ++MODULE_PARM_DESC(numqueues, "number of queues per IMQ device"); ++MODULE_AUTHOR("http://www.linuximq.net"); ++MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information."); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_RTNL_LINK("imq"); ++ +diff -Naur linux-3.10.30.org/drivers/net/Kconfig linux-3.10.30/drivers/net/Kconfig +--- linux-3.10.30.org/drivers/net/Kconfig 2014-02-13 22:48:15.000000000 +0100 ++++ linux-3.10.30/drivers/net/Kconfig 2014-02-14 20:29:05.379402305 +0100 +@@ -207,6 +207,125 @@ + depends on RIONET + default "128" + ++config IMQ ++ tristate "IMQ (intermediate queueing device) support" ++ depends on NETDEVICES && NETFILTER ++ ---help--- ++ The IMQ device(s) is used as placeholder for QoS queueing ++ disciplines. Every packet entering/leaving the IP stack can be ++ directed through the IMQ device where it's enqueued/dequeued to the ++ attached qdisc. This allows you to treat network devices as classes ++ and distribute bandwidth among them. Iptables is used to specify ++ through which IMQ device, if any, packets travel. ++ ++ More information at: http://www.linuximq.net/ ++ ++ To compile this driver as a module, choose M here: the module ++ will be called imq. If unsure, say N. ++ ++choice ++ prompt "IMQ behavior (PRE/POSTROUTING)" ++ depends on IMQ ++ default IMQ_BEHAVIOR_AB ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ IMQ can work in any of the following ways: ++ ++ PREROUTING | POSTROUTING ++ -----------------|------------------- ++ #1 After NAT | After NAT ++ #2 After NAT | Before NAT ++ #3 Before NAT | After NAT ++ #4 Before NAT | Before NAT ++ ++ The default behavior is to hook before NAT on PREROUTING ++ and after NAT on POSTROUTING (#3). ++ ++ This settings are specially usefull when trying to use IMQ ++ to shape NATed clients. ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. 
++ ++config IMQ_BEHAVIOR_AA ++ bool "IMQ AA" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: After NAT ++ POSTROUTING: After NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_AB ++ bool "IMQ AB" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: After NAT ++ POSTROUTING: Before NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_BA ++ bool "IMQ BA" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: Before NAT ++ POSTROUTING: After NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_BB ++ bool "IMQ BB" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: Before NAT ++ POSTROUTING: Before NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++endchoice ++ ++config IMQ_NUM_DEVS ++ int "Number of IMQ devices" ++ range 2 16 ++ depends on IMQ ++ default "16" ++ help ++ This setting defines how many IMQ devices will be created. ++ ++ The default value is 16. ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. 
++ + config TUN + tristate "Universal TUN/TAP device driver support" + select CRC32 +diff -Naur linux-3.10.30.org/drivers/net/Makefile linux-3.10.30/drivers/net/Makefile +--- linux-3.10.30.org/drivers/net/Makefile 2014-02-13 22:48:15.000000000 +0100 ++++ linux-3.10.30/drivers/net/Makefile 2014-02-14 20:29:05.379402305 +0100 +@@ -9,6 +9,7 @@ + obj-$(CONFIG_DUMMY) += dummy.o + obj-$(CONFIG_EQUALIZER) += eql.o + obj-$(CONFIG_IFB) += ifb.o ++obj-$(CONFIG_IMQ) += imq.o + obj-$(CONFIG_MACVLAN) += macvlan.o + obj-$(CONFIG_MACVTAP) += macvtap.o + obj-$(CONFIG_MII) += mii.o +diff -Naur linux-3.10.30.org/include/linux/imq.h linux-3.10.30/include/linux/imq.h +--- linux-3.10.30.org/include/linux/imq.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.10.30/include/linux/imq.h 2014-02-14 20:29:05.379402305 +0100 +@@ -0,0 +1,13 @@ ++#ifndef _IMQ_H ++#define _IMQ_H ++ ++/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */ ++#define IMQ_F_BITS 5 ++ ++#define IMQ_F_IFMASK 0x0f ++#define IMQ_F_ENQUEUE 0x10 ++ ++#define IMQ_MAX_DEVS (IMQ_F_IFMASK + 1) ++ ++#endif /* _IMQ_H */ ++ +diff -Naur linux-3.10.30.org/include/linux/netfilter/xt_IMQ.h linux-3.10.30/include/linux/netfilter/xt_IMQ.h +--- linux-3.10.30.org/include/linux/netfilter/xt_IMQ.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.10.30/include/linux/netfilter/xt_IMQ.h 2014-02-14 20:29:05.379402305 +0100 +@@ -0,0 +1,9 @@ ++#ifndef _XT_IMQ_H ++#define _XT_IMQ_H ++ ++struct xt_imq_info { ++ unsigned int todev; /* target imq device */ ++}; ++ ++#endif /* _XT_IMQ_H */ ++ +diff -Naur linux-3.10.30.org/include/linux/netfilter_ipv4/ipt_IMQ.h linux-3.10.30/include/linux/netfilter_ipv4/ipt_IMQ.h +--- linux-3.10.30.org/include/linux/netfilter_ipv4/ipt_IMQ.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.10.30/include/linux/netfilter_ipv4/ipt_IMQ.h 2014-02-14 20:29:05.379402305 +0100 +@@ -0,0 +1,10 @@ ++#ifndef _IPT_IMQ_H ++#define _IPT_IMQ_H ++ ++/* Backwards compatibility for old userspace */ ++#include ++ ++#define ipt_imq_info xt_imq_info ++ ++#endif /* _IPT_IMQ_H */ ++ +diff -Naur linux-3.10.30.org/include/linux/netfilter_ipv6/ip6t_IMQ.h linux-3.10.30/include/linux/netfilter_ipv6/ip6t_IMQ.h +--- linux-3.10.30.org/include/linux/netfilter_ipv6/ip6t_IMQ.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.10.30/include/linux/netfilter_ipv6/ip6t_IMQ.h 2014-02-14 20:29:05.379402305 +0100 +@@ -0,0 +1,10 @@ ++#ifndef _IP6T_IMQ_H ++#define _IP6T_IMQ_H ++ ++/* Backwards compatibility for old userspace */ ++#include ++ ++#define ip6t_imq_info xt_imq_info ++ ++#endif /* _IP6T_IMQ_H */ ++ +diff -Naur linux-3.10.30.org/include/linux/skbuff.h linux-3.10.30/include/linux/skbuff.h +--- linux-3.10.30.org/include/linux/skbuff.h 2014-02-13 22:48:15.000000000 +0100 ++++ linux-3.10.30/include/linux/skbuff.h 2014-02-14 20:29:05.379402305 +0100 +@@ -33,6 +33,9 @@ + #include + #include + #include ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++#include ++#endif + + /* Don't change this without changing skb_csum_unnecessary! */ + #define CHECKSUM_NONE 0 +@@ -414,6 +417,9 @@ + * first. This is owned by whoever has the skb queued ATM. 
+ */ + char cb[48] __aligned(8); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ void *cb_next; ++#endif + + unsigned long _skb_refdst; + #ifdef CONFIG_XFRM +@@ -449,6 +455,9 @@ + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + struct nf_conntrack *nfct; + #endif ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ struct nf_queue_entry *nf_queue_entry; ++#endif + #ifdef CONFIG_BRIDGE_NETFILTER + struct nf_bridge_info *nf_bridge; + #endif +@@ -487,7 +496,9 @@ + __u8 encapsulation:1; + /* 7/9 bit hole (depending on ndisc_nodetype presence) */ + kmemcheck_bitfield_end(flags2); +- ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ __u8 imq_flags:IMQ_F_BITS; ++#endif + #ifdef CONFIG_NET_DMA + dma_cookie_t dma_cookie; + #endif +@@ -616,7 +627,10 @@ + { + return (struct rtable *)skb_dst(skb); + } +- ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++extern int skb_save_cb(struct sk_buff *skb); ++extern int skb_restore_cb(struct sk_buff *skb); ++#endif + extern void kfree_skb(struct sk_buff *skb); + extern void kfree_skb_list(struct sk_buff *segs); + extern void skb_tx_error(struct sk_buff *skb); +@@ -2735,6 +2749,10 @@ + nf_conntrack_get(src->nfct); + dst->nfctinfo = src->nfctinfo; + #endif ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ dst->imq_flags = src->imq_flags; ++ dst->nf_queue_entry = src->nf_queue_entry; ++#endif + #ifdef CONFIG_BRIDGE_NETFILTER + dst->nf_bridge = src->nf_bridge; + nf_bridge_get(src->nf_bridge); +diff -Naur linux-3.10.30.org/include/net/netfilter/nf_queue.h linux-3.10.30/include/net/netfilter/nf_queue.h +--- linux-3.10.30.org/include/net/netfilter/nf_queue.h 2014-02-13 22:48:15.000000000 +0100 ++++ linux-3.10.30/include/net/netfilter/nf_queue.h 2014-02-14 20:29:05.382736249 +0100 +@@ -29,6 +29,12 @@ + void nf_register_queue_handler(const struct nf_queue_handler *qh); + void nf_unregister_queue_handler(void); + extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); ++extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry); ++ ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh); ++extern void nf_unregister_queue_imq_handler(void); ++#endif + + bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); + void nf_queue_entry_release_refs(struct nf_queue_entry *entry); +diff -Naur linux-3.10.30.org/include/uapi/linux/netfilter.h linux-3.10.30/include/uapi/linux/netfilter.h +--- linux-3.10.30.org/include/uapi/linux/netfilter.h 2014-02-13 22:48:15.000000000 +0100 ++++ linux-3.10.30/include/uapi/linux/netfilter.h 2014-02-14 20:29:05.382736249 +0100 +@@ -13,7 +13,8 @@ + #define NF_QUEUE 3 + #define NF_REPEAT 4 + #define NF_STOP 5 +-#define NF_MAX_VERDICT NF_STOP ++#define NF_IMQ_QUEUE 6 ++#define NF_MAX_VERDICT NF_IMQ_QUEUE + + /* we overload the higher bits for encoding auxiliary data such as the queue + * number or errno values. 
Not nice, but better than additional function +diff -Naur linux-3.10.30.org/net/core/dev.c linux-3.10.30/net/core/dev.c +--- linux-3.10.30.org/net/core/dev.c 2014-02-13 22:48:15.000000000 +0100 ++++ linux-3.10.30/net/core/dev.c 2014-02-14 20:29:05.382736249 +0100 +@@ -129,6 +129,9 @@ + #include + #include + #include ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++#include ++#endif + + #include "net-sysfs.h" + +@@ -2573,7 +2576,12 @@ + } + } + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ if (!list_empty(&ptype_all) && ++ !(skb->imq_flags & IMQ_F_ENQUEUE)) ++#else + if (!list_empty(&ptype_all)) ++#endif + dev_queue_xmit_nit(skb, dev); + + skb_len = skb->len; +diff -Naur linux-3.10.30.org/net/core/skbuff.c linux-3.10.30/net/core/skbuff.c +--- linux-3.10.30.org/net/core/skbuff.c 2014-02-13 22:48:15.000000000 +0100 ++++ linux-3.10.30/net/core/skbuff.c 2014-02-14 21:47:17.286039229 +0100 +@@ -73,6 +73,9 @@ + + struct kmem_cache *skbuff_head_cache __read_mostly; + static struct kmem_cache *skbuff_fclone_cache __read_mostly; ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++static struct kmem_cache *skbuff_cb_store_cache __read_mostly; ++#endif + + /** + * skb_panic - private function for out-of-line support +@@ -552,6 +555,29 @@ + WARN_ON(in_irq()); + skb->destructor(skb); + } ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ /* ++ * This should not happen. When it does, avoid memleak by restoring ++ * the chain of cb-backups. ++ */ ++ while (skb->cb_next != NULL) { ++ if (net_ratelimit()) ++ pr_warn("IMQ: kfree_skb: skb->cb_next: %08x\n", ++ (unsigned int)skb->cb_next); ++ ++ skb_restore_cb(skb); ++ } ++ /* ++ * This should not happen either, nf_queue_entry is nullified in ++ * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are ++ * leaking entry pointers, maybe memory. We don't know if this is ++ * pointer to already freed memory, or should this be freed. ++ * If this happens we need to add refcounting, etc for nf_queue_entry. 
++ */ ++ if (skb->nf_queue_entry && net_ratelimit()) ++ pr_warn("%s\n", "IMQ: kfree_skb: skb->nf_queue_entry != NULL"); ++#endif ++ + #if IS_ENABLED(CONFIG_NF_CONNTRACK) + nf_conntrack_put(skb->nfct); + #endif +@@ -683,6 +709,10 @@ + new->sp = secpath_get(old->sp); + #endif + memcpy(new->cb, old->cb, sizeof(old->cb)); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ new->cb_next = NULL; ++ /*skb_copy_stored_cb(new, old);*/ ++#endif + new->csum = old->csum; + new->local_df = old->local_df; + new->pkt_type = old->pkt_type; +@@ -3050,6 +3080,15 @@ + } + EXPORT_SYMBOL_GPL(skb_gro_receive); + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++/* Control buffer save/restore for IMQ devices */ ++struct skb_cb_table { ++ char cb[48] __aligned(8); ++ void *cb_next; ++ atomic_t refcnt; ++}; ++#endif ++ + void __init skb_init(void) + { + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", +@@ -3063,6 +3102,13 @@ + 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache", ++ sizeof(struct skb_cb_table), ++ 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC, ++ NULL); ++#endif + } + + /** +@@ -3348,6 +3394,76 @@ + EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); + + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++static DEFINE_SPINLOCK(skb_cb_store_lock); ++ ++int skb_save_cb(struct sk_buff *skb) ++{ ++ struct skb_cb_table *next; ++ ++ next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC); ++ if (!next) ++ return -ENOMEM; ++ ++ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb)); ++ ++ memcpy(next->cb, skb->cb, sizeof(skb->cb)); ++ next->cb_next = skb->cb_next; ++ ++ atomic_set(&next->refcnt, 1); ++ ++ skb->cb_next = next; ++ return 0; ++} ++EXPORT_SYMBOL(skb_save_cb); ++ ++int skb_restore_cb(struct sk_buff *skb) ++{ ++ struct skb_cb_table *next; ++ ++ if (!skb->cb_next) ++ return 0; ++ ++ next = skb->cb_next; ++ ++ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb)); ++ ++ memcpy(skb->cb, next->cb, sizeof(skb->cb)); ++ skb->cb_next = next->cb_next; ++ ++ spin_lock(&skb_cb_store_lock); ++ ++ if (atomic_dec_and_test(&next->refcnt)) ++ kmem_cache_free(skbuff_cb_store_cache, next); ++ ++ spin_unlock(&skb_cb_store_lock); ++ ++ return 0; ++} ++EXPORT_SYMBOL(skb_restore_cb); ++ ++static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old) ++{ ++ struct skb_cb_table *next; ++ struct sk_buff *old; ++ ++ if (!__old->cb_next) { ++ new->cb_next = NULL; ++ return; ++ } ++ ++ spin_lock(&skb_cb_store_lock); ++ ++ old = (struct sk_buff *)__old; ++ ++ next = old->cb_next; ++ atomic_inc(&next->refcnt); ++ new->cb_next = next; ++ ++ spin_unlock(&skb_cb_store_lock); ++} ++#endif ++ + /** + * skb_partial_csum_set - set up and verify partial csum values for packet + * @skb: the skb to set +diff -Naur linux-3.10.30.org/net/ipv6/ip6_output.c linux-3.10.30/net/ipv6/ip6_output.c +--- linux-3.10.30.org/net/ipv6/ip6_output.c 2014-02-13 22:48:15.000000000 +0100 ++++ linux-3.10.30/net/ipv6/ip6_output.c 2014-02-14 20:29:05.392738001 +0100 +@@ -89,9 +89,6 @@ + struct in6_addr *nexthop; + int ret; + +- skb->protocol = htons(ETH_P_IPV6); +- skb->dev = dev; +- + if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { + struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + +@@ -168,6 +165,13 @@ + return 0; + } + ++ /* ++ * IMQ-patch: moved setting skb->dev and skb->protocol from ++ * ip6_finish_output2 to fix crashing at netif_skb_features(). 
++ */ ++ skb->protocol = htons(ETH_P_IPV6); ++ skb->dev = dev; ++ + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, + ip6_finish_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); +diff -Naur linux-3.10.30.org/net/ipv6/ip6_output.c.orig linux-3.10.30/net/ipv6/ip6_output.c.orig +--- linux-3.10.30.org/net/ipv6/ip6_output.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.10.30/net/ipv6/ip6_output.c.orig 2014-02-14 20:29:05.392738001 +0100 +@@ -0,0 +1,1580 @@ ++/* ++ * IPv6 output functions ++ * Linux INET6 implementation ++ * ++ * Authors: ++ * Pedro Roque ++ * ++ * Based on linux/net/ipv4/ip_output.c ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Changes: ++ * A.N.Kuznetsov : airthmetics in fragmentation. ++ * extension headers are implemented. ++ * route changes now work. ++ * ip6_forward does not confuse sniffers. ++ * etc. ++ * ++ * H. von Brand : Added missing #include ++ * Imran Patel : frag id should be in NBO ++ * Kazunori MIYAZAWA @USAGI ++ * : add ip6_append_data and related functions ++ * for datagram xmit ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int __ip6_local_out(struct sk_buff *skb) ++{ ++ int len; ++ ++ len = skb->len - sizeof(struct ipv6hdr); ++ if (len > IPV6_MAXPLEN) ++ len = 0; ++ ipv6_hdr(skb)->payload_len = htons(len); ++ ++ return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, ++ skb_dst(skb)->dev, dst_output); ++} ++ ++int ip6_local_out(struct sk_buff *skb) ++{ ++ int err; ++ ++ err = __ip6_local_out(skb); ++ if (likely(err == 1)) ++ err = dst_output(skb); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(ip6_local_out); ++ ++static int ip6_finish_output2(struct sk_buff *skb) ++{ ++ struct dst_entry *dst = skb_dst(skb); ++ struct net_device *dev = dst->dev; ++ struct neighbour *neigh; ++ struct in6_addr *nexthop; ++ int ret; ++ ++ skb->protocol = htons(ETH_P_IPV6); ++ skb->dev = dev; ++ ++ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { ++ struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); ++ ++ if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && ++ ((mroute6_socket(dev_net(dev), skb) && ++ !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ++ ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, ++ &ipv6_hdr(skb)->saddr))) { ++ struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); ++ ++ /* Do not check for IFF_ALLMULTI; multicast routing ++ is not supported in any case. 
++ */ ++ if (newskb) ++ NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, ++ newskb, NULL, newskb->dev, ++ dev_loopback_xmit); ++ ++ if (ipv6_hdr(skb)->hop_limit == 0) { ++ IP6_INC_STATS(dev_net(dev), idev, ++ IPSTATS_MIB_OUTDISCARDS); ++ kfree_skb(skb); ++ return 0; ++ } ++ } ++ ++ IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, ++ skb->len); ++ ++ if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= ++ IPV6_ADDR_SCOPE_NODELOCAL && ++ !(dev->flags & IFF_LOOPBACK)) { ++ kfree_skb(skb); ++ return 0; ++ } ++ } ++ ++ rcu_read_lock_bh(); ++ nexthop = rt6_nexthop((struct rt6_info *)dst); ++ neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); ++ if (unlikely(!neigh)) ++ neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); ++ if (!IS_ERR(neigh)) { ++ ret = dst_neigh_output(dst, neigh, skb); ++ rcu_read_unlock_bh(); ++ return ret; ++ } ++ rcu_read_unlock_bh(); ++ ++ IP6_INC_STATS(dev_net(dst->dev), ++ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); ++ kfree_skb(skb); ++ return -EINVAL; ++} ++ ++static int ip6_finish_output(struct sk_buff *skb) ++{ ++ if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || ++ dst_allfrag(skb_dst(skb)) || ++ (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) ++ return ip6_fragment(skb, ip6_finish_output2); ++ else ++ return ip6_finish_output2(skb); ++} ++ ++int ip6_output(struct sk_buff *skb) ++{ ++ struct net_device *dev = skb_dst(skb)->dev; ++ struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); ++ if (unlikely(idev->cnf.disable_ipv6)) { ++ IP6_INC_STATS(dev_net(dev), idev, ++ IPSTATS_MIB_OUTDISCARDS); ++ kfree_skb(skb); ++ return 0; ++ } ++ ++ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, ++ ip6_finish_output, ++ !(IP6CB(skb)->flags & IP6SKB_REROUTED)); ++} ++ ++/* ++ * xmit an sk_buff (used by TCP, SCTP and DCCP) ++ */ ++ ++int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, ++ struct ipv6_txoptions *opt, int tclass) ++{ ++ struct net *net = sock_net(sk); ++ struct ipv6_pinfo *np = inet6_sk(sk); ++ struct in6_addr *first_hop = &fl6->daddr; ++ struct dst_entry *dst = skb_dst(skb); ++ struct ipv6hdr *hdr; ++ u8 proto = fl6->flowi6_proto; ++ int seg_len = skb->len; ++ int hlimit = -1; ++ u32 mtu; ++ ++ if (opt) { ++ unsigned int head_room; ++ ++ /* First: exthdrs may take lots of space (~8K for now) ++ MAX_HEADER is not enough. 
++ */ ++ head_room = opt->opt_nflen + opt->opt_flen; ++ seg_len += head_room; ++ head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); ++ ++ if (skb_headroom(skb) < head_room) { ++ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); ++ if (skb2 == NULL) { ++ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_OUTDISCARDS); ++ kfree_skb(skb); ++ return -ENOBUFS; ++ } ++ consume_skb(skb); ++ skb = skb2; ++ skb_set_owner_w(skb, sk); ++ } ++ if (opt->opt_flen) ++ ipv6_push_frag_opts(skb, opt, &proto); ++ if (opt->opt_nflen) ++ ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); ++ } ++ ++ skb_push(skb, sizeof(struct ipv6hdr)); ++ skb_reset_network_header(skb); ++ hdr = ipv6_hdr(skb); ++ ++ /* ++ * Fill in the IPv6 header ++ */ ++ if (np) ++ hlimit = np->hop_limit; ++ if (hlimit < 0) ++ hlimit = ip6_dst_hoplimit(dst); ++ ++ ip6_flow_hdr(hdr, tclass, fl6->flowlabel); ++ ++ hdr->payload_len = htons(seg_len); ++ hdr->nexthdr = proto; ++ hdr->hop_limit = hlimit; ++ ++ hdr->saddr = fl6->saddr; ++ hdr->daddr = *first_hop; ++ ++ skb->priority = sk->sk_priority; ++ skb->mark = sk->sk_mark; ++ ++ mtu = dst_mtu(dst); ++ if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { ++ IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_OUT, skb->len); ++ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, ++ dst->dev, dst_output); ++ } ++ ++ skb->dev = dst->dev; ++ ipv6_local_error(sk, EMSGSIZE, fl6, mtu); ++ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); ++ kfree_skb(skb); ++ return -EMSGSIZE; ++} ++ ++EXPORT_SYMBOL(ip6_xmit); ++ ++static int ip6_call_ra_chain(struct sk_buff *skb, int sel) ++{ ++ struct ip6_ra_chain *ra; ++ struct sock *last = NULL; ++ ++ read_lock(&ip6_ra_lock); ++ for (ra = ip6_ra_chain; ra; ra = ra->next) { ++ struct sock *sk = ra->sk; ++ if (sk && ra->sel == sel && ++ (!sk->sk_bound_dev_if || ++ sk->sk_bound_dev_if == skb->dev->ifindex)) { ++ if (last) { ++ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); ++ if (skb2) ++ rawv6_rcv(last, skb2); ++ } ++ last = sk; ++ } ++ } ++ ++ if (last) { ++ rawv6_rcv(last, skb); ++ read_unlock(&ip6_ra_lock); ++ return 1; ++ } ++ read_unlock(&ip6_ra_lock); ++ return 0; ++} ++ ++static int ip6_forward_proxy_check(struct sk_buff *skb) ++{ ++ struct ipv6hdr *hdr = ipv6_hdr(skb); ++ u8 nexthdr = hdr->nexthdr; ++ __be16 frag_off; ++ int offset; ++ ++ if (ipv6_ext_hdr(nexthdr)) { ++ offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); ++ if (offset < 0) ++ return 0; ++ } else ++ offset = sizeof(struct ipv6hdr); ++ ++ if (nexthdr == IPPROTO_ICMPV6) { ++ struct icmp6hdr *icmp6; ++ ++ if (!pskb_may_pull(skb, (skb_network_header(skb) + ++ offset + 1 - skb->data))) ++ return 0; ++ ++ icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); ++ ++ switch (icmp6->icmp6_type) { ++ case NDISC_ROUTER_SOLICITATION: ++ case NDISC_ROUTER_ADVERTISEMENT: ++ case NDISC_NEIGHBOUR_SOLICITATION: ++ case NDISC_NEIGHBOUR_ADVERTISEMENT: ++ case NDISC_REDIRECT: ++ /* For reaction involving unicast neighbor discovery ++ * message destined to the proxied address, pass it to ++ * input function. ++ */ ++ return 1; ++ default: ++ break; ++ } ++ } ++ ++ /* ++ * The proxying router can't forward traffic sent to a link-local ++ * address, so signal the sender and discard the packet. This ++ * behavior is clarified by the MIPv6 specification. 
++ */ ++ if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { ++ dst_link_failure(skb); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static inline int ip6_forward_finish(struct sk_buff *skb) ++{ ++ return dst_output(skb); ++} ++ ++int ip6_forward(struct sk_buff *skb) ++{ ++ struct dst_entry *dst = skb_dst(skb); ++ struct ipv6hdr *hdr = ipv6_hdr(skb); ++ struct inet6_skb_parm *opt = IP6CB(skb); ++ struct net *net = dev_net(dst->dev); ++ u32 mtu; ++ ++ if (net->ipv6.devconf_all->forwarding == 0) ++ goto error; ++ ++ if (skb_warn_if_lro(skb)) ++ goto drop; ++ ++ if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { ++ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); ++ goto drop; ++ } ++ ++ if (skb->pkt_type != PACKET_HOST) ++ goto drop; ++ ++ skb_forward_csum(skb); ++ ++ /* ++ * We DO NOT make any processing on ++ * RA packets, pushing them to user level AS IS ++ * without ane WARRANTY that application will be able ++ * to interpret them. The reason is that we ++ * cannot make anything clever here. ++ * ++ * We are not end-node, so that if packet contains ++ * AH/ESP, we cannot make anything. ++ * Defragmentation also would be mistake, RA packets ++ * cannot be fragmented, because there is no warranty ++ * that different fragments will go along one path. --ANK ++ */ ++ if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { ++ if (ip6_call_ra_chain(skb, ntohs(opt->ra))) ++ return 0; ++ } ++ ++ /* ++ * check and decrement ttl ++ */ ++ if (hdr->hop_limit <= 1) { ++ /* Force OUTPUT device used as source address */ ++ skb->dev = dst->dev; ++ icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); ++ IP6_INC_STATS_BH(net, ++ ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); ++ ++ kfree_skb(skb); ++ return -ETIMEDOUT; ++ } ++ ++ /* XXX: idev->cnf.proxy_ndp? */ ++ if (net->ipv6.devconf_all->proxy_ndp && ++ pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { ++ int proxied = ip6_forward_proxy_check(skb); ++ if (proxied > 0) ++ return ip6_input(skb); ++ else if (proxied < 0) { ++ IP6_INC_STATS(net, ip6_dst_idev(dst), ++ IPSTATS_MIB_INDISCARDS); ++ goto drop; ++ } ++ } ++ ++ if (!xfrm6_route_forward(skb)) { ++ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); ++ goto drop; ++ } ++ dst = skb_dst(skb); ++ ++ /* IPv6 specs say nothing about it, but it is clear that we cannot ++ send redirects to source routed frames. ++ We don't send redirects to frames decapsulated from IPsec. ++ */ ++ if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { ++ struct in6_addr *target = NULL; ++ struct inet_peer *peer; ++ struct rt6_info *rt; ++ ++ /* ++ * incoming and outgoing devices are the same ++ * send a redirect. ++ */ ++ ++ rt = (struct rt6_info *) dst; ++ if (rt->rt6i_flags & RTF_GATEWAY) ++ target = &rt->rt6i_gateway; ++ else ++ target = &hdr->daddr; ++ ++ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); ++ ++ /* Limit redirects both by destination (here) ++ and by source (inside ndisc_send_redirect) ++ */ ++ if (inet_peer_xrlim_allow(peer, 1*HZ)) ++ ndisc_send_redirect(skb, target); ++ if (peer) ++ inet_putpeer(peer); ++ } else { ++ int addrtype = ipv6_addr_type(&hdr->saddr); ++ ++ /* This check is security critical. 
*/ ++ if (addrtype == IPV6_ADDR_ANY || ++ addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) ++ goto error; ++ if (addrtype & IPV6_ADDR_LINKLOCAL) { ++ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ++ ICMPV6_NOT_NEIGHBOUR, 0); ++ goto error; ++ } ++ } ++ ++ mtu = dst_mtu(dst); ++ if (mtu < IPV6_MIN_MTU) ++ mtu = IPV6_MIN_MTU; ++ ++ if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || ++ (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { ++ /* Again, force OUTPUT device used as source address */ ++ skb->dev = dst->dev; ++ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ++ IP6_INC_STATS_BH(net, ++ ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); ++ IP6_INC_STATS_BH(net, ++ ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); ++ kfree_skb(skb); ++ return -EMSGSIZE; ++ } ++ ++ if (skb_cow(skb, dst->dev->hard_header_len)) { ++ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); ++ goto drop; ++ } ++ ++ hdr = ipv6_hdr(skb); ++ ++ /* Mangling hops number delayed to point after skb COW */ ++ ++ hdr->hop_limit--; ++ ++ IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); ++ IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); ++ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, ++ ip6_forward_finish); ++ ++error: ++ IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); ++drop: ++ kfree_skb(skb); ++ return -EINVAL; ++} ++ ++static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) ++{ ++ to->pkt_type = from->pkt_type; ++ to->priority = from->priority; ++ to->protocol = from->protocol; ++ skb_dst_drop(to); ++ skb_dst_set(to, dst_clone(skb_dst(from))); ++ to->dev = from->dev; ++ to->mark = from->mark; ++ ++#ifdef CONFIG_NET_SCHED ++ to->tc_index = from->tc_index; ++#endif ++ nf_copy(to, from); ++#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) ++ to->nf_trace = from->nf_trace; ++#endif ++ skb_copy_secmark(to, from); ++} ++ ++int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ++{ ++ struct sk_buff *frag; ++ struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); ++ struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; ++ struct ipv6hdr *tmp_hdr; ++ struct frag_hdr *fh; ++ unsigned int mtu, hlen, left, len; ++ int hroom, troom; ++ __be32 frag_id = 0; ++ int ptr, offset = 0, err=0; ++ u8 *prevhdr, nexthdr = 0; ++ struct net *net = dev_net(skb_dst(skb)->dev); ++ ++ hlen = ip6_find_1stfragopt(skb, &prevhdr); ++ nexthdr = *prevhdr; ++ ++ mtu = ip6_skb_dst_mtu(skb); ++ ++ /* We must not fragment if the socket is set to force MTU discovery ++ * or if the skb it not generated by a local socket. ++ */ ++ if (unlikely(!skb->local_df && skb->len > mtu) || ++ (IP6CB(skb)->frag_max_size && ++ IP6CB(skb)->frag_max_size > mtu)) { ++ if (skb->sk && dst_allfrag(skb_dst(skb))) ++ sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); ++ ++ skb->dev = skb_dst(skb)->dev; ++ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ++ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_FRAGFAILS); ++ kfree_skb(skb); ++ return -EMSGSIZE; ++ } ++ ++ if (np && np->frag_size < mtu) { ++ if (np->frag_size) ++ mtu = np->frag_size; ++ } ++ mtu -= hlen + sizeof(struct frag_hdr); ++ ++ if (skb_has_frag_list(skb)) { ++ int first_len = skb_pagelen(skb); ++ struct sk_buff *frag2; ++ ++ if (first_len - hlen > mtu || ++ ((first_len - hlen) & 7) || ++ skb_cloned(skb)) ++ goto slow_path; ++ ++ skb_walk_frags(skb, frag) { ++ /* Correct geometry. 
*/ ++ if (frag->len > mtu || ++ ((frag->len & 7) && frag->next) || ++ skb_headroom(frag) < hlen) ++ goto slow_path_clean; ++ ++ /* Partially cloned skb? */ ++ if (skb_shared(frag)) ++ goto slow_path_clean; ++ ++ BUG_ON(frag->sk); ++ if (skb->sk) { ++ frag->sk = skb->sk; ++ frag->destructor = sock_wfree; ++ } ++ skb->truesize -= frag->truesize; ++ } ++ ++ err = 0; ++ offset = 0; ++ frag = skb_shinfo(skb)->frag_list; ++ skb_frag_list_init(skb); ++ /* BUILD HEADER */ ++ ++ *prevhdr = NEXTHDR_FRAGMENT; ++ tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); ++ if (!tmp_hdr) { ++ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_FRAGFAILS); ++ return -ENOMEM; ++ } ++ ++ __skb_pull(skb, hlen); ++ fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); ++ __skb_push(skb, hlen); ++ skb_reset_network_header(skb); ++ memcpy(skb_network_header(skb), tmp_hdr, hlen); ++ ++ ipv6_select_ident(fh, rt); ++ fh->nexthdr = nexthdr; ++ fh->reserved = 0; ++ fh->frag_off = htons(IP6_MF); ++ frag_id = fh->identification; ++ ++ first_len = skb_pagelen(skb); ++ skb->data_len = first_len - skb_headlen(skb); ++ skb->len = first_len; ++ ipv6_hdr(skb)->payload_len = htons(first_len - ++ sizeof(struct ipv6hdr)); ++ ++ dst_hold(&rt->dst); ++ ++ for (;;) { ++ /* Prepare header of the next frame, ++ * before previous one went down. */ ++ if (frag) { ++ frag->ip_summed = CHECKSUM_NONE; ++ skb_reset_transport_header(frag); ++ fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); ++ __skb_push(frag, hlen); ++ skb_reset_network_header(frag); ++ memcpy(skb_network_header(frag), tmp_hdr, ++ hlen); ++ offset += skb->len - hlen - sizeof(struct frag_hdr); ++ fh->nexthdr = nexthdr; ++ fh->reserved = 0; ++ fh->frag_off = htons(offset); ++ if (frag->next != NULL) ++ fh->frag_off |= htons(IP6_MF); ++ fh->identification = frag_id; ++ ipv6_hdr(frag)->payload_len = ++ htons(frag->len - ++ sizeof(struct ipv6hdr)); ++ ip6_copy_metadata(frag, skb); ++ } ++ ++ err = output(skb); ++ if(!err) ++ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), ++ IPSTATS_MIB_FRAGCREATES); ++ ++ if (err || !frag) ++ break; ++ ++ skb = frag; ++ frag = skb->next; ++ skb->next = NULL; ++ } ++ ++ kfree(tmp_hdr); ++ ++ if (err == 0) { ++ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), ++ IPSTATS_MIB_FRAGOKS); ++ ip6_rt_put(rt); ++ return 0; ++ } ++ ++ while (frag) { ++ skb = frag->next; ++ kfree_skb(frag); ++ frag = skb; ++ } ++ ++ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), ++ IPSTATS_MIB_FRAGFAILS); ++ ip6_rt_put(rt); ++ return err; ++ ++slow_path_clean: ++ skb_walk_frags(skb, frag2) { ++ if (frag2 == frag) ++ break; ++ frag2->sk = NULL; ++ frag2->destructor = NULL; ++ skb->truesize += frag2->truesize; ++ } ++ } ++ ++slow_path: ++ if ((skb->ip_summed == CHECKSUM_PARTIAL) && ++ skb_checksum_help(skb)) ++ goto fail; ++ ++ left = skb->len - hlen; /* Space per frame */ ++ ptr = hlen; /* Where to start from */ ++ ++ /* ++ * Fragment the datagram. ++ */ ++ ++ *prevhdr = NEXTHDR_FRAGMENT; ++ hroom = LL_RESERVED_SPACE(rt->dst.dev); ++ troom = rt->dst.dev->needed_tailroom; ++ ++ /* ++ * Keep copying data until we run out. ++ */ ++ while(left > 0) { ++ len = left; ++ /* IF: it doesn't fit, use 'mtu' - the data space left */ ++ if (len > mtu) ++ len = mtu; ++ /* IF: we are not sending up to and including the packet end ++ then align the next start on an eight byte boundary */ ++ if (len < left) { ++ len &= ~7; ++ } ++ /* ++ * Allocate buffer. 
++ */ ++ ++ if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + ++ hroom + troom, GFP_ATOMIC)) == NULL) { ++ NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); ++ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_FRAGFAILS); ++ err = -ENOMEM; ++ goto fail; ++ } ++ ++ /* ++ * Set up data on packet ++ */ ++ ++ ip6_copy_metadata(frag, skb); ++ skb_reserve(frag, hroom); ++ skb_put(frag, len + hlen + sizeof(struct frag_hdr)); ++ skb_reset_network_header(frag); ++ fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); ++ frag->transport_header = (frag->network_header + hlen + ++ sizeof(struct frag_hdr)); ++ ++ /* ++ * Charge the memory for the fragment to any owner ++ * it might possess ++ */ ++ if (skb->sk) ++ skb_set_owner_w(frag, skb->sk); ++ ++ /* ++ * Copy the packet header into the new buffer. ++ */ ++ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); ++ ++ /* ++ * Build fragment header. ++ */ ++ fh->nexthdr = nexthdr; ++ fh->reserved = 0; ++ if (!frag_id) { ++ ipv6_select_ident(fh, rt); ++ frag_id = fh->identification; ++ } else ++ fh->identification = frag_id; ++ ++ /* ++ * Copy a block of the IP datagram. ++ */ ++ if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) ++ BUG(); ++ left -= len; ++ ++ fh->frag_off = htons(offset); ++ if (left > 0) ++ fh->frag_off |= htons(IP6_MF); ++ ipv6_hdr(frag)->payload_len = htons(frag->len - ++ sizeof(struct ipv6hdr)); ++ ++ ptr += len; ++ offset += len; ++ ++ /* ++ * Put this fragment into the sending queue. ++ */ ++ err = output(frag); ++ if (err) ++ goto fail; ++ ++ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_FRAGCREATES); ++ } ++ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_FRAGOKS); ++ consume_skb(skb); ++ return err; ++ ++fail: ++ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_FRAGFAILS); ++ kfree_skb(skb); ++ return err; ++} ++ ++static inline int ip6_rt_check(const struct rt6key *rt_key, ++ const struct in6_addr *fl_addr, ++ const struct in6_addr *addr_cache) ++{ ++ return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && ++ (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); ++} ++ ++static struct dst_entry *ip6_sk_dst_check(struct sock *sk, ++ struct dst_entry *dst, ++ const struct flowi6 *fl6) ++{ ++ struct ipv6_pinfo *np = inet6_sk(sk); ++ struct rt6_info *rt; ++ ++ if (!dst) ++ goto out; ++ ++ if (dst->ops->family != AF_INET6) { ++ dst_release(dst); ++ return NULL; ++ } ++ ++ rt = (struct rt6_info *)dst; ++ /* Yes, checking route validity in not connected ++ * case is not very simple. Take into account, ++ * that we do not support routing by source, TOS, ++ * and MSG_DONTROUTE --ANK (980726) ++ * ++ * 1. ip6_rt_check(): If route was host route, ++ * check that cached destination is current. ++ * If it is network route, we still may ++ * check its validity using saved pointer ++ * to the last used address: daddr_cache. ++ * We do not want to save whole address now, ++ * (because main consumer of this service ++ * is tcp, which has not this problem), ++ * so that the last trick works only on connected ++ * sockets. ++ * 2. oif also should be the same. 
++ */ ++ if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || ++#ifdef CONFIG_IPV6_SUBTREES ++ ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || ++#endif ++ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { ++ dst_release(dst); ++ dst = NULL; ++ } ++ ++out: ++ return dst; ++} ++ ++static int ip6_dst_lookup_tail(struct sock *sk, ++ struct dst_entry **dst, struct flowi6 *fl6) ++{ ++ struct net *net = sock_net(sk); ++#ifdef CONFIG_IPV6_OPTIMISTIC_DAD ++ struct neighbour *n; ++ struct rt6_info *rt; ++#endif ++ int err; ++ ++ if (*dst == NULL) ++ *dst = ip6_route_output(net, sk, fl6); ++ ++ if ((err = (*dst)->error)) ++ goto out_err_release; ++ ++ if (ipv6_addr_any(&fl6->saddr)) { ++ struct rt6_info *rt = (struct rt6_info *) *dst; ++ err = ip6_route_get_saddr(net, rt, &fl6->daddr, ++ sk ? inet6_sk(sk)->srcprefs : 0, ++ &fl6->saddr); ++ if (err) ++ goto out_err_release; ++ } ++ ++#ifdef CONFIG_IPV6_OPTIMISTIC_DAD ++ /* ++ * Here if the dst entry we've looked up ++ * has a neighbour entry that is in the INCOMPLETE ++ * state and the src address from the flow is ++ * marked as OPTIMISTIC, we release the found ++ * dst entry and replace it instead with the ++ * dst entry of the nexthop router ++ */ ++ rt = (struct rt6_info *) *dst; ++ rcu_read_lock_bh(); ++ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); ++ err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; ++ rcu_read_unlock_bh(); ++ ++ if (err) { ++ struct inet6_ifaddr *ifp; ++ struct flowi6 fl_gw6; ++ int redirect; ++ ++ ifp = ipv6_get_ifaddr(net, &fl6->saddr, ++ (*dst)->dev, 1); ++ ++ redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); ++ if (ifp) ++ in6_ifa_put(ifp); ++ ++ if (redirect) { ++ /* ++ * We need to get the dst entry for the ++ * default router instead ++ */ ++ dst_release(*dst); ++ memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); ++ memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); ++ *dst = ip6_route_output(net, sk, &fl_gw6); ++ if ((err = (*dst)->error)) ++ goto out_err_release; ++ } ++ } ++#endif ++ ++ return 0; ++ ++out_err_release: ++ if (err == -ENETUNREACH) ++ IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); ++ dst_release(*dst); ++ *dst = NULL; ++ return err; ++} ++ ++/** ++ * ip6_dst_lookup - perform route lookup on flow ++ * @sk: socket which provides route info ++ * @dst: pointer to dst_entry * for result ++ * @fl6: flow to lookup ++ * ++ * This function performs a route lookup on the given flow. ++ * ++ * It returns zero on success, or a standard errno code on error. ++ */ ++int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) ++{ ++ *dst = NULL; ++ return ip6_dst_lookup_tail(sk, dst, fl6); ++} ++EXPORT_SYMBOL_GPL(ip6_dst_lookup); ++ ++/** ++ * ip6_dst_lookup_flow - perform route lookup on flow with ipsec ++ * @sk: socket which provides route info ++ * @fl6: flow to lookup ++ * @final_dst: final destination address for ipsec lookup ++ * @can_sleep: we are in a sleepable context ++ * ++ * This function performs a route lookup on the given flow. ++ * ++ * It returns a valid dst pointer on success, or a pointer encoded ++ * error code. 
++ */ ++struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, ++ const struct in6_addr *final_dst, ++ bool can_sleep) ++{ ++ struct dst_entry *dst = NULL; ++ int err; ++ ++ err = ip6_dst_lookup_tail(sk, &dst, fl6); ++ if (err) ++ return ERR_PTR(err); ++ if (final_dst) ++ fl6->daddr = *final_dst; ++ if (can_sleep) ++ fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; ++ ++ return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); ++} ++EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); ++ ++/** ++ * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow ++ * @sk: socket which provides the dst cache and route info ++ * @fl6: flow to lookup ++ * @final_dst: final destination address for ipsec lookup ++ * @can_sleep: we are in a sleepable context ++ * ++ * This function performs a route lookup on the given flow with the ++ * possibility of using the cached route in the socket if it is valid. ++ * It will take the socket dst lock when operating on the dst cache. ++ * As a result, this function can only be used in process context. ++ * ++ * It returns a valid dst pointer on success, or a pointer encoded ++ * error code. ++ */ ++struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, ++ const struct in6_addr *final_dst, ++ bool can_sleep) ++{ ++ struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); ++ int err; ++ ++ dst = ip6_sk_dst_check(sk, dst, fl6); ++ ++ err = ip6_dst_lookup_tail(sk, &dst, fl6); ++ if (err) ++ return ERR_PTR(err); ++ if (final_dst) ++ fl6->daddr = *final_dst; ++ if (can_sleep) ++ fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; ++ ++ return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); ++} ++EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); ++ ++static inline int ip6_ufo_append_data(struct sock *sk, ++ int getfrag(void *from, char *to, int offset, int len, ++ int odd, struct sk_buff *skb), ++ void *from, int length, int hh_len, int fragheaderlen, ++ int transhdrlen, int mtu,unsigned int flags, ++ struct rt6_info *rt) ++ ++{ ++ struct sk_buff *skb; ++ int err; ++ ++ /* There is support for UDP large send offload by network ++ * device, so create one single skb packet containing complete ++ * udp datagram ++ */ ++ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { ++ struct frag_hdr fhdr; ++ ++ skb = sock_alloc_send_skb(sk, ++ hh_len + fragheaderlen + transhdrlen + 20, ++ (flags & MSG_DONTWAIT), &err); ++ if (skb == NULL) ++ return err; ++ ++ /* reserve space for Hardware header */ ++ skb_reserve(skb, hh_len); ++ ++ /* create space for UDP/IP header */ ++ skb_put(skb,fragheaderlen + transhdrlen); ++ ++ /* initialize network header pointer */ ++ skb_reset_network_header(skb); ++ ++ /* initialize protocol header pointer */ ++ skb->transport_header = skb->network_header + fragheaderlen; ++ ++ skb->ip_summed = CHECKSUM_PARTIAL; ++ skb->csum = 0; ++ ++ /* Specify the length of each IPv6 datagram fragment. ++ * It has to be a multiple of 8. ++ */ ++ skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - ++ sizeof(struct frag_hdr)) & ~7; ++ skb_shinfo(skb)->gso_type = SKB_GSO_UDP; ++ ipv6_select_ident(&fhdr, rt); ++ skb_shinfo(skb)->ip6_frag_id = fhdr.identification; ++ __skb_queue_tail(&sk->sk_write_queue, skb); ++ } ++ ++ return skb_append_datato_frags(sk, skb, getfrag, from, ++ (length - transhdrlen)); ++} ++ ++static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, ++ gfp_t gfp) ++{ ++ return src ? 
kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; ++} ++ ++static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, ++ gfp_t gfp) ++{ ++ return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; ++} ++ ++static void ip6_append_data_mtu(unsigned int *mtu, ++ int *maxfraglen, ++ unsigned int fragheaderlen, ++ struct sk_buff *skb, ++ struct rt6_info *rt, ++ bool pmtuprobe) ++{ ++ if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { ++ if (skb == NULL) { ++ /* first fragment, reserve header_len */ ++ *mtu = *mtu - rt->dst.header_len; ++ ++ } else { ++ /* ++ * this fragment is not first, the headers ++ * space is regarded as data space. ++ */ ++ *mtu = min(*mtu, pmtuprobe ? ++ rt->dst.dev->mtu : ++ dst_mtu(rt->dst.path)); ++ } ++ *maxfraglen = ((*mtu - fragheaderlen) & ~7) ++ + fragheaderlen - sizeof(struct frag_hdr); ++ } ++} ++ ++int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ++ int offset, int len, int odd, struct sk_buff *skb), ++ void *from, int length, int transhdrlen, ++ int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, ++ struct rt6_info *rt, unsigned int flags, int dontfrag) ++{ ++ struct inet_sock *inet = inet_sk(sk); ++ struct ipv6_pinfo *np = inet6_sk(sk); ++ struct inet_cork *cork; ++ struct sk_buff *skb, *skb_prev = NULL; ++ unsigned int maxfraglen, fragheaderlen, mtu; ++ int exthdrlen; ++ int dst_exthdrlen; ++ int hh_len; ++ int copy; ++ int err; ++ int offset = 0; ++ __u8 tx_flags = 0; ++ ++ if (flags&MSG_PROBE) ++ return 0; ++ cork = &inet->cork.base; ++ if (skb_queue_empty(&sk->sk_write_queue)) { ++ /* ++ * setup for corking ++ */ ++ if (opt) { ++ if (WARN_ON(np->cork.opt)) ++ return -EINVAL; ++ ++ np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); ++ if (unlikely(np->cork.opt == NULL)) ++ return -ENOBUFS; ++ ++ np->cork.opt->tot_len = opt->tot_len; ++ np->cork.opt->opt_flen = opt->opt_flen; ++ np->cork.opt->opt_nflen = opt->opt_nflen; ++ ++ np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, ++ sk->sk_allocation); ++ if (opt->dst0opt && !np->cork.opt->dst0opt) ++ return -ENOBUFS; ++ ++ np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, ++ sk->sk_allocation); ++ if (opt->dst1opt && !np->cork.opt->dst1opt) ++ return -ENOBUFS; ++ ++ np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, ++ sk->sk_allocation); ++ if (opt->hopopt && !np->cork.opt->hopopt) ++ return -ENOBUFS; ++ ++ np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, ++ sk->sk_allocation); ++ if (opt->srcrt && !np->cork.opt->srcrt) ++ return -ENOBUFS; ++ ++ /* need source address above miyazawa*/ ++ } ++ dst_hold(&rt->dst); ++ cork->dst = &rt->dst; ++ inet->cork.fl.u.ip6 = *fl6; ++ np->cork.hop_limit = hlimit; ++ np->cork.tclass = tclass; ++ if (rt->dst.flags & DST_XFRM_TUNNEL) ++ mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? ++ rt->dst.dev->mtu : dst_mtu(&rt->dst); ++ else ++ mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? ++ rt->dst.dev->mtu : dst_mtu(rt->dst.path); ++ if (np->frag_size < mtu) { ++ if (np->frag_size) ++ mtu = np->frag_size; ++ } ++ cork->fragsize = mtu; ++ if (dst_allfrag(rt->dst.path)) ++ cork->flags |= IPCORK_ALLFRAG; ++ cork->length = 0; ++ exthdrlen = (opt ? 
opt->opt_flen : 0); ++ length += exthdrlen; ++ transhdrlen += exthdrlen; ++ dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; ++ } else { ++ rt = (struct rt6_info *)cork->dst; ++ fl6 = &inet->cork.fl.u.ip6; ++ opt = np->cork.opt; ++ transhdrlen = 0; ++ exthdrlen = 0; ++ dst_exthdrlen = 0; ++ mtu = cork->fragsize; ++ } ++ ++ hh_len = LL_RESERVED_SPACE(rt->dst.dev); ++ ++ fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + ++ (opt ? opt->opt_nflen : 0); ++ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); ++ ++ if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { ++ if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { ++ ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); ++ return -EMSGSIZE; ++ } ++ } ++ ++ /* For UDP, check if TX timestamp is enabled */ ++ if (sk->sk_type == SOCK_DGRAM) ++ sock_tx_timestamp(sk, &tx_flags); ++ ++ /* ++ * Let's try using as much space as possible. ++ * Use MTU if total length of the message fits into the MTU. ++ * Otherwise, we need to reserve fragment header and ++ * fragment alignment (= 8-15 octects, in total). ++ * ++ * Note that we may need to "move" the data from the tail of ++ * of the buffer to the new fragment when we split ++ * the message. ++ * ++ * FIXME: It may be fragmented into multiple chunks ++ * at once if non-fragmentable extension headers ++ * are too large. ++ * --yoshfuji ++ */ ++ ++ if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || ++ sk->sk_protocol == IPPROTO_RAW)) { ++ ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); ++ return -EMSGSIZE; ++ } ++ ++ skb = skb_peek_tail(&sk->sk_write_queue); ++ cork->length += length; ++ if (((length > mtu) || ++ (skb && skb_has_frags(skb))) && ++ (sk->sk_protocol == IPPROTO_UDP) && ++ (rt->dst.dev->features & NETIF_F_UFO)) { ++ err = ip6_ufo_append_data(sk, getfrag, from, length, ++ hh_len, fragheaderlen, ++ transhdrlen, mtu, flags, rt); ++ if (err) ++ goto error; ++ return 0; ++ } ++ ++ if (!skb) ++ goto alloc_new_skb; ++ ++ while (length > 0) { ++ /* Check if the remaining data fits into current packet. */ ++ copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; ++ if (copy < length) ++ copy = maxfraglen - skb->len; ++ ++ if (copy <= 0) { ++ char *data; ++ unsigned int datalen; ++ unsigned int fraglen; ++ unsigned int fraggap; ++ unsigned int alloclen; ++alloc_new_skb: ++ /* There's no room in the current skb */ ++ if (skb) ++ fraggap = skb->len - maxfraglen; ++ else ++ fraggap = 0; ++ /* update mtu and maxfraglen if necessary */ ++ if (skb == NULL || skb_prev == NULL) ++ ip6_append_data_mtu(&mtu, &maxfraglen, ++ fragheaderlen, skb, rt, ++ np->pmtudisc == ++ IPV6_PMTUDISC_PROBE); ++ ++ skb_prev = skb; ++ ++ /* ++ * If remaining data exceeds the mtu, ++ * we know we need more fragment(s). ++ */ ++ datalen = length + fraggap; ++ ++ if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) ++ datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; ++ if ((flags & MSG_MORE) && ++ !(rt->dst.dev->features&NETIF_F_SG)) ++ alloclen = mtu; ++ else ++ alloclen = datalen + fragheaderlen; ++ ++ alloclen += dst_exthdrlen; ++ ++ if (datalen != length + fraggap) { ++ /* ++ * this is not the last fragment, the trailer ++ * space is regarded as data space. 
++ */ ++ datalen += rt->dst.trailer_len; ++ } ++ ++ alloclen += rt->dst.trailer_len; ++ fraglen = datalen + fragheaderlen; ++ ++ /* ++ * We just reserve space for fragment header. ++ * Note: this may be overallocation if the message ++ * (without MSG_MORE) fits into the MTU. ++ */ ++ alloclen += sizeof(struct frag_hdr); ++ ++ if (transhdrlen) { ++ skb = sock_alloc_send_skb(sk, ++ alloclen + hh_len, ++ (flags & MSG_DONTWAIT), &err); ++ } else { ++ skb = NULL; ++ if (atomic_read(&sk->sk_wmem_alloc) <= ++ 2 * sk->sk_sndbuf) ++ skb = sock_wmalloc(sk, ++ alloclen + hh_len, 1, ++ sk->sk_allocation); ++ if (unlikely(skb == NULL)) ++ err = -ENOBUFS; ++ else { ++ /* Only the initial fragment ++ * is time stamped. ++ */ ++ tx_flags = 0; ++ } ++ } ++ if (skb == NULL) ++ goto error; ++ /* ++ * Fill in the control structures ++ */ ++ skb->ip_summed = CHECKSUM_NONE; ++ skb->csum = 0; ++ /* reserve for fragmentation and ipsec header */ ++ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + ++ dst_exthdrlen); ++ ++ if (sk->sk_type == SOCK_DGRAM) ++ skb_shinfo(skb)->tx_flags = tx_flags; ++ ++ /* ++ * Find where to start putting bytes ++ */ ++ data = skb_put(skb, fraglen); ++ skb_set_network_header(skb, exthdrlen); ++ data += fragheaderlen; ++ skb->transport_header = (skb->network_header + ++ fragheaderlen); ++ if (fraggap) { ++ skb->csum = skb_copy_and_csum_bits( ++ skb_prev, maxfraglen, ++ data + transhdrlen, fraggap, 0); ++ skb_prev->csum = csum_sub(skb_prev->csum, ++ skb->csum); ++ data += fraggap; ++ pskb_trim_unique(skb_prev, maxfraglen); ++ } ++ copy = datalen - transhdrlen - fraggap; ++ ++ if (copy < 0) { ++ err = -EINVAL; ++ kfree_skb(skb); ++ goto error; ++ } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { ++ err = -EFAULT; ++ kfree_skb(skb); ++ goto error; ++ } ++ ++ offset += copy; ++ length -= datalen - fraggap; ++ transhdrlen = 0; ++ exthdrlen = 0; ++ dst_exthdrlen = 0; ++ ++ /* ++ * Put the packet on the pending queue ++ */ ++ __skb_queue_tail(&sk->sk_write_queue, skb); ++ continue; ++ } ++ ++ if (copy > length) ++ copy = length; ++ ++ if (!(rt->dst.dev->features&NETIF_F_SG)) { ++ unsigned int off; ++ ++ off = skb->len; ++ if (getfrag(from, skb_put(skb, copy), ++ offset, copy, off, skb) < 0) { ++ __skb_trim(skb, off); ++ err = -EFAULT; ++ goto error; ++ } ++ } else { ++ int i = skb_shinfo(skb)->nr_frags; ++ struct page_frag *pfrag = sk_page_frag(sk); ++ ++ err = -ENOMEM; ++ if (!sk_page_frag_refill(sk, pfrag)) ++ goto error; ++ ++ if (!skb_can_coalesce(skb, i, pfrag->page, ++ pfrag->offset)) { ++ err = -EMSGSIZE; ++ if (i == MAX_SKB_FRAGS) ++ goto error; ++ ++ __skb_fill_page_desc(skb, i, pfrag->page, ++ pfrag->offset, 0); ++ skb_shinfo(skb)->nr_frags = ++i; ++ get_page(pfrag->page); ++ } ++ copy = min_t(int, copy, pfrag->size - pfrag->offset); ++ if (getfrag(from, ++ page_address(pfrag->page) + pfrag->offset, ++ offset, copy, skb->len, skb) < 0) ++ goto error_efault; ++ ++ pfrag->offset += copy; ++ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); ++ skb->len += copy; ++ skb->data_len += copy; ++ skb->truesize += copy; ++ atomic_add(copy, &sk->sk_wmem_alloc); ++ } ++ offset += copy; ++ length -= copy; ++ } ++ ++ return 0; ++ ++error_efault: ++ err = -EFAULT; ++error: ++ cork->length -= length; ++ IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); ++ return err; ++} ++EXPORT_SYMBOL_GPL(ip6_append_data); ++ ++static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) ++{ ++ if (np->cork.opt) { ++ 
kfree(np->cork.opt->dst0opt); ++ kfree(np->cork.opt->dst1opt); ++ kfree(np->cork.opt->hopopt); ++ kfree(np->cork.opt->srcrt); ++ kfree(np->cork.opt); ++ np->cork.opt = NULL; ++ } ++ ++ if (inet->cork.base.dst) { ++ dst_release(inet->cork.base.dst); ++ inet->cork.base.dst = NULL; ++ inet->cork.base.flags &= ~IPCORK_ALLFRAG; ++ } ++ memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); ++} ++ ++int ip6_push_pending_frames(struct sock *sk) ++{ ++ struct sk_buff *skb, *tmp_skb; ++ struct sk_buff **tail_skb; ++ struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; ++ struct inet_sock *inet = inet_sk(sk); ++ struct ipv6_pinfo *np = inet6_sk(sk); ++ struct net *net = sock_net(sk); ++ struct ipv6hdr *hdr; ++ struct ipv6_txoptions *opt = np->cork.opt; ++ struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; ++ struct flowi6 *fl6 = &inet->cork.fl.u.ip6; ++ unsigned char proto = fl6->flowi6_proto; ++ int err = 0; ++ ++ if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) ++ goto out; ++ tail_skb = &(skb_shinfo(skb)->frag_list); ++ ++ /* move skb->data to ip header from ext header */ ++ if (skb->data < skb_network_header(skb)) ++ __skb_pull(skb, skb_network_offset(skb)); ++ while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { ++ __skb_pull(tmp_skb, skb_network_header_len(skb)); ++ *tail_skb = tmp_skb; ++ tail_skb = &(tmp_skb->next); ++ skb->len += tmp_skb->len; ++ skb->data_len += tmp_skb->len; ++ skb->truesize += tmp_skb->truesize; ++ tmp_skb->destructor = NULL; ++ tmp_skb->sk = NULL; ++ } ++ ++ /* Allow local fragmentation. */ ++ if (np->pmtudisc < IPV6_PMTUDISC_DO) ++ skb->local_df = 1; ++ ++ *final_dst = fl6->daddr; ++ __skb_pull(skb, skb_network_header_len(skb)); ++ if (opt && opt->opt_flen) ++ ipv6_push_frag_opts(skb, opt, &proto); ++ if (opt && opt->opt_nflen) ++ ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); ++ ++ skb_push(skb, sizeof(struct ipv6hdr)); ++ skb_reset_network_header(skb); ++ hdr = ipv6_hdr(skb); ++ ++ ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); ++ hdr->hop_limit = np->cork.hop_limit; ++ hdr->nexthdr = proto; ++ hdr->saddr = fl6->saddr; ++ hdr->daddr = *final_dst; ++ ++ skb->priority = sk->sk_priority; ++ skb->mark = sk->sk_mark; ++ ++ skb_dst_set(skb, dst_clone(&rt->dst)); ++ IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); ++ if (proto == IPPROTO_ICMPV6) { ++ struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); ++ ++ ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); ++ ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); ++ } ++ ++ err = ip6_local_out(skb); ++ if (err) { ++ if (err > 0) ++ err = net_xmit_errno(err); ++ if (err) ++ goto error; ++ } ++ ++out: ++ ip6_cork_release(inet, np); ++ return err; ++error: ++ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); ++ goto out; ++} ++EXPORT_SYMBOL_GPL(ip6_push_pending_frames); ++ ++void ip6_flush_pending_frames(struct sock *sk) ++{ ++ struct sk_buff *skb; ++ ++ while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { ++ if (skb_dst(skb)) ++ IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_OUTDISCARDS); ++ kfree_skb(skb); ++ } ++ ++ ip6_cork_release(inet_sk(sk), inet6_sk(sk)); ++} ++EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); +diff -Naur linux-3.10.30.org/net/netfilter/core.c linux-3.10.30/net/netfilter/core.c +--- linux-3.10.30.org/net/netfilter/core.c 2014-02-13 22:48:15.000000000 +0100 ++++ linux-3.10.30/net/netfilter/core.c 2014-02-14 20:29:05.392738001 +0100 +@@ -191,9 +191,11 @@ + ret = NF_DROP_GETERR(verdict); + if 
(ret == 0)
+ 			ret = -EPERM;
+-	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
++	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE ||
++		   (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) {
+ 		int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+-			       verdict >> NF_VERDICT_QBITS);
++			       verdict >> NF_VERDICT_QBITS,
++			       verdict & NF_VERDICT_MASK);
+ 		if (err < 0) {
+ 			if (err == -ECANCELED)
+ 				goto next_hook;
+diff -Naur linux-3.10.30.org/net/netfilter/Kconfig linux-3.10.30/net/netfilter/Kconfig
+--- linux-3.10.30.org/net/netfilter/Kconfig	2014-02-13 22:48:15.000000000 +0100
++++ linux-3.10.30/net/netfilter/Kconfig	2014-02-14 20:29:05.396071847 +0100
+@@ -641,6 +641,18 @@
+ 
+ 	  To compile it as a module, choose M here. If unsure, say N.
+ 
++config NETFILTER_XT_TARGET_IMQ
++	tristate '"IMQ" target support'
++	depends on NETFILTER_XTABLES
++	depends on IP_NF_MANGLE || IP6_NF_MANGLE
++	select IMQ
++	default m if NETFILTER_ADVANCED=n
++	help
++	  This option adds a `IMQ' target which is used to specify if and
++	  to which imq device packets should get enqueued/dequeued.
++
++	  To compile it as a module, choose M here. If unsure, say N.
++
+ config NETFILTER_XT_TARGET_MARK
+ 	tristate '"MARK" target support'
+ 	depends on NETFILTER_ADVANCED
+diff -Naur linux-3.10.30.org/net/netfilter/Makefile linux-3.10.30/net/netfilter/Makefile
+--- linux-3.10.30.org/net/netfilter/Makefile	2014-02-13 22:48:15.000000000 +0100
++++ linux-3.10.30/net/netfilter/Makefile	2014-02-14 20:29:05.396071847 +0100
+@@ -82,6 +82,7 @@
+ obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
++obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o
+diff -Naur linux-3.10.30.org/net/netfilter/nf_internals.h linux-3.10.30/net/netfilter/nf_internals.h
+--- linux-3.10.30.org/net/netfilter/nf_internals.h	2014-02-13 22:48:15.000000000 +0100
++++ linux-3.10.30/net/netfilter/nf_internals.h	2014-02-14 20:29:05.396071847 +0100
+@@ -29,7 +29,7 @@
+ 		      struct net_device *indev,
+ 		      struct net_device *outdev,
+ 		      int (*okfn)(struct sk_buff *),
+-		      unsigned int queuenum);
++		      unsigned int queuenum, unsigned int queuetype);
+ extern int __init netfilter_queue_init(void);
+ 
+ /* nf_log.c */
+diff -Naur linux-3.10.30.org/net/netfilter/nf_queue.c linux-3.10.30/net/netfilter/nf_queue.c
+--- linux-3.10.30.org/net/netfilter/nf_queue.c	2014-02-13 22:48:15.000000000 +0100
++++ linux-3.10.30/net/netfilter/nf_queue.c	2014-02-14 20:29:05.396071847 +0100
+@@ -27,6 +27,23 @@
+  */
+ static const struct nf_queue_handler __rcu *queue_handler __read_mostly;
+ 
++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
++static const struct nf_queue_handler __rcu *queue_imq_handler __read_mostly;
++
++void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
++{
++	rcu_assign_pointer(queue_imq_handler, qh);
++}
++EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler);
++
++void nf_unregister_queue_imq_handler(void)
++{
++	RCU_INIT_POINTER(queue_imq_handler, NULL);
++	synchronize_rcu();
++}
++EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler);
++#endif
++
+ /* return EBUSY when somebody else is registered, return EEXIST if the
+  * same handler is registered, return 0 in case of success. */
+ void nf_register_queue_handler(const struct nf_queue_handler *qh)
+@@ -105,7 +122,8 @@
+ 		      struct net_device *indev,
+ 		      struct net_device *outdev,
+ 		      int (*okfn)(struct sk_buff *),
+-		      unsigned int queuenum)
++		      unsigned int queuenum,
++		      unsigned int queuetype)
+ {
+ 	int status = -ENOENT;
+ 	struct nf_queue_entry *entry = NULL;
+@@ -115,7 +133,17 @@
+ 	/* QUEUE == DROP if no one is waiting, to be safe. */
+ 	rcu_read_lock();
+ 
+-	qh = rcu_dereference(queue_handler);
++	if (queuetype == NF_IMQ_QUEUE) {
++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
++		qh = rcu_dereference(queue_imq_handler);
++#else
++		BUG();
++		goto err_unlock;
++#endif
++	} else {
++		qh = rcu_dereference(queue_handler);
++	}
++
+ 	if (!qh) {
+ 		status = -ESRCH;
+ 		goto err_unlock;
+@@ -205,9 +233,11 @@
+ 		local_bh_enable();
+ 		break;
+ 	case NF_QUEUE:
++	case NF_IMQ_QUEUE:
+ 		err = nf_queue(skb, elem, entry->pf, entry->hook,
+ 			       entry->indev, entry->outdev, entry->okfn,
+-			       verdict >> NF_VERDICT_QBITS);
++			       verdict >> NF_VERDICT_QBITS,
++			       verdict & NF_VERDICT_MASK);
+ 		if (err < 0) {
+ 			if (err == -ECANCELED)
+ 				goto next_hook;
+diff -Naur linux-3.10.30.org/net/netfilter/xt_IMQ.c linux-3.10.30/net/netfilter/xt_IMQ.c
+--- linux-3.10.30.org/net/netfilter/xt_IMQ.c	1970-01-01 01:00:00.000000000 +0100
++++ linux-3.10.30/net/netfilter/xt_IMQ.c	2014-02-14 20:29:05.396071847 +0100
+@@ -0,0 +1,72 @@
++/*
++ * This target marks packets to be enqueued to an imq device
++ */
++#include
++#include
++#include
++#include
++#include
++
++static unsigned int imq_target(struct sk_buff *pskb,
++			       const struct xt_action_param *par)
++{
++	const struct xt_imq_info *mr = par->targinfo;
++
++	pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;
++
++	return XT_CONTINUE;
++}
++
++static int imq_checkentry(const struct xt_tgchk_param *par)
++{
++	struct xt_imq_info *mr = par->targinfo;
++
++	if (mr->todev > IMQ_MAX_DEVS - 1) {
++		pr_warn("IMQ: invalid device specified, highest is %u\n",
++			IMQ_MAX_DEVS - 1);
++		return -EINVAL;
++	}
++
++	return 0;
++}
++
++static struct xt_target xt_imq_reg[] __read_mostly = {
++	{
++		.name = "IMQ",
++		.family = AF_INET,
++		.checkentry = imq_checkentry,
++		.target = imq_target,
++		.targetsize = sizeof(struct xt_imq_info),
++		.table = "mangle",
++		.me = THIS_MODULE
++	},
++	{
++		.name = "IMQ",
++		.family = AF_INET6,
++		.checkentry = imq_checkentry,
++		.target = imq_target,
++		.targetsize = sizeof(struct xt_imq_info),
++		.table = "mangle",
++		.me = THIS_MODULE
++	},
++};
++
++static int __init imq_init(void)
++{
++	return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
++}
++
++static void __exit imq_fini(void)
++{
++	xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
++}
++
++module_init(imq_init);
++module_exit(imq_fini);
++
++MODULE_AUTHOR("http://www.linuximq.net");
++MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information.");
++MODULE_LICENSE("GPL");
++MODULE_ALIAS("ipt_IMQ");
++MODULE_ALIAS("ip6t_IMQ");
++
-- 
2.39.2
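
For reference, the xt_IMQ target added above only marks an skb with IMQ_F_ENQUEUE and a device
number; the actual shaping happens on the imq devices provided by drivers/net/imq.c once a qdisc
is attached to them. A minimal sketch of a typical setup, assuming the kernel was built with
CONFIG_IMQ and NETFILTER_XT_TARGET_IMQ and that the matching iptables IMQ userspace extension
from linuximq.net is installed; interface names, device counts and rates are only examples:

	# load the pseudo-device and bring one instance up
	modprobe imq numdevs=2
	ip link set imq0 up

	# divert ingress traffic from ppp0 to imq0 via the mangle table
	iptables -t mangle -A PREROUTING -i ppp0 -j IMQ --todev 0

	# shape whatever gets enqueued on imq0
	tc qdisc add dev imq0 root handle 1: htb default 10
	tc class add dev imq0 parent 1: classid 1:10 htb rate 2mbit

The --todev number must stay below IMQ_MAX_DEVS, which is exactly the check imq_checkentry()
performs when the rule is loaded.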