From: Ben Schweikert Date: Thu, 16 Apr 2009 15:42:00 +0000 (+0000) Subject: Imported xen patches. X-Git-Url: http://git.ipfire.org/?p=people%2Fteissler%2Fipfire-2.x.git;a=commitdiff_plain;h=cc90b958b2203d9fa8e89656f95e7b3c31559f48 Imported xen patches. --- diff --git a/src/patches/60000_add-console-use-vt.patch1 b/src/patches/60000_add-console-use-vt.patch1 new file mode 100644 index 000000000..2fa3e9164 --- /dev/null +++ b/src/patches/60000_add-console-use-vt.patch1 @@ -0,0 +1,58 @@ +Subject: add console_use_vt +From: kraxel@suse.de +Patch-mainline: no + +$subject says all + +--- + drivers/char/tty_io.c | 7 ++++++- + include/linux/console.h | 1 + + 2 files changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/char/tty_io.c ++++ b/drivers/char/tty_io.c +@@ -136,6 +136,8 @@ LIST_HEAD(tty_drivers); /* linked list + DEFINE_MUTEX(tty_mutex); + EXPORT_SYMBOL(tty_mutex); + ++int console_use_vt = 1; ++ + #ifdef CONFIG_UNIX98_PTYS + extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */ + static int ptmx_open(struct inode *, struct file *); +@@ -2200,7 +2202,7 @@ retry_open: + goto got_driver; + } + #ifdef CONFIG_VT +- if (device == MKDEV(TTY_MAJOR, 0)) { ++ if (console_use_vt && device == MKDEV(TTY_MAJOR, 0)) { + extern struct tty_driver *console_driver; + driver = console_driver; + index = fg_console; +@@ -3729,6 +3731,8 @@ static int __init tty_init(void) + #endif + + #ifdef CONFIG_VT ++ if (!console_use_vt) ++ goto out_vt; + cdev_init(&vc0_cdev, &console_fops); + if (cdev_add(&vc0_cdev, MKDEV(TTY_MAJOR, 0), 1) || + register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0) +@@ -3736,6 +3740,7 @@ static int __init tty_init(void) + device_create_drvdata(tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, "tty0"); + + vty_init(); ++ out_vt: + #endif + return 0; + } +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -63,6 +63,7 @@ extern const struct consw dummy_con; /* + extern const struct consw vga_con; /* VGA text console */ + extern const struct consw newport_con; /* SGI Newport console */ + extern const struct consw prom_con; /* SPARC PROM console */ ++extern int console_use_vt; + + int con_is_bound(const struct consw *csw); + int register_con_driver(const struct consw *csw, int first, int last); diff --git a/src/patches/60001_linux-2.6.19-rc1-kexec-move_segment_code-i386.patch1 b/src/patches/60001_linux-2.6.19-rc1-kexec-move_segment_code-i386.patch1 new file mode 100644 index 000000000..1439e0349 --- /dev/null +++ b/src/patches/60001_linux-2.6.19-rc1-kexec-move_segment_code-i386.patch1 @@ -0,0 +1,178 @@ +Subject: kexec: Move asm segment handling code to the assembly file (i386) +From: http://xenbits.xensource.com/xen-unstable.hg (tip 13816) +Patch-mainline: obsolete + +This patch moves the idt, gdt, and segment handling code from machine_kexec.c +to relocate_kernel.S. The main reason behind this move is to avoid code +duplication in the Xen hypervisor. With this patch all code required to kexec +is put on the control page. + +On top of that this patch also counts as a cleanup - I think it is much +nicer to write assembly directly in assembly files than wrap inline assembly +in C functions for no apparent reason. + +Signed-off-by: Magnus Damm +Acked-by: jbeulich@novell.com +--- + + Applies to 2.6.19-rc1. 
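+
+For illustration of the addressing idiom used throughout the new assembly:
+because relocate_new_kernel executes from a copy on the control page,
+absolute label addresses are meaningless there, so every pointer is formed
+as control-page base plus (label - relocate_kernel), e.g.
+"movl %edi, %eax; addl $(gdt_48 - relocate_kernel), %eax".  A minimal
+user-space C sketch of the same arithmetic (all names here are stand-ins,
+not kernel symbols):
+
+	#include <stdio.h>
+	#include <string.h>
+
+	static char blob[64];		/* stands in for relocate_kernel */
+	#define LABEL_OFF 8		/* stands in for gdt_48 - relocate_kernel */
+
+	int main(void)
+	{
+		char page[64];		/* stands in for the control page */
+
+		/* kexec copies the code to the control page the same way */
+		memcpy(page, blob, sizeof(blob));
+		/* position-independent address: base + (label - start) */
+		printf("relocated label at %p\n", (void *)(page + LABEL_OFF));
+		return 0;
+	}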
+ + machine_kexec.c | 59 ----------------------------------------------------- + relocate_kernel.S | 58 +++++++++++++++++++++++++++++++++++++++++++++++----- + 2 files changed, 53 insertions(+), 64 deletions(-) + +Index: head-2008-11-17/arch/x86/kernel/machine_kexec_32.c +=================================================================== +--- head-2008-11-17.orig/arch/x86/kernel/machine_kexec_32.c 2008-11-17 13:15:56.000000000 +0100 ++++ head-2008-11-17/arch/x86/kernel/machine_kexec_32.c 2008-11-17 13:38:03.000000000 +0100 +@@ -34,48 +34,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED + static u32 kexec_pte0[1024] PAGE_ALIGNED; + static u32 kexec_pte1[1024] PAGE_ALIGNED; + +-static void set_idt(void *newidt, __u16 limit) +-{ +- struct desc_ptr curidt; +- +- /* ia32 supports unaliged loads & stores */ +- curidt.size = limit; +- curidt.address = (unsigned long)newidt; +- +- load_idt(&curidt); +-} +- +- +-static void set_gdt(void *newgdt, __u16 limit) +-{ +- struct desc_ptr curgdt; +- +- /* ia32 supports unaligned loads & stores */ +- curgdt.size = limit; +- curgdt.address = (unsigned long)newgdt; +- +- load_gdt(&curgdt); +-} +- +-static void load_segments(void) +-{ +-#define __STR(X) #X +-#define STR(X) __STR(X) +- +- __asm__ __volatile__ ( +- "\tljmp $"STR(__KERNEL_CS)",$1f\n" +- "\t1:\n" +- "\tmovl $"STR(__KERNEL_DS)",%%eax\n" +- "\tmovl %%eax,%%ds\n" +- "\tmovl %%eax,%%es\n" +- "\tmovl %%eax,%%fs\n" +- "\tmovl %%eax,%%gs\n" +- "\tmovl %%eax,%%ss\n" +- ::: "eax", "memory"); +-#undef STR +-#undef __STR +-} +- + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages +@@ -167,23 +125,6 @@ void machine_kexec(struct kimage *image) + page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) + << PAGE_SHIFT); + +- /* The segment registers are funny things, they have both a +- * visible and an invisible part. Whenever the visible part is +- * set to a specific selector, the invisible part is loaded +- * with from a table in memory. At no other time is the +- * descriptor table in memory accessed. +- * +- * I take advantage of this here by force loading the +- * segments, before I zap the gdt with an invalid value. +- */ +- load_segments(); +- /* The gdt & idt are now invalid. +- * If you want to load them you must set up your own idt & gdt. 
+- */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); +- +- /* now call it */ + image->start = relocate_kernel_ptr((unsigned long)image->head, + (unsigned long)page_list, + image->start, cpu_has_pae, +Index: head-2008-11-17/arch/x86/kernel/relocate_kernel_32.S +=================================================================== +--- head-2008-11-17.orig/arch/x86/kernel/relocate_kernel_32.S 2008-11-17 13:15:56.000000000 +0100 ++++ head-2008-11-17/arch/x86/kernel/relocate_kernel_32.S 2008-11-17 13:38:03.000000000 +0100 +@@ -199,14 +199,45 @@ relocate_new_kernel: + movl PTR(PA_PGD)(%ebp), %eax + movl %eax, %cr3 + ++ /* setup idt */ ++ movl %edi, %eax ++ addl $(idt_48 - relocate_kernel), %eax ++ lidtl (%eax) ++ ++ /* setup gdt */ ++ movl %edi, %eax ++ addl $(gdt - relocate_kernel), %eax ++ movl %edi, %esi ++ addl $((gdt_48 - relocate_kernel) + 2), %esi ++ movl %eax, (%esi) ++ ++ movl %edi, %eax ++ addl $(gdt_48 - relocate_kernel), %eax ++ lgdtl (%eax) ++ ++ /* setup data segment registers */ ++ mov $(gdt_ds - gdt), %eax ++ mov %eax, %ds ++ mov %eax, %es ++ mov %eax, %fs ++ mov %eax, %gs ++ mov %eax, %ss ++ + /* setup a new stack at the end of the physical control page */ + lea PAGE_SIZE(%edi), %esp + +- /* jump to identity mapped page */ +- movl %edi, %eax +- addl $(identity_mapped - relocate_kernel), %eax +- pushl %eax +- ret ++ /* load new code segment and jump to identity mapped page */ ++ movl %edi, %esi ++ xorl %eax, %eax ++ pushl %eax ++ pushl %esi ++ pushl %eax ++ movl $(gdt_cs - gdt), %eax ++ pushl %eax ++ movl %edi, %eax ++ addl $(identity_mapped - relocate_kernel),%eax ++ pushl %eax ++ iretl + + identity_mapped: + /* store the start address on the stack */ +@@ -378,5 +409,22 @@ swap_pages: + popl %ebp + ret + ++ .align 16 ++gdt: ++ .quad 0x0000000000000000 /* NULL descriptor */ ++gdt_cs: ++ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ ++gdt_ds: ++ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ ++gdt_end: ++ ++gdt_48: ++ .word gdt_end - gdt - 1 /* limit */ ++ .long 0 /* base - filled in by code above */ ++ ++idt_48: ++ .word 0 /* limit */ ++ .long 0 /* base */ ++ + .globl kexec_control_code_size + .set kexec_control_code_size, . - relocate_kernel diff --git a/src/patches/60002_linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch1 b/src/patches/60002_linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch1 new file mode 100644 index 000000000..5235a0399 --- /dev/null +++ b/src/patches/60002_linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch1 @@ -0,0 +1,168 @@ +Subject: kexec: Move asm segment handling code to the assembly file (x86_64) +From: http://xenbits.xensource.com/xen-unstable.hg (tip 13816) +Patch-mainline: obsolete + +This patch moves the idt, gdt, and segment handling code from machine_kexec.c +to relocate_kernel.S. The main reason behind this move is to avoid code +duplication in the Xen hypervisor. With this patch all code required to kexec +is put on the control page. + +On top of that this patch also counts as a cleanup - I think it is much +nicer to write assembly directly in assembly files than wrap inline assembly +in C functions for no apparent reason. + +Signed-off-by: Magnus Damm +Acked-by: jbeulich@novell.com +--- + + Applies to 2.6.19-rc1. 
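+
+A related aside on the descriptor value the patch adds below: gdt_cs is
+0x00af9a000000ffff, a flat long-mode kernel code segment.  A small
+stand-alone C program (illustrative only, not part of the patch) that
+unpacks its fields:
+
+	#include <stdio.h>
+	#include <stdint.h>
+
+	int main(void)
+	{
+		uint64_t d = 0x00af9a000000ffffULL;	/* gdt_cs value from below */
+		uint32_t limit = (d & 0xffff) | ((d >> 32) & 0xf0000);
+		uint32_t base = ((d >> 16) & 0xffffff) | (((d >> 56) & 0xff) << 24);
+		unsigned access = (d >> 40) & 0xff;	/* 0x9a: present, DPL0, code, readable */
+		unsigned flags = (d >> 52) & 0x0f;	/* 0xa: G=1 (4K granularity), L=1 (64-bit) */
+
+		printf("base=%#x limit=%#x access=%#x flags=%#x\n",
+		       base, limit, access, flags);
+		return 0;
+	}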
+ + machine_kexec.c | 58 ----------------------------------------------------- + relocate_kernel.S | 50 +++++++++++++++++++++++++++++++++++++++++---- + 2 files changed, 45 insertions(+), 63 deletions(-) + +Index: head-2008-08-18/arch/x86/kernel/machine_kexec_64.c +=================================================================== +--- head-2008-08-18.orig/arch/x86/kernel/machine_kexec_64.c 2008-08-18 09:05:04.000000000 +0200 ++++ head-2008-08-18/arch/x86/kernel/machine_kexec_64.c 2008-08-18 10:13:08.000000000 +0200 +@@ -115,47 +115,6 @@ static int init_pgtable(struct kimage *i + return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); + } + +-static void set_idt(void *newidt, u16 limit) +-{ +- struct desc_ptr curidt; +- +- /* x86-64 supports unaliged loads & stores */ +- curidt.size = limit; +- curidt.address = (unsigned long)newidt; +- +- __asm__ __volatile__ ( +- "lidtq %0\n" +- : : "m" (curidt) +- ); +-}; +- +- +-static void set_gdt(void *newgdt, u16 limit) +-{ +- struct desc_ptr curgdt; +- +- /* x86-64 supports unaligned loads & stores */ +- curgdt.size = limit; +- curgdt.address = (unsigned long)newgdt; +- +- __asm__ __volatile__ ( +- "lgdtq %0\n" +- : : "m" (curgdt) +- ); +-}; +- +-static void load_segments(void) +-{ +- __asm__ __volatile__ ( +- "\tmovl %0,%%ds\n" +- "\tmovl %0,%%es\n" +- "\tmovl %0,%%ss\n" +- "\tmovl %0,%%fs\n" +- "\tmovl %0,%%gs\n" +- : : "a" (__KERNEL_DS) : "memory" +- ); +-} +- + int machine_kexec_prepare(struct kimage *image) + { + unsigned long start_pgtable; +@@ -214,23 +173,6 @@ void machine_kexec(struct kimage *image) + page_list[PA_TABLE_PAGE] = + (unsigned long)__pa(page_address(image->control_code_page)); + +- /* The segment registers are funny things, they have both a +- * visible and an invisible part. Whenever the visible part is +- * set to a specific selector, the invisible part is loaded +- * with from a table in memory. At no other time is the +- * descriptor table in memory accessed. +- * +- * I take advantage of this here by force loading the +- * segments, before I zap the gdt with an invalid value. +- */ +- load_segments(); +- /* The gdt & idt are now invalid. +- * If you want to load them you must set up your own idt & gdt. 
+- */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); +- +- /* now call it */ + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start); + } +Index: head-2008-08-18/arch/x86/kernel/relocate_kernel_64.S +=================================================================== +--- head-2008-08-18.orig/arch/x86/kernel/relocate_kernel_64.S 2008-07-13 23:51:29.000000000 +0200 ++++ head-2008-08-18/arch/x86/kernel/relocate_kernel_64.S 2008-08-18 10:13:08.000000000 +0200 +@@ -160,13 +160,39 @@ relocate_new_kernel: + movq PTR(PA_PGD)(%rsi), %r9 + movq %r9, %cr3 + ++ /* setup idt */ ++ movq %r8, %rax ++ addq $(idt_80 - relocate_kernel), %rax ++ lidtq (%rax) ++ ++ /* setup gdt */ ++ movq %r8, %rax ++ addq $(gdt - relocate_kernel), %rax ++ movq %r8, %r9 ++ addq $((gdt_80 - relocate_kernel) + 2), %r9 ++ movq %rax, (%r9) ++ ++ movq %r8, %rax ++ addq $(gdt_80 - relocate_kernel), %rax ++ lgdtq (%rax) ++ ++ /* setup data segment registers */ ++ xorl %eax, %eax ++ movl %eax, %ds ++ movl %eax, %es ++ movl %eax, %fs ++ movl %eax, %gs ++ movl %eax, %ss ++ + /* setup a new stack at the end of the physical control page */ + lea PAGE_SIZE(%r8), %rsp + +- /* jump to identity mapped page */ +- addq $(identity_mapped - relocate_kernel), %r8 +- pushq %r8 +- ret ++ /* load new code segment and jump to identity mapped page */ ++ movq %r8, %rax ++ addq $(identity_mapped - relocate_kernel), %rax ++ pushq $(gdt_cs - gdt) ++ pushq %rax ++ lretq + + identity_mapped: + /* store the start address on the stack */ +@@ -262,5 +288,19 @@ identity_mapped: + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 +- + ret ++ ++ .align 16 ++gdt: ++ .quad 0x0000000000000000 /* NULL descriptor */ ++gdt_cs: ++ .quad 0x00af9a000000ffff ++gdt_end: ++ ++gdt_80: ++ .word gdt_end - gdt - 1 /* limit */ ++ .quad 0 /* base - filled in by code above */ ++ ++idt_80: ++ .word 0 /* limit */ ++ .quad 0 /* base */ diff --git a/src/patches/60003_ipv6-no-autoconf.patch1 b/src/patches/60003_ipv6-no-autoconf.patch1 new file mode 100644 index 000000000..10ff1df66 --- /dev/null +++ b/src/patches/60003_ipv6-no-autoconf.patch1 @@ -0,0 +1,39 @@ +From: Olaf Kirch +Subject: Allow to bring up network interface w/o ipv6 autoconf +References: 161888 + +When bringing up a xen bridge device, it will always be configured to +use a MAC address of ff:ff:ff:ff:ff:fe. This greatly confuses IPv6 DAD, +which starts logging lots and lots of useless messages to syslog. + +We really want to disable IPv6 on these interfaces, and there doesn't +seem to be a reliable way to do this without bringing the interface +up first (and triggering IPv6 autoconf). + +This patch makes autoconf (DAD and router discovery) depend on the +interface's ability to do multicast. Turning off multicast for an +interface before bringing it up will suppress autoconfiguration. 
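+
+With the patch applied, a tool can therefore bring up such an interface
+without triggering DAD or router solicitation by clearing IFF_MULTICAST
+first.  An illustrative user-space sketch ("xenbr0" is a placeholder
+interface name, not something this patch creates):
+
+	#include <net/if.h>
+	#include <string.h>
+	#include <sys/ioctl.h>
+	#include <sys/socket.h>
+	#include <unistd.h>
+
+	int main(void)
+	{
+		struct ifreq ifr;
+		int fd = socket(AF_INET, SOCK_DGRAM, 0);
+
+		if (fd < 0)
+			return 1;
+		memset(&ifr, 0, sizeof(ifr));
+		strncpy(ifr.ifr_name, "xenbr0", IFNAMSIZ - 1);
+		if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0)
+			return 1;
+		ifr.ifr_flags &= ~IFF_MULTICAST;	/* disable multicast while down */
+		if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0)
+			return 1;
+		ifr.ifr_flags |= IFF_UP;		/* now bring the interface up */
+		if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0)
+			return 1;
+		return close(fd) ? 1 : 0;
+	}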
+ + net/ipv6/addrconf.c | 2 ++ + 1 files changed, 2 insertions(+) + +Index: head-2008-09-25/net/ipv6/addrconf.c +=================================================================== +--- head-2008-09-25.orig/net/ipv6/addrconf.c 2008-09-25 13:56:12.000000000 +0200 ++++ head-2008-09-25/net/ipv6/addrconf.c 2008-09-25 14:15:19.000000000 +0200 +@@ -2781,6 +2781,7 @@ static void addrconf_dad_start(struct in + spin_lock_bh(&ifp->lock); + + if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || ++ !(dev->flags&IFF_MULTICAST) || + idev->cnf.accept_dad < 1 || + !(ifp->flags&IFA_F_TENTATIVE) || + ifp->flags & IFA_F_NODAD) { +@@ -2878,6 +2879,7 @@ static void addrconf_dad_completed(struc + if (ifp->idev->cnf.forwarding == 0 && + ifp->idev->cnf.rtr_solicits > 0 && + (dev->flags&IFF_LOOPBACK) == 0 && ++ (dev->flags & IFF_MULTICAST) && + (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { + /* + * If a host as already performed a random delay diff --git a/src/patches/60004_pci-reassign-resources.patch1 b/src/patches/60004_pci-reassign-resources.patch1 new file mode 100644 index 000000000..2d25ca806 --- /dev/null +++ b/src/patches/60004_pci-reassign-resources.patch1 @@ -0,0 +1,307 @@ +Subject: xen/dom0: Reassign memory resources to device for pci passthrough +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 745:2268be46c75e) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +Index: head-2008-12-01/drivers/pci/Makefile +=================================================================== +--- head-2008-12-01.orig/drivers/pci/Makefile 2008-12-01 10:53:15.000000000 +0100 ++++ head-2008-12-01/drivers/pci/Makefile 2008-10-21 13:09:46.000000000 +0200 +@@ -4,6 +4,7 @@ + + obj-y += access.o bus.o probe.o remove.o pci.o quirks.o slot.o \ + pci-driver.o search.o pci-sysfs.o rom.o setup-res.o ++obj-$(CONFIG_PCI_REASSIGN) += reassigndev.o + obj-$(CONFIG_PROC_FS) += proc.o + + # Build PCI Express stuff if needed +Index: head-2008-12-01/drivers/pci/pci.h +=================================================================== +--- head-2008-12-01.orig/drivers/pci/pci.h 2008-12-01 10:53:15.000000000 +0100 ++++ head-2008-12-01/drivers/pci/pci.h 2008-10-21 13:09:01.000000000 +0200 +@@ -144,3 +144,9 @@ struct pci_slot_attribute { + }; + #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr) + ++#ifdef CONFIG_PCI_REASSIGN ++extern int is_reassigndev(struct pci_dev *dev); ++extern void pci_disable_bridge_window(struct pci_dev *dev); ++#else ++#define is_reassigndev(dev) 0 ++#endif +Index: head-2008-12-01/drivers/pci/quirks.c +=================================================================== +--- head-2008-12-01.orig/drivers/pci/quirks.c 2008-12-01 10:53:15.000000000 +0100 ++++ head-2008-12-01/drivers/pci/quirks.c 2008-10-29 10:52:40.000000000 +0100 +@@ -24,6 +24,54 @@ + #include + #include "pci.h" + ++#ifdef CONFIG_PCI_REASSIGN ++/* ++ * This quirk function disables the device and releases resources ++ * which is specified by kernel's boot parameter 'reassigndev'. ++ * Later on, kernel will assign page-aligned memory resource back ++ * to that device. 
++ */ ++static void __devinit quirk_release_resources(struct pci_dev *dev) ++{ ++ int i; ++ struct resource *r; ++ ++ if (is_reassigndev(dev)) { ++ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && ++ (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) { ++ /* PCI Host Bridge isn't a target device */ ++ return; ++ } ++ printk(KERN_INFO ++ "PCI: Disable device and release resources [%s].\n", ++ pci_name(dev)); ++ pci_disable_device(dev); ++ ++ for (i=0; i < PCI_NUM_RESOURCES; i++) { ++ r = &dev->resource[i]; ++ if (!(r->flags & IORESOURCE_MEM)) ++ continue; ++ ++ r->end = r->end - r->start; ++ r->start = 0; ++ ++ if (i < PCI_BRIDGE_RESOURCES) { ++ pci_update_resource(dev, r, i); ++ } ++ } ++ /* need to disable bridge's resource window, ++ * to make kernel enable to reassign new resource ++ * window later on. ++ */ ++ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && ++ (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { ++ pci_disable_bridge_window(dev); ++ } ++ } ++} ++DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_release_resources); ++#endif ++ + /* The Mellanox Tavor device gives false positive parity errors + * Mark this device with a broken_parity_status, to allow + * PCI scanning code to "skip" this now blacklisted device. +Index: head-2008-12-01/drivers/pci/reassigndev.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-12-01/drivers/pci/reassigndev.c 2008-10-21 13:13:38.000000000 +0200 +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2008, NEC Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple ++ * Place - Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++ ++#include ++#include ++#include ++#include "pci.h" ++ ++ ++#define REASSIGNDEV_PARAM_MAX (2048) ++#define TOKEN_MAX (12) /* "SSSS:BB:DD.F" length is 12 */ ++ ++static char param_reassigndev[REASSIGNDEV_PARAM_MAX] = {0}; ++ ++static int __init reassigndev_setup(char *str) ++{ ++ strncpy(param_reassigndev, str, REASSIGNDEV_PARAM_MAX); ++ param_reassigndev[REASSIGNDEV_PARAM_MAX - 1] = '\0'; ++ return 1; ++} ++__setup("reassigndev=", reassigndev_setup); ++ ++int is_reassigndev(struct pci_dev *dev) ++{ ++ char dev_str[TOKEN_MAX+1]; ++ int seg, bus, slot, func; ++ int len; ++ char *p, *next_str; ++ ++ p = param_reassigndev; ++ for (; p; p = next_str + 1) { ++ next_str = strpbrk(p, ","); ++ if (next_str) { ++ len = next_str - p; ++ } else { ++ len = strlen(p); ++ } ++ if (len > 0 && len <= TOKEN_MAX) { ++ strncpy(dev_str, p, len); ++ *(dev_str + len) = '\0'; ++ ++ if (sscanf(dev_str, "%x:%x:%x.%x", ++ &seg, &bus, &slot, &func) != 4) { ++ if (sscanf(dev_str, "%x:%x.%x", ++ &bus, &slot, &func) == 3) { ++ seg = 0; ++ } else { ++ /* failed to scan strings */ ++ seg = -1; ++ bus = -1; ++ } ++ } ++ if (seg == pci_domain_nr(dev->bus) && ++ bus == dev->bus->number && ++ slot == PCI_SLOT(dev->devfn) && ++ func == PCI_FUNC(dev->devfn)) { ++ /* It's a target device */ ++ return 1; ++ } ++ } ++ if (!next_str) ++ break; ++ } ++ ++ return 0; ++} +Index: head-2008-12-01/drivers/pci/setup-bus.c +=================================================================== +--- head-2008-12-01.orig/drivers/pci/setup-bus.c 2008-12-01 10:53:15.000000000 +0100 ++++ head-2008-12-01/drivers/pci/setup-bus.c 2008-10-21 13:09:01.000000000 +0200 +@@ -26,6 +26,7 @@ + #include + #include + ++#include "pci.h" + + static void pbus_assign_resources_sorted(struct pci_bus *bus) + { +@@ -343,7 +344,8 @@ static int pbus_size_mem(struct pci_bus + + list_for_each_entry(dev, &bus->devices, bus_list) { + int i; +- ++ int reassign = is_reassigndev(dev); ++ + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &dev->resource[i]; + resource_size_t r_size; +@@ -351,6 +353,10 @@ static int pbus_size_mem(struct pci_bus + if (r->parent || (r->flags & mask) != type) + continue; + r_size = r->end - r->start + 1; ++ ++ if ((i < PCI_BRIDGE_RESOURCES) && reassign) ++ r_size = ALIGN(r_size, PAGE_SIZE); ++ + /* For bridges size != alignment */ + align = resource_alignment(r); + order = __ffs(align) - 20; +Index: head-2008-12-01/drivers/pci/setup-res.c +=================================================================== +--- head-2008-12-01.orig/drivers/pci/setup-res.c 2008-12-01 10:53:15.000000000 +0100 ++++ head-2008-12-01/drivers/pci/setup-res.c 2008-12-01 11:10:02.000000000 +0100 +@@ -126,6 +126,21 @@ int pci_claim_resource(struct pci_dev *d + return err; + } + ++#ifdef CONFIG_PCI_REASSIGN ++void pci_disable_bridge_window(struct pci_dev *dev) ++{ ++ printk(KERN_DEBUG "PCI: Disable bridge window on %s\n", pci_name(dev)); ++ ++ /* MMIO Base/Limit */ ++ pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); ++ ++ /* Prefetchable MMIO Base/Limit */ ++ pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0); ++ pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0); ++ pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff); ++} ++#endif ++ + int pci_assign_resource(struct pci_dev *dev, int resno) + { + struct pci_bus *bus = dev->bus; +@@ -144,6 +159,10 @@ int pci_assign_resource(struct pci_dev * + (unsigned long long)res->end, res->flags); + return -EINVAL; + } ++ if (resno < PCI_BRIDGE_RESOURCES ++ && 
is_reassigndev(dev) ++ && (res->flags & IORESOURCE_MEM)) ++ align = ALIGN(align, PAGE_SIZE); + + /* First, try exact prefetching match.. */ + ret = pci_bus_alloc_resource(bus, res, size, align, min, +@@ -169,8 +188,15 @@ int pci_assign_resource(struct pci_dev * + (unsigned long long)res->end); + } else { + res->flags &= ~IORESOURCE_STARTALIGN; +- if (resno < PCI_BRIDGE_RESOURCES) ++ if (resno < PCI_BRIDGE_RESOURCES) { ++#ifdef CONFIG_PCI_REASSIGN ++ printk(KERN_DEBUG "PCI: Assign resource(%d) on %s " ++ "%016llx - %016llx\n", resno, pci_name(dev), ++ (unsigned long long)res->start, ++ (unsigned long long)res->end); ++#endif + pci_update_resource(dev, res, resno); ++ } + } + + return ret; +@@ -208,6 +234,12 @@ int pci_assign_resource_fixed(struct pci + (unsigned long long)res->start, + (unsigned long long)res->end); + } else if (resno < PCI_BRIDGE_RESOURCES) { ++#ifdef CONFIG_PCI_REASSIGN ++ printk(KERN_DEBUG "PCI: Assign resource(%d) on %s " ++ "%016llx - %016llx\n", resno, pci_name(dev), ++ (unsigned long long)res->start, ++ (unsigned long long)res->end); ++#endif + pci_update_resource(dev, res, resno); + } + +@@ -220,6 +252,7 @@ EXPORT_SYMBOL_GPL(pci_assign_resource_fi + void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) + { + int i; ++ int reassigndev = is_reassigndev(dev); + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r; +@@ -242,12 +275,22 @@ void pdev_sort_resources(struct pci_dev + (unsigned long long)r->end, r->flags); + continue; + } ++ if (i < PCI_BRIDGE_RESOURCES && (r->flags & IORESOURCE_MEM) && ++ reassigndev) ++ r_align = ALIGN(r_align, PAGE_SIZE); ++ + for (list = head; ; list = list->next) { + resource_size_t align = 0; + struct resource_list *ln = list->next; + +- if (ln) ++ if (ln) { + align = resource_alignment(ln->res); ++ if (ln->res - ln->dev->resource < ++ PCI_BRIDGE_RESOURCES && ++ (ln->res->flags & IORESOURCE_MEM) && ++ is_reassigndev(ln->dev)) ++ align = ALIGN(align, PAGE_SIZE); ++ } + + if (r_align > align) { + tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); diff --git a/src/patches/60005_sfc-driverlink.patch1 b/src/patches/60005_sfc-driverlink.patch1 new file mode 100644 index 000000000..7e0ac85f4 --- /dev/null +++ b/src/patches/60005_sfc-driverlink.patch1 @@ -0,0 +1,1155 @@ +From: David Riddoch +commit d96c061bfd1839e34e136de0555564520acc97af +Author: Steve Hodgson +Date: Mon Jul 14 15:38:47 2008 +0100 + +Subject: sfc: Driverlink API for exporting hardware features to client drivers + +References: FATE#303479 +Acked-by: jbeulich@novell.com + +Index: head-2008-08-18/drivers/net/sfc/Makefile +=================================================================== +--- head-2008-08-18.orig/drivers/net/sfc/Makefile 2008-08-18 10:16:43.000000000 +0200 ++++ head-2008-08-18/drivers/net/sfc/Makefile 2008-08-18 10:16:46.000000000 +0200 +@@ -1,5 +1,5 @@ + sfc-y += efx.o falcon.o tx.o rx.o falcon_xmac.o \ + selftest.o ethtool.o xfp_phy.o \ +- mdio_10g.o tenxpress.o boards.o sfe4001.o +- ++ mdio_10g.o tenxpress.o boards.o sfe4001.o \ ++ driverlink.o + obj-$(CONFIG_SFC) += sfc.o +Index: head-2008-08-18/drivers/net/sfc/driverlink.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-08-18/drivers/net/sfc/driverlink.c 2008-08-18 10:16:46.000000000 +0200 +@@ -0,0 +1,367 @@ ++/**************************************************************************** ++ * Driver for Solarflare Solarstorm network controllers and boards ++ * Copyright 2005 Fen Systems Ltd. 
++ * Copyright 2005-2008 Solarflare Communications Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ */ ++ ++#include ++#include ++#include ++#include ++#include "net_driver.h" ++#include "efx.h" ++#include "driverlink_api.h" ++#include "driverlink.h" ++ ++/* Protects @efx_driverlink_lock and @efx_driver_list */ ++static DEFINE_MUTEX(efx_driverlink_lock); ++ ++/* List of all registered drivers */ ++static LIST_HEAD(efx_driver_list); ++ ++/* List of all registered Efx ports */ ++static LIST_HEAD(efx_port_list); ++ ++/** ++ * Driver link handle used internally to track devices ++ * @efx_dev: driverlink device handle exported to consumers ++ * @efx: efx_nic backing the driverlink device ++ * @port_node: per-device list head ++ * @driver_node: per-driver list head ++ */ ++struct efx_dl_handle { ++ struct efx_dl_device efx_dev; ++ struct efx_nic *efx; ++ struct list_head port_node; ++ struct list_head driver_node; ++}; ++ ++static struct efx_dl_handle *efx_dl_handle(struct efx_dl_device *efx_dev) ++{ ++ return container_of(efx_dev, struct efx_dl_handle, efx_dev); ++} ++ ++/* Remove an Efx device, and call the driver's remove() callback if ++ * present. The caller must hold @efx_driverlink_lock. */ ++static void efx_dl_del_device(struct efx_dl_device *efx_dev) ++{ ++ struct efx_dl_handle *efx_handle = efx_dl_handle(efx_dev); ++ ++ EFX_INFO(efx_handle->efx, "%s driverlink client unregistering\n", ++ efx_dev->driver->name); ++ ++ if (efx_dev->driver->remove) ++ efx_dev->driver->remove(efx_dev); ++ ++ list_del(&efx_handle->driver_node); ++ list_del(&efx_handle->port_node); ++ ++ kfree(efx_handle); ++} ++ ++/* Attempt to probe the given device with the driver, creating a ++ * new &struct efx_dl_device. 
If the probe routine returns an error, ++ * then the &struct efx_dl_device is destroyed */ ++static void efx_dl_try_add_device(struct efx_nic *efx, ++ struct efx_dl_driver *driver) ++{ ++ struct efx_dl_handle *efx_handle; ++ struct efx_dl_device *efx_dev; ++ int rc; ++ ++ efx_handle = kzalloc(sizeof(*efx_handle), GFP_KERNEL); ++ if (!efx_handle) ++ goto fail; ++ efx_dev = &efx_handle->efx_dev; ++ efx_handle->efx = efx; ++ efx_dev->driver = driver; ++ efx_dev->pci_dev = efx->pci_dev; ++ INIT_LIST_HEAD(&efx_handle->port_node); ++ INIT_LIST_HEAD(&efx_handle->driver_node); ++ ++ rc = driver->probe(efx_dev, efx->net_dev, ++ efx->dl_info, efx->silicon_rev); ++ if (rc) ++ goto fail; ++ ++ list_add_tail(&efx_handle->driver_node, &driver->device_list); ++ list_add_tail(&efx_handle->port_node, &efx->dl_device_list); ++ ++ EFX_INFO(efx, "%s driverlink client registered\n", driver->name); ++ return; ++ ++ fail: ++ EFX_INFO(efx, "%s driverlink client skipped\n", driver->name); ++ ++ kfree(efx_handle); ++} ++ ++/* Unregister a driver from the driverlink layer, calling the ++ * driver's remove() callback for every attached device */ ++void efx_dl_unregister_driver(struct efx_dl_driver *driver) ++{ ++ struct efx_dl_handle *efx_handle, *efx_handle_n; ++ ++ printk(KERN_INFO "Efx driverlink unregistering %s driver\n", ++ driver->name); ++ ++ mutex_lock(&efx_driverlink_lock); ++ ++ list_for_each_entry_safe(efx_handle, efx_handle_n, ++ &driver->device_list, driver_node) ++ efx_dl_del_device(&efx_handle->efx_dev); ++ ++ list_del(&driver->node); ++ ++ mutex_unlock(&efx_driverlink_lock); ++} ++EXPORT_SYMBOL(efx_dl_unregister_driver); ++ ++/* Register a new driver with the driverlink layer. The driver's ++ * probe routine will be called for every attached nic. */ ++int efx_dl_register_driver(struct efx_dl_driver *driver) ++{ ++ struct efx_nic *efx; ++ int rc; ++ ++ printk(KERN_INFO "Efx driverlink registering %s driver\n", ++ driver->name); ++ ++ INIT_LIST_HEAD(&driver->node); ++ INIT_LIST_HEAD(&driver->device_list); ++ ++ rc = mutex_lock_interruptible(&efx_driverlink_lock); ++ if (rc) ++ return rc; ++ ++ list_add_tail(&driver->node, &efx_driver_list); ++ list_for_each_entry(efx, &efx_port_list, dl_node) ++ efx_dl_try_add_device(efx, driver); ++ ++ mutex_unlock(&efx_driverlink_lock); ++ ++ return 0; ++} ++EXPORT_SYMBOL(efx_dl_register_driver); ++ ++void efx_dl_unregister_nic(struct efx_nic *efx) ++{ ++ struct efx_dl_handle *efx_handle, *efx_handle_n; ++ ++ mutex_lock(&efx_driverlink_lock); ++ ++ list_for_each_entry_safe_reverse(efx_handle, efx_handle_n, ++ &efx->dl_device_list, ++ port_node) ++ efx_dl_del_device(&efx_handle->efx_dev); ++ ++ list_del(&efx->dl_node); ++ ++ mutex_unlock(&efx_driverlink_lock); ++} ++ ++int efx_dl_register_nic(struct efx_nic *efx) ++{ ++ struct efx_dl_driver *driver; ++ int rc; ++ ++ rc = mutex_lock_interruptible(&efx_driverlink_lock); ++ if (rc) ++ return rc; ++ ++ list_add_tail(&efx->dl_node, &efx_port_list); ++ list_for_each_entry(driver, &efx_driver_list, node) ++ efx_dl_try_add_device(efx, driver); ++ ++ mutex_unlock(&efx_driverlink_lock); ++ ++ return 0; ++} ++ ++/* Dummy callback implementations. ++ * To avoid a branch point on the fast-path, the callbacks are always ++ * implemented - they are never NULL. 
++ */ ++static enum efx_veto efx_dummy_tx_packet_callback(struct efx_dl_device *efx_dev, ++ struct sk_buff *skb) ++{ ++ return EFX_ALLOW_PACKET; ++} ++ ++static enum efx_veto efx_dummy_rx_packet_callback(struct efx_dl_device *efx_dev, ++ const char *pkt_buf, int len) ++{ ++ return EFX_ALLOW_PACKET; ++} ++ ++static int efx_dummy_request_mtu_callback(struct efx_dl_device *efx_dev, ++ int new_mtu) ++{ ++ return 0; ++} ++ ++static void efx_dummy_mtu_changed_callback(struct efx_dl_device *efx_dev, ++ int mtu) ++{ ++ return; ++} ++ ++static void efx_dummy_event_callback(struct efx_dl_device *efx_dev, void *event) ++{ ++ return; ++} ++ ++struct efx_dl_callbacks efx_default_callbacks = { ++ .tx_packet = efx_dummy_tx_packet_callback, ++ .rx_packet = efx_dummy_rx_packet_callback, ++ .request_mtu = efx_dummy_request_mtu_callback, ++ .mtu_changed = efx_dummy_mtu_changed_callback, ++ .event = efx_dummy_event_callback, ++}; ++ ++void efx_dl_unregister_callbacks(struct efx_dl_device *efx_dev, ++ struct efx_dl_callbacks *callbacks) ++{ ++ struct efx_dl_handle *efx_handle = efx_dl_handle(efx_dev); ++ struct efx_nic *efx = efx_handle->efx; ++ ++ efx_suspend(efx); ++ ++ EFX_INFO(efx, "removing callback hooks into %s driver\n", ++ efx_dev->driver->name); ++ ++ if (callbacks->tx_packet) { ++ BUG_ON(efx->dl_cb_dev.tx_packet != efx_dev); ++ efx->dl_cb.tx_packet = efx_default_callbacks.tx_packet; ++ efx->dl_cb_dev.tx_packet = NULL; ++ } ++ if (callbacks->rx_packet) { ++ BUG_ON(efx->dl_cb_dev.rx_packet != efx_dev); ++ efx->dl_cb.rx_packet = efx_default_callbacks.rx_packet; ++ efx->dl_cb_dev.rx_packet = NULL; ++ } ++ if (callbacks->request_mtu) { ++ BUG_ON(efx->dl_cb_dev.request_mtu != efx_dev); ++ efx->dl_cb.request_mtu = efx_default_callbacks.request_mtu; ++ efx->dl_cb_dev.request_mtu = NULL; ++ } ++ if (callbacks->mtu_changed) { ++ BUG_ON(efx->dl_cb_dev.mtu_changed != efx_dev); ++ efx->dl_cb.mtu_changed = efx_default_callbacks.mtu_changed; ++ efx->dl_cb_dev.mtu_changed = NULL; ++ } ++ if (callbacks->event) { ++ BUG_ON(efx->dl_cb_dev.event != efx_dev); ++ efx->dl_cb.event = efx_default_callbacks.event; ++ efx->dl_cb_dev.event = NULL; ++ } ++ ++ efx_resume(efx); ++} ++EXPORT_SYMBOL(efx_dl_unregister_callbacks); ++ ++int efx_dl_register_callbacks(struct efx_dl_device *efx_dev, ++ struct efx_dl_callbacks *callbacks) ++{ ++ struct efx_dl_handle *efx_handle = efx_dl_handle(efx_dev); ++ struct efx_nic *efx = efx_handle->efx; ++ int rc = 0; ++ ++ efx_suspend(efx); ++ ++ /* Check that the requested callbacks are not already hooked. 
*/ ++ if ((callbacks->tx_packet && efx->dl_cb_dev.tx_packet) || ++ (callbacks->rx_packet && efx->dl_cb_dev.rx_packet) || ++ (callbacks->request_mtu && efx->dl_cb_dev.request_mtu) || ++ (callbacks->mtu_changed && efx->dl_cb_dev.mtu_changed) || ++ (callbacks->event && efx->dl_cb_dev.event)) { ++ rc = -EBUSY; ++ goto out; ++ } ++ ++ EFX_INFO(efx, "adding callback hooks to %s driver\n", ++ efx_dev->driver->name); ++ ++ /* Hook in the requested callbacks, leaving any NULL members ++ * referencing the members of @efx_default_callbacks */ ++ if (callbacks->tx_packet) { ++ efx->dl_cb.tx_packet = callbacks->tx_packet; ++ efx->dl_cb_dev.tx_packet = efx_dev; ++ } ++ if (callbacks->rx_packet) { ++ efx->dl_cb.rx_packet = callbacks->rx_packet; ++ efx->dl_cb_dev.rx_packet = efx_dev; ++ } ++ if (callbacks->request_mtu) { ++ efx->dl_cb.request_mtu = callbacks->request_mtu; ++ efx->dl_cb_dev.request_mtu = efx_dev; ++ } ++ if (callbacks->mtu_changed) { ++ efx->dl_cb.mtu_changed = callbacks->mtu_changed; ++ efx->dl_cb_dev.mtu_changed = efx_dev; ++ } ++ if (callbacks->event) { ++ efx->dl_cb.event = callbacks->event; ++ efx->dl_cb_dev.event = efx_dev; ++ } ++ ++ out: ++ efx_resume(efx); ++ ++ return rc; ++} ++EXPORT_SYMBOL(efx_dl_register_callbacks); ++ ++void efx_dl_schedule_reset(struct efx_dl_device *efx_dev) ++{ ++ struct efx_dl_handle *efx_handle = efx_dl_handle(efx_dev); ++ struct efx_nic *efx = efx_handle->efx; ++ ++ efx_schedule_reset(efx, RESET_TYPE_ALL); ++} ++EXPORT_SYMBOL(efx_dl_schedule_reset); ++ ++void efx_dl_reset_unlock(void) ++{ ++ mutex_unlock(&efx_driverlink_lock); ++} ++ ++/* Suspend ready for reset, serialising against all the driverlink interfacse ++ * and calling the suspend() callback of every registered driver */ ++void efx_dl_reset_suspend(struct efx_nic *efx) ++{ ++ struct efx_dl_handle *efx_handle; ++ struct efx_dl_device *efx_dev; ++ ++ mutex_lock(&efx_driverlink_lock); ++ ++ list_for_each_entry_reverse(efx_handle, ++ &efx->dl_device_list, ++ port_node) { ++ efx_dev = &efx_handle->efx_dev; ++ if (efx_dev->driver->reset_suspend) ++ efx_dev->driver->reset_suspend(efx_dev); ++ } ++} ++ ++/* Resume after a reset, calling the resume() callback of every registered ++ * driver, and releasing @Efx_driverlink_lock acquired in ++ * efx_dl_reset_resume() */ ++void efx_dl_reset_resume(struct efx_nic *efx, int ok) ++{ ++ struct efx_dl_handle *efx_handle; ++ struct efx_dl_device *efx_dev; ++ ++ list_for_each_entry(efx_handle, &efx->dl_device_list, ++ port_node) { ++ efx_dev = &efx_handle->efx_dev; ++ if (efx_dev->driver->reset_resume) ++ efx_dev->driver->reset_resume(efx_dev, ok); ++ } ++ ++ mutex_unlock(&efx_driverlink_lock); ++} +Index: head-2008-08-18/drivers/net/sfc/driverlink.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-08-18/drivers/net/sfc/driverlink.h 2008-08-18 10:16:46.000000000 +0200 +@@ -0,0 +1,43 @@ ++/**************************************************************************** ++ * Driver for Solarflare Solarstorm network controllers and boards ++ * Copyright 2005 Fen Systems Ltd. ++ * Copyright 2006-2008 Solarflare Communications Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. 
++ */ ++ ++#ifndef EFX_DRIVERLINK_H ++#define EFX_DRIVERLINK_H ++ ++/* Forward declarations */ ++struct efx_dl_device; ++struct efx_nic; ++ ++/* Efx callback devices ++ * ++ * A list of the devices that own each callback. The partner to ++ * struct efx_dl_callbacks. ++ */ ++struct efx_dl_cb_devices { ++ struct efx_dl_device *tx_packet; ++ struct efx_dl_device *rx_packet; ++ struct efx_dl_device *request_mtu; ++ struct efx_dl_device *mtu_changed; ++ struct efx_dl_device *event; ++}; ++ ++extern struct efx_dl_callbacks efx_default_callbacks; ++ ++#define EFX_DL_CALLBACK(_port, _name, ...) \ ++ (_port)->dl_cb._name((_port)->dl_cb_dev._name, __VA_ARGS__) ++ ++extern int efx_dl_register_nic(struct efx_nic *efx); ++extern void efx_dl_unregister_nic(struct efx_nic *efx); ++ ++/* Suspend and resume client drivers over a hardware reset */ ++extern void efx_dl_reset_suspend(struct efx_nic *efx); ++extern void efx_dl_reset_resume(struct efx_nic *efx, int ok); ++ ++#endif /* EFX_DRIVERLINK_H */ +Index: head-2008-08-18/drivers/net/sfc/driverlink_api.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-08-18/drivers/net/sfc/driverlink_api.h 2008-08-18 10:16:46.000000000 +0200 +@@ -0,0 +1,303 @@ ++/**************************************************************************** ++ * Driver for Solarflare Solarstorm network controllers and boards ++ * Copyright 2005-2006 Fen Systems Ltd. ++ * Copyright 2005-2008 Solarflare Communications Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ */ ++ ++#ifndef EFX_DRIVERLINK_API_H ++#define EFX_DRIVERLINK_API_H ++ ++#include ++ ++/* Forward declarations */ ++struct pci_dev; ++struct net_device; ++struct sk_buff; ++struct efx_dl_device; ++struct efx_dl_device_info; ++ ++/* An extra safeguard in addition to symbol versioning */ ++#define EFX_DRIVERLINK_API_VERSION 2 ++ ++/** ++ * struct efx_dl_driver - An Efx driverlink device driver ++ * ++ * A driverlink client defines and initializes as many instances of ++ * efx_dl_driver as required, registering each one with ++ * efx_dl_register_driver(). ++ * ++ * @name: Name of the driver ++ * @probe: Called when device added ++ * The client should use the @def_info linked list and @silicon_rev ++ * to determine if they wish to attach to this device. ++ * Context: process, driverlink semaphore held ++ * @remove: Called when device removed ++ * The client must ensure the finish all operations with this ++ * device before returning from this method. ++ * Context: process, driverlink semaphore held ++ * @reset_suspend: Called before device is reset ++ * Called immediately before a hardware reset. The client must stop all ++ * hardware processing before returning from this method. Callbacks will ++ * be inactive when this method is called. ++ * Context: process, driverlink semaphore held. rtnl_lock may be held ++ * @reset_resume: Called after device is reset ++ * Called after a hardware reset. If @ok is true, the client should ++ * state and resume normal operations. If @ok is false, the client should ++ * abandon use of the hardware resources. remove() will still be called. ++ * Context: process, driverlink semaphore held. 
rtnl_lock may be held ++ */ ++struct efx_dl_driver { ++ const char *name; ++ ++ int (*probe) (struct efx_dl_device *efx_dl_dev, ++ const struct net_device *net_dev, ++ const struct efx_dl_device_info *dev_info, ++ const char *silicon_rev); ++ void (*remove) (struct efx_dl_device *efx_dev); ++ void (*reset_suspend) (struct efx_dl_device *efx_dev); ++ void (*reset_resume) (struct efx_dl_device *efx_dev, int ok); ++ ++/* private: */ ++ struct list_head node; ++ struct list_head device_list; ++}; ++ ++/** ++ * enum efx_dl_device_info_type - Device information identifier. ++ * ++ * Used to identify each item in the &struct efx_dl_device_info linked list ++ * provided to each driverlink client in the probe() @dev_info member. ++ * ++ * @EFX_DL_FALCON_RESOURCES: Information type is &struct efx_dl_falcon_resources ++ */ ++enum efx_dl_device_info_type { ++ /** Falcon resources available for export */ ++ EFX_DL_FALCON_RESOURCES = 0, ++}; ++ ++/** ++ * struct efx_dl_device_info - device information structure ++ * ++ * @next: Link to next structure, if any ++ * @type: Type code for this structure ++ */ ++struct efx_dl_device_info { ++ struct efx_dl_device_info *next; ++ enum efx_dl_device_info_type type; ++}; ++ ++/** ++ * enum efx_dl_falcon_resource_flags - Falcon resource information flags. ++ * ++ * Flags that describe hardware variations for the current Falcon device. ++ * ++ * @EFX_DL_FALCON_DUAL_FUNC: Port is dual-function. ++ * Certain silicon revisions have two pci functions, and require ++ * certain hardware resources to be accessed via the secondary ++ * function ++ * @EFX_DL_FALCON_USE_MSI: Port is initialised to use MSI/MSI-X interrupts. ++ * Falcon supports traditional legacy interrupts and MSI/MSI-X ++ * interrupts. The choice is made at run time by the sfc driver, and ++ * notified to the clients by this enumeration ++ */ ++enum efx_dl_falcon_resource_flags { ++ EFX_DL_FALCON_DUAL_FUNC = 0x1, ++ EFX_DL_FALCON_USE_MSI = 0x2, ++}; ++ ++/** ++ * struct efx_dl_falcon_resources - Falcon resource information. ++ * ++ * This structure describes Falcon hardware resources available for ++ * use by a driverlink driver. ++ * ++ * @hdr: Resource linked list header ++ * @biu_lock: Register access lock. ++ * Some Falcon revisions require register access for configuration ++ * registers to be serialised between ports and PCI functions. ++ * The sfc driver will provide the appropriate lock semantics for ++ * the underlying hardware. ++ * @buffer_table_min: First available buffer table entry ++ * @buffer_table_lim: Last available buffer table entry + 1 ++ * @evq_timer_min: First available event queue with timer ++ * @evq_timer_lim: Last available event queue with timer + 1 ++ * @evq_int_min: First available event queue with interrupt ++ * @evq_int_lim: Last available event queue with interrupt + 1 ++ * @rxq_min: First available RX queue ++ * @rxq_lim: Last available RX queue + 1 ++ * @txq_min: First available TX queue ++ * @txq_lim: Last available TX queue + 1 ++ * @flags: Hardware variation flags ++ */ ++struct efx_dl_falcon_resources { ++ struct efx_dl_device_info hdr; ++ spinlock_t *biu_lock; ++ unsigned buffer_table_min; ++ unsigned buffer_table_lim; ++ unsigned evq_timer_min; ++ unsigned evq_timer_lim; ++ unsigned evq_int_min; ++ unsigned evq_int_lim; ++ unsigned rxq_min; ++ unsigned rxq_lim; ++ unsigned txq_min; ++ unsigned txq_lim; ++ enum efx_dl_falcon_resource_flags flags; ++}; ++ ++/** ++ * struct efx_dl_device - An Efx driverlink device. ++ * ++ * @pci_dev: PCI device used by the sfc driver. 
++ * @priv: Driver private data ++ * Driverlink clients can use this to store a pointer to their ++ * internal per-device data structure. Each (driver, device) ++ * tuple has a separate &struct efx_dl_device, so clients can use ++ * this @priv field independently. ++ * @driver: Efx driverlink driver for this device ++ */ ++struct efx_dl_device { ++ struct pci_dev *pci_dev; ++ void *priv; ++ struct efx_dl_driver *driver; ++}; ++ ++/** ++ * enum efx_veto - Packet veto request flag. ++ * ++ * This is the return type for the rx_packet() and tx_packet() methods ++ * in &struct efx_dl_callbacks. ++ * ++ * @EFX_ALLOW_PACKET: Packet may be transmitted/received ++ * @EFX_VETO_PACKET: Packet must not be transmitted/received ++ */ ++enum efx_veto { ++ EFX_ALLOW_PACKET = 0, ++ EFX_VETO_PACKET = 1, ++}; ++ ++/** ++ * struct efx_dl_callbacks - Efx callbacks ++ * ++ * This is a tighly controlled set of simple callbacks, that are attached ++ * to the sfc driver via efx_dl_register_callbacks(). They export just enough ++ * state to allow clients to make use of the available hardware resources. ++ * ++ * For efficiency, only one client can hook each callback. Since these ++ * callbacks are called on packet transmit and reception paths, and the ++ * sfc driver may have multiple tx and rx queues per port, clients should ++ * avoid acquiring locks or allocating memory. ++ * ++ * @tx_packet: Called when packet is about to be transmitted ++ * Called for every packet about to be transmitted, providing means ++ * for the client to snoop traffic, and veto transmission by returning ++ * %EFX_VETO_PACKET (the sfc driver will subsequently free the skb). ++ * Context: tasklet, netif_tx_lock held ++ * @rx_packet: Called when packet is received ++ * Called for every received packet (after LRO), allowing the client ++ * to snoop every received packet (on every rx queue), and veto ++ * reception by returning %EFX_VETO_PACKET. ++ * Context: tasklet ++ * @request_mtu: Called to request MTU change. ++ * Called whenever the user requests the net_dev mtu to be changed. ++ * If the client returns an error, the mtu change is aborted. The sfc ++ * driver guarantees that no other callbacks are running. ++ * Context: process, rtnl_lock held. ++ * @mtu_changed: Called when MTU has been changed. ++ * Called after the mtu has been successfully changed, always after ++ * a previous call to request_mtu(). The sfc driver guarantees that no ++ * other callbacks are running. ++ * Context: process, rtnl_lock held. ++ * @event: Called when a hardware NIC event is not understood by the sfc driver. ++ * Context: tasklet. 
++ */ ++struct efx_dl_callbacks { ++ enum efx_veto (*tx_packet) (struct efx_dl_device *efx_dev, ++ struct sk_buff *skb); ++ enum efx_veto (*rx_packet) (struct efx_dl_device *efx_dev, ++ const char *pkt_hdr, int pkt_len); ++ int (*request_mtu) (struct efx_dl_device *efx_dev, int new_mtu); ++ void (*mtu_changed) (struct efx_dl_device *efx_dev, int mtu); ++ void (*event) (struct efx_dl_device *efx_dev, void *p_event); ++}; ++ ++/* Include API version number in symbol used for efx_dl_register_driver */ ++#define efx_dl_stringify_1(x, y) x ## y ++#define efx_dl_stringify_2(x, y) efx_dl_stringify_1(x, y) ++#define efx_dl_register_driver \ ++ efx_dl_stringify_2(efx_dl_register_driver_api_ver_, \ ++ EFX_DRIVERLINK_API_VERSION) ++ ++/* Exported driverlink api used to register and unregister the client driver ++ * and any callbacks [only one per port allowed], and to allow a client driver ++ * to request reset to recover from an error condition. ++ * ++ * All of these functions acquire the driverlink semaphore, so must not be ++ * called from an efx_dl_driver or efx_dl_callbacks member, and must be called ++ * from process context. ++ */ ++extern int efx_dl_register_driver(struct efx_dl_driver *driver); ++ ++extern void efx_dl_unregister_driver(struct efx_dl_driver *driver); ++ ++extern int efx_dl_register_callbacks(struct efx_dl_device *efx_dev, ++ struct efx_dl_callbacks *callbacks); ++ ++extern void efx_dl_unregister_callbacks(struct efx_dl_device *efx_dev, ++ struct efx_dl_callbacks *callbacks); ++ ++/* Schedule a reset without grabbing any locks */ ++extern void efx_dl_schedule_reset(struct efx_dl_device *efx_dev); ++ ++/** ++ * efx_dl_for_each_device_info_matching - iterate an efx_dl_device_info list ++ * @_dev_info: Pointer to first &struct efx_dl_device_info ++ * @_type: Type code to look for ++ * @_info_type: Structure type corresponding to type code ++ * @_field: Name of &struct efx_dl_device_info field in the type ++ * @_p: Iterator variable ++ * ++ * Example: ++ * struct efx_dl_falcon_resources *res; ++ * efx_dl_for_each_device_info_matching(dev_info, EFX_DL_FALCON_RESOURCES, ++ * struct efx_dl_falcon_resources, ++ * hdr, res) { ++ * if (res->flags & EFX_DL_FALCON_DUAL_FUNC) ++ * .... ++ * } ++ */ ++#define efx_dl_for_each_device_info_matching(_dev_info, _type, \ ++ _info_type, _field, _p) \ ++ for ((_p) = container_of((_dev_info), _info_type, _field); \ ++ (_p) != NULL; \ ++ (_p) = container_of((_p)->_field.next, _info_type, _field))\ ++ if ((_p)->_field.type != _type) \ ++ continue; \ ++ else ++ ++/** ++ * efx_dl_search_device_info - search an efx_dl_device_info list ++ * @_dev_info: Pointer to first &struct efx_dl_device_info ++ * @_type: Type code to look for ++ * @_info_type: Structure type corresponding to type code ++ * @_field: Name of &struct efx_dl_device_info member in this type ++ * @_p: Result variable ++ * ++ * Example: ++ * struct efx_dl_falcon_resources *res; ++ * efx_dl_search_device_info(dev_info, EFX_DL_FALCON_RESOURCES, ++ * struct efx_dl_falcon_resources, hdr, res); ++ * if (res) ++ * .... 
++ */ ++#define efx_dl_search_device_info(_dev_info, _type, _info_type, \ ++ _field, _p) \ ++ efx_dl_for_each_device_info_matching((_dev_info), (_type), \ ++ _info_type, _field, (_p)) \ ++ break; ++ ++#endif /* EFX_DRIVERLINK_API_H */ +Index: head-2008-08-18/drivers/net/sfc/efx.c +=================================================================== +--- head-2008-08-18.orig/drivers/net/sfc/efx.c 2008-08-18 10:16:43.000000000 +0200 ++++ head-2008-08-18/drivers/net/sfc/efx.c 2008-08-18 10:16:46.000000000 +0200 +@@ -1427,6 +1427,11 @@ static int efx_change_mtu(struct net_dev + + efx_stop_all(efx); + ++ /* Ask driverlink client if we can change MTU */ ++ rc = EFX_DL_CALLBACK(efx, request_mtu, new_mtu); ++ if (rc) ++ goto out; ++ + EFX_LOG(efx, "changing MTU to %d\n", new_mtu); + + efx_fini_channels(efx); +@@ -1435,6 +1440,10 @@ static int efx_change_mtu(struct net_dev + if (rc) + goto fail; + ++ /* Notify driverlink client of new MTU */ ++ EFX_DL_CALLBACK(efx, mtu_changed, new_mtu); ++ ++ out: + efx_start_all(efx); + return rc; + +@@ -1587,6 +1596,23 @@ static void efx_unregister_netdev(struct + * Device reset and suspend + * + **************************************************************************/ ++/* Serialise access to the driverlink callbacks, by quiescing event processing ++ * (without flushing the descriptor queues), and acquiring the rtnl_lock */ ++void efx_suspend(struct efx_nic *efx) ++{ ++ EFX_LOG(efx, "suspending operations\n"); ++ ++ rtnl_lock(); ++ efx_stop_all(efx); ++} ++ ++void efx_resume(struct efx_nic *efx) ++{ ++ EFX_LOG(efx, "resuming operations\n"); ++ ++ efx_start_all(efx); ++ rtnl_unlock(); ++} + + /* The final hardware and software finalisation before reset. */ + static int efx_reset_down(struct efx_nic *efx, struct ethtool_cmd *ecmd) +@@ -1649,8 +1675,8 @@ static int efx_reset(struct efx_nic *efx + enum reset_type method = efx->reset_pending; + int rc; + +- /* Serialise with kernel interfaces */ + rtnl_lock(); ++ efx_dl_reset_suspend(efx); + + /* If we're not RUNNING then don't reset. 
Leave the reset_pending + * flag set so that efx_pci_probe_main will be retried */ +@@ -1717,6 +1743,7 @@ static int efx_reset(struct efx_nic *efx + efx_start_all(efx); + + unlock_rtnl: ++ efx_dl_reset_resume(efx, 1); + rtnl_unlock(); + return 0; + +@@ -1729,6 +1756,7 @@ static int efx_reset(struct efx_nic *efx + efx->state = STATE_DISABLED; + + mutex_unlock(&efx->mac_lock); ++ efx_dl_reset_resume(efx, 0); + rtnl_unlock(); + efx_unregister_netdev(efx); + efx_fini_port(efx); +@@ -1871,6 +1899,9 @@ static int efx_init_struct(struct efx_ni + mutex_init(&efx->mac_lock); + efx->phy_op = &efx_dummy_phy_operations; + efx->mii.dev = net_dev; ++ INIT_LIST_HEAD(&efx->dl_node); ++ INIT_LIST_HEAD(&efx->dl_device_list); ++ efx->dl_cb = efx_default_callbacks; + INIT_WORK(&efx->reconfigure_work, efx_reconfigure_work); + atomic_set(&efx->netif_stop_count, 1); + +@@ -1990,6 +2021,7 @@ static void efx_pci_remove(struct pci_de + efx = pci_get_drvdata(pci_dev); + if (!efx) + return; ++ efx_dl_unregister_nic(efx); + + /* Mark the NIC as fini, then stop the interface */ + rtnl_lock(); +@@ -2157,8 +2189,15 @@ static int __devinit efx_pci_probe(struc + + EFX_LOG(efx, "initialisation successful\n"); + ++ /* Register with driverlink layer */ ++ rc = efx_dl_register_nic(efx); ++ if (rc) ++ goto fail6; ++ + return 0; + ++ fail6: ++ efx_unregister_netdev(efx); + fail5: + efx_pci_remove_main(efx); + fail4: +Index: head-2008-08-18/drivers/net/sfc/falcon.c +=================================================================== +--- head-2008-08-18.orig/drivers/net/sfc/falcon.c 2008-08-18 10:16:43.000000000 +0200 ++++ head-2008-08-18/drivers/net/sfc/falcon.c 2008-08-18 10:16:46.000000000 +0200 +@@ -36,12 +36,12 @@ + + /** + * struct falcon_nic_data - Falcon NIC state +- * @next_buffer_table: First available buffer table id ++ * @resources: Resource information for driverlink client + * @pci_dev2: The secondary PCI device if present + * @i2c_data: Operations and state for I2C bit-bashing algorithm + */ + struct falcon_nic_data { +- unsigned next_buffer_table; ++ struct efx_dl_falcon_resources resources; + struct pci_dev *pci_dev2; + struct i2c_algo_bit_data i2c_data; + }; +@@ -322,8 +322,8 @@ static int falcon_alloc_special_buffer(s + memset(buffer->addr, 0xff, len); + + /* Select new buffer ID */ +- buffer->index = nic_data->next_buffer_table; +- nic_data->next_buffer_table += buffer->entries; ++ buffer->index = nic_data->resources.buffer_table_min; ++ nic_data->resources.buffer_table_min += buffer->entries; + + EFX_LOG(efx, "allocating special buffers %d-%d at %llx+%x " + "(virt %p phys %lx)\n", buffer->index, +@@ -1115,10 +1115,12 @@ static void falcon_handle_driver_event(s + case TX_DESCQ_FLS_DONE_EV_DECODE: + EFX_TRACE(efx, "channel %d TXQ %d flushed\n", + channel->channel, ev_sub_data); ++ EFX_DL_CALLBACK(efx, event, event); + break; + case RX_DESCQ_FLS_DONE_EV_DECODE: + EFX_TRACE(efx, "channel %d RXQ %d flushed\n", + channel->channel, ev_sub_data); ++ EFX_DL_CALLBACK(efx, event, event); + break; + case EVQ_INIT_DONE_EV_DECODE: + EFX_LOG(efx, "channel %d EVQ %d initialised\n", +@@ -1127,14 +1129,17 @@ static void falcon_handle_driver_event(s + case SRM_UPD_DONE_EV_DECODE: + EFX_TRACE(efx, "channel %d SRAM update done\n", + channel->channel); ++ EFX_DL_CALLBACK(efx, event, event); + break; + case WAKE_UP_EV_DECODE: + EFX_TRACE(efx, "channel %d RXQ %d wakeup event\n", + channel->channel, ev_sub_data); ++ EFX_DL_CALLBACK(efx, event, event); + break; + case TIMER_EV_DECODE: + EFX_TRACE(efx, "channel %d RX queue %d timer 
expired\n", + channel->channel, ev_sub_data); ++ EFX_DL_CALLBACK(efx, event, event); + break; + case RX_RECOVERY_EV_DECODE: + EFX_ERR(efx, "channel %d seen DRIVER RX_RESET event. " +@@ -1159,6 +1164,7 @@ static void falcon_handle_driver_event(s + EFX_TRACE(efx, "channel %d unknown driver event code %d " + "data %04x\n", channel->channel, ev_sub_code, + ev_sub_data); ++ EFX_DL_CALLBACK(efx, event, event); + break; + } + } +@@ -2371,6 +2377,59 @@ static int falcon_probe_nvconfig(struct + return rc; + } + ++/* Looks at available SRAM resources and silicon revision, and works out ++ * how many queues we can support, and where things like descriptor caches ++ * should live. */ ++static int falcon_dimension_resources(struct efx_nic *efx) ++{ ++ unsigned internal_dcs_entries; ++ struct falcon_nic_data *nic_data = efx->nic_data; ++ struct efx_dl_falcon_resources *res = &nic_data->resources; ++ ++ /* Fill out the driverlink resource list */ ++ res->hdr.type = EFX_DL_FALCON_RESOURCES; ++ res->biu_lock = &efx->biu_lock; ++ efx->dl_info = &res->hdr; ++ ++ /* NB. The minimum values get increased as this driver initialises ++ * its resources, so this should prevent any overlap. ++ */ ++ switch (falcon_rev(efx)) { ++ case FALCON_REV_A1: ++ res->rxq_min = 16; ++ res->txq_min = 16; ++ res->evq_int_min = 4; ++ res->evq_int_lim = 5; ++ res->evq_timer_min = 5; ++ res->evq_timer_lim = 4096; ++ internal_dcs_entries = 8192; ++ break; ++ case FALCON_REV_B0: ++ default: ++ res->rxq_min = 0; ++ res->txq_min = 0; ++ res->evq_int_min = 0; ++ res->evq_int_lim = 64; ++ res->evq_timer_min = 64; ++ res->evq_timer_lim = 4096; ++ internal_dcs_entries = 4096; ++ break; ++ } ++ ++ /* Internal SRAM only for now */ ++ res->rxq_lim = internal_dcs_entries / RX_DC_ENTRIES; ++ res->txq_lim = internal_dcs_entries / TX_DC_ENTRIES; ++ res->buffer_table_lim = 8192; ++ ++ if (FALCON_IS_DUAL_FUNC(efx)) ++ res->flags |= EFX_DL_FALCON_DUAL_FUNC; ++ ++ if (EFX_INT_MODE_USE_MSI(efx)) ++ res->flags |= EFX_DL_FALCON_USE_MSI; ++ ++ return 0; ++} ++ + /* Probe the NIC variant (revision, ASIC vs FPGA, function count, port + * count, port speed). Set workaround and feature flags accordingly. 
+ */ +@@ -2403,10 +2462,12 @@ static int falcon_probe_nic_variant(stru + EFX_ERR(efx, "1G mode not supported\n"); + return -ENODEV; + } ++ efx->silicon_rev = "falcon/a1"; + break; + } + + case FALCON_REV_B0: ++ efx->silicon_rev = "falcon/b0"; + break; + + default: +@@ -2472,6 +2533,10 @@ int falcon_probe_nic(struct efx_nic *efx + if (rc) + goto fail5; + ++ rc = falcon_dimension_resources(efx); ++ if (rc) ++ goto fail6; ++ + /* Initialise I2C adapter */ + efx->i2c_adap.owner = THIS_MODULE; + nic_data->i2c_data = falcon_i2c_bit_operations; +@@ -2481,10 +2546,12 @@ int falcon_probe_nic(struct efx_nic *efx + strlcpy(efx->i2c_adap.name, "SFC4000 GPIO", sizeof(efx->i2c_adap.name)); + rc = i2c_bit_add_bus(&efx->i2c_adap); + if (rc) +- goto fail5; ++ goto fail6; + + return 0; + ++ fail6: ++ efx->dl_info = NULL; + fail5: + falcon_free_buffer(efx, &efx->irq_status); + fail4: +@@ -2675,6 +2742,7 @@ void falcon_remove_nic(struct efx_nic *e + /* Tear down the private nic state */ + kfree(efx->nic_data); + efx->nic_data = NULL; ++ efx->dl_info = NULL; + } + + void falcon_update_nic_stats(struct efx_nic *efx) +Index: head-2008-08-18/drivers/net/sfc/net_driver.h +=================================================================== +--- head-2008-08-18.orig/drivers/net/sfc/net_driver.h 2008-08-18 10:16:43.000000000 +0200 ++++ head-2008-08-18/drivers/net/sfc/net_driver.h 2008-08-18 10:16:46.000000000 +0200 +@@ -30,6 +30,8 @@ + + #include "enum.h" + #include "bitfield.h" ++#include "driverlink_api.h" ++#include "driverlink.h" + + #define EFX_MAX_LRO_DESCRIPTORS 8 + #define EFX_MAX_LRO_AGGR MAX_SKB_FRAGS +@@ -676,6 +678,12 @@ union efx_multicast_hash { + * @loopback_mode: Loopback status + * @loopback_modes: Supported loopback mode bitmask + * @loopback_selftest: Offline self-test private state ++ * @silicon_rev: Silicon revision description for driverlink ++ * @dl_info: Linked list of hardware parameters exposed through driverlink ++ * @dl_node: Driverlink port list ++ * @dl_device_list: Driverlink device list ++ * @dl_cb: Driverlink callbacks table ++ * @dl_cb_dev: Driverlink callback owner devices + * + * The @priv field of the corresponding &struct net_device points to + * this. +@@ -752,6 +760,13 @@ struct efx_nic { + unsigned int loopback_modes; + + void *loopback_selftest; ++ ++ const char *silicon_rev; ++ struct efx_dl_device_info *dl_info; ++ struct list_head dl_node; ++ struct list_head dl_device_list; ++ struct efx_dl_callbacks dl_cb; ++ struct efx_dl_cb_devices dl_cb_dev; + }; + + static inline int efx_dev_registered(struct efx_nic *efx) +Index: head-2008-08-18/drivers/net/sfc/rx.c +=================================================================== +--- head-2008-08-18.orig/drivers/net/sfc/rx.c 2008-08-18 10:16:43.000000000 +0200 ++++ head-2008-08-18/drivers/net/sfc/rx.c 2008-08-18 10:16:46.000000000 +0200 +@@ -549,8 +549,22 @@ static inline void efx_rx_packet__check_ + static inline void efx_rx_packet_lro(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf) + { ++ struct efx_nic *efx = channel->efx; + struct net_lro_mgr *lro_mgr = &channel->lro_mgr; + void *priv = channel; ++ enum efx_veto veto; ++ ++ /* It would be faster if we had access to packets at the ++ * other side of generic LRO. 
Unfortunately, there isn't ++ * an obvious interface to this, so veto packets before LRO */ ++ veto = EFX_DL_CALLBACK(efx, rx_packet, rx_buf->data, rx_buf->len); ++ if (unlikely(veto)) { ++ EFX_TRACE(efx, "LRO RX vetoed by driverlink %s driver\n", ++ efx->dl_cb_dev.rx_packet->driver->name); ++ /* Free the buffer now */ ++ efx_free_rx_buffer(efx, rx_buf); ++ return; ++ } + + /* Pass the skb/page into the LRO engine */ + if (rx_buf->page) { +@@ -686,6 +700,7 @@ void __efx_rx_packet(struct efx_channel + struct efx_rx_buffer *rx_buf, int checksummed) + { + struct efx_nic *efx = channel->efx; ++ enum efx_veto veto; + struct sk_buff *skb; + int lro = efx->net_dev->features & NETIF_F_LRO; + +@@ -723,6 +738,16 @@ void __efx_rx_packet(struct efx_channel + goto done; + } + ++ /* Allow callback to veto the packet */ ++ veto = EFX_DL_CALLBACK(efx, rx_packet, rx_buf->data, rx_buf->len); ++ if (unlikely(veto)) { ++ EFX_LOG(efx, "RX vetoed by driverlink %s driver\n", ++ efx->dl_cb_dev.rx_packet->driver->name); ++ /* Free the buffer now */ ++ efx_free_rx_buffer(efx, rx_buf); ++ goto done; ++ } ++ + /* Form an skb if required */ + if (rx_buf->page) { + int hdr_len = min(rx_buf->len, EFX_SKB_HEADERS); +Index: head-2008-08-18/drivers/net/sfc/tx.c +=================================================================== +--- head-2008-08-18.orig/drivers/net/sfc/tx.c 2008-08-18 10:16:43.000000000 +0200 ++++ head-2008-08-18/drivers/net/sfc/tx.c 2008-08-18 10:16:46.000000000 +0200 +@@ -368,7 +368,21 @@ inline int efx_xmit(struct efx_nic *efx, + int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev) + { + struct efx_nic *efx = net_dev->priv; +- return efx_xmit(efx, &efx->tx_queue[0], skb); ++ struct efx_tx_queue *tx_queue = &efx->tx_queue[0]; ++ enum efx_veto veto; ++ ++ /* See if driverlink wants to veto the packet. */ ++ veto = EFX_DL_CALLBACK(efx, tx_packet, skb); ++ if (unlikely(veto)) { ++ EFX_TRACE(efx, "TX queue %d packet vetoed by " ++ "driverlink %s driver\n", tx_queue->queue, ++ efx->dl_cb_dev.tx_packet->driver->name); ++ /* Free the skb; nothing else will do it */ ++ dev_kfree_skb_any(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ return efx_xmit(efx, tx_queue, skb); + } + + void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) diff --git a/src/patches/60006_sfc-resource-driver.patch1 b/src/patches/60006_sfc-resource-driver.patch1 new file mode 100644 index 000000000..29dac94b7 --- /dev/null +++ b/src/patches/60006_sfc-resource-driver.patch1 @@ -0,0 +1,15194 @@ +From: David Riddoch +# replaces http://xenbits.xensource.com/linux-2.6.18-xen.hg c/s 421: +# HG changeset patch +# User Keir Fraser +# Date 1203330569 0 +# Node ID e4dd072db2595c420bb21d9e835416f4fd543526 +# Parent fc90e9b2c12b316b5460ece28f013e6de881af1a +Subject: Solarflare: Resource driver. +References: FATE#303479 +Acked-by: jbeulich@novell.com + +Index: head-2008-07-15/drivers/net/sfc/Kconfig +=================================================================== +--- head-2008-07-15.orig/drivers/net/sfc/Kconfig 2008-07-17 16:17:36.000000000 +0200 ++++ head-2008-07-15/drivers/net/sfc/Kconfig 2008-07-17 16:18:07.000000000 +0200 +@@ -12,3 +12,9 @@ config SFC + + To compile this driver as a module, choose M here. The module + will be called sfc. ++ ++config SFC_RESOURCE ++ depends on SFC && X86 ++ tristate "Solarflare Solarstorm SFC4000 resource driver" ++ help ++ This module provides the SFC resource manager driver. 
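
The rx.c and tx.c hunks above wire the driverlink veto points into the data path: EFX_DL_CALLBACK() runs the client's rx_packet or tx_packet hook, and on a non-zero veto the net driver frees the buffer (or skb) itself and stops processing, logging the vetoing client's name from efx->dl_cb_dev. A minimal client-side sketch follows; only the hook points are visible in these hunks, so the callback signatures, the efx_dl_register_callbacks() helper and the EFX_ALLOW_PACKET/EFX_VETO_PACKET constants are assumptions about the driverlink_api.h this series adds, not confirmed API.

/*
 * Hypothetical driverlink client: veto runt frames on receive,
 * allow everything on transmit.  The names flagged above as
 * assumptions are used here for illustration only.
 */
#include <linux/if_ether.h>
#include <linux/skbuff.h>
#include "driverlink_api.h"	/* enum efx_veto, struct efx_dl_* (added by this series) */

static enum efx_veto my_rx_packet(struct efx_dl_device *efx_dev,
				  const char *pkt_hdr, int pkt_len)
{
	/* A veto here makes the net driver call efx_free_rx_buffer()
	 * before the packet reaches LRO or the network stack. */
	if (pkt_len < ETH_HLEN)
		return EFX_VETO_PACKET;
	return EFX_ALLOW_PACKET;
}

static enum efx_veto my_tx_packet(struct efx_dl_device *efx_dev,
				  struct sk_buff *skb)
{
	/* A veto here makes efx_hard_start_xmit() free the skb and
	 * return NETDEV_TX_OK, so a vetoing client must not keep it. */
	return EFX_ALLOW_PACKET;
}

static struct efx_dl_callbacks my_callbacks = {
	.rx_packet = my_rx_packet,
	.tx_packet = my_tx_packet,
};

/* Registered once per device from the client's probe hook, e.g.
 * efx_dl_register_callbacks(efx_dev, &my_callbacks), and unregistered
 * symmetrically on removal (helper name assumed). */

Note that the veto sits on the hot path: the EFX_DL_CALLBACK() call is made for every packet once a callback is installed, which is why the driver marks the veto branch unlikely().
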
+Index: head-2008-07-15/drivers/net/sfc/Makefile +=================================================================== +--- head-2008-07-15.orig/drivers/net/sfc/Makefile 2008-07-17 16:17:53.000000000 +0200 ++++ head-2008-07-15/drivers/net/sfc/Makefile 2008-07-17 16:18:07.000000000 +0200 +@@ -3,3 +3,5 @@ sfc-y += efx.o falcon.o tx.o rx.o falc + mdio_10g.o tenxpress.o boards.o sfe4001.o \ + driverlink.o + obj-$(CONFIG_SFC) += sfc.o ++ ++obj-$(CONFIG_SFC_RESOURCE) += sfc_resource/ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/Makefile +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/Makefile 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,14 @@ ++obj-$(CONFIG_SFC_RESOURCE) := sfc_resource.o ++ ++EXTRA_CFLAGS += -D__CI_HARDWARE_CONFIG_FALCON__ ++EXTRA_CFLAGS += -D__ci_driver__ ++EXTRA_CFLAGS += -Werror ++EXTRA_CFLAGS += -Idrivers/net/sfc -Idrivers/net/sfc/sfc_resource ++ ++sfc_resource-objs := resource_driver.o iopage.o efx_vi_shm.o \ ++ driverlink_new.o kernel_proc.o kfifo.o \ ++ nic.o eventq.o falcon.o falcon_hash.o \ ++ assert_valid.o buddy.o buffer_table.o filter_resource.o \ ++ iobufset_resource.o resource_manager.o resources.o \ ++ vi_resource_alloc.o vi_resource_event.o vi_resource_flush.o \ ++ vi_resource_manager.o driver_object.o kernel_compat.o +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/assert_valid.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/assert_valid.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,92 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains functions to assert validness of resources and ++ * resource manager in DEBUG build of the resource driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++ ++#ifndef NDEBUG ++#include ++#include ++#include ++ ++void ++efrm_resource_manager_assert_valid(struct efrm_resource_manager *rm, ++ const char *file, int line) ++{ ++ _EFRM_ASSERT(rm, file, line); ++ _EFRM_ASSERT(rm->rm_name, file, line); ++ _EFRM_ASSERT(rm->rm_type < EFRM_RESOURCE_NUM, file, line); ++ _EFRM_ASSERT(rm->rm_dtor, file, line); ++} ++EXPORT_SYMBOL(efrm_resource_manager_assert_valid); ++ ++/* ++ * \param rs resource to validate ++ * \param ref_count_is_zero One of 3 values ++ * > 0 - check ref count is zero ++ * = 0 - check ref count is non-zero ++ * < 0 - ref count could be any value ++ */ ++void ++efrm_resource_assert_valid(struct efrm_resource *rs, int ref_count_is_zero, ++ const char *file, int line) ++{ ++ struct efrm_resource_manager *rm; ++ ++ _EFRM_ASSERT(rs, file, line); ++ ++ if (ref_count_is_zero >= 0) { ++ if (!(ref_count_is_zero || rs->rs_ref_count > 0) ++ || !(!ref_count_is_zero || rs->rs_ref_count == 0)) ++ EFRM_WARN("%s: check %szero ref=%d " EFRM_RESOURCE_FMT, ++ __func__, ++ ref_count_is_zero == 0 ? "non-" : "", ++ rs->rs_ref_count, ++ EFRM_RESOURCE_PRI_ARG(rs->rs_handle)); ++ ++ _EFRM_ASSERT(!(ref_count_is_zero == 0) || ++ rs->rs_ref_count != 0, file, line); ++ _EFRM_ASSERT(!(ref_count_is_zero > 0) || ++ rs->rs_ref_count == 0, file, line); ++ } ++ ++ rm = efrm_rm_table[EFRM_RESOURCE_TYPE(rs->rs_handle)]; ++ efrm_resource_manager_assert_valid(rm, file, line); ++} ++EXPORT_SYMBOL(efrm_resource_assert_valid); ++ ++#endif +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/buddy.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/buddy.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,220 @@ ++ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains implementation of a buddy allocator. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include /* get uintXX types on win32 */ ++#include ++#include ++#include ++ ++#if 1 ++#define DEBUG_ALLOC(x) ++#else ++#define DEBUG_ALLOC(x) x ++ ++static inline void efrm_buddy_dump(struct efrm_buddy_allocator *b) ++{ ++ unsigned o; ++ ++ EFRM_NOTICE("%s: dump allocator with order %u", ++ __func__, b->order); ++ for (o = 0; o <= b->order; o++) { ++ struct list_head *l = &b->free_lists[o]; ++ while (l->next != &b->free_lists[o]) { ++ l = l->next; ++ EFRM_NOTICE("%s: order %x: %zx", __func__, o, ++ l - b->links); ++ } ++ } ++} ++#endif ++ ++/* ++ * The purpose of the following inline functions is to give the ++ * understandable names to the simple actions. ++ */ ++static inline void ++efrm_buddy_free_list_add(struct efrm_buddy_allocator *b, ++ unsigned order, unsigned addr) ++{ ++ list_add(&b->links[addr], &b->free_lists[order]); ++ b->orders[addr] = (uint8_t) order; ++} ++static inline void ++efrm_buddy_free_list_del(struct efrm_buddy_allocator *b, unsigned addr) ++{ ++ list_del(&b->links[addr]); ++ b->links[addr].next = NULL; ++} ++static inline int ++efrm_buddy_free_list_empty(struct efrm_buddy_allocator *b, unsigned order) ++{ ++ return list_empty(&b->free_lists[order]); ++} ++static inline unsigned ++efrm_buddy_free_list_pop(struct efrm_buddy_allocator *b, unsigned order) ++{ ++ struct list_head *l = list_pop(&b->free_lists[order]); ++ l->next = NULL; ++ return (unsigned)(l - b->links); ++} ++static inline int ++efrm_buddy_addr_in_free_list(struct efrm_buddy_allocator *b, unsigned addr) ++{ ++ return b->links[addr].next != NULL; ++} ++static inline unsigned ++efrm_buddy_free_list_first(struct efrm_buddy_allocator *b, unsigned order) ++{ ++ return (unsigned)(b->free_lists[order].next - b->links); ++} ++ ++int efrm_buddy_ctor(struct efrm_buddy_allocator *b, unsigned order) ++{ ++ unsigned o; ++ unsigned size = 1 << order; ++ ++ DEBUG_ALLOC(EFRM_NOTICE("%s(%u)", __func__, order)); ++ EFRM_ASSERT(b); ++ EFRM_ASSERT(order <= sizeof(unsigned) * 8 - 1); ++ ++ b->order = order; ++ b->free_lists = vmalloc((order + 1) * sizeof(struct list_head)); ++ if (b->free_lists == NULL) ++ goto fail1; ++ ++ b->links = vmalloc(size * sizeof(struct list_head)); ++ if (b->links == NULL) ++ goto fail2; ++ ++ b->orders = vmalloc(size); ++ if (b->orders == NULL) ++ goto fail3; ++ ++ memset(b->links, 0, size * sizeof(struct list_head)); ++ ++ for (o = 0; o <= b->order; ++o) ++ INIT_LIST_HEAD(b->free_lists + o); ++ ++ efrm_buddy_free_list_add(b, b->order, 0); ++ ++ return 0; ++ ++fail3: ++ vfree(b->links); ++fail2: ++ vfree(b->free_lists); ++fail1: ++ return -ENOMEM; ++} ++ ++void efrm_buddy_dtor(struct efrm_buddy_allocator *b) ++{ ++ EFRM_ASSERT(b); ++ ++ vfree(b->free_lists); ++ vfree(b->links); ++ vfree(b->orders); ++} ++ ++int efrm_buddy_alloc(struct efrm_buddy_allocator *b, unsigned order) ++{ ++ unsigned smallest; ++ unsigned addr; ++ ++ DEBUG_ALLOC(EFRM_NOTICE("%s(%u)", __func__, order)); ++ EFRM_ASSERT(b); ++ ++ /* Find smallest chunk that is big enough. ?? Can optimise this by ++ ** keeping array of pointers to smallest chunk for each order. 
++ */ ++ smallest = order; ++ while (smallest <= b->order && ++ efrm_buddy_free_list_empty(b, smallest)) ++ ++smallest; ++ ++ if (smallest > b->order) { ++ DEBUG_ALLOC(EFRM_NOTICE ++ ("buddy - alloc order %d failed - max order %d", ++ order, b->order);); ++ return -ENOMEM; ++ } ++ ++ /* Split blocks until we get one of the correct size. */ ++ addr = efrm_buddy_free_list_pop(b, smallest); ++ ++ DEBUG_ALLOC(EFRM_NOTICE("buddy - alloc %x order %d cut from order %d", ++ addr, order, smallest);); ++ while (smallest-- > order) ++ efrm_buddy_free_list_add(b, smallest, addr + (1 << smallest)); ++ ++ EFRM_DO_DEBUG(b->orders[addr] = (uint8_t) order); ++ ++ EFRM_ASSERT(addr < 1u << b->order); ++ return addr; ++} ++ ++void ++efrm_buddy_free(struct efrm_buddy_allocator *b, unsigned addr, ++ unsigned order) ++{ ++ unsigned buddy_addr; ++ ++ DEBUG_ALLOC(EFRM_NOTICE("%s(%u, %u)", __func__, addr, order)); ++ EFRM_ASSERT(b); ++ EFRM_ASSERT(order <= b->order); ++ EFRM_ASSERT((unsigned long)addr + ((unsigned long)1 << order) <= ++ (unsigned long)1 << b->order); ++ EFRM_ASSERT(!efrm_buddy_addr_in_free_list(b, addr)); ++ EFRM_ASSERT(b->orders[addr] == order); ++ ++ /* merge free blocks */ ++ while (order < b->order) { ++ buddy_addr = addr ^ (1 << order); ++ if (!efrm_buddy_addr_in_free_list(b, buddy_addr) || ++ b->orders[buddy_addr] != order) ++ break; ++ efrm_buddy_free_list_del(b, buddy_addr); ++ if (buddy_addr < addr) ++ addr = buddy_addr; ++ ++order; ++ } ++ ++ DEBUG_ALLOC(EFRM_NOTICE ++ ("buddy - free %x merged into order %d", addr, order);); ++ efrm_buddy_free_list_add(b, order, addr); ++} +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/buffer_table.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/buffer_table.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,209 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains abstraction of the buffer table on the NIC. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++/* ++** Might be worth keeping a bitmap of which entries are clear. Then we ++** wouldn't need to clear them all again when we free an allocation. ++*/ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/*! Comment? 
*/ ++struct efrm_buffer_table { ++ spinlock_t lock; ++ struct efrm_buddy_allocator buddy; ++}; ++ ++/* Efab buffer state. */ ++static struct efrm_buffer_table efrm_buffers; ++ ++int efrm_buffer_table_ctor(unsigned low, unsigned high) ++{ ++ int log2_n_entries, rc, i; ++ ++ EFRM_ASSERT(high > 0); ++ EFRM_ASSERT(low < high); ++ ++ EFRM_TRACE("%s: low=%u high=%u", __func__, low, high); ++ EFRM_NOTICE("%s: low=%u high=%u", __func__, low, high); ++ ++ log2_n_entries = fls(high - 1); ++ ++ rc = efrm_buddy_ctor(&efrm_buffers.buddy, log2_n_entries); ++ if (rc < 0) { ++ EFRM_ERR("efrm_buffer_table_ctor: efrm_buddy_ctor(%d) " ++ "failed (%d)", log2_n_entries, rc); ++ return rc; ++ } ++ for (i = 0; i < (1 << log2_n_entries); ++i) { ++ rc = efrm_buddy_alloc(&efrm_buffers.buddy, 0); ++ EFRM_ASSERT(rc >= 0); ++ EFRM_ASSERT(rc < (1 << log2_n_entries)); ++ } ++ for (i = low; i < (int) high; ++i) ++ efrm_buddy_free(&efrm_buffers.buddy, i, 0); ++ ++ spin_lock_init(&efrm_buffers.lock); ++ ++ EFRM_TRACE("%s: done", __func__); ++ ++ return 0; ++} ++ ++void efrm_buffer_table_dtor(void) ++{ ++ /* ?? debug check that all allocations have been freed? */ ++ ++ spin_lock_destroy(&efrm_buffers.lock); ++ efrm_buddy_dtor(&efrm_buffers.buddy); ++ ++ EFRM_TRACE("%s: done", __func__); ++} ++ ++/**********************************************************************/ ++ ++int ++efrm_buffer_table_alloc(unsigned order, ++ struct efhw_buffer_table_allocation *a) ++{ ++ irq_flags_t lock_flags; ++ int rc; ++ ++ EFRM_ASSERT(&efrm_buffers.buddy); ++ EFRM_ASSERT(a); ++ ++ /* Round up to multiple of two, as the buffer clear logic works in ++ * pairs when not in "full" mode. */ ++ order = max_t(unsigned, order, 1); ++ ++ spin_lock_irqsave(&efrm_buffers.lock, lock_flags); ++ rc = efrm_buddy_alloc(&efrm_buffers.buddy, order); ++ spin_unlock_irqrestore(&efrm_buffers.lock, lock_flags); ++ ++ if (rc < 0) { ++ EFRM_ERR("efrm_buffer_table_alloc: failed (n=%ld) rc %d", ++ 1ul << order, rc); ++ return rc; ++ } ++ ++ EFRM_TRACE("efrm_buffer_table_alloc: base=%d n=%ld", ++ rc, 1ul << order); ++ a->order = order; ++ a->base = (unsigned)rc; ++ return 0; ++} ++ ++void efrm_buffer_table_free(struct efhw_buffer_table_allocation *a) ++{ ++ irq_flags_t lock_flags; ++ struct efhw_nic *nic; ++ int nic_i; ++ ++ EFRM_ASSERT(&efrm_buffers.buddy); ++ EFRM_ASSERT(a); ++ EFRM_ASSERT(a->base != -1); ++ EFRM_ASSERT((unsigned long)a->base + (1ul << a->order) <= ++ efrm_buddy_size(&efrm_buffers.buddy)); ++ ++ EFRM_TRACE("efrm_buffer_table_free: base=%d n=%ld", ++ a->base, (1ul << a->order)); ++ ++ EFRM_FOR_EACH_NIC(nic_i, nic) ++ efhw_nic_buffer_table_clear(nic, a->base, 1ul << a->order); ++ ++ spin_lock_irqsave(&efrm_buffers.lock, lock_flags); ++ efrm_buddy_free(&efrm_buffers.buddy, a->base, a->order); ++ spin_unlock_irqrestore(&efrm_buffers.lock, lock_flags); ++ ++ EFRM_DO_DEBUG(a->base = a->order = -1); ++} ++ ++/**********************************************************************/ ++ ++void ++efrm_buffer_table_set(struct efhw_buffer_table_allocation *a, ++ struct efhw_nic *nic, ++ unsigned i, dma_addr_t dma_addr, int owner) ++{ ++ EFRM_ASSERT(a); ++ EFRM_ASSERT(i < (unsigned)1 << a->order); ++ ++ efhw_nic_buffer_table_set(nic, dma_addr, EFHW_NIC_PAGE_SIZE, ++ 0, owner, a->base + i); ++} ++ ++ ++int efrm_buffer_table_size(void) ++{ ++ return efrm_buddy_size(&efrm_buffers.buddy); ++} ++ ++/**********************************************************************/ ++ ++int ++efrm_page_register(struct efhw_nic *nic, dma_addr_t dma_addr, int owner, ++ 
efhw_buffer_addr_t *buf_addr_out) ++{ ++ struct efhw_buffer_table_allocation alloc; ++ int rc; ++ ++ rc = efrm_buffer_table_alloc(0, &alloc); ++ if (rc == 0) { ++ efrm_buffer_table_set(&alloc, nic, 0, dma_addr, owner); ++ efrm_buffer_table_commit(); ++ *buf_addr_out = EFHW_BUFFER_ADDR(alloc.base, 0); ++ } ++ return rc; ++} ++EXPORT_SYMBOL(efrm_page_register); ++ ++void efrm_page_unregister(efhw_buffer_addr_t buf_addr) ++{ ++ struct efhw_buffer_table_allocation alloc; ++ ++ alloc.order = 0; ++ alloc.base = EFHW_BUFFER_PAGE(buf_addr); ++ efrm_buffer_table_free(&alloc); ++} ++EXPORT_SYMBOL(efrm_page_unregister); ++ ++void efrm_buffer_table_commit(void) ++{ ++ struct efhw_nic *nic; ++ int nic_i; ++ ++ EFRM_FOR_EACH_NIC(nic_i, nic) ++ efhw_nic_buffer_table_commit(nic); ++} +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,188 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides EtherFabric NIC hardware interface. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_DRIVER_EFAB_HARDWARE_H__ ++#define __CI_DRIVER_EFAB_HARDWARE_H__ ++ ++#include "ci/driver/efab/hardware/workarounds.h" ++#include ++ ++ ++/*---------------------------------------------------------------------------- ++ * ++ * Common EtherFabric definitions ++ * ++ *---------------------------------------------------------------------------*/ ++ ++#include ++#include ++#include ++ ++/*---------------------------------------------------------------------------- ++ * ++ * EtherFabric varients ++ * ++ *---------------------------------------------------------------------------*/ ++ ++#include ++ ++/*---------------------------------------------------------------------------- ++ * ++ * EtherFabric Portable Hardware Layer defines ++ * ++ *---------------------------------------------------------------------------*/ ++ ++ /*-------------- Initialisation ------------ */ ++#define efhw_nic_close_hardware(nic) \ ++ ((nic)->efhw_func->close_hardware(nic)) ++ ++#define efhw_nic_init_hardware(nic, ev_handlers, mac_addr, non_irq_evq) \ ++ ((nic)->efhw_func->init_hardware((nic), (ev_handlers), (mac_addr), \ ++ (non_irq_evq))) ++ ++/*-------------- Interrupt support ------------ */ ++/** Handle interrupt. Return 0 if not handled, 1 if handled. */ ++#define efhw_nic_interrupt(nic) \ ++ ((nic)->efhw_func->interrupt(nic)) ++ ++#define efhw_nic_interrupt_enable(nic) \ ++ ((nic)->efhw_func->interrupt_enable(nic)) ++ ++#define efhw_nic_interrupt_disable(nic) \ ++ ((nic)->efhw_func->interrupt_disable(nic)) ++ ++#define efhw_nic_set_interrupt_moderation(nic, evq, val) \ ++ ((nic)->efhw_func->set_interrupt_moderation(nic, evq, val)) ++ ++/*-------------- Event support ------------ */ ++ ++#define efhw_nic_event_queue_enable(nic, evq, size, q_base, buf_base, \ ++ interrupting) \ ++ ((nic)->efhw_func->event_queue_enable((nic), (evq), (size), (q_base), \ ++ (buf_base), (interrupting))) ++ ++#define efhw_nic_event_queue_disable(nic, evq, timer_only) \ ++ ((nic)->efhw_func->event_queue_disable(nic, evq, timer_only)) ++ ++#define efhw_nic_wakeup_request(nic, q_base, index, evq) \ ++ ((nic)->efhw_func->wakeup_request(nic, q_base, index, evq)) ++ ++#define efhw_nic_sw_event(nic, data, ev) \ ++ ((nic)->efhw_func->sw_event(nic, data, ev)) ++ ++/*-------------- Filter support ------------ */ ++#define efhw_nic_ipfilter_set(nic, type, index, dmaq, \ ++ saddr, sport, daddr, dport) \ ++ ((nic)->efhw_func->ipfilter_set(nic, type, index, dmaq, \ ++ saddr, sport, daddr, dport)) ++ ++#define efhw_nic_ipfilter_clear(nic, index) \ ++ ((nic)->efhw_func->ipfilter_clear(nic, index)) ++ ++/*-------------- DMA support ------------ */ ++#define efhw_nic_dmaq_tx_q_init(nic, dmaq, evq, owner, tag, \ ++ dmaq_size, index, flags) \ ++ ((nic)->efhw_func->dmaq_tx_q_init(nic, dmaq, evq, owner, tag, \ ++ dmaq_size, index, flags)) ++ ++#define efhw_nic_dmaq_rx_q_init(nic, dmaq, evq, owner, tag, \ ++ dmaq_size, index, flags) \ ++ ((nic)->efhw_func->dmaq_rx_q_init(nic, dmaq, evq, owner, tag, \ ++ dmaq_size, index, flags)) ++ ++#define efhw_nic_dmaq_tx_q_disable(nic, dmaq) \ ++ ((nic)->efhw_func->dmaq_tx_q_disable(nic, dmaq)) ++ ++#define efhw_nic_dmaq_rx_q_disable(nic, dmaq) \ ++ ((nic)->efhw_func->dmaq_rx_q_disable(nic, dmaq)) ++ 
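
Every efhw_nic_*() macro above (and the flush, buffer-table and filter wrappers that follow) expands to an indirect call through the per-NIC method table at (nic)->efhw_func. That indirection is what lets the resource manager drive different silicon variants through one interface, with the per-revision choices made at probe time (compare the "falcon/a1" / "falcon/b0" silicon_rev assignments in the falcon.c hunk earlier). Below is a self-contained sketch of the pattern, using illustrative names rather than the driver's real efhw layout:

#include <stdio.h>

/* Illustrative stand-ins for the real efhw types. */
struct nic_sketch;

struct func_ops_sketch {
	void (*interrupt_enable)(struct nic_sketch *nic);
};

struct nic_sketch {
	const char *variant;
	const struct func_ops_sketch *efhw_func;
};

/* Same shape as the macros above: callers name the operation,
 * never the silicon variant. */
#define nic_interrupt_enable(nic) \
	((nic)->efhw_func->interrupt_enable(nic))

static void a1_int_enable(struct nic_sketch *nic)
{
	printf("%s: enable interrupts (A1-specific path)\n", nic->variant);
}

static void b0_int_enable(struct nic_sketch *nic)
{
	printf("%s: enable interrupts\n", nic->variant);
}

static const struct func_ops_sketch a1_ops = {
	.interrupt_enable = a1_int_enable,
};

static const struct func_ops_sketch b0_ops = {
	.interrupt_enable = b0_int_enable,
};

int main(void)
{
	struct nic_sketch a1 = { "falcon/a1", &a1_ops };
	struct nic_sketch b0 = { "falcon/b0", &b0_ops };

	nic_interrupt_enable(&a1);	/* dispatches via the A1 table */
	nic_interrupt_enable(&b0);	/* same call site, B0 table */
	return 0;
}

The macro-wrapper style (rather than calling through the table directly at each site) keeps call sites short and gives one place to change if an operation grows an argument, at the cost of hiding the indirection from casual readers.
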
++#define efhw_nic_flush_tx_dma_channel(nic, dmaq) \ ++ ((nic)->efhw_func->flush_tx_dma_channel(nic, dmaq)) ++ ++#define efhw_nic_flush_rx_dma_channel(nic, dmaq) \ ++ ((nic)->efhw_func->flush_rx_dma_channel(nic, dmaq)) ++ ++/*-------------- MAC Low level interface ---- */ ++#define efhw_gmac_get_mac_addr(nic) \ ++ ((nic)->gmac->get_mac_addr((nic)->gmac)) ++ ++/*-------------- Buffer table -------------- */ ++#define efhw_nic_buffer_table_set(nic, addr, bufsz, region, \ ++ own_id, buf_id) \ ++ ((nic)->efhw_func->buffer_table_set(nic, addr, bufsz, region, \ ++ own_id, buf_id)) ++ ++#define efhw_nic_buffer_table_set_n(nic, buf_id, addr, bufsz, \ ++ region, n_pages, own_id) \ ++ ((nic)->efhw_func->buffer_table_set_n(nic, buf_id, addr, bufsz, \ ++ region, n_pages, own_id)) ++ ++#define efhw_nic_buffer_table_clear(nic, id, num) \ ++ ((nic)->efhw_func->buffer_table_clear(nic, id, num)) ++ ++#define efhw_nic_buffer_table_commit(nic) \ ++ ((nic)->efhw_func->buffer_table_commit(nic)) ++ ++/*-------------- New filter API ------------ */ ++#define efhw_nic_filter_set(nic, spec, index_out) \ ++ ((nic)->efhw_func->filter_set(nic, spec, index_out)) ++ ++#define efhw_nic_filter_clear(nic, type, index_out) \ ++ ((nic)->efhw_func->filter_clear(nic, type, index_out)) ++ ++ ++/* --- DMA --- */ ++#define EFHW_DMA_ADDRMASK (0xffffffffffffffffULL) ++ ++/* --- Buffers --- */ ++#define EFHW_BUFFER_ADDR FALCON_BUFFER_4K_ADDR ++#define EFHW_BUFFER_PAGE FALCON_BUFFER_4K_PAGE ++#define EFHW_BUFFER_OFF FALCON_BUFFER_4K_OFF ++ ++/* --- Filters --- */ ++#define EFHW_IP_FILTER_NUM FALCON_FILTER_TBL_NUM ++ ++#define EFHW_MAX_PAGE_SIZE FALCON_MAX_PAGE_SIZE ++ ++#if PAGE_SIZE <= EFHW_MAX_PAGE_SIZE ++#define EFHW_NIC_PAGE_SIZE PAGE_SIZE ++#else ++#define EFHW_NIC_PAGE_SIZE EFHW_MAX_PAGE_SIZE ++#endif ++#define EFHW_NIC_PAGE_MASK (~(EFHW_NIC_PAGE_SIZE-1)) ++ ++#endif /* __CI_DRIVER_EFAB_HARDWARE_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/common.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/common.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,68 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides EtherFabric NIC hardware interface common ++ * definitions. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_DRIVER_EFAB_HARDWARE_COMMON_H__ ++#define __CI_DRIVER_EFAB_HARDWARE_COMMON_H__ ++ ++/*---------------------------------------------------------------------------- ++ * ++ * EtherFabric constants ++ * ++ *---------------------------------------------------------------------------*/ ++ ++#define EFHW_1K 0x00000400u ++#define EFHW_2K 0x00000800u ++#define EFHW_4K 0x00001000u ++#define EFHW_8K 0x00002000u ++#define EFHW_16K 0x00004000u ++#define EFHW_32K 0x00008000u ++#define EFHW_64K 0x00010000u ++#define EFHW_128K 0x00020000u ++#define EFHW_256K 0x00040000u ++#define EFHW_512K 0x00080000u ++#define EFHW_1M 0x00100000u ++#define EFHW_2M 0x00200000u ++#define EFHW_4M 0x00400000u ++#define EFHW_8M 0x00800000u ++#define EFHW_16M 0x01000000u ++#define EFHW_32M 0x02000000u ++#define EFHW_48M 0x03000000u ++#define EFHW_64M 0x04000000u ++#define EFHW_128M 0x08000000u ++#define EFHW_256M 0x10000000u ++#define EFHW_512M 0x20000000u ++#define EFHW_1G 0x40000000u ++#define EFHW_2G 0x80000000u ++#define EFHW_4G 0x100000000ULL ++#define EFHW_8G 0x200000000ULL ++ ++#endif /* __CI_DRIVER_EFAB_HARDWARE_COMMON_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,422 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides EtherFabric NIC - EFXXXX (aka Falcon) specific ++ * definitions. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_DRIVER_EFAB_HARDWARE_FALCON_H__ ++#define __CI_DRIVER_EFAB_HARDWARE_FALCON_H__ ++ ++/*---------------------------------------------------------------------------- ++ * Compile options ++ *---------------------------------------------------------------------------*/ ++ ++/* Falcon has an 8K maximum page size. 
*/ ++#define FALCON_MAX_PAGE_SIZE EFHW_8K ++ ++/* include the register definitions */ ++#include ++#include ++#include ++#include ++ ++#define FALCON_DMA_TX_DESC_BYTES 8 ++#define FALCON_DMA_RX_PHYS_DESC_BYTES 8 ++#define FALCON_DMA_RX_BUF_DESC_BYTES 4 ++ ++ ++/* ---- efhw_event_t helpers --- */ ++ ++#ifndef EFHW_IS_LITTLE_ENDIAN ++#error This needs lots of cpu_to_le64s() in ++#endif ++ ++/*!\ TODO look at whether there is an efficiency gain to be had by ++ treating the event codes to 32bit masks as is done for EF1 ++ ++ These masks apply to the full 64 bits of the event to extract the ++ event code - followed by the common event codes to expect ++ */ ++#define __FALCON_OPEN_MASK(WIDTH) ((((uint64_t)1) << (WIDTH)) - 1) ++#define FALCON_EVENT_CODE_MASK \ ++ (__FALCON_OPEN_MASK(EV_CODE_WIDTH) << EV_CODE_LBN) ++#define FALCON_EVENT_EV_Q_ID_MASK \ ++ (__FALCON_OPEN_MASK(DRIVER_EV_EVQ_ID_WIDTH) << DRIVER_EV_EVQ_ID_LBN) ++#define FALCON_EVENT_TX_FLUSH_Q_ID_MASK \ ++ (__FALCON_OPEN_MASK(DRIVER_EV_TX_DESCQ_ID_WIDTH) << \ ++ DRIVER_EV_TX_DESCQ_ID_LBN) ++#define FALCON_EVENT_RX_FLUSH_Q_ID_MASK \ ++ (__FALCON_OPEN_MASK(DRIVER_EV_RX_DESCQ_ID_WIDTH) << \ ++ DRIVER_EV_RX_DESCQ_ID_LBN) ++#define FALCON_EVENT_DRV_SUBCODE_MASK \ ++ (__FALCON_OPEN_MASK(DRIVER_EV_SUB_CODE_WIDTH) << \ ++ DRIVER_EV_SUB_CODE_LBN) ++ ++#define FALCON_EVENT_FMT "[ev:%x:%08x:%08x]" ++#define FALCON_EVENT_PRI_ARG(e) \ ++ ((unsigned)(((e).u64 & FALCON_EVENT_CODE_MASK) >> EV_CODE_LBN)), \ ++ ((unsigned)((e).u64 >> 32)), ((unsigned)((e).u64 & 0xFFFFFFFF)) ++ ++#define FALCON_EVENT_CODE(evp) ((evp)->u64 & FALCON_EVENT_CODE_MASK) ++#define FALCON_EVENT_WAKE_EVQ_ID(evp) \ ++ (((evp)->u64 & FALCON_EVENT_EV_Q_ID_MASK) >> DRIVER_EV_EVQ_ID_LBN) ++#define FALCON_EVENT_TX_FLUSH_Q_ID(evp) \ ++ (((evp)->u64 & FALCON_EVENT_TX_FLUSH_Q_ID_MASK) >> \ ++ DRIVER_EV_TX_DESCQ_ID_LBN) ++#define FALCON_EVENT_RX_FLUSH_Q_ID(evp) \ ++ (((evp)->u64 & FALCON_EVENT_RX_FLUSH_Q_ID_MASK) >> \ ++ DRIVER_EV_RX_DESCQ_ID_LBN) ++#define FALCON_EVENT_DRIVER_SUBCODE(evp) \ ++ (((evp)->u64 & FALCON_EVENT_DRV_SUBCODE_MASK) >> \ ++ DRIVER_EV_SUB_CODE_LBN) ++ ++#define FALCON_EVENT_CODE_CHAR ((uint64_t)DRIVER_EV_DECODE << EV_CODE_LBN) ++#define FALCON_EVENT_CODE_SW ((uint64_t)DRV_GEN_EV_DECODE << EV_CODE_LBN) ++ ++ ++/* so this is the size in bytes of an awful lot of things */ ++#define FALCON_REGISTER128 (16) ++ ++/* we define some unique dummy values as a debug aid */ ++#ifdef _WIN32 ++#define FALCON_ATOMIC_BASE 0xdeadbeef00000000ui64 ++#else ++#define FALCON_ATOMIC_BASE 0xdeadbeef00000000ULL ++#endif ++#define FALCON_ATOMIC_UPD_REG (FALCON_ATOMIC_BASE | 0x1) ++#define FALCON_ATOMIC_PTR_TBL_REG (FALCON_ATOMIC_BASE | 0x2) ++#define FALCON_ATOMIC_SRPM_UDP_EVQ_REG (FALCON_ATOMIC_BASE | 0x3) ++#define FALCON_ATOMIC_RX_FLUSH_DESCQ (FALCON_ATOMIC_BASE | 0x4) ++#define FALCON_ATOMIC_TX_FLUSH_DESCQ (FALCON_ATOMIC_BASE | 0x5) ++#define FALCON_ATOMIC_INT_EN_REG (FALCON_ATOMIC_BASE | 0x6) ++#define FALCON_ATOMIC_TIMER_CMD_REG (FALCON_ATOMIC_BASE | 0x7) ++#define FALCON_ATOMIC_PACE_REG (FALCON_ATOMIC_BASE | 0x8) ++#define FALCON_ATOMIC_INT_ACK_REG (FALCON_ATOMIC_BASE | 0x9) ++/* XXX It crashed with odd value in FALCON_ATOMIC_INT_ADR_REG */ ++#define FALCON_ATOMIC_INT_ADR_REG (FALCON_ATOMIC_BASE | 0xa) ++ ++/*---------------------------------------------------------------------------- ++ * ++ * PCI control blocks for Falcon - ++ * (P) primary is for NET ++ * (S) secondary is for CHAR ++ * ++ *---------------------------------------------------------------------------*/ ++ ++#define 
FALCON_P_CTR_AP_BAR 2 ++#define FALCON_S_CTR_AP_BAR 0 ++#define FALCON_S_DEVID 0x6703 ++ ++ ++/*---------------------------------------------------------------------------- ++ * ++ * Falcon constants ++ * ++ *---------------------------------------------------------------------------*/ ++ ++/* Note: the following constants have moved to values in struct efhw_nic: ++ * FALCON_EVQ_TBL_NUM -> nic->num_evqs ++ * FALCON_DMAQ_NUM -> nic->num_dmaqs ++ * FALCON_TIMERS_NUM -> nic->num_times ++ * These replacement constants are used as sanity checks in assertions in ++ * certain functions that don't have access to struct efhw_nic. ++ */ ++#define FALCON_DMAQ_NUM_SANITY (EFHW_4K) ++#define FALCON_EVQ_TBL_NUM_SANITY (EFHW_4K) ++#define FALCON_TIMERS_NUM_SANITY (EFHW_4K) ++ ++/* This value is an upper limit on the total number of filter table ++ * entries. The actual size of filter table is determined at runtime, as ++ * it can vary. ++ */ ++#define FALCON_FILTER_TBL_NUM (EFHW_8K) ++ ++/* max number of buffers which can be pushed before commiting */ ++#define FALCON_BUFFER_UPD_MAX (128) ++ ++/* We can tell falcon to write its RX buffers in 32 byte quantums, ++ and since we pad packets 2 bytes to the right we can't use ++ a full page (not unless we use jumbo mode for all queues) ++ ++ NOTE: tests/nic/dma.c assumes that the value here is the real NIC ++ value, so we explicitly round it down to the nearest 32 bytes */ ++ ++/* #define FALCON_RX_USR_BUF_SIZE round_down(4096-2,32) */ ++#define FALCON_RX_USR_BUF_SIZE 4064 ++ ++#define FALCON_EVQ_RPTR_REG_P0 0x400 ++ ++/*---------------------------------------------------------------------------- ++ * ++ * Falcon requires user-space descriptor pushes to be: ++ * dword[0-2]; wiob(); dword[3] ++ * ++ * Driver register access must be locked against other threads from ++ * the same driver but can be in any order: i.e dword[0-3]; wiob() ++ * ++ * The following helpers ensure that valid dword orderings are exercised ++ * ++ *---------------------------------------------------------------------------*/ ++ ++/* A union to allow writting 64bit values as 32bit values, without ++ * hitting the compilers aliasing rules. We hope the compiler optimises ++ * away the copy's anyway */ ++union __u64to32 { ++ uint64_t u64; ++ struct { ++#ifdef EFHW_IS_LITTLE_ENDIAN ++ uint32_t a; ++ uint32_t b; ++#else ++ uint32_t b; ++ uint32_t a; ++#endif ++ } s; ++}; ++ ++static inline void ++falcon_write_ddd_d(volatile char __iomem *kva, ++ uint32_t d0, uint32_t d1, uint32_t d2, uint32_t d3) ++{ ++ writel(d0, kva + 0); ++ writel(d1, kva + 4); ++ writel(d2, kva + 8); ++ mmiowb(); ++ writel(d3, kva + 12); ++} ++ ++static inline void falcon_write_q(volatile char __iomem *kva, uint64_t q) ++{ ++ union __u64to32 u; ++ u.u64 = q; ++ ++ writel(u.s.a, kva); ++ mmiowb(); ++ writel(u.s.b, kva + 4); ++} ++ ++static inline void falcon_read_q(volatile char __iomem *addr, uint64_t *q0) ++{ ++ /* It is essential that we read dword0 first, so that ++ * the shadow register is updated with the latest value ++ * and we get a self consistent value. 
++ */ ++ union __u64to32 u; ++ u.s.a = readl(addr); ++ rmb(); ++ u.s.b = readl(addr + 4); ++ ++ *q0 = u.u64; ++} ++ ++static inline void ++falcon_write_qq(volatile char __iomem *kva, uint64_t q0, uint64_t q1) ++{ ++ writeq(q0, kva + 0); ++ falcon_write_q(kva + 8, q1); ++} ++ ++static inline void ++falcon_read_qq(volatile char __iomem *addr, uint64_t *q0, uint64_t *q1) ++{ ++ falcon_read_q(addr, q0); ++ *q1 = readq(addr + 8); ++} ++ ++ ++ ++/*---------------------------------------------------------------------------- ++ * ++ * Buffer virtual addresses (4K buffers) ++ * ++ *---------------------------------------------------------------------------*/ ++ ++/* Form a buffer virtual address from buffer ID and offset. If the offset ++** is larger than the buffer size, then the buffer indexed will be ++** calculated appropriately. It is the responsibility of the caller to ++** ensure that they have valid buffers programmed at that address. ++*/ ++#define FALCON_VADDR_8K_S (13) ++#define FALCON_VADDR_4K_S (12) ++#define FALCON_VADDR_M 0xfffff /* post shift mask */ ++ ++#define FALCON_BUFFER_8K_ADDR(id, off) (((id) << FALCON_VADDR_8K_S) + (off)) ++#define FALCON_BUFFER_8K_PAGE(vaddr) \ ++ (((vaddr) >> FALCON_VADDR_8K_S) & FALCON_VADDR_M) ++#define FALCON_BUFFER_8K_OFF(vaddr) \ ++ ((vaddr) & __FALCON_MASK32(FALCON_VADDR_8K_S)) ++ ++#define FALCON_BUFFER_4K_ADDR(id, off) (((id) << FALCON_VADDR_4K_S) + (off)) ++#define FALCON_BUFFER_4K_PAGE(vaddr) \ ++ (((vaddr) >> FALCON_VADDR_4K_S) & FALCON_VADDR_M) ++#define FALCON_BUFFER_4K_OFF(vaddr) \ ++ ((vaddr) & __FALCON_MASK32(FALCON_VADDR_4K_S)) ++ ++/*---------------------------------------------------------------------------- ++ * ++ * Timer helpers ++ * ++ *---------------------------------------------------------------------------*/ ++ ++static inline int falcon_timer_page_addr(uint idx) ++{ ++ ++ EFHW_ASSERT(TIMER_CMD_REG_KER_OFST == ++ (TIMER_CMD_REG_PAGE4_OFST - 4 * EFHW_8K)); ++ ++ EFHW_ASSERT(idx < FALCON_TIMERS_NUM_SANITY); ++ ++ if (idx < 4) ++ return TIMER_CMD_REG_KER_OFST + (idx * EFHW_8K); ++ else if (idx < 1024) ++ return TIMER_CMD_REG_PAGE4_OFST + ((idx - 4) * EFHW_8K); ++ else ++ return TIMER_CMD_REG_PAGE123K_OFST + ((idx - 1024) * EFHW_8K); ++} ++ ++#define FALCON_TIMER_PAGE_MASK (EFHW_8K-1) ++ ++static inline int falcon_timer_page_offset(uint idx) ++{ ++ return falcon_timer_page_addr(idx) & FALCON_TIMER_PAGE_MASK; ++} ++ ++/*---------------------------------------------------------------------------- ++ * ++ * DMA Queue helpers ++ * ++ *---------------------------------------------------------------------------*/ ++ ++/* iSCSI queue for A1; see bug 5427 for more details. */ ++#define FALCON_A1_ISCSI_DMAQ 4 ++ ++/*! returns an address within a bar of the TX DMA doorbell */ ++static inline uint falcon_tx_dma_page_addr(uint dmaq_idx) ++{ ++ uint page; ++ ++ EFHW_ASSERT((((TX_DESC_UPD_REG_PAGE123K_OFST) & (EFHW_8K - 1)) == ++ (((TX_DESC_UPD_REG_PAGE4_OFST) & (EFHW_8K - 1))))); ++ ++ EFHW_ASSERT(dmaq_idx < FALCON_DMAQ_NUM_SANITY); ++ ++ if (dmaq_idx < 1024) ++ page = TX_DESC_UPD_REG_PAGE4_OFST + ((dmaq_idx - 4) * EFHW_8K); ++ else ++ page = ++ TX_DESC_UPD_REG_PAGE123K_OFST + ++ ((dmaq_idx - 1024) * EFHW_8K); ++ ++ return page; ++} ++ ++/*! 
returns an address within a bar of the RX DMA doorbell */ ++static inline uint falcon_rx_dma_page_addr(uint dmaq_idx) ++{ ++ uint page; ++ ++ EFHW_ASSERT((((RX_DESC_UPD_REG_PAGE123K_OFST) & (EFHW_8K - 1)) == ++ ((RX_DESC_UPD_REG_PAGE4_OFST) & (EFHW_8K - 1)))); ++ ++ EFHW_ASSERT(dmaq_idx < FALCON_DMAQ_NUM_SANITY); ++ ++ if (dmaq_idx < 1024) ++ page = RX_DESC_UPD_REG_PAGE4_OFST + ((dmaq_idx - 4) * EFHW_8K); ++ else ++ page = ++ RX_DESC_UPD_REG_PAGE123K_OFST + ++ ((dmaq_idx - 1024) * EFHW_8K); ++ ++ return page; ++} ++ ++/*! "page"=NIC-dependent register set size */ ++#define FALCON_DMA_PAGE_MASK (EFHW_8K-1) ++ ++/*! returns an address within a bar of the start of the "page" ++ containing the TX DMA doorbell */ ++static inline int falcon_tx_dma_page_base(uint dma_idx) ++{ ++ return falcon_tx_dma_page_addr(dma_idx) & ~FALCON_DMA_PAGE_MASK; ++} ++ ++/*! returns an address within a bar of the start of the "page" ++ containing the RX DMA doorbell */ ++static inline int falcon_rx_dma_page_base(uint dma_idx) ++{ ++ return falcon_rx_dma_page_addr(dma_idx) & ~FALCON_DMA_PAGE_MASK; ++} ++ ++/*! returns an offset within a "page" of the TX DMA doorbell */ ++static inline int falcon_tx_dma_page_offset(uint dma_idx) ++{ ++ return falcon_tx_dma_page_addr(dma_idx) & FALCON_DMA_PAGE_MASK; ++} ++ ++/*! returns an offset within a "page" of the RX DMA doorbell */ ++static inline int falcon_rx_dma_page_offset(uint dma_idx) ++{ ++ return falcon_rx_dma_page_addr(dma_idx) & FALCON_DMA_PAGE_MASK; ++} ++ ++/*---------------------------------------------------------------------------- ++ * ++ * Events ++ * ++ *---------------------------------------------------------------------------*/ ++ ++/* Falcon nails down the event queue mappings */ ++#define FALCON_EVQ_KERNEL0 (0) /* hardwired for net driver */ ++#define FALCON_EVQ_CHAR (4) /* char driver's event queue */ ++ ++/* reserved by the drivers */ ++#define FALCON_EVQ_TBL_RESERVED (8) ++ ++/* default DMA-Q sizes */ ++#define FALCON_DMA_Q_DEFAULT_TX_SIZE 512 ++ ++#define FALCON_DMA_Q_DEFAULT_RX_SIZE 512 ++ ++#define FALCON_DMA_Q_DEFAULT_MMAP \ ++ (FALCON_DMA_Q_DEFAULT_TX_SIZE * (FALCON_DMA_TX_DESC_BYTES * 2)) ++ ++/*---------------------------------------------------------------------------- ++ * ++ * DEBUG - Analyser trigger ++ * ++ *---------------------------------------------------------------------------*/ ++ ++static inline void ++falcon_deadbeef(volatile char __iomem *efhw_kva, unsigned what) ++{ ++ writel(what, efhw_kva + 0x300); ++ mmiowb(); ++} ++#endif /* __CI_DRIVER_EFAB_HARDWARE_FALCON_H__ */ ++/*! \cidoxg_end */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_core.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_core.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,1147 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides EtherFabric NIC - EFXXXX (aka Falcon) core register ++ * definitions. 
++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#define FALCON_EXTENDED_P_BAR 1 ++ ++/*************---- Bus Interface Unit Registers C Header ----*************/ ++#define IOM_IND_ADR_REG_OFST 0x0 /* IO-mapped indirect access address ++ register */ ++ #define IOM_AUTO_ADR_INC_EN_LBN 16 ++ #define IOM_AUTO_ADR_INC_EN_WIDTH 1 ++ #define IOM_IND_ADR_LBN 0 ++ #define IOM_IND_ADR_WIDTH 16 ++#define IOM_IND_DAT_REG_OFST 0x4 /* IO-mapped indirect access data register */ ++ #define IOM_IND_DAT_LBN 0 ++ #define IOM_IND_DAT_WIDTH 32 ++#define ADR_REGION_REG_KER_OFST 0x0 /* Address region register */ ++#define ADR_REGION_REG_OFST 0x0 /* Address region register */ ++ #define ADR_REGION3_LBN 96 ++ #define ADR_REGION3_WIDTH 18 ++ #define ADR_REGION2_LBN 64 ++ #define ADR_REGION2_WIDTH 18 ++ #define ADR_REGION1_LBN 32 ++ #define ADR_REGION1_WIDTH 18 ++ #define ADR_REGION0_LBN 0 ++ #define ADR_REGION0_WIDTH 18 ++#define INT_EN_REG_KER_OFST 0x10 /* Kernel driver Interrupt enable register */ ++ #define KER_INT_CHAR_LBN 4 ++ #define KER_INT_CHAR_WIDTH 1 ++ #define KER_INT_KER_LBN 3 ++ #define KER_INT_KER_WIDTH 1 ++ #define ILL_ADR_ERR_INT_EN_KER_LBN 2 ++ #define ILL_ADR_ERR_INT_EN_KER_WIDTH 1 ++ #define SRM_PERR_INT_EN_KER_LBN 1 ++ #define SRM_PERR_INT_EN_KER_WIDTH 1 ++ #define DRV_INT_EN_KER_LBN 0 ++ #define DRV_INT_EN_KER_WIDTH 1 ++#define INT_EN_REG_CHAR_OFST 0x20 /* Char Driver interrupt enable register */ ++ #define CHAR_INT_CHAR_LBN 4 ++ #define CHAR_INT_CHAR_WIDTH 1 ++ #define CHAR_INT_KER_LBN 3 ++ #define CHAR_INT_KER_WIDTH 1 ++ #define ILL_ADR_ERR_INT_EN_CHAR_LBN 2 ++ #define ILL_ADR_ERR_INT_EN_CHAR_WIDTH 1 ++ #define SRM_PERR_INT_EN_CHAR_LBN 1 ++ #define SRM_PERR_INT_EN_CHAR_WIDTH 1 ++ #define DRV_INT_EN_CHAR_LBN 0 ++ #define DRV_INT_EN_CHAR_WIDTH 1 ++#define INT_ADR_REG_KER_OFST 0x30 /* Interrupt host address for Kernel driver */ ++ #define INT_ADR_KER_LBN 0 ++ #define INT_ADR_KER_WIDTH 64 ++ #define DRV_INT_KER_LBN 32 ++ #define DRV_INT_KER_WIDTH 1 ++ #define EV_FF_HALF_INT_KER_LBN 3 ++ #define EV_FF_HALF_INT_KER_WIDTH 1 ++ #define EV_FF_FULL_INT_KER_LBN 2 ++ #define EV_FF_FULL_INT_KER_WIDTH 1 ++ #define ILL_ADR_ERR_INT_KER_LBN 1 ++ #define ILL_ADR_ERR_INT_KER_WIDTH 1 ++ #define SRAM_PERR_INT_KER_LBN 0 ++ #define SRAM_PERR_INT_KER_WIDTH 1 ++#define INT_ADR_REG_CHAR_OFST 0x40 /* Interrupt host address for Char driver */ ++ #define INT_ADR_CHAR_LBN 0 ++ #define INT_ADR_CHAR_WIDTH 64 ++ #define DRV_INT_CHAR_LBN 32 ++ #define DRV_INT_CHAR_WIDTH 1 ++ #define EV_FF_HALF_INT_CHAR_LBN 3 ++ #define EV_FF_HALF_INT_CHAR_WIDTH 1 ++ #define EV_FF_FULL_INT_CHAR_LBN 2 ++ 
#define EV_FF_FULL_INT_CHAR_WIDTH 1 ++ #define ILL_ADR_ERR_INT_CHAR_LBN 1 ++ #define ILL_ADR_ERR_INT_CHAR_WIDTH 1 ++ #define SRAM_PERR_INT_CHAR_LBN 0 ++ #define SRAM_PERR_INT_CHAR_WIDTH 1 ++#define INT_ISR0_B0_OFST 0x90 /* B0 only */ ++#define INT_ISR1_B0_OFST 0xA0 ++#define INT_ACK_REG_KER_A1_OFST 0x50 /* Kernel interrupt acknowledge register */ ++ #define RESERVED_LBN 0 ++ #define RESERVED_WIDTH 32 ++#define INT_ACK_REG_CHAR_A1_OFST 0x60 /* CHAR interrupt acknowledge register */ ++ #define RESERVED_LBN 0 ++ #define RESERVED_WIDTH 32 ++/*************---- Global CSR Registers C Header ----*************/ ++#define NIC_STAT_REG_KER_OFST 0x200 /* ASIC strap status register */ ++#define NIC_STAT_REG_OFST 0x200 /* ASIC strap status register */ ++ #define ONCHIP_SRAM_LBN 16 ++ #define ONCHIP_SRAM_WIDTH 0 ++ #define STRAP_PINS_LBN 0 ++ #define STRAP_PINS_WIDTH 3 ++#define GPIO_CTL_REG_KER_OFST 0x210 /* GPIO control register */ ++#define GPIO_CTL_REG_OFST 0x210 /* GPIO control register */ ++ #define GPIO_OEN_LBN 24 ++ #define GPIO_OEN_WIDTH 4 ++ #define GPIO_OUT_LBN 16 ++ #define GPIO_OUT_WIDTH 4 ++ #define GPIO_IN_LBN 8 ++ #define GPIO_IN_WIDTH 4 ++ #define GPIO_PWRUP_VALUE_LBN 0 ++ #define GPIO_PWRUP_VALUE_WIDTH 4 ++#define GLB_CTL_REG_KER_OFST 0x220 /* Global control register */ ++#define GLB_CTL_REG_OFST 0x220 /* Global control register */ ++ #define SWRST_LBN 0 ++ #define SWRST_WIDTH 1 ++#define FATAL_INTR_REG_KER_OFST 0x230 /* Fatal interrupt register for Kernel */ ++ #define PCI_BUSERR_INT_KER_EN_LBN 43 ++ #define PCI_BUSERR_INT_KER_EN_WIDTH 1 ++ #define SRAM_OOB_INT_KER_EN_LBN 42 ++ #define SRAM_OOB_INT_KER_EN_WIDTH 1 ++ #define BUFID_OOB_INT_KER_EN_LBN 41 ++ #define BUFID_OOB_INT_KER_EN_WIDTH 1 ++ #define MEM_PERR_INT_KER_EN_LBN 40 ++ #define MEM_PERR_INT_KER_EN_WIDTH 1 ++ #define RBUF_OWN_INT_KER_EN_LBN 39 ++ #define RBUF_OWN_INT_KER_EN_WIDTH 1 ++ #define TBUF_OWN_INT_KER_EN_LBN 38 ++ #define TBUF_OWN_INT_KER_EN_WIDTH 1 ++ #define RDESCQ_OWN_INT_KER_EN_LBN 37 ++ #define RDESCQ_OWN_INT_KER_EN_WIDTH 1 ++ #define TDESCQ_OWN_INT_KER_EN_LBN 36 ++ #define TDESCQ_OWN_INT_KER_EN_WIDTH 1 ++ #define EVQ_OWN_INT_KER_EN_LBN 35 ++ #define EVQ_OWN_INT_KER_EN_WIDTH 1 ++ #define EVFF_OFLO_INT_KER_EN_LBN 34 ++ #define EVFF_OFLO_INT_KER_EN_WIDTH 1 ++ #define ILL_ADR_INT_KER_EN_LBN 33 ++ #define ILL_ADR_INT_KER_EN_WIDTH 1 ++ #define SRM_PERR_INT_KER_EN_LBN 32 ++ #define SRM_PERR_INT_KER_EN_WIDTH 1 ++ #define PCI_BUSERR_INT_KER_LBN 11 ++ #define PCI_BUSERR_INT_KER_WIDTH 1 ++ #define SRAM_OOB_INT_KER_LBN 10 ++ #define SRAM_OOB_INT_KER_WIDTH 1 ++ #define BUFID_OOB_INT_KER_LBN 9 ++ #define BUFID_OOB_INT_KER_WIDTH 1 ++ #define MEM_PERR_INT_KER_LBN 8 ++ #define MEM_PERR_INT_KER_WIDTH 1 ++ #define RBUF_OWN_INT_KER_LBN 7 ++ #define RBUF_OWN_INT_KER_WIDTH 1 ++ #define TBUF_OWN_INT_KER_LBN 6 ++ #define TBUF_OWN_INT_KER_WIDTH 1 ++ #define RDESCQ_OWN_INT_KER_LBN 5 ++ #define RDESCQ_OWN_INT_KER_WIDTH 1 ++ #define TDESCQ_OWN_INT_KER_LBN 4 ++ #define TDESCQ_OWN_INT_KER_WIDTH 1 ++ #define EVQ_OWN_INT_KER_LBN 3 ++ #define EVQ_OWN_INT_KER_WIDTH 1 ++ #define EVFF_OFLO_INT_KER_LBN 2 ++ #define EVFF_OFLO_INT_KER_WIDTH 1 ++ #define ILL_ADR_INT_KER_LBN 1 ++ #define ILL_ADR_INT_KER_WIDTH 1 ++ #define SRM_PERR_INT_KER_LBN 0 ++ #define SRM_PERR_INT_KER_WIDTH 1 ++#define FATAL_INTR_REG_OFST 0x240 /* Fatal interrupt register for Char */ ++ #define PCI_BUSERR_INT_CHAR_EN_LBN 43 ++ #define PCI_BUSERR_INT_CHAR_EN_WIDTH 1 ++ #define SRAM_OOB_INT_CHAR_EN_LBN 42 ++ #define SRAM_OOB_INT_CHAR_EN_WIDTH 1 ++ #define BUFID_OOB_INT_CHAR_EN_LBN 
41 ++ #define BUFID_OOB_INT_CHAR_EN_WIDTH 1 ++ #define MEM_PERR_INT_CHAR_EN_LBN 40 ++ #define MEM_PERR_INT_CHAR_EN_WIDTH 1 ++ #define RBUF_OWN_INT_CHAR_EN_LBN 39 ++ #define RBUF_OWN_INT_CHAR_EN_WIDTH 1 ++ #define TBUF_OWN_INT_CHAR_EN_LBN 38 ++ #define TBUF_OWN_INT_CHAR_EN_WIDTH 1 ++ #define RDESCQ_OWN_INT_CHAR_EN_LBN 37 ++ #define RDESCQ_OWN_INT_CHAR_EN_WIDTH 1 ++ #define TDESCQ_OWN_INT_CHAR_EN_LBN 36 ++ #define TDESCQ_OWN_INT_CHAR_EN_WIDTH 1 ++ #define EVQ_OWN_INT_CHAR_EN_LBN 35 ++ #define EVQ_OWN_INT_CHAR_EN_WIDTH 1 ++ #define EVFF_OFLO_INT_CHAR_EN_LBN 34 ++ #define EVFF_OFLO_INT_CHAR_EN_WIDTH 1 ++ #define ILL_ADR_INT_CHAR_EN_LBN 33 ++ #define ILL_ADR_INT_CHAR_EN_WIDTH 1 ++ #define SRM_PERR_INT_CHAR_EN_LBN 32 ++ #define SRM_PERR_INT_CHAR_EN_WIDTH 1 ++ #define FATAL_INTR_REG_EN_BITS 0xffffffffffffffffULL ++ #define PCI_BUSERR_INT_CHAR_LBN 11 ++ #define PCI_BUSERR_INT_CHAR_WIDTH 1 ++ #define SRAM_OOB_INT_CHAR_LBN 10 ++ #define SRAM_OOB_INT_CHAR_WIDTH 1 ++ #define BUFID_OOB_INT_CHAR_LBN 9 ++ #define BUFID_OOB_INT_CHAR_WIDTH 1 ++ #define MEM_PERR_INT_CHAR_LBN 8 ++ #define MEM_PERR_INT_CHAR_WIDTH 1 ++ #define RBUF_OWN_INT_CHAR_LBN 7 ++ #define RBUF_OWN_INT_CHAR_WIDTH 1 ++ #define TBUF_OWN_INT_CHAR_LBN 6 ++ #define TBUF_OWN_INT_CHAR_WIDTH 1 ++ #define RDESCQ_OWN_INT_CHAR_LBN 5 ++ #define RDESCQ_OWN_INT_CHAR_WIDTH 1 ++ #define TDESCQ_OWN_INT_CHAR_LBN 4 ++ #define TDESCQ_OWN_INT_CHAR_WIDTH 1 ++ #define EVQ_OWN_INT_CHAR_LBN 3 ++ #define EVQ_OWN_INT_CHAR_WIDTH 1 ++ #define EVFF_OFLO_INT_CHAR_LBN 2 ++ #define EVFF_OFLO_INT_CHAR_WIDTH 1 ++ #define ILL_ADR_INT_CHAR_LBN 1 ++ #define ILL_ADR_INT_CHAR_WIDTH 1 ++ #define SRM_PERR_INT_CHAR_LBN 0 ++ #define SRM_PERR_INT_CHAR_WIDTH 1 ++#define DP_CTRL_REG_OFST 0x250 /* Datapath control register */ ++ #define FLS_EVQ_ID_LBN 0 ++ #define FLS_EVQ_ID_WIDTH 12 ++#define MEM_STAT_REG_KER_OFST 0x260 /* Memory status register */ ++#define MEM_STAT_REG_OFST 0x260 /* Memory status register */ ++ #define MEM_PERR_VEC_LBN 53 ++ #define MEM_PERR_VEC_WIDTH 38 ++ #define MBIST_CORR_LBN 38 ++ #define MBIST_CORR_WIDTH 15 ++ #define MBIST_ERR_LBN 0 ++ #define MBIST_ERR_WIDTH 38 ++#define DEBUG_REG_KER_OFST 0x270 /* Debug register */ ++#define DEBUG_REG_OFST 0x270 /* Debug register */ ++ #define DEBUG_BLK_SEL2_LBN 47 ++ #define DEBUG_BLK_SEL2_WIDTH 3 ++ #define DEBUG_BLK_SEL1_LBN 44 ++ #define DEBUG_BLK_SEL1_WIDTH 3 ++ #define DEBUG_BLK_SEL0_LBN 41 ++ #define DEBUG_BLK_SEL0_WIDTH 3 ++ #define MISC_DEBUG_ADDR_LBN 36 ++ #define MISC_DEBUG_ADDR_WIDTH 5 ++ #define SERDES_DEBUG_ADDR_LBN 31 ++ #define SERDES_DEBUG_ADDR_WIDTH 5 ++ #define EM_DEBUG_ADDR_LBN 26 ++ #define EM_DEBUG_ADDR_WIDTH 5 ++ #define SR_DEBUG_ADDR_LBN 21 ++ #define SR_DEBUG_ADDR_WIDTH 5 ++ #define EV_DEBUG_ADDR_LBN 16 ++ #define EV_DEBUG_ADDR_WIDTH 5 ++ #define RX_DEBUG_ADDR_LBN 11 ++ #define RX_DEBUG_ADDR_WIDTH 5 ++ #define TX_DEBUG_ADDR_LBN 6 ++ #define TX_DEBUG_ADDR_WIDTH 5 ++ #define BIU_DEBUG_ADDR_LBN 1 ++ #define BIU_DEBUG_ADDR_WIDTH 5 ++ #define DEBUG_EN_LBN 0 ++ #define DEBUG_EN_WIDTH 1 ++#define DRIVER_REG0_KER_OFST 0x280 /* Driver scratch register 0 */ ++#define DRIVER_REG0_OFST 0x280 /* Driver scratch register 0 */ ++ #define DRIVER_DW0_LBN 0 ++ #define DRIVER_DW0_WIDTH 32 ++#define DRIVER_REG1_KER_OFST 0x290 /* Driver scratch register 1 */ ++#define DRIVER_REG1_OFST 0x290 /* Driver scratch register 1 */ ++ #define DRIVER_DW1_LBN 0 ++ #define DRIVER_DW1_WIDTH 32 ++#define DRIVER_REG2_KER_OFST 0x2A0 /* Driver scratch register 2 */ ++#define DRIVER_REG2_OFST 0x2A0 /* Driver scratch register 2 */ ++ 
+ #define DRIVER_DW2_LBN 0
+ #define DRIVER_DW2_WIDTH 32
+#define DRIVER_REG3_KER_OFST 0x2B0 /* Driver scratch register 3 */
+#define DRIVER_REG3_OFST 0x2B0 /* Driver scratch register 3 */
+ #define DRIVER_DW3_LBN 0
+ #define DRIVER_DW3_WIDTH 32
+#define DRIVER_REG4_KER_OFST 0x2C0 /* Driver scratch register 4 */
+#define DRIVER_REG4_OFST 0x2C0 /* Driver scratch register 4 */
+ #define DRIVER_DW4_LBN 0
+ #define DRIVER_DW4_WIDTH 32
+#define DRIVER_REG5_KER_OFST 0x2D0 /* Driver scratch register 5 */
+#define DRIVER_REG5_OFST 0x2D0 /* Driver scratch register 5 */
+ #define DRIVER_DW5_LBN 0
+ #define DRIVER_DW5_WIDTH 32
+#define DRIVER_REG6_KER_OFST 0x2E0 /* Driver scratch register 6 */
+#define DRIVER_REG6_OFST 0x2E0 /* Driver scratch register 6 */
+ #define DRIVER_DW6_LBN 0
+ #define DRIVER_DW6_WIDTH 32
+#define DRIVER_REG7_KER_OFST 0x2F0 /* Driver scratch register 7 */
+#define DRIVER_REG7_OFST 0x2F0 /* Driver scratch register 7 */
+ #define DRIVER_DW7_LBN 0
+ #define DRIVER_DW7_WIDTH 32
+#define ALTERA_BUILD_REG_KER_OFST 0x300 /* Altera build register */
+#define ALTERA_BUILD_REG_OFST 0x300 /* Altera build register */
+ #define ALTERA_BUILD_VER_LBN 0
+ #define ALTERA_BUILD_VER_WIDTH 32
+
+/* so-called CSR spare register
+ - contains separate parity enable bits for the various internal memory
+ blocks */
+#define MEM_PARITY_ERR_EN_REG_KER 0x310
+#define MEM_PARITY_ALL_BLOCKS_EN_LBN 64
+#define MEM_PARITY_ALL_BLOCKS_EN_WIDTH 38
+#define MEM_PARITY_TX_DATA_EN_LBN 72
+#define MEM_PARITY_TX_DATA_EN_WIDTH 2
+
+/*************---- Event & Timer Module Registers C Header ----*************/
+
+#if FALCON_EXTENDED_P_BAR
+#define EVQ_RPTR_REG_KER_OFST 0x11B00 /* Event queue read pointer register */
+#else
+#define EVQ_RPTR_REG_KER_OFST 0x1B00 /* Event queue read pointer register */
+#endif
+
+#define EVQ_RPTR_REG_OFST 0xFA0000 /* Event queue read pointer register
+ array. */
+ #define EVQ_RPTR_LBN 0
+ #define EVQ_RPTR_WIDTH 15
+
+#if FALCON_EXTENDED_P_BAR
+#define EVQ_PTR_TBL_KER_OFST 0x11A00 /* Event queue pointer table for kernel
+ access */
+#else
+#define EVQ_PTR_TBL_KER_OFST 0x1A00 /* Event queue pointer table for kernel
+ access */
+#endif
+
+#define EVQ_PTR_TBL_CHAR_OFST 0xF60000 /* Event queue pointer table for char
+ direct access */
+ #define EVQ_WKUP_OR_INT_EN_LBN 39
+ #define EVQ_WKUP_OR_INT_EN_WIDTH 1
+ #define EVQ_NXT_WPTR_LBN 24
+ #define EVQ_NXT_WPTR_WIDTH 15
+ #define EVQ_EN_LBN 23
+ #define EVQ_EN_WIDTH 1
+ #define EVQ_SIZE_LBN 20
+ #define EVQ_SIZE_WIDTH 3
+ #define EVQ_BUF_BASE_ID_LBN 0
+ #define EVQ_BUF_BASE_ID_WIDTH 20
+#define TIMER_CMD_REG_KER_OFST 0x420 /* Timer table for kernel access.
+ Page-mapped */
+#define TIMER_CMD_REG_PAGE4_OFST 0x8420 /* Timer table for user-level access.
+ Page-mapped. For lowest 1K queues.
+ */
+#define TIMER_CMD_REG_PAGE123K_OFST 0x1000420 /* Timer table for user-level
+ access. Page-mapped.
+ For upper 3K queues.
*/ ++#define TIMER_TBL_OFST 0xF70000 /* Timer table for char driver direct access */ ++ #define TIMER_MODE_LBN 12 ++ #define TIMER_MODE_WIDTH 2 ++ #define TIMER_VAL_LBN 0 ++ #define TIMER_VAL_WIDTH 12 ++ #define TIMER_MODE_INT_HLDOFF 2 ++ #define EVQ_BUF_SIZE_LBN 0 ++ #define EVQ_BUF_SIZE_WIDTH 1 ++#define DRV_EV_REG_KER_OFST 0x440 /* Driver generated event register */ ++#define DRV_EV_REG_OFST 0x440 /* Driver generated event register */ ++ #define DRV_EV_QID_LBN 64 ++ #define DRV_EV_QID_WIDTH 12 ++ #define DRV_EV_DATA_LBN 0 ++ #define DRV_EV_DATA_WIDTH 64 ++#define EVQ_CTL_REG_KER_OFST 0x450 /* Event queue control register */ ++#define EVQ_CTL_REG_OFST 0x450 /* Event queue control register */ ++ #define RX_EVQ_WAKEUP_MASK_B0_LBN 15 ++ #define RX_EVQ_WAKEUP_MASK_B0_WIDTH 6 ++ #define EVQ_OWNERR_CTL_LBN 14 ++ #define EVQ_OWNERR_CTL_WIDTH 1 ++ #define EVQ_FIFO_AF_TH_LBN 8 ++ #define EVQ_FIFO_AF_TH_WIDTH 6 ++ #define EVQ_FIFO_NOTAF_TH_LBN 0 ++ #define EVQ_FIFO_NOTAF_TH_WIDTH 6 ++/*************---- SRAM Module Registers C Header ----*************/ ++#define BUF_TBL_CFG_REG_KER_OFST 0x600 /* Buffer table configuration register */ ++#define BUF_TBL_CFG_REG_OFST 0x600 /* Buffer table configuration register */ ++ #define BUF_TBL_MODE_LBN 3 ++ #define BUF_TBL_MODE_WIDTH 1 ++#define SRM_RX_DC_CFG_REG_KER_OFST 0x610 /* SRAM receive descriptor cache ++ configuration register */ ++#define SRM_RX_DC_CFG_REG_OFST 0x610 /* SRAM receive descriptor cache ++ configuration register */ ++ #define SRM_RX_DC_BASE_ADR_LBN 0 ++ #define SRM_RX_DC_BASE_ADR_WIDTH 21 ++#define SRM_TX_DC_CFG_REG_KER_OFST 0x620 /* SRAM transmit descriptor cache ++ configuration register */ ++#define SRM_TX_DC_CFG_REG_OFST 0x620 /* SRAM transmit descriptor cache ++ configuration register */ ++ #define SRM_TX_DC_BASE_ADR_LBN 0 ++ #define SRM_TX_DC_BASE_ADR_WIDTH 21 ++#define SRM_CFG_REG_KER_OFST 0x630 /* SRAM configuration register */ ++#define SRM_CFG_REG_OFST 0x630 /* SRAM configuration register */ ++ #define SRAM_OOB_ADR_INTEN_LBN 5 ++ #define SRAM_OOB_ADR_INTEN_WIDTH 1 ++ #define SRAM_OOB_BUF_INTEN_LBN 4 ++ #define SRAM_OOB_BUF_INTEN_WIDTH 1 ++ #define SRAM_BT_INIT_EN_LBN 3 ++ #define SRAM_BT_INIT_EN_WIDTH 1 ++ #define SRM_NUM_BANK_LBN 2 ++ #define SRM_NUM_BANK_WIDTH 1 ++ #define SRM_BANK_SIZE_LBN 0 ++ #define SRM_BANK_SIZE_WIDTH 2 ++#define BUF_TBL_UPD_REG_KER_OFST 0x650 /* Buffer table update register */ ++#define BUF_TBL_UPD_REG_OFST 0x650 /* Buffer table update register */ ++ #define BUF_UPD_CMD_LBN 63 ++ #define BUF_UPD_CMD_WIDTH 1 ++ #define BUF_CLR_CMD_LBN 62 ++ #define BUF_CLR_CMD_WIDTH 1 ++ #define BUF_CLR_END_ID_LBN 32 ++ #define BUF_CLR_END_ID_WIDTH 20 ++ #define BUF_CLR_START_ID_LBN 0 ++ #define BUF_CLR_START_ID_WIDTH 20 ++#define SRM_UPD_EVQ_REG_KER_OFST 0x660 /* Buffer table update register */ ++#define SRM_UPD_EVQ_REG_OFST 0x660 /* Buffer table update register */ ++ #define SRM_UPD_EVQ_ID_LBN 0 ++ #define SRM_UPD_EVQ_ID_WIDTH 12 ++#define SRAM_PARITY_REG_KER_OFST 0x670 /* SRAM parity register. */ ++#define SRAM_PARITY_REG_OFST 0x670 /* SRAM parity register. 
*/
+ #define FORCE_SRAM_PERR_LBN 0
+ #define FORCE_SRAM_PERR_WIDTH 1
+
+#if FALCON_EXTENDED_P_BAR
+#define BUF_HALF_TBL_KER_OFST 0x18000 /* Buffer table in half buffer table
+ mode direct access by kernel driver */
+#else
+#define BUF_HALF_TBL_KER_OFST 0x8000 /* Buffer table in half buffer table
+ mode direct access by kernel driver */
+#endif
+
+
+#define BUF_HALF_TBL_OFST 0x800000 /* Buffer table in half buffer table mode
+ direct access by char driver */
+ #define BUF_ADR_HBUF_ODD_LBN 44
+ #define BUF_ADR_HBUF_ODD_WIDTH 20
+ #define BUF_OWNER_ID_HBUF_ODD_LBN 32
+ #define BUF_OWNER_ID_HBUF_ODD_WIDTH 12
+ #define BUF_ADR_HBUF_EVEN_LBN 12
+ #define BUF_ADR_HBUF_EVEN_WIDTH 20
+ #define BUF_OWNER_ID_HBUF_EVEN_LBN 0
+ #define BUF_OWNER_ID_HBUF_EVEN_WIDTH 12
+
+
+#if FALCON_EXTENDED_P_BAR
+#define BUF_FULL_TBL_KER_OFST 0x18000 /* Buffer table in full buffer table
+ mode direct access by kernel driver */
+#else
+#define BUF_FULL_TBL_KER_OFST 0x8000 /* Buffer table in full buffer table mode
+ direct access by kernel driver */
+#endif
+
+
+
+
+#define BUF_FULL_TBL_OFST 0x800000 /* Buffer table in full buffer table mode
+ direct access by char driver */
+ #define IP_DAT_BUF_SIZE_LBN 50
+ #define IP_DAT_BUF_SIZE_WIDTH 1
+ #define BUF_ADR_REGION_LBN 48
+ #define BUF_ADR_REGION_WIDTH 2
+ #define BUF_ADR_FBUF_LBN 14
+ #define BUF_ADR_FBUF_WIDTH 34
+ #define BUF_OWNER_ID_FBUF_LBN 0
+ #define BUF_OWNER_ID_FBUF_WIDTH 14
+#define SRM_DBG_REG_OFST 0x3000000 /* SRAM debug access */
+ #define SRM_DBG_LBN 0
+ #define SRM_DBG_WIDTH 64
+/*************---- RX Datapath Registers C Header ----*************/
+
+#define RX_CFG_REG_KER_OFST 0x800 /* Receive configuration register */
+#define RX_CFG_REG_OFST 0x800 /* Receive configuration register */
+
+#if !defined(FALCON_64K_RXFIFO) && !defined(FALCON_PRE_02020029)
+# if !defined(FALCON_128K_RXFIFO)
+# define FALCON_128K_RXFIFO
+# endif
+#endif
+
+#if defined(FALCON_128K_RXFIFO)
+
+/* new for B0 */
+ #define RX_TOEP_TCP_SUPPRESS_B0_LBN 48
+ #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
+ #define RX_INGR_EN_B0_LBN 47
+ #define RX_INGR_EN_B0_WIDTH 1
+ #define RX_TOEP_IPV4_B0_LBN 46
+ #define RX_TOEP_IPV4_B0_WIDTH 1
+ #define RX_HASH_ALG_B0_LBN 45
+ #define RX_HASH_ALG_B0_WIDTH 1
+ #define RX_HASH_INSERT_HDR_B0_LBN 44
+ #define RX_HASH_INSERT_HDR_B0_WIDTH 1
+/* moved for B0 */
+ #define RX_DESC_PUSH_EN_B0_LBN 43
+ #define RX_DESC_PUSH_EN_B0_WIDTH 1
+ #define RX_RDW_PATCH_EN_LBN 42 /* Non head of line blocking */
+ #define RX_RDW_PATCH_EN_WIDTH 1
+ #define RX_PCI_BURST_SIZE_B0_LBN 39
+ #define RX_PCI_BURST_SIZE_B0_WIDTH 3
+ #define RX_OWNERR_CTL_B0_LBN 38
+ #define RX_OWNERR_CTL_B0_WIDTH 1
+ #define RX_XON_TX_TH_B0_LBN 33
+ #define RX_XON_TX_TH_B0_WIDTH 5
+ #define RX_XOFF_TX_TH_B0_LBN 28
+ #define RX_XOFF_TX_TH_B0_WIDTH 5
+ #define RX_USR_BUF_SIZE_B0_LBN 19
+ #define RX_USR_BUF_SIZE_B0_WIDTH 9
+ #define RX_XON_MAC_TH_B0_LBN 10
+ #define RX_XON_MAC_TH_B0_WIDTH 9
+ #define RX_XOFF_MAC_TH_B0_LBN 1
+ #define RX_XOFF_MAC_TH_B0_WIDTH 9
+ #define RX_XOFF_MAC_EN_B0_LBN 0
+ #define RX_XOFF_MAC_EN_B0_WIDTH 1
+
+#elif !defined(FALCON_PRE_02020029)
+/* new for B0 */
+ #define RX_TOEP_TCP_SUPPRESS_B0_LBN 46
+ #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
+ #define RX_INGR_EN_B0_LBN 45
+ #define RX_INGR_EN_B0_WIDTH 1
+ #define RX_TOEP_IPV4_B0_LBN 44
+ #define RX_TOEP_IPV4_B0_WIDTH 1
+ #define RX_HASH_ALG_B0_LBN 43
+ #define RX_HASH_ALG_B0_WIDTH 1
+ #define RX_HASH_INSERT_HDR_B0_LBN 42
+ #define RX_HASH_INSERT_HDR_B0_WIDTH 1
+/* moved for B0 */
+ #define RX_DESC_PUSH_EN_B0_LBN 41
+ #define RX_DESC_PUSH_EN_B0_WIDTH 1
+ #define RX_PCI_BURST_SIZE_B0_LBN 37
+ #define RX_PCI_BURST_SIZE_B0_WIDTH 3
+ #define RX_OWNERR_CTL_B0_LBN 36
+ #define RX_OWNERR_CTL_B0_WIDTH 1
+ #define RX_XON_TX_TH_B0_LBN 31
+ #define RX_XON_TX_TH_B0_WIDTH 5
+ #define RX_XOFF_TX_TH_B0_LBN 26
+ #define RX_XOFF_TX_TH_B0_WIDTH 5
+ #define RX_USR_BUF_SIZE_B0_LBN 17
+ #define RX_USR_BUF_SIZE_B0_WIDTH 9
+ #define RX_XON_MAC_TH_B0_LBN 9
+ #define RX_XON_MAC_TH_B0_WIDTH 8
+ #define RX_XOFF_MAC_TH_B0_LBN 1
+ #define RX_XOFF_MAC_TH_B0_WIDTH 8
+ #define RX_XOFF_MAC_EN_B0_LBN 0
+ #define RX_XOFF_MAC_EN_B0_WIDTH 1
+
+#else
+/* new for B0 */
+ #define RX_TOEP_TCP_SUPPRESS_B0_LBN 44
+ #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
+ #define RX_INGR_EN_B0_LBN 43
+ #define RX_INGR_EN_B0_WIDTH 1
+ #define RX_TOEP_IPV4_B0_LBN 42
+ #define RX_TOEP_IPV4_B0_WIDTH 1
+ #define RX_HASH_ALG_B0_LBN 41
+ #define RX_HASH_ALG_B0_WIDTH 1
+ #define RX_HASH_INSERT_HDR_B0_LBN 40
+ #define RX_HASH_INSERT_HDR_B0_WIDTH 1
+/* moved for B0 */
+ #define RX_DESC_PUSH_EN_B0_LBN 35
+ #define RX_DESC_PUSH_EN_B0_WIDTH 1
+ #define RX_PCI_BURST_SIZE_B0_LBN 35
+ #define RX_PCI_BURST_SIZE_B0_WIDTH 2
+ #define RX_OWNERR_CTL_B0_LBN 34
+ #define RX_OWNERR_CTL_B0_WIDTH 1
+ #define RX_XON_TX_TH_B0_LBN 29
+ #define RX_XON_TX_TH_B0_WIDTH 5
+ #define RX_XOFF_TX_TH_B0_LBN 24
+ #define RX_XOFF_TX_TH_B0_WIDTH 5
+ #define RX_USR_BUF_SIZE_B0_LBN 15
+ #define RX_USR_BUF_SIZE_B0_WIDTH 9
+ #define RX_XON_MAC_TH_B0_LBN 8
+ #define RX_XON_MAC_TH_B0_WIDTH 7
+ #define RX_XOFF_MAC_TH_B0_LBN 1
+ #define RX_XOFF_MAC_TH_B0_WIDTH 7
+ #define RX_XOFF_MAC_EN_B0_LBN 0
+ #define RX_XOFF_MAC_EN_B0_WIDTH 1
+
+#endif
+
+/* A0/A1 */
+ #define RX_PUSH_EN_A1_LBN 35
+ #define RX_PUSH_EN_A1_WIDTH 1
+ #define RX_PCI_BURST_SIZE_A1_LBN 31
+ #define RX_PCI_BURST_SIZE_A1_WIDTH 3
+ #define RX_OWNERR_CTL_A1_LBN 30
+ #define RX_OWNERR_CTL_A1_WIDTH 1
+ #define RX_XON_TX_TH_A1_LBN 25
+ #define RX_XON_TX_TH_A1_WIDTH 5
+ #define RX_XOFF_TX_TH_A1_LBN 20
+ #define RX_XOFF_TX_TH_A1_WIDTH 5
+ #define RX_USR_BUF_SIZE_A1_LBN 11
+ #define RX_USR_BUF_SIZE_A1_WIDTH 9
+ #define RX_XON_MAC_TH_A1_LBN 6
+ #define RX_XON_MAC_TH_A1_WIDTH 5
+ #define RX_XOFF_MAC_TH_A1_LBN 1
+ #define RX_XOFF_MAC_TH_A1_WIDTH 5
+ #define RX_XOFF_MAC_EN_A1_LBN 0
+ #define RX_XOFF_MAC_EN_A1_WIDTH 1
+
+#define RX_FILTER_CTL_REG_OFST 0x810 /* Receive filter control registers */
+ #define SCATTER_ENBL_NO_MATCH_Q_B0_LBN 40
+ #define SCATTER_ENBL_NO_MATCH_Q_B0_WIDTH 1
+ #define UDP_FULL_SRCH_LIMIT_LBN 32
+ #define UDP_FULL_SRCH_LIMIT_WIDTH 8
+ #define NUM_KER_LBN 24
+ #define NUM_KER_WIDTH 2
+ #define UDP_WILD_SRCH_LIMIT_LBN 16
+ #define UDP_WILD_SRCH_LIMIT_WIDTH 8
+ #define TCP_WILD_SRCH_LIMIT_LBN 8
+ #define TCP_WILD_SRCH_LIMIT_WIDTH 8
+ #define TCP_FULL_SRCH_LIMIT_LBN 0
+ #define TCP_FULL_SRCH_LIMIT_WIDTH 8
+#define RX_FLUSH_DESCQ_REG_KER_OFST 0x820 /* Receive flush descriptor queue
+ register */
+#define RX_FLUSH_DESCQ_REG_OFST 0x820 /* Receive flush descriptor queue
+ register */
+ #define RX_FLUSH_DESCQ_CMD_LBN 24
+ #define RX_FLUSH_DESCQ_CMD_WIDTH 1
+ #define RX_FLUSH_EVQ_ID_LBN 12
+ #define RX_FLUSH_EVQ_ID_WIDTH 12
+ #define RX_FLUSH_DESCQ_LBN 0
+ #define RX_FLUSH_DESCQ_WIDTH 12
+#define RX_DESC_UPD_REG_KER_OFST 0x830 /* Kernel receive descriptor update
+ register.
Page-mapped */ ++#define RX_DESC_UPD_REG_PAGE4_OFST 0x8830 /* Char & user receive descriptor ++ update register. Page-mapped. ++ For lowest 1K queues. */ ++#define RX_DESC_UPD_REG_PAGE123K_OFST 0x1000830 /* Char & user receive ++ descriptor update register. ++ Page-mapped. For upper ++ 3K queues. */ ++ #define RX_DESC_WPTR_LBN 96 ++ #define RX_DESC_WPTR_WIDTH 12 ++ #define RX_DESC_PUSH_CMD_LBN 95 ++ #define RX_DESC_PUSH_CMD_WIDTH 1 ++ #define RX_DESC_LBN 0 ++ #define RX_DESC_WIDTH 64 ++ #define RX_KER_DESC_LBN 0 ++ #define RX_KER_DESC_WIDTH 64 ++ #define RX_USR_DESC_LBN 0 ++ #define RX_USR_DESC_WIDTH 32 ++#define RX_DC_CFG_REG_KER_OFST 0x840 /* Receive descriptor cache ++ configuration register */ ++#define RX_DC_CFG_REG_OFST 0x840 /* Receive descriptor cache ++ configuration register */ ++ #define RX_DC_SIZE_LBN 0 ++ #define RX_DC_SIZE_WIDTH 2 ++#define RX_DC_PF_WM_REG_KER_OFST 0x850 /* Receive descriptor cache pre-fetch ++ watermark register */ ++#define RX_DC_PF_WM_REG_OFST 0x850 /* Receive descriptor cache pre-fetch ++ watermark register */ ++ #define RX_DC_PF_LWM_LO_LBN 0 ++ #define RX_DC_PF_LWM_LO_WIDTH 6 ++ ++#define RX_RSS_TKEY_B0_OFST 0x860 /* RSS Toeplitz hash key (B0 only) */ ++ ++#define RX_NODESC_DROP_REG 0x880 ++ #define RX_NODESC_DROP_CNT_LBN 0 ++ #define RX_NODESC_DROP_CNT_WIDTH 16 ++ ++#define XM_TX_CFG_REG_OFST 0x1230 ++ #define XM_AUTO_PAD_LBN 5 ++ #define XM_AUTO_PAD_WIDTH 1 ++ ++#define RX_FILTER_TBL0_OFST 0xF00000 /* Receive filter table - even entries */ ++ #define RSS_EN_0_B0_LBN 110 ++ #define RSS_EN_0_B0_WIDTH 1 ++ #define SCATTER_EN_0_B0_LBN 109 ++ #define SCATTER_EN_0_B0_WIDTH 1 ++ #define TCP_UDP_0_LBN 108 ++ #define TCP_UDP_0_WIDTH 1 ++ #define RXQ_ID_0_LBN 96 ++ #define RXQ_ID_0_WIDTH 12 ++ #define DEST_IP_0_LBN 64 ++ #define DEST_IP_0_WIDTH 32 ++ #define DEST_PORT_TCP_0_LBN 48 ++ #define DEST_PORT_TCP_0_WIDTH 16 ++ #define SRC_IP_0_LBN 16 ++ #define SRC_IP_0_WIDTH 32 ++ #define SRC_TCP_DEST_UDP_0_LBN 0 ++ #define SRC_TCP_DEST_UDP_0_WIDTH 16 ++#define RX_FILTER_TBL1_OFST 0xF00010 /* Receive filter table - odd entries */ ++ #define RSS_EN_1_B0_LBN 110 ++ #define RSS_EN_1_B0_WIDTH 1 ++ #define SCATTER_EN_1_B0_LBN 109 ++ #define SCATTER_EN_1_B0_WIDTH 1 ++ #define TCP_UDP_1_LBN 108 ++ #define TCP_UDP_1_WIDTH 1 ++ #define RXQ_ID_1_LBN 96 ++ #define RXQ_ID_1_WIDTH 12 ++ #define DEST_IP_1_LBN 64 ++ #define DEST_IP_1_WIDTH 32 ++ #define DEST_PORT_TCP_1_LBN 48 ++ #define DEST_PORT_TCP_1_WIDTH 16 ++ #define SRC_IP_1_LBN 16 ++ #define SRC_IP_1_WIDTH 32 ++ #define SRC_TCP_DEST_UDP_1_LBN 0 ++ #define SRC_TCP_DEST_UDP_1_WIDTH 16 ++ ++#if FALCON_EXTENDED_P_BAR ++#define RX_DESC_PTR_TBL_KER_OFST 0x11800 /* Receive descriptor pointer ++ kernel access */ ++#else ++#define RX_DESC_PTR_TBL_KER_OFST 0x1800 /* Receive descriptor pointer ++ kernel access */ ++#endif ++ ++ ++#define RX_DESC_PTR_TBL_OFST 0xF40000 /* Receive descriptor pointer table */ ++ #define RX_ISCSI_DDIG_EN_LBN 88 ++ #define RX_ISCSI_DDIG_EN_WIDTH 1 ++ #define RX_ISCSI_HDIG_EN_LBN 87 ++ #define RX_ISCSI_HDIG_EN_WIDTH 1 ++ #define RX_DESC_PREF_ACT_LBN 86 ++ #define RX_DESC_PREF_ACT_WIDTH 1 ++ #define RX_DC_HW_RPTR_LBN 80 ++ #define RX_DC_HW_RPTR_WIDTH 6 ++ #define RX_DESCQ_HW_RPTR_LBN 68 ++ #define RX_DESCQ_HW_RPTR_WIDTH 12 ++ #define RX_DESCQ_SW_WPTR_LBN 56 ++ #define RX_DESCQ_SW_WPTR_WIDTH 12 ++ #define RX_DESCQ_BUF_BASE_ID_LBN 36 ++ #define RX_DESCQ_BUF_BASE_ID_WIDTH 20 ++ #define RX_DESCQ_EVQ_ID_LBN 24 ++ #define RX_DESCQ_EVQ_ID_WIDTH 12 ++ #define RX_DESCQ_OWNER_ID_LBN 10 ++ #define RX_DESCQ_OWNER_ID_WIDTH 
14 ++ #define RX_DESCQ_LABEL_LBN 5 ++ #define RX_DESCQ_LABEL_WIDTH 5 ++ #define RX_DESCQ_SIZE_LBN 3 ++ #define RX_DESCQ_SIZE_WIDTH 2 ++ #define RX_DESCQ_TYPE_LBN 2 ++ #define RX_DESCQ_TYPE_WIDTH 1 ++ #define RX_DESCQ_JUMBO_LBN 1 ++ #define RX_DESCQ_JUMBO_WIDTH 1 ++ #define RX_DESCQ_EN_LBN 0 ++ #define RX_DESCQ_EN_WIDTH 1 ++ ++ ++#define RX_RSS_INDIR_TBL_B0_OFST 0xFB0000 /* RSS indirection table (B0 only) */ ++ #define RX_RSS_INDIR_ENT_B0_LBN 0 ++ #define RX_RSS_INDIR_ENT_B0_WIDTH 6 ++ ++/*************---- TX Datapath Registers C Header ----*************/ ++#define TX_FLUSH_DESCQ_REG_KER_OFST 0xA00 /* Transmit flush descriptor ++ queue register */ ++#define TX_FLUSH_DESCQ_REG_OFST 0xA00 /* Transmit flush descriptor queue ++ register */ ++ #define TX_FLUSH_DESCQ_CMD_LBN 12 ++ #define TX_FLUSH_DESCQ_CMD_WIDTH 1 ++ #define TX_FLUSH_DESCQ_LBN 0 ++ #define TX_FLUSH_DESCQ_WIDTH 12 ++#define TX_DESC_UPD_REG_KER_OFST 0xA10 /* Kernel transmit descriptor update ++ register. Page-mapped */ ++#define TX_DESC_UPD_REG_PAGE4_OFST 0x8A10 /* Char & user transmit descriptor ++ update register. Page-mapped */ ++#define TX_DESC_UPD_REG_PAGE123K_OFST 0x1000A10 /* Char & user transmit ++ descriptor update register. ++ Page-mapped */ ++ #define TX_DESC_WPTR_LBN 96 ++ #define TX_DESC_WPTR_WIDTH 12 ++ #define TX_DESC_PUSH_CMD_LBN 95 ++ #define TX_DESC_PUSH_CMD_WIDTH 1 ++ #define TX_DESC_LBN 0 ++ #define TX_DESC_WIDTH 95 ++ #define TX_KER_DESC_LBN 0 ++ #define TX_KER_DESC_WIDTH 64 ++ #define TX_USR_DESC_LBN 0 ++ #define TX_USR_DESC_WIDTH 64 ++#define TX_DC_CFG_REG_KER_OFST 0xA20 /* Transmit descriptor cache ++ configuration register */ ++#define TX_DC_CFG_REG_OFST 0xA20 /* Transmit descriptor cache configuration ++ register */ ++ #define TX_DC_SIZE_LBN 0 ++ #define TX_DC_SIZE_WIDTH 2 ++ ++#if FALCON_EXTENDED_P_BAR ++#define TX_DESC_PTR_TBL_KER_OFST 0x11900 /* Transmit descriptor pointer. */ ++#else ++#define TX_DESC_PTR_TBL_KER_OFST 0x1900 /* Transmit descriptor pointer. 
*/ ++#endif ++ ++ ++#define TX_DESC_PTR_TBL_OFST 0xF50000 /* Transmit descriptor pointer */ ++ #define TX_NON_IP_DROP_DIS_B0_LBN 91 ++ #define TX_NON_IP_DROP_DIS_B0_WIDTH 1 ++ #define TX_IP_CHKSM_DIS_B0_LBN 90 ++ #define TX_IP_CHKSM_DIS_B0_WIDTH 1 ++ #define TX_TCP_CHKSM_DIS_B0_LBN 89 ++ #define TX_TCP_CHKSM_DIS_B0_WIDTH 1 ++ #define TX_DESCQ_EN_LBN 88 ++ #define TX_DESCQ_EN_WIDTH 1 ++ #define TX_ISCSI_DDIG_EN_LBN 87 ++ #define TX_ISCSI_DDIG_EN_WIDTH 1 ++ #define TX_ISCSI_HDIG_EN_LBN 86 ++ #define TX_ISCSI_HDIG_EN_WIDTH 1 ++ #define TX_DC_HW_RPTR_LBN 80 ++ #define TX_DC_HW_RPTR_WIDTH 6 ++ #define TX_DESCQ_HW_RPTR_LBN 68 ++ #define TX_DESCQ_HW_RPTR_WIDTH 12 ++ #define TX_DESCQ_SW_WPTR_LBN 56 ++ #define TX_DESCQ_SW_WPTR_WIDTH 12 ++ #define TX_DESCQ_BUF_BASE_ID_LBN 36 ++ #define TX_DESCQ_BUF_BASE_ID_WIDTH 20 ++ #define TX_DESCQ_EVQ_ID_LBN 24 ++ #define TX_DESCQ_EVQ_ID_WIDTH 12 ++ #define TX_DESCQ_OWNER_ID_LBN 10 ++ #define TX_DESCQ_OWNER_ID_WIDTH 14 ++ #define TX_DESCQ_LABEL_LBN 5 ++ #define TX_DESCQ_LABEL_WIDTH 5 ++ #define TX_DESCQ_SIZE_LBN 3 ++ #define TX_DESCQ_SIZE_WIDTH 2 ++ #define TX_DESCQ_TYPE_LBN 1 ++ #define TX_DESCQ_TYPE_WIDTH 2 ++ #define TX_DESCQ_FLUSH_LBN 0 ++ #define TX_DESCQ_FLUSH_WIDTH 1 ++#define TX_CFG_REG_KER_OFST 0xA50 /* Transmit configuration register */ ++#define TX_CFG_REG_OFST 0xA50 /* Transmit configuration register */ ++ #define TX_IP_ID_P1_OFS_LBN 32 ++ #define TX_IP_ID_P1_OFS_WIDTH 15 ++ #define TX_IP_ID_P0_OFS_LBN 16 ++ #define TX_IP_ID_P0_OFS_WIDTH 15 ++ #define TX_TURBO_EN_LBN 3 ++ #define TX_TURBO_EN_WIDTH 1 ++ #define TX_OWNERR_CTL_LBN 2 ++ #define TX_OWNERR_CTL_WIDTH 2 ++ #define TX_NON_IP_DROP_DIS_LBN 1 ++ #define TX_NON_IP_DROP_DIS_WIDTH 1 ++ #define TX_IP_ID_REP_EN_LBN 0 ++ #define TX_IP_ID_REP_EN_WIDTH 1 ++#define TX_RESERVED_REG_KER_OFST 0xA80 /* Transmit configuration register */ ++#define TX_RESERVED_REG_OFST 0xA80 /* Transmit configuration register */ ++ #define TX_CSR_PUSH_EN_LBN 89 ++ #define TX_CSR_PUSH_EN_WIDTH 1 ++ #define TX_RX_SPACER_LBN 64 ++ #define TX_RX_SPACER_WIDTH 8 ++ #define TX_SW_EV_EN_LBN 59 ++ #define TX_SW_EV_EN_WIDTH 1 ++ #define TX_RX_SPACER_EN_LBN 57 ++ #define TX_RX_SPACER_EN_WIDTH 1 ++ #define TX_CSR_PREF_WD_TMR_LBN 24 ++ #define TX_CSR_PREF_WD_TMR_WIDTH 16 ++ #define TX_CSR_ONLY1TAG_LBN 21 ++ #define TX_CSR_ONLY1TAG_WIDTH 1 ++ #define TX_PREF_THRESHOLD_LBN 19 ++ #define TX_PREF_THRESHOLD_WIDTH 2 ++ #define TX_ONE_PKT_PER_Q_LBN 18 ++ #define TX_ONE_PKT_PER_Q_WIDTH 1 ++ #define TX_DIS_NON_IP_EV_LBN 17 ++ #define TX_DIS_NON_IP_EV_WIDTH 1 ++ #define TX_DMA_SPACER_LBN 8 ++ #define TX_DMA_SPACER_WIDTH 8 ++ #define TX_FLUSH_MIN_LEN_EN_B0_LBN 7 ++ #define TX_FLUSH_MIN_LEN_EN_B0_WIDTH 1 ++ #define TX_TCP_DIS_A1_LBN 7 ++ #define TX_TCP_DIS_A1_WIDTH 1 ++ #define TX_IP_DIS_A1_LBN 6 ++ #define TX_IP_DIS_A1_WIDTH 1 ++ #define TX_MAX_CPL_LBN 2 ++ #define TX_MAX_CPL_WIDTH 2 ++ #define TX_MAX_PREF_LBN 0 ++ #define TX_MAX_PREF_WIDTH 2 ++#define TX_VLAN_REG_OFST 0xAE0 /* Transmit VLAN tag register */ ++ #define TX_VLAN_EN_LBN 127 ++ #define TX_VLAN_EN_WIDTH 1 ++ #define TX_VLAN7_PORT1_EN_LBN 125 ++ #define TX_VLAN7_PORT1_EN_WIDTH 1 ++ #define TX_VLAN7_PORT0_EN_LBN 124 ++ #define TX_VLAN7_PORT0_EN_WIDTH 1 ++ #define TX_VLAN7_LBN 112 ++ #define TX_VLAN7_WIDTH 12 ++ #define TX_VLAN6_PORT1_EN_LBN 109 ++ #define TX_VLAN6_PORT1_EN_WIDTH 1 ++ #define TX_VLAN6_PORT0_EN_LBN 108 ++ #define TX_VLAN6_PORT0_EN_WIDTH 1 ++ #define TX_VLAN6_LBN 96 ++ #define TX_VLAN6_WIDTH 12 ++ #define TX_VLAN5_PORT1_EN_LBN 93 ++ #define TX_VLAN5_PORT1_EN_WIDTH 1 ++ #define 
TX_VLAN5_PORT0_EN_LBN 92 ++ #define TX_VLAN5_PORT0_EN_WIDTH 1 ++ #define TX_VLAN5_LBN 80 ++ #define TX_VLAN5_WIDTH 12 ++ #define TX_VLAN4_PORT1_EN_LBN 77 ++ #define TX_VLAN4_PORT1_EN_WIDTH 1 ++ #define TX_VLAN4_PORT0_EN_LBN 76 ++ #define TX_VLAN4_PORT0_EN_WIDTH 1 ++ #define TX_VLAN4_LBN 64 ++ #define TX_VLAN4_WIDTH 12 ++ #define TX_VLAN3_PORT1_EN_LBN 61 ++ #define TX_VLAN3_PORT1_EN_WIDTH 1 ++ #define TX_VLAN3_PORT0_EN_LBN 60 ++ #define TX_VLAN3_PORT0_EN_WIDTH 1 ++ #define TX_VLAN3_LBN 48 ++ #define TX_VLAN3_WIDTH 12 ++ #define TX_VLAN2_PORT1_EN_LBN 45 ++ #define TX_VLAN2_PORT1_EN_WIDTH 1 ++ #define TX_VLAN2_PORT0_EN_LBN 44 ++ #define TX_VLAN2_PORT0_EN_WIDTH 1 ++ #define TX_VLAN2_LBN 32 ++ #define TX_VLAN2_WIDTH 12 ++ #define TX_VLAN1_PORT1_EN_LBN 29 ++ #define TX_VLAN1_PORT1_EN_WIDTH 1 ++ #define TX_VLAN1_PORT0_EN_LBN 28 ++ #define TX_VLAN1_PORT0_EN_WIDTH 1 ++ #define TX_VLAN1_LBN 16 ++ #define TX_VLAN1_WIDTH 12 ++ #define TX_VLAN0_PORT1_EN_LBN 13 ++ #define TX_VLAN0_PORT1_EN_WIDTH 1 ++ #define TX_VLAN0_PORT0_EN_LBN 12 ++ #define TX_VLAN0_PORT0_EN_WIDTH 1 ++ #define TX_VLAN0_LBN 0 ++ #define TX_VLAN0_WIDTH 12 ++#define TX_FIL_CTL_REG_OFST 0xAF0 /* Transmit filter control register */ ++ #define TX_MADR1_FIL_EN_LBN 65 ++ #define TX_MADR1_FIL_EN_WIDTH 1 ++ #define TX_MADR0_FIL_EN_LBN 64 ++ #define TX_MADR0_FIL_EN_WIDTH 1 ++ #define TX_IPFIL31_PORT1_EN_LBN 63 ++ #define TX_IPFIL31_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL31_PORT0_EN_LBN 62 ++ #define TX_IPFIL31_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL30_PORT1_EN_LBN 61 ++ #define TX_IPFIL30_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL30_PORT0_EN_LBN 60 ++ #define TX_IPFIL30_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL29_PORT1_EN_LBN 59 ++ #define TX_IPFIL29_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL29_PORT0_EN_LBN 58 ++ #define TX_IPFIL29_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL28_PORT1_EN_LBN 57 ++ #define TX_IPFIL28_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL28_PORT0_EN_LBN 56 ++ #define TX_IPFIL28_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL27_PORT1_EN_LBN 55 ++ #define TX_IPFIL27_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL27_PORT0_EN_LBN 54 ++ #define TX_IPFIL27_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL26_PORT1_EN_LBN 53 ++ #define TX_IPFIL26_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL26_PORT0_EN_LBN 52 ++ #define TX_IPFIL26_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL25_PORT1_EN_LBN 51 ++ #define TX_IPFIL25_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL25_PORT0_EN_LBN 50 ++ #define TX_IPFIL25_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL24_PORT1_EN_LBN 49 ++ #define TX_IPFIL24_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL24_PORT0_EN_LBN 48 ++ #define TX_IPFIL24_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL23_PORT1_EN_LBN 47 ++ #define TX_IPFIL23_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL23_PORT0_EN_LBN 46 ++ #define TX_IPFIL23_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL22_PORT1_EN_LBN 45 ++ #define TX_IPFIL22_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL22_PORT0_EN_LBN 44 ++ #define TX_IPFIL22_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL21_PORT1_EN_LBN 43 ++ #define TX_IPFIL21_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL21_PORT0_EN_LBN 42 ++ #define TX_IPFIL21_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL20_PORT1_EN_LBN 41 ++ #define TX_IPFIL20_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL20_PORT0_EN_LBN 40 ++ #define TX_IPFIL20_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL19_PORT1_EN_LBN 39 ++ #define TX_IPFIL19_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL19_PORT0_EN_LBN 38 ++ #define TX_IPFIL19_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL18_PORT1_EN_LBN 37 ++ #define TX_IPFIL18_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL18_PORT0_EN_LBN 36 ++ #define TX_IPFIL18_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL17_PORT1_EN_LBN 35 ++ #define 
TX_IPFIL17_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL17_PORT0_EN_LBN 34 ++ #define TX_IPFIL17_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL16_PORT1_EN_LBN 33 ++ #define TX_IPFIL16_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL16_PORT0_EN_LBN 32 ++ #define TX_IPFIL16_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL15_PORT1_EN_LBN 31 ++ #define TX_IPFIL15_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL15_PORT0_EN_LBN 30 ++ #define TX_IPFIL15_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL14_PORT1_EN_LBN 29 ++ #define TX_IPFIL14_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL14_PORT0_EN_LBN 28 ++ #define TX_IPFIL14_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL13_PORT1_EN_LBN 27 ++ #define TX_IPFIL13_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL13_PORT0_EN_LBN 26 ++ #define TX_IPFIL13_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL12_PORT1_EN_LBN 25 ++ #define TX_IPFIL12_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL12_PORT0_EN_LBN 24 ++ #define TX_IPFIL12_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL11_PORT1_EN_LBN 23 ++ #define TX_IPFIL11_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL11_PORT0_EN_LBN 22 ++ #define TX_IPFIL11_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL10_PORT1_EN_LBN 21 ++ #define TX_IPFIL10_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL10_PORT0_EN_LBN 20 ++ #define TX_IPFIL10_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL9_PORT1_EN_LBN 19 ++ #define TX_IPFIL9_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL9_PORT0_EN_LBN 18 ++ #define TX_IPFIL9_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL8_PORT1_EN_LBN 17 ++ #define TX_IPFIL8_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL8_PORT0_EN_LBN 16 ++ #define TX_IPFIL8_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL7_PORT1_EN_LBN 15 ++ #define TX_IPFIL7_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL7_PORT0_EN_LBN 14 ++ #define TX_IPFIL7_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL6_PORT1_EN_LBN 13 ++ #define TX_IPFIL6_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL6_PORT0_EN_LBN 12 ++ #define TX_IPFIL6_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL5_PORT1_EN_LBN 11 ++ #define TX_IPFIL5_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL5_PORT0_EN_LBN 10 ++ #define TX_IPFIL5_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL4_PORT1_EN_LBN 9 ++ #define TX_IPFIL4_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL4_PORT0_EN_LBN 8 ++ #define TX_IPFIL4_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL3_PORT1_EN_LBN 7 ++ #define TX_IPFIL3_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL3_PORT0_EN_LBN 6 ++ #define TX_IPFIL3_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL2_PORT1_EN_LBN 5 ++ #define TX_IPFIL2_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL2_PORT0_EN_LBN 4 ++ #define TX_IPFIL2_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL1_PORT1_EN_LBN 3 ++ #define TX_IPFIL1_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL1_PORT0_EN_LBN 2 ++ #define TX_IPFIL1_PORT0_EN_WIDTH 1 ++ #define TX_IPFIL0_PORT1_EN_LBN 1 ++ #define TX_IPFIL0_PORT1_EN_WIDTH 1 ++ #define TX_IPFIL0_PORT0_EN_LBN 0 ++ #define TX_IPFIL0_PORT0_EN_WIDTH 1 ++#define TX_IPFIL_TBL_OFST 0xB00 /* Transmit IP source address filter table */ ++ #define TX_IPFIL_MASK_LBN 32 ++ #define TX_IPFIL_MASK_WIDTH 32 ++ #define TX_IP_SRC_ADR_LBN 0 ++ #define TX_IP_SRC_ADR_WIDTH 32 ++#define TX_PACE_REG_A1_OFST 0xF80000 /* Transmit pace control register */ ++#define TX_PACE_REG_B0_OFST 0xA90 /* Transmit pace control register */ ++ #define TX_PACE_SB_NOTAF_LBN 19 ++ #define TX_PACE_SB_NOTAF_WIDTH 10 ++ #define TX_PACE_SB_AF_LBN 9 ++ #define TX_PACE_SB_AF_WIDTH 10 ++ #define TX_PACE_FB_BASE_LBN 5 ++ #define TX_PACE_FB_BASE_WIDTH 4 ++ #define TX_PACE_BIN_TH_LBN 0 ++ #define TX_PACE_BIN_TH_WIDTH 5 ++#define TX_PACE_TBL_A1_OFST 0xF80040 /* Transmit pacing table */ ++#define TX_PACE_TBL_FIRST_QUEUE_A1 4 ++#define TX_PACE_TBL_B0_OFST 0xF80000 /* Transmit pacing table */ ++#define TX_PACE_TBL_FIRST_QUEUE_B0 0 ++ #define TX_PACE_LBN 0 ++ #define 
TX_PACE_WIDTH 5 ++ ++/*************---- EE/Flash Registers C Header ----*************/ ++#define EE_SPI_HCMD_REG_KER_OFST 0x100 /* SPI host command register */ ++#define EE_SPI_HCMD_REG_OFST 0x100 /* SPI host command register */ ++ #define EE_SPI_HCMD_CMD_EN_LBN 31 ++ #define EE_SPI_HCMD_CMD_EN_WIDTH 1 ++ #define EE_WR_TIMER_ACTIVE_LBN 28 ++ #define EE_WR_TIMER_ACTIVE_WIDTH 1 ++ #define EE_SPI_HCMD_SF_SEL_LBN 24 ++ #define EE_SPI_HCMD_SF_SEL_WIDTH 1 ++ #define EE_SPI_HCMD_DABCNT_LBN 16 ++ #define EE_SPI_HCMD_DABCNT_WIDTH 5 ++ #define EE_SPI_HCMD_READ_LBN 15 ++ #define EE_SPI_HCMD_READ_WIDTH 1 ++ #define EE_SPI_HCMD_DUBCNT_LBN 12 ++ #define EE_SPI_HCMD_DUBCNT_WIDTH 2 ++ #define EE_SPI_HCMD_ADBCNT_LBN 8 ++ #define EE_SPI_HCMD_ADBCNT_WIDTH 2 ++ #define EE_SPI_HCMD_ENC_LBN 0 ++ #define EE_SPI_HCMD_ENC_WIDTH 8 ++#define EE_SPI_HADR_REG_KER_OFST 0X110 /* SPI host address register */ ++#define EE_SPI_HADR_REG_OFST 0X110 /* SPI host address register */ ++ #define EE_SPI_HADR_DUBYTE_LBN 24 ++ #define EE_SPI_HADR_DUBYTE_WIDTH 8 ++ #define EE_SPI_HADR_ADR_LBN 0 ++ #define EE_SPI_HADR_ADR_WIDTH 24 ++#define EE_SPI_HDATA_REG_KER_OFST 0x120 /* SPI host data register */ ++#define EE_SPI_HDATA_REG_OFST 0x120 /* SPI host data register */ ++ #define EE_SPI_HDATA3_LBN 96 ++ #define EE_SPI_HDATA3_WIDTH 32 ++ #define EE_SPI_HDATA2_LBN 64 ++ #define EE_SPI_HDATA2_WIDTH 32 ++ #define EE_SPI_HDATA1_LBN 32 ++ #define EE_SPI_HDATA1_WIDTH 32 ++ #define EE_SPI_HDATA0_LBN 0 ++ #define EE_SPI_HDATA0_WIDTH 32 ++#define EE_BASE_PAGE_REG_KER_OFST 0x130 /* Expansion ROM base mirror register */ ++#define EE_BASE_PAGE_REG_OFST 0x130 /* Expansion ROM base mirror register */ ++ #define EE_EXP_ROM_WINDOW_BASE_LBN 16 ++ #define EE_EXP_ROM_WINDOW_BASE_WIDTH 13 ++ #define EE_EXPROM_MASK_LBN 0 ++ #define EE_EXPROM_MASK_WIDTH 13 ++#define EE_VPD_CFG0_REG_KER_OFST 0X140 /* SPI/VPD configuration register */ ++#define EE_VPD_CFG0_REG_OFST 0X140 /* SPI/VPD configuration register */ ++ #define EE_SF_FASTRD_EN_LBN 127 ++ #define EE_SF_FASTRD_EN_WIDTH 1 ++ #define EE_SF_CLOCK_DIV_LBN 120 ++ #define EE_SF_CLOCK_DIV_WIDTH 7 ++ #define EE_VPD_WIP_POLL_LBN 119 ++ #define EE_VPD_WIP_POLL_WIDTH 1 ++ #define EE_VPDW_LENGTH_LBN 80 ++ #define EE_VPDW_LENGTH_WIDTH 15 ++ #define EE_VPDW_BASE_LBN 64 ++ #define EE_VPDW_BASE_WIDTH 15 ++ #define EE_VPD_WR_CMD_EN_LBN 56 ++ #define EE_VPD_WR_CMD_EN_WIDTH 8 ++ #define EE_VPD_BASE_LBN 32 ++ #define EE_VPD_BASE_WIDTH 24 ++ #define EE_VPD_LENGTH_LBN 16 ++ #define EE_VPD_LENGTH_WIDTH 13 ++ #define EE_VPD_AD_SIZE_LBN 8 ++ #define EE_VPD_AD_SIZE_WIDTH 5 ++ #define EE_VPD_ACCESS_ON_LBN 5 ++ #define EE_VPD_ACCESS_ON_WIDTH 1 ++#define EE_VPD_SW_CNTL_REG_KER_OFST 0X150 /* VPD access SW control register */ ++#define EE_VPD_SW_CNTL_REG_OFST 0X150 /* VPD access SW control register */ ++ #define EE_VPD_CYCLE_PENDING_LBN 31 ++ #define EE_VPD_CYCLE_PENDING_WIDTH 1 ++ #define EE_VPD_CYC_WRITE_LBN 28 ++ #define EE_VPD_CYC_WRITE_WIDTH 1 ++ #define EE_VPD_CYC_ADR_LBN 0 ++ #define EE_VPD_CYC_ADR_WIDTH 15 ++#define EE_VPD_SW_DATA_REG_KER_OFST 0x160 /* VPD access SW data register */ ++#define EE_VPD_SW_DATA_REG_OFST 0x160 /* VPD access SW data register */ ++ #define EE_VPD_CYC_DAT_LBN 0 ++ #define EE_VPD_CYC_DAT_WIDTH 32 +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_desc.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_desc.h 
2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,75 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides EtherFabric NIC - EFXXXX (aka Falcon) descriptor ++ * definitions. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++/*************---- Descriptors C Headers ----*************/ ++/* Receive Kernel IP Descriptor */ ++ #define RX_KER_BUF_SIZE_LBN 48 ++ #define RX_KER_BUF_SIZE_WIDTH 14 ++ #define RX_KER_BUF_REGION_LBN 46 ++ #define RX_KER_BUF_REGION_WIDTH 2 ++ #define RX_KER_BUF_REGION0_DECODE 0 ++ #define RX_KER_BUF_REGION1_DECODE 1 ++ #define RX_KER_BUF_REGION2_DECODE 2 ++ #define RX_KER_BUF_REGION3_DECODE 3 ++ #define RX_KER_BUF_ADR_LBN 0 ++ #define RX_KER_BUF_ADR_WIDTH 46 ++/* Receive User IP Descriptor */ ++ #define RX_USR_2BYTE_OFS_LBN 20 ++ #define RX_USR_2BYTE_OFS_WIDTH 12 ++ #define RX_USR_BUF_ID_LBN 0 ++ #define RX_USR_BUF_ID_WIDTH 20 ++/* Transmit Kernel IP Descriptor */ ++ #define TX_KER_PORT_LBN 63 ++ #define TX_KER_PORT_WIDTH 1 ++ #define TX_KER_CONT_LBN 62 ++ #define TX_KER_CONT_WIDTH 1 ++ #define TX_KER_BYTE_CNT_LBN 48 ++ #define TX_KER_BYTE_CNT_WIDTH 14 ++ #define TX_KER_BUF_REGION_LBN 46 ++ #define TX_KER_BUF_REGION_WIDTH 2 ++ #define TX_KER_BUF_REGION0_DECODE 0 ++ #define TX_KER_BUF_REGION1_DECODE 1 ++ #define TX_KER_BUF_REGION2_DECODE 2 ++ #define TX_KER_BUF_REGION3_DECODE 3 ++ #define TX_KER_BUF_ADR_LBN 0 ++ #define TX_KER_BUF_ADR_WIDTH 46 ++/* Transmit User IP Descriptor */ ++ #define TX_USR_PORT_LBN 47 ++ #define TX_USR_PORT_WIDTH 1 ++ #define TX_USR_CONT_LBN 46 ++ #define TX_USR_CONT_WIDTH 1 ++ #define TX_USR_BYTE_CNT_LBN 33 ++ #define TX_USR_BYTE_CNT_WIDTH 13 ++ #define TX_USR_BUF_ID_LBN 13 ++ #define TX_USR_BUF_ID_WIDTH 20 ++ #define TX_USR_BYTE_OFS_LBN 0 ++ #define TX_USR_BYTE_OFS_WIDTH 13 +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_event.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_event.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,155 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides 
EtherFabric NIC - EFXXXX (aka Falcon) event ++ * definitions. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++/*************---- Events Format C Header ----*************/ ++/*************---- Event entry ----*************/ ++ #define EV_CODE_LBN 60 ++ #define EV_CODE_WIDTH 4 ++ #define RX_IP_EV_DECODE 0 ++ #define TX_IP_EV_DECODE 2 ++ #define DRIVER_EV_DECODE 5 ++ #define GLOBAL_EV_DECODE 6 ++ #define DRV_GEN_EV_DECODE 7 ++ #define EV_DATA_LBN 0 ++ #define EV_DATA_WIDTH 60 ++/******---- Receive IP events for both Kernel & User event queues ----******/ ++ #define RX_EV_PKT_OK_LBN 56 ++ #define RX_EV_PKT_OK_WIDTH 1 ++ #define RX_EV_BUF_OWNER_ID_ERR_LBN 54 ++ #define RX_EV_BUF_OWNER_ID_ERR_WIDTH 1 ++ #define RX_EV_IP_HDR_CHKSUM_ERR_LBN 52 ++ #define RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1 ++ #define RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51 ++ #define RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1 ++ #define RX_EV_ETH_CRC_ERR_LBN 50 ++ #define RX_EV_ETH_CRC_ERR_WIDTH 1 ++ #define RX_EV_FRM_TRUNC_LBN 49 ++ #define RX_EV_FRM_TRUNC_WIDTH 1 ++ #define RX_EV_DRIB_NIB_LBN 48 ++ #define RX_EV_DRIB_NIB_WIDTH 1 ++ #define RX_EV_TOBE_DISC_LBN 47 ++ #define RX_EV_TOBE_DISC_WIDTH 1 ++ #define RX_EV_PKT_TYPE_LBN 44 ++ #define RX_EV_PKT_TYPE_WIDTH 3 ++ #define RX_EV_PKT_TYPE_ETH_DECODE 0 ++ #define RX_EV_PKT_TYPE_LLC_DECODE 1 ++ #define RX_EV_PKT_TYPE_JUMBO_DECODE 2 ++ #define RX_EV_PKT_TYPE_VLAN_DECODE 3 ++ #define RX_EV_PKT_TYPE_VLAN_LLC_DECODE 4 ++ #define RX_EV_PKT_TYPE_VLAN_JUMBO_DECODE 5 ++ #define RX_EV_HDR_TYPE_LBN 42 ++ #define RX_EV_HDR_TYPE_WIDTH 2 ++ #define RX_EV_HDR_TYPE_TCP_IPV4_DECODE 0 ++ #define RX_EV_HDR_TYPE_UDP_IPV4_DECODE 1 ++ #define RX_EV_HDR_TYPE_OTHER_IP_DECODE 2 ++ #define RX_EV_HDR_TYPE_NON_IP_DECODE 3 ++ #define RX_EV_DESC_Q_EMPTY_LBN 41 ++ #define RX_EV_DESC_Q_EMPTY_WIDTH 1 ++ #define RX_EV_MCAST_HASH_MATCH_LBN 40 ++ #define RX_EV_MCAST_HASH_MATCH_WIDTH 1 ++ #define RX_EV_MCAST_PKT_LBN 39 ++ #define RX_EV_MCAST_PKT_WIDTH 1 ++ #define RX_EV_Q_LABEL_LBN 32 ++ #define RX_EV_Q_LABEL_WIDTH 5 ++ #define RX_JUMBO_CONT_LBN 31 ++ #define RX_JUMBO_CONT_WIDTH 1 ++ #define RX_SOP_LBN 15 ++ #define RX_SOP_WIDTH 1 ++ #define RX_PORT_LBN 30 ++ #define RX_PORT_WIDTH 1 ++ #define RX_EV_BYTE_CNT_LBN 16 ++ #define RX_EV_BYTE_CNT_WIDTH 14 ++ #define RX_iSCSI_PKT_OK_LBN 14 ++ #define RX_iSCSI_PKT_OK_WIDTH 1 ++ #define RX_ISCSI_DDIG_ERR_LBN 13 ++ #define RX_ISCSI_DDIG_ERR_WIDTH 1 ++ #define RX_ISCSI_HDIG_ERR_LBN 12 ++ #define RX_ISCSI_HDIG_ERR_WIDTH 1 ++ #define RX_EV_DESC_PTR_LBN 0 ++ #define RX_EV_DESC_PTR_WIDTH 12 ++/******---- Transmit IP events for both Kernel & User event queues 
----******/ ++ #define TX_EV_PKT_ERR_LBN 38 ++ #define TX_EV_PKT_ERR_WIDTH 1 ++ #define TX_EV_PKT_TOO_BIG_LBN 37 ++ #define TX_EV_PKT_TOO_BIG_WIDTH 1 ++ #define TX_EV_Q_LABEL_LBN 32 ++ #define TX_EV_Q_LABEL_WIDTH 5 ++ #define TX_EV_PORT_LBN 16 ++ #define TX_EV_PORT_WIDTH 1 ++ #define TX_EV_WQ_FF_FULL_LBN 15 ++ #define TX_EV_WQ_FF_FULL_WIDTH 1 ++ #define TX_EV_BUF_OWNER_ID_ERR_LBN 14 ++ #define TX_EV_BUF_OWNER_ID_ERR_WIDTH 1 ++ #define TX_EV_COMP_LBN 12 ++ #define TX_EV_COMP_WIDTH 1 ++ #define TX_EV_DESC_PTR_LBN 0 ++ #define TX_EV_DESC_PTR_WIDTH 12 ++/*************---- Char or Kernel driver events ----*************/ ++ #define DRIVER_EV_SUB_CODE_LBN 56 ++ #define DRIVER_EV_SUB_CODE_WIDTH 4 ++ #define TX_DESCQ_FLS_DONE_EV_DECODE 0x0 ++ #define RX_DESCQ_FLS_DONE_EV_DECODE 0x1 ++ #define EVQ_INIT_DONE_EV_DECODE 0x2 ++ #define EVQ_NOT_EN_EV_DECODE 0x3 ++ #define RX_DESCQ_FLSFF_OVFL_EV_DECODE 0x4 ++ #define SRM_UPD_DONE_EV_DECODE 0x5 ++ #define WAKE_UP_EV_DECODE 0x6 ++ #define TX_PKT_NON_TCP_UDP_DECODE 0x9 ++ #define TIMER_EV_DECODE 0xA ++ #define RX_DSC_ERROR_EV_DECODE 0xE ++ #define DRIVER_EV_TX_DESCQ_ID_LBN 0 ++ #define DRIVER_EV_TX_DESCQ_ID_WIDTH 12 ++ #define DRIVER_EV_RX_DESCQ_ID_LBN 0 ++ #define DRIVER_EV_RX_DESCQ_ID_WIDTH 12 ++ #define DRIVER_EV_EVQ_ID_LBN 0 ++ #define DRIVER_EV_EVQ_ID_WIDTH 12 ++ #define DRIVER_TMR_ID_LBN 0 ++ #define DRIVER_TMR_ID_WIDTH 12 ++ #define DRIVER_EV_SRM_UPD_LBN 0 ++ #define DRIVER_EV_SRM_UPD_WIDTH 2 ++ #define SRM_CLR_EV_DECODE 0 ++ #define SRM_UPD_EV_DECODE 1 ++ #define SRM_ILLCLR_EV_DECODE 2 ++/********---- Global events. Sent to both event queue 0 and 4. ----********/ ++ #define XFP_PHY_INTR_LBN 10 ++ #define XFP_PHY_INTR_WIDTH 1 ++ #define XG_PHY_INTR_LBN 9 ++ #define XG_PHY_INTR_WIDTH 1 ++ #define G_PHY1_INTR_LBN 8 ++ #define G_PHY1_INTR_WIDTH 1 ++ #define G_PHY0_INTR_LBN 7 ++ #define G_PHY0_INTR_WIDTH 1 ++/*************---- Driver generated events ----*************/ ++ #define DRV_GEN_EV_CODE_LBN 60 ++ #define DRV_GEN_EV_CODE_WIDTH 4 ++ #define DRV_GEN_EV_DATA_LBN 0 ++ #define DRV_GEN_EV_DATA_WIDTH 60 +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_intr_vec.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_intr_vec.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,44 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides EtherFabric NIC - EFXXXX (aka Falcon) interrupt ++ * vector definitions. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++/*************---- Interrupt Vector Format C Header ----*************/ ++#define DW0_OFST 0x0 /* Double-word 0: Event queue FIFO interrupts */ ++ #define EVQ_FIFO_HF_LBN 1 ++ #define EVQ_FIFO_HF_WIDTH 1 ++ #define EVQ_FIFO_AF_LBN 0 ++ #define EVQ_FIFO_AF_WIDTH 1 ++#define DW1_OFST 0x4 /* Double-word 1: Interrupt indicator */ ++ #define INT_FLAG_LBN 0 ++ #define INT_FLAG_WIDTH 1 ++#define DW2_OFST 0x8 /* Double-word 2: Fatal interrupts */ ++ #define FATAL_INT_LBN 0 ++ #define FATAL_INT_WIDTH 1 +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/workarounds.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/workarounds.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,67 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides workaround settings for EtherFabric NICs. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_DRIVER_EFAB_WORKAROUNDS_H__ ++#define __CI_DRIVER_EFAB_WORKAROUNDS_H__ ++ ++/*---------------------------------------------------------------------------- ++ * ++ * Hardware workarounds which have global scope ++ * ++ *---------------------------------------------------------------------------*/ ++ ++#if defined(__CI_HARDWARE_CONFIG_FALCON_B0__) ++/*------------------------------- B0 ---------------------------------------*/ ++ ++#define BUG2175_WORKAROUND 0 /* TX event batching for dual port operation. ++ This removes the effect (dup TX events) ++ of the fix ++ (TX event per packet + batch events) */ ++#define BUG5302_WORKAROUND 0 /* unstick TX DMAQ after out-of-range wr ptr */ ++#define BUG5762_WORKAROUND 0 /* Set all queues to jumbo mode */ ++#define BUG5391_WORKAROUND 0 /* Misaligned TX can't span 512-byte boundary */ ++#define BUG7916_WORKAROUND 0 /* RX flush gets lost */ ++ ++#else ++/*------------------------------- A0/A1 ------------------------------------*/ ++ ++#define BUG2175_WORKAROUND 1 /* TX event batching for dual port operation. 
++ This removes the effect (dup TX events) ++ of the fix ++ (TX event per packet + batch events) */ ++#define BUG5302_WORKAROUND 1 /* unstick TX DMAQ after out-of-range wr ptr */ ++#define BUG5762_WORKAROUND 1 /* Set all queues to jumbo mode */ ++#define BUG5391_WORKAROUND 1 /* Misaligned TX can't span 512-byte boundary */ ++#define BUG7916_WORKAROUND 1 /* RX flush gets lost */ ++ ++#endif /* B0/A01 */ ++ ++#endif /* __CI_DRIVER_EFAB_WORKAROUNDS_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/resource/efx_vi.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/resource/efx_vi.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,273 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains public EFX VI API to Solarflare resource manager. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_DRIVER_RESOURCE_EFX_VI_H__ ++#define __CI_DRIVER_RESOURCE_EFX_VI_H__ ++ ++/* Default size of event queue in the efx_vi resource. Copied from ++ * CI_CFG_NETIF_EVENTQ_SIZE */ ++#define EFX_VI_EVENTQ_SIZE_DEFAULT 1024 ++ ++extern int efx_vi_eventq_size; ++ ++/************************************************************************** ++ * efx_vi_state types, allocation and free ++ **************************************************************************/ ++ ++/*! Handle for refering to a efx_vi */ ++struct efx_vi_state; ++ ++/*! ++ * Allocate an efx_vi, including event queue and pt_endpoint ++ * ++ * \param vih_out Pointer to a handle that is set on success ++ * \param ifindex Index of the network interface desired ++ * \return Zero on success (and vih_out set), non-zero on failure. ++ */ ++extern int ++efx_vi_alloc(struct efx_vi_state **vih_out, int ifindex); ++ ++/*! ++ * Free a previously allocated efx_vi ++ * ++ * \param vih The handle of the efx_vi to free ++ */ ++extern void ++efx_vi_free(struct efx_vi_state *vih); ++ ++/*! ++ * Reset a previously allocated efx_vi ++ * ++ * \param vih The handle of the efx_vi to reset ++ */ ++extern void ++efx_vi_reset(struct efx_vi_state *vih); ++ ++/************************************************************************** ++ * efx_vi_eventq types and functions ++ **************************************************************************/ ++ ++/*! 
++ * Register a function to receive callbacks when event queue timeouts ++ * or wakeups occur. Only one function per efx_vi can be registered ++ * at once. ++ * ++ * \param vih The handle to identify the efx_vi ++ * \param callback The function to callback ++ * \param context An argument to pass to the callback function ++ * \return Zero on success, non-zero on failure. ++ */ ++extern int ++efx_vi_eventq_register_callback(struct efx_vi_state *vih, ++ void (*callback)(void *context, int is_timeout), ++ void *context); ++ ++/*! ++ * Remove the current eventq timeout or wakeup callback function ++ * ++ * \param vih The handle to identify the efx_vi ++ * \return Zero on success, non-zero on failure ++ */ ++extern int ++efx_vi_eventq_kill_callback(struct efx_vi_state *vih); ++ ++/************************************************************************** ++ * efx_vi_dma_map types and functions ++ **************************************************************************/ ++ ++/*! ++ * Handle for refering to a efx_vi ++ */ ++struct efx_vi_dma_map_state; ++ ++/*! ++ * Map a list of buffer pages so they are registered with the hardware ++ * ++ * \param vih The handle to identify the efx_vi ++ * \param addrs An array of page pointers to map ++ * \param n_addrs Length of the page pointer array. Must be a power of two. ++ * \param dmh_out Set on success to a handle used to refer to this mapping ++ * \return Zero on success, non-zero on failure. ++ */ ++extern int ++efx_vi_dma_map_pages(struct efx_vi_state *vih, struct page **pages, ++ int n_pages, struct efx_vi_dma_map_state **dmh_out); ++extern int ++efx_vi_dma_map_addrs(struct efx_vi_state *vih, ++ unsigned long long *dev_bus_addrs, int n_pages, ++ struct efx_vi_dma_map_state **dmh_out); ++ ++/*! ++ * Unmap a previously mapped set of pages so they are no longer registered ++ * with the hardware. ++ * ++ * \param vih The handle to identify the efx_vi ++ * \param dmh The handle to identify the dma mapping ++ */ ++extern void ++efx_vi_dma_unmap_pages(struct efx_vi_state *vih, ++ struct efx_vi_dma_map_state *dmh); ++extern void ++efx_vi_dma_unmap_addrs(struct efx_vi_state *vih, ++ struct efx_vi_dma_map_state *dmh); ++ ++/*! ++ * Retrieve the buffer address of the mapping ++ * ++ * \param vih The handle to identify the efx_vi ++ * \param dmh The handle to identify the buffer mapping ++ * \return The buffer address on success, or zero on failure ++ */ ++extern unsigned ++efx_vi_dma_get_map_addr(struct efx_vi_state *vih, ++ struct efx_vi_dma_map_state *dmh); ++ ++/************************************************************************** ++ * efx_vi filter functions ++ **************************************************************************/ ++ ++#define EFX_VI_STATIC_FILTERS 32 ++ ++/*! Handle to refer to a filter instance */ ++struct filter_resource_t; ++ ++/*! ++ * Allocate and add a filter ++ * ++ * \param vih The handle to identify the efx_vi ++ * \param protocol The protocol of the new filter: UDP or TCP ++ * \param ip_addr_be32 The local ip address of the filter ++ * \param port_le16 The local port of the filter ++ * \param fh_out Set on success to be a handle to refer to this filter ++ * \return Zero on success, non-zero on failure. ++ */ ++extern int ++efx_vi_filter(struct efx_vi_state *vih, int protocol, unsigned ip_addr_be32, ++ int port_le16, struct filter_resource_t **fh_out); ++ ++/*! 
++ * Remove a filter and free resources associated with it ++ * ++ * \param vih The handle to identify the efx_vi ++ * \param fh The handle to identify the filter ++ * \return Zero on success, non-zero on failure ++ */ ++extern int ++efx_vi_filter_stop(struct efx_vi_state *vih, struct filter_resource_t *fh); ++ ++/************************************************************************** ++ * efx_vi hw resources types and functions ++ **************************************************************************/ ++ ++/*! Constants for the type field in efx_vi_hw_resource */ ++#define EFX_VI_HW_RESOURCE_TXDMAQ 0x0 /* PFN of TX DMA Q */ ++#define EFX_VI_HW_RESOURCE_RXDMAQ 0x1 /* PFN of RX DMA Q */ ++#define EFX_VI_HW_RESOURCE_EVQTIMER 0x4 /* Address of event q timer */ ++ ++/* Address of event q pointer (EF1) */ ++#define EFX_VI_HW_RESOURCE_EVQPTR 0x5 ++/* Address of register pointer (Falcon A) */ ++#define EFX_VI_HW_RESOURCE_EVQRPTR 0x6 ++/* Offset of register pointer (Falcon B) */ ++#define EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET 0x7 ++/* Address of mem KVA */ ++#define EFX_VI_HW_RESOURCE_EVQMEMKVA 0x8 ++/* PFN of doorbell page (Falcon) */ ++#define EFX_VI_HW_RESOURCE_BELLPAGE 0x9 ++ ++/*! How large an array to allocate for the get_() functions - smaller ++ than the total number of constants as some are mutually exclusive */ ++#define EFX_VI_HW_RESOURCE_MAXSIZE 0x7 ++ ++/*! Constants for the mem_type field in efx_vi_hw_resource */ ++#define EFX_VI_HW_RESOURCE_IOBUFFER 0 /* Host memory */ ++#define EFX_VI_HW_RESOURCE_PERIPHERAL 1 /* Card memory/registers */ ++ ++/*! ++ * Data structure providing information on a hardware resource mapping ++ */ ++struct efx_vi_hw_resource { ++ u8 type; /*!< What this resource represents */ ++ u8 mem_type; /*!< What type of memory is it in, eg, ++ * host or iomem */ ++ u8 more_to_follow; /*!< Is this part of a multi-region resource */ ++ u32 length; /*!< Length of the resource in bytes */ ++ unsigned long address; /*!< Address of this resource */ ++}; ++ ++/*! ++ * Metadata concerning the list of hardware resource mappings ++ */ ++struct efx_vi_hw_resource_metadata { ++ int evq_order; ++ int evq_offs; ++ int evq_capacity; ++ int instance; ++ unsigned rx_capacity; ++ unsigned tx_capacity; ++ int nic_arch; ++ int nic_revision; ++ char nic_variant; ++}; ++ ++/*! ++ * Obtain a list of hardware resource mappings, using virtual addresses ++ * ++ * \param vih The handle to identify the efx_vi ++ * \param mdata Pointer to a structure to receive the metadata ++ * \param hw_res_array An array to receive the list of hardware resources ++ * \param length The length of hw_res_array. Updated on success to contain ++ * the number of entries in the supplied array that were used. ++ * \return Zero on success, non-zero on failure ++ */ ++extern int ++efx_vi_hw_resource_get_virt(struct efx_vi_state *vih, ++ struct efx_vi_hw_resource_metadata *mdata, ++ struct efx_vi_hw_resource *hw_res_array, ++ int *length); ++ ++/*! ++ * Obtain a list of hardware resource mappings, using physical addresses ++ * ++ * \param vih The handle to identify the efx_vi ++ * \param mdata Pointer to a structure to receive the metadata ++ * \param hw_res_array An array to receive the list of hardware resources ++ * \param length The length of hw_res_array. Updated on success to contain ++ * the number of entries in the supplied array that were used. 
++ * \return Zero on success, non-zero on failure ++ */ ++extern int ++efx_vi_hw_resource_get_phys(struct efx_vi_state *vih, ++ struct efx_vi_hw_resource_metadata *mdata, ++ struct efx_vi_hw_resource *hw_res_array, ++ int *length); ++ ++#endif /* __CI_DRIVER_RESOURCE_EFX_VI_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/resource/linux_efhw_nic.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/driver/resource/linux_efhw_nic.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,69 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains definition of the public type struct linux_efhw_nic. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_DRIVER_RESOURCE_LINUX_RESOURCE__ ++#define __CI_DRIVER_RESOURCE_LINUX_RESOURCE__ ++ ++#include ++#include ++ ++ ++/************************************************************************ ++ * Per-nic structure in the resource driver * ++ ************************************************************************/ ++ ++struct linux_efhw_nic { ++ struct efrm_nic efrm_nic; ++ ++ struct pci_dev *pci_dev; /*!< pci descriptor */ ++ struct tasklet_struct tasklet; /*!< for interrupt bottom half */ ++ ++ /* Physical addresses of the control aperture bar. */ ++ unsigned long ctr_ap_pci_addr; ++ ++ /*! Callbacks for driverlink, when needed. */ ++ struct efx_dl_callbacks *dl_callbacks; ++ ++ /*! Event handlers. 
*/
++	struct efhw_ev_handler *ev_handlers;
++
++};
++
++#define linux_efhw_nic(_efhw_nic) \
++  container_of(_efhw_nic, struct linux_efhw_nic, efrm_nic.efhw_nic)
++
++#endif /* __CI_DRIVER_RESOURCE_LINUX_RESOURCE__ */
+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/checks.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/checks.h	2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,118 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers -
++ *          resource management for Xen backend, OpenOnload, etc
++ *          (including support for SFE4001 10GBT NIC)
++ *
++ * This file provides helpers to turn bit shifts into dword shifts and
++ * check that the bit fields haven't overflowed the dword, etc.
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ *                      9501 Jeronimo Road, Suite 250,
++ *                      Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#ifndef __CI_EFHW_CHECK_H__
++#define __CI_EFHW_CHECK_H__
++
++/*----------------------------------------------------------------------------
++ *
++ * Helpers to turn bit shifts into dword shifts and check that the bit fields
++ * haven't overflowed the dword, etc.  The aim is to preserve consistency with
++ * the autogenerated headers - once stable we could hard-code.
++ *
++ *---------------------------------------------------------------------------*/
++
++/* mask constructors */
++#define __FALCON_MASK(WIDTH, T) ((((T)1) << (WIDTH)) - 1)
++#define __FALCON_MASK32(WIDTH) __FALCON_MASK((WIDTH), uint32_t)
++#define __FALCON_MASK64(WIDTH) __FALCON_MASK((WIDTH), uint64_t)
++
++#define __FALCON_MASKFIELD32(LBN, WIDTH) \
++	((uint32_t)(__FALCON_MASK32(WIDTH) << (LBN)))
++
++/* constructors for fields which span the first and second dwords */
++#define __LW(LBN) (32 - (LBN))
++#define __LOW(v, LBN, WIDTH) \
++	((uint32_t)(((v) & __FALCON_MASK64(__LW((LBN)))) << (LBN)))
++#define __HIGH(v, LBN, WIDTH) \
++	((uint32_t)(((v) >> __LW((LBN))) & \
++		    __FALCON_MASK64((WIDTH - __LW((LBN))))))
++/* constructors for fields within the second dword */
++#define __DW2(LBN) ((LBN) - 32)
++
++/* constructors for fields which span the second and third dwords */
++#define __LW2(LBN) (64 - (LBN))
++#define __LOW2(v, LBN, WIDTH) \
++	((uint32_t)(((v) & __FALCON_MASK64(__LW2((LBN)))) << ((LBN) - 32)))
++#define __HIGH2(v, LBN, WIDTH) \
++	((uint32_t)(((v) >> __LW2((LBN))) & \
++		    __FALCON_MASK64((WIDTH - __LW2((LBN))))))
++
++/* constructors for fields within the third dword */
++#define __DW3(LBN) ((LBN) - 64)
++
++/* constructors for fields which span the third and fourth dwords */
++#define __LW3(LBN) (96 - (LBN))
++#define __LOW3(v, LBN, WIDTH) \
++	((uint32_t)(((v) & __FALCON_MASK64(__LW3((LBN)))) << ((LBN) - 64)))
++#define __HIGH3(v, LBN, WIDTH) \
++	((uint32_t)(((v) >> __LW3((LBN))) & \
++		    __FALCON_MASK64((WIDTH - __LW3((LBN))))))
++
++/* constructors for fields within the fourth dword */
++#define __DW4(LBN) ((LBN) - 96)
++
++/* checks that the autogenerated headers are consistent with our model */
++#define __WIDTHCHCK(a, b) EFHW_ASSERT((a) == (b))
++#define __RANGECHCK(v, WIDTH) \
++	EFHW_ASSERT(((uint64_t)(v) & ~(__FALCON_MASK64((WIDTH)))) == 0)
++
++/* fields within the first dword */
++#define __DWCHCK(LBN, WIDTH) \
++	EFHW_ASSERT(((LBN) >= 0) && (((LBN)+(WIDTH)) <= 32))
++
++/* fields which span the first and second dwords */
++#define __LWCHK(LBN, WIDTH) EFHW_ASSERT(WIDTH >= __LW(LBN))
++
++/* fields within the second dword */
++#define __DW2CHCK(LBN, WIDTH) \
++	EFHW_ASSERT(((LBN) >= 32) && (((LBN)+(WIDTH)) <= 64))
++
++/* fields which span the second and third dwords */
++#define __LW2CHK(LBN, WIDTH) EFHW_ASSERT(WIDTH >= __LW2(LBN))
++
++/* fields within the third dword */
++#define __DW3CHCK(LBN, WIDTH) \
++	EFHW_ASSERT(((LBN) >= 64) && (((LBN)+(WIDTH)) <= 96))
++
++/* fields which span the third and fourth dwords */
++#define __LW3CHK(LBN, WIDTH) EFHW_ASSERT(WIDTH >= __LW3(LBN))
++
++/* fields within the fourth dword */
++#define __DW4CHCK(LBN, WIDTH) \
++	EFHW_ASSERT(((LBN) >= 96) && (((LBN)+(WIDTH)) <= 128))
++
++/* fields in the first qword */
++#define __QWCHCK(LBN, WIDTH) \
++	EFHW_ASSERT(((LBN) >= 0) && (((LBN)+(WIDTH)) <= 64))
++
++#endif /* __CI_EFHW_CHECK_H__ */
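These constructors take a field's lowest bit number (LBN) and width within a
128-bit register and rebuild it from 32-bit accesses.  A minimal sketch of a
field that straddles two dwords, assuming a hypothetical field at bits 56..71
and the check macros above (EFHW_ASSERT itself comes from ci/efhw/debug.h,
added later in this patch):

    #include <stdint.h>

    /* Hypothetical field for illustration: 16 bits at LBN 56, so it
     * straddles the second and third dwords of the register. */
    #define DEMO_LBN   56
    #define DEMO_WIDTH 16

    static void demo_write_field(uint32_t dword[4], uint64_t v)
    {
            __RANGECHCK(v, DEMO_WIDTH);     /* v must fit in 16 bits */
            __LW2CHK(DEMO_LBN, DEMO_WIDTH); /* really spans dwords 1 and 2 */

            /* the low 8 bits land at bit 24 of dword[1]... */
            dword[1] |= __LOW2(v, DEMO_LBN, DEMO_WIDTH);
            /* ...and the remaining 8 bits at bit 0 of dword[2] */
            dword[2] |= __HIGH2(v, DEMO_LBN, DEMO_WIDTH);
    }

+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/common.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/common.h	2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,93 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers -
++ *          resource management for Xen backend, OpenOnload, etc
++ *          (including support for SFE4001 10GBT NIC)
++ *
++ * This file provides the API of the efhw library, which may be used both from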
++ * the kernel and from user-space code.
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ *                      9501 Jeronimo Road, Suite 250,
++ *                      Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ * Certain parts of the driver were implemented by
++ *         Alexandra Kossovsky
++ *         OKTET Labs Ltd, Russia,
++ *         http://oktetlabs.ru,
++ *         by request of Solarflare Communications
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#ifndef __CI_EFHW_COMMON_H__
++#define __CI_EFHW_COMMON_H__
++
++#include 
++
++typedef uint32_t efhw_buffer_addr_t;
++#define EFHW_BUFFER_ADDR_FMT "[ba:%"PRIx32"]"
++
++/*! A 64-bit hardware event, also accessible as two 32-bit halves. */
++typedef union {
++	uint64_t u64;
++	struct {
++		uint32_t a;
++		uint32_t b;
++	} opaque;
++} efhw_event_t;
++
++/* Flags for TX/RX queues */
++#define EFHW_VI_JUMBO_EN           0x01  /*! scatter RX over multiple desc */
++#define EFHW_VI_ISCSI_RX_HDIG_EN   0x02  /*! iscsi rx header digest */
++#define EFHW_VI_ISCSI_TX_HDIG_EN   0x04  /*! iscsi tx header digest */
++#define EFHW_VI_ISCSI_RX_DDIG_EN   0x08  /*! iscsi rx data digest */
++#define EFHW_VI_ISCSI_TX_DDIG_EN   0x10  /*! iscsi tx data digest */
++#define EFHW_VI_TX_PHYS_ADDR_EN    0x20  /*! TX physical address mode */
++#define EFHW_VI_RX_PHYS_ADDR_EN    0x40  /*! RX physical address mode */
++#define EFHW_VI_RM_WITH_INTERRUPT  0x80  /*! VI with an interrupt */
++#define EFHW_VI_TX_IP_CSUM_DIS     0x100 /*! disable IP checksum generation */
++#define EFHW_VI_TX_TCPUDP_CSUM_DIS 0x200 /*! disable TCP/UDP checksum
++					     generation */
++#define EFHW_VI_TX_TCPUDP_ONLY     0x400 /*!
drop non-tcp/udp packets */ ++ ++/* Types of hardware filter */ ++/* Each of these values implicitly selects scatter filters on B0 - or in ++ EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK if a non-scatter filter is required */ ++#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD (0) /* dest host only */ ++#define EFHW_IP_FILTER_TYPE_UDP_FULL (1) /* dest host and port */ ++#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD (2) /* dest based filter */ ++#define EFHW_IP_FILTER_TYPE_TCP_FULL (3) /* src filter */ ++/* Same again, but with RSS (for B0 only) */ ++#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD_RSS_B0 (4) ++#define EFHW_IP_FILTER_TYPE_UDP_FULL_RSS_B0 (5) ++#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD_RSS_B0 (6) ++#define EFHW_IP_FILTER_TYPE_TCP_FULL_RSS_B0 (7) ++ ++#define EFHW_IP_FILTER_TYPE_FULL_MASK (0x1) /* Mask for full / wildcard */ ++#define EFHW_IP_FILTER_TYPE_TCP_MASK (0x2) /* Mask for TCP type */ ++#define EFHW_IP_FILTER_TYPE_RSS_B0_MASK (0x4) /* Mask for B0 RSS enable */ ++#define EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK (0x8) /* Mask for B0 SCATTER dsbl */ ++ ++#define EFHW_IP_FILTER_TYPE_MASK (0xffff) /* Mask of types above */ ++ ++#define EFHW_IP_FILTER_BROADCAST (0x10000) /* driverlink filter ++ support */ ++ ++#endif /* __CI_EFHW_COMMON_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/common_sysdep.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/common_sysdep.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,61 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides version-independent Linux kernel API for ++ * userland-to-kernel interfaces. ++ * Only kernels >=2.6.9 are supported. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFHW_COMMON_LINUX_H__ ++#define __CI_EFHW_COMMON_LINUX_H__ ++ ++#include ++ ++/* Dirty hack, but Linux kernel does not provide DMA_ADDR_T_FMT */ ++#if BITS_PER_LONG == 64 || defined(CONFIG_HIGHMEM64G) ++#define DMA_ADDR_T_FMT "%llx" ++#else ++#define DMA_ADDR_T_FMT "%x" ++#endif ++ ++/* Linux kernel also does not provide PRIx32... Sigh. 
*/
++#define PRIx32 "x"
++
++#ifdef __ia64__
++# define PRIx64 "lx"
++#else
++# define PRIx64 "llx"
++#endif
++
++#endif /* __CI_EFHW_COMMON_LINUX_H__ */
+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/debug.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/debug.h	2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,84 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers -
++ *          resource management for Xen backend, OpenOnload, etc
++ *          (including support for SFE4001 10GBT NIC)
++ *
++ * This file provides a debug-related API for the efhw library using Linux
++ * kernel primitives.
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ *                      9501 Jeronimo Road, Suite 250,
++ *                      Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ * Certain parts of the driver were implemented by
++ *         Alexandra Kossovsky
++ *         OKTET Labs Ltd, Russia,
++ *         http://oktetlabs.ru,
++ *         by request of Solarflare Communications
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#ifndef __CI_EFHW_DEBUG_LINUX_H__
++#define __CI_EFHW_DEBUG_LINUX_H__
++
++#define EFHW_PRINTK_PREFIX "[sfc efhw] "
++
++#define EFHW_PRINTK(level, fmt, ...) \
++	printk(level EFHW_PRINTK_PREFIX fmt "\n", __VA_ARGS__)
++
++/* The following macros should be used with non-zero format parameters
++ * due to __VA_ARGS__ limitations.  Use "%s" with __func__ if you can't
++ * find better parameters. */
++#define EFHW_ERR(fmt, ...)     EFHW_PRINTK(KERN_ERR, fmt, __VA_ARGS__)
++#define EFHW_WARN(fmt, ...)    EFHW_PRINTK(KERN_WARNING, fmt, __VA_ARGS__)
++#define EFHW_NOTICE(fmt, ...)  EFHW_PRINTK(KERN_NOTICE, fmt, __VA_ARGS__)
++#if 0 && !defined(NDEBUG)
++#define EFHW_TRACE(fmt, ...)   EFHW_PRINTK(KERN_DEBUG, fmt, __VA_ARGS__)
++#else
++#define EFHW_TRACE(fmt, ...)
++#endif
++
++#ifndef NDEBUG
++#define EFHW_ASSERT(cond)      BUG_ON((cond) == 0)
++#define EFHW_DO_DEBUG(expr)    expr
++#else
++#define EFHW_ASSERT(cond)
++#define EFHW_DO_DEBUG(expr)
++#endif
++
++#define EFHW_TEST(expr) \
++	do { \
++		if (unlikely(!(expr))) \
++			BUG(); \
++	} while (0)
++
++/* Build-time asserts.  We paste the line number into the type name
++ * so that the macro can be used more than once per file even if the
++ * compiler objects to multiple identical typedefs.  Collisions
++ * between uses in different header files are still possible.
*/ ++#ifndef EFHW_BUILD_ASSERT ++#define __EFHW_BUILD_ASSERT_NAME(_x) __EFHW_BUILD_ASSERT_ILOATHECPP(_x) ++#define __EFHW_BUILD_ASSERT_ILOATHECPP(_x) __EFHW_BUILD_ASSERT__ ##_x ++#define EFHW_BUILD_ASSERT(e) \ ++ typedef char __EFHW_BUILD_ASSERT_NAME(__LINE__)[(e) ? 1 : -1] ++#endif ++ ++#endif /* __CI_EFHW_DEBUG_LINUX_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/efhw_config.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/efhw_config.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,43 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides some limits used in both kernel and userland code. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFHW_EFAB_CONFIG_H__ ++#define __CI_EFHW_EFAB_CONFIG_H__ ++ ++#define EFHW_MAX_NR_DEVS 5 /* max number of efhw devices supported */ ++ ++#endif /* __CI_EFHW_EFAB_CONFIG_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/efhw_types.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/efhw_types.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,382 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides struct efhw_nic and some related types. 
++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFHW_EFAB_TYPES_H__ ++#define __CI_EFHW_EFAB_TYPES_H__ ++ ++#include ++#include ++#include ++#include ++ ++/*-------------------------------------------------------------------- ++ * ++ * forward type declarations ++ * ++ *--------------------------------------------------------------------*/ ++ ++struct efhw_nic; ++ ++/*-------------------------------------------------------------------- ++ * ++ * Managed interface ++ * ++ *--------------------------------------------------------------------*/ ++ ++struct efhw_buffer_table_allocation{ ++ unsigned base; ++ unsigned order; ++}; ++ ++struct eventq_resource_hardware { ++ /*!iobuffer allocated for eventq - can be larger than eventq */ ++ struct efhw_iopages iobuff; ++ unsigned iobuff_off; ++ struct efhw_buffer_table_allocation buf_tbl_alloc; ++ int capacity; /*!< capacity of event queue */ ++}; ++ ++/*-------------------------------------------------------------------- ++ * ++ * event queues and event driven callbacks ++ * ++ *--------------------------------------------------------------------*/ ++ ++struct efhw_keventq { ++ int lock; ++ caddr_t evq_base; ++ int32_t evq_ptr; ++ uint32_t evq_mask; ++ unsigned instance; ++ struct eventq_resource_hardware hw; ++ struct efhw_ev_handler *ev_handlers; ++}; ++ ++/*-------------------------------------------------------------------- ++ * ++ * filters ++ * ++ *--------------------------------------------------------------------*/ ++ ++struct efhw_filter_spec { ++ uint dmaq_id; ++ uint32_t saddr_le32; ++ uint32_t daddr_le32; ++ uint16_t sport_le16; ++ uint16_t dport_le16; ++ unsigned tcp : 1; ++ unsigned full : 1; ++ unsigned rss : 1; /* not supported on A1 */ ++ unsigned scatter : 1; /* not supported on A1 */ ++}; ++ ++struct efhw_filter_depth { ++ unsigned needed; ++ unsigned max; ++}; ++ ++struct efhw_filter_search_limits { ++ unsigned tcp_full; ++ unsigned tcp_wild; ++ unsigned udp_full; ++ unsigned udp_wild; ++}; ++ ++ ++/********************************************************************** ++ * Portable HW interface. 
***************************************
++ **********************************************************************/
++
++/*--------------------------------------------------------------------
++ *
++ * EtherFabric Functional units - configuration and control
++ *
++ *--------------------------------------------------------------------*/
++
++struct efhw_func_ops {
++
++	/*-------------- Initialisation ------------ */
++
++	/*! close down all hardware functional units - leaves NIC in a safe
++	   state for driver unload */
++	void (*close_hardware) (struct efhw_nic *nic);
++
++	/*! initialise all hardware functional units */
++	int (*init_hardware) (struct efhw_nic *nic,
++			      struct efhw_ev_handler *,
++			      const uint8_t *mac_addr, int non_irq_evq);
++
++	/*-------------- Interrupt support  ------------ */
++
++	/*! Main interrupt routine.  This function returns:
++	 **  - zero, if the IRQ was not generated by EF1
++	 **  - non-zero, if EF1 was the source of the IRQ
++	 **
++	 ** opaque is an OS-provided pointer for use by the OS callbacks,
++	 ** e.g. in Windows it is used to indicate that a DPC is scheduled
++	 */
++	int (*interrupt) (struct efhw_nic *nic);
++
++	/*! Enable the interrupt */
++	void (*interrupt_enable) (struct efhw_nic *nic);
++
++	/*! Disable the interrupt */
++	void (*interrupt_disable) (struct efhw_nic *nic);
++
++	/*! Set interrupt moderation strategy for the given IRQ unit
++	 ** val is in usec
++	 */
++	void (*set_interrupt_moderation)(struct efhw_nic *nic, int evq,
++					 uint val);
++
++	/*-------------- Event support  ------------ */
++
++	/*! Enable the given event queue.  Depending on the underlying
++	   implementation (EF1 or Falcon), either a q_base_addr in host
++	   memory or a buffer base id should be provided.
++	 */
++	void (*event_queue_enable) (struct efhw_nic *nic,
++				    uint evq,	/* event queue index */
++				    uint evq_size,	/* units of #entries */
++				    dma_addr_t q_base_addr, uint buf_base_id,
++				    int interrupting);
++
++	/*! Disable the given event queue (and any associated timer) */
++	void (*event_queue_disable) (struct efhw_nic *nic, uint evq,
++				     int timer_only);
++
++	/*! request wakeup from the NIC on a given event Q */
++	void (*wakeup_request) (struct efhw_nic *nic, dma_addr_t q_base_addr,
++				int next_i, int evq);
++
++	/*! Push a SW event on a given eventQ */
++	void (*sw_event) (struct efhw_nic *nic, int data, int evq);
++
++	/*-------------- IP Filter API  ------------ */
++
++	/*! Set up a given filter - the software can request a filter_i,
++	 * but some EtherFabric implementations will override with
++	 * a more suitable index
++	 */
++	int (*ipfilter_set) (struct efhw_nic *nic, int type,
++			     int *filter_i, int dmaq,
++			     unsigned saddr_be32, unsigned sport_be16,
++			     unsigned daddr_be32, unsigned dport_be16);
++
++	/*! Clear down a given filter */
++	void (*ipfilter_clear) (struct efhw_nic *nic, int filter_idx);
++
++	/*-------------- DMA support  ------------ */
++
++	/*! Initialise NIC state for a given TX DMAQ */
++	void (*dmaq_tx_q_init) (struct efhw_nic *nic,
++				uint dmaq, uint evq, uint owner, uint tag,
++				uint dmaq_size, uint buf_idx, uint flags);
++
++	/*! Initialise NIC state for a given RX DMAQ */
++	void (*dmaq_rx_q_init) (struct efhw_nic *nic,
++				uint dmaq, uint evq, uint owner, uint tag,
++				uint dmaq_size, uint buf_idx, uint flags);
++
++	/*! Disable a given TX DMAQ */
++	void (*dmaq_tx_q_disable) (struct efhw_nic *nic, uint dmaq);
++
++	/*! Disable a given RX DMAQ */
++	void (*dmaq_rx_q_disable) (struct efhw_nic *nic, uint dmaq);
++
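++	/* Usage sketch: hardware-independent code invokes these hooks
++	 * through the per-NIC table rather than calling a backend
++	 * directly, e.g.
++	 *
++	 *	nic->efhw_func->event_queue_enable(nic, evq, evq_size,
++	 *					   q_base_addr, buf_base_id,
++	 *					   interrupting);
++	 *
++	 * where "nic" is the struct efhw_nic defined later in this header;
++	 * its efhw_func member points at an instance of this table. */
++
++	/*!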
Flush a given TX DMA channel */ ++ int (*flush_tx_dma_channel) (struct efhw_nic *nic, uint dmaq); ++ ++ /*! Flush a given RX DMA channel */ ++ int (*flush_rx_dma_channel) (struct efhw_nic *nic, uint dmaq); ++ ++ /*-------------- Buffer table Support ------------ */ ++ ++ /*! Initialise a buffer table page */ ++ void (*buffer_table_set) (struct efhw_nic *nic, ++ dma_addr_t dma_addr, ++ uint bufsz, uint region, ++ int own_id, int buffer_id); ++ ++ /*! Initialise a block of buffer table pages */ ++ void (*buffer_table_set_n) (struct efhw_nic *nic, int buffer_id, ++ dma_addr_t dma_addr, ++ uint bufsz, uint region, ++ int n_pages, int own_id); ++ ++ /*! Clear a block of buffer table pages */ ++ void (*buffer_table_clear) (struct efhw_nic *nic, int buffer_id, ++ int num); ++ ++ /*! Commit a buffer table update */ ++ void (*buffer_table_commit) (struct efhw_nic *nic); ++ ++ /*-------------- New filter API ------------ */ ++ ++ /*! Set a given filter */ ++ int (*filter_set) (struct efhw_nic *nic, struct efhw_filter_spec *spec, ++ int *filter_idx_out); ++ ++ /*! Clear a given filter */ ++ void (*filter_clear) (struct efhw_nic *nic, int filter_idx); ++}; ++ ++ ++/*---------------------------------------------------------------------------- ++ * ++ * NIC type ++ * ++ *---------------------------------------------------------------------------*/ ++ ++struct efhw_device_type { ++ int arch; /* enum efhw_arch */ ++ char variant; /* 'A', 'B', ... */ ++ int revision; /* 0, 1, ... */ ++}; ++ ++ ++/*---------------------------------------------------------------------------- ++ * ++ * EtherFabric NIC instance - nic.c for HW independent functions ++ * ++ *---------------------------------------------------------------------------*/ ++ ++/*! */ ++struct efhw_nic { ++ /*! zero base index in efrm_nic_tablep->nic array */ ++ int index; ++ int ifindex; /*!< OS level nic index */ ++ struct net *nd_net; ++ ++ struct efhw_device_type devtype; ++ ++ /*! Options that can be set by user. */ ++ unsigned options; ++# define NIC_OPT_EFTEST 0x1 /* owner is an eftest app */ ++ ++# define NIC_OPT_DEFAULT 0 ++ ++ /*! Internal flags that indicate hardware properties at runtime. */ ++ unsigned flags; ++# define NIC_FLAG_NO_INTERRUPT 0x01 /* to be set at init time only */ ++# define NIC_FLAG_TRY_MSI 0x02 ++# define NIC_FLAG_MSI 0x04 ++# define NIC_FLAG_OS_IRQ_EN 0x08 ++ ++ unsigned mtu; /*!< MAC MTU (includes MAC hdr) */ ++ ++ /* hardware resources */ ++ ++ /*! I/O address of the start of the bar */ ++ volatile char __iomem *bar_ioaddr; ++ ++ /*! Bar number of control aperture. */ ++ unsigned ctr_ap_bar; ++ /*! Length of control aperture in bytes. */ ++ unsigned ctr_ap_bytes; ++ ++ uint8_t mac_addr[ETH_ALEN]; /*!< mac address */ ++ ++ /*! EtherFabric Functional Units -- functions */ ++ const struct efhw_func_ops *efhw_func; ++ ++ /*! This lock protects a number of misc NIC resources. It should ++ * only be used for things that can be at the bottom of the lock ++ * order. ie. You mustn't attempt to grab any other lock while ++ * holding this one. ++ */ ++ spinlock_t *reg_lock; ++ spinlock_t the_reg_lock; ++ ++ int buf_commit_outstanding; /*!< outstanding buffer commits */ ++ ++ /*! interrupt callbacks (hard-irq) */ ++ void (*irq_handler) (struct efhw_nic *, int unit); ++ ++ /*! event queues per driver */ ++ struct efhw_keventq interrupting_evq; ++ ++/* for marking when we are not using an IRQ unit ++ - 0 is a valid offset to an IRQ unit on EF1! */ ++#define EFHW_IRQ_UNIT_UNUSED 0xffff ++ /*! 
interrupt unit in use for the interrupting event queue */ ++ unsigned int irq_unit; ++ ++ struct efhw_keventq non_interrupting_evq; ++ ++ struct efhw_iopage irq_iobuff; /*!< Falcon SYSERR interrupt */ ++ ++ /* The new driverlink infrastructure. */ ++ struct efx_dl_device *net_driver_dev; ++ struct efx_dlfilt_cb_s *dlfilter_cb; ++ ++ /*! Bit masks of the sizes of event queues and dma queues supported ++ * by the nic. */ ++ unsigned evq_sizes; ++ unsigned rxq_sizes; ++ unsigned txq_sizes; ++ ++ /* Size of filter table. */ ++ unsigned ip_filter_tbl_size; ++ ++ /* Number of filters currently used */ ++ unsigned ip_filter_tbl_used; ++ ++ /* Dynamically allocated filter state. */ ++ uint8_t *filter_in_use; ++ struct efhw_filter_spec *filter_spec_cache; ++ ++ /* Currently required and maximum filter table search depths. */ ++ struct efhw_filter_depth tcp_full_srch; ++ struct efhw_filter_depth tcp_wild_srch; ++ struct efhw_filter_depth udp_full_srch; ++ struct efhw_filter_depth udp_wild_srch; ++ ++ /* Number of event queues, DMA queues and timers. */ ++ unsigned num_evqs; ++ unsigned num_dmaqs; ++ unsigned num_timers; ++}; ++ ++ ++#define EFHW_KVA(nic) ((nic)->bar_ioaddr) ++ ++ ++#endif /* __CI_EFHW_EFHW_TYPES_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/eventq.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/eventq.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,72 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains API provided by efhw/eventq.c file. This file is not ++ * designed for use outside of the SFC resource driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFHW_EVENTQ_H__ ++#define __CI_EFHW_EVENTQ_H__ ++ ++#include ++#include ++ ++/*! Poll the event queue. */ ++extern int efhw_keventq_poll(struct efhw_nic *, struct efhw_keventq *); ++ ++/*! Callbacks for handling events. 
*/ ++struct efhw_ev_handler { ++ void (*wakeup_fn)(struct efhw_nic *nic, unsigned); ++ void (*timeout_fn)(struct efhw_nic *nic, unsigned); ++ void (*dmaq_flushed_fn) (struct efhw_nic *, unsigned, int); ++}; ++ ++extern int efhw_keventq_ctor(struct efhw_nic *, int instance, ++ struct efhw_keventq *, struct efhw_ev_handler *); ++extern void efhw_keventq_dtor(struct efhw_nic *, struct efhw_keventq *); ++ ++extern void efhw_handle_txdmaq_flushed(struct efhw_nic *, ++ struct efhw_ev_handler *, ++ efhw_event_t *); ++extern void efhw_handle_rxdmaq_flushed(struct efhw_nic *, ++ struct efhw_ev_handler *, ++ efhw_event_t *); ++extern void efhw_handle_wakeup_event(struct efhw_nic *, ++ struct efhw_ev_handler *, ++ efhw_event_t *); ++extern void efhw_handle_timeout_event(struct efhw_nic *, ++ struct efhw_ev_handler *, ++ efhw_event_t *); ++ ++#endif /* __CI_EFHW_EVENTQ_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/eventq_macros.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/eventq_macros.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,77 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides some event-related macros. This file is designed for ++ * use from kernel and from the userland contexts. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#ifndef __CI_EFHW_EVENTQ_MACROS_H__
++#define __CI_EFHW_EVENTQ_MACROS_H__
++
++#include 
++
++/*--------------------------------------------------------------------
++ *
++ * Event Queue manipulation
++ *
++ *--------------------------------------------------------------------*/
++
++#define EFHW_EVENT_OFFSET(q, s, i)					\
++	(((s)->evq_ptr - (i) * (int32_t)sizeof(efhw_event_t))		\
++	 & (q)->evq_mask)
++
++#define EFHW_EVENT_PTR(q, s, i)						\
++	((efhw_event_t *)((q)->evq_base + EFHW_EVENT_OFFSET(q, s, i)))
++
++#define EFHW_EVENTQ_NEXT(s)						\
++	do { ((s)->evq_ptr += sizeof(efhw_event_t)); } while (0)
++
++#define EFHW_EVENTQ_PREV(s)						\
++	do { ((s)->evq_ptr -= sizeof(efhw_event_t)); } while (0)
++
++/* Be wary of this on byte-swapped machines */
++/* Due to crazy chipsets, we see the event words being written in
++** arbitrary order (bug4539).  So the test for the presence of an event
++** must ensure that both halves have changed from the null value.
++*/
++#define EFHW_IS_EVENT(evp)						\
++	(((evp)->opaque.a != (uint32_t)-1) &&				\
++	 ((evp)->opaque.b != (uint32_t)-1))
++#define EFHW_CLEAR_EVENT(evp)       ((evp)->u64 = (uint64_t)-1)
++#define EFHW_CLEAR_EVENT_VALUE      0xff
++
++#define EFHW_EVENT_OVERFLOW(evq, s)					\
++	(EFHW_IS_EVENT(EFHW_EVENT_PTR(evq, s, 1)))
++
++#endif /* __CI_EFHW_EVENTQ_MACROS_H__ */
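The macros above treat the event queue as a ring of efhw_event_t entries
addressed by a free-running byte offset (evq_ptr) masked into the ring, with
all-ones as the "no event" null value.  A minimal consumer sketch, assuming
the struct efhw_keventq from efhw_types.h above (which supplies evq_base,
evq_ptr and evq_mask) and a placeholder handle_event() routine;
EFHW_EVENT_PTR(q, q, 0) is simply the entry at the current evq_ptr:

    extern void handle_event(efhw_event_t *ev);	/* placeholder consumer */

    static void demo_drain_evq(struct efhw_keventq *q)
    {
            for (;;) {
                    efhw_event_t *ev = EFHW_EVENT_PTR(q, q, 0);

                    if (!EFHW_IS_EVENT(ev))	/* both halves still null? */
                            break;
                    handle_event(ev);
                    EFHW_CLEAR_EVENT(ev);	/* write back the null value */
                    EFHW_EVENTQ_NEXT(q);	/* advance evq_ptr one event */
            }
    }

+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/falcon.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/falcon.h	2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,94 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers -
++ *          resource management for Xen backend, OpenOnload, etc
++ *          (including support for SFE4001 10GBT NIC)
++ *
++ * This file contains the API provided by efhw/falcon.c.  This file is not
++ * designed for use outside of the SFC resource driver.
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ *                      9501 Jeronimo Road, Suite 250,
++ *                      Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ * Certain parts of the driver were implemented by
++ *         Alexandra Kossovsky
++ *         OKTET Labs Ltd, Russia,
++ *         http://oktetlabs.ru,
++ *         by request of Solarflare Communications
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.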
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#ifndef __CI_EFHW_FALCON_H__
++#define __CI_EFHW_FALCON_H__
++
++#include 
++#include 
++
++/*----------------------------------------------------------------------------
++ *
++ * Locks - unfortunately required
++ *
++ *---------------------------------------------------------------------------*/
++
++#define FALCON_LOCK_DECL        irq_flags_t lock_state
++#define FALCON_LOCK_LOCK(nic) \
++	spin_lock_irqsave((nic)->reg_lock, lock_state)
++#define FALCON_LOCK_UNLOCK(nic) \
++	spin_unlock_irqrestore((nic)->reg_lock, lock_state)
++
++extern struct efhw_func_ops falcon_char_functional_units;
++
++/*! specify a pace value for a TX DMA Queue */
++extern void falcon_nic_pace(struct efhw_nic *nic, uint dmaq, uint pace);
++
++/*! configure the pace engine */
++extern void falcon_nic_pace_cfg(struct efhw_nic *nic, int fb_base,
++				int bin_thresh);
++
++/*! confirm buffer table updates - should be used for items where
++   loss of data would be unacceptable, e.g. for the buffers that back
++   an event or DMA queue */
++extern void falcon_nic_buffer_table_confirm(struct efhw_nic *nic);
++
++/*! Reset all the TX DMA queue pointers. */
++extern void falcon_clobber_tx_dma_ptrs(struct efhw_nic *nic, uint dmaq);
++
++extern int
++falcon_handle_char_event(struct efhw_nic *nic,
++			 struct efhw_ev_handler *h, efhw_event_t *evp);
++
++/*! Acknowledge to HW that processing is complete on a given event queue */
++extern void falcon_nic_evq_ack(struct efhw_nic *nic, uint evq, /* evq id */
++			       uint rptr,	/* new read pointer update */
++			       bool wakeup	/* request a wakeup event if
++						   the pointers differ */
++	);
++
++extern void
++falcon_nic_buffer_table_set_n(struct efhw_nic *nic, int buffer_id,
++			      dma_addr_t dma_addr, uint bufsz, uint region,
++			      int n_pages, int own_id);
++
++extern int falcon_nic_filter_ctor(struct efhw_nic *nic);
++
++extern void falcon_nic_filter_dtor(struct efhw_nic *nic);
++
++#endif /* __CI_EFHW_FALCON_H__ */
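The next hunk declares the Falcon filter-table hash primitives; together they
express the usual double-hash probe over the filter table.  A minimal sketch
(argument names are illustrative, demo_slot_usable() is a placeholder
predicate, and nfilters is the filter-table size):

    extern int demo_slot_usable(unsigned int idx);	/* placeholder */

    static int demo_find_slot(unsigned int saddr, unsigned int sport,
                              unsigned int daddr, unsigned int dport,
                              unsigned int nfilters)
    {
            unsigned int key, hash1, hash2, n;

            key = falcon_hash_get_ip_key(saddr, sport, daddr, dport,
                                         1 /* tcp */, 1 /* full */);
            hash1 = falcon_hash_function1(key, nfilters);
            hash2 = falcon_hash_function2(key, nfilters);

            /* Probe until a usable slot turns up; in the driver the
             * search-depth limits bound this loop, not nfilters. */
            for (n = 0; n < nfilters; n++) {
                    unsigned int idx =
                            falcon_hash_iterator(hash1, hash2, n, nfilters);
                    if (demo_slot_usable(idx))
                            return (int)idx;
            }
            return -1;
    }

+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/falcon_hash.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/falcon_hash.h	2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,58 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers -
++ *          resource management for Xen backend, OpenOnload, etc
++ *          (including support for SFE4001 10GBT NIC)
++ *
++ * This file contains the API provided by efhw/falcon_hash.c.
++ * Functions declared in this file are not exported from the Linux
++ * sfc_resource driver.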
++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFHW_FALCON_HASH_H__ ++#define __CI_EFHW_FALCON_HASH_H__ ++ ++extern unsigned int ++falcon_hash_get_ip_key(unsigned int src_ip, unsigned int src_port, ++ unsigned int dest_ip, unsigned int dest_port, ++ int tcp, int full); ++ ++extern unsigned int ++falcon_hash_function1(unsigned int key, unsigned int nfilters); ++ ++extern unsigned int ++falcon_hash_function2(unsigned int key, unsigned int nfilters); ++ ++extern unsigned int ++falcon_hash_iterator(unsigned int hash1, unsigned int hash2, ++ unsigned int n_search, unsigned int nfilters); ++ ++#endif /* __CI_EFHW_FALCON_HASH_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/hardware_sysdep.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/hardware_sysdep.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,69 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides version-independent Linux kernel API for header files ++ * with hardware-related definitions (in ci/driver/efab/hardware*). ++ * Only kernels >=2.6.9 are supported. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFHW_HARDWARE_LINUX_H__ ++#define __CI_EFHW_HARDWARE_LINUX_H__ ++ ++#include ++ ++#ifdef __LITTLE_ENDIAN ++#define EFHW_IS_LITTLE_ENDIAN ++#elif __BIG_ENDIAN ++#define EFHW_IS_BIG_ENDIAN ++#else ++#error Unknown endianness ++#endif ++ ++#ifndef readq ++static inline uint64_t __readq(volatile void __iomem *addr) ++{ ++ return *(volatile uint64_t *)addr; ++} ++#define readq(x) __readq(x) ++#endif ++ ++#ifndef writeq ++static inline void __writeq(uint64_t v, volatile void __iomem *addr) ++{ ++ *(volatile uint64_t *)addr = v; ++} ++#define writeq(val, addr) __writeq((val), (addr)) ++#endif ++ ++#endif /* __CI_EFHW_HARDWARE_LINUX_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/iopage.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/iopage.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,58 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains OS-independent API for allocating iopage types. ++ * The implementation of these functions is highly OS-dependent. ++ * This file is not designed for use outside of the SFC resource driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_DRIVER_RESOURCE_IOPAGE_H__ ++#define __CI_DRIVER_RESOURCE_IOPAGE_H__ ++ ++#include ++ ++/*-------------------------------------------------------------------- ++ * ++ * memory allocation ++ * ++ *--------------------------------------------------------------------*/ ++ ++extern int efhw_iopage_alloc(struct efhw_nic *, struct efhw_iopage *p); ++extern void efhw_iopage_free(struct efhw_nic *, struct efhw_iopage *p); ++ ++extern int efhw_iopages_alloc(struct efhw_nic *, struct efhw_iopages *p, ++ unsigned order); ++extern void efhw_iopages_free(struct efhw_nic *, struct efhw_iopages *p); ++ ++#endif /* __CI_DRIVER_RESOURCE_IOPAGE_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/iopage_types.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/iopage_types.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,190 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides struct efhw_page and struct efhw_iopage for Linux ++ * kernel. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFHW_IOPAGE_LINUX_H__ ++#define __CI_EFHW_IOPAGE_LINUX_H__ ++ ++#include ++#include ++#include ++#include ++ ++/*-------------------------------------------------------------------- ++ * ++ * struct efhw_page: A single page of memory. Directly mapped in the ++ * driver, and can be mapped to userlevel. ++ * ++ *--------------------------------------------------------------------*/ ++ ++struct efhw_page { ++ unsigned long kva; ++}; ++ ++static inline int efhw_page_alloc(struct efhw_page *p) ++{ ++ p->kva = __get_free_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL); ++ return p->kva ? 0 : -ENOMEM; ++} ++ ++static inline int efhw_page_alloc_zeroed(struct efhw_page *p) ++{ ++ p->kva = get_zeroed_page(in_interrupt()? 
GFP_ATOMIC : GFP_KERNEL); ++ return p->kva ? 0 : -ENOMEM; ++} ++ ++static inline void efhw_page_free(struct efhw_page *p) ++{ ++ free_page(p->kva); ++ EFHW_DO_DEBUG(memset(p, 0, sizeof(*p))); ++} ++ ++static inline char *efhw_page_ptr(struct efhw_page *p) ++{ ++ return (char *)p->kva; ++} ++ ++static inline unsigned efhw_page_pfn(struct efhw_page *p) ++{ ++ return (unsigned)(__pa(p->kva) >> PAGE_SHIFT); ++} ++ ++static inline void efhw_page_mark_invalid(struct efhw_page *p) ++{ ++ p->kva = 0; ++} ++ ++static inline int efhw_page_is_valid(struct efhw_page *p) ++{ ++ return p->kva != 0; ++} ++ ++static inline void efhw_page_init_from_va(struct efhw_page *p, void *va) ++{ ++ p->kva = (unsigned long)va; ++} ++ ++/*-------------------------------------------------------------------- ++ * ++ * struct efhw_iopage: A single page of memory. Directly mapped in the driver, ++ * and can be mapped to userlevel. Can also be accessed by the NIC. ++ * ++ *--------------------------------------------------------------------*/ ++ ++struct efhw_iopage { ++ struct efhw_page p; ++ dma_addr_t dma_addr; ++}; ++ ++static inline dma_addr_t efhw_iopage_dma_addr(struct efhw_iopage *p) ++{ ++ return p->dma_addr; ++} ++ ++#define efhw_iopage_ptr(iop) efhw_page_ptr(&(iop)->p) ++#define efhw_iopage_pfn(iop) efhw_page_pfn(&(iop)->p) ++#define efhw_iopage_mark_invalid(iop) efhw_page_mark_invalid(&(iop)->p) ++#define efhw_iopage_is_valid(iop) efhw_page_is_valid(&(iop)->p) ++ ++/*-------------------------------------------------------------------- ++ * ++ * struct efhw_iopages: A set of pages that are contiguous in physical ++ * memory. Directly mapped in the driver, and can be mapped to userlevel. ++ * Can also be accessed by the NIC. ++ * ++ * NB. The O/S may be unwilling to allocate many, or even any of these. So ++ * only use this type where the NIC really needs a physically contiguous ++ * buffer. ++ * ++ *--------------------------------------------------------------------*/ ++ ++struct efhw_iopages { ++ caddr_t kva; ++ unsigned order; ++ dma_addr_t dma_addr; ++}; ++ ++static inline caddr_t efhw_iopages_ptr(struct efhw_iopages *p) ++{ ++ return p->kva; ++} ++ ++static inline unsigned efhw_iopages_pfn(struct efhw_iopages *p) ++{ ++ return (unsigned)(__pa(p->kva) >> PAGE_SHIFT); ++} ++ ++static inline dma_addr_t efhw_iopages_dma_addr(struct efhw_iopages *p) ++{ ++ return p->dma_addr; ++} ++ ++static inline unsigned efhw_iopages_size(struct efhw_iopages *p) ++{ ++ return 1u << (p->order + PAGE_SHIFT); ++} ++ ++/* struct efhw_iopage <-> struct efhw_iopages conversions for handling ++ * physically contiguous allocations in iobufsets for iSCSI. This allows ++ * the essential information about contiguous allocations from ++ * efhw_iopages_alloc() to be saved away in the struct efhw_iopage array in ++ * an iobufset. (Changing the iobufset resource to use a union type would ++ * involve a lot of code changes, and make the iobufset's metadata larger ++ * which could be bad as it's supposed to fit into a single page on some ++ * platforms.) 
++ */ ++static inline void ++efhw_iopage_init_from_iopages(struct efhw_iopage *iopage, ++ struct efhw_iopages *iopages, unsigned pageno) ++{ ++ iopage->p.kva = ((unsigned long)efhw_iopages_ptr(iopages)) ++ + (pageno * PAGE_SIZE); ++ iopage->dma_addr = efhw_iopages_dma_addr(iopages) + ++ (pageno * PAGE_SIZE); ++} ++ ++static inline void ++efhw_iopages_init_from_iopage(struct efhw_iopages *iopages, ++ struct efhw_iopage *iopage, unsigned order) ++{ ++ iopages->kva = (caddr_t) efhw_iopage_ptr(iopage); ++ EFHW_ASSERT(iopages->kva); ++ iopages->order = order; ++ iopages->dma_addr = efhw_iopage_dma_addr(iopage); ++} ++ ++#endif /* __CI_EFHW_IOPAGE_LINUX_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/nic.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/nic.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,62 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains API provided by efhw/nic.c file. This file is not ++ * designed for use outside of the SFC resource driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFHW_NIC_H__ ++#define __CI_EFHW_NIC_H__ ++ ++#include ++#include ++ ++ ++/* Convert PCI info to device type. Returns false when device is not ++ * recognised. ++ */ ++extern int efhw_device_type_init(struct efhw_device_type *dt, ++ int vendor_id, int device_id, int revision); ++ ++/* Initialise fields that do not involve touching hardware. */ ++extern void efhw_nic_init(struct efhw_nic *nic, unsigned flags, ++ unsigned options, struct efhw_device_type dev_type); ++ ++/*! Destruct NIC resources */ ++extern void efhw_nic_dtor(struct efhw_nic *nic); ++ ++/*! 
Shutdown interrupts */ ++extern void efhw_nic_close_interrupts(struct efhw_nic *nic); ++ ++#endif /* __CI_EFHW_NIC_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/public.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/public.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,104 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides public API of efhw library exported from the SFC ++ * resource driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFHW_PUBLIC_H__ ++#define __CI_EFHW_PUBLIC_H__ ++ ++#include ++#include ++ ++/*! Returns true if we have some EtherFabric functional units - ++ whether configured or not */ ++static inline int efhw_nic_have_functional_units(struct efhw_nic *nic) ++{ ++ return nic->efhw_func != 0; ++} ++ ++/*! Returns true if the EtherFabric functional units have been configured */ ++static inline int efhw_nic_have_hw(struct efhw_nic *nic) ++{ ++ return efhw_nic_have_functional_units(nic) && (EFHW_KVA(nic) != 0); ++} ++ ++/*! Helper function to allocate the iobuffer needed by an eventq ++ * - it ensures the eventq has the correct alignment for the NIC ++ * ++ * \param rm Event-queue resource manager ++ * \param instance Event-queue instance (index) ++ * \param buf_bytes Requested size of eventq ++ * \return < 0 if iobuffer allocation fails ++ */ ++int efhw_nic_event_queue_alloc_iobuffer(struct efhw_nic *nic, ++ struct eventq_resource_hardware *h, ++ int evq_instance, unsigned buf_bytes); ++ ++extern void falcon_nic_set_rx_usr_buf_size(struct efhw_nic *, ++ int rx_usr_buf_size); ++ ++/*! Get RX filter search limits from RX_FILTER_CTL_REG. ++ * use_raw_values = 0 to get actual depth of search, or 1 to get raw values ++ * from register. ++ */ ++extern void ++falcon_nic_get_rx_filter_search_limits(struct efhw_nic *nic, ++ struct efhw_filter_search_limits *lim, ++ int use_raw_values); ++ ++/*! Set RX filter search limits in RX_FILTER_CTL_REG. ++ * use_raw_values = 0 if specifying actual depth of search, or 1 if specifying ++ * raw values to write to the register. 
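++ *
++ * A typical read-modify-write sketch, assuming the limit fields are
++ * named tcp_full etc. and "nic" is the caller's NIC pointer:
++ *
++ *	struct efhw_filter_search_limits lim;
++ *	falcon_nic_get_rx_filter_search_limits(nic, &lim, 0);
++ *	lim.tcp_full += 8;
++ *	falcon_nic_set_rx_filter_search_limits(nic, &lim, 0);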
++ */ ++extern void ++falcon_nic_set_rx_filter_search_limits(struct efhw_nic *nic, ++ struct efhw_filter_search_limits *lim, ++ int use_raw_values); ++ ++ ++/*! Legacy RX IP filter search depth control interface */ ++extern void ++falcon_nic_rx_filter_ctl_set(struct efhw_nic *nic, uint32_t tcp_full, ++ uint32_t tcp_wild, ++ uint32_t udp_full, uint32_t udp_wild); ++ ++/*! Legacy RX IP filter search depth control interface */ ++extern void ++falcon_nic_rx_filter_ctl_get(struct efhw_nic *nic, uint32_t *tcp_full, ++ uint32_t *tcp_wild, ++ uint32_t *udp_full, uint32_t *udp_wild); ++ ++#endif /* __CI_EFHW_PUBLIC_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/sysdep.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efhw/sysdep.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,55 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides version-independent Linux kernel API for efhw library. ++ * Only kernels >=2.6.9 are supported. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFHW_SYSDEP_LINUX_H__ ++#define __CI_EFHW_SYSDEP_LINUX_H__ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include /* necessary for etherdevice.h on some kernels */ ++#include ++ ++typedef unsigned long irq_flags_t; ++ ++#define spin_lock_destroy(l_) do {} while (0) ++ ++#endif /* __CI_EFHW_SYSDEP_LINUX_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/buddy.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/buddy.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,68 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides private API for buddy allocator. This API is not ++ * designed for use outside of SFC resource driver. 
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ * 9501 Jeronimo Road, Suite 250,
++ * Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ *
++ * Certain parts of the driver were implemented by
++ * Alexandra Kossovsky
++ * OKTET Labs Ltd, Russia,
++ * http://oktetlabs.ru,
++ * by request of Solarflare Communications
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#ifndef __CI_EFRM_BUDDY_H__
++#define __CI_EFRM_BUDDY_H__
++
++#include
++
++/*! Buddy allocator state. */
++struct efrm_buddy_allocator {
++	struct list_head *free_lists;	/* array[order+1] */
++	struct list_head *links;	/* array[1<<order] */
++	uint8_t *orders;		/* array[1<<order] */
++	unsigned order;			/*!< total size == (1 << order) */
++};
++
++/*! Returns total size of managed space. */
++static inline unsigned long efrm_buddy_size(struct efrm_buddy_allocator *b)
++{
++	return 1ul << b->order;
++}
++
++int efrm_buddy_ctor(struct efrm_buddy_allocator *b, unsigned order);
++void efrm_buddy_dtor(struct efrm_buddy_allocator *b);
++int efrm_buddy_alloc(struct efrm_buddy_allocator *b, unsigned order);
++void efrm_buddy_free(struct efrm_buddy_allocator *b, unsigned addr,
++		     unsigned order);
++
++
++#endif /* __CI_EFRM_BUDDY_H__ */
+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/buffer_table.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/buffer_table.h	2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,81 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers -
++ * resource management for Xen backend, OpenOnload, etc
++ * (including support for SFE4001 10GBT NIC)
++ *
++ * This file provides private buffer table API. This API is not designed
++ * for use outside of the SFC resource driver.
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ * 9501 Jeronimo Road, Suite 250,
++ * Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ *
++ * Certain parts of the driver were implemented by
++ * Alexandra Kossovsky
++ * OKTET Labs Ltd, Russia,
++ * http://oktetlabs.ru,
++ * by request of Solarflare Communications
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
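++ *
++ * Usage sketch for the buddy allocator declared in buddy.h above; the
++ * order values are made up, and efrm_buddy_alloc() is assumed to return
++ * the base of a free block of 1<<order entries or a negative value:
++ *
++ *	struct efrm_buddy_allocator b;
++ *	int addr;
++ *	if (efrm_buddy_ctor(&b, 10) < 0)
++ *		return -ENOMEM;
++ *	addr = efrm_buddy_alloc(&b, 4);
++ *	if (addr >= 0)
++ *		efrm_buddy_free(&b, (unsigned)addr, 4);
++ *	efrm_buddy_dtor(&b);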
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_BUFFER_TABLE_H__ ++#define __CI_EFRM_BUFFER_TABLE_H__ ++ ++#include ++ ++/*-------------------------------------------------------------------- ++ * ++ * NIC's buffer table. ++ * ++ *--------------------------------------------------------------------*/ ++ ++/*! Managed interface. */ ++ ++/*! construct a managed buffer table object, allocated over a region of ++ * the NICs buffer table space ++ */ ++extern int efrm_buffer_table_ctor(unsigned low, unsigned high); ++/*! destructor for above */ ++extern void efrm_buffer_table_dtor(void); ++ ++/*! allocate a contiguous region of buffer table space */ ++extern int efrm_buffer_table_alloc(unsigned order, ++ struct efhw_buffer_table_allocation *a); ++ ++ ++/*-------------------------------------------------------------------- ++ * ++ * buffer table operations through the HW independent API ++ * ++ *--------------------------------------------------------------------*/ ++ ++/*! free a previously allocated region of buffer table space */ ++extern void efrm_buffer_table_free(struct efhw_buffer_table_allocation *a); ++ ++/*! commit the update of a buffer table entry to every NIC */ ++extern void efrm_buffer_table_commit(void); ++ ++extern void efrm_buffer_table_set(struct efhw_buffer_table_allocation *, ++ struct efhw_nic *, ++ unsigned i, dma_addr_t dma_addr, int owner); ++ ++ ++#endif /* __CI_EFRM_BUFFER_TABLE_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/debug.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/debug.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,78 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides debug-related API for efrm library using Linux kernel ++ * primitives. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_DEBUG_LINUX_H__ ++#define __CI_EFRM_DEBUG_LINUX_H__ ++ ++#define EFRM_PRINTK_PREFIX "[sfc efrm] " ++ ++#define EFRM_PRINTK(level, fmt, ...) \ ++ printk(level EFRM_PRINTK_PREFIX fmt "\n", __VA_ARGS__) ++ ++/* Following macros should be used with non-zero format parameters ++ * due to __VA_ARGS__ limitations. Use "%s" with __func__ if you can't ++ * find better parameters. */ ++#define EFRM_ERR(fmt, ...) EFRM_PRINTK(KERN_ERR, fmt, __VA_ARGS__) ++#define EFRM_WARN(fmt, ...) EFRM_PRINTK(KERN_WARNING, fmt, __VA_ARGS__) ++#define EFRM_NOTICE(fmt, ...) EFRM_PRINTK(KERN_NOTICE, fmt, __VA_ARGS__) ++#if !defined(NDEBUG) ++#define EFRM_TRACE(fmt, ...) EFRM_PRINTK(KERN_DEBUG, fmt, __VA_ARGS__) ++#else ++#define EFRM_TRACE(fmt, ...) ++#endif ++ ++#ifndef NDEBUG ++#define EFRM_ASSERT(cond) BUG_ON((cond) == 0) ++#define _EFRM_ASSERT(cond, file, line) \ ++ do { \ ++ if (unlikely(!(cond))) { \ ++ EFRM_ERR("assertion \"%s\" failed at %s %d", \ ++ #cond, file, line); \ ++ BUG(); \ ++ } \ ++ } while (0) ++ ++#define EFRM_DO_DEBUG(expr) expr ++#define EFRM_VERIFY_EQ(expr, val) EFRM_ASSERT((expr) == (val)) ++#else ++#define EFRM_ASSERT(cond) ++#define EFRM_DO_DEBUG(expr) ++#define EFRM_VERIFY_EQ(expr, val) expr ++#endif ++ ++#endif /* __CI_EFRM_DEBUG_LINUX_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/driver_private.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/driver_private.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,89 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides private API of efrm library to be used from the SFC ++ * resource driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
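++ *
++ * The EFRM_* logging macros above always need at least one argument
++ * after the format string, so a plain message is logged like this
++ * (sketch only; "instance" is a caller variable):
++ *
++ *	EFRM_ERR("%s: out of buffer table entries", __func__);
++ *	EFRM_TRACE("%s: allocated instance %d", __func__, instance);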
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_DRIVER_PRIVATE_H__ ++#define __CI_EFRM_DRIVER_PRIVATE_H__ ++ ++#include ++#include ++ ++/*-------------------------------------------------------------------- ++ * ++ * global variables ++ * ++ *--------------------------------------------------------------------*/ ++ ++/* Internal structure for resource driver */ ++extern struct efrm_resource_manager *efrm_rm_table[]; ++ ++/*-------------------------------------------------------------------- ++ * ++ * efrm_nic_table handling ++ * ++ *--------------------------------------------------------------------*/ ++ ++struct efrm_nic; ++ ++extern void efrm_driver_ctor(void); ++extern void efrm_driver_dtor(void); ++extern int efrm_driver_register_nic(struct efrm_nic *, int nic_index, ++ int ifindex); ++extern int efrm_driver_unregister_nic(struct efrm_nic *); ++ ++/*-------------------------------------------------------------------- ++ * ++ * create/destroy resource managers ++ * ++ *--------------------------------------------------------------------*/ ++ ++struct vi_resource_dimensions { ++ unsigned evq_int_min, evq_int_lim; ++ unsigned evq_timer_min, evq_timer_lim; ++ unsigned rxq_min, rxq_lim; ++ unsigned txq_min, txq_lim; ++}; ++ ++/*! Initialise resources */ ++extern int ++efrm_resources_init(const struct vi_resource_dimensions *, ++ int buffer_table_min, int buffer_table_lim); ++ ++/*! Tear down resources */ ++extern void efrm_resources_fini(void); ++ ++#endif /* __CI_EFRM_DRIVER_PRIVATE_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/efrm_client.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/efrm_client.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,32 @@ ++#ifndef __EFRM_CLIENT_H__ ++#define __EFRM_CLIENT_H__ ++ ++ ++struct efrm_client; ++ ++ ++struct efrm_client_callbacks { ++ /* Called before device is reset. Callee may block. */ ++ void (*pre_reset)(struct efrm_client *, void *user_data); ++ void (*stop)(struct efrm_client *, void *user_data); ++ void (*restart)(struct efrm_client *, void *user_data); ++}; ++ ++ ++#define EFRM_IFINDEX_DEFAULT -1 ++ ++ ++/* NB. Callbacks may be invoked even before this returns. */ ++extern int efrm_client_get(int ifindex, struct efrm_client_callbacks *, ++ void *user_data, struct efrm_client **client_out); ++extern void efrm_client_put(struct efrm_client *); ++ ++extern struct efhw_nic *efrm_client_get_nic(struct efrm_client *); ++ ++#if 0 ++/* For each resource type... 
*/ ++extern void efrm_x_resource_resume(struct x_resource *); ++#endif ++ ++ ++#endif /* __EFRM_CLIENT_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/efrm_nic.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/efrm_nic.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,26 @@ ++#ifndef __EFRM_NIC_H__ ++#define __EFRM_NIC_H__ ++ ++#include ++ ++ ++struct efrm_nic_per_vi { ++ unsigned long state; ++ struct vi_resource *vi; ++}; ++ ++ ++struct efrm_nic { ++ struct efhw_nic efhw_nic; ++ struct list_head link; ++ struct list_head clients; ++ struct efrm_nic_per_vi *vis; ++}; ++ ++ ++#define efrm_nic(_efhw_nic) \ ++ container_of(_efhw_nic, struct efrm_nic, efhw_nic) ++ ++ ++ ++#endif /* __EFRM_NIC_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/filter.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/filter.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,122 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides public API for filter resource. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_FILTER_H__ ++#define __CI_EFRM_FILTER_H__ ++ ++#include ++#include ++ ++ ++struct filter_resource; ++struct vi_resource; ++struct efrm_client; ++ ++ ++/*! ++ * Allocate filter resource. ++ * ++ * \param vi_parent VI resource to use as parent. The function takes ++ * reference to the VI resource on success. 
++ * \param frs_out pointer to return the new filter resource ++ * ++ * \return status code; if non-zero, frs_out is unchanged ++ */ ++extern int ++efrm_filter_resource_alloc(struct vi_resource *vi_parent, ++ struct filter_resource **frs_out); ++ ++extern void ++efrm_filter_resource_release(struct filter_resource *); ++ ++ ++extern int efrm_filter_resource_clear(struct filter_resource *frs); ++ ++extern int __efrm_filter_resource_set(struct filter_resource *frs, int type, ++ unsigned saddr_be32, uint16_t sport_be16, ++ unsigned daddr_be32, uint16_t dport_be16); ++ ++static inline int ++efrm_filter_resource_tcp_set(struct filter_resource *frs, ++ unsigned saddr, uint16_t sport, ++ unsigned daddr, uint16_t dport) ++{ ++ int type; ++ ++ EFRM_ASSERT((saddr && sport) || (!saddr && !sport)); ++ ++ type = ++ saddr ? EFHW_IP_FILTER_TYPE_TCP_FULL : ++ EFHW_IP_FILTER_TYPE_TCP_WILDCARD; ++ ++ return __efrm_filter_resource_set(frs, type, ++ saddr, sport, daddr, dport); ++} ++ ++static inline int ++efrm_filter_resource_udp_set(struct filter_resource *frs, ++ unsigned saddr, uint16_t sport, ++ unsigned daddr, uint16_t dport) ++{ ++ int type; ++ ++ EFRM_ASSERT((saddr && sport) || (!saddr && !sport)); ++ ++ type = ++ saddr ? EFHW_IP_FILTER_TYPE_UDP_FULL : ++ EFHW_IP_FILTER_TYPE_UDP_WILDCARD; ++ ++ return __efrm_filter_resource_set(frs, ++ type, saddr, sport, daddr, dport); ++} ++ ++ ++extern int ++efrm_filter_resource_instance(struct filter_resource *); ++ ++extern struct efrm_resource * ++efrm_filter_resource_to_resource(struct filter_resource *); ++ ++extern struct filter_resource * ++efrm_filter_resource_from_resource(struct efrm_resource *); ++ ++extern void ++efrm_filter_resource_free(struct filter_resource *); ++ ++ ++#endif /* __CI_EFRM_FILTER_H__ */ ++/*! \cidoxg_end */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/iobufset.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/iobufset.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,110 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides public API for iobufset resource. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#ifndef __CI_EFRM_IOBUFSET_H__
++#define __CI_EFRM_IOBUFSET_H__
++
++#include
++
++/*! Iobufset resource structure.
++ * Users should not access the structure fields directly, but use the API
++ * below.
++ * However, this structure should not be moved out of public headers,
++ * because part of the API (e.g. the efrm_iobufset_dma_addr function) is
++ * inline and is used in the fast-path code.
++ */
++struct iobufset_resource {
++	struct efrm_resource rs;
++	struct vi_resource *evq;
++	struct iobufset_resource *linked;
++	struct efhw_buffer_table_allocation buf_tbl_alloc;
++	unsigned int n_bufs;
++	unsigned int pages_per_contiguous_chunk;
++	unsigned chunk_order;
++	struct efhw_iopage bufs[1];
++	/*!< up to n_bufs can follow this, so this must be the last member */
++};
++
++#define iobufset_resource(rs1) \
++	container_of((rs1), struct iobufset_resource, rs)
++
++/*!
++ * Allocate iobufset resource.
++ *
++ * \param vi         VI that "owns" these buffers. Grabs a reference
++ *                   on success.
++ * \param linked     Uses memory from an existing iobufset. Grabs a
++ *                   reference on success.
++ * \param iobrs_out  pointer to return the new iobufset resource
++ *
++ * \return status code; if non-zero, iobrs_out is unchanged
++ */
++extern int
++efrm_iobufset_resource_alloc(int32_t n_pages,
++			     int32_t pages_per_contiguous_chunk,
++			     struct vi_resource *vi,
++			     struct iobufset_resource *linked,
++			     bool phys_addr_mode,
++			     struct iobufset_resource **iobrs_out);
++
++extern void efrm_iobufset_resource_free(struct iobufset_resource *);
++extern void efrm_iobufset_resource_release(struct iobufset_resource *);
++
++static inline char *
++efrm_iobufset_ptr(struct iobufset_resource *rs, unsigned offs)
++{
++	EFRM_ASSERT(offs < (unsigned)(rs->n_bufs << PAGE_SHIFT));
++	return efhw_iopage_ptr(&rs->bufs[offs >> PAGE_SHIFT])
++	    + (offs & (PAGE_SIZE - 1));
++}
++
++static inline char *efrm_iobufset_page_ptr(struct iobufset_resource *rs,
++					   unsigned page_i)
++{
++	EFRM_ASSERT(page_i < (unsigned)rs->n_bufs);
++	return efhw_iopage_ptr(&rs->bufs[page_i]);
++}
++
++static inline dma_addr_t
++efrm_iobufset_dma_addr(struct iobufset_resource *rs, unsigned offs)
++{
++	EFRM_ASSERT(offs < (unsigned)(rs->n_bufs << PAGE_SHIFT));
++	return efhw_iopage_dma_addr(&rs->bufs[offs >> PAGE_SHIFT])
++	    + (offs & (PAGE_SIZE - 1));
++}
++
++#endif /* __CI_EFRM_IOBUFSET_H__ */
+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/nic_set.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/nic_set.h	2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,104 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers -
++ * resource management for Xen backend, OpenOnload, etc
++ * (including support for SFE4001 10GBT NIC)
++ *
++ * This file provides public API for NIC sets.
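++ *
++ * The iobufset accessors above split a byte offset into a page index
++ * (offs >> PAGE_SHIFT) and an intra-page offset (offs & (PAGE_SIZE - 1)),
++ * so for a hypothetical iobufset "ibs" the DMA address of byte 5000 is
++ * simply:
++ *
++ *	dma_addr_t dma = efrm_iobufset_dma_addr(ibs, 5000);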
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ * 9501 Jeronimo Road, Suite 250,
++ * Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ *
++ * Certain parts of the driver were implemented by
++ * Alexandra Kossovsky
++ * OKTET Labs Ltd, Russia,
++ * http://oktetlabs.ru,
++ * by request of Solarflare Communications
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#ifndef __CI_EFRM_NIC_SET_H__
++#define __CI_EFRM_NIC_SET_H__
++
++#include
++#include
++#include
++
++/*--------------------------------------------------------------------
++ *
++ * efrm_nic_set_t - tracks which NICs something has been done on
++ *
++ *--------------------------------------------------------------------*/
++
++/* The internal structure of efrm_nic_set_t should not be referenced
++ * outside of this file. Add a new accessor if you need one. */
++typedef struct {
++	uint32_t nics;
++} efrm_nic_set_t;
++
++#if EFHW_MAX_NR_DEVS > 32
++#error change efrm_nic_set to handle EFHW_MAX_NR_DEVS number of devices
++#endif
++
++static inline bool
++efrm_nic_set_read(const efrm_nic_set_t *nic_set, unsigned index)
++{
++	EFRM_ASSERT(nic_set);
++	EFRM_ASSERT(index < EFHW_MAX_NR_DEVS && index < 32);
++	return (nic_set->nics & (1 << index)) ? true : false;
++}
++
++static inline void
++efrm_nic_set_write(efrm_nic_set_t *nic_set, unsigned index, bool value)
++{
++	EFRM_ASSERT(nic_set);
++	EFRM_ASSERT(index < EFHW_MAX_NR_DEVS && index < 32);
++	EFRM_ASSERT(value == false || value == true);
++	nic_set->nics = (nic_set->nics & (~(1 << index))) + (value << index);
++}
++
++static inline void efrm_nic_set_clear(efrm_nic_set_t *nic_set)
++{
++	nic_set->nics = 0;
++}
++
++static inline void efrm_nic_set_all(efrm_nic_set_t *nic_set)
++{
++	nic_set->nics = 0xffffffff;
++}
++
++static inline bool efrm_nic_set_is_all_clear(efrm_nic_set_t *nic_set)
++{
++	return nic_set->nics == 0 ?
true : false; ++} ++ ++#define EFRM_NIC_SET_FMT "%x" ++ ++static inline uint32_t efrm_nic_set_pri_arg(efrm_nic_set_t *nic_set) ++{ ++ return nic_set->nics; ++} ++ ++#define EFRM_FOR_EACH_NIC_INDEX_IN_SET(_set, _nic_i) \ ++ for ((_nic_i) = 0; (_nic_i) < EFHW_MAX_NR_DEVS; ++(_nic_i)) \ ++ if (efrm_nic_set_read((_set), (_nic_i))) ++ ++#endif /* __CI_EFRM_NIC_SET_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/nic_table.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/nic_table.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,98 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides public API for NIC table. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_NIC_TABLE_H__ ++#define __CI_EFRM_NIC_TABLE_H__ ++ ++#include ++#include ++ ++/*-------------------------------------------------------------------- ++ * ++ * struct efrm_nic_table - top level driver object keeping all NICs - ++ * implemented in driver_object.c ++ * ++ *--------------------------------------------------------------------*/ ++ ++/*! Comment? */ ++struct efrm_nic_table { ++ /*! nics attached to this driver */ ++ struct efhw_nic *nic[EFHW_MAX_NR_DEVS]; ++ /*! pointer to an arbitrary struct efhw_nic if one exists; ++ * for code which does not care which NIC it wants but ++ * still needs one. Note you cannot assume nic[0] exists. 
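++ *
++ * Sketch of marking one NIC in an efrm_nic_set_t and visiting every
++ * marked index (index 0 is arbitrary here):
++ *
++ *	efrm_nic_set_t set;
++ *	unsigned i;
++ *	efrm_nic_set_clear(&set);
++ *	efrm_nic_set_write(&set, 0, true);
++ *	EFRM_FOR_EACH_NIC_INDEX_IN_SET(&set, i)
++ *		EFRM_TRACE("%s: nic index %u is in the set", __func__, i);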
*/ ++ struct efhw_nic *a_nic; ++ uint32_t nic_count; /*!< number of nics attached to this driver */ ++ spinlock_t lock; /*!< lock for table modifications */ ++ atomic_t ref_count; /*!< refcount for users of nic table */ ++}; ++ ++/* Resource driver structures used by other drivers as well */ ++extern struct efrm_nic_table *efrm_nic_tablep; ++ ++static inline void efrm_nic_table_hold(void) ++{ ++ atomic_inc(&efrm_nic_tablep->ref_count); ++} ++ ++static inline void efrm_nic_table_rele(void) ++{ ++ atomic_dec(&efrm_nic_tablep->ref_count); ++} ++ ++static inline int efrm_nic_table_held(void) ++{ ++ return atomic_read(&efrm_nic_tablep->ref_count) != 0; ++} ++ ++/* Run code block _x multiple times with variable nic set to each ++ * registered NIC in turn. ++ * DO NOT "break" out of this loop early. */ ++#define EFRM_FOR_EACH_NIC(_nic_i, _nic) \ ++ for ((_nic_i) = (efrm_nic_table_hold(), 0); \ ++ (_nic_i) < EFHW_MAX_NR_DEVS || (efrm_nic_table_rele(), 0); \ ++ (_nic_i)++) \ ++ if (((_nic) = efrm_nic_tablep->nic[_nic_i])) ++ ++#define EFRM_FOR_EACH_NIC_IN_SET(_set, _i, _nic) \ ++ for ((_i) = (efrm_nic_table_hold(), 0); \ ++ (_i) < EFHW_MAX_NR_DEVS || (efrm_nic_table_rele(), 0); \ ++ ++(_i)) \ ++ if (((_nic) = efrm_nic_tablep->nic[_i]) && \ ++ efrm_nic_set_read((_set), (_i))) ++ ++#endif /* __CI_EFRM_NIC_TABLE_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/private.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/private.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,118 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides private API of efrm library -- resource handling. ++ * This API is not designed for use outside of SFC resource driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_PRIVATE_H__ ++#define __CI_EFRM_PRIVATE_H__ ++ ++#include ++#include ++#include ++#include ++ ++/*-------------------------------------------------------------------- ++ * ++ * create resource managers ++ * ++ *--------------------------------------------------------------------*/ ++ ++/*! 
Create a resource manager for various types of resources ++ */ ++extern int ++efrm_create_iobufset_resource_manager(struct efrm_resource_manager **out); ++ ++extern int ++efrm_create_filter_resource_manager(struct efrm_resource_manager **out); ++ ++extern int ++efrm_create_vi_resource_manager(struct efrm_resource_manager **out, ++ const struct vi_resource_dimensions *); ++ ++ ++/*-------------------------------------------------------------------- ++ * ++ * Instance pool management ++ * ++ *--------------------------------------------------------------------*/ ++ ++/*! Allocate instance pool. Use kfifo_vfree to destroy it. */ ++static inline int ++efrm_kfifo_id_ctor(struct kfifo **ids_out, ++ unsigned int base, unsigned int limit, spinlock_t *lock) ++{ ++ unsigned int i; ++ struct kfifo *ids; ++ unsigned char *buffer; ++ unsigned int size = roundup_pow_of_two((limit - base) * sizeof(int)); ++ EFRM_ASSERT(base <= limit); ++ buffer = vmalloc(size); ++ ids = kfifo_init(buffer, size, GFP_KERNEL, lock); ++ if (IS_ERR(ids)) ++ return PTR_ERR(ids); ++ for (i = base; i < limit; i++) ++ EFRM_VERIFY_EQ(__kfifo_put(ids, (unsigned char *)&i, ++ sizeof(i)), sizeof(i)); ++ ++ *ids_out = ids; ++ return 0; ++} ++ ++/*-------------------------------------------------------------------- ++ * ++ * Various private functions ++ * ++ *--------------------------------------------------------------------*/ ++ ++/*! Initialize the fields in the provided resource manager memory area ++ * \param rm The area of memory to be initialized ++ * \param dtor A method to destroy the resource manager ++ * \param name A Textual name for the resource manager ++ * \param type The type of resource managed ++ * \param initial_table_size Initial size of the ID table ++ * \param auto_destroy Destroy resource manager on driver onload iff true ++ * ++ * A default table size is provided if the value 0 is provided. ++ */ ++extern int ++efrm_resource_manager_ctor(struct efrm_resource_manager *rm, ++ void (*dtor)(struct efrm_resource_manager *), ++ const char *name, unsigned type); ++ ++extern void efrm_resource_manager_dtor(struct efrm_resource_manager *rm); ++ ++ ++#endif /* __CI_EFRM_PRIVATE_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/resource.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/resource.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,119 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides public interface of efrm library -- resource handling. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. 
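++ *
++ * Usage sketch for the instance-pool helper efrm_kfifo_id_ctor() above;
++ * "lock" is a caller-supplied spinlock, and IDs base..limit-1 come back
++ * out via the old-style __kfifo_get():
++ *
++ *	struct kfifo *ids;
++ *	unsigned int id;
++ *	int rc = efrm_kfifo_id_ctor(&ids, 0, 64, &lock);
++ *	if (rc < 0)
++ *		return rc;
++ *	if (__kfifo_get(ids, (unsigned char *)&id, sizeof(id)) != sizeof(id))
++ *		return -ENOSPC;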
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_RESOURCE_H__ ++#define __CI_EFRM_RESOURCE_H__ ++ ++/*-------------------------------------------------------------------- ++ * ++ * headers for type dependencies ++ * ++ *--------------------------------------------------------------------*/ ++ ++#include ++#include ++#include ++#include ++ ++#ifndef __ci_driver__ ++#error "Driver-only file" ++#endif ++ ++/*-------------------------------------------------------------------- ++ * ++ * struct efrm_resource - represents an allocated resource ++ * (eg. pinned pages of memory, or resource on a NIC) ++ * ++ *--------------------------------------------------------------------*/ ++ ++/*! Representation of an allocated resource */ ++struct efrm_resource { ++ int rs_ref_count; ++ efrm_resource_handle_t rs_handle; ++ struct efrm_client *rs_client; ++ struct list_head rs_client_link; ++ struct list_head rs_manager_link; ++}; ++ ++/*-------------------------------------------------------------------- ++ * ++ * managed resource abstraction ++ * ++ *--------------------------------------------------------------------*/ ++ ++/*! Factory for resources of a specific type */ ++struct efrm_resource_manager { ++ const char *rm_name; /*!< human readable only */ ++ spinlock_t rm_lock; ++#ifndef NDEBUG ++ unsigned rm_type; ++#endif ++ int rm_resources; ++ int rm_resources_hiwat; ++ struct list_head rm_resources_list; ++ /** ++ * Destructor for the resource manager. Other resource managers ++ * might be already dead, although the system guarantees that ++ * managers are destructed in the order by which they were created ++ */ ++ void (*rm_dtor)(struct efrm_resource_manager *); ++}; ++ ++#ifdef NDEBUG ++# define EFRM_RESOURCE_ASSERT_VALID(rs, rc_mbz) ++# define EFRM_RESOURCE_MANAGER_ASSERT_VALID(rm) ++#else ++/*! Check validity of resource and report on failure */ ++extern void efrm_resource_assert_valid(struct efrm_resource *, ++ int rc_may_be_zero, ++ const char *file, int line); ++# define EFRM_RESOURCE_ASSERT_VALID(rs, rc_mbz) \ ++ efrm_resource_assert_valid((rs), (rc_mbz), __FILE__, __LINE__) ++ ++/*! 
Check validity of resource manager and report on failure */
++extern void efrm_resource_manager_assert_valid(struct efrm_resource_manager *,
++					       const char *file, int line);
++# define EFRM_RESOURCE_MANAGER_ASSERT_VALID(rm) \
++	efrm_resource_manager_assert_valid((rm), __FILE__, __LINE__)
++#endif
++
++
++extern void efrm_resource_ref(struct efrm_resource *rs);
++extern int __efrm_resource_release(struct efrm_resource *);
++
++
++#endif /* __CI_EFRM_RESOURCE_H__ */
+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/resource_id.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/resource_id.h	2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,104 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers -
++ * resource management for Xen backend, OpenOnload, etc
++ * (including support for SFE4001 10GBT NIC)
++ *
++ * This file provides the public type and definitions for resource
++ * handles, and the definitions of resource types.
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ * 9501 Jeronimo Road, Suite 250,
++ * Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#ifndef __CI_DRIVER_EFRM_RESOURCE_ID_H__
++#define __CI_DRIVER_EFRM_RESOURCE_ID_H__
++
++/***********************************************************************
++ * Resource handles
++ *
++ * Resource handles are intended for identifying resources at kernel
++ * level, within the context of a particular NIC, particularly because
++ * for some resource types the low 16 bits correspond to hardware IDs.
++ * They were historically also used at user level (to identify resources
++ * within the context of a file descriptor), with a nonce stored in
++ * bits 16 to 27 (inclusive), but that approach is deprecated (though
++ * still alive!).
++ *
++ * The handle value 0 is used to mean "no resource".
++ ***********************************************************************/
++
++typedef struct {
++	uint32_t handle;
++} efrm_resource_handle_t;
++
++/* You may think these following functions should all have
++ * _HANDLE_ in their names, but really we are providing an abstract set
++ * of methods on a (hypothetical) efrm_resource_t object, with
++ * efrm_resource_handle_t being just the reference one holds to access
++ * the object (aka "this" or "self").
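++ *
++ * For example, given a handle h, the accessors defined below decode it
++ * as follows ("instance" being a caller variable):
++ *
++ *	if (EFRM_RESOURCE_TYPE(h) == EFRM_RESOURCE_VI)
++ *		instance = EFRM_RESOURCE_INSTANCE(h);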
++ */
++
++/* Below I use inline instead of macros where possible in order to get
++ * more type checking help from the compiler; hopefully we'll never
++ * have to rewrite these to use #define, as we've found some horrible
++ * compiler on which we cannot make static inline do the Right Thing (tm).
++ *
++ * For consistency and to avoid pointless change I spell these
++ * routines as macro names (CAPITALIZE_UNDERSCORED), which also serves
++ * to remind people they are compact and inlined.
++ */
++
++#define EFRM_RESOURCE_FMT "[rs:%08x]"
++
++static inline unsigned EFRM_RESOURCE_PRI_ARG(efrm_resource_handle_t h)
++{
++	return h.handle;
++}
++
++static inline unsigned EFRM_RESOURCE_INSTANCE(efrm_resource_handle_t h)
++{
++	return h.handle & 0x0000ffff;
++}
++
++static inline unsigned EFRM_RESOURCE_TYPE(efrm_resource_handle_t h)
++{
++	return (h.handle & 0xf0000000) >> 28;
++}
++
++/***********************************************************************
++ * Resource type codes
++ ***********************************************************************/
++
++#define EFRM_RESOURCE_IOBUFSET	0x0
++#define EFRM_RESOURCE_VI	0x1
++#define EFRM_RESOURCE_FILTER	0x2
++#define EFRM_RESOURCE_NUM	0x3	/* This isn't a resource! */
++
++#define EFRM_RESOURCE_NAME(type) \
++	((type) == EFRM_RESOURCE_IOBUFSET ? "IOBUFSET" : \
++	 (type) == EFRM_RESOURCE_VI ? "VI" : \
++	 (type) == EFRM_RESOURCE_FILTER ? "FILTER" : \
++	 "")
++
++#endif /* __CI_DRIVER_EFRM_RESOURCE_ID_H__ */
+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/sysdep.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/sysdep.h	2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,46 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers -
++ * resource management for Xen backend, OpenOnload, etc
++ * (including support for SFE4001 10GBT NIC)
++ *
++ * This file provides Linux-like system-independent API for efrm library.
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ * 9501 Jeronimo Road, Suite 250,
++ * Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ *
++ * Certain parts of the driver were implemented by
++ * Alexandra Kossovsky
++ * OKTET Labs Ltd, Russia,
++ * http://oktetlabs.ru,
++ * by request of Solarflare Communications
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_SYSDEP_H__ ++#define __CI_EFRM_SYSDEP_H__ ++ ++/* Spinlocks are defined in efhw/sysdep.h */ ++#include ++ ++#include ++ ++#endif /* __CI_EFRM_SYSDEP_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/sysdep_linux.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/sysdep_linux.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,93 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides version-independent Linux kernel API for efrm library. ++ * Only kernels >=2.6.9 are supported. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Kfifo API is partially stolen from linux-2.6.22/include/linux/list.h ++ * Copyright (C) 2004 Stelian Pop ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_SYSDEP_LINUX_H__ ++#define __CI_EFRM_SYSDEP_LINUX_H__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++/******************************************************************** ++ * ++ * List API ++ * ++ ********************************************************************/ ++ ++static inline struct list_head *list_pop(struct list_head *list) ++{ ++ struct list_head *link = list->next; ++ list_del(link); ++ return link; ++} ++ ++static inline struct list_head *list_pop_tail(struct list_head *list) ++{ ++ struct list_head *link = list->prev; ++ list_del(link); ++ return link; ++} ++ ++/******************************************************************** ++ * ++ * Kfifo API ++ * ++ ********************************************************************/ ++ ++static inline void kfifo_vfree(struct kfifo *fifo) ++{ ++ vfree(fifo->buffer); ++ kfree(fifo); ++} ++ ++#endif /* __CI_EFRM_SYSDEP_LINUX_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,157 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains public API for VI resource. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
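++ *
++ * list_pop() above detaches and returns the head link; callers recover
++ * the containing object with list_entry(), e.g. for a hypothetical
++ * struct foo with a "link" member queued on "queue":
++ *
++ *	struct foo *f = NULL;
++ *	if (!list_empty(&queue))
++ *		f = list_entry(list_pop(&queue), struct foo, link);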
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_VI_RESOURCE_H__ ++#define __CI_EFRM_VI_RESOURCE_H__ ++ ++#include ++#include ++#include ++ ++struct vi_resource; ++ ++/* Make these inline instead of macros for type checking */ ++static inline struct vi_resource * ++efrm_to_vi_resource(struct efrm_resource *rs) ++{ ++ EFRM_ASSERT(EFRM_RESOURCE_TYPE(rs->rs_handle) == EFRM_RESOURCE_VI); ++ return (struct vi_resource *) rs; ++} ++static inline struct ++efrm_resource *efrm_from_vi_resource(struct vi_resource *rs) ++{ ++ return (struct efrm_resource *)rs; ++} ++ ++#define EFAB_VI_RESOURCE_INSTANCE(virs) \ ++ EFRM_RESOURCE_INSTANCE(efrm_from_vi_resource(virs)->rs_handle) ++ ++#define EFAB_VI_RESOURCE_PRI_ARG(virs) \ ++ EFRM_RESOURCE_PRI_ARG(efrm_from_vi_resource(virs)->rs_handle) ++ ++extern int ++efrm_vi_resource_alloc(struct efrm_client *client, ++ struct vi_resource *evq_virs, ++ uint16_t vi_flags, int32_t evq_capacity, ++ int32_t txq_capacity, int32_t rxq_capacity, ++ uint8_t tx_q_tag, uint8_t rx_q_tag, ++ struct vi_resource **virs_in_out, ++ uint32_t *out_io_mmap_bytes, ++ uint32_t *out_mem_mmap_bytes, ++ uint32_t *out_txq_capacity, ++ uint32_t *out_rxq_capacity); ++ ++extern void efrm_vi_resource_free(struct vi_resource *); ++extern void efrm_vi_resource_release(struct vi_resource *); ++ ++ ++/*-------------------------------------------------------------------- ++ * ++ * eventq handling ++ * ++ *--------------------------------------------------------------------*/ ++ ++/*! Reset an event queue and clear any associated timers */ ++extern void efrm_eventq_reset(struct vi_resource *virs); ++ ++/*! Register a kernel-level handler for the event queue. This function is ++ * called whenever a timer expires, or whenever the event queue is woken ++ * but no thread is blocked on it. ++ * ++ * This function returns -EBUSY if a callback is already installed. ++ * ++ * \param rs Event-queue resource ++ * \param handler Callback-handler ++ * \param arg Argument to pass to callback-handler ++ * \return Status code ++ */ ++extern int ++efrm_eventq_register_callback(struct vi_resource *rs, ++ void (*handler)(void *arg, int is_timeout, ++ struct efhw_nic *nic), ++ void *arg); ++ ++/*! Kill the kernel-level callback. ++ * ++ * This function stops the timer from running and unregisters the callback ++ * function. It waits for any running timeout handlers to complete before ++ * returning. ++ * ++ * \param rs Event-queue resource ++ * \return Nothing ++ */ ++extern void efrm_eventq_kill_callback(struct vi_resource *rs); ++ ++/*! Ask the NIC to generate a wakeup when an event is next delivered. */ ++extern void efrm_eventq_request_wakeup(struct vi_resource *rs, ++ unsigned current_ptr); ++ ++/*! Register a kernel-level handler for flush completions. ++ * \TODO Currently, it is unsafe to install a callback more than once. ++ * ++ * \param rs VI resource being flushed. ++ * \param handler Callback handler function. ++ * \param arg Argument to be passed to handler. ++ */ ++extern void ++efrm_vi_register_flush_callback(struct vi_resource *rs, ++ void (*handler)(void *), ++ void *arg); ++ ++int efrm_vi_resource_flush_retry(struct vi_resource *virs); ++ ++/*! Comment? */ ++extern int efrm_pt_flush(struct vi_resource *); ++ ++/*! Comment? 
*/ ++extern int efrm_pt_pace(struct vi_resource *, unsigned int val); ++ ++uint32_t efrm_vi_rm_txq_bytes(struct vi_resource *virs ++ /*,struct efhw_nic *nic */); ++uint32_t efrm_vi_rm_rxq_bytes(struct vi_resource *virs ++ /*,struct efhw_nic *nic */); ++uint32_t efrm_vi_rm_evq_bytes(struct vi_resource *virs ++ /*,struct efhw_nic *nic */); ++ ++ ++/* Fill [out_vi_data] with information required to allow a VI to be init'd. ++ * [out_vi_data] must ref at least VI_MAPPINGS_SIZE bytes. ++ */ ++extern void efrm_vi_resource_mappings(struct vi_resource *, void *out_vi_data); ++ ++ ++#endif /* __CI_EFRM_VI_RESOURCE_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource_manager.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource_manager.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,155 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains type definitions for VI resource. These types ++ * may be used outside of the SFC resource driver, but such use is not ++ * recommended. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_DRIVER_EFAB_VI_RESOURCE_MANAGER_H__ ++#define __CI_DRIVER_EFAB_VI_RESOURCE_MANAGER_H__ ++ ++#include ++#include ++ ++ ++#define EFRM_VI_RM_DMA_QUEUE_COUNT 2 ++#define EFRM_VI_RM_DMA_QUEUE_TX 0 ++#define EFRM_VI_RM_DMA_QUEUE_RX 1 ++ ++/** Numbers of bits which can be set in the evq_state member of ++ * vi_resource_evq_info. */ ++enum { ++ /** This bit is set if a wakeup has been requested on the NIC. */ ++ VI_RESOURCE_EVQ_STATE_WAKEUP_PENDING, ++ /** This bit is set if the wakeup is valid for the sleeping ++ * process. */ ++ VI_RESOURCE_EVQ_STATE_CALLBACK_REGISTERED, ++ /** This bit is set if a wakeup or timeout event is currently being ++ * processed. */ ++ VI_RESOURCE_EVQ_STATE_BUSY, ++}; ++#define VI_RESOURCE_EVQ_STATE(X) \ ++ (((int32_t)1) << (VI_RESOURCE_EVQ_STATE_##X)) ++ ++ ++/*! Global information for the VI resource manager. 
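++ * A single instance of this structure exists per driver (the
++ * efrm_vi_manager declared in vi_resource_private.h).  It hands out
++ * VI instance numbers from the two kfifos below and tracks VIs whose
++ * DMA queues are waiting for, or have outstanding, hardware flushes.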
*/
++struct vi_resource_manager {
++	struct efrm_resource_manager rm;
++
++	struct kfifo *instances_with_timer;
++	int with_timer_base;
++	int with_timer_limit;
++	struct kfifo *instances_with_interrupt;
++	int with_interrupt_base;
++	int with_interrupt_limit;
++
++	bool iscsi_dmaq_instance_is_free;
++
++	/* We keep VI resources which need flushing on these lists.  The VI
++	 * is put on the outstanding list when the flush request is issued
++	 * to the hardware and removed when the flush event arrives.  The
++	 * hardware can only handle a limited number of RX flush requests at
++	 * once, so VIs are placed in the waiting list until the flush can
++	 * be issued.  Flushes can be requested by the client or internally
++	 * by the VI resource manager.  In the former case, the reference
++	 * count must be non-zero for the duration of the flush and in the
++	 * latter case, the reference count must be zero. */
++	struct list_head rx_flush_waiting_list;
++	struct list_head rx_flush_outstanding_list;
++	struct list_head tx_flush_outstanding_list;
++	int rx_flush_outstanding_count;
++
++	/* Once the flush has happened we push the close into the work queue
++	 * so it's OK on Windows to free the resources (Bug 3469).  Resources
++	 * on this list have zero reference count.
++	 */
++	struct list_head close_pending;
++	struct work_struct work_item;
++	struct workqueue_struct *workqueue;
++};
++
++struct vi_resource_nic_info {
++	struct eventq_resource_hardware evq_pages;
++	struct efhw_iopages dmaq_pages[EFRM_VI_RM_DMA_QUEUE_COUNT];
++};
++
++struct vi_resource {
++	/* Some macros make the assumption that the struct efrm_resource is
++	 * the first member of a struct vi_resource. */
++	struct efrm_resource rs;
++	atomic_t evq_refs;	/*!< Number of users of the event queue. */
++
++	uint32_t bar_mmap_bytes;
++	uint32_t mem_mmap_bytes;
++
++	int32_t evq_capacity;
++	int32_t dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_COUNT];
++
++	uint8_t dmaq_tag[EFRM_VI_RM_DMA_QUEUE_COUNT];
++	uint16_t flags;
++
++	/* We keep PT endpoints that have been destroyed on a list
++	 * until we have seen their TX and RX DMAQs flush complete
++	 * (see Bug 1217).
++	 */
++	struct list_head rx_flush_link;
++	struct list_head tx_flush_link;
++	int rx_flushing;
++	int rx_flush_outstanding;
++	int tx_flushing;
++	uint64_t flush_time;
++	int flush_count;
++
++	void (*flush_callback_fn)(void *);
++	void *flush_callback_arg;
++
++	void (*evq_callback_fn) (void *arg, int is_timeout,
++				 struct efhw_nic *nic);
++	void *evq_callback_arg;
++
++	struct vi_resource *evq_virs;	/*!< EVQ for DMA queues */
++
++	struct efhw_buffer_table_allocation
++	    dmaq_buf_tbl_alloc[EFRM_VI_RM_DMA_QUEUE_COUNT];
++
++	struct vi_resource_nic_info nic_info;
++};
++
++#undef vi_resource
++#define vi_resource(rs1)  container_of((rs1), struct vi_resource, rs)
++
++static inline dma_addr_t
++efrm_eventq_dma_addr(struct vi_resource *virs)
++{
++	struct eventq_resource_hardware *hw;
++	hw = &virs->nic_info.evq_pages;
++	return efhw_iopages_dma_addr(&hw->iobuff) + hw->iobuff_off;
++}
++
++#endif /* __CI_DRIVER_EFAB_VI_RESOURCE_MANAGER_H__ */
+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource_private.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource_private.h	2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,65 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers
- ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains private API for VI resource. The API is not designed ++ * to be used outside of the SFC resource driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __CI_EFRM_VI_RESOURCE_PRIVATE_H__ ++#define __CI_EFRM_VI_RESOURCE_PRIVATE_H__ ++ ++#include ++#include ++ ++extern struct vi_resource_manager *efrm_vi_manager; ++ ++/*************************************************************************/ ++ ++extern void efrm_vi_rm_delayed_free(struct work_struct *data); ++ ++extern void efrm_vi_rm_salvage_flushed_vis(void); ++ ++void efrm_vi_rm_free_flushed_resource(struct vi_resource *virs); ++ ++void efrm_vi_rm_init_dmaq(struct vi_resource *virs, int queue_index, ++ struct efhw_nic *nic); ++ ++/*! Wakeup handler */ ++extern void efrm_handle_wakeup_event(struct efhw_nic *nic, unsigned id); ++ ++/*! Timeout handler */ ++extern void efrm_handle_timeout_event(struct efhw_nic *nic, unsigned id); ++ ++/*! DMA flush handler */ ++extern void efrm_handle_dmaq_flushed(struct efhw_nic *nic, unsigned id, ++ int rx_flush); ++ ++/*! SRAM update handler */ ++extern void efrm_handle_sram_event(struct efhw_nic *nic); ++ ++#endif /* __CI_EFRM_VI_RESOURCE_PRIVATE_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/driver_object.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/driver_object.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,328 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains support for the global driver variables. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. 
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include "efrm_internal.h"
++
++/* We use #define rather than static inline here so that the Windows
++ * "prefast" compiler can see its own locking primitive when these
++ * two functions are used (and then perform extra checking where they
++ * are used).
++ *
++ * Both macros operate on an irq_flags_t.
++ */
++
++#define efrm_driver_lock(irqlock_state) \
++	spin_lock_irqsave(&efrm_nic_tablep->lock, irqlock_state)
++
++#define efrm_driver_unlock(irqlock_state)		\
++	spin_unlock_irqrestore(&efrm_nic_tablep->lock,	\
++			       irqlock_state);
++
++/* These routines are all methods on the architecturally singleton
++   global variables: efrm_nic_table, efrm_rm_table.
++
++   I hope we never find a driver model that does not allow global
++   structure variables :) (but that would break almost every driver I've
++   ever seen).
++*/
++
++/*! Exported driver state */
++static struct efrm_nic_table efrm_nic_table;
++struct efrm_nic_table *efrm_nic_tablep;
++EXPORT_SYMBOL(efrm_nic_tablep);
++
++
++/* Internal table with resource managers.
++ * We'd like to not export it, but we are still using efrm_rm_table
++ * in the char driver.  So, it is declared in the private header with
++ * a purpose. */
++struct efrm_resource_manager *efrm_rm_table[EFRM_RESOURCE_NUM];
++EXPORT_SYMBOL(efrm_rm_table);
++
++
++/* List of registered nics. */
++static LIST_HEAD(efrm_nics);
++
++
++void efrm_driver_ctor(void)
++{
++	efrm_nic_tablep = &efrm_nic_table;
++	spin_lock_init(&efrm_nic_tablep->lock);
++	EFRM_TRACE("%s: driver created", __func__);
++}
++
++void efrm_driver_dtor(void)
++{
++	EFRM_ASSERT(!efrm_nic_table_held());
++
++	spin_lock_destroy(&efrm_nic_tablep->lock);
++	memset(&efrm_nic_table, 0, sizeof(efrm_nic_table));
++	memset(&efrm_rm_table, 0, sizeof(efrm_rm_table));
++	EFRM_TRACE("%s: driver deleted", __func__);
++}
++
++int efrm_driver_register_nic(struct efrm_nic *rnic, int nic_index,
++			     int ifindex)
++{
++	struct efhw_nic *nic = &rnic->efhw_nic;
++	struct efrm_nic_per_vi *vis;
++	int max_vis, rc = 0;
++	irq_flags_t lock_flags;
++
++	EFRM_ASSERT(nic_index >= 0);
++	EFRM_ASSERT(ifindex >= 0);
++
++	max_vis = 4096;		/* TODO: Get runtime value.
*/ ++ vis = vmalloc(max_vis * sizeof(rnic->vis[0])); ++ if (vis == NULL) { ++ EFRM_ERR("%s: Out of memory", __func__); ++ return -ENOMEM; ++ } ++ ++ efrm_driver_lock(lock_flags); ++ ++ if (efrm_nic_table_held()) { ++ EFRM_ERR("%s: driver object is in use", __func__); ++ rc = -EBUSY; ++ goto done; ++ } ++ ++ if (efrm_nic_tablep->nic_count == EFHW_MAX_NR_DEVS) { ++ EFRM_ERR("%s: filled up NIC table size %d", __func__, ++ EFHW_MAX_NR_DEVS); ++ rc = -E2BIG; ++ goto done; ++ } ++ ++ rnic->vis = vis; ++ ++ EFRM_ASSERT(efrm_nic_tablep->nic[nic_index] == NULL); ++ efrm_nic_tablep->nic[nic_index] = nic; ++ nic->index = nic_index; ++ nic->ifindex = ifindex; ++ ++ if (efrm_nic_tablep->a_nic == NULL) ++ efrm_nic_tablep->a_nic = nic; ++ ++ efrm_nic_tablep->nic_count++; ++ ++ INIT_LIST_HEAD(&rnic->clients); ++ list_add(&rnic->link, &efrm_nics); ++ ++ efrm_driver_unlock(lock_flags); ++ return 0; ++ ++done: ++ efrm_driver_unlock(lock_flags); ++ vfree(vis); ++ return rc; ++} ++ ++int efrm_driver_unregister_nic(struct efrm_nic *rnic) ++{ ++ struct efhw_nic *nic = &rnic->efhw_nic; ++ int rc = 0; ++ int nic_index = nic->index; ++ irq_flags_t lock_flags; ++ ++ EFRM_ASSERT(nic_index >= 0); ++ ++ efrm_driver_lock(lock_flags); ++ ++ if (efrm_nic_table_held()) { ++ EFRM_ERR("%s: driver object is in use", __func__); ++ rc = -EBUSY; ++ goto done; ++ } ++ if (!list_empty(&rnic->clients)) { ++ EFRM_ERR("%s: nic has active clients", __func__); ++ rc = -EBUSY; ++ goto done; ++ } ++ ++ EFRM_ASSERT(efrm_nic_tablep->nic[nic_index] == nic); ++ EFRM_ASSERT(list_empty(&rnic->clients)); ++ ++ list_del(&rnic->link); ++ ++ nic->index = -1; ++ efrm_nic_tablep->nic[nic_index] = NULL; ++ ++ --efrm_nic_tablep->nic_count; ++ ++ if (efrm_nic_tablep->a_nic == nic) { ++ if (efrm_nic_tablep->nic_count == 0) { ++ efrm_nic_tablep->a_nic = NULL; ++ } else { ++ for (nic_index = 0; nic_index < EFHW_MAX_NR_DEVS; ++ nic_index++) { ++ if (efrm_nic_tablep->nic[nic_index] != NULL) ++ efrm_nic_tablep->a_nic = ++ efrm_nic_tablep->nic[nic_index]; ++ } ++ EFRM_ASSERT(efrm_nic_tablep->a_nic); ++ } ++ } ++ ++done: ++ efrm_driver_unlock(lock_flags); ++ return rc; ++} ++ ++ ++int efrm_nic_pre_reset(struct efhw_nic *nic) ++{ ++ struct efrm_nic *rnic = efrm_nic(nic); ++ struct efrm_client *client; ++ struct efrm_resource *rs; ++ struct list_head *client_link; ++ struct list_head *rs_link; ++ irq_flags_t lock_flags; ++ ++ spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags); ++ list_for_each(client_link, &rnic->clients) { ++ client = container_of(client_link, struct efrm_client, link); ++ EFRM_ERR("%s: client %p", __func__, client); ++ if (client->callbacks->pre_reset) ++ client->callbacks->pre_reset(client, client->user_data); ++ list_for_each(rs_link, &client->resources) { ++ rs = container_of(rs_link, struct efrm_resource, ++ rs_client_link); ++ EFRM_ERR("%s: resource %p", __func__, rs); ++ /* TODO: mark rs defunct */ ++ } ++ } ++ spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags); ++ ++ return 0; ++} ++ ++ ++int efrm_nic_stop(struct efhw_nic *nic) ++{ ++ /* TODO */ ++ return 0; ++} ++ ++ ++int efrm_nic_resume(struct efhw_nic *nic) ++{ ++ /* TODO */ ++ return 0; ++} ++ ++ ++static void efrm_client_nullcb(struct efrm_client *client, void *user_data) ++{ ++} ++ ++static struct efrm_client_callbacks efrm_null_callbacks = { ++ efrm_client_nullcb, ++ efrm_client_nullcb, ++ efrm_client_nullcb ++}; ++ ++ ++int efrm_client_get(int ifindex, struct efrm_client_callbacks *callbacks, ++ void *user_data, struct efrm_client **client_out) ++{ ++ struct efrm_nic 
*n, *rnic = NULL; ++ irq_flags_t lock_flags; ++ struct list_head *link; ++ struct efrm_client *client; ++ ++ if (callbacks == NULL) ++ callbacks = &efrm_null_callbacks; ++ ++ client = kmalloc(sizeof(*client), GFP_KERNEL); ++ if (client == NULL) ++ return -ENOMEM; ++ ++ spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags); ++ list_for_each(link, &efrm_nics) { ++ n = container_of(link, struct efrm_nic, link); ++ if (n->efhw_nic.ifindex == ifindex || ifindex < 0) { ++ rnic = n; ++ break; ++ } ++ } ++ if (rnic) { ++ client->user_data = user_data; ++ client->callbacks = callbacks; ++ client->nic = &rnic->efhw_nic; ++ client->ref_count = 1; ++ INIT_LIST_HEAD(&client->resources); ++ list_add(&client->link, &rnic->clients); ++ } ++ spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags); ++ ++ if (rnic == NULL) ++ return -ENODEV; ++ ++ *client_out = client; ++ return 0; ++} ++EXPORT_SYMBOL(efrm_client_get); ++ ++ ++void efrm_client_put(struct efrm_client *client) ++{ ++ irq_flags_t lock_flags; ++ ++ EFRM_ASSERT(client->ref_count > 0); ++ ++ spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags); ++ if (--client->ref_count > 0) ++ client = NULL; ++ else ++ list_del(&client->link); ++ spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags); ++ kfree(client); ++} ++EXPORT_SYMBOL(efrm_client_put); ++ ++ ++struct efhw_nic *efrm_client_get_nic(struct efrm_client *client) ++{ ++ return client->nic; ++} ++EXPORT_SYMBOL(efrm_client_get_nic); +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/driverlink_new.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/driverlink_new.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,260 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains driverlink code which interacts with the sfc network ++ * driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include "linux_resource_internal.h" ++#include "driverlink_api.h" ++#include "kernel_compat.h" ++#include ++ ++#include ++#include ++#include ++ ++/* The DL driver and associated calls */ ++static int efrm_dl_probe(struct efx_dl_device *efrm_dev, ++ const struct net_device *net_dev, ++ const struct efx_dl_device_info *dev_info, ++ const char *silicon_rev); ++ ++static void efrm_dl_remove(struct efx_dl_device *efrm_dev); ++ ++static void efrm_dl_reset_suspend(struct efx_dl_device *efrm_dev); ++ ++static void efrm_dl_reset_resume(struct efx_dl_device *efrm_dev, int ok); ++ ++static void efrm_dl_mtu_changed(struct efx_dl_device *, int); ++static void efrm_dl_event_falcon(struct efx_dl_device *efx_dev, void *p_event); ++ ++static struct efx_dl_driver efrm_dl_driver = { ++ .name = "resource", ++ .probe = efrm_dl_probe, ++ .remove = efrm_dl_remove, ++ .reset_suspend = efrm_dl_reset_suspend, ++ .reset_resume = efrm_dl_reset_resume ++}; ++ ++static void ++init_vi_resource_dimensions(struct vi_resource_dimensions *rd, ++ const struct efx_dl_falcon_resources *res) ++{ ++ rd->evq_timer_min = res->evq_timer_min; ++ rd->evq_timer_lim = res->evq_timer_lim; ++ rd->evq_int_min = res->evq_int_min; ++ rd->evq_int_lim = res->evq_int_lim; ++ rd->rxq_min = res->rxq_min; ++ rd->rxq_lim = res->rxq_lim; ++ rd->txq_min = res->txq_min; ++ rd->txq_lim = res->txq_lim; ++ EFRM_TRACE ++ ("Using evq_int(%d-%d) evq_timer(%d-%d) RXQ(%d-%d) TXQ(%d-%d)", ++ res->evq_int_min, res->evq_int_lim, res->evq_timer_min, ++ res->evq_timer_lim, res->rxq_min, res->rxq_lim, res->txq_min, ++ res->txq_lim); ++} ++ ++static int ++efrm_dl_probe(struct efx_dl_device *efrm_dev, ++ const struct net_device *net_dev, ++ const struct efx_dl_device_info *dev_info, ++ const char *silicon_rev) ++{ ++ struct vi_resource_dimensions res_dim; ++ struct efx_dl_falcon_resources *res; ++ struct linux_efhw_nic *lnic; ++ struct pci_dev *dev; ++ struct efhw_nic *nic; ++ unsigned probe_flags = 0; ++ int non_irq_evq; ++ int rc; ++ ++ efrm_dev->priv = NULL; ++ ++ efx_dl_search_device_info(dev_info, EFX_DL_FALCON_RESOURCES, ++ struct efx_dl_falcon_resources, ++ hdr, res); ++ ++ if (res == NULL) { ++ EFRM_ERR("%s: Unable to find falcon driverlink resources", ++ __func__); ++ return -EINVAL; ++ } ++ ++ if (res->flags & EFX_DL_FALCON_USE_MSI) ++ probe_flags |= NIC_FLAG_TRY_MSI; ++ ++ dev = efrm_dev->pci_dev; ++ if (res->flags & EFX_DL_FALCON_DUAL_FUNC) { ++ unsigned vendor = dev->vendor; ++ EFRM_ASSERT(dev->bus != NULL); ++ dev = NULL; ++ ++ while ((dev = pci_get_device(vendor, FALCON_S_DEVID, dev)) ++ != NULL) { ++ EFRM_ASSERT(dev->bus != NULL); ++ /* With PCIe (since it's point to point) ++ * the slot ID is usually 0 and ++ * the bus ID changes NIC to NIC, so we really ++ * need to check both. 
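++			 * The loop below therefore walks every device with
++			 * the secondary-function device ID and matches on
++			 * both the bus number and the slot of the primary
++			 * function.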
*/ ++ if (PCI_SLOT(dev->devfn) == ++ PCI_SLOT(efrm_dev->pci_dev->devfn) ++ && dev->bus->number == ++ efrm_dev->pci_dev->bus->number) ++ break; ++ } ++ if (dev == NULL) { ++ EFRM_ERR("%s: Unable to find falcon secondary " ++ "PCI device.", __func__); ++ return -ENODEV; ++ } ++ pci_dev_put(dev); ++ } ++ ++ init_vi_resource_dimensions(&res_dim, res); ++ ++ EFRM_ASSERT(res_dim.evq_timer_lim > res_dim.evq_timer_min); ++ res_dim.evq_timer_lim--; ++ non_irq_evq = res_dim.evq_timer_lim; ++ ++ rc = efrm_nic_add(dev, probe_flags, net_dev->dev_addr, &lnic, ++ res->biu_lock, ++ res->buffer_table_min, res->buffer_table_lim, ++ non_irq_evq, &res_dim); ++ if (rc != 0) ++ return rc; ++ ++ nic = &lnic->efrm_nic.efhw_nic; ++ nic->mtu = net_dev->mtu + ETH_HLEN; ++ nic->net_driver_dev = efrm_dev; ++ nic->ifindex = net_dev->ifindex; ++#ifdef CONFIG_NET_NS ++ nic->nd_net = net_dev->nd_net; ++#endif ++ efrm_dev->priv = nic; ++ ++ /* Register a callback so we're told when MTU changes. ++ * We dynamically allocate efx_dl_callbacks, because ++ * the callbacks that we want depends on the NIC type. ++ */ ++ lnic->dl_callbacks = ++ kmalloc(sizeof(struct efx_dl_callbacks), GFP_KERNEL); ++ if (!lnic->dl_callbacks) { ++ EFRM_ERR("Out of memory (%s)", __func__); ++ efrm_nic_del(lnic); ++ return -ENOMEM; ++ } ++ memset(lnic->dl_callbacks, 0, sizeof(*lnic->dl_callbacks)); ++ lnic->dl_callbacks->mtu_changed = efrm_dl_mtu_changed; ++ ++ if ((res->flags & EFX_DL_FALCON_DUAL_FUNC) == 0) { ++ /* Net driver receives all management events. ++ * Register a callback to receive the ones ++ * we're interested in. */ ++ lnic->dl_callbacks->event = efrm_dl_event_falcon; ++ } ++ ++ rc = efx_dl_register_callbacks(efrm_dev, lnic->dl_callbacks); ++ if (rc < 0) { ++ EFRM_ERR("%s: efx_dl_register_callbacks failed (%d)", ++ __func__, rc); ++ kfree(lnic->dl_callbacks); ++ efrm_nic_del(lnic); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++/* When we unregister ourselves on module removal, this function will be ++ * called for all the devices we claimed */ ++static void efrm_dl_remove(struct efx_dl_device *efrm_dev) ++{ ++ struct efhw_nic *nic = efrm_dev->priv; ++ struct linux_efhw_nic *lnic = linux_efhw_nic(nic); ++ EFRM_TRACE("%s called", __func__); ++ if (lnic->dl_callbacks) { ++ efx_dl_unregister_callbacks(efrm_dev, lnic->dl_callbacks); ++ kfree(lnic->dl_callbacks); ++ } ++ if (efrm_dev->priv) ++ efrm_nic_del(lnic); ++ EFRM_TRACE("%s OK", __func__); ++} ++ ++static void efrm_dl_reset_suspend(struct efx_dl_device *efrm_dev) ++{ ++ EFRM_NOTICE("%s:", __func__); ++} ++ ++static void efrm_dl_reset_resume(struct efx_dl_device *efrm_dev, int ok) ++{ ++ EFRM_NOTICE("%s: ok=%d", __func__, ok); ++} ++ ++int efrm_driverlink_register(void) ++{ ++ EFRM_TRACE("%s:", __func__); ++ return efx_dl_register_driver(&efrm_dl_driver); ++} ++ ++void efrm_driverlink_unregister(void) ++{ ++ EFRM_TRACE("%s:", __func__); ++ efx_dl_unregister_driver(&efrm_dl_driver); ++} ++ ++static void efrm_dl_mtu_changed(struct efx_dl_device *efx_dev, int mtu) ++{ ++ struct efhw_nic *nic = efx_dev->priv; ++ ++ ASSERT_RTNL(); /* Since we're looking at efx_dl_device::port_net_dev */ ++ ++ EFRM_TRACE("%s: old=%d new=%d", __func__, nic->mtu, mtu + ETH_HLEN); ++ /* If this happened we must have agreed to it above */ ++ nic->mtu = mtu + ETH_HLEN; ++} ++ ++static void efrm_dl_event_falcon(struct efx_dl_device *efx_dev, void *p_event) ++{ ++ struct efhw_nic *nic = efx_dev->priv; ++ struct linux_efhw_nic *lnic = linux_efhw_nic(nic); ++ efhw_event_t *ev = p_event; ++ ++ switch 
(FALCON_EVENT_CODE(ev)) { ++ case FALCON_EVENT_CODE_CHAR: ++ falcon_handle_char_event(nic, lnic->ev_handlers, ev); ++ break; ++ default: ++ EFRM_WARN("%s: unknown event type=%x", __func__, ++ (unsigned)FALCON_EVENT_CODE(ev)); ++ break; ++ } ++} +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/efrm_internal.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/efrm_internal.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,41 @@ ++#ifndef __EFRM_INTERNAL_H__ ++#define __EFRM_INTERNAL_H__ ++ ++ ++struct filter_resource { ++ struct efrm_resource rs; ++ struct vi_resource *pt; ++ int filter_idx; ++}; ++ ++#define filter_resource(rs1) container_of((rs1), struct filter_resource, rs) ++ ++ ++struct efrm_client { ++ void *user_data; ++ struct list_head link; ++ struct efrm_client_callbacks *callbacks; ++ struct efhw_nic *nic; ++ int ref_count; ++ struct list_head resources; ++}; ++ ++ ++extern void efrm_client_add_resource(struct efrm_client *, ++ struct efrm_resource *); ++ ++extern int efrm_buffer_table_size(void); ++ ++ ++static inline void efrm_resource_init(struct efrm_resource *rs, ++ int type, int instance) ++{ ++ EFRM_ASSERT(instance >= 0); ++ EFRM_ASSERT(type >= 0 && type < EFRM_RESOURCE_NUM); ++ rs->rs_ref_count = 1; ++ rs->rs_handle.handle = (type << 28u) | ++ (((unsigned)jiffies & 0xfff) << 16) | instance; ++} ++ ++ ++#endif /* __EFRM_INTERNAL_H__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/efx_vi_shm.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/efx_vi_shm.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,707 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides implementation of EFX VI API, used from Xen ++ * acceleration driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include "linux_resource_internal.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include "kernel_compat.h" ++ ++#if EFX_VI_STATIC_FILTERS ++struct filter_list_t { ++ struct filter_list_t *next; ++ struct filter_resource *fres; ++}; ++#endif ++ ++struct efx_vi_state { ++ struct vi_resource *vi_res; ++ ++ int ifindex; ++ struct efrm_client *efrm_client; ++ struct efhw_nic *nic; ++ ++ void (*callback_fn)(void *arg, int is_timeout); ++ void *callback_arg; ++ ++ struct completion flush_completion; ++ ++#if EFX_VI_STATIC_FILTERS ++ struct filter_list_t fres[EFX_VI_STATIC_FILTERS]; ++ struct filter_list_t *free_fres; ++ struct filter_list_t *used_fres; ++#endif ++}; ++ ++static void efx_vi_flush_complete(void *state_void) ++{ ++ struct efx_vi_state *state = (struct efx_vi_state *)state_void; ++ ++ complete(&state->flush_completion); ++} ++ ++static inline int alloc_ep(struct efx_vi_state *state) ++{ ++ int rc; ++ ++ rc = efrm_vi_resource_alloc(state->efrm_client, NULL, EFHW_VI_JUMBO_EN, ++ efx_vi_eventq_size, ++ FALCON_DMA_Q_DEFAULT_TX_SIZE, ++ FALCON_DMA_Q_DEFAULT_RX_SIZE, ++ 0, 0, &state->vi_res, NULL, NULL, NULL, ++ NULL); ++ if (rc < 0) { ++ EFRM_ERR("%s: ERROR efrm_vi_resource_alloc error %d", ++ __func__, rc); ++ return rc; ++ } ++ ++ efrm_vi_register_flush_callback(state->vi_res, &efx_vi_flush_complete, ++ (void *)state); ++ ++ return 0; ++} ++ ++static int free_ep(struct efx_vi_state *efx_state) ++{ ++ efrm_vi_resource_release(efx_state->vi_res); ++ ++ return 0; ++} ++ ++#if EFX_VI_STATIC_FILTERS ++static int efx_vi_alloc_static_filters(struct efx_vi_state *efx_state) ++{ ++ int i; ++ int rc; ++ ++ efx_state->free_fres = efx_state->used_fres = NULL; ++ ++ for (i = 0; i < EFX_VI_STATIC_FILTERS; i++) { ++ rc = efrm_filter_resource_alloc(efx_state->vi_res, ++ &efx_state->fres[i].fres); ++ if (rc < 0) { ++ EFRM_ERR("%s: efrm_filter_resource_alloc failed: %d", ++ __func__, rc); ++ while (i > 0) { ++ i--; ++ efrm_filter_resource_release(efx_state-> ++ fres[i].fres); ++ } ++ efx_state->free_fres = NULL; ++ return rc; ++ } ++ efx_state->fres[i].next = efx_state->free_fres; ++ efx_state->free_fres = &efx_state->fres[i]; ++ } ++ ++ return 0; ++} ++#endif ++ ++int efx_vi_alloc(struct efx_vi_state **vih_out, int ifindex) ++{ ++ struct efx_vi_state *efx_state; ++ int rc; ++ ++ efx_state = kmalloc(sizeof(struct efx_vi_state), GFP_KERNEL); ++ ++ if (!efx_state) { ++ EFRM_ERR("%s: failed to allocate memory for efx_vi_state", ++ __func__); ++ rc = -ENOMEM; ++ goto fail; ++ } ++ ++ efx_state->ifindex = ifindex; ++ rc = efrm_client_get(ifindex, NULL, NULL, &efx_state->efrm_client); ++ if (rc < 0) { ++ EFRM_ERR("%s: efrm_client_get(%d) failed: %d", __func__, ++ ifindex, rc); ++ rc = -ENODEV; ++ goto fail_no_ifindex; ++ } ++ efx_state->nic = efrm_client_get_nic(efx_state->efrm_client); ++ ++ init_completion(&efx_state->flush_completion); ++ ++ /* basically allocate_pt_endpoint() */ ++ rc = alloc_ep(efx_state); ++ if (rc) { ++ EFRM_ERR("%s: alloc_ep failed: %d", __func__, rc); ++ goto fail_no_pt; ++ } ++#if EFX_VI_STATIC_FILTERS ++ /* Statically allocate a set of filter resources - removes the ++ restriction on not being able to use efx_vi_filter() from ++ in_atomic() */ ++ rc = 
efx_vi_alloc_static_filters(efx_state); ++ if (rc) ++ goto fail_no_filters; ++#endif ++ ++ *vih_out = efx_state; ++ ++ return 0; ++#if EFX_VI_STATIC_FILTERS ++fail_no_filters: ++ free_ep(efx_state); ++#endif ++fail_no_pt: ++ efrm_client_put(efx_state->efrm_client); ++fail_no_ifindex: ++ kfree(efx_state); ++fail: ++ return rc; ++} ++EXPORT_SYMBOL(efx_vi_alloc); ++ ++void efx_vi_free(struct efx_vi_state *vih) ++{ ++ struct efx_vi_state *efx_state = vih; ++ ++ /* TODO flush dma channels, init dma queues?. See ef_free_vnic() */ ++#if EFX_VI_STATIC_FILTERS ++ int i; ++ ++ for (i = 0; i < EFX_VI_STATIC_FILTERS; i++) ++ efrm_filter_resource_release(efx_state->fres[i].fres); ++#endif ++ ++ if (efx_state->vi_res) ++ free_ep(efx_state); ++ ++ efrm_client_put(efx_state->efrm_client); ++ ++ kfree(efx_state); ++} ++EXPORT_SYMBOL(efx_vi_free); ++ ++void efx_vi_reset(struct efx_vi_state *vih) ++{ ++ struct efx_vi_state *efx_state = vih; ++ ++ efrm_pt_flush(efx_state->vi_res); ++ ++ while (wait_for_completion_timeout(&efx_state->flush_completion, HZ) ++ == 0) ++ efrm_vi_resource_flush_retry(efx_state->vi_res); ++ ++ /* Bosch the eventq */ ++ efrm_eventq_reset(efx_state->vi_res); ++ return; ++} ++EXPORT_SYMBOL(efx_vi_reset); ++ ++static void ++efx_vi_eventq_callback(void *context, int is_timeout, struct efhw_nic *nic) ++{ ++ struct efx_vi_state *efx_state = (struct efx_vi_state *)context; ++ ++ EFRM_ASSERT(efx_state->callback_fn); ++ ++ return efx_state->callback_fn(efx_state->callback_arg, is_timeout); ++} ++ ++int ++efx_vi_eventq_register_callback(struct efx_vi_state *vih, ++ void (*callback)(void *context, int is_timeout), ++ void *context) ++{ ++ struct efx_vi_state *efx_state = vih; ++ ++ efx_state->callback_fn = callback; ++ efx_state->callback_arg = context; ++ ++ /* Register the eventq timeout event callback */ ++ efrm_eventq_register_callback(efx_state->vi_res, ++ efx_vi_eventq_callback, efx_state); ++ ++ return 0; ++} ++EXPORT_SYMBOL(efx_vi_eventq_register_callback); ++ ++int efx_vi_eventq_kill_callback(struct efx_vi_state *vih) ++{ ++ struct efx_vi_state *efx_state = vih; ++ ++ if (efx_state->vi_res->evq_callback_fn) ++ efrm_eventq_kill_callback(efx_state->vi_res); ++ ++ efx_state->callback_fn = NULL; ++ efx_state->callback_arg = NULL; ++ ++ return 0; ++} ++EXPORT_SYMBOL(efx_vi_eventq_kill_callback); ++ ++struct efx_vi_dma_map_state { ++ struct efhw_buffer_table_allocation bt_handle; ++ int n_pages; ++ dma_addr_t *dma_addrs; ++}; ++ ++int ++efx_vi_dma_map_pages(struct efx_vi_state *vih, struct page **pages, ++ int n_pages, struct efx_vi_dma_map_state **dmh_out) ++{ ++ struct efx_vi_state *efx_state = vih; ++ int order = fls(n_pages - 1), rc, i, evq_id; ++ dma_addr_t dma_addr; ++ struct efx_vi_dma_map_state *dm_state; ++ ++ if (n_pages != (1 << order)) { ++ EFRM_WARN("%s: Can only allocate buffers in power of 2 " ++ "sizes (not %d)", __func__, n_pages); ++ return -EINVAL; ++ } ++ ++ dm_state = kmalloc(sizeof(struct efx_vi_dma_map_state), GFP_KERNEL); ++ if (!dm_state) ++ return -ENOMEM; ++ ++ dm_state->dma_addrs = kmalloc(sizeof(dma_addr_t) * n_pages, ++ GFP_KERNEL); ++ if (!dm_state->dma_addrs) { ++ kfree(dm_state); ++ return -ENOMEM; ++ } ++ ++ rc = efrm_buffer_table_alloc(order, &dm_state->bt_handle); ++ if (rc < 0) { ++ kfree(dm_state->dma_addrs); ++ kfree(dm_state); ++ return rc; ++ } ++ ++ evq_id = EFRM_RESOURCE_INSTANCE(efx_state->vi_res->rs.rs_handle); ++ for (i = 0; i < n_pages; i++) { ++ /* TODO do we need to get_page() here ? 
*/ ++ ++ dma_addr = pci_map_page(linux_efhw_nic(efx_state->nic)-> ++ pci_dev, pages[i], 0, PAGE_SIZE, ++ PCI_DMA_TODEVICE); ++ ++ efrm_buffer_table_set(&dm_state->bt_handle, efx_state->nic, ++ i, dma_addr, evq_id); ++ ++ dm_state->dma_addrs[i] = dma_addr; ++ ++ /* Would be nice to not have to call commit each time, but ++ * comment says there are hardware restrictions on how often ++ * you can go without it, so do this to be safe */ ++ efrm_buffer_table_commit(); ++ } ++ ++ dm_state->n_pages = n_pages; ++ ++ *dmh_out = dm_state; ++ ++ return 0; ++} ++EXPORT_SYMBOL(efx_vi_dma_map_pages); ++ ++/* Function needed as Xen can't get pages for grants in dom0, but can ++ get dma address */ ++int ++efx_vi_dma_map_addrs(struct efx_vi_state *vih, ++ unsigned long long *bus_dev_addrs, ++ int n_pages, struct efx_vi_dma_map_state **dmh_out) ++{ ++ struct efx_vi_state *efx_state = vih; ++ int order = fls(n_pages - 1), rc, i, evq_id; ++ dma_addr_t dma_addr; ++ struct efx_vi_dma_map_state *dm_state; ++ ++ if (n_pages != (1 << order)) { ++ EFRM_WARN("%s: Can only allocate buffers in power of 2 " ++ "sizes (not %d)", __func__, n_pages); ++ return -EINVAL; ++ } ++ ++ dm_state = kmalloc(sizeof(struct efx_vi_dma_map_state), GFP_KERNEL); ++ if (!dm_state) ++ return -ENOMEM; ++ ++ dm_state->dma_addrs = kmalloc(sizeof(dma_addr_t) * n_pages, ++ GFP_KERNEL); ++ if (!dm_state->dma_addrs) { ++ kfree(dm_state); ++ return -ENOMEM; ++ } ++ ++ rc = efrm_buffer_table_alloc(order, &dm_state->bt_handle); ++ if (rc < 0) { ++ kfree(dm_state->dma_addrs); ++ kfree(dm_state); ++ return rc; ++ } ++ ++ evq_id = EFRM_RESOURCE_INSTANCE(efx_state->vi_res->rs.rs_handle); ++#if 0 ++ EFRM_WARN("%s: mapping %d pages to evq %d, bt_ids %d-%d\n", ++ __func__, n_pages, evq_id, ++ dm_state->bt_handle.base, ++ dm_state->bt_handle.base + n_pages); ++#endif ++ for (i = 0; i < n_pages; i++) { ++ ++ dma_addr = (dma_addr_t)bus_dev_addrs[i]; ++ ++ efrm_buffer_table_set(&dm_state->bt_handle, efx_state->nic, ++ i, dma_addr, evq_id); ++ ++ dm_state->dma_addrs[i] = dma_addr; ++ ++ /* Would be nice to not have to call commit each time, but ++ * comment says there are hardware restrictions on how often ++ * you can go without it, so do this to be safe */ ++ efrm_buffer_table_commit(); ++ } ++ ++ dm_state->n_pages = n_pages; ++ ++ *dmh_out = dm_state; ++ ++ return 0; ++} ++EXPORT_SYMBOL(efx_vi_dma_map_addrs); ++ ++void ++efx_vi_dma_unmap_pages(struct efx_vi_state *vih, ++ struct efx_vi_dma_map_state *dmh) ++{ ++ struct efx_vi_state *efx_state = vih; ++ struct efx_vi_dma_map_state *dm_state = ++ (struct efx_vi_dma_map_state *)dmh; ++ int i; ++ ++ efrm_buffer_table_free(&dm_state->bt_handle); ++ ++ for (i = 0; i < dm_state->n_pages; ++i) ++ pci_unmap_page(linux_efhw_nic(efx_state->nic)->pci_dev, ++ dm_state->dma_addrs[i], PAGE_SIZE, ++ PCI_DMA_TODEVICE); ++ ++ kfree(dm_state->dma_addrs); ++ kfree(dm_state); ++ ++ return; ++} ++EXPORT_SYMBOL(efx_vi_dma_unmap_pages); ++ ++void ++efx_vi_dma_unmap_addrs(struct efx_vi_state *vih, ++ struct efx_vi_dma_map_state *dmh) ++{ ++ struct efx_vi_dma_map_state *dm_state = ++ (struct efx_vi_dma_map_state *)dmh; ++ ++ efrm_buffer_table_free(&dm_state->bt_handle); ++ ++ kfree(dm_state->dma_addrs); ++ kfree(dm_state); ++ ++ return; ++} ++EXPORT_SYMBOL(efx_vi_dma_unmap_addrs); ++ ++unsigned ++efx_vi_dma_get_map_addr(struct efx_vi_state *vih, ++ struct efx_vi_dma_map_state *dmh) ++{ ++ struct efx_vi_dma_map_state *dm_state = ++ (struct efx_vi_dma_map_state *)dmh; ++ ++ return EFHW_BUFFER_ADDR(dm_state->bt_handle.base, 0); ++} 
++EXPORT_SYMBOL(efx_vi_dma_get_map_addr); ++ ++#if EFX_VI_STATIC_FILTERS ++static int ++get_filter(struct efx_vi_state *efx_state, ++ efrm_resource_handle_t pthandle, struct filter_resource **fres_out) ++{ ++ struct filter_list_t *flist; ++ if (efx_state->free_fres == NULL) ++ return -ENOMEM; ++ else { ++ flist = efx_state->free_fres; ++ efx_state->free_fres = flist->next; ++ flist->next = efx_state->used_fres; ++ efx_state->used_fres = flist; ++ *fres_out = flist->fres; ++ return 0; ++ } ++} ++#endif ++ ++static void ++release_filter(struct efx_vi_state *efx_state, struct filter_resource *fres) ++{ ++#if EFX_VI_STATIC_FILTERS ++ struct filter_list_t *flist = efx_state->used_fres, *prev = NULL; ++ while (flist) { ++ if (flist->fres == fres) { ++ if (prev) ++ prev->next = flist->next; ++ else ++ efx_state->used_fres = flist->next; ++ flist->next = efx_state->free_fres; ++ efx_state->free_fres = flist; ++ return; ++ } ++ prev = flist; ++ flist = flist->next; ++ } ++ EFRM_ERR("%s: couldn't find filter", __func__); ++#else ++ return efrm_filter_resource_release(fres); ++#endif ++} ++ ++int ++efx_vi_filter(struct efx_vi_state *vih, int protocol, ++ unsigned ip_addr_be32, int port_le16, ++ struct filter_resource_t **fh_out) ++{ ++ struct efx_vi_state *efx_state = vih; ++ struct filter_resource *frs; ++ int rc; ++ ++#if EFX_VI_STATIC_FILTERS ++ rc = get_filter(efx_state, efx_state->vi_res->rs.rs_handle, &frs); ++#else ++ rc = efrm_filter_resource_alloc(efx_state->vi_res, &frs); ++#endif ++ if (rc < 0) ++ return rc; ++ ++ /* Add the hardware filter. We pass in the source port and address ++ * as 0 (wildcard) to minimise the number of filters needed. */ ++ if (protocol == IPPROTO_TCP) { ++ rc = efrm_filter_resource_tcp_set(frs, 0, 0, ip_addr_be32, ++ port_le16); ++ } else { ++ rc = efrm_filter_resource_udp_set(frs, 0, 0, ip_addr_be32, ++ port_le16); ++ } ++ ++ *fh_out = (struct filter_resource_t *)frs; ++ ++ return rc; ++} ++EXPORT_SYMBOL(efx_vi_filter); ++ ++int ++efx_vi_filter_stop(struct efx_vi_state *vih, struct filter_resource_t *fh) ++{ ++ struct efx_vi_state *efx_state = vih; ++ struct filter_resource *frs = (struct filter_resource *)fh; ++ int rc; ++ ++ rc = efrm_filter_resource_clear(frs); ++ release_filter(efx_state, frs); ++ ++ return rc; ++} ++EXPORT_SYMBOL(efx_vi_filter_stop); ++ ++int ++efx_vi_hw_resource_get_virt(struct efx_vi_state *vih, ++ struct efx_vi_hw_resource_metadata *mdata, ++ struct efx_vi_hw_resource *hw_res_array, ++ int *length) ++{ ++ EFRM_NOTICE("%s: TODO!", __func__); ++ ++ return 0; ++} ++EXPORT_SYMBOL(efx_vi_hw_resource_get_virt); ++ ++int ++efx_vi_hw_resource_get_phys(struct efx_vi_state *vih, ++ struct efx_vi_hw_resource_metadata *mdata, ++ struct efx_vi_hw_resource *hw_res_array, ++ int *length) ++{ ++ struct efx_vi_state *efx_state = vih; ++ struct linux_efhw_nic *lnic = linux_efhw_nic(efx_state->nic); ++ unsigned long phys = lnic->ctr_ap_pci_addr; ++ struct efrm_resource *ep_res = &efx_state->vi_res->rs; ++ unsigned ep_mmap_bytes; ++ int i; ++ ++ if (*length < EFX_VI_HW_RESOURCE_MAXSIZE) ++ return -EINVAL; ++ ++ mdata->nic_arch = efx_state->nic->devtype.arch; ++ mdata->nic_variant = efx_state->nic->devtype.variant; ++ mdata->nic_revision = efx_state->nic->devtype.revision; ++ ++ mdata->evq_order = ++ efx_state->vi_res->nic_info.evq_pages.iobuff.order; ++ mdata->evq_offs = efx_state->vi_res->nic_info.evq_pages.iobuff_off; ++ mdata->evq_capacity = efx_vi_eventq_size; ++ mdata->instance = EFRM_RESOURCE_INSTANCE(ep_res->rs_handle); ++ mdata->rx_capacity = 
FALCON_DMA_Q_DEFAULT_RX_SIZE; ++ mdata->tx_capacity = FALCON_DMA_Q_DEFAULT_TX_SIZE; ++ ++ ep_mmap_bytes = FALCON_DMA_Q_DEFAULT_MMAP; ++ EFRM_ASSERT(ep_mmap_bytes == PAGE_SIZE * 2); ++ ++#ifndef NDEBUG ++ { ++ /* Sanity about doorbells */ ++ unsigned long tx_dma_page_addr, rx_dma_page_addr; ++ ++ /* get rx doorbell address */ ++ rx_dma_page_addr = ++ phys + falcon_rx_dma_page_addr(mdata->instance); ++ /* get tx doorbell address */ ++ tx_dma_page_addr = ++ phys + falcon_tx_dma_page_addr(mdata->instance); ++ ++ /* Check the lower bits of the TX doorbell will be ++ * consistent. */ ++ EFRM_ASSERT((TX_DESC_UPD_REG_PAGE4_OFST & ++ FALCON_DMA_PAGE_MASK) == ++ (TX_DESC_UPD_REG_PAGE123K_OFST & ++ FALCON_DMA_PAGE_MASK)); ++ ++ /* Check the lower bits of the RX doorbell will be ++ * consistent. */ ++ EFRM_ASSERT((RX_DESC_UPD_REG_PAGE4_OFST & ++ FALCON_DMA_PAGE_MASK) == ++ (RX_DESC_UPD_REG_PAGE123K_OFST & ++ FALCON_DMA_PAGE_MASK)); ++ ++ /* Check that the doorbells will be in the same page. */ ++ EFRM_ASSERT((TX_DESC_UPD_REG_PAGE4_OFST & PAGE_MASK) == ++ (RX_DESC_UPD_REG_PAGE4_OFST & PAGE_MASK)); ++ ++ /* Check that the doorbells are in the same page. */ ++ EFRM_ASSERT((tx_dma_page_addr & PAGE_MASK) == ++ (rx_dma_page_addr & PAGE_MASK)); ++ ++ /* Check that the TX doorbell offset is correct. */ ++ EFRM_ASSERT((TX_DESC_UPD_REG_PAGE4_OFST & ~PAGE_MASK) == ++ (tx_dma_page_addr & ~PAGE_MASK)); ++ ++ /* Check that the RX doorbell offset is correct. */ ++ EFRM_ASSERT((RX_DESC_UPD_REG_PAGE4_OFST & ~PAGE_MASK) == ++ (rx_dma_page_addr & ~PAGE_MASK)); ++ } ++#endif ++ ++ i = 0; ++ hw_res_array[i].type = EFX_VI_HW_RESOURCE_TXDMAQ; ++ hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; ++ hw_res_array[i].more_to_follow = 0; ++ hw_res_array[i].length = PAGE_SIZE; ++ hw_res_array[i].address = ++ (unsigned long)efx_state->vi_res->nic_info. ++ dmaq_pages[EFRM_VI_RM_DMA_QUEUE_TX].kva; ++ ++ i++; ++ hw_res_array[i].type = EFX_VI_HW_RESOURCE_RXDMAQ; ++ hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; ++ hw_res_array[i].more_to_follow = 0; ++ hw_res_array[i].length = PAGE_SIZE; ++ hw_res_array[i].address = ++ (unsigned long)efx_state->vi_res->nic_info. 
++ dmaq_pages[EFRM_VI_RM_DMA_QUEUE_RX].kva; ++ ++ i++; ++ hw_res_array[i].type = EFX_VI_HW_RESOURCE_EVQTIMER; ++ hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; ++ hw_res_array[i].more_to_follow = 0; ++ hw_res_array[i].length = PAGE_SIZE; ++ hw_res_array[i].address = ++ (unsigned long)phys + falcon_timer_page_addr(mdata->instance); ++ ++ /* NB EFX_VI_HW_RESOURCE_EVQPTR not used on Falcon */ ++ ++ i++; ++ switch (efx_state->nic->devtype.variant) { ++ case 'A': ++ hw_res_array[i].type = EFX_VI_HW_RESOURCE_EVQRPTR; ++ hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; ++ hw_res_array[i].more_to_follow = 0; ++ hw_res_array[i].length = PAGE_SIZE; ++ hw_res_array[i].address = (unsigned long)phys + ++ EVQ_RPTR_REG_OFST + ++ (FALCON_REGISTER128 * mdata->instance); ++ break; ++ case 'B': ++ hw_res_array[i].type = EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET; ++ hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; ++ hw_res_array[i].more_to_follow = 0; ++ hw_res_array[i].length = PAGE_SIZE; ++ hw_res_array[i].address = ++ (unsigned long)FALCON_EVQ_RPTR_REG_P0; ++ break; ++ default: ++ EFRM_ASSERT(0); ++ break; ++ } ++ ++ i++; ++ hw_res_array[i].type = EFX_VI_HW_RESOURCE_EVQMEMKVA; ++ hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_IOBUFFER; ++ hw_res_array[i].more_to_follow = 0; ++ hw_res_array[i].length = PAGE_SIZE; ++ hw_res_array[i].address = (unsigned long)efx_state->vi_res-> ++ nic_info.evq_pages.iobuff.kva; ++ ++ i++; ++ hw_res_array[i].type = EFX_VI_HW_RESOURCE_BELLPAGE; ++ hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; ++ hw_res_array[i].more_to_follow = 0; ++ hw_res_array[i].length = PAGE_SIZE; ++ hw_res_array[i].address = ++ (unsigned long)(phys + ++ falcon_tx_dma_page_addr(mdata->instance)) ++ >> PAGE_SHIFT; ++ ++ i++; ++ ++ EFRM_ASSERT(i <= *length); ++ ++ *length = i; ++ ++ return 0; ++} ++EXPORT_SYMBOL(efx_vi_hw_resource_get_phys); +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/eventq.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/eventq.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,321 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains event queue support. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define KEVENTQ_MAGIC 0x07111974 ++ ++/*! Helper function to allocate the iobuffer needed by an eventq ++ * - it ensures the eventq has the correct alignment for the NIC ++ * ++ * \param rm Event-queue resource manager ++ * \param instance Event-queue instance (index) ++ * \param buf_bytes Requested size of eventq ++ * \return < 0 if iobuffer allocation fails ++ */ ++int ++efhw_nic_event_queue_alloc_iobuffer(struct efhw_nic *nic, ++ struct eventq_resource_hardware *h, ++ int evq_instance, unsigned buf_bytes) ++{ ++ unsigned int page_order; ++ int rc; ++ ++ /* Allocate an iobuffer. */ ++ page_order = get_order(buf_bytes); ++ ++ h->iobuff_off = 0; ++ ++ EFHW_TRACE("allocating eventq size %x", ++ 1u << (page_order + PAGE_SHIFT)); ++ rc = efhw_iopages_alloc(nic, &h->iobuff, page_order); ++ if (rc < 0) { ++ EFHW_WARN("%s: failed to allocate %u pages", ++ __func__, 1u << page_order); ++ return rc; ++ } ++ ++ /* Set the eventq pages to match EFHW_CLEAR_EVENT() */ ++ if (EFHW_CLEAR_EVENT_VALUE) ++ memset(efhw_iopages_ptr(&h->iobuff) + h->iobuff_off, ++ EFHW_CLEAR_EVENT_VALUE, (1u << page_order) * PAGE_SIZE); ++ ++ EFHW_TRACE("%s: allocated %u pages", __func__, 1u << (page_order)); ++ ++ /* For Falcon the NIC is programmed with the base buffer address of a ++ * contiguous region of buffer space. This means that larger than a ++ * PAGE event queues can be expected to allocate even when the host's ++ * physical memory is fragmented */ ++ EFHW_ASSERT(efhw_nic_have_hw(nic)); ++ EFHW_ASSERT(page_order <= h->buf_tbl_alloc.order); ++ ++ /* Initialise the buffer table entries. */ ++ falcon_nic_buffer_table_set_n(nic, h->buf_tbl_alloc.base, ++ efhw_iopages_dma_addr(&h->iobuff) + ++ h->iobuff_off, EFHW_NIC_PAGE_SIZE, 0, ++ 1 << page_order, 0); ++ ++ if (evq_instance >= FALCON_EVQ_TBL_RESERVED) ++ falcon_nic_buffer_table_confirm(nic); ++ return 0; ++} ++ ++/********************************************************************** ++ * Kernel event queue management. ++ */ ++ ++/* Values for [struct efhw_keventq::lock] field. */ ++#define KEVQ_UNLOCKED 0 ++#define KEVQ_LOCKED 1 ++#define KEVQ_RECHECK 2 ++ ++int ++efhw_keventq_ctor(struct efhw_nic *nic, int instance, ++ struct efhw_keventq *evq, ++ struct efhw_ev_handler *ev_handlers) ++{ ++ int rc; ++ unsigned buf_bytes = evq->hw.capacity * sizeof(efhw_event_t); ++ ++ evq->instance = instance; ++ evq->ev_handlers = ev_handlers; ++ ++ /* allocate an IObuffer for the eventq */ ++ rc = efhw_nic_event_queue_alloc_iobuffer(nic, &evq->hw, evq->instance, ++ buf_bytes); ++ if (rc < 0) ++ return rc; ++ ++ /* Zero the timer-value for this queue. ++ AND Tell the nic about the event queue. 
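++	   (efhw_nic_event_queue_enable() below does both in a single call;
++	   the final argument requests an interrupting event queue.)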
*/ ++ efhw_nic_event_queue_enable(nic, evq->instance, evq->hw.capacity, ++ efhw_iopages_dma_addr(&evq->hw.iobuff) + ++ evq->hw.iobuff_off, ++ evq->hw.buf_tbl_alloc.base, ++ 1 /* interrupting */); ++ ++ evq->lock = KEVQ_UNLOCKED; ++ evq->evq_base = efhw_iopages_ptr(&evq->hw.iobuff) + evq->hw.iobuff_off; ++ evq->evq_ptr = 0; ++ evq->evq_mask = (evq->hw.capacity * sizeof(efhw_event_t)) - 1u; ++ ++ EFHW_TRACE("%s: [%d] base=%p end=%p", __func__, evq->instance, ++ evq->evq_base, evq->evq_base + buf_bytes); ++ ++ return 0; ++} ++ ++void efhw_keventq_dtor(struct efhw_nic *nic, struct efhw_keventq *evq) ++{ ++ EFHW_ASSERT(evq); ++ ++ EFHW_TRACE("%s: [%d]", __func__, evq->instance); ++ ++ /* Zero the timer-value for this queue. ++ And Tell NIC to stop using this event queue. */ ++ efhw_nic_event_queue_disable(nic, evq->instance, 0); ++ ++ /* free the pages used by the eventq itself */ ++ efhw_iopages_free(nic, &evq->hw.iobuff); ++} ++ ++void ++efhw_handle_txdmaq_flushed(struct efhw_nic *nic, struct efhw_ev_handler *h, ++ efhw_event_t *evp) ++{ ++ int instance = (int)FALCON_EVENT_TX_FLUSH_Q_ID(evp); ++ EFHW_TRACE("%s: instance=%d", __func__, instance); ++ ++ if (!h->dmaq_flushed_fn) { ++ EFHW_WARN("%s: no handler registered", __func__); ++ return; ++ } ++ ++ h->dmaq_flushed_fn(nic, instance, false); ++} ++ ++void ++efhw_handle_rxdmaq_flushed(struct efhw_nic *nic, struct efhw_ev_handler *h, ++ efhw_event_t *evp) ++{ ++ unsigned instance = (unsigned)FALCON_EVENT_RX_FLUSH_Q_ID(evp); ++ EFHW_TRACE("%s: instance=%d", __func__, instance); ++ ++ if (!h->dmaq_flushed_fn) { ++ EFHW_WARN("%s: no handler registered", __func__); ++ return; ++ } ++ ++ h->dmaq_flushed_fn(nic, instance, true); ++} ++ ++void ++efhw_handle_wakeup_event(struct efhw_nic *nic, struct efhw_ev_handler *h, ++ efhw_event_t *evp) ++{ ++ unsigned instance = (unsigned)FALCON_EVENT_WAKE_EVQ_ID(evp); ++ ++ if (!h->wakeup_fn) { ++ EFHW_WARN("%s: no handler registered", __func__); ++ return; ++ } ++ ++ h->wakeup_fn(nic, instance); ++} ++ ++void ++efhw_handle_timeout_event(struct efhw_nic *nic, struct efhw_ev_handler *h, ++ efhw_event_t *evp) ++{ ++ unsigned instance = (unsigned)FALCON_EVENT_WAKE_EVQ_ID(evp); ++ ++ if (!h->timeout_fn) { ++ EFHW_WARN("%s: no handler registered", __func__); ++ return; ++ } ++ ++ h->timeout_fn(nic, instance); ++} ++ ++/********************************************************************** ++ * Kernel event queue event handling. ++ */ ++ ++int efhw_keventq_poll(struct efhw_nic *nic, struct efhw_keventq *q) ++{ ++ efhw_event_t *ev; ++ int l, count = 0; ++ ++ EFHW_ASSERT(nic); ++ EFHW_ASSERT(q); ++ EFHW_ASSERT(q->ev_handlers); ++ ++ /* Acquire the lock, or mark the queue as needing re-checking. 
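++	 * The lock field is a small state machine driven by cmpxchg():
++	 * KEVQ_UNLOCKED means no poller is active and we may take the
++	 * lock; KEVQ_LOCKED means another poller is active, so we flag
++	 * KEVQ_RECHECK and rely on that poller to re-poll on our behalf;
++	 * KEVQ_RECHECK means the flag is already set and we can simply
++	 * return.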
*/ ++ for (;;) { ++ l = q->lock; ++ if (l == KEVQ_UNLOCKED) { ++ if ((int)cmpxchg(&q->lock, l, KEVQ_LOCKED) == l) ++ break; ++ } else if (l == KEVQ_LOCKED) { ++ if ((int)cmpxchg(&q->lock, l, KEVQ_RECHECK) == l) ++ return 0; ++ } else { /* already marked for re-checking */ ++ EFHW_ASSERT(l == KEVQ_RECHECK); ++ return 0; ++ } ++ } ++ ++ if (unlikely(EFHW_EVENT_OVERFLOW(q, q))) ++ goto overflow; ++ ++ ev = EFHW_EVENT_PTR(q, q, 0); ++ ++#ifndef NDEBUG ++ if (!EFHW_IS_EVENT(ev)) ++ EFHW_TRACE("%s: %d NO EVENTS!", __func__, q->instance); ++#endif ++ ++ for (;;) { ++ /* Convention for return codes for handlers is: ++ ** 0 - no error, event consumed ++ ** 1 - no error, event not consumed ++ ** -ve - error, event not consumed ++ */ ++ if (likely(EFHW_IS_EVENT(ev))) { ++ count++; ++ ++ switch (FALCON_EVENT_CODE(ev)) { ++ ++ case FALCON_EVENT_CODE_CHAR: ++ falcon_handle_char_event(nic, q->ev_handlers, ++ ev); ++ break; ++ ++ default: ++ EFHW_ERR("efhw_keventq_poll: [%d] UNEXPECTED " ++ "EVENT:"FALCON_EVENT_FMT, ++ q->instance, ++ FALCON_EVENT_PRI_ARG(*ev)); ++ } ++ ++ EFHW_CLEAR_EVENT(ev); ++ EFHW_EVENTQ_NEXT(q); ++ ++ ev = EFHW_EVENT_PTR(q, q, 0); ++ } else { ++ /* No events left. Release the lock (checking if we ++ * need to re-poll to avoid race). */ ++ l = q->lock; ++ if (l == KEVQ_LOCKED) { ++ if ((int)cmpxchg(&q->lock, l, KEVQ_UNLOCKED) ++ == l) { ++ EFHW_TRACE ++ ("efhw_keventq_poll: %d clean exit", ++ q->instance); ++ goto clean_exit; ++ } ++ } ++ ++ /* Potentially more work to do. */ ++ l = q->lock; ++ EFHW_ASSERT(l == KEVQ_RECHECK); ++ EFHW_TEST((int)cmpxchg(&q->lock, l, KEVQ_LOCKED) == l); ++ EFHW_TRACE("efhw_keventq_poll: %d re-poll required", ++ q->instance); ++ } ++ } ++ ++ /* shouldn't get here */ ++ EFHW_ASSERT(0); ++ ++overflow: ++ /* ?? Oh dear. Should we poll everything that could have possibly ++ ** happened? Or merely cry out in anguish... ++ */ ++ EFHW_WARN("efhw_keventq_poll: %d ***** OVERFLOW nic %d *****", ++ q->instance, nic->index); ++ ++ q->lock = KEVQ_UNLOCKED; ++ return count; ++ ++clean_exit: ++ /* Ack the processed events so that this event queue can potentially ++ raise interrupts again */ ++ falcon_nic_evq_ack(nic, q->instance, ++ (EFHW_EVENT_OFFSET(q, q, 0) / sizeof(efhw_event_t)), ++ false); ++ return count; ++} +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/falcon.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/falcon.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,2525 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains Falcon hardware support. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++
++/*----------------------------------------------------------------------------
++ *
++ * Workarounds and options
++ *
++ *---------------------------------------------------------------------------*/
++
++/* Keep a software copy of the filter table and check for duplicates. */
++#define FALCON_FULL_FILTER_CACHE 1
++
++/* Read filters back from the hardware to detect corruption. */
++#define FALCON_VERIFY_FILTERS 0
++
++/* Options */
++#define RX_FILTER_CTL_SRCH_LIMIT_TCP_FULL 8	/* default search limit */
++#define RX_FILTER_CTL_SRCH_LIMIT_TCP_WILD 8	/* default search limit */
++#define RX_FILTER_CTL_SRCH_LIMIT_UDP_FULL 8	/* default search limit */
++#define RX_FILTER_CTL_SRCH_LIMIT_UDP_WILD 8	/* default search limit */
++
++#define FALCON_MAC_SET_TYPE_BY_SPEED 0
++
++/* FIXME: We should detect mode at runtime. */
++#define FALCON_BUFFER_TABLE_FULL_MODE 1
++
++/* "Fudge factors" - difference between programmed value and actual depth */
++#define RX_FILTER_CTL_SRCH_FUDGE_WILD 3	/* increase the search limit */
++#define RX_FILTER_CTL_SRCH_FUDGE_FULL 1	/* increase the search limit */
++#define TX_FILTER_CTL_SRCH_FUDGE_WILD 3	/* increase the search limit */
++#define TX_FILTER_CTL_SRCH_FUDGE_FULL 1	/* increase the search limit */
++
++/*----------------------------------------------------------------------------
++ *
++ * Debug Macros
++ *
++ *---------------------------------------------------------------------------*/
++
++#define _DEBUG_SYM_ static
++
++/*----------------------------------------------------------------------------
++ *
++ * Macros and forward declarations
++ *
++ *--------------------------------------------------------------------------*/
++
++#define FALCON_REGION_NUM 4	/* number of supported memory regions */
++
++#define FALCON_BUFFER_TBL_HALF_BYTES 4
++#define FALCON_BUFFER_TBL_FULL_BYTES 8
++
++/* Shadow buffer table - hack for testing only */
++#if FALCON_BUFFER_TABLE_FULL_MODE == 0
++# define FALCON_USE_SHADOW_BUFFER_TABLE 1
++#else
++# define FALCON_USE_SHADOW_BUFFER_TABLE 0
++#endif
++
++
++/*----------------------------------------------------------------------------
++ *
++ * Header assertion checks
++ *
++ *---------------------------------------------------------------------------*/
++
++#define FALCON_ASSERT_VALID()	/* nothing yet */
++
++/* Falcon has a 128bit register model but most registers have useful
++   defaults or only implement a small number of bits. Some registers
++   can be programmed 32bits UNLOCKED; all others should be interlocked
++   against other threads within the same protection domain.
++
++   Aim is for software to perform the minimum number of writes and
++   also to minimise the read-modify-write activity (which generally
++   indicates a lack of clarity in the use model).
++
++   Registers which are programmed in this module are listed below
++   together with the method of access. Care must be taken to ensure
++   these remain adequate if the register spec changes.
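   For instance, every interlocked 128-bit register write in this file
   follows one pattern (sketch only; falcon_write_qq() and the
   FALCON_LOCK_* macros are this driver's own helpers, defined in its
   headers):

	FALCON_LOCK_DECL;
	// ...
	FALCON_LOCK_LOCK(nic);
	falcon_write_qq(efhw_kva + REG_OFST, low_qword, high_qword);
	mmiowb();	// order the MMIO write before dropping the lock
	FALCON_LOCK_UNLOCK(nic);

   whereas the 32-bit UNLOCKED case (EVQ_RPTR_REG below) is a plain
   writel() with no lock taken.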
++ ++ All 128bits programmed ++ FALCON_BUFFER_TBL_HALF ++ RX_FILTER_TBL ++ TX_DESC_PTR_TBL ++ RX_DESC_PTR_TBL ++ DRV_EV_REG ++ ++ All 64bits programmed ++ FALCON_BUFFER_TBL_FULL ++ ++ 32 bits are programmed (UNLOCKED) ++ EVQ_RPTR_REG ++ ++ Low 64bits programmed remainder are written with a random number ++ RX_DC_CFG_REG ++ TX_DC_CFG_REG ++ SRM_RX_DC_CFG_REG ++ SRM_TX_DC_CFG_REG ++ BUF_TBL_CFG_REG ++ BUF_TBL_UPD_REG ++ SRM_UPD_EVQ_REG ++ EVQ_PTR_TBL ++ TIMER_CMD_REG ++ TX_PACE_TBL ++ FATAL_INTR_REG ++ INT_EN_REG (When enabling interrupts) ++ TX_FLUSH_DESCQ_REG ++ RX_FLUSH_DESCQ ++ ++ Read Modify Write on low 32bits remainder are written with a random number ++ INT_EN_REG (When sending a driver interrupt) ++ DRIVER_REGX ++ ++ Read Modify Write on low 64bits remainder are written with a random number ++ SRM_CFG_REG_OFST ++ RX_CFG_REG_OFST ++ RX_FILTER_CTL_REG ++ ++ Read Modify Write on full 128bits ++ TXDP_RESERVED_REG (aka TXDP_UNDOCUMENTED) ++ TX_CFG_REG ++ ++*/ ++ ++ ++/*---------------------------------------------------------------------------- ++ * ++ * DMAQ low-level register interface ++ * ++ *---------------------------------------------------------------------------*/ ++ ++static unsigned dmaq_sizes[] = { ++ 512, ++ EFHW_1K, ++ EFHW_2K, ++ EFHW_4K, ++}; ++ ++#define N_DMAQ_SIZES (sizeof(dmaq_sizes) / sizeof(dmaq_sizes[0])) ++ ++static inline ulong falcon_dma_tx_q_offset(struct efhw_nic *nic, unsigned dmaq) ++{ ++ EFHW_ASSERT(dmaq < nic->num_dmaqs); ++ return TX_DESC_PTR_TBL_OFST + dmaq * FALCON_REGISTER128; ++} ++ ++static inline uint falcon_dma_tx_q_size_index(uint dmaq_size) ++{ ++ uint i; ++ ++ /* size must be one of the various options, otherwise we assert */ ++ for (i = 0; i < N_DMAQ_SIZES; i++) { ++ if (dmaq_size == dmaq_sizes[i]) ++ break; ++ } ++ EFHW_ASSERT(i < N_DMAQ_SIZES); ++ return i; ++} ++ ++static void ++falcon_dmaq_tx_q_init(struct efhw_nic *nic, ++ uint dmaq, uint evq_id, uint own_id, ++ uint tag, uint dmaq_size, uint buf_idx, uint flags) ++{ ++ FALCON_LOCK_DECL; ++ uint index, desc_type; ++ uint64_t val1, val2, val3; ++ ulong offset; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ /* Q attributes */ ++ int iscsi_hdig_en = ((flags & EFHW_VI_ISCSI_TX_HDIG_EN) != 0); ++ int iscsi_ddig_en = ((flags & EFHW_VI_ISCSI_TX_DDIG_EN) != 0); ++ int csum_ip_dis = ((flags & EFHW_VI_TX_IP_CSUM_DIS) != 0); ++ int csum_tcp_dis = ((flags & EFHW_VI_TX_TCPUDP_CSUM_DIS) != 0); ++ int non_ip_drop_dis = ((flags & EFHW_VI_TX_TCPUDP_ONLY) == 0); ++ ++ /* initialise the TX descriptor queue pointer table */ ++ ++ /* NB physical vs buffer addressing is determined by the Queue ID. */ ++ ++ offset = falcon_dma_tx_q_offset(nic, dmaq); ++ index = falcon_dma_tx_q_size_index(dmaq_size); ++ ++ /* allow VI flag to override this queue's descriptor type */ ++ desc_type = (flags & EFHW_VI_TX_PHYS_ADDR_EN) ? 0 : 1; ++ ++ /* bug9403: It is dangerous to allow buffer-addressed queues to ++ * have owner_id=0. 
*/ ++ EFHW_ASSERT((own_id > 0) || desc_type == 0); ++ ++ /* dword 1 */ ++ __DWCHCK(TX_DESCQ_FLUSH_LBN, TX_DESCQ_FLUSH_WIDTH); ++ __DWCHCK(TX_DESCQ_TYPE_LBN, TX_DESCQ_TYPE_WIDTH); ++ __DWCHCK(TX_DESCQ_SIZE_LBN, TX_DESCQ_SIZE_WIDTH); ++ __DWCHCK(TX_DESCQ_LABEL_LBN, TX_DESCQ_LABEL_WIDTH); ++ __DWCHCK(TX_DESCQ_OWNER_ID_LBN, TX_DESCQ_OWNER_ID_WIDTH); ++ ++ __LWCHK(TX_DESCQ_EVQ_ID_LBN, TX_DESCQ_EVQ_ID_WIDTH); ++ ++ __RANGECHCK(1, TX_DESCQ_FLUSH_WIDTH); ++ __RANGECHCK(desc_type, TX_DESCQ_TYPE_WIDTH); ++ __RANGECHCK(index, TX_DESCQ_SIZE_WIDTH); ++ __RANGECHCK(tag, TX_DESCQ_LABEL_WIDTH); ++ __RANGECHCK(own_id, TX_DESCQ_OWNER_ID_WIDTH); ++ __RANGECHCK(evq_id, TX_DESCQ_EVQ_ID_WIDTH); ++ ++ val1 = ((desc_type << TX_DESCQ_TYPE_LBN) | ++ (index << TX_DESCQ_SIZE_LBN) | ++ (tag << TX_DESCQ_LABEL_LBN) | ++ (own_id << TX_DESCQ_OWNER_ID_LBN) | ++ (__LOW(evq_id, TX_DESCQ_EVQ_ID_LBN, TX_DESCQ_EVQ_ID_WIDTH))); ++ ++ /* dword 2 */ ++ __DW2CHCK(TX_DESCQ_BUF_BASE_ID_LBN, TX_DESCQ_BUF_BASE_ID_WIDTH); ++ __RANGECHCK(buf_idx, TX_DESCQ_BUF_BASE_ID_WIDTH); ++ ++ val2 = ((__HIGH(evq_id, TX_DESCQ_EVQ_ID_LBN, TX_DESCQ_EVQ_ID_WIDTH)) | ++ (buf_idx << __DW2(TX_DESCQ_BUF_BASE_ID_LBN))); ++ ++ /* dword 3 */ ++ __DW3CHCK(TX_ISCSI_HDIG_EN_LBN, TX_ISCSI_HDIG_EN_WIDTH); ++ __DW3CHCK(TX_ISCSI_DDIG_EN_LBN, TX_ISCSI_DDIG_EN_WIDTH); ++ __RANGECHCK(iscsi_hdig_en, TX_ISCSI_HDIG_EN_WIDTH); ++ __RANGECHCK(iscsi_ddig_en, TX_ISCSI_DDIG_EN_WIDTH); ++ ++ val3 = ((iscsi_hdig_en << __DW3(TX_ISCSI_HDIG_EN_LBN)) | ++ (iscsi_ddig_en << __DW3(TX_ISCSI_DDIG_EN_LBN)) | ++ (1 << __DW3(TX_DESCQ_EN_LBN))); /* queue enable bit */ ++ ++ switch (nic->devtype.variant) { ++ case 'B': ++ __DW3CHCK(TX_NON_IP_DROP_DIS_B0_LBN, ++ TX_NON_IP_DROP_DIS_B0_WIDTH); ++ __DW3CHCK(TX_IP_CHKSM_DIS_B0_LBN, TX_IP_CHKSM_DIS_B0_WIDTH); ++ __DW3CHCK(TX_TCP_CHKSM_DIS_B0_LBN, TX_TCP_CHKSM_DIS_B0_WIDTH); ++ ++ val3 |= ((non_ip_drop_dis << __DW3(TX_NON_IP_DROP_DIS_B0_LBN))| ++ (csum_ip_dis << __DW3(TX_IP_CHKSM_DIS_B0_LBN)) | ++ (csum_tcp_dis << __DW3(TX_TCP_CHKSM_DIS_B0_LBN))); ++ break; ++ case 'A': ++ if (csum_ip_dis || csum_tcp_dis || !non_ip_drop_dis) ++ EFHW_WARN ++ ("%s: bad settings for A1 csum_ip_dis=%d " ++ "csum_tcp_dis=%d non_ip_drop_dis=%d", ++ __func__, csum_ip_dis, ++ csum_tcp_dis, non_ip_drop_dis); ++ break; ++ default: ++ EFHW_ASSERT(0); ++ break; ++ } ++ ++ EFHW_TRACE("%s: txq %x evq %u tag %x id %x buf %x " ++ "%x:%x:%x->%" PRIx64 ":%" PRIx64 ":%" PRIx64, ++ __func__, ++ dmaq, evq_id, tag, own_id, buf_idx, dmaq_size, ++ iscsi_hdig_en, iscsi_ddig_en, val1, val2, val3); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(efhw_kva + offset, ((val2 << 32) | val1), val3); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++ return; ++} ++ ++static inline ulong ++falcon_dma_rx_q_offset(struct efhw_nic *nic, unsigned dmaq) ++{ ++ EFHW_ASSERT(dmaq < nic->num_dmaqs); ++ return RX_DESC_PTR_TBL_OFST + dmaq * FALCON_REGISTER128; ++} ++ ++static void ++falcon_dmaq_rx_q_init(struct efhw_nic *nic, ++ uint dmaq, uint evq_id, uint own_id, ++ uint tag, uint dmaq_size, uint buf_idx, uint flags) ++{ ++ FALCON_LOCK_DECL; ++ uint i, desc_type = 1; ++ uint64_t val1, val2, val3; ++ ulong offset; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ /* Q attributes */ ++#if BUG5762_WORKAROUND ++ int jumbo = 1; /* Queues must not have mixed types */ ++#else ++ int jumbo = ((flags & EFHW_VI_JUMBO_EN) != 0); ++#endif ++ int iscsi_hdig_en = ((flags & EFHW_VI_ISCSI_RX_HDIG_EN) != 0); ++ int iscsi_ddig_en = ((flags & 
EFHW_VI_ISCSI_RX_DDIG_EN) != 0);
++
++	/* initialise the RX descriptor queue pointer table */
++	offset = falcon_dma_rx_q_offset(nic, dmaq);
++
++	/* size must be one of the various options, otherwise we assert */
++	for (i = 0; i < N_DMAQ_SIZES; i++) {
++		if (dmaq_size == dmaq_sizes[i])
++			break;
++	}
++	EFHW_ASSERT(i < N_DMAQ_SIZES);
++
++	/* allow VI flag to override this queue's descriptor type */
++	desc_type = (flags & EFHW_VI_RX_PHYS_ADDR_EN) ? 0 : 1;
++
++	/* bug9403: It is dangerous to allow buffer-addressed queues to have
++	 * owner_id=0 */
++	EFHW_ASSERT((own_id > 0) || desc_type == 0);
++
++	/* dword 1 */
++	__DWCHCK(RX_DESCQ_EN_LBN, RX_DESCQ_EN_WIDTH);
++	__DWCHCK(RX_DESCQ_JUMBO_LBN, RX_DESCQ_JUMBO_WIDTH);
++	__DWCHCK(RX_DESCQ_TYPE_LBN, RX_DESCQ_TYPE_WIDTH);
++	__DWCHCK(RX_DESCQ_SIZE_LBN, RX_DESCQ_SIZE_WIDTH);
++	__DWCHCK(RX_DESCQ_LABEL_LBN, RX_DESCQ_LABEL_WIDTH);
++	__DWCHCK(RX_DESCQ_OWNER_ID_LBN, RX_DESCQ_OWNER_ID_WIDTH);
++
++	__LWCHK(RX_DESCQ_EVQ_ID_LBN, RX_DESCQ_EVQ_ID_WIDTH);
++
++	__RANGECHCK(1, RX_DESCQ_EN_WIDTH);
++	__RANGECHCK(jumbo, RX_DESCQ_JUMBO_WIDTH);
++	__RANGECHCK(desc_type, RX_DESCQ_TYPE_WIDTH);
++	__RANGECHCK(i, RX_DESCQ_SIZE_WIDTH);
++	__RANGECHCK(tag, RX_DESCQ_LABEL_WIDTH);
++	__RANGECHCK(own_id, RX_DESCQ_OWNER_ID_WIDTH);
++	__RANGECHCK(evq_id, RX_DESCQ_EVQ_ID_WIDTH);
++
++	val1 = ((1 << RX_DESCQ_EN_LBN) |
++		(jumbo << RX_DESCQ_JUMBO_LBN) |
++		(desc_type << RX_DESCQ_TYPE_LBN) |
++		(i << RX_DESCQ_SIZE_LBN) |
++		(tag << RX_DESCQ_LABEL_LBN) |
++		(own_id << RX_DESCQ_OWNER_ID_LBN) |
++		(__LOW(evq_id, RX_DESCQ_EVQ_ID_LBN, RX_DESCQ_EVQ_ID_WIDTH)));
++
++	/* dword 2 */
++	__DW2CHCK(RX_DESCQ_BUF_BASE_ID_LBN, RX_DESCQ_BUF_BASE_ID_WIDTH);
++	__RANGECHCK(buf_idx, RX_DESCQ_BUF_BASE_ID_WIDTH);
++
++	val2 = ((__HIGH(evq_id, RX_DESCQ_EVQ_ID_LBN, RX_DESCQ_EVQ_ID_WIDTH)) |
++		(buf_idx << __DW2(RX_DESCQ_BUF_BASE_ID_LBN)));
++
++	/* dword 3 */
++	__DW3CHCK(RX_ISCSI_HDIG_EN_LBN, RX_ISCSI_HDIG_EN_WIDTH);
++	__DW3CHCK(RX_ISCSI_DDIG_EN_LBN, RX_ISCSI_DDIG_EN_WIDTH);
++	__RANGECHCK(iscsi_hdig_en, RX_ISCSI_HDIG_EN_WIDTH);
++	__RANGECHCK(iscsi_ddig_en, RX_ISCSI_DDIG_EN_WIDTH);
++
++	val3 = (iscsi_hdig_en << __DW3(RX_ISCSI_HDIG_EN_LBN)) |
++	    (iscsi_ddig_en << __DW3(RX_ISCSI_DDIG_EN_LBN));
++
++	EFHW_TRACE("%s: rxq %x evq %u tag %x id %x buf %x %s "
++		   "%x:%x:%x -> %" PRIx64 ":%" PRIx64 ":%" PRIx64,
++		   __func__,
++		   dmaq, evq_id, tag, own_id, buf_idx,
++		   jumbo ?
"jumbo" : "normal", dmaq_size, ++ iscsi_hdig_en, iscsi_ddig_en, val1, val2, val3); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(efhw_kva + offset, ((val2 << 32) | val1), val3); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++ return; ++} ++ ++static void falcon_dmaq_tx_q_disable(struct efhw_nic *nic, uint dmaq) ++{ ++ FALCON_LOCK_DECL; ++ uint64_t val1, val2, val3; ++ ulong offset; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ /* initialise the TX descriptor queue pointer table */ ++ ++ offset = falcon_dma_tx_q_offset(nic, dmaq); ++ ++ /* dword 1 */ ++ __DWCHCK(TX_DESCQ_TYPE_LBN, TX_DESCQ_TYPE_WIDTH); ++ ++ val1 = ((uint64_t) 1 << TX_DESCQ_TYPE_LBN); ++ ++ /* dword 2 */ ++ val2 = 0; ++ ++ /* dword 3 */ ++ val3 = (0 << __DW3(TX_DESCQ_EN_LBN)); /* queue enable bit */ ++ ++ EFHW_TRACE("%s: %x->%" PRIx64 ":%" PRIx64 ":%" PRIx64, ++ __func__, dmaq, val1, val2, val3); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(efhw_kva + offset, ((val2 << 32) | val1), val3); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++ return; ++} ++ ++static void falcon_dmaq_rx_q_disable(struct efhw_nic *nic, uint dmaq) ++{ ++ FALCON_LOCK_DECL; ++ uint64_t val1, val2, val3; ++ ulong offset; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ /* initialise the TX descriptor queue pointer table */ ++ offset = falcon_dma_rx_q_offset(nic, dmaq); ++ ++ /* dword 1 */ ++ __DWCHCK(RX_DESCQ_EN_LBN, RX_DESCQ_EN_WIDTH); ++ __DWCHCK(RX_DESCQ_TYPE_LBN, RX_DESCQ_TYPE_WIDTH); ++ ++ val1 = ((0 << RX_DESCQ_EN_LBN) | (1 << RX_DESCQ_TYPE_LBN)); ++ ++ /* dword 2 */ ++ val2 = 0; ++ ++ /* dword 3 */ ++ val3 = 0; ++ ++ EFHW_TRACE("falcon_dmaq_rx_q_disable: %x->%" ++ PRIx64 ":%" PRIx64 ":%" PRIx64, ++ dmaq, val1, val2, val3); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(efhw_kva + offset, ((val2 << 32) | val1), val3); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++ return; ++} ++ ++ ++/*---------------------------------------------------------------------------- ++ * ++ * Buffer Table low-level register interface ++ * ++ *---------------------------------------------------------------------------*/ ++ ++/*! Convert a (potentially) 64-bit physical address to 32-bits. Every use ++** of this function is a place where we're not 64-bit clean. ++*/ ++static inline uint32_t dma_addr_to_u32(dma_addr_t addr) ++{ ++ /* Top bits had better be zero! 
*/ ++ EFHW_ASSERT(addr == (addr & 0xffffffff)); ++ return (uint32_t) addr; ++} ++ ++static inline uint32_t ++falcon_nic_buffer_table_entry32_mk(dma_addr_t dma_addr, int own_id) ++{ ++ uint32_t dma_addr32 = FALCON_BUFFER_4K_PAGE(dma_addr_to_u32(dma_addr)); ++ ++ /* don't do this to me */ ++ EFHW_BUILD_ASSERT(BUF_ADR_HBUF_ODD_LBN == BUF_ADR_HBUF_EVEN_LBN + 32); ++ EFHW_BUILD_ASSERT(BUF_OWNER_ID_HBUF_ODD_LBN == ++ BUF_OWNER_ID_HBUF_EVEN_LBN + 32); ++ ++ EFHW_BUILD_ASSERT(BUF_OWNER_ID_HBUF_ODD_WIDTH == ++ BUF_OWNER_ID_HBUF_EVEN_WIDTH); ++ EFHW_BUILD_ASSERT(BUF_ADR_HBUF_ODD_WIDTH == BUF_ADR_HBUF_EVEN_WIDTH); ++ ++ __DWCHCK(BUF_ADR_HBUF_EVEN_LBN, BUF_ADR_HBUF_EVEN_WIDTH); ++ __DWCHCK(BUF_OWNER_ID_HBUF_EVEN_LBN, BUF_OWNER_ID_HBUF_EVEN_WIDTH); ++ ++ __RANGECHCK(dma_addr32, BUF_ADR_HBUF_EVEN_WIDTH); ++ __RANGECHCK(own_id, BUF_OWNER_ID_HBUF_EVEN_WIDTH); ++ ++ return (dma_addr32 << BUF_ADR_HBUF_EVEN_LBN) | ++ (own_id << BUF_OWNER_ID_HBUF_EVEN_LBN); ++} ++ ++static inline uint64_t ++falcon_nic_buffer_table_entry64_mk(dma_addr_t dma_addr, ++ int bufsz, /* bytes */ ++ int region, int own_id) ++{ ++ __DW2CHCK(IP_DAT_BUF_SIZE_LBN, IP_DAT_BUF_SIZE_WIDTH); ++ __DW2CHCK(BUF_ADR_REGION_LBN, BUF_ADR_REGION_WIDTH); ++ __LWCHK(BUF_ADR_FBUF_LBN, BUF_ADR_FBUF_WIDTH); ++ __DWCHCK(BUF_OWNER_ID_FBUF_LBN, BUF_OWNER_ID_FBUF_WIDTH); ++ ++ EFHW_ASSERT((bufsz == EFHW_4K) || (bufsz == EFHW_8K)); ++ ++ dma_addr = (dma_addr >> 12) & __FALCON_MASK64(BUF_ADR_FBUF_WIDTH); ++ ++ __RANGECHCK(dma_addr, BUF_ADR_FBUF_WIDTH); ++ __RANGECHCK(1, IP_DAT_BUF_SIZE_WIDTH); ++ __RANGECHCK(region, BUF_ADR_REGION_WIDTH); ++ __RANGECHCK(own_id, BUF_OWNER_ID_FBUF_WIDTH); ++ ++ return ((uint64_t) (bufsz == EFHW_8K) << IP_DAT_BUF_SIZE_LBN) | ++ ((uint64_t) region << BUF_ADR_REGION_LBN) | ++ ((uint64_t) dma_addr << BUF_ADR_FBUF_LBN) | ++ ((uint64_t) own_id << BUF_OWNER_ID_FBUF_LBN); ++} ++ ++static inline void ++_falcon_nic_buffer_table_set32(struct efhw_nic *nic, ++ dma_addr_t dma_addr, uint bufsz, ++ uint region, /* not used */ ++ int own_id, int buffer_id) ++{ ++ /* programming the half table needs to be done in pairs. */ ++ uint64_t entry, val, shift; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ volatile char __iomem *offset; ++ ++ EFHW_BUILD_ASSERT(BUF_ADR_HBUF_ODD_LBN == BUF_ADR_HBUF_EVEN_LBN + 32); ++ EFHW_BUILD_ASSERT(BUF_OWNER_ID_HBUF_ODD_LBN == ++ BUF_OWNER_ID_HBUF_EVEN_LBN + 32); ++ ++ shift = (buffer_id & 1) ? 32 : 0; ++ ++ offset = (efhw_kva + BUF_HALF_TBL_OFST + ++ ((buffer_id & ~1) * FALCON_BUFFER_TBL_HALF_BYTES)); ++ ++ entry = falcon_nic_buffer_table_entry32_mk(dma_addr_to_u32(dma_addr), ++ own_id); ++ ++#if FALCON_USE_SHADOW_BUFFER_TABLE ++ val = _falcon_buffer_table[buffer_id & ~1]; ++#else ++ /* This will not work unless we've completed ++ * the buffer table updates */ ++ falcon_read_q(offset, &val); ++#endif ++ val &= ~(((uint64_t) 0xffffffff) << shift); ++ val |= (entry << shift); ++ ++ EFHW_TRACE("%s[%x]: %lx:%x:%" PRIx64 "->%x = %" ++ PRIx64, __func__, buffer_id, (unsigned long) dma_addr, ++ own_id, entry, (unsigned)(offset - efhw_kva), val); ++ ++ /* Falcon requires that access to this register is serialised */ ++ falcon_write_q(offset, val); ++ ++ /* NB. No mmiowb(). Caller should do that e.g by calling commit */ ++ ++#if FALCON_USE_SHADOW_BUFFER_TABLE ++ _falcon_buffer_table[buffer_id & ~1] = val; ++#endif ++ ++ /* Confirm the entry if the event queues haven't been set up. 
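   Normally a buffer-table write is confirmed asynchronously: a commit
   posts a BUF_TBL_UPD command and the SRAM module answers with an
   update-done event (SRM_UPD_DONE_EV_DECODE, consumed by
   falcon_handle_char_event() later in this file), which
   falcon_nic_buffer_table_update_poll() gathers. Before any event
   queue exists (nic->irq_handler still NULL) that path is unavailable,
   so the entry is instead read back until the write is visible, as
   below.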
*/
++	if (!nic->irq_handler) {
++		uint64_t new_val;
++		int count = 0;
++		while (1) {
++			mmiowb();
++			falcon_read_q(offset, &new_val);
++			if (new_val == val)
++				break;
++			count++;
++			if (count > 1000) {
++				EFHW_WARN("%s: poll timeout", __func__);
++				break;
++			}
++			udelay(1);
++		}
++	}
++}
++
++static inline void
++_falcon_nic_buffer_table_set64(struct efhw_nic *nic,
++			       dma_addr_t dma_addr, uint bufsz,
++			       uint region, int own_id, int buffer_id)
++{
++	volatile char __iomem *offset;
++	uint64_t entry;
++	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
++
++	EFHW_ASSERT(region < FALCON_REGION_NUM);
++
++	EFHW_ASSERT((bufsz == EFHW_4K) ||
++		    (bufsz == EFHW_8K && FALCON_BUFFER_TABLE_FULL_MODE));
++
++	offset = (efhw_kva + BUF_FULL_TBL_OFST +
++		  (buffer_id * FALCON_BUFFER_TBL_FULL_BYTES));
++
++	entry = falcon_nic_buffer_table_entry64_mk(dma_addr, bufsz, region,
++						   own_id);
++
++	EFHW_TRACE("%s[%x]: %lx:bufsz=%x:region=%x:ownid=%x",
++		   __func__, buffer_id, (unsigned long) dma_addr, bufsz,
++		   region, own_id);
++
++	EFHW_TRACE("%s: BUF[%x]:NIC[%x]->%" PRIx64,
++		   __func__, buffer_id,
++		   (unsigned int)(offset - efhw_kva), entry);
++
++	/* Falcon requires that access to this register is serialised */
++	falcon_write_q(offset, entry);
++
++	/* NB. No mmiowb(). Caller should do that e.g. by calling commit */
++
++	/* Confirm the entry if the event queues haven't been set up. */
++	if (!nic->irq_handler) {
++		uint64_t new_entry;
++		int count = 0;
++		while (1) {
++			mmiowb();
++			falcon_read_q(offset, &new_entry);
++			if (new_entry == entry)
++				return;
++			count++;
++			if (count > 1000) {
++				EFHW_WARN("%s: poll timeout waiting for "
++					  "value %"PRIx64
++					  " (last was %"PRIx64")",
++					  __func__, entry, new_entry);
++				break;
++			}
++			udelay(1);
++		}
++	}
++}
++
++#if FALCON_BUFFER_TABLE_FULL_MODE
++#define _falcon_nic_buffer_table_set _falcon_nic_buffer_table_set64
++#else
++#define _falcon_nic_buffer_table_set _falcon_nic_buffer_table_set32
++#endif
++
++static inline void _falcon_nic_buffer_table_commit(struct efhw_nic *nic)
++{
++	/* MUST be called holding the FALCON_LOCK */
++	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
++	uint64_t cmd;
++
++	EFHW_BUILD_ASSERT(BUF_TBL_UPD_REG_KER_OFST == BUF_TBL_UPD_REG_OFST);
++
++	__DW2CHCK(BUF_UPD_CMD_LBN, BUF_UPD_CMD_WIDTH);
++	__RANGECHCK(1, BUF_UPD_CMD_WIDTH);
++
++	cmd = ((uint64_t) 1 << BUF_UPD_CMD_LBN);
++
++	/* Falcon requires 128 bit atomic access for this register */
++	falcon_write_qq(efhw_kva + BUF_TBL_UPD_REG_OFST,
++			cmd, FALCON_ATOMIC_UPD_REG);
++	mmiowb();
++
++	nic->buf_commit_outstanding++;
++	EFHW_TRACE("COMMIT REQ out=%d", nic->buf_commit_outstanding);
++}
++
++static void falcon_nic_buffer_table_commit(struct efhw_nic *nic)
++{
++	/* nothing to do */
++}
++
++static inline void
++_falcon_nic_buffer_table_clear(struct efhw_nic *nic, int buffer_id, int num)
++{
++	uint64_t cmd;
++	uint64_t start_id = buffer_id;
++	uint64_t end_id = buffer_id + num - 1;
++	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
++
++	volatile char __iomem *offset = (efhw_kva + BUF_TBL_UPD_REG_OFST);
++
++	EFHW_BUILD_ASSERT(BUF_TBL_UPD_REG_KER_OFST == BUF_TBL_UPD_REG_OFST);
++
++#if !FALCON_BUFFER_TABLE_FULL_MODE
++	/* buffer_ids in half buffer mode reference pairs of buffers */
++	EFHW_ASSERT(buffer_id % 2 == 0);
++	EFHW_ASSERT(num % 2 == 0);
++	start_id = start_id >> 1;
++	end_id = end_id >> 1;
++#endif
++
++	EFHW_ASSERT(num >= 1);
++
++	__DWCHCK(BUF_CLR_START_ID_LBN, BUF_CLR_START_ID_WIDTH);
++	__DW2CHCK(BUF_CLR_END_ID_LBN, BUF_CLR_END_ID_WIDTH);
++
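	/* Worked example (illustrative): clearing num = 4 full-mode
	 * buffers starting at buffer_id = 8 issues one command write
	 * with start_id = 8 and end_id = 11; in half-buffer mode the
	 * same ids are first halved to 4 and 5, since each 64-bit word
	 * holds a pair of 32-bit entries. */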
__DW2CHCK(BUF_CLR_CMD_LBN, BUF_CLR_CMD_WIDTH); ++ __RANGECHCK(1, BUF_CLR_CMD_WIDTH); ++ ++ __RANGECHCK(start_id, BUF_CLR_START_ID_WIDTH); ++ __RANGECHCK(end_id, BUF_CLR_END_ID_WIDTH); ++ ++ cmd = (((uint64_t) 1 << BUF_CLR_CMD_LBN) | ++ (start_id << BUF_CLR_START_ID_LBN) | ++ (end_id << BUF_CLR_END_ID_LBN)); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ falcon_write_qq(offset, cmd, FALCON_ATOMIC_UPD_REG); ++ mmiowb(); ++ ++ nic->buf_commit_outstanding++; ++ EFHW_TRACE("COMMIT CLEAR out=%d", nic->buf_commit_outstanding); ++} ++ ++/*---------------------------------------------------------------------------- ++ * ++ * Events low-level register interface ++ * ++ *---------------------------------------------------------------------------*/ ++ ++static unsigned eventq_sizes[] = { ++ 512, ++ EFHW_1K, ++ EFHW_2K, ++ EFHW_4K, ++ EFHW_8K, ++ EFHW_16K, ++ EFHW_32K ++}; ++ ++#define N_EVENTQ_SIZES (sizeof(eventq_sizes) / sizeof(eventq_sizes[0])) ++ ++static inline void falcon_nic_srm_upd_evq(struct efhw_nic *nic, int evq) ++{ ++ /* set up the eventq which will receive events from the SRAM module. ++ * i.e buffer table updates and clears, TX and RX aperture table ++ * updates */ ++ ++ FALCON_LOCK_DECL; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ EFHW_BUILD_ASSERT(SRM_UPD_EVQ_REG_OFST == SRM_UPD_EVQ_REG_KER_OFST); ++ ++ __DWCHCK(SRM_UPD_EVQ_ID_LBN, SRM_UPD_EVQ_ID_WIDTH); ++ __RANGECHCK(evq, SRM_UPD_EVQ_ID_WIDTH); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(efhw_kva + SRM_UPD_EVQ_REG_OFST, ++ ((uint64_t) evq << SRM_UPD_EVQ_ID_LBN), ++ FALCON_ATOMIC_SRPM_UDP_EVQ_REG); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++} ++ ++static void ++falcon_nic_evq_ptr_tbl(struct efhw_nic *nic, ++ uint evq, /* evq id */ ++ uint enable, /* 1 to enable, 0 to disable */ ++ uint buf_base_id,/* Buffer table base for EVQ */ ++ uint evq_size /* Number of events */) ++{ ++ FALCON_LOCK_DECL; ++ uint i, val; ++ ulong offset; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ /* size must be one of the various options, otherwise we assert */ ++ for (i = 0; i < N_EVENTQ_SIZES; i++) { ++ if (evq_size <= eventq_sizes[i]) ++ break; ++ } ++ EFHW_ASSERT(i < N_EVENTQ_SIZES); ++ ++ __DWCHCK(EVQ_BUF_BASE_ID_LBN, EVQ_BUF_BASE_ID_WIDTH); ++ __DWCHCK(EVQ_SIZE_LBN, EVQ_SIZE_WIDTH); ++ __DWCHCK(EVQ_EN_LBN, EVQ_EN_WIDTH); ++ ++ __RANGECHCK(i, EVQ_SIZE_WIDTH); ++ __RANGECHCK(buf_base_id, EVQ_BUF_BASE_ID_WIDTH); ++ __RANGECHCK(1, EVQ_EN_WIDTH); ++ ++ /* if !enable then only evq needs to be correct, although valid ++ * values need to be passed in for other arguments to prevent ++ * assertions */ ++ ++ val = ((i << EVQ_SIZE_LBN) | (buf_base_id << EVQ_BUF_BASE_ID_LBN) | ++ (enable ? 
(1 << EVQ_EN_LBN) : 0)); ++ ++ EFHW_ASSERT(evq < nic->num_evqs); ++ ++ offset = EVQ_PTR_TBL_CHAR_OFST; ++ offset += evq * FALCON_REGISTER128; ++ ++ EFHW_TRACE("%s: evq %u en=%x:buf=%x:size=%x->%x at %lx", ++ __func__, evq, enable, buf_base_id, evq_size, val, ++ offset); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(efhw_kva + offset, val, FALCON_ATOMIC_PTR_TBL_REG); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++ ++ /* caller must wait for an update done event before writing any more ++ table entries */ ++ ++ return; ++} ++ ++void ++falcon_nic_evq_ack(struct efhw_nic *nic, ++ uint evq, /* evq id */ ++ uint rptr, /* new read pointer update */ ++ bool wakeup /* request a wakeup event if ptr's != */ ++ ) ++{ ++ uint val; ++ ulong offset; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ EFHW_BUILD_ASSERT(FALCON_EVQ_CHAR == 4); ++ ++ __DWCHCK(EVQ_RPTR_LBN, EVQ_RPTR_WIDTH); ++ __RANGECHCK(rptr, EVQ_RPTR_WIDTH); ++ ++ val = (rptr << EVQ_RPTR_LBN); ++ ++ EFHW_ASSERT(evq < nic->num_evqs); ++ ++ if (evq < FALCON_EVQ_CHAR) { ++ offset = EVQ_RPTR_REG_KER_OFST; ++ offset += evq * FALCON_REGISTER128; ++ ++ EFHW_ASSERT(!wakeup); /* don't try this at home */ ++ } else { ++ offset = EVQ_RPTR_REG_OFST + (FALCON_EVQ_CHAR * ++ FALCON_REGISTER128); ++ offset += (evq - FALCON_EVQ_CHAR) * FALCON_REGISTER128; ++ ++ /* nothing to do for interruptless event queues which do ++ * not want a wakeup */ ++ if (evq != FALCON_EVQ_CHAR && !wakeup) ++ return; ++ } ++ ++ EFHW_TRACE("%s: %x %x %x->%x", __func__, evq, rptr, wakeup, val); ++ ++ writel(val, efhw_kva + offset); ++ mmiowb(); ++} ++ ++/*---------------------------------------------------------------------------*/ ++ ++static inline void ++falcon_drv_ev(struct efhw_nic *nic, uint64_t data, uint qid) ++{ ++ FALCON_LOCK_DECL; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ /* send an event from one driver to the other */ ++ EFHW_BUILD_ASSERT(DRV_EV_REG_KER_OFST == DRV_EV_REG_OFST); ++ EFHW_BUILD_ASSERT(DRV_EV_DATA_LBN == 0); ++ EFHW_BUILD_ASSERT(DRV_EV_DATA_WIDTH == 64); ++ EFHW_BUILD_ASSERT(DRV_EV_QID_LBN == 64); ++ EFHW_BUILD_ASSERT(DRV_EV_QID_WIDTH == 12); ++ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(efhw_kva + DRV_EV_REG_OFST, data, qid); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++} ++ ++_DEBUG_SYM_ void ++falcon_ab_timer_tbl_set(struct efhw_nic *nic, ++ uint evq, /* timer id */ ++ uint mode, /* mode bits */ ++ uint countdown /* counting value to set */) ++{ ++ FALCON_LOCK_DECL; ++ uint val; ++ ulong offset; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ EFHW_BUILD_ASSERT(TIMER_VAL_LBN == 0); ++ ++ __DWCHCK(TIMER_MODE_LBN, TIMER_MODE_WIDTH); ++ __DWCHCK(TIMER_VAL_LBN, TIMER_VAL_WIDTH); ++ ++ __RANGECHCK(mode, TIMER_MODE_WIDTH); ++ __RANGECHCK(countdown, TIMER_VAL_WIDTH); ++ ++ val = ((mode << TIMER_MODE_LBN) | (countdown << TIMER_VAL_LBN)); ++ ++ if (evq < FALCON_EVQ_CHAR) { ++ offset = TIMER_CMD_REG_KER_OFST; ++ offset += evq * EFHW_8K; /* PAGE mapped register */ ++ } else { ++ offset = TIMER_TBL_OFST; ++ offset += evq * FALCON_REGISTER128; ++ } ++ EFHW_ASSERT(evq < nic->num_evqs); ++ ++ EFHW_TRACE("%s: evq %u mode %x (%s) time %x -> %08x", ++ __func__, evq, mode, ++ mode == 0 ? "DISABLE" : ++ mode == 1 ? "IMMED" : ++ mode == 2 ? (evq < 5 ? "HOLDOFF" : "RX_TRIG") : ++ "", countdown, val); ++ ++ /* Falcon requires 128 bit atomic access for this register when ++ * accessed from the driver. 
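   falcon_nic_set_interrupt_moderation() further down reuses this
   helper, dividing the requested hold-off by 5 to convert it into
   timer units; a request of 60 on the char event queue would therefore
   be (sketch only, using the same names that appear later in this
   file):

	falcon_ab_timer_tbl_set(nic, FALCON_EVQ_CHAR, TIMER_MODE_INT_HLDOFF, 60 / 5);
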
User access to timers is page mapped
++	 */
++	FALCON_LOCK_LOCK(nic);
++	falcon_write_qq(efhw_kva + offset, val, FALCON_ATOMIC_TIMER_CMD_REG);
++	mmiowb();
++	FALCON_LOCK_UNLOCK(nic);
++	return;
++}
++
++
++/*--------------------------------------------------------------------
++ *
++ * Rate pacing - Low level interface
++ *
++ *--------------------------------------------------------------------*/
++void falcon_nic_pace(struct efhw_nic *nic, uint dmaq, uint pace)
++{
++	/* The pacing delay is 2^pace microseconds. This is the minimum
++	   additional delay imposed over and above the IPG.
++
++	   Pacing is only available on the virtual interfaces.
++	 */
++	FALCON_LOCK_DECL;
++	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
++	ulong offset;
++
++	if (pace > 20)
++		pace = 20;	/* maximum supported value */
++
++	__DWCHCK(TX_PACE_LBN, TX_PACE_WIDTH);
++	__RANGECHCK(pace, TX_PACE_WIDTH);
++
++	switch (nic->devtype.variant) {
++	case 'A':
++		EFHW_ASSERT(dmaq >= TX_PACE_TBL_FIRST_QUEUE_A1);
++		offset = TX_PACE_TBL_A1_OFST;
++		offset += (dmaq - TX_PACE_TBL_FIRST_QUEUE_A1) * 16;
++		break;
++	case 'B':
++		/* Would be nice to assert this, but as dmaq is unsigned and
++		 * TX_PACE_TBL_FIRST_QUEUE_B0 is 0, it makes no sense
++		 * EFHW_ASSERT(dmaq >= TX_PACE_TBL_FIRST_QUEUE_B0);
++		 */
++		offset = TX_PACE_TBL_B0_OFST;
++		offset += (dmaq - TX_PACE_TBL_FIRST_QUEUE_B0) * 16;
++		break;
++	default:
++		EFHW_ASSERT(0);
++		offset = 0;
++		break;
++	}
++
++	/* Falcon requires 128 bit atomic access for this register */
++	FALCON_LOCK_LOCK(nic);
++	falcon_write_qq(efhw_kva + offset, pace, FALCON_ATOMIC_PACE_REG);
++	mmiowb();
++	FALCON_LOCK_UNLOCK(nic);
++
++	EFHW_TRACE("%s: txq %d offset=%lx pace=2^%x",
++		   __func__, dmaq, offset, pace);
++}
++
++/*--------------------------------------------------------------------
++ *
++ * Interrupt - Low level interface
++ *
++ *--------------------------------------------------------------------*/
++
++static void falcon_nic_handle_fatal_int(struct efhw_nic *nic)
++{
++	FALCON_LOCK_DECL;
++	volatile char __iomem *offset;
++	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
++	uint64_t val;
++
++	offset = (efhw_kva + FATAL_INTR_REG_OFST);
++
++	/* Falcon requires 32 bit atomic access for this register */
++	FALCON_LOCK_LOCK(nic);
++	val = readl(offset);
++	FALCON_LOCK_UNLOCK(nic);
++
++	/* ?? BUG3249 - need to disable illegal address interrupt */
++	/* ?? BUG3114 - need to backport interrupt storm protection code */
++	EFHW_ERR("fatal interrupt: %s%s%s%s%s%s%s%s%s%s%s%s[%" PRIx64 "]",
++		 val & (1 << PCI_BUSERR_INT_CHAR_LBN) ? "PCI-bus-error " : "",
++		 val & (1 << SRAM_OOB_INT_CHAR_LBN) ? "SRAM-oob " : "",
++		 val & (1 << BUFID_OOB_INT_CHAR_LBN) ? "bufid-oob " : "",
++		 val & (1 << MEM_PERR_INT_CHAR_LBN) ? "int-parity " : "",
++		 val & (1 << RBUF_OWN_INT_CHAR_LBN) ? "rx-bufid-own " : "",
++		 val & (1 << TBUF_OWN_INT_CHAR_LBN) ? "tx-bufid-own " : "",
++		 val & (1 << RDESCQ_OWN_INT_CHAR_LBN) ? "rx-desc-own " : "",
++		 val & (1 << TDESCQ_OWN_INT_CHAR_LBN) ? "tx-desc-own " : "",
++		 val & (1 << EVQ_OWN_INT_CHAR_LBN) ? "evq-own " : "",
++		 val & (1 << EVFF_OFLO_INT_CHAR_LBN) ? "evq-fifo " : "",
++		 val & (1 << ILL_ADR_INT_CHAR_LBN) ? "ill-addr " : "",
++		 val & (1 << SRM_PERR_INT_CHAR_LBN) ?
"sram-parity " : "", val); ++} ++ ++static void falcon_nic_interrupt_hw_enable(struct efhw_nic *nic) ++{ ++ FALCON_LOCK_DECL; ++ uint val; ++ volatile char __iomem *offset; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ EFHW_BUILD_ASSERT(DRV_INT_EN_CHAR_WIDTH == 1); ++ ++ if (nic->flags & NIC_FLAG_NO_INTERRUPT) ++ return; ++ ++ offset = (efhw_kva + INT_EN_REG_CHAR_OFST); ++ val = 1 << DRV_INT_EN_CHAR_LBN; ++ ++ EFHW_NOTICE("%s: %x -> %x", __func__, (int)(offset - efhw_kva), ++ val); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(offset, val, FALCON_ATOMIC_INT_EN_REG); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++} ++ ++static void falcon_nic_interrupt_hw_disable(struct efhw_nic *nic) ++{ ++ FALCON_LOCK_DECL; ++ volatile char __iomem *offset; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ EFHW_BUILD_ASSERT(SRAM_PERR_INT_KER_WIDTH == 1); ++ EFHW_BUILD_ASSERT(DRV_INT_EN_KER_LBN == 0); ++ EFHW_BUILD_ASSERT(SRAM_PERR_INT_CHAR_WIDTH == 1); ++ EFHW_BUILD_ASSERT(DRV_INT_EN_CHAR_LBN == 0); ++ EFHW_BUILD_ASSERT(SRAM_PERR_INT_KER_LBN == SRAM_PERR_INT_CHAR_LBN); ++ EFHW_BUILD_ASSERT(DRV_INT_EN_KER_LBN == DRV_INT_EN_CHAR_LBN); ++ ++ if (nic->flags & NIC_FLAG_NO_INTERRUPT) ++ return; ++ ++ offset = (efhw_kva + INT_EN_REG_CHAR_OFST); ++ ++ EFHW_NOTICE("%s: %x -> 0", __func__, (int)(offset - efhw_kva)); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(offset, 0, FALCON_ATOMIC_INT_EN_REG); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++} ++ ++static void falcon_nic_irq_addr_set(struct efhw_nic *nic, dma_addr_t dma_addr) ++{ ++ FALCON_LOCK_DECL; ++ volatile char __iomem *offset; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ++ offset = (efhw_kva + INT_ADR_REG_CHAR_OFST); ++ ++ EFHW_NOTICE("%s: %x -> " DMA_ADDR_T_FMT, __func__, ++ (int)(offset - efhw_kva), dma_addr); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(offset, dma_addr, FALCON_ATOMIC_INT_ADR_REG); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++} ++ ++ ++/*-------------------------------------------------------------------- ++ * ++ * RXDP - low level interface ++ * ++ *--------------------------------------------------------------------*/ ++ ++void ++falcon_nic_set_rx_usr_buf_size(struct efhw_nic *nic, int usr_buf_bytes) ++{ ++ FALCON_LOCK_DECL; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ uint64_t val, val2, usr_buf_size = usr_buf_bytes / 32; ++ int rubs_lbn, rubs_width, roec_lbn; ++ ++ EFHW_BUILD_ASSERT(RX_CFG_REG_OFST == RX_CFG_REG_KER_OFST); ++ ++ switch (nic->devtype.variant) { ++ default: ++ EFHW_ASSERT(0); ++ /* Fall-through to avoid compiler warnings. 
case 'A':
++		rubs_lbn = RX_USR_BUF_SIZE_A1_LBN;
++		rubs_width = RX_USR_BUF_SIZE_A1_WIDTH;
++		roec_lbn = RX_OWNERR_CTL_A1_LBN;
++		break;
++	case 'B':
++		rubs_lbn = RX_USR_BUF_SIZE_B0_LBN;
++		rubs_width = RX_USR_BUF_SIZE_B0_WIDTH;
++		roec_lbn = RX_OWNERR_CTL_B0_LBN;
++		break;
++	}
++
++	__DWCHCK(rubs_lbn, rubs_width);
++	__QWCHCK(roec_lbn, 1);
++	__RANGECHCK(usr_buf_size, rubs_width);
++
++	/* Falcon requires 128 bit atomic access for this register */
++	FALCON_LOCK_LOCK(nic);
++	falcon_read_qq(efhw_kva + RX_CFG_REG_OFST, &val, &val2);
++
++	val &= ~((__FALCON_MASK64(rubs_width)) << rubs_lbn);
++	val |= (usr_buf_size << rubs_lbn);
++
++	/* shouldn't be needed for a production driver */
++	val |= ((uint64_t) 1 << roec_lbn);
++
++	falcon_write_qq(efhw_kva + RX_CFG_REG_OFST, val, val2);
++	mmiowb();
++	FALCON_LOCK_UNLOCK(nic);
++}
++EXPORT_SYMBOL(falcon_nic_set_rx_usr_buf_size);
++
++
++/*--------------------------------------------------------------------
++ *
++ * TXDP - low level interface
++ *
++ *--------------------------------------------------------------------*/
++
++_DEBUG_SYM_ void falcon_nic_tx_cfg(struct efhw_nic *nic, int unlocked)
++{
++	FALCON_LOCK_DECL;
++	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
++	uint64_t val1, val2;
++
++	EFHW_BUILD_ASSERT(TX_CFG_REG_OFST == TX_CFG_REG_KER_OFST);
++	__DWCHCK(TX_OWNERR_CTL_LBN, TX_OWNERR_CTL_WIDTH);
++	__DWCHCK(TX_NON_IP_DROP_DIS_LBN, TX_NON_IP_DROP_DIS_WIDTH);
++
++	FALCON_LOCK_LOCK(nic);
++	falcon_read_qq(efhw_kva + TX_CFG_REG_OFST, &val1, &val2);
++
++	/* Will flag fatal interrupts on owner id errors. This should not be
++	   on for production code because there is otherwise a denial of
++	   service attack possible */
++	val1 |= (1 << TX_OWNERR_CTL_LBN);
++
++	/* Set up user queue TCP/UDP-only packet security */
++	if (unlocked)
++		val1 |= (1 << TX_NON_IP_DROP_DIS_LBN);
++	else
++		val1 &= ~(1 << TX_NON_IP_DROP_DIS_LBN);
++
++	falcon_write_qq(efhw_kva + TX_CFG_REG_OFST, val1, val2);
++	mmiowb();
++	FALCON_LOCK_UNLOCK(nic);
++}
++
++/*--------------------------------------------------------------------
++ *
++ * Random thresholds - Low level interface (Would like these to be op
++ * defaults wherever possible)
++ *
++ *--------------------------------------------------------------------*/
++
++void falcon_nic_pace_cfg(struct efhw_nic *nic, int fb_base, int bin_thresh)
++{
++	FALCON_LOCK_DECL;
++	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
++	unsigned offset = 0;
++	uint64_t val;
++
++	__DWCHCK(TX_PACE_FB_BASE_LBN, TX_PACE_FB_BASE_WIDTH);
++	__DWCHCK(TX_PACE_BIN_TH_LBN, TX_PACE_BIN_TH_WIDTH);
++
++	switch (nic->devtype.variant) {
++	case 'A': offset = TX_PACE_REG_A1_OFST; break;
++	case 'B': offset = TX_PACE_REG_B0_OFST; break;
++	default:  EFHW_ASSERT(0); break;
++	}
++
++	val = (0x15 << TX_PACE_SB_NOTAF_LBN);
++	val |= (0xb << TX_PACE_SB_AF_LBN);
++
++	val |= ((fb_base & __FALCON_MASK64(TX_PACE_FB_BASE_WIDTH)) <<
++		TX_PACE_FB_BASE_LBN);
++	val |= ((bin_thresh & __FALCON_MASK64(TX_PACE_BIN_TH_WIDTH)) <<
++		TX_PACE_BIN_TH_LBN);
++
++	/* Falcon requires 128 bit atomic access for this register */
++	FALCON_LOCK_LOCK(nic);
++	falcon_write_qq(efhw_kva + offset, val, 0);
++	mmiowb();
++	FALCON_LOCK_UNLOCK(nic);
++}
++
++
++/**********************************************************************
++ * Implementation of the HAL.
********************************************
++ **********************************************************************/
++
++/*----------------------------------------------------------------------------
++ *
++ * Initialisation and configuration discovery
++ *
++ *---------------------------------------------------------------------------*/
++
++static int falcon_nic_init_irq_channel(struct efhw_nic *nic, int enable)
++{
++	/* create a buffer for the irq channel */
++	int rc;
++
++	if (enable) {
++		rc = efhw_iopage_alloc(nic, &nic->irq_iobuff);
++		if (rc < 0)
++			return rc;
++
++		falcon_nic_irq_addr_set(nic,
++				efhw_iopage_dma_addr(&nic->irq_iobuff));
++	} else {
++		if (efhw_iopage_is_valid(&nic->irq_iobuff))
++			efhw_iopage_free(nic, &nic->irq_iobuff);
++
++		efhw_iopage_mark_invalid(&nic->irq_iobuff);
++		falcon_nic_irq_addr_set(nic, 0);
++	}
++
++	EFHW_TRACE("%s: %lx %sable", __func__,
++		   (unsigned long) efhw_iopage_dma_addr(&nic->irq_iobuff),
++		   enable ? "en" : "dis");
++
++	return 0;
++}
++
++static void falcon_nic_close_hardware(struct efhw_nic *nic)
++{
++	/* check we are in possession of some hardware */
++	if (!efhw_nic_have_hw(nic))
++		return;
++
++	falcon_nic_init_irq_channel(nic, 0);
++	falcon_nic_filter_dtor(nic);
++
++	EFHW_NOTICE("%s:", __func__);
++}
++
++static int
++falcon_nic_init_hardware(struct efhw_nic *nic,
++			 struct efhw_ev_handler *ev_handlers,
++			 const uint8_t *mac_addr, int non_irq_evq)
++{
++	int rc;
++
++	/* header sanity checks */
++	FALCON_ASSERT_VALID();
++
++	/* Initialise supporting modules */
++	rc = falcon_nic_filter_ctor(nic);
++	if (rc < 0)
++		return rc;
++
++#if FALCON_USE_SHADOW_BUFFER_TABLE
++	CI_ZERO_ARRAY(_falcon_buffer_table, FALCON_BUFFER_TBL_NUM);
++#endif
++
++	/* Initialise the top level hardware blocks */
++	memcpy(nic->mac_addr, mac_addr, ETH_ALEN);
++
++	EFHW_TRACE("%s:", __func__);
++
++	/* nic.c:efhw_nic_init marks all the interrupt units as unused.
++
++	   ?? TODO we should be able to request the non-interrupting event
++	   queue and the net driver's (for a net driver that is using libefhw)
++	   additional RSS queues here.
++
++	   The result would be that the net driver could call
++	   nic.c:efhw_nic_allocate_common_hardware_resources() and the
++	   ifdef FALCONs could be removed from
++	   nic.c:efhw_nic_allocate_common_hardware_resources()
++	 */
++	nic->irq_unit = INT_EN_REG_CHAR_OFST;
++
++	/*****************************************************************
++	 * The rest of this function deals with initialization of the NIC's
++	 * hardware (as opposed to the initialization of the
++	 * struct efhw_nic data structure */
++
++	/* char driver grabs SRM events onto the non interrupting
++	 * event queue */
++	falcon_nic_srm_upd_evq(nic, non_irq_evq);
++
++	/* RXDP tweaks */
++
++	/* ?? bug2396 rx_cfg should be ok so long as the net driver
++	 * always pushes buffers big enough for the link MTU */
++
++	/* set the RX buffer cutoff size to be the same as PAGE_SIZE.
++	 * Use this value when we think that there will be a lot of
++	 * jumbo frames.
++	 *
++	 * The default value 1600 is useful when packets are small,
++	 * but would mean that jumbo frame RX queues would need more
++	 * descriptors pushing */
++	falcon_nic_set_rx_usr_buf_size(nic, FALCON_RX_USR_BUF_SIZE);
++
++	/* TXDP tweaks */
++	/* ?? bug2396 looks ok */
++	falcon_nic_tx_cfg(nic, /*unlocked(for non-UDP/TCP)= */ 0);
++	falcon_nic_pace_cfg(nic, 4, 2);
++
++	/* ??
bug2396 ++ * netdriver must load first or else must RMW this register */ ++ falcon_nic_rx_filter_ctl_set(nic, RX_FILTER_CTL_SRCH_LIMIT_TCP_FULL, ++ RX_FILTER_CTL_SRCH_LIMIT_TCP_WILD, ++ RX_FILTER_CTL_SRCH_LIMIT_UDP_FULL, ++ RX_FILTER_CTL_SRCH_LIMIT_UDP_WILD); ++ ++ if (!(nic->flags & NIC_FLAG_NO_INTERRUPT)) { ++ rc = efhw_keventq_ctor(nic, FALCON_EVQ_CHAR, ++ &nic->interrupting_evq, ev_handlers); ++ if (rc < 0) { ++ EFHW_ERR("%s: efhw_keventq_ctor() failed (%d) evq=%d", ++ __func__, rc, FALCON_EVQ_CHAR); ++ return rc; ++ } ++ } ++ rc = efhw_keventq_ctor(nic, non_irq_evq, ++ &nic->non_interrupting_evq, NULL); ++ if (rc < 0) { ++ EFHW_ERR("%s: efhw_keventq_ctor() failed (%d) evq=%d", ++ __func__, rc, non_irq_evq); ++ return rc; ++ } ++ ++ /* allocate IRQ channel */ ++ rc = falcon_nic_init_irq_channel(nic, 1); ++ /* ignore failure at user-level for eftest */ ++ if ((rc < 0) && !(nic->options & NIC_OPT_EFTEST)) ++ return rc; ++ ++ return 0; ++} ++ ++/*-------------------------------------------------------------------- ++ * ++ * Interrupt ++ * ++ *--------------------------------------------------------------------*/ ++ ++static void ++falcon_nic_interrupt_enable(struct efhw_nic *nic) ++{ ++ struct efhw_keventq *q; ++ unsigned rdptr; ++ ++ if (nic->flags & NIC_FLAG_NO_INTERRUPT) ++ return; ++ ++ /* Enable driver interrupts */ ++ EFHW_NOTICE("%s: enable master interrupt", __func__); ++ falcon_nic_interrupt_hw_enable(nic); ++ ++ /* An interrupting eventq must start of day ack its read pointer */ ++ q = &nic->interrupting_evq; ++ rdptr = EFHW_EVENT_OFFSET(q, q, 1) / sizeof(efhw_event_t); ++ falcon_nic_evq_ack(nic, FALCON_EVQ_CHAR, rdptr, false); ++ EFHW_NOTICE("%s: ACK evq[%d]:%x", __func__, ++ FALCON_EVQ_CHAR, rdptr); ++} ++ ++static void falcon_nic_interrupt_disable(struct efhw_nic *nic) ++{ ++ /* NB. No need to check for NIC_FLAG_NO_INTERRUPT, as ++ ** falcon_nic_interrupt_hw_disable() will do it. */ ++ falcon_nic_interrupt_hw_disable(nic); ++} ++ ++static void ++falcon_nic_set_interrupt_moderation(struct efhw_nic *nic, int evq, ++ uint32_t val) ++{ ++ if (evq < 0) ++ evq = FALCON_EVQ_CHAR; ++ ++ falcon_ab_timer_tbl_set(nic, evq, TIMER_MODE_INT_HLDOFF, val / 5); ++} ++ ++static inline void legacy_irq_ack(struct efhw_nic *nic) ++{ ++ EFHW_ASSERT(!(nic->flags & NIC_FLAG_NO_INTERRUPT)); ++ ++ if (!(nic->flags & NIC_FLAG_MSI)) { ++ writel(1, EFHW_KVA(nic) + INT_ACK_REG_CHAR_A1_OFST); ++ mmiowb(); ++ /* ?? FIXME: We should be doing a read here to ensure IRQ is ++ * thoroughly acked before we return from ISR. */ ++ } ++} ++ ++static int falcon_nic_interrupt(struct efhw_nic *nic) ++{ ++ uint32_t *syserr_ptr = ++ (uint32_t *) efhw_iopage_ptr(&nic->irq_iobuff); ++ int handled = 0; ++ int done_ack = 0; ++ ++ EFHW_ASSERT(!(nic->flags & NIC_FLAG_NO_INTERRUPT)); ++ EFHW_ASSERT(syserr_ptr); ++ ++ /* FIFO fill level interrupt - just log it. */ ++ if (unlikely(*(syserr_ptr + (DW0_OFST / 4)))) { ++ EFHW_WARN("%s: *** FIFO *** %x", __func__, ++ *(syserr_ptr + (DW0_OFST / 4))); ++ *(syserr_ptr + (DW0_OFST / 4)) = 0; ++ handled++; ++ } ++ ++ /* Fatal interrupts. */ ++ if (unlikely(*(syserr_ptr + (DW2_OFST / 4)))) { ++ *(syserr_ptr + (DW2_OFST / 4)) = 0; ++ falcon_nic_handle_fatal_int(nic); ++ handled++; ++ } ++ ++ /* Event queue interrupt. For legacy interrupts we have to check ++ * that the interrupt is for us, because it could be shared. */ ++ if (*(syserr_ptr + (DW1_OFST / 4))) { ++ *(syserr_ptr + (DW1_OFST / 4)) = 0; ++ /* ACK must come before callback to handler fn. 
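   Presumably because the ack re-arms the line: an event arriving while
   the handler runs then re-raises the interrupt, whereas acking after
   the callback could swallow that notification.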
*/
++		legacy_irq_ack(nic);
++		done_ack = 1;
++		handled++;
++		if (nic->irq_handler)
++			nic->irq_handler(nic, 0);
++	}
++
++	if (unlikely(!done_ack)) {
++		if (!handled)
++			/* Shared interrupt line (hopefully). */
++			return 0;
++		legacy_irq_ack(nic);
++	}
++
++	EFHW_TRACE("%s: handled %d", __func__, handled);
++	return 1;
++}
++
++/*--------------------------------------------------------------------
++ *
++ * Event Management - and SW event posting
++ *
++ *--------------------------------------------------------------------*/
++
++static void
++falcon_nic_event_queue_enable(struct efhw_nic *nic, uint evq, uint evq_size,
++			      dma_addr_t q_base_addr,	/* not used */
++			      uint buf_base_id, int interrupting)
++{
++	EFHW_ASSERT(nic);
++
++	/* Whether or not queue has an interrupt depends on
++	 * instance number and h/w variant, so [interrupting] is
++	 * ignored.
++	 */
++	falcon_ab_timer_tbl_set(nic, evq, 0/*disable*/, 0);
++
++	falcon_nic_evq_ptr_tbl(nic, evq, 1, buf_base_id, evq_size);
++	EFHW_TRACE("%s: enable evq %u size %u", __func__, evq, evq_size);
++}
++
++static void
++falcon_nic_event_queue_disable(struct efhw_nic *nic, uint evq, int timer_only)
++{
++	EFHW_ASSERT(nic);
++
++	falcon_ab_timer_tbl_set(nic, evq, 0 /* disable */ , 0);
++
++	if (!timer_only)
++		falcon_nic_evq_ptr_tbl(nic, evq, 0, 0, 0);
++	EFHW_TRACE("%s: disable evq %u", __func__, evq);
++}
++
++static void
++falcon_nic_wakeup_request(struct efhw_nic *nic, dma_addr_t q_base_addr,
++			  int next_i, int evq)
++{
++	EFHW_ASSERT(evq > FALCON_EVQ_CHAR);
++	falcon_nic_evq_ack(nic, evq, next_i, true);
++	EFHW_TRACE("%s: evq %d next_i %d", __func__, evq, next_i);
++}
++
++static void falcon_nic_sw_event(struct efhw_nic *nic, int data, int evq)
++{
++	uint64_t ev_data = data;
++
++	ev_data &= ~FALCON_EVENT_CODE_MASK;
++	ev_data |= FALCON_EVENT_CODE_SW;
++
++	falcon_drv_ev(nic, ev_data, evq);
++	EFHW_NOTICE("%s: evq[%d]->%x", __func__, evq, data);
++}
++
++
++/*--------------------------------------------------------------------
++ *
++ * Buffer table - helpers
++ *
++ *--------------------------------------------------------------------*/
++
++#define FALCON_LAZY_COMMIT_HWM (FALCON_BUFFER_UPD_MAX - 16)
++
++/* Note re.:
++ * falcon_nic_buffer_table_lazy_commit(struct efhw_nic *nic)
++ * falcon_nic_buffer_table_update_poll(struct efhw_nic *nic)
++ * falcon_nic_buffer_table_confirm(struct efhw_nic *nic)
++ * -- these are no-ops in the user-level driver because it would need to
++ * coordinate with the real driver on the number of outstanding commits.
++ *
++ * An exception is made for eftest apps, which manage the hardware without
++ * using the char driver.
++ */
++
++static inline void falcon_nic_buffer_table_lazy_commit(struct efhw_nic *nic)
++{
++	/* Do nothing if operating in synchronous mode. */
++	if (!nic->irq_handler)
++		return;
++}
++
++static inline void falcon_nic_buffer_table_update_poll(struct efhw_nic *nic)
++{
++	FALCON_LOCK_DECL;
++	int count = 0, rc = 0;
++
++	/* We can be called here early in initialisation */
++	if (!nic->irq_handler)
++		return;
++
++	/* If we need to gather buffer update events then poll the
++	   non-interrupting event queue */
++
++	/* For each _buffer_table_commit there will be an update done
++	   event.
We don't keep track of how many buffers each commit has ++ committed, just make sure that all the expected events have been ++ gathered */ ++ FALCON_LOCK_LOCK(nic); ++ ++ EFHW_TRACE("%s: %d", __func__, nic->buf_commit_outstanding); ++ ++ while (nic->buf_commit_outstanding > 0) { ++ /* we're not expecting to handle any events that require ++ * upcalls into the core driver */ ++ struct efhw_ev_handler handler; ++ memset(&handler, 0, sizeof(handler)); ++ nic->non_interrupting_evq.ev_handlers = &handler; ++ rc = efhw_keventq_poll(nic, &nic->non_interrupting_evq); ++ nic->non_interrupting_evq.ev_handlers = NULL; ++ ++ if (rc < 0) { ++ EFHW_ERR("%s: poll ERROR (%d:%d) ***** ", ++ __func__, rc, ++ nic->buf_commit_outstanding); ++ goto out; ++ } ++ ++ FALCON_LOCK_UNLOCK(nic); ++ ++ if (count++) ++ udelay(1); ++ ++ if (count > 1000) { ++ EFHW_WARN("%s: poll Timeout ***** (%d)", __func__, ++ nic->buf_commit_outstanding); ++ nic->buf_commit_outstanding = 0; ++ return; ++ } ++ FALCON_LOCK_LOCK(nic); ++ } ++ ++out: ++ FALCON_LOCK_UNLOCK(nic); ++ return; ++} ++ ++void falcon_nic_buffer_table_confirm(struct efhw_nic *nic) ++{ ++ /* confirm buffer table updates - should be used for items where ++ loss of data would be unacceptable. E.g for the buffers that back ++ an event or DMA queue */ ++ FALCON_LOCK_DECL; ++ ++ /* Do nothing if operating in synchronous mode. */ ++ if (!nic->irq_handler) ++ return; ++ ++ FALCON_LOCK_LOCK(nic); ++ ++ _falcon_nic_buffer_table_commit(nic); ++ ++ FALCON_LOCK_UNLOCK(nic); ++ ++ falcon_nic_buffer_table_update_poll(nic); ++} ++ ++/*-------------------------------------------------------------------- ++ * ++ * Buffer table - API ++ * ++ *--------------------------------------------------------------------*/ ++ ++static void ++falcon_nic_buffer_table_clear(struct efhw_nic *nic, int buffer_id, int num) ++{ ++ FALCON_LOCK_DECL; ++ FALCON_LOCK_LOCK(nic); ++ _falcon_nic_buffer_table_clear(nic, buffer_id, num); ++ FALCON_LOCK_UNLOCK(nic); ++} ++ ++static void ++falcon_nic_buffer_table_set(struct efhw_nic *nic, dma_addr_t dma_addr, ++ uint bufsz, uint region, ++ int own_id, int buffer_id) ++{ ++ FALCON_LOCK_DECL; ++ ++ EFHW_ASSERT(region < FALCON_REGION_NUM); ++ ++ EFHW_ASSERT((bufsz == EFHW_4K) || ++ (bufsz == EFHW_8K && FALCON_BUFFER_TABLE_FULL_MODE)); ++ ++ falcon_nic_buffer_table_update_poll(nic); ++ ++ FALCON_LOCK_LOCK(nic); ++ ++ _falcon_nic_buffer_table_set(nic, dma_addr, bufsz, region, own_id, ++ buffer_id); ++ ++ falcon_nic_buffer_table_lazy_commit(nic); ++ ++ FALCON_LOCK_UNLOCK(nic); ++} ++ ++void ++falcon_nic_buffer_table_set_n(struct efhw_nic *nic, int buffer_id, ++ dma_addr_t dma_addr, uint bufsz, uint region, ++ int n_pages, int own_id) ++{ ++ /* used to set up a contiguous range of buffers */ ++ FALCON_LOCK_DECL; ++ ++ EFHW_ASSERT(region < FALCON_REGION_NUM); ++ ++ EFHW_ASSERT((bufsz == EFHW_4K) || ++ (bufsz == EFHW_8K && FALCON_BUFFER_TABLE_FULL_MODE)); ++ ++ while (n_pages--) { ++ ++ falcon_nic_buffer_table_update_poll(nic); ++ ++ FALCON_LOCK_LOCK(nic); ++ ++ _falcon_nic_buffer_table_set(nic, dma_addr, bufsz, region, ++ own_id, buffer_id++); ++ ++ falcon_nic_buffer_table_lazy_commit(nic); ++ ++ FALCON_LOCK_UNLOCK(nic); ++ ++ dma_addr += bufsz; ++ } ++} ++ ++/*-------------------------------------------------------------------- ++ * ++ * DMA Queues - mid level API ++ * ++ *--------------------------------------------------------------------*/ ++ ++#if BUG5302_WORKAROUND ++ ++/* Tx queues can get stuck if the software write pointer is set to an index ++ * beyond the 
configured size of the queue, such that they will not flush. ++ * This code can be run before attempting a flush; it will detect the bogus ++ * value and reset it. This fixes most instances of this problem, although ++ * sometimes it does not work, or we may not detect it in the first place, ++ * if the out-of-range value was replaced by an in-range value earlier. ++ * (In those cases we have to apply a bigger hammer later, if we see that ++ * the queue is still not flushing.) ++ */ ++static void ++falcon_check_for_bogus_tx_dma_wptr(struct efhw_nic *nic, uint dmaq) ++{ ++ FALCON_LOCK_DECL; ++ uint64_t val_low64, val_high64; ++ uint64_t size, hwptr, swptr, val; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ulong offset = falcon_dma_tx_q_offset(nic, dmaq); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_read_qq(efhw_kva + offset, &val_low64, &val_high64); ++ FALCON_LOCK_UNLOCK(nic); ++ ++ size = (val_low64 >> TX_DESCQ_SIZE_LBN) ++ & __FALCON_MASK64(TX_DESCQ_SIZE_WIDTH); ++ size = (1 << size) * 512; ++ hwptr = (val_high64 >> __DW3(TX_DESCQ_HW_RPTR_LBN)) ++ & __FALCON_MASK64(TX_DESCQ_HW_RPTR_WIDTH); ++ swptr = (val_low64 >> TX_DESCQ_SW_WPTR_LBN) ++ & __FALCON_MASK64(__LW2(TX_DESCQ_SW_WPTR_LBN)); ++ val = (val_high64) ++ & ++ __FALCON_MASK64(__DW3 ++ (TX_DESCQ_SW_WPTR_LBN + TX_DESCQ_SW_WPTR_WIDTH)); ++ val = val << __LW2(TX_DESCQ_SW_WPTR_LBN); ++ swptr = swptr | val; ++ ++ if (swptr >= size) { ++ EFHW_WARN("Resetting bad write pointer for TXQ[%d]", dmaq); ++ writel((uint32_t) ((hwptr + 0) & (size - 1)), ++ efhw_kva + falcon_tx_dma_page_addr(dmaq) + 12); ++ mmiowb(); ++ } ++} ++ ++/* Here's that "bigger hammer": we reset all the pointers (hardware read, ++ * hardware descriptor cache read, software write) to zero. 
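   A plausible recovery sequence built from these helpers (sketch only;
   the flush-done event is consumed via falcon_handle_char_event()
   below):

	__falcon_really_flush_tx_dma_channel(nic, dmaq);  // applies the wptr fix itself
	// ... wait for the TX_DESCQ_FLS_DONE event; if it never arrives:
	falcon_clobber_tx_dma_ptrs(nic, dmaq);
	__falcon_really_flush_tx_dma_channel(nic, dmaq);  // and retry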
++ */ ++void falcon_clobber_tx_dma_ptrs(struct efhw_nic *nic, uint dmaq) ++{ ++ FALCON_LOCK_DECL; ++ uint64_t val_low64, val_high64; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ulong offset = falcon_dma_tx_q_offset(nic, dmaq); ++ ++ EFHW_WARN("Recovering stuck TXQ[%d]", dmaq); ++ FALCON_LOCK_LOCK(nic); ++ falcon_read_qq(efhw_kva + offset, &val_low64, &val_high64); ++ val_high64 &= ~(__FALCON_MASK64(TX_DESCQ_HW_RPTR_WIDTH) ++ << __DW3(TX_DESCQ_HW_RPTR_LBN)); ++ val_high64 &= ~(__FALCON_MASK64(TX_DC_HW_RPTR_WIDTH) ++ << __DW3(TX_DC_HW_RPTR_LBN)); ++ falcon_write_qq(efhw_kva + offset, val_low64, val_high64); ++ mmiowb(); ++ writel(0, efhw_kva + falcon_tx_dma_page_addr(dmaq) + 12); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++} ++ ++#endif ++ ++static inline int ++__falcon_really_flush_tx_dma_channel(struct efhw_nic *nic, uint dmaq) ++{ ++ FALCON_LOCK_DECL; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ uint val; ++ ++ EFHW_BUILD_ASSERT(TX_FLUSH_DESCQ_REG_KER_OFST == ++ TX_FLUSH_DESCQ_REG_OFST); ++ ++ __DWCHCK(TX_FLUSH_DESCQ_CMD_LBN, TX_FLUSH_DESCQ_CMD_WIDTH); ++ __DWCHCK(TX_FLUSH_DESCQ_LBN, TX_FLUSH_DESCQ_WIDTH); ++ __RANGECHCK(dmaq, TX_FLUSH_DESCQ_WIDTH); ++ ++ val = ((1 << TX_FLUSH_DESCQ_CMD_LBN) | (dmaq << TX_FLUSH_DESCQ_LBN)); ++ ++ EFHW_TRACE("TX DMA flush[%d]", dmaq); ++ ++#if BUG5302_WORKAROUND ++ falcon_check_for_bogus_tx_dma_wptr(nic, dmaq); ++#endif ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(efhw_kva + TX_FLUSH_DESCQ_REG_OFST, ++ val, FALCON_ATOMIC_TX_FLUSH_DESCQ); ++ ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++ return 0; ++} ++ ++static inline int ++__falcon_is_tx_dma_channel_flushed(struct efhw_nic *nic, uint dmaq) ++{ ++ FALCON_LOCK_DECL; ++ uint64_t val_low64, val_high64; ++ uint64_t enable, flush_pending; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ulong offset = falcon_dma_tx_q_offset(nic, dmaq); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_read_qq(efhw_kva + offset, &val_low64, &val_high64); ++ FALCON_LOCK_UNLOCK(nic); ++ ++ /* should see one of three values for these 2 bits ++ * 1, queue enabled no flush pending ++ * - i.e. first flush request ++ * 2, queue enabled, flush pending ++ * - i.e. request to reflush before flush finished ++ * 3, queue disabled (no flush pending) ++ * - flush complete ++ */ ++ __DWCHCK(TX_DESCQ_FLUSH_LBN, TX_DESCQ_FLUSH_WIDTH); ++ __DW3CHCK(TX_DESCQ_EN_LBN, TX_DESCQ_EN_WIDTH); ++ enable = val_high64 & (1 << __DW3(TX_DESCQ_EN_LBN)); ++ flush_pending = val_low64 & (1 << TX_DESCQ_FLUSH_LBN); ++ ++ if (enable && !flush_pending) ++ return 0; ++ ++ EFHW_TRACE("%d, %s: %s, %sflush pending", dmaq, __func__, ++ enable ? "enabled" : "disabled", ++ flush_pending ? 
"" : "NO "); ++ /* still in progress */ ++ if (enable && flush_pending) ++ return -EALREADY; ++ ++ return -EAGAIN; ++} ++ ++static int falcon_flush_tx_dma_channel(struct efhw_nic *nic, uint dmaq) ++{ ++ int rc; ++ rc = __falcon_is_tx_dma_channel_flushed(nic, dmaq); ++ if (rc < 0) { ++ EFHW_WARN("%s: failed %d", __func__, rc); ++ return rc; ++ } ++ return __falcon_really_flush_tx_dma_channel(nic, dmaq); ++} ++ ++static int ++__falcon_really_flush_rx_dma_channel(struct efhw_nic *nic, uint dmaq) ++{ ++ FALCON_LOCK_DECL; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ uint val; ++ ++ EFHW_BUILD_ASSERT(RX_FLUSH_DESCQ_REG_KER_OFST == ++ RX_FLUSH_DESCQ_REG_OFST); ++ ++ __DWCHCK(RX_FLUSH_DESCQ_CMD_LBN, RX_FLUSH_DESCQ_CMD_WIDTH); ++ __DWCHCK(RX_FLUSH_DESCQ_LBN, RX_FLUSH_DESCQ_WIDTH); ++ __RANGECHCK(dmaq, RX_FLUSH_DESCQ_WIDTH); ++ ++ val = ((1 << RX_FLUSH_DESCQ_CMD_LBN) | (dmaq << RX_FLUSH_DESCQ_LBN)); ++ ++ EFHW_TRACE("RX DMA flush[%d]", dmaq); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_write_qq(efhw_kva + RX_FLUSH_DESCQ_REG_OFST, val, ++ FALCON_ATOMIC_RX_FLUSH_DESCQ); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++ return 0; ++} ++ ++static inline int ++__falcon_is_rx_dma_channel_flushed(struct efhw_nic *nic, uint dmaq) ++{ ++ FALCON_LOCK_DECL; ++ uint64_t val; ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ ulong offset = falcon_dma_rx_q_offset(nic, dmaq); ++ ++ /* Falcon requires 128 bit atomic access for this register */ ++ FALCON_LOCK_LOCK(nic); ++ falcon_read_q(efhw_kva + offset, &val); ++ FALCON_LOCK_UNLOCK(nic); ++ ++ __DWCHCK(RX_DESCQ_EN_LBN, RX_DESCQ_EN_WIDTH); ++ ++ /* is it enabled? */ ++ return (val & (1 << RX_DESCQ_EN_LBN)) ++ ? 0 : -EAGAIN; ++} ++ ++static int falcon_flush_rx_dma_channel(struct efhw_nic *nic, uint dmaq) ++{ ++ int rc; ++ rc = __falcon_is_rx_dma_channel_flushed(nic, dmaq); ++ if (rc < 0) { ++ EFHW_ERR("%s: failed %d", __func__, rc); ++ return rc; ++ } ++ return __falcon_really_flush_rx_dma_channel(nic, dmaq); ++} ++ ++/*-------------------------------------------------------------------- ++ * ++ * Falcon specific event callbacks ++ * ++ *--------------------------------------------------------------------*/ ++ ++int ++falcon_handle_char_event(struct efhw_nic *nic, struct efhw_ev_handler *h, ++ efhw_event_t *ev) ++{ ++ EFHW_TRACE("DRIVER EVENT: "FALCON_EVENT_FMT, ++ FALCON_EVENT_PRI_ARG(*ev)); ++ ++ switch (FALCON_EVENT_DRIVER_SUBCODE(ev)) { ++ ++ case TX_DESCQ_FLS_DONE_EV_DECODE: ++ EFHW_TRACE("TX[%d] flushed", ++ (int)FALCON_EVENT_TX_FLUSH_Q_ID(ev)); ++ efhw_handle_txdmaq_flushed(nic, h, ev); ++ break; ++ ++ case RX_DESCQ_FLS_DONE_EV_DECODE: ++ EFHW_TRACE("RX[%d] flushed", ++ (int)FALCON_EVENT_TX_FLUSH_Q_ID(ev)); ++ efhw_handle_rxdmaq_flushed(nic, h, ev); ++ break; ++ ++ case SRM_UPD_DONE_EV_DECODE: ++ nic->buf_commit_outstanding = ++ max(0, nic->buf_commit_outstanding - 1); ++ EFHW_TRACE("COMMIT DONE %d", nic->buf_commit_outstanding); ++ break; ++ ++ case EVQ_INIT_DONE_EV_DECODE: ++ EFHW_TRACE("%sEVQ INIT", ""); ++ break; ++ ++ case WAKE_UP_EV_DECODE: ++ EFHW_TRACE("%sWAKE UP", ""); ++ efhw_handle_wakeup_event(nic, h, ev); ++ break; ++ ++ case TIMER_EV_DECODE: ++ EFHW_TRACE("%sTIMER", ""); ++ efhw_handle_timeout_event(nic, h, ev); ++ break; ++ ++ case RX_DESCQ_FLSFF_OVFL_EV_DECODE: ++ /* This shouldn't happen. 
*/ ++ EFHW_ERR("%s: RX flush fifo overflowed", __func__); ++ return -EINVAL; ++ ++ default: ++ EFHW_TRACE("UNKOWN DRIVER EVENT: " FALCON_EVENT_FMT, ++ FALCON_EVENT_PRI_ARG(*ev)); ++ break; ++ } ++ return 0; ++} ++ ++ ++/*-------------------------------------------------------------------- ++ * ++ * Filter search depth control ++ * ++ *--------------------------------------------------------------------*/ ++ ++ ++#define Q0_READ(q0, name) \ ++ ((unsigned)(((q0) >> name##_LBN) & (__FALCON_MASK64(name##_WIDTH)))) ++#define Q0_MASK(name) \ ++ ((__FALCON_MASK64(name##_WIDTH)) << name##_LBN) ++#define Q0_VALUE(name, value) \ ++ (((uint64_t)(value)) << name##_LBN) ++ ++#define Q1_READ(q1, name) \ ++ ((unsigned)(((q1) >> (name##_LBN - 64)) & \ ++ (__FALCON_MASK64(name##_WIDTH)))) ++#define Q1_MASK(name) \ ++ ((__FALCON_MASK64(name##_WIDTH)) << (name##_LBN - 64)) ++#define Q1_VALUE(name, value) \ ++ (((uint64_t)(value)) << (name##_LBN - 64)) ++ ++ ++void ++falcon_nic_get_rx_filter_search_limits(struct efhw_nic *nic, ++ struct efhw_filter_search_limits *lim, ++ int use_raw_values) ++{ ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ FALCON_LOCK_DECL; ++ uint64_t q0, q1; ++ unsigned ff = (use_raw_values ? 0 : RX_FILTER_CTL_SRCH_FUDGE_FULL); ++ unsigned wf = (use_raw_values ? 0 : RX_FILTER_CTL_SRCH_FUDGE_WILD); ++ ++ FALCON_LOCK_LOCK(nic); ++ falcon_read_qq(efhw_kva + RX_FILTER_CTL_REG_OFST, &q0, &q1); ++ FALCON_LOCK_UNLOCK(nic); ++ ++ lim->tcp_full = Q0_READ(q0, TCP_FULL_SRCH_LIMIT) - ff; ++ lim->tcp_wild = Q0_READ(q0, TCP_WILD_SRCH_LIMIT) - wf; ++ lim->udp_full = Q0_READ(q0, UDP_FULL_SRCH_LIMIT) - ff; ++ lim->udp_wild = Q0_READ(q0, UDP_WILD_SRCH_LIMIT) - wf; ++} ++EXPORT_SYMBOL(falcon_nic_get_rx_filter_search_limits); ++ ++ ++void ++falcon_nic_set_rx_filter_search_limits(struct efhw_nic *nic, ++ struct efhw_filter_search_limits *lim, ++ int use_raw_values) ++{ ++ volatile char __iomem *efhw_kva = EFHW_KVA(nic); ++ FALCON_LOCK_DECL; ++ uint64_t q0, q1; ++ unsigned ff = (use_raw_values ? 0 : RX_FILTER_CTL_SRCH_FUDGE_FULL); ++ unsigned wf = (use_raw_values ? 
0 : RX_FILTER_CTL_SRCH_FUDGE_WILD); ++ ++ FALCON_LOCK_LOCK(nic); ++ falcon_read_qq(efhw_kva + RX_FILTER_CTL_REG_OFST, &q0, &q1); ++ ++ q0 &= ~Q0_MASK(TCP_FULL_SRCH_LIMIT); ++ q0 &= ~Q0_MASK(TCP_WILD_SRCH_LIMIT); ++ q0 &= ~Q0_MASK(UDP_FULL_SRCH_LIMIT); ++ q0 &= ~Q0_MASK(UDP_WILD_SRCH_LIMIT); ++ q0 |= Q0_VALUE(TCP_FULL_SRCH_LIMIT, lim->tcp_full + ff); ++ q0 |= Q0_VALUE(TCP_WILD_SRCH_LIMIT, lim->tcp_wild + wf); ++ q0 |= Q0_VALUE(UDP_FULL_SRCH_LIMIT, lim->udp_full + ff); ++ q0 |= Q0_VALUE(UDP_WILD_SRCH_LIMIT, lim->udp_wild + wf); ++ nic->tcp_full_srch.max = lim->tcp_full + ff ++ - RX_FILTER_CTL_SRCH_FUDGE_FULL; ++ nic->tcp_wild_srch.max = lim->tcp_wild + wf ++ - RX_FILTER_CTL_SRCH_FUDGE_WILD; ++ nic->udp_full_srch.max = lim->udp_full + ff ++ - RX_FILTER_CTL_SRCH_FUDGE_FULL; ++ nic->udp_wild_srch.max = lim->udp_wild + wf ++ - RX_FILTER_CTL_SRCH_FUDGE_WILD; ++ ++ falcon_write_qq(efhw_kva + RX_FILTER_CTL_REG_OFST, q0, q1); ++ mmiowb(); ++ FALCON_LOCK_UNLOCK(nic); ++} ++EXPORT_SYMBOL(falcon_nic_set_rx_filter_search_limits); ++ ++ ++#undef READ_Q0 ++#undef Q0_MASK ++#undef Q0_VALUE ++#undef READ_Q1 ++#undef Q1_MASK ++#undef Q1_VALUE ++ ++ ++/*-------------------------------------------------------------------- ++ * ++ * New unified filter API ++ * ++ *--------------------------------------------------------------------*/ ++ ++ ++#if FALCON_FULL_FILTER_CACHE ++static inline struct efhw_filter_spec * ++filter_spec_cache_entry(struct efhw_nic *nic, int filter_idx) ++{ ++ EFHW_ASSERT(nic->filter_spec_cache); ++ return &nic->filter_spec_cache[FALCON_FILTER_TBL_NUM + filter_idx]; ++} ++#endif ++ ++ ++static int filter_is_active(struct efhw_nic *nic, int filter_idx) ++{ ++ return nic->filter_in_use[filter_idx]; ++} ++ ++ ++static void set_filter_cache_entry(struct efhw_nic *nic, ++ struct efhw_filter_spec *spec, ++ int filter_idx) ++{ ++ nic->filter_in_use[filter_idx] = 1; ++#if FALCON_FULL_FILTER_CACHE ++ memcpy(filter_spec_cache_entry(nic, filter_idx), spec, ++ sizeof(struct efhw_filter_spec)); ++#endif ++} ++ ++ ++static void clear_filter_cache_entry(struct efhw_nic *nic, ++ int filter_idx) ++{ ++ nic->filter_in_use[filter_idx] = 0; ++#if FALCON_FULL_FILTER_CACHE ++ memset(filter_spec_cache_entry(nic, filter_idx), 0, ++ sizeof(struct efhw_filter_spec)); ++#endif ++} ++ ++ ++#if FALCON_FULL_FILTER_CACHE ++static int filter_is_duplicate(struct efhw_nic *nic, ++ struct efhw_filter_spec *spec, int filter_idx) ++{ ++ struct efhw_filter_spec *cmp; ++ ++ cmp = filter_spec_cache_entry(nic, filter_idx); ++ ++ EFHW_ASSERT(filter_is_active(nic, filter_idx)); ++ ++ return (spec->saddr_le32 == cmp->saddr_le32) && ++ (spec->daddr_le32 == cmp->daddr_le32) && ++ (spec->sport_le16 == cmp->sport_le16) && ++ (spec->dport_le16 == cmp->dport_le16) && ++ (spec->tcp == cmp->tcp) && ++ (spec->full == cmp->full); ++} ++#endif ++ ++ ++static void common_build_ip_filter(struct efhw_nic *nic, int tcp, int full, ++ int rss, int scatter, uint dmaq_id, ++ unsigned saddr_le32, unsigned sport_le16, ++ unsigned daddr_le32, unsigned dport_le16, ++ uint64_t *q0, uint64_t *q1) ++{ ++ uint64_t v1, v2, v3, v4; ++ unsigned tmp_port_le16; ++ ++ if (!full) { ++ saddr_le32 = 0; ++ sport_le16 = 0; ++ if (!tcp) { ++ tmp_port_le16 = sport_le16; ++ sport_le16 = dport_le16; ++ dport_le16 = tmp_port_le16; ++ } ++ } ++ ++ v4 = (((!tcp) << __DW4(TCP_UDP_0_LBN)) | ++ (dmaq_id << __DW4(RXQ_ID_0_LBN))); ++ ++ switch (nic->devtype.variant) { ++ case 'A': ++ EFHW_ASSERT(!rss); ++ break; ++ case 'B': ++ v4 |= scatter << __DW4(SCATTER_EN_0_B0_LBN); ++ v4 |= 
rss << __DW4(RSS_EN_0_B0_LBN); ++ break; ++ default: ++ EFHW_ASSERT(0); ++ break; ++ } ++ ++ v3 = daddr_le32; ++ v2 = ((dport_le16 << __DW2(DEST_PORT_TCP_0_LBN)) | ++ (__HIGH(saddr_le32, SRC_IP_0_LBN, SRC_IP_0_WIDTH))); ++ v1 = ((__LOW(saddr_le32, SRC_IP_0_LBN, SRC_IP_0_WIDTH)) | ++ (sport_le16 << SRC_TCP_DEST_UDP_0_LBN)); ++ ++ *q0 = (v2 << 32) | v1; ++ *q1 = (v4 << 32) | v3; ++} ++ ++ ++static void build_filter(struct efhw_nic *nic, struct efhw_filter_spec *spec, ++ unsigned *key, unsigned *tbl_size, ++ struct efhw_filter_depth **depth, ++ uint64_t *q0, uint64_t *q1) ++{ ++ *key = falcon_hash_get_ip_key(spec->saddr_le32, ++ spec->sport_le16, ++ spec->daddr_le32, ++ spec->dport_le16, ++ spec->tcp, ++ spec->full); ++ *tbl_size = nic->ip_filter_tbl_size; ++ if (spec->tcp && spec->full) ++ *depth = &nic->tcp_full_srch; ++ else if (spec->tcp && !spec->full) ++ *depth = &nic->tcp_wild_srch; ++ else if (!spec->tcp && spec->full) ++ *depth = &nic->udp_full_srch; ++ else ++ *depth = &nic->udp_wild_srch; ++ common_build_ip_filter(nic, spec->tcp, spec->full, ++ spec->rss, spec->scatter, ++ spec->dmaq_id, ++ spec->saddr_le32, ++ spec->sport_le16, ++ spec->daddr_le32, ++ spec->dport_le16, ++ q0, q1); ++} ++ ++ ++#if FALCON_VERIFY_FILTERS ++static void verify_filters(struct efhw_nic *nic) ++{ ++ unsigned table_offset, table_stride; ++ unsigned i, dummy_key, dummy_tbl_size; ++ struct efhw_filter_depth *dummy_depth; ++ unsigned filter_tbl_size; ++ struct efhw_filter_spec *spec; ++ uint64_t q0_expect, q1_expect, q0_got, q1_got; ++ ++ filter_tbl_size = nic->ip_filter_tbl_size; ++ table_offset = RX_FILTER_TBL0_OFST; ++ table_stride = 2 * FALCON_REGISTER128; ++ ++ for (i = 0; i < filter_tbl_size; i++) { ++ if (!filter_is_active(nic, type, i)) ++ continue; ++ ++ spec = filter_spec_cache_entry(nic, type, i); ++ ++ build_filter(nic, spec, &dummy_key, &dummy_tbl_size, ++ &dummy_depth, &q0_expect, &q1_expect); ++ ++ falcon_read_qq(EFHW_KVA(nic) + table_offset + i * table_stride, ++ &q0_got, &q1_got); ++ ++ if ((q0_got != q0_expect) || (q1_got != q1_expect)) { ++ falcon_write_qq(EFHW_KVA(nic) + 0x300, ++ q0_got, q1_got); ++ EFHW_ERR("ERROR: RX-filter[%d][%d] was " ++ "%"PRIx64":%" PRIx64" expected " ++ "%"PRIx64":%"PRIx64, ++ nic->index, i, q0_got, q1_got, ++ q0_expect, q1_expect); ++ } ++ } ++} ++#endif ++ ++ ++static void write_filter_table_entry(struct efhw_nic *nic, ++ unsigned filter_idx, ++ uint64_t q0, uint64_t q1) ++{ ++ unsigned table_offset, table_stride, offset; ++ ++ EFHW_ASSERT(filter_idx < nic->ip_filter_tbl_size); ++ table_offset = RX_FILTER_TBL0_OFST; ++ table_stride = 2 * FALCON_REGISTER128; ++ ++ offset = table_offset + filter_idx * table_stride; ++ falcon_write_qq(EFHW_KVA(nic) + offset, q0, q1); ++ mmiowb(); ++ ++#if FALCON_VERIFY_FILTERS ++ { ++ uint64_t q0read, q1read; ++ ++ /* Read a different entry first - ensure BIU flushed shadow */ ++ falcon_read_qq(EFHW_KVA(nic) + offset + 0x10, &q0read, &q1read); ++ falcon_read_qq(EFHW_KVA(nic) + offset, &q0read, &q1read); ++ EFHW_ASSERT(q0read == q0); ++ EFHW_ASSERT(q1read == q1); ++ ++ verify_filters(nic, type); ++ } ++#endif ++} ++ ++ ++static int falcon_nic_filter_set(struct efhw_nic *nic, ++ struct efhw_filter_spec *spec, ++ int *filter_idx_out) ++{ ++ FALCON_LOCK_DECL; ++ unsigned key = 0, tbl_size = 0, hash1, hash2, k; ++ struct efhw_filter_depth *depth = NULL; ++ int filter_idx = -1; ++ int rc = 0; ++ uint64_t q0, q1; ++ ++ build_filter(nic, spec, &key, &tbl_size, &depth, &q0, &q1); ++ ++ if (tbl_size == 0) ++ return -EINVAL; ++ ++ 
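++ /* Placement is open-addressed double hashing: probe k visits
++ * falcon_hash_iterator(hash1, hash2, k, tbl_size), i.e.
++ * (hash1 + k * hash2) & (tbl_size - 1), and the first inactive
++ * slot is used.  depth->max bounds the probe count so we never
++ * search deeper than the limit programmed into the hardware.
++ */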
EFHW_TRACE("%s: depth->max=%d", __func__, depth->max); ++ ++ hash1 = falcon_hash_function1(key, tbl_size); ++ hash2 = falcon_hash_function2(key, tbl_size); ++ ++ FALCON_LOCK_LOCK(nic); ++ ++ for (k = 0; k < depth->max; k++) { ++ filter_idx = falcon_hash_iterator(hash1, hash2, k, tbl_size); ++ if (!filter_is_active(nic, filter_idx)) ++ break; ++#if FALCON_FULL_FILTER_CACHE ++ if (filter_is_duplicate(nic, spec, filter_idx)) { ++ EFHW_WARN("%s: ERROR: duplicate filter (disabling " ++ "interrupts)", __func__); ++ falcon_nic_interrupt_hw_disable(nic); ++ rc = -EINVAL; ++ goto fail1; ++ } ++#endif ++ } ++ if (k == depth->max) { ++ rc = -EADDRINUSE; ++ filter_idx = -1; ++ goto fail1; ++ } else if (depth->needed < (k + 1)) { ++ depth->needed = k + 1; ++ } ++ ++ EFHW_ASSERT(filter_idx < (int)tbl_size); ++ ++ set_filter_cache_entry(nic, spec, filter_idx); ++ write_filter_table_entry(nic, filter_idx, q0, q1); ++ ++ ++nic->ip_filter_tbl_used; ++ ++ *filter_idx_out = filter_idx; ++ ++ EFHW_TRACE("%s: filter index %d rxq %u set in %u", ++ __func__, filter_idx, spec->dmaq_id, k); ++ ++fail1: ++ FALCON_LOCK_UNLOCK(nic); ++ return rc; ++} ++ ++ ++static void falcon_nic_filter_clear(struct efhw_nic *nic, ++ int filter_idx) ++{ ++ FALCON_LOCK_DECL; ++ ++ if (filter_idx < 0) ++ return; ++ ++ FALCON_LOCK_LOCK(nic); ++ if (filter_is_active(nic, filter_idx)) { ++ if (--nic->ip_filter_tbl_used == 0) { ++ nic->tcp_full_srch.needed = 0; ++ nic->tcp_wild_srch.needed = 0; ++ nic->udp_full_srch.needed = 0; ++ nic->udp_wild_srch.needed = 0; ++ } ++ } ++ clear_filter_cache_entry(nic, filter_idx); ++ write_filter_table_entry(nic, filter_idx, 0, 0); ++ FALCON_LOCK_UNLOCK(nic); ++} ++ ++ ++int ++falcon_nic_filter_ctor(struct efhw_nic *nic) ++{ ++ nic->ip_filter_tbl_size = 8 * 1024; ++ nic->ip_filter_tbl_used = 0; ++ ++ nic->tcp_full_srch.needed = 0; ++ nic->tcp_full_srch.max = RX_FILTER_CTL_SRCH_LIMIT_TCP_FULL ++ - RX_FILTER_CTL_SRCH_FUDGE_FULL; ++ nic->tcp_wild_srch.needed = 0; ++ nic->tcp_wild_srch.max = RX_FILTER_CTL_SRCH_LIMIT_TCP_WILD ++ - RX_FILTER_CTL_SRCH_FUDGE_WILD; ++ nic->udp_full_srch.needed = 0; ++ nic->udp_full_srch.max = RX_FILTER_CTL_SRCH_LIMIT_UDP_FULL ++ - RX_FILTER_CTL_SRCH_FUDGE_FULL; ++ nic->udp_wild_srch.needed = 0; ++ nic->udp_wild_srch.max = RX_FILTER_CTL_SRCH_LIMIT_UDP_WILD ++ - RX_FILTER_CTL_SRCH_FUDGE_WILD; ++ ++ nic->filter_in_use = vmalloc(FALCON_FILTER_TBL_NUM); ++ if (nic->filter_in_use == NULL) ++ return -ENOMEM; ++ memset(nic->filter_in_use, 0, FALCON_FILTER_TBL_NUM); ++#if FALCON_FULL_FILTER_CACHE ++ nic->filter_spec_cache = vmalloc(FALCON_FILTER_TBL_NUM ++ * sizeof(struct efhw_filter_spec)); ++ if (nic->filter_spec_cache == NULL) ++ return -ENOMEM; ++ memset(nic->filter_spec_cache, 0, FALCON_FILTER_TBL_NUM ++ * sizeof(struct efhw_filter_spec)); ++#endif ++ ++ return 0; ++} ++ ++ ++void ++falcon_nic_filter_dtor(struct efhw_nic *nic) ++{ ++#if FALCON_FULL_FILTER_CACHE ++ if (nic->filter_spec_cache) ++ vfree(nic->filter_spec_cache); ++#endif ++ if (nic->filter_in_use) ++ vfree(nic->filter_in_use); ++} ++ ++ ++/*-------------------------------------------------------------------- ++ * ++ * Compatibility with old filter API ++ * ++ *--------------------------------------------------------------------*/ ++ ++void ++falcon_nic_rx_filter_ctl_get(struct efhw_nic *nic, uint32_t *tcp_full, ++ uint32_t *tcp_wild, ++ uint32_t *udp_full, uint32_t *udp_wild) ++{ ++ struct efhw_filter_search_limits lim; ++ ++ falcon_nic_get_rx_filter_search_limits(nic, &lim, 0); ++ *tcp_full = (uint32_t)lim.tcp_full; ++ 
*tcp_wild = (uint32_t)lim.tcp_wild; ++ *udp_full = (uint32_t)lim.udp_full; ++ *udp_wild = (uint32_t)lim.udp_wild; ++} ++EXPORT_SYMBOL(falcon_nic_rx_filter_ctl_get); ++ ++ ++void ++falcon_nic_rx_filter_ctl_set(struct efhw_nic *nic, uint32_t tcp_full, ++ uint32_t tcp_wild, ++ uint32_t udp_full, uint32_t udp_wild) ++{ ++ struct efhw_filter_search_limits lim; ++ ++ lim.tcp_full = (unsigned)tcp_full; ++ lim.tcp_wild = (unsigned)tcp_wild; ++ lim.udp_full = (unsigned)udp_full; ++ lim.udp_wild = (unsigned)udp_wild; ++ falcon_nic_set_rx_filter_search_limits(nic, &lim, 0); ++} ++EXPORT_SYMBOL(falcon_nic_rx_filter_ctl_set); ++ ++ ++static int ++falcon_nic_ipfilter_set(struct efhw_nic *nic, int type, int *_filter_idx, ++ int dmaq, ++ unsigned saddr_be32, unsigned sport_be16, ++ unsigned daddr_be32, unsigned dport_be16) ++{ ++ struct efhw_filter_spec spec; ++ ++ spec.dmaq_id = dmaq; ++ spec.saddr_le32 = ntohl(saddr_be32); ++ spec.daddr_le32 = ntohl(daddr_be32); ++ spec.sport_le16 = ntohs((unsigned short) sport_be16); ++ spec.dport_le16 = ntohs((unsigned short) dport_be16); ++ spec.tcp = ((type & EFHW_IP_FILTER_TYPE_TCP_MASK) != 0); ++ spec.full = ((type & EFHW_IP_FILTER_TYPE_FULL_MASK) != 0); ++ spec.rss = ((type & EFHW_IP_FILTER_TYPE_RSS_B0_MASK) != 0); ++ spec.scatter = ((type & EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK) == 0); ++ return falcon_nic_filter_set(nic, &spec, _filter_idx); ++} ++ ++static void falcon_nic_ipfilter_clear(struct efhw_nic *nic, int filter_idx) ++{ ++ falcon_nic_filter_clear(nic, filter_idx); ++} ++ ++ ++/*-------------------------------------------------------------------- ++ * ++ * Abstraction Layer Hooks ++ * ++ *--------------------------------------------------------------------*/ ++ ++struct efhw_func_ops falcon_char_functional_units = { ++ falcon_nic_close_hardware, ++ falcon_nic_init_hardware, ++ falcon_nic_interrupt, ++ falcon_nic_interrupt_enable, ++ falcon_nic_interrupt_disable, ++ falcon_nic_set_interrupt_moderation, ++ falcon_nic_event_queue_enable, ++ falcon_nic_event_queue_disable, ++ falcon_nic_wakeup_request, ++ falcon_nic_sw_event, ++ falcon_nic_ipfilter_set, ++ falcon_nic_ipfilter_clear, ++ falcon_dmaq_tx_q_init, ++ falcon_dmaq_rx_q_init, ++ falcon_dmaq_tx_q_disable, ++ falcon_dmaq_rx_q_disable, ++ falcon_flush_tx_dma_channel, ++ falcon_flush_rx_dma_channel, ++ falcon_nic_buffer_table_set, ++ falcon_nic_buffer_table_set_n, ++ falcon_nic_buffer_table_clear, ++ falcon_nic_buffer_table_commit, ++ falcon_nic_filter_set, ++ falcon_nic_filter_clear, ++}; ++ ++ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/falcon_hash.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/falcon_hash.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,159 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains EtherFabric NIC hash algorithms implementation. 
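++ *
++ * In brief: a 32-bit key is folded from the IP 5-tuple
++ * (falcon_hash_get_ip_key); a 16-bit LFSR run over the key,
++ * polynomial X^16 + X^3 + 1, gives the initial table index;
++ * a second hash gives an odd probe stride; and collisions are
++ * resolved by visiting (hash1 + n * hash2) mod the table size.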
++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++#include ++ ++ ++static unsigned int ++common_get_ip_key(unsigned int src_ip, unsigned int src_port, ++ unsigned int dest_ip, unsigned int dest_port, ++ int tcp, int full, int tx, unsigned int masked_q_id) ++{ ++ ++ unsigned int tmp_port, result; ++ ++ EFHW_ASSERT(tcp == 0 || tcp == 1); ++ EFHW_ASSERT(full == 0 || full == 1); ++ EFHW_ASSERT(masked_q_id < (1 << 10)); ++ ++ /* m=masked_q_id(TX)/0(RX) u=UDP S,D=src/dest addr s,d=src/dest port ++ * ++ * Wildcard filters have src(TX)/dest(RX) addr and port = 0; ++ * and UDP wildcard filters have the src and dest port fields swapped. ++ * ++ * Addr/port fields are little-endian. ++ * ++ * 3322222222221111111111 ++ * 10987654321098765432109876543210 ++ * ++ * 000000000000000000000mmmmmmmmmmu ^ ++ * DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD ^ ++ * ddddddddddddddddSSSSSSSSSSSSSSSS ^ ++ * SSSSSSSSSSSSSSSSssssssssssssssss ++ */ ++ ++ if (!tx) ++ masked_q_id = 0; ++ ++ if (!full) { ++ if (tx) { ++ dest_ip = 0; ++ dest_port = 0; ++ } else { ++ src_ip = 0; ++ src_port = 0; ++ } ++ if (!tcp) { ++ tmp_port = src_port; ++ src_port = dest_port; ++ dest_port = tmp_port; ++ } ++ } ++ ++ result = ((masked_q_id << 1) | (!tcp)) ^ ++ (dest_ip) ^ ++ (((dest_port & 0xffff) << 16) | ((src_ip >> 16) & 0xffff)) ^ ++ (((src_ip & 0xffff) << 16) | (src_port & 0xffff)); ++ ++ EFHW_TRACE("%s: IP %s %s %x", __func__, tcp ? "TCP" : "UDP", ++ full ? 
"Full" : "Wildcard", result); ++ ++ return result; ++} ++ ++ ++unsigned int ++falcon_hash_get_ip_key(unsigned int src_ip, unsigned int src_port, ++ unsigned int dest_ip, unsigned int dest_port, ++ int tcp, int full) ++{ ++ return common_get_ip_key(src_ip, src_port, dest_ip, dest_port, tcp, ++ full, 0, 0); ++} ++ ++ ++/* This function generates the First Hash key */ ++unsigned int falcon_hash_function1(unsigned int key, unsigned int nfilters) ++{ ++ ++ unsigned short int lfsr_reg; ++ unsigned int tmp_key; ++ int index; ++ ++ unsigned short int lfsr_input; ++ unsigned short int single_bit_key; ++ unsigned short int bit16_lfsr; ++ unsigned short int bit3_lfsr; ++ ++ lfsr_reg = 0xFFFF; ++ tmp_key = key; ++ ++ /* For Polynomial equation X^16+X^3+1 */ ++ for (index = 0; index < 32; index++) { ++ /* Get the bit from key and shift the key */ ++ single_bit_key = (tmp_key & 0x80000000) >> 31; ++ tmp_key = tmp_key << 1; ++ ++ /* get the Tap bits to XOR operation */ ++ bit16_lfsr = (lfsr_reg & 0x8000) >> 15; ++ bit3_lfsr = (lfsr_reg & 0x0004) >> 2; ++ ++ /* Get the Input value to the LFSR */ ++ lfsr_input = ((bit16_lfsr ^ bit3_lfsr) ^ single_bit_key); ++ ++ /* Shift and store out of the two TAPs */ ++ lfsr_reg = lfsr_reg << 1; ++ lfsr_reg = lfsr_reg | (lfsr_input & 0x0001); ++ ++ } ++ ++ lfsr_reg = lfsr_reg & (nfilters - 1); ++ ++ return lfsr_reg; ++} ++ ++/* This function generates the Second Hash */ ++unsigned int ++falcon_hash_function2(unsigned int key, unsigned int nfilters) ++{ ++ return (unsigned int)(((unsigned long long)key * 2 - 1) & ++ (nfilters - 1)); ++} ++ ++/* This function iterates through the hash table */ ++unsigned int ++falcon_hash_iterator(unsigned int hash1, unsigned int hash2, ++ unsigned int n_search, unsigned int nfilters) ++{ ++ return (hash1 + (n_search * hash2)) & (nfilters - 1); ++} ++ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/filter_resource.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/filter_resource.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,250 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains filters support. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "efrm_internal.h" ++ ++ ++struct filter_resource_manager { ++ struct efrm_resource_manager rm; ++ struct kfifo *free_ids; ++}; ++ ++static struct filter_resource_manager *efrm_filter_manager; ++ ++ ++void efrm_filter_resource_free(struct filter_resource *frs) ++{ ++ struct efhw_nic *nic = frs->rs.rs_client->nic; ++ int id; ++ ++ EFRM_RESOURCE_ASSERT_VALID(&frs->rs, 1); ++ ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT, __func__, ++ EFRM_RESOURCE_PRI_ARG(frs->rs.rs_handle)); ++ ++ efhw_nic_ipfilter_clear(nic, frs->filter_idx); ++ frs->filter_idx = -1; ++ efrm_vi_resource_release(frs->pt); ++ ++ /* Free this filter. */ ++ id = EFRM_RESOURCE_INSTANCE(frs->rs.rs_handle); ++ EFRM_VERIFY_EQ(kfifo_put(efrm_filter_manager->free_ids, ++ (unsigned char *)&id, sizeof(id)), ++ sizeof(id)); ++ ++ efrm_client_put(frs->rs.rs_client); ++ EFRM_DO_DEBUG(memset(frs, 0, sizeof(*frs))); ++ kfree(frs); ++} ++EXPORT_SYMBOL(efrm_filter_resource_free); ++ ++ ++void efrm_filter_resource_release(struct filter_resource *frs) ++{ ++ if (__efrm_resource_release(&frs->rs)) ++ efrm_filter_resource_free(frs); ++} ++EXPORT_SYMBOL(efrm_filter_resource_release); ++ ++ ++static void filter_rm_dtor(struct efrm_resource_manager *rm) ++{ ++ EFRM_TRACE("%s:", __func__); ++ ++ EFRM_RESOURCE_MANAGER_ASSERT_VALID(&efrm_filter_manager->rm); ++ EFRM_ASSERT(&efrm_filter_manager->rm == rm); ++ ++ kfifo_vfree(efrm_filter_manager->free_ids); ++ EFRM_TRACE("%s: done", __func__); ++} ++ ++/**********************************************************************/ ++/**********************************************************************/ ++/**********************************************************************/ ++ ++int efrm_create_filter_resource_manager(struct efrm_resource_manager **rm_out) ++{ ++ int rc; ++ ++ EFRM_ASSERT(rm_out); ++ ++ efrm_filter_manager = ++ kmalloc(sizeof(struct filter_resource_manager), GFP_KERNEL); ++ if (efrm_filter_manager == 0) ++ return -ENOMEM; ++ memset(efrm_filter_manager, 0, sizeof(*efrm_filter_manager)); ++ ++ rc = efrm_resource_manager_ctor(&efrm_filter_manager->rm, ++ filter_rm_dtor, "FILTER", ++ EFRM_RESOURCE_FILTER); ++ if (rc < 0) ++ goto fail1; ++ ++ /* Create a pool of free instances */ ++ rc = efrm_kfifo_id_ctor(&efrm_filter_manager->free_ids, ++ 0, EFHW_IP_FILTER_NUM, ++ &efrm_filter_manager->rm.rm_lock); ++ if (rc != 0) ++ goto fail2; ++ ++ *rm_out = &efrm_filter_manager->rm; ++ EFRM_TRACE("%s: filter resources created - %d IDs", ++ __func__, kfifo_len(efrm_filter_manager->free_ids)); ++ return 0; ++ ++fail2: ++ efrm_resource_manager_dtor(&efrm_filter_manager->rm); ++fail1: ++ memset(efrm_filter_manager, 0, sizeof(*efrm_filter_manager)); ++ kfree(efrm_filter_manager); ++ return rc; ++ ++} ++ ++ ++int efrm_filter_resource_clear(struct filter_resource *frs) ++{ ++ struct efhw_nic *nic = frs->rs.rs_client->nic; ++ ++ efhw_nic_ipfilter_clear(nic, frs->filter_idx); ++ frs->filter_idx = -1; ++ return 0; ++} ++EXPORT_SYMBOL(efrm_filter_resource_clear); ++ ++ ++int ++__efrm_filter_resource_set(struct filter_resource *frs, int type, ++ unsigned saddr, uint16_t sport, ++ unsigned daddr, uint16_t dport) ++{ ++ struct 
efhw_nic *nic = frs->rs.rs_client->nic; ++ int vi_instance; ++ ++ EFRM_ASSERT(frs); ++ ++ if (efrm_nic_tablep->a_nic->devtype.variant >= 'B' && ++ (frs->pt->flags & EFHW_VI_JUMBO_EN) == 0) ++ type |= EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK; ++ vi_instance = EFRM_RESOURCE_INSTANCE(frs->pt->rs.rs_handle); ++ ++ return efhw_nic_ipfilter_set(nic, type, &frs->filter_idx, ++ vi_instance, saddr, sport, daddr, dport); ++} ++EXPORT_SYMBOL(__efrm_filter_resource_set);; ++ ++ ++int ++efrm_filter_resource_alloc(struct vi_resource *vi_parent, ++ struct filter_resource **frs_out) ++{ ++ struct filter_resource *frs; ++ int rc, instance; ++ ++ EFRM_ASSERT(frs_out); ++ EFRM_ASSERT(efrm_filter_manager); ++ EFRM_RESOURCE_MANAGER_ASSERT_VALID(&efrm_filter_manager->rm); ++ EFRM_ASSERT(vi_parent != NULL); ++ EFRM_ASSERT(EFRM_RESOURCE_TYPE(vi_parent->rs.rs_handle) == ++ EFRM_RESOURCE_VI); ++ ++ /* Allocate resource data structure. */ ++ frs = kmalloc(sizeof(struct filter_resource), GFP_KERNEL); ++ if (!frs) ++ return -ENOMEM; ++ ++ /* Allocate an instance. */ ++ rc = kfifo_get(efrm_filter_manager->free_ids, ++ (unsigned char *)&instance, sizeof(instance)); ++ if (rc != sizeof(instance)) { ++ EFRM_TRACE("%s: out of instances", __func__); ++ EFRM_ASSERT(rc == 0); ++ rc = -EBUSY; ++ goto fail1; ++ } ++ ++ /* Initialise the resource DS. */ ++ efrm_resource_init(&frs->rs, EFRM_RESOURCE_FILTER, instance); ++ frs->pt = vi_parent; ++ efrm_resource_ref(&frs->pt->rs); ++ frs->filter_idx = -1; ++ ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " VI %d", __func__, ++ EFRM_RESOURCE_PRI_ARG(frs->rs.rs_handle), ++ EFRM_RESOURCE_INSTANCE(vi_parent->rs.rs_handle)); ++ ++ efrm_client_add_resource(vi_parent->rs.rs_client, &frs->rs); ++ *frs_out = frs; ++ return 0; ++ ++fail1: ++ memset(frs, 0, sizeof(*frs)); ++ kfree(frs); ++ return rc; ++} ++EXPORT_SYMBOL(efrm_filter_resource_alloc); ++ ++ ++int efrm_filter_resource_instance(struct filter_resource *frs) ++{ ++ return EFRM_RESOURCE_INSTANCE(frs->rs.rs_handle); ++} ++EXPORT_SYMBOL(efrm_filter_resource_instance); ++ ++ ++struct efrm_resource * ++efrm_filter_resource_to_resource(struct filter_resource *frs) ++{ ++ return &frs->rs; ++} ++EXPORT_SYMBOL(efrm_filter_resource_to_resource); ++ ++ ++struct filter_resource * ++efrm_filter_resource_from_resource(struct efrm_resource *rs) ++{ ++ return filter_resource(rs); ++} ++EXPORT_SYMBOL(efrm_filter_resource_from_resource); +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/iobufset_resource.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/iobufset_resource.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,404 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains non-contiguous I/O buffers support. 
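++ *
++ * Buffer sets are built from chunks of 2^chunk_order physically
++ * contiguous pages, with chunk_order derived as
++ * fls(pages_per_contiguous_chunk - 1): e.g. four-page chunks give
++ * chunk_order 2, and plain single pages give chunk_order 0.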
++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "efrm_internal.h" ++ ++ ++#define EFRM_IOBUFSET_MAX_NUM_INSTANCES 0x00010000 ++ ++struct iobufset_resource_manager { ++ struct efrm_resource_manager rm; ++ struct kfifo *free_ids; ++}; ++ ++struct iobufset_resource_manager *efrm_iobufset_manager; ++ ++#define iobsrs(rs1) iobufset_resource(rs1) ++ ++/* Returns size of iobufset resource data structure. */ ++static inline size_t iobsrs_size(int n_pages) ++{ ++ return offsetof(struct iobufset_resource, bufs) + ++ n_pages * sizeof(struct efhw_iopage); ++} ++ ++void efrm_iobufset_resource_free(struct iobufset_resource *rs) ++{ ++ unsigned int i; ++ int id; ++ ++ EFRM_RESOURCE_ASSERT_VALID(&rs->rs, 1); ++ ++ if (!rs->linked && rs->buf_tbl_alloc.base != (unsigned) -1) ++ efrm_buffer_table_free(&rs->buf_tbl_alloc); ++ ++ /* see comment on call to efhw_iopage_alloc in the alloc routine above ++ for discussion on use of efrm_nic_tablep->a_nic here */ ++ EFRM_ASSERT(efrm_nic_tablep->a_nic); ++ if (rs->linked) { ++ /* Nothing to do. 
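++ * The pages are shared with, and still owned by, the
++ * iobufset we are linked to; dropping our reference on it
++ * below is what eventually frees them.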
*/ ++ } else if (rs->chunk_order == 0) { ++ for (i = 0; i < rs->n_bufs; ++i) ++ efhw_iopage_free(efrm_nic_tablep->a_nic, &rs->bufs[i]); ++ } else { ++ /* it is important that this is executed in increasing page ++ * order because some implementations of ++ * efhw_iopages_init_from_iopage() assume this */ ++ for (i = 0; i < rs->n_bufs; ++ i += rs->pages_per_contiguous_chunk) { ++ struct efhw_iopages iopages; ++ efhw_iopages_init_from_iopage(&iopages, &rs->bufs[i], ++ rs->chunk_order); ++ efhw_iopages_free(efrm_nic_tablep->a_nic, &iopages); ++ } ++ } ++ ++ /* free the instance number */ ++ id = EFRM_RESOURCE_INSTANCE(rs->rs.rs_handle); ++ EFRM_VERIFY_EQ(kfifo_put(efrm_iobufset_manager->free_ids, ++ (unsigned char *)&id, sizeof(id)), sizeof(id)); ++ ++ efrm_vi_resource_release(rs->evq); ++ if (rs->linked) ++ efrm_iobufset_resource_release(rs->linked); ++ ++ efrm_client_put(rs->rs.rs_client); ++ if (iobsrs_size(rs->n_bufs) < PAGE_SIZE) { ++ EFRM_DO_DEBUG(memset(rs, 0, sizeof(*rs))); ++ kfree(rs); ++ } else { ++ EFRM_DO_DEBUG(memset(rs, 0, sizeof(*rs))); ++ vfree(rs); ++ } ++} ++EXPORT_SYMBOL(efrm_iobufset_resource_free); ++ ++ ++void efrm_iobufset_resource_release(struct iobufset_resource *iobrs) ++{ ++ if (__efrm_resource_release(&iobrs->rs)) ++ efrm_iobufset_resource_free(iobrs); ++} ++EXPORT_SYMBOL(efrm_iobufset_resource_release); ++ ++ ++ ++int ++efrm_iobufset_resource_alloc(int32_t n_pages, ++ int32_t pages_per_contiguous_chunk, ++ struct vi_resource *vi_evq, ++ struct iobufset_resource *linked, ++ bool phys_addr_mode, ++ struct iobufset_resource **iobrs_out) ++{ ++ struct iobufset_resource *iobrs; ++ int rc, instance, object_size; ++ unsigned int i; ++ ++ EFRM_ASSERT(iobrs_out); ++ EFRM_ASSERT(efrm_iobufset_manager); ++ EFRM_RESOURCE_MANAGER_ASSERT_VALID(&efrm_iobufset_manager->rm); ++ EFRM_RESOURCE_ASSERT_VALID(&vi_evq->rs, 0); ++ EFRM_ASSERT(EFRM_RESOURCE_TYPE(vi_evq->rs.rs_handle) == ++ EFRM_RESOURCE_VI); ++ EFRM_ASSERT(efrm_nic_tablep->a_nic); ++ ++ if (linked) { ++ /* This resource will share properties and memory with ++ * another. Only difference is that we'll program it into ++ * the buffer table of another nic. ++ */ ++ n_pages = linked->n_bufs; ++ pages_per_contiguous_chunk = linked->pages_per_contiguous_chunk; ++ phys_addr_mode = linked->buf_tbl_alloc.base == (unsigned) -1; ++ } ++ ++ /* allocate the resource data structure. */ ++ object_size = iobsrs_size(n_pages); ++ if (object_size < PAGE_SIZE) { ++ /* this should be OK from a tasklet */ ++ /* Necessary to do atomic alloc() as this ++ can be called from a weird-ass iSCSI context that is ++ !in_interrupt but is in_atomic - See BUG3163 */ ++ iobrs = kmalloc(object_size, GFP_ATOMIC); ++ } else { /* can't do this within a tasklet */ ++#ifndef NDEBUG ++ if (in_interrupt() || in_atomic()) { ++ EFRM_ERR("%s(): alloc->u.iobufset.in_n_pages=%d", ++ __func__, n_pages); ++ EFRM_ASSERT(!in_interrupt()); ++ EFRM_ASSERT(!in_atomic()); ++ } ++#endif ++ iobrs = (struct iobufset_resource *) vmalloc(object_size); ++ } ++ if (iobrs == NULL) { ++ EFRM_WARN("%s: failed to allocate container", __func__); ++ rc = -ENOMEM; ++ goto fail1; ++ } ++ ++ /* Allocate an instance number. 
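++ * Instance numbers come from the preallocated free_ids kfifo,
++ * which keeps this safe at tasklet time; an empty fifo means
++ * every instance is in use, so we fail with -EBUSY.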
*/
++ rc = kfifo_get(efrm_iobufset_manager->free_ids,
++ (unsigned char *)&instance, sizeof(instance));
++ if (rc != sizeof(instance)) {
++ EFRM_WARN("%s: out of instances", __func__);
++ EFRM_ASSERT(rc == 0);
++ rc = -EBUSY;
++ goto fail3;
++ }
++
++ efrm_resource_init(&iobrs->rs, EFRM_RESOURCE_IOBUFSET, instance);
++
++ iobrs->evq = vi_evq;
++ iobrs->linked = linked;
++ iobrs->n_bufs = n_pages;
++ iobrs->pages_per_contiguous_chunk = pages_per_contiguous_chunk;
++ iobrs->chunk_order = fls(iobrs->pages_per_contiguous_chunk - 1);
++ iobrs->buf_tbl_alloc.base = (unsigned) -1;
++
++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " %u pages", __func__,
++ EFRM_RESOURCE_PRI_ARG(iobrs->rs.rs_handle), iobrs->n_bufs);
++
++ /* Allocate the iobuffers. */
++ if (linked) {
++ memcpy(iobrs->bufs, linked->bufs,
++ iobrs->n_bufs * sizeof(iobrs->bufs[0]));
++ } else if (iobrs->chunk_order == 0) {
++ memset(iobrs->bufs, 0, iobrs->n_bufs * sizeof(iobrs->bufs[0]));
++ for (i = 0; i < iobrs->n_bufs; ++i) {
++ /* due to bug2426 we have to specify a NIC when
++ * allocating a DMAable page, which is a bit messy.
++ * For now we assume that if the page is suitable
++ * (e.g. DMAable) by one nic (efrm_nic_tablep->a_nic),
++ * it is suitable for all NICs.
++ * XXX I bet that breaks in Solaris.
++ */
++ rc = efhw_iopage_alloc(efrm_nic_tablep->a_nic,
++ &iobrs->bufs[i]);
++ if (rc < 0) {
++ EFRM_WARN("%s: failed (rc %d) to allocate "
++ "page (i=%u)", __func__, rc, i);
++ goto fail4;
++ }
++ }
++ } else {
++ struct efhw_iopages iopages;
++ unsigned j;
++
++ memset(iobrs->bufs, 0, iobrs->n_bufs * sizeof(iobrs->bufs[0]));
++ for (i = 0; i < iobrs->n_bufs;
++ i += iobrs->pages_per_contiguous_chunk) {
++ rc = efhw_iopages_alloc(efrm_nic_tablep->a_nic,
++ &iopages, iobrs->chunk_order);
++ if (rc < 0) {
++ EFRM_WARN("%s: failed (rc %d) to allocate "
++ "pages (i=%u order %d)",
++ __func__, rc, i,
++ iobrs->chunk_order);
++ goto fail4;
++ }
++ for (j = 0; j < iobrs->pages_per_contiguous_chunk;
++ j++) {
++ /* some implementations of
++ * efhw_iopage_init_from_iopages() rely on
++ * this function being called for
++ * _all_ pages in the chunk */
++ efhw_iopage_init_from_iopages(
++ &iobrs->bufs[i + j],
++ &iopages, j);
++ }
++ }
++ }
++
++ if (!phys_addr_mode) {
++ unsigned owner_id = EFAB_VI_RESOURCE_INSTANCE(iobrs->evq);
++
++ if (!linked) {
++ /* Allocate space in the NIC's buffer table. */
++ rc = efrm_buffer_table_alloc(fls(iobrs->n_bufs - 1),
++ &iobrs->buf_tbl_alloc);
++ if (rc < 0) {
++ EFRM_WARN("%s: failed (%d) to alloc %d buffer "
++ "table entries", __func__, rc,
++ iobrs->n_bufs);
++ goto fail5;
++ }
++ EFRM_ASSERT(((unsigned)1 << iobrs->buf_tbl_alloc.order)
++ >= (unsigned) iobrs->n_bufs);
++ } else {
++ iobrs->buf_tbl_alloc = linked->buf_tbl_alloc;
++ }
++
++ /* Initialise the buffer table entries. */
++ for (i = 0; i < iobrs->n_bufs; ++i) {
++ /*\ ?? \TODO burst them! 
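++ * Presumably: push the entries in batches rather than one
++ * efrm_buffer_table_set() call per page; the single
++ * efrm_buffer_table_commit() below already batches the final
++ * push to the hardware.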
*/ ++ efrm_buffer_table_set(&iobrs->buf_tbl_alloc, ++ vi_evq->rs.rs_client->nic, ++ i, ++ efhw_iopage_dma_addr(&iobrs-> ++ bufs[i]), ++ owner_id); ++ } ++ efrm_buffer_table_commit(); ++ } ++ ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " %d pages @ " ++ EFHW_BUFFER_ADDR_FMT, __func__, ++ EFRM_RESOURCE_PRI_ARG(iobrs->rs.rs_handle), ++ iobrs->n_bufs, EFHW_BUFFER_ADDR(iobrs->buf_tbl_alloc.base, ++ 0)); ++ efrm_resource_ref(&iobrs->evq->rs); ++ if (linked != NULL) ++ efrm_resource_ref(&linked->rs); ++ efrm_client_add_resource(vi_evq->rs.rs_client, &iobrs->rs); ++ *iobrs_out = iobrs; ++ return 0; ++ ++fail5: ++ i = iobrs->n_bufs; ++fail4: ++ /* see comment on call to efhw_iopage_alloc above for a discussion ++ * on use of efrm_nic_tablep->a_nic here */ ++ if (linked) { ++ /* Nothing to do. */ ++ } else if (iobrs->chunk_order == 0) { ++ while (i--) { ++ struct efhw_iopage *page = &iobrs->bufs[i]; ++ efhw_iopage_free(efrm_nic_tablep->a_nic, page); ++ } ++ } else { ++ unsigned int j; ++ for (j = 0; j < i; j += iobrs->pages_per_contiguous_chunk) { ++ struct efhw_iopages iopages; ++ ++ EFRM_ASSERT(j % iobrs->pages_per_contiguous_chunk ++ == 0); ++ /* it is important that this is executed in increasing ++ * page order because some implementations of ++ * efhw_iopages_init_from_iopage() assume this */ ++ efhw_iopages_init_from_iopage(&iopages, ++ &iobrs->bufs[j], ++ iobrs->chunk_order); ++ efhw_iopages_free(efrm_nic_tablep->a_nic, &iopages); ++ } ++ } ++fail3: ++ if (object_size < PAGE_SIZE) ++ kfree(iobrs); ++ else ++ vfree(iobrs); ++fail1: ++ return rc; ++} ++EXPORT_SYMBOL(efrm_iobufset_resource_alloc); ++ ++static void iobufset_rm_dtor(struct efrm_resource_manager *rm) ++{ ++ EFRM_ASSERT(&efrm_iobufset_manager->rm == rm); ++ kfifo_vfree(efrm_iobufset_manager->free_ids); ++} ++ ++int ++efrm_create_iobufset_resource_manager(struct efrm_resource_manager **rm_out) ++{ ++ int rc, max; ++ ++ EFRM_ASSERT(rm_out); ++ ++ efrm_iobufset_manager = ++ kmalloc(sizeof(*efrm_iobufset_manager), GFP_KERNEL); ++ if (efrm_iobufset_manager == 0) ++ return -ENOMEM; ++ memset(efrm_iobufset_manager, 0, sizeof(*efrm_iobufset_manager)); ++ ++ /* ++ * Bug 1145, 1370: We need to set initial size of both the resource ++ * table and instance id table so they never need to grow as we ++ * want to be allocate new iobufset at tasklet time. Lets make ++ * a pessimistic guess at maximum number of iobufsets possible. ++ * Could be less because ++ * - jumbo frames have same no of packets per iobufset BUT more ++ * pages per buffer ++ * - buffer table entries used independently of iobufsets by ++ * sendfile ++ * ++ * Based on TCP/IP stack setting of PKTS_PER_SET_S=5 ... ++ * - can't use this define here as it breaks the layering. ++ */ ++#define MIN_PAGES_PER_IOBUFSET (1 << 4) ++ ++ max = efrm_buffer_table_size() / MIN_PAGES_PER_IOBUFSET; ++ max = min_t(int, max, EFRM_IOBUFSET_MAX_NUM_INSTANCES); ++ ++ /* HACK: There currently exists an option to allocate buffers that ++ * are not programmed into the buffer table, so the max number is ++ * not limited by the buffer table size. I'm hoping this usage ++ * will go away eventually. 
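++ * Until then, the limit computed above is simply overridden
++ * below with a fixed 32768 instances.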
++ */ ++ max = 32768; ++ ++ rc = efrm_kfifo_id_ctor(&efrm_iobufset_manager->free_ids, ++ 0, max, &efrm_iobufset_manager->rm.rm_lock); ++ if (rc != 0) ++ goto fail1; ++ ++ rc = efrm_resource_manager_ctor(&efrm_iobufset_manager->rm, ++ iobufset_rm_dtor, "IOBUFSET", ++ EFRM_RESOURCE_IOBUFSET); ++ if (rc < 0) ++ goto fail2; ++ ++ *rm_out = &efrm_iobufset_manager->rm; ++ return 0; ++ ++fail2: ++ kfifo_vfree(efrm_iobufset_manager->free_ids); ++fail1: ++ EFRM_DO_DEBUG(memset(efrm_iobufset_manager, 0, ++ sizeof(*efrm_iobufset_manager))); ++ kfree(efrm_iobufset_manager); ++ return rc; ++} +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/iopage.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/iopage.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,103 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides Linux-specific implementation for iopage API used ++ * from efhw library. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++#include "kernel_compat.h" ++#include /* for dma_addr_t */ ++ ++int efhw_iopage_alloc(struct efhw_nic *nic, struct efhw_iopage *p) ++{ ++ struct linux_efhw_nic *lnic = linux_efhw_nic(nic); ++ dma_addr_t handle; ++ void *kva; ++ ++ kva = efrm_pci_alloc_consistent(lnic->pci_dev, PAGE_SIZE, ++ &handle); ++ if (kva == 0) ++ return -ENOMEM; ++ ++ EFHW_ASSERT((handle & ~PAGE_MASK) == 0); ++ ++ memset((void *)kva, 0, PAGE_SIZE); ++ efhw_page_init_from_va(&p->p, kva); ++ ++ p->dma_addr = handle; ++ ++ return 0; ++} ++ ++void efhw_iopage_free(struct efhw_nic *nic, struct efhw_iopage *p) ++{ ++ struct linux_efhw_nic *lnic = linux_efhw_nic(nic); ++ EFHW_ASSERT(efhw_page_is_valid(&p->p)); ++ ++ efrm_pci_free_consistent(lnic->pci_dev, PAGE_SIZE, ++ efhw_iopage_ptr(p), p->dma_addr); ++} ++ ++int ++efhw_iopages_alloc(struct efhw_nic *nic, struct efhw_iopages *p, ++ unsigned order) ++{ ++ unsigned bytes = 1u << (order + PAGE_SHIFT); ++ struct linux_efhw_nic *lnic = linux_efhw_nic(nic); ++ dma_addr_t handle; ++ caddr_t addr; ++ int gfp_flag; ++ ++ /* Set __GFP_COMP if available to make reference counting work. ++ * This is recommended here: ++ * http://www.forbiddenweb.org/viewtopic.php?id=83167&page=4#348331 ++ */ ++ gfp_flag = ((in_atomic() ? 
GFP_ATOMIC : GFP_KERNEL) | __GFP_COMP); ++ addr = efrm_dma_alloc_coherent(&lnic->pci_dev->dev, bytes, &handle, ++ gfp_flag); ++ if (addr == NULL) ++ return -ENOMEM; ++ ++ EFHW_ASSERT((handle & ~PAGE_MASK) == 0); ++ ++ p->order = order; ++ p->dma_addr = handle; ++ p->kva = addr; ++ ++ return 0; ++} ++ ++void efhw_iopages_free(struct efhw_nic *nic, struct efhw_iopages *p) ++{ ++ unsigned bytes = 1u << (p->order + PAGE_SHIFT); ++ struct linux_efhw_nic *lnic = linux_efhw_nic(nic); ++ ++ efrm_dma_free_coherent(&lnic->pci_dev->dev, bytes, ++ (void *)p->kva, p->dma_addr); ++} +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/kernel_compat.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/kernel_compat.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,118 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file provides compatibility layer for various Linux kernel versions ++ * (starting from 2.6.9 RHEL kernel). ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#define IN_KERNEL_COMPAT_C ++#include ++#include ++#include "kernel_compat.h" ++ ++/* Set this to 1 to enable very basic counting of iopage(s) allocations, then ++ * call dump_iopage_counts() to show the number of current allocations of ++ * orders 0-7. 
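++ * Note this is a compile-time switch only: with the macro left
++ * at 0 the counters and dump_iopage_counts() are not built in
++ * at all.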
++ */ ++#define EFRM_IOPAGE_COUNTS_ENABLED 0 ++ ++ ++/**************************************************************************** ++ * ++ * allocate a buffer suitable for DMA to/from the NIC ++ * ++ ****************************************************************************/ ++ ++#if EFRM_IOPAGE_COUNTS_ENABLED ++ ++static int iopage_counts[8]; ++ ++void dump_iopage_counts(void) ++{ ++ EFRM_NOTICE("iopage counts: %d %d %d %d %d %d %d %d", iopage_counts[0], ++ iopage_counts[1], iopage_counts[2], iopage_counts[3], ++ iopage_counts[4], iopage_counts[5], iopage_counts[6], ++ iopage_counts[7]); ++} ++ ++#endif ++ ++ ++ ++/*********** pci_alloc_consistent / pci_free_consistent ***********/ ++ ++void *efrm_dma_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t *dma_addr, int flag) ++{ ++ void *ptr; ++ unsigned order; ++ ++ order = __ffs(size/PAGE_SIZE); ++ EFRM_ASSERT(size == (PAGE_SIZE< ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef DRIVER_LINUX_RESOURCE_KERNEL_COMPAT_H ++#define DRIVER_LINUX_RESOURCE_KERNEL_COMPAT_H ++ ++#include ++#include ++#include ++#include ++ ++/********* pci_map_*() ********************/ ++ ++extern void *efrm_dma_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t *dma_addr, int flag); ++ ++extern void efrm_dma_free_coherent(struct device *dev, size_t size, ++ void *ptr, dma_addr_t dma_addr); ++ ++static inline void *efrm_pci_alloc_consistent(struct pci_dev *hwdev, ++ size_t size, ++ dma_addr_t *dma_addr) ++{ ++ return efrm_dma_alloc_coherent(&hwdev->dev, size, dma_addr, ++ GFP_ATOMIC); ++} ++ ++static inline void efrm_pci_free_consistent(struct pci_dev *hwdev, size_t size, ++ void *ptr, dma_addr_t dma_addr) ++{ ++ efrm_dma_free_coherent(&hwdev->dev, size, ptr, dma_addr); ++} ++ ++ ++#endif /* DRIVER_LINUX_RESOURCE_KERNEL_COMPAT_H */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/kernel_proc.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/kernel_proc.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,109 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains /proc/driver/sfc_resource/ implementation. 
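++ *
++ * A single read-only file, /proc/driver/sfc_resource/resources,
++ * reports the current and high-water instance counts of every
++ * registered resource manager.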
++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++#include ++#include ++ ++/** Top level directory for sfc specific stats **/ ++static struct proc_dir_entry *efrm_proc_root; /* = NULL */ ++ ++static int ++efrm_resource_read_proc(char *buf, char **start, off_t offset, int count, ++ int *eof, void *data); ++ ++int efrm_install_proc_entries(void) ++{ ++ /* create the top-level directory for etherfabric specific stuff */ ++ efrm_proc_root = proc_mkdir("driver/sfc_resource", NULL); ++ if (!efrm_proc_root) ++ return -ENOMEM; ++ ++ if (create_proc_read_entry("resources", 0, efrm_proc_root, ++ efrm_resource_read_proc, 0) == NULL) { ++ EFRM_WARN("%s: Unable to create /proc/drivers/sfc_resource/" ++ "resources", __func__); ++ } ++ return 0; ++} ++ ++void efrm_uninstall_proc_entries(void) ++{ ++ EFRM_ASSERT(efrm_proc_root); ++ remove_proc_entry("resources", efrm_proc_root); ++ remove_proc_entry(efrm_proc_root->name, efrm_proc_root->parent); ++ efrm_proc_root = NULL; ++} ++ ++/**************************************************************************** ++ * ++ * /proc/drivers/sfc/resources ++ * ++ ****************************************************************************/ ++ ++#define EFRM_PROC_PRINTF(buf, len, fmt, ...) \ ++ do { \ ++ if (count - len > 0) \ ++ len += snprintf(buf+len, count-len, (fmt), \ ++ __VA_ARGS__); \ ++ } while (0) ++ ++static int ++efrm_resource_read_proc(char *buf, char **start, off_t offset, int count, ++ int *eof, void *data) ++{ ++ irq_flags_t lock_flags; ++ int len = 0; ++ int type; ++ struct efrm_resource_manager *rm; ++ ++ for (type = 0; type < EFRM_RESOURCE_NUM; type++) { ++ rm = efrm_rm_table[type]; ++ if (rm == NULL) ++ continue; ++ ++ EFRM_PROC_PRINTF(buf, len, "*** %s ***\n", rm->rm_name); ++ ++ spin_lock_irqsave(&rm->rm_lock, lock_flags); ++ EFRM_PROC_PRINTF(buf, len, "current = %u\n", rm->rm_resources); ++ EFRM_PROC_PRINTF(buf, len, " max = %u\n\n", ++ rm->rm_resources_hiwat); ++ spin_unlock_irqrestore(&rm->rm_lock, lock_flags); ++ } ++ ++ return count ? strlen(buf) : 0; ++} +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/kfifo.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/kfifo.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,208 @@ ++/* ++ * A simple kernel FIFO implementation. 
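++ *
++ * The buffer size is kept a power of two and the in/out indices
++ * run free: a buffer position is (index & (size - 1)), so the
++ * indices may simply wrap, and (in - out) is always the number
++ * of bytes queued.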
++ * ++ * Copyright (C) 2004 Stelian Pop ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++/* ++ * This file is stolen from the Linux kernel sources ++ * (linux-2.6.22/kernel/kfifo.c) into sfc_resource driver. ++ * It should be used for old kernels without kfifo implementation. ++ * Most part of linux/kfifo.h is incorporated into ++ * ci/efrm/sysdep_linux.h. ++ */ ++#include ++#ifdef HAS_NO_KFIFO ++ ++#include ++#include ++#include ++#include ++/*#include */ ++ ++/** ++ * kfifo_init - allocates a new FIFO using a preallocated buffer ++ * @buffer: the preallocated buffer to be used. ++ * @size: the size of the internal buffer, this have to be a power of 2. ++ * @gfp_mask: get_free_pages mask, passed to kmalloc() ++ * @lock: the lock to be used to protect the fifo buffer ++ * ++ * Do NOT pass the kfifo to kfifo_free() after use! Simply free the ++ * &struct kfifo with kfree(). ++ */ ++struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, ++ gfp_t gfp_mask, spinlock_t *lock) ++{ ++ struct kfifo *fifo; ++ ++ /* size must be a power of 2 */ ++ BUG_ON(size & (size - 1)); ++ ++ fifo = kmalloc(sizeof(struct kfifo), gfp_mask); ++ if (!fifo) ++ return ERR_PTR(-ENOMEM); ++ ++ fifo->buffer = buffer; ++ fifo->size = size; ++ fifo->in = fifo->out = 0; ++ fifo->lock = lock; ++ ++ return fifo; ++} ++EXPORT_SYMBOL(kfifo_init); ++ ++/** ++ * kfifo_alloc - allocates a new FIFO and its internal buffer ++ * @size: the size of the internal buffer to be allocated. ++ * @gfp_mask: get_free_pages mask, passed to kmalloc() ++ * @lock: the lock to be used to protect the fifo buffer ++ * ++ * The size will be rounded-up to a power of 2. ++ */ ++struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) ++{ ++ unsigned char *buffer; ++ struct kfifo *ret; ++ ++ /* ++ * round up to the next power of 2, since our 'let the indices ++ * wrap' tachnique works only in this case. ++ */ ++ if (size & (size - 1)) { ++ BUG_ON(size > 0x80000000); ++ size = roundup_pow_of_two(size); ++ } ++ ++ buffer = kmalloc(size, gfp_mask); ++ if (!buffer) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = kfifo_init(buffer, size, gfp_mask, lock); ++ ++ if (IS_ERR(ret)) ++ kfree(buffer); ++ ++ return ret; ++} ++EXPORT_SYMBOL(kfifo_alloc); ++ ++/** ++ * kfifo_free - frees the FIFO ++ * @fifo: the fifo to be freed. ++ */ ++void kfifo_free(struct kfifo *fifo) ++{ ++ kfree(fifo->buffer); ++ kfree(fifo); ++} ++EXPORT_SYMBOL(kfifo_free); ++ ++/** ++ * __kfifo_put - puts some data into the FIFO, no locking version ++ * @fifo: the fifo to be used. ++ * @buffer: the data to be added. ++ * @len: the length of the data to be added. ++ * ++ * This function copies at most @len bytes from the @buffer into ++ * the FIFO depending on the free space, and returns the number of ++ * bytes copied. 
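++ *
++ * A put that crosses the end of the buffer is split into two
++ * copies: e.g. with size 8, in == 6 and len == 4, two bytes land
++ * at offsets 6..7 and the remaining two at offsets 0..1, after
++ * which @fifo->in is advanced by 4.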
++ * ++ * Note that with only one concurrent reader and one concurrent ++ * writer, you don't need extra locking to use these functions. ++ */ ++unsigned int ++__kfifo_put(struct kfifo *fifo, unsigned char *buffer, unsigned int len) ++{ ++ unsigned int l; ++ ++ len = min(len, fifo->size - fifo->in + fifo->out); ++ ++ /* ++ * Ensure that we sample the fifo->out index -before- we ++ * start putting bytes into the kfifo. ++ */ ++ ++ smp_mb(); ++ ++ /* first put the data starting from fifo->in to buffer end */ ++ l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); ++ memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); ++ ++ /* then put the rest (if any) at the beginning of the buffer */ ++ memcpy(fifo->buffer, buffer + l, len - l); ++ ++ /* ++ * Ensure that we add the bytes to the kfifo -before- ++ * we update the fifo->in index. ++ */ ++ ++ smp_wmb(); ++ ++ fifo->in += len; ++ ++ return len; ++} ++EXPORT_SYMBOL(__kfifo_put); ++ ++/** ++ * __kfifo_get - gets some data from the FIFO, no locking version ++ * @fifo: the fifo to be used. ++ * @buffer: where the data must be copied. ++ * @len: the size of the destination buffer. ++ * ++ * This function copies at most @len bytes from the FIFO into the ++ * @buffer and returns the number of copied bytes. ++ * ++ * Note that with only one concurrent reader and one concurrent ++ * writer, you don't need extra locking to use these functions. ++ */ ++unsigned int ++__kfifo_get(struct kfifo *fifo, unsigned char *buffer, unsigned int len) ++{ ++ unsigned int l; ++ ++ len = min(len, fifo->in - fifo->out); ++ ++ /* ++ * Ensure that we sample the fifo->in index -before- we ++ * start removing bytes from the kfifo. ++ */ ++ ++ smp_rmb(); ++ ++ /* first get the data from fifo->out until the end of the buffer */ ++ l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); ++ memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); ++ ++ /* then get the rest (if any) from the beginning of the buffer */ ++ memcpy(buffer + l, fifo->buffer, len - l); ++ ++ /* ++ * Ensure that we remove the bytes from the kfifo -before- ++ * we update the fifo->out index. ++ */ ++ ++ smp_mb(); ++ ++ fifo->out += len; ++ ++ return len; ++} ++EXPORT_SYMBOL(__kfifo_get); ++ ++#endif +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/linux_resource_internal.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/linux_resource_internal.h 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,76 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains Linux-specific API internal for the resource driver. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#ifndef __LINUX_RESOURCE_INTERNAL__ ++#define __LINUX_RESOURCE_INTERNAL__ ++ ++#include ++#include ++#include ++#include ++ ++ ++/*! Linux specific EtherFabric initialisation */ ++extern int ++linux_efrm_nic_ctor(struct linux_efhw_nic *, struct pci_dev *, ++ spinlock_t *reg_lock, ++ unsigned nic_flags, unsigned nic_options); ++ ++/*! Linux specific EtherFabric initialisation */ ++extern void linux_efrm_nic_dtor(struct linux_efhw_nic *); ++ ++/*! Linux specific EtherFabric initialisation -- interrupt registration */ ++extern int linux_efrm_irq_ctor(struct linux_efhw_nic *); ++ ++/*! Linux specific EtherFabric initialisation -- interrupt deregistration */ ++extern void linux_efrm_irq_dtor(struct linux_efhw_nic *); ++ ++extern int efrm_driverlink_register(void); ++extern void efrm_driverlink_unregister(void); ++ ++extern int ++efrm_nic_add(struct pci_dev *dev, unsigned int opts, const uint8_t *mac_addr, ++ struct linux_efhw_nic **lnic_out, spinlock_t *reg_lock, ++ int bt_min, int bt_max, int non_irq_evq, ++ const struct vi_resource_dimensions *); ++extern void efrm_nic_del(struct linux_efhw_nic *); ++ ++ ++extern int efrm_install_proc_entries(void); ++extern void efrm_uninstall_proc_entries(void); ++ ++#endif /* __LINUX_RESOURCE_INTERNAL__ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/nic.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/nic.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,174 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains EtherFabric Generic NIC instance (init, interrupts, ++ * etc) ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++int efhw_device_type_init(struct efhw_device_type *dt, ++ int vendor_id, int device_id, ++ int class_revision) ++{ ++ if (vendor_id != 0x1924) ++ return 0; ++ ++ switch (device_id) { ++ case 0x0703: ++ case 0x6703: ++ dt->variant = 'A'; ++ switch (class_revision) { ++ case 0: ++ dt->revision = 0; ++ break; ++ case 1: ++ dt->revision = 1; ++ break; ++ default: ++ return 0; ++ } ++ break; ++ case 0x0710: ++ dt->variant = 'B'; ++ switch (class_revision) { ++ case 2: ++ dt->revision = 0; ++ break; ++ default: ++ return 0; ++ } ++ break; ++ default: ++ return 0; ++ } ++ ++ return 1; ++} ++ ++ ++/*-------------------------------------------------------------------- ++ * ++ * NIC Initialisation ++ * ++ *--------------------------------------------------------------------*/ ++ ++/* make this separate from initialising data structure ++** to allow this to be called at a later time once we can access PCI ++** config space to find out what hardware we have ++*/ ++void efhw_nic_init(struct efhw_nic *nic, unsigned flags, unsigned options, ++ struct efhw_device_type dev_type) ++{ ++ nic->devtype = dev_type; ++ nic->flags = flags; ++ nic->options = options; ++ nic->bar_ioaddr = 0; ++ spin_lock_init(&nic->the_reg_lock); ++ nic->reg_lock = &nic->the_reg_lock; ++ nic->mtu = 1500 + ETH_HLEN; ++ ++ nic->irq_unit = EFHW_IRQ_UNIT_UNUSED; ++ ++ nic->evq_sizes = 512 | 1024 | 2048 | 4096 | 8192 | ++ 16384 | 32768; ++ nic->txq_sizes = 512 | 1024 | 2048 | 4096; ++ nic->rxq_sizes = 512 | 1024 | 2048 | 4096; ++ nic->efhw_func = &falcon_char_functional_units; ++ nic->ctr_ap_bytes = EFHW_64M; ++ switch (nic->devtype.variant) { ++ case 'A': ++ nic->ctr_ap_bar = FALCON_S_CTR_AP_BAR; ++ nic->num_evqs = 4096; ++ nic->num_dmaqs = 4096; ++ nic->num_timers = 4096; ++ break; ++ case 'B': ++ nic->flags |= NIC_FLAG_NO_INTERRUPT; ++ nic->ctr_ap_bar = FALCON_P_CTR_AP_BAR; ++ nic->num_evqs = 4096; ++ nic->num_dmaqs = 4096; ++ nic->num_timers = 4096; ++ break; ++ default: ++ EFHW_ASSERT(0); ++ break; ++ } ++} ++ ++ ++void efhw_nic_close_interrupts(struct efhw_nic *nic) ++{ ++ EFHW_ASSERT(nic); ++ if (!efhw_nic_have_hw(nic)) ++ return; ++ ++ EFHW_ASSERT(efhw_nic_have_hw(nic)); ++ ++ if (nic->irq_unit != EFHW_IRQ_UNIT_UNUSED) ++ efhw_nic_interrupt_disable(nic); ++} ++ ++void efhw_nic_dtor(struct efhw_nic *nic) ++{ ++ EFHW_ASSERT(nic); ++ ++ /* Check that we have functional units because the software only ++ * driver doesn't initialise anything hardware related any more */ ++ ++ /* close interrupts is called first because the act of deregistering ++ the driver could cause this driver to change from master to slave ++ and hence the implicit interrupt mappings would be wrong */ ++ ++ EFHW_TRACE("%s: functional units ... ", __func__); ++ ++ if (efhw_nic_have_functional_units(nic)) { ++ efhw_nic_close_interrupts(nic); ++ efhw_nic_close_hardware(nic); ++ } ++ EFHW_TRACE("%s: functional units ... done", __func__); ++ ++ /* destroy event queues */ ++ EFHW_TRACE("%s: event queues ... 
", __func__); ++ ++ if (nic->interrupting_evq.evq_mask) ++ efhw_keventq_dtor(nic, &nic->interrupting_evq); ++ if (nic->non_interrupting_evq.evq_mask) ++ efhw_keventq_dtor(nic, &nic->non_interrupting_evq); ++ ++ EFHW_TRACE("%s: event queues ... done", __func__); ++ ++ spin_lock_destroy(&nic->the_reg_lock); ++ ++ EFHW_TRACE("%s: DONE", __func__); ++} +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/resource_driver.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/resource_driver.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,600 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains main driver entry points. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include "linux_resource_internal.h" ++#include "kernel_compat.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_AUTHOR("Solarflare Communications"); ++MODULE_LICENSE("GPL"); ++ ++static struct efhw_ev_handler ev_handler = { ++ .wakeup_fn = efrm_handle_wakeup_event, ++ .timeout_fn = efrm_handle_timeout_event, ++ .dmaq_flushed_fn = efrm_handle_dmaq_flushed, ++}; ++ ++const int max_hardware_init_repeats = 10; ++ ++/*-------------------------------------------------------------------- ++ * ++ * Module load time variables ++ * ++ *--------------------------------------------------------------------*/ ++/* See docs/notes/pci_alloc_consistent */ ++static int do_irq = 1; /* enable interrupts */ ++ ++#if defined(CONFIG_X86_XEN) ++static int irq_moderation = 60; /* interrupt moderation (60 usec) */ ++#else ++static int irq_moderation = 20; /* interrupt moderation (20 usec) */ ++#endif ++static int nic_options = NIC_OPT_DEFAULT; ++int efx_vi_eventq_size = EFX_VI_EVENTQ_SIZE_DEFAULT; ++ ++module_param(do_irq, int, S_IRUGO); ++MODULE_PARM_DESC(do_irq, "Enable interrupts. 
" ++ "Do not turn it off unless you know what are you doing."); ++module_param(irq_moderation, int, S_IRUGO); ++MODULE_PARM_DESC(irq_moderation, "IRQ moderation in usec"); ++module_param(nic_options, int, S_IRUGO); ++MODULE_PARM_DESC(nic_options, "Nic options -- see efhw_types.h"); ++module_param(efx_vi_eventq_size, int, S_IRUGO); ++MODULE_PARM_DESC(efx_vi_eventq_size, ++ "Size of event queue allocated by efx_vi library"); ++ ++/*-------------------------------------------------------------------- ++ * ++ * Linux specific NIC initialisation ++ * ++ *--------------------------------------------------------------------*/ ++ ++static inline irqreturn_t ++linux_efrm_interrupt(int irr, void *dev_id) ++{ ++ return efhw_nic_interrupt((struct efhw_nic *)dev_id); ++} ++ ++int linux_efrm_irq_ctor(struct linux_efhw_nic *lnic) ++{ ++ struct efhw_nic *nic = &lnic->efrm_nic.efhw_nic; ++ ++ nic->flags &= ~NIC_FLAG_MSI; ++ if (nic->flags & NIC_FLAG_TRY_MSI) { ++ int rc = pci_enable_msi(lnic->pci_dev); ++ if (rc < 0) { ++ EFRM_WARN("%s: Could not enable MSI (%d)", ++ __func__, rc); ++ EFRM_WARN("%s: Continuing with legacy interrupt mode", ++ __func__); ++ } else { ++ EFRM_NOTICE("%s: MSI enabled", __func__); ++ nic->flags |= NIC_FLAG_MSI; ++ } ++ } ++ ++ if (request_irq(lnic->pci_dev->irq, linux_efrm_interrupt, ++ IRQF_SHARED, "sfc_resource", nic)) { ++ EFRM_ERR("Request for interrupt #%d failed", ++ lnic->pci_dev->irq); ++ nic->flags &= ~NIC_FLAG_OS_IRQ_EN; ++ return -EBUSY; ++ } ++ nic->flags |= NIC_FLAG_OS_IRQ_EN; ++ ++ return 0; ++} ++ ++void linux_efrm_irq_dtor(struct linux_efhw_nic *lnic) ++{ ++ EFRM_TRACE("%s: start", __func__); ++ ++ if (lnic->efrm_nic.efhw_nic.flags & NIC_FLAG_OS_IRQ_EN) { ++ free_irq(lnic->pci_dev->irq, &lnic->efrm_nic.efhw_nic); ++ lnic->efrm_nic.efhw_nic.flags &= ~NIC_FLAG_OS_IRQ_EN; ++ } ++ ++ if (lnic->efrm_nic.efhw_nic.flags & NIC_FLAG_MSI) { ++ pci_disable_msi(lnic->pci_dev); ++ lnic->efrm_nic.efhw_nic.flags &= ~NIC_FLAG_MSI; ++ } ++ ++ EFRM_TRACE("%s: done", __func__); ++} ++ ++/* Allocate buffer table entries for a particular NIC. ++ */ ++static int efrm_nic_buffer_table_alloc(struct efhw_nic *nic) ++{ ++ int capacity; ++ int page_order; ++ int rc; ++ ++ /* Choose queue size. */ ++ for (capacity = 8192; capacity <= nic->evq_sizes; capacity <<= 1) { ++ if (capacity > nic->evq_sizes) { ++ EFRM_ERR ++ ("%s: Unable to choose EVQ size (supported=%x)", ++ __func__, nic->evq_sizes); ++ return -E2BIG; ++ } else if (capacity & nic->evq_sizes) ++ break; ++ } ++ ++ nic->interrupting_evq.hw.capacity = capacity; ++ nic->interrupting_evq.hw.buf_tbl_alloc.base = (unsigned)-1; ++ ++ nic->non_interrupting_evq.hw.capacity = capacity; ++ nic->non_interrupting_evq.hw.buf_tbl_alloc.base = (unsigned)-1; ++ ++ /* allocate buffer table entries to map onto the iobuffer */ ++ page_order = get_order(capacity * sizeof(efhw_event_t)); ++ if (!(nic->flags & NIC_FLAG_NO_INTERRUPT)) { ++ rc = efrm_buffer_table_alloc(page_order, ++ &nic->interrupting_evq ++ .hw.buf_tbl_alloc); ++ if (rc < 0) { ++ EFRM_WARN ++ ("%s: failed (%d) to alloc %d buffer table entries", ++ __func__, rc, page_order); ++ return rc; ++ } ++ } ++ rc = efrm_buffer_table_alloc(page_order, ++ &nic->non_interrupting_evq.hw. ++ buf_tbl_alloc); ++ if (rc < 0) { ++ EFRM_WARN ++ ("%s: failed (%d) to alloc %d buffer table entries", ++ __func__, rc, page_order); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++/* Free buffer table entries allocated for a particular NIC. 
++ */ ++static void efrm_nic_buffer_table_free(struct efhw_nic *nic) ++{ ++ if (nic->interrupting_evq.hw.buf_tbl_alloc.base != (unsigned)-1) ++ efrm_buffer_table_free(&nic->interrupting_evq.hw ++ .buf_tbl_alloc); ++ if (nic->non_interrupting_evq.hw.buf_tbl_alloc.base != (unsigned)-1) ++ efrm_buffer_table_free(&nic->non_interrupting_evq ++ .hw.buf_tbl_alloc); ++} ++ ++static int iomap_bar(struct linux_efhw_nic *lnic, size_t len) ++{ ++ volatile char __iomem *ioaddr; ++ ++ ioaddr = ioremap_nocache(lnic->ctr_ap_pci_addr, len); ++ if (ioaddr == 0) ++ return -ENOMEM; ++ ++ lnic->efrm_nic.efhw_nic.bar_ioaddr = ioaddr; ++ return 0; ++} ++ ++static int linux_efhw_nic_map_ctr_ap(struct linux_efhw_nic *lnic) ++{ ++ struct efhw_nic *nic = &lnic->efrm_nic.efhw_nic; ++ int rc; ++ ++ rc = iomap_bar(lnic, nic->ctr_ap_bytes); ++ ++ /* Bug 5195: workaround for now. */ ++ if (rc != 0 && nic->ctr_ap_bytes > 16 * 1024 * 1024) { ++ /* Try half the size for now. */ ++ nic->ctr_ap_bytes /= 2; ++ EFRM_WARN("Bug 5195 WORKAROUND: retrying iomap of %d bytes", ++ nic->ctr_ap_bytes); ++ rc = iomap_bar(lnic, nic->ctr_ap_bytes); ++ } ++ ++ if (rc < 0) { ++ EFRM_ERR("Failed (%d) to map bar (%d bytes)", ++ rc, nic->ctr_ap_bytes); ++ return rc; ++ } ++ ++ return rc; ++} ++ ++int ++linux_efrm_nic_ctor(struct linux_efhw_nic *lnic, struct pci_dev *dev, ++ spinlock_t *reg_lock, ++ unsigned nic_flags, unsigned nic_options) ++{ ++ struct efhw_device_type dev_type; ++ struct efhw_nic *nic = &lnic->efrm_nic.efhw_nic; ++ u8 class_revision; ++ int rc; ++ ++ rc = pci_read_config_byte(dev, PCI_CLASS_REVISION, &class_revision); ++ if (rc != 0) { ++ EFRM_ERR("%s: pci_read_config_byte failed (%d)", ++ __func__, rc); ++ return rc; ++ } ++ ++ if (!efhw_device_type_init(&dev_type, dev->vendor, dev->device, ++ class_revision)) { ++ EFRM_ERR("%s: efhw_device_type_init failed %04x:%04x(%d)", ++ __func__, (unsigned) dev->vendor, ++ (unsigned) dev->device, (int) class_revision); ++ return -ENODEV; ++ } ++ ++ EFRM_NOTICE("attaching device type %04x:%04x %d:%c%d", ++ (unsigned) dev->vendor, (unsigned) dev->device, ++ dev_type.arch, dev_type.variant, dev_type.revision); ++ ++ /* Initialise the adapter-structure. */ ++ efhw_nic_init(nic, nic_flags, nic_options, dev_type); ++ lnic->pci_dev = dev; ++ ++ rc = pci_enable_device(dev); ++ if (rc < 0) { ++ EFRM_ERR("%s: pci_enable_device failed (%d)", ++ __func__, rc); ++ return rc; ++ } ++ ++ lnic->ctr_ap_pci_addr = pci_resource_start(dev, nic->ctr_ap_bar); ++ ++ if (!pci_dma_supported(dev, (dma_addr_t)EFHW_DMA_ADDRMASK)) { ++ EFRM_ERR("%s: pci_dma_supported(%lx) failed", __func__, ++ (unsigned long)EFHW_DMA_ADDRMASK); ++ return -ENODEV; ++ } ++ ++ if (pci_set_dma_mask(dev, (dma_addr_t)EFHW_DMA_ADDRMASK)) { ++ EFRM_ERR("%s: pci_set_dma_mask(%lx) failed", __func__, ++ (unsigned long)EFHW_DMA_ADDRMASK); ++ return -ENODEV; ++ } ++ ++ if (pci_set_consistent_dma_mask(dev, (dma_addr_t)EFHW_DMA_ADDRMASK)) { ++ EFRM_ERR("%s: pci_set_consistent_dma_mask(%lx) failed", ++ __func__, (unsigned long)EFHW_DMA_ADDRMASK); ++ return -ENODEV; ++ } ++ ++ rc = linux_efhw_nic_map_ctr_ap(lnic); ++ if (rc < 0) ++ return rc; ++ ++ /* By default struct efhw_nic contains its own lock for protecting ++ * access to nic registers. We override it with a pointer to the ++ * lock in the net driver. This is needed when resource and net ++ * drivers share a single PCI function (falcon B series). 
++ */
++ nic->reg_lock = reg_lock;
++ return 0;
++}
++
++void linux_efrm_nic_dtor(struct linux_efhw_nic *lnic)
++{
++ struct efhw_nic *nic = &lnic->efrm_nic.efhw_nic;
++ volatile char __iomem *bar_ioaddr = nic->bar_ioaddr;
++
++ efhw_nic_dtor(nic);
++
++ /* Unmap the bar. */
++ EFRM_ASSERT(bar_ioaddr);
++ iounmap(bar_ioaddr);
++ nic->bar_ioaddr = 0;
++}
++
++/****************************************************************************
++ *
++ * efrm_tasklet - used to poll the eventq which may result in further callbacks
++ *
++ ****************************************************************************/
++
++static void efrm_tasklet(unsigned long pdev)
++{
++ struct efhw_nic *nic = (struct efhw_nic *)pdev;
++
++ EFRM_ASSERT(!(nic->flags & NIC_FLAG_NO_INTERRUPT));
++
++ efhw_keventq_poll(nic, &nic->interrupting_evq);
++ EFRM_TRACE("%s: complete", __func__);
++}
++
++/****************************************************************************
++ *
++ * char driver specific interrupt callbacks -- run at hard IRQL
++ *
++ ****************************************************************************/
++static void efrm_handle_eventq_irq(struct efhw_nic *nic, int evq)
++{
++ /* NB. The interrupt must have already been acked (for legacy mode). */
++
++ EFRM_TRACE("%s: starting tasklet", __func__);
++ EFRM_ASSERT(!(nic->flags & NIC_FLAG_NO_INTERRUPT));
++
++ tasklet_schedule(&linux_efhw_nic(nic)->tasklet);
++}
++
++/* A count of how many NICs this driver knows about. */
++static int n_nics_probed;
++
++/****************************************************************************
++ *
++ * efrm_nic_add: add the NIC to the resource driver
++ *
++ * NOTE: the flow of control through this routine is quite subtle
++ * because of the number of operations that can fail. We therefore
++ * take the approach of keeping the return code (rc) variable
++ * accurate, and only do operations while it is non-negative. Tear down
++ * is done at the end if rc is negative, depending on what has been set up
++ * by that point.
++ *
++ * So basically just make sure that any code you add checks rc >= 0 before
++ * doing any work and you'll be fine.
++ *
++ ****************************************************************************/
++int
++efrm_nic_add(struct pci_dev *dev, unsigned flags, const uint8_t *mac_addr,
++ struct linux_efhw_nic **lnic_out, spinlock_t *reg_lock,
++ int bt_min, int bt_lim, int non_irq_evq,
++ const struct vi_resource_dimensions *res_dim)
++{
++ struct linux_efhw_nic *lnic = NULL;
++ struct efhw_nic *nic = NULL;
++ int count = 0, rc = 0, resources_init = 0;
++ int constructed = 0;
++ int registered_nic = 0;
++ int buffers_allocated = 0;
++ static unsigned nic_index; /* = 0; */
++
++ EFRM_TRACE("%s: device detected (Slot '%s', IRQ %d)", __func__,
++ pci_name(dev) ? pci_name(dev) : "?", dev->irq);
++
++ /* Ensure that we have room for the new adapter-structure. */
++ if (efrm_nic_tablep->nic_count == EFHW_MAX_NR_DEVS) {
++ EFRM_WARN("%s: WARNING: too many devices", __func__);
++ rc = -ENOMEM;
++ goto failed;
++ }
++
++ if (n_nics_probed == 0) {
++ rc = efrm_resources_init(res_dim, bt_min, bt_lim);
++ if (rc != 0)
++ goto failed;
++ resources_init = 1;
++ }
++
++ /* Allocate memory for the new adapter-structure.
*/
++ lnic = kmalloc(sizeof(*lnic), GFP_KERNEL);
++ if (lnic == NULL) {
++ EFRM_ERR("%s: ERROR: failed to allocate memory", __func__);
++ rc = -ENOMEM;
++ goto failed;
++ }
++ memset(lnic, 0, sizeof(*lnic));
++ nic = &lnic->efrm_nic.efhw_nic;
++
++ lnic->ev_handlers = &ev_handler;
++
++ /* OS specific hardware mappings */
++ rc = linux_efrm_nic_ctor(lnic, dev, reg_lock, flags, nic_options);
++ if (rc < 0) {
++ EFRM_ERR("%s: ERROR: initialisation failed", __func__);
++ goto failed;
++ }
++
++ constructed = 1;
++
++ /* Tell the driver about the NIC - this needs to be done before the
++ resource managers get created below. Note we haven't initialised
++ the hardware yet, and I don't like doing this before the perhaps
++ unreliable hardware initialisation. However, there's quite a lot
++ of code to review if we wanted to do hardware init before bringing
++ up the resource managers. */
++ rc = efrm_driver_register_nic(&lnic->efrm_nic, nic_index,
++ /* TODO: ifindex */ nic_index);
++ if (rc < 0) {
++ EFRM_ERR("%s: cannot register nic %d with nic error code %d",
++ __func__, efrm_nic_tablep->nic_count, rc);
++ goto failed;
++ }
++ ++nic_index;
++ registered_nic = 1;
++
++ rc = efrm_nic_buffer_table_alloc(nic);
++ if (rc < 0)
++ goto failed;
++ buffers_allocated = 1;
++
++ /****************************************************/
++ /* hardware bringup */
++ /****************************************************/
++ /* Detecting hardware can be a slightly unreliable process;
++ we want to make sure that we maximise our chances, so we
++ loop a few times until all is good. */
++ for (count = 0; count < max_hardware_init_repeats; count++) {
++ rc = efhw_nic_init_hardware(nic, &ev_handler, mac_addr,
++ non_irq_evq);
++ if (rc >= 0)
++ break;
++
++ /* pain */
++ EFRM_ERR
++ ("error - hardware initialisation failed code %d, "
++ "attempt %d of %d", rc, count + 1,
++ max_hardware_init_repeats);
++ }
++ if (rc < 0)
++ goto failed;
++
++ tasklet_init(&lnic->tasklet, efrm_tasklet, (ulong)nic);
++
++ /* set up interrupt handlers (hard-irq) */
++ nic->irq_handler = &efrm_handle_eventq_irq;
++
++ /* this device can now take management interrupts */
++ if (do_irq && !(nic->flags & NIC_FLAG_NO_INTERRUPT)) {
++ rc = linux_efrm_irq_ctor(lnic);
++ if (rc < 0) {
++ EFRM_ERR("Interrupt initialisation failed (%d)", rc);
++ goto failed;
++ }
++ efhw_nic_set_interrupt_moderation(nic, -1, irq_moderation);
++ efhw_nic_interrupt_enable(nic);
++ }
++ EFRM_TRACE("interrupts are %sregistered", do_irq ? "" : "not ");
++
++ *lnic_out = lnic;
++ EFRM_ASSERT(rc == 0);
++ ++n_nics_probed;
++ return 0;
++
++failed:
++ if (buffers_allocated)
++ efrm_nic_buffer_table_free(nic);
++ if (registered_nic)
++ efrm_driver_unregister_nic(&lnic->efrm_nic);
++ if (constructed)
++ linux_efrm_nic_dtor(lnic);
++ kfree(lnic); /* safe in any case */
++ if (resources_init)
++ efrm_resources_fini();
++ return rc;
++}
++
++/****************************************************************************
++ *
++ * efrm_nic_del: Remove the nic from the resource driver structures
++ *
++ ****************************************************************************/
++void efrm_nic_del(struct linux_efhw_nic *lnic)
++{
++ struct efhw_nic *nic = &lnic->efrm_nic.efhw_nic;
++
++ EFRM_TRACE("%s:", __func__);
++ EFRM_ASSERT(nic);
++
++ efrm_nic_buffer_table_free(nic);
++
++ efrm_driver_unregister_nic(&lnic->efrm_nic);
++
++ /*
++ * Synchronise here with any running ISR.
++ * Remove the OS handler. There should be no IRQs being generated
++ by our NIC at this point.
++ */ ++ if (efhw_nic_have_functional_units(nic)) { ++ efhw_nic_close_interrupts(nic); ++ linux_efrm_irq_dtor(lnic); ++ tasklet_kill(&lnic->tasklet); ++ } ++ ++ /* Close down hardware and free resources. */ ++ linux_efrm_nic_dtor(lnic); ++ kfree(lnic); ++ ++ if (--n_nics_probed == 0) ++ efrm_resources_fini(); ++ ++ EFRM_TRACE("%s: done", __func__); ++} ++ ++/**************************************************************************** ++ * ++ * init_module: register as a PCI driver. ++ * ++ ****************************************************************************/ ++static int init_sfc_resource(void) ++{ ++ int rc = 0; ++ ++ EFRM_TRACE("%s: RESOURCE driver starting", __func__); ++ ++ efrm_driver_ctor(); ++ ++ /* Register the driver so that our 'probe' function is called for ++ * each EtherFabric device in the system. ++ */ ++ rc = efrm_driverlink_register(); ++ if (rc == -ENODEV) ++ EFRM_ERR("%s: no devices found", __func__); ++ if (rc < 0) ++ goto failed_driverlink; ++ ++ if (efrm_install_proc_entries() != 0) { ++ /* Do not fail, but print a warning */ ++ EFRM_WARN("%s: WARNING: failed to install /proc entries", ++ __func__); ++ } ++ ++ return 0; ++ ++failed_driverlink: ++ efrm_driver_dtor(); ++ return rc; ++} ++ ++/**************************************************************************** ++ * ++ * cleanup_module: module-removal entry-point ++ * ++ ****************************************************************************/ ++static void cleanup_sfc_resource(void) ++{ ++ efrm_uninstall_proc_entries(); ++ ++ efrm_driverlink_unregister(); ++ ++ /* Clean up char-driver specific initialisation. ++ - driver dtor can use both work queue and buffer table entries */ ++ efrm_driver_dtor(); ++ ++ EFRM_TRACE("%s: unloaded", __func__); ++} ++ ++module_init(init_sfc_resource); ++module_exit(cleanup_sfc_resource); +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/resource_manager.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/resource_manager.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,145 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains generic code for resources and resource managers. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
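init_sfc_resource() above shows the unwind idiom used throughout this driver: each step that can fail jumps to a label from which everything already constructed is torn down, in reverse order (efrm_nic_add() earlier does the same thing with its resources_init/constructed/registered_nic flags). A standalone sketch of the ladder, with hypothetical step_a/step_b names, not taken from the patch:

/* Goto-unwind ladder, as used by init_sfc_resource() above.
 * Hypothetical steps; build with: cc -o unwind unwind.c */
#include <stdio.h>

static int step_a(void) { puts("a: up"); return 0; }
static void undo_a(void) { puts("a: down"); }
static int step_b(void) { puts("b: failed"); return -1; }

static int bring_up(void)
{
	int rc;

	rc = step_a();
	if (rc < 0)
		goto fail_a;
	rc = step_b();
	if (rc < 0)
		goto fail_b;
	return 0;

fail_b:
	undo_a();	/* tear down in reverse order of construction */
fail_a:
	return rc;
}

int main(void)
{
	return bring_up() < 0 ? 1 : 0;
}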
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include "efrm_internal.h"
++
++/**********************************************************************
++ * struct efrm_resource_manager
++ */
++
++void efrm_resource_manager_dtor(struct efrm_resource_manager *rm)
++{
++ EFRM_RESOURCE_MANAGER_ASSERT_VALID(rm);
++
++ /* call destructor */
++ EFRM_DO_DEBUG(if (rm->rm_resources)
++ EFRM_ERR("%s: %s leaked %d resources",
++ __func__, rm->rm_name, rm->rm_resources));
++ EFRM_ASSERT(rm->rm_resources == 0);
++ EFRM_ASSERT(list_empty(&rm->rm_resources_list));
++
++ rm->rm_dtor(rm);
++
++ /* clear out things built by efrm_resource_manager_ctor */
++ spin_lock_destroy(&rm->rm_lock);
++
++ /* and then free the memory */
++ EFRM_DO_DEBUG(memset(rm, 0, sizeof(*rm)));
++ kfree(rm);
++}
++
++/* Construct a resource manager. Resource managers are singletons. */
++int
++efrm_resource_manager_ctor(struct efrm_resource_manager *rm,
++ void (*dtor)(struct efrm_resource_manager *),
++ const char *name, unsigned type)
++{
++ EFRM_ASSERT(rm);
++ EFRM_ASSERT(dtor);
++
++ rm->rm_name = name;
++ EFRM_DO_DEBUG(rm->rm_type = type);
++ rm->rm_dtor = dtor;
++ spin_lock_init(&rm->rm_lock);
++ rm->rm_resources = 0;
++ rm->rm_resources_hiwat = 0;
++ INIT_LIST_HEAD(&rm->rm_resources_list);
++ EFRM_RESOURCE_MANAGER_ASSERT_VALID(rm);
++ return 0;
++}
++
++
++void efrm_client_add_resource(struct efrm_client *client,
++ struct efrm_resource *rs)
++{
++ struct efrm_resource_manager *rm;
++ irq_flags_t lock_flags;
++
++ EFRM_ASSERT(client != NULL);
++ EFRM_ASSERT(rs != NULL);
++
++ spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags);
++ rm = efrm_rm_table[EFRM_RESOURCE_TYPE(rs->rs_handle)];
++ ++rm->rm_resources;
++ list_add(&rs->rs_manager_link, &rm->rm_resources_list);
++ if (rm->rm_resources > rm->rm_resources_hiwat)
++ rm->rm_resources_hiwat = rm->rm_resources;
++ rs->rs_client = client;
++ ++client->ref_count;
++ list_add(&rs->rs_client_link, &client->resources);
++ spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags);
++}
++
++
++void efrm_resource_ref(struct efrm_resource *rs)
++{
++ irq_flags_t lock_flags;
++ spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags);
++ ++rs->rs_ref_count;
++ spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags);
++}
++EXPORT_SYMBOL(efrm_resource_ref);
++
++
++int __efrm_resource_release(struct efrm_resource *rs)
++{
++ struct efrm_resource_manager *rm;
++ irq_flags_t lock_flags;
++ int free_rs;
++
++ spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags);
++ free_rs = --rs->rs_ref_count == 0;
++ if (free_rs) {
++ rm = efrm_rm_table[EFRM_RESOURCE_TYPE(rs->rs_handle)];
++ EFRM_ASSERT(rm->rm_resources > 0);
++ --rm->rm_resources;
++ list_del(&rs->rs_manager_link);
++ list_del(&rs->rs_client_link);
++ }
++ spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags);
++ return free_rs;
++}
++EXPORT_SYMBOL(__efrm_resource_release);
++
++/*
++ * vi: sw=8:ai:aw
++ */
+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/resources.c
+===================================================================
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/resources.c 2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,94 @@
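__efrm_resource_release() above decides whether the caller dropped the last reference while holding the table lock, but leaves the actual kfree() to the caller, outside the lock. A user-space sketch of that split, with a pthread mutex standing in for the spinlock and illustrative names throughout:

/* Decide under the lock, free outside it -- the refcount pattern of
 * __efrm_resource_release() above. Build: cc -pthread -o rel rel.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct res {
	int ref_count;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns nonzero iff the caller dropped the last reference. */
static int release(struct res *rs)
{
	int free_rs;

	pthread_mutex_lock(&table_lock);
	free_rs = --rs->ref_count == 0;
	/* list unlinking etc. would happen here, still under the lock */
	pthread_mutex_unlock(&table_lock);
	return free_rs;
}

int main(void)
{
	struct res *rs = malloc(sizeof(*rs));

	rs->ref_count = 2;
	if (release(rs))	/* 2 -> 1: not the last reference */
		free(rs);
	if (release(rs)) {	/* 1 -> 0: last one; free outside the lock */
		puts("last reference dropped");
		free(rs);
	}
	return 0;
}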
++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains resource managers initialisation functions. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++#include ++ ++int ++efrm_resources_init(const struct vi_resource_dimensions *vi_res_dim, ++ int buffer_table_min, int buffer_table_lim) ++{ ++ int i, rc; ++ ++ rc = efrm_buffer_table_ctor(buffer_table_min, buffer_table_lim); ++ if (rc != 0) ++ return rc; ++ ++ /* Create resources in the correct order */ ++ for (i = 0; i < EFRM_RESOURCE_NUM; ++i) { ++ struct efrm_resource_manager **rmp = &efrm_rm_table[i]; ++ ++ EFRM_ASSERT(*rmp == NULL); ++ switch (i) { ++ case EFRM_RESOURCE_VI: ++ rc = efrm_create_vi_resource_manager(rmp, ++ vi_res_dim); ++ break; ++ case EFRM_RESOURCE_FILTER: ++ rc = efrm_create_filter_resource_manager(rmp); ++ break; ++ case EFRM_RESOURCE_IOBUFSET: ++ rc = efrm_create_iobufset_resource_manager(rmp); ++ break; ++ default: ++ rc = 0; ++ break; ++ } ++ ++ if (rc < 0) { ++ EFRM_ERR("%s: failed type=%d (%d)", ++ __func__, i, rc); ++ efrm_buffer_table_dtor(); ++ return rc; ++ } ++ } ++ ++ return 0; ++} ++ ++void efrm_resources_fini(void) ++{ ++ int i; ++ ++ for (i = EFRM_RESOURCE_NUM - 1; i >= 0; --i) ++ if (efrm_rm_table[i]) { ++ efrm_resource_manager_dtor(efrm_rm_table[i]); ++ efrm_rm_table[i] = NULL; ++ } ++ ++ efrm_buffer_table_dtor(); ++} +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/vi_resource_alloc.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/vi_resource_alloc.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,820 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains allocation of VI resources. 
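efrm_resources_fini() above walks efrm_rm_table strictly backwards, so resource managers are destroyed in the reverse of the order efrm_resources_init() created them, and only entries that actually exist are touched. A standalone sketch of that forward-create/reverse-destroy loop, with illustrative names:

/* Forward-create / reverse-destroy, as in efrm_resources_init() and
 * efrm_resources_fini() above. Build: cc -o revdtor revdtor.c */
#include <stdio.h>

#define NUM 3

static int created[NUM];

static int create(int i)
{
	printf("create %d\n", i);
	created[i] = 1;
	return 0;		/* all succeed in this sketch */
}

static void destroy(int i)
{
	printf("destroy %d\n", i);
	created[i] = 0;
}

int main(void)
{
	int i;

	for (i = 0; i < NUM; i++)
		if (create(i) < 0)
			break;

	/* tear down only what exists, last-created first */
	for (i = NUM - 1; i >= 0; i--)
		if (created[i])
			destroy(i);
	return 0;
}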
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ * 9501 Jeronimo Road, Suite 250,
++ * Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ *
++ * Certain parts of the driver were implemented by
++ * Alexandra Kossovsky
++ * OKTET Labs Ltd, Russia,
++ * http://oktetlabs.ru,
++ * by request of Solarflare Communications
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include "efrm_internal.h"
++
++
++/*** Data definitions ****************************************************/
++
++static const char *dmaq_names[] = { "TX", "RX" };
++
++struct vi_resource_manager *efrm_vi_manager;
++
++/*** Forward references **************************************************/
++
++static int
++efrm_vi_resource_alloc_or_free(struct efrm_client *client,
++ int alloc, struct vi_resource *evq_virs,
++ uint16_t vi_flags, int32_t evq_capacity,
++ int32_t txq_capacity, int32_t rxq_capacity,
++ uint8_t tx_q_tag, uint8_t rx_q_tag,
++ struct vi_resource **virs_in_out);
++
++/*** Reference count handling ********************************************/
++
++static inline void efrm_vi_rm_get_ref(struct vi_resource *virs)
++{
++ atomic_inc(&virs->evq_refs);
++}
++
++static inline void efrm_vi_rm_drop_ref(struct vi_resource *virs)
++{
++ EFRM_ASSERT(atomic_read(&virs->evq_refs) != 0);
++ if (atomic_dec_and_test(&virs->evq_refs))
++ efrm_vi_resource_alloc_or_free(virs->rs.rs_client, false, NULL,
++ 0, 0, 0, 0, 0, 0, &virs);
++}
++
++/*** Instance numbers ****************************************************/
++
++static inline int efrm_vi_rm_alloc_id(uint16_t vi_flags, int32_t evq_capacity)
++{
++ irq_flags_t lock_flags;
++ int instance;
++ int rc;
++
++ if (efrm_nic_tablep->a_nic == NULL) /* ?? FIXME: surely not right */
++ return -ENODEV;
++
++ spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags);
++
++ /* Falcon A1 RX phys addr weirdness. */
++ if (efrm_nic_tablep->a_nic->devtype.variant == 'A' &&
++ (vi_flags & EFHW_VI_RX_PHYS_ADDR_EN)) {
++ if (vi_flags & EFHW_VI_JUMBO_EN) {
++ /* Falcon-A cannot do phys + scatter. */
++ EFRM_WARN
++ ("%s: falcon-A does not support phys+scatter mode",
++ __func__);
++ instance = -1;
++ } else if (efrm_vi_manager->iscsi_dmaq_instance_is_free
++ && evq_capacity == 0) {
++ /* Falcon-A has a single RXQ that gives the correct
++ * semantics for physical addressing. However, it
++ * happens to have the same instance number as the
++ * 'char' event queue, so we cannot also hand out
++ * the event queue.
*/ ++ efrm_vi_manager->iscsi_dmaq_instance_is_free = false; ++ instance = FALCON_A1_ISCSI_DMAQ; ++ } else { ++ EFRM_WARN("%s: iSCSI receive queue not free", ++ __func__); ++ instance = -1; ++ } ++ goto unlock_out; ++ } ++ ++ if (vi_flags & EFHW_VI_RM_WITH_INTERRUPT) { ++ rc = __kfifo_get(efrm_vi_manager->instances_with_interrupt, ++ (unsigned char *)&instance, sizeof(instance)); ++ if (rc != sizeof(instance)) { ++ EFRM_ASSERT(rc == 0); ++ instance = -1; ++ } ++ goto unlock_out; ++ } ++ ++ /* Otherwise a normal run-of-the-mill VI. */ ++ rc = __kfifo_get(efrm_vi_manager->instances_with_timer, ++ (unsigned char *)&instance, sizeof(instance)); ++ if (rc != sizeof(instance)) { ++ EFRM_ASSERT(rc == 0); ++ instance = -1; ++ } ++ ++unlock_out: ++ spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags); ++ return instance; ++} ++ ++static void efrm_vi_rm_free_id(int instance) ++{ ++ irq_flags_t lock_flags; ++ struct kfifo *instances; ++ ++ if (efrm_nic_tablep->a_nic == NULL) /* ?? FIXME: surely not right */ ++ return; ++ ++ if (efrm_nic_tablep->a_nic->devtype.variant == 'A' && ++ instance == FALCON_A1_ISCSI_DMAQ) { ++ EFRM_ASSERT(efrm_vi_manager->iscsi_dmaq_instance_is_free == ++ false); ++ spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags); ++ efrm_vi_manager->iscsi_dmaq_instance_is_free = true; ++ spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, ++ lock_flags); ++ } else { ++ if (instance >= efrm_vi_manager->with_timer_base && ++ instance < efrm_vi_manager->with_timer_limit) { ++ instances = efrm_vi_manager->instances_with_timer; ++ } else { ++ EFRM_ASSERT(instance >= ++ efrm_vi_manager->with_interrupt_base); ++ EFRM_ASSERT(instance < ++ efrm_vi_manager->with_interrupt_limit); ++ instances = efrm_vi_manager->instances_with_interrupt; ++ } ++ ++ EFRM_VERIFY_EQ(kfifo_put(instances, (unsigned char *)&instance, ++ sizeof(instance)), sizeof(instance)); ++ } ++} ++ ++/*** Queue sizes *********************************************************/ ++ ++/* NB. This should really take a nic as an argument, but that makes ++ * the buffer table allocation difficult. */ ++uint32_t efrm_vi_rm_evq_bytes(struct vi_resource *virs ++ /*,struct efhw_nic *nic */) ++{ ++ return virs->evq_capacity * sizeof(efhw_event_t); ++} ++EXPORT_SYMBOL(efrm_vi_rm_evq_bytes); ++ ++/* NB. This should really take a nic as an argument, but that makes ++ * the buffer table allocation difficult. */ ++uint32_t efrm_vi_rm_txq_bytes(struct vi_resource *virs ++ /*,struct efhw_nic *nic */) ++{ ++ return virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX] * ++ FALCON_DMA_TX_DESC_BYTES; ++} ++EXPORT_SYMBOL(efrm_vi_rm_txq_bytes); ++ ++/* NB. This should really take a nic as an argument, but that makes ++ * the buffer table allocation difficult. */ ++uint32_t efrm_vi_rm_rxq_bytes(struct vi_resource *virs ++ /*,struct efhw_nic *nic */) ++{ ++ uint32_t bytes_per_desc = ((virs->flags & EFHW_VI_RX_PHYS_ADDR_EN) ++ ? FALCON_DMA_RX_PHYS_DESC_BYTES ++ : FALCON_DMA_RX_BUF_DESC_BYTES); ++ return virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX] * bytes_per_desc; ++} ++EXPORT_SYMBOL(efrm_vi_rm_rxq_bytes); ++ ++static int choose_size(int size_rq, unsigned sizes) ++{ ++ int size; ++ ++ /* size_rq < 0 means default, but we interpret this as 'minimum'. 
*/ ++ ++ for (size = 256;; size <<= 1) ++ if ((size & sizes) && size >= size_rq) ++ return size; ++ else if ((sizes & ~((size - 1) | size)) == 0) ++ return -1; ++} ++ ++static int ++efrm_vi_rm_adjust_alloc_request(struct vi_resource *virs, struct efhw_nic *nic) ++{ ++ int capacity; ++ ++ EFRM_ASSERT(nic->efhw_func); ++ ++ if (virs->evq_capacity) { ++ capacity = choose_size(virs->evq_capacity, nic->evq_sizes); ++ if (capacity < 0) { ++ EFRM_ERR("vi_resource: bad evq size %d (supported=%x)", ++ virs->evq_capacity, nic->evq_sizes); ++ return -E2BIG; ++ } ++ virs->evq_capacity = capacity; ++ } ++ if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX]) { ++ capacity = ++ choose_size(virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX], ++ nic->txq_sizes); ++ if (capacity < 0) { ++ EFRM_ERR("vi_resource: bad txq size %d (supported=%x)", ++ virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX], ++ nic->txq_sizes); ++ return -E2BIG; ++ } ++ virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX] = capacity; ++ } ++ if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX]) { ++ capacity = ++ choose_size(virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX], ++ nic->rxq_sizes); ++ if (capacity < 0) { ++ EFRM_ERR("vi_resource: bad rxq size %d (supported=%x)", ++ virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX], ++ nic->rxq_sizes); ++ return -E2BIG; ++ } ++ virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX] = capacity; ++ } ++ ++ return 0; ++} ++ ++/* remove the reference to the event queue in this VI resource and decrement ++ the event queue's use count */ ++static inline void efrm_vi_rm_detach_evq(struct vi_resource *virs) ++{ ++ struct vi_resource *evq_virs; ++ ++ EFRM_ASSERT(virs != NULL); ++ ++ evq_virs = virs->evq_virs; ++ ++ if (evq_virs != NULL) { ++ virs->evq_virs = NULL; ++ if (evq_virs == virs) { ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT ++ " had internal event queue ", __func__, ++ EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle)); ++ } else { ++ efrm_vi_rm_drop_ref(evq_virs); ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " had event queue " ++ EFRM_RESOURCE_FMT, __func__, ++ EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle), ++ EFRM_RESOURCE_PRI_ARG(evq_virs->rs. 
++ rs_handle)); ++ } ++ } else { ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT ++ " had no event queue (nothing to do)", ++ __func__, ++ EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle)); ++ } ++} ++ ++/*** Buffer Table allocations ********************************************/ ++ ++static int ++efrm_vi_rm_alloc_or_free_buffer_table(struct vi_resource *virs, bool is_alloc) ++{ ++ uint32_t bytes; ++ int page_order; ++ int rc; ++ ++ if (!is_alloc) ++ goto destroy; ++ ++ if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX]) { ++ bytes = efrm_vi_rm_txq_bytes(virs); ++ page_order = get_order(bytes); ++ rc = efrm_buffer_table_alloc(page_order, ++ (virs->dmaq_buf_tbl_alloc + ++ EFRM_VI_RM_DMA_QUEUE_TX)); ++ if (rc != 0) { ++ EFRM_TRACE ++ ("%s: Error %d allocating TX buffer table entry", ++ __func__, rc); ++ goto fail_txq_alloc; ++ } ++ } ++ ++ if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX]) { ++ bytes = efrm_vi_rm_rxq_bytes(virs); ++ page_order = get_order(bytes); ++ rc = efrm_buffer_table_alloc(page_order, ++ (virs->dmaq_buf_tbl_alloc + ++ EFRM_VI_RM_DMA_QUEUE_RX)); ++ if (rc != 0) { ++ EFRM_TRACE ++ ("%s: Error %d allocating RX buffer table entry", ++ __func__, rc); ++ goto fail_rxq_alloc; ++ } ++ } ++ return 0; ++ ++destroy: ++ rc = 0; ++ ++ if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX]) { ++ efrm_buffer_table_free(&virs-> ++ dmaq_buf_tbl_alloc ++ [EFRM_VI_RM_DMA_QUEUE_RX]); ++ } ++fail_rxq_alloc: ++ ++ if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX]) { ++ efrm_buffer_table_free(&virs-> ++ dmaq_buf_tbl_alloc ++ [EFRM_VI_RM_DMA_QUEUE_TX]); ++ } ++fail_txq_alloc: ++ ++ return rc; ++} ++ ++/*** Per-NIC allocations *************************************************/ ++ ++static inline int ++efrm_vi_rm_init_evq(struct vi_resource *virs, struct efhw_nic *nic) ++{ ++ int instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ struct eventq_resource_hardware *evq_hw = ++ &virs->nic_info.evq_pages; ++ uint32_t buf_bytes = efrm_vi_rm_evq_bytes(virs); ++ int rc; ++ ++ if (virs->evq_capacity == 0) ++ return 0; ++ evq_hw->capacity = virs->evq_capacity; ++ ++ /* Allocate buffer table entries to map onto the iobuffer. This ++ * currently allocates its own buffer table entries on Falcon which is ++ * a bit wasteful on a multi-NIC system. */ ++ evq_hw->buf_tbl_alloc.base = (unsigned)-1; ++ rc = efrm_buffer_table_alloc(get_order(buf_bytes), ++ &evq_hw->buf_tbl_alloc); ++ if (rc < 0) { ++ EFHW_WARN("%s: failed (%d) to alloc %d buffer table entries", ++ __func__, rc, get_order(buf_bytes)); ++ return rc; ++ } ++ ++ /* Allocate the event queue memory. 
*/ ++ rc = efhw_nic_event_queue_alloc_iobuffer(nic, evq_hw, instance, ++ buf_bytes); ++ if (rc != 0) { ++ EFRM_ERR("%s: Error allocating iobuffer: %d", __func__, rc); ++ efrm_buffer_table_free(&evq_hw->buf_tbl_alloc); ++ return rc; ++ } ++ ++ /* Initialise the event queue hardware */ ++ efhw_nic_event_queue_enable(nic, instance, virs->evq_capacity, ++ efhw_iopages_dma_addr(&evq_hw->iobuff) + ++ evq_hw->iobuff_off, ++ evq_hw->buf_tbl_alloc.base, ++ instance < 64); ++ ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " capacity=%u", __func__, ++ EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle), ++ virs->evq_capacity); ++ ++#if defined(__ia64__) ++ /* Page size may be large, so for now just increase the ++ * size of the requested evq up to a round number of ++ * pages ++ */ ++ buf_bytes = CI_ROUNDUP(buf_bytes, PAGE_SIZE); ++#endif ++ EFRM_ASSERT(buf_bytes % PAGE_SIZE == 0); ++ ++ virs->mem_mmap_bytes += buf_bytes; ++ ++ return 0; ++} ++ ++static inline void ++efrm_vi_rm_fini_evq(struct vi_resource *virs, struct efhw_nic *nic) ++{ ++ int instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ struct vi_resource_nic_info *nic_info = &virs->nic_info; ++ ++ if (virs->evq_capacity == 0) ++ return; ++ ++ /* Zero the timer-value for this queue. ++ And Tell NIC to stop using this event queue. */ ++ efhw_nic_event_queue_disable(nic, instance, 0); ++ ++ if (nic_info->evq_pages.buf_tbl_alloc.base != (unsigned)-1) ++ efrm_buffer_table_free(&nic_info->evq_pages.buf_tbl_alloc); ++ ++ efhw_iopages_free(nic, &nic_info->evq_pages.iobuff); ++} ++ ++/*! FIXME: we should make sure this number is never zero (=> unprotected) */ ++/*! FIXME: put this definition in a relevant header (e.g. as (evqid)+1) */ ++#define EFAB_EVQ_OWNER_ID(evqid) ((evqid)) ++ ++void ++efrm_vi_rm_init_dmaq(struct vi_resource *virs, int queue_type, ++ struct efhw_nic *nic) ++{ ++ int instance; ++ int evq_instance; ++ efhw_buffer_addr_t buf_addr; ++ ++ instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ evq_instance = EFRM_RESOURCE_INSTANCE(virs->evq_virs->rs.rs_handle); ++ ++ buf_addr = virs->dmaq_buf_tbl_alloc[queue_type].base; ++ ++ if (queue_type == EFRM_VI_RM_DMA_QUEUE_TX) { ++ efhw_nic_dmaq_tx_q_init(nic, ++ instance, /* dmaq */ ++ evq_instance, /* evq */ ++ EFAB_EVQ_OWNER_ID(evq_instance), /* owner */ ++ virs->dmaq_tag[queue_type], /* tag */ ++ virs->dmaq_capacity[queue_type], /* size of queue */ ++ buf_addr, /* buffer index */ ++ virs->flags); /* user specified Q attrs */ ++ } else { ++ efhw_nic_dmaq_rx_q_init(nic, ++ instance, /* dmaq */ ++ evq_instance, /* evq */ ++ EFAB_EVQ_OWNER_ID(evq_instance), /* owner */ ++ virs->dmaq_tag[queue_type], /* tag */ ++ virs->dmaq_capacity[queue_type], /* size of queue */ ++ buf_addr, /* buffer index */ ++ virs->flags); /* user specified Q attrs */ ++ } ++} ++ ++static int ++efrm_vi_rm_init_or_fini_dmaq(struct vi_resource *virs, ++ int queue_type, int init, ++ struct efhw_nic *nic) ++{ ++ int rc; ++ int instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ uint32_t buf_bytes; ++ struct vi_resource_nic_info *nic_info = &virs->nic_info; ++ int page_order; ++ uint32_t num_pages; ++ struct efhw_iopages *iobuff; ++ ++ if (!init) ++ goto destroy; ++ ++ /* Ignore disabled queues. */ ++ if (virs->dmaq_capacity[queue_type] == 0) { ++ if (queue_type == EFRM_VI_RM_DMA_QUEUE_TX) ++ efhw_nic_dmaq_tx_q_disable(nic, instance); ++ else ++ efhw_nic_dmaq_rx_q_disable(nic, instance); ++ return 0; ++ } ++ ++ buf_bytes = (queue_type == EFRM_VI_RM_DMA_QUEUE_TX ++ ? 
efrm_vi_rm_txq_bytes(virs) ++ : efrm_vi_rm_rxq_bytes(virs)); ++ ++ page_order = get_order(buf_bytes); ++ ++ rc = efhw_iopages_alloc(nic, &nic_info->dmaq_pages[queue_type], ++ page_order); ++ if (rc != 0) { ++ EFRM_ERR("%s: Failed to allocate %s DMA buffer.", __func__, ++ dmaq_names[queue_type]); ++ goto fail_iopages; ++ } ++ ++ num_pages = 1 << page_order; ++ iobuff = &nic_info->dmaq_pages[queue_type]; ++ efhw_nic_buffer_table_set_n(nic, ++ virs->dmaq_buf_tbl_alloc[queue_type].base, ++ efhw_iopages_dma_addr(iobuff), ++ EFHW_NIC_PAGE_SIZE, 0, num_pages, 0); ++ ++ falcon_nic_buffer_table_confirm(nic); ++ ++ virs->mem_mmap_bytes += roundup(buf_bytes, PAGE_SIZE); ++ ++ /* Make sure there is an event queue. */ ++ if (virs->evq_virs->evq_capacity <= 0) { ++ EFRM_ERR("%s: Cannot use empty event queue for %s DMA", ++ __func__, dmaq_names[queue_type]); ++ rc = -EINVAL; ++ goto fail_evq; ++ } ++ ++ efrm_vi_rm_init_dmaq(virs, queue_type, nic); ++ ++ return 0; ++ ++destroy: ++ rc = 0; ++ ++ /* Ignore disabled queues. */ ++ if (virs->dmaq_capacity[queue_type] == 0) ++ return 0; ++ ++ /* Ensure TX pacing turned off -- queue flush doesn't reset this. */ ++ if (queue_type == EFRM_VI_RM_DMA_QUEUE_TX) ++ falcon_nic_pace(nic, instance, 0); ++ ++ /* No need to disable the queue here. Nobody is using it anyway. */ ++ ++fail_evq: ++ efhw_iopages_free(nic, &nic_info->dmaq_pages[queue_type]); ++fail_iopages: ++ ++ return rc; ++} ++ ++static int ++efrm_vi_rm_init_or_fini_nic(struct vi_resource *virs, int init, ++ struct efhw_nic *nic) ++{ ++ int rc; ++#ifndef NDEBUG ++ int instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++#endif ++ ++ if (!init) ++ goto destroy; ++ ++ rc = efrm_vi_rm_init_evq(virs, nic); ++ if (rc != 0) ++ goto fail_evq; ++ ++ rc = efrm_vi_rm_init_or_fini_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_TX, ++ init, nic); ++ if (rc != 0) ++ goto fail_txq; ++ ++ rc = efrm_vi_rm_init_or_fini_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_RX, ++ init, nic); ++ if (rc != 0) ++ goto fail_rxq; ++ ++ /* Allocate space for the control page. 
*/ ++ EFRM_ASSERT(falcon_tx_dma_page_offset(instance) < PAGE_SIZE); ++ EFRM_ASSERT(falcon_rx_dma_page_offset(instance) < PAGE_SIZE); ++ EFRM_ASSERT(falcon_timer_page_offset(instance) < PAGE_SIZE); ++ virs->bar_mmap_bytes += PAGE_SIZE; ++ ++ return 0; ++ ++destroy: ++ rc = 0; ++ ++ efrm_vi_rm_init_or_fini_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_RX, ++ false, nic); ++fail_rxq: ++ ++ efrm_vi_rm_init_or_fini_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_TX, ++ false, nic); ++fail_txq: ++ ++ efrm_vi_rm_fini_evq(virs, nic); ++fail_evq: ++ ++ EFRM_ASSERT(rc != 0 || !init); ++ return rc; ++} ++ ++static int ++efrm_vi_resource_alloc_or_free(struct efrm_client *client, ++ int alloc, struct vi_resource *evq_virs, ++ uint16_t vi_flags, int32_t evq_capacity, ++ int32_t txq_capacity, int32_t rxq_capacity, ++ uint8_t tx_q_tag, uint8_t rx_q_tag, ++ struct vi_resource **virs_in_out) ++{ ++ struct efhw_nic *nic = client->nic; ++ struct vi_resource *virs; ++ int rc; ++ int instance; ++ ++ EFRM_ASSERT(virs_in_out); ++ EFRM_ASSERT(efrm_vi_manager); ++ EFRM_RESOURCE_MANAGER_ASSERT_VALID(&efrm_vi_manager->rm); ++ ++ if (!alloc) ++ goto destroy; ++ ++ rx_q_tag &= (1 << TX_DESCQ_LABEL_WIDTH) - 1; ++ tx_q_tag &= (1 << RX_DESCQ_LABEL_WIDTH) - 1; ++ ++ virs = kmalloc(sizeof(*virs), GFP_KERNEL); ++ if (virs == NULL) { ++ EFRM_ERR("%s: Error allocating VI resource object", ++ __func__); ++ rc = -ENOMEM; ++ goto fail_alloc; ++ } ++ memset(virs, 0, sizeof(*virs)); ++ ++ /* Some macros make the assumption that the struct efrm_resource is ++ * the first member of a struct vi_resource. */ ++ EFRM_ASSERT(&virs->rs == (struct efrm_resource *) (virs)); ++ ++ instance = efrm_vi_rm_alloc_id(vi_flags, evq_capacity); ++ if (instance < 0) { ++ /* Clear out the close list... */ ++ efrm_vi_rm_salvage_flushed_vis(); ++ instance = efrm_vi_rm_alloc_id(vi_flags, evq_capacity); ++ if (instance >= 0) ++ EFRM_TRACE("%s: Salvaged a closed VI.", __func__); ++ } ++ ++ if (instance < 0) { ++ /* Could flush resources and try again here. */ ++ EFRM_ERR("%s: Out of appropriate VI resources", __func__); ++ rc = -EBUSY; ++ goto fail_alloc_id; ++ } ++ ++ EFRM_TRACE("%s: new VI ID %d", __func__, instance); ++ efrm_resource_init(&virs->rs, EFRM_RESOURCE_VI, instance); ++ ++ /* Start with one reference. Any external VIs using the EVQ of this ++ * resource will increment this reference rather than the resource ++ * reference to avoid DMAQ flushes from waiting for other DMAQ ++ * flushes to complete. When the resource reference goes to zero, ++ * the DMAQ flush happens. When the flush completes, this reference ++ * is decremented. When this reference reaches zero, the instance ++ * is freed. */ ++ atomic_set(&virs->evq_refs, 1); ++ ++ virs->bar_mmap_bytes = 0; ++ virs->mem_mmap_bytes = 0; ++ virs->evq_capacity = evq_capacity; ++ virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX] = txq_capacity; ++ virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX] = rxq_capacity; ++ virs->dmaq_tag[EFRM_VI_RM_DMA_QUEUE_TX] = tx_q_tag; ++ virs->dmaq_tag[EFRM_VI_RM_DMA_QUEUE_RX] = rx_q_tag; ++ virs->flags = vi_flags; ++ INIT_LIST_HEAD(&virs->tx_flush_link); ++ INIT_LIST_HEAD(&virs->rx_flush_link); ++ virs->tx_flushing = 0; ++ virs->rx_flushing = 0; ++ ++ /* Adjust the queue sizes. */ ++ rc = efrm_vi_rm_adjust_alloc_request(virs, nic); ++ if (rc != 0) ++ goto fail_adjust_request; ++ ++ /* Attach the EVQ early so that we can ensure that the NIC sets ++ * match. 
*/ ++ if (evq_virs == NULL) { ++ evq_virs = virs; ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT ++ " has no external event queue", __func__, ++ EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle)); ++ } else { ++ /* Make sure the resource managers are the same. */ ++ if (EFRM_RESOURCE_TYPE(evq_virs->rs.rs_handle) != ++ EFRM_RESOURCE_VI) { ++ EFRM_ERR("%s: Mismatched owner for event queue VI " ++ EFRM_RESOURCE_FMT, __func__, ++ EFRM_RESOURCE_PRI_ARG(evq_virs->rs.rs_handle)); ++ return -EINVAL; ++ } ++ EFRM_ASSERT(atomic_read(&evq_virs->evq_refs) != 0); ++ efrm_vi_rm_get_ref(evq_virs); ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " uses event queue " ++ EFRM_RESOURCE_FMT, ++ __func__, ++ EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle), ++ EFRM_RESOURCE_PRI_ARG(evq_virs->rs.rs_handle)); ++ } ++ virs->evq_virs = evq_virs; ++ ++ rc = efrm_vi_rm_alloc_or_free_buffer_table(virs, true); ++ if (rc != 0) ++ goto fail_buffer_table; ++ ++ rc = efrm_vi_rm_init_or_fini_nic(virs, true, nic); ++ if (rc != 0) ++ goto fail_init_nic; ++ ++ efrm_client_add_resource(client, &virs->rs); ++ *virs_in_out = virs; ++ EFRM_TRACE("%s: Allocated " EFRM_RESOURCE_FMT, __func__, ++ EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle)); ++ return 0; ++ ++destroy: ++ virs = *virs_in_out; ++ EFRM_RESOURCE_ASSERT_VALID(&virs->rs, 1); ++ instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ ++ EFRM_TRACE("%s: Freeing %d", __func__, ++ EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle)); ++ ++ /* Destroying the VI. The reference count must be zero. */ ++ EFRM_ASSERT(atomic_read(&virs->evq_refs) == 0); ++ ++ /* The EVQ should have gone (and DMA disabled) so that this ++ * function can't be re-entered to destroy the EVQ VI. */ ++ EFRM_ASSERT(virs->evq_virs == NULL); ++ rc = 0; ++ ++fail_init_nic: ++ efrm_vi_rm_init_or_fini_nic(virs, false, nic); ++ ++ efrm_vi_rm_alloc_or_free_buffer_table(virs, false); ++fail_buffer_table: ++ ++ efrm_vi_rm_detach_evq(virs); ++ ++fail_adjust_request: ++ ++ EFRM_ASSERT(virs->evq_callback_fn == NULL); ++ EFRM_TRACE("%s: delete VI ID %d", __func__, instance); ++ efrm_vi_rm_free_id(instance); ++fail_alloc_id: ++ if (!alloc) ++ efrm_client_put(virs->rs.rs_client); ++ EFRM_DO_DEBUG(memset(virs, 0, sizeof(*virs))); ++ kfree(virs); ++fail_alloc: ++ *virs_in_out = NULL; ++ ++ return rc; ++} ++ ++/*** Resource object ****************************************************/ ++ ++int ++efrm_vi_resource_alloc(struct efrm_client *client, ++ struct vi_resource *evq_virs, ++ uint16_t vi_flags, int32_t evq_capacity, ++ int32_t txq_capacity, int32_t rxq_capacity, ++ uint8_t tx_q_tag, uint8_t rx_q_tag, ++ struct vi_resource **virs_out, ++ uint32_t *out_io_mmap_bytes, ++ uint32_t *out_mem_mmap_bytes, ++ uint32_t *out_txq_capacity, uint32_t *out_rxq_capacity) ++{ ++ int rc; ++ EFRM_ASSERT(client != NULL); ++ rc = efrm_vi_resource_alloc_or_free(client, true, evq_virs, vi_flags, ++ evq_capacity, txq_capacity, ++ rxq_capacity, tx_q_tag, rx_q_tag, ++ virs_out); ++ if (rc == 0) { ++ if (out_io_mmap_bytes != NULL) ++ *out_io_mmap_bytes = (*virs_out)->bar_mmap_bytes; ++ if (out_mem_mmap_bytes != NULL) ++ *out_mem_mmap_bytes = (*virs_out)->mem_mmap_bytes; ++ if (out_txq_capacity != NULL) ++ *out_txq_capacity = ++ (*virs_out)->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX]; ++ if (out_rxq_capacity != NULL) ++ *out_rxq_capacity = ++ (*virs_out)->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX]; ++ } ++ ++ return rc; ++} ++EXPORT_SYMBOL(efrm_vi_resource_alloc); ++ ++void efrm_vi_rm_free_flushed_resource(struct vi_resource *virs) ++{ ++ EFRM_ASSERT(virs != NULL); ++ 
EFRM_ASSERT(virs->rs.rs_ref_count == 0); ++ ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT, __func__, ++ EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle)); ++ /* release the associated event queue then drop our own reference ++ * count */ ++ efrm_vi_rm_detach_evq(virs); ++ efrm_vi_rm_drop_ref(virs); ++} +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/vi_resource_event.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/vi_resource_event.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,250 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains event handling for VI resource. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "efrm_internal.h" ++ ++ ++static inline int ++efrm_eventq_bytes(struct vi_resource *virs) ++{ ++ return efrm_vi_rm_evq_bytes(virs); ++} ++ ++ ++static inline efhw_event_t * ++efrm_eventq_base(struct vi_resource *virs) ++{ ++ struct eventq_resource_hardware *hw; ++ hw = &(virs->nic_info.evq_pages); ++ return (efhw_event_t *) (efhw_iopages_ptr(&(hw->iobuff)) + ++ hw->iobuff_off); ++} ++ ++ ++void ++efrm_eventq_request_wakeup(struct vi_resource *virs, unsigned current_ptr) ++{ ++ struct efhw_nic *nic = virs->rs.rs_client->nic; ++ int next_i; ++ next_i = ((current_ptr / sizeof(efhw_event_t)) & ++ (virs->evq_capacity - 1)); ++ ++ efhw_nic_wakeup_request(nic, efrm_eventq_dma_addr(virs), next_i, ++ EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle)); ++} ++EXPORT_SYMBOL(efrm_eventq_request_wakeup); ++ ++void efrm_eventq_reset(struct vi_resource *virs) ++{ ++ struct efhw_nic *nic = virs->rs.rs_client->nic; ++ int instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ ++ EFRM_ASSERT(virs->evq_capacity != 0); ++ ++ /* FIXME: Protect against concurrent resets. */ ++ ++ efhw_nic_event_queue_disable(nic, instance, 0); ++ ++ memset(efrm_eventq_base(virs), EFHW_CLEAR_EVENT_VALUE, ++ efrm_eventq_bytes(virs)); ++ efhw_nic_event_queue_enable(nic, instance, virs->evq_capacity, ++ efrm_eventq_dma_addr(virs), ++ virs->nic_info.evq_pages. 
++ buf_tbl_alloc.base, ++ instance < 64); ++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT, __func__, ++ EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle)); ++} ++EXPORT_SYMBOL(efrm_eventq_reset); ++ ++int ++efrm_eventq_register_callback(struct vi_resource *virs, ++ void (*handler) (void *, int, ++ struct efhw_nic *nic), ++ void *arg) ++{ ++ struct efrm_nic_per_vi *cb_info; ++ int instance; ++ int bit; ++ ++ EFRM_RESOURCE_ASSERT_VALID(&virs->rs, 0); ++ EFRM_ASSERT(virs->evq_capacity != 0); ++ EFRM_ASSERT(handler != NULL); ++ ++ /* ?? TODO: Get rid of this test when client is compulsory. */ ++ if (virs->rs.rs_client == NULL) { ++ EFRM_ERR("%s: no client", __func__); ++ return -EINVAL; ++ } ++ ++ virs->evq_callback_arg = arg; ++ virs->evq_callback_fn = handler; ++ instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ cb_info = &efrm_nic(virs->rs.rs_client->nic)->vis[instance]; ++ ++ /* The handler can be set only once. */ ++ bit = test_and_set_bit(VI_RESOURCE_EVQ_STATE_CALLBACK_REGISTERED, ++ &cb_info->state); ++ if (bit) ++ return -EBUSY; ++ cb_info->vi = virs; ++ ++ return 0; ++} ++EXPORT_SYMBOL(efrm_eventq_register_callback); ++ ++void efrm_eventq_kill_callback(struct vi_resource *virs) ++{ ++ struct efrm_nic_per_vi *cb_info; ++ int32_t evq_state; ++ int instance; ++ int bit; ++ ++ EFRM_RESOURCE_ASSERT_VALID(&virs->rs, 0); ++ EFRM_ASSERT(virs->evq_capacity != 0); ++ EFRM_ASSERT(virs->rs.rs_client != NULL); ++ ++ instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ cb_info = &efrm_nic(virs->rs.rs_client->nic)->vis[instance]; ++ cb_info->vi = NULL; ++ ++ /* Disable the timer. */ ++ efhw_nic_event_queue_disable(virs->rs.rs_client->nic, ++ instance, /*timer_only */ 1); ++ ++ /* Disable the callback. */ ++ bit = test_and_clear_bit(VI_RESOURCE_EVQ_STATE_CALLBACK_REGISTERED, ++ &cb_info->state); ++ EFRM_ASSERT(bit); /* do not call me twice! */ ++ ++ /* Spin until the callback is complete. */ ++ do { ++ rmb(); ++ ++ udelay(1); ++ evq_state = cb_info->state; ++ } while ((evq_state & VI_RESOURCE_EVQ_STATE(BUSY))); ++ ++ virs->evq_callback_fn = NULL; ++} ++EXPORT_SYMBOL(efrm_eventq_kill_callback); ++ ++static void ++efrm_eventq_do_callback(struct efhw_nic *nic, unsigned instance, ++ bool is_timeout) ++{ ++ struct efrm_nic *rnic = efrm_nic(nic); ++ void (*handler) (void *, int is_timeout, struct efhw_nic *nic); ++ void *arg; ++ struct efrm_nic_per_vi *cb_info; ++ int32_t evq_state; ++ int32_t new_evq_state; ++ struct vi_resource *virs; ++ int bit; ++ ++ EFRM_ASSERT(efrm_vi_manager); ++ ++ cb_info = &rnic->vis[instance]; ++ ++ /* Set the BUSY bit and clear WAKEUP_PENDING. Do this ++ * before waking up the sleeper to avoid races. */ ++ while (1) { ++ evq_state = cb_info->state; ++ new_evq_state = evq_state; ++ ++ if ((evq_state & VI_RESOURCE_EVQ_STATE(BUSY)) != 0) { ++ EFRM_ERR("%s:%d: evq_state[%d] corrupted!", ++ __func__, __LINE__, instance); ++ return; ++ } ++ ++ if (!is_timeout) ++ new_evq_state &= ~VI_RESOURCE_EVQ_STATE(WAKEUP_PENDING); ++ ++ if (evq_state & VI_RESOURCE_EVQ_STATE(CALLBACK_REGISTERED)) { ++ new_evq_state |= VI_RESOURCE_EVQ_STATE(BUSY); ++ virs = cb_info->vi; ++ if (cmpxchg(&cb_info->state, evq_state, ++ new_evq_state) == evq_state) ++ break; ++ } else { ++ /* Just update the state if necessary. 
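++ *
++ * (No callback is registered here: if clearing WAKEUP_PENDING left
++ * the state unchanged there is nothing to write back; otherwise a
++ * failed cmpxchg means another CPU raced us, so reread and retry.)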
*/
++ if (new_evq_state == evq_state ||
++ cmpxchg(&cb_info->state, evq_state,
++ new_evq_state) == evq_state)
++ return;
++ }
++ }
++
++ if (virs) {
++ handler = virs->evq_callback_fn;
++ arg = virs->evq_callback_arg;
++ EFRM_ASSERT(handler != NULL);
++ handler(arg, is_timeout, nic);
++ }
++
++ /* Clear the BUSY bit. */
++ bit =
++ test_and_clear_bit(VI_RESOURCE_EVQ_STATE_BUSY,
++ &cb_info->state);
++ if (!bit) {
++ EFRM_ERR("%s:%d: evq_state corrupted!",
++ __func__, __LINE__);
++ }
++}
++
++void efrm_handle_wakeup_event(struct efhw_nic *nic, unsigned instance)
++{
++ efrm_eventq_do_callback(nic, instance, false);
++}
++
++void efrm_handle_timeout_event(struct efhw_nic *nic, unsigned instance)
++{
++ efrm_eventq_do_callback(nic, instance, true);
++}
++
++void efrm_handle_sram_event(struct efhw_nic *nic)
++{
++ if (nic->buf_commit_outstanding > 0)
++ nic->buf_commit_outstanding--;
++}
+Index: head-2008-07-15/drivers/net/sfc/sfc_resource/vi_resource_flush.c
+===================================================================
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ head-2008-07-15/drivers/net/sfc/sfc_resource/vi_resource_flush.c 2008-07-17 16:18:07.000000000 +0200
+@@ -0,0 +1,483 @@
++/****************************************************************************
++ * Driver for Solarflare network controllers -
++ * resource management for Xen backend, OpenOnload, etc
++ * (including support for SFE4001 10GBT NIC)
++ *
++ * This file contains DMA queue flushing of VI resources.
++ *
++ * Copyright 2005-2007: Solarflare Communications Inc,
++ * 9501 Jeronimo Road, Suite 250,
++ * Irvine, CA 92618, USA
++ *
++ * Developed and maintained by Solarflare Communications:
++ *
++ *
++ *
++ * Certain parts of the driver were implemented by
++ * Alexandra Kossovsky
++ * OKTET Labs Ltd, Russia,
++ * http://oktetlabs.ru,
++ * by request of Solarflare Communications
++ *
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation, incorporated herein by reference.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ ****************************************************************************
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include "efrm_internal.h"
++
++
++/* can fail as workitem can already be scheduled -- ignore failure */
++#define EFRM_VI_RM_DELAYED_FREE(manager) \
++ queue_work(manager->workqueue, &manager->work_item)
++
++static const int flush_fifo_hwm = 8 /* TODO should be a HW specific const */ ;
++
++static void
++efrm_vi_resource_rx_flush_done(struct vi_resource *virs, bool *completed)
++{
++ /* We should only get a flush event if there is a flush
++ * outstanding.
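++ *
++ * (The RX and TX variants mirror each other: whichever direction
++ * finishes last moves the VI onto close_pending and sets *completed
++ * so the caller can kick the delayed-free work item.)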
*/ ++ EFRM_ASSERT(virs->rx_flush_outstanding); ++ ++ virs->rx_flush_outstanding = 0; ++ virs->rx_flushing = 0; ++ ++ list_del(&virs->rx_flush_link); ++ efrm_vi_manager->rx_flush_outstanding_count--; ++ ++ if (virs->tx_flushing == 0) { ++ list_add_tail(&virs->rx_flush_link, ++ &efrm_vi_manager->close_pending); ++ *completed = 1; ++ } ++} ++ ++static void ++efrm_vi_resource_tx_flush_done(struct vi_resource *virs, bool *completed) ++{ ++ /* We should only get a flush event if there is a flush ++ * outstanding. */ ++ EFRM_ASSERT(virs->tx_flushing); ++ ++ virs->tx_flushing = 0; ++ ++ list_del(&virs->tx_flush_link); ++ ++ if (virs->rx_flushing == 0) { ++ list_add_tail(&virs->rx_flush_link, ++ &efrm_vi_manager->close_pending); ++ *completed = 1; ++ } ++} ++ ++static void ++efrm_vi_resource_issue_rx_flush(struct vi_resource *virs, bool *completed) ++{ ++ struct efhw_nic *nic = virs->rs.rs_client->nic; ++ int instance; ++ int rc; ++ ++ instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ ++ list_add_tail(&virs->rx_flush_link, ++ &efrm_vi_manager->rx_flush_outstanding_list); ++ virs->rx_flush_outstanding = virs->rx_flushing; ++ efrm_vi_manager->rx_flush_outstanding_count++; ++ ++ EFRM_TRACE("%s: rx queue %d flush requested for nic %d", ++ __func__, instance, nic->index); ++ rc = efhw_nic_flush_rx_dma_channel(nic, instance); ++ if (rc == -EAGAIN) ++ efrm_vi_resource_rx_flush_done(virs, completed); ++} ++ ++static void ++efrm_vi_resource_issue_tx_flush(struct vi_resource *virs, bool *completed) ++{ ++ struct efhw_nic *nic = virs->rs.rs_client->nic; ++ int instance; ++ int rc; ++ ++ instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ ++ list_add_tail(&virs->tx_flush_link, ++ &efrm_vi_manager->tx_flush_outstanding_list); ++ ++ EFRM_TRACE("%s: tx queue %d flush requested for nic %d", ++ __func__, instance, nic->index); ++ rc = efhw_nic_flush_tx_dma_channel(nic, instance); ++ if (rc == -EAGAIN) ++ efrm_vi_resource_tx_flush_done(virs, completed); ++} ++ ++static void efrm_vi_resource_process_waiting_flushes(bool *completed) ++{ ++ struct vi_resource *virs; ++ ++ while (efrm_vi_manager->rx_flush_outstanding_count < flush_fifo_hwm && ++ !list_empty(&efrm_vi_manager->rx_flush_waiting_list)) { ++ virs = ++ list_entry(list_pop ++ (&efrm_vi_manager->rx_flush_waiting_list), ++ struct vi_resource, rx_flush_link); ++ efrm_vi_resource_issue_rx_flush(virs, completed); ++ } ++} ++ ++#if BUG7916_WORKAROUND || BUG5302_WORKAROUND ++static void ++efrm_vi_resource_flush_retry_vi(struct vi_resource *virs, ++ int64_t time_now, bool *completed) ++{ ++ struct efhw_nic *nic; ++ int instance; ++ ++ instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ ++ virs->flush_count++; ++ virs->flush_time = time_now; ++ nic = virs->rs.rs_client->nic; ++ ++#if BUG7916_WORKAROUND ++ if (virs->rx_flush_outstanding) { ++ EFRM_TRACE("%s: Retrying RX flush on instance %d", ++ __func__, instance); ++ ++ list_del(&virs->rx_flush_link); ++ efrm_vi_manager->rx_flush_outstanding_count--; ++ efrm_vi_resource_issue_rx_flush(virs, completed); ++ efrm_vi_resource_process_waiting_flushes(completed); ++ } ++#endif ++ ++#if BUG5302_WORKAROUND ++ if (virs->tx_flushing) { ++ if (virs->flush_count > 5) { ++ EFRM_TRACE("%s: VI resource stuck flush pending " ++ "(instance=%d, count=%d)", ++ __func__, instance, virs->flush_count); ++ falcon_clobber_tx_dma_ptrs(nic, instance); ++ } else { ++ EFRM_TRACE("%s: Retrying TX flush on instance %d", ++ __func__, instance); ++ } ++ ++ list_del(&virs->tx_flush_link); ++ efrm_vi_resource_issue_tx_flush(virs, 
completed);
++ }
++#endif
++}
++#endif
++
++int efrm_vi_resource_flush_retry(struct vi_resource *virs)
++{
++#if BUG7916_WORKAROUND || BUG5302_WORKAROUND
++ irq_flags_t lock_flags;
++ bool completed = false;
++
++ if (virs->rx_flushing == 0 && virs->tx_flushing == 0)
++ return -EALREADY;
++
++ spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags);
++ efrm_vi_resource_flush_retry_vi(virs, get_jiffies_64(), &completed);
++ spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags);
++
++ if (completed)
++ EFRM_VI_RM_DELAYED_FREE(efrm_vi_manager);
++#endif
++
++ return 0;
++}
++EXPORT_SYMBOL(efrm_vi_resource_flush_retry);
++
++#if BUG7916_WORKAROUND || BUG5302_WORKAROUND
++/* resource manager lock should be taken before this call */
++static void efrm_vi_handle_flush_loss(bool *completed)
++{
++ struct list_head *pos, *temp;
++ struct vi_resource *virs;
++ int64_t time_now, time_pending;
++
++ /* It's possible we miss flushes - the list is sorted in the order we
++ * generate flushes, see if any are very old. It's also possible
++ * that we decide an endpoint is flushed even though we've not
++ * received all the flush events. We *should* mark as
++ * completed, reclaim and loop again. ??
++ * THIS NEEDS BACKPORTING FROM THE FALCON branch
++ */
++ time_now = get_jiffies_64();
++
++#if BUG7916_WORKAROUND
++ list_for_each_safe(pos, temp,
++ &efrm_vi_manager->rx_flush_outstanding_list) {
++ virs = container_of(pos, struct vi_resource, rx_flush_link);
++
++ time_pending = time_now - virs->flush_time;
++
++ /* List entries are held in reverse chronological order. Only
++ * process the old ones. */
++ if (time_pending <= 0x100000000LL)
++ break;
++
++ efrm_vi_resource_flush_retry_vi(virs, time_now, completed);
++ }
++#endif
++
++#if BUG5302_WORKAROUND
++ list_for_each_safe(pos, temp,
++ &efrm_vi_manager->tx_flush_outstanding_list) {
++ virs = container_of(pos, struct vi_resource, tx_flush_link);
++
++ time_pending = time_now - virs->flush_time;
++
++ /* List entries are held in reverse chronological order.
++ * Only process the old ones. */
++ if (time_pending <= 0x100000000LL)
++ break;
++
++ efrm_vi_resource_flush_retry_vi(virs, time_now, completed);
++ }
++#endif
++}
++#endif
++
++void
++efrm_vi_register_flush_callback(struct vi_resource *virs,
++ void (*handler)(void *), void *arg)
++{
++ if (handler == NULL) {
++ virs->flush_callback_fn = handler;
++ wmb();
++ virs->flush_callback_arg = arg;
++ } else {
++ virs->flush_callback_arg = arg;
++ wmb();
++ virs->flush_callback_fn = handler;
++ }
++}
++EXPORT_SYMBOL(efrm_vi_register_flush_callback);
++
++int efrm_pt_flush(struct vi_resource *virs)
++{
++ int instance;
++ irq_flags_t lock_flags;
++ bool completed = false;
++
++ instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle);
++
++ EFRM_ASSERT(virs->rx_flushing == 0);
++ EFRM_ASSERT(virs->rx_flush_outstanding == 0);
++ EFRM_ASSERT(virs->tx_flushing == 0);
++
++ EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " EVQ=%d TXQ=%d RXQ=%d",
++ __func__, EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle),
++ virs->evq_capacity,
++ virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX],
++ virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX]);
++
++ spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags);
++
++ if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX] != 0)
++ virs->rx_flushing = 1;
++
++ if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX] != 0)
++ virs->tx_flushing = 1;
++
++ /* Clean up immediately if there are no flushes.
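++ *
++ * (A VI created with neither a TX nor an RX DMA queue has nothing
++ * to flush, so it can go straight onto the close_pending list.)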
*/
++ if (virs->rx_flushing == 0 && virs->tx_flushing == 0) {
++ list_add_tail(&virs->rx_flush_link,
++ &efrm_vi_manager->close_pending);
++ completed = true;
++ }
++
++ /* Issue the RX flush if possible or queue it for later. */
++ if (virs->rx_flushing) {
++#if BUG7916_WORKAROUND || BUG5302_WORKAROUND
++ if (efrm_vi_manager->rx_flush_outstanding_count >=
++ flush_fifo_hwm)
++ efrm_vi_handle_flush_loss(&completed);
++#endif
++ if (efrm_vi_manager->rx_flush_outstanding_count >=
++ flush_fifo_hwm) {
++ list_add_tail(&virs->rx_flush_link,
++ &efrm_vi_manager->rx_flush_waiting_list);
++ } else {
++ efrm_vi_resource_issue_rx_flush(virs, &completed);
++ }
++ }
++
++ /* Issue the TX flush. There's no limit to the number of
++ * outstanding TX flushes. */
++ if (virs->tx_flushing)
++ efrm_vi_resource_issue_tx_flush(virs, &completed);
++
++ virs->flush_time = get_jiffies_64();
++
++ spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags);
++
++ if (completed)
++ EFRM_VI_RM_DELAYED_FREE(efrm_vi_manager);
++
++ return 0;
++}
++EXPORT_SYMBOL(efrm_pt_flush);
++
++static void
++efrm_handle_rx_dmaq_flushed(struct efhw_nic *flush_nic, int instance,
++ bool *completed)
++{
++ struct list_head *pos, *temp;
++ struct vi_resource *virs;
++
++ list_for_each_safe(pos, temp,
++ &efrm_vi_manager->rx_flush_outstanding_list) {
++ virs = container_of(pos, struct vi_resource, rx_flush_link);
++
++ if (instance == EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle)) {
++ efrm_vi_resource_rx_flush_done(virs, completed);
++ efrm_vi_resource_process_waiting_flushes(completed);
++ return;
++ }
++ }
++ EFRM_TRACE("%s: Unhandled rx flush event, nic %d, instance %d",
++ __func__, flush_nic->index, instance);
++}
++
++static void
++efrm_handle_tx_dmaq_flushed(struct efhw_nic *flush_nic, int instance,
++ bool *completed)
++{
++ struct list_head *pos, *temp;
++ struct vi_resource *virs;
++
++ list_for_each_safe(pos, temp,
++ &efrm_vi_manager->tx_flush_outstanding_list) {
++ virs = container_of(pos, struct vi_resource, tx_flush_link);
++
++ if (instance == EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle)) {
++ efrm_vi_resource_tx_flush_done(virs, completed);
++ return;
++ }
++ }
++ EFRM_TRACE("%s: Unhandled tx flush event, nic %d, instance %d",
++ __func__, flush_nic->index, instance);
++}
++
++void
++efrm_handle_dmaq_flushed(struct efhw_nic *flush_nic, unsigned instance,
++ int rx_flush)
++{
++ irq_flags_t lock_flags;
++ bool completed = false;
++
++ EFRM_TRACE("%s: nic_i=%d instance=%d rx_flush=%d", __func__,
++ flush_nic->index, instance, rx_flush);
++
++ spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags);
++
++ if (rx_flush)
++ efrm_handle_rx_dmaq_flushed(flush_nic, instance, &completed);
++ else
++ efrm_handle_tx_dmaq_flushed(flush_nic, instance, &completed);
++
++#if BUG7916_WORKAROUND || BUG5302_WORKAROUND
++ efrm_vi_handle_flush_loss(&completed);
++#endif
++
++ spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags);
++
++ if (completed)
++ EFRM_VI_RM_DELAYED_FREE(efrm_vi_manager);
++}
++
++static void
++efrm_vi_rm_reinit_dmaqs(struct vi_resource *virs)
++{
++ struct efhw_nic *nic = virs->rs.rs_client->nic;
++
++ if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX] != 0)
++ efrm_vi_rm_init_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_TX, nic);
++ if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX])
++ efrm_vi_rm_init_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_RX, nic);
++}
++
++/* free any PT endpoints whose flush has now completed */
++void efrm_vi_rm_delayed_free(struct work_struct *data)
++{
++ irq_flags_t lock_flags;
++ struct
list_head close_pending; ++ struct vi_resource *virs; ++ ++ EFRM_RESOURCE_MANAGER_ASSERT_VALID(&efrm_vi_manager->rm); ++ ++ spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags); ++ list_replace_init(&efrm_vi_manager->close_pending, &close_pending); ++ spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags); ++ ++ EFRM_TRACE("%s: %p", __func__, efrm_vi_manager); ++ while (!list_empty(&close_pending)) { ++ virs = ++ list_entry(list_pop(&close_pending), struct vi_resource, ++ rx_flush_link); ++ EFRM_TRACE("%s: flushed VI instance=%d", __func__, ++ EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle)); ++ ++ if (virs->flush_callback_fn != NULL) { ++ efrm_vi_rm_reinit_dmaqs(virs); ++ virs->flush_callback_fn(virs->flush_callback_arg); ++ } else ++ efrm_vi_rm_free_flushed_resource(virs); ++ } ++} ++ ++void efrm_vi_rm_salvage_flushed_vis(void) ++{ ++#if BUG7916_WORKAROUND || BUG5302_WORKAROUND ++ irq_flags_t lock_flags; ++ bool completed; ++ ++ spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags); ++ efrm_vi_handle_flush_loss(&completed); ++ spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags); ++#endif ++ ++ efrm_vi_rm_delayed_free(&efrm_vi_manager->work_item); ++} ++ ++void efrm_vi_resource_free(struct vi_resource *virs) ++{ ++ efrm_vi_register_flush_callback(virs, NULL, NULL); ++ efrm_pt_flush(virs); ++} ++EXPORT_SYMBOL(efrm_vi_resource_free); ++ ++ ++void efrm_vi_resource_release(struct vi_resource *virs) ++{ ++ if (__efrm_resource_release(&virs->rs)) ++ efrm_vi_resource_free(virs); ++} ++EXPORT_SYMBOL(efrm_vi_resource_release); ++ ++/* ++ * vi: sw=8:ai:aw ++ */ +Index: head-2008-07-15/drivers/net/sfc/sfc_resource/vi_resource_manager.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-07-15/drivers/net/sfc/sfc_resource/vi_resource_manager.c 2008-07-17 16:18:07.000000000 +0200 +@@ -0,0 +1,231 @@ ++/**************************************************************************** ++ * Driver for Solarflare network controllers - ++ * resource management for Xen backend, OpenOnload, etc ++ * (including support for SFE4001 10GBT NIC) ++ * ++ * This file contains the VI resource manager. ++ * ++ * Copyright 2005-2007: Solarflare Communications Inc, ++ * 9501 Jeronimo Road, Suite 250, ++ * Irvine, CA 92618, USA ++ * ++ * Developed and maintained by Solarflare Communications: ++ * ++ * ++ * ++ * Certain parts of the driver were implemented by ++ * Alexandra Kossovsky ++ * OKTET Labs Ltd, Russia, ++ * http://oktetlabs.ru, ++ * by request of Solarflare Communications ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation, incorporated herein by reference. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ **************************************************************************** ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "efrm_internal.h" ++ ++ ++int efrm_pt_pace(struct vi_resource *virs, unsigned int val) ++{ ++ struct efhw_nic *nic = virs->rs.rs_client->nic; ++ int instance; ++ ++ EFRM_RESOURCE_ASSERT_VALID(&virs->rs, 0); ++ instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); ++ falcon_nic_pace(nic, instance, val); ++ EFRM_TRACE("%s[%d]=%d DONE", __func__, instance, val); ++ return 0; ++} ++EXPORT_SYMBOL(efrm_pt_pace); ++ ++/*** Resource manager creation/destruction *******************************/ ++ ++static void efrm_vi_rm_dtor(struct efrm_resource_manager *rm); ++ ++static int ++efrm_create_or_destroy_vi_resource_manager( ++ struct efrm_resource_manager **rm_in_out, ++ const struct vi_resource_dimensions *dims, ++ bool destroy) ++{ ++ struct vi_resource *virs; ++ struct list_head *pos, *temp; ++ struct list_head flush_pending; ++ irq_flags_t lock_flags; ++ int rc; ++ unsigned dmaq_min, dmaq_lim; ++ ++ EFRM_ASSERT(rm_in_out); ++ ++ if (destroy) ++ goto destroy; ++ ++ EFRM_ASSERT(dims); ++ EFRM_NOTICE("vi_resource_manager: evq_int=%u-%u evq_timer=%u-%u", ++ dims->evq_int_min, dims->evq_int_lim, ++ dims->evq_timer_min, dims->evq_timer_lim); ++ EFRM_NOTICE("vi_resource_manager: rxq=%u-%u txq=%u-%u", ++ dims->rxq_min, dims->rxq_lim, ++ dims->txq_min, dims->txq_lim); ++ ++ efrm_vi_manager = kmalloc(sizeof(*efrm_vi_manager), GFP_KERNEL); ++ if (efrm_vi_manager == NULL) { ++ rc = -ENOMEM; ++ goto fail_alloc; ++ } ++ ++ memset(efrm_vi_manager, 0, sizeof(*efrm_vi_manager)); ++ ++ efrm_vi_manager->iscsi_dmaq_instance_is_free = true; ++ ++ dmaq_min = max(dims->rxq_min, dims->txq_min); ++ dmaq_lim = min(dims->rxq_lim, dims->txq_lim); ++ ++ efrm_vi_manager->with_timer_base = ++ max(dmaq_min, dims->evq_timer_min); ++ efrm_vi_manager->with_timer_limit = ++ min(dmaq_lim, dims->evq_timer_lim); ++ rc = efrm_kfifo_id_ctor(&efrm_vi_manager->instances_with_timer, ++ efrm_vi_manager->with_timer_base, ++ efrm_vi_manager->with_timer_limit, ++ &efrm_vi_manager->rm.rm_lock); ++ if (rc < 0) ++ goto fail_with_timer_id_pool; ++ ++ efrm_vi_manager->with_interrupt_base = ++ max(dmaq_min, dims->evq_int_min); ++ efrm_vi_manager->with_interrupt_limit = ++ min(dmaq_lim, dims->evq_int_lim); ++ efrm_vi_manager->with_interrupt_limit = ++ max(efrm_vi_manager->with_interrupt_limit, ++ efrm_vi_manager->with_interrupt_base); ++ rc = efrm_kfifo_id_ctor(&efrm_vi_manager->instances_with_interrupt, ++ efrm_vi_manager->with_interrupt_base, ++ efrm_vi_manager->with_interrupt_limit, ++ &efrm_vi_manager->rm.rm_lock); ++ if (rc < 0) ++ goto fail_with_int_id_pool; ++ ++ INIT_LIST_HEAD(&efrm_vi_manager->rx_flush_waiting_list); ++ INIT_LIST_HEAD(&efrm_vi_manager->rx_flush_outstanding_list); ++ INIT_LIST_HEAD(&efrm_vi_manager->tx_flush_outstanding_list); ++ efrm_vi_manager->rx_flush_outstanding_count = 0; ++ ++ INIT_LIST_HEAD(&efrm_vi_manager->close_pending); ++ efrm_vi_manager->workqueue = create_workqueue("sfc_vi"); ++ if (efrm_vi_manager->workqueue == NULL) ++ goto fail_create_workqueue; ++ INIT_WORK(&efrm_vi_manager->work_item, efrm_vi_rm_delayed_free); ++ ++ /* NB. This must be the last step to avoid things getting tangled. 
++ * efrm_resource_manager_dtor calls the vi_rm_dtor which ends up in
++ * this function. */
++ rc = efrm_resource_manager_ctor(&efrm_vi_manager->rm, efrm_vi_rm_dtor,
++ "VI", EFRM_RESOURCE_VI);
++ if (rc < 0)
++ goto fail_rm_ctor;
++
++ *rm_in_out = &efrm_vi_manager->rm;
++ return 0;
++
++destroy:
++ rc = 0;
++ EFRM_RESOURCE_MANAGER_ASSERT_VALID(*rm_in_out);
++
++ /* Abort outstanding flushes. Note, a VI resource can be on more
++ * than one of these lists. We handle this by starting with the TX
++ * list and then appending VIs to this list if they aren't on the TX
++ * list already. A VI is on the TX flush list if tx_flushing
++ * is nonzero. */
++ spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags);
++
++ list_replace_init(&efrm_vi_manager->tx_flush_outstanding_list,
++ &flush_pending);
++
++ list_for_each_safe(pos, temp,
++ &efrm_vi_manager->rx_flush_waiting_list) {
++ virs = container_of(pos, struct vi_resource, rx_flush_link);
++
++ list_del(&virs->rx_flush_link);
++ if (virs->tx_flushing == 0)
++ list_add_tail(&virs->tx_flush_link, &flush_pending);
++ }
++
++ list_for_each_safe(pos, temp,
++ &efrm_vi_manager->rx_flush_outstanding_list) {
++ virs = container_of(pos, struct vi_resource, rx_flush_link);
++
++ list_del(&virs->rx_flush_link);
++ if (virs->tx_flushing == 0)
++ list_add_tail(&virs->tx_flush_link, &flush_pending);
++ }
++
++ spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags);
++
++ while (!list_empty(&flush_pending)) {
++ virs =
++ list_entry(list_pop(&flush_pending), struct vi_resource,
++ tx_flush_link);
++ EFRM_TRACE("%s: found PT endpoint " EFRM_RESOURCE_FMT
++ " with flush pending [Tx=0x%x, Rx=0x%x, RxO=0x%x]",
++ __func__,
++ EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle),
++ virs->tx_flushing,
++ virs->rx_flushing,
++ virs->rx_flush_outstanding);
++ efrm_vi_rm_free_flushed_resource(virs);
++ }
++
++fail_rm_ctor:
++
++ /* Complete outstanding closes. */
++ destroy_workqueue(efrm_vi_manager->workqueue);
++fail_create_workqueue:
++ EFRM_ASSERT(list_empty(&efrm_vi_manager->close_pending));
++ kfifo_vfree(efrm_vi_manager->instances_with_interrupt);
++fail_with_int_id_pool:
++
++ kfifo_vfree(efrm_vi_manager->instances_with_timer);
++fail_with_timer_id_pool:
++
++ if (destroy)
++ return 0;
++
++ EFRM_DO_DEBUG(memset(efrm_vi_manager, 0, sizeof(*efrm_vi_manager)));
++ kfree(efrm_vi_manager);
++fail_alloc:
++
++ *rm_in_out = NULL;
++ EFRM_ERR("%s: failed rc=%d", __func__, rc);
++ return rc;
++}
++
++int
++efrm_create_vi_resource_manager(struct efrm_resource_manager **rm_out,
++ const struct vi_resource_dimensions *dims)
++{
++ return efrm_create_or_destroy_vi_resource_manager(rm_out, dims, false);
++}
++
++static void efrm_vi_rm_dtor(struct efrm_resource_manager *rm)
++{
++ efrm_create_or_destroy_vi_resource_manager(&rm, NULL, true);
++}
diff --git a/src/patches/60007_sfc-driverlink-conditional.patch1 b/src/patches/60007_sfc-driverlink-conditional.patch1
new file mode 100644
index 000000000..412c5d538
--- /dev/null
+++ b/src/patches/60007_sfc-driverlink-conditional.patch1
@@ -0,0 +1,264 @@
+From: jbeulich@novell.com
+Subject: conditionalize driverlink additions to Solarflare driver
+Patch-mainline: obsolete
+References: FATE#303479
+
+As part of the same change, the EFX_TRACE() invocations after vetoed
+RX/TX callbacks were converted to ...LOG() ones, which is consistent
+with Solarflare's current code according to David Riddoch (2008-09-12).
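The mechanism this patch relies on is the standard compile-out idiom: when
the Kconfig option is off, the header supplies static inline no-op stubs and
an argument-swallowing log macro, so every call site builds unchanged and the
optimizer discards the dead calls. Below is a minimal, self-contained sketch
of the idiom in GNU C (matching the kernel's named-variadic macro style);
FEATURE_FOO, struct foo_dev and foo_register() are invented for illustration
and are not part of the driver:

	#include <stdio.h>

	/* Flip this on to build the feature in. */
	/* #define FEATURE_FOO 1 */

	struct foo_dev { int id; };

	#ifdef FEATURE_FOO

	static inline int foo_register(struct foo_dev *dev)
	{
		/* Real registration work would live here. */
		return dev->id >= 0 ? 0 : -1;
	}

	#define FOO_LOG(dev, fmt, args...) \
		fprintf(stderr, "foo%d: " fmt, (dev)->id, ##args)

	#else /* !FEATURE_FOO */

	/* No-op stubs: call sites compile unchanged, the optimizer drops them. */
	static inline int foo_register(struct foo_dev *dev)
	{
		(void)dev;
		return 0;
	}

	/* Reference the first argument, discard the rest. */
	#define FOO_LOG(dev, fmt, args...) ((void)(dev))

	#endif /* FEATURE_FOO */

	int main(void)
	{
		struct foo_dev d = { 0 };

		/* No #ifdef needed at the call site. */
		if (foo_register(&d) == 0)
			FOO_LOG(&d, "registered\n");
		return 0;
	}

The ((void)(dev)) body mirrors the patch's ((void)(efx)) definition of
EFX_DL_LOG(): it keeps the first argument referenced, so the stubbed
configuration does not trigger unused-variable warnings, while the format
string and remaining arguments are never evaluated at all.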
+ +Index: head-2008-09-01/drivers/net/sfc/Kconfig +=================================================================== +--- head-2008-09-01.orig/drivers/net/sfc/Kconfig 2008-07-17 16:18:07.000000000 +0200 ++++ head-2008-09-01/drivers/net/sfc/Kconfig 2008-09-12 14:01:48.000000000 +0200 +@@ -13,8 +13,12 @@ config SFC + To compile this driver as a module, choose M here. The module + will be called sfc. + ++config SFC_DRIVERLINK ++ bool ++ + config SFC_RESOURCE + depends on SFC && X86 ++ select SFC_DRIVERLINK + tristate "Solarflare Solarstorm SFC4000 resource driver" + help + This module provides the SFC resource manager driver. +Index: head-2008-09-01/drivers/net/sfc/Makefile +=================================================================== +--- head-2008-09-01.orig/drivers/net/sfc/Makefile 2008-07-17 16:18:07.000000000 +0200 ++++ head-2008-09-01/drivers/net/sfc/Makefile 2008-09-12 12:22:05.000000000 +0200 +@@ -1,7 +1,7 @@ + sfc-y += efx.o falcon.o tx.o rx.o falcon_xmac.o \ + selftest.o ethtool.o xfp_phy.o \ +- mdio_10g.o tenxpress.o boards.o sfe4001.o \ +- driverlink.o ++ mdio_10g.o tenxpress.o boards.o sfe4001.o ++sfc-$(CONFIG_SFC_DRIVERLINK) += driverlink.o + obj-$(CONFIG_SFC) += sfc.o + + obj-$(CONFIG_SFC_RESOURCE) += sfc_resource/ +Index: head-2008-09-01/drivers/net/sfc/driverlink.c +=================================================================== +--- head-2008-09-01.orig/drivers/net/sfc/driverlink.c 2008-08-18 10:16:46.000000000 +0200 ++++ head-2008-09-01/drivers/net/sfc/driverlink.c 2008-09-12 13:57:29.000000000 +0200 +@@ -14,7 +14,6 @@ + #include + #include "net_driver.h" + #include "efx.h" +-#include "driverlink_api.h" + #include "driverlink.h" + + /* Protects @efx_driverlink_lock and @efx_driver_list */ +Index: head-2008-09-01/drivers/net/sfc/driverlink.h +=================================================================== +--- head-2008-09-01.orig/drivers/net/sfc/driverlink.h 2008-08-18 10:16:46.000000000 +0200 ++++ head-2008-09-01/drivers/net/sfc/driverlink.h 2008-09-12 16:14:04.000000000 +0200 +@@ -15,6 +15,10 @@ + struct efx_dl_device; + struct efx_nic; + ++#ifdef CONFIG_SFC_DRIVERLINK ++ ++#include "driverlink_api.h" ++ + /* Efx callback devices + * + * A list of the devices that own each callback. The partner to +@@ -40,4 +44,23 @@ extern void efx_dl_unregister_nic(struct + extern void efx_dl_reset_suspend(struct efx_nic *efx); + extern void efx_dl_reset_resume(struct efx_nic *efx, int ok); + ++#define EFX_DL_LOG EFX_LOG ++ ++#else /* CONFIG_SFC_DRIVERLINK */ ++ ++enum efx_veto { EFX_ALLOW_PACKET = 0 }; ++ ++static inline int efx_nop_callback(struct efx_nic *efx) { return 0; } ++#define EFX_DL_CALLBACK(port, name, ...) efx_nop_callback(port) ++ ++static inline int efx_dl_register_nic(struct efx_nic *efx) { return 0; } ++static inline void efx_dl_unregister_nic(struct efx_nic *efx) {} ++ ++static inline void efx_dl_reset_suspend(struct efx_nic *efx) {} ++static inline void efx_dl_reset_resume(struct efx_nic *efx, int ok) {} ++ ++#define EFX_DL_LOG(efx, fmt, args...) 
((void)(efx)) ++ ++#endif /* CONFIG_SFC_DRIVERLINK */ ++ + #endif /* EFX_DRIVERLINK_H */ +Index: head-2008-09-01/drivers/net/sfc/efx.c +=================================================================== +--- head-2008-09-01.orig/drivers/net/sfc/efx.c 2008-08-18 10:16:46.000000000 +0200 ++++ head-2008-09-01/drivers/net/sfc/efx.c 2008-09-12 12:42:32.000000000 +0200 +@@ -1596,6 +1596,7 @@ static void efx_unregister_netdev(struct + * Device reset and suspend + * + **************************************************************************/ ++#ifdef CONFIG_SFC_DRIVERLINK + /* Serialise access to the driverlink callbacks, by quiescing event processing + * (without flushing the descriptor queues), and acquiring the rtnl_lock */ + void efx_suspend(struct efx_nic *efx) +@@ -1613,6 +1614,7 @@ void efx_resume(struct efx_nic *efx) + efx_start_all(efx); + rtnl_unlock(); + } ++#endif + + /* The final hardware and software finalisation before reset. */ + static int efx_reset_down(struct efx_nic *efx, struct ethtool_cmd *ecmd) +@@ -1899,9 +1901,11 @@ static int efx_init_struct(struct efx_ni + mutex_init(&efx->mac_lock); + efx->phy_op = &efx_dummy_phy_operations; + efx->mii.dev = net_dev; ++#ifdef CONFIG_SFC_DRIVERLINK + INIT_LIST_HEAD(&efx->dl_node); + INIT_LIST_HEAD(&efx->dl_device_list); + efx->dl_cb = efx_default_callbacks; ++#endif + INIT_WORK(&efx->reconfigure_work, efx_reconfigure_work); + atomic_set(&efx->netif_stop_count, 1); + +Index: head-2008-09-01/drivers/net/sfc/falcon.c +=================================================================== +--- head-2008-09-01.orig/drivers/net/sfc/falcon.c 2008-08-18 10:16:46.000000000 +0200 ++++ head-2008-09-01/drivers/net/sfc/falcon.c 2008-09-12 13:51:32.000000000 +0200 +@@ -36,12 +36,17 @@ + + /** + * struct falcon_nic_data - Falcon NIC state ++ * @next_buffer_table: First available buffer table id + * @resources: Resource information for driverlink client + * @pci_dev2: The secondary PCI device if present + * @i2c_data: Operations and state for I2C bit-bashing algorithm + */ + struct falcon_nic_data { ++#ifndef CONFIG_SFC_DRIVERLINK ++ unsigned next_buffer_table; ++#else + struct efx_dl_falcon_resources resources; ++#endif + struct pci_dev *pci_dev2; + struct i2c_algo_bit_data i2c_data; + }; +@@ -322,8 +327,13 @@ static int falcon_alloc_special_buffer(s + memset(buffer->addr, 0xff, len); + + /* Select new buffer ID */ ++#ifndef CONFIG_SFC_DRIVERLINK ++ buffer->index = nic_data->next_buffer_table; ++ nic_data->next_buffer_table += buffer->entries; ++#else + buffer->index = nic_data->resources.buffer_table_min; + nic_data->resources.buffer_table_min += buffer->entries; ++#endif + + EFX_LOG(efx, "allocating special buffers %d-%d at %llx+%x " + "(virt %p phys %lx)\n", buffer->index, +@@ -2382,6 +2392,7 @@ static int falcon_probe_nvconfig(struct + * should live. 
*/ + static int falcon_dimension_resources(struct efx_nic *efx) + { ++#ifdef CONFIG_SFC_DRIVERLINK + unsigned internal_dcs_entries; + struct falcon_nic_data *nic_data = efx->nic_data; + struct efx_dl_falcon_resources *res = &nic_data->resources; +@@ -2426,6 +2437,7 @@ static int falcon_dimension_resources(st + + if (EFX_INT_MODE_USE_MSI(efx)) + res->flags |= EFX_DL_FALCON_USE_MSI; ++#endif + + return 0; + } +@@ -2551,7 +2563,9 @@ int falcon_probe_nic(struct efx_nic *efx + return 0; + + fail6: ++#ifdef CONFIG_SFC_DRIVERLINK + efx->dl_info = NULL; ++#endif + fail5: + falcon_free_buffer(efx, &efx->irq_status); + fail4: +@@ -2742,7 +2756,9 @@ void falcon_remove_nic(struct efx_nic *e + /* Tear down the private nic state */ + kfree(efx->nic_data); + efx->nic_data = NULL; ++#ifdef CONFIG_SFC_DRIVERLINK + efx->dl_info = NULL; ++#endif + } + + void falcon_update_nic_stats(struct efx_nic *efx) +Index: head-2008-09-01/drivers/net/sfc/net_driver.h +=================================================================== +--- head-2008-09-01.orig/drivers/net/sfc/net_driver.h 2008-08-18 10:16:46.000000000 +0200 ++++ head-2008-09-01/drivers/net/sfc/net_driver.h 2008-09-12 13:57:19.000000000 +0200 +@@ -30,7 +30,6 @@ + + #include "enum.h" + #include "bitfield.h" +-#include "driverlink_api.h" + #include "driverlink.h" + + #define EFX_MAX_LRO_DESCRIPTORS 8 +@@ -762,11 +761,13 @@ struct efx_nic { + void *loopback_selftest; + + const char *silicon_rev; ++#ifdef CONFIG_SFC_DRIVERLINK + struct efx_dl_device_info *dl_info; + struct list_head dl_node; + struct list_head dl_device_list; + struct efx_dl_callbacks dl_cb; + struct efx_dl_cb_devices dl_cb_dev; ++#endif + }; + + static inline int efx_dev_registered(struct efx_nic *efx) +Index: head-2008-09-01/drivers/net/sfc/rx.c +=================================================================== +--- head-2008-09-01.orig/drivers/net/sfc/rx.c 2008-08-18 10:16:46.000000000 +0200 ++++ head-2008-09-01/drivers/net/sfc/rx.c 2008-09-12 16:13:49.000000000 +0200 +@@ -559,8 +559,8 @@ static inline void efx_rx_packet_lro(str + * an obvious interface to this, so veto packets before LRO */ + veto = EFX_DL_CALLBACK(efx, rx_packet, rx_buf->data, rx_buf->len); + if (unlikely(veto)) { +- EFX_TRACE(efx, "LRO RX vetoed by driverlink %s driver\n", +- efx->dl_cb_dev.rx_packet->driver->name); ++ EFX_DL_LOG(efx, "LRO RX vetoed by driverlink %s driver\n", ++ efx->dl_cb_dev.rx_packet->driver->name); + /* Free the buffer now */ + efx_free_rx_buffer(efx, rx_buf); + return; +@@ -741,8 +741,8 @@ void __efx_rx_packet(struct efx_channel + /* Allow callback to veto the packet */ + veto = EFX_DL_CALLBACK(efx, rx_packet, rx_buf->data, rx_buf->len); + if (unlikely(veto)) { +- EFX_LOG(efx, "RX vetoed by driverlink %s driver\n", +- efx->dl_cb_dev.rx_packet->driver->name); ++ EFX_DL_LOG(efx, "RX vetoed by driverlink %s driver\n", ++ efx->dl_cb_dev.rx_packet->driver->name); + /* Free the buffer now */ + efx_free_rx_buffer(efx, rx_buf); + goto done; +Index: head-2008-09-01/drivers/net/sfc/tx.c +=================================================================== +--- head-2008-09-01.orig/drivers/net/sfc/tx.c 2008-08-18 10:16:46.000000000 +0200 ++++ head-2008-09-01/drivers/net/sfc/tx.c 2008-09-12 16:13:34.000000000 +0200 +@@ -374,9 +374,9 @@ int efx_hard_start_xmit(struct sk_buff * + /* See if driverlink wants to veto the packet. 
*/ + veto = EFX_DL_CALLBACK(efx, tx_packet, skb); + if (unlikely(veto)) { +- EFX_TRACE(efx, "TX queue %d packet vetoed by " +- "driverlink %s driver\n", tx_queue->queue, +- efx->dl_cb_dev.tx_packet->driver->name); ++ EFX_DL_LOG(efx, "TX queue %d packet vetoed by " ++ "driverlink %s driver\n", tx_queue->queue, ++ efx->dl_cb_dev.tx_packet->driver->name); + /* Free the skb; nothing else will do it */ + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; diff --git a/src/patches/60008_xen3-auto-xen-arch.patch1 b/src/patches/60008_xen3-auto-xen-arch.patch1 new file mode 100644 index 000000000..01720d409 --- /dev/null +++ b/src/patches/60008_xen3-auto-xen-arch.patch1 @@ -0,0 +1,47466 @@ +Subject: xen3 xen-arch +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +List of files having Xen derivates (perhaps created during the merging +of newer kernel versions), for xen-port-patches.py to pick up (i.e. this +must be retained here until the XenSource tree has these in the right +places): ++++ linux/arch/x86/kernel/acpi/sleep-xen.c ++++ linux/arch/x86/kernel/cpu/common_64-xen.c ++++ linux/arch/x86/kernel/e820-xen.c ++++ linux/arch/x86/kernel/head-xen.c ++++ linux/arch/x86/kernel/head32-xen.c ++++ linux/arch/x86/kernel/ioport-xen.c ++++ linux/arch/x86/kernel/ipi-xen.c ++++ linux/arch/x86/kernel/ldt-xen.c ++++ linux/arch/x86/kernel/mpparse-xen.c ++++ linux/arch/x86/kernel/pci-nommu-xen.c ++++ linux/arch/x86/kernel/process-xen.c ++++ linux/arch/x86/kernel/setup-xen.c ++++ linux/arch/x86/kernel/setup_percpu-xen.c ++++ linux/arch/x86/kernel/smp-xen.c ++++ linux/arch/x86/mm/fault-xen.c ++++ linux/arch/x86/mm/ioremap-xen.c ++++ linux/arch/x86/mm/pageattr-xen.c ++++ linux/arch/x86/mm/pat-xen.c ++++ linux/arch/x86/mm/pgtable-xen.c ++++ linux/arch/x86/vdso/vdso32-setup-xen.c ++++ linux/drivers/char/mem-xen.c ++++ linux/include/asm-x86/mach-xen/asm/desc.h ++++ linux/include/asm-x86/mach-xen/asm/dma-mapping.h ++++ linux/include/asm-x86/mach-xen/asm/fixmap.h ++++ linux/include/asm-x86/mach-xen/asm/io.h ++++ linux/include/asm-x86/mach-xen/asm/irq_vectors.h ++++ linux/include/asm-x86/mach-xen/asm/irqflags.h ++++ linux/include/asm-x86/mach-xen/asm/mmu_context.h ++++ linux/include/asm-x86/mach-xen/asm/page.h ++++ linux/include/asm-x86/mach-xen/asm/pci.h ++++ linux/include/asm-x86/mach-xen/asm/pgalloc.h ++++ linux/include/asm-x86/mach-xen/asm/pgtable.h ++++ linux/include/asm-x86/mach-xen/asm/processor.h ++++ linux/include/asm-x86/mach-xen/asm/segment.h ++++ linux/include/asm-x86/mach-xen/asm/smp.h ++++ linux/include/asm-x86/mach-xen/asm/spinlock.h ++++ linux/include/asm-x86/mach-xen/asm/swiotlb.h ++++ linux/include/asm-x86/mach-xen/asm/system.h ++++ linux/include/asm-x86/mach-xen/asm/tlbflush.h ++++ linux/include/asm-x86/mach-xen/asm/xor.h + +List of files folded into their native counterparts (and hence removed +from this patch for xen-port-patches.py to not needlessly pick them up; +for reference, prefixed with the version the removal occured): +2.6.18/include/asm-x86/mach-xen/asm/pgtable-2level.h +2.6.18/include/asm-x86/mach-xen/asm/pgtable-2level-defs.h +2.6.19/include/asm-x86/mach-xen/asm/ptrace.h +2.6.23/arch/x86/kernel/vsyscall-note_32-xen.S +2.6.23/include/asm-x86/mach-xen/asm/ptrace_64.h +2.6.24/arch/x86/kernel/early_printk_32-xen.c +2.6.24/include/asm-x86/mach-xen/asm/arch_hooks_64.h +2.6.24/include/asm-x86/mach-xen/asm/bootsetup_64.h +2.6.24/include/asm-x86/mach-xen/asm/mmu_32.h +2.6.24/include/asm-x86/mach-xen/asm/mmu_64.h 
+2.6.24/include/asm-x86/mach-xen/asm/nmi_64.h +2.6.24/include/asm-x86/mach-xen/asm/setup.h +2.6.24/include/asm-x86/mach-xen/asm/time_64.h (added in 2.6.20) +2.6.25/arch/x86/ia32/syscall32-xen.c +2.6.25/arch/x86/ia32/syscall32_syscall-xen.S +2.6.25/arch/x86/ia32/vsyscall-int80.S +2.6.25/arch/x86/kernel/acpi/boot-xen.c +2.6.25/include/asm-x86/mach-xen/asm/msr.h +2.6.25/include/asm-x86/mach-xen/asm/page_32.h +2.6.25/include/asm-x86/mach-xen/asm/spinlock_32.h +2.6.25/include/asm-x86/mach-xen/asm/timer.h (added in 2.6.24) +2.6.25/include/asm-x86/mach-xen/asm/timer_64.h +2.6.26/arch/x86/kernel/pci-dma_32-xen.c +2.6.26/arch/x86/kernel/pci-swiotlb_64-xen.c +2.6.26/include/asm-x86/mach-xen/asm/dma-mapping_32.h +2.6.26/include/asm-x86/mach-xen/asm/dma-mapping_64.h +2.6.26/include/asm-x86/mach-xen/asm/nmi.h (added in 2.6.24) +2.6.26/include/asm-x86/mach-xen/asm/scatterlist.h (added in 2.6.24) +2.6.26/include/asm-x86/mach-xen/asm/scatterlist_32.h +2.6.26/include/xen/xencomm.h +2.6.27/arch/x86/kernel/e820_32-xen.c +2.6.27/include/asm-x86/mach-xen/asm/e820.h (added in 2.6.24) +2.6.27/include/asm-x86/mach-xen/asm/e820_64.h +2.6.27/include/asm-x86/mach-xen/asm/hw_irq.h (added in 2.6.24) +2.6.27/include/asm-x86/mach-xen/asm/hw_irq_32.h +2.6.27/include/asm-x86/mach-xen/asm/hw_irq_64.h +2.6.27/include/asm-x86/mach-xen/asm/irq.h (added in 2.6.24) +2.6.27/include/asm-x86/mach-xen/asm/irq_64.h + +Index: head-2008-11-25/arch/x86/kernel/acpi/processor_extcntl_xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/acpi/processor_extcntl_xen.c 2008-10-01 15:43:24.000000000 +0200 +@@ -0,0 +1,209 @@ ++/* ++ * processor_extcntl_xen.c - interface to notify Xen ++ * ++ * Copyright (C) 2008, Intel corporation ++ * ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at ++ * your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, write to the Free Software Foundation, Inc., ++ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++static int xen_cx_notifier(struct acpi_processor *pr, int action) ++{ ++ int ret, count = 0, i; ++ xen_platform_op_t op = { ++ .cmd = XENPF_set_processor_pminfo, ++ .interface_version = XENPF_INTERFACE_VERSION, ++ .u.set_pminfo.id = pr->acpi_id, ++ .u.set_pminfo.type = XEN_PM_CX, ++ }; ++ struct xen_processor_cx *data, *buf; ++ struct acpi_processor_cx *cx; ++ ++ if (action == PROCESSOR_PM_CHANGE) ++ return -EINVAL; ++ ++ /* Convert to Xen defined structure and hypercall */ ++ buf = kzalloc(pr->power.count * sizeof(struct xen_processor_cx), ++ GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ ++ data = buf; ++ for (i = 1; i <= pr->power.count; i++) { ++ cx = &pr->power.states[i]; ++ /* Skip invalid cstate entry */ ++ if (!cx->valid) ++ continue; ++ ++ data->type = cx->type; ++ data->latency = cx->latency; ++ data->power = cx->power; ++ data->reg.space_id = cx->reg.space_id; ++ data->reg.bit_width = cx->reg.bit_width; ++ data->reg.bit_offset = cx->reg.bit_offset; ++ data->reg.access_size = cx->reg.reserved; ++ data->reg.address = cx->reg.address; ++ ++ /* Get dependency relationships */ ++ if (cx->csd_count) { ++ printk("Wow! _CSD is found. Not support for now!\n"); ++ kfree(buf); ++ return -EINVAL; ++ } else { ++ data->dpcnt = 0; ++ set_xen_guest_handle(data->dp, NULL); ++ } ++ ++ data++; ++ count++; ++ } ++ ++ if (!count) { ++ printk("No available Cx info for cpu %d\n", pr->acpi_id); ++ kfree(buf); ++ return -EINVAL; ++ } ++ ++ op.u.set_pminfo.power.count = count; ++ op.u.set_pminfo.power.flags.bm_control = pr->flags.bm_control; ++ op.u.set_pminfo.power.flags.bm_check = pr->flags.bm_check; ++ op.u.set_pminfo.power.flags.has_cst = pr->flags.has_cst; ++ op.u.set_pminfo.power.flags.power_setup_done = pr->flags.power_setup_done; ++ ++ set_xen_guest_handle(op.u.set_pminfo.power.states, buf); ++ ret = HYPERVISOR_platform_op(&op); ++ kfree(buf); ++ return ret; ++} ++ ++static int xen_px_notifier(struct acpi_processor *pr, int action) ++{ ++ int ret = -EINVAL; ++ xen_platform_op_t op = { ++ .cmd = XENPF_set_processor_pminfo, ++ .interface_version = XENPF_INTERFACE_VERSION, ++ .u.set_pminfo.id = pr->acpi_id, ++ .u.set_pminfo.type = XEN_PM_PX, ++ }; ++ struct xen_processor_performance *perf; ++ struct xen_processor_px *states = NULL; ++ struct acpi_processor_performance *px; ++ struct acpi_psd_package *pdomain; ++ ++ if (!pr) ++ return -EINVAL; ++ ++ perf = &op.u.set_pminfo.perf; ++ px = pr->performance; ++ ++ switch(action) { ++ case PROCESSOR_PM_CHANGE: ++ /* ppc dynamic handle */ ++ perf->flags = XEN_PX_PPC; ++ perf->platform_limit = pr->performance_platform_limit; ++ ++ ret = HYPERVISOR_platform_op(&op); ++ break; ++ ++ case PROCESSOR_PM_INIT: ++ /* px normal init */ ++ perf->flags = XEN_PX_PPC | ++ XEN_PX_PCT | ++ XEN_PX_PSS | ++ XEN_PX_PSD; ++ ++ /* ppc */ ++ perf->platform_limit = pr->performance_platform_limit; ++ ++ /* pct */ ++ xen_convert_pct_reg(&perf->control_register, &px->control_register); ++ xen_convert_pct_reg(&perf->status_register, &px->status_register); ++ ++ /* pss */ ++ perf->state_count = px->state_count; ++ states = kzalloc(px->state_count*sizeof(xen_processor_px_t),GFP_KERNEL); ++ if (!states) ++ return -ENOMEM; ++ xen_convert_pss_states(states, px->states, px->state_count); ++ set_xen_guest_handle(perf->states, states); ++ ++ /* psd */ ++ pdomain = &px->domain_info; ++ xen_convert_psd_pack(&perf->domain_info, pdomain); ++ if (pdomain->coord_type == 
DOMAIN_COORD_TYPE_SW_ALL) ++ perf->shared_type = CPUFREQ_SHARED_TYPE_ALL; ++ else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) ++ perf->shared_type = CPUFREQ_SHARED_TYPE_ANY; ++ else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) ++ perf->shared_type = CPUFREQ_SHARED_TYPE_HW; ++ else { ++ ret = -ENODEV; ++ kfree(states); ++ break; ++ } ++ ++ ret = HYPERVISOR_platform_op(&op); ++ kfree(states); ++ break; ++ ++ default: ++ break; ++ } ++ ++ return ret; ++} ++ ++static int xen_tx_notifier(struct acpi_processor *pr, int action) ++{ ++ return -EINVAL; ++} ++static int xen_hotplug_notifier(struct acpi_processor *pr, int event) ++{ ++ return -EINVAL; ++} ++ ++static struct processor_extcntl_ops xen_extcntl_ops = { ++ .hotplug = xen_hotplug_notifier, ++}; ++ ++void arch_acpi_processor_init_extcntl(const struct processor_extcntl_ops **ops) ++{ ++ unsigned int pmbits = (xen_start_info->flags & SIF_PM_MASK) >> 8; ++ ++ if (!pmbits) ++ return; ++ if (pmbits & XEN_PROCESSOR_PM_CX) ++ xen_extcntl_ops.pm_ops[PM_TYPE_IDLE] = xen_cx_notifier; ++ if (pmbits & XEN_PROCESSOR_PM_PX) ++ xen_extcntl_ops.pm_ops[PM_TYPE_PERF] = xen_px_notifier; ++ if (pmbits & XEN_PROCESSOR_PM_TX) ++ xen_extcntl_ops.pm_ops[PM_TYPE_THR] = xen_tx_notifier; ++ ++ *ops = &xen_extcntl_ops; ++} ++EXPORT_SYMBOL(arch_acpi_processor_init_extcntl); +Index: head-2008-11-25/arch/x86/kernel/acpi/sleep_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/acpi/sleep_32-xen.c 2008-04-15 09:29:41.000000000 +0200 +@@ -0,0 +1,113 @@ ++/* ++ * sleep.c - x86-specific ACPI sleep support. ++ * ++ * Copyright (C) 2001-2003 Patrick Mochel ++ * Copyright (C) 2001-2003 Pavel Machek ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifndef CONFIG_ACPI_PV_SLEEP ++/* address in low memory of the wakeup routine. */ ++unsigned long acpi_wakeup_address = 0; ++unsigned long acpi_video_flags; ++extern char wakeup_start, wakeup_end; ++ ++extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); ++#endif ++ ++/** ++ * acpi_save_state_mem - save kernel state ++ * ++ * Create an identity mapped page table and copy the wakeup routine to ++ * low memory. ++ */ ++int acpi_save_state_mem(void) ++{ ++#ifndef CONFIG_ACPI_PV_SLEEP ++ if (!acpi_wakeup_address) ++ return 1; ++ memcpy((void *)acpi_wakeup_address, &wakeup_start, ++ &wakeup_end - &wakeup_start); ++ acpi_copy_wakeup_routine(acpi_wakeup_address); ++#endif ++ return 0; ++} ++ ++/* ++ * acpi_restore_state - undo effects of acpi_save_state_mem ++ */ ++void acpi_restore_state_mem(void) ++{ ++} ++ ++/** ++ * acpi_reserve_bootmem - do _very_ early ACPI initialisation ++ * ++ * We allocate a page from the first 1MB of memory for the wakeup ++ * routine for when we come back from a sleep state. The ++ * runtime allocator allows specification of <16MB pages, but not ++ * <1MB pages. 
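++ *
++ * (When CONFIG_ACPI_PV_SLEEP is set the body below is compiled out
++ * and no wakeup page is reserved at all.)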
++ */ ++void __init acpi_reserve_bootmem(void) ++{ ++#ifndef CONFIG_ACPI_PV_SLEEP ++ if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) { ++ printk(KERN_ERR ++ "ACPI: Wakeup code way too big, S3 disabled.\n"); ++ return; ++ } ++ ++ acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); ++ if (!acpi_wakeup_address) ++ printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); ++#endif ++} ++ ++#ifndef CONFIG_ACPI_PV_SLEEP ++static int __init acpi_sleep_setup(char *str) ++{ ++ while ((str != NULL) && (*str != '\0')) { ++ if (strncmp(str, "s3_bios", 7) == 0) ++ acpi_video_flags = 1; ++ if (strncmp(str, "s3_mode", 7) == 0) ++ acpi_video_flags |= 2; ++ str = strchr(str, ','); ++ if (str != NULL) ++ str += strspn(str, ", \t"); ++ } ++ return 1; ++} ++ ++__setup("acpi_sleep=", acpi_sleep_setup); ++ ++static __init int reset_videomode_after_s3(struct dmi_system_id *d) ++{ ++ acpi_video_flags |= 2; ++ return 0; ++} ++ ++static __initdata struct dmi_system_id acpisleep_dmi_table[] = { ++ { /* Reset video mode after returning from ACPI S3 sleep */ ++ .callback = reset_videomode_after_s3, ++ .ident = "Toshiba Satellite 4030cdt", ++ .matches = { ++ DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), ++ }, ++ }, ++ {} ++}; ++ ++static int __init acpisleep_dmi_init(void) ++{ ++ dmi_check_system(acpisleep_dmi_table); ++ return 0; ++} ++ ++core_initcall(acpisleep_dmi_init); ++#endif /* CONFIG_ACPI_PV_SLEEP */ +Index: head-2008-11-25/arch/x86/kernel/apic_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/apic_32-xen.c 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,155 @@ ++/* ++ * Local APIC handling, local APIC timers ++ * ++ * (c) 1999, 2000 Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively. ++ * Maciej W. Rozycki : Various updates and fixes. ++ * Mikael Pettersson : Power Management for UP-APIC. ++ * Pavel Machek and ++ * Mikael Pettersson : PM converted to driver model. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "io_ports.h" ++ ++#ifndef CONFIG_XEN ++/* ++ * cpu_mask that denotes the CPUs that needs timer interrupt coming in as ++ * IPIs in place of local APIC timers ++ */ ++static cpumask_t timer_bcast_ipi; ++#endif ++ ++/* ++ * Knob to control our willingness to enable the local APIC. ++ */ ++int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ ++ ++/* ++ * Debug level ++ */ ++int apic_verbosity; ++ ++#ifndef CONFIG_XEN ++static int modern_apic(void) ++{ ++ unsigned int lvr, version; ++ /* AMD systems use old APIC versions, so check the CPU */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && ++ boot_cpu_data.x86 >= 0xf) ++ return 1; ++ lvr = apic_read(APIC_LVR); ++ version = GET_APIC_VERSION(lvr); ++ return version >= 0x14; ++} ++#endif /* !CONFIG_XEN */ ++ ++/* ++ * 'what should we do if we get a hw irq event on an illegal vector'. ++ * each architecture has to answer this themselves. ++ */ ++void ack_bad_irq(unsigned int irq) ++{ ++ printk("unexpected IRQ trap at vector %02x\n", irq); ++ /* ++ * Currently unexpected vectors happen only on SMP and APIC. 
++ * We _must_ ack these because every local APIC has only N ++ * irq slots per priority level, and a 'hanging, unacked' IRQ ++ * holds up an irq slot - in excessive cases (when multiple ++ * unexpected vectors occur) that might lock up the APIC ++ * completely. ++ * But only ack when the APIC is enabled -AK ++ */ ++ if (cpu_has_apic) ++ ack_APIC_irq(); ++} ++ ++int get_physical_broadcast(void) ++{ ++ return 0xff; ++} ++ ++#ifndef CONFIG_XEN ++#ifndef CONFIG_SMP ++static void up_apic_timer_interrupt_call(struct pt_regs *regs) ++{ ++ int cpu = smp_processor_id(); ++ ++ /* ++ * the NMI deadlock-detector uses this. ++ */ ++ per_cpu(irq_stat, cpu).apic_timer_irqs++; ++ ++ smp_local_timer_interrupt(regs); ++} ++#endif ++ ++void smp_send_timer_broadcast_ipi(struct pt_regs *regs) ++{ ++ cpumask_t mask; ++ ++ cpus_and(mask, cpu_online_map, timer_bcast_ipi); ++ if (!cpus_empty(mask)) { ++#ifdef CONFIG_SMP ++ send_IPI_mask(mask, LOCAL_TIMER_VECTOR); ++#else ++ /* ++ * We can directly call the apic timer interrupt handler ++ * in UP case. Minus all irq related functions ++ */ ++ up_apic_timer_interrupt_call(regs); ++#endif ++ } ++} ++#endif ++ ++int setup_profiling_timer(unsigned int multiplier) ++{ ++ return -EINVAL; ++} ++ ++/* ++ * This initializes the IO-APIC and APIC hardware if this is ++ * a UP kernel. ++ */ ++int __init APIC_init_uniprocessor (void) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ if (smp_found_config) ++ if (!skip_ioapic_setup && nr_ioapics) ++ setup_IO_APIC(); ++#endif ++ ++ return 0; ++} +Index: head-2008-11-25/arch/x86/kernel/cpu/common-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/cpu/common-xen.c 2007-12-10 08:47:31.000000000 +0100 +@@ -0,0 +1,743 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_LOCAL_APIC ++#include ++#include ++#include ++#else ++#ifdef CONFIG_XEN ++#define phys_pkg_id(a,b) a ++#endif ++#endif ++#include ++ ++#include "cpu.h" ++ ++DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); ++EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); ++ ++#ifndef CONFIG_XEN ++DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); ++EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); ++#endif ++ ++static int cachesize_override __cpuinitdata = -1; ++static int disable_x86_fxsr __cpuinitdata; ++static int disable_x86_serial_nr __cpuinitdata = 1; ++static int disable_x86_sep __cpuinitdata; ++ ++struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; ++ ++extern int disable_pse; ++ ++static void default_init(struct cpuinfo_x86 * c) ++{ ++ /* Not much we can do here... */ ++ /* Check if at least it has cpuid */ ++ if (c->cpuid_level == -1) { ++ /* No cpuid. 
It must be an ancient CPU */ ++ if (c->x86 == 4) ++ strcpy(c->x86_model_id, "486"); ++ else if (c->x86 == 3) ++ strcpy(c->x86_model_id, "386"); ++ } ++} ++ ++static struct cpu_dev default_cpu = { ++ .c_init = default_init, ++ .c_vendor = "Unknown", ++}; ++static struct cpu_dev * this_cpu = &default_cpu; ++ ++static int __init cachesize_setup(char *str) ++{ ++ get_option (&str, &cachesize_override); ++ return 1; ++} ++__setup("cachesize=", cachesize_setup); ++ ++int __cpuinit get_model_name(struct cpuinfo_x86 *c) ++{ ++ unsigned int *v; ++ char *p, *q; ++ ++ if (cpuid_eax(0x80000000) < 0x80000004) ++ return 0; ++ ++ v = (unsigned int *) c->x86_model_id; ++ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); ++ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); ++ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); ++ c->x86_model_id[48] = 0; ++ ++ /* Intel chips right-justify this string for some dumb reason; ++ undo that brain damage */ ++ p = q = &c->x86_model_id[0]; ++ while ( *p == ' ' ) ++ p++; ++ if ( p != q ) { ++ while ( *p ) ++ *q++ = *p++; ++ while ( q <= &c->x86_model_id[48] ) ++ *q++ = '\0'; /* Zero-pad the rest */ ++ } ++ ++ return 1; ++} ++ ++ ++void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) ++{ ++ unsigned int n, dummy, ecx, edx, l2size; ++ ++ n = cpuid_eax(0x80000000); ++ ++ if (n >= 0x80000005) { ++ cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); ++ printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", ++ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); ++ c->x86_cache_size=(ecx>>24)+(edx>>24); ++ } ++ ++ if (n < 0x80000006) /* Some chips just has a large L1. */ ++ return; ++ ++ ecx = cpuid_ecx(0x80000006); ++ l2size = ecx >> 16; ++ ++ /* do processor-specific cache resizing */ ++ if (this_cpu->c_size_cache) ++ l2size = this_cpu->c_size_cache(c,l2size); ++ ++ /* Allow user to override all this if necessary. */ ++ if (cachesize_override != -1) ++ l2size = cachesize_override; ++ ++ if ( l2size == 0 ) ++ return; /* Again, no L2 cache is possible */ ++ ++ c->x86_cache_size = l2size; ++ ++ printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", ++ l2size, ecx & 0xFF); ++} ++ ++/* Naming convention should be: [()] */ ++/* This table only is used unless init_() below doesn't set it; */ ++/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ ++ ++/* Look up CPU names by table lookup. 
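get_model_name() above assembles the 48-byte CPU brand string from CPUID leaves 0x80000002..0x80000004, then shifts it left to undo Intel's space padding. The same steps as a hedged user-space sketch built on GCC's <cpuid.h> helpers (the function name and main() are illustrative):

#include <stdio.h>
#include <string.h>
#include <cpuid.h>

static int get_brand_string(char buf[49])
{
    unsigned int v[12];
    unsigned int i;
    size_t pad;

    if (__get_cpuid_max(0x80000000, NULL) < 0x80000004)
        return 0;
    for (i = 0; i < 3; i++)
        __get_cpuid(0x80000002 + i, &v[i * 4 + 0], &v[i * 4 + 1],
                    &v[i * 4 + 2], &v[i * 4 + 3]);
    memcpy(buf, v, 48);
    buf[48] = '\0';

    /* Left-justify, as the while loops above do. */
    pad = strspn(buf, " ");
    memmove(buf, buf + pad, strlen(buf + pad) + 1);
    return 1;
}

int main(void)
{
    char brand[49];

    if (get_brand_string(brand))
        printf("%s\n", brand);
    return 0;
}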
*/ ++static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) ++{ ++ struct cpu_model_info *info; ++ ++ if ( c->x86_model >= 16 ) ++ return NULL; /* Range check */ ++ ++ if (!this_cpu) ++ return NULL; ++ ++ info = this_cpu->c_models; ++ ++ while (info && info->family) { ++ if (info->family == c->x86) ++ return info->model_names[c->x86_model]; ++ info++; ++ } ++ return NULL; /* Not found */ ++} ++ ++ ++static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) ++{ ++ char *v = c->x86_vendor_id; ++ int i; ++ static int printed; ++ ++ for (i = 0; i < X86_VENDOR_NUM; i++) { ++ if (cpu_devs[i]) { ++ if (!strcmp(v,cpu_devs[i]->c_ident[0]) || ++ (cpu_devs[i]->c_ident[1] && ++ !strcmp(v,cpu_devs[i]->c_ident[1]))) { ++ c->x86_vendor = i; ++ if (!early) ++ this_cpu = cpu_devs[i]; ++ return; ++ } ++ } ++ } ++ if (!printed) { ++ printed++; ++ printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); ++ printk(KERN_ERR "CPU: Your system may be unstable.\n"); ++ } ++ c->x86_vendor = X86_VENDOR_UNKNOWN; ++ this_cpu = &default_cpu; ++} ++ ++ ++static int __init x86_fxsr_setup(char * s) ++{ ++ disable_x86_fxsr = 1; ++ return 1; ++} ++__setup("nofxsr", x86_fxsr_setup); ++ ++ ++static int __init x86_sep_setup(char * s) ++{ ++ disable_x86_sep = 1; ++ return 1; ++} ++__setup("nosep", x86_sep_setup); ++ ++ ++/* Standard macro to see if a specific flag is changeable */ ++static inline int flag_is_changeable_p(u32 flag) ++{ ++ u32 f1, f2; ++ ++ asm("pushfl\n\t" ++ "pushfl\n\t" ++ "popl %0\n\t" ++ "movl %0,%1\n\t" ++ "xorl %2,%0\n\t" ++ "pushl %0\n\t" ++ "popfl\n\t" ++ "pushfl\n\t" ++ "popl %0\n\t" ++ "popfl\n\t" ++ : "=&r" (f1), "=&r" (f2) ++ : "ir" (flag)); ++ ++ return ((f1^f2) & flag) != 0; ++} ++ ++ ++/* Probe for the CPUID instruction */ ++static int __cpuinit have_cpuid_p(void) ++{ ++ return flag_is_changeable_p(X86_EFLAGS_ID); ++} ++ ++/* Do minimum CPU detection early. ++ Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. ++ The others are not touched to avoid unwanted side effects. ++ ++ WARNING: this function is only called on the BP. Don't add code here ++ that is supposed to run on all CPUs. 
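flag_is_changeable_p() above is the classic pre-CPUID probe: flip a bit in EFLAGS and see whether the change sticks. Toggling the ID bit tests for CPUID support, and identify_cpu() later uses the AC bit to tell a 486 from a 386. A standalone 32-bit-only sketch (compile with -m32; the asm mirrors the hunk above):

#include <stdio.h>

#define X86_EFLAGS_ID 0x00200000u   /* CPUID-present bit */

/* i386 only: flip `flag` in EFLAGS and report whether it stuck. */
static int flag_is_changeable(unsigned int flag)
{
    unsigned int f1, f2;

    asm volatile("pushfl\n\t"
                 "pushfl\n\t"
                 "popl %0\n\t"
                 "movl %0,%1\n\t"
                 "xorl %2,%0\n\t"
                 "pushl %0\n\t"
                 "popfl\n\t"
                 "pushfl\n\t"
                 "popl %0\n\t"
                 "popfl\n\t"
                 : "=&r" (f1), "=&r" (f2)
                 : "ir" (flag));
    return ((f1 ^ f2) & flag) != 0;
}

int main(void)
{
    printf("cpuid available: %d\n", flag_is_changeable(X86_EFLAGS_ID));
    return 0;
}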
*/ ++static void __init early_cpu_detect(void) ++{ ++ struct cpuinfo_x86 *c = &boot_cpu_data; ++ ++ c->x86_cache_alignment = 32; ++ ++ if (!have_cpuid_p()) ++ return; ++ ++ /* Get vendor name */ ++ cpuid(0x00000000, &c->cpuid_level, ++ (int *)&c->x86_vendor_id[0], ++ (int *)&c->x86_vendor_id[8], ++ (int *)&c->x86_vendor_id[4]); ++ ++ get_cpu_vendor(c, 1); ++ ++ c->x86 = 4; ++ if (c->cpuid_level >= 0x00000001) { ++ u32 junk, tfms, cap0, misc; ++ cpuid(0x00000001, &tfms, &misc, &junk, &cap0); ++ c->x86 = (tfms >> 8) & 15; ++ c->x86_model = (tfms >> 4) & 15; ++ if (c->x86 == 0xf) ++ c->x86 += (tfms >> 20) & 0xff; ++ if (c->x86 >= 0x6) ++ c->x86_model += ((tfms >> 16) & 0xF) << 4; ++ c->x86_mask = tfms & 15; ++ if (cap0 & (1<<19)) ++ c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; ++ } ++} ++ ++void __cpuinit generic_identify(struct cpuinfo_x86 * c) ++{ ++ u32 tfms, xlvl; ++ int ebx; ++ ++ if (have_cpuid_p()) { ++ /* Get vendor name */ ++ cpuid(0x00000000, &c->cpuid_level, ++ (int *)&c->x86_vendor_id[0], ++ (int *)&c->x86_vendor_id[8], ++ (int *)&c->x86_vendor_id[4]); ++ ++ get_cpu_vendor(c, 0); ++ /* Initialize the standard set of capabilities */ ++ /* Note that the vendor-specific code below might override */ ++ ++ /* Intel-defined flags: level 0x00000001 */ ++ if ( c->cpuid_level >= 0x00000001 ) { ++ u32 capability, excap; ++ cpuid(0x00000001, &tfms, &ebx, &excap, &capability); ++ c->x86_capability[0] = capability; ++ c->x86_capability[4] = excap; ++ c->x86 = (tfms >> 8) & 15; ++ c->x86_model = (tfms >> 4) & 15; ++ if (c->x86 == 0xf) ++ c->x86 += (tfms >> 20) & 0xff; ++ if (c->x86 >= 0x6) ++ c->x86_model += ((tfms >> 16) & 0xF) << 4; ++ c->x86_mask = tfms & 15; ++#ifdef CONFIG_X86_HT ++ c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); ++#else ++ c->apicid = (ebx >> 24) & 0xFF; ++#endif ++ } else { ++ /* Have CPUID level 0 only - unheard of */ ++ c->x86 = 4; ++ } ++ ++ /* AMD-defined flags: level 0x80000001 */ ++ xlvl = cpuid_eax(0x80000000); ++ if ( (xlvl & 0xffff0000) == 0x80000000 ) { ++ if ( xlvl >= 0x80000001 ) { ++ c->x86_capability[1] = cpuid_edx(0x80000001); ++ c->x86_capability[6] = cpuid_ecx(0x80000001); ++ } ++ if ( xlvl >= 0x80000004 ) ++ get_model_name(c); /* Default name */ ++ } ++ } ++ ++ early_intel_workaround(c); ++ ++#ifdef CONFIG_X86_HT ++ c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; ++#endif ++} ++ ++static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) ++{ ++ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { ++ /* Disable processor serial number */ ++ unsigned long lo,hi; ++ rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); ++ lo |= 0x200000; ++ wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); ++ printk(KERN_NOTICE "CPU serial number disabled.\n"); ++ clear_bit(X86_FEATURE_PN, c->x86_capability); ++ ++ /* Disabling the serial number may affect the cpuid level */ ++ c->cpuid_level = cpuid_eax(0); ++ } ++} ++ ++static int __init x86_serial_nr_setup(char *s) ++{ ++ disable_x86_serial_nr = 0; ++ return 1; ++} ++__setup("serialnumber", x86_serial_nr_setup); ++ ++ ++ ++/* ++ * This does the hard work of actually picking apart the CPU stuff... ++ */ ++void __cpuinit identify_cpu(struct cpuinfo_x86 *c) ++{ ++ int i; ++ ++ c->loops_per_jiffy = loops_per_jiffy; ++ c->x86_cache_size = -1; ++ c->x86_vendor = X86_VENDOR_UNKNOWN; ++ c->cpuid_level = -1; /* CPUID not detected */ ++ c->x86_model = c->x86_mask = 0; /* So far unknown... 
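early_cpu_detect() and generic_identify() above decode CPUID leaf 1's EAX the same way: family in bits 8-11, model in bits 4-7, stepping in bits 0-3, with the extended family added for family 0xf and the extended model spliced in from family 6 up. A small sketch of that decode (struct and function names are illustrative):

#include <stdio.h>

struct cpu_sig {
    unsigned int family, model, stepping;
};

/* Decode CPUID.1:EAX ("tfms") exactly as the identify code above:
 * extended family is added for family 0xf, extended model is
 * prepended from family 6 upwards. */
static struct cpu_sig decode_tfms(unsigned int tfms)
{
    struct cpu_sig s;

    s.family = (tfms >> 8) & 15;
    s.model = (tfms >> 4) & 15;
    if (s.family == 0xf)
        s.family += (tfms >> 20) & 0xff;
    if (s.family >= 0x6)
        s.model += ((tfms >> 16) & 0xf) << 4;
    s.stepping = tfms & 15;
    return s;
}

int main(void)
{
    struct cpu_sig s = decode_tfms(0x000006fb); /* an Intel Core 2 signature */

    printf("family %u, model %u, stepping %u\n",
           s.family, s.model, s.stepping);      /* family 6, model 15, stepping 11 */
    return 0;
}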
*/ ++ c->x86_vendor_id[0] = '\0'; /* Unset */ ++ c->x86_model_id[0] = '\0'; /* Unset */ ++ c->x86_max_cores = 1; ++ memset(&c->x86_capability, 0, sizeof c->x86_capability); ++ ++ if (!have_cpuid_p()) { ++ /* First of all, decide if this is a 486 or higher */ ++ /* It's a 486 if we can modify the AC flag */ ++ if ( flag_is_changeable_p(X86_EFLAGS_AC) ) ++ c->x86 = 4; ++ else ++ c->x86 = 3; ++ } ++ ++ generic_identify(c); ++ ++ printk(KERN_DEBUG "CPU: After generic identify, caps:"); ++ for (i = 0; i < NCAPINTS; i++) ++ printk(" %08lx", c->x86_capability[i]); ++ printk("\n"); ++ ++ if (this_cpu->c_identify) { ++ this_cpu->c_identify(c); ++ ++ printk(KERN_DEBUG "CPU: After vendor identify, caps:"); ++ for (i = 0; i < NCAPINTS; i++) ++ printk(" %08lx", c->x86_capability[i]); ++ printk("\n"); ++ } ++ ++ /* ++ * Vendor-specific initialization. In this section we ++ * canonicalize the feature flags, meaning if there are ++ * features a certain CPU supports which CPUID doesn't ++ * tell us, CPUID claiming incorrect flags, or other bugs, ++ * we handle them here. ++ * ++ * At the end of this section, c->x86_capability better ++ * indicate the features this CPU genuinely supports! ++ */ ++ if (this_cpu->c_init) ++ this_cpu->c_init(c); ++ ++ /* Disable the PN if appropriate */ ++ squash_the_stupid_serial_number(c); ++ ++ /* ++ * The vendor-specific functions might have changed features. Now ++ * we do "generic changes." ++ */ ++ ++ /* TSC disabled? */ ++ if ( tsc_disable ) ++ clear_bit(X86_FEATURE_TSC, c->x86_capability); ++ ++ /* FXSR disabled? */ ++ if (disable_x86_fxsr) { ++ clear_bit(X86_FEATURE_FXSR, c->x86_capability); ++ clear_bit(X86_FEATURE_XMM, c->x86_capability); ++ } ++ ++ /* SEP disabled? */ ++ if (disable_x86_sep) ++ clear_bit(X86_FEATURE_SEP, c->x86_capability); ++ ++ if (disable_pse) ++ clear_bit(X86_FEATURE_PSE, c->x86_capability); ++ ++ /* If the model name is still unset, do table lookup. */ ++ if ( !c->x86_model_id[0] ) { ++ char *p; ++ p = table_lookup_model(c); ++ if ( p ) ++ strcpy(c->x86_model_id, p); ++ else ++ /* Last resort... */ ++ sprintf(c->x86_model_id, "%02x/%02x", ++ c->x86, c->x86_model); ++ } ++ ++ /* Now the feature flags better reflect actual CPU features! */ ++ ++ printk(KERN_DEBUG "CPU: After all inits, caps:"); ++ for (i = 0; i < NCAPINTS; i++) ++ printk(" %08lx", c->x86_capability[i]); ++ printk("\n"); ++ ++ /* ++ * On SMP, boot_cpu_data holds the common feature set between ++ * all CPUs; so make sure that we indicate which features are ++ * common between the CPUs. The first time this routine gets ++ * executed, c == &boot_cpu_data. ++ */ ++ if ( c != &boot_cpu_data ) { ++ /* AND the already accumulated flags with these */ ++ for ( i = 0 ; i < NCAPINTS ; i++ ) ++ boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; ++ } ++ ++ /* Init Machine Check Exception if available. 
*/ ++ mcheck_init(c); ++ ++ if (c == &boot_cpu_data) ++ sysenter_setup(); ++ enable_sep_cpu(); ++ ++ if (c == &boot_cpu_data) ++ mtrr_bp_init(); ++ else ++ mtrr_ap_init(); ++} ++ ++#ifdef CONFIG_X86_HT ++void __cpuinit detect_ht(struct cpuinfo_x86 *c) ++{ ++ u32 eax, ebx, ecx, edx; ++ int index_msb, core_bits; ++ ++ cpuid(1, &eax, &ebx, &ecx, &edx); ++ ++ if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) ++ return; ++ ++ smp_num_siblings = (ebx & 0xff0000) >> 16; ++ ++ if (smp_num_siblings == 1) { ++ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); ++ } else if (smp_num_siblings > 1 ) { ++ ++ if (smp_num_siblings > NR_CPUS) { ++ printk(KERN_WARNING "CPU: Unsupported number of the " ++ "siblings %d", smp_num_siblings); ++ smp_num_siblings = 1; ++ return; ++ } ++ ++ index_msb = get_count_order(smp_num_siblings); ++ c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); ++ ++ printk(KERN_INFO "CPU: Physical Processor ID: %d\n", ++ c->phys_proc_id); ++ ++ smp_num_siblings = smp_num_siblings / c->x86_max_cores; ++ ++ index_msb = get_count_order(smp_num_siblings) ; ++ ++ core_bits = get_count_order(c->x86_max_cores); ++ ++ c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & ++ ((1 << core_bits) - 1); ++ ++ if (c->x86_max_cores > 1) ++ printk(KERN_INFO "CPU: Processor Core ID: %d\n", ++ c->cpu_core_id); ++ } ++} ++#endif ++ ++void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) ++{ ++ char *vendor = NULL; ++ ++ if (c->x86_vendor < X86_VENDOR_NUM) ++ vendor = this_cpu->c_vendor; ++ else if (c->cpuid_level >= 0) ++ vendor = c->x86_vendor_id; ++ ++ if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) ++ printk("%s ", vendor); ++ ++ if (!c->x86_model_id[0]) ++ printk("%d86", c->x86); ++ else ++ printk("%s", c->x86_model_id); ++ ++ if (c->x86_mask || c->cpuid_level >= 0) ++ printk(" stepping %02x\n", c->x86_mask); ++ else ++ printk("\n"); ++} ++ ++cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; ++ ++/* This is hacky. :) ++ * We're emulating future behavior. ++ * In the future, the cpu-specific init functions will be called implicitly ++ * via the magic of initcalls. ++ * They will insert themselves into the cpu_devs structure. ++ * Then, when cpu_init() is called, we can just iterate over that array. ++ */ ++ ++extern int intel_cpu_init(void); ++extern int cyrix_init_cpu(void); ++extern int nsc_init_cpu(void); ++extern int amd_init_cpu(void); ++extern int centaur_init_cpu(void); ++extern int transmeta_init_cpu(void); ++extern int rise_init_cpu(void); ++extern int nexgen_init_cpu(void); ++extern int umc_init_cpu(void); ++ ++void __init early_cpu_init(void) ++{ ++ intel_cpu_init(); ++ cyrix_init_cpu(); ++ nsc_init_cpu(); ++ amd_init_cpu(); ++ centaur_init_cpu(); ++ transmeta_init_cpu(); ++ rise_init_cpu(); ++ nexgen_init_cpu(); ++ umc_init_cpu(); ++ early_cpu_detect(); ++ ++#ifdef CONFIG_DEBUG_PAGEALLOC ++ /* pse is not compatible with on-the-fly unmapping, ++ * disable it even if the cpus claim to support it. 
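detect_ht() above splits the initial APIC ID from CPUID.1:EBX[31:24] into package and core IDs, shifting by get_count_order() of the sibling and core counts. A sketch of that arithmetic, assuming phys_pkg_id() reduces to a plain right shift as in the non-Xen mach-default code (helper names illustrative):

#include <stdio.h>

/* ceil(log2(x)): the role get_count_order() plays above. */
static int count_order(unsigned int x)
{
    int order = 0;

    while ((1u << order) < x)
        order++;
    return order;
}

/* Derive package and core IDs from an initial APIC ID the way
 * detect_ht() above does, given threads-per-package and
 * cores-per-package. */
static void split_apicid(unsigned int apicid, unsigned int siblings,
                         unsigned int max_cores)
{
    int index_msb = count_order(siblings);
    unsigned int package = apicid >> index_msb;
    int core_bits = count_order(max_cores);
    unsigned int core;

    index_msb = count_order(siblings / max_cores); /* threads per core */
    core = (apicid >> index_msb) & ((1u << core_bits) - 1);

    printf("apicid %u -> package %u, core %u\n", apicid, package, core);
}

int main(void)
{
    split_apicid(5, 4, 2);  /* 4 threads, 2 cores: package 1, core 0 */
    return 0;
}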
++ */ ++ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); ++ disable_pse = 1; ++#endif ++} ++ ++static void __cpuinit cpu_gdt_init(const struct Xgt_desc_struct *gdt_descr) ++{ ++ unsigned long frames[16]; ++ unsigned long va; ++ int f; ++ ++ for (va = gdt_descr->address, f = 0; ++ va < gdt_descr->address + gdt_descr->size; ++ va += PAGE_SIZE, f++) { ++ frames[f] = virt_to_mfn(va); ++ make_lowmem_page_readonly( ++ (void *)va, XENFEAT_writable_descriptor_tables); ++ } ++ if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / 8)) ++ BUG(); ++} ++ ++/* ++ * cpu_init() initializes state that is per-CPU. Some data is already ++ * initialized (naturally) in the bootstrap process, such as the GDT ++ * and IDT. We reload them nevertheless, this function acts as a ++ * 'CPU state barrier', nothing should get across. ++ */ ++void __cpuinit cpu_init(void) ++{ ++ int cpu = smp_processor_id(); ++#ifndef CONFIG_X86_NO_TSS ++ struct tss_struct * t = &per_cpu(init_tss, cpu); ++#endif ++ struct thread_struct *thread = ¤t->thread; ++ struct desc_struct *gdt; ++ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); ++ ++ if (cpu_test_and_set(cpu, cpu_initialized)) { ++ printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); ++ for (;;) local_irq_enable(); ++ } ++ printk(KERN_INFO "Initializing CPU#%d\n", cpu); ++ ++ if (cpu_has_vme || cpu_has_de) ++ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); ++ if (tsc_disable && cpu_has_tsc) { ++ printk(KERN_NOTICE "Disabling TSC...\n"); ++ /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ ++ clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); ++ set_in_cr4(X86_CR4_TSD); ++ } ++ ++#ifndef CONFIG_XEN ++ /* The CPU hotplug case */ ++ if (cpu_gdt_descr->address) { ++ gdt = (struct desc_struct *)cpu_gdt_descr->address; ++ memset(gdt, 0, PAGE_SIZE); ++ goto old_gdt; ++ } ++ /* ++ * This is a horrible hack to allocate the GDT. 
The problem ++ * is that cpu_init() is called really early for the boot CPU ++ * (and hence needs bootmem) but much later for the secondary ++ * CPUs, when bootmem will have gone away ++ */ ++ if (NODE_DATA(0)->bdata->node_bootmem_map) { ++ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); ++ /* alloc_bootmem_pages panics on failure, so no check */ ++ memset(gdt, 0, PAGE_SIZE); ++ } else { ++ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); ++ if (unlikely(!gdt)) { ++ printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); ++ for (;;) ++ local_irq_enable(); ++ } ++ } ++old_gdt: ++ /* ++ * Initialize the per-CPU GDT with the boot GDT, ++ * and set up the GDT descriptor: ++ */ ++ memcpy(gdt, cpu_gdt_table, GDT_SIZE); ++ ++ /* Set up GDT entry for 16bit stack */ ++ *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= ++ ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | ++ ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | ++ (CPU_16BIT_STACK_SIZE - 1); ++ ++ cpu_gdt_descr->size = GDT_SIZE - 1; ++ cpu_gdt_descr->address = (unsigned long)gdt; ++#else ++ if (cpu == 0 && cpu_gdt_descr->address == 0) { ++ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); ++ /* alloc_bootmem_pages panics on failure, so no check */ ++ memset(gdt, 0, PAGE_SIZE); ++ ++ memcpy(gdt, cpu_gdt_table, GDT_SIZE); ++ ++ cpu_gdt_descr->size = GDT_SIZE; ++ cpu_gdt_descr->address = (unsigned long)gdt; ++ } ++#endif ++ ++ cpu_gdt_init(cpu_gdt_descr); ++ ++ /* ++ * Set up and load the per-CPU TSS and LDT ++ */ ++ atomic_inc(&init_mm.mm_count); ++ current->active_mm = &init_mm; ++ if (current->mm) ++ BUG(); ++ enter_lazy_tlb(&init_mm, current); ++ ++ load_esp0(t, thread); ++ ++ load_LDT(&init_mm.context); ++ ++#ifdef CONFIG_DOUBLEFAULT ++ /* Set up doublefault TSS pointer in the GDT */ ++ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); ++#endif ++ ++ /* Clear %fs and %gs. 
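The ESPFIX setup in cpu_init() above scatters the address of the per-CPU 16-bit stack across the split base fields of a GDT descriptor: base[23:0] lives in descriptor bits 16-39 and base[31:24] in bits 56-63, with the stack size as the limit. A sketch of the packing plus the inverse decode to show no bits are lost (helper names illustrative):

#include <stdio.h>
#include <stdint.h>

/* Pack a segment base and 16-bit limit into the split fields of an
 * x86 descriptor, as the ESPFIX GDT entry setup above does. */
static uint64_t pack_base_limit(uint32_t base, uint16_t limit)
{
    return (((uint64_t)base << 16) & 0x000000ffffff0000ULL) |
           (((uint64_t)base << 32) & 0xff00000000000000ULL) |
           limit;
}

/* Inverse: recover the base to show that no bits were lost. */
static uint32_t unpack_base(uint64_t desc)
{
    return (uint32_t)(((desc >> 16) & 0xffffff) |
                      (((desc >> 56) & 0xff) << 24));
}

int main(void)
{
    uint64_t d = pack_base_limit(0xc0123456u, 0x0fff);

    printf("desc %#llx, base %#x\n", (unsigned long long)d,
           (unsigned int)unpack_base(d));   /* base 0xc0123456 */
    return 0;
}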
*/ ++ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); ++ ++ /* Clear all 6 debug registers: */ ++ set_debugreg(0, 0); ++ set_debugreg(0, 1); ++ set_debugreg(0, 2); ++ set_debugreg(0, 3); ++ set_debugreg(0, 6); ++ set_debugreg(0, 7); ++ ++ /* ++ * Force FPU initialization: ++ */ ++ current_thread_info()->status = 0; ++ clear_used_math(); ++ mxcsr_feature_mask_init(); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++void __cpuinit cpu_uninit(void) ++{ ++ int cpu = raw_smp_processor_id(); ++ cpu_clear(cpu, cpu_initialized); ++ ++ /* lazy TLB state */ ++ per_cpu(cpu_tlbstate, cpu).state = 0; ++ per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; ++} ++#endif +Index: head-2008-11-25/arch/x86/kernel/cpu/mtrr/main-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-01-28 12:24:18.000000000 +0100 +@@ -0,0 +1,198 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include "mtrr.h" ++ ++static DEFINE_MUTEX(mtrr_mutex); ++ ++void generic_get_mtrr(unsigned int reg, unsigned long *base, ++ unsigned int *size, mtrr_type * type) ++{ ++ struct xen_platform_op op; ++ ++ op.cmd = XENPF_read_memtype; ++ op.u.read_memtype.reg = reg; ++ if (unlikely(HYPERVISOR_platform_op(&op))) ++ memset(&op.u.read_memtype, 0, sizeof(op.u.read_memtype)); ++ ++ *size = op.u.read_memtype.nr_mfns; ++ *base = op.u.read_memtype.mfn; ++ *type = op.u.read_memtype.type; ++} ++ ++struct mtrr_ops generic_mtrr_ops = { ++ .use_intel_if = 1, ++ .get = generic_get_mtrr, ++}; ++ ++struct mtrr_ops *mtrr_if = &generic_mtrr_ops; ++unsigned int num_var_ranges; ++unsigned int *usage_table; ++ ++static void __init set_num_var_ranges(void) ++{ ++ struct xen_platform_op op; ++ ++ for (num_var_ranges = 0; ; num_var_ranges++) { ++ op.cmd = XENPF_read_memtype; ++ op.u.read_memtype.reg = num_var_ranges; ++ if (HYPERVISOR_platform_op(&op) != 0) ++ break; ++ } ++} ++ ++static void __init init_table(void) ++{ ++ int i, max; ++ ++ max = num_var_ranges; ++ if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) ++ == NULL) { ++ printk(KERN_ERR "mtrr: could not allocate\n"); ++ return; ++ } ++ for (i = 0; i < max; i++) ++ usage_table[i] = 0; ++} ++ ++int mtrr_add_page(unsigned long base, unsigned long size, ++ unsigned int type, char increment) ++{ ++ int error; ++ struct xen_platform_op op; ++ ++ mutex_lock(&mtrr_mutex); ++ ++ op.cmd = XENPF_add_memtype; ++ op.u.add_memtype.mfn = base; ++ op.u.add_memtype.nr_mfns = size; ++ op.u.add_memtype.type = type; ++ error = HYPERVISOR_platform_op(&op); ++ if (error) { ++ mutex_unlock(&mtrr_mutex); ++ BUG_ON(error > 0); ++ return error; ++ } ++ ++ if (increment) ++ ++usage_table[op.u.add_memtype.reg]; ++ ++ mutex_unlock(&mtrr_mutex); ++ ++ return op.u.add_memtype.reg; ++} ++ ++static int mtrr_check(unsigned long base, unsigned long size) ++{ ++ if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { ++ printk(KERN_WARNING ++ "mtrr: size and base must be multiples of 4 kiB\n"); ++ printk(KERN_DEBUG ++ "mtrr: size: 0x%lx base: 0x%lx\n", size, base); ++ dump_stack(); ++ return -1; ++ } ++ return 0; ++} ++ ++int ++mtrr_add(unsigned long base, unsigned long size, unsigned int type, ++ char increment) ++{ ++ if (mtrr_check(base, size)) ++ return -EINVAL; ++ return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, ++ increment); ++} ++ ++int mtrr_del_page(int reg, unsigned long base, unsigned long size) ++{ ++ unsigned 
i; ++ mtrr_type ltype; ++ unsigned long lbase; ++ unsigned int lsize; ++ int error = -EINVAL; ++ struct xen_platform_op op; ++ ++ mutex_lock(&mtrr_mutex); ++ ++ if (reg < 0) { ++ /* Search for existing MTRR */ ++ for (i = 0; i < num_var_ranges; ++i) { ++ mtrr_if->get(i, &lbase, &lsize, <ype); ++ if (lbase == base && lsize == size) { ++ reg = i; ++ break; ++ } ++ } ++ if (reg < 0) { ++ printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, ++ size); ++ goto out; ++ } ++ } ++ if (usage_table[reg] < 1) { ++ printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); ++ goto out; ++ } ++ if (--usage_table[reg] < 1) { ++ op.cmd = XENPF_del_memtype; ++ op.u.del_memtype.handle = 0; ++ op.u.del_memtype.reg = reg; ++ error = HYPERVISOR_platform_op(&op); ++ if (error) { ++ BUG_ON(error > 0); ++ goto out; ++ } ++ } ++ error = reg; ++ out: ++ mutex_unlock(&mtrr_mutex); ++ return error; ++} ++ ++int ++mtrr_del(int reg, unsigned long base, unsigned long size) ++{ ++ if (mtrr_check(base, size)) ++ return -EINVAL; ++ return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); ++} ++ ++EXPORT_SYMBOL(mtrr_add); ++EXPORT_SYMBOL(mtrr_del); ++ ++void __init mtrr_bp_init(void) ++{ ++} ++ ++void mtrr_ap_init(void) ++{ ++} ++ ++static int __init mtrr_init(void) ++{ ++ struct cpuinfo_x86 *c = &boot_cpu_data; ++ ++ if (!is_initial_xendomain()) ++ return -ENODEV; ++ ++ if ((!cpu_has(c, X86_FEATURE_MTRR)) && ++ (!cpu_has(c, X86_FEATURE_K6_MTRR)) && ++ (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) && ++ (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) ++ return -ENODEV; ++ ++ set_num_var_ranges(); ++ init_table(); ++ ++ return 0; ++} ++ ++subsys_initcall(mtrr_init); +Index: head-2008-11-25/arch/x86/kernel/entry_32-xen.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/entry_32-xen.S 2007-12-10 08:47:31.000000000 +0100 +@@ -0,0 +1,1238 @@ ++/* ++ * linux/arch/i386/entry.S ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++/* ++ * entry.S contains the system-call and fault low-level handling routines. ++ * This also contains the timer-interrupt handler, as well as all interrupts ++ * and faults that can result in a task-switch. ++ * ++ * NOTE: This code handles signal-recognition, which happens every time ++ * after a timer-interrupt and after each system call. ++ * ++ * I changed all the .align's to 4 (16 byte alignment), as that's faster ++ * on a 486. ++ * ++ * Stack layout in 'ret_from_system_call': ++ * ptrace needs to have all regs on the stack. ++ * if the order here is changed, it needs to be ++ * updated in fork.c:copy_process, signal.c:do_signal, ++ * ptrace.c and ptrace.h ++ * ++ * 0(%esp) - %ebx ++ * 4(%esp) - %ecx ++ * 8(%esp) - %edx ++ * C(%esp) - %esi ++ * 10(%esp) - %edi ++ * 14(%esp) - %ebp ++ * 18(%esp) - %eax ++ * 1C(%esp) - %ds ++ * 20(%esp) - %es ++ * 24(%esp) - orig_eax ++ * 28(%esp) - %eip ++ * 2C(%esp) - %cs ++ * 30(%esp) - %eflags ++ * 34(%esp) - %oldesp ++ * 38(%esp) - %oldss ++ * ++ * "current" is in register %ebx during any slow entries. 
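mtrr_check() above rejects any range that is not 4 KiB aligned, and mtrr_add()/mtrr_del() then convert byte addresses into page frames before issuing the XENPF_* hypercalls. The same validation and conversion as a sketch (PAGE_SHIFT hard-coded to 12 for illustration):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1ul << PAGE_SHIFT)

/* Same test as mtrr_check() above: base and size must both be
 * multiples of 4 KiB. */
static int range_ok(unsigned long base, unsigned long size)
{
    return !((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)));
}

int main(void)
{
    unsigned long base = 0xd0000000ul, size = 0x100000ul;

    if (range_ok(base, size))
        printf("mfn %#lx, %lu pages\n",
               base >> PAGE_SHIFT, size >> PAGE_SHIFT);
    else
        printf("rejected: not 4 KiB aligned\n");
    return 0;
}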
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "irq_vectors.h" ++#include ++ ++#define nr_syscalls ((syscall_table_size)/4) ++ ++EBX = 0x00 ++ECX = 0x04 ++EDX = 0x08 ++ESI = 0x0C ++EDI = 0x10 ++EBP = 0x14 ++EAX = 0x18 ++DS = 0x1C ++ES = 0x20 ++ORIG_EAX = 0x24 ++EIP = 0x28 ++CS = 0x2C ++EFLAGS = 0x30 ++OLDESP = 0x34 ++OLDSS = 0x38 ++ ++CF_MASK = 0x00000001 ++TF_MASK = 0x00000100 ++IF_MASK = 0x00000200 ++DF_MASK = 0x00000400 ++NT_MASK = 0x00004000 ++VM_MASK = 0x00020000 ++/* Pseudo-eflags. */ ++NMI_MASK = 0x80000000 ++ ++#ifndef CONFIG_XEN ++#define DISABLE_INTERRUPTS cli ++#define ENABLE_INTERRUPTS sti ++#else ++/* Offsets into shared_info_t. */ ++#define evtchn_upcall_pending /* 0 */ ++#define evtchn_upcall_mask 1 ++ ++#define sizeof_vcpu_shift 6 ++ ++#ifdef CONFIG_SMP ++#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ ++ shl $sizeof_vcpu_shift,%esi ; \ ++ addl HYPERVISOR_shared_info,%esi ++#else ++#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi ++#endif ++ ++#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) ++#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) ++#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ ++ __DISABLE_INTERRUPTS ++#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ ++ __ENABLE_INTERRUPTS ++#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) ++#endif ++ ++#ifdef CONFIG_PREEMPT ++#define preempt_stop cli; TRACE_IRQS_OFF ++#else ++#define preempt_stop ++#define resume_kernel restore_nocheck ++#endif ++ ++.macro TRACE_IRQS_IRET ++#ifdef CONFIG_TRACE_IRQFLAGS ++ testl $IF_MASK,EFLAGS(%esp) # interrupts off? ++ jz 1f ++ TRACE_IRQS_ON ++1: ++#endif ++.endm ++ ++#ifdef CONFIG_VM86 ++#define resume_userspace_sig check_userspace ++#else ++#define resume_userspace_sig resume_userspace ++#endif ++ ++#define SAVE_ALL \ ++ cld; \ ++ pushl %es; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ /*CFI_REL_OFFSET es, 0;*/\ ++ pushl %ds; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ /*CFI_REL_OFFSET ds, 0;*/\ ++ pushl %eax; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET eax, 0;\ ++ pushl %ebp; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET ebp, 0;\ ++ pushl %edi; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET edi, 0;\ ++ pushl %esi; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET esi, 0;\ ++ pushl %edx; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET edx, 0;\ ++ pushl %ecx; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET ecx, 0;\ ++ pushl %ebx; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET ebx, 0;\ ++ movl $(__USER_DS), %edx; \ ++ movl %edx, %ds; \ ++ movl %edx, %es; ++ ++#define RESTORE_INT_REGS \ ++ popl %ebx; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE ebx;\ ++ popl %ecx; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE ecx;\ ++ popl %edx; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE edx;\ ++ popl %esi; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE esi;\ ++ popl %edi; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE edi;\ ++ popl %ebp; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE ebp;\ ++ popl %eax; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE eax ++ ++#define RESTORE_REGS \ ++ RESTORE_INT_REGS; \ ++1: popl %ds; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ /*CFI_RESTORE ds;*/\ ++2: popl %es; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ /*CFI_RESTORE es;*/\ ++.section .fixup,"ax"; \ ++3: movl $0,(%esp); \ ++ jmp 1b; \ ++4: movl $0,(%esp); \ ++ jmp 2b; \ ++.previous; \ ++.section __ex_table,"a";\ ++ .align 4; \ ++ .long 1b,3b; \ ++ .long 2b,4b; \ ++.previous ++ ++#define RING0_INT_FRAME \ ++ CFI_STARTPROC simple;\ ++ 
CFI_DEF_CFA esp, 3*4;\ ++ /*CFI_OFFSET cs, -2*4;*/\ ++ CFI_OFFSET eip, -3*4 ++ ++#define RING0_EC_FRAME \ ++ CFI_STARTPROC simple;\ ++ CFI_DEF_CFA esp, 4*4;\ ++ /*CFI_OFFSET cs, -2*4;*/\ ++ CFI_OFFSET eip, -3*4 ++ ++#define RING0_PTREGS_FRAME \ ++ CFI_STARTPROC simple;\ ++ CFI_DEF_CFA esp, OLDESP-EBX;\ ++ /*CFI_OFFSET cs, CS-OLDESP;*/\ ++ CFI_OFFSET eip, EIP-OLDESP;\ ++ /*CFI_OFFSET es, ES-OLDESP;*/\ ++ /*CFI_OFFSET ds, DS-OLDESP;*/\ ++ CFI_OFFSET eax, EAX-OLDESP;\ ++ CFI_OFFSET ebp, EBP-OLDESP;\ ++ CFI_OFFSET edi, EDI-OLDESP;\ ++ CFI_OFFSET esi, ESI-OLDESP;\ ++ CFI_OFFSET edx, EDX-OLDESP;\ ++ CFI_OFFSET ecx, ECX-OLDESP;\ ++ CFI_OFFSET ebx, EBX-OLDESP ++ ++ENTRY(ret_from_fork) ++ CFI_STARTPROC ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ call schedule_tail ++ GET_THREAD_INFO(%ebp) ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ pushl $0x0202 # Reset kernel eflags ++ CFI_ADJUST_CFA_OFFSET 4 ++ popfl ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp syscall_exit ++ CFI_ENDPROC ++ ++/* ++ * Return to user mode is not as complex as all this looks, ++ * but we want the default path for a system call return to ++ * go as quickly as possible which is why some of this is ++ * less clear than it otherwise should be. ++ */ ++ ++ # userspace resumption stub bypassing syscall exit tracing ++ ALIGN ++ RING0_PTREGS_FRAME ++ret_from_exception: ++ preempt_stop ++ret_from_intr: ++ GET_THREAD_INFO(%ebp) ++check_userspace: ++ movl EFLAGS(%esp), %eax # mix EFLAGS and CS ++ movb CS(%esp), %al ++ testl $(VM_MASK | 2), %eax ++ jz resume_kernel ++ENTRY(resume_userspace) ++ DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ # setting need_resched or sigpending ++ # between sampling and the iret ++ movl TI_flags(%ebp), %ecx ++ andl $_TIF_WORK_MASK, %ecx # is there any work to be done on ++ # int/exception return? ++ jne work_pending ++ jmp restore_all ++ ++#ifdef CONFIG_PREEMPT ++ENTRY(resume_kernel) ++ cli ++ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? ++ jnz restore_nocheck ++need_resched: ++ movl TI_flags(%ebp), %ecx # need_resched set ? ++ testb $_TIF_NEED_RESCHED, %cl ++ jz restore_all ++ testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? ++ jz restore_all ++ call preempt_schedule_irq ++ jmp need_resched ++#endif ++ CFI_ENDPROC ++ ++/* SYSENTER_RETURN points to after the "sysenter" instruction in ++ the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ ++ ++ # sysenter call handler stub ++ENTRY(sysenter_entry) ++ CFI_STARTPROC simple ++ CFI_DEF_CFA esp, 0 ++ CFI_REGISTER esp, ebp ++ movl SYSENTER_stack_esp0(%esp),%esp ++sysenter_past_esp: ++ /* ++ * No need to follow this irqs on/off section: the syscall ++ * disabled irqs and here we enable it straight after entry: ++ */ ++ sti ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET ss, 0*/ ++ pushl %ebp ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET esp, 0 ++ pushfl ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $(__USER_CS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET cs, 0*/ ++ /* ++ * Push current_thread_info()->sysenter_return to the stack. ++ * A tiny bit of offset fixup is necessary - 4*4 means the 4 words ++ * pushed above; +8 corresponds to copy_thread's esp0 setting. ++ */ ++ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET eip, 0 ++ ++/* ++ * Load the potential sixth argument from user stack. ++ * Careful about security. 
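The sysenter path that follows loads the sixth syscall argument from the user stack, guarded by `cmpl $__PAGE_OFFSET-3,%ebp`: a 4-byte read starting at %ebp stays entirely below the kernel boundary only when %ebp <= __PAGE_OFFSET-4, so anything at or above __PAGE_OFFSET-3 is sent to syscall_fault. The bound in C, assuming the common 3 GiB split for __PAGE_OFFSET:

#include <stdio.h>

#define PAGE_OFFSET 0xc0000000ul    /* assumed 3 GiB user/kernel split */

/* A 4-byte user-stack read at `p` is safe only if p+3 is still below
 * PAGE_OFFSET, i.e. p <= PAGE_OFFSET - 4. */
static int user_dword_ok(unsigned long p)
{
    return p < PAGE_OFFSET - 3;
}

int main(void)
{
    printf("%d\n", user_dword_ok(PAGE_OFFSET - 4)); /* 1: last legal address */
    printf("%d\n", user_dword_ok(PAGE_OFFSET - 3)); /* 0: read would cross over */
    return 0;
}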
++ */ ++ cmpl $__PAGE_OFFSET-3,%ebp ++ jae syscall_fault ++1: movl (%ebp),%ebp ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,syscall_fault ++.previous ++ ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ GET_THREAD_INFO(%ebp) ++ ++ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ ++ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) ++ jnz syscall_trace_entry ++ cmpl $(nr_syscalls), %eax ++ jae syscall_badsys ++ call *sys_call_table(,%eax,4) ++ movl %eax,EAX(%esp) ++ DISABLE_INTERRUPTS ++ TRACE_IRQS_OFF ++ movl TI_flags(%ebp), %ecx ++ testw $_TIF_ALLWORK_MASK, %cx ++ jne syscall_exit_work ++/* if something modifies registers it must also disable sysexit */ ++ movl EIP(%esp), %edx ++ movl OLDESP(%esp), %ecx ++ xorl %ebp,%ebp ++#ifdef CONFIG_XEN ++ TRACE_IRQS_ON ++ __ENABLE_INTERRUPTS ++sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ++ __TEST_PENDING ++ jnz 14f # process more events if necessary... ++ movl ESI(%esp), %esi ++ sysexit ++14: __DISABLE_INTERRUPTS ++ TRACE_IRQS_OFF ++sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ++ push %esp ++ call evtchn_do_upcall ++ add $4,%esp ++ jmp ret_from_intr ++#else ++ TRACE_IRQS_ON ++ sti ++ sysexit ++#endif /* !CONFIG_XEN */ ++ CFI_ENDPROC ++ ++ # pv sysenter call handler stub ++ENTRY(sysenter_entry_pv) ++ RING0_INT_FRAME ++ movl $__USER_DS,16(%esp) ++ movl %ebp,12(%esp) ++ movl $__USER_CS,4(%esp) ++ addl $4,%esp ++ /* +5*4 is SS:ESP,EFLAGS,CS:EIP. +8 is esp0 setting. */ ++ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) ++/* ++ * Load the potential sixth argument from user stack. ++ * Careful about security. ++ */ ++ cmpl $__PAGE_OFFSET-3,%ebp ++ jae syscall_fault ++1: movl (%ebp),%ebp ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,syscall_fault ++.previous ++ /* fall through */ ++ CFI_ENDPROC ++ENDPROC(sysenter_entry_pv) ++ ++ # system call handler stub ++ENTRY(system_call) ++ RING0_INT_FRAME # can't unwind into user space anyway ++ pushl %eax # save orig_eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ GET_THREAD_INFO(%ebp) ++ testl $TF_MASK,EFLAGS(%esp) ++ jz no_singlestep ++ orl $_TIF_SINGLESTEP,TI_flags(%ebp) ++no_singlestep: ++ # system call tracing in operation / emulation ++ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ ++ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) ++ jnz syscall_trace_entry ++ cmpl $(nr_syscalls), %eax ++ jae syscall_badsys ++syscall_call: ++ call *sys_call_table(,%eax,4) ++ movl %eax,EAX(%esp) # store the return value ++syscall_exit: ++ DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ # setting need_resched or sigpending ++ # between sampling and the iret ++ TRACE_IRQS_OFF ++ movl TI_flags(%ebp), %ecx ++ testw $_TIF_ALLWORK_MASK, %cx # current->work ++ jne syscall_exit_work ++ ++restore_all: ++#ifndef CONFIG_XEN ++ movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS ++ # Warning: OLDSS(%esp) contains the wrong/random values if we ++ # are returning to the kernel. ++ # See comments in process.c:copy_thread() for details. 
++ movb OLDSS(%esp), %ah ++ movb CS(%esp), %al ++ andl $(VM_MASK | (4 << 8) | 3), %eax ++ cmpl $((4 << 8) | 3), %eax ++ CFI_REMEMBER_STATE ++ je ldt_ss # returning to user-space with LDT SS ++restore_nocheck: ++#else ++restore_nocheck: ++ movl EFLAGS(%esp), %eax ++ testl $(VM_MASK|NMI_MASK), %eax ++ CFI_REMEMBER_STATE ++ jnz hypervisor_iret ++ shr $9, %eax # EAX[0] == IRET_EFLAGS.IF ++ GET_VCPU_INFO ++ andb evtchn_upcall_mask(%esi),%al ++ andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask ++ CFI_REMEMBER_STATE ++ jnz restore_all_enable_events # != 0 => enable event delivery ++#endif ++ TRACE_IRQS_IRET ++restore_nocheck_notrace: ++ RESTORE_REGS ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++1: iret ++.section .fixup,"ax" ++iret_exc: ++#ifndef CONFIG_XEN ++ TRACE_IRQS_ON ++ sti ++#endif ++ pushl $0 # no error code ++ pushl $do_iret_error ++ jmp error_code ++.previous ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++ ++ CFI_RESTORE_STATE ++#ifndef CONFIG_XEN ++ldt_ss: ++ larl OLDSS(%esp), %eax ++ jnz restore_nocheck ++ testl $0x00400000, %eax # returning to 32bit stack? ++ jnz restore_nocheck # allright, normal return ++ /* If returning to userspace with 16bit stack, ++ * try to fix the higher word of ESP, as the CPU ++ * won't restore it. ++ * This is an "official" bug of all the x86-compatible ++ * CPUs, which we can try to work around to make ++ * dosemu and wine happy. */ ++ subl $8, %esp # reserve space for switch16 pointer ++ CFI_ADJUST_CFA_OFFSET 8 ++ cli ++ TRACE_IRQS_OFF ++ movl %esp, %eax ++ /* Set up the 16bit stack frame with switch32 pointer on top, ++ * and a switch16 pointer on top of the current frame. */ ++ call setup_x86_bogus_stack ++ CFI_ADJUST_CFA_OFFSET -8 # frame has moved ++ TRACE_IRQS_IRET ++ RESTORE_REGS ++ lss 20+4(%esp), %esp # switch to 16bit stack ++1: iret ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++#else ++ ALIGN ++restore_all_enable_events: ++ TRACE_IRQS_ON ++ __ENABLE_INTERRUPTS ++scrit: /**** START OF CRITICAL REGION ****/ ++ __TEST_PENDING ++ jnz 14f # process more events if necessary... ++ RESTORE_REGS ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++1: iret ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++14: __DISABLE_INTERRUPTS ++ TRACE_IRQS_OFF ++ jmp 11f ++ecrit: /**** END OF CRITICAL REGION ****/ ++ ++ CFI_RESTORE_STATE ++hypervisor_iret: ++ andl $~NMI_MASK, EFLAGS(%esp) ++ RESTORE_REGS ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp hypercall_page + (__HYPERVISOR_iret * 32) ++#endif ++ CFI_ENDPROC ++ ++ # perform work that needs to be done immediately before resumption ++ ALIGN ++ RING0_PTREGS_FRAME # can't unwind into user space anyway ++work_pending: ++ testb $_TIF_NEED_RESCHED, %cl ++ jz work_notifysig ++work_resched: ++ call schedule ++ DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ # setting need_resched or sigpending ++ # between sampling and the iret ++ TRACE_IRQS_OFF ++ movl TI_flags(%ebp), %ecx ++ andl $_TIF_WORK_MASK, %ecx # is there any work to be done other ++ # than syscall tracing? 
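The native restore_all above packs three facts into one register: %al carries CS (requested privilege level in bits 0-1), %ah carries OLDSS (bit 2 set for an LDT selector), and the high word keeps EFLAGS.VM. Masking with `VM_MASK | (4 << 8) | 3` and comparing to `(4 << 8) | 3` is true exactly for a non-vm86 return to ring 3 on an LDT stack segment, the one case needing the 16-bit-stack fixup. A sketch of that predicate (the sample selector values are illustrative):

#include <stdio.h>

#define VM_MASK 0x00020000u

/* Mirror of the restore_all test above: combine EFLAGS, SS and CS and
 * check for "return to ring 3 on an LDT stack segment, not vm86". */
static int needs_espfix(unsigned int eflags, unsigned int ss, unsigned int cs)
{
    unsigned int eax = eflags & ~0xffffu;   /* movl EFLAGS(%esp),%eax */

    eax |= (ss & 0xff) << 8;                /* movb OLDSS(%esp),%ah */
    eax |= cs & 0xff;                       /* movb CS(%esp),%al */
    eax &= VM_MASK | (4 << 8) | 3;
    return eax == ((4 << 8) | 3);
}

int main(void)
{
    printf("%d\n", needs_espfix(0x202, 0x0f, 0x73));    /* 1: LDT SS, RPL 3 */
    printf("%d\n", needs_espfix(0x202, 0x7b, 0x73));    /* 0: GDT SS */
    return 0;
}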
++ jz restore_all ++ testb $_TIF_NEED_RESCHED, %cl ++ jnz work_resched ++ ++work_notifysig: # deal with pending signals and ++ # notify-resume requests ++ testl $VM_MASK, EFLAGS(%esp) ++ movl %esp, %eax ++ jne work_notifysig_v86 # returning to kernel-space or ++ # vm86-space ++ xorl %edx, %edx ++ call do_notify_resume ++ jmp resume_userspace_sig ++ ++ ALIGN ++work_notifysig_v86: ++#ifdef CONFIG_VM86 ++ pushl %ecx # save ti_flags for do_notify_resume ++ CFI_ADJUST_CFA_OFFSET 4 ++ call save_v86_state # %eax contains pt_regs pointer ++ popl %ecx ++ CFI_ADJUST_CFA_OFFSET -4 ++ movl %eax, %esp ++ xorl %edx, %edx ++ call do_notify_resume ++ jmp resume_userspace_sig ++#endif ++ ++ # perform syscall exit tracing ++ ALIGN ++syscall_trace_entry: ++ movl $-ENOSYS,EAX(%esp) ++ movl %esp, %eax ++ xorl %edx,%edx ++ call do_syscall_trace ++ cmpl $0, %eax ++ jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, ++ # so must skip actual syscall ++ movl ORIG_EAX(%esp), %eax ++ cmpl $(nr_syscalls), %eax ++ jnae syscall_call ++ jmp syscall_exit ++ ++ # perform syscall exit tracing ++ ALIGN ++syscall_exit_work: ++ testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl ++ jz work_pending ++ TRACE_IRQS_ON ++ ENABLE_INTERRUPTS # could let do_syscall_trace() call ++ # schedule() instead ++ movl %esp, %eax ++ movl $1, %edx ++ call do_syscall_trace ++ jmp resume_userspace ++ CFI_ENDPROC ++ ++ RING0_INT_FRAME # can't unwind into user space anyway ++syscall_fault: ++ pushl %eax # save orig_eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ GET_THREAD_INFO(%ebp) ++ movl $-EFAULT,EAX(%esp) ++ jmp resume_userspace ++ ++syscall_badsys: ++ movl $-ENOSYS,EAX(%esp) ++ jmp resume_userspace ++ CFI_ENDPROC ++ ++#ifndef CONFIG_XEN ++#define FIXUP_ESPFIX_STACK \ ++ movl %esp, %eax; \ ++ /* switch to 32bit stack using the pointer on top of 16bit stack */ \ ++ lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ ++ /* copy data from 16bit stack to 32bit stack */ \ ++ call fixup_x86_bogus_stack; \ ++ /* put ESP to the proper location */ \ ++ movl %eax, %esp; ++#define UNWIND_ESPFIX_STACK \ ++ pushl %eax; \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ movl %ss, %eax; \ ++ /* see if on 16bit stack */ \ ++ cmpw $__ESPFIX_SS, %ax; \ ++ je 28f; \ ++27: popl %eax; \ ++ CFI_ADJUST_CFA_OFFSET -4; \ ++.section .fixup,"ax"; \ ++28: movl $__KERNEL_DS, %eax; \ ++ movl %eax, %ds; \ ++ movl %eax, %es; \ ++ /* switch to 32bit stack */ \ ++ FIXUP_ESPFIX_STACK; \ ++ jmp 27b; \ ++.previous ++ ++/* ++ * Build the entry stubs and pointer table with ++ * some assembler magic. ++ */ ++.data ++ENTRY(interrupt) ++.text ++ ++vector=0 ++ENTRY(irq_entries_start) ++ RING0_INT_FRAME ++.rept NR_IRQS ++ ALIGN ++ .if vector ++ CFI_ADJUST_CFA_OFFSET -4 ++ .endif ++1: pushl $~(vector) ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp common_interrupt ++.data ++ .long 1b ++.text ++vector=vector+1 ++.endr ++ ++/* ++ * the CPU automatically disables interrupts when executing an IRQ vector, ++ * so IRQ-flags tracing has to follow that: ++ */ ++ ALIGN ++common_interrupt: ++ SAVE_ALL ++ TRACE_IRQS_OFF ++ movl %esp,%eax ++ call do_IRQ ++ jmp ret_from_intr ++ CFI_ENDPROC ++ ++#define BUILD_INTERRUPT(name, nr) \ ++ENTRY(name) \ ++ RING0_INT_FRAME; \ ++ pushl $~(nr); \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ SAVE_ALL; \ ++ TRACE_IRQS_OFF \ ++ movl %esp,%eax; \ ++ call smp_/**/name; \ ++ jmp ret_from_intr; \ ++ CFI_ENDPROC ++ ++/* The include is where all of the SMP etc. 
interrupts come from */ ++#include "entry_arch.h" ++#else ++#define UNWIND_ESPFIX_STACK ++#endif ++ ++ENTRY(divide_error) ++ RING0_INT_FRAME ++ pushl $0 # no error code ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_divide_error ++ CFI_ADJUST_CFA_OFFSET 4 ++ ALIGN ++error_code: ++ pushl %ds ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET ds, 0*/ ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET eax, 0 ++ xorl %eax, %eax ++ pushl %ebp ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebp, 0 ++ pushl %edi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edi, 0 ++ pushl %esi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET esi, 0 ++ pushl %edx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edx, 0 ++ decl %eax # eax = -1 ++ pushl %ecx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ecx, 0 ++ pushl %ebx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebx, 0 ++ cld ++ pushl %es ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET es, 0*/ ++ UNWIND_ESPFIX_STACK ++ popl %ecx ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_REGISTER es, ecx*/ ++ movl ES(%esp), %edi # get the function address ++ movl ORIG_EAX(%esp), %edx # get the error code ++ movl %eax, ORIG_EAX(%esp) ++ movl %ecx, ES(%esp) ++ /*CFI_REL_OFFSET es, ES*/ ++ movl $(__USER_DS), %ecx ++ movl %ecx, %ds ++ movl %ecx, %es ++ movl %esp,%eax # pt_regs pointer ++ call *%edi ++ jmp ret_from_exception ++ CFI_ENDPROC ++ ++#ifdef CONFIG_XEN ++# A note on the "critical region" in our callback handler. ++# We want to avoid stacking callback handlers due to events occurring ++# during handling of the last event. To do this, we keep events disabled ++# until we've done all processing. HOWEVER, we must enable events before ++# popping the stack frame (can't be done atomically) and so it would still ++# be possible to get enough handler activations to overflow the stack. ++# Although unlikely, bugs of that kind are hard to track down, so we'd ++# like to avoid the possibility. ++# So, on entry to the handler we detect whether we interrupted an ++# existing activation in its critical region -- if so, we pop the current ++# activation and restart the handler using the previous one. ++# ++# The sysexit critical region is slightly different. sysexit ++# atomically removes the entire stack frame. If we interrupt in the ++# critical region we know that the entire frame is present and correct ++# so we can simply throw away the new one. ++ENTRY(hypervisor_callback) ++ RING0_INT_FRAME ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ movl EIP(%esp),%eax ++ cmpl $scrit,%eax ++ jb 11f ++ cmpl $ecrit,%eax ++ jb critical_region_fixup ++ cmpl $sysexit_scrit,%eax ++ jb 11f ++ cmpl $sysexit_ecrit,%eax ++ ja 11f ++ addl $OLDESP,%esp # Remove eflags...ebx from stack frame. ++11: push %esp ++ CFI_ADJUST_CFA_OFFSET 4 ++ call evtchn_do_upcall ++ add $4,%esp ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp ret_from_intr ++ CFI_ENDPROC ++ ++# [How we do the fixup]. We want to merge the current stack frame with the ++# just-interrupted frame. How we do this depends on where in the critical ++# region the interrupted handler was executing, and so how many saved ++# registers are in each frame. We do this quickly using the lookup table ++# 'critical_fixup_table'. For each byte offset in the critical region, it ++# provides the number of bytes which have already been popped from the ++# interrupted stack frame. 
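critical_region_fixup below merges the freshly saved frame with the partially popped one it interrupted: the byte table maps the offset into the critical region to how many bytes were already popped, and the copy loop slides exactly those re-saved words up until they adjoin the old frame's remainder. A loose C model of the merge (FRAME_WORDS matches the 0x34-byte EBX..EFLAGS layout above; everything else is illustrative):

#include <stdio.h>
#include <string.h>

#define FRAME_WORDS 13  /* EBX..EFLAGS, the 0x34 bytes up to OLDESP above */

/* The interrupted activation had already popped `popped` words of its
 * frame; their re-saved copies sit at the bottom of the new frame.
 * Slide exactly those words up until they adjoin the old frame's
 * remainder, producing one complete merged frame. */
static unsigned int *merge(unsigned int *new_frame, size_t popped)
{
    unsigned int *dst = new_frame + FRAME_WORDS - popped;

    memmove(dst, new_frame, popped * sizeof(*new_frame));
    return dst;     /* the merged frame's new top of stack */
}

int main(void)
{
    unsigned int stack[2 * FRAME_WORDS];
    unsigned int i, *top;

    for (i = 0; i < 2 * FRAME_WORDS; i++)
        stack[i] = i;
    top = merge(stack, 3);  /* 3 words were already popped */
    printf("merged frame starts at word %u\n", (unsigned int)(top - stack));
    return 0;
}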
++critical_region_fixup: ++ movzbl critical_fixup_table-scrit(%eax),%ecx # %eax contains num bytes popped ++ cmpb $0xff,%cl # 0xff => vcpu_info critical region ++ jne 15f ++ xorl %ecx,%ecx ++15: leal (%esp,%ecx),%esi # %esi points at end of src region ++ leal OLDESP(%esp),%edi # %edi points at end of dst region ++ shrl $2,%ecx # convert words to bytes ++ je 17f # skip loop if nothing to copy ++16: subl $4,%esi # pre-decrementing copy loop ++ subl $4,%edi ++ movl (%esi),%eax ++ movl %eax,(%edi) ++ loop 16b ++17: movl %edi,%esp # final %edi is top of merged stack ++ jmp 11b ++ ++.section .rodata,"a" ++critical_fixup_table: ++ .byte 0xff,0xff,0xff # testb $0xff,(%esi) = __TEST_PENDING ++ .byte 0xff,0xff # jnz 14f ++ .byte 0x00 # pop %ebx ++ .byte 0x04 # pop %ecx ++ .byte 0x08 # pop %edx ++ .byte 0x0c # pop %esi ++ .byte 0x10 # pop %edi ++ .byte 0x14 # pop %ebp ++ .byte 0x18 # pop %eax ++ .byte 0x1c # pop %ds ++ .byte 0x20 # pop %es ++ .byte 0x24,0x24,0x24 # add $4,%esp ++ .byte 0x28 # iret ++ .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) ++ .byte 0x00,0x00 # jmp 11b ++.previous ++ ++# Hypervisor uses this for application faults while it executes. ++# We get here for two reasons: ++# 1. Fault while reloading DS, ES, FS or GS ++# 2. Fault while executing IRET ++# Category 1 we fix up by reattempting the load, and zeroing the segment ++# register if the load fails. ++# Category 2 we fix up by jumping to do_iret_error. We cannot use the ++# normal Linux return path in this case because if we use the IRET hypercall ++# to pop the stack frame we end up in an infinite loop of failsafe callbacks. ++# We distinguish between categories by maintaining a status value in EAX. ++ENTRY(failsafe_callback) ++ pushl %eax ++ movl $1,%eax ++1: mov 4(%esp),%ds ++2: mov 8(%esp),%es ++3: mov 12(%esp),%fs ++4: mov 16(%esp),%gs ++ testl %eax,%eax ++ popl %eax ++ jz 5f ++ addl $16,%esp # EAX != 0 => Category 2 (Bad IRET) ++ jmp iret_exc ++5: addl $16,%esp # EAX == 0 => Category 1 (Bad segment) ++ RING0_INT_FRAME ++ pushl $0 ++ SAVE_ALL ++ jmp ret_from_exception ++.section .fixup,"ax"; \ ++6: xorl %eax,%eax; \ ++ movl %eax,4(%esp); \ ++ jmp 1b; \ ++7: xorl %eax,%eax; \ ++ movl %eax,8(%esp); \ ++ jmp 2b; \ ++8: xorl %eax,%eax; \ ++ movl %eax,12(%esp); \ ++ jmp 3b; \ ++9: xorl %eax,%eax; \ ++ movl %eax,16(%esp); \ ++ jmp 4b; \ ++.previous; \ ++.section __ex_table,"a"; \ ++ .align 4; \ ++ .long 1b,6b; \ ++ .long 2b,7b; \ ++ .long 3b,8b; \ ++ .long 4b,9b; \ ++.previous ++#endif ++ CFI_ENDPROC ++ ++ENTRY(coprocessor_error) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_coprocessor_error ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(simd_coprocessor_error) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_simd_coprocessor_error ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(device_not_available) ++ RING0_INT_FRAME ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++#ifndef CONFIG_XEN ++ movl %cr0, %eax ++ testl $0x4, %eax # EM (math emulation bit) ++ je device_available_emulate ++ pushl $0 # temporary storage for ORIG_EIP ++ CFI_ADJUST_CFA_OFFSET 4 ++ call math_emulate ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp ret_from_exception ++device_available_emulate: ++#endif ++ preempt_stop ++ call math_state_restore ++ jmp ret_from_exception ++ CFI_ENDPROC ++ ++#ifndef CONFIG_XEN ++/* ++ * Debug traps and NMI can happen at the one SYSENTER instruction ++ * that sets up the real kernel stack. 
Check here, since we can't ++ * allow the wrong stack to be used. ++ * ++ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have ++ * already pushed 3 words if it hits on the sysenter instruction: ++ * eflags, cs and eip. ++ * ++ * We just load the right stack, and push the three (known) values ++ * by hand onto the new stack - while updating the return eip past ++ * the instruction that would have done it for sysenter. ++ */ ++#define FIX_STACK(offset, ok, label) \ ++ cmpw $__KERNEL_CS,4(%esp); \ ++ jne ok; \ ++label: \ ++ movl SYSENTER_stack_esp0+offset(%esp),%esp; \ ++ pushfl; \ ++ pushl $__KERNEL_CS; \ ++ pushl $sysenter_past_esp ++#endif /* CONFIG_XEN */ ++ ++KPROBE_ENTRY(debug) ++ RING0_INT_FRAME ++#ifndef CONFIG_XEN ++ cmpl $sysenter_entry,(%esp) ++ jne debug_stack_correct ++ FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) ++debug_stack_correct: ++#endif /* !CONFIG_XEN */ ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # error code 0 ++ movl %esp,%eax # pt_regs pointer ++ call do_debug ++ jmp ret_from_exception ++ CFI_ENDPROC ++ .previous .text ++#ifndef CONFIG_XEN ++/* ++ * NMI is doubly nasty. It can happen _while_ we're handling ++ * a debug fault, and the debug fault hasn't yet been able to ++ * clear up the stack. So we first check whether we got an ++ * NMI on the sysenter entry path, but after that we need to ++ * check whether we got an NMI on the debug path where the debug ++ * fault happened on the sysenter path. ++ */ ++ENTRY(nmi) ++ RING0_INT_FRAME ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ movl %ss, %eax ++ cmpw $__ESPFIX_SS, %ax ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ je nmi_16bit_stack ++ cmpl $sysenter_entry,(%esp) ++ je nmi_stack_fixup ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ movl %esp,%eax ++ /* Do not access memory above the end of our stack page, ++ * it might not exist. 
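Before peeking at 12(%esp) and 16(%esp), the NMI code that follows masks %esp with THREAD_SIZE-1 and compares against THREAD_SIZE-20: if fewer than 20 bytes remain up to the end of the thread-stack area, those reads could touch a nonexistent page and are skipped. The bound in C, with THREAD_SIZE assumed to be 8 KiB:

#include <stdio.h>

#define THREAD_SIZE 8192ul  /* assumed two-page i386 thread stack */

/* Reading up to esp+16 is safe only if the 20-byte window starting
 * at esp lies entirely inside the thread-stack area. */
static int can_peek_above(unsigned long esp)
{
    return (esp & (THREAD_SIZE - 1)) < THREAD_SIZE - 20;
}

int main(void)
{
    printf("%d\n", can_peek_above(0xc12ff000ul));   /* 1: plenty of room */
    printf("%d\n", can_peek_above(0xc12ffff0ul));   /* 0: only 16 bytes left */
    return 0;
}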
++ */ ++ andl $(THREAD_SIZE-1),%eax ++ cmpl $(THREAD_SIZE-20),%eax ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ jae nmi_stack_correct ++ cmpl $sysenter_entry,12(%esp) ++ je nmi_debug_stack_check ++nmi_stack_correct: ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_nmi ++ jmp restore_nocheck_notrace ++ CFI_ENDPROC ++ ++nmi_stack_fixup: ++ FIX_STACK(12,nmi_stack_correct, 1) ++ jmp nmi_stack_correct ++nmi_debug_stack_check: ++ cmpw $__KERNEL_CS,16(%esp) ++ jne nmi_stack_correct ++ cmpl $debug,(%esp) ++ jb nmi_stack_correct ++ cmpl $debug_esp_fix_insn,(%esp) ++ ja nmi_stack_correct ++ FIX_STACK(24,nmi_stack_correct, 1) ++ jmp nmi_stack_correct ++ ++nmi_16bit_stack: ++ RING0_INT_FRAME ++ /* create the pointer to lss back */ ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl %esp ++ CFI_ADJUST_CFA_OFFSET 4 ++ movzwl %sp, %esp ++ addw $4, (%esp) ++ /* copy the iret frame of 12 bytes */ ++ .rept 3 ++ pushl 16(%esp) ++ CFI_ADJUST_CFA_OFFSET 4 ++ .endr ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ FIXUP_ESPFIX_STACK # %eax == %esp ++ CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved ++ xorl %edx,%edx # zero error code ++ call do_nmi ++ RESTORE_REGS ++ lss 12+4(%esp), %esp # back to 16bit stack ++1: iret ++ CFI_ENDPROC ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++#else ++ENTRY(nmi) ++ RING0_INT_FRAME ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_nmi ++ orl $NMI_MASK, EFLAGS(%esp) ++ jmp restore_all ++ CFI_ENDPROC ++#endif ++ ++KPROBE_ENTRY(int3) ++ RING0_INT_FRAME ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_int3 ++ jmp ret_from_exception ++ CFI_ENDPROC ++ .previous .text ++ ++ENTRY(overflow) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_overflow ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(bounds) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_bounds ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(invalid_op) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_invalid_op ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(coprocessor_segment_overrun) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_coprocessor_segment_overrun ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(invalid_TSS) ++ RING0_EC_FRAME ++ pushl $do_invalid_TSS ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(segment_not_present) ++ RING0_EC_FRAME ++ pushl $do_segment_not_present ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(stack_segment) ++ RING0_EC_FRAME ++ pushl $do_stack_segment ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++KPROBE_ENTRY(general_protection) ++ RING0_EC_FRAME ++ pushl $do_general_protection ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ .previous .text ++ ++ENTRY(alignment_check) ++ RING0_EC_FRAME ++ pushl $do_alignment_check ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++KPROBE_ENTRY(page_fault) ++ RING0_EC_FRAME ++ pushl $do_page_fault ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ .previous .text ++ ++#ifdef CONFIG_X86_MCE ++ENTRY(machine_check) ++ RING0_INT_FRAME ++ pushl $0 ++ 
CFI_ADJUST_CFA_OFFSET 4 ++ pushl machine_check_vector ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++#endif ++ ++#ifndef CONFIG_XEN ++ENTRY(spurious_interrupt_bug) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_spurious_interrupt_bug ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++#endif /* !CONFIG_XEN */ ++ ++#ifdef CONFIG_STACK_UNWIND ++ENTRY(arch_unwind_init_running) ++ CFI_STARTPROC ++ movl 4(%esp), %edx ++ movl (%esp), %ecx ++ leal 4(%esp), %eax ++ movl %ebx, EBX(%edx) ++ xorl %ebx, %ebx ++ movl %ebx, ECX(%edx) ++ movl %ebx, EDX(%edx) ++ movl %esi, ESI(%edx) ++ movl %edi, EDI(%edx) ++ movl %ebp, EBP(%edx) ++ movl %ebx, EAX(%edx) ++ movl $__USER_DS, DS(%edx) ++ movl $__USER_DS, ES(%edx) ++ movl %ebx, ORIG_EAX(%edx) ++ movl %ecx, EIP(%edx) ++ movl 12(%esp), %ecx ++ movl $__KERNEL_CS, CS(%edx) ++ movl %ebx, EFLAGS(%edx) ++ movl %eax, OLDESP(%edx) ++ movl 8(%esp), %eax ++ movl %ecx, 8(%esp) ++ movl EBX(%edx), %ebx ++ movl $__KERNEL_DS, OLDSS(%edx) ++ jmpl *%eax ++ CFI_ENDPROC ++ENDPROC(arch_unwind_init_running) ++#endif ++ ++ENTRY(fixup_4gb_segment) ++ RING0_EC_FRAME ++ pushl $do_fixup_4gb_segment ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++.section .rodata,"a" ++#include "syscall_table.S" ++ ++syscall_table_size=(.-sys_call_table) +Index: head-2008-11-25/arch/x86/kernel/fixup.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/fixup.c 2008-01-28 12:24:18.000000000 +0100 +@@ -0,0 +1,88 @@ ++/****************************************************************************** ++ * fixup.c ++ * ++ * Binary-rewriting of certain IA32 instructions, on notification by Xen. ++ * Used to avoid repeated slow emulation of common instructions used by the ++ * user-space TLS (Thread-Local Storage) libraries. ++ * ++ * **** NOTE **** ++ * Issues with the binary rewriting have caused it to be removed. Instead ++ * we rely on Xen's emulator to boot the kernel, and then print a banner ++ * message recommending that the user disables /lib/tls. ++ * ++ * Copyright (c) 2004, K A Fraser ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DP(_f, _args...) printk(KERN_ALERT " " _f "\n" , ## _args ) ++ ++fastcall void do_fixup_4gb_segment(struct pt_regs *regs, long error_code) ++{ ++ static unsigned long printed = 0; ++ char info[100]; ++ int i; ++ ++ /* Ignore statically-linked init. 
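do_fixup_4gb_segment() below prints its banner only once, using test_and_set_bit() on a static word as a latch so concurrent faults cannot double-print. The same print-once idiom expressed with C11 atomics rather than the kernel bitops (a sketch; names illustrative):

#include <stdio.h>
#include <stdatomic.h>

static atomic_flag printed = ATOMIC_FLAG_INIT;

/* The first caller wins the flag and prints; every later caller
 * returns early, like the test_and_set_bit() latch below. */
static void warn_once(const char *msg)
{
    if (atomic_flag_test_and_set(&printed))
        return;
    fprintf(stderr, "%s\n", msg);
}

int main(void)
{
    warn_once("emulating /lib/tls accesses; install a nosegneg glibc");
    warn_once("emulating /lib/tls accesses; install a nosegneg glibc"); /* silent */
    return 0;
}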
*/ ++ if (current->tgid == 1) ++ return; ++ ++ VOID(HYPERVISOR_vm_assist(VMASST_CMD_disable, ++ VMASST_TYPE_4gb_segments_notify)); ++ ++ if (test_and_set_bit(0, &printed)) ++ return; ++ ++ sprintf(info, "%s (pid=%d)", current->comm, current->tgid); ++ ++ DP(""); ++ DP("***************************************************************"); ++ DP("***************************************************************"); ++ DP("** WARNING: Currently emulating unsupported memory accesses **"); ++ DP("** in /lib/tls glibc libraries. The emulation is **"); ++ DP("** slow. To ensure full performance you should **"); ++ DP("** install a 'xen-friendly' (nosegneg) version of **"); ++ DP("** the library, or disable tls support by executing **"); ++ DP("** the following as root: **"); ++ DP("** mv /lib/tls /lib/tls.disabled **"); ++ DP("** Offending process: %-38.38s **", info); ++ DP("***************************************************************"); ++ DP("***************************************************************"); ++ DP(""); ++ ++ for (i = 5; i > 0; i--) { ++ touch_softlockup_watchdog(); ++ printk("Pausing... %d", i); ++ mdelay(1000); ++ printk("\b\b\b\b\b\b\b\b\b\b\b\b"); ++ } ++ ++ printk("Continuing...\n\n"); ++} ++ ++static int __init fixup_init(void) ++{ ++ WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, ++ VMASST_TYPE_4gb_segments_notify)); ++ return 0; ++} ++__initcall(fixup_init); +Index: head-2008-11-25/arch/x86/kernel/head_32-xen.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/head_32-xen.S 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,207 @@ ++ ++ ++.text ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * References to members of the new_cpu_data structure. 
++ */ ++ ++#define X86 new_cpu_data+CPUINFO_x86 ++#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor ++#define X86_MODEL new_cpu_data+CPUINFO_x86_model ++#define X86_MASK new_cpu_data+CPUINFO_x86_mask ++#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math ++#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level ++#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability ++#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id ++ ++#define VIRT_ENTRY_OFFSET 0x0 ++.org VIRT_ENTRY_OFFSET ++ENTRY(startup_32) ++ movl %esi,xen_start_info ++ cld ++ ++ /* Set up the stack pointer */ ++ movl $(init_thread_union+THREAD_SIZE),%esp ++ ++ /* get vendor info */ ++ xorl %eax,%eax # call CPUID with 0 -> return vendor ID ++ XEN_CPUID ++ movl %eax,X86_CPUID # save CPUID level ++ movl %ebx,X86_VENDOR_ID # lo 4 chars ++ movl %edx,X86_VENDOR_ID+4 # next 4 chars ++ movl %ecx,X86_VENDOR_ID+8 # last 4 chars ++ ++ movl $1,%eax # Use the CPUID instruction to get CPU type ++ XEN_CPUID ++ movb %al,%cl # save reg for future use ++ andb $0x0f,%ah # mask processor family ++ movb %ah,X86 ++ andb $0xf0,%al # mask model ++ shrb $4,%al ++ movb %al,X86_MODEL ++ andb $0x0f,%cl # mask mask revision ++ movb %cl,X86_MASK ++ movl %edx,X86_CAPABILITY ++ ++ movb $1,X86_HARD_MATH ++ ++ xorl %eax,%eax # Clear FS/GS and LDT ++ movl %eax,%fs ++ movl %eax,%gs ++ cld # gcc2 wants the direction flag cleared at all times ++ ++ pushl %eax # fake return address ++ jmp start_kernel ++ ++#define HYPERCALL_PAGE_OFFSET 0x1000 ++.org HYPERCALL_PAGE_OFFSET ++ENTRY(hypercall_page) ++ CFI_STARTPROC ++.skip 0x1000 ++ CFI_ENDPROC ++ ++/* ++ * Real beginning of normal "text" segment ++ */ ++ENTRY(stext) ++ENTRY(_stext) ++ ++/* ++ * BSS section ++ */ ++.section ".bss.page_aligned","w" ++ENTRY(empty_zero_page) ++ .fill 4096,1,0 ++ ++/* ++ * This starts the data section. ++ */ ++.data ++ ++/* ++ * The Global Descriptor Table contains 28 quadwords, per-CPU. ++ */ ++ .align L1_CACHE_BYTES ++ENTRY(cpu_gdt_table) ++ .quad 0x0000000000000000 /* NULL descriptor */ ++ .quad 0x0000000000000000 /* 0x0b reserved */ ++ .quad 0x0000000000000000 /* 0x13 reserved */ ++ .quad 0x0000000000000000 /* 0x1b reserved */ ++ .quad 0x0000000000000000 /* 0x20 unused */ ++ .quad 0x0000000000000000 /* 0x28 unused */ ++ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ ++ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ ++ .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ ++ .quad 0x0000000000000000 /* 0x4b reserved */ ++ .quad 0x0000000000000000 /* 0x53 reserved */ ++ .quad 0x0000000000000000 /* 0x5b reserved */ ++ ++ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ ++ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ ++ .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ ++ .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ ++ ++ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ ++ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ ++ ++ /* ++ * Segments used for calling PnP BIOS have byte granularity. ++ * They code segments and data segments have fixed 64k limits, ++ * the transfer segment sizes are set at run time. ++ */ ++ .quad 0x0000000000000000 /* 0x90 32-bit code */ ++ .quad 0x0000000000000000 /* 0x98 16-bit code */ ++ .quad 0x0000000000000000 /* 0xa0 16-bit data */ ++ .quad 0x0000000000000000 /* 0xa8 16-bit data */ ++ .quad 0x0000000000000000 /* 0xb0 16-bit data */ ++ ++ /* ++ * The APM segments have byte granularity and their bases ++ * are set at run time. All have 64k limits. 
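++ *
++ * (For reference, each .quad in this table packs base, limit and
++ * flags: the kernel 4GB code entry 0x00cf9a000000ffff above is
++ * base 0x00000000, limit 0xfffff in 4K units, i.e. 4GB, with
++ * access byte 0x9a = present, DPL 0, readable code.)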
++ */ ++ .quad 0x0000000000000000 /* 0xb8 APM CS code */ ++ .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */ ++ .quad 0x0000000000000000 /* 0xc8 APM DS data */ ++ ++ .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */ ++ .quad 0x0000000000000000 /* 0xd8 - unused */ ++ .quad 0x0000000000000000 /* 0xe0 - unused */ ++ .quad 0x0000000000000000 /* 0xe8 - unused */ ++ .quad 0x0000000000000000 /* 0xf0 - unused */ ++ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ ++ ++#if CONFIG_XEN_COMPAT <= 0x030002 ++/* ++ * __xen_guest information ++ */ ++.macro utoa value ++ .if (\value) < 0 || (\value) >= 0x10 ++ utoa (((\value)>>4)&0x0fffffff) ++ .endif ++ .if ((\value) & 0xf) < 10 ++ .byte '0' + ((\value) & 0xf) ++ .else ++ .byte 'A' + ((\value) & 0xf) - 10 ++ .endif ++.endm ++ ++.section __xen_guest ++ .ascii "GUEST_OS=linux,GUEST_VER=2.6" ++ .ascii ",XEN_VER=xen-3.0" ++ .ascii ",VIRT_BASE=0x" ++ utoa __PAGE_OFFSET ++ .ascii ",ELF_PADDR_OFFSET=0x" ++ utoa __PAGE_OFFSET ++ .ascii ",VIRT_ENTRY=0x" ++ utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET) ++ .ascii ",HYPERCALL_PAGE=0x" ++ utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) ++ .ascii ",FEATURES=writable_page_tables" ++ .ascii "|writable_descriptor_tables" ++ .ascii "|auto_translated_physmap" ++ .ascii "|pae_pgdir_above_4gb" ++ .ascii "|supervisor_mode_kernel" ++#ifdef CONFIG_X86_PAE ++ .ascii ",PAE=yes[extended-cr3]" ++#else ++ .ascii ",PAE=no" ++#endif ++ .ascii ",LOADER=generic" ++ .byte 0 ++#endif /* CONFIG_XEN_COMPAT <= 0x030002 */ ++ ++ ++ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux") ++ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6") ++ ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") ++ ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, __PAGE_OFFSET) ++#if CONFIG_XEN_COMPAT <= 0x030002 ++ ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, __PAGE_OFFSET) ++#else ++ ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, 0) ++#endif ++ ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32) ++ ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page) ++ ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START) ++ ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel") ++#ifdef CONFIG_X86_PAE ++ ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes") ++ ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT) ++#else ++ ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no") ++ ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, _PAGE_PRESENT,_PAGE_PRESENT) ++#endif ++ ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic") ++ ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1) +Index: head-2008-11-25/arch/x86/kernel/init_task-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/init_task-xen.c 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,51 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++static struct fs_struct init_fs = INIT_FS; ++static struct files_struct init_files = INIT_FILES; ++static struct signal_struct init_signals = INIT_SIGNALS(init_signals); ++static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); ++ ++#define swapper_pg_dir ((pgd_t *)NULL) ++struct mm_struct init_mm = INIT_MM(init_mm); ++#undef swapper_pg_dir ++ ++EXPORT_SYMBOL(init_mm); ++ ++/* ++ * Initial thread 
structure. ++ * ++ * We need to make sure that this is THREAD_SIZE aligned due to the ++ * way process stacks are handled. This is done by having a special ++ * "init_task" linker map entry.. ++ */ ++union thread_union init_thread_union ++ __attribute__((__section__(".data.init_task"))) = ++ { INIT_THREAD_INFO(init_task) }; ++ ++/* ++ * Initial task structure. ++ * ++ * All other task structs will be allocated on slabs in fork.c ++ */ ++struct task_struct init_task = INIT_TASK(init_task); ++ ++EXPORT_SYMBOL(init_task); ++ ++#ifndef CONFIG_X86_NO_TSS ++/* ++ * per-CPU TSS segments. Threads are completely 'soft' on Linux, ++ * no more per-task TSS's. ++ */ ++DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; ++#endif ++ +Index: head-2008-11-25/arch/x86/kernel/io_apic_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/io_apic_32-xen.c 2008-11-25 12:22:34.000000000 +0100 +@@ -0,0 +1,2776 @@ ++/* ++ * Intel IO-APIC support for multi-Pentium hosts. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo ++ * ++ * Many thanks to Stig Venaas for trying out countless experimental ++ * patches and reporting/debugging problems patiently! ++ * ++ * (c) 1999, Multiple IO-APIC support, developed by ++ * Ken-ichi Yaku and ++ * Hidemi Kishimoto , ++ * further tested and cleaned up by Zach Brown ++ * and Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively ++ * Paul Diefenbaugh : Added full ACPI support ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "io_ports.h" ++ ++#ifdef CONFIG_XEN ++ ++#include ++#include ++#include ++ ++/* Fake i8259 */ ++#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) ++#define disable_8259A_irq(_irq) ((void)0) ++#define i8259A_irq_pending(_irq) (0) ++ ++unsigned long io_apic_irqs; ++ ++static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg) ++{ ++ struct physdev_apic apic_op; ++ int ret; ++ ++ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; ++ apic_op.reg = reg; ++ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); ++ if (ret) ++ return ret; ++ return apic_op.value; ++} ++ ++static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) ++{ ++ struct physdev_apic apic_op; ++ ++ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; ++ apic_op.reg = reg; ++ apic_op.value = value; ++ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); ++} ++ ++#define io_apic_read(a,r) xen_io_apic_read(a,r) ++#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v) ++ ++#endif /* CONFIG_XEN */ ++ ++int (*ioapic_renumber_irq)(int ioapic, int irq); ++atomic_t irq_mis_count; ++ ++/* Where if anywhere is the i8259 connect in external int mode */ ++static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; ++ ++static DEFINE_SPINLOCK(ioapic_lock); ++static DEFINE_SPINLOCK(vector_lock); ++ ++int timer_over_8254 __initdata = 1; ++ ++/* ++ * Is the SiS APIC rmw bug present ? 
++ * -1 = don't know, 0 = no, 1 = yes ++ */ ++int sis_apic_bug = -1; ++ ++/* ++ * # of IRQ routing registers ++ */ ++int nr_ioapic_registers[MAX_IO_APICS]; ++ ++int disable_timer_pin_1 __initdata; ++ ++/* ++ * Rough estimation of how many shared IRQs there are, can ++ * be changed anytime. ++ */ ++#define MAX_PLUS_SHARED_IRQS NR_IRQS ++#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) ++ ++/* ++ * This is performance-critical, we want to do it O(1) ++ * ++ * the indexing order of this array favors 1:1 mappings ++ * between pins and IRQs. ++ */ ++ ++static struct irq_pin_list { ++ int apic, pin, next; ++} irq_2_pin[PIN_MAP_SIZE]; ++ ++int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; ++#ifdef CONFIG_PCI_MSI ++#define vector_to_irq(vector) \ ++ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) ++#else ++#define vector_to_irq(vector) (vector) ++#endif ++ ++/* ++ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are ++ * shared ISA-space IRQs, so we have to support them. We are super ++ * fast in the common case, and fast for shared ISA-space IRQs. ++ */ ++static void add_pin_to_irq(unsigned int irq, int apic, int pin) ++{ ++ static int first_free_entry = NR_IRQS; ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ ++ while (entry->next) ++ entry = irq_2_pin + entry->next; ++ ++ if (entry->pin != -1) { ++ entry->next = first_free_entry; ++ entry = irq_2_pin + entry->next; ++ if (++first_free_entry >= PIN_MAP_SIZE) ++ panic("io_apic.c: whoops"); ++ } ++ entry->apic = apic; ++ entry->pin = pin; ++} ++ ++#ifdef CONFIG_XEN ++#define clear_IO_APIC() ((void)0) ++#else ++/* ++ * Reroute an IRQ to a different pin. ++ */ ++static void __init replace_pin_at_irq(unsigned int irq, ++ int oldapic, int oldpin, ++ int newapic, int newpin) ++{ ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ ++ while (1) { ++ if (entry->apic == oldapic && entry->pin == oldpin) { ++ entry->apic = newapic; ++ entry->pin = newpin; ++ } ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++} ++ ++static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) ++{ ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ unsigned int pin, reg; ++ ++ for (;;) { ++ pin = entry->pin; ++ if (pin == -1) ++ break; ++ reg = io_apic_read(entry->apic, 0x10 + pin*2); ++ reg &= ~disable; ++ reg |= enable; ++ io_apic_modify(entry->apic, 0x10 + pin*2, reg); ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++} ++ ++/* mask = 1 */ ++static void __mask_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0x00010000, 0); ++} ++ ++/* mask = 0 */ ++static void __unmask_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0, 0x00010000); ++} ++ ++/* mask = 1, trigger = 0 */ ++static void __mask_and_edge_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); ++} ++ ++/* mask = 0, trigger = 1 */ ++static void __unmask_and_level_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); ++} ++ ++static void mask_IO_APIC_irq (unsigned int irq) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __mask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void unmask_IO_APIC_irq (unsigned int irq) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __unmask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void clear_IO_APIC_pin(unsigned int 
apic, unsigned int pin) ++{ ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ /* Check delivery_mode to be sure we're not clearing an SMI pin */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); ++ *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ if (entry.delivery_mode == dest_SMI) ++ return; ++ ++ /* ++ * Disable it in the IO-APIC irq-routing table: ++ */ ++ memset(&entry, 0, sizeof(entry)); ++ entry.mask = 1; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); ++ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void clear_IO_APIC (void) ++{ ++ int apic, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ++ clear_IO_APIC_pin(apic, pin); ++} ++ ++#ifdef CONFIG_SMP ++static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) ++{ ++ unsigned long flags; ++ int pin; ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ unsigned int apicid_value; ++ cpumask_t tmp; ++ ++ cpus_and(tmp, cpumask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ tmp = TARGET_CPUS; ++ ++ cpus_and(cpumask, tmp, CPU_MASK_ALL); ++ ++ apicid_value = cpu_mask_to_apicid(cpumask); ++ /* Prepare to do the io_apic_write */ ++ apicid_value = apicid_value << 24; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ for (;;) { ++ pin = entry->pin; ++ if (pin == -1) ++ break; ++ io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++ set_irq_info(irq, cpumask); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++#if defined(CONFIG_IRQBALANCE) ++# include /* kernel_thread() */ ++# include /* kstat */ ++# include /* kmalloc() */ ++# include /* time_after() */ ++ ++#ifdef CONFIG_BALANCED_IRQ_DEBUG ++# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) ++# define Dprintk(x...) do { TDprintk(x); } while (0) ++# else ++# define TDprintk(x...) ++# define Dprintk(x...) ++# endif ++ ++#define IRQBALANCE_CHECK_ARCH -999 ++#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) ++#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) ++#define BALANCED_IRQ_MORE_DELTA (HZ/10) ++#define BALANCED_IRQ_LESS_DELTA (HZ) ++ ++static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; ++static int physical_balance __read_mostly; ++static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; ++ ++static struct irq_cpu_info { ++ unsigned long * last_irq; ++ unsigned long * irq_delta; ++ unsigned long irq; ++} irq_cpu_data[NR_CPUS]; ++ ++#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) ++#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) ++#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) ++ ++#define IDLE_ENOUGH(cpu,now) \ ++ (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) ++ ++#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) ++ ++#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) ++ ++static cpumask_t balance_irq_affinity[NR_IRQS] = { ++ [0 ... 
NR_IRQS-1] = CPU_MASK_ALL ++}; ++ ++void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) ++{ ++ balance_irq_affinity[irq] = mask; ++} ++ ++static unsigned long move(int curr_cpu, cpumask_t allowed_mask, ++ unsigned long now, int direction) ++{ ++ int search_idle = 1; ++ int cpu = curr_cpu; ++ ++ goto inside; ++ ++ do { ++ if (unlikely(cpu == curr_cpu)) ++ search_idle = 0; ++inside: ++ if (direction == 1) { ++ cpu++; ++ if (cpu >= NR_CPUS) ++ cpu = 0; ++ } else { ++ cpu--; ++ if (cpu == -1) ++ cpu = NR_CPUS-1; ++ } ++ } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || ++ (search_idle && !IDLE_ENOUGH(cpu,now))); ++ ++ return cpu; ++} ++ ++static inline void balance_irq(int cpu, int irq) ++{ ++ unsigned long now = jiffies; ++ cpumask_t allowed_mask; ++ unsigned int new_cpu; ++ ++ if (irqbalance_disabled) ++ return; ++ ++ cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); ++ new_cpu = move(cpu, allowed_mask, now, 1); ++ if (cpu != new_cpu) { ++ set_pending_irq(irq, cpumask_of_cpu(new_cpu)); ++ } ++} ++ ++static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) ++{ ++ int i, j; ++ Dprintk("Rotating IRQs among CPUs.\n"); ++ for_each_online_cpu(i) { ++ for (j = 0; j < NR_IRQS; j++) { ++ if (!irq_desc[j].action) ++ continue; ++ /* Is it a significant load ? */ ++ if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < ++ useful_load_threshold) ++ continue; ++ balance_irq(i, j); ++ } ++ } ++ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, ++ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); ++ return; ++} ++ ++static void do_irq_balance(void) ++{ ++ int i, j; ++ unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); ++ unsigned long move_this_load = 0; ++ int max_loaded = 0, min_loaded = 0; ++ int load; ++ unsigned long useful_load_threshold = balanced_irq_interval + 10; ++ int selected_irq; ++ int tmp_loaded, first_attempt = 1; ++ unsigned long tmp_cpu_irq; ++ unsigned long imbalance = 0; ++ cpumask_t allowed_mask, target_cpu_mask, tmp; ++ ++ for_each_possible_cpu(i) { ++ int package_index; ++ CPU_IRQ(i) = 0; ++ if (!cpu_online(i)) ++ continue; ++ package_index = CPU_TO_PACKAGEINDEX(i); ++ for (j = 0; j < NR_IRQS; j++) { ++ unsigned long value_now, delta; ++ /* Is this an active IRQ? */ ++ if (!irq_desc[j].action) ++ continue; ++ if ( package_index == i ) ++ IRQ_DELTA(package_index,j) = 0; ++ /* Determine the total count per processor per IRQ */ ++ value_now = (unsigned long) kstat_cpu(i).irqs[j]; ++ ++ /* Determine the activity per processor per IRQ */ ++ delta = value_now - LAST_CPU_IRQ(i,j); ++ ++ /* Update last_cpu_irq[][] for the next time */ ++ LAST_CPU_IRQ(i,j) = value_now; ++ ++ /* Ignore IRQs whose rate is less than the clock */ ++ if (delta < useful_load_threshold) ++ continue; ++ /* update the load for the processor or package total */ ++ IRQ_DELTA(package_index,j) += delta; ++ ++ /* Keep track of the higher numbered sibling as well */ ++ if (i != package_index) ++ CPU_IRQ(i) += delta; ++ /* ++ * We have sibling A and sibling B in the package ++ * ++ * cpu_irq[A] = load for cpu A + load for cpu B ++ * cpu_irq[B] = load for cpu B ++ */ ++ CPU_IRQ(package_index) += delta; ++ } ++ } ++ /* Find the least loaded processor package */ ++ for_each_online_cpu(i) { ++ if (i != CPU_TO_PACKAGEINDEX(i)) ++ continue; ++ if (min_cpu_irq > CPU_IRQ(i)) { ++ min_cpu_irq = CPU_IRQ(i); ++ min_loaded = i; ++ } ++ } ++ max_cpu_irq = ULONG_MAX; ++ ++tryanothercpu: ++ /* Look for heaviest loaded processor. 
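++ * Only package leaders are considered (i != CPU_TO_PACKAGEINDEX(i)
++ * is skipped), since CPU_IRQ() of the lowest-numbered sibling
++ * already accumulates the whole package's load.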
++ * We may come back to get the next heaviest loaded processor. ++ * Skip processors with trivial loads. ++ */ ++ tmp_cpu_irq = 0; ++ tmp_loaded = -1; ++ for_each_online_cpu(i) { ++ if (i != CPU_TO_PACKAGEINDEX(i)) ++ continue; ++ if (max_cpu_irq <= CPU_IRQ(i)) ++ continue; ++ if (tmp_cpu_irq < CPU_IRQ(i)) { ++ tmp_cpu_irq = CPU_IRQ(i); ++ tmp_loaded = i; ++ } ++ } ++ ++ if (tmp_loaded == -1) { ++ /* In the case of small number of heavy interrupt sources, ++ * loading some of the cpus too much. We use Ingo's original ++ * approach to rotate them around. ++ */ ++ if (!first_attempt && imbalance >= useful_load_threshold) { ++ rotate_irqs_among_cpus(useful_load_threshold); ++ return; ++ } ++ goto not_worth_the_effort; ++ } ++ ++ first_attempt = 0; /* heaviest search */ ++ max_cpu_irq = tmp_cpu_irq; /* load */ ++ max_loaded = tmp_loaded; /* processor */ ++ imbalance = (max_cpu_irq - min_cpu_irq) / 2; ++ ++ Dprintk("max_loaded cpu = %d\n", max_loaded); ++ Dprintk("min_loaded cpu = %d\n", min_loaded); ++ Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq); ++ Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq); ++ Dprintk("load imbalance = %lu\n", imbalance); ++ ++ /* if imbalance is less than approx 10% of max load, then ++ * observe diminishing returns action. - quit ++ */ ++ if (imbalance < (max_cpu_irq >> 3)) { ++ Dprintk("Imbalance too trivial\n"); ++ goto not_worth_the_effort; ++ } ++ ++tryanotherirq: ++ /* if we select an IRQ to move that can't go where we want, then ++ * see if there is another one to try. ++ */ ++ move_this_load = 0; ++ selected_irq = -1; ++ for (j = 0; j < NR_IRQS; j++) { ++ /* Is this an active IRQ? */ ++ if (!irq_desc[j].action) ++ continue; ++ if (imbalance <= IRQ_DELTA(max_loaded,j)) ++ continue; ++ /* Try to find the IRQ that is closest to the imbalance ++ * without going over. ++ */ ++ if (move_this_load < IRQ_DELTA(max_loaded,j)) { ++ move_this_load = IRQ_DELTA(max_loaded,j); ++ selected_irq = j; ++ } ++ } ++ if (selected_irq == -1) { ++ goto tryanothercpu; ++ } ++ ++ imbalance = move_this_load; ++ ++ /* For physical_balance case, we accumlated both load ++ * values in the one of the siblings cpu_irq[], ++ * to use the same code for physical and logical processors ++ * as much as possible. ++ * ++ * NOTE: the cpu_irq[] array holds the sum of the load for ++ * sibling A and sibling B in the slot for the lowest numbered ++ * sibling (A), _AND_ the load for sibling B in the slot for ++ * the higher numbered sibling. ++ * ++ * We seek the least loaded sibling by making the comparison ++ * (A+B)/2 vs B ++ */ ++ load = CPU_IRQ(min_loaded) >> 1; ++ for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { ++ if (load > CPU_IRQ(j)) { ++ /* This won't change cpu_sibling_map[min_loaded] */ ++ load = CPU_IRQ(j); ++ min_loaded = j; ++ } ++ } ++ ++ cpus_and(allowed_mask, ++ cpu_online_map, ++ balance_irq_affinity[selected_irq]); ++ target_cpu_mask = cpumask_of_cpu(min_loaded); ++ cpus_and(tmp, target_cpu_mask, allowed_mask); ++ ++ if (!cpus_empty(tmp)) { ++ ++ Dprintk("irq = %d moved to cpu = %d\n", ++ selected_irq, min_loaded); ++ /* mark for change destination */ ++ set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); ++ ++ /* Since we made a change, come back sooner to ++ * check for more variation. 
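++ * The update below clamps from below: the interval shrinks by
++ * BALANCED_IRQ_LESS_DELTA (HZ) per successful move but never
++ * drops under MIN_BALANCED_IRQ_INTERVAL (HZ/2).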
++ */ ++ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, ++ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); ++ return; ++ } ++ goto tryanotherirq; ++ ++not_worth_the_effort: ++ /* ++ * if we did not find an IRQ to move, then adjust the time interval ++ * upward ++ */ ++ balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, ++ balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); ++ Dprintk("IRQ worth rotating not found\n"); ++ return; ++} ++ ++static int balanced_irq(void *unused) ++{ ++ int i; ++ unsigned long prev_balance_time = jiffies; ++ long time_remaining = balanced_irq_interval; ++ ++ daemonize("kirqd"); ++ ++ /* push everything to CPU 0 to give us a starting point. */ ++ for (i = 0 ; i < NR_IRQS ; i++) { ++ irq_desc[i].pending_mask = cpumask_of_cpu(0); ++ set_pending_irq(i, cpumask_of_cpu(0)); ++ } ++ ++ for ( ; ; ) { ++ time_remaining = schedule_timeout_interruptible(time_remaining); ++ try_to_freeze(); ++ if (time_after(jiffies, ++ prev_balance_time+balanced_irq_interval)) { ++ preempt_disable(); ++ do_irq_balance(); ++ prev_balance_time = jiffies; ++ time_remaining = balanced_irq_interval; ++ preempt_enable(); ++ } ++ } ++ return 0; ++} ++ ++static int __init balanced_irq_init(void) ++{ ++ int i; ++ struct cpuinfo_x86 *c; ++ cpumask_t tmp; ++ ++ cpus_shift_right(tmp, cpu_online_map, 2); ++ c = &boot_cpu_data; ++ /* When not overwritten by the command line ask subarchitecture. */ ++ if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) ++ irqbalance_disabled = NO_BALANCE_IRQ; ++ if (irqbalance_disabled) ++ return 0; ++ ++ /* disable irqbalance completely if there is only one processor online */ ++ if (num_online_cpus() < 2) { ++ irqbalance_disabled = 1; ++ return 0; ++ } ++ /* ++ * Enable physical balance only if more than 1 physical processor ++ * is present ++ */ ++ if (smp_num_siblings > 1 && !cpus_empty(tmp)) ++ physical_balance = 1; ++ ++ for_each_online_cpu(i) { ++ irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); ++ irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); ++ if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { ++ printk(KERN_ERR "balanced_irq_init: out of memory"); ++ goto failed; ++ } ++ memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); ++ memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); ++ } ++ ++ printk(KERN_INFO "Starting balanced_irq\n"); ++ if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) ++ return 0; ++ else ++ printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); ++failed: ++ for_each_possible_cpu(i) { ++ kfree(irq_cpu_data[i].irq_delta); ++ irq_cpu_data[i].irq_delta = NULL; ++ kfree(irq_cpu_data[i].last_irq); ++ irq_cpu_data[i].last_irq = NULL; ++ } ++ return 0; ++} ++ ++int __init irqbalance_disable(char *str) ++{ ++ irqbalance_disabled = 1; ++ return 1; ++} ++ ++__setup("noirqbalance", irqbalance_disable); ++ ++late_initcall(balanced_irq_init); ++#endif /* CONFIG_IRQBALANCE */ ++#endif /* CONFIG_SMP */ ++#endif ++ ++#ifndef CONFIG_SMP ++void fastcall send_IPI_self(int vector) ++{ ++#ifndef CONFIG_XEN ++ unsigned int cfg; ++ ++ /* ++ * Wait for idle. ++ */ ++ apic_wait_icr_idle(); ++ cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; ++ /* ++ * Send the IPI. The write to APIC_ICR fires this off. 
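++ * cfg requests fixed delivery in logical mode with the "self"
++ * destination shorthand, so nothing has to be programmed into
++ * ICR2 first; the vector is OR-ed into the low byte.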
++ */ ++ apic_write_around(APIC_ICR, cfg); ++#endif ++} ++#endif /* !CONFIG_SMP */ ++ ++ ++/* ++ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to ++ * specific CPU-side IRQs. ++ */ ++ ++#define MAX_PIRQS 8 ++static int pirq_entries [MAX_PIRQS]; ++static int pirqs_enabled; ++int skip_ioapic_setup; ++ ++static int __init ioapic_setup(char *str) ++{ ++ skip_ioapic_setup = 1; ++ return 1; ++} ++ ++__setup("noapic", ioapic_setup); ++ ++static int __init ioapic_pirq_setup(char *str) ++{ ++ int i, max; ++ int ints[MAX_PIRQS+1]; ++ ++ get_options(str, ARRAY_SIZE(ints), ints); ++ ++ for (i = 0; i < MAX_PIRQS; i++) ++ pirq_entries[i] = -1; ++ ++ pirqs_enabled = 1; ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "PIRQ redirection, working around broken MP-BIOS.\n"); ++ max = MAX_PIRQS; ++ if (ints[0] < MAX_PIRQS) ++ max = ints[0]; ++ ++ for (i = 0; i < max; i++) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); ++ /* ++ * PIRQs are mapped upside down, usually. ++ */ ++ pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; ++ } ++ return 1; ++} ++ ++__setup("pirq=", ioapic_pirq_setup); ++ ++/* ++ * Find the IRQ entry number of a certain pin. ++ */ ++static int find_irq_entry(int apic, int pin, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].mpc_irqtype == type && ++ (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || ++ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && ++ mp_irqs[i].mpc_dstirq == pin) ++ return i; ++ ++ return -1; ++} ++ ++/* ++ * Find the pin to which IRQ[irq] (ISA) is connected ++ */ ++static int __init find_isa_irq_pin(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mpc_srcbus; ++ ++ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_EISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_MCA || ++ mp_bus_id_to_type[lbus] == MP_BUS_NEC98 ++ ) && ++ (mp_irqs[i].mpc_irqtype == type) && ++ (mp_irqs[i].mpc_srcbusirq == irq)) ++ ++ return mp_irqs[i].mpc_dstirq; ++ } ++ return -1; ++} ++ ++static int __init find_isa_irq_apic(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mpc_srcbus; ++ ++ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_EISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_MCA || ++ mp_bus_id_to_type[lbus] == MP_BUS_NEC98 ++ ) && ++ (mp_irqs[i].mpc_irqtype == type) && ++ (mp_irqs[i].mpc_srcbusirq == irq)) ++ break; ++ } ++ if (i < mp_irq_entries) { ++ int apic; ++ for(apic = 0; apic < nr_ioapics; apic++) { ++ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) ++ return apic; ++ } ++ } ++ ++ return -1; ++} ++ ++/* ++ * Find a specific PCI IRQ entry. 
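++ * The mptable packs a PCI interrupt source as (device << 2) | pin,
++ * hence the (mpc_srcbusirq >> 2) & 0x1f slot match and the
++ * mpc_srcbusirq & 3 pin match below.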
++ * Not an __init, possibly needed by modules ++ */ ++static int pin_2_irq(int idx, int apic, int pin); ++ ++int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) ++{ ++ int apic, i, best_guess = -1; ++ ++ apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " ++ "slot:%d, pin:%d.\n", bus, slot, pin); ++ if (mp_bus_id_to_pci_bus[bus] == -1) { ++ printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); ++ return -1; ++ } ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mpc_srcbus; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || ++ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) ++ break; ++ ++ if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && ++ !mp_irqs[i].mpc_irqtype && ++ (bus == lbus) && ++ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { ++ int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); ++ ++ if (!(apic || IO_APIC_IRQ(irq))) ++ continue; ++ ++ if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) ++ return irq; ++ /* ++ * Use the first all-but-pin matching entry as a ++ * best-guess fuzzy result for broken mptables. ++ */ ++ if (best_guess < 0) ++ best_guess = irq; ++ } ++ } ++ return best_guess; ++} ++EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); ++ ++/* ++ * This function currently is only a helper for the i386 smp boot process where ++ * we need to reprogram the ioredtbls to cater for the cpus which have come online ++ * so mask in all cases should simply be TARGET_CPUS ++ */ ++#ifdef CONFIG_SMP ++#ifndef CONFIG_XEN ++void __init setup_ioapic_dest(void) ++{ ++ int pin, ioapic, irq, irq_entry; ++ ++ if (skip_ioapic_setup == 1) ++ return; ++ ++ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { ++ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { ++ irq_entry = find_irq_entry(ioapic, pin, mp_INT); ++ if (irq_entry == -1) ++ continue; ++ irq = pin_2_irq(irq_entry, ioapic, pin); ++ set_ioapic_affinity_irq(irq, TARGET_CPUS); ++ } ++ ++ } ++} ++#endif /* !CONFIG_XEN */ ++#endif ++ ++/* ++ * EISA Edge/Level control register, ELCR ++ */ ++static int EISA_ELCR(unsigned int irq) ++{ ++ if (irq < 16) { ++ unsigned int port = 0x4d0 + (irq >> 3); ++ return (inb(port) >> (irq & 7)) & 1; ++ } ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "Broken MPtable reports ISA irq %d\n", irq); ++ return 0; ++} ++ ++/* EISA interrupts are always polarity zero and can be edge or level ++ * trigger depending on the ELCR value. If an interrupt is listed as ++ * EISA conforming in the MP table, that means its trigger type must ++ * be read in from the ELCR */ ++ ++#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) ++#define default_EISA_polarity(idx) (0) ++ ++/* ISA interrupts are always polarity zero edge triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_ISA_trigger(idx) (0) ++#define default_ISA_polarity(idx) (0) ++ ++/* PCI interrupts are always polarity one level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_PCI_trigger(idx) (1) ++#define default_PCI_polarity(idx) (1) ++ ++/* MCA interrupts are always polarity zero level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_MCA_trigger(idx) (1) ++#define default_MCA_polarity(idx) (0) ++ ++/* NEC98 interrupts are always polarity zero edge triggered, ++ * when listed as conforming in the MP table. 
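++ *
++ * Summary of the "conforming" defaults above, as (trigger, polarity):
++ * ISA and NEC98 are edge/active-high, EISA is ELCR-defined/active-high,
++ * PCI is level/active-low, MCA is level/active-high.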
*/ ++ ++#define default_NEC98_trigger(idx) (0) ++#define default_NEC98_polarity(idx) (0) ++ ++static int __init MPBIOS_polarity(int idx) ++{ ++ int bus = mp_irqs[idx].mpc_srcbus; ++ int polarity; ++ ++ /* ++ * Determine IRQ line polarity (high active or low active): ++ */ ++ switch (mp_irqs[idx].mpc_irqflag & 3) ++ { ++ case 0: /* conforms, ie. bus-type dependent polarity */ ++ { ++ switch (mp_bus_id_to_type[bus]) ++ { ++ case MP_BUS_ISA: /* ISA pin */ ++ { ++ polarity = default_ISA_polarity(idx); ++ break; ++ } ++ case MP_BUS_EISA: /* EISA pin */ ++ { ++ polarity = default_EISA_polarity(idx); ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ polarity = default_PCI_polarity(idx); ++ break; ++ } ++ case MP_BUS_MCA: /* MCA pin */ ++ { ++ polarity = default_MCA_polarity(idx); ++ break; ++ } ++ case MP_BUS_NEC98: /* NEC 98 pin */ ++ { ++ polarity = default_NEC98_polarity(idx); ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ } ++ break; ++ } ++ case 1: /* high active */ ++ { ++ polarity = 0; ++ break; ++ } ++ case 2: /* reserved */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ case 3: /* low active */ ++ { ++ polarity = 1; ++ break; ++ } ++ default: /* invalid */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ } ++ return polarity; ++} ++ ++static int MPBIOS_trigger(int idx) ++{ ++ int bus = mp_irqs[idx].mpc_srcbus; ++ int trigger; ++ ++ /* ++ * Determine IRQ trigger mode (edge or level sensitive): ++ */ ++ switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) ++ { ++ case 0: /* conforms, ie. bus-type dependent */ ++ { ++ switch (mp_bus_id_to_type[bus]) ++ { ++ case MP_BUS_ISA: /* ISA pin */ ++ { ++ trigger = default_ISA_trigger(idx); ++ break; ++ } ++ case MP_BUS_EISA: /* EISA pin */ ++ { ++ trigger = default_EISA_trigger(idx); ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ trigger = default_PCI_trigger(idx); ++ break; ++ } ++ case MP_BUS_MCA: /* MCA pin */ ++ { ++ trigger = default_MCA_trigger(idx); ++ break; ++ } ++ case MP_BUS_NEC98: /* NEC 98 pin */ ++ { ++ trigger = default_NEC98_trigger(idx); ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 1; ++ break; ++ } ++ } ++ break; ++ } ++ case 1: /* edge */ ++ { ++ trigger = 0; ++ break; ++ } ++ case 2: /* reserved */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 1; ++ break; ++ } ++ case 3: /* level */ ++ { ++ trigger = 1; ++ break; ++ } ++ default: /* invalid */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 0; ++ break; ++ } ++ } ++ return trigger; ++} ++ ++static inline int irq_polarity(int idx) ++{ ++ return MPBIOS_polarity(idx); ++} ++ ++static inline int irq_trigger(int idx) ++{ ++ return MPBIOS_trigger(idx); ++} ++ ++static int pin_2_irq(int idx, int apic, int pin) ++{ ++ int irq, i; ++ int bus = mp_irqs[idx].mpc_srcbus; ++ ++ /* ++ * Debugging check, we are in big trouble if this message pops up! 
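++ * (It would mean the mptable entry's mpc_dstirq disagrees with the
++ * pin it was looked up by. Note also that the PCI branch below
++ * numbers IRQs cumulatively across IO-APICs: with two 24-pin
++ * IO-APICs, apic 1 / pin 3 yields IRQ 24 + 3 = 27, before any
++ * ES7000 renumbering via ioapic_renumber_irq.)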
++ */ ++ if (mp_irqs[idx].mpc_dstirq != pin) ++ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); ++ ++ switch (mp_bus_id_to_type[bus]) ++ { ++ case MP_BUS_ISA: /* ISA pin */ ++ case MP_BUS_EISA: ++ case MP_BUS_MCA: ++ case MP_BUS_NEC98: ++ { ++ irq = mp_irqs[idx].mpc_srcbusirq; ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ /* ++ * PCI IRQs are mapped in order ++ */ ++ i = irq = 0; ++ while (i < apic) ++ irq += nr_ioapic_registers[i++]; ++ irq += pin; ++ ++ /* ++ * For MPS mode, so far only needed by ES7000 platform ++ */ ++ if (ioapic_renumber_irq) ++ irq = ioapic_renumber_irq(apic, irq); ++ ++ break; ++ } ++ default: ++ { ++ printk(KERN_ERR "unknown bus type %d.\n",bus); ++ irq = 0; ++ break; ++ } ++ } ++ ++ /* ++ * PCI IRQ command line redirection. Yes, limits are hardcoded. ++ */ ++ if ((pin >= 16) && (pin <= 23)) { ++ if (pirq_entries[pin-16] != -1) { ++ if (!pirq_entries[pin-16]) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "disabling PIRQ%d\n", pin-16); ++ } else { ++ irq = pirq_entries[pin-16]; ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "using PIRQ%d -> IRQ %d\n", ++ pin-16, irq); ++ } ++ } ++ } ++ return irq; ++} ++ ++static inline int IO_APIC_irq_trigger(int irq) ++{ ++ int apic, idx, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ idx = find_irq_entry(apic,pin,mp_INT); ++ if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) ++ return irq_trigger(idx); ++ } ++ } ++ /* ++ * nonexistent IRQs are edge default ++ */ ++ return 0; ++} ++ ++/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ ++u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ ++ ++int assign_irq_vector(int irq) ++{ ++ unsigned long flags; ++ int vector; ++ struct physdev_irq irq_op; ++ ++ BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); ++ ++ if (irq < PIRQ_BASE || irq - PIRQ_BASE > NR_PIRQS) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ ++ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { ++ spin_unlock_irqrestore(&vector_lock, flags); ++ return IO_APIC_VECTOR(irq); ++ } ++ ++ irq_op.irq = irq; ++ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { ++ spin_unlock_irqrestore(&vector_lock, flags); ++ return -ENOSPC; ++ } ++ ++ vector = irq_op.vector; ++ vector_irq[vector] = irq; ++ if (irq != AUTO_ASSIGN) ++ IO_APIC_VECTOR(irq) = vector; ++ ++ spin_unlock_irqrestore(&vector_lock, flags); ++ ++ return vector; ++} ++ ++#ifndef CONFIG_XEN ++static struct hw_interrupt_type ioapic_level_type; ++static struct hw_interrupt_type ioapic_edge_type; ++ ++#define IOAPIC_AUTO -1 ++#define IOAPIC_EDGE 0 ++#define IOAPIC_LEVEL 1 ++ ++static void ioapic_register_intr(int irq, int vector, unsigned long trigger) ++{ ++ unsigned idx; ++ ++ idx = use_pci_vector() && !platform_legacy_irq(irq) ? 
vector : irq; ++ ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ irq_desc[idx].chip = &ioapic_level_type; ++ else ++ irq_desc[idx].chip = &ioapic_edge_type; ++ set_intr_gate(vector, interrupt[idx]); ++} ++#else ++#define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq) ++#endif ++ ++static void __init setup_IO_APIC_irqs(void) ++{ ++ struct IO_APIC_route_entry entry; ++ int apic, pin, idx, irq, first_notcon = 1, vector; ++ unsigned long flags; ++ ++ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ ++ /* ++ * add it to the IO-APIC irq-routing table: ++ */ ++ memset(&entry,0,sizeof(entry)); ++ ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.dest_mode = INT_DEST_MODE; ++ entry.mask = 0; /* enable IRQ */ ++ entry.dest.logical.logical_dest = ++ cpu_mask_to_apicid(TARGET_CPUS); ++ ++ idx = find_irq_entry(apic,pin,mp_INT); ++ if (idx == -1) { ++ if (first_notcon) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ " IO-APIC (apicid-pin) %d-%d", ++ mp_ioapics[apic].mpc_apicid, ++ pin); ++ first_notcon = 0; ++ } else ++ apic_printk(APIC_VERBOSE, ", %d-%d", ++ mp_ioapics[apic].mpc_apicid, pin); ++ continue; ++ } ++ ++ entry.trigger = irq_trigger(idx); ++ entry.polarity = irq_polarity(idx); ++ ++ if (irq_trigger(idx)) { ++ entry.trigger = 1; ++ entry.mask = 1; ++ } ++ ++ irq = pin_2_irq(idx, apic, pin); ++ /* ++ * skip adding the timer int on secondary nodes, which causes ++ * a small but painful rift in the time-space continuum ++ */ ++ if (multi_timer_check(apic, irq)) ++ continue; ++ else ++ add_pin_to_irq(irq, apic, pin); ++ ++ if (/*!apic &&*/ !IO_APIC_IRQ(irq)) ++ continue; ++ ++ if (IO_APIC_IRQ(irq)) { ++ vector = assign_irq_vector(irq); ++ entry.vector = vector; ++ ioapic_register_intr(irq, vector, IOAPIC_AUTO); ++ ++ if (!apic && (irq < 16)) ++ disable_8259A_irq(irq); ++ } ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); ++ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); ++ set_native_irq_info(irq, TARGET_CPUS); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ } ++ } ++ ++ if (!first_notcon) ++ apic_printk(APIC_VERBOSE, " not connected.\n"); ++} ++ ++/* ++ * Set up the 8259A-master output pin: ++ */ ++#ifndef CONFIG_XEN ++static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) ++{ ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ memset(&entry,0,sizeof(entry)); ++ ++ disable_8259A_irq(0); ++ ++ /* mask LVT0 */ ++ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); ++ ++ /* ++ * We use logical delivery to get the timer IRQ ++ * to the first CPU. ++ */ ++ entry.dest_mode = INT_DEST_MODE; ++ entry.mask = 0; /* unmask IRQ now */ ++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.polarity = 0; ++ entry.trigger = 0; ++ entry.vector = vector; ++ ++ /* ++ * The timer IRQ doesn't have to know that behind the ++ * scene we have a 8259A-master in AEOI mode ... 
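++ * Since the 8259A auto-EOIs in that mode, the timer line can be
++ * driven with the plain edge handler installed just below.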
++ */ ++ irq_desc[0].chip = &ioapic_edge_type; ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); ++ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ enable_8259A_irq(0); ++} ++ ++static inline void UNEXPECTED_IO_APIC(void) ++{ ++} ++ ++void __init print_IO_APIC(void) ++{ ++ int apic, i; ++ union IO_APIC_reg_00 reg_00; ++ union IO_APIC_reg_01 reg_01; ++ union IO_APIC_reg_02 reg_02; ++ union IO_APIC_reg_03 reg_03; ++ unsigned long flags; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); ++ for (i = 0; i < nr_ioapics; i++) ++ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", ++ mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); ++ ++ /* ++ * We are a bit conservative about what we expect. We have to ++ * know about every hardware change ASAP. ++ */ ++ printk(KERN_INFO "testing the IO APIC.......................\n"); ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ reg_01.raw = io_apic_read(apic, 1); ++ if (reg_01.bits.version >= 0x10) ++ reg_02.raw = io_apic_read(apic, 2); ++ if (reg_01.bits.version >= 0x20) ++ reg_03.raw = io_apic_read(apic, 3); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); ++ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); ++ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); ++ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); ++ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); ++ if (reg_00.bits.ID >= get_physical_broadcast()) ++ UNEXPECTED_IO_APIC(); ++ if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) ++ UNEXPECTED_IO_APIC(); ++ ++ printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); ++ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); ++ if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ ++ (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ ++ (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ ++ (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ ++ (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ ++ (reg_01.bits.entries != 0x2E) && ++ (reg_01.bits.entries != 0x3F) ++ ) ++ UNEXPECTED_IO_APIC(); ++ ++ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); ++ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); ++ if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ ++ (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ ++ (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ ++ (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ ++ (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ ++ ) ++ UNEXPECTED_IO_APIC(); ++ if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) ++ UNEXPECTED_IO_APIC(); ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, ++ * but the value of reg_02 is read as the previous read register ++ * value, so ignore it if reg_02 == reg_01. ++ */ ++ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); ++ printk(KERN_DEBUG "....... 
: arbitration: %02X\n", reg_02.bits.arbitration); ++ if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) ++ UNEXPECTED_IO_APIC(); ++ } ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 ++ * or reg_03, but the value of reg_0[23] is read as the previous read ++ * register value, so ignore it if reg_03 == reg_0[12]. ++ */ ++ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && ++ reg_03.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); ++ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); ++ if (reg_03.bits.__reserved_1) ++ UNEXPECTED_IO_APIC(); ++ } ++ ++ printk(KERN_DEBUG ".... IRQ redirection table:\n"); ++ ++ printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" ++ " Stat Dest Deli Vect: \n"); ++ ++ for (i = 0; i <= reg_01.bits.entries; i++) { ++ struct IO_APIC_route_entry entry; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); ++ *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ printk(KERN_DEBUG " %02x %03X %02X ", ++ i, ++ entry.dest.logical.logical_dest, ++ entry.dest.physical.physical_dest ++ ); ++ ++ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", ++ entry.mask, ++ entry.trigger, ++ entry.irr, ++ entry.polarity, ++ entry.delivery_status, ++ entry.dest_mode, ++ entry.delivery_mode, ++ entry.vector ++ ); ++ } ++ } ++ if (use_pci_vector()) ++ printk(KERN_INFO "Using vector-based indexing\n"); ++ printk(KERN_DEBUG "IRQ to pin mappings:\n"); ++ for (i = 0; i < NR_IRQS; i++) { ++ struct irq_pin_list *entry = irq_2_pin + i; ++ if (entry->pin < 0) ++ continue; ++ if (use_pci_vector() && !platform_legacy_irq(i)) ++ printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); ++ else ++ printk(KERN_DEBUG "IRQ%d ", i); ++ for (;;) { ++ printk("-> %d:%d", entry->apic, entry->pin); ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++ printk("\n"); ++ } ++ ++ printk(KERN_INFO ".................................... done.\n"); ++ ++ return; ++} ++ ++static void print_APIC_bitfield (int base) ++{ ++ unsigned int v; ++ int i, j; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); ++ for (i = 0; i < 8; i++) { ++ v = apic_read(base + i*0x10); ++ for (j = 0; j < 32; j++) { ++ if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ v = apic_read(APIC_ESR); ++ printk(KERN_DEBUG "... APIC ESR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_ICR); ++ printk(KERN_DEBUG "... APIC ICR: %08x\n", v); ++ v = apic_read(APIC_ICR2); ++ printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); ++ ++ v = apic_read(APIC_LVTT); ++ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); ++ ++ if (maxlvt > 3) { /* PC is LVT#4. */ ++ v = apic_read(APIC_LVTPC); ++ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); ++ } ++ v = apic_read(APIC_LVT0); ++ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); ++ v = apic_read(APIC_LVT1); ++ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); ++ ++ if (maxlvt > 2) { /* ERR is LVT#3. */ ++ v = apic_read(APIC_LVTERR); ++ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_TMICT); ++ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); ++ v = apic_read(APIC_TMCCT); ++ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); ++ v = apic_read(APIC_TDCR); ++ printk(KERN_DEBUG "... 
APIC TDCR: %08x\n", v); ++ printk("\n"); ++} ++ ++void print_all_local_APICs (void) ++{ ++ on_each_cpu(print_local_APIC, NULL, 1, 1); ++} ++ ++void /*__init*/ print_PIC(void) ++{ ++ unsigned int v; ++ unsigned long flags; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "\nprinting PIC contents\n"); ++ ++ spin_lock_irqsave(&i8259A_lock, flags); ++ ++ v = inb(0xa1) << 8 | inb(0x21); ++ printk(KERN_DEBUG "... PIC IMR: %04x\n", v); ++ ++ v = inb(0xa0) << 8 | inb(0x20); ++ printk(KERN_DEBUG "... PIC IRR: %04x\n", v); ++ ++ outb(0x0b,0xa0); ++ outb(0x0b,0x20); ++ v = inb(0xa0) << 8 | inb(0x20); ++ outb(0x0a,0xa0); ++ outb(0x0a,0x20); ++ ++ spin_unlock_irqrestore(&i8259A_lock, flags); ++ ++ printk(KERN_DEBUG "... PIC ISR: %04x\n", v); ++ ++ v = inb(0x4d1) << 8 | inb(0x4d0); ++ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); ++} ++#endif /* !CONFIG_XEN */ ++ ++static void __init enable_IO_APIC(void) ++{ ++ union IO_APIC_reg_01 reg_01; ++ int i8259_apic, i8259_pin; ++ int i, apic; ++ unsigned long flags; ++ ++ for (i = 0; i < PIN_MAP_SIZE; i++) { ++ irq_2_pin[i].pin = -1; ++ irq_2_pin[i].next = 0; ++ } ++ if (!pirqs_enabled) ++ for (i = 0; i < MAX_PIRQS; i++) ++ pirq_entries[i] = -1; ++ ++ /* ++ * The number of IO-APIC IRQ registers (== #pins): ++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(apic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ nr_ioapic_registers[apic] = reg_01.bits.entries+1; ++ } ++ for(apic = 0; apic < nr_ioapics; apic++) { ++ int pin; ++ /* See if any of the pins is in ExtINT mode */ ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ struct IO_APIC_route_entry entry; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); ++ *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ ++ /* If the interrupt line is enabled and in ExtInt mode ++ * I have found the pin where the i8259 is connected. ++ */ ++ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { ++ ioapic_i8259.apic = apic; ++ ioapic_i8259.pin = pin; ++ goto found_i8259; ++ } ++ } ++ } ++ found_i8259: ++ /* Look to see what if the MP table has reported the ExtINT */ ++ /* If we could not find the appropriate pin by looking at the ioapic ++ * the i8259 probably is not connected the ioapic but give the ++ * mptable a chance anyway. ++ */ ++ i8259_pin = find_isa_irq_pin(0, mp_ExtINT); ++ i8259_apic = find_isa_irq_apic(0, mp_ExtINT); ++ /* Trust the MP table if nothing is setup in the hardware */ ++ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { ++ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); ++ ioapic_i8259.pin = i8259_pin; ++ ioapic_i8259.apic = i8259_apic; ++ } ++ /* Complain if the MP table and the hardware disagree */ ++ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && ++ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) ++ { ++ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); ++ } ++ ++ /* ++ * Do not trust the IO-APIC being empty at bootup ++ */ ++ clear_IO_APIC(); ++} ++ ++/* ++ * Not an __init, needed by the reboot code ++ */ ++void disable_IO_APIC(void) ++{ ++ /* ++ * Clear the IO-APIC before rebooting: ++ */ ++ clear_IO_APIC(); ++ ++#ifndef CONFIG_XEN ++ /* ++ * If the i8259 is routed through an IOAPIC ++ * Put that IOAPIC in virtual wire mode ++ * so legacy interrupts can be delivered. 
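++ * "Virtual wire" here is the route entry built below: unmasked,
++ * edge-triggered, active-high, delivery mode ExtINT, with the
++ * physical destination set to the boot CPU's local APIC ID as
++ * read from APIC_ID.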
++ */ ++ if (ioapic_i8259.pin != -1) { ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ memset(&entry, 0, sizeof(entry)); ++ entry.mask = 0; /* Enabled */ ++ entry.trigger = 0; /* Edge */ ++ entry.irr = 0; ++ entry.polarity = 0; /* High */ ++ entry.delivery_status = 0; ++ entry.dest_mode = 0; /* Physical */ ++ entry.delivery_mode = dest_ExtINT; /* ExtInt */ ++ entry.vector = 0; ++ entry.dest.physical.physical_dest = ++ GET_APIC_ID(apic_read(APIC_ID)); ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, ++ *(((int *)&entry)+1)); ++ io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, ++ *(((int *)&entry)+0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ } ++ disconnect_bsp_APIC(ioapic_i8259.pin != -1); ++#endif ++} ++ ++/* ++ * function to set the IO-APIC physical IDs based on the ++ * values stored in the MPC table. ++ * ++ * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 ++ */ ++ ++#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ) ++static void __init setup_ioapic_ids_from_mpc(void) ++{ ++ union IO_APIC_reg_00 reg_00; ++ physid_mask_t phys_id_present_map; ++ int apic; ++ int i; ++ unsigned char old_id; ++ unsigned long flags; ++ ++ /* ++ * Don't check I/O APIC IDs for xAPIC systems. They have ++ * no meaning without the serial APIC bus. ++ */ ++ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) ++ return; ++ /* ++ * This is broken; anything with a real cpu count has to ++ * circumvent this idiocy regardless. ++ */ ++ phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); ++ ++ /* ++ * Set the IOAPIC ID to the value stored in the MPC table. ++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ ++ /* Read the register 0 value */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ old_id = mp_ioapics[apic].mpc_apicid; ++ ++ if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", ++ apic, mp_ioapics[apic].mpc_apicid); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ reg_00.bits.ID); ++ mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; ++ } ++ ++ /* ++ * Sanity check, is the ID really free? Every APIC in a ++ * system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (check_apicid_used(phys_id_present_map, ++ mp_ioapics[apic].mpc_apicid)) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", ++ apic, mp_ioapics[apic].mpc_apicid); ++ for (i = 0; i < get_physical_broadcast(); i++) ++ if (!physid_isset(i, phys_id_present_map)) ++ break; ++ if (i >= get_physical_broadcast()) ++ panic("Max APIC ID exceeded!\n"); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ i); ++ physid_set(i, phys_id_present_map); ++ mp_ioapics[apic].mpc_apicid = i; ++ } else { ++ physid_mask_t tmp; ++ tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); ++ apic_printk(APIC_VERBOSE, "Setting %d in the " ++ "phys_id_present_map\n", ++ mp_ioapics[apic].mpc_apicid); ++ physids_or(phys_id_present_map, phys_id_present_map, tmp); ++ } ++ ++ ++ /* ++ * We need to adjust the IRQ routing table ++ * if the ID changed. 
++ */ ++ if (old_id != mp_ioapics[apic].mpc_apicid) ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].mpc_dstapic == old_id) ++ mp_irqs[i].mpc_dstapic ++ = mp_ioapics[apic].mpc_apicid; ++ ++ /* ++ * Read the right value from the MPC table and ++ * write it into the ID register. ++ */ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "...changing IO-APIC physical APIC ID to %d ...", ++ mp_ioapics[apic].mpc_apicid); ++ ++ reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0, reg_00.raw); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* ++ * Sanity check ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) ++ printk("could not set ID!\n"); ++ else ++ apic_printk(APIC_VERBOSE, " ok.\n"); ++ } ++} ++#else ++static void __init setup_ioapic_ids_from_mpc(void) { } ++#endif ++ ++#ifndef CONFIG_XEN ++/* ++ * There is a nasty bug in some older SMP boards, their mptable lies ++ * about the timer IRQ. We do the following to work around the situation: ++ * ++ * - timer IRQ defaults to IO-APIC IRQ ++ * - if this function detects that timer IRQs are defunct, then we fall ++ * back to ISA timer IRQs ++ */ ++static int __init timer_irq_works(void) ++{ ++ unsigned long t1 = jiffies; ++ ++ local_irq_enable(); ++ /* Let ten ticks pass... */ ++ mdelay((10 * 1000) / HZ); ++ ++ /* ++ * Expect a few ticks at least, to be sure some possible ++ * glue logic does not lock up after one or two first ++ * ticks in a non-ExtINT mode. Also the local APIC ++ * might have cached one ExtINT interrupt. Finally, at ++ * least one tick may be lost due to delays. ++ */ ++ if (jiffies - t1 > 4) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * In the SMP+IOAPIC case it might happen that there are an unspecified ++ * number of pending IRQ events unhandled. These cases are very rare, ++ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much ++ * better to do it this way as thus we do not have to be aware of ++ * 'pending' interrupts in the IRQ path, except at this point. ++ */ ++/* ++ * Edge triggered needs to resend any interrupt ++ * that was delayed but this is now handled in the device ++ * independent code. ++ */ ++ ++/* ++ * Starting up a edge-triggered IO-APIC interrupt is ++ * nasty - we need to make sure that we get the edge. ++ * If it is already asserted for some reason, we need ++ * return 1 to indicate that is was pending. ++ * ++ * This is not complete - we should be able to fake ++ * an edge even if it isn't on the 8259A... ++ */ ++static unsigned int startup_edge_ioapic_irq(unsigned int irq) ++{ ++ int was_pending = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ if (irq < 16) { ++ disable_8259A_irq(irq); ++ if (i8259A_irq_pending(irq)) ++ was_pending = 1; ++ } ++ __unmask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return was_pending; ++} ++ ++/* ++ * Once we have recorded IRQ_PENDING already, we can mask the ++ * interrupt for real. This prevents IRQ storms from unhandled ++ * devices. 
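++ * The mask is applied only when both IRQ_PENDING and IRQ_DISABLED
++ * are set, i.e. the line was disabled and has fired again since;
++ * a normally enabled edge IRQ is just acked at the local APIC.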
++ */ ++static void ack_edge_ioapic_irq(unsigned int irq) ++{ ++ move_irq(irq); ++ if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) ++ == (IRQ_PENDING | IRQ_DISABLED)) ++ mask_IO_APIC_irq(irq); ++ ack_APIC_irq(); ++} ++ ++/* ++ * Level triggered interrupts can just be masked, ++ * and shutting down and starting up the interrupt ++ * is the same as enabling and disabling them -- except ++ * with a startup need to return a "was pending" value. ++ * ++ * Level triggered interrupts are special because we ++ * do not touch any IO-APIC register while handling ++ * them. We ack the APIC in the end-IRQ handler, not ++ * in the start-IRQ-handler. Protection against reentrance ++ * from the same interrupt is still provided, both by the ++ * generic IRQ layer and by the fact that an unacked local ++ * APIC does not accept IRQs. ++ */ ++static unsigned int startup_level_ioapic_irq (unsigned int irq) ++{ ++ unmask_IO_APIC_irq(irq); ++ ++ return 0; /* don't check for pending */ ++} ++ ++static void end_level_ioapic_irq (unsigned int irq) ++{ ++ unsigned long v; ++ int i; ++ ++ move_irq(irq); ++/* ++ * It appears there is an erratum which affects at least version 0x11 ++ * of I/O APIC (that's the 82093AA and cores integrated into various ++ * chipsets). Under certain conditions a level-triggered interrupt is ++ * erroneously delivered as edge-triggered one but the respective IRR ++ * bit gets set nevertheless. As a result the I/O unit expects an EOI ++ * message but it will never arrive and further interrupts are blocked ++ * from the source. The exact reason is so far unknown, but the ++ * phenomenon was observed when two consecutive interrupt requests ++ * from a given source get delivered to the same CPU and the source is ++ * temporarily disabled in between. ++ * ++ * A workaround is to simulate an EOI message manually. We achieve it ++ * by setting the trigger mode to edge and then to level when the edge ++ * trigger mode gets detected in the TMR of a local APIC for a ++ * level-triggered interrupt. We mask the source for the time of the ++ * operation to prevent an edge-triggered interrupt escaping meanwhile. ++ * The idea is from Manfred Spraul. 
--macro ++ */ ++ i = IO_APIC_VECTOR(irq); ++ ++ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ++ ++ ack_APIC_irq(); ++ ++ if (!(v & (1 << (i & 0x1f)))) { ++ atomic_inc(&irq_mis_count); ++ spin_lock(&ioapic_lock); ++ __mask_and_edge_IO_APIC_irq(irq); ++ __unmask_and_level_IO_APIC_irq(irq); ++ spin_unlock(&ioapic_lock); ++ } ++} ++ ++#ifdef CONFIG_PCI_MSI ++static unsigned int startup_edge_ioapic_vector(unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ return startup_edge_ioapic_irq(irq); ++} ++ ++static void ack_edge_ioapic_vector(unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ move_native_irq(vector); ++ ack_edge_ioapic_irq(irq); ++} ++ ++static unsigned int startup_level_ioapic_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ return startup_level_ioapic_irq (irq); ++} ++ ++static void end_level_ioapic_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ move_native_irq(vector); ++ end_level_ioapic_irq(irq); ++} ++ ++static void mask_IO_APIC_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ mask_IO_APIC_irq(irq); ++} ++ ++static void unmask_IO_APIC_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ unmask_IO_APIC_irq(irq); ++} ++ ++#ifdef CONFIG_SMP ++static void set_ioapic_affinity_vector (unsigned int vector, ++ cpumask_t cpu_mask) ++{ ++ int irq = vector_to_irq(vector); ++ ++ set_native_irq_info(vector, cpu_mask); ++ set_ioapic_affinity_irq(irq, cpu_mask); ++} ++#endif ++#endif ++ ++static int ioapic_retrigger(unsigned int irq) ++{ ++ send_IPI_self(IO_APIC_VECTOR(irq)); ++ ++ return 1; ++} ++ ++/* ++ * Level and edge triggered IO-APIC interrupts need different handling, ++ * so we use two separate IRQ descriptors. Edge triggered IRQs can be ++ * handled with the level-triggered descriptor, but that one has slightly ++ * more overhead. Level-triggered interrupts cannot be handled with the ++ * edge-triggered handler, without risking IRQ storms and other ugly ++ * races. ++ */ ++static struct hw_interrupt_type ioapic_edge_type __read_mostly = { ++ .typename = "IO-APIC-edge", ++ .startup = startup_edge_ioapic, ++ .shutdown = shutdown_edge_ioapic, ++ .enable = enable_edge_ioapic, ++ .disable = disable_edge_ioapic, ++ .ack = ack_edge_ioapic, ++ .end = end_edge_ioapic, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ioapic_affinity, ++#endif ++ .retrigger = ioapic_retrigger, ++}; ++ ++static struct hw_interrupt_type ioapic_level_type __read_mostly = { ++ .typename = "IO-APIC-level", ++ .startup = startup_level_ioapic, ++ .shutdown = shutdown_level_ioapic, ++ .enable = enable_level_ioapic, ++ .disable = disable_level_ioapic, ++ .ack = mask_and_ack_level_ioapic, ++ .end = end_level_ioapic, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ioapic_affinity, ++#endif ++ .retrigger = ioapic_retrigger, ++}; ++#endif /* !CONFIG_XEN */ ++ ++static inline void init_IO_APIC_traps(void) ++{ ++ int irq; ++ ++ /* ++ * NOTE! The local APIC isn't very good at handling ++ * multiple interrupts at the same interrupt level. ++ * As the interrupt level is determined by taking the ++ * vector number and shifting that right by 4, we ++ * want to spread these out a bit so that they don't ++ * all fall in the same interrupt level. ++ * ++ * Also, we've got to be careful not to trash gate ++ * 0x80, because int 0x80 is hm, kind of importantish. 
;) ++ */ ++ for (irq = 0; irq < NR_IRQS ; irq++) { ++ int tmp = irq; ++ if (use_pci_vector()) { ++ if (!platform_legacy_irq(tmp)) ++ if ((tmp = vector_to_irq(tmp)) == -1) ++ continue; ++ } ++ if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { ++ /* ++ * Hmm.. We don't have an entry for this, ++ * so default to an old-fashioned 8259 ++ * interrupt if we can.. ++ */ ++ if (irq < 16) ++ make_8259A_irq(irq); ++#ifndef CONFIG_XEN ++ else ++ /* Strange. Oh, well.. */ ++ irq_desc[irq].chip = &no_irq_type; ++#endif ++ } ++ } ++} ++ ++#ifndef CONFIG_XEN ++static void enable_lapic_irq (unsigned int irq) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); ++} ++ ++static void disable_lapic_irq (unsigned int irq) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); ++} ++ ++static void ack_lapic_irq (unsigned int irq) ++{ ++ ack_APIC_irq(); ++} ++ ++static void end_lapic_irq (unsigned int i) { /* nothing */ } ++ ++static struct hw_interrupt_type lapic_irq_type __read_mostly = { ++ .typename = "local-APIC-edge", ++ .startup = NULL, /* startup_irq() not used for IRQ0 */ ++ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ ++ .enable = enable_lapic_irq, ++ .disable = disable_lapic_irq, ++ .ack = ack_lapic_irq, ++ .end = end_lapic_irq ++}; ++ ++static void setup_nmi (void) ++{ ++ /* ++ * Dirty trick to enable the NMI watchdog ... ++ * We put the 8259A master into AEOI mode and ++ * unmask on all local APICs LVT0 as NMI. ++ * ++ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') ++ * is from Maciej W. Rozycki - so we do not have to EOI from ++ * the NMI handler or the timer interrupt. ++ */ ++ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); ++ ++ on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1); ++ ++ apic_printk(APIC_VERBOSE, " done.\n"); ++} ++ ++/* ++ * This looks a bit hackish but it's about the only one way of sending ++ * a few INTA cycles to 8259As and any associated glue logic. ICR does ++ * not support the ExtINT mode, unfortunately. We need to send these ++ * cycles as some i82489DX-based boards have glue logic that keeps the ++ * 8259A interrupt line asserted until INTA. 
--macro ++ */ ++static inline void unlock_ExtINT_logic(void) ++{ ++ int apic, pin, i; ++ struct IO_APIC_route_entry entry0, entry1; ++ unsigned char save_control, save_freq_select; ++ unsigned long flags; ++ ++ pin = find_isa_irq_pin(8, mp_INT); ++ apic = find_isa_irq_apic(8, mp_INT); ++ if (pin == -1) ++ return; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); ++ *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ clear_IO_APIC_pin(apic, pin); ++ ++ memset(&entry1, 0, sizeof(entry1)); ++ ++ entry1.dest_mode = 0; /* physical delivery */ ++ entry1.mask = 0; /* unmask IRQ now */ ++ entry1.dest.physical.physical_dest = hard_smp_processor_id(); ++ entry1.delivery_mode = dest_ExtINT; ++ entry1.polarity = entry0.polarity; ++ entry1.trigger = 0; ++ entry1.vector = 0; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); ++ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ save_control = CMOS_READ(RTC_CONTROL); ++ save_freq_select = CMOS_READ(RTC_FREQ_SELECT); ++ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, ++ RTC_FREQ_SELECT); ++ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); ++ ++ i = 100; ++ while (i-- > 0) { ++ mdelay(10); ++ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) ++ i -= 10; ++ } ++ ++ CMOS_WRITE(save_control, RTC_CONTROL); ++ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); ++ clear_IO_APIC_pin(apic, pin); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); ++ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++int timer_uses_ioapic_pin_0; ++ ++/* ++ * This code may look a bit paranoid, but it's supposed to cooperate with ++ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ ++ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast ++ * fanatically on his truly buggy board. ++ */ ++static inline void check_timer(void) ++{ ++ int apic1, pin1, apic2, pin2; ++ int vector; ++ ++ /* ++ * get/set the timer IRQ vector: ++ */ ++ disable_8259A_irq(0); ++ vector = assign_irq_vector(0); ++ set_intr_gate(vector, interrupt[0]); ++ ++ /* ++ * Subtle, code in do_timer_interrupt() expects an AEOI ++ * mode for the 8259A whenever interrupts are routed ++ * through I/O APICs. Also IRQ0 has to be enabled in ++ * the 8259A which implies the virtual wire has to be ++ * disabled in the local APIC. ++ */ ++ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); ++ init_8259A(1); ++ timer_ack = 1; ++ if (timer_over_8254 > 0) ++ enable_8259A_irq(0); ++ ++ pin1 = find_isa_irq_pin(0, mp_INT); ++ apic1 = find_isa_irq_apic(0, mp_INT); ++ pin2 = ioapic_i8259.pin; ++ apic2 = ioapic_i8259.apic; ++ ++ if (pin1 == 0) ++ timer_uses_ioapic_pin_0 = 1; ++ ++ printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", ++ vector, apic1, pin1, apic2, pin2); ++ ++ if (pin1 != -1) { ++ /* ++ * Ok, does IRQ0 through the IOAPIC work? 
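++		 * (Concretely: unmask the pin and let timer_irq_works()
++		 * above judge the route: it waits roughly ten ticks via
++		 * mdelay() and only reports success if jiffies advanced
++		 * by more than four.)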
++ */ ++ unmask_IO_APIC_irq(0); ++ if (timer_irq_works()) { ++ if (nmi_watchdog == NMI_IO_APIC) { ++ disable_8259A_irq(0); ++ setup_nmi(); ++ enable_8259A_irq(0); ++ } ++ if (disable_timer_pin_1 > 0) ++ clear_IO_APIC_pin(0, pin1); ++ return; ++ } ++ clear_IO_APIC_pin(apic1, pin1); ++ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " ++ "IO-APIC\n"); ++ } ++ ++ printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); ++ if (pin2 != -1) { ++ printk("\n..... (found pin %d) ...", pin2); ++ /* ++ * legacy devices should be connected to IO APIC #0 ++ */ ++ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); ++ if (timer_irq_works()) { ++ printk("works.\n"); ++ if (pin1 != -1) ++ replace_pin_at_irq(0, apic1, pin1, apic2, pin2); ++ else ++ add_pin_to_irq(0, apic2, pin2); ++ if (nmi_watchdog == NMI_IO_APIC) { ++ setup_nmi(); ++ } ++ return; ++ } ++ /* ++ * Cleanup, just in case ... ++ */ ++ clear_IO_APIC_pin(apic2, pin2); ++ } ++ printk(" failed.\n"); ++ ++ if (nmi_watchdog == NMI_IO_APIC) { ++ printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); ++ nmi_watchdog = 0; ++ } ++ ++ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); ++ ++ disable_8259A_irq(0); ++ irq_desc[0].chip = &lapic_irq_type; ++ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ ++ enable_8259A_irq(0); ++ ++ if (timer_irq_works()) { ++ printk(" works.\n"); ++ return; ++ } ++ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); ++ printk(" failed.\n"); ++ ++ printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); ++ ++ timer_ack = 0; ++ init_8259A(0); ++ make_8259A_irq(0); ++ apic_write_around(APIC_LVT0, APIC_DM_EXTINT); ++ ++ unlock_ExtINT_logic(); ++ ++ if (timer_irq_works()) { ++ printk(" works.\n"); ++ return; ++ } ++ printk(" failed :(.\n"); ++ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " ++ "report. Then try booting with the 'noapic' option"); ++} ++#else ++int timer_uses_ioapic_pin_0 = 0; ++#define check_timer() ((void)0) ++#endif ++ ++/* ++ * ++ * IRQ's that are handled by the PIC in the MPS IOAPIC case. ++ * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. ++ * Linux doesn't really care, as it's not actually used ++ * for any interrupt handling anyway. ++ */ ++#define PIC_IRQS (1 << PIC_CASCADE_IR) ++ ++void __init setup_IO_APIC(void) ++{ ++ enable_IO_APIC(); ++ ++ if (acpi_ioapic) ++ io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ ++ else ++ io_apic_irqs = ~PIC_IRQS; ++ ++ printk("ENABLING IO-APIC IRQs\n"); ++ ++ /* ++ * Set up IO-APIC IRQ routing. ++ */ ++ if (!acpi_ioapic) ++ setup_ioapic_ids_from_mpc(); ++#ifndef CONFIG_XEN ++ sync_Arb_IDs(); ++#endif ++ setup_IO_APIC_irqs(); ++ init_IO_APIC_traps(); ++ check_timer(); ++ if (!acpi_ioapic) ++ print_IO_APIC(); ++} ++ ++static int __init setup_disable_8254_timer(char *s) ++{ ++ timer_over_8254 = -1; ++ return 1; ++} ++static int __init setup_enable_8254_timer(char *s) ++{ ++ timer_over_8254 = 2; ++ return 1; ++} ++ ++__setup("disable_8254_timer", setup_disable_8254_timer); ++__setup("enable_8254_timer", setup_enable_8254_timer); ++ ++/* ++ * Called after all the initialization is done. If we didnt find any ++ * APIC bugs then we can allow the modify fast path ++ */ ++ ++static int __init io_apic_bug_finalize(void) ++{ ++ if(sis_apic_bug == -1) ++ sis_apic_bug = 0; ++ if (is_initial_xendomain()) { ++ struct xen_platform_op op = { .cmd = XENPF_platform_quirk }; ++ op.u.platform_quirk.quirk_id = sis_apic_bug ? 
++ QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL; ++ VOID(HYPERVISOR_platform_op(&op)); ++ } ++ return 0; ++} ++ ++late_initcall(io_apic_bug_finalize); ++ ++struct sysfs_ioapic_data { ++ struct sys_device dev; ++ struct IO_APIC_route_entry entry[0]; ++}; ++static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; ++ ++static int ioapic_suspend(struct sys_device *dev, pm_message_t state) ++{ ++ struct IO_APIC_route_entry *entry; ++ struct sysfs_ioapic_data *data; ++ unsigned long flags; ++ int i; ++ ++ data = container_of(dev, struct sysfs_ioapic_data, dev); ++ entry = data->entry; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { ++ *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); ++ *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return 0; ++} ++ ++static int ioapic_resume(struct sys_device *dev) ++{ ++ struct IO_APIC_route_entry *entry; ++ struct sysfs_ioapic_data *data; ++ unsigned long flags; ++ union IO_APIC_reg_00 reg_00; ++ int i; ++ ++ data = container_of(dev, struct sysfs_ioapic_data, dev); ++ entry = data->entry; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(dev->id, 0); ++ if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { ++ reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; ++ io_apic_write(dev->id, 0, reg_00.raw); ++ } ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { ++ io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); ++ io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return 0; ++} ++ ++static struct sysdev_class ioapic_sysdev_class = { ++ set_kset_name("ioapic"), ++#ifndef CONFIG_XEN ++ .suspend = ioapic_suspend, ++ .resume = ioapic_resume, ++#endif ++}; ++ ++static int __init ioapic_init_sysfs(void) ++{ ++ struct sys_device * dev; ++ int i, size, error = 0; ++ ++ error = sysdev_class_register(&ioapic_sysdev_class); ++ if (error) ++ return error; ++ ++ for (i = 0; i < nr_ioapics; i++ ) { ++ size = sizeof(struct sys_device) + nr_ioapic_registers[i] ++ * sizeof(struct IO_APIC_route_entry); ++ mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); ++ if (!mp_ioapic_data[i]) { ++ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); ++ continue; ++ } ++ memset(mp_ioapic_data[i], 0, size); ++ dev = &mp_ioapic_data[i]->dev; ++ dev->id = i; ++ dev->cls = &ioapic_sysdev_class; ++ error = sysdev_register(dev); ++ if (error) { ++ kfree(mp_ioapic_data[i]); ++ mp_ioapic_data[i] = NULL; ++ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); ++ continue; ++ } ++ } ++ ++ return 0; ++} ++ ++device_initcall(ioapic_init_sysfs); ++ ++/* -------------------------------------------------------------------------- ++ ACPI-based IOAPIC Configuration ++ -------------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_ACPI ++ ++int __init io_apic_get_unique_id (int ioapic, int apic_id) ++{ ++#ifndef CONFIG_XEN ++ union IO_APIC_reg_00 reg_00; ++ static physid_mask_t apic_id_map = PHYSID_MASK_NONE; ++ physid_mask_t tmp; ++ unsigned long flags; ++ int i = 0; ++ ++ /* ++ * The P4 platform supports up to 256 APIC IDs on two separate APIC ++ * buses (one for LAPICs, one for IOAPICs), where predecessors only ++ * supports up to 16 on one shared APIC bus. ++ * ++ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full ++ * advantage of new APIC bus architecture. 
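++	 * (In numbers, assuming the usual xAPIC/serial-bus split: the
++	 * xAPIC register interface carries a full 8-bit physical ID,
++	 * 0..255, whereas the older serial APIC bus protocol can only
++	 * address IDs 0..15, which is the limit get_physical_broadcast()
++	 * reports below.)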
++ */ ++ ++ if (physids_empty(apic_id_map)) ++ apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ if (apic_id >= get_physical_broadcast()) { ++ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " ++ "%d\n", ioapic, apic_id, reg_00.bits.ID); ++ apic_id = reg_00.bits.ID; ++ } ++ ++ /* ++ * Every APIC in a system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (check_apicid_used(apic_id_map, apic_id)) { ++ ++ for (i = 0; i < get_physical_broadcast(); i++) { ++ if (!check_apicid_used(apic_id_map, i)) ++ break; ++ } ++ ++ if (i == get_physical_broadcast()) ++ panic("Max apic_id exceeded!\n"); ++ ++ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " ++ "trying %d\n", ioapic, apic_id, i); ++ ++ apic_id = i; ++ } ++ ++ tmp = apicid_to_cpu_present(apic_id); ++ physids_or(apic_id_map, apic_id_map, tmp); ++ ++ if (reg_00.bits.ID != apic_id) { ++ reg_00.bits.ID = apic_id; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic, 0, reg_00.raw); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* Sanity check */ ++ if (reg_00.bits.ID != apic_id) { ++ printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); ++ return -1; ++ } ++ } ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); ++#endif /* !CONFIG_XEN */ ++ ++ return apic_id; ++} ++ ++ ++int __init io_apic_get_version (int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.version; ++} ++ ++ ++int __init io_apic_get_redir_entries (int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.entries; ++} ++ ++ ++int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) ++{ ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ if (!IO_APIC_IRQ(irq)) { ++ printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ++ ioapic); ++ return -EINVAL; ++ } ++ ++ /* ++ * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. ++ * Note that we mask (disable) IRQs now -- these get enabled when the ++ * corresponding device driver registers for this IRQ. 
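++	 * A typical ACPI caller would pass PCI-style attributes here,
++	 * e.g. (hypothetical values) io_apic_set_pci_routing(ioapic,
++	 * pin, irq, 1, 1) for a level-triggered, active-low interrupt.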
++ */ ++ ++ memset(&entry,0,sizeof(entry)); ++ ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.dest_mode = INT_DEST_MODE; ++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.trigger = edge_level; ++ entry.polarity = active_high_low; ++ entry.mask = 1; ++ ++ /* ++ * IRQs < 16 are already in the irq_2_pin[] map ++ */ ++ if (irq >= 16) ++ add_pin_to_irq(irq, ioapic, pin); ++ ++ entry.vector = assign_irq_vector(irq); ++ ++ apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " ++ "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, ++ mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, ++ edge_level, active_high_low); ++ ++ ioapic_register_intr(irq, entry.vector, edge_level); ++ ++ if (!ioapic && (irq < 16)) ++ disable_8259A_irq(irq); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); ++ io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); ++ set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return 0; ++} ++ ++#endif /* CONFIG_ACPI */ +Index: head-2008-11-25/arch/x86/kernel/ioport_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/ioport_32-xen.c 2008-01-28 12:24:19.000000000 +0100 +@@ -0,0 +1,123 @@ ++/* ++ * linux/arch/i386/kernel/ioport.c ++ * ++ * This contains the io-permission bitmap code - written by obz, with changes ++ * by Linus. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ ++static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) ++{ ++ unsigned long mask; ++ unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); ++ unsigned int low_index = base & (BITS_PER_LONG-1); ++ int length = low_index + extent; ++ ++ if (low_index != 0) { ++ mask = (~0UL << low_index); ++ if (length < BITS_PER_LONG) ++ mask &= ~(~0UL << length); ++ if (new_value) ++ *bitmap_base++ |= mask; ++ else ++ *bitmap_base++ &= ~mask; ++ length -= BITS_PER_LONG; ++ } ++ ++ mask = (new_value ? ~0UL : 0UL); ++ while (length >= BITS_PER_LONG) { ++ *bitmap_base++ = mask; ++ length -= BITS_PER_LONG; ++ } ++ ++ if (length > 0) { ++ mask = ~(~0UL << length); ++ if (new_value) ++ *bitmap_base++ |= mask; ++ else ++ *bitmap_base++ &= ~mask; ++ } ++} ++ ++ ++/* ++ * this changes the io permissions bitmap in the current task. ++ */ ++asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) ++{ ++ struct thread_struct * t = ¤t->thread; ++ unsigned long *bitmap; ++ struct physdev_set_iobitmap set_iobitmap; ++ ++ if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) ++ return -EINVAL; ++ if (turn_on && !capable(CAP_SYS_RAWIO)) ++ return -EPERM; ++ ++ /* ++ * If it's the first ioperm() call in this thread's lifetime, set the ++ * IO bitmap up. 
ioperm() is much less timing critical than clone(), ++ * this is why we delay this operation until now: ++ */ ++ if (!t->io_bitmap_ptr) { ++ bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); ++ if (!bitmap) ++ return -ENOMEM; ++ ++ memset(bitmap, 0xff, IO_BITMAP_BYTES); ++ t->io_bitmap_ptr = bitmap; ++ set_thread_flag(TIF_IO_BITMAP); ++ ++ set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap); ++ set_iobitmap.nr_ports = IO_BITMAP_BITS; ++ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, ++ &set_iobitmap)); ++ } ++ ++ set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); ++ ++ return 0; ++} ++ ++/* ++ * sys_iopl has to be used when you want to access the IO ports ++ * beyond the 0x3ff range: to get the full 65536 ports bitmapped ++ * you'd need 8kB of bitmaps/process, which is a bit excessive. ++ * ++ * Here we just change the eflags value on the stack: we allow ++ * only the super-user to do it. This depends on the stack-layout ++ * on system-call entry - see also fork() and the signal handling ++ * code. ++ */ ++ ++asmlinkage long sys_iopl(unsigned long unused) ++{ ++ volatile struct pt_regs * regs = (struct pt_regs *) &unused; ++ unsigned int level = regs->ebx; ++ struct thread_struct *t = ¤t->thread; ++ unsigned int old = (t->iopl >> 12) & 3; ++ ++ if (level > 3) ++ return -EINVAL; ++ /* Trying to gain more privileges? */ ++ if (level > old) { ++ if (!capable(CAP_SYS_RAWIO)) ++ return -EPERM; ++ } ++ t->iopl = level << 12; ++ set_iopl_mask(t->iopl); ++ return 0; ++} +Index: head-2008-11-25/arch/x86/kernel/irq_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/irq_32-xen.c 2008-10-29 09:55:56.000000000 +0100 +@@ -0,0 +1,324 @@ ++/* ++ * linux/arch/i386/kernel/irq.c ++ * ++ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar ++ * ++ * This file contains the lowest level x86-specific interrupt ++ * entry, irq-stacks and irq statistics code. All the remaining ++ * irq logic is done by the generic kernel/irq/ code and ++ * by the x86-specific irq controller code. (e.g. i8259.c and ++ * io_apic.c.) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; ++EXPORT_PER_CPU_SYMBOL(irq_stat); ++ ++#ifndef CONFIG_X86_LOCAL_APIC ++/* ++ * 'what should we do if we get a hw irq event on an illegal vector'. ++ * each architecture has to answer this themselves. ++ */ ++void ack_bad_irq(unsigned int irq) ++{ ++ printk("unexpected IRQ trap at vector %02x\n", irq); ++} ++#endif ++ ++#ifdef CONFIG_4KSTACKS ++/* ++ * per-CPU IRQ handling contexts (thread information and stack) ++ */ ++union irq_ctx { ++ struct thread_info tinfo; ++ u32 stack[THREAD_SIZE/sizeof(u32)]; ++}; ++ ++static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; ++static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; ++#endif ++ ++/* ++ * do_IRQ handles all normal device IRQ's (the special ++ * SMP cross-CPU interrupts have their own specific ++ * handlers). 
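++ *
++ * (A sketch of the convention assumed below: the entry code stores the
++ * one's complement of the IRQ number in orig_eax, so "~regs->orig_eax"
++ * undoes it; keeping the stored value negative is what lets the
++ * ret_from_ code tell interrupt frames from syscall frames.)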
++ */ ++fastcall unsigned int do_IRQ(struct pt_regs *regs) ++{ ++ /* high bit used in ret_from_ code */ ++ int irq = ~regs->orig_eax; ++#ifdef CONFIG_4KSTACKS ++ union irq_ctx *curctx, *irqctx; ++ u32 *isp; ++#endif ++ ++ if (unlikely((unsigned)irq >= NR_IRQS)) { ++ printk(KERN_EMERG "%s: cannot handle IRQ %d\n", ++ __FUNCTION__, irq); ++ BUG(); ++ } ++ ++ /*irq_enter();*/ ++#ifdef CONFIG_DEBUG_STACKOVERFLOW ++ /* Debugging check for stack overflow: is there less than 1KB free? */ ++ { ++ long esp; ++ ++ __asm__ __volatile__("andl %%esp,%0" : ++ "=r" (esp) : "0" (THREAD_SIZE - 1)); ++ if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { ++ printk("do_IRQ: stack overflow: %ld\n", ++ esp - sizeof(struct thread_info)); ++ dump_stack(); ++ } ++ } ++#endif ++ ++#ifdef CONFIG_4KSTACKS ++ ++ curctx = (union irq_ctx *) current_thread_info(); ++ irqctx = hardirq_ctx[smp_processor_id()]; ++ ++ /* ++ * this is where we switch to the IRQ stack. However, if we are ++ * already using the IRQ stack (because we interrupted a hardirq ++ * handler) we can't do that and just have to keep using the ++ * current stack (which is the irq stack already after all) ++ */ ++ if (curctx != irqctx) { ++ int arg1, arg2, ebx; ++ ++ /* build the stack frame on the IRQ stack */ ++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); ++ irqctx->tinfo.task = curctx->tinfo.task; ++ irqctx->tinfo.previous_esp = current_stack_pointer; ++ ++ /* ++ * Copy the softirq bits in preempt_count so that the ++ * softirq checks work in the hardirq context. ++ */ ++ irqctx->tinfo.preempt_count = ++ (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | ++ (curctx->tinfo.preempt_count & SOFTIRQ_MASK); ++ ++ asm volatile( ++ " xchgl %%ebx,%%esp \n" ++ " call __do_IRQ \n" ++ " movl %%ebx,%%esp \n" ++ : "=a" (arg1), "=d" (arg2), "=b" (ebx) ++ : "0" (irq), "1" (regs), "2" (isp) ++ : "memory", "cc", "ecx" ++ ); ++ } else ++#endif ++ __do_IRQ(irq, regs); ++ ++ /*irq_exit();*/ ++ ++ return 1; ++} ++ ++#ifdef CONFIG_4KSTACKS ++ ++/* ++ * These should really be __section__(".bss.page_aligned") as well, but ++ * gcc's 3.0 and earlier don't handle that correctly. 
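++ *
++ * (The __aligned__(THREAD_SIZE) attribute used instead gives each
++ * per-CPU slot the same THREAD_SIZE alignment, e.g. 4 KiB with
++ * CONFIG_4KSTACKS, so the usual "mask %esp with ~(THREAD_SIZE - 1)"
++ * trick for locating thread_info keeps working on these stacks.)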
++ */ ++static char softirq_stack[NR_CPUS * THREAD_SIZE] ++ __attribute__((__aligned__(THREAD_SIZE))); ++ ++static char hardirq_stack[NR_CPUS * THREAD_SIZE] ++ __attribute__((__aligned__(THREAD_SIZE))); ++ ++/* ++ * allocate per-cpu stacks for hardirq and for softirq processing ++ */ ++void irq_ctx_init(int cpu) ++{ ++ union irq_ctx *irqctx; ++ ++ if (hardirq_ctx[cpu]) ++ return; ++ ++ irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; ++ irqctx->tinfo.task = NULL; ++ irqctx->tinfo.exec_domain = NULL; ++ irqctx->tinfo.cpu = cpu; ++ irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; ++ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); ++ ++ hardirq_ctx[cpu] = irqctx; ++ ++ irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; ++ irqctx->tinfo.task = NULL; ++ irqctx->tinfo.exec_domain = NULL; ++ irqctx->tinfo.cpu = cpu; ++ irqctx->tinfo.preempt_count = 0; ++ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); ++ ++ softirq_ctx[cpu] = irqctx; ++ ++ printk("CPU %u irqstacks, hard=%p soft=%p\n", ++ cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); ++} ++ ++void irq_ctx_exit(int cpu) ++{ ++ hardirq_ctx[cpu] = NULL; ++} ++ ++extern asmlinkage void __do_softirq(void); ++ ++asmlinkage void do_softirq(void) ++{ ++ unsigned long flags; ++ struct thread_info *curctx; ++ union irq_ctx *irqctx; ++ u32 *isp; ++ ++ if (in_interrupt()) ++ return; ++ ++ local_irq_save(flags); ++ ++ if (local_softirq_pending()) { ++ curctx = current_thread_info(); ++ irqctx = softirq_ctx[smp_processor_id()]; ++ irqctx->tinfo.task = curctx->task; ++ irqctx->tinfo.previous_esp = current_stack_pointer; ++ ++ /* build the stack frame on the softirq stack */ ++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); ++ ++ asm volatile( ++ " xchgl %%ebx,%%esp \n" ++ " call __do_softirq \n" ++ " movl %%ebx,%%esp \n" ++ : "=b"(isp) ++ : "0"(isp) ++ : "memory", "cc", "edx", "ecx", "eax" ++ ); ++ /* ++ * Shouldnt happen, we returned above if in_interrupt(): ++ */ ++ WARN_ON_ONCE(softirq_count()); ++ } ++ ++ local_irq_restore(flags); ++} ++ ++EXPORT_SYMBOL(do_softirq); ++#endif ++ ++/* ++ * Interrupt statistics: ++ */ ++ ++atomic_t irq_err_count; ++ ++/* ++ * /proc/interrupts printing: ++ */ ++ ++int show_interrupts(struct seq_file *p, void *v) ++{ ++ int i = *(loff_t *) v, j; ++ struct irqaction * action; ++ unsigned long flags; ++ ++ if (i == 0) { ++ seq_printf(p, " "); ++ for_each_online_cpu(j) ++ seq_printf(p, "CPU%-8d",j); ++ seq_putc(p, '\n'); ++ } ++ ++ if (i < NR_IRQS) { ++ spin_lock_irqsave(&irq_desc[i].lock, flags); ++ action = irq_desc[i].action; ++ if (!action) ++ goto skip; ++ seq_printf(p, "%3d: ",i); ++#ifndef CONFIG_SMP ++ seq_printf(p, "%10u ", kstat_irqs(i)); ++#else ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++#endif ++ seq_printf(p, " %14s", irq_desc[i].chip->typename); ++ seq_printf(p, " %s", action->name); ++ ++ for (action=action->next; action; action = action->next) ++ seq_printf(p, ", %s", action->name); ++ ++ seq_putc(p, '\n'); ++skip: ++ spin_unlock_irqrestore(&irq_desc[i].lock, flags); ++ } else if (i == NR_IRQS) { ++ seq_printf(p, "NMI: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", nmi_count(j)); ++ seq_putc(p, '\n'); ++#ifdef CONFIG_X86_LOCAL_APIC ++ seq_printf(p, "LOC: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", ++ per_cpu(irq_stat,j).apic_timer_irqs); ++ seq_putc(p, '\n'); ++#endif ++ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); ++#if defined(CONFIG_X86_IO_APIC) ++ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); ++#endif ++ } ++ return 0; ++} ++ 
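++/*
++ * A minimal usage sketch for the hotplug path below (illustrative; a
++ * real caller would follow the __cpu_disable() pattern): take the dying
++ * CPU out of the map, then reroute every IRQ still targeting it:
++ *
++ *	cpumask_t map = cpu_online_map;
++ *
++ *	cpu_clear(smp_processor_id(), map);
++ *	fixup_irqs(map);
++ */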
++#ifdef CONFIG_HOTPLUG_CPU ++ ++void fixup_irqs(cpumask_t map) ++{ ++ unsigned int irq; ++ static int warned; ++ ++ for (irq = 0; irq < NR_IRQS; irq++) { ++ cpumask_t mask; ++ if (irq == 2) ++ continue; ++ ++ cpus_and(mask, irq_desc[irq].affinity, map); ++ if (any_online_cpu(mask) == NR_CPUS) { ++ /*printk("Breaking affinity for irq %i\n", irq);*/ ++ mask = map; ++ } ++ if (irq_desc[irq].chip->set_affinity) ++ irq_desc[irq].chip->set_affinity(irq, mask); ++ else if (irq_desc[irq].action && !(warned++)) ++ printk("Cannot set affinity for irq %i\n", irq); ++ } ++ ++#if 0 ++ barrier(); ++ /* Ingo Molnar says: "after the IO-APIC masks have been redirected ++ [note the nop - the interrupt-enable boundary on x86 is two ++ instructions from sti] - to flush out pending hardirqs and ++ IPIs. After this point nothing is supposed to reach this CPU." */ ++ __asm__ __volatile__("sti; nop; cli"); ++ barrier(); ++#else ++ /* That doesn't seem sufficient. Give it 1ms. */ ++ local_irq_enable(); ++ mdelay(1); ++ local_irq_disable(); ++#endif ++} ++#endif ++ +Index: head-2008-11-25/arch/x86/kernel/ldt_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/ldt_32-xen.c 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,270 @@ ++/* ++ * linux/kernel/ldt.c ++ * ++ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds ++ * Copyright (C) 1999 Ingo Molnar ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ ++static void flush_ldt(void *null) ++{ ++ if (current->active_mm) ++ load_LDT(¤t->active_mm->context); ++} ++#endif ++ ++static int alloc_ldt(mm_context_t *pc, int mincount, int reload) ++{ ++ void *oldldt; ++ void *newldt; ++ int oldsize; ++ ++ if (mincount <= pc->size) ++ return 0; ++ oldsize = pc->size; ++ mincount = (mincount+511)&(~511); ++ if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) ++ newldt = vmalloc(mincount*LDT_ENTRY_SIZE); ++ else ++ newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); ++ ++ if (!newldt) ++ return -ENOMEM; ++ ++ if (oldsize) ++ memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); ++ oldldt = pc->ldt; ++ memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); ++ pc->ldt = newldt; ++ wmb(); ++ pc->size = mincount; ++ wmb(); ++ ++ if (reload) { ++#ifdef CONFIG_SMP ++ cpumask_t mask; ++ preempt_disable(); ++#endif ++ make_pages_readonly( ++ pc->ldt, ++ (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ load_LDT(pc); ++#ifdef CONFIG_SMP ++ mask = cpumask_of_cpu(smp_processor_id()); ++ if (!cpus_equal(current->mm->cpu_vm_mask, mask)) ++ smp_call_function(flush_ldt, NULL, 1, 1); ++ preempt_enable(); ++#endif ++ } ++ if (oldsize) { ++ make_pages_writable( ++ oldldt, ++ (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) ++ vfree(oldldt); ++ else ++ kfree(oldldt); ++ } ++ return 0; ++} ++ ++static inline int copy_ldt(mm_context_t *new, mm_context_t *old) ++{ ++ int err = alloc_ldt(new, old->size, 0); ++ if (err < 0) ++ return err; ++ memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); ++ make_pages_readonly( ++ new->ldt, ++ (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ return 0; ++} ++ ++/* ++ * we do not have to muck with descriptors here, that is ++ 
* done in switch_mm() as needed. ++ */ ++int init_new_context(struct task_struct *tsk, struct mm_struct *mm) ++{ ++ struct mm_struct * old_mm; ++ int retval = 0; ++ ++ init_MUTEX(&mm->context.sem); ++ mm->context.size = 0; ++ mm->context.has_foreign_mappings = 0; ++ old_mm = current->mm; ++ if (old_mm && old_mm->context.size > 0) { ++ down(&old_mm->context.sem); ++ retval = copy_ldt(&mm->context, &old_mm->context); ++ up(&old_mm->context.sem); ++ } ++ return retval; ++} ++ ++/* ++ * No need to lock the MM as we are the last user ++ */ ++void destroy_context(struct mm_struct *mm) ++{ ++ if (mm->context.size) { ++ if (mm == current->active_mm) ++ clear_LDT(); ++ make_pages_writable( ++ mm->context.ldt, ++ (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) ++ vfree(mm->context.ldt); ++ else ++ kfree(mm->context.ldt); ++ mm->context.size = 0; ++ } ++} ++ ++static int read_ldt(void __user * ptr, unsigned long bytecount) ++{ ++ int err; ++ unsigned long size; ++ struct mm_struct * mm = current->mm; ++ ++ if (!mm->context.size) ++ return 0; ++ if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) ++ bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; ++ ++ down(&mm->context.sem); ++ size = mm->context.size*LDT_ENTRY_SIZE; ++ if (size > bytecount) ++ size = bytecount; ++ ++ err = 0; ++ if (copy_to_user(ptr, mm->context.ldt, size)) ++ err = -EFAULT; ++ up(&mm->context.sem); ++ if (err < 0) ++ goto error_return; ++ if (size != bytecount) { ++ /* zero-fill the rest */ ++ if (clear_user(ptr+size, bytecount-size) != 0) { ++ err = -EFAULT; ++ goto error_return; ++ } ++ } ++ return bytecount; ++error_return: ++ return err; ++} ++ ++static int read_default_ldt(void __user * ptr, unsigned long bytecount) ++{ ++ int err; ++ unsigned long size; ++ void *address; ++ ++ err = 0; ++ address = &default_ldt[0]; ++ size = 5*sizeof(struct desc_struct); ++ if (size > bytecount) ++ size = bytecount; ++ ++ err = size; ++ if (copy_to_user(ptr, address, size)) ++ err = -EFAULT; ++ ++ return err; ++} ++ ++static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) ++{ ++ struct mm_struct * mm = current->mm; ++ __u32 entry_1, entry_2; ++ int error; ++ struct user_desc ldt_info; ++ ++ error = -EINVAL; ++ if (bytecount != sizeof(ldt_info)) ++ goto out; ++ error = -EFAULT; ++ if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) ++ goto out; ++ ++ error = -EINVAL; ++ if (ldt_info.entry_number >= LDT_ENTRIES) ++ goto out; ++ if (ldt_info.contents == 3) { ++ if (oldmode) ++ goto out; ++ if (ldt_info.seg_not_present == 0) ++ goto out; ++ } ++ ++ down(&mm->context.sem); ++ if (ldt_info.entry_number >= mm->context.size) { ++ error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); ++ if (error < 0) ++ goto out_unlock; ++ } ++ ++ /* Allow LDTs to be cleared by the user. */ ++ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { ++ if (oldmode || LDT_empty(&ldt_info)) { ++ entry_1 = 0; ++ entry_2 = 0; ++ goto install; ++ } ++ } ++ ++ entry_1 = LDT_entry_a(&ldt_info); ++ entry_2 = LDT_entry_b(&ldt_info); ++ if (oldmode) ++ entry_2 &= ~(1 << 20); ++ ++ /* Install the new entry ... 
*/ ++install: ++ error = write_ldt_entry(mm->context.ldt, ldt_info.entry_number, ++ entry_1, entry_2); ++ ++out_unlock: ++ up(&mm->context.sem); ++out: ++ return error; ++} ++ ++asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) ++{ ++ int ret = -ENOSYS; ++ ++ switch (func) { ++ case 0: ++ ret = read_ldt(ptr, bytecount); ++ break; ++ case 1: ++ ret = write_ldt(ptr, bytecount, 1); ++ break; ++ case 2: ++ ret = read_default_ldt(ptr, bytecount); ++ break; ++ case 0x11: ++ ret = write_ldt(ptr, bytecount, 0); ++ break; ++ } ++ return ret; ++} +Index: head-2008-11-25/arch/x86/kernel/microcode-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/microcode-xen.c 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,144 @@ ++/* ++ * Intel CPU Microcode Update Driver for Linux ++ * ++ * Copyright (C) 2000-2004 Tigran Aivazian ++ * ++ * This driver allows to upgrade microcode on Intel processors ++ * belonging to IA-32 family - PentiumPro, Pentium II, ++ * Pentium III, Xeon, Pentium 4, etc. ++ * ++ * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, ++ * Order Number 245472 or free download from: ++ * ++ * http://developer.intel.com/design/pentium4/manuals/245472.htm ++ * ++ * For more information, go to http://www.urbanmyth.org/microcode ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++ ++//#define DEBUG /* pr_debug */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); ++MODULE_AUTHOR("Tigran Aivazian "); ++MODULE_LICENSE("GPL"); ++ ++static int verbose; ++module_param(verbose, int, 0644); ++ ++#define MICROCODE_VERSION "1.14a-xen" ++ ++#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ ++#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ ++#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ ++ ++/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ ++static DEFINE_MUTEX(microcode_mutex); ++ ++static int microcode_open (struct inode *unused1, struct file *unused2) ++{ ++ return capable(CAP_SYS_RAWIO) ? 
0 : -EPERM; ++} ++ ++ ++static int do_microcode_update (const void __user *ubuf, size_t len) ++{ ++ int err; ++ void *kbuf; ++ ++ kbuf = vmalloc(len); ++ if (!kbuf) ++ return -ENOMEM; ++ ++ if (copy_from_user(kbuf, ubuf, len) == 0) { ++ struct xen_platform_op op; ++ ++ op.cmd = XENPF_microcode_update; ++ set_xen_guest_handle(op.u.microcode.data, kbuf); ++ op.u.microcode.length = len; ++ err = HYPERVISOR_platform_op(&op); ++ } else ++ err = -EFAULT; ++ ++ vfree(kbuf); ++ ++ return err; ++} ++ ++static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) ++{ ++ ssize_t ret; ++ ++ if (len < MC_HEADER_SIZE) { ++ printk(KERN_ERR "microcode: not enough data\n"); ++ return -EINVAL; ++ } ++ ++ mutex_lock(µcode_mutex); ++ ++ ret = do_microcode_update(buf, len); ++ if (!ret) ++ ret = (ssize_t)len; ++ ++ mutex_unlock(µcode_mutex); ++ ++ return ret; ++} ++ ++static struct file_operations microcode_fops = { ++ .owner = THIS_MODULE, ++ .write = microcode_write, ++ .open = microcode_open, ++}; ++ ++static struct miscdevice microcode_dev = { ++ .minor = MICROCODE_MINOR, ++ .name = "microcode", ++ .fops = µcode_fops, ++}; ++ ++static int __init microcode_init (void) ++{ ++ int error; ++ ++ error = misc_register(µcode_dev); ++ if (error) { ++ printk(KERN_ERR ++ "microcode: can't misc_register on minor=%d\n", ++ MICROCODE_MINOR); ++ return error; ++ } ++ ++ printk(KERN_INFO ++ "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); ++ return 0; ++} ++ ++static void __exit microcode_exit (void) ++{ ++ misc_deregister(µcode_dev); ++} ++ ++module_init(microcode_init) ++module_exit(microcode_exit) ++MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +Index: head-2008-11-25/arch/x86/kernel/mpparse_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/mpparse_32-xen.c 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,1185 @@ ++/* ++ * Intel Multiprocessor Specification 1.1 and 1.4 ++ * compliant MP-table parsing routines. ++ * ++ * (c) 1995 Alan Cox, Building #3 ++ * (c) 1998, 1999, 2000 Ingo Molnar ++ * ++ * Fixes ++ * Erich Boleyn : MP v1.4 and additional changes. ++ * Alan Cox : Added EBDA scanning ++ * Ingo Molnar : various cleanups and rewrites ++ * Maciej W. Rozycki: Bits for default MP configurations ++ * Paul Diefenbaugh: Added full ACPI support ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++/* Have we found an MP table */ ++int smp_found_config; ++unsigned int __initdata maxcpus = NR_CPUS; ++ ++/* ++ * Various Linux-internal data structures created from the ++ * MP-table. ++ */ ++int apic_version [MAX_APICS]; ++int mp_bus_id_to_type [MAX_MP_BUSSES]; ++int mp_bus_id_to_node [MAX_MP_BUSSES]; ++int mp_bus_id_to_local [MAX_MP_BUSSES]; ++int quad_local_to_mp_bus_id [NR_CPUS/4][4]; ++int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... 
MAX_MP_BUSSES-1] = -1 }; ++static int mp_current_pci_id; ++ ++/* I/O APIC entries */ ++struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; ++ ++/* # of MP IRQ source entries */ ++struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; ++ ++/* MP IRQ source entries */ ++int mp_irq_entries; ++ ++int nr_ioapics; ++ ++int pic_mode; ++unsigned long mp_lapic_addr; ++ ++unsigned int def_to_bigsmp = 0; ++ ++/* Processor that is doing the boot up */ ++unsigned int boot_cpu_physical_apicid = -1U; ++/* Internal processor count */ ++static unsigned int __devinitdata num_processors; ++ ++/* Bitmask of physically existing CPUs */ ++physid_mask_t phys_cpu_present_map; ++ ++u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; ++ ++/* ++ * Intel MP BIOS table parsing routines: ++ */ ++ ++ ++/* ++ * Checksum an MP configuration block. ++ */ ++ ++static int __init mpf_checksum(unsigned char *mp, int len) ++{ ++ int sum = 0; ++ ++ while (len--) ++ sum += *mp++; ++ ++ return sum & 0xFF; ++} ++ ++/* ++ * Have to match translation table entries to main table entries by counter ++ * hence the mpc_record variable .... can't see a less disgusting way of ++ * doing this .... ++ */ ++ ++static int mpc_record; ++static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; ++ ++#ifndef CONFIG_XEN ++static void __devinit MP_processor_info (struct mpc_config_processor *m) ++{ ++ int ver, apicid; ++ physid_mask_t phys_cpu; ++ ++ if (!(m->mpc_cpuflag & CPU_ENABLED)) ++ return; ++ ++ apicid = mpc_apic_id(m, translation_table[mpc_record]); ++ ++ if (m->mpc_featureflag&(1<<0)) ++ Dprintk(" Floating point unit present.\n"); ++ if (m->mpc_featureflag&(1<<7)) ++ Dprintk(" Machine Exception supported.\n"); ++ if (m->mpc_featureflag&(1<<8)) ++ Dprintk(" 64 bit compare & exchange supported.\n"); ++ if (m->mpc_featureflag&(1<<9)) ++ Dprintk(" Internal APIC present.\n"); ++ if (m->mpc_featureflag&(1<<11)) ++ Dprintk(" SEP present.\n"); ++ if (m->mpc_featureflag&(1<<12)) ++ Dprintk(" MTRR present.\n"); ++ if (m->mpc_featureflag&(1<<13)) ++ Dprintk(" PGE present.\n"); ++ if (m->mpc_featureflag&(1<<14)) ++ Dprintk(" MCA present.\n"); ++ if (m->mpc_featureflag&(1<<15)) ++ Dprintk(" CMOV present.\n"); ++ if (m->mpc_featureflag&(1<<16)) ++ Dprintk(" PAT present.\n"); ++ if (m->mpc_featureflag&(1<<17)) ++ Dprintk(" PSE present.\n"); ++ if (m->mpc_featureflag&(1<<18)) ++ Dprintk(" PSN present.\n"); ++ if (m->mpc_featureflag&(1<<19)) ++ Dprintk(" Cache Line Flush Instruction present.\n"); ++ /* 20 Reserved */ ++ if (m->mpc_featureflag&(1<<21)) ++ Dprintk(" Debug Trace and EMON Store present.\n"); ++ if (m->mpc_featureflag&(1<<22)) ++ Dprintk(" ACPI Thermal Throttle Registers present.\n"); ++ if (m->mpc_featureflag&(1<<23)) ++ Dprintk(" MMX present.\n"); ++ if (m->mpc_featureflag&(1<<24)) ++ Dprintk(" FXSR present.\n"); ++ if (m->mpc_featureflag&(1<<25)) ++ Dprintk(" XMM present.\n"); ++ if (m->mpc_featureflag&(1<<26)) ++ Dprintk(" Willamette New Instructions present.\n"); ++ if (m->mpc_featureflag&(1<<27)) ++ Dprintk(" Self Snoop present.\n"); ++ if (m->mpc_featureflag&(1<<28)) ++ Dprintk(" HT present.\n"); ++ if (m->mpc_featureflag&(1<<29)) ++ Dprintk(" Thermal Monitor present.\n"); ++ /* 30, 31 Reserved */ ++ ++ ++ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { ++ Dprintk(" Bootup CPU\n"); ++ boot_cpu_physical_apicid = m->mpc_apicid; ++ } ++ ++ ver = m->mpc_apicver; ++ ++ /* ++ * Validate version ++ */ ++ if (ver == 0x0) { ++ printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " ++ "fixing up to 0x10. 
(tell your hw vendor)\n", ++ m->mpc_apicid); ++ ver = 0x10; ++ } ++ apic_version[m->mpc_apicid] = ver; ++ ++ phys_cpu = apicid_to_cpu_present(apicid); ++ physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); ++ ++ if (num_processors >= NR_CPUS) { ++ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." ++ " Processor ignored.\n", NR_CPUS); ++ return; ++ } ++ ++ if (num_processors >= maxcpus) { ++ printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." ++ " Processor ignored.\n", maxcpus); ++ return; ++ } ++ ++ cpu_set(num_processors, cpu_possible_map); ++ num_processors++; ++ ++ /* ++ * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y ++ * but we need to work other dependencies like SMP_SUSPEND etc ++ * before this can be done without some confusion. ++ * if (CPU_HOTPLUG_ENABLED || num_processors > 8) ++ * - Ashok Raj ++ */ ++ if (num_processors > 8) { ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_INTEL: ++ if (!APIC_XAPIC(ver)) { ++ def_to_bigsmp = 0; ++ break; ++ } ++ /* If P4 and above fall through */ ++ case X86_VENDOR_AMD: ++ def_to_bigsmp = 1; ++ } ++ } ++ bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; ++} ++#else ++void __init MP_processor_info (struct mpc_config_processor *m) ++{ ++ num_processors++; ++} ++#endif /* CONFIG_XEN */ ++ ++static void __init MP_bus_info (struct mpc_config_bus *m) ++{ ++ char str[7]; ++ ++ memcpy(str, m->mpc_bustype, 6); ++ str[6] = 0; ++ ++ mpc_oem_bus_info(m, str, translation_table[mpc_record]); ++ ++ if (m->mpc_busid >= MAX_MP_BUSSES) { ++ printk(KERN_WARNING "MP table busid value (%d) for bustype %s " ++ " is too large, max. supported is %d\n", ++ m->mpc_busid, str, MAX_MP_BUSSES - 1); ++ return; ++ } ++ ++ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; ++ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; ++ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { ++ mpc_oem_pci_bus(m, translation_table[mpc_record]); ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; ++ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; ++ mp_current_pci_id++; ++ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; ++ } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98; ++ } else { ++ printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); ++ } ++} ++ ++static void __init MP_ioapic_info (struct mpc_config_ioapic *m) ++{ ++ if (!(m->mpc_flags & MPC_APIC_USABLE)) ++ return; ++ ++ printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", ++ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); ++ if (nr_ioapics >= MAX_IO_APICS) { ++ printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", ++ MAX_IO_APICS, nr_ioapics); ++ panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); ++ } ++ if (!m->mpc_apicaddr) { ++ printk(KERN_ERR "WARNING: bogus zero I/O APIC address" ++ " found in MP table, skipping!\n"); ++ return; ++ } ++ mp_ioapics[nr_ioapics] = *m; ++ nr_ioapics++; ++} ++ ++static void __init MP_intsrc_info (struct mpc_config_intsrc *m) ++{ ++ mp_irqs [mp_irq_entries] = *m; ++ Dprintk("Int: type %d, pol %d, trig %d, bus %d," ++ " IRQ %02x, APIC ID %x, APIC INT %02x\n", ++ m->mpc_irqtype, m->mpc_irqflag & 3, ++ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, ++ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); ++ if (++mp_irq_entries == 
MAX_IRQ_SOURCES) ++ panic("Max # of irq sources exceeded!!\n"); ++} ++ ++static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) ++{ ++ Dprintk("Lint: type %d, pol %d, trig %d, bus %d," ++ " IRQ %02x, APIC ID %x, APIC LINT %02x\n", ++ m->mpc_irqtype, m->mpc_irqflag & 3, ++ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, ++ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); ++ /* ++ * Well it seems all SMP boards in existence ++ * use ExtINT/LVT1 == LINT0 and ++ * NMI/LVT2 == LINT1 - the following check ++ * will show us if this assumptions is false. ++ * Until then we do not have to add baggage. ++ */ ++ if ((m->mpc_irqtype == mp_ExtINT) && ++ (m->mpc_destapiclint != 0)) ++ BUG(); ++ if ((m->mpc_irqtype == mp_NMI) && ++ (m->mpc_destapiclint != 1)) ++ BUG(); ++} ++ ++#ifdef CONFIG_X86_NUMAQ ++static void __init MP_translation_info (struct mpc_config_translation *m) ++{ ++ printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); ++ ++ if (mpc_record >= MAX_MPC_ENTRY) ++ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); ++ else ++ translation_table[mpc_record] = m; /* stash this for later */ ++ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) ++ node_set_online(m->trans_quad); ++} ++ ++/* ++ * Read/parse the MPC oem tables ++ */ ++ ++static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \ ++ unsigned short oemsize) ++{ ++ int count = sizeof (*oemtable); /* the header size */ ++ unsigned char *oemptr = ((unsigned char *)oemtable)+count; ++ ++ mpc_record = 0; ++ printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); ++ if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4)) ++ { ++ printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", ++ oemtable->oem_signature[0], ++ oemtable->oem_signature[1], ++ oemtable->oem_signature[2], ++ oemtable->oem_signature[3]); ++ return; ++ } ++ if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length)) ++ { ++ printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); ++ return; ++ } ++ while (count < oemtable->oem_length) { ++ switch (*oemptr) { ++ case MP_TRANSLATION: ++ { ++ struct mpc_config_translation *m= ++ (struct mpc_config_translation *)oemptr; ++ MP_translation_info(m); ++ oemptr += sizeof(*m); ++ count += sizeof(*m); ++ ++mpc_record; ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr); ++ return; ++ } ++ } ++ } ++} ++ ++static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, ++ char *productid) ++{ ++ if (strncmp(oem, "IBM NUMA", 8)) ++ printk("Warning! 
May not be a NUMA-Q system!\n"); ++ if (mpc->mpc_oemptr) ++ smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, ++ mpc->mpc_oemsize); ++} ++#endif /* CONFIG_X86_NUMAQ */ ++ ++/* ++ * Read/parse the MPC ++ */ ++ ++static int __init smp_read_mpc(struct mp_config_table *mpc) ++{ ++ char str[16]; ++ char oem[10]; ++ int count=sizeof(*mpc); ++ unsigned char *mpt=((unsigned char *)mpc)+count; ++ ++ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { ++ printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n", ++ *(u32 *)mpc->mpc_signature); ++ return 0; ++ } ++ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { ++ printk(KERN_ERR "SMP mptable: checksum error!\n"); ++ return 0; ++ } ++ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { ++ printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", ++ mpc->mpc_spec); ++ return 0; ++ } ++ if (!mpc->mpc_lapic) { ++ printk(KERN_ERR "SMP mptable: null local APIC address!\n"); ++ return 0; ++ } ++ memcpy(oem,mpc->mpc_oem,8); ++ oem[8]=0; ++ printk(KERN_INFO "OEM ID: %s ",oem); ++ ++ memcpy(str,mpc->mpc_productid,12); ++ str[12]=0; ++ printk("Product ID: %s ",str); ++ ++ mps_oem_check(mpc, oem, str); ++ ++ printk("APIC at: 0x%lX\n",mpc->mpc_lapic); ++ ++ /* ++ * Save the local APIC address (it might be non-default) -- but only ++ * if we're not using ACPI. ++ */ ++ if (!acpi_lapic) ++ mp_lapic_addr = mpc->mpc_lapic; ++ ++ /* ++ * Now process the configuration blocks. ++ */ ++ mpc_record = 0; ++ while (count < mpc->mpc_length) { ++ switch(*mpt) { ++ case MP_PROCESSOR: ++ { ++ struct mpc_config_processor *m= ++ (struct mpc_config_processor *)mpt; ++ /* ACPI may have already provided this data */ ++ if (!acpi_lapic) ++ MP_processor_info(m); ++ mpt += sizeof(*m); ++ count += sizeof(*m); ++ break; ++ } ++ case MP_BUS: ++ { ++ struct mpc_config_bus *m= ++ (struct mpc_config_bus *)mpt; ++ MP_bus_info(m); ++ mpt += sizeof(*m); ++ count += sizeof(*m); ++ break; ++ } ++ case MP_IOAPIC: ++ { ++ struct mpc_config_ioapic *m= ++ (struct mpc_config_ioapic *)mpt; ++ MP_ioapic_info(m); ++ mpt+=sizeof(*m); ++ count+=sizeof(*m); ++ break; ++ } ++ case MP_INTSRC: ++ { ++ struct mpc_config_intsrc *m= ++ (struct mpc_config_intsrc *)mpt; ++ ++ MP_intsrc_info(m); ++ mpt+=sizeof(*m); ++ count+=sizeof(*m); ++ break; ++ } ++ case MP_LINTSRC: ++ { ++ struct mpc_config_lintsrc *m= ++ (struct mpc_config_lintsrc *)mpt; ++ MP_lintsrc_info(m); ++ mpt+=sizeof(*m); ++ count+=sizeof(*m); ++ break; ++ } ++ default: ++ { ++ count = mpc->mpc_length; ++ break; ++ } ++ } ++ ++mpc_record; ++ } ++ clustered_apic_check(); ++ if (!num_processors) ++ printk(KERN_ERR "SMP mptable: no processors registered!\n"); ++ return num_processors; ++} ++ ++static int __init ELCR_trigger(unsigned int irq) ++{ ++ unsigned int port; ++ ++ port = 0x4d0 + (irq >> 3); ++ return (inb(port) >> (irq & 7)) & 1; ++} ++ ++static void __init construct_default_ioirq_mptable(int mpc_default_type) ++{ ++ struct mpc_config_intsrc intsrc; ++ int i; ++ int ELCR_fallback = 0; ++ ++ intsrc.mpc_type = MP_INTSRC; ++ intsrc.mpc_irqflag = 0; /* conforming */ ++ intsrc.mpc_srcbus = 0; ++ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; ++ ++ intsrc.mpc_irqtype = mp_INT; ++ ++ /* ++ * If true, we have an ISA/PCI system with no IRQ entries ++ * in the MP table. To prevent the PCI interrupts from being set up ++ * incorrectly, we try to use the ELCR. The sanity check to see if ++ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can ++ * never be level sensitive, so we simply see if the ELCR agrees. 
++ * If it does, we assume it's valid. ++ */ ++ if (mpc_default_type == 5) { ++ printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); ++ ++ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) ++ printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n"); ++ else { ++ printk(KERN_INFO "Using ELCR to identify PCI interrupts\n"); ++ ELCR_fallback = 1; ++ } ++ } ++ ++ for (i = 0; i < 16; i++) { ++ switch (mpc_default_type) { ++ case 2: ++ if (i == 0 || i == 13) ++ continue; /* IRQ0 & IRQ13 not connected */ ++ /* fall through */ ++ default: ++ if (i == 2) ++ continue; /* IRQ2 is never connected */ ++ } ++ ++ if (ELCR_fallback) { ++ /* ++ * If the ELCR indicates a level-sensitive interrupt, we ++ * copy that information over to the MP table in the ++ * irqflag field (level sensitive, active high polarity). ++ */ ++ if (ELCR_trigger(i)) ++ intsrc.mpc_irqflag = 13; ++ else ++ intsrc.mpc_irqflag = 0; ++ } ++ ++ intsrc.mpc_srcbusirq = i; ++ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ ++ MP_intsrc_info(&intsrc); ++ } ++ ++ intsrc.mpc_irqtype = mp_ExtINT; ++ intsrc.mpc_srcbusirq = 0; ++ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ ++ MP_intsrc_info(&intsrc); ++} ++ ++static inline void __init construct_default_ISA_mptable(int mpc_default_type) ++{ ++ struct mpc_config_processor processor; ++ struct mpc_config_bus bus; ++ struct mpc_config_ioapic ioapic; ++ struct mpc_config_lintsrc lintsrc; ++ int linttypes[2] = { mp_ExtINT, mp_NMI }; ++ int i; ++ ++ /* ++ * local APIC has default address ++ */ ++ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; ++ ++ /* ++ * 2 CPUs, numbered 0 & 1. ++ */ ++ processor.mpc_type = MP_PROCESSOR; ++ /* Either an integrated APIC or a discrete 82489DX. */ ++ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; ++ processor.mpc_cpuflag = CPU_ENABLED; ++ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | ++ (boot_cpu_data.x86_model << 4) | ++ boot_cpu_data.x86_mask; ++ processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; ++ processor.mpc_reserved[0] = 0; ++ processor.mpc_reserved[1] = 0; ++ for (i = 0; i < 2; i++) { ++ processor.mpc_apicid = i; ++ MP_processor_info(&processor); ++ } ++ ++ bus.mpc_type = MP_BUS; ++ bus.mpc_busid = 0; ++ switch (mpc_default_type) { ++ default: ++ printk("???\n"); ++ printk(KERN_ERR "Unknown standard configuration %d\n", ++ mpc_default_type); ++ /* fall through */ ++ case 1: ++ case 5: ++ memcpy(bus.mpc_bustype, "ISA ", 6); ++ break; ++ case 2: ++ case 6: ++ case 3: ++ memcpy(bus.mpc_bustype, "EISA ", 6); ++ break; ++ case 4: ++ case 7: ++ memcpy(bus.mpc_bustype, "MCA ", 6); ++ } ++ MP_bus_info(&bus); ++ if (mpc_default_type > 4) { ++ bus.mpc_busid = 1; ++ memcpy(bus.mpc_bustype, "PCI ", 6); ++ MP_bus_info(&bus); ++ } ++ ++ ioapic.mpc_type = MP_IOAPIC; ++ ioapic.mpc_apicid = 2; ++ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; ++ ioapic.mpc_flags = MPC_APIC_USABLE; ++ ioapic.mpc_apicaddr = 0xFEC00000; ++ MP_ioapic_info(&ioapic); ++ ++ /* ++ * We set up most of the low 16 IO-APIC pins according to MPS rules. 
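
Aside: ELCR_trigger() above reads the chipset's edge/level control registers at I/O ports 0x4d0/0x4d1, one bit per ISA IRQ (1 = level-triggered). Real inb() needs port privileges, so this sketch models the register pair as a captured 16-bit value and keeps only the bit arithmetic:

#include <stdio.h>
#include <stdint.h>

static int elcr_trigger(uint16_t elcr, unsigned irq)
{
	/* kernel form: port = 0x4d0 + (irq >> 3) picks the byte,
	 * irq & 7 picks the bit; with both bytes in one u16 this
	 * collapses to a single shift */
	return (elcr >> irq) & 1;
}

int main(void)
{
	uint16_t elcr = 0x0c20;	/* example: IRQs 5, 10, 11 level-triggered */
	for (unsigned irq = 0; irq < 16; irq++)
		printf("IRQ%-2u %s\n", irq,
		       elcr_trigger(elcr, irq) ? "level" : "edge");
	return 0;
}
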
++ */ ++ construct_default_ioirq_mptable(mpc_default_type); ++ ++ lintsrc.mpc_type = MP_LINTSRC; ++ lintsrc.mpc_irqflag = 0; /* conforming */ ++ lintsrc.mpc_srcbusid = 0; ++ lintsrc.mpc_srcbusirq = 0; ++ lintsrc.mpc_destapic = MP_APIC_ALL; ++ for (i = 0; i < 2; i++) { ++ lintsrc.mpc_irqtype = linttypes[i]; ++ lintsrc.mpc_destapiclint = i; ++ MP_lintsrc_info(&lintsrc); ++ } ++} ++ ++static struct intel_mp_floating *mpf_found; ++ ++/* ++ * Scan the memory blocks for an SMP configuration block. ++ */ ++void __init get_smp_config (void) ++{ ++ struct intel_mp_floating *mpf = mpf_found; ++ ++ /* ++ * ACPI supports both logical (e.g. Hyper-Threading) and physical ++ * processors, where MPS only supports physical. ++ */ ++ if (acpi_lapic && acpi_ioapic) { ++ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); ++ return; ++ } ++ else if (acpi_lapic) ++ printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); ++ ++ printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); ++ if (mpf->mpf_feature2 & (1<<7)) { ++ printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); ++ pic_mode = 1; ++ } else { ++ printk(KERN_INFO " Virtual Wire compatibility mode.\n"); ++ pic_mode = 0; ++ } ++ ++ /* ++ * Now see if we need to read further. ++ */ ++ if (mpf->mpf_feature1 != 0) { ++ ++ printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); ++ construct_default_ISA_mptable(mpf->mpf_feature1); ++ ++ } else if (mpf->mpf_physptr) { ++ ++ /* ++ * Read the physical hardware table. Anything here will ++ * override the defaults. ++ */ ++ if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) { ++ smp_found_config = 0; ++ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); ++ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); ++ return; ++ } ++ /* ++ * If there are no explicit MP IRQ entries, then we are ++ * broken. We set up most of the low 16 IO-APIC pins to ++ * ISA defaults and hope it will work. ++ */ ++ if (!mp_irq_entries) { ++ struct mpc_config_bus bus; ++ ++ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); ++ ++ bus.mpc_type = MP_BUS; ++ bus.mpc_busid = 0; ++ memcpy(bus.mpc_bustype, "ISA ", 6); ++ MP_bus_info(&bus); ++ ++ construct_default_ioirq_mptable(0); ++ } ++ ++ } else ++ BUG(); ++ ++ printk(KERN_INFO "Processors: %d\n", num_processors); ++ /* ++ * Only use the first configuration found. ++ */ ++} ++ ++static int __init smp_scan_config (unsigned long base, unsigned long length) ++{ ++ unsigned long *bp = isa_bus_to_virt(base); ++ struct intel_mp_floating *mpf; ++ ++ Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); ++ if (sizeof(*mpf) != 16) ++ printk("Error: MPF size\n"); ++ ++ while (length > 0) { ++ mpf = (struct intel_mp_floating *)bp; ++ if ((*bp == SMP_MAGIC_IDENT) && ++ (mpf->mpf_length == 1) && ++ !mpf_checksum((unsigned char *)bp, 16) && ++ ((mpf->mpf_specification == 1) ++ || (mpf->mpf_specification == 4)) ) { ++ ++ smp_found_config = 1; ++#ifndef CONFIG_XEN ++ printk(KERN_INFO "found SMP MP-table at %08lx\n", ++ virt_to_phys(mpf)); ++ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); ++ if (mpf->mpf_physptr) { ++ /* ++ * We cannot access to MPC table to compute ++ * table size yet, as only few megabytes from ++ * the bottom is mapped now. ++ * PC-9800's MPC table places on the very last ++ * of physical memory; so that simply reserving ++ * PAGE_SIZE from mpg->mpf_physptr yields BUG() ++ * in reserve_bootmem. 
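
Aside: the scan loop in smp_scan_config() above hunts for the MP floating pointer structure — 16 bytes, checked on a 16-byte stride, starting with the "_MP_" signature and summing to zero modulo 256 (the kernel compares the magic as a u32; memcmp() below is equivalent). A userspace sketch over a fabricated buffer:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

static int checksum_ok(const uint8_t *p, size_t n)
{
	uint8_t sum = 0;

	while (n--)
		sum += *p++;
	return sum == 0;	/* matches mpf_checksum() returning 0 */
}

static long scan(const uint8_t *base, size_t len)
{
	for (size_t off = 0; off + 16 <= len; off += 16)
		if (!memcmp(base + off, "_MP_", 4) &&
		    checksum_ok(base + off, 16))
			return (long)off;
	return -1;
}

int main(void)
{
	uint8_t region[64] = { 0 };

	memcpy(region + 32, "_MP_", 4);
	/* fix up one byte so the 16-byte block sums to 0 mod 256 */
	region[32 + 4] = (uint8_t)(256 - ('_' + 'M' + 'P' + '_'));
	printf("found at offset %ld\n", scan(region, sizeof(region)));
	return 0;
}
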
++ */ ++ unsigned long size = PAGE_SIZE; ++ unsigned long end = max_low_pfn * PAGE_SIZE; ++ if (mpf->mpf_physptr + size > end) ++ size = end - mpf->mpf_physptr; ++ reserve_bootmem(mpf->mpf_physptr, size); ++ } ++#else ++ printk(KERN_INFO "found SMP MP-table at %08lx\n", ++ ((unsigned long)bp - (unsigned long)isa_bus_to_virt(base)) + base); ++#endif ++ ++ mpf_found = mpf; ++ return 1; ++ } ++ bp += 4; ++ length -= 16; ++ } ++ return 0; ++} ++ ++void __init find_smp_config (void) ++{ ++#ifndef CONFIG_XEN ++ unsigned int address; ++#endif ++ ++ /* ++ * FIXME: Linux assumes you have 640K of base ram.. ++ * this continues the error... ++ * ++ * 1) Scan the bottom 1K for a signature ++ * 2) Scan the top 1K of base RAM ++ * 3) Scan the 64K of bios ++ */ ++ if (smp_scan_config(0x0,0x400) || ++ smp_scan_config(639*0x400,0x400) || ++ smp_scan_config(0xF0000,0x10000)) ++ return; ++ /* ++ * If it is an SMP machine we should know now, unless the ++ * configuration is in an EISA/MCA bus machine with an ++ * extended bios data area. ++ * ++ * there is a real-mode segmented pointer pointing to the ++ * 4K EBDA area at 0x40E, calculate and scan it here. ++ * ++ * NOTE! There are Linux loaders that will corrupt the EBDA ++ * area, and as such this kind of SMP config may be less ++ * trustworthy, simply because the SMP table may have been ++ * stomped on during early boot. These loaders are buggy and ++ * should be fixed. ++ * ++ * MP1.4 SPEC states to only scan first 1K of 4K EBDA. ++ */ ++ ++#ifndef CONFIG_XEN ++ address = get_bios_ebda(); ++ if (address) ++ smp_scan_config(address, 0x400); ++#endif ++} ++ ++int es7000_plat; ++ ++/* -------------------------------------------------------------------------- ++ ACPI-based MP Configuration ++ -------------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_ACPI ++ ++void __init mp_register_lapic_address ( ++ u64 address) ++{ ++#ifndef CONFIG_XEN ++ mp_lapic_addr = (unsigned long) address; ++ ++ set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); ++ ++ if (boot_cpu_physical_apicid == -1U) ++ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); ++ ++ Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); ++#endif ++} ++ ++ ++void __devinit mp_register_lapic ( ++ u8 id, ++ u8 enabled) ++{ ++ struct mpc_config_processor processor; ++ int boot_cpu = 0; ++ ++ if (MAX_APICS - id <= 0) { ++ printk(KERN_WARNING "Processor #%d invalid (max %d)\n", ++ id, MAX_APICS); ++ return; ++ } ++ ++ if (id == boot_cpu_physical_apicid) ++ boot_cpu = 1; ++ ++#ifndef CONFIG_XEN ++ processor.mpc_type = MP_PROCESSOR; ++ processor.mpc_apicid = id; ++ processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); ++ processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); ++ processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); ++ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | ++ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; ++ processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; ++ processor.mpc_reserved[0] = 0; ++ processor.mpc_reserved[1] = 0; ++#endif ++ ++ MP_processor_info(&processor); ++} ++ ++#ifdef CONFIG_X86_IO_APIC ++ ++#define MP_ISA_BUS 0 ++#define MP_MAX_IOAPIC_PIN 127 ++ ++static struct mp_ioapic_routing { ++ int apic_id; ++ int gsi_base; ++ int gsi_end; ++ u32 pin_programmed[4]; ++} mp_ioapic_routing[MAX_IO_APICS]; ++ ++ ++static int mp_find_ioapic ( ++ int gsi) ++{ ++ int i = 0; ++ ++ /* Find the IOAPIC that manages this GSI. 
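
Aside: mp_find_ioapic(), whose loop completes just below, resolves a GSI by scanning the [gsi_base, gsi_end] windows recorded in mp_ioapic_routing[]. A standalone model with two made-up IOAPICs of 24 pins each:

#include <stdio.h>

struct routing { int apic_id; int gsi_base; int gsi_end; };

static const struct routing routes[] = {
	{ .apic_id = 2, .gsi_base = 0,  .gsi_end = 23 },
	{ .apic_id = 3, .gsi_base = 24, .gsi_end = 47 },
};

static int find_ioapic(int gsi)
{
	for (unsigned i = 0; i < sizeof(routes) / sizeof(routes[0]); i++)
		if (gsi >= routes[i].gsi_base && gsi <= routes[i].gsi_end)
			return (int)i;
	return -1;	/* same failure convention as mp_find_ioapic() */
}

int main(void)
{
	int gsi = 30;
	int idx = find_ioapic(gsi);

	if (idx >= 0)	/* the pin is the offset into that window */
		printf("GSI %d -> IOAPIC %d pin %d\n", gsi,
		       routes[idx].apic_id, gsi - routes[idx].gsi_base);
	return 0;
}
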
*/ ++ for (i = 0; i < nr_ioapics; i++) { ++ if ((gsi >= mp_ioapic_routing[i].gsi_base) ++ && (gsi <= mp_ioapic_routing[i].gsi_end)) ++ return i; ++ } ++ ++ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); ++ ++ return -1; ++} ++ ++ ++void __init mp_register_ioapic ( ++ u8 id, ++ u32 address, ++ u32 gsi_base) ++{ ++ int idx = 0; ++ int tmpid; ++ ++ if (nr_ioapics >= MAX_IO_APICS) { ++ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " ++ "(found %d)\n", MAX_IO_APICS, nr_ioapics); ++ panic("Recompile kernel with bigger MAX_IO_APICS!\n"); ++ } ++ if (!address) { ++ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" ++ " found in MADT table, skipping!\n"); ++ return; ++ } ++ ++ idx = nr_ioapics++; ++ ++ mp_ioapics[idx].mpc_type = MP_IOAPIC; ++ mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; ++ mp_ioapics[idx].mpc_apicaddr = address; ++ ++#ifndef CONFIG_XEN ++ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); ++#endif ++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) ++ tmpid = io_apic_get_unique_id(idx, id); ++ else ++ tmpid = id; ++ if (tmpid == -1) { ++ nr_ioapics--; ++ return; ++ } ++ mp_ioapics[idx].mpc_apicid = tmpid; ++ mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); ++ ++ /* ++ * Build basic GSI lookup table to facilitate gsi->io_apic lookups ++ * and to prevent reprogramming of IOAPIC pins (PCI GSIs). ++ */ ++ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; ++ mp_ioapic_routing[idx].gsi_base = gsi_base; ++ mp_ioapic_routing[idx].gsi_end = gsi_base + ++ io_apic_get_redir_entries(idx); ++ ++ printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " ++ "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, ++ mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, ++ mp_ioapic_routing[idx].gsi_base, ++ mp_ioapic_routing[idx].gsi_end); ++ ++ return; ++} ++ ++ ++void __init mp_override_legacy_irq ( ++ u8 bus_irq, ++ u8 polarity, ++ u8 trigger, ++ u32 gsi) ++{ ++ struct mpc_config_intsrc intsrc; ++ int ioapic = -1; ++ int pin = -1; ++ ++ /* ++ * Convert 'gsi' to 'ioapic.pin'. ++ */ ++ ioapic = mp_find_ioapic(gsi); ++ if (ioapic < 0) ++ return; ++ pin = gsi - mp_ioapic_routing[ioapic].gsi_base; ++ ++ /* ++ * TBD: This check is for faulty timer entries, where the override ++ * erroneously sets the trigger to level, resulting in a HUGE ++ * increase of timer interrupts! ++ */ ++ if ((bus_irq == 0) && (trigger == 3)) ++ trigger = 1; ++ ++ intsrc.mpc_type = MP_INTSRC; ++ intsrc.mpc_irqtype = mp_INT; ++ intsrc.mpc_irqflag = (trigger << 2) | polarity; ++ intsrc.mpc_srcbus = MP_ISA_BUS; ++ intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ ++ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ ++ intsrc.mpc_dstirq = pin; /* INTIN# */ ++ ++ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", ++ intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, ++ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, ++ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); ++ ++ mp_irqs[mp_irq_entries] = intsrc; ++ if (++mp_irq_entries == MAX_IRQ_SOURCES) ++ panic("Max # of irq sources exceeded!\n"); ++ ++ return; ++} ++ ++void __init mp_config_acpi_legacy_irqs (void) ++{ ++ struct mpc_config_intsrc intsrc; ++ int i = 0; ++ int ioapic = -1; ++ ++ /* ++ * Fabricate the legacy ISA bus (bus #31). 
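
Aside: mp_override_legacy_irq() above packs an MADT override into the MPS irqflag encoding — polarity in the low two bits, trigger mode in the next two, so flags = (trigger << 2) | polarity — and the Dprintk()s decode the fields with "& 3" and ">> 2 & 3". A two-minute worked example:

#include <stdio.h>

int main(void)
{
	unsigned polarity = 3;	/* active low */
	unsigned trigger  = 3;	/* level      */
	unsigned flags    = (trigger << 2) | polarity;

	printf("flags = 0x%x\n", flags);	/* 0xf */
	printf("polarity = %u, trigger = %u\n",
	       flags & 3, (flags >> 2) & 3);
	return 0;
}
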
++ */
++ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
++ Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
++
++ /*
++ * Older generations of ES7000 have no legacy identity mappings
++ */
++ if (es7000_plat == 1)
++ return;
++
++ /*
++ * Locate the IOAPIC that manages the ISA IRQs (0-15).
++ */
++ ioapic = mp_find_ioapic(0);
++ if (ioapic < 0)
++ return;
++
++ intsrc.mpc_type = MP_INTSRC;
++ intsrc.mpc_irqflag = 0; /* Conforming */
++ intsrc.mpc_srcbus = MP_ISA_BUS;
++ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
++
++ /*
++ * Use the default configuration for the IRQs 0-15, unless
++ * overridden by (MADT) interrupt source override entries.
++ */
++ for (i = 0; i < 16; i++) {
++ int idx;
++
++ for (idx = 0; idx < mp_irq_entries; idx++) {
++ struct mpc_config_intsrc *irq = mp_irqs + idx;
++
++ /* Do we already have a mapping for this ISA IRQ? */
++ if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
++ break;
++
++ /* Do we already have a mapping for this IOAPIC pin */
++ if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
++ (irq->mpc_dstirq == i))
++ break;
++ }
++
++ if (idx != mp_irq_entries) {
++ printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
++ continue; /* IRQ already used */
++ }
++
++ intsrc.mpc_irqtype = mp_INT;
++ intsrc.mpc_srcbusirq = i; /* Identity mapped */
++ intsrc.mpc_dstirq = i;
++
++ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
++ "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
++ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
++ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
++ intsrc.mpc_dstirq);
++
++ mp_irqs[mp_irq_entries] = intsrc;
++ if (++mp_irq_entries == MAX_IRQ_SOURCES)
++ panic("Max # of irq sources exceeded!\n");
++ }
++}
++
++#define MAX_GSI_NUM 4096
++
++int mp_register_gsi (u32 gsi, int triggering, int polarity)
++{
++ int ioapic = -1;
++ int ioapic_pin = 0;
++ int idx, bit = 0;
++ static int pci_irq = 16;
++ /*
++ * Mapping between Global System Interrupts, which
++ * represent all possible interrupts, and IRQs
++ * assigned to actual devices.
++ */
++ static int gsi_to_irq[MAX_GSI_NUM];
++
++ /* Don't set up the ACPI SCI because it's already set up */
++ if (acpi_fadt.sci_int == gsi)
++ return gsi;
++
++ ioapic = mp_find_ioapic(gsi);
++ if (ioapic < 0) {
++ printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
++ return gsi;
++ }
++
++ ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
++
++ if (ioapic_renumber_irq)
++ gsi = ioapic_renumber_irq(ioapic, gsi);
++
++ /*
++ * Avoid pin reprogramming. PRTs typically include entries
++ * with redundant pin->gsi mappings (but unique PCI devices);
++ * we only program the IOAPIC on the first.
++ */
++ bit = ioapic_pin % 32;
++ idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
++ if (idx > 3) {
++ printk(KERN_ERR "Invalid reference to IOAPIC pin "
++ "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
++ ioapic_pin);
++ return gsi;
++ }
++ if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
++ Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
++ mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
++ return gsi_to_irq[gsi];
++ }
++
++ mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
++
++ if (triggering == ACPI_LEVEL_SENSITIVE) {
++ /*
++ * For PCI devices assign IRQs in order, avoiding gaps
++ * due to unused I/O APIC pins.
++ */
++ int irq = gsi;
++ if (gsi < MAX_GSI_NUM) {
++ /*
++ * Retain the VIA chipset work-around (gsi > 15), but
++ * avoid a problem where the 8254 timer (IRQ0) is setup
++ * via an override (so it's not on pin 0 of the ioapic),
++ * and at the same time, the pin 0 interrupt is a PCI
++ * type. The gsi > 15 test could cause these two pins
++ * to be shared as IRQ0, and they are not shareable.
++ * So test for this condition, and if necessary, avoid
++ * the pin collision.
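
Aside: the pin_programmed check in mp_register_gsi() above is a plain first-use bitmap — 128 pins tracked in four 32-bit words, word = pin / 32 and bit = pin % 32 — so redundant PRT entries never reprogram a pin. Standalone sketch:

#include <stdio.h>
#include <stdint.h>

static uint32_t pin_programmed[4];	/* 4 * 32 = 128 pins */

static int program_once(unsigned pin)
{
	unsigned idx = pin / 32, bit = pin % 32;

	if (pin_programmed[idx] & (1u << bit))
		return 0;	/* already programmed, skip */
	pin_programmed[idx] |= 1u << bit;
	return 1;		/* first time: caller programs the pin */
}

int main(void)
{
	printf("pin 35 first: %d\n", program_once(35));	/* 1 */
	printf("pin 35 again: %d\n", program_once(35));	/* 0 */
	return 0;
}
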
++ */ ++ if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) ++ gsi = pci_irq++; ++ /* ++ * Don't assign IRQ used by ACPI SCI ++ */ ++ if (gsi == acpi_fadt.sci_int) ++ gsi = pci_irq++; ++ gsi_to_irq[irq] = gsi; ++ } else { ++ printk(KERN_ERR "GSI %u is too high\n", gsi); ++ return gsi; ++ } ++ } ++ ++ io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, ++ triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, ++ polarity == ACPI_ACTIVE_HIGH ? 0 : 1); ++ return gsi; ++} ++ ++#endif /* CONFIG_X86_IO_APIC */ ++#endif /* CONFIG_ACPI */ +Index: head-2008-11-25/arch/x86/kernel/pci-dma-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/pci-dma-xen.c 2008-10-29 09:55:56.000000000 +0100 +@@ -0,0 +1,409 @@ ++/* ++ * Dynamic DMA mapping support. ++ * ++ * On i386 there is no hardware dynamic DMA address translation, ++ * so consistent alloc/free are merely page allocation/freeing. ++ * The rest of the dynamic DMA mapping interface is implemented ++ * in asm/pci.h. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __x86_64__ ++#include ++ ++int iommu_merge __read_mostly = 0; ++EXPORT_SYMBOL(iommu_merge); ++ ++dma_addr_t bad_dma_address __read_mostly; ++EXPORT_SYMBOL(bad_dma_address); ++ ++/* This tells the BIO block layer to assume merging. Default to off ++ because we cannot guarantee merging later. */ ++int iommu_bio_merge __read_mostly = 0; ++EXPORT_SYMBOL(iommu_bio_merge); ++ ++int force_iommu __read_mostly= 0; ++ ++__init int iommu_setup(char *p) ++{ ++ return 1; ++} ++ ++void __init pci_iommu_alloc(void) ++{ ++#ifdef CONFIG_SWIOTLB ++ pci_swiotlb_init(); ++#endif ++} ++ ++static int __init pci_iommu_init(void) ++{ ++ no_iommu_init(); ++ return 0; ++} ++ ++/* Must execute after PCI subsystem */ ++fs_initcall(pci_iommu_init); ++#endif ++ ++struct dma_coherent_mem { ++ void *virt_base; ++ u32 device_base; ++ int size; ++ int flags; ++ unsigned long *bitmap; ++}; ++ ++#define IOMMU_BUG_ON(test) \ ++do { \ ++ if (unlikely(test)) { \ ++ printk(KERN_ALERT "Fatal DMA error! 
" \ ++ "Please use 'swiotlb=force'\n"); \ ++ BUG(); \ ++ } \ ++} while (0) ++ ++static int check_pages_physically_contiguous(unsigned long pfn, ++ unsigned int offset, ++ size_t length) ++{ ++ unsigned long next_mfn; ++ int i; ++ int nr_pages; ++ ++ next_mfn = pfn_to_mfn(pfn); ++ nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT; ++ ++ for (i = 1; i < nr_pages; i++) { ++ if (pfn_to_mfn(++pfn) != ++next_mfn) ++ return 0; ++ } ++ return 1; ++} ++ ++int range_straddles_page_boundary(paddr_t p, size_t size) ++{ ++ unsigned long pfn = p >> PAGE_SHIFT; ++ unsigned int offset = p & ~PAGE_MASK; ++ ++ return ((offset + size > PAGE_SIZE) && ++ !check_pages_physically_contiguous(pfn, offset, size)); ++} ++ ++int ++dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, ++ enum dma_data_direction direction) ++{ ++ int i, rc; ++ ++ if (direction == DMA_NONE) ++ BUG(); ++ WARN_ON(nents == 0 || sg[0].length == 0); ++ ++ if (swiotlb) { ++ rc = swiotlb_map_sg(hwdev, sg, nents, direction); ++ } else { ++ for (i = 0; i < nents; i++ ) { ++ BUG_ON(!sg[i].page); ++ sg[i].dma_address = ++ gnttab_dma_map_page(sg[i].page) + sg[i].offset; ++ sg[i].dma_length = sg[i].length; ++ IOMMU_BUG_ON(address_needs_mapping( ++ hwdev, sg[i].dma_address)); ++ IOMMU_BUG_ON(range_straddles_page_boundary( ++ page_to_pseudophys(sg[i].page) + sg[i].offset, ++ sg[i].length)); ++ } ++ rc = nents; ++ } ++ ++ flush_write_buffers(); ++ return rc; ++} ++EXPORT_SYMBOL(dma_map_sg); ++ ++void ++dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, ++ enum dma_data_direction direction) ++{ ++ int i; ++ ++ BUG_ON(direction == DMA_NONE); ++ if (swiotlb) ++ swiotlb_unmap_sg(hwdev, sg, nents, direction); ++ else { ++ for (i = 0; i < nents; i++ ) ++ gnttab_dma_unmap_page(sg[i].dma_address); ++ } ++} ++EXPORT_SYMBOL(dma_unmap_sg); ++ ++#ifdef CONFIG_HIGHMEM ++dma_addr_t ++dma_map_page(struct device *dev, struct page *page, unsigned long offset, ++ size_t size, enum dma_data_direction direction) ++{ ++ dma_addr_t dma_addr; ++ ++ BUG_ON(direction == DMA_NONE); ++ ++ if (swiotlb) { ++ dma_addr = swiotlb_map_page( ++ dev, page, offset, size, direction); ++ } else { ++ dma_addr = gnttab_dma_map_page(page) + offset; ++ IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr)); ++ } ++ ++ return dma_addr; ++} ++EXPORT_SYMBOL(dma_map_page); ++ ++void ++dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(direction == DMA_NONE); ++ if (swiotlb) ++ swiotlb_unmap_page(dev, dma_address, size, direction); ++ else ++ gnttab_dma_unmap_page(dma_address); ++} ++EXPORT_SYMBOL(dma_unmap_page); ++#endif /* CONFIG_HIGHMEM */ ++ ++int ++dma_mapping_error(dma_addr_t dma_addr) ++{ ++ if (swiotlb) ++ return swiotlb_dma_mapping_error(dma_addr); ++ return 0; ++} ++EXPORT_SYMBOL(dma_mapping_error); ++ ++int ++dma_supported(struct device *dev, u64 mask) ++{ ++ if (swiotlb) ++ return swiotlb_dma_supported(dev, mask); ++ /* ++ * By default we'll BUG when an infeasible DMA is requested, and ++ * request swiotlb=force (see IOMMU_BUG_ON). ++ */ ++ return 1; ++} ++EXPORT_SYMBOL(dma_supported); ++ ++void *dma_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t *dma_handle, gfp_t gfp) ++{ ++ void *ret; ++ struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; ++ unsigned int order = get_order(size); ++ unsigned long vstart; ++ u64 mask; ++ ++ /* ignore region specifiers */ ++ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); ++ ++ if (mem) { ++ int page = bitmap_find_free_region(mem->bitmap, mem->size, ++ order); ++ if (page >= 0) { ++ *dma_handle = mem->device_base + (page << PAGE_SHIFT); ++ ret = mem->virt_base + (page << PAGE_SHIFT); ++ memset(ret, 0, size); ++ return ret; ++ } ++ if (mem->flags & DMA_MEMORY_EXCLUSIVE) ++ return NULL; ++ } ++ ++ if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) ++ gfp |= GFP_DMA; ++ ++ vstart = __get_free_pages(gfp, order); ++ ret = (void *)vstart; ++ ++ if (dev != NULL && dev->coherent_dma_mask) ++ mask = dev->coherent_dma_mask; ++ else ++ mask = 0xffffffff; ++ ++ if (ret != NULL) { ++ if (xen_create_contiguous_region(vstart, order, ++ fls64(mask)) != 0) { ++ free_pages(vstart, order); ++ return NULL; ++ } ++ memset(ret, 0, size); ++ *dma_handle = virt_to_bus(ret); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(dma_alloc_coherent); ++ ++void dma_free_coherent(struct device *dev, size_t size, ++ void *vaddr, dma_addr_t dma_handle) ++{ ++ struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; ++ int order = get_order(size); ++ ++ if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { ++ int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; ++ ++ bitmap_release_region(mem->bitmap, page, order); ++ } else { ++ xen_destroy_contiguous_region((unsigned long)vaddr, order); ++ free_pages((unsigned long)vaddr, order); ++ } ++} ++EXPORT_SYMBOL(dma_free_coherent); ++ ++#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY ++int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, ++ dma_addr_t device_addr, size_t size, int flags) ++{ ++ void __iomem *mem_base; ++ int pages = size >> PAGE_SHIFT; ++ int bitmap_size = (pages + 31)/32; ++ ++ if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) ++ goto out; ++ if (!size) ++ goto out; ++ if (dev->dma_mem) ++ goto out; ++ ++ /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ ++ ++ mem_base = ioremap(bus_addr, size); ++ if (!mem_base) ++ goto out; ++ ++ dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); ++ if (!dev->dma_mem) ++ goto out; ++ memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); ++ dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); ++ if (!dev->dma_mem->bitmap) ++ goto free1_out; ++ memset(dev->dma_mem->bitmap, 0, bitmap_size); ++ ++ dev->dma_mem->virt_base = mem_base; ++ dev->dma_mem->device_base = device_addr; ++ dev->dma_mem->size = pages; ++ dev->dma_mem->flags = flags; ++ ++ if (flags & DMA_MEMORY_MAP) ++ return DMA_MEMORY_MAP; ++ ++ return DMA_MEMORY_IO; ++ ++ free1_out: ++ kfree(dev->dma_mem->bitmap); ++ out: ++ return 0; ++} ++EXPORT_SYMBOL(dma_declare_coherent_memory); ++ ++void dma_release_declared_memory(struct device *dev) ++{ ++ struct dma_coherent_mem *mem = dev->dma_mem; ++ ++ if(!mem) ++ return; ++ dev->dma_mem = NULL; ++ iounmap(mem->virt_base); ++ kfree(mem->bitmap); ++ kfree(mem); ++} ++EXPORT_SYMBOL(dma_release_declared_memory); ++ ++void *dma_mark_declared_memory_occupied(struct device *dev, ++ dma_addr_t device_addr, size_t size) ++{ ++ struct dma_coherent_mem *mem = dev->dma_mem; ++ int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ int pos, err; ++ ++ if (!mem) ++ return ERR_PTR(-EINVAL); ++ ++ pos = (device_addr - mem->device_base) >> PAGE_SHIFT; ++ err = bitmap_allocate_region(mem->bitmap, pos, 
get_order(pages)); ++ if (err != 0) ++ return ERR_PTR(err); ++ return mem->virt_base + (pos << PAGE_SHIFT); ++} ++EXPORT_SYMBOL(dma_mark_declared_memory_occupied); ++#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */ ++ ++dma_addr_t ++dma_map_single(struct device *dev, void *ptr, size_t size, ++ enum dma_data_direction direction) ++{ ++ dma_addr_t dma; ++ ++ if (direction == DMA_NONE) ++ BUG(); ++ WARN_ON(size == 0); ++ ++ if (swiotlb) { ++ dma = swiotlb_map_single(dev, ptr, size, direction); ++ } else { ++ dma = gnttab_dma_map_page(virt_to_page(ptr)) + ++ offset_in_page(ptr); ++ IOMMU_BUG_ON(range_straddles_page_boundary(__pa(ptr), size)); ++ IOMMU_BUG_ON(address_needs_mapping(dev, dma)); ++ } ++ ++ flush_write_buffers(); ++ return dma; ++} ++EXPORT_SYMBOL(dma_map_single); ++ ++void ++dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, ++ enum dma_data_direction direction) ++{ ++ if (direction == DMA_NONE) ++ BUG(); ++ if (swiotlb) ++ swiotlb_unmap_single(dev, dma_addr, size, direction); ++ else ++ gnttab_dma_unmap_page(dma_addr); ++} ++EXPORT_SYMBOL(dma_unmap_single); ++ ++void ++dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction direction) ++{ ++ if (swiotlb) ++ swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction); ++} ++EXPORT_SYMBOL(dma_sync_single_for_cpu); ++ ++void ++dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction direction) ++{ ++ if (swiotlb) ++ swiotlb_sync_single_for_device(dev, dma_handle, size, direction); ++} ++EXPORT_SYMBOL(dma_sync_single_for_device); +Index: head-2008-11-25/arch/x86/kernel/process_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/process_32-xen.c 2008-07-21 11:00:32.000000000 +0200 +@@ -0,0 +1,877 @@ ++/* ++ * linux/arch/i386/kernel/process.c ++ * ++ * Copyright (C) 1995 Linus Torvalds ++ * ++ * Pentium III FXSR, SSE support ++ * Gareth Hughes , May 2000 ++ */ ++ ++/* ++ * This file handles the architecture-dependent parts of process handling.. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_MATH_EMULATION ++#include ++#endif ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++ ++asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); ++ ++static int hlt_counter; ++ ++unsigned long boot_option_idle_override = 0; ++EXPORT_SYMBOL(boot_option_idle_override); ++ ++/* ++ * Return saved PC of a blocked thread. ++ */ ++unsigned long thread_saved_pc(struct task_struct *tsk) ++{ ++ return ((unsigned long *)tsk->thread.esp)[3]; ++} ++ ++/* ++ * Powermanagement idle function, if any.. ++ */ ++void (*pm_idle)(void); ++EXPORT_SYMBOL(pm_idle); ++static DEFINE_PER_CPU(unsigned int, cpu_idle_state); ++ ++void disable_hlt(void) ++{ ++ hlt_counter++; ++} ++ ++EXPORT_SYMBOL(disable_hlt); ++ ++void enable_hlt(void) ++{ ++ hlt_counter--; ++} ++ ++EXPORT_SYMBOL(enable_hlt); ++ ++/* ++ * On SMP it's slightly faster (but much more power-consuming!) 
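
Aside: the DMA helpers above (dma_map_single(), dma_unmap_single(), the sg and sync variants) all share one dispatch shape — branch once on the global swiotlb flag, then delegate either to the bounce-buffer path or to direct grant-table translation. A standalone model with invented backends (fast_map/slow_map are stand-ins, not real APIs):

#include <stdio.h>
#include <stdint.h>

static int use_bounce;	/* plays the role of the 'swiotlb' flag */

static uintptr_t fast_map(void *p) { return (uintptr_t)p; }
static uintptr_t slow_map(void *p) { printf("bounce copy\n"); return (uintptr_t)p; }

static uintptr_t map_single(void *ptr)
{
	/* one branch up front, so every caller sees the same API */
	return use_bounce ? slow_map(ptr) : fast_map(ptr);
}

int main(void)
{
	int buf;

	use_bounce = 0;
	printf("direct: %lx\n", (unsigned long)map_single(&buf));
	use_bounce = 1;
	printf("bounce: %lx\n", (unsigned long)map_single(&buf));
	return 0;
}
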
++ * to poll the ->work.need_resched flag instead of waiting for the ++ * cross-CPU IPI to arrive. Use this option with caution. ++ */ ++static void poll_idle (void) ++{ ++ local_irq_enable(); ++ ++ asm volatile( ++ "2:" ++ "testl %0, %1;" ++ "rep; nop;" ++ "je 2b;" ++ : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); ++} ++ ++static void xen_idle(void) ++{ ++ local_irq_disable(); ++ ++ if (need_resched()) ++ local_irq_enable(); ++ else { ++ current_thread_info()->status &= ~TS_POLLING; ++ smp_mb__after_clear_bit(); ++ safe_halt(); ++ current_thread_info()->status |= TS_POLLING; ++ } ++} ++#ifdef CONFIG_APM_MODULE ++EXPORT_SYMBOL(default_idle); ++#endif ++ ++#ifdef CONFIG_HOTPLUG_CPU ++extern cpumask_t cpu_initialized; ++static inline void play_dead(void) ++{ ++ idle_task_exit(); ++ local_irq_disable(); ++ cpu_clear(smp_processor_id(), cpu_initialized); ++ preempt_enable_no_resched(); ++ VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL)); ++ cpu_bringup(); ++} ++#else ++static inline void play_dead(void) ++{ ++ BUG(); ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++/* ++ * The idle thread. There's no useful work to be ++ * done, so just try to conserve power and have a ++ * low exit latency (ie sit in a loop waiting for ++ * somebody to say that they'd like to reschedule) ++ */ ++void cpu_idle(void) ++{ ++ int cpu = smp_processor_id(); ++ ++ current_thread_info()->status |= TS_POLLING; ++ ++ /* endless idle loop with no priority at all */ ++ while (1) { ++ while (!need_resched()) { ++ void (*idle)(void); ++ ++ if (__get_cpu_var(cpu_idle_state)) ++ __get_cpu_var(cpu_idle_state) = 0; ++ ++ rmb(); ++ idle = xen_idle; /* no alternatives */ ++ ++ if (cpu_is_offline(cpu)) ++ play_dead(); ++ ++ __get_cpu_var(irq_stat).idle_timestamp = jiffies; ++ idle(); ++ } ++ preempt_enable_no_resched(); ++ schedule(); ++ preempt_disable(); ++ } ++} ++ ++void cpu_idle_wait(void) ++{ ++ unsigned int cpu, this_cpu = get_cpu(); ++ cpumask_t map; ++ ++ set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); ++ put_cpu(); ++ ++ cpus_clear(map); ++ for_each_online_cpu(cpu) { ++ per_cpu(cpu_idle_state, cpu) = 1; ++ cpu_set(cpu, map); ++ } ++ ++ __get_cpu_var(cpu_idle_state) = 0; ++ ++ wmb(); ++ do { ++ ssleep(1); ++ for_each_online_cpu(cpu) { ++ if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) ++ cpu_clear(cpu, map); ++ } ++ cpus_and(map, map, cpu_online_map); ++ } while (!cpus_empty(map)); ++} ++EXPORT_SYMBOL_GPL(cpu_idle_wait); ++ ++void __devinit select_idle_routine(const struct cpuinfo_x86 *c) ++{ ++} ++ ++static int __init idle_setup (char *str) ++{ ++ if (!strncmp(str, "poll", 4)) { ++ printk("using polling idle threads.\n"); ++ pm_idle = poll_idle; ++ } ++ ++ boot_option_idle_override = 1; ++ return 1; ++} ++ ++__setup("idle=", idle_setup); ++ ++void show_regs(struct pt_regs * regs) ++{ ++ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; ++ ++ printk("\n"); ++ printk("Pid: %d, comm: %20s\n", current->pid, current->comm); ++ printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); ++ print_symbol("EIP is at %s\n", regs->eip); ++ ++ if (user_mode_vm(regs)) ++ printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); ++ printk(" EFLAGS: %08lx %s (%s %.*s)\n", ++ regs->eflags, print_tainted(), system_utsname.release, ++ (int)strcspn(system_utsname.version, " "), ++ system_utsname.version); ++ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", ++ regs->eax,regs->ebx,regs->ecx,regs->edx); ++ printk("ESI: %08lx EDI: %08lx EBP: %08lx", ++ 
regs->esi, regs->edi, regs->ebp); ++ printk(" DS: %04x ES: %04x\n", ++ 0xffff & regs->xds,0xffff & regs->xes); ++ ++ cr0 = read_cr0(); ++ cr2 = read_cr2(); ++ cr3 = read_cr3(); ++ cr4 = read_cr4_safe(); ++ printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); ++ show_trace(NULL, regs, ®s->esp); ++} ++ ++/* ++ * This gets run with %ebx containing the ++ * function to call, and %edx containing ++ * the "args". ++ */ ++extern void kernel_thread_helper(void); ++__asm__(".section .text\n" ++ ".align 4\n" ++ "kernel_thread_helper:\n\t" ++ "movl %edx,%eax\n\t" ++ "pushl %edx\n\t" ++ "call *%ebx\n\t" ++ "pushl %eax\n\t" ++ "call do_exit\n" ++ ".previous"); ++ ++/* ++ * Create a kernel thread ++ */ ++int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) ++{ ++ struct pt_regs regs; ++ ++ memset(®s, 0, sizeof(regs)); ++ ++ regs.ebx = (unsigned long) fn; ++ regs.edx = (unsigned long) arg; ++ ++ regs.xds = __USER_DS; ++ regs.xes = __USER_DS; ++ regs.orig_eax = -1; ++ regs.eip = (unsigned long) kernel_thread_helper; ++ regs.xcs = GET_KERNEL_CS(); ++ regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; ++ ++ /* Ok, create the new process.. */ ++ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); ++} ++EXPORT_SYMBOL(kernel_thread); ++ ++/* ++ * Free current thread data structures etc.. ++ */ ++void exit_thread(void) ++{ ++ /* The process may have allocated an io port bitmap... nuke it. */ ++ if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { ++ struct task_struct *tsk = current; ++ struct thread_struct *t = &tsk->thread; ++ struct physdev_set_iobitmap set_iobitmap; ++ memset(&set_iobitmap, 0, sizeof(set_iobitmap)); ++ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, ++ &set_iobitmap)); ++ kfree(t->io_bitmap_ptr); ++ t->io_bitmap_ptr = NULL; ++ clear_thread_flag(TIF_IO_BITMAP); ++ } ++} ++ ++void flush_thread(void) ++{ ++ struct task_struct *tsk = current; ++ ++ memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); ++ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); ++ clear_tsk_thread_flag(tsk, TIF_DEBUG); ++ /* ++ * Forget coprocessor state.. ++ */ ++ clear_fpu(tsk); ++ clear_used_math(); ++} ++ ++void release_thread(struct task_struct *dead_task) ++{ ++ BUG_ON(dead_task->mm); ++ release_vm86_irqs(dead_task); ++} ++ ++/* ++ * This gets called before we allocate a new thread and copy ++ * the current task into it. ++ */ ++void prepare_to_copy(struct task_struct *tsk) ++{ ++ unlazy_fpu(tsk); ++} ++ ++int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, ++ unsigned long unused, ++ struct task_struct * p, struct pt_regs * regs) ++{ ++ struct pt_regs * childregs; ++ struct task_struct *tsk; ++ int err; ++ ++ childregs = task_pt_regs(p); ++ *childregs = *regs; ++ childregs->eax = 0; ++ childregs->esp = esp; ++ ++ p->thread.esp = (unsigned long) childregs; ++ p->thread.esp0 = (unsigned long) (childregs+1); ++ ++ p->thread.eip = (unsigned long) ret_from_fork; ++ ++ savesegment(fs,p->thread.fs); ++ savesegment(gs,p->thread.gs); ++ ++ tsk = current; ++ if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { ++ p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); ++ if (!p->thread.io_bitmap_ptr) { ++ p->thread.io_bitmap_max = 0; ++ return -ENOMEM; ++ } ++ memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr, ++ IO_BITMAP_BYTES); ++ set_tsk_thread_flag(p, TIF_IO_BITMAP); ++ } ++ ++ /* ++ * Set a new TLS for the child thread? 
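
Aside: kernel_thread() above never builds a real user frame; it seeds the child's saved registers so the new task "returns" into kernel_thread_helper, which calls *%ebx with %edx and then exits. A userspace model of that trampoline, with a plain struct standing in for pt_regs:

#include <stdio.h>
#include <stdlib.h>

struct fake_regs {
	int (*ebx)(void *);	/* function to call */
	void *edx;		/* its argument     */
};

static void thread_helper(struct fake_regs *regs)
{
	/* mirrors kernel_thread_helper: call *%ebx with %edx, then exit */
	exit(regs->ebx(regs->edx));
}

static int worker(void *arg)
{
	printf("worker got: %s\n", (const char *)arg);
	return 0;
}

int main(void)
{
	struct fake_regs regs = { .ebx = worker, .edx = "hello" };

	thread_helper(&regs);	/* not reached past here */
}
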
++ */ ++ if (clone_flags & CLONE_SETTLS) { ++ struct desc_struct *desc; ++ struct user_desc info; ++ int idx; ++ ++ err = -EFAULT; ++ if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info))) ++ goto out; ++ err = -EINVAL; ++ if (LDT_empty(&info)) ++ goto out; ++ ++ idx = info.entry_number; ++ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) ++ goto out; ++ ++ desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; ++ desc->a = LDT_entry_a(&info); ++ desc->b = LDT_entry_b(&info); ++ } ++ ++ p->thread.iopl = current->thread.iopl; ++ ++ err = 0; ++ out: ++ if (err && p->thread.io_bitmap_ptr) { ++ kfree(p->thread.io_bitmap_ptr); ++ p->thread.io_bitmap_max = 0; ++ } ++ return err; ++} ++ ++/* ++ * fill in the user structure for a core dump.. ++ */ ++void dump_thread(struct pt_regs * regs, struct user * dump) ++{ ++ int i; ++ ++/* changed the size calculations - should hopefully work better. lbt */ ++ dump->magic = CMAGIC; ++ dump->start_code = 0; ++ dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); ++ dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; ++ dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; ++ dump->u_dsize -= dump->u_tsize; ++ dump->u_ssize = 0; ++ for (i = 0; i < 8; i++) ++ dump->u_debugreg[i] = current->thread.debugreg[i]; ++ ++ if (dump->start_stack < TASK_SIZE) ++ dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; ++ ++ dump->regs.ebx = regs->ebx; ++ dump->regs.ecx = regs->ecx; ++ dump->regs.edx = regs->edx; ++ dump->regs.esi = regs->esi; ++ dump->regs.edi = regs->edi; ++ dump->regs.ebp = regs->ebp; ++ dump->regs.eax = regs->eax; ++ dump->regs.ds = regs->xds; ++ dump->regs.es = regs->xes; ++ savesegment(fs,dump->regs.fs); ++ savesegment(gs,dump->regs.gs); ++ dump->regs.orig_eax = regs->orig_eax; ++ dump->regs.eip = regs->eip; ++ dump->regs.cs = regs->xcs; ++ dump->regs.eflags = regs->eflags; ++ dump->regs.esp = regs->esp; ++ dump->regs.ss = regs->xss; ++ ++ dump->u_fpvalid = dump_fpu (regs, &dump->i387); ++} ++EXPORT_SYMBOL(dump_thread); ++ ++/* ++ * Capture the user space registers if the task is not running (in user space) ++ */ ++int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) ++{ ++ struct pt_regs ptregs = *task_pt_regs(tsk); ++ ptregs.xcs &= 0xffff; ++ ptregs.xds &= 0xffff; ++ ptregs.xes &= 0xffff; ++ ptregs.xss &= 0xffff; ++ ++ elf_core_copy_regs(regs, &ptregs); ++ ++ return 1; ++} ++ ++static noinline void __switch_to_xtra(struct task_struct *next_p) ++{ ++ struct thread_struct *next; ++ ++ next = &next_p->thread; ++ ++ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { ++ set_debugreg(next->debugreg[0], 0); ++ set_debugreg(next->debugreg[1], 1); ++ set_debugreg(next->debugreg[2], 2); ++ set_debugreg(next->debugreg[3], 3); ++ /* no 4 and 5 */ ++ set_debugreg(next->debugreg[6], 6); ++ set_debugreg(next->debugreg[7], 7); ++ } ++} ++ ++/* ++ * This function selects if the context switch from prev to next ++ * has to tweak the TSC disable bit in the cr4. ++ */ ++static inline void disable_tsc(struct task_struct *prev_p, ++ struct task_struct *next_p) ++{ ++ struct thread_info *prev, *next; ++ ++ /* ++ * gcc should eliminate the ->thread_info dereference if ++ * has_secure_computing returns 0 at compile time (SECCOMP=n). 
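
Aside: disable_tsc(), whose body completes just below, only rewrites CR4.TSD when exactly one of the two tasks is under seccomp, keeping the common switch path free of control-register writes. A sketch of that transition logic with cr4 as a plain variable (the kernel additionally short-circuits when neither task is under seccomp):

#include <stdio.h>

#define CR4_TSD 0x4
static unsigned cr4;

static void switch_tsc(int prev_seccomp, int next_seccomp)
{
	if (prev_seccomp && !next_seccomp)
		cr4 &= ~CR4_TSD;	/* leaving seccomp: allow rdtsc */
	else if (!prev_seccomp && next_seccomp)
		cr4 |= CR4_TSD;		/* entering seccomp: trap rdtsc */
}

int main(void)
{
	switch_tsc(0, 1);
	printf("cr4 after entering seccomp: 0x%x\n", cr4);
	switch_tsc(1, 0);
	printf("cr4 after leaving seccomp:  0x%x\n", cr4);
	return 0;
}
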
++ */ ++ prev = task_thread_info(prev_p); ++ next = task_thread_info(next_p); ++ ++ if (has_secure_computing(prev) || has_secure_computing(next)) { ++ /* slow path here */ ++ if (has_secure_computing(prev) && ++ !has_secure_computing(next)) { ++ write_cr4(read_cr4() & ~X86_CR4_TSD); ++ } else if (!has_secure_computing(prev) && ++ has_secure_computing(next)) ++ write_cr4(read_cr4() | X86_CR4_TSD); ++ } ++} ++ ++/* ++ * switch_to(x,yn) should switch tasks from x to y. ++ * ++ * We fsave/fwait so that an exception goes off at the right time ++ * (as a call from the fsave or fwait in effect) rather than to ++ * the wrong process. Lazy FP saving no longer makes any sense ++ * with modern CPU's, and this simplifies a lot of things (SMP ++ * and UP become the same). ++ * ++ * NOTE! We used to use the x86 hardware context switching. The ++ * reason for not using it any more becomes apparent when you ++ * try to recover gracefully from saved state that is no longer ++ * valid (stale segment register values in particular). With the ++ * hardware task-switch, there is no way to fix up bad state in ++ * a reasonable manner. ++ * ++ * The fact that Intel documents the hardware task-switching to ++ * be slow is a fairly red herring - this code is not noticeably ++ * faster. However, there _is_ some room for improvement here, ++ * so the performance issues may eventually be a valid point. ++ * More important, however, is the fact that this allows us much ++ * more flexibility. ++ * ++ * The return value (in %eax) will be the "prev" task after ++ * the task-switch, and shows up in ret_from_fork in entry.S, ++ * for example. ++ */ ++struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ++{ ++ struct thread_struct *prev = &prev_p->thread, ++ *next = &next_p->thread; ++ int cpu = smp_processor_id(); ++#ifndef CONFIG_X86_NO_TSS ++ struct tss_struct *tss = &per_cpu(init_tss, cpu); ++#endif ++#if CONFIG_XEN_COMPAT > 0x030002 ++ struct physdev_set_iopl iopl_op; ++ struct physdev_set_iobitmap iobmp_op; ++#else ++ struct physdev_op _pdo[2], *pdo = _pdo; ++#define iopl_op pdo->u.set_iopl ++#define iobmp_op pdo->u.set_iobitmap ++#endif ++ multicall_entry_t _mcl[8], *mcl = _mcl; ++ ++ /* XEN NOTE: FS/GS saved in switch_mm(), not here. */ ++ ++ /* ++ * This is basically '__unlazy_fpu', except that we queue a ++ * multicall to indicate FPU task switch, rather than ++ * synchronously trapping to Xen. ++ */ ++ if (prev_p->thread_info->status & TS_USEDFPU) { ++ __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ ++ mcl->op = __HYPERVISOR_fpu_taskswitch; ++ mcl->args[0] = 1; ++ mcl++; ++ } ++#if 0 /* lazy fpu sanity check */ ++ else BUG_ON(!(read_cr0() & 8)); ++#endif ++ ++ /* ++ * Reload esp0. ++ * This is load_esp0(tss, next) with a multicall. ++ */ ++ mcl->op = __HYPERVISOR_stack_switch; ++ mcl->args[0] = __KERNEL_DS; ++ mcl->args[1] = next->esp0; ++ mcl++; ++ ++ /* ++ * Load the per-thread Thread-Local Storage descriptor. ++ * This is load_TLS(next, cpu) with multicalls. ++ */ ++#define C(i) do { \ ++ if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \ ++ next->tls_array[i].b != prev->tls_array[i].b)) { \ ++ mcl->op = __HYPERVISOR_update_descriptor; \ ++ *(u64 *)&mcl->args[0] = virt_to_machine( \ ++ &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\ ++ *(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i]; \ ++ mcl++; \ ++ } \ ++} while (0) ++ C(0); C(1); C(2); ++#undef C ++ ++ if (unlikely(prev->iopl != next->iopl)) { ++ iopl_op.iopl = (next->iopl == 0) ? 
1 : (next->iopl >> 12) & 3;
++#if CONFIG_XEN_COMPAT > 0x030002
++ mcl->op = __HYPERVISOR_physdev_op;
++ mcl->args[0] = PHYSDEVOP_set_iopl;
++ mcl->args[1] = (unsigned long)&iopl_op;
++#else
++ mcl->op = __HYPERVISOR_physdev_op_compat;
++ pdo->cmd = PHYSDEVOP_set_iopl;
++ mcl->args[0] = (unsigned long)pdo++;
++#endif
++ mcl++;
++ }
++
++ if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
++ set_xen_guest_handle(iobmp_op.bitmap,
++ (char *)next->io_bitmap_ptr);
++ iobmp_op.nr_ports = next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
++#if CONFIG_XEN_COMPAT > 0x030002
++ mcl->op = __HYPERVISOR_physdev_op;
++ mcl->args[0] = PHYSDEVOP_set_iobitmap;
++ mcl->args[1] = (unsigned long)&iobmp_op;
++#else
++ mcl->op = __HYPERVISOR_physdev_op_compat;
++ pdo->cmd = PHYSDEVOP_set_iobitmap;
++ mcl->args[0] = (unsigned long)pdo++;
++#endif
++ mcl++;
++ }
++
++#if CONFIG_XEN_COMPAT <= 0x030002
++ BUG_ON(pdo > _pdo + ARRAY_SIZE(_pdo));
++#endif
++ BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl));
++ if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
++ BUG();
++
++ /*
++ * Restore %fs and %gs if needed.
++ *
++ * Glibc normally makes %fs be zero, and %gs is one of
++ * the TLS segments.
++ */
++ if (unlikely(next->fs))
++ loadsegment(fs, next->fs);
++
++ if (next->gs)
++ loadsegment(gs, next->gs);
++
++ /*
++ * Now maybe handle debug registers
++ */
++ if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
++ __switch_to_xtra(next_p);
++
++ disable_tsc(prev_p, next_p);
++
++ return prev_p;
++}
++
++asmlinkage int sys_fork(struct pt_regs regs)
++{
++ return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
++}
++
++asmlinkage int sys_clone(struct pt_regs regs)
++{
++ unsigned long clone_flags;
++ unsigned long newsp;
++ int __user *parent_tidptr, *child_tidptr;
++
++ clone_flags = regs.ebx;
++ newsp = regs.ecx;
++ parent_tidptr = (int __user *)regs.edx;
++ child_tidptr = (int __user *)regs.edi;
++ if (!newsp)
++ newsp = regs.esp;
++ return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
++}
++
++/*
++ * This is trivial, and on the face of it looks like it
++ * could equally well be done in user mode.
++ *
++ * Not so, for quite unobvious reasons - register pressure.
++ * In user mode vfork() cannot have a stack frame, and if
++ * done by calling the "clone()" system call directly, you
++ * do not have enough call-clobbered registers to hold all
++ * the information you need.
++ */
++asmlinkage int sys_vfork(struct pt_regs regs)
++{
++ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
++}
++
++/*
++ * sys_execve() executes a new program.
++ */
++asmlinkage int sys_execve(struct pt_regs regs)
++{
++ int error;
++ char * filename;
++
++ filename = getname((char __user *) regs.ebx);
++ error = PTR_ERR(filename);
++ if (IS_ERR(filename))
++ goto out;
++ error = do_execve(filename,
++ (char __user * __user *) regs.ecx,
++ (char __user * __user *) regs.edx,
++ &regs);
++ if (error == 0) {
++ task_lock(current);
++ current->ptrace &= ~PT_DTRACE;
++ task_unlock(current);
++ /* Make sure we don't return using sysenter..
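
Aside: __switch_to() above queues the stack switch, FPU flag, TLS updates and I/O-state changes as multicall entries and traps to Xen once, instead of paying one hypercall per operation. A standalone model of the batching pattern (the op codes and the submit loop are invented stand-ins for HYPERVISOR_multicall):

#include <stdio.h>
#include <stddef.h>

struct mc_entry { int op; unsigned long args[2]; };

static void submit_batch(const struct mc_entry *mc, size_t n)
{
	/* stands in for the single HYPERVISOR_multicall trap */
	for (size_t i = 0; i < n; i++)
		printf("op %d(%lu, %lu)\n", mc[i].op, mc[i].args[0], mc[i].args[1]);
}

int main(void)
{
	struct mc_entry _mcl[8], *mcl = _mcl;

	/* queue a "stack switch" and an "FPU task switch" as one batch */
	*mcl++ = (struct mc_entry){ .op = 1, .args = { 0x68, 0xc1000000 } };
	*mcl++ = (struct mc_entry){ .op = 2, .args = { 1, 0 } };

	submit_batch(_mcl, (size_t)(mcl - _mcl));
	return 0;
}
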
*/
++ set_thread_flag(TIF_IRET);
++ }
++ putname(filename);
++out:
++ return error;
++}
++
++#define top_esp (THREAD_SIZE - sizeof(unsigned long))
++#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
++
++unsigned long get_wchan(struct task_struct *p)
++{
++ unsigned long ebp, esp, eip;
++ unsigned long stack_page;
++ int count = 0;
++ if (!p || p == current || p->state == TASK_RUNNING)
++ return 0;
++ stack_page = (unsigned long)task_stack_page(p);
++ esp = p->thread.esp;
++ if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
++ return 0;
++ /* include/asm-i386/system.h:switch_to() pushes ebp last. */
++ ebp = *(unsigned long *) esp;
++ do {
++ if (ebp < stack_page || ebp > top_ebp+stack_page)
++ return 0;
++ eip = *(unsigned long *) (ebp+4);
++ if (!in_sched_functions(eip))
++ return eip;
++ ebp = *(unsigned long *) ebp;
++ } while (count++ < 16);
++ return 0;
++}
++
++/*
++ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
++ */
++static int get_free_idx(void)
++{
++ struct thread_struct *t = &current->thread;
++ int idx;
++
++ for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
++ if (desc_empty(t->tls_array + idx))
++ return idx + GDT_ENTRY_TLS_MIN;
++ return -ESRCH;
++}
++
++/*
++ * Set a given TLS descriptor:
++ */
++asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
++{
++ struct thread_struct *t = &current->thread;
++ struct user_desc info;
++ struct desc_struct *desc;
++ int cpu, idx;
++
++ if (copy_from_user(&info, u_info, sizeof(info)))
++ return -EFAULT;
++ idx = info.entry_number;
++
++ /*
++ * index -1 means the kernel should try to find and
++ * allocate an empty descriptor:
++ */
++ if (idx == -1) {
++ idx = get_free_idx();
++ if (idx < 0)
++ return idx;
++ if (put_user(idx, &u_info->entry_number))
++ return -EFAULT;
++ }
++
++ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
++ return -EINVAL;
++
++ desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
++
++ /*
++ * We must not get preempted while modifying the TLS.
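
Aside: get_wchan() above walks saved frame pointers — each frame keeps the caller's ebp at *ebp and a return address one word up, and the walk stops at the first address outside the scheduler, bounded to 16 frames. A userspace model where the "stack" is an array and addresses are indices, purely for illustration:

#include <stdio.h>

#define FRAMES 4
static unsigned long stack[2 * FRAMES];	/* [saved ebp, return eip] pairs */

int main(void)
{
	/* build a chain: frame i links to frame i + 1 */
	for (int i = 0; i < FRAMES; i++) {
		stack[2 * i] = 2 * (i + 1);	/* saved ebp "pointer" */
		stack[2 * i + 1] = 0x1000 + i;	/* return eip */
	}

	unsigned long ebp = 0;
	int count = 0;
	do {
		/* kernel reads *(ebp + 4); with word indices that is +1 */
		printf("frame %d: eip=0x%lx\n", count, stack[ebp + 1]);
		ebp = stack[ebp];
	} while (ebp < 2 * FRAMES && count++ < 16);
	return 0;
}
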
++ */ ++ cpu = get_cpu(); ++ ++ if (LDT_empty(&info)) { ++ desc->a = 0; ++ desc->b = 0; ++ } else { ++ desc->a = LDT_entry_a(&info); ++ desc->b = LDT_entry_b(&info); ++ } ++ load_TLS(t, cpu); ++ ++ put_cpu(); ++ ++ return 0; ++} ++ ++/* ++ * Get the current Thread-Local Storage area: ++ */ ++ ++#define GET_BASE(desc) ( \ ++ (((desc)->a >> 16) & 0x0000ffff) | \ ++ (((desc)->b << 16) & 0x00ff0000) | \ ++ ( (desc)->b & 0xff000000) ) ++ ++#define GET_LIMIT(desc) ( \ ++ ((desc)->a & 0x0ffff) | \ ++ ((desc)->b & 0xf0000) ) ++ ++#define GET_32BIT(desc) (((desc)->b >> 22) & 1) ++#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) ++#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) ++#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) ++#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) ++#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) ++ ++asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) ++{ ++ struct user_desc info; ++ struct desc_struct *desc; ++ int idx; ++ ++ if (get_user(idx, &u_info->entry_number)) ++ return -EFAULT; ++ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) ++ return -EINVAL; ++ ++ memset(&info, 0, sizeof(info)); ++ ++ desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; ++ ++ info.entry_number = idx; ++ info.base_addr = GET_BASE(desc); ++ info.limit = GET_LIMIT(desc); ++ info.seg_32bit = GET_32BIT(desc); ++ info.contents = GET_CONTENTS(desc); ++ info.read_exec_only = !GET_WRITABLE(desc); ++ info.limit_in_pages = GET_LIMIT_PAGES(desc); ++ info.seg_not_present = !GET_PRESENT(desc); ++ info.useable = GET_USEABLE(desc); ++ ++ if (copy_to_user(u_info, &info, sizeof(info))) ++ return -EFAULT; ++ return 0; ++} ++ ++unsigned long arch_align_stack(unsigned long sp) ++{ ++ if (randomize_va_space) ++ sp -= get_random_int() % 8192; ++ return sp & ~0xf; ++} +Index: head-2008-11-25/arch/x86/kernel/quirks-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/quirks-xen.c 2008-01-28 12:24:19.000000000 +0100 +@@ -0,0 +1,47 @@ ++/* ++ * This file contains work-arounds for x86 and x86_64 platform bugs. ++ */ ++#include ++#include ++ ++#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI) ++ ++static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) ++{ ++ u8 config, rev; ++ u32 word; ++ ++ /* BIOS may enable hardware IRQ balancing for ++ * E7520/E7320/E7525(revision ID 0x9 and below) ++ * based platforms. ++ * Disable SW irqbalance/affinity on those platforms. 
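
Aside: the GET_BASE()/GET_LIMIT() macros above reassemble a segment descriptor's scattered base and limit fields. A worked round-trip — pack a base/limit pair the way the GDT lays them out, then recover them with the same shifts:

#include <stdio.h>
#include <stdint.h>

struct desc { uint32_t a, b; };

#define GET_BASE(d) ((((d)->a >> 16) & 0x0000ffff) | \
		     (((d)->b << 16) & 0x00ff0000) | \
		     ( (d)->b & 0xff000000))
#define GET_LIMIT(d) (((d)->a & 0x0ffff) | ((d)->b & 0xf0000))

int main(void)
{
	uint32_t base = 0xb7f12000, limit = 0xfffff;
	struct desc d = {
		/* base[15:0] in a[31:16], limit[15:0] in a[15:0] */
		.a = ((base & 0xffff) << 16) | (limit & 0xffff),
		/* base[31:24] in b[31:24], base[23:16] in b[7:0],
		 * limit[19:16] in b[19:16] */
		.b = (base & 0xff000000) | ((base >> 16) & 0xff) |
		     (limit & 0xf0000),
	};

	printf("base  0x%08x\n", (unsigned)GET_BASE(&d));	/* 0xb7f12000 */
	printf("limit 0x%05x\n", (unsigned)GET_LIMIT(&d));	/* 0xfffff */
	return 0;
}
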
++ */ ++ pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); ++ if (rev > 0x9) ++ return; ++ ++ printk(KERN_INFO "Intel E7520/7320/7525 detected."); ++ ++ /* enable access to config space*/ ++ pci_read_config_byte(dev, 0xf4, &config); ++ pci_write_config_byte(dev, 0xf4, config|0x2); ++ ++ /* read xTPR register */ ++ raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); ++ ++ if (!(word & (1 << 13))) { ++ struct xen_platform_op op; ++ printk(KERN_INFO "Disabling irq balancing and affinity\n"); ++ op.cmd = XENPF_platform_quirk; ++ op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING; ++ WARN_ON(HYPERVISOR_platform_op(&op)); ++ } ++ ++ /* put back the original value for config space*/ ++ if (!(config & 0x2)) ++ pci_write_config_byte(dev, 0xf4, config); ++} ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); ++#endif +Index: head-2008-11-25/arch/x86/kernel/setup_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/setup_32-xen.c 2008-04-22 15:41:51.000000000 +0200 +@@ -0,0 +1,1919 @@ ++/* ++ * linux/arch/i386/kernel/setup.c ++ * ++ * Copyright (C) 1995 Linus Torvalds ++ * ++ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 ++ * ++ * Memory region support ++ * David Parsons , July-August 1999 ++ * ++ * Added E820 sanitization routine (removes overlapping memory regions); ++ * Brian Moyle , February 2001 ++ * ++ * Moved CPU detection code to cpu/${cpu}.c ++ * Patrick Mochel , March 2002 ++ * ++ * Provisions for empty E820 memory regions (reported by certain BIOSes). ++ * Alex Achenbach , December 2002. ++ * ++ */ ++ ++/* ++ * This file handles the architecture-dependent parts of initialization ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include