src/patches/suse-2.6.27.31/patches.xen/xen3-auto-xen-drivers.diff
1Subject: xen3 xen-drivers
2From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd)
3Patch-mainline: obsolete
4Acked-by: jbeulich@novell.com
5
6Index: head-2008-11-25/drivers/xen/balloon/Makefile
7===================================================================
8--- /dev/null 1970-01-01 00:00:00.000000000 +0000
9+++ head-2008-11-25/drivers/xen/balloon/Makefile 2007-06-12 13:13:44.000000000 +0200
10@@ -0,0 +1,2 @@
11+
12+obj-y := balloon.o sysfs.o
13Index: head-2008-11-25/drivers/xen/balloon/balloon.c
14===================================================================
15--- /dev/null 1970-01-01 00:00:00.000000000 +0000
16+++ head-2008-11-25/drivers/xen/balloon/balloon.c 2008-07-21 11:00:33.000000000 +0200
17@@ -0,0 +1,724 @@
18+/******************************************************************************
19+ * balloon.c
20+ *
21+ * Xen balloon driver - enables returning/claiming memory to/from Xen.
22+ *
23+ * Copyright (c) 2003, B Dragovic
24+ * Copyright (c) 2003-2004, M Williamson, K Fraser
25+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
26+ *
27+ * This program is free software; you can redistribute it and/or
28+ * modify it under the terms of the GNU General Public License version 2
29+ * as published by the Free Software Foundation; or, when distributed
30+ * separately from the Linux kernel or incorporated into other
31+ * software packages, subject to the following license:
32+ *
33+ * Permission is hereby granted, free of charge, to any person obtaining a copy
34+ * of this source file (the "Software"), to deal in the Software without
35+ * restriction, including without limitation the rights to use, copy, modify,
36+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
37+ * and to permit persons to whom the Software is furnished to do so, subject to
38+ * the following conditions:
39+ *
40+ * The above copyright notice and this permission notice shall be included in
41+ * all copies or substantial portions of the Software.
42+ *
43+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
44+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
45+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
46+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
47+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
48+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
49+ * IN THE SOFTWARE.
50+ */
51+
52+#include <linux/kernel.h>
53+#include <linux/module.h>
54+#include <linux/sched.h>
55+#include <linux/errno.h>
56+#include <linux/mm.h>
57+#include <linux/mman.h>
58+#include <linux/smp_lock.h>
59+#include <linux/pagemap.h>
60+#include <linux/bootmem.h>
61+#include <linux/highmem.h>
62+#include <linux/vmalloc.h>
63+#include <linux/mutex.h>
64+#include <xen/xen_proc.h>
65+#include <asm/hypervisor.h>
66+#include <xen/balloon.h>
67+#include <xen/interface/memory.h>
68+#include <asm/maddr.h>
69+#include <asm/page.h>
70+#include <asm/pgalloc.h>
71+#include <asm/pgtable.h>
72+#include <asm/uaccess.h>
73+#include <asm/tlb.h>
74+#include <linux/highmem.h>
75+#include <linux/list.h>
76+#include <xen/xenbus.h>
77+#include "common.h"
78+
79+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
80+#include <xen/platform-compat.h>
81+#endif
82+
83+#ifdef CONFIG_PROC_FS
84+static struct proc_dir_entry *balloon_pde;
85+#endif
86+
87+static DEFINE_MUTEX(balloon_mutex);
88+
89+/*
90+ * Protects atomic reservation decrease/increase against concurrent increases.
91+ * Also protects non-atomic updates of current_pages and driver_pages, and
92+ * balloon lists.
93+ */
94+DEFINE_SPINLOCK(balloon_lock);
95+
96+struct balloon_stats balloon_stats;
97+
98+/* We increase/decrease in batches which fit in a page */
99+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
100+
101+/* VM /proc information for memory */
102+extern unsigned long totalram_pages;
103+
104+#ifndef MODULE
105+extern unsigned long totalhigh_pages;
106+#define inc_totalhigh_pages() (totalhigh_pages++)
107+#define dec_totalhigh_pages() (totalhigh_pages--)
108+#else
109+#define inc_totalhigh_pages() ((void)0)
110+#define dec_totalhigh_pages() ((void)0)
111+#endif
112+
113+/* List of ballooned pages, threaded through the mem_map array. */
114+static LIST_HEAD(ballooned_pages);
115+
116+/* Main work function, always executed in process context. */
117+static void balloon_process(void *unused);
118+static DECLARE_WORK(balloon_worker, balloon_process, NULL);
119+static struct timer_list balloon_timer;
120+
121+/* When ballooning out (allocating memory to return to Xen) we don't really
122+ want the kernel to try too hard since that can trigger the oom killer. */
123+#define GFP_BALLOON \
124+ (GFP_HIGHUSER|__GFP_NOWARN|__GFP_NORETRY|__GFP_NOMEMALLOC|__GFP_COLD)
125+
126+#define PAGE_TO_LIST(p) (&(p)->lru)
127+#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
128+#define UNLIST_PAGE(p) \
129+ do { \
130+ list_del(PAGE_TO_LIST(p)); \
131+ PAGE_TO_LIST(p)->next = NULL; \
132+ PAGE_TO_LIST(p)->prev = NULL; \
133+ } while(0)
134+
135+#define IPRINTK(fmt, args...) \
136+ printk(KERN_INFO "xen_mem: " fmt, ##args)
137+#define WPRINTK(fmt, args...) \
138+ printk(KERN_WARNING "xen_mem: " fmt, ##args)
139+
140+/* balloon_append: add the given page to the balloon. */
141+static void balloon_append(struct page *page)
142+{
143+ /* Lowmem is re-populated first, so highmem pages go at list tail. */
144+ if (PageHighMem(page)) {
145+ list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
146+ bs.balloon_high++;
147+ dec_totalhigh_pages();
148+ } else {
149+ list_add(PAGE_TO_LIST(page), &ballooned_pages);
150+ bs.balloon_low++;
151+ }
152+}
153+
154+/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
155+static struct page *balloon_retrieve(void)
156+{
157+ struct page *page;
158+
159+ if (list_empty(&ballooned_pages))
160+ return NULL;
161+
162+ page = LIST_TO_PAGE(ballooned_pages.next);
163+ UNLIST_PAGE(page);
164+
165+ if (PageHighMem(page)) {
166+ bs.balloon_high--;
167+ inc_totalhigh_pages();
168+ }
169+ else
170+ bs.balloon_low--;
171+
172+ return page;
173+}
174+
175+static struct page *balloon_first_page(void)
176+{
177+ if (list_empty(&ballooned_pages))
178+ return NULL;
179+ return LIST_TO_PAGE(ballooned_pages.next);
180+}
181+
182+static struct page *balloon_next_page(struct page *page)
183+{
184+ struct list_head *next = PAGE_TO_LIST(page)->next;
185+ if (next == &ballooned_pages)
186+ return NULL;
187+ return LIST_TO_PAGE(next);
188+}
189+
190+static inline void balloon_free_page(struct page *page)
191+{
192+#ifndef MODULE
193+ if (put_page_testzero(page))
194+ free_cold_page(page);
195+#else
196+ /* free_cold_page() is not being exported. */
197+ __free_page(page);
198+#endif
199+}
200+
201+static void balloon_alarm(unsigned long unused)
202+{
203+ schedule_work(&balloon_worker);
204+}
205+
206+static unsigned long current_target(void)
207+{
208+ unsigned long target = min(bs.target_pages, bs.hard_limit);
209+ if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
210+ target = bs.current_pages + bs.balloon_low + bs.balloon_high;
211+ return target;
212+}
213+
214+static unsigned long minimum_target(void)
215+{
216+#ifndef CONFIG_XEN
217+#define max_pfn num_physpages
218+#endif
219+ unsigned long min_pages, curr_pages = current_target();
220+
221+#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
 222+	/* Simple continuous piecewise linear function:
223+ * max MiB -> min MiB gradient
224+ * 0 0
225+ * 16 16
226+ * 32 24
227+ * 128 72 (1/2)
228+ * 512 168 (1/4)
229+ * 2048 360 (1/8)
230+ * 8192 552 (1/32)
231+ * 32768 1320
232+ * 131072 4392
233+ */
234+ if (max_pfn < MB2PAGES(128))
235+ min_pages = MB2PAGES(8) + (max_pfn >> 1);
236+ else if (max_pfn < MB2PAGES(512))
237+ min_pages = MB2PAGES(40) + (max_pfn >> 2);
238+ else if (max_pfn < MB2PAGES(2048))
239+ min_pages = MB2PAGES(104) + (max_pfn >> 3);
240+ else
241+ min_pages = MB2PAGES(296) + (max_pfn >> 5);
242+#undef MB2PAGES
243+
244+ /* Don't enforce growth */
245+ return min(min_pages, curr_pages);
246+#ifndef CONFIG_XEN
247+#undef max_pfn
248+#endif
249+}
250+
251+static int increase_reservation(unsigned long nr_pages)
252+{
253+ unsigned long pfn, i, flags;
254+ struct page *page;
255+ long rc;
256+ struct xen_memory_reservation reservation = {
257+ .address_bits = 0,
258+ .extent_order = 0,
259+ .domid = DOMID_SELF
260+ };
261+
262+ if (nr_pages > ARRAY_SIZE(frame_list))
263+ nr_pages = ARRAY_SIZE(frame_list);
264+
265+ balloon_lock(flags);
266+
267+ page = balloon_first_page();
268+ for (i = 0; i < nr_pages; i++) {
269+ BUG_ON(page == NULL);
 270+		frame_list[i] = page_to_pfn(page);
271+ page = balloon_next_page(page);
272+ }
273+
274+ set_xen_guest_handle(reservation.extent_start, frame_list);
275+ reservation.nr_extents = nr_pages;
276+ rc = HYPERVISOR_memory_op(
277+ XENMEM_populate_physmap, &reservation);
278+ if (rc < nr_pages) {
279+ if (rc > 0) {
280+ int ret;
281+
282+ /* We hit the Xen hard limit: reprobe. */
283+ reservation.nr_extents = rc;
284+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
285+ &reservation);
286+ BUG_ON(ret != rc);
287+ }
288+ if (rc >= 0)
289+ bs.hard_limit = (bs.current_pages + rc -
290+ bs.driver_pages);
291+ goto out;
292+ }
293+
294+ for (i = 0; i < nr_pages; i++) {
295+ page = balloon_retrieve();
296+ BUG_ON(page == NULL);
297+
298+ pfn = page_to_pfn(page);
299+ BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
300+ phys_to_machine_mapping_valid(pfn));
301+
302+ set_phys_to_machine(pfn, frame_list[i]);
303+
304+#ifdef CONFIG_XEN
305+ /* Link back into the page tables if not highmem. */
306+ if (pfn < max_low_pfn) {
307+ int ret;
308+ ret = HYPERVISOR_update_va_mapping(
309+ (unsigned long)__va(pfn << PAGE_SHIFT),
310+ pfn_pte_ma(frame_list[i], PAGE_KERNEL),
311+ 0);
312+ BUG_ON(ret);
313+ }
314+#endif
315+
316+ /* Relinquish the page back to the allocator. */
317+ ClearPageReserved(page);
318+ init_page_count(page);
319+ balloon_free_page(page);
320+ }
321+
322+ bs.current_pages += nr_pages;
323+ totalram_pages = bs.current_pages;
324+
325+ out:
326+ balloon_unlock(flags);
327+
328+ return 0;
329+}
330+
331+static int decrease_reservation(unsigned long nr_pages)
332+{
333+ unsigned long pfn, i, flags;
334+ struct page *page;
335+ void *v;
336+ int need_sleep = 0;
337+ int ret;
338+ struct xen_memory_reservation reservation = {
339+ .address_bits = 0,
340+ .extent_order = 0,
341+ .domid = DOMID_SELF
342+ };
343+
344+ if (nr_pages > ARRAY_SIZE(frame_list))
345+ nr_pages = ARRAY_SIZE(frame_list);
346+
347+ for (i = 0; i < nr_pages; i++) {
348+ if ((page = alloc_page(GFP_BALLOON)) == NULL) {
349+ nr_pages = i;
350+ need_sleep = 1;
351+ break;
352+ }
353+
354+ pfn = page_to_pfn(page);
355+ frame_list[i] = pfn_to_mfn(pfn);
356+
357+ if (!PageHighMem(page)) {
358+ v = phys_to_virt(pfn << PAGE_SHIFT);
359+ scrub_pages(v, 1);
360+#ifdef CONFIG_XEN
361+ ret = HYPERVISOR_update_va_mapping(
362+ (unsigned long)v, __pte_ma(0), 0);
363+ BUG_ON(ret);
364+#endif
365+ }
366+#ifdef CONFIG_XEN_SCRUB_PAGES
367+ else {
368+ v = kmap(page);
369+ scrub_pages(v, 1);
370+ kunmap(page);
371+ }
372+#endif
373+ }
374+
375+#ifdef CONFIG_XEN
376+ /* Ensure that ballooned highmem pages don't have kmaps. */
377+ kmap_flush_unused();
378+ flush_tlb_all();
379+#endif
380+
381+ balloon_lock(flags);
382+
383+ /* No more mappings: invalidate P2M and add to balloon. */
384+ for (i = 0; i < nr_pages; i++) {
385+ pfn = mfn_to_pfn(frame_list[i]);
386+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
387+ balloon_append(pfn_to_page(pfn));
388+ }
389+
390+ set_xen_guest_handle(reservation.extent_start, frame_list);
391+ reservation.nr_extents = nr_pages;
392+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
393+ BUG_ON(ret != nr_pages);
394+
395+ bs.current_pages -= nr_pages;
396+ totalram_pages = bs.current_pages;
397+
398+ balloon_unlock(flags);
399+
400+ return need_sleep;
401+}
402+
403+/*
404+ * We avoid multiple worker processes conflicting via the balloon mutex.
405+ * We may of course race updates of the target counts (which are protected
406+ * by the balloon lock), or with changes to the Xen hard limit, but we will
407+ * recover from these in time.
408+ */
409+static void balloon_process(void *unused)
410+{
411+ int need_sleep = 0;
412+ long credit;
413+
414+ mutex_lock(&balloon_mutex);
415+
416+ do {
417+ credit = current_target() - bs.current_pages;
418+ if (credit > 0)
419+ need_sleep = (increase_reservation(credit) != 0);
420+ if (credit < 0)
421+ need_sleep = (decrease_reservation(-credit) != 0);
422+
423+#ifndef CONFIG_PREEMPT
424+ if (need_resched())
425+ schedule();
426+#endif
427+ } while ((credit != 0) && !need_sleep);
428+
429+ /* Schedule more work if there is some still to be done. */
430+ if (current_target() != bs.current_pages)
431+ mod_timer(&balloon_timer, jiffies + HZ);
432+
433+ mutex_unlock(&balloon_mutex);
434+}
435+
436+/* Resets the Xen limit, sets new target, and kicks off processing. */
437+void balloon_set_new_target(unsigned long target)
438+{
439+ /* No need for lock. Not read-modify-write updates. */
440+ bs.hard_limit = ~0UL;
441+ bs.target_pages = max(target, minimum_target());
442+ schedule_work(&balloon_worker);
443+}
444+
445+static struct xenbus_watch target_watch =
446+{
447+ .node = "memory/target"
448+};
449+
450+/* React to a change in the target key */
451+static void watch_target(struct xenbus_watch *watch,
452+ const char **vec, unsigned int len)
453+{
454+ unsigned long long new_target;
455+ int err;
456+
457+ err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
458+ if (err != 1) {
459+ /* This is ok (for domain0 at least) - so just return */
460+ return;
461+ }
462+
463+ /* The given memory/target value is in KiB, so it needs converting to
464+ * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
465+ */
466+ balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
467+}
468+
469+static int balloon_init_watcher(struct notifier_block *notifier,
470+ unsigned long event,
471+ void *data)
472+{
473+ int err;
474+
475+ err = register_xenbus_watch(&target_watch);
476+ if (err)
477+ printk(KERN_ERR "Failed to set balloon watcher\n");
478+
479+ return NOTIFY_DONE;
480+}
481+
482+#ifdef CONFIG_PROC_FS
483+static int balloon_write(struct file *file, const char __user *buffer,
484+ unsigned long count, void *data)
485+{
486+ char memstring[64], *endchar;
487+ unsigned long long target_bytes;
488+
489+ if (!capable(CAP_SYS_ADMIN))
490+ return -EPERM;
491+
492+ if (count <= 1)
493+ return -EBADMSG; /* runt */
494+ if (count > sizeof(memstring))
495+ return -EFBIG; /* too long */
496+
497+ if (copy_from_user(memstring, buffer, count))
498+ return -EFAULT;
499+ memstring[sizeof(memstring)-1] = '\0';
500+
501+ target_bytes = memparse(memstring, &endchar);
502+ balloon_set_new_target(target_bytes >> PAGE_SHIFT);
503+
504+ return count;
505+}
506+
507+static int balloon_read(char *page, char **start, off_t off,
508+ int count, int *eof, void *data)
509+{
510+ int len;
511+
512+ len = sprintf(
513+ page,
514+ "Current allocation: %8lu kB\n"
515+ "Requested target: %8lu kB\n"
516+ "Low-mem balloon: %8lu kB\n"
517+ "High-mem balloon: %8lu kB\n"
518+ "Driver pages: %8lu kB\n"
519+ "Xen hard limit: ",
520+ PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages),
521+ PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high),
522+ PAGES2KB(bs.driver_pages));
523+
524+ if (bs.hard_limit != ~0UL)
525+ len += sprintf(page + len, "%8lu kB\n",
526+ PAGES2KB(bs.hard_limit));
527+ else
528+ len += sprintf(page + len, " ??? kB\n");
529+
530+ *eof = 1;
531+ return len;
532+}
533+#endif
534+
535+static struct notifier_block xenstore_notifier;
536+
537+static int __init balloon_init(void)
538+{
539+#if defined(CONFIG_X86) && defined(CONFIG_XEN)
540+ unsigned long pfn;
541+ struct page *page;
542+#endif
543+
544+ if (!is_running_on_xen())
545+ return -ENODEV;
546+
547+ IPRINTK("Initialising balloon driver.\n");
548+
549+#ifdef CONFIG_XEN
550+ bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
551+ totalram_pages = bs.current_pages;
552+#else
553+ bs.current_pages = totalram_pages;
554+#endif
555+ bs.target_pages = bs.current_pages;
556+ bs.balloon_low = 0;
557+ bs.balloon_high = 0;
558+ bs.driver_pages = 0UL;
559+ bs.hard_limit = ~0UL;
560+
561+ init_timer(&balloon_timer);
562+ balloon_timer.data = 0;
563+ balloon_timer.function = balloon_alarm;
564+
565+#ifdef CONFIG_PROC_FS
566+ if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
567+ WPRINTK("Unable to create /proc/xen/balloon.\n");
568+ return -1;
569+ }
570+
571+ balloon_pde->read_proc = balloon_read;
572+ balloon_pde->write_proc = balloon_write;
573+#endif
574+ balloon_sysfs_init();
575+
576+#if defined(CONFIG_X86) && defined(CONFIG_XEN)
577+ /* Initialise the balloon with excess memory space. */
578+ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
579+ page = pfn_to_page(pfn);
580+ if (!PageReserved(page))
581+ balloon_append(page);
582+ }
583+#endif
584+
585+ target_watch.callback = watch_target;
586+ xenstore_notifier.notifier_call = balloon_init_watcher;
587+
588+ register_xenstore_notifier(&xenstore_notifier);
589+
590+ return 0;
591+}
592+
593+subsys_initcall(balloon_init);
594+
595+static void __exit balloon_exit(void)
596+{
597+ /* XXX - release balloon here */
598+ return;
599+}
600+
601+module_exit(balloon_exit);
602+
603+void balloon_update_driver_allowance(long delta)
604+{
605+ unsigned long flags;
606+
607+ balloon_lock(flags);
608+ bs.driver_pages += delta;
609+ balloon_unlock(flags);
610+}
611+
612+#ifdef CONFIG_XEN
613+static int dealloc_pte_fn(
614+ pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
615+{
616+ unsigned long mfn = pte_mfn(*pte);
617+ int ret;
618+ struct xen_memory_reservation reservation = {
619+ .nr_extents = 1,
620+ .extent_order = 0,
621+ .domid = DOMID_SELF
622+ };
623+ set_xen_guest_handle(reservation.extent_start, &mfn);
624+ set_pte_at(&init_mm, addr, pte, __pte_ma(0));
625+ set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
626+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
627+ BUG_ON(ret != 1);
628+ return 0;
629+}
630+#endif
631+
632+struct page **alloc_empty_pages_and_pagevec(int nr_pages)
633+{
634+ unsigned long flags;
635+ void *v;
636+ struct page *page, **pagevec;
637+ int i, ret;
638+
639+ pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
640+ if (pagevec == NULL)
641+ return NULL;
642+
643+ for (i = 0; i < nr_pages; i++) {
644+ page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
645+ if (page == NULL)
646+ goto err;
647+
648+ v = page_address(page);
649+ scrub_pages(v, 1);
650+
651+ balloon_lock(flags);
652+
653+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
654+ unsigned long gmfn = page_to_pfn(page);
655+ struct xen_memory_reservation reservation = {
656+ .nr_extents = 1,
657+ .extent_order = 0,
658+ .domid = DOMID_SELF
659+ };
660+ set_xen_guest_handle(reservation.extent_start, &gmfn);
661+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
662+ &reservation);
663+ if (ret == 1)
664+ ret = 0; /* success */
665+ } else {
666+#ifdef CONFIG_XEN
667+ ret = apply_to_page_range(&init_mm, (unsigned long)v,
668+ PAGE_SIZE, dealloc_pte_fn,
669+ NULL);
670+#else
671+ /* Cannot handle non-auto translate mode. */
672+ ret = 1;
673+#endif
674+ }
675+
676+ if (ret != 0) {
677+ balloon_unlock(flags);
678+ balloon_free_page(page);
679+ goto err;
680+ }
681+
682+ totalram_pages = --bs.current_pages;
683+
684+ balloon_unlock(flags);
685+ }
686+
687+ out:
688+ schedule_work(&balloon_worker);
689+#ifdef CONFIG_XEN
690+ flush_tlb_all();
691+#endif
692+ return pagevec;
693+
694+ err:
695+ balloon_lock(flags);
696+ while (--i >= 0)
697+ balloon_append(pagevec[i]);
698+ balloon_unlock(flags);
699+ kfree(pagevec);
700+ pagevec = NULL;
701+ goto out;
702+}
703+
704+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
705+{
706+ unsigned long flags;
707+ int i;
708+
709+ if (pagevec == NULL)
710+ return;
711+
712+ balloon_lock(flags);
713+ for (i = 0; i < nr_pages; i++) {
714+ BUG_ON(page_count(pagevec[i]) != 1);
715+ balloon_append(pagevec[i]);
716+ }
717+ balloon_unlock(flags);
718+
719+ kfree(pagevec);
720+
721+ schedule_work(&balloon_worker);
722+}
723+
724+void balloon_release_driver_page(struct page *page)
725+{
726+ unsigned long flags;
727+
728+ balloon_lock(flags);
729+ balloon_append(page);
730+ bs.driver_pages--;
731+ balloon_unlock(flags);
732+
733+ schedule_work(&balloon_worker);
734+}
735+
736+EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
737+EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
738+EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
739+EXPORT_SYMBOL_GPL(balloon_release_driver_page);
740+
741+MODULE_LICENSE("Dual BSD/GPL");
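
The arithmetic used by minimum_target() and watch_target() in the file above is easy to check in isolation. The following stand-alone program is not part of the patch; it assumes 4 KiB pages (PAGE_SHIFT = 12) and simply re-implements the MB2PAGES()-based piecewise floor and the KiB-to-pages shift, reproducing the table given in the source comment.

	/* Stand-alone check of the balloon arithmetic (assumes 4 KiB pages). */
	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define MB2PAGES(mb) ((unsigned long)(mb) << (20 - PAGE_SHIFT))

	/* Same piecewise-linear floor as minimum_target(), in pages. */
	static unsigned long min_pages_for(unsigned long max_pfn)
	{
		if (max_pfn < MB2PAGES(128))
			return MB2PAGES(8) + (max_pfn >> 1);
		else if (max_pfn < MB2PAGES(512))
			return MB2PAGES(40) + (max_pfn >> 2);
		else if (max_pfn < MB2PAGES(2048))
			return MB2PAGES(104) + (max_pfn >> 3);
		return MB2PAGES(296) + (max_pfn >> 5);
	}

	int main(void)
	{
		unsigned long mb;
		unsigned long long target_kib = 524288;	/* 512 MiB, as memory/target would give it */

		/* Reproduces the max-MiB -> min-MiB table from the comment. */
		for (mb = 16; mb <= 131072; mb *= 2)
			printf("%6lu MiB -> %4lu MiB floor\n", mb,
			       min_pages_for(MB2PAGES(mb)) >> (20 - PAGE_SHIFT));

		/* watch_target(): memory/target is in KiB, so shifting by
		 * (PAGE_SHIFT - 10) turns KiB straight into pages. */
		printf("%llu KiB -> %llu pages\n",
		       target_kib, target_kib >> (PAGE_SHIFT - 10));
		return 0;
	}

With 4 KiB pages the (PAGE_SHIFT - 10) shift is a divide by four, i.e. one page per four KiB, which is exactly what balloon_set_new_target() expects.
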
742Index: head-2008-11-25/drivers/xen/balloon/common.h
743===================================================================
744--- /dev/null 1970-01-01 00:00:00.000000000 +0000
745+++ head-2008-11-25/drivers/xen/balloon/common.h 2007-06-12 13:13:44.000000000 +0200
746@@ -0,0 +1,58 @@
747+/******************************************************************************
748+ * balloon/common.h
749+ *
750+ * This program is free software; you can redistribute it and/or
751+ * modify it under the terms of the GNU General Public License version 2
752+ * as published by the Free Software Foundation; or, when distributed
753+ * separately from the Linux kernel or incorporated into other
754+ * software packages, subject to the following license:
755+ *
756+ * Permission is hereby granted, free of charge, to any person obtaining a copy
757+ * of this source file (the "Software"), to deal in the Software without
758+ * restriction, including without limitation the rights to use, copy, modify,
759+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
760+ * and to permit persons to whom the Software is furnished to do so, subject to
761+ * the following conditions:
762+ *
763+ * The above copyright notice and this permission notice shall be included in
764+ * all copies or substantial portions of the Software.
765+ *
766+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
767+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
768+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
769+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
770+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
771+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
772+ * IN THE SOFTWARE.
773+ */
774+
775+#ifndef __XEN_BALLOON_COMMON_H__
776+#define __XEN_BALLOON_COMMON_H__
777+
778+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
779+
780+struct balloon_stats {
781+ /* We aim for 'current allocation' == 'target allocation'. */
782+ unsigned long current_pages;
783+ unsigned long target_pages;
784+ /* We may hit the hard limit in Xen. If we do then we remember it. */
785+ unsigned long hard_limit;
786+ /*
787+ * Drivers may alter the memory reservation independently, but they
788+ * must inform the balloon driver so we avoid hitting the hard limit.
789+ */
790+ unsigned long driver_pages;
791+ /* Number of pages in high- and low-memory balloons. */
792+ unsigned long balloon_low;
793+ unsigned long balloon_high;
794+};
795+
796+extern struct balloon_stats balloon_stats;
797+#define bs balloon_stats
798+
799+int balloon_sysfs_init(void);
800+void balloon_sysfs_exit(void);
801+
802+void balloon_set_new_target(unsigned long target);
803+
804+#endif /* __XEN_BALLOON_COMMON_H__ */
805Index: head-2008-11-25/drivers/xen/balloon/sysfs.c
806===================================================================
807--- /dev/null 1970-01-01 00:00:00.000000000 +0000
808+++ head-2008-11-25/drivers/xen/balloon/sysfs.c 2008-04-02 12:34:02.000000000 +0200
809@@ -0,0 +1,170 @@
810+/******************************************************************************
811+ * balloon/sysfs.c
812+ *
813+ * Xen balloon driver - sysfs interfaces.
814+ *
815+ * This program is free software; you can redistribute it and/or
816+ * modify it under the terms of the GNU General Public License version 2
817+ * as published by the Free Software Foundation; or, when distributed
818+ * separately from the Linux kernel or incorporated into other
819+ * software packages, subject to the following license:
820+ *
821+ * Permission is hereby granted, free of charge, to any person obtaining a copy
822+ * of this source file (the "Software"), to deal in the Software without
823+ * restriction, including without limitation the rights to use, copy, modify,
824+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
825+ * and to permit persons to whom the Software is furnished to do so, subject to
826+ * the following conditions:
827+ *
828+ * The above copyright notice and this permission notice shall be included in
829+ * all copies or substantial portions of the Software.
830+ *
831+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
832+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
833+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
834+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
835+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
836+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
837+ * IN THE SOFTWARE.
838+ */
839+
840+#include <linux/capability.h>
841+#include <linux/errno.h>
842+#include <linux/stat.h>
843+#include <linux/string.h>
844+#include <linux/sysdev.h>
845+#include "common.h"
846+
847+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
848+#include <xen/platform-compat.h>
849+#endif
850+
851+#define BALLOON_CLASS_NAME "xen_memory"
852+
853+#define BALLOON_SHOW(name, format, args...) \
854+ static ssize_t show_##name(struct sys_device *dev, \
855+ char *buf) \
856+ { \
857+ return sprintf(buf, format, ##args); \
858+ } \
859+ static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
860+
861+BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages));
862+BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low));
863+BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
864+BALLOON_SHOW(hard_limit_kb,
865+ (bs.hard_limit!=~0UL) ? "%lu\n" : "???\n",
866+ (bs.hard_limit!=~0UL) ? PAGES2KB(bs.hard_limit) : 0);
867+BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
868+
869+static ssize_t show_target_kb(struct sys_device *dev, char *buf)
870+{
871+ return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
872+}
873+
874+static ssize_t store_target_kb(struct sys_device *dev,
875+ const char *buf,
876+ size_t count)
877+{
878+ char memstring[64], *endchar;
879+ unsigned long long target_bytes;
880+
881+ if (!capable(CAP_SYS_ADMIN))
882+ return -EPERM;
883+
884+ if (count <= 1)
885+ return -EBADMSG; /* runt */
886+ if (count > sizeof(memstring))
887+ return -EFBIG; /* too long */
888+ strcpy(memstring, buf);
889+
890+ target_bytes = memparse(memstring, &endchar);
891+ balloon_set_new_target(target_bytes >> PAGE_SHIFT);
892+
893+ return count;
894+}
895+
896+static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
897+ show_target_kb, store_target_kb);
898+
899+static struct sysdev_attribute *balloon_attrs[] = {
900+ &attr_target_kb,
901+};
902+
903+static struct attribute *balloon_info_attrs[] = {
904+ &attr_current_kb.attr,
905+ &attr_low_kb.attr,
906+ &attr_high_kb.attr,
907+ &attr_hard_limit_kb.attr,
908+ &attr_driver_kb.attr,
909+ NULL
910+};
911+
912+static struct attribute_group balloon_info_group = {
913+ .name = "info",
914+ .attrs = balloon_info_attrs,
915+};
916+
917+static struct sysdev_class balloon_sysdev_class = {
918+ set_kset_name(BALLOON_CLASS_NAME),
919+};
920+
921+static struct sys_device balloon_sysdev;
922+
923+static int register_balloon(struct sys_device *sysdev)
924+{
925+ int i, error;
926+
927+ error = sysdev_class_register(&balloon_sysdev_class);
928+ if (error)
929+ return error;
930+
931+ sysdev->id = 0;
932+ sysdev->cls = &balloon_sysdev_class;
933+
934+ error = sysdev_register(sysdev);
935+ if (error) {
936+ sysdev_class_unregister(&balloon_sysdev_class);
937+ return error;
938+ }
939+
940+ for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
941+ error = sysdev_create_file(sysdev, balloon_attrs[i]);
942+ if (error)
943+ goto fail;
944+ }
945+
946+ error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
947+ if (error)
948+ goto fail;
949+
950+ return 0;
951+
952+ fail:
953+ while (--i >= 0)
954+ sysdev_remove_file(sysdev, balloon_attrs[i]);
955+ sysdev_unregister(sysdev);
956+ sysdev_class_unregister(&balloon_sysdev_class);
957+ return error;
958+}
959+
960+static void unregister_balloon(struct sys_device *sysdev)
961+{
962+ int i;
963+
964+ sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
965+ for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
966+ sysdev_remove_file(sysdev, balloon_attrs[i]);
967+ sysdev_unregister(sysdev);
968+ sysdev_class_unregister(&balloon_sysdev_class);
969+}
970+
971+int balloon_sysfs_init(void)
972+{
973+ return register_balloon(&balloon_sysdev);
974+}
975+
976+void balloon_sysfs_exit(void)
977+{
978+ unregister_balloon(&balloon_sysdev);
979+}
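
A usage note on store_target_kb() above: the written string is parsed with memparse(), so an optional K/M/G suffix is honoured and the resulting byte count is converted to pages before balloon_set_new_target() runs. The sketch below shows how a management tool might drive the attribute from userspace; the sysfs path is an assumption derived from the "xen_memory" sysdev class and id 0 registered above, and should be verified on the running kernel.

	/* Hypothetical helper: ask the balloon driver above for a new target.
	 * Path assumed from the "xen_memory" sysdev class and id 0. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static int set_balloon_target(const char *val)
	{
		const char *path =
			"/sys/devices/system/xen_memory/xen_memory0/target_kb";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return -1;
		}
		/* store_target_kb() feeds this through memparse(), so a
		 * suffixed value such as "512M" is converted to pages. */
		if (write(fd, val, strlen(val)) < 0) {
			perror("write");
			close(fd);
			return -1;
		}
		close(fd);
		return 0;
	}

	int main(void)
	{
		return set_balloon_target("512M") ? 1 : 0;
	}
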
980Index: head-2008-11-25/drivers/xen/blkback/Makefile
981===================================================================
982--- /dev/null 1970-01-01 00:00:00.000000000 +0000
983+++ head-2008-11-25/drivers/xen/blkback/Makefile 2007-06-12 13:13:44.000000000 +0200
984@@ -0,0 +1,3 @@
985+obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o
986+
987+blkbk-y := blkback.o xenbus.o interface.o vbd.o
988Index: head-2008-11-25/drivers/xen/blkback/blkback.c
989===================================================================
990--- /dev/null 1970-01-01 00:00:00.000000000 +0000
991+++ head-2008-11-25/drivers/xen/blkback/blkback.c 2008-11-10 11:44:21.000000000 +0100
992@@ -0,0 +1,656 @@
993+/******************************************************************************
994+ * arch/xen/drivers/blkif/backend/main.c
995+ *
996+ * Back-end of the driver for virtual block devices. This portion of the
997+ * driver exports a 'unified' block-device interface that can be accessed
998+ * by any operating system that implements a compatible front end. A
999+ * reference front-end implementation can be found in:
1000+ * arch/xen/drivers/blkif/frontend
1001+ *
1002+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
1003+ * Copyright (c) 2005, Christopher Clark
1004+ *
1005+ * This program is free software; you can redistribute it and/or
1006+ * modify it under the terms of the GNU General Public License version 2
1007+ * as published by the Free Software Foundation; or, when distributed
1008+ * separately from the Linux kernel or incorporated into other
1009+ * software packages, subject to the following license:
1010+ *
1011+ * Permission is hereby granted, free of charge, to any person obtaining a copy
1012+ * of this source file (the "Software"), to deal in the Software without
1013+ * restriction, including without limitation the rights to use, copy, modify,
1014+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1015+ * and to permit persons to whom the Software is furnished to do so, subject to
1016+ * the following conditions:
1017+ *
1018+ * The above copyright notice and this permission notice shall be included in
1019+ * all copies or substantial portions of the Software.
1020+ *
1021+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1022+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1023+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1024+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1025+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1026+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1027+ * IN THE SOFTWARE.
1028+ */
1029+
1030+#include <linux/spinlock.h>
1031+#include <linux/kthread.h>
1032+#include <linux/list.h>
1033+#include <linux/delay.h>
1034+#include <xen/balloon.h>
1035+#include <asm/hypervisor.h>
1036+#include "common.h"
1037+
1038+/*
1039+ * These are rather arbitrary. They are fairly large because adjacent requests
1040+ * pulled from a communication ring are quite likely to end up being part of
1041+ * the same scatter/gather request at the disc.
1042+ *
1043+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
1044+ *
1045+ * This will increase the chances of being able to write whole tracks.
1046+ * 64 should be enough to keep us competitive with Linux.
1047+ */
1048+static int blkif_reqs = 64;
1049+module_param_named(reqs, blkif_reqs, int, 0);
1050+MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
1051+
1052+/* Run-time switchable: /sys/module/blkback/parameters/ */
1053+static unsigned int log_stats = 0;
1054+static unsigned int debug_lvl = 0;
1055+module_param(log_stats, int, 0644);
1056+module_param(debug_lvl, int, 0644);
1057+
1058+/*
1059+ * Each outstanding request that we've passed to the lower device layers has a
1060+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
1061+ * the pendcnt towards zero. When it hits zero, the specified domain has a
1062+ * response queued for it, with the saved 'id' passed back.
1063+ */
1064+typedef struct {
1065+ blkif_t *blkif;
1066+ u64 id;
1067+ int nr_pages;
1068+ atomic_t pendcnt;
1069+ unsigned short operation;
1070+ int status;
1071+ struct list_head free_list;
1072+} pending_req_t;
1073+
1074+static pending_req_t *pending_reqs;
1075+static struct list_head pending_free;
1076+static DEFINE_SPINLOCK(pending_free_lock);
1077+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
1078+
1079+#define BLKBACK_INVALID_HANDLE (~0)
1080+
1081+static struct page **pending_pages;
1082+static grant_handle_t *pending_grant_handles;
1083+
1084+static inline int vaddr_pagenr(pending_req_t *req, int seg)
1085+{
1086+ return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
1087+}
1088+
1089+static inline unsigned long vaddr(pending_req_t *req, int seg)
1090+{
1091+ unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
1092+ return (unsigned long)pfn_to_kaddr(pfn);
1093+}
1094+
1095+#define pending_handle(_req, _seg) \
1096+ (pending_grant_handles[vaddr_pagenr(_req, _seg)])
1097+
1098+
1099+static int do_block_io_op(blkif_t *blkif);
1100+static void dispatch_rw_block_io(blkif_t *blkif,
1101+ blkif_request_t *req,
1102+ pending_req_t *pending_req);
1103+static void make_response(blkif_t *blkif, u64 id,
1104+ unsigned short op, int st);
1105+
1106+/******************************************************************
1107+ * misc small helpers
1108+ */
1109+static pending_req_t* alloc_req(void)
1110+{
1111+ pending_req_t *req = NULL;
1112+ unsigned long flags;
1113+
1114+ spin_lock_irqsave(&pending_free_lock, flags);
1115+ if (!list_empty(&pending_free)) {
1116+ req = list_entry(pending_free.next, pending_req_t, free_list);
1117+ list_del(&req->free_list);
1118+ }
1119+ spin_unlock_irqrestore(&pending_free_lock, flags);
1120+ return req;
1121+}
1122+
1123+static void free_req(pending_req_t *req)
1124+{
1125+ unsigned long flags;
1126+ int was_empty;
1127+
1128+ spin_lock_irqsave(&pending_free_lock, flags);
1129+ was_empty = list_empty(&pending_free);
1130+ list_add(&req->free_list, &pending_free);
1131+ spin_unlock_irqrestore(&pending_free_lock, flags);
1132+ if (was_empty)
1133+ wake_up(&pending_free_wq);
1134+}
1135+
1136+static void unplug_queue(blkif_t *blkif)
1137+{
1138+ if (blkif->plug == NULL)
1139+ return;
1140+ if (blkif->plug->unplug_fn)
1141+ blkif->plug->unplug_fn(blkif->plug);
1142+ blk_put_queue(blkif->plug);
1143+ blkif->plug = NULL;
1144+}
1145+
1146+static void plug_queue(blkif_t *blkif, struct block_device *bdev)
1147+{
1148+ request_queue_t *q = bdev_get_queue(bdev);
1149+
1150+ if (q == blkif->plug)
1151+ return;
1152+ unplug_queue(blkif);
1153+ blk_get_queue(q);
1154+ blkif->plug = q;
1155+}
1156+
1157+static void fast_flush_area(pending_req_t *req)
1158+{
1159+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1160+ unsigned int i, invcount = 0;
1161+ grant_handle_t handle;
1162+ int ret;
1163+
1164+ for (i = 0; i < req->nr_pages; i++) {
1165+ handle = pending_handle(req, i);
1166+ if (handle == BLKBACK_INVALID_HANDLE)
1167+ continue;
1168+ gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
1169+ GNTMAP_host_map, handle);
1170+ pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
1171+ invcount++;
1172+ }
1173+
1174+ ret = HYPERVISOR_grant_table_op(
1175+ GNTTABOP_unmap_grant_ref, unmap, invcount);
1176+ BUG_ON(ret);
1177+}
1178+
1179+/******************************************************************
1180+ * SCHEDULER FUNCTIONS
1181+ */
1182+
1183+static void print_stats(blkif_t *blkif)
1184+{
1185+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
1186+ current->comm, blkif->st_oo_req,
1187+ blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
1188+ blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
1189+ blkif->st_rd_req = 0;
1190+ blkif->st_wr_req = 0;
1191+ blkif->st_oo_req = 0;
1192+}
1193+
1194+int blkif_schedule(void *arg)
1195+{
1196+ blkif_t *blkif = arg;
1197+
1198+ blkif_get(blkif);
1199+
1200+ if (debug_lvl)
1201+ printk(KERN_DEBUG "%s: started\n", current->comm);
1202+
1203+ while (!kthread_should_stop()) {
1204+ if (try_to_freeze())
1205+ continue;
1206+
1207+ wait_event_interruptible(
1208+ blkif->wq,
1209+ blkif->waiting_reqs || kthread_should_stop());
1210+ wait_event_interruptible(
1211+ pending_free_wq,
1212+ !list_empty(&pending_free) || kthread_should_stop());
1213+
1214+ blkif->waiting_reqs = 0;
1215+ smp_mb(); /* clear flag *before* checking for work */
1216+
1217+ if (do_block_io_op(blkif))
1218+ blkif->waiting_reqs = 1;
1219+ unplug_queue(blkif);
1220+
1221+ if (log_stats && time_after(jiffies, blkif->st_print))
1222+ print_stats(blkif);
1223+ }
1224+
1225+ if (log_stats)
1226+ print_stats(blkif);
1227+ if (debug_lvl)
1228+ printk(KERN_DEBUG "%s: exiting\n", current->comm);
1229+
1230+ blkif->xenblkd = NULL;
1231+ blkif_put(blkif);
1232+
1233+ return 0;
1234+}
1235+
1236+/******************************************************************
1237+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
1238+ */
1239+
1240+static void __end_block_io_op(pending_req_t *pending_req, int error)
1241+{
1242+ /* An error fails the entire request. */
1243+ if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
1244+ (error == -EOPNOTSUPP)) {
1245+ DPRINTK("blkback: write barrier op failed, not supported\n");
1246+ blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
1247+ pending_req->status = BLKIF_RSP_EOPNOTSUPP;
1248+ } else if (error) {
1249+ DPRINTK("Buffer not up-to-date at end of operation, "
1250+ "error=%d\n", error);
1251+ pending_req->status = BLKIF_RSP_ERROR;
1252+ }
1253+
1254+ if (atomic_dec_and_test(&pending_req->pendcnt)) {
1255+ fast_flush_area(pending_req);
1256+ make_response(pending_req->blkif, pending_req->id,
1257+ pending_req->operation, pending_req->status);
1258+ blkif_put(pending_req->blkif);
1259+ free_req(pending_req);
1260+ }
1261+}
1262+
1263+static int end_block_io_op(struct bio *bio, unsigned int done, int error)
1264+{
1265+ if (bio->bi_size != 0)
1266+ return 1;
1267+ __end_block_io_op(bio->bi_private, error);
1268+ bio_put(bio);
1269+ return error;
1270+}
1271+
1272+
1273+/******************************************************************************
1274+ * NOTIFICATION FROM GUEST OS.
1275+ */
1276+
1277+static void blkif_notify_work(blkif_t *blkif)
1278+{
1279+ blkif->waiting_reqs = 1;
1280+ wake_up(&blkif->wq);
1281+}
1282+
1283+irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
1284+{
1285+ blkif_notify_work(dev_id);
1286+ return IRQ_HANDLED;
1287+}
1288+
1289+
1290+
1291+/******************************************************************
1292+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
1293+ */
1294+
1295+static int do_block_io_op(blkif_t *blkif)
1296+{
1297+ blkif_back_rings_t *blk_rings = &blkif->blk_rings;
1298+ blkif_request_t req;
1299+ pending_req_t *pending_req;
1300+ RING_IDX rc, rp;
1301+ int more_to_do = 0;
1302+
1303+ rc = blk_rings->common.req_cons;
1304+ rp = blk_rings->common.sring->req_prod;
1305+ rmb(); /* Ensure we see queued requests up to 'rp'. */
1306+
1307+ while (rc != rp) {
1308+
1309+ if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
1310+ break;
1311+
1312+ pending_req = alloc_req();
1313+ if (NULL == pending_req) {
1314+ blkif->st_oo_req++;
1315+ more_to_do = 1;
1316+ break;
1317+ }
1318+
1319+ if (kthread_should_stop()) {
1320+ more_to_do = 1;
1321+ break;
1322+ }
1323+
1324+ switch (blkif->blk_protocol) {
1325+ case BLKIF_PROTOCOL_NATIVE:
1326+ memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
1327+ break;
1328+ case BLKIF_PROTOCOL_X86_32:
1329+ blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
1330+ break;
1331+ case BLKIF_PROTOCOL_X86_64:
1332+ blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
1333+ break;
1334+ default:
1335+ BUG();
1336+ }
1337+ blk_rings->common.req_cons = ++rc; /* before make_response() */
1338+
1339+ /* Apply all sanity checks to /private copy/ of request. */
1340+ barrier();
1341+
1342+ switch (req.operation) {
1343+ case BLKIF_OP_READ:
1344+ blkif->st_rd_req++;
1345+ dispatch_rw_block_io(blkif, &req, pending_req);
1346+ break;
1347+ case BLKIF_OP_WRITE_BARRIER:
1348+ blkif->st_br_req++;
1349+ /* fall through */
1350+ case BLKIF_OP_WRITE:
1351+ blkif->st_wr_req++;
1352+ dispatch_rw_block_io(blkif, &req, pending_req);
1353+ break;
1354+ default:
1355+ /* A good sign something is wrong: sleep for a while to
1356+ * avoid excessive CPU consumption by a bad guest. */
1357+ msleep(1);
1358+ DPRINTK("error: unknown block io operation [%d]\n",
1359+ req.operation);
1360+ make_response(blkif, req.id, req.operation,
1361+ BLKIF_RSP_ERROR);
1362+ free_req(pending_req);
1363+ break;
1364+ }
1365+
1366+ /* Yield point for this unbounded loop. */
1367+ cond_resched();
1368+ }
1369+
1370+ return more_to_do;
1371+}
1372+
1373+static void dispatch_rw_block_io(blkif_t *blkif,
1374+ blkif_request_t *req,
1375+ pending_req_t *pending_req)
1376+{
1377+ extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
1378+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1379+ struct phys_req preq;
1380+ struct {
1381+ unsigned long buf; unsigned int nsec;
1382+ } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1383+ unsigned int nseg;
1384+ struct bio *bio = NULL;
1385+ int ret, i;
1386+ int operation;
1387+
1388+ switch (req->operation) {
1389+ case BLKIF_OP_READ:
1390+ operation = READ;
1391+ break;
1392+ case BLKIF_OP_WRITE:
1393+ operation = WRITE;
1394+ break;
1395+ case BLKIF_OP_WRITE_BARRIER:
1396+ operation = WRITE_BARRIER;
1397+ break;
1398+ default:
1399+ operation = 0; /* make gcc happy */
1400+ BUG();
1401+ }
1402+
1403+ /* Check that number of segments is sane. */
1404+ nseg = req->nr_segments;
1405+ if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
1406+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
1407+ DPRINTK("Bad number of segments in request (%d)\n", nseg);
1408+ goto fail_response;
1409+ }
1410+
1411+ preq.dev = req->handle;
1412+ preq.sector_number = req->sector_number;
1413+ preq.nr_sects = 0;
1414+
1415+ pending_req->blkif = blkif;
1416+ pending_req->id = req->id;
1417+ pending_req->operation = req->operation;
1418+ pending_req->status = BLKIF_RSP_OKAY;
1419+ pending_req->nr_pages = nseg;
1420+
1421+ for (i = 0; i < nseg; i++) {
1422+ uint32_t flags;
1423+
1424+ seg[i].nsec = req->seg[i].last_sect -
1425+ req->seg[i].first_sect + 1;
1426+
1427+ if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
1428+ (req->seg[i].last_sect < req->seg[i].first_sect))
1429+ goto fail_response;
1430+ preq.nr_sects += seg[i].nsec;
1431+
1432+ flags = GNTMAP_host_map;
1433+ if (operation != READ)
1434+ flags |= GNTMAP_readonly;
1435+ gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
1436+ req->seg[i].gref, blkif->domid);
1437+ }
1438+
1439+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
1440+ BUG_ON(ret);
1441+
1442+ for (i = 0; i < nseg; i++) {
1443+ if (unlikely(map[i].status != 0)) {
1444+ DPRINTK("invalid buffer -- could not remap it\n");
1445+ map[i].handle = BLKBACK_INVALID_HANDLE;
1446+ ret |= 1;
1447+ }
1448+
1449+ pending_handle(pending_req, i) = map[i].handle;
1450+
1451+ if (ret)
1452+ continue;
1453+
1454+ set_phys_to_machine(__pa(vaddr(
1455+ pending_req, i)) >> PAGE_SHIFT,
1456+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
1457+ seg[i].buf = map[i].dev_bus_addr |
1458+ (req->seg[i].first_sect << 9);
1459+ }
1460+
1461+ if (ret)
1462+ goto fail_flush;
1463+
1464+ if (vbd_translate(&preq, blkif, operation) != 0) {
1465+ DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
1466+ operation == READ ? "read" : "write",
1467+ preq.sector_number,
1468+ preq.sector_number + preq.nr_sects, preq.dev);
1469+ goto fail_flush;
1470+ }
1471+
1472+ plug_queue(blkif, preq.bdev);
1473+ atomic_set(&pending_req->pendcnt, 1);
1474+ blkif_get(blkif);
1475+
1476+ for (i = 0; i < nseg; i++) {
1477+ if (((int)preq.sector_number|(int)seg[i].nsec) &
1478+ ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
1479+ DPRINTK("Misaligned I/O request from domain %d",
1480+ blkif->domid);
1481+ goto fail_put_bio;
1482+ }
1483+
1484+ while ((bio == NULL) ||
1485+ (bio_add_page(bio,
1486+ virt_to_page(vaddr(pending_req, i)),
1487+ seg[i].nsec << 9,
1488+ seg[i].buf & ~PAGE_MASK) == 0)) {
1489+ if (bio) {
1490+ atomic_inc(&pending_req->pendcnt);
1491+ submit_bio(operation, bio);
1492+ }
1493+
1494+ bio = bio_alloc(GFP_KERNEL, nseg-i);
1495+ if (unlikely(bio == NULL))
1496+ goto fail_put_bio;
1497+
1498+ bio->bi_bdev = preq.bdev;
1499+ bio->bi_private = pending_req;
1500+ bio->bi_end_io = end_block_io_op;
1501+ bio->bi_sector = preq.sector_number;
1502+ }
1503+
1504+ preq.sector_number += seg[i].nsec;
1505+ }
1506+
1507+ if (!bio) {
1508+ BUG_ON(operation != WRITE_BARRIER);
1509+ bio = bio_alloc(GFP_KERNEL, 0);
1510+ if (unlikely(bio == NULL))
1511+ goto fail_put_bio;
1512+
1513+ bio->bi_bdev = preq.bdev;
1514+ bio->bi_private = pending_req;
1515+ bio->bi_end_io = end_block_io_op;
1516+ bio->bi_sector = -1;
1517+ }
1518+
1519+ submit_bio(operation, bio);
1520+
1521+ if (operation == READ)
1522+ blkif->st_rd_sect += preq.nr_sects;
1523+ else if (operation == WRITE || operation == WRITE_BARRIER)
1524+ blkif->st_wr_sect += preq.nr_sects;
1525+
1526+ return;
1527+
1528+ fail_flush:
1529+ fast_flush_area(pending_req);
1530+ fail_response:
1531+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
1532+ free_req(pending_req);
1533+ msleep(1); /* back off a bit */
1534+ return;
1535+
1536+ fail_put_bio:
1537+ __end_block_io_op(pending_req, -EINVAL);
1538+ if (bio)
1539+ bio_put(bio);
1540+ unplug_queue(blkif);
1541+ msleep(1); /* back off a bit */
1542+ return;
1543+}
1544+
1545+
1546+
1547+/******************************************************************
1548+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
1549+ */
1550+
1551+
1552+static void make_response(blkif_t *blkif, u64 id,
1553+ unsigned short op, int st)
1554+{
1555+ blkif_response_t resp;
1556+ unsigned long flags;
1557+ blkif_back_rings_t *blk_rings = &blkif->blk_rings;
1558+ int more_to_do = 0;
1559+ int notify;
1560+
1561+ resp.id = id;
1562+ resp.operation = op;
1563+ resp.status = st;
1564+
1565+ spin_lock_irqsave(&blkif->blk_ring_lock, flags);
1566+ /* Place on the response ring for the relevant domain. */
1567+ switch (blkif->blk_protocol) {
1568+ case BLKIF_PROTOCOL_NATIVE:
1569+ memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
1570+ &resp, sizeof(resp));
1571+ break;
1572+ case BLKIF_PROTOCOL_X86_32:
1573+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
1574+ &resp, sizeof(resp));
1575+ break;
1576+ case BLKIF_PROTOCOL_X86_64:
1577+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
1578+ &resp, sizeof(resp));
1579+ break;
1580+ default:
1581+ BUG();
1582+ }
1583+ blk_rings->common.rsp_prod_pvt++;
1584+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
1585+ if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
1586+ /*
1587+ * Tail check for pending requests. Allows frontend to avoid
1588+ * notifications if requests are already in flight (lower
1589+ * overheads and promotes batching).
1590+ */
1591+ RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
1592+
1593+ } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
1594+ more_to_do = 1;
1595+ }
1596+
1597+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
1598+
1599+ if (more_to_do)
1600+ blkif_notify_work(blkif);
1601+ if (notify)
1602+ notify_remote_via_irq(blkif->irq);
1603+}
1604+
1605+static int __init blkif_init(void)
1606+{
1607+ int i, mmap_pages;
1608+
1609+ if (!is_running_on_xen())
1610+ return -ENODEV;
1611+
1612+ mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
1613+
1614+ pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
1615+ blkif_reqs, GFP_KERNEL);
1616+ pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
1617+ mmap_pages, GFP_KERNEL);
1618+ pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
1619+
1620+ if (!pending_reqs || !pending_grant_handles || !pending_pages)
1621+ goto out_of_memory;
1622+
1623+ for (i = 0; i < mmap_pages; i++)
1624+ pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
1625+
1626+ blkif_interface_init();
1627+
1628+	memset(pending_reqs, 0, blkif_reqs * sizeof(pending_reqs[0]));
1629+ INIT_LIST_HEAD(&pending_free);
1630+
1631+ for (i = 0; i < blkif_reqs; i++)
1632+ list_add_tail(&pending_reqs[i].free_list, &pending_free);
1633+
1634+ blkif_xenbus_init();
1635+
1636+ return 0;
1637+
1638+ out_of_memory:
1639+ kfree(pending_reqs);
1640+ kfree(pending_grant_handles);
1641+ free_empty_pages_and_pagevec(pending_pages, mmap_pages);
1642+ printk("%s: out of memory\n", __FUNCTION__);
1643+ return -ENOMEM;
1644+}
1645+
1646+module_init(blkif_init);
1647+
1648+MODULE_LICENSE("Dual BSD/GPL");
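
The pool sizing in blkif_init() above follows from the per-request segment limit: each pending_req_t owns a contiguous run of BLKIF_MAX_SEGMENTS_PER_REQUEST pre-allocated pages, and vaddr_pagenr() turns a (request, segment) pair into an index into that flat pool. Below is a minimal stand-alone illustration of the same indexing; the value 11 for the segment limit is the usual blkif constant and is assumed here rather than taken from the patch.

	/* Stand-alone illustration of the blkback pending-page indexing. */
	#include <assert.h>
	#include <stdio.h>

	#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11	/* assumed; see xen/interface/io/blkif.h */

	struct fake_req { int dummy; };

	static struct fake_req reqs[64];		/* mirrors blkif_reqs = 64 */

	/* Same arithmetic as vaddr_pagenr(): request slot * segs-per-request + seg. */
	static int pagenr(struct fake_req *req, int seg)
	{
		return (int)(req - reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
	}

	int main(void)
	{
		int mmap_pages = 64 * BLKIF_MAX_SEGMENTS_PER_REQUEST;

		/* Request 0 uses slots 0..10, request 1 uses 11..21, and so on. */
		assert(pagenr(&reqs[0], 0) == 0);
		assert(pagenr(&reqs[1], 0) == BLKIF_MAX_SEGMENTS_PER_REQUEST);
		assert(pagenr(&reqs[63], 10) == mmap_pages - 1);
		printf("pool of %d pre-allocated pages for 64 in-flight requests\n",
		       mmap_pages);
		return 0;
	}
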
1649Index: head-2008-11-25/drivers/xen/blkback/common.h
1650===================================================================
1651--- /dev/null 1970-01-01 00:00:00.000000000 +0000
1652+++ head-2008-11-25/drivers/xen/blkback/common.h 2008-05-08 14:02:04.000000000 +0200
1653@@ -0,0 +1,139 @@
1654+/*
1655+ * This program is free software; you can redistribute it and/or
1656+ * modify it under the terms of the GNU General Public License version 2
1657+ * as published by the Free Software Foundation; or, when distributed
1658+ * separately from the Linux kernel or incorporated into other
1659+ * software packages, subject to the following license:
1660+ *
1661+ * Permission is hereby granted, free of charge, to any person obtaining a copy
1662+ * of this source file (the "Software"), to deal in the Software without
1663+ * restriction, including without limitation the rights to use, copy, modify,
1664+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1665+ * and to permit persons to whom the Software is furnished to do so, subject to
1666+ * the following conditions:
1667+ *
1668+ * The above copyright notice and this permission notice shall be included in
1669+ * all copies or substantial portions of the Software.
1670+ *
1671+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1672+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1673+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1674+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1675+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1676+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1677+ * IN THE SOFTWARE.
1678+ */
1679+
1680+#ifndef __BLKIF__BACKEND__COMMON_H__
1681+#define __BLKIF__BACKEND__COMMON_H__
1682+
1683+#include <linux/version.h>
1684+#include <linux/module.h>
1685+#include <linux/interrupt.h>
1686+#include <linux/slab.h>
1687+#include <linux/blkdev.h>
1688+#include <linux/vmalloc.h>
1689+#include <linux/wait.h>
1690+#include <asm/io.h>
1691+#include <asm/setup.h>
1692+#include <asm/pgalloc.h>
1693+#include <xen/evtchn.h>
1694+#include <asm/hypervisor.h>
1695+#include <xen/blkif.h>
1696+#include <xen/gnttab.h>
1697+#include <xen/driver_util.h>
1698+#include <xen/xenbus.h>
1699+
1700+#define DPRINTK(_f, _a...) \
1701+ pr_debug("(file=%s, line=%d) " _f, \
1702+ __FILE__ , __LINE__ , ## _a )
1703+
1704+struct vbd {
1705+ blkif_vdev_t handle; /* what the domain refers to this vbd as */
1706+ unsigned char readonly; /* Non-zero -> read-only */
1707+ unsigned char type; /* VDISK_xxx */
1708+ u32 pdevice; /* phys device that this vbd maps to */
1709+ struct block_device *bdev;
1710+};
1711+
1712+struct backend_info;
1713+
1714+typedef struct blkif_st {
1715+ /* Unique identifier for this interface. */
1716+ domid_t domid;
1717+ unsigned int handle;
1718+ /* Physical parameters of the comms window. */
1719+ unsigned int irq;
1720+ /* Comms information. */
1721+ enum blkif_protocol blk_protocol;
1722+ blkif_back_rings_t blk_rings;
1723+ struct vm_struct *blk_ring_area;
1724+ /* The VBD attached to this interface. */
1725+ struct vbd vbd;
1726+ /* Back pointer to the backend_info. */
1727+ struct backend_info *be;
1728+ /* Private fields. */
1729+ spinlock_t blk_ring_lock;
1730+ atomic_t refcnt;
1731+
1732+ wait_queue_head_t wq;
1733+ struct task_struct *xenblkd;
1734+ unsigned int waiting_reqs;
1735+ request_queue_t *plug;
1736+
1737+ /* statistics */
1738+ unsigned long st_print;
1739+ int st_rd_req;
1740+ int st_wr_req;
1741+ int st_oo_req;
1742+ int st_br_req;
1743+ int st_rd_sect;
1744+ int st_wr_sect;
1745+
1746+ wait_queue_head_t waiting_to_free;
1747+
1748+ grant_handle_t shmem_handle;
1749+ grant_ref_t shmem_ref;
1750+} blkif_t;
1751+
1752+blkif_t *blkif_alloc(domid_t domid);
1753+void blkif_disconnect(blkif_t *blkif);
1754+void blkif_free(blkif_t *blkif);
1755+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
1756+
1757+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
1758+#define blkif_put(_b) \
1759+ do { \
1760+ if (atomic_dec_and_test(&(_b)->refcnt)) \
1761+ wake_up(&(_b)->waiting_to_free);\
1762+ } while (0)
1763+
1764+/* Create a vbd. */
1765+int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
1766+ unsigned minor, int readonly, int cdrom);
1767+void vbd_free(struct vbd *vbd);
1768+
1769+unsigned long long vbd_size(struct vbd *vbd);
1770+unsigned int vbd_info(struct vbd *vbd);
1771+unsigned long vbd_secsize(struct vbd *vbd);
1772+
1773+struct phys_req {
1774+ unsigned short dev;
1775+ unsigned short nr_sects;
1776+ struct block_device *bdev;
1777+ blkif_sector_t sector_number;
1778+};
1779+
1780+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
1781+
1782+void blkif_interface_init(void);
1783+
1784+void blkif_xenbus_init(void);
1785+
1786+irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
1787+int blkif_schedule(void *arg);
1788+
1789+int blkback_barrier(struct xenbus_transaction xbt,
1790+ struct backend_info *be, int state);
1791+
1792+#endif /* __BLKIF__BACKEND__COMMON_H__ */
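
The blkif_get()/blkif_put() macros above implement a plain reference count whose final drop wakes whoever is sleeping on waiting_to_free (typically the teardown path). The stand-alone sketch below mimics that pattern with C11 atomics; it is an analogy only, since the kernel versions use atomic_t and a real wait queue.

	/* Sketch of the blkif_get()/blkif_put() pattern with C11 atomics. */
	#include <stdatomic.h>
	#include <stdio.h>

	struct obj {
		atomic_int refcnt;
		int freeing;			/* stand-in for the waiting_to_free waitqueue */
	};

	static void obj_get(struct obj *o)
	{
		atomic_fetch_add(&o->refcnt, 1);
	}

	static void obj_put(struct obj *o)
	{
		/* atomic_fetch_sub() returns the old value, so "== 1" means we
		 * dropped the last reference, like atomic_dec_and_test(). */
		if (atomic_fetch_sub(&o->refcnt, 1) == 1 && o->freeing)
			printf("last reference gone: wake the waiter\n");
	}

	int main(void)
	{
		struct obj o = { 1, 1 };	/* one initial reference, teardown pending */

		obj_get(&o);			/* e.g. taken at the top of blkif_schedule() */
		obj_put(&o);			/* kthread exits */
		obj_put(&o);			/* teardown drops the initial reference */
		return 0;
	}
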
1793Index: head-2008-11-25/drivers/xen/blkback/interface.c
1794===================================================================
1795--- /dev/null 1970-01-01 00:00:00.000000000 +0000
1796+++ head-2008-11-25/drivers/xen/blkback/interface.c 2007-06-12 13:13:44.000000000 +0200
1797@@ -0,0 +1,181 @@
1798+/******************************************************************************
1799+ * arch/xen/drivers/blkif/backend/interface.c
1800+ *
1801+ * Block-device interface management.
1802+ *
1803+ * Copyright (c) 2004, Keir Fraser
1804+ *
1805+ * This program is free software; you can redistribute it and/or
1806+ * modify it under the terms of the GNU General Public License version 2
1807+ * as published by the Free Software Foundation; or, when distributed
1808+ * separately from the Linux kernel or incorporated into other
1809+ * software packages, subject to the following license:
1810+ *
1811+ * Permission is hereby granted, free of charge, to any person obtaining a copy
1812+ * of this source file (the "Software"), to deal in the Software without
1813+ * restriction, including without limitation the rights to use, copy, modify,
1814+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1815+ * and to permit persons to whom the Software is furnished to do so, subject to
1816+ * the following conditions:
1817+ *
1818+ * The above copyright notice and this permission notice shall be included in
1819+ * all copies or substantial portions of the Software.
1820+ *
1821+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1822+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1823+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1824+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1825+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1826+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1827+ * IN THE SOFTWARE.
1828+ */
1829+
1830+#include "common.h"
1831+#include <xen/evtchn.h>
1832+#include <linux/kthread.h>
1833+
1834+static kmem_cache_t *blkif_cachep;
1835+
1836+blkif_t *blkif_alloc(domid_t domid)
1837+{
1838+ blkif_t *blkif;
1839+
1840+ blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
1841+ if (!blkif)
1842+ return ERR_PTR(-ENOMEM);
1843+
1844+ memset(blkif, 0, sizeof(*blkif));
1845+ blkif->domid = domid;
1846+ spin_lock_init(&blkif->blk_ring_lock);
1847+ atomic_set(&blkif->refcnt, 1);
1848+ init_waitqueue_head(&blkif->wq);
1849+ blkif->st_print = jiffies;
1850+ init_waitqueue_head(&blkif->waiting_to_free);
1851+
1852+ return blkif;
1853+}
1854+
1855+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
1856+{
1857+ struct gnttab_map_grant_ref op;
1858+
1859+ gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
1860+ GNTMAP_host_map, shared_page, blkif->domid);
1861+
1862+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
1863+ BUG();
1864+
1865+ if (op.status) {
1866+ DPRINTK(" Grant table operation failure !\n");
1867+ return op.status;
1868+ }
1869+
1870+ blkif->shmem_ref = shared_page;
1871+ blkif->shmem_handle = op.handle;
1872+
1873+ return 0;
1874+}
1875+
1876+static void unmap_frontend_page(blkif_t *blkif)
1877+{
1878+ struct gnttab_unmap_grant_ref op;
1879+
1880+ gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
1881+ GNTMAP_host_map, blkif->shmem_handle);
1882+
1883+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
1884+ BUG();
1885+}
1886+
1887+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
1888+{
1889+ int err;
1890+
1891+ /* Already connected through? */
1892+ if (blkif->irq)
1893+ return 0;
1894+
1895+ if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
1896+ return -ENOMEM;
1897+
1898+ err = map_frontend_page(blkif, shared_page);
1899+ if (err) {
1900+ free_vm_area(blkif->blk_ring_area);
1901+ return err;
1902+ }
1903+
1904+ switch (blkif->blk_protocol) {
1905+ case BLKIF_PROTOCOL_NATIVE:
1906+ {
1907+ blkif_sring_t *sring;
1908+ sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
1909+ BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
1910+ break;
1911+ }
1912+ case BLKIF_PROTOCOL_X86_32:
1913+ {
1914+ blkif_x86_32_sring_t *sring_x86_32;
1915+ sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr;
1916+ BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
1917+ break;
1918+ }
1919+ case BLKIF_PROTOCOL_X86_64:
1920+ {
1921+ blkif_x86_64_sring_t *sring_x86_64;
1922+ sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr;
1923+ BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
1924+ break;
1925+ }
1926+ default:
1927+ BUG();
1928+ }
1929+
1930+ err = bind_interdomain_evtchn_to_irqhandler(
1931+ blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif);
1932+ if (err < 0)
1933+ {
1934+ unmap_frontend_page(blkif);
1935+ free_vm_area(blkif->blk_ring_area);
1936+ blkif->blk_rings.common.sring = NULL;
1937+ return err;
1938+ }
1939+ blkif->irq = err;
1940+
1941+ return 0;
1942+}
1943+
1944+void blkif_disconnect(blkif_t *blkif)
1945+{
1946+ if (blkif->xenblkd) {
1947+ kthread_stop(blkif->xenblkd);
1948+ blkif->xenblkd = NULL;
1949+ }
1950+
1951+ atomic_dec(&blkif->refcnt);
1952+ wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
1953+ atomic_inc(&blkif->refcnt);
1954+
1955+ if (blkif->irq) {
1956+ unbind_from_irqhandler(blkif->irq, blkif);
1957+ blkif->irq = 0;
1958+ }
1959+
1960+ if (blkif->blk_rings.common.sring) {
1961+ unmap_frontend_page(blkif);
1962+ free_vm_area(blkif->blk_ring_area);
1963+ blkif->blk_rings.common.sring = NULL;
1964+ }
1965+}
1966+
1967+void blkif_free(blkif_t *blkif)
1968+{
1969+ if (!atomic_dec_and_test(&blkif->refcnt))
1970+ BUG();
1971+ kmem_cache_free(blkif_cachep, blkif);
1972+}
1973+
1974+void __init blkif_interface_init(void)
1975+{
1976+ blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
1977+ 0, 0, NULL, NULL);
1978+}
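/*
 * Illustrative sketch (editor's note, not part of the patch): the protocol
 * switch in blkif_map() above exists because a 32-bit frontend and a 64-bit
 * backend pad 64-bit ring fields differently, so the backend must pick the
 * matching shared-ring layout.  A stand-alone demonstration of that kind of
 * layout difference (the structs below are simplified stand-ins, not the
 * real blkif request):
 */
#include <stdio.h>
#include <stdint.h>

struct req_i386 {			/* u64 fields only 4-byte aligned */
	uint8_t  operation;
	uint8_t  nr_segments;
	uint16_t handle;
	uint64_t id;
	uint64_t sector_number;
} __attribute__((packed, aligned(4)));

struct req_x86_64 {			/* natural 64-bit alignment */
	uint8_t  operation;
	uint8_t  nr_segments;
	uint16_t handle;
	uint64_t id;
	uint64_t sector_number;
};

int main(void)
{
	printf("i386-style layout: %zu bytes, x86-64 layout: %zu bytes\n",
	       sizeof(struct req_i386), sizeof(struct req_x86_64));
	return 0;
}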
1979Index: head-2008-11-25/drivers/xen/blkback/vbd.c
1980===================================================================
1981--- /dev/null 1970-01-01 00:00:00.000000000 +0000
1982+++ head-2008-11-25/drivers/xen/blkback/vbd.c 2008-05-08 14:02:04.000000000 +0200
1983@@ -0,0 +1,118 @@
1984+/******************************************************************************
1985+ * blkback/vbd.c
1986+ *
1987+ * Routines for managing virtual block devices (VBDs).
1988+ *
1989+ * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
1990+ *
1991+ * This program is free software; you can redistribute it and/or
1992+ * modify it under the terms of the GNU General Public License version 2
1993+ * as published by the Free Software Foundation; or, when distributed
1994+ * separately from the Linux kernel or incorporated into other
1995+ * software packages, subject to the following license:
1996+ *
1997+ * Permission is hereby granted, free of charge, to any person obtaining a copy
1998+ * of this source file (the "Software"), to deal in the Software without
1999+ * restriction, including without limitation the rights to use, copy, modify,
2000+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
2001+ * and to permit persons to whom the Software is furnished to do so, subject to
2002+ * the following conditions:
2003+ *
2004+ * The above copyright notice and this permission notice shall be included in
2005+ * all copies or substantial portions of the Software.
2006+ *
2007+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2008+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2009+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2010+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2011+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2012+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2013+ * IN THE SOFTWARE.
2014+ */
2015+
2016+#include "common.h"
2017+
2018+#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
2019+ (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
2020+
2021+unsigned long long vbd_size(struct vbd *vbd)
2022+{
2023+ return vbd_sz(vbd);
2024+}
2025+
2026+unsigned int vbd_info(struct vbd *vbd)
2027+{
2028+ return vbd->type | (vbd->readonly?VDISK_READONLY:0);
2029+}
2030+
2031+unsigned long vbd_secsize(struct vbd *vbd)
2032+{
2033+ return bdev_hardsect_size(vbd->bdev);
2034+}
2035+
2036+int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
2037+ unsigned minor, int readonly, int cdrom)
2038+{
2039+ struct vbd *vbd;
2040+ struct block_device *bdev;
2041+
2042+ vbd = &blkif->vbd;
2043+ vbd->handle = handle;
2044+ vbd->readonly = readonly;
2045+ vbd->type = 0;
2046+
2047+ vbd->pdevice = MKDEV(major, minor);
2048+
2049+ bdev = open_by_devnum(vbd->pdevice,
2050+ vbd->readonly ? FMODE_READ : FMODE_WRITE);
2051+
2052+ if (IS_ERR(bdev)) {
2053+ DPRINTK("vbd_creat: device %08x could not be opened.\n",
2054+ vbd->pdevice);
2055+ return -ENOENT;
2056+ }
2057+
2058+ vbd->bdev = bdev;
2059+
2060+ if (vbd->bdev->bd_disk == NULL) {
2061+ DPRINTK("vbd_creat: device %08x doesn't exist.\n",
2062+ vbd->pdevice);
2063+ vbd_free(vbd);
2064+ return -ENOENT;
2065+ }
2066+
2067+ if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
2068+ vbd->type |= VDISK_CDROM;
2069+ if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
2070+ vbd->type |= VDISK_REMOVABLE;
2071+
2072+ DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
2073+ handle, blkif->domid);
2074+ return 0;
2075+}
2076+
2077+void vbd_free(struct vbd *vbd)
2078+{
2079+ if (vbd->bdev)
2080+ blkdev_put(vbd->bdev);
2081+ vbd->bdev = NULL;
2082+}
2083+
2084+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
2085+{
2086+ struct vbd *vbd = &blkif->vbd;
2087+ int rc = -EACCES;
2088+
2089+ if ((operation != READ) && vbd->readonly)
2090+ goto out;
2091+
2092+ if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
2093+ goto out;
2094+
2095+ req->dev = vbd->pdevice;
2096+ req->bdev = vbd->bdev;
2097+ rc = 0;
2098+
2099+ out:
2100+ return rc;
2101+}
2102Index: head-2008-11-25/drivers/xen/blkback/xenbus.c
2103===================================================================
2104--- /dev/null 1970-01-01 00:00:00.000000000 +0000
2105+++ head-2008-11-25/drivers/xen/blkback/xenbus.c 2008-05-08 14:02:04.000000000 +0200
2106@@ -0,0 +1,541 @@
2107+/* Xenbus code for blkif backend
2108+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
2109+ Copyright (C) 2005 XenSource Ltd
2110+
2111+ This program is free software; you can redistribute it and/or modify
2112+ it under the terms of the GNU General Public License as published by
2113+ the Free Software Foundation; either version 2 of the License, or
2114+ (at your option) any later version.
2115+
2116+ This program is distributed in the hope that it will be useful,
2117+ but WITHOUT ANY WARRANTY; without even the implied warranty of
2118+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2119+ GNU General Public License for more details.
2120+
2121+ You should have received a copy of the GNU General Public License
2122+ along with this program; if not, write to the Free Software
2123+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2124+*/
2125+
2126+#include <stdarg.h>
2127+#include <linux/module.h>
2128+#include <linux/kthread.h>
2129+#include "common.h"
2130+
2131+#undef DPRINTK
2132+#define DPRINTK(fmt, args...) \
2133+ pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \
2134+ __FUNCTION__, __LINE__, ##args)
2135+
2136+struct backend_info
2137+{
2138+ struct xenbus_device *dev;
2139+ blkif_t *blkif;
2140+ struct xenbus_watch backend_watch;
2141+ unsigned major;
2142+ unsigned minor;
2143+ char *mode;
2144+};
2145+
2146+static void connect(struct backend_info *);
2147+static int connect_ring(struct backend_info *);
2148+static void backend_changed(struct xenbus_watch *, const char **,
2149+ unsigned int);
2150+
2151+static int blkback_name(blkif_t *blkif, char *buf)
2152+{
2153+ char *devpath, *devname;
2154+ struct xenbus_device *dev = blkif->be->dev;
2155+
2156+ devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
2157+ if (IS_ERR(devpath))
2158+ return PTR_ERR(devpath);
2159+
2160+ if ((devname = strstr(devpath, "/dev/")) != NULL)
2161+ devname += strlen("/dev/");
2162+ else
2163+ devname = devpath;
2164+
2165+ snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
2166+ kfree(devpath);
2167+
2168+ return 0;
2169+}
2170+
2171+static void update_blkif_status(blkif_t *blkif)
2172+{
2173+ int err;
2174+ char name[TASK_COMM_LEN];
2175+
2176+ /* Not ready to connect? */
2177+ if (!blkif->irq || !blkif->vbd.bdev)
2178+ return;
2179+
2180+ /* Already connected? */
2181+ if (blkif->be->dev->state == XenbusStateConnected)
2182+ return;
2183+
2184+ /* Attempt to connect: exit if we fail to. */
2185+ connect(blkif->be);
2186+ if (blkif->be->dev->state != XenbusStateConnected)
2187+ return;
2188+
2189+ err = blkback_name(blkif, name);
2190+ if (err) {
2191+ xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
2192+ return;
2193+ }
2194+
2195+ blkif->xenblkd = kthread_run(blkif_schedule, blkif, name);
2196+ if (IS_ERR(blkif->xenblkd)) {
2197+ err = PTR_ERR(blkif->xenblkd);
2198+ blkif->xenblkd = NULL;
2199+ xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
2200+ }
2201+}
2202+
2203+
2204+/****************************************************************
2205+ * sysfs interface for VBD I/O requests
2206+ */
2207+
2208+#define VBD_SHOW(name, format, args...) \
2209+ static ssize_t show_##name(struct device *_dev, \
2210+ struct device_attribute *attr, \
2211+ char *buf) \
2212+ { \
2213+ struct xenbus_device *dev = to_xenbus_device(_dev); \
2214+ struct backend_info *be = dev->dev.driver_data; \
2215+ \
2216+ return sprintf(buf, format, ##args); \
2217+ } \
2218+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
2219+
2220+VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
2221+VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
2222+VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
2223+VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
2224+VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
2225+VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
2226+
2227+static struct attribute *vbdstat_attrs[] = {
2228+ &dev_attr_oo_req.attr,
2229+ &dev_attr_rd_req.attr,
2230+ &dev_attr_wr_req.attr,
2231+ &dev_attr_br_req.attr,
2232+ &dev_attr_rd_sect.attr,
2233+ &dev_attr_wr_sect.attr,
2234+ NULL
2235+};
2236+
2237+static struct attribute_group vbdstat_group = {
2238+ .name = "statistics",
2239+ .attrs = vbdstat_attrs,
2240+};
2241+
2242+VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
2243+VBD_SHOW(mode, "%s\n", be->mode);
2244+
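/*
 * Illustrative sketch (editor's note, not part of the patch): VBD_SHOW()
 * above uses token pasting to stamp out one sysfs show_<name>() routine per
 * statistic.  The same preprocessor trick in a stand-alone program (the
 * counters and macro name here are invented for the example):
 */
#include <stdio.h>

static int st_rd_req = 3, st_wr_req = 7;

#define COUNTER_SHOW(name, expr)			\
	static void show_##name(void)			\
	{						\
		printf(#name " = %d\n", (expr));	\
	}

COUNTER_SHOW(rd_req, st_rd_req)
COUNTER_SHOW(wr_req, st_wr_req)

int main(void)
{
	show_rd_req();
	show_wr_req();
	return 0;
}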
2245+int xenvbd_sysfs_addif(struct xenbus_device *dev)
2246+{
2247+ int error;
2248+
2249+ error = device_create_file(&dev->dev, &dev_attr_physical_device);
2250+ if (error)
2251+ goto fail1;
2252+
2253+ error = device_create_file(&dev->dev, &dev_attr_mode);
2254+ if (error)
2255+ goto fail2;
2256+
2257+ error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
2258+ if (error)
2259+ goto fail3;
2260+
2261+ return 0;
2262+
2263+fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
2264+fail2: device_remove_file(&dev->dev, &dev_attr_mode);
2265+fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
2266+ return error;
2267+}
2268+
2269+void xenvbd_sysfs_delif(struct xenbus_device *dev)
2270+{
2271+ sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
2272+ device_remove_file(&dev->dev, &dev_attr_mode);
2273+ device_remove_file(&dev->dev, &dev_attr_physical_device);
2274+}
2275+
2276+static int blkback_remove(struct xenbus_device *dev)
2277+{
2278+ struct backend_info *be = dev->dev.driver_data;
2279+
2280+ DPRINTK("");
2281+
2282+ if (be->major || be->minor)
2283+ xenvbd_sysfs_delif(dev);
2284+
2285+ if (be->backend_watch.node) {
2286+ unregister_xenbus_watch(&be->backend_watch);
2287+ kfree(be->backend_watch.node);
2288+ be->backend_watch.node = NULL;
2289+ }
2290+
2291+ if (be->blkif) {
2292+ blkif_disconnect(be->blkif);
2293+ vbd_free(&be->blkif->vbd);
2294+ blkif_free(be->blkif);
2295+ be->blkif = NULL;
2296+ }
2297+
2298+ kfree(be);
2299+ dev->dev.driver_data = NULL;
2300+ return 0;
2301+}
2302+
2303+int blkback_barrier(struct xenbus_transaction xbt,
2304+ struct backend_info *be, int state)
2305+{
2306+ struct xenbus_device *dev = be->dev;
2307+ int err;
2308+
2309+ err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
2310+ "%d", state);
2311+ if (err)
2312+ xenbus_dev_fatal(dev, err, "writing feature-barrier");
2313+
2314+ return err;
2315+}
2316+
2317+/**
2318+ * Entry point to this code when a new device is created. Allocate the basic
2319+ * structures, and watch the store waiting for the hotplug scripts to tell us
2320+ * the device's physical major and minor numbers. Switch to InitWait.
2321+ */
2322+static int blkback_probe(struct xenbus_device *dev,
2323+ const struct xenbus_device_id *id)
2324+{
2325+ int err;
2326+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
2327+ GFP_KERNEL);
2328+ if (!be) {
2329+ xenbus_dev_fatal(dev, -ENOMEM,
2330+ "allocating backend structure");
2331+ return -ENOMEM;
2332+ }
2333+ be->dev = dev;
2334+ dev->dev.driver_data = be;
2335+
2336+ be->blkif = blkif_alloc(dev->otherend_id);
2337+ if (IS_ERR(be->blkif)) {
2338+ err = PTR_ERR(be->blkif);
2339+ be->blkif = NULL;
2340+ xenbus_dev_fatal(dev, err, "creating block interface");
2341+ goto fail;
2342+ }
2343+
2344+ /* setup back pointer */
2345+ be->blkif->be = be;
2346+
2347+ err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
2348+ &be->backend_watch, backend_changed);
2349+ if (err)
2350+ goto fail;
2351+
2352+ err = xenbus_switch_state(dev, XenbusStateInitWait);
2353+ if (err)
2354+ goto fail;
2355+
2356+ return 0;
2357+
2358+fail:
2359+ DPRINTK("failed");
2360+ blkback_remove(dev);
2361+ return err;
2362+}
2363+
2364+
2365+/**
2366+ * Callback received when the hotplug scripts have placed the physical-device
2367+ * node. Read it and the mode node, and create a vbd. If the frontend is
2368+ * ready, connect.
2369+ */
2370+static void backend_changed(struct xenbus_watch *watch,
2371+ const char **vec, unsigned int len)
2372+{
2373+ int err;
2374+ unsigned major;
2375+ unsigned minor;
2376+ struct backend_info *be
2377+ = container_of(watch, struct backend_info, backend_watch);
2378+ struct xenbus_device *dev = be->dev;
2379+ int cdrom = 0;
2380+ char *device_type;
2381+
2382+ DPRINTK("");
2383+
2384+ err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
2385+ &major, &minor);
2386+ if (XENBUS_EXIST_ERR(err)) {
2387+ /* Since this watch will fire once immediately after it is
2388+ registered, we expect this. Ignore it, and wait for the
2389+ hotplug scripts. */
2390+ return;
2391+ }
2392+ if (err != 2) {
2393+ xenbus_dev_fatal(dev, err, "reading physical-device");
2394+ return;
2395+ }
2396+
2397+ if ((be->major || be->minor) &&
2398+ ((be->major != major) || (be->minor != minor))) {
2399+ printk(KERN_WARNING
2400+ "blkback: changing physical device (from %x:%x to "
2401+ "%x:%x) not supported.\n", be->major, be->minor,
2402+ major, minor);
2403+ return;
2404+ }
2405+
2406+ be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
2407+ if (IS_ERR(be->mode)) {
2408+ err = PTR_ERR(be->mode);
2409+ be->mode = NULL;
2410+ xenbus_dev_fatal(dev, err, "reading mode");
2411+ return;
2412+ }
2413+
2414+ device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
2415+ if (!IS_ERR(device_type)) {
2416+ cdrom = strcmp(device_type, "cdrom") == 0;
2417+ kfree(device_type);
2418+ }
2419+
2420+ if (be->major == 0 && be->minor == 0) {
2421+ /* Front end dir is a number, which is used as the handle. */
2422+
2423+ char *p = strrchr(dev->otherend, '/') + 1;
2424+ long handle = simple_strtoul(p, NULL, 0);
2425+
2426+ be->major = major;
2427+ be->minor = minor;
2428+
2429+ err = vbd_create(be->blkif, handle, major, minor,
2430+ (NULL == strchr(be->mode, 'w')), cdrom);
2431+ if (err) {
2432+ be->major = be->minor = 0;
2433+ xenbus_dev_fatal(dev, err, "creating vbd structure");
2434+ return;
2435+ }
2436+
2437+ err = xenvbd_sysfs_addif(dev);
2438+ if (err) {
2439+ vbd_free(&be->blkif->vbd);
2440+ be->major = be->minor = 0;
2441+ xenbus_dev_fatal(dev, err, "creating sysfs entries");
2442+ return;
2443+ }
2444+
2445+ /* We're potentially connected now */
2446+ update_blkif_status(be->blkif);
2447+ }
2448+}
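/*
 * Illustrative sketch (editor's note, not part of the patch): the hotplug
 * scripts publish "physical-device" as two hex numbers separated by a colon,
 * which backend_changed() above reads with xenbus_scanf("%x:%x").  The same
 * parsing as a stand-alone program (the sample store value is hypothetical):
 */
#include <stdio.h>

int main(void)
{
	const char *node = "8:10";		/* hypothetical store contents */
	unsigned major, minor;

	if (sscanf(node, "%x:%x", &major, &minor) != 2) {
		fprintf(stderr, "malformed physical-device node\n");
		return 1;
	}
	printf("major %u, minor %u\n", major, minor);	/* prints 8 and 16 */
	return 0;
}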
2449+
2450+
2451+/**
2452+ * Callback received when the frontend's state changes.
2453+ */
2454+static void frontend_changed(struct xenbus_device *dev,
2455+ enum xenbus_state frontend_state)
2456+{
2457+ struct backend_info *be = dev->dev.driver_data;
2458+ int err;
2459+
2460+ DPRINTK("%s", xenbus_strstate(frontend_state));
2461+
2462+ switch (frontend_state) {
2463+ case XenbusStateInitialising:
2464+ if (dev->state == XenbusStateClosed) {
2465+ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
2466+ __FUNCTION__, dev->nodename);
2467+ xenbus_switch_state(dev, XenbusStateInitWait);
2468+ }
2469+ break;
2470+
2471+ case XenbusStateInitialised:
2472+ case XenbusStateConnected:
2473+ /* Ensure we connect even when two watches fire in
2474+ close succession and we miss the intermediate value
2475+ of frontend_state. */
2476+ if (dev->state == XenbusStateConnected)
2477+ break;
2478+
2479+ err = connect_ring(be);
2480+ if (err)
2481+ break;
2482+ update_blkif_status(be->blkif);
2483+ break;
2484+
2485+ case XenbusStateClosing:
2486+ blkif_disconnect(be->blkif);
2487+ xenbus_switch_state(dev, XenbusStateClosing);
2488+ break;
2489+
2490+ case XenbusStateClosed:
2491+ xenbus_switch_state(dev, XenbusStateClosed);
2492+ if (xenbus_dev_is_online(dev))
2493+ break;
2494+ /* fall through if not online */
2495+ case XenbusStateUnknown:
2496+ device_unregister(&dev->dev);
2497+ break;
2498+
2499+ default:
2500+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
2501+ frontend_state);
2502+ break;
2503+ }
2504+}
2505+
2506+
2507+/* ** Connection ** */
2508+
2509+
2510+/**
2511+ * Write the physical details regarding the block device to the store, and
2512+ * switch to Connected state.
2513+ */
2514+static void connect(struct backend_info *be)
2515+{
2516+ struct xenbus_transaction xbt;
2517+ int err;
2518+ struct xenbus_device *dev = be->dev;
2519+
2520+ DPRINTK("%s", dev->otherend);
2521+
2522+ /* Supply the information about the device the frontend needs */
2523+again:
2524+ err = xenbus_transaction_start(&xbt);
2525+ if (err) {
2526+ xenbus_dev_fatal(dev, err, "starting transaction");
2527+ return;
2528+ }
2529+
2530+ err = blkback_barrier(xbt, be, 1);
2531+ if (err)
2532+ goto abort;
2533+
2534+ err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
2535+ vbd_size(&be->blkif->vbd));
2536+ if (err) {
2537+ xenbus_dev_fatal(dev, err, "writing %s/sectors",
2538+ dev->nodename);
2539+ goto abort;
2540+ }
2541+
2542+ /* FIXME: use a typename instead */
2543+ err = xenbus_printf(xbt, dev->nodename, "info", "%u",
2544+ vbd_info(&be->blkif->vbd));
2545+ if (err) {
2546+ xenbus_dev_fatal(dev, err, "writing %s/info",
2547+ dev->nodename);
2548+ goto abort;
2549+ }
2550+ err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
2551+ vbd_secsize(&be->blkif->vbd));
2552+ if (err) {
2553+ xenbus_dev_fatal(dev, err, "writing %s/sector-size",
2554+ dev->nodename);
2555+ goto abort;
2556+ }
2557+
2558+ err = xenbus_transaction_end(xbt, 0);
2559+ if (err == -EAGAIN)
2560+ goto again;
2561+ if (err)
2562+ xenbus_dev_fatal(dev, err, "ending transaction");
2563+
2564+ err = xenbus_switch_state(dev, XenbusStateConnected);
2565+ if (err)
2566+ xenbus_dev_fatal(dev, err, "switching to Connected state",
2567+ dev->nodename);
2568+
2569+ return;
2570+ abort:
2571+ xenbus_transaction_end(xbt, 1);
2572+}
2573+
2574+
2575+static int connect_ring(struct backend_info *be)
2576+{
2577+ struct xenbus_device *dev = be->dev;
2578+ unsigned long ring_ref;
2579+ unsigned int evtchn;
2580+ char protocol[64] = "";
2581+ int err;
2582+
2583+ DPRINTK("%s", dev->otherend);
2584+
2585+ err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref,
2586+ "event-channel", "%u", &evtchn, NULL);
2587+ if (err) {
2588+ xenbus_dev_fatal(dev, err,
2589+ "reading %s/ring-ref and event-channel",
2590+ dev->otherend);
2591+ return err;
2592+ }
2593+
2594+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
2595+ err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
2596+ "%63s", protocol, NULL);
2597+ if (err)
2598+ strcpy(protocol, "unspecified, assuming native");
2599+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
2600+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
2601+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
2602+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
2603+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
2604+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
2605+ else {
2606+ xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
2607+ return -1;
2608+ }
2609+ printk(KERN_INFO
2610+ "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
2611+ ring_ref, evtchn, be->blkif->blk_protocol, protocol);
2612+
2613+ /* Map the shared frame, irq etc. */
2614+ err = blkif_map(be->blkif, ring_ref, evtchn);
2615+ if (err) {
2616+ xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
2617+ ring_ref, evtchn);
2618+ return err;
2619+ }
2620+
2621+ return 0;
2622+}
2623+
2624+
2625+/* ** Driver Registration ** */
2626+
2627+
2628+static const struct xenbus_device_id blkback_ids[] = {
2629+ { "vbd" },
2630+ { "" }
2631+};
2632+
2633+
2634+static struct xenbus_driver blkback = {
2635+ .name = "vbd",
2636+ .owner = THIS_MODULE,
2637+ .ids = blkback_ids,
2638+ .probe = blkback_probe,
2639+ .remove = blkback_remove,
2640+ .otherend_changed = frontend_changed
2641+};
2642+
2643+
2644+void blkif_xenbus_init(void)
2645+{
2646+ xenbus_register_backend(&blkback);
2647+}
2648Index: head-2008-11-25/drivers/xen/blkfront/Makefile
2649===================================================================
2650--- /dev/null 1970-01-01 00:00:00.000000000 +0000
2651+++ head-2008-11-25/drivers/xen/blkfront/Makefile 2007-06-12 13:13:44.000000000 +0200
2652@@ -0,0 +1,5 @@
2653+
2654+obj-$(CONFIG_XEN_BLKDEV_FRONTEND) := xenblk.o
2655+
2656+xenblk-objs := blkfront.o vbd.o
2657+
2658Index: head-2008-11-25/drivers/xen/blkfront/blkfront.c
2659===================================================================
2660--- /dev/null 1970-01-01 00:00:00.000000000 +0000
2661+++ head-2008-11-25/drivers/xen/blkfront/blkfront.c 2008-08-07 12:44:36.000000000 +0200
2662@@ -0,0 +1,936 @@
2663+/******************************************************************************
2664+ * blkfront.c
2665+ *
2666+ * XenLinux virtual block-device driver.
2667+ *
2668+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
2669+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
2670+ * Copyright (c) 2004, Christian Limpach
2671+ * Copyright (c) 2004, Andrew Warfield
2672+ * Copyright (c) 2005, Christopher Clark
2673+ * Copyright (c) 2005, XenSource Ltd
2674+ *
2675+ * This program is free software; you can redistribute it and/or
2676+ * modify it under the terms of the GNU General Public License version 2
2677+ * as published by the Free Software Foundation; or, when distributed
2678+ * separately from the Linux kernel or incorporated into other
2679+ * software packages, subject to the following license:
2680+ *
2681+ * Permission is hereby granted, free of charge, to any person obtaining a copy
2682+ * of this source file (the "Software"), to deal in the Software without
2683+ * restriction, including without limitation the rights to use, copy, modify,
2684+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
2685+ * and to permit persons to whom the Software is furnished to do so, subject to
2686+ * the following conditions:
2687+ *
2688+ * The above copyright notice and this permission notice shall be included in
2689+ * all copies or substantial portions of the Software.
2690+ *
2691+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2692+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2693+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2694+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2695+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2696+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2697+ * IN THE SOFTWARE.
2698+ */
2699+
2700+#include <linux/version.h>
2701+#include "block.h"
2702+#include <linux/cdrom.h>
2703+#include <linux/sched.h>
2704+#include <linux/interrupt.h>
2705+#include <scsi/scsi.h>
2706+#include <xen/evtchn.h>
2707+#include <xen/xenbus.h>
2708+#include <xen/interface/grant_table.h>
2709+#include <xen/interface/io/protocols.h>
2710+#include <xen/gnttab.h>
2711+#include <asm/hypervisor.h>
2712+#include <asm/maddr.h>
2713+
2714+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
2715+#include <xen/platform-compat.h>
2716+#endif
2717+
2718+#define BLKIF_STATE_DISCONNECTED 0
2719+#define BLKIF_STATE_CONNECTED 1
2720+#define BLKIF_STATE_SUSPENDED 2
2721+
2722+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
2723+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
2724+#define GRANT_INVALID_REF 0
2725+
2726+static void connect(struct blkfront_info *);
2727+static void blkfront_closing(struct xenbus_device *);
2728+static int blkfront_remove(struct xenbus_device *);
2729+static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
2730+static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
2731+
2732+static void kick_pending_request_queues(struct blkfront_info *);
2733+
2734+static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
2735+static void blkif_restart_queue(void *arg);
2736+static void blkif_recover(struct blkfront_info *);
2737+static void blkif_completion(struct blk_shadow *);
2738+static void blkif_free(struct blkfront_info *, int);
2739+
2740+
2741+/**
2742+ * Entry point to this code when a new device is created. Allocate the basic
2743+ * structures and the ring buffer for communication with the backend, and
2744+ * inform the backend of the appropriate details for those. Switch to
2745+ * Initialised state.
2746+ */
2747+static int blkfront_probe(struct xenbus_device *dev,
2748+ const struct xenbus_device_id *id)
2749+{
2750+ int err, vdevice, i;
2751+ struct blkfront_info *info;
2752+
2753+ /* FIXME: Use dynamic device id if this is not set. */
2754+ err = xenbus_scanf(XBT_NIL, dev->nodename,
2755+ "virtual-device", "%i", &vdevice);
2756+ if (err != 1) {
2757+ /* go looking in the extended area instead */
2758+ err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
2759+ "%i", &vdevice);
2760+ if (err != 1) {
2761+ xenbus_dev_fatal(dev, err, "reading virtual-device");
2762+ return err;
2763+ }
2764+ }
2765+
2766+ info = kzalloc(sizeof(*info), GFP_KERNEL);
2767+ if (!info) {
2768+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
2769+ return -ENOMEM;
2770+ }
2771+
2772+ info->xbdev = dev;
2773+ info->vdevice = vdevice;
2774+ info->connected = BLKIF_STATE_DISCONNECTED;
2775+ INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
2776+
2777+ for (i = 0; i < BLK_RING_SIZE; i++)
2778+ info->shadow[i].req.id = i+1;
2779+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
2780+
2781+ /* Front end dir is a number, which is used as the id. */
2782+ info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
2783+ dev->dev.driver_data = info;
2784+
2785+ err = talk_to_backend(dev, info);
2786+ if (err) {
2787+ kfree(info);
2788+ dev->dev.driver_data = NULL;
2789+ return err;
2790+ }
2791+
2792+ return 0;
2793+}
2794+
2795+
2796+/**
2797+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
2798+ * driver restart. We tear down our blkif structure and recreate it, but
2799+ * leave the device-layer structures intact so that this is transparent to the
2800+ * rest of the kernel.
2801+ */
2802+static int blkfront_resume(struct xenbus_device *dev)
2803+{
2804+ struct blkfront_info *info = dev->dev.driver_data;
2805+ int err;
2806+
2807+ DPRINTK("blkfront_resume: %s\n", dev->nodename);
2808+
2809+ blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
2810+
2811+ err = talk_to_backend(dev, info);
2812+ if (info->connected == BLKIF_STATE_SUSPENDED && !err)
2813+ blkif_recover(info);
2814+
2815+ return err;
2816+}
2817+
2818+
2819+/* Common code used when first setting up, and when resuming. */
2820+static int talk_to_backend(struct xenbus_device *dev,
2821+ struct blkfront_info *info)
2822+{
2823+ const char *message = NULL;
2824+ struct xenbus_transaction xbt;
2825+ int err;
2826+
2827+ /* Create shared ring, alloc event channel. */
2828+ err = setup_blkring(dev, info);
2829+ if (err)
2830+ goto out;
2831+
2832+again:
2833+ err = xenbus_transaction_start(&xbt);
2834+ if (err) {
2835+ xenbus_dev_fatal(dev, err, "starting transaction");
2836+ goto destroy_blkring;
2837+ }
2838+
2839+ err = xenbus_printf(xbt, dev->nodename,
2840+ "ring-ref","%u", info->ring_ref);
2841+ if (err) {
2842+ message = "writing ring-ref";
2843+ goto abort_transaction;
2844+ }
2845+ err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
2846+ irq_to_evtchn_port(info->irq));
2847+ if (err) {
2848+ message = "writing event-channel";
2849+ goto abort_transaction;
2850+ }
2851+ err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
2852+ XEN_IO_PROTO_ABI_NATIVE);
2853+ if (err) {
2854+ message = "writing protocol";
2855+ goto abort_transaction;
2856+ }
2857+
2858+ err = xenbus_transaction_end(xbt, 0);
2859+ if (err) {
2860+ if (err == -EAGAIN)
2861+ goto again;
2862+ xenbus_dev_fatal(dev, err, "completing transaction");
2863+ goto destroy_blkring;
2864+ }
2865+
2866+ xenbus_switch_state(dev, XenbusStateInitialised);
2867+
2868+ return 0;
2869+
2870+ abort_transaction:
2871+ xenbus_transaction_end(xbt, 1);
2872+ if (message)
2873+ xenbus_dev_fatal(dev, err, "%s", message);
2874+ destroy_blkring:
2875+ blkif_free(info, 0);
2876+ out:
2877+ return err;
2878+}
2879+
2880+
2881+static int setup_blkring(struct xenbus_device *dev,
2882+ struct blkfront_info *info)
2883+{
2884+ blkif_sring_t *sring;
2885+ int err;
2886+
2887+ info->ring_ref = GRANT_INVALID_REF;
2888+
2889+ sring = (blkif_sring_t *)__get_free_page(GFP_NOIO | __GFP_HIGH);
2890+ if (!sring) {
2891+ xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
2892+ return -ENOMEM;
2893+ }
2894+ SHARED_RING_INIT(sring);
2895+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
2896+
2897+ err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
2898+ if (err < 0) {
2899+ free_page((unsigned long)sring);
2900+ info->ring.sring = NULL;
2901+ goto fail;
2902+ }
2903+ info->ring_ref = err;
2904+
2905+ err = bind_listening_port_to_irqhandler(
2906+ dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
2907+ if (err <= 0) {
2908+ xenbus_dev_fatal(dev, err,
2909+ "bind_listening_port_to_irqhandler");
2910+ goto fail;
2911+ }
2912+ info->irq = err;
2913+
2914+ return 0;
2915+fail:
2916+ blkif_free(info, 0);
2917+ return err;
2918+}
2919+
2920+
2921+/**
2922+ * Callback received when the backend's state changes.
2923+ */
2924+static void backend_changed(struct xenbus_device *dev,
2925+ enum xenbus_state backend_state)
2926+{
2927+ struct blkfront_info *info = dev->dev.driver_data;
2928+ struct block_device *bd;
2929+
2930+ DPRINTK("blkfront:backend_changed.\n");
2931+
2932+ switch (backend_state) {
2933+ case XenbusStateInitialising:
2934+ case XenbusStateInitWait:
2935+ case XenbusStateInitialised:
2936+ case XenbusStateReconfiguring:
2937+ case XenbusStateReconfigured:
2938+ case XenbusStateUnknown:
2939+ case XenbusStateClosed:
2940+ break;
2941+
2942+ case XenbusStateConnected:
2943+ connect(info);
2944+ break;
2945+
2946+ case XenbusStateClosing:
2947+ bd = bdget(info->dev);
2948+ if (bd == NULL)
2949+ xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
2950+
2951+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
2952+ down(&bd->bd_sem);
2953+#else
2954+ mutex_lock(&bd->bd_mutex);
2955+#endif
2956+ if (info->users > 0)
2957+ xenbus_dev_error(dev, -EBUSY,
2958+ "Device in use; refusing to close");
2959+ else
2960+ blkfront_closing(dev);
2961+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
2962+ up(&bd->bd_sem);
2963+#else
2964+ mutex_unlock(&bd->bd_mutex);
2965+#endif
2966+ bdput(bd);
2967+ break;
2968+ }
2969+}
2970+
2971+
2972+/* ** Connection ** */
2973+
2974+
2975+/*
2976+ * Invoked when the backend is finally 'ready' (and has produced
2977+ * the details about the physical device - #sectors, size, etc).
2978+ */
2979+static void connect(struct blkfront_info *info)
2980+{
2981+ unsigned long long sectors;
2982+ unsigned long sector_size;
2983+ unsigned int binfo;
2984+ int err;
2985+
2986+ if ((info->connected == BLKIF_STATE_CONNECTED) ||
2987+ (info->connected == BLKIF_STATE_SUSPENDED) )
2988+ return;
2989+
2990+ DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
2991+
2992+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
2993+ "sectors", "%Lu", &sectors,
2994+ "info", "%u", &binfo,
2995+ "sector-size", "%lu", &sector_size,
2996+ NULL);
2997+ if (err) {
2998+ xenbus_dev_fatal(info->xbdev, err,
2999+ "reading backend fields at %s",
3000+ info->xbdev->otherend);
3001+ return;
3002+ }
3003+
3004+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
3005+ "feature-barrier", "%lu", &info->feature_barrier,
3006+ NULL);
3007+ if (err)
3008+ info->feature_barrier = 0;
3009+
3010+ err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
3011+ if (err) {
3012+ xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
3013+ info->xbdev->otherend);
3014+ return;
3015+ }
3016+
3017+ err = xlvbd_sysfs_addif(info);
3018+ if (err) {
3019+ xenbus_dev_fatal(info->xbdev, err, "xlvbd_sysfs_addif at %s",
3020+ info->xbdev->otherend);
3021+ return;
3022+ }
3023+
3024+ (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
3025+
3026+ /* Kick pending requests. */
3027+ spin_lock_irq(&blkif_io_lock);
3028+ info->connected = BLKIF_STATE_CONNECTED;
3029+ kick_pending_request_queues(info);
3030+ spin_unlock_irq(&blkif_io_lock);
3031+
3032+ add_disk(info->gd);
3033+
3034+ info->is_ready = 1;
3035+}
3036+
3037+/**
3038+ * Handle the change of state of the backend to Closing. We must delete our
3039+ * device-layer structures now, to ensure that writes are flushed through to
3040+ * the backend. Once this is done, we can switch to Closed in
3041+ * acknowledgement.
3042+ */
3043+static void blkfront_closing(struct xenbus_device *dev)
3044+{
3045+ struct blkfront_info *info = dev->dev.driver_data;
3046+ unsigned long flags;
3047+
3048+ DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
3049+
3050+ if (info->rq == NULL)
3051+ goto out;
3052+
3053+ spin_lock_irqsave(&blkif_io_lock, flags);
3054+ /* No more blkif_request(). */
3055+ blk_stop_queue(info->rq);
3056+ /* No more gnttab callback work. */
3057+ gnttab_cancel_free_callback(&info->callback);
3058+ spin_unlock_irqrestore(&blkif_io_lock, flags);
3059+
3060+ /* Flush gnttab callback work. Must be done with no locks held. */
3061+ flush_scheduled_work();
3062+
3063+ xlvbd_sysfs_delif(info);
3064+
3065+ xlvbd_del(info);
3066+
3067+ out:
3068+ xenbus_frontend_closed(dev);
3069+}
3070+
3071+
3072+static int blkfront_remove(struct xenbus_device *dev)
3073+{
3074+ struct blkfront_info *info = dev->dev.driver_data;
3075+
3076+ DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
3077+
3078+ blkif_free(info, 0);
3079+
3080+ kfree(info);
3081+
3082+ return 0;
3083+}
3084+
3085+
3086+static inline int GET_ID_FROM_FREELIST(
3087+ struct blkfront_info *info)
3088+{
3089+ unsigned long free = info->shadow_free;
3090+ BUG_ON(free > BLK_RING_SIZE);
3091+ info->shadow_free = info->shadow[free].req.id;
3092+ info->shadow[free].req.id = 0x0fffffee; /* debug */
3093+ return free;
3094+}
3095+
3096+static inline void ADD_ID_TO_FREELIST(
3097+ struct blkfront_info *info, unsigned long id)
3098+{
3099+ info->shadow[id].req.id = info->shadow_free;
3100+ info->shadow[id].request = 0;
3101+ info->shadow_free = id;
3102+}
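/*
 * Illustrative sketch (editor's note, not part of the patch):
 * GET_ID_FROM_FREELIST() and ADD_ID_TO_FREELIST() above thread a free list
 * through the otherwise unused req.id fields of the shadow array.  A minimal
 * stand-alone model of the same scheme (the array size and names are
 * invented for the example):
 */
#include <stdio.h>

#define RING_SIZE 4

static unsigned long slot_next[RING_SIZE];
static unsigned long first_free;

static void freelist_init(void)
{
	unsigned long i;

	for (i = 0; i < RING_SIZE; i++)
		slot_next[i] = i + 1;		/* each slot points at the next */
	first_free = 0;
}

static unsigned long get_id(void)
{
	unsigned long id = first_free;

	first_free = slot_next[id];		/* pop the head of the list */
	return id;
}

static void put_id(unsigned long id)
{
	slot_next[id] = first_free;		/* push back onto the list */
	first_free = id;
}

int main(void)
{
	unsigned long a, b;

	freelist_init();
	a = get_id();
	b = get_id();
	printf("allocated %lu and %lu\n", a, b);
	put_id(a);
	printf("reallocated %lu\n", get_id());	/* returns the slot just freed */
	return 0;
}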
3103+
3104+static inline void flush_requests(struct blkfront_info *info)
3105+{
3106+ int notify;
3107+
3108+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
3109+
3110+ if (notify)
3111+ notify_remote_via_irq(info->irq);
3112+}
3113+
3114+static void kick_pending_request_queues(struct blkfront_info *info)
3115+{
3116+ if (!RING_FULL(&info->ring)) {
3117+ /* Re-enable calldowns. */
3118+ blk_start_queue(info->rq);
3119+ /* Kick things off immediately. */
3120+ do_blkif_request(info->rq);
3121+ }
3122+}
3123+
3124+static void blkif_restart_queue(void *arg)
3125+{
3126+ struct blkfront_info *info = (struct blkfront_info *)arg;
3127+ spin_lock_irq(&blkif_io_lock);
3128+ if (info->connected == BLKIF_STATE_CONNECTED)
3129+ kick_pending_request_queues(info);
3130+ spin_unlock_irq(&blkif_io_lock);
3131+}
3132+
3133+static void blkif_restart_queue_callback(void *arg)
3134+{
3135+ struct blkfront_info *info = (struct blkfront_info *)arg;
3136+ schedule_work(&info->work);
3137+}
3138+
3139+int blkif_open(struct inode *inode, struct file *filep)
3140+{
3141+ struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
3142+ info->users++;
3143+ return 0;
3144+}
3145+
3146+
3147+int blkif_release(struct inode *inode, struct file *filep)
3148+{
3149+ struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
3150+ info->users--;
3151+ if (info->users == 0) {
3152+ /* Check whether we have been instructed to close. We will
3153+ have ignored this request initially, as the device was
3154+ still mounted. */
3155+ struct xenbus_device * dev = info->xbdev;
3156+ enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
3157+
3158+ if (state == XenbusStateClosing && info->is_ready)
3159+ blkfront_closing(dev);
3160+ }
3161+ return 0;
3162+}
3163+
3164+
3165+int blkif_ioctl(struct inode *inode, struct file *filep,
3166+ unsigned command, unsigned long argument)
3167+{
3168+ int i;
3169+
3170+ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
3171+ command, (long)argument, inode->i_rdev);
3172+
3173+ switch (command) {
3174+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
3175+ case HDIO_GETGEO: {
3176+ struct block_device *bd = inode->i_bdev;
3177+ struct hd_geometry geo;
3178+ int ret;
3179+
3180+ if (!argument)
3181+ return -EINVAL;
3182+
3183+ geo.start = get_start_sect(bd);
3184+ ret = blkif_getgeo(bd, &geo);
3185+ if (ret)
3186+ return ret;
3187+
3188+ if (copy_to_user((struct hd_geometry __user *)argument, &geo,
3189+ sizeof(geo)))
3190+ return -EFAULT;
3191+
3192+ return 0;
3193+ }
3194+#endif
3195+ case CDROMMULTISESSION:
3196+ DPRINTK("FIXME: support multisession CDs later\n");
3197+ for (i = 0; i < sizeof(struct cdrom_multisession); i++)
3198+ if (put_user(0, (char __user *)(argument + i)))
3199+ return -EFAULT;
3200+ return 0;
3201+
3202+ case CDROM_GET_CAPABILITY: {
3203+ struct blkfront_info *info =
3204+ inode->i_bdev->bd_disk->private_data;
3205+ struct gendisk *gd = info->gd;
3206+ if (gd->flags & GENHD_FL_CD)
3207+ return 0;
3208+ return -EINVAL;
3209+ }
3210+ default:
3211+ /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
3212+ command);*/
3213+ return -EINVAL; /* same return as native Linux */
3214+ }
3215+
3216+ return 0;
3217+}
3218+
3219+
3220+int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
3221+{
3222+ /* We don't have real geometry info, but let's at least return
3223+ values consistent with the size of the device */
3224+ sector_t nsect = get_capacity(bd->bd_disk);
3225+ sector_t cylinders = nsect;
3226+
3227+ hg->heads = 0xff;
3228+ hg->sectors = 0x3f;
3229+ sector_div(cylinders, hg->heads * hg->sectors);
3230+ hg->cylinders = cylinders;
3231+ if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
3232+ hg->cylinders = 0xffff;
3233+ return 0;
3234+}
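/*
 * Illustrative sketch (editor's note, not part of the patch): blkif_getgeo()
 * above fakes a 255-head, 63-sector geometry and derives the cylinder count
 * from the capacity.  The same arithmetic as a stand-alone program (the
 * sample capacity is arbitrary):
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t nsect = 20971520ULL;		/* 10 GiB in 512-byte sectors */
	unsigned heads = 0xff, sectors = 0x3f;
	uint64_t cylinders = nsect / (heads * sectors);

	if ((cylinders + 1) * heads * sectors < nsect)
		cylinders = 0xffff;		/* clamp, as the driver does */

	printf("C/H/S = %llu/%u/%u\n",
	       (unsigned long long)cylinders, heads, sectors);
	return 0;
}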
3235+
3236+
3237+/*
3238+ * blkif_queue_request
3239+ *
3240+ * request block io
3241+ *
3242+ * id: for guest use only.
3243+ * operation: BLKIF_OP_{READ,WRITE,PROBE}
3244+ * buffer: buffer to read/write into. this should be a
3245+ * virtual address in the guest os.
3246+ */
3247+static int blkif_queue_request(struct request *req)
3248+{
3249+ struct blkfront_info *info = req->rq_disk->private_data;
3250+ unsigned long buffer_mfn;
3251+ blkif_request_t *ring_req;
3252+ struct bio *bio;
3253+ struct bio_vec *bvec;
3254+ int idx;
3255+ unsigned long id;
3256+ unsigned int fsect, lsect;
3257+ int ref;
3258+ grant_ref_t gref_head;
3259+
3260+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
3261+ return 1;
3262+
3263+ if (gnttab_alloc_grant_references(
3264+ BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
3265+ gnttab_request_free_callback(
3266+ &info->callback,
3267+ blkif_restart_queue_callback,
3268+ info,
3269+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
3270+ return 1;
3271+ }
3272+
3273+ /* Fill out a communications ring structure. */
3274+ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
3275+ id = GET_ID_FROM_FREELIST(info);
3276+ info->shadow[id].request = (unsigned long)req;
3277+
3278+ ring_req->id = id;
3279+ ring_req->sector_number = (blkif_sector_t)req->sector;
3280+ ring_req->handle = info->handle;
3281+
3282+ ring_req->operation = rq_data_dir(req) ?
3283+ BLKIF_OP_WRITE : BLKIF_OP_READ;
3284+ if (blk_barrier_rq(req))
3285+ ring_req->operation = BLKIF_OP_WRITE_BARRIER;
3286+
3287+ ring_req->nr_segments = 0;
3288+ rq_for_each_bio (bio, req) {
3289+ bio_for_each_segment (bvec, bio, idx) {
3290+ BUG_ON(ring_req->nr_segments
3291+ == BLKIF_MAX_SEGMENTS_PER_REQUEST);
3292+ buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
3293+ fsect = bvec->bv_offset >> 9;
3294+ lsect = fsect + (bvec->bv_len >> 9) - 1;
3295+ /* install a grant reference. */
3296+ ref = gnttab_claim_grant_reference(&gref_head);
3297+ BUG_ON(ref == -ENOSPC);
3298+
3299+ gnttab_grant_foreign_access_ref(
3300+ ref,
3301+ info->xbdev->otherend_id,
3302+ buffer_mfn,
3303+ rq_data_dir(req) ? GTF_readonly : 0 );
3304+
3305+ info->shadow[id].frame[ring_req->nr_segments] =
3306+ mfn_to_pfn(buffer_mfn);
3307+
3308+ ring_req->seg[ring_req->nr_segments] =
3309+ (struct blkif_request_segment) {
3310+ .gref = ref,
3311+ .first_sect = fsect,
3312+ .last_sect = lsect };
3313+
3314+ ring_req->nr_segments++;
3315+ }
3316+ }
3317+
3318+ info->ring.req_prod_pvt++;
3319+
3320+ /* Keep a private copy so we can reissue requests when recovering. */
3321+ info->shadow[id].req = *ring_req;
3322+
3323+ gnttab_free_grant_references(gref_head);
3324+
3325+ return 0;
3326+}
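/*
 * Illustrative sketch (editor's note, not part of the patch): the fsect/lsect
 * computation in blkif_queue_request() above turns a byte range within a page
 * into inclusive 512-byte sector indices for the grant segment.  Stand-alone
 * version of the same arithmetic (the byte range is an arbitrary example):
 */
#include <stdio.h>

int main(void)
{
	unsigned offset = 1024, len = 2048;	/* byte range inside the page */
	unsigned fsect = offset >> 9;		/* first 512-byte sector */
	unsigned lsect = fsect + (len >> 9) - 1;	/* last sector, inclusive */

	printf("sectors %u..%u\n", fsect, lsect);	/* prints 2..5 */
	return 0;
}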
3327+
3328+/*
3329+ * do_blkif_request
3330+ * read a block; request is in a request queue
3331+ */
3332+void do_blkif_request(request_queue_t *rq)
3333+{
3334+ struct blkfront_info *info = NULL;
3335+ struct request *req;
3336+ int queued;
3337+
3338+ DPRINTK("Entered do_blkif_request\n");
3339+
3340+ queued = 0;
3341+
3342+ while ((req = elv_next_request(rq)) != NULL) {
3343+ info = req->rq_disk->private_data;
3344+ if (!blk_fs_request(req)) {
3345+ end_request(req, 0);
3346+ continue;
3347+ }
3348+
3349+ if (RING_FULL(&info->ring))
3350+ goto wait;
3351+
3352+ DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
3353+ "(%u/%li) buffer:%p [%s]\n",
3354+ req, req->cmd, (long long)req->sector,
3355+ req->current_nr_sectors,
3356+ req->nr_sectors, req->buffer,
3357+ rq_data_dir(req) ? "write" : "read");
3358+
3359+
3360+ blkdev_dequeue_request(req);
3361+ if (blkif_queue_request(req)) {
3362+ blk_requeue_request(rq, req);
3363+ wait:
3364+ /* Avoid pointless unplugs. */
3365+ blk_stop_queue(rq);
3366+ break;
3367+ }
3368+
3369+ queued++;
3370+ }
3371+
3372+ if (queued != 0)
3373+ flush_requests(info);
3374+}
3375+
3376+
3377+static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
3378+{
3379+ struct request *req;
3380+ blkif_response_t *bret;
3381+ RING_IDX i, rp;
3382+ unsigned long flags;
3383+ struct blkfront_info *info = (struct blkfront_info *)dev_id;
3384+ int uptodate;
3385+
3386+ spin_lock_irqsave(&blkif_io_lock, flags);
3387+
3388+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
3389+ spin_unlock_irqrestore(&blkif_io_lock, flags);
3390+ return IRQ_HANDLED;
3391+ }
3392+
3393+ again:
3394+ rp = info->ring.sring->rsp_prod;
3395+ rmb(); /* Ensure we see queued responses up to 'rp'. */
3396+
3397+ for (i = info->ring.rsp_cons; i != rp; i++) {
3398+ unsigned long id;
3399+ int ret;
3400+
3401+ bret = RING_GET_RESPONSE(&info->ring, i);
3402+ id = bret->id;
3403+ req = (struct request *)info->shadow[id].request;
3404+
3405+ blkif_completion(&info->shadow[id]);
3406+
3407+ ADD_ID_TO_FREELIST(info, id);
3408+
3409+ uptodate = (bret->status == BLKIF_RSP_OKAY);
3410+ switch (bret->operation) {
3411+ case BLKIF_OP_WRITE_BARRIER:
3412+ if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
3413+ printk("blkfront: %s: write barrier op failed\n",
3414+ info->gd->disk_name);
3415+ uptodate = -EOPNOTSUPP;
3416+ info->feature_barrier = 0;
3417+ xlvbd_barrier(info);
3418+ }
3419+ /* fall through */
3420+ case BLKIF_OP_READ:
3421+ case BLKIF_OP_WRITE:
3422+ if (unlikely(bret->status != BLKIF_RSP_OKAY))
3423+ DPRINTK("Bad return from blkdev data "
3424+ "request: %x\n", bret->status);
3425+
3426+ ret = end_that_request_first(req, uptodate,
3427+ req->hard_nr_sectors);
3428+ BUG_ON(ret);
3429+ end_that_request_last(req, uptodate);
3430+ break;
3431+ default:
3432+ BUG();
3433+ }
3434+ }
3435+
3436+ info->ring.rsp_cons = i;
3437+
3438+ if (i != info->ring.req_prod_pvt) {
3439+ int more_to_do;
3440+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
3441+ if (more_to_do)
3442+ goto again;
3443+ } else
3444+ info->ring.sring->rsp_event = i + 1;
3445+
3446+ kick_pending_request_queues(info);
3447+
3448+ spin_unlock_irqrestore(&blkif_io_lock, flags);
3449+
3450+ return IRQ_HANDLED;
3451+}
3452+
3453+static void blkif_free(struct blkfront_info *info, int suspend)
3454+{
3455+ /* Prevent new requests being issued until we fix things up. */
3456+ spin_lock_irq(&blkif_io_lock);
3457+ info->connected = suspend ?
3458+ BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
3459+ /* No more blkif_request(). */
3460+ if (info->rq)
3461+ blk_stop_queue(info->rq);
3462+ /* No more gnttab callback work. */
3463+ gnttab_cancel_free_callback(&info->callback);
3464+ spin_unlock_irq(&blkif_io_lock);
3465+
3466+ /* Flush gnttab callback work. Must be done with no locks held. */
3467+ flush_scheduled_work();
3468+
3469+ /* Free resources associated with old device channel. */
3470+ if (info->ring_ref != GRANT_INVALID_REF) {
3471+ gnttab_end_foreign_access(info->ring_ref,
3472+ (unsigned long)info->ring.sring);
3473+ info->ring_ref = GRANT_INVALID_REF;
3474+ info->ring.sring = NULL;
3475+ }
3476+ if (info->irq)
3477+ unbind_from_irqhandler(info->irq, info);
3478+ info->irq = 0;
3479+}
3480+
3481+static void blkif_completion(struct blk_shadow *s)
3482+{
3483+ int i;
3484+ for (i = 0; i < s->req.nr_segments; i++)
3485+ gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
3486+}
3487+
3488+static void blkif_recover(struct blkfront_info *info)
3489+{
3490+ int i;
3491+ blkif_request_t *req;
3492+ struct blk_shadow *copy;
3493+ int j;
3494+
3495+ /* Stage 1: Make a safe copy of the shadow state. */
3496+ copy = kmalloc(sizeof(info->shadow), GFP_NOIO | __GFP_NOFAIL | __GFP_HIGH);
3497+ memcpy(copy, info->shadow, sizeof(info->shadow));
3498+
3499+ /* Stage 2: Set up free list. */
3500+ memset(&info->shadow, 0, sizeof(info->shadow));
3501+ for (i = 0; i < BLK_RING_SIZE; i++)
3502+ info->shadow[i].req.id = i+1;
3503+ info->shadow_free = info->ring.req_prod_pvt;
3504+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
3505+
3506+ /* Stage 3: Find pending requests and requeue them. */
3507+ for (i = 0; i < BLK_RING_SIZE; i++) {
3508+ /* Not in use? */
3509+ if (copy[i].request == 0)
3510+ continue;
3511+
3512+ /* Grab a request slot and copy shadow state into it. */
3513+ req = RING_GET_REQUEST(
3514+ &info->ring, info->ring.req_prod_pvt);
3515+ *req = copy[i].req;
3516+
3517+ /* We get a new request id, and must reset the shadow state. */
3518+ req->id = GET_ID_FROM_FREELIST(info);
3519+ memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
3520+
3521+ /* Rewrite any grant references invalidated by susp/resume. */
3522+ for (j = 0; j < req->nr_segments; j++)
3523+ gnttab_grant_foreign_access_ref(
3524+ req->seg[j].gref,
3525+ info->xbdev->otherend_id,
3526+ pfn_to_mfn(info->shadow[req->id].frame[j]),
3527+ rq_data_dir((struct request *)
3528+ info->shadow[req->id].request) ?
3529+ GTF_readonly : 0);
3530+ info->shadow[req->id].req = *req;
3531+
3532+ info->ring.req_prod_pvt++;
3533+ }
3534+
3535+ kfree(copy);
3536+
3537+ (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
3538+
3539+ spin_lock_irq(&blkif_io_lock);
3540+
3541+ /* Now safe for us to use the shared ring */
3542+ info->connected = BLKIF_STATE_CONNECTED;
3543+
3544+ /* Send off requeued requests */
3545+ flush_requests(info);
3546+
3547+ /* Kick any other new requests queued since we resumed */
3548+ kick_pending_request_queues(info);
3549+
3550+ spin_unlock_irq(&blkif_io_lock);
3551+}
3552+
3553+int blkfront_is_ready(struct xenbus_device *dev)
3554+{
3555+ struct blkfront_info *info = dev->dev.driver_data;
3556+
3557+ return info->is_ready;
3558+}
3559+
3560+
3561+/* ** Driver Registration ** */
3562+
3563+
3564+static const struct xenbus_device_id blkfront_ids[] = {
3565+ { "vbd" },
3566+ { "" }
3567+};
3568+MODULE_ALIAS("xen:vbd");
3569+
3570+static struct xenbus_driver blkfront = {
3571+ .name = "vbd",
3572+ .owner = THIS_MODULE,
3573+ .ids = blkfront_ids,
3574+ .probe = blkfront_probe,
3575+ .remove = blkfront_remove,
3576+ .resume = blkfront_resume,
3577+ .otherend_changed = backend_changed,
3578+ .is_ready = blkfront_is_ready,
3579+};
3580+
3581+
3582+static int __init xlblk_init(void)
3583+{
3584+ if (!is_running_on_xen())
3585+ return -ENODEV;
3586+
3587+ return xenbus_register_frontend(&blkfront);
3588+}
3589+module_init(xlblk_init);
3590+
3591+
3592+static void __exit xlblk_exit(void)
3593+{
3594+ return xenbus_unregister_driver(&blkfront);
3595+}
3596+module_exit(xlblk_exit);
3597+
3598+MODULE_LICENSE("Dual BSD/GPL");
3599Index: head-2008-11-25/drivers/xen/blkfront/block.h
3600===================================================================
3601--- /dev/null 1970-01-01 00:00:00.000000000 +0000
3602+++ head-2008-11-25/drivers/xen/blkfront/block.h 2008-08-07 12:44:36.000000000 +0200
3603@@ -0,0 +1,158 @@
3604+/******************************************************************************
3605+ * block.h
3606+ *
3607+ * Shared definitions between all levels of XenLinux Virtual block devices.
3608+ *
3609+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
3610+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
3611+ * Copyright (c) 2004-2005, Christian Limpach
3612+ *
3613+ * This program is free software; you can redistribute it and/or
3614+ * modify it under the terms of the GNU General Public License version 2
3615+ * as published by the Free Software Foundation; or, when distributed
3616+ * separately from the Linux kernel or incorporated into other
3617+ * software packages, subject to the following license:
3618+ *
3619+ * Permission is hereby granted, free of charge, to any person obtaining a copy
3620+ * of this source file (the "Software"), to deal in the Software without
3621+ * restriction, including without limitation the rights to use, copy, modify,
3622+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
3623+ * and to permit persons to whom the Software is furnished to do so, subject to
3624+ * the following conditions:
3625+ *
3626+ * The above copyright notice and this permission notice shall be included in
3627+ * all copies or substantial portions of the Software.
3628+ *
3629+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3630+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3631+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
3632+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
3633+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
3634+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
3635+ * IN THE SOFTWARE.
3636+ */
3637+
3638+#ifndef __XEN_DRIVERS_BLOCK_H__
3639+#define __XEN_DRIVERS_BLOCK_H__
3640+
3641+#include <linux/version.h>
3642+#include <linux/module.h>
3643+#include <linux/kernel.h>
3644+#include <linux/sched.h>
3645+#include <linux/slab.h>
3646+#include <linux/string.h>
3647+#include <linux/errno.h>
3648+#include <linux/fs.h>
3649+#include <linux/hdreg.h>
3650+#include <linux/blkdev.h>
3651+#include <linux/major.h>
3652+#include <asm/hypervisor.h>
3653+#include <xen/xenbus.h>
3654+#include <xen/gnttab.h>
3655+#include <xen/interface/xen.h>
3656+#include <xen/interface/io/blkif.h>
3657+#include <xen/interface/io/ring.h>
3658+#include <asm/io.h>
3659+#include <asm/atomic.h>
3660+#include <asm/uaccess.h>
3661+
3662+#define DPRINTK(_f, _a...) pr_debug(_f, ## _a)
3663+
3664+#if 0
3665+#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
3666+#else
3667+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
3668+#endif
3669+
3670+struct xlbd_type_info
3671+{
3672+ int partn_shift;
3673+ int disks_per_major;
3674+ char *devname;
3675+ char *diskname;
3676+};
3677+
3678+struct xlbd_major_info
3679+{
3680+ int major;
3681+ int index;
3682+ int usage;
3683+ struct xlbd_type_info *type;
3684+};
3685+
3686+struct blk_shadow {
3687+ blkif_request_t req;
3688+ unsigned long request;
3689+ unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
3690+};
3691+
3692+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
3693+
3694+/*
3695+ * We have one of these per vbd, whether ide, scsi or 'other'. They
3696+ * hang in private_data off the gendisk structure. We may end up
3697+ * putting all kinds of interesting stuff here :-)
3698+ */
3699+struct blkfront_info
3700+{
3701+ struct xenbus_device *xbdev;
3702+ dev_t dev;
3703+ struct gendisk *gd;
3704+ int vdevice;
3705+ blkif_vdev_t handle;
3706+ int connected;
3707+ int ring_ref;
3708+ blkif_front_ring_t ring;
3709+ unsigned int irq;
3710+ struct xlbd_major_info *mi;
3711+ request_queue_t *rq;
3712+ struct work_struct work;
3713+ struct gnttab_free_callback callback;
3714+ struct blk_shadow shadow[BLK_RING_SIZE];
3715+ unsigned long shadow_free;
3716+ int feature_barrier;
3717+ int is_ready;
3718+
3719+ /**
3720+ * The number of people holding this device open. We won't allow a
3721+ * hot-unplug unless this is 0.
3722+ */
3723+ int users;
3724+};
3725+
3726+extern spinlock_t blkif_io_lock;
3727+
3728+extern int blkif_open(struct inode *inode, struct file *filep);
3729+extern int blkif_release(struct inode *inode, struct file *filep);
3730+extern int blkif_ioctl(struct inode *inode, struct file *filep,
3731+ unsigned command, unsigned long argument);
3732+extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
3733+extern int blkif_check(dev_t dev);
3734+extern int blkif_revalidate(dev_t dev);
3735+extern void do_blkif_request (request_queue_t *rq);
3736+
3737+/* Virtual block-device subsystem. */
3738+/* Note that xlvbd_add doesn't call add_disk for you: you're expected
3739+ to call add_disk on info->gd once the disk is properly connected
3740+ up. */
3741+int xlvbd_add(blkif_sector_t capacity, int device,
3742+ u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
3743+void xlvbd_del(struct blkfront_info *info);
3744+int xlvbd_barrier(struct blkfront_info *info);
3745+
3746+#ifdef CONFIG_SYSFS
3747+int xlvbd_sysfs_addif(struct blkfront_info *info);
3748+void xlvbd_sysfs_delif(struct blkfront_info *info);
3749+#else
3750+static inline int xlvbd_sysfs_addif(struct blkfront_info *info)
3751+{
3752+ return 0;
3753+}
3754+
3755+static inline void xlvbd_sysfs_delif(struct blkfront_info *info)
3756+{
3757+ ;
3758+}
3759+#endif
3760+
3761+#endif /* __XEN_DRIVERS_BLOCK_H__ */
3762Index: head-2008-11-25/drivers/xen/blkfront/vbd.c
3763===================================================================
3764--- /dev/null 1970-01-01 00:00:00.000000000 +0000
3765+++ head-2008-11-25/drivers/xen/blkfront/vbd.c 2008-08-07 12:44:36.000000000 +0200
3766@@ -0,0 +1,460 @@
3767+/******************************************************************************
3768+ * vbd.c
3769+ *
3770+ * XenLinux virtual block-device driver (xvd).
3771+ *
3772+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
3773+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
3774+ * Copyright (c) 2004-2005, Christian Limpach
3775+ *
3776+ * This program is free software; you can redistribute it and/or
3777+ * modify it under the terms of the GNU General Public License version 2
3778+ * as published by the Free Software Foundation; or, when distributed
3779+ * separately from the Linux kernel or incorporated into other
3780+ * software packages, subject to the following license:
3781+ *
3782+ * Permission is hereby granted, free of charge, to any person obtaining a copy
3783+ * of this source file (the "Software"), to deal in the Software without
3784+ * restriction, including without limitation the rights to use, copy, modify,
3785+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
3786+ * and to permit persons to whom the Software is furnished to do so, subject to
3787+ * the following conditions:
3788+ *
3789+ * The above copyright notice and this permission notice shall be included in
3790+ * all copies or substantial portions of the Software.
3791+ *
3792+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3793+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3794+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
3795+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
3796+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
3797+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
3798+ * IN THE SOFTWARE.
3799+ */
3800+
3801+#include "block.h"
3802+#include <linux/blkdev.h>
3803+#include <linux/list.h>
3804+
3805+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
3806+#include <xen/platform-compat.h>
3807+#endif
3808+
3809+#define BLKIF_MAJOR(dev) ((dev)>>8)
3810+#define BLKIF_MINOR(dev) ((dev) & 0xff)
3811+
3812+#define EXT_SHIFT 28
3813+#define EXTENDED (1<<EXT_SHIFT)
3814+#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
3815+#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
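To make the encoding concrete, here is a small standalone sketch (not part of the patch) that reuses the macros above; the sample vdevice values 0x0801 and EXTENDED|256 are illustrative only.

/* Standalone sketch (not part of the patch): decode blkfront vdevice
 * numbers with the same macros as above.  Sample inputs are illustrative. */
#include <stdio.h>

#define BLKIF_MAJOR(dev)      ((dev) >> 8)
#define BLKIF_MINOR(dev)      ((dev) & 0xff)
#define EXT_SHIFT             28
#define EXTENDED              (1 << EXT_SHIFT)
#define VDEV_IS_EXTENDED(dev) ((dev) & (EXTENDED))
#define BLKIF_MINOR_EXT(dev)  ((dev) & (~EXTENDED))

static void decode(unsigned int vdevice)
{
	if (!VDEV_IS_EXTENDED(vdevice))
		printf("0x%x -> major %u, minor %u\n", vdevice,
		       BLKIF_MAJOR(vdevice), BLKIF_MINOR(vdevice));
	else	/* extended scheme: xlvbd_add() always uses major 202 */
		printf("0x%x -> major 202, minor %u\n", vdevice,
		       BLKIF_MINOR_EXT(vdevice));
}

int main(void)
{
	decode(0x0801);         /* classic encoding: major 8, minor 1      */
	decode(EXTENDED | 256); /* extended encoding: major 202, minor 256 */
	return 0;
}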
3816+
3817+/*
3818+ * For convenience we distinguish between ide, scsi and 'other' (i.e.,
3819+ * potentially combinations of the two) in the naming scheme and in a few other
3820+ * places.
3821+ */
3822+
3823+#define NUM_IDE_MAJORS 10
3824+#define NUM_SCSI_MAJORS 17
3825+#define NUM_VBD_MAJORS 2
3826+
3827+static struct xlbd_type_info xlbd_ide_type = {
3828+ .partn_shift = 6,
3829+ .disks_per_major = 2,
3830+ .devname = "ide",
3831+ .diskname = "hd",
3832+};
3833+
3834+static struct xlbd_type_info xlbd_scsi_type = {
3835+ .partn_shift = 4,
3836+ .disks_per_major = 16,
3837+ .devname = "sd",
3838+ .diskname = "sd",
3839+};
3840+
3841+static struct xlbd_type_info xlbd_vbd_type = {
3842+ .partn_shift = 4,
3843+ .disks_per_major = 16,
3844+ .devname = "xvd",
3845+ .diskname = "xvd",
3846+};
3847+
3848+static struct xlbd_type_info xlbd_vbd_type_ext = {
3849+ .partn_shift = 8,
3850+ .disks_per_major = 256,
3851+ .devname = "xvd",
3852+ .diskname = "xvd",
3853+};
3854+
3855+static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
3856+ NUM_VBD_MAJORS];
3857+
3858+#define XLBD_MAJOR_IDE_START 0
3859+#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS)
3860+#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
3861+
3862+#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
3863+#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
3864+#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
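The *_RANGE macros rely on GCC's case-range extension, the same construct xlbd_alloc_major_info() uses below. A minimal standalone sketch (classify_index() is a hypothetical helper, not part of the driver) shows how the 29 slots of major_info[] split up.

/* Sketch of the major_info[] index layout; classify_index() is a
 * hypothetical helper and, like the driver, uses GCC case ranges. */
#include <stdio.h>

#define NUM_IDE_MAJORS  10
#define NUM_SCSI_MAJORS 17
#define NUM_VBD_MAJORS  2

#define XLBD_MAJOR_IDE_START  0
#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS)
#define XLBD_MAJOR_VBD_START  (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)

static const char *classify_index(int index)
{
	switch (index) {
	case XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1:
		return "ide (hd*)";
	case XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1:
		return "scsi (sd*)";
	case XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1:
		return "xen vbd (xvd*)";
	default:
		return "out of range";
	}
}

int main(void)
{
	static const int samples[] = { 0, 9, 10, 26, 27, 28, 29 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("index %2d -> %s\n", samples[i], classify_index(samples[i]));
	return 0;
}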
3865+
3866+static struct block_device_operations xlvbd_block_fops =
3867+{
3868+ .owner = THIS_MODULE,
3869+ .open = blkif_open,
3870+ .release = blkif_release,
3871+ .ioctl = blkif_ioctl,
3872+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
3873+ .getgeo = blkif_getgeo
3874+#endif
3875+};
3876+
3877+DEFINE_SPINLOCK(blkif_io_lock);
3878+
3879+static struct xlbd_major_info *
3880+xlbd_alloc_major_info(int major, int minor, int index)
3881+{
3882+ struct xlbd_major_info *ptr;
3883+ int do_register;
3884+
3885+ ptr = kzalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
3886+ if (ptr == NULL)
3887+ return NULL;
3888+
3889+ ptr->major = major;
3890+ do_register = 1;
3891+
3892+ switch (index) {
3893+ case XLBD_MAJOR_IDE_RANGE:
3894+ ptr->type = &xlbd_ide_type;
3895+ ptr->index = index - XLBD_MAJOR_IDE_START;
3896+ break;
3897+ case XLBD_MAJOR_SCSI_RANGE:
3898+ ptr->type = &xlbd_scsi_type;
3899+ ptr->index = index - XLBD_MAJOR_SCSI_START;
3900+ break;
3901+ case XLBD_MAJOR_VBD_RANGE:
3902+ ptr->index = 0;
3903+ if ((index - XLBD_MAJOR_VBD_START) == 0)
3904+ ptr->type = &xlbd_vbd_type;
3905+ else
3906+ ptr->type = &xlbd_vbd_type_ext;
3907+
3908+ /*
3909+ * if someone already registered block major 202,
3910+ * don't try to register it again
3911+ */
3912+ if (major_info[XLBD_MAJOR_VBD_START] != NULL)
3913+ do_register = 0;
3914+ break;
3915+ }
3916+
3917+ if (do_register) {
3918+ if (register_blkdev(ptr->major, ptr->type->devname)) {
3919+ kfree(ptr);
3920+ return NULL;
3921+ }
3922+
3923+ printk("xen-vbd: registered block device major %i\n", ptr->major);
3924+ }
3925+
3926+ major_info[index] = ptr;
3927+ return ptr;
3928+}
3929+
3930+static struct xlbd_major_info *
3931+xlbd_get_major_info(int major, int minor, int vdevice)
3932+{
3933+ struct xlbd_major_info *mi;
3934+ int index;
3935+
3936+ switch (major) {
3937+ case IDE0_MAJOR: index = 0; break;
3938+ case IDE1_MAJOR: index = 1; break;
3939+ case IDE2_MAJOR: index = 2; break;
3940+ case IDE3_MAJOR: index = 3; break;
3941+ case IDE4_MAJOR: index = 4; break;
3942+ case IDE5_MAJOR: index = 5; break;
3943+ case IDE6_MAJOR: index = 6; break;
3944+ case IDE7_MAJOR: index = 7; break;
3945+ case IDE8_MAJOR: index = 8; break;
3946+ case IDE9_MAJOR: index = 9; break;
3947+ case SCSI_DISK0_MAJOR: index = 10; break;
3948+ case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
3949+ index = 11 + major - SCSI_DISK1_MAJOR;
3950+ break;
3951+ case SCSI_DISK8_MAJOR ... SCSI_DISK15_MAJOR:
3952+ index = 18 + major - SCSI_DISK8_MAJOR;
3953+ break;
3954+ case SCSI_CDROM_MAJOR: index = 26; break;
3955+ default:
3956+ if (!VDEV_IS_EXTENDED(vdevice))
3957+ index = 27;
3958+ else
3959+ index = 28;
3960+ break;
3961+ }
3962+
3963+ mi = ((major_info[index] != NULL) ? major_info[index] :
3964+ xlbd_alloc_major_info(major, minor, index));
3965+ if (mi)
3966+ mi->usage++;
3967+ return mi;
3968+}
3969+
3970+static void
3971+xlbd_put_major_info(struct xlbd_major_info *mi)
3972+{
3973+ mi->usage--;
3974+ /* XXX: release major if 0 */
3975+}
3976+
3977+static int
3978+xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
3979+{
3980+ request_queue_t *rq;
3981+
3982+ rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
3983+ if (rq == NULL)
3984+ return -1;
3985+
3986+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
3987+ elevator_init(rq, "noop");
3988+#else
3989+ elevator_init(rq, &elevator_noop);
3990+#endif
3991+
3992+ /* Hard sector size and max sectors impersonate the equiv. hardware. */
3993+ blk_queue_hardsect_size(rq, sector_size);
3994+ blk_queue_max_sectors(rq, 512);
3995+
3996+ /* Each segment in a request is up to an aligned page in size. */
3997+ blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
3998+ blk_queue_max_segment_size(rq, PAGE_SIZE);
3999+
4000+ /* Ensure a merged request will fit in a single I/O ring slot. */
4001+ blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
4002+ blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
4003+
4004+ /* Make sure buffer addresses are sector-aligned. */
4005+ blk_queue_dma_alignment(rq, 511);
4006+
4007+ /* Make sure we don't use bounce buffers. */
4008+ blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
4009+
4010+ gd->queue = rq;
4011+
4012+ return 0;
4013+}
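As a rough sanity check of these limits (a sketch only; BLKIF_MAX_SEGMENTS_PER_REQUEST = 11 and a 4 KiB page size are assumptions taken from the usual blkif headers, not from this hunk): the segment caps, not the 512-sector cap, bound a single merged request.

/* Back-of-the-envelope check of the limits set above.  The values of
 * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) and the page size (4096) are
 * assumptions taken from the usual blkif headers, not from this hunk. */
#include <stdio.h>

#define ASSUMED_PAGE_SIZE 4096UL
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11

int main(void)
{
	unsigned long max_bytes =
		BLKIF_MAX_SEGMENTS_PER_REQUEST * ASSUMED_PAGE_SIZE;

	/* 11 page-sized segments -> at most 45056 bytes (44 KiB) per request,
	 * i.e. 88 sectors of 512 bytes, well under the 512-sector cap. */
	printf("max transfer per ring request: %lu bytes (%lu sectors)\n",
	       max_bytes, max_bytes / 512);
	return 0;
}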
4014+
4015+static int
4016+xlvbd_alloc_gendisk(int major, int minor, blkif_sector_t capacity, int vdevice,
4017+ u16 vdisk_info, u16 sector_size,
4018+ struct blkfront_info *info)
4019+{
4020+ struct gendisk *gd;
4021+ struct xlbd_major_info *mi;
4022+ int nr_minors = 1;
4023+ int err = -ENODEV;
4024+ unsigned int offset;
4025+
4026+ BUG_ON(info->gd != NULL);
4027+ BUG_ON(info->mi != NULL);
4028+ BUG_ON(info->rq != NULL);
4029+
4030+ mi = xlbd_get_major_info(major, minor, vdevice);
4031+ if (mi == NULL)
4032+ goto out;
4033+ info->mi = mi;
4034+
4035+ if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
4036+ nr_minors = 1 << mi->type->partn_shift;
4037+
4038+ gd = alloc_disk(nr_minors);
4039+ if (gd == NULL)
4040+ goto out;
4041+
4042+ offset = mi->index * mi->type->disks_per_major +
4043+ (minor >> mi->type->partn_shift);
4044+ if (nr_minors > 1) {
4045+ if (offset < 26) {
4046+ sprintf(gd->disk_name, "%s%c",
4047+ mi->type->diskname, 'a' + offset );
4048+ }
4049+ else {
4050+ sprintf(gd->disk_name, "%s%c%c",
4051+ mi->type->diskname,
4052+ 'a' + ((offset/26)-1), 'a' + (offset%26) );
4053+ }
4054+ }
4055+ else {
4056+ if (offset < 26) {
4057+ sprintf(gd->disk_name, "%s%c%d",
4058+ mi->type->diskname,
4059+ 'a' + offset,
4060+ minor & ((1 << mi->type->partn_shift) - 1));
4061+ }
4062+ else {
4063+ sprintf(gd->disk_name, "%s%c%c%d",
4064+ mi->type->diskname,
4065+ 'a' + ((offset/26)-1), 'a' + (offset%26),
4066+ minor & ((1 << mi->type->partn_shift) - 1));
4067+ }
4068+ }
4069+
4070+ gd->major = mi->major;
4071+ gd->first_minor = minor;
4072+ gd->fops = &xlvbd_block_fops;
4073+ gd->private_data = info;
4074+ gd->driverfs_dev = &(info->xbdev->dev);
4075+ set_capacity(gd, capacity);
4076+
4077+ if (xlvbd_init_blk_queue(gd, sector_size)) {
4078+ del_gendisk(gd);
4079+ goto out;
4080+ }
4081+
4082+ info->rq = gd->queue;
4083+ info->gd = gd;
4084+
4085+ if (info->feature_barrier)
4086+ xlvbd_barrier(info);
4087+
4088+ if (vdisk_info & VDISK_READONLY)
4089+ set_disk_ro(gd, 1);
4090+
4091+ if (vdisk_info & VDISK_REMOVABLE)
4092+ gd->flags |= GENHD_FL_REMOVABLE;
4093+
4094+ if (vdisk_info & VDISK_CDROM)
4095+ gd->flags |= GENHD_FL_CD;
4096+
4097+ return 0;
4098+
4099+ out:
4100+ if (mi)
4101+ xlbd_put_major_info(mi);
4102+ info->mi = NULL;
4103+ return err;
4104+}
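The name formatting above is easier to follow with concrete values; the sketch below (format_disk_name() is a hypothetical helper, partn_shift = 4 as in the non-extended sd/xvd types) reproduces the same logic outside the kernel.

/* Standalone reproduction of the disk-name formatting above;
 * format_disk_name() is a hypothetical helper for illustration only. */
#include <stdio.h>

static void format_disk_name(char *buf, const char *diskname,
			     unsigned int offset, unsigned int partn_shift,
			     unsigned int minor)
{
	unsigned int part = minor & ((1 << partn_shift) - 1);

	if (part == 0) {		/* whole disk: no partition suffix */
		if (offset < 26)
			sprintf(buf, "%s%c", diskname, 'a' + offset);
		else
			sprintf(buf, "%s%c%c", diskname,
				'a' + ((offset / 26) - 1), 'a' + (offset % 26));
	} else {			/* partition: append its number */
		if (offset < 26)
			sprintf(buf, "%s%c%d", diskname, 'a' + offset, part);
		else
			sprintf(buf, "%s%c%c%d", diskname,
				'a' + ((offset / 26) - 1), 'a' + (offset % 26),
				part);
	}
}

int main(void)
{
	char name[32];

	format_disk_name(name, "xvd", 0, 4, 0);   /* -> "xvda"  */
	puts(name);
	format_disk_name(name, "xvd", 26, 4, 0);  /* -> "xvdaa" */
	puts(name);
	format_disk_name(name, "xvd", 1, 4, 3);   /* -> "xvdb3" */
	puts(name);
	return 0;
}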
4105+
4106+int
4107+xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
4108+ u16 sector_size, struct blkfront_info *info)
4109+{
4110+ struct block_device *bd;
4111+ int err = 0;
4112+ int major, minor;
4113+
4114+ if ((vdevice>>EXT_SHIFT) > 1) {
4115+ /* this is above the extended range; something is wrong */
4116+ printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
4117+ return -ENODEV;
4118+ }
4119+
4120+ if (!VDEV_IS_EXTENDED(vdevice)) {
4121+ major = BLKIF_MAJOR(vdevice);
4122+ minor = BLKIF_MINOR(vdevice);
4123+ }
4124+ else {
4125+ major = 202;
4126+ minor = BLKIF_MINOR_EXT(vdevice);
4127+ }
4128+
4129+ info->dev = MKDEV(major, minor);
4130+ bd = bdget(info->dev);
4131+ if (bd == NULL)
4132+ return -ENODEV;
4133+
4134+ err = xlvbd_alloc_gendisk(major, minor, capacity, vdevice, vdisk_info,
4135+ sector_size, info);
4136+
4137+ bdput(bd);
4138+ return err;
4139+}
4140+
4141+void
4142+xlvbd_del(struct blkfront_info *info)
4143+{
4144+ if (info->mi == NULL)
4145+ return;
4146+
4147+ BUG_ON(info->gd == NULL);
4148+ del_gendisk(info->gd);
4149+ put_disk(info->gd);
4150+ info->gd = NULL;
4151+
4152+ xlbd_put_major_info(info->mi);
4153+ info->mi = NULL;
4154+
4155+ BUG_ON(info->rq == NULL);
4156+ blk_cleanup_queue(info->rq);
4157+ info->rq = NULL;
4158+}
4159+
4160+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
4161+int
4162+xlvbd_barrier(struct blkfront_info *info)
4163+{
4164+ int err;
4165+
4166+ err = blk_queue_ordered(info->rq,
4167+ info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL);
4168+ if (err)
4169+ return err;
4170+ printk(KERN_INFO "blkfront: %s: barriers %s\n",
4171+ info->gd->disk_name, info->feature_barrier ? "enabled" : "disabled");
4172+ return 0;
4173+}
4174+#else
4175+int
4176+xlvbd_barrier(struct blkfront_info *info)
4177+{
4178+ printk(KERN_INFO "blkfront: %s: barriers disabled\n", info->gd->disk_name);
4179+ return -ENOSYS;
4180+}
4181+#endif
4182+
4183+#ifdef CONFIG_SYSFS
4184+static ssize_t show_media(struct device *dev,
4185+ struct device_attribute *attr, char *buf)
4186+{
4187+ struct xenbus_device *xendev = to_xenbus_device(dev);
4188+ struct blkfront_info *info = xendev->dev.driver_data;
4189+
4190+ if (info->gd->flags & GENHD_FL_CD)
4191+ return sprintf(buf, "cdrom\n");
4192+ return sprintf(buf, "disk\n");
4193+}
4194+
4195+static struct device_attribute xlvbd_attrs[] = {
4196+ __ATTR(media, S_IRUGO, show_media, NULL),
4197+};
4198+
4199+int xlvbd_sysfs_addif(struct blkfront_info *info)
4200+{
4201+ int i;
4202+ int error = 0;
4203+
4204+ for (i = 0; i < ARRAY_SIZE(xlvbd_attrs); i++) {
4205+ error = device_create_file(info->gd->driverfs_dev,
4206+ &xlvbd_attrs[i]);
4207+ if (error)
4208+ goto fail;
4209+ }
4210+ return 0;
4211+
4212+fail:
4213+ while (--i >= 0)
4214+ device_remove_file(info->gd->driverfs_dev, &xlvbd_attrs[i]);
4215+ return error;
4216+}
4217+
4218+void xlvbd_sysfs_delif(struct blkfront_info *info)
4219+{
4220+ int i;
4221+
4222+ for (i = 0; i < ARRAY_SIZE(xlvbd_attrs); i++)
4223+ device_remove_file(info->gd->driverfs_dev, &xlvbd_attrs[i]);
4224+}
4225+
4226+#endif /* CONFIG_SYSFS */
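The attribute lands on the xenbus device node, so it can be read back from sysfs; in the sketch below the device path is an assumption (the real node name depends on the virtual-device id).

/* Userspace sketch: read the 'media' attribute defined above.  The device
 * path is an example only; the real node name depends on the vbd id. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/bus/xen/devices/vbd-51712/media";
	char buf[16];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("media: %s", buf);	/* "disk" or "cdrom" */
	fclose(f);
	return 0;
}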
4227Index: head-2008-11-25/drivers/xen/blktap/Makefile
4228===================================================================
4229--- /dev/null 1970-01-01 00:00:00.000000000 +0000
4230+++ head-2008-11-25/drivers/xen/blktap/Makefile 2007-06-12 13:13:44.000000000 +0200
4231@@ -0,0 +1,5 @@
4232+LINUXINCLUDE += -I../xen/include/public/io
4233+
4234+obj-$(CONFIG_XEN_BLKDEV_TAP) := xenblktap.o
4235+
4236+xenblktap-y := xenbus.o interface.o blktap.o
4237Index: head-2008-11-25/drivers/xen/blktap/blktap.c
4238===================================================================
4239--- /dev/null 1970-01-01 00:00:00.000000000 +0000
4240+++ head-2008-11-25/drivers/xen/blktap/blktap.c 2008-11-10 11:44:21.000000000 +0100
4241@@ -0,0 +1,1704 @@
4242+/******************************************************************************
4243+ * drivers/xen/blktap/blktap.c
4244+ *
4245+ * Back-end driver for user level virtual block devices. This portion of the
4246+ * driver exports a 'unified' block-device interface that can be accessed
4247+ * by any operating system that implements a compatible front end. Requests
4248+ * are remapped to a user-space memory region.
4249+ *
4250+ * Based on the blkback driver code.
4251+ *
4252+ * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
4253+ *
4254+ * Clean ups and fix ups:
4255+ * Copyright (c) 2006, Steven Rostedt - Red Hat, Inc.
4256+ *
4257+ * This program is free software; you can redistribute it and/or
4258+ * modify it under the terms of the GNU General Public License version 2
4259+ * as published by the Free Software Foundation; or, when distributed
4260+ * separately from the Linux kernel or incorporated into other
4261+ * software packages, subject to the following license:
4262+ *
4263+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4264+ * of this source file (the "Software"), to deal in the Software without
4265+ * restriction, including without limitation the rights to use, copy, modify,
4266+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
4267+ * and to permit persons to whom the Software is furnished to do so, subject to
4268+ * the following conditions:
4269+ *
4270+ * The above copyright notice and this permission notice shall be included in
4271+ * all copies or substantial portions of the Software.
4272+ *
4273+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4274+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4275+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
4276+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
4277+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
4278+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
4279+ * IN THE SOFTWARE.
4280+ */
4281+
4282+#include <linux/spinlock.h>
4283+#include <linux/kthread.h>
4284+#include <linux/list.h>
4285+#include <asm/hypervisor.h>
4286+#include "common.h"
4287+#include <xen/balloon.h>
4288+#include <xen/driver_util.h>
4289+#include <linux/kernel.h>
4290+#include <linux/fs.h>
4291+#include <linux/mm.h>
4292+#include <linux/errno.h>
4293+#include <linux/major.h>
4294+#include <linux/gfp.h>
4295+#include <linux/poll.h>
4296+#include <linux/delay.h>
4297+#include <asm/tlbflush.h>
4298+
4299+#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */
4300+#define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */
4301+
4302+/*
4303+ * The maximum number of requests that can be outstanding at any time
4304+ * is determined by
4305+ *
4306+ * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST]
4307+ *
4308+ * where mmap_alloc < MAX_DYNAMIC_MEM.
4309+ *
4310+ * TODO:
4311+ * mmap_alloc is initialised to 2 and should be adjustable on the fly via
4312+ * sysfs.
4313+ */
4314+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
4315+#define MAX_DYNAMIC_MEM BLK_RING_SIZE
4316+#define MAX_PENDING_REQS BLK_RING_SIZE
4317+#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
4318+#define MMAP_VADDR(_start, _req,_seg) \
4319+ (_start + \
4320+ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
4321+ ((_seg) * PAGE_SIZE))
4322+static int blkif_reqs = MAX_PENDING_REQS;
4323+static int mmap_pages = MMAP_PAGES;
4324+
4325+#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we
4326+ * have a bunch of pages reserved for shared
4327+ * memory rings.
4328+ */
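To put numbers on the formula above: assuming BLK_RING_SIZE = 32 (a 4 KiB page) and BLKIF_MAX_SEGMENTS_PER_REQUEST = 11, neither of which is defined in this hunk, each mmap_alloc slot backs 352 data pages and userspace must map 353 pages in total. A standalone sketch of the arithmetic:

/* Worked sizing example for the blktap mmap area.  BLK_RING_SIZE = 32
 * (4 KiB page) and BLKIF_MAX_SEGMENTS_PER_REQUEST = 11 are assumptions
 * taken from the standard blkif headers, not from this hunk. */
#include <stdio.h>

#define ASSUMED_PAGE_SIZE 4096UL
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
#define BLK_RING_SIZE    32	/* assumed __RING_SIZE() result */
#define MAX_PENDING_REQS BLK_RING_SIZE
#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define RING_PAGES 1

int main(void)
{
	/* Per mmap_alloc slot: 32 requests x 11 segments = 352 data pages. */
	printf("data pages per slot: %d\n", MMAP_PAGES);
	/* blktap_mmap() insists userspace maps RING_PAGES + MMAP_PAGES pages. */
	printf("total mmap size    : %lu bytes\n",
	       (RING_PAGES + MMAP_PAGES) * ASSUMED_PAGE_SIZE);
	return 0;
}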
4329+
4330+/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
4331+typedef struct domid_translate {
4332+ unsigned short domid;
4333+ unsigned short busid;
4334+} domid_translate_t ;
4335+
4336+typedef struct domid_translate_ext {
4337+ unsigned short domid;
4338+ u32 busid;
4339+} domid_translate_ext_t ;
4340+
4341+/*Data struct associated with each of the tapdisk devices*/
4342+typedef struct tap_blkif {
4343+ struct vm_area_struct *vma; /*Shared memory area */
4344+ unsigned long rings_vstart; /*Kernel memory mapping */
4345+ unsigned long user_vstart; /*User memory mapping */
4346+ unsigned long dev_inuse; /*One process opens device at a time. */
4347+ unsigned long dev_pending; /*In process of being opened */
4348+ unsigned long ring_ok; /*make this ring->state */
4349+ blkif_front_ring_t ufe_ring; /*Rings up to user space. */
4350+ wait_queue_head_t wait; /*for poll */
4351+ unsigned long mode; /*current switching mode */
4352+ int minor; /*Minor number for tapdisk device */
4353+ pid_t pid; /*tapdisk process id */
4354+ enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace
4355+ shutdown */
4356+ unsigned long *idx_map; /*Record the user ring id to kern
4357+ [req id, idx] tuple */
4358+ blkif_t *blkif; /*Associate blkif with tapdev */
4359+ struct domid_translate_ext trans; /*Translation from domid to bus. */
4360+} tap_blkif_t;
4361+
4362+static struct tap_blkif *tapfds[MAX_TAP_DEV];
4363+static int blktap_next_minor;
4364+
4365+module_param(blkif_reqs, int, 0);
4366+/* Run-time switchable: /sys/module/blktap/parameters/ */
4367+static unsigned int log_stats = 0;
4368+static unsigned int debug_lvl = 0;
4369+module_param(log_stats, int, 0644);
4370+module_param(debug_lvl, int, 0644);
4371+
4372+/*
4373+ * Each outstanding request that we've passed to the lower device layers has a
4374+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
4375+ * the pendcnt towards zero. When it hits zero, the specified domain has a
4376+ * response queued for it, with the saved 'id' passed back.
4377+ */
4378+typedef struct {
4379+ blkif_t *blkif;
4380+ u64 id;
4381+ unsigned short mem_idx;
4382+ int nr_pages;
4383+ atomic_t pendcnt;
4384+ unsigned short operation;
4385+ int status;
4386+ struct list_head free_list;
4387+ int inuse;
4388+} pending_req_t;
4389+
4390+static pending_req_t *pending_reqs[MAX_PENDING_REQS];
4391+static struct list_head pending_free;
4392+static DEFINE_SPINLOCK(pending_free_lock);
4393+static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
4394+static int alloc_pending_reqs;
4395+
4396+typedef unsigned int PEND_RING_IDX;
4397+
4398+static inline int MASK_PEND_IDX(int i) {
4399+ return (i & (MAX_PENDING_REQS-1));
4400+}
4401+
4402+static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
4403+ return (req - pending_reqs[idx]);
4404+}
4405+
4406+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
4407+
4408+#define BLKBACK_INVALID_HANDLE (~0)
4409+
4410+static struct page **foreign_pages[MAX_DYNAMIC_MEM];
4411+static inline unsigned long idx_to_kaddr(
4412+ unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx)
4413+{
4414+ unsigned int arr_idx = req_idx*BLKIF_MAX_SEGMENTS_PER_REQUEST + sg_idx;
4415+ unsigned long pfn = page_to_pfn(foreign_pages[mmap_idx][arr_idx]);
4416+ return (unsigned long)pfn_to_kaddr(pfn);
4417+}
4418+
4419+static unsigned short mmap_alloc = 0;
4420+static unsigned short mmap_lock = 0;
4421+static unsigned short mmap_inuse = 0;
4422+
4423+/******************************************************************
4424+ * GRANT HANDLES
4425+ */
4426+
4427+/* When grant tables are used to map a frame for device access, the
4428+ * handle returned must be used to unmap the frame. This is needed to
4429+ * drop the ref count on the frame.
4430+ */
4431+struct grant_handle_pair
4432+{
4433+ grant_handle_t kernel;
4434+ grant_handle_t user;
4435+};
4436+#define INVALID_GRANT_HANDLE 0xFFFF
4437+
4438+static struct grant_handle_pair
4439+ pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
4440+#define pending_handle(_id, _idx, _i) \
4441+ (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
4442+ + (_i)])
4443+
4444+
4445+static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/
4446+
4447+#define BLKTAP_MINOR 0 /*/dev/xen/blktap has a dynamic major */
4448+#define BLKTAP_DEV_DIR "/dev/xen"
4449+
4450+static int blktap_major;
4451+
4452+/* blktap IOCTLs: */
4453+#define BLKTAP_IOCTL_KICK_FE 1
4454+#define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */
4455+#define BLKTAP_IOCTL_SETMODE 3
4456+#define BLKTAP_IOCTL_SENDPID 4
4457+#define BLKTAP_IOCTL_NEWINTF 5
4458+#define BLKTAP_IOCTL_MINOR 6
4459+#define BLKTAP_IOCTL_MAJOR 7
4460+#define BLKTAP_QUERY_ALLOC_REQS 8
4461+#define BLKTAP_IOCTL_FREEINTF 9
4462+#define BLKTAP_IOCTL_NEWINTF_EXT 50
4463+#define BLKTAP_IOCTL_PRINT_IDXS 100
4464+
4465+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
4466+#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
4467+#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
4468+#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */
4469+
4470+#define BLKTAP_MODE_INTERPOSE \
4471+ (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
4472+
4473+
4474+static inline int BLKTAP_MODE_VALID(unsigned long arg)
4475+{
4476+ return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
4477+ (arg == BLKTAP_MODE_INTERCEPT_FE) ||
4478+ (arg == BLKTAP_MODE_INTERPOSE ));
4479+}
4480+
4481+/* Requests passing through the tap to userspace are re-assigned an ID.
4482+ * We must record a mapping between the BE [IDX,ID] tuple and the userspace
4483+ * ring ID.
4484+ */
4485+
4486+static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
4487+{
4488+ return ((fe_dom << 16) | MASK_PEND_IDX(idx));
4489+}
4490+
4491+extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id)
4492+{
4493+ return (PEND_RING_IDX)(id & 0x0000ffff);
4494+}
4495+
4496+extern inline int ID_TO_MIDX(unsigned long id)
4497+{
4498+ return (int)(id >> 16);
4499+}
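A small round-trip sketch of the id packing (MAX_PENDING_REQS = 32 is an assumed value; the driver derives it from the ring size) shows how the [mmap slot, pending index] tuple survives the trip through the id.

/* Round-trip sketch of the id packing used by the tap driver.
 * MAX_PENDING_REQS = 32 is an assumed value; the driver derives it
 * from the ring size. */
#include <assert.h>
#include <stdio.h>

#define MAX_PENDING_REQS 32

static unsigned long MAKE_ID(unsigned short hi, unsigned int idx)
{
	return ((unsigned long)hi << 16) | (idx & (MAX_PENDING_REQS - 1));
}

static unsigned int ID_TO_IDX(unsigned long id)  { return id & 0x0000ffff; }
static unsigned int ID_TO_MIDX(unsigned long id) { return id >> 16; }

int main(void)
{
	unsigned long id = MAKE_ID(3, 17);  /* mmap slot 3, pending index 17 */

	assert(ID_TO_MIDX(id) == 3);
	assert(ID_TO_IDX(id) == 17);
	printf("id 0x%lx -> slot %u, index %u\n",
	       id, ID_TO_MIDX(id), ID_TO_IDX(id));
	return 0;
}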
4500+
4501+#define INVALID_REQ 0xdead0000
4502+
4503+/*TODO: Convert to a free list*/
4504+static inline int GET_NEXT_REQ(unsigned long *idx_map)
4505+{
4506+ int i;
4507+ for (i = 0; i < MAX_PENDING_REQS; i++)
4508+ if (idx_map[i] == INVALID_REQ)
4509+ return i;
4510+
4511+ return INVALID_REQ;
4512+}
4513+
4514+static inline int OFFSET_TO_USR_IDX(int offset)
4515+{
4516+ return offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
4517+}
4518+
4519+static inline int OFFSET_TO_SEG(int offset)
4520+{
4521+ return offset % BLKIF_MAX_SEGMENTS_PER_REQUEST;
4522+}
4523+
4524+
4525+#define BLKTAP_INVALID_HANDLE(_g) \
4526+ (((_g->kernel) == INVALID_GRANT_HANDLE) && \
4527+ ((_g->user) == INVALID_GRANT_HANDLE))
4528+
4529+#define BLKTAP_INVALIDATE_HANDLE(_g) do { \
4530+ (_g)->kernel = INVALID_GRANT_HANDLE; (_g)->user = INVALID_GRANT_HANDLE; \
4531+ } while(0)
4532+
4533+
4534+/******************************************************************
4535+ * BLKTAP VM OPS
4536+ */
4537+
4538+static struct page *blktap_nopage(struct vm_area_struct *vma,
4539+ unsigned long address,
4540+ int *type)
4541+{
4542+ /*
4543+ * if the page has not been mapped in by the driver then return
4544+ * NOPAGE_SIGBUS to the domain.
4545+ */
4546+
4547+ return NOPAGE_SIGBUS;
4548+}
4549+
4550+static pte_t blktap_clear_pte(struct vm_area_struct *vma,
4551+ unsigned long uvaddr,
4552+ pte_t *ptep, int is_fullmm)
4553+{
4554+ pte_t copy;
4555+ tap_blkif_t *info;
4556+ int offset, seg, usr_idx, pending_idx, mmap_idx;
4557+ unsigned long uvstart = vma->vm_start + (RING_PAGES << PAGE_SHIFT);
4558+ unsigned long kvaddr;
4559+ struct page **map;
4560+ struct page *pg;
4561+ struct grant_handle_pair *khandle;
4562+ struct gnttab_unmap_grant_ref unmap[2];
4563+ int count = 0;
4564+
4565+ /*
4566+ * If the address is before the start of the grant mapped region or
4567+ * if vm_file is NULL (meaning mmap failed and we have nothing to do)
4568+ */
4569+ if (uvaddr < uvstart || vma->vm_file == NULL)
4570+ return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
4571+ ptep, is_fullmm);
4572+
4573+ info = vma->vm_file->private_data;
4574+ map = vma->vm_private_data;
4575+
4576+ /* TODO Should these be changed to if statements? */
4577+ BUG_ON(!info);
4578+ BUG_ON(!info->idx_map);
4579+ BUG_ON(!map);
4580+
4581+ offset = (int) ((uvaddr - uvstart) >> PAGE_SHIFT);
4582+ usr_idx = OFFSET_TO_USR_IDX(offset);
4583+ seg = OFFSET_TO_SEG(offset);
4584+
4585+ pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
4586+ mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
4587+
4588+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg);
4589+ pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
4590+ ClearPageReserved(pg);
4591+ map[offset + RING_PAGES] = NULL;
4592+
4593+ khandle = &pending_handle(mmap_idx, pending_idx, seg);
4594+
4595+ if (khandle->kernel != INVALID_GRANT_HANDLE) {
4596+ gnttab_set_unmap_op(&unmap[count], kvaddr,
4597+ GNTMAP_host_map, khandle->kernel);
4598+ count++;
4599+
4600+ set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
4601+ INVALID_P2M_ENTRY);
4602+ }
4603+
4604+ if (khandle->user != INVALID_GRANT_HANDLE) {
4605+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
4606+
4607+ copy = *ptep;
4608+ gnttab_set_unmap_op(&unmap[count], virt_to_machine(ptep),
4609+ GNTMAP_host_map
4610+ | GNTMAP_application_map
4611+ | GNTMAP_contains_pte,
4612+ khandle->user);
4613+ count++;
4614+ } else {
4615+ BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap));
4616+
4617+ /* USING SHADOW PAGE TABLES. */
4618+ copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
4619+ is_fullmm);
4620+ }
4621+
4622+ if (count) {
4623+ BLKTAP_INVALIDATE_HANDLE(khandle);
4624+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
4625+ unmap, count))
4626+ BUG();
4627+ }
4628+
4629+ return copy;
4630+}
4631+
4632+struct vm_operations_struct blktap_vm_ops = {
4633+ nopage: blktap_nopage,
4634+ zap_pte: blktap_clear_pte,
4635+};
4636+
4637+/******************************************************************
4638+ * BLKTAP FILE OPS
4639+ */
4640+
4641+/*Function Declarations*/
4642+static tap_blkif_t *get_next_free_dev(void);
4643+static int blktap_open(struct inode *inode, struct file *filp);
4644+static int blktap_release(struct inode *inode, struct file *filp);
4645+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
4646+static int blktap_ioctl(struct inode *inode, struct file *filp,
4647+ unsigned int cmd, unsigned long arg);
4648+static unsigned int blktap_poll(struct file *file, poll_table *wait);
4649+
4650+static const struct file_operations blktap_fops = {
4651+ .owner = THIS_MODULE,
4652+ .poll = blktap_poll,
4653+ .ioctl = blktap_ioctl,
4654+ .open = blktap_open,
4655+ .release = blktap_release,
4656+ .mmap = blktap_mmap,
4657+};
4658+
4659+
4660+static tap_blkif_t *get_next_free_dev(void)
4661+{
4662+ struct class *class;
4663+ tap_blkif_t *info;
4664+ int minor;
4665+
4666+ /*
4667+ * This is called only from the ioctl, which
4668+ * means we should always have interrupts enabled.
4669+ */
4670+ BUG_ON(irqs_disabled());
4671+
4672+ spin_lock_irq(&pending_free_lock);
4673+
4674+ /* tapfds[0] is always NULL */
4675+
4676+ for (minor = 1; minor < blktap_next_minor; minor++) {
4677+ info = tapfds[minor];
4678+ /* we could have failed a previous attempt. */
4679+ if (!info ||
4680+ ((info->dev_inuse == 0) &&
4681+ (info->dev_pending == 0)) ) {
4682+ info->dev_pending = 1;
4683+ goto found;
4684+ }
4685+ }
4686+ info = NULL;
4687+ minor = -1;
4688+
4689+ /*
4690+	 * We didn't find a free device. If we can still allocate
4691+ * more, then we grab the next device minor that is
4692+ * available. This is done while we are still under
4693+ * the protection of the pending_free_lock.
4694+ */
4695+ if (blktap_next_minor < MAX_TAP_DEV)
4696+ minor = blktap_next_minor++;
4697+found:
4698+ spin_unlock_irq(&pending_free_lock);
4699+
4700+ if (!info && minor > 0) {
4701+ info = kzalloc(sizeof(*info), GFP_KERNEL);
4702+ if (unlikely(!info)) {
4703+ /*
4704+ * If we failed here, try to put back
4705+ * the next minor number. But if one
4706+ * was just taken, then we just lose this
4707+ * minor. We can try to allocate this
4708+ * minor again later.
4709+ */
4710+ spin_lock_irq(&pending_free_lock);
4711+ if (blktap_next_minor == minor+1)
4712+ blktap_next_minor--;
4713+ spin_unlock_irq(&pending_free_lock);
4714+ goto out;
4715+ }
4716+
4717+ info->minor = minor;
4718+ /*
4719+ * Make sure that we have a minor before others can
4720+ * see us.
4721+ */
4722+ wmb();
4723+ tapfds[minor] = info;
4724+
4725+ if ((class = get_xen_class()) != NULL)
4726+ class_device_create(class, NULL,
4727+ MKDEV(blktap_major, minor), NULL,
4728+ "blktap%d", minor);
4729+ }
4730+
4731+out:
4732+ return info;
4733+}
4734+
4735+int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif)
4736+{
4737+ tap_blkif_t *info;
4738+ int i;
4739+
4740+ for (i = 1; i < blktap_next_minor; i++) {
4741+ info = tapfds[i];
4742+ if ( info &&
4743+ (info->trans.domid == domid) &&
4744+ (info->trans.busid == xenbus_id) ) {
4745+ info->blkif = blkif;
4746+ info->status = RUNNING;
4747+ return i;
4748+ }
4749+ }
4750+ return -1;
4751+}
4752+
4753+void signal_tapdisk(int idx)
4754+{
4755+ tap_blkif_t *info;
4756+ struct task_struct *ptask;
4757+
4758+ /*
4759+ * if the userland tools set things up wrong, this could be negative;
4760+ * just don't try to signal in this case
4761+ */
4762+ if (idx < 0)
4763+ return;
4764+
4765+ info = tapfds[idx];
4766+ if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
4767+ return;
4768+
4769+ if (info->pid > 0) {
4770+ ptask = find_task_by_pid(info->pid);
4771+ if (ptask)
4772+ info->status = CLEANSHUTDOWN;
4773+ }
4774+ info->blkif = NULL;
4775+
4776+ return;
4777+}
4778+
4779+static int blktap_open(struct inode *inode, struct file *filp)
4780+{
4781+ blkif_sring_t *sring;
4782+ int idx = iminor(inode) - BLKTAP_MINOR;
4783+ tap_blkif_t *info;
4784+ int i;
4785+
4786+ /* ctrl device, treat differently */
4787+ if (!idx)
4788+ return 0;
4789+
4790+ info = tapfds[idx];
4791+
4792+ if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) {
4793+ WPRINTK("Unable to open device /dev/xen/blktap%d\n",
4794+ idx);
4795+ return -ENODEV;
4796+ }
4797+
4798+ DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
4799+
4800+ /*Only one process can access device at a time*/
4801+ if (test_and_set_bit(0, &info->dev_inuse))
4802+ return -EBUSY;
4803+
4804+ info->dev_pending = 0;
4805+
4806+ /* Allocate the fe ring. */
4807+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
4808+ if (sring == NULL)
4809+ goto fail_nomem;
4810+
4811+ SetPageReserved(virt_to_page(sring));
4812+
4813+ SHARED_RING_INIT(sring);
4814+ FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
4815+
4816+ filp->private_data = info;
4817+ info->vma = NULL;
4818+
4819+ info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS,
4820+ GFP_KERNEL);
4821+
4822+ if (info->idx_map == NULL)
4823+ goto fail_nomem;
4824+
4825+ if (idx > 0) {
4826+ init_waitqueue_head(&info->wait);
4827+ for (i = 0; i < MAX_PENDING_REQS; i++)
4828+ info->idx_map[i] = INVALID_REQ;
4829+ }
4830+
4831+ DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
4832+ return 0;
4833+
4834+ fail_nomem:
4835+ return -ENOMEM;
4836+}
4837+
4838+static int blktap_release(struct inode *inode, struct file *filp)
4839+{
4840+ tap_blkif_t *info = filp->private_data;
4841+
4842+ /* check for control device */
4843+ if (!info)
4844+ return 0;
4845+
4846+ info->dev_inuse = 0;
4847+ DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
4848+
4849+ /* Free the ring page. */
4850+ ClearPageReserved(virt_to_page(info->ufe_ring.sring));
4851+ free_page((unsigned long) info->ufe_ring.sring);
4852+
4853+ /* Clear any active mappings and free foreign map table */
4854+ if (info->vma) {
4855+ struct mm_struct *mm = info->vma->vm_mm;
4856+
4857+ down_write(&mm->mmap_sem);
4858+ zap_page_range(
4859+ info->vma, info->vma->vm_start,
4860+ info->vma->vm_end - info->vma->vm_start, NULL);
4861+ up_write(&mm->mmap_sem);
4862+
4863+ kfree(info->vma->vm_private_data);
4864+
4865+ info->vma = NULL;
4866+ }
4867+
4868+ if (info->idx_map) {
4869+ kfree(info->idx_map);
4870+ info->idx_map = NULL;
4871+ }
4872+
4873+ if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
4874+ if (info->blkif->xenblkd != NULL) {
4875+ kthread_stop(info->blkif->xenblkd);
4876+ info->blkif->xenblkd = NULL;
4877+ }
4878+ info->status = CLEANSHUTDOWN;
4879+ }
4880+
4881+ return 0;
4882+}
4883+
4884+
4885+/* Note on mmap:
4886+ * We need to map pages to user space in a way that will allow the block
4887+ * subsystem to set up direct IO to them. This couldn't be done before, because
4888+ * there isn't really a sane way to translate a user virtual address down to a
4889+ * physical address when the page belongs to another domain.
4890+ *
4891+ * My first approach was to map the page in to kernel memory, add an entry
4892+ * for it in the physical frame list (using alloc_lomem_region as in blkback)
4893+ * and then attempt to map that page up to user space. This is disallowed
4894+ * by xen though, which realizes that we don't really own the machine frame
4895+ * underlying the physical page.
4896+ *
4897+ * The new approach is to provide explicit support for this in xen linux.
4898+ * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
4899+ * mapped from other vms. vma->vm_private_data is set up as a mapping
4900+ * from pages to actual page structs. There is a new clause in get_user_pages
4901+ * that does the right thing for this sort of mapping.
4902+ */
4903+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
4904+{
4905+ int size;
4906+ struct page **map;
4907+ int i;
4908+ tap_blkif_t *info = filp->private_data;
4909+ int ret;
4910+
4911+ if (info == NULL) {
4912+ WPRINTK("blktap: mmap, retrieving idx failed\n");
4913+ return -ENOMEM;
4914+ }
4915+
4916+ vma->vm_flags |= VM_RESERVED;
4917+ vma->vm_ops = &blktap_vm_ops;
4918+
4919+ size = vma->vm_end - vma->vm_start;
4920+ if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
4921+ WPRINTK("you _must_ map exactly %d pages!\n",
4922+ mmap_pages + RING_PAGES);
4923+ return -EAGAIN;
4924+ }
4925+
4926+ size >>= PAGE_SHIFT;
4927+ info->rings_vstart = vma->vm_start;
4928+ info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
4929+
4930+ /* Map the ring pages to the start of the region and reserve it. */
4931+ if (xen_feature(XENFEAT_auto_translated_physmap))
4932+ ret = vm_insert_page(vma, vma->vm_start,
4933+ virt_to_page(info->ufe_ring.sring));
4934+ else
4935+ ret = remap_pfn_range(vma, vma->vm_start,
4936+ __pa(info->ufe_ring.sring) >> PAGE_SHIFT,
4937+ PAGE_SIZE, vma->vm_page_prot);
4938+ if (ret) {
4939+ WPRINTK("Mapping user ring failed!\n");
4940+ goto fail;
4941+ }
4942+
4943+ /* Mark this VM as containing foreign pages, and set up mappings. */
4944+ map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
4945+ * sizeof(struct page *),
4946+ GFP_KERNEL);
4947+ if (map == NULL) {
4948+ WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
4949+ goto fail;
4950+ }
4951+
4952+ for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
4953+ map[i] = NULL;
4954+
4955+ vma->vm_private_data = map;
4956+ vma->vm_flags |= VM_FOREIGN;
4957+ vma->vm_flags |= VM_DONTCOPY;
4958+
4959+#ifdef CONFIG_X86
4960+ vma->vm_mm->context.has_foreign_mappings = 1;
4961+#endif
4962+
4963+ info->vma = vma;
4964+ info->ring_ok = 1;
4965+ return 0;
4966+ fail:
4967+ /* Clear any active mappings. */
4968+ zap_page_range(vma, vma->vm_start,
4969+ vma->vm_end - vma->vm_start, NULL);
4970+
4971+ return -ENOMEM;
4972+}
4973+
4974+
4975+static int blktap_ioctl(struct inode *inode, struct file *filp,
4976+ unsigned int cmd, unsigned long arg)
4977+{
4978+ tap_blkif_t *info = filp->private_data;
4979+
4980+ switch(cmd) {
4981+ case BLKTAP_IOCTL_KICK_FE:
4982+ {
4983+ /* There are fe messages to process. */
4984+ return blktap_read_ufe_ring(info);
4985+ }
4986+ case BLKTAP_IOCTL_SETMODE:
4987+ {
4988+ if (info) {
4989+ if (BLKTAP_MODE_VALID(arg)) {
4990+ info->mode = arg;
4991+ /* XXX: may need to flush rings here. */
4992+ DPRINTK("blktap: set mode to %lx\n",
4993+ arg);
4994+ return 0;
4995+ }
4996+ }
4997+ return 0;
4998+ }
4999+ case BLKTAP_IOCTL_PRINT_IDXS:
5000+ {
5001+ if (info) {
5002+ printk("User Rings: \n-----------\n");
5003+ printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
5004+ "| req_prod: %2d, rsp_prod: %2d\n",
5005+ info->ufe_ring.rsp_cons,
5006+ info->ufe_ring.req_prod_pvt,
5007+ info->ufe_ring.sring->req_prod,
5008+ info->ufe_ring.sring->rsp_prod);
5009+ }
5010+ return 0;
5011+ }
5012+ case BLKTAP_IOCTL_SENDPID:
5013+ {
5014+ if (info) {
5015+ info->pid = (pid_t)arg;
5016+ DPRINTK("blktap: pid received %d\n",
5017+ info->pid);
5018+ }
5019+ return 0;
5020+ }
5021+ case BLKTAP_IOCTL_NEWINTF:
5022+ {
5023+ uint64_t val = (uint64_t)arg;
5024+ domid_translate_t *tr = (domid_translate_t *)&val;
5025+
5026+ DPRINTK("NEWINTF Req for domid %d and bus id %d\n",
5027+ tr->domid, tr->busid);
5028+ info = get_next_free_dev();
5029+ if (!info) {
5030+ WPRINTK("Error initialising /dev/xen/blktap - "
5031+ "No more devices\n");
5032+ return -1;
5033+ }
5034+ info->trans.domid = tr->domid;
5035+ info->trans.busid = tr->busid;
5036+ return info->minor;
5037+ }
5038+ case BLKTAP_IOCTL_NEWINTF_EXT:
5039+ {
5040+ void __user *udata = (void __user *) arg;
5041+ domid_translate_ext_t tr;
5042+
5043+ if (copy_from_user(&tr, udata, sizeof(domid_translate_ext_t)))
5044+ return -EFAULT;
5045+
5046+ DPRINTK("NEWINTF_EXT Req for domid %d and bus id %d\n",
5047+ tr.domid, tr.busid);
5048+ info = get_next_free_dev();
5049+ if (!info) {
5050+ WPRINTK("Error initialising /dev/xen/blktap - "
5051+ "No more devices\n");
5052+ return -1;
5053+ }
5054+ info->trans.domid = tr.domid;
5055+ info->trans.busid = tr.busid;
5056+ return info->minor;
5057+ }
5058+ case BLKTAP_IOCTL_FREEINTF:
5059+ {
5060+ unsigned long dev = arg;
5061+ unsigned long flags;
5062+
5063+ info = tapfds[dev];
5064+
5065+ if ((dev > MAX_TAP_DEV) || !info)
5066+ return 0; /* should this be an error? */
5067+
5068+ spin_lock_irqsave(&pending_free_lock, flags);
5069+ if (info->dev_pending)
5070+ info->dev_pending = 0;
5071+ spin_unlock_irqrestore(&pending_free_lock, flags);
5072+
5073+ return 0;
5074+ }
5075+ case BLKTAP_IOCTL_MINOR:
5076+ {
5077+ unsigned long dev = arg;
5078+
5079+ info = tapfds[dev];
5080+
5081+ if ((dev > MAX_TAP_DEV) || !info)
5082+ return -EINVAL;
5083+
5084+ return info->minor;
5085+ }
5086+ case BLKTAP_IOCTL_MAJOR:
5087+ return blktap_major;
5088+
5089+ case BLKTAP_QUERY_ALLOC_REQS:
5090+ {
5091+ WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n",
5092+ alloc_pending_reqs, blkif_reqs);
5093+ return (alloc_pending_reqs/blkif_reqs) * 100;
5094+ }
5095+ }
5096+ return -ENOIOCTLCMD;
5097+}
5098+
5099+static unsigned int blktap_poll(struct file *filp, poll_table *wait)
5100+{
5101+ tap_blkif_t *info = filp->private_data;
5102+
5103+ /* do not work on the control device */
5104+ if (!info)
5105+ return 0;
5106+
5107+ poll_wait(filp, &info->wait, wait);
5108+ if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) {
5109+ RING_PUSH_REQUESTS(&info->ufe_ring);
5110+ return POLLIN | POLLRDNORM;
5111+ }
5112+ return 0;
5113+}
5114+
5115+void blktap_kick_user(int idx)
5116+{
5117+ tap_blkif_t *info;
5118+
5119+ info = tapfds[idx];
5120+
5121+ if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
5122+ return;
5123+
5124+ wake_up_interruptible(&info->wait);
5125+
5126+ return;
5127+}
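Putting the character-device pieces together, a heavily simplified tapdisk-side sketch might look as follows; the device paths, the mmap size and the struct layout are assumptions, and error handling and the actual ring processing are omitted.

/* Very rough tapdisk-side sketch of the char-device protocol above: get a
 * minor via BLKTAP_IOCTL_NEWINTF_EXT, map the ring plus data area, then
 * poll and acknowledge with BLKTAP_IOCTL_KICK_FE.  Paths, sizes and the
 * struct layout are assumptions; error handling and the actual ring
 * processing are omitted. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#define BLKTAP_IOCTL_KICK_FE     1
#define BLKTAP_IOCTL_NEWINTF_EXT 50
#define ASSUMED_PAGE_SIZE 4096UL
#define MMAP_PAGES (32 * 11)	/* assumed ring size x segments per request */
#define RING_PAGES 1

struct domid_translate_ext { unsigned short domid; unsigned int busid; };

int main(void)
{
	struct domid_translate_ext tr = { .domid = 1, .busid = 51712 };
	char devname[64];
	int ctrl, fd, minor;
	void *area;

	ctrl = open("/dev/xen/blktap0", O_RDWR);	   /* control device */
	minor = ioctl(ctrl, BLKTAP_IOCTL_NEWINTF_EXT, &tr);
	snprintf(devname, sizeof(devname), "/dev/xen/blktap%d", minor);

	fd = open(devname, O_RDWR);
	/* blktap_mmap() requires exactly RING_PAGES + MMAP_PAGES pages. */
	area = mmap(NULL, (RING_PAGES + MMAP_PAGES) * ASSUMED_PAGE_SIZE,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	(void)area; (void)ctrl;

	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN };

		poll(&pfd, 1, -1);		 /* wait for queued requests */
		/* ... process requests in the mapped front ring here ... */
		ioctl(fd, BLKTAP_IOCTL_KICK_FE); /* let the kernel reap them */
	}
	return 0;
}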
5128+
5129+static int do_block_io_op(blkif_t *blkif);
5130+static void dispatch_rw_block_io(blkif_t *blkif,
5131+ blkif_request_t *req,
5132+ pending_req_t *pending_req);
5133+static void make_response(blkif_t *blkif, u64 id,
5134+ unsigned short op, int st);
5135+
5136+/******************************************************************
5137+ * misc small helpers
5138+ */
5139+static int req_increase(void)
5140+{
5141+ int i, j;
5142+
5143+ if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock)
5144+ return -EINVAL;
5145+
5146+ pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t)
5147+ * blkif_reqs, GFP_KERNEL);
5148+ foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages);
5149+
5150+ if (!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc])
5151+ goto out_of_memory;
5152+
5153+ DPRINTK("%s: reqs=%d, pages=%d\n",
5154+ __FUNCTION__, blkif_reqs, mmap_pages);
5155+
5156+ for (i = 0; i < MAX_PENDING_REQS; i++) {
5157+ list_add_tail(&pending_reqs[mmap_alloc][i].free_list,
5158+ &pending_free);
5159+ pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
5160+ for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
5161+ BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc,
5162+ i, j));
5163+ }
5164+
5165+ mmap_alloc++;
5166+ DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
5167+ return 0;
5168+
5169+ out_of_memory:
5170+ free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages);
5171+ kfree(pending_reqs[mmap_alloc]);
5172+ WPRINTK("%s: out of memory\n", __FUNCTION__);
5173+ return -ENOMEM;
5174+}
5175+
5176+static void mmap_req_del(int mmap)
5177+{
5178+ BUG_ON(!spin_is_locked(&pending_free_lock));
5179+
5180+ kfree(pending_reqs[mmap]);
5181+ pending_reqs[mmap] = NULL;
5182+
5183+	free_empty_pages_and_pagevec(foreign_pages[mmap], mmap_pages);
5184+ foreign_pages[mmap] = NULL;
5185+
5186+ mmap_lock = 0;
5187+ DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
5188+ mmap_alloc--;
5189+}
5190+
5191+static pending_req_t* alloc_req(void)
5192+{
5193+ pending_req_t *req = NULL;
5194+ unsigned long flags;
5195+
5196+ spin_lock_irqsave(&pending_free_lock, flags);
5197+
5198+ if (!list_empty(&pending_free)) {
5199+ req = list_entry(pending_free.next, pending_req_t, free_list);
5200+ list_del(&req->free_list);
5201+ }
5202+
5203+ if (req) {
5204+ req->inuse = 1;
5205+ alloc_pending_reqs++;
5206+ }
5207+ spin_unlock_irqrestore(&pending_free_lock, flags);
5208+
5209+ return req;
5210+}
5211+
5212+static void free_req(pending_req_t *req)
5213+{
5214+ unsigned long flags;
5215+ int was_empty;
5216+
5217+ spin_lock_irqsave(&pending_free_lock, flags);
5218+
5219+ alloc_pending_reqs--;
5220+ req->inuse = 0;
5221+ if (mmap_lock && (req->mem_idx == mmap_alloc-1)) {
5222+ mmap_inuse--;
5223+ if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
5224+ spin_unlock_irqrestore(&pending_free_lock, flags);
5225+ return;
5226+ }
5227+ was_empty = list_empty(&pending_free);
5228+ list_add(&req->free_list, &pending_free);
5229+
5230+ spin_unlock_irqrestore(&pending_free_lock, flags);
5231+
5232+ if (was_empty)
5233+ wake_up(&pending_free_wq);
5234+}
5235+
5236+static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
5237+ int tapidx)
5238+{
5239+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
5240+ unsigned int i, invcount = 0, locked = 0;
5241+ struct grant_handle_pair *khandle;
5242+ uint64_t ptep;
5243+ int ret, mmap_idx;
5244+ unsigned long kvaddr, uvaddr;
5245+ tap_blkif_t *info;
5246+ struct mm_struct *mm;
5247+
5248+
5249+ info = tapfds[tapidx];
5250+
5251+ if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) {
5252+ WPRINTK("fast_flush: Couldn't get info!\n");
5253+ return;
5254+ }
5255+
5256+ mm = info->vma ? info->vma->vm_mm : NULL;
5257+
5258+ if (info->vma != NULL &&
5259+ xen_feature(XENFEAT_auto_translated_physmap)) {
5260+ down_write(&mm->mmap_sem);
5261+ zap_page_range(info->vma,
5262+ MMAP_VADDR(info->user_vstart, u_idx, 0),
5263+ req->nr_pages << PAGE_SHIFT, NULL);
5264+ up_write(&mm->mmap_sem);
5265+ return;
5266+ }
5267+
5268+ mmap_idx = req->mem_idx;
5269+
5270+ for (i = 0; i < req->nr_pages; i++) {
5271+ kvaddr = idx_to_kaddr(mmap_idx, k_idx, i);
5272+ uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
5273+
5274+ khandle = &pending_handle(mmap_idx, k_idx, i);
5275+
5276+ if (khandle->kernel != INVALID_GRANT_HANDLE) {
5277+ gnttab_set_unmap_op(&unmap[invcount],
5278+ idx_to_kaddr(mmap_idx, k_idx, i),
5279+ GNTMAP_host_map, khandle->kernel);
5280+ invcount++;
5281+
5282+ set_phys_to_machine(
5283+ __pa(idx_to_kaddr(mmap_idx, k_idx, i))
5284+ >> PAGE_SHIFT, INVALID_P2M_ENTRY);
5285+ }
5286+
5287+ if (khandle->user != INVALID_GRANT_HANDLE) {
5288+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
5289+ if (!locked++)
5290+ down_write(&mm->mmap_sem);
5291+ if (create_lookup_pte_addr(
5292+ mm,
5293+ MMAP_VADDR(info->user_vstart, u_idx, i),
5294+ &ptep) !=0) {
5295+ up_write(&mm->mmap_sem);
5296+ WPRINTK("Couldn't get a pte addr!\n");
5297+ return;
5298+ }
5299+
5300+ gnttab_set_unmap_op(&unmap[invcount], ptep,
5301+ GNTMAP_host_map
5302+ | GNTMAP_application_map
5303+ | GNTMAP_contains_pte,
5304+ khandle->user);
5305+ invcount++;
5306+ }
5307+
5308+ BLKTAP_INVALIDATE_HANDLE(khandle);
5309+ }
5310+ ret = HYPERVISOR_grant_table_op(
5311+ GNTTABOP_unmap_grant_ref, unmap, invcount);
5312+ BUG_ON(ret);
5313+
5314+ if (info->vma != NULL &&
5315+ !xen_feature(XENFEAT_auto_translated_physmap)) {
5316+ if (!locked++)
5317+ down_write(&mm->mmap_sem);
5318+ zap_page_range(info->vma,
5319+ MMAP_VADDR(info->user_vstart, u_idx, 0),
5320+ req->nr_pages << PAGE_SHIFT, NULL);
5321+ }
5322+
5323+ if (locked)
5324+ up_write(&mm->mmap_sem);
5325+}
5326+
5327+/******************************************************************
5328+ * SCHEDULER FUNCTIONS
5329+ */
5330+
5331+static void print_stats(blkif_t *blkif)
5332+{
5333+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
5334+ current->comm, blkif->st_oo_req,
5335+ blkif->st_rd_req, blkif->st_wr_req);
5336+ blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
5337+ blkif->st_rd_req = 0;
5338+ blkif->st_wr_req = 0;
5339+ blkif->st_oo_req = 0;
5340+}
5341+
5342+int tap_blkif_schedule(void *arg)
5343+{
5344+ blkif_t *blkif = arg;
5345+
5346+ blkif_get(blkif);
5347+
5348+ if (debug_lvl)
5349+ printk(KERN_DEBUG "%s: started\n", current->comm);
5350+
5351+ while (!kthread_should_stop()) {
5352+ if (try_to_freeze())
5353+ continue;
5354+
5355+ wait_event_interruptible(
5356+ blkif->wq,
5357+ blkif->waiting_reqs || kthread_should_stop());
5358+ wait_event_interruptible(
5359+ pending_free_wq,
5360+ !list_empty(&pending_free) || kthread_should_stop());
5361+
5362+ blkif->waiting_reqs = 0;
5363+ smp_mb(); /* clear flag *before* checking for work */
5364+
5365+ if (do_block_io_op(blkif))
5366+ blkif->waiting_reqs = 1;
5367+
5368+ if (log_stats && time_after(jiffies, blkif->st_print))
5369+ print_stats(blkif);
5370+ }
5371+
5372+ if (log_stats)
5373+ print_stats(blkif);
5374+ if (debug_lvl)
5375+ printk(KERN_DEBUG "%s: exiting\n", current->comm);
5376+
5377+ blkif->xenblkd = NULL;
5378+ blkif_put(blkif);
5379+
5380+ return 0;
5381+}
5382+
5383+/******************************************************************
5384+ * COMPLETION CALLBACK -- Called by user level ioctl()
5385+ */
5386+
5387+static int blktap_read_ufe_ring(tap_blkif_t *info)
5388+{
5389+ /* This is called to read responses from the UFE ring. */
5390+ RING_IDX i, j, rp;
5391+ blkif_response_t *resp;
5392+ blkif_t *blkif=NULL;
5393+ int pending_idx, usr_idx, mmap_idx;
5394+ pending_req_t *pending_req;
5395+
5396+ if (!info)
5397+ return 0;
5398+
5399+ /* We currently only forward packets in INTERCEPT_FE mode. */
5400+ if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
5401+ return 0;
5402+
5403+ /* for each outstanding message on the UFEring */
5404+ rp = info->ufe_ring.sring->rsp_prod;
5405+ rmb();
5406+
5407+ for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
5408+ blkif_response_t res;
5409+ resp = RING_GET_RESPONSE(&info->ufe_ring, i);
5410+ memcpy(&res, resp, sizeof(res));
5411+ mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
5412+ ++info->ufe_ring.rsp_cons;
5413+
5414+ /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
5415+ usr_idx = (int)res.id;
5416+ pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
5417+ mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
5418+
5419+ if ( (mmap_idx >= mmap_alloc) ||
5420+ (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) )
5421+ WPRINTK("Incorrect req map"
5422+ "[%d], internal map [%d,%d (%d)]\n",
5423+ usr_idx, mmap_idx,
5424+ ID_TO_IDX(info->idx_map[usr_idx]),
5425+ MASK_PEND_IDX(
5426+ ID_TO_IDX(info->idx_map[usr_idx])));
5427+
5428+ pending_req = &pending_reqs[mmap_idx][pending_idx];
5429+ blkif = pending_req->blkif;
5430+
5431+ for (j = 0; j < pending_req->nr_pages; j++) {
5432+
5433+ unsigned long kvaddr, uvaddr;
5434+ struct page **map = info->vma->vm_private_data;
5435+ struct page *pg;
5436+ int offset;
5437+
5438+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
5439+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, j);
5440+
5441+ pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
5442+ ClearPageReserved(pg);
5443+ offset = (uvaddr - info->vma->vm_start)
5444+ >> PAGE_SHIFT;
5445+ map[offset] = NULL;
5446+ }
5447+ fast_flush_area(pending_req, pending_idx, usr_idx, info->minor);
5448+ info->idx_map[usr_idx] = INVALID_REQ;
5449+ make_response(blkif, pending_req->id, res.operation,
5450+ res.status);
5451+ blkif_put(pending_req->blkif);
5452+ free_req(pending_req);
5453+ }
5454+
5455+ return 0;
5456+}
5457+
5458+
5459+/******************************************************************************
5460+ * NOTIFICATION FROM GUEST OS.
5461+ */
5462+
5463+static void blkif_notify_work(blkif_t *blkif)
5464+{
5465+ blkif->waiting_reqs = 1;
5466+ wake_up(&blkif->wq);
5467+}
5468+
5469+irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
5470+{
5471+ blkif_notify_work(dev_id);
5472+ return IRQ_HANDLED;
5473+}
5474+
5475+
5476+
5477+/******************************************************************
5478+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
5479+ */
5480+static int print_dbug = 1;
5481+static int do_block_io_op(blkif_t *blkif)
5482+{
5483+ blkif_back_rings_t *blk_rings = &blkif->blk_rings;
5484+ blkif_request_t req;
5485+ pending_req_t *pending_req;
5486+ RING_IDX rc, rp;
5487+ int more_to_do = 0;
5488+ tap_blkif_t *info;
5489+
5490+ rc = blk_rings->common.req_cons;
5491+ rp = blk_rings->common.sring->req_prod;
5492+ rmb(); /* Ensure we see queued requests up to 'rp'. */
5493+
5494+ /*Check blkif has corresponding UE ring*/
5495+ if (blkif->dev_num < 0) {
5496+ /*oops*/
5497+ if (print_dbug) {
5498+ WPRINTK("Corresponding UE "
5499+ "ring does not exist!\n");
5500+ print_dbug = 0; /*We only print this message once*/
5501+ }
5502+ return 0;
5503+ }
5504+
5505+ info = tapfds[blkif->dev_num];
5506+
5507+ if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) {
5508+ if (print_dbug) {
5509+ WPRINTK("Can't get UE info!\n");
5510+ print_dbug = 0;
5511+ }
5512+ return 0;
5513+ }
5514+
5515+ while (rc != rp) {
5516+
5517+ if (RING_FULL(&info->ufe_ring)) {
5518+ WPRINTK("RING_FULL! More to do\n");
5519+ more_to_do = 1;
5520+ break;
5521+ }
5522+
5523+ if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) {
5524+ WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
5525+ " More to do\n");
5526+ more_to_do = 1;
5527+ break;
5528+ }
5529+
5530+ pending_req = alloc_req();
5531+ if (NULL == pending_req) {
5532+ blkif->st_oo_req++;
5533+ more_to_do = 1;
5534+ break;
5535+ }
5536+
5537+ if (kthread_should_stop()) {
5538+ more_to_do = 1;
5539+ break;
5540+ }
5541+
5542+ switch (blkif->blk_protocol) {
5543+ case BLKIF_PROTOCOL_NATIVE:
5544+ memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc),
5545+ sizeof(req));
5546+ break;
5547+ case BLKIF_PROTOCOL_X86_32:
5548+ blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
5549+ break;
5550+ case BLKIF_PROTOCOL_X86_64:
5551+ blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
5552+ break;
5553+ default:
5554+ BUG();
5555+ }
5556+ blk_rings->common.req_cons = ++rc; /* before make_response() */
5557+
5558+ /* Apply all sanity checks to /private copy/ of request. */
5559+ barrier();
5560+
5561+ switch (req.operation) {
5562+ case BLKIF_OP_READ:
5563+ blkif->st_rd_req++;
5564+ dispatch_rw_block_io(blkif, &req, pending_req);
5565+ break;
5566+
5567+ case BLKIF_OP_WRITE:
5568+ blkif->st_wr_req++;
5569+ dispatch_rw_block_io(blkif, &req, pending_req);
5570+ break;
5571+
5572+ default:
5573+ /* A good sign something is wrong: sleep for a while to
5574+ * avoid excessive CPU consumption by a bad guest. */
5575+ msleep(1);
5576+ WPRINTK("unknown operation [%d]\n",
5577+ req.operation);
5578+ make_response(blkif, req.id, req.operation,
5579+ BLKIF_RSP_ERROR);
5580+ free_req(pending_req);
5581+ break;
5582+ }
5583+
5584+ /* Yield point for this unbounded loop. */
5585+ cond_resched();
5586+ }
5587+
5588+ blktap_kick_user(blkif->dev_num);
5589+
5590+ return more_to_do;
5591+}
5592+
5593+static void dispatch_rw_block_io(blkif_t *blkif,
5594+ blkif_request_t *req,
5595+ pending_req_t *pending_req)
5596+{
5597+ extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
5598+ int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
5599+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
5600+ unsigned int nseg;
5601+ int ret, i, nr_sects = 0;
5602+ tap_blkif_t *info;
5603+ blkif_request_t *target;
5604+ int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
5605+ int usr_idx;
5606+ uint16_t mmap_idx = pending_req->mem_idx;
5607+ struct mm_struct *mm;
5608+
5609+ if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV)
5610+ goto fail_response;
5611+
5612+ info = tapfds[blkif->dev_num];
5613+ if (info == NULL)
5614+ goto fail_response;
5615+
5616+ /* Check we have space on user ring - should never fail. */
5617+ usr_idx = GET_NEXT_REQ(info->idx_map);
5618+ if (usr_idx == INVALID_REQ) {
5619+ BUG();
5620+ goto fail_response;
5621+ }
5622+
5623+ /* Check that number of segments is sane. */
5624+ nseg = req->nr_segments;
5625+ if ( unlikely(nseg == 0) ||
5626+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
5627+ WPRINTK("Bad number of segments in request (%d)\n", nseg);
5628+ goto fail_response;
5629+ }
5630+
5631+ /* Make sure userspace is ready. */
5632+ if (!info->ring_ok) {
5633+ WPRINTK("blktap: ring not ready for requests!\n");
5634+ goto fail_response;
5635+ }
5636+
5637+ if (RING_FULL(&info->ufe_ring)) {
5638+		WPRINTK("blktap: fe_ring is full, can't add request; "
5639+			"I/O will be dropped. %d %d\n",
5640+ RING_SIZE(&info->ufe_ring),
5641+ RING_SIZE(&blkif->blk_rings.common));
5642+ goto fail_response;
5643+ }
5644+
5645+ pending_req->blkif = blkif;
5646+ pending_req->id = req->id;
5647+ pending_req->operation = operation;
5648+ pending_req->status = BLKIF_RSP_OKAY;
5649+ pending_req->nr_pages = nseg;
5650+ op = 0;
5651+ mm = info->vma->vm_mm;
5652+ if (!xen_feature(XENFEAT_auto_translated_physmap))
5653+ down_write(&mm->mmap_sem);
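+	/*
+	 * Each segment needs one grant map targeting the kernel mapping at
+	 * kvaddr and, unless the domain is auto-translated, a second
+	 * GNTMAP_contains_pte map that installs the frame into the user PTE.
+	 * This is why map[] holds up to twice BLKIF_MAX_SEGMENTS_PER_REQUEST
+	 * entries and 'op' can reach 2 * nseg.
+	 */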
5654+ for (i = 0; i < nseg; i++) {
5655+ unsigned long uvaddr;
5656+ unsigned long kvaddr;
5657+ uint64_t ptep;
5658+ uint32_t flags;
5659+
5660+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
5661+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
5662+
5663+ flags = GNTMAP_host_map;
5664+ if (operation == WRITE)
5665+ flags |= GNTMAP_readonly;
5666+ gnttab_set_map_op(&map[op], kvaddr, flags,
5667+ req->seg[i].gref, blkif->domid);
5668+ op++;
5669+
5670+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
5671+ /* Now map it to user. */
5672+ ret = create_lookup_pte_addr(mm, uvaddr, &ptep);
5673+ if (ret) {
5674+ up_write(&mm->mmap_sem);
5675+ WPRINTK("Couldn't get a pte addr!\n");
5676+ goto fail_flush;
5677+ }
5678+
5679+ flags = GNTMAP_host_map | GNTMAP_application_map
5680+ | GNTMAP_contains_pte;
5681+ if (operation == WRITE)
5682+ flags |= GNTMAP_readonly;
5683+ gnttab_set_map_op(&map[op], ptep, flags,
5684+ req->seg[i].gref, blkif->domid);
5685+ op++;
5686+ }
5687+
5688+ nr_sects += (req->seg[i].last_sect -
5689+ req->seg[i].first_sect + 1);
5690+ }
5691+
5692+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
5693+ BUG_ON(ret);
5694+
5695+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
5696+ up_write(&mm->mmap_sem);
5697+
5698+ for (i = 0; i < (nseg*2); i+=2) {
5699+ unsigned long uvaddr;
5700+ unsigned long kvaddr;
5701+ unsigned long offset;
5702+ struct page *pg;
5703+
5704+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
5705+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i/2);
5706+
5707+ if (unlikely(map[i].status != 0)) {
5708+ WPRINTK("invalid kernel buffer -- "
5709+ "could not remap it\n");
5710+ ret |= 1;
5711+ map[i].handle = INVALID_GRANT_HANDLE;
5712+ }
5713+
5714+ if (unlikely(map[i+1].status != 0)) {
5715+ WPRINTK("invalid user buffer -- "
5716+ "could not remap it\n");
5717+ ret |= 1;
5718+ map[i+1].handle = INVALID_GRANT_HANDLE;
5719+ }
5720+
5721+ pending_handle(mmap_idx, pending_idx, i/2).kernel
5722+ = map[i].handle;
5723+ pending_handle(mmap_idx, pending_idx, i/2).user
5724+ = map[i+1].handle;
5725+
5726+ if (ret)
5727+ continue;
5728+
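+			/*
+			 * Record the grant-mapped foreign frame in the p2m so
+			 * that __pa(kvaddr) >> PAGE_SHIFT resolves to the
+			 * mapped machine frame below.
+			 */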
5729+ set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
5730+ FOREIGN_FRAME(map[i].dev_bus_addr
5731+ >> PAGE_SHIFT));
5732+ offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
5733+ pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
5734+ ((struct page **)info->vma->vm_private_data)[offset] =
5735+ pg;
5736+ }
5737+ } else {
5738+ for (i = 0; i < nseg; i++) {
5739+ unsigned long uvaddr;
5740+ unsigned long kvaddr;
5741+ unsigned long offset;
5742+ struct page *pg;
5743+
5744+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
5745+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
5746+
5747+ if (unlikely(map[i].status != 0)) {
5748+ WPRINTK("invalid kernel buffer -- "
5749+ "could not remap it\n");
5750+ ret |= 1;
5751+ map[i].handle = INVALID_GRANT_HANDLE;
5752+ }
5753+
5754+ pending_handle(mmap_idx, pending_idx, i).kernel
5755+ = map[i].handle;
5756+
5757+ if (ret)
5758+ continue;
5759+
5760+ offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
5761+ pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
5762+ ((struct page **)info->vma->vm_private_data)[offset] =
5763+ pg;
5764+ }
5765+ }
5766+
5767+ if (ret)
5768+ goto fail_flush;
5769+
5770+ if (xen_feature(XENFEAT_auto_translated_physmap))
5771+ down_write(&mm->mmap_sem);
5772+ /* Mark mapped pages as reserved: */
5773+ for (i = 0; i < req->nr_segments; i++) {
5774+ unsigned long kvaddr;
5775+ struct page *pg;
5776+
5777+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
5778+ pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
5779+ SetPageReserved(pg);
5780+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
5781+ ret = vm_insert_page(info->vma,
5782+ MMAP_VADDR(info->user_vstart,
5783+ usr_idx, i), pg);
5784+ if (ret) {
5785+ up_write(&mm->mmap_sem);
5786+ goto fail_flush;
5787+ }
5788+ }
5789+ }
5790+ if (xen_feature(XENFEAT_auto_translated_physmap))
5791+ up_write(&mm->mmap_sem);
5792+
5793+ /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
5794+ info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);
5795+
5796+ blkif_get(blkif);
5797+ /* Finally, write the request message to the user ring. */
5798+ target = RING_GET_REQUEST(&info->ufe_ring,
5799+ info->ufe_ring.req_prod_pvt);
5800+ memcpy(target, req, sizeof(*req));
5801+ target->id = usr_idx;
5802+ wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
5803+ info->ufe_ring.req_prod_pvt++;
5804+
5805+ if (operation == READ)
5806+ blkif->st_rd_sect += nr_sects;
5807+ else if (operation == WRITE)
5808+ blkif->st_wr_sect += nr_sects;
5809+
5810+ return;
5811+
5812+ fail_flush:
5813+ WPRINTK("Reached Fail_flush\n");
5814+ fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
5815+ fail_response:
5816+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
5817+ free_req(pending_req);
5818+ msleep(1); /* back off a bit */
5819+}
5820+
5821+
5822+
5823+/******************************************************************
5824+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
5825+ */
5826+
5827+
5828+static void make_response(blkif_t *blkif, u64 id,
5829+ unsigned short op, int st)
5830+{
5831+ blkif_response_t resp;
5832+ unsigned long flags;
5833+ blkif_back_rings_t *blk_rings = &blkif->blk_rings;
5834+ int more_to_do = 0;
5835+ int notify;
5836+
5837+ resp.id = id;
5838+ resp.operation = op;
5839+ resp.status = st;
5840+
5841+ spin_lock_irqsave(&blkif->blk_ring_lock, flags);
5842+ /* Place on the response ring for the relevant domain. */
5843+ switch (blkif->blk_protocol) {
5844+ case BLKIF_PROTOCOL_NATIVE:
5845+ memcpy(RING_GET_RESPONSE(&blk_rings->native,
5846+ blk_rings->native.rsp_prod_pvt),
5847+ &resp, sizeof(resp));
5848+ break;
5849+ case BLKIF_PROTOCOL_X86_32:
5850+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_32,
5851+ blk_rings->x86_32.rsp_prod_pvt),
5852+ &resp, sizeof(resp));
5853+ break;
5854+ case BLKIF_PROTOCOL_X86_64:
5855+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_64,
5856+ blk_rings->x86_64.rsp_prod_pvt),
5857+ &resp, sizeof(resp));
5858+ break;
5859+ default:
5860+ BUG();
5861+ }
5862+ blk_rings->common.rsp_prod_pvt++;
5863+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
5864+
5865+ if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
5866+ /*
5867+ * Tail check for pending requests. Allows frontend to avoid
5868+ * notifications if requests are already in flight (lower
5869+ * overheads and promotes batching).
5870+ */
5871+ RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
5872+ } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
5873+ more_to_do = 1;
5874+ }
5875+
5876+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
5877+ if (more_to_do)
5878+ blkif_notify_work(blkif);
5879+ if (notify)
5880+ notify_remote_via_irq(blkif->irq);
5881+}
5882+
5883+static int __init blkif_init(void)
5884+{
5885+ int i, ret;
5886+ struct class *class;
5887+
5888+ if (!is_running_on_xen())
5889+ return -ENODEV;
5890+
5891+ INIT_LIST_HEAD(&pending_free);
5892+ for(i = 0; i < 2; i++) {
5893+ ret = req_increase();
5894+ if (ret)
5895+ break;
5896+ }
5897+ if (i == 0)
5898+ return ret;
5899+
5900+ tap_blkif_interface_init();
5901+
5902+ alloc_pending_reqs = 0;
5903+
5904+ tap_blkif_xenbus_init();
5905+
5906+ /* Dynamically allocate a major for this device */
5907+ ret = register_chrdev(0, "blktap", &blktap_fops);
5908+
5909+ if (ret < 0) {
5910+ WPRINTK("Couldn't register /dev/xen/blktap\n");
5911+ return -ENOMEM;
5912+ }
5913+
5914+ blktap_major = ret;
5915+
5916+ /* tapfds[0] is always NULL */
5917+ blktap_next_minor++;
5918+
5919+ DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
5920+
5921+ /* Make sure the xen class exists */
5922+ if ((class = get_xen_class()) != NULL) {
5923+ /*
5924+ * This will allow udev to create the blktap ctrl device.
5925+ * We only want to create blktap0 first. We don't want
5926+ * to flood the sysfs system with needless blktap devices.
5927+		 * We only create the device when a request for a new device is
5928+ * made.
5929+ */
5930+ class_device_create(class, NULL,
5931+ MKDEV(blktap_major, 0), NULL,
5932+ "blktap0");
5933+ } else {
5934+ /* this is bad, but not fatal */
5935+ WPRINTK("blktap: sysfs xen_class not created\n");
5936+ }
5937+
5938+ DPRINTK("Blktap device successfully created\n");
5939+
5940+ return 0;
5941+}
5942+
5943+module_init(blkif_init);
5944+
5945+MODULE_LICENSE("Dual BSD/GPL");
5946Index: head-2008-11-25/drivers/xen/blktap/common.h
5947===================================================================
5948--- /dev/null 1970-01-01 00:00:00.000000000 +0000
5949+++ head-2008-11-25/drivers/xen/blktap/common.h 2008-09-15 13:40:15.000000000 +0200
5950@@ -0,0 +1,122 @@
5951+/*
5952+ * This program is free software; you can redistribute it and/or
5953+ * modify it under the terms of the GNU General Public License version 2
5954+ * as published by the Free Software Foundation; or, when distributed
5955+ * separately from the Linux kernel or incorporated into other
5956+ * software packages, subject to the following license:
5957+ *
5958+ * Permission is hereby granted, free of charge, to any person obtaining a copy
5959+ * of this source file (the "Software"), to deal in the Software without
5960+ * restriction, including without limitation the rights to use, copy, modify,
5961+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
5962+ * and to permit persons to whom the Software is furnished to do so, subject to
5963+ * the following conditions:
5964+ *
5965+ * The above copyright notice and this permission notice shall be included in
5966+ * all copies or substantial portions of the Software.
5967+ *
5968+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5969+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5970+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5971+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5972+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
5973+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
5974+ * IN THE SOFTWARE.
5975+ */
5976+
5977+#ifndef __BLKIF__BACKEND__COMMON_H__
5978+#define __BLKIF__BACKEND__COMMON_H__
5979+
5980+#include <linux/version.h>
5981+#include <linux/module.h>
5982+#include <linux/interrupt.h>
5983+#include <linux/slab.h>
5984+#include <linux/blkdev.h>
5985+#include <linux/vmalloc.h>
5986+#include <asm/io.h>
5987+#include <asm/setup.h>
5988+#include <asm/pgalloc.h>
5989+#include <xen/evtchn.h>
5990+#include <asm/hypervisor.h>
5991+#include <xen/blkif.h>
5992+#include <xen/gnttab.h>
5993+#include <xen/driver_util.h>
5994+
5995+#define DPRINTK(_f, _a...) pr_debug("(file=%s, line=%d) " _f, \
5996+ __FILE__ , __LINE__ , ## _a )
5997+
5998+#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
5999+
6000+struct backend_info;
6001+
6002+typedef struct blkif_st {
6003+ /* Unique identifier for this interface. */
6004+ domid_t domid;
6005+ unsigned int handle;
6006+ /* Physical parameters of the comms window. */
6007+ unsigned int irq;
6008+ /* Comms information. */
6009+ enum blkif_protocol blk_protocol;
6010+ blkif_back_rings_t blk_rings;
6011+ struct vm_struct *blk_ring_area;
6012+ /* Back pointer to the backend_info. */
6013+ struct backend_info *be;
6014+ /* Private fields. */
6015+ spinlock_t blk_ring_lock;
6016+ atomic_t refcnt;
6017+
6018+ wait_queue_head_t wq;
6019+ struct task_struct *xenblkd;
6020+ unsigned int waiting_reqs;
6021+ request_queue_t *plug;
6022+
6023+ /* statistics */
6024+ unsigned long st_print;
6025+ int st_rd_req;
6026+ int st_wr_req;
6027+ int st_oo_req;
6028+ int st_rd_sect;
6029+ int st_wr_sect;
6030+
6031+ wait_queue_head_t waiting_to_free;
6032+
6033+ grant_handle_t shmem_handle;
6034+ grant_ref_t shmem_ref;
6035+
6036+ int dev_num;
6037+ uint64_t sectors;
6038+} blkif_t;
6039+
6040+blkif_t *tap_alloc_blkif(domid_t domid);
6041+void tap_blkif_free(blkif_t *blkif);
6042+void tap_blkif_kmem_cache_free(blkif_t *blkif);
6043+int tap_blkif_map(blkif_t *blkif, unsigned long shared_page,
6044+ unsigned int evtchn);
6045+void tap_blkif_unmap(blkif_t *blkif);
6046+
6047+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
6048+#define blkif_put(_b) \
6049+ do { \
6050+ if (atomic_dec_and_test(&(_b)->refcnt)) \
6051+ wake_up(&(_b)->waiting_to_free);\
6052+ } while (0)
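+/*
+ * Dropping the last reference wakes waiting_to_free, on which
+ * tap_blkif_free() sleeps before tearing the interface down.
+ */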
6053+
6054+
6055+struct phys_req {
6056+ unsigned short dev;
6057+ unsigned short nr_sects;
6058+ struct block_device *bdev;
6059+ blkif_sector_t sector_number;
6060+};
6061+
6062+void tap_blkif_interface_init(void);
6063+
6064+void tap_blkif_xenbus_init(void);
6065+
6066+irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
6067+int tap_blkif_schedule(void *arg);
6068+
6069+int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
6070+void signal_tapdisk(int idx);
6071+
6072+#endif /* __BLKIF__BACKEND__COMMON_H__ */
6073Index: head-2008-11-25/drivers/xen/blktap/interface.c
6074===================================================================
6075--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6076+++ head-2008-11-25/drivers/xen/blktap/interface.c 2008-09-15 13:40:15.000000000 +0200
6077@@ -0,0 +1,181 @@
6078+/******************************************************************************
6079+ * drivers/xen/blktap/interface.c
6080+ *
6081+ * Block-device interface management.
6082+ *
6083+ * Copyright (c) 2004, Keir Fraser
6084+ *
6085+ * This program is free software; you can redistribute it and/or
6086+ * modify it under the terms of the GNU General Public License version 2
6087+ * as published by the Free Software Foundation; or, when distributed
6088+ * separately from the Linux kernel or incorporated into other
6089+ * software packages, subject to the following license:
6090+ *
6091+ * Permission is hereby granted, free of charge, to any person obtaining a copy
6092+ * of this source file (the "Software"), to deal in the Software without
6093+ * restriction, including without limitation the rights to use, copy, modify,
6094+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
6095+ * and to permit persons to whom the Software is furnished to do so, subject to
6096+ * the following conditions:
6097+ *
6098+ * The above copyright notice and this permission notice shall be included in
6099+ * all copies or substantial portions of the Software.
6100+ *
6101+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
6102+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6103+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
6104+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
6105+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
6106+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
6107+ * IN THE SOFTWARE.
6108+
6109+ */
6110+
6111+#include "common.h"
6112+#include <xen/evtchn.h>
6113+
6114+static kmem_cache_t *blkif_cachep;
6115+
6116+blkif_t *tap_alloc_blkif(domid_t domid)
6117+{
6118+ blkif_t *blkif;
6119+
6120+ blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
6121+ if (!blkif)
6122+ return ERR_PTR(-ENOMEM);
6123+
6124+ memset(blkif, 0, sizeof(*blkif));
6125+ blkif->domid = domid;
6126+ spin_lock_init(&blkif->blk_ring_lock);
6127+ atomic_set(&blkif->refcnt, 1);
6128+ init_waitqueue_head(&blkif->wq);
6129+ blkif->st_print = jiffies;
6130+ init_waitqueue_head(&blkif->waiting_to_free);
6131+
6132+ return blkif;
6133+}
6134+
6135+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
6136+{
6137+ struct gnttab_map_grant_ref op;
6138+
6139+ gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
6140+ GNTMAP_host_map, shared_page, blkif->domid);
6141+
6142+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
6143+ BUG();
6144+
6145+ if (op.status) {
6146+		DPRINTK("Grant table operation failure!\n");
6147+ return op.status;
6148+ }
6149+
6150+ blkif->shmem_ref = shared_page;
6151+ blkif->shmem_handle = op.handle;
6152+
6153+ return 0;
6154+}
6155+
6156+static void unmap_frontend_page(blkif_t *blkif)
6157+{
6158+ struct gnttab_unmap_grant_ref op;
6159+
6160+ gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
6161+ GNTMAP_host_map, blkif->shmem_handle);
6162+
6163+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
6164+ BUG();
6165+}
6166+
6167+int tap_blkif_map(blkif_t *blkif, unsigned long shared_page,
6168+ unsigned int evtchn)
6169+{
6170+ int err;
6171+
6172+ /* Already connected through? */
6173+ if (blkif->irq)
6174+ return 0;
6175+
6176+ if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
6177+ return -ENOMEM;
6178+
6179+ err = map_frontend_page(blkif, shared_page);
6180+ if (err) {
6181+ free_vm_area(blkif->blk_ring_area);
6182+ return err;
6183+ }
6184+
6185+ switch (blkif->blk_protocol) {
6186+ case BLKIF_PROTOCOL_NATIVE:
6187+ {
6188+ blkif_sring_t *sring;
6189+ sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
6190+ BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
6191+ break;
6192+ }
6193+ case BLKIF_PROTOCOL_X86_32:
6194+ {
6195+ blkif_x86_32_sring_t *sring_x86_32;
6196+ sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr;
6197+ BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
6198+ break;
6199+ }
6200+ case BLKIF_PROTOCOL_X86_64:
6201+ {
6202+ blkif_x86_64_sring_t *sring_x86_64;
6203+ sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr;
6204+ BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
6205+ break;
6206+ }
6207+ default:
6208+ BUG();
6209+ }
6210+
6211+ err = bind_interdomain_evtchn_to_irqhandler(
6212+ blkif->domid, evtchn, tap_blkif_be_int,
6213+ 0, "blkif-backend", blkif);
6214+ if (err < 0) {
6215+ unmap_frontend_page(blkif);
6216+ free_vm_area(blkif->blk_ring_area);
6217+ blkif->blk_rings.common.sring = NULL;
6218+ return err;
6219+ }
6220+ blkif->irq = err;
6221+
6222+ return 0;
6223+}
6224+
6225+void tap_blkif_unmap(blkif_t *blkif)
6226+{
6227+ if (blkif->irq) {
6228+ unbind_from_irqhandler(blkif->irq, blkif);
6229+ blkif->irq = 0;
6230+ }
6231+ if (blkif->blk_rings.common.sring) {
6232+ unmap_frontend_page(blkif);
6233+ free_vm_area(blkif->blk_ring_area);
6234+ blkif->blk_rings.common.sring = NULL;
6235+ }
6236+}
6237+
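+/*
+ * Drop the initial reference taken in tap_alloc_blkif() and wait for all
+ * remaining users to release theirs; a single reference is then re-taken so
+ * that the final free is left to tap_blkif_kmem_cache_free().
+ */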
6238+void tap_blkif_free(blkif_t *blkif)
6239+{
6240+ atomic_dec(&blkif->refcnt);
6241+ wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
6242+ atomic_inc(&blkif->refcnt);
6243+
6244+ tap_blkif_unmap(blkif);
6245+}
6246+
6247+void tap_blkif_kmem_cache_free(blkif_t *blkif)
6248+{
6249+ if (!atomic_dec_and_test(&blkif->refcnt))
6250+ BUG();
6251+ kmem_cache_free(blkif_cachep, blkif);
6252+}
6253+
6254+void __init tap_blkif_interface_init(void)
6255+{
6256+ blkif_cachep = kmem_cache_create("blktapif_cache", sizeof(blkif_t),
6257+ 0, 0, NULL, NULL);
6258+}
6259Index: head-2008-11-25/drivers/xen/blktap/xenbus.c
6260===================================================================
6261--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6262+++ head-2008-11-25/drivers/xen/blktap/xenbus.c 2008-09-15 13:40:15.000000000 +0200
6263@@ -0,0 +1,479 @@
6264+/* drivers/xen/blktap/xenbus.c
6265+ *
6266+ * Xenbus code for blktap
6267+ *
6268+ * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
6269+ *
6270+ * Based on the blkback xenbus code:
6271+ *
6272+ * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
6273+ * Copyright (C) 2005 XenSource Ltd
6274+ *
6275+ * This program is free software; you can redistribute it and/or
6276+ * modify it under the terms of the GNU General Public License version 2
6277+ * as published by the Free Software Foundation; or, when distributed
6278+ * separately from the Linux kernel or incorporated into other
6279+ * software packages, subject to the following license:
6280+ *
6281+ * Permission is hereby granted, free of charge, to any person obtaining a copy
6282+ * of this source file (the "Software"), to deal in the Software without
6283+ * restriction, including without limitation the rights to use, copy, modify,
6284+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
6285+ * and to permit persons to whom the Software is furnished to do so, subject to
6286+ * the following conditions:
6287+ *
6288+ * The above copyright notice and this permission notice shall be included in
6289+ * all copies or substantial portions of the Software.
6290+ *
6291+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
6292+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6293+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
6294+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
6295+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
6296+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
6297+ * IN THE SOFTWARE.
6298+ */
6299+
6300+#include <stdarg.h>
6301+#include <linux/module.h>
6302+#include <linux/kthread.h>
6303+#include <xen/xenbus.h>
6304+#include "common.h"
6305+
6306+
6307+struct backend_info
6308+{
6309+ struct xenbus_device *dev;
6310+ blkif_t *blkif;
6311+ struct xenbus_watch backend_watch;
6312+ int xenbus_id;
6313+ int group_added;
6314+};
6315+
6316+
6317+static void connect(struct backend_info *);
6318+static int connect_ring(struct backend_info *);
6319+static int blktap_remove(struct xenbus_device *dev);
6320+static int blktap_probe(struct xenbus_device *dev,
6321+ const struct xenbus_device_id *id);
6322+static void tap_backend_changed(struct xenbus_watch *, const char **,
6323+ unsigned int);
6324+static void tap_frontend_changed(struct xenbus_device *dev,
6325+ enum xenbus_state frontend_state);
6326+
6327+static int strsep_len(const char *str, char c, unsigned int len)
6328+{
6329+ unsigned int i;
6330+
6331+ for (i = 0; str[i]; i++)
6332+ if (str[i] == c) {
6333+ if (len == 0)
6334+ return i;
6335+ len--;
6336+ }
6337+ return (len == 0) ? i : -ERANGE;
6338+}
6339+
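+/*
+ * Return the numeric component that follows the third '/' of the xenstore
+ * node path (e.g. the trailing id of a backend/tap/<domid>/<id> node -- the
+ * exact path layout is assumed here for illustration).
+ */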
6340+static long get_id(const char *str)
6341+{
6342+ int len,end;
6343+ const char *ptr;
6344+ char *tptr, num[10];
6345+
6346+ len = strsep_len(str, '/', 2);
6347+ end = strlen(str);
6348+ if ( (len < 0) || (end < 0) ) return -1;
6349+
6350+ ptr = str + len + 1;
6351+ strncpy(num,ptr,end - len);
6352+ tptr = num + (end - (len + 1));
6353+ *tptr = '\0';
6354+ DPRINTK("Get_id called for %s (%s)\n",str,num);
6355+
6356+ return simple_strtol(num, NULL, 10);
6357+}
6358+
6359+static int blktap_name(blkif_t *blkif, char *buf)
6360+{
6361+ char *devpath, *devname;
6362+ struct xenbus_device *dev = blkif->be->dev;
6363+
6364+ devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
6365+ if (IS_ERR(devpath))
6366+ return PTR_ERR(devpath);
6367+
6368+ if ((devname = strstr(devpath, "/dev/")) != NULL)
6369+ devname += strlen("/dev/");
6370+ else
6371+ devname = devpath;
6372+
6373+ snprintf(buf, TASK_COMM_LEN, "blktap.%d.%s", blkif->domid, devname);
6374+ kfree(devpath);
6375+
6376+ return 0;
6377+}
6378+
6379+/****************************************************************
6380+ * sysfs interface for I/O requests of blktap device
6381+ */
6382+
6383+#define VBD_SHOW(name, format, args...) \
6384+ static ssize_t show_##name(struct device *_dev, \
6385+ struct device_attribute *attr, \
6386+ char *buf) \
6387+ { \
6388+ struct xenbus_device *dev = to_xenbus_device(_dev); \
6389+ struct backend_info *be = dev->dev.driver_data; \
6390+ \
6391+ return sprintf(buf, format, ##args); \
6392+ } \
6393+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
6394+
6395+VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
6396+VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
6397+VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
6398+VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
6399+VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
6400+
6401+static struct attribute *tapstat_attrs[] = {
6402+ &dev_attr_oo_req.attr,
6403+ &dev_attr_rd_req.attr,
6404+ &dev_attr_wr_req.attr,
6405+ &dev_attr_rd_sect.attr,
6406+ &dev_attr_wr_sect.attr,
6407+ NULL
6408+};
6409+
6410+static struct attribute_group tapstat_group = {
6411+ .name = "statistics",
6412+ .attrs = tapstat_attrs,
6413+};
6414+
6415+int xentap_sysfs_addif(struct xenbus_device *dev)
6416+{
6417+ int err;
6418+ struct backend_info *be = dev->dev.driver_data;
6419+ err = sysfs_create_group(&dev->dev.kobj, &tapstat_group);
6420+ if (!err)
6421+ be->group_added = 1;
6422+ return err;
6423+}
6424+
6425+void xentap_sysfs_delif(struct xenbus_device *dev)
6426+{
6427+ struct backend_info *be = dev->dev.driver_data;
6428+ sysfs_remove_group(&dev->dev.kobj, &tapstat_group);
6429+ be->group_added = 0;
6430+}
6431+
6432+static int blktap_remove(struct xenbus_device *dev)
6433+{
6434+ struct backend_info *be = dev->dev.driver_data;
6435+
6436+ if (be->group_added)
6437+ xentap_sysfs_delif(be->dev);
6438+ if (be->backend_watch.node) {
6439+ unregister_xenbus_watch(&be->backend_watch);
6440+ kfree(be->backend_watch.node);
6441+ be->backend_watch.node = NULL;
6442+ }
6443+ if (be->blkif) {
6444+ if (be->blkif->xenblkd)
6445+ kthread_stop(be->blkif->xenblkd);
6446+ signal_tapdisk(be->blkif->dev_num);
6447+ tap_blkif_free(be->blkif);
6448+ tap_blkif_kmem_cache_free(be->blkif);
6449+ be->blkif = NULL;
6450+ }
6451+ kfree(be);
6452+ dev->dev.driver_data = NULL;
6453+ return 0;
6454+}
6455+
6456+static void tap_update_blkif_status(blkif_t *blkif)
6457+{
6458+ int err;
6459+ char name[TASK_COMM_LEN];
6460+
6461+ /* Not ready to connect? */
6462+ if(!blkif->irq || !blkif->sectors) {
6463+ return;
6464+ }
6465+
6466+ /* Already connected? */
6467+ if (blkif->be->dev->state == XenbusStateConnected)
6468+ return;
6469+
6470+ /* Attempt to connect: exit if we fail to. */
6471+ connect(blkif->be);
6472+ if (blkif->be->dev->state != XenbusStateConnected)
6473+ return;
6474+
6475+ err = blktap_name(blkif, name);
6476+ if (err) {
6477+ xenbus_dev_error(blkif->be->dev, err, "get blktap dev name");
6478+ return;
6479+ }
6480+
6481+ if (!blkif->be->group_added) {
6482+ err = xentap_sysfs_addif(blkif->be->dev);
6483+ if (err) {
6484+ xenbus_dev_fatal(blkif->be->dev, err,
6485+ "creating sysfs entries");
6486+ return;
6487+ }
6488+ }
6489+
6490+ blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, name);
6491+ if (IS_ERR(blkif->xenblkd)) {
6492+ err = PTR_ERR(blkif->xenblkd);
6493+ blkif->xenblkd = NULL;
6494+ xenbus_dev_fatal(blkif->be->dev, err, "start xenblkd");
6495+ WPRINTK("Error starting thread\n");
6496+ }
6497+}
6498+
6499+/**
6500+ * Entry point to this code when a new device is created. Allocate
6501+ * the basic structures, and watch the store waiting for the
6502+ * user-space program to tell us the physical device info. Switch to
6503+ * InitWait.
6504+ */
6505+static int blktap_probe(struct xenbus_device *dev,
6506+ const struct xenbus_device_id *id)
6507+{
6508+ int err;
6509+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
6510+ GFP_KERNEL);
6511+ if (!be) {
6512+ xenbus_dev_fatal(dev, -ENOMEM,
6513+ "allocating backend structure");
6514+ return -ENOMEM;
6515+ }
6516+
6517+ be->dev = dev;
6518+ dev->dev.driver_data = be;
6519+ be->xenbus_id = get_id(dev->nodename);
6520+
6521+ be->blkif = tap_alloc_blkif(dev->otherend_id);
6522+ if (IS_ERR(be->blkif)) {
6523+ err = PTR_ERR(be->blkif);
6524+ be->blkif = NULL;
6525+ xenbus_dev_fatal(dev, err, "creating block interface");
6526+ goto fail;
6527+ }
6528+
6529+ /* setup back pointer */
6530+ be->blkif->be = be;
6531+ be->blkif->sectors = 0;
6532+
6533+ /* set a watch on disk info, waiting for userspace to update details*/
6534+ err = xenbus_watch_path2(dev, dev->nodename, "info",
6535+ &be->backend_watch, tap_backend_changed);
6536+ if (err)
6537+ goto fail;
6538+
6539+ err = xenbus_switch_state(dev, XenbusStateInitWait);
6540+ if (err)
6541+ goto fail;
6542+ return 0;
6543+
6544+fail:
6545+ DPRINTK("blktap probe failed\n");
6546+ blktap_remove(dev);
6547+ return err;
6548+}
6549+
6550+
6551+/**
6552+ * Callback received when the user space code has placed the device
6553+ * information in xenstore.
6554+ */
6555+static void tap_backend_changed(struct xenbus_watch *watch,
6556+ const char **vec, unsigned int len)
6557+{
6558+ int err;
6559+ unsigned long info;
6560+ struct backend_info *be
6561+ = container_of(watch, struct backend_info, backend_watch);
6562+ struct xenbus_device *dev = be->dev;
6563+
6564+	/*
6565+	 * Check whether the userspace code has opened the image
6566+	 * and written the sector count and disk info
6567+	 * to xenstore.
6568+	 */
6569+ err = xenbus_gather(XBT_NIL, dev->nodename, "info", "%lu", &info,
6570+ NULL);
6571+ if (XENBUS_EXIST_ERR(err))
6572+ return;
6573+ if (err) {
6574+ xenbus_dev_error(dev, err, "getting info");
6575+ return;
6576+ }
6577+
6578+ DPRINTK("Userspace update on disk info, %lu\n",info);
6579+
6580+ err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu",
6581+ &be->blkif->sectors, NULL);
6582+
6583+ /* Associate tap dev with domid*/
6584+ be->blkif->dev_num = dom_to_devid(be->blkif->domid, be->xenbus_id,
6585+ be->blkif);
6586+	DPRINTK("Tap device [%d] associated, connecting disk\n",
6587+ be->blkif->dev_num);
6588+
6589+ tap_update_blkif_status(be->blkif);
6590+}
6591+
6592+/**
6593+ * Callback received when the frontend's state changes.
6594+ */
6595+static void tap_frontend_changed(struct xenbus_device *dev,
6596+ enum xenbus_state frontend_state)
6597+{
6598+ struct backend_info *be = dev->dev.driver_data;
6599+ int err;
6600+
6601+ DPRINTK("\n");
6602+
6603+ switch (frontend_state) {
6604+ case XenbusStateInitialising:
6605+ if (dev->state == XenbusStateClosed) {
6606+ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
6607+ __FUNCTION__, dev->nodename);
6608+ xenbus_switch_state(dev, XenbusStateInitWait);
6609+ }
6610+ break;
6611+
6612+ case XenbusStateInitialised:
6613+ case XenbusStateConnected:
6614+ /* Ensure we connect even when two watches fire in
6615+		   close succession and we miss the intermediate value
6616+ of frontend_state. */
6617+ if (dev->state == XenbusStateConnected)
6618+ break;
6619+
6620+ err = connect_ring(be);
6621+ if (err)
6622+ break;
6623+ tap_update_blkif_status(be->blkif);
6624+ break;
6625+
6626+ case XenbusStateClosing:
6627+ if (be->blkif->xenblkd) {
6628+ kthread_stop(be->blkif->xenblkd);
6629+ be->blkif->xenblkd = NULL;
6630+ }
6631+ tap_blkif_free(be->blkif);
6632+ xenbus_switch_state(dev, XenbusStateClosing);
6633+ break;
6634+
6635+ case XenbusStateClosed:
6636+ xenbus_switch_state(dev, XenbusStateClosed);
6637+ if (xenbus_dev_is_online(dev))
6638+ break;
6639+ /* fall through if not online */
6640+ case XenbusStateUnknown:
6641+ device_unregister(&dev->dev);
6642+ break;
6643+
6644+ default:
6645+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
6646+ frontend_state);
6647+ break;
6648+ }
6649+}
6650+
6651+
6652+/**
6653+ * Switch to Connected state.
6654+ */
6655+static void connect(struct backend_info *be)
6656+{
6657+ int err;
6658+
6659+ struct xenbus_device *dev = be->dev;
6660+
6661+ err = xenbus_switch_state(dev, XenbusStateConnected);
6662+ if (err)
6663+		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
6664+ dev->nodename);
6665+
6666+ return;
6667+}
6668+
6669+
6670+static int connect_ring(struct backend_info *be)
6671+{
6672+ struct xenbus_device *dev = be->dev;
6673+ unsigned long ring_ref;
6674+ unsigned int evtchn;
6675+ char protocol[64];
6676+ int err;
6677+
6678+ DPRINTK("%s\n", dev->otherend);
6679+
6680+ err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
6681+ &ring_ref, "event-channel", "%u", &evtchn, NULL);
6682+ if (err) {
6683+ xenbus_dev_fatal(dev, err,
6684+ "reading %s/ring-ref and event-channel",
6685+ dev->otherend);
6686+ return err;
6687+ }
6688+
6689+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
6690+ err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
6691+ "%63s", protocol, NULL);
6692+ if (err)
6693+ strcpy(protocol, "unspecified, assuming native");
6694+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
6695+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
6696+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
6697+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
6698+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
6699+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
6700+ else {
6701+ xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
6702+ return -1;
6703+ }
6704+ printk(KERN_INFO
6705+ "blktap: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
6706+ ring_ref, evtchn, be->blkif->blk_protocol, protocol);
6707+
6708+ /* Map the shared frame, irq etc. */
6709+ err = tap_blkif_map(be->blkif, ring_ref, evtchn);
6710+ if (err) {
6711+ xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
6712+ ring_ref, evtchn);
6713+ return err;
6714+ }
6715+
6716+ return 0;
6717+}
6718+
6719+
6720+/* ** Driver Registration ** */
6721+
6722+
6723+static const struct xenbus_device_id blktap_ids[] = {
6724+ { "tap" },
6725+ { "" }
6726+};
6727+
6728+
6729+static struct xenbus_driver blktap = {
6730+ .name = "tap",
6731+ .owner = THIS_MODULE,
6732+ .ids = blktap_ids,
6733+ .probe = blktap_probe,
6734+ .remove = blktap_remove,
6735+ .otherend_changed = tap_frontend_changed
6736+};
6737+
6738+
6739+void tap_blkif_xenbus_init(void)
6740+{
6741+ xenbus_register_backend(&blktap);
6742+}
6743Index: head-2008-11-25/drivers/xen/char/Makefile
6744===================================================================
6745--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6746+++ head-2008-11-25/drivers/xen/char/Makefile 2007-07-10 09:42:30.000000000 +0200
6747@@ -0,0 +1 @@
6748+obj-$(CONFIG_XEN_DEVMEM) := mem.o
6749Index: head-2008-11-25/drivers/xen/char/mem.c
6750===================================================================
6751--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6752+++ head-2008-11-25/drivers/xen/char/mem.c 2007-08-06 15:10:49.000000000 +0200
6753@@ -0,0 +1,190 @@
6754+/*
6755+ * Originally from linux/drivers/char/mem.c
6756+ *
6757+ * Copyright (C) 1991, 1992 Linus Torvalds
6758+ *
6759+ * Added devfs support.
6760+ * Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
6761+ * Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
6762+ */
6763+
6764+#include <linux/mm.h>
6765+#include <linux/miscdevice.h>
6766+#include <linux/slab.h>
6767+#include <linux/vmalloc.h>
6768+#include <linux/mman.h>
6769+#include <linux/random.h>
6770+#include <linux/init.h>
6771+#include <linux/raw.h>
6772+#include <linux/tty.h>
6773+#include <linux/capability.h>
6774+#include <linux/smp_lock.h>
6775+#include <linux/ptrace.h>
6776+#include <linux/device.h>
6777+#include <asm/pgalloc.h>
6778+#include <asm/uaccess.h>
6779+#include <asm/io.h>
6780+#include <asm/hypervisor.h>
6781+
6782+static inline int uncached_access(struct file *file)
6783+{
6784+ if (file->f_flags & O_SYNC)
6785+ return 1;
6786+ /* Xen sets correct MTRR type on non-RAM for us. */
6787+ return 0;
6788+}
6789+
6790+/*
6791+ * This function reads the *physical* memory. The f_pos points directly to the
6792+ * memory location.
6793+ */
6794+static ssize_t read_mem(struct file * file, char __user * buf,
6795+ size_t count, loff_t *ppos)
6796+{
6797+ unsigned long p = *ppos, ignored;
6798+ ssize_t read = 0, sz;
6799+ void __iomem *v;
6800+
6801+ while (count > 0) {
6802+ /*
6803+ * Handle first page in case it's not aligned
6804+ */
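+		/* -p & (PAGE_SIZE - 1) is the number of bytes up to the next
+		   page boundary, so each ioremap() below covers at most one
+		   page. */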
6805+ if (-p & (PAGE_SIZE - 1))
6806+ sz = -p & (PAGE_SIZE - 1);
6807+ else
6808+ sz = PAGE_SIZE;
6809+
6810+ sz = min_t(unsigned long, sz, count);
6811+
6812+ v = ioremap(p, sz);
6813+ if (IS_ERR(v) || v == NULL) {
6814+ /*
6815+ * Some programs (e.g., dmidecode) groove off into
6816+ * weird RAM areas where no tables can possibly exist
6817+ * (because Xen will have stomped on them!). These
6818+ * programs get rather upset if we let them know that
6819+ * Xen failed their access, so we fake out a read of
6820+ * all zeroes.
6821+ */
6822+ if (clear_user(buf, count))
6823+ return -EFAULT;
6824+ read += count;
6825+ break;
6826+ }
6827+
6828+ ignored = copy_to_user(buf, v, sz);
6829+ iounmap(v);
6830+ if (ignored)
6831+ return -EFAULT;
6832+ buf += sz;
6833+ p += sz;
6834+ count -= sz;
6835+ read += sz;
6836+ }
6837+
6838+ *ppos += read;
6839+ return read;
6840+}
6841+
6842+static ssize_t write_mem(struct file * file, const char __user * buf,
6843+ size_t count, loff_t *ppos)
6844+{
6845+ unsigned long p = *ppos, ignored;
6846+ ssize_t written = 0, sz;
6847+ void __iomem *v;
6848+
6849+ while (count > 0) {
6850+ /*
6851+ * Handle first page in case it's not aligned
6852+ */
6853+ if (-p & (PAGE_SIZE - 1))
6854+ sz = -p & (PAGE_SIZE - 1);
6855+ else
6856+ sz = PAGE_SIZE;
6857+
6858+ sz = min_t(unsigned long, sz, count);
6859+
6860+ v = ioremap(p, sz);
6861+ if (v == NULL)
6862+ break;
6863+ if (IS_ERR(v)) {
6864+ if (written == 0)
6865+ return PTR_ERR(v);
6866+ break;
6867+ }
6868+
6869+ ignored = copy_from_user(v, buf, sz);
6870+ iounmap(v);
6871+ if (ignored) {
6872+ written += sz - ignored;
6873+ if (written)
6874+ break;
6875+ return -EFAULT;
6876+ }
6877+ buf += sz;
6878+ p += sz;
6879+ count -= sz;
6880+ written += sz;
6881+ }
6882+
6883+ *ppos += written;
6884+ return written;
6885+}
6886+
6887+#ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
6888+static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
6889+{
6890+ size_t size = vma->vm_end - vma->vm_start;
6891+
6892+ if (uncached_access(file))
6893+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
6894+
6895+ /* We want to return the real error code, not EAGAIN. */
6896+ return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
6897+ size, vma->vm_page_prot, DOMID_IO);
6898+}
6899+#endif
6900+
6901+/*
6902+ * The memory devices use the full 32/64 bits of the offset, and so we cannot
6903+ * check against negative addresses: they are ok. The return value is weird,
6904+ * though, in that case (0).
6905+ *
6906+ * also note that seeking relative to the "end of file" isn't supported:
6907+ * it has no meaning, so it returns -EINVAL.
6908+ */
6909+static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
6910+{
6911+ loff_t ret;
6912+
6913+ mutex_lock(&file->f_dentry->d_inode->i_mutex);
6914+ switch (orig) {
6915+ case 0:
6916+ file->f_pos = offset;
6917+ ret = file->f_pos;
6918+ force_successful_syscall_return();
6919+ break;
6920+ case 1:
6921+ file->f_pos += offset;
6922+ ret = file->f_pos;
6923+ force_successful_syscall_return();
6924+ break;
6925+ default:
6926+ ret = -EINVAL;
6927+ }
6928+ mutex_unlock(&file->f_dentry->d_inode->i_mutex);
6929+ return ret;
6930+}
6931+
6932+static int open_mem(struct inode * inode, struct file * filp)
6933+{
6934+ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
6935+}
6936+
6937+const struct file_operations mem_fops = {
6938+ .llseek = memory_lseek,
6939+ .read = read_mem,
6940+ .write = write_mem,
6941+ .mmap = xen_mmap_mem,
6942+ .open = open_mem,
6943+};
6944Index: head-2008-11-25/drivers/xen/console/Makefile
6945===================================================================
6946--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6947+++ head-2008-11-25/drivers/xen/console/Makefile 2007-06-12 13:13:44.000000000 +0200
6948@@ -0,0 +1,2 @@
6949+
6950+obj-y := console.o xencons_ring.o
6951Index: head-2008-11-25/drivers/xen/console/console.c
6952===================================================================
6953--- /dev/null 1970-01-01 00:00:00.000000000 +0000
6954+++ head-2008-11-25/drivers/xen/console/console.c 2007-10-15 09:39:38.000000000 +0200
6955@@ -0,0 +1,731 @@
6956+/******************************************************************************
6957+ * console.c
6958+ *
6959+ * Virtual console driver.
6960+ *
6961+ * Copyright (c) 2002-2004, K A Fraser.
6962+ *
6963+ * This program is free software; you can redistribute it and/or
6964+ * modify it under the terms of the GNU General Public License version 2
6965+ * as published by the Free Software Foundation; or, when distributed
6966+ * separately from the Linux kernel or incorporated into other
6967+ * software packages, subject to the following license:
6968+ *
6969+ * Permission is hereby granted, free of charge, to any person obtaining a copy
6970+ * of this source file (the "Software"), to deal in the Software without
6971+ * restriction, including without limitation the rights to use, copy, modify,
6972+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
6973+ * and to permit persons to whom the Software is furnished to do so, subject to
6974+ * the following conditions:
6975+ *
6976+ * The above copyright notice and this permission notice shall be included in
6977+ * all copies or substantial portions of the Software.
6978+ *
6979+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
6980+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6981+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
6982+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
6983+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
6984+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
6985+ * IN THE SOFTWARE.
6986+ */
6987+
6988+#include <linux/version.h>
6989+#include <linux/module.h>
6990+#include <linux/errno.h>
6991+#include <linux/signal.h>
6992+#include <linux/sched.h>
6993+#include <linux/interrupt.h>
6994+#include <linux/tty.h>
6995+#include <linux/tty_flip.h>
6996+#include <linux/serial.h>
6997+#include <linux/major.h>
6998+#include <linux/ptrace.h>
6999+#include <linux/ioport.h>
7000+#include <linux/mm.h>
7001+#include <linux/slab.h>
7002+#include <linux/init.h>
7003+#include <linux/console.h>
7004+#include <linux/bootmem.h>
7005+#include <linux/sysrq.h>
7006+#include <linux/screen_info.h>
7007+#include <linux/vt.h>
7008+#include <asm/io.h>
7009+#include <asm/irq.h>
7010+#include <asm/uaccess.h>
7011+#include <xen/interface/xen.h>
7012+#include <xen/interface/event_channel.h>
7013+#include <asm/hypervisor.h>
7014+#include <xen/evtchn.h>
7015+#include <xen/xenbus.h>
7016+#include <xen/xencons.h>
7017+
7018+/*
7019+ * Modes:
7020+ * 'xencons=off' [XC_OFF]: Console is disabled.
7021+ * 'xencons=tty' [XC_TTY]: Console attached to '/dev/tty[0-9]+'.
7022+ * 'xencons=ttyS' [XC_SERIAL]: Console attached to '/dev/ttyS[0-9]+'.
7023+ * 'xencons=xvc' [XC_XVC]: Console attached to '/dev/xvc0'.
7024+ * default: XC_XVC
7025+ *
7026+ * NB. In mode XC_TTY, we create dummy consoles for tty2-63. This suppresses
7027+ * warnings from standard distro startup scripts.
7028+ */
7029+static enum {
7030+ XC_OFF, XC_TTY, XC_SERIAL, XC_XVC
7031+} xc_mode = XC_XVC;
7032+static int xc_num = -1;
7033+
7034+/* /dev/xvc0 device number allocated by lanana.org. */
7035+#define XEN_XVC_MAJOR 204
7036+#define XEN_XVC_MINOR 191
7037+
7038+#ifdef CONFIG_MAGIC_SYSRQ
7039+static unsigned long sysrq_requested;
7040+extern int sysrq_enabled;
7041+#endif
7042+
7043+static int __init xencons_setup(char *str)
7044+{
7045+ char *q;
7046+ int n;
7047+ extern int console_use_vt;
7048+
7049+ console_use_vt = 1;
7050+ if (!strncmp(str, "ttyS", 4)) {
7051+ xc_mode = XC_SERIAL;
7052+ str += 4;
7053+ } else if (!strncmp(str, "tty", 3)) {
7054+ xc_mode = XC_TTY;
7055+ str += 3;
7056+ console_use_vt = 0;
7057+ } else if (!strncmp(str, "xvc", 3)) {
7058+ xc_mode = XC_XVC;
7059+ str += 3;
7060+ } else if (!strncmp(str, "off", 3)) {
7061+ xc_mode = XC_OFF;
7062+ str += 3;
7063+ }
7064+
7065+ n = simple_strtol(str, &q, 10);
7066+ if (q != str)
7067+ xc_num = n;
7068+
7069+ return 1;
7070+}
7071+__setup("xencons=", xencons_setup);
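+/*
+ * For illustration: a command line containing "xencons=ttyS0" selects
+ * XC_SERIAL with xc_num = 0, while "xencons=off" disables the console.
+ */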
7072+
7073+/* The kernel and user-land drivers share a common transmit buffer. */
7074+static unsigned int wbuf_size = 4096;
7075+#define WBUF_MASK(_i) ((_i)&(wbuf_size-1))
7076+static char *wbuf;
7077+static unsigned int wc, wp; /* write_cons, write_prod */
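+/*
+ * wc/wp are free-running consumer/producer indices; wbuf_size is kept a
+ * power of two (see xencons_bufsz_setup), so WBUF_MASK() wraps them into
+ * the buffer and (wp - wc) is the number of bytes queued.
+ */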
7078+
7079+static int __init xencons_bufsz_setup(char *str)
7080+{
7081+ unsigned int goal;
7082+ goal = simple_strtoul(str, NULL, 0);
7083+ if (goal) {
7084+ goal = roundup_pow_of_two(goal);
7085+ if (wbuf_size < goal)
7086+ wbuf_size = goal;
7087+ }
7088+ return 1;
7089+}
7090+__setup("xencons_bufsz=", xencons_bufsz_setup);
7091+
7092+/* This lock protects accesses to the common transmit buffer. */
7093+static DEFINE_SPINLOCK(xencons_lock);
7094+
7095+/* Common transmit-kick routine. */
7096+static void __xencons_tx_flush(void);
7097+
7098+static struct tty_driver *xencons_driver;
7099+
7100+/******************** Kernel console driver ********************************/
7101+
7102+static void kcons_write(struct console *c, const char *s, unsigned int count)
7103+{
7104+ int i = 0;
7105+ unsigned long flags;
7106+
7107+ spin_lock_irqsave(&xencons_lock, flags);
7108+
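+	/*
+	 * Stage bytes into the shared transmit buffer, emitting '\r' after
+	 * each '\n', and flush whenever the buffer is nearly full so the
+	 * outer loop can keep making progress.
+	 */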
7109+ while (i < count) {
7110+ for (; i < count; i++) {
7111+ if ((wp - wc) >= (wbuf_size - 1))
7112+ break;
7113+ if ((wbuf[WBUF_MASK(wp++)] = s[i]) == '\n')
7114+ wbuf[WBUF_MASK(wp++)] = '\r';
7115+ }
7116+
7117+ __xencons_tx_flush();
7118+ }
7119+
7120+ spin_unlock_irqrestore(&xencons_lock, flags);
7121+}
7122+
7123+static void kcons_write_dom0(struct console *c, const char *s, unsigned int count)
7124+{
7125+
7126+ while (count > 0) {
7127+ int rc;
7128+ rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
7129+ if (rc <= 0)
7130+ break;
7131+ count -= rc;
7132+ s += rc;
7133+ }
7134+}
7135+
7136+static struct tty_driver *kcons_device(struct console *c, int *index)
7137+{
7138+ *index = 0;
7139+ return xencons_driver;
7140+}
7141+
7142+static struct console kcons_info = {
7143+ .device = kcons_device,
7144+ .flags = CON_PRINTBUFFER | CON_ENABLED,
7145+ .index = -1,
7146+};
7147+
7148+static int __init xen_console_init(void)
7149+{
7150+ if (!is_running_on_xen())
7151+ goto out;
7152+
7153+ if (is_initial_xendomain()) {
7154+ kcons_info.write = kcons_write_dom0;
7155+ } else {
7156+ if (!xen_start_info->console.domU.evtchn)
7157+ goto out;
7158+ kcons_info.write = kcons_write;
7159+ }
7160+
7161+ switch (xc_mode) {
7162+ case XC_XVC:
7163+ strcpy(kcons_info.name, "xvc");
7164+ if (xc_num == -1)
7165+ xc_num = 0;
7166+ break;
7167+
7168+ case XC_SERIAL:
7169+ strcpy(kcons_info.name, "ttyS");
7170+ if (xc_num == -1)
7171+ xc_num = 0;
7172+ break;
7173+
7174+ case XC_TTY:
7175+ strcpy(kcons_info.name, "tty");
7176+ if (xc_num == -1)
7177+ xc_num = 1;
7178+ break;
7179+
7180+ default:
7181+ goto out;
7182+ }
7183+
7184+ wbuf = alloc_bootmem(wbuf_size);
7185+
7186+ register_console(&kcons_info);
7187+
7188+ out:
7189+ return 0;
7190+}
7191+console_initcall(xen_console_init);
7192+
7193+/*** Useful function for console debugging -- goes straight to Xen. ***/
7194+asmlinkage int xprintk(const char *fmt, ...)
7195+{
7196+ va_list args;
7197+ int printk_len;
7198+ static char printk_buf[1024];
7199+
7200+ /* Emit the output into the temporary buffer */
7201+ va_start(args, fmt);
7202+ printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args);
7203+ va_end(args);
7204+
7205+ /* Send the processed output directly to Xen. */
7206+ kcons_write_dom0(NULL, printk_buf, printk_len);
7207+
7208+ return 0;
7209+}
7210+
7211+/*** Forcibly flush console data before dying. ***/
7212+void xencons_force_flush(void)
7213+{
7214+ int sz;
7215+
7216+ /* Emergency console is synchronous, so there's nothing to flush. */
7217+ if (!is_running_on_xen() ||
7218+ is_initial_xendomain() ||
7219+ !xen_start_info->console.domU.evtchn)
7220+ return;
7221+
7222+ /* Spin until console data is flushed through to the daemon. */
7223+ while (wc != wp) {
7224+ int sent = 0;
7225+ if ((sz = wp - wc) == 0)
7226+ continue;
7227+ sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
7228+ if (sent > 0)
7229+ wc += sent;
7230+ }
7231+}
7232+
7233+
7234+void __init dom0_init_screen_info(const struct dom0_vga_console_info *info, size_t size)
7235+{
7236+ /* This is drawn from a dump from vgacon:startup in
7237+ * standard Linux. */
7238+ screen_info.orig_video_mode = 3;
7239+ screen_info.orig_video_isVGA = 1;
7240+ screen_info.orig_video_lines = 25;
7241+ screen_info.orig_video_cols = 80;
7242+ screen_info.orig_video_ega_bx = 3;
7243+ screen_info.orig_video_points = 16;
7244+ screen_info.orig_y = screen_info.orig_video_lines - 1;
7245+
7246+ switch (info->video_type) {
7247+ case XEN_VGATYPE_TEXT_MODE_3:
7248+ if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3)
7249+ + sizeof(info->u.text_mode_3))
7250+ break;
7251+ screen_info.orig_video_lines = info->u.text_mode_3.rows;
7252+ screen_info.orig_video_cols = info->u.text_mode_3.columns;
7253+ screen_info.orig_x = info->u.text_mode_3.cursor_x;
7254+ screen_info.orig_y = info->u.text_mode_3.cursor_y;
7255+ screen_info.orig_video_points =
7256+ info->u.text_mode_3.font_height;
7257+ break;
7258+
7259+ case XEN_VGATYPE_VESA_LFB:
7260+ if (size < offsetof(struct dom0_vga_console_info,
7261+ u.vesa_lfb.gbl_caps))
7262+ break;
7263+ screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
7264+ screen_info.lfb_width = info->u.vesa_lfb.width;
7265+ screen_info.lfb_height = info->u.vesa_lfb.height;
7266+ screen_info.lfb_depth = info->u.vesa_lfb.bits_per_pixel;
7267+ screen_info.lfb_base = info->u.vesa_lfb.lfb_base;
7268+ screen_info.lfb_size = info->u.vesa_lfb.lfb_size;
7269+ screen_info.lfb_linelength = info->u.vesa_lfb.bytes_per_line;
7270+ screen_info.red_size = info->u.vesa_lfb.red_size;
7271+ screen_info.red_pos = info->u.vesa_lfb.red_pos;
7272+ screen_info.green_size = info->u.vesa_lfb.green_size;
7273+ screen_info.green_pos = info->u.vesa_lfb.green_pos;
7274+ screen_info.blue_size = info->u.vesa_lfb.blue_size;
7275+ screen_info.blue_pos = info->u.vesa_lfb.blue_pos;
7276+ screen_info.rsvd_size = info->u.vesa_lfb.rsvd_size;
7277+ screen_info.rsvd_pos = info->u.vesa_lfb.rsvd_pos;
7278+ if (size >= offsetof(struct dom0_vga_console_info,
7279+ u.vesa_lfb.gbl_caps)
7280+ + sizeof(info->u.vesa_lfb.gbl_caps))
7281+ screen_info.capabilities = info->u.vesa_lfb.gbl_caps;
7282+ if (size >= offsetof(struct dom0_vga_console_info,
7283+ u.vesa_lfb.mode_attrs)
7284+ + sizeof(info->u.vesa_lfb.mode_attrs))
7285+ screen_info.vesa_attributes = info->u.vesa_lfb.mode_attrs;
7286+ break;
7287+ }
7288+}
7289+
7290+
7291+/******************** User-space console driver (/dev/console) ************/
7292+
7293+#define DRV(_d) (_d)
7294+#define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \
7295+ ((_tty)->index != (xc_num - 1)))
7296+
7297+static struct termios *xencons_termios[MAX_NR_CONSOLES];
7298+static struct termios *xencons_termios_locked[MAX_NR_CONSOLES];
7299+static struct tty_struct *xencons_tty;
7300+static int xencons_priv_irq;
7301+static char x_char;
7302+
7303+void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
7304+{
7305+ int i;
7306+ unsigned long flags;
7307+
7308+ spin_lock_irqsave(&xencons_lock, flags);
7309+ if (xencons_tty == NULL)
7310+ goto out;
7311+
7312+ for (i = 0; i < len; i++) {
7313+#ifdef CONFIG_MAGIC_SYSRQ
7314+ if (sysrq_enabled) {
7315+ if (buf[i] == '\x0f') { /* ^O */
7316+ if (!sysrq_requested) {
7317+ sysrq_requested = jiffies;
7318+ continue; /* don't print sysrq key */
7319+ }
7320+ sysrq_requested = 0;
7321+ } else if (sysrq_requested) {
7322+ unsigned long sysrq_timeout =
7323+ sysrq_requested + HZ*2;
7324+ sysrq_requested = 0;
7325+ if (time_before(jiffies, sysrq_timeout)) {
7326+ spin_unlock_irqrestore(
7327+ &xencons_lock, flags);
7328+ handle_sysrq(
7329+ buf[i], regs, xencons_tty);
7330+ spin_lock_irqsave(
7331+ &xencons_lock, flags);
7332+ continue;
7333+ }
7334+ }
7335+ }
7336+#endif
7337+ tty_insert_flip_char(xencons_tty, buf[i], 0);
7338+ }
7339+ tty_flip_buffer_push(xencons_tty);
7340+
7341+ out:
7342+ spin_unlock_irqrestore(&xencons_lock, flags);
7343+}
7344+
7345+static void __xencons_tx_flush(void)
7346+{
7347+ int sent, sz, work_done = 0;
7348+
7349+ if (x_char) {
7350+ if (is_initial_xendomain())
7351+ kcons_write_dom0(NULL, &x_char, 1);
7352+ else
7353+ while (x_char)
7354+ if (xencons_ring_send(&x_char, 1) == 1)
7355+ break;
7356+ x_char = 0;
7357+ work_done = 1;
7358+ }
7359+
7360+ while (wc != wp) {
7361+ sz = wp - wc;
7362+ if (sz > (wbuf_size - WBUF_MASK(wc)))
7363+ sz = wbuf_size - WBUF_MASK(wc);
7364+ if (is_initial_xendomain()) {
7365+ kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
7366+ wc += sz;
7367+ } else {
7368+ sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
7369+ if (sent == 0)
7370+ break;
7371+ wc += sent;
7372+ }
7373+ work_done = 1;
7374+ }
7375+
7376+ if (work_done && (xencons_tty != NULL)) {
7377+ wake_up_interruptible(&xencons_tty->write_wait);
7378+ if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
7379+ (xencons_tty->ldisc.write_wakeup != NULL))
7380+ (xencons_tty->ldisc.write_wakeup)(xencons_tty);
7381+ }
7382+}
7383+
7384+void xencons_tx(void)
7385+{
7386+ unsigned long flags;
7387+
7388+ spin_lock_irqsave(&xencons_lock, flags);
7389+ __xencons_tx_flush();
7390+ spin_unlock_irqrestore(&xencons_lock, flags);
7391+}
7392+
7393+/* Privileged receive callback and transmit kicker. */
7394+static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
7395+ struct pt_regs *regs)
7396+{
7397+ static char rbuf[16];
7398+ int l;
7399+
7400+ while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
7401+ xencons_rx(rbuf, l, regs);
7402+
7403+ xencons_tx();
7404+
7405+ return IRQ_HANDLED;
7406+}
7407+
7408+static int xencons_write_room(struct tty_struct *tty)
7409+{
7410+ return wbuf_size - (wp - wc);
7411+}
7412+
7413+static int xencons_chars_in_buffer(struct tty_struct *tty)
7414+{
7415+ return wp - wc;
7416+}
7417+
7418+static void xencons_send_xchar(struct tty_struct *tty, char ch)
7419+{
7420+ unsigned long flags;
7421+
7422+ if (DUMMY_TTY(tty))
7423+ return;
7424+
7425+ spin_lock_irqsave(&xencons_lock, flags);
7426+ x_char = ch;
7427+ __xencons_tx_flush();
7428+ spin_unlock_irqrestore(&xencons_lock, flags);
7429+}
7430+
7431+static void xencons_throttle(struct tty_struct *tty)
7432+{
7433+ if (DUMMY_TTY(tty))
7434+ return;
7435+
7436+ if (I_IXOFF(tty))
7437+ xencons_send_xchar(tty, STOP_CHAR(tty));
7438+}
7439+
7440+static void xencons_unthrottle(struct tty_struct *tty)
7441+{
7442+ if (DUMMY_TTY(tty))
7443+ return;
7444+
7445+ if (I_IXOFF(tty)) {
7446+ if (x_char != 0)
7447+ x_char = 0;
7448+ else
7449+ xencons_send_xchar(tty, START_CHAR(tty));
7450+ }
7451+}
7452+
7453+static void xencons_flush_buffer(struct tty_struct *tty)
7454+{
7455+ unsigned long flags;
7456+
7457+ if (DUMMY_TTY(tty))
7458+ return;
7459+
7460+ spin_lock_irqsave(&xencons_lock, flags);
7461+ wc = wp = 0;
7462+ spin_unlock_irqrestore(&xencons_lock, flags);
7463+}
7464+
7465+static inline int __xencons_put_char(int ch)
7466+{
7467+ char _ch = (char)ch;
7468+ if ((wp - wc) == wbuf_size)
7469+ return 0;
7470+ wbuf[WBUF_MASK(wp++)] = _ch;
7471+ return 1;
7472+}
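
The console's transmit path above (__xencons_tx_flush(), __xencons_put_char(), xencons_write_room(), xencons_chars_in_buffer()) treats wc and wp as free-running consume/produce counters over a power-of-two write buffer, with WBUF_MASK() (defined earlier in the driver, outside this excerpt) reducing them to array indices. A minimal standalone sketch of the same indexing scheme, using hypothetical names in place of wbuf/wc/wp/WBUF_MASK:

#include <stdio.h>

/* Stand-in for the driver's wbuf/wc/wp/WBUF_MASK scheme: the buffer size is
 * a power of two, so masking the free-running counters yields an index and
 * "prod - cons" is the occupancy even after the counters wrap. */
#define BUF_SIZE 16u
#define BUF_MASK(idx) ((idx) & (BUF_SIZE - 1))

static char buf[BUF_SIZE];
static unsigned int cons, prod;          /* free-running counters */

static int put_char(char c)
{
    if (prod - cons == BUF_SIZE)         /* full: producer is one lap ahead */
        return 0;
    buf[BUF_MASK(prod++)] = c;
    return 1;
}

static int get_char(char *c)
{
    if (cons == prod)                    /* empty */
        return 0;
    *c = buf[BUF_MASK(cons++)];
    return 1;
}

int main(void)
{
    char c;

    for (const char *p = "hello, ring"; *p; p++)
        put_char(*p);
    while (get_char(&c))
        putchar(c);
    putchar('\n');
    return 0;
}

Free space and occupancy fall out of the same arithmetic, which is exactly what xencons_write_room() and xencons_chars_in_buffer() return above.
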
7473+
7474+static int xencons_write(
7475+ struct tty_struct *tty,
7476+ const unsigned char *buf,
7477+ int count)
7478+{
7479+ int i;
7480+ unsigned long flags;
7481+
7482+ if (DUMMY_TTY(tty))
7483+ return count;
7484+
7485+ spin_lock_irqsave(&xencons_lock, flags);
7486+
7487+ for (i = 0; i < count; i++)
7488+ if (!__xencons_put_char(buf[i]))
7489+ break;
7490+
7491+ if (i != 0)
7492+ __xencons_tx_flush();
7493+
7494+ spin_unlock_irqrestore(&xencons_lock, flags);
7495+
7496+ return i;
7497+}
7498+
7499+static void xencons_put_char(struct tty_struct *tty, u_char ch)
7500+{
7501+ unsigned long flags;
7502+
7503+ if (DUMMY_TTY(tty))
7504+ return;
7505+
7506+ spin_lock_irqsave(&xencons_lock, flags);
7507+ (void)__xencons_put_char(ch);
7508+ spin_unlock_irqrestore(&xencons_lock, flags);
7509+}
7510+
7511+static void xencons_flush_chars(struct tty_struct *tty)
7512+{
7513+ unsigned long flags;
7514+
7515+ if (DUMMY_TTY(tty))
7516+ return;
7517+
7518+ spin_lock_irqsave(&xencons_lock, flags);
7519+ __xencons_tx_flush();
7520+ spin_unlock_irqrestore(&xencons_lock, flags);
7521+}
7522+
7523+static void xencons_wait_until_sent(struct tty_struct *tty, int timeout)
7524+{
7525+ unsigned long orig_jiffies = jiffies;
7526+
7527+ if (DUMMY_TTY(tty))
7528+ return;
7529+
7530+ while (DRV(tty->driver)->chars_in_buffer(tty)) {
7531+ set_current_state(TASK_INTERRUPTIBLE);
7532+ schedule_timeout(1);
7533+ if (signal_pending(current))
7534+ break;
7535+ if (timeout && time_after(jiffies, orig_jiffies + timeout))
7536+ break;
7537+ }
7538+
7539+ set_current_state(TASK_RUNNING);
7540+}
7541+
7542+static int xencons_open(struct tty_struct *tty, struct file *filp)
7543+{
7544+ unsigned long flags;
7545+
7546+ if (DUMMY_TTY(tty))
7547+ return 0;
7548+
7549+ spin_lock_irqsave(&xencons_lock, flags);
7550+ tty->driver_data = NULL;
7551+ if (xencons_tty == NULL)
7552+ xencons_tty = tty;
7553+ __xencons_tx_flush();
7554+ spin_unlock_irqrestore(&xencons_lock, flags);
7555+
7556+ return 0;
7557+}
7558+
7559+static void xencons_close(struct tty_struct *tty, struct file *filp)
7560+{
7561+ unsigned long flags;
7562+
7563+ if (DUMMY_TTY(tty))
7564+ return;
7565+
7566+ mutex_lock(&tty_mutex);
7567+
7568+ if (tty->count != 1) {
7569+ mutex_unlock(&tty_mutex);
7570+ return;
7571+ }
7572+
7573+ /* Prevent other threads from re-opening this tty. */
7574+ set_bit(TTY_CLOSING, &tty->flags);
7575+ mutex_unlock(&tty_mutex);
7576+
7577+ tty->closing = 1;
7578+ tty_wait_until_sent(tty, 0);
7579+ if (DRV(tty->driver)->flush_buffer != NULL)
7580+ DRV(tty->driver)->flush_buffer(tty);
7581+ if (tty->ldisc.flush_buffer != NULL)
7582+ tty->ldisc.flush_buffer(tty);
7583+ tty->closing = 0;
7584+ spin_lock_irqsave(&xencons_lock, flags);
7585+ xencons_tty = NULL;
7586+ spin_unlock_irqrestore(&xencons_lock, flags);
7587+}
7588+
7589+static struct tty_operations xencons_ops = {
7590+ .open = xencons_open,
7591+ .close = xencons_close,
7592+ .write = xencons_write,
7593+ .write_room = xencons_write_room,
7594+ .put_char = xencons_put_char,
7595+ .flush_chars = xencons_flush_chars,
7596+ .chars_in_buffer = xencons_chars_in_buffer,
7597+ .send_xchar = xencons_send_xchar,
7598+ .flush_buffer = xencons_flush_buffer,
7599+ .throttle = xencons_throttle,
7600+ .unthrottle = xencons_unthrottle,
7601+ .wait_until_sent = xencons_wait_until_sent,
7602+};
7603+
7604+static int __init xencons_init(void)
7605+{
7606+ int rc;
7607+
7608+ if (!is_running_on_xen())
7609+ return -ENODEV;
7610+
7611+ if (xc_mode == XC_OFF)
7612+ return 0;
7613+
7614+ if (!is_initial_xendomain()) {
7615+ rc = xencons_ring_init();
7616+ if (rc)
7617+ return rc;
7618+ }
7619+
7620+ xencons_driver = alloc_tty_driver((xc_mode == XC_TTY) ?
7621+ MAX_NR_CONSOLES : 1);
7622+ if (xencons_driver == NULL)
7623+ return -ENOMEM;
7624+
7625+ DRV(xencons_driver)->name = "xencons";
7626+ DRV(xencons_driver)->major = TTY_MAJOR;
7627+ DRV(xencons_driver)->type = TTY_DRIVER_TYPE_SERIAL;
7628+ DRV(xencons_driver)->subtype = SERIAL_TYPE_NORMAL;
7629+ DRV(xencons_driver)->init_termios = tty_std_termios;
7630+ DRV(xencons_driver)->flags =
7631+ TTY_DRIVER_REAL_RAW |
7632+ TTY_DRIVER_RESET_TERMIOS;
7633+ DRV(xencons_driver)->termios = xencons_termios;
7634+ DRV(xencons_driver)->termios_locked = xencons_termios_locked;
7635+
7636+ switch (xc_mode) {
7637+ case XC_XVC:
7638+ DRV(xencons_driver)->name = "xvc";
7639+ DRV(xencons_driver)->major = XEN_XVC_MAJOR;
7640+ DRV(xencons_driver)->minor_start = XEN_XVC_MINOR;
7641+ DRV(xencons_driver)->name_base = xc_num;
7642+ break;
7643+ case XC_SERIAL:
7644+ DRV(xencons_driver)->name = "ttyS";
7645+ DRV(xencons_driver)->minor_start = 64 + xc_num;
7646+ DRV(xencons_driver)->name_base = xc_num;
7647+ break;
7648+ default:
7649+ DRV(xencons_driver)->name = "tty";
7650+ DRV(xencons_driver)->minor_start = 1;
7651+ DRV(xencons_driver)->name_base = 1;
7652+ break;
7653+ }
7654+
7655+ tty_set_operations(xencons_driver, &xencons_ops);
7656+
7657+ if ((rc = tty_register_driver(DRV(xencons_driver))) != 0) {
7658+ printk("WARNING: Failed to register Xen virtual "
7659+ "console driver as '%s%d'\n",
7660+ DRV(xencons_driver)->name,
7661+ DRV(xencons_driver)->name_base);
7662+ put_tty_driver(xencons_driver);
7663+ xencons_driver = NULL;
7664+ return rc;
7665+ }
7666+
7667+ if (is_initial_xendomain()) {
7668+ xencons_priv_irq = bind_virq_to_irqhandler(
7669+ VIRQ_CONSOLE,
7670+ 0,
7671+ xencons_priv_interrupt,
7672+ 0,
7673+ "console",
7674+ NULL);
7675+ BUG_ON(xencons_priv_irq < 0);
7676+ }
7677+
7678+ printk("Xen virtual console successfully installed as %s%d\n",
7679+ DRV(xencons_driver)->name, xc_num);
7680+
7681+ return 0;
7682+}
7683+
7684+module_init(xencons_init);
7685+
7686+MODULE_LICENSE("Dual BSD/GPL");
7687Index: head-2008-11-25/drivers/xen/console/xencons_ring.c
7688===================================================================
7689--- /dev/null 1970-01-01 00:00:00.000000000 +0000
7690+++ head-2008-11-25/drivers/xen/console/xencons_ring.c 2007-06-12 13:13:44.000000000 +0200
7691@@ -0,0 +1,143 @@
7692+/*
7693+ * This program is free software; you can redistribute it and/or
7694+ * modify it under the terms of the GNU General Public License version 2
7695+ * as published by the Free Software Foundation; or, when distributed
7696+ * separately from the Linux kernel or incorporated into other
7697+ * software packages, subject to the following license:
7698+ *
7699+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7700+ * of this source file (the "Software"), to deal in the Software without
7701+ * restriction, including without limitation the rights to use, copy, modify,
7702+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
7703+ * and to permit persons to whom the Software is furnished to do so, subject to
7704+ * the following conditions:
7705+ *
7706+ * The above copyright notice and this permission notice shall be included in
7707+ * all copies or substantial portions of the Software.
7708+ *
7709+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7710+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7711+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7712+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7713+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
7714+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
7715+ * IN THE SOFTWARE.
7716+ */
7717+
7718+#include <linux/version.h>
7719+#include <linux/module.h>
7720+#include <linux/errno.h>
7721+#include <linux/signal.h>
7722+#include <linux/sched.h>
7723+#include <linux/interrupt.h>
7724+#include <linux/tty.h>
7725+#include <linux/tty_flip.h>
7726+#include <linux/serial.h>
7727+#include <linux/major.h>
7728+#include <linux/ptrace.h>
7729+#include <linux/ioport.h>
7730+#include <linux/mm.h>
7731+#include <linux/slab.h>
7732+
7733+#include <asm/hypervisor.h>
7734+#include <xen/evtchn.h>
7735+#include <xen/xencons.h>
7736+#include <linux/wait.h>
7737+#include <linux/interrupt.h>
7738+#include <linux/sched.h>
7739+#include <linux/err.h>
7740+#include <xen/interface/io/console.h>
7741+
7742+static int xencons_irq;
7743+
7744+static inline struct xencons_interface *xencons_interface(void)
7745+{
7746+ return mfn_to_virt(xen_start_info->console.domU.mfn);
7747+}
7748+
7749+static inline void notify_daemon(void)
7750+{
7751+ /* Use evtchn: this is called early, before irq is set up. */
7752+ notify_remote_via_evtchn(xen_start_info->console.domU.evtchn);
7753+}
7754+
7755+int xencons_ring_send(const char *data, unsigned len)
7756+{
7757+ int sent = 0;
7758+ struct xencons_interface *intf = xencons_interface();
7759+ XENCONS_RING_IDX cons, prod;
7760+
7761+ cons = intf->out_cons;
7762+ prod = intf->out_prod;
7763+ mb();
7764+ BUG_ON((prod - cons) > sizeof(intf->out));
7765+
7766+ while ((sent < len) && ((prod - cons) < sizeof(intf->out)))
7767+ intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
7768+
7769+ wmb();
7770+ intf->out_prod = prod;
7771+
7772+ notify_daemon();
7773+
7774+ return sent;
7775+}
7776+
7777+static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
7778+{
7779+ struct xencons_interface *intf = xencons_interface();
7780+ XENCONS_RING_IDX cons, prod;
7781+
7782+ cons = intf->in_cons;
7783+ prod = intf->in_prod;
7784+ mb();
7785+ BUG_ON((prod - cons) > sizeof(intf->in));
7786+
7787+ while (cons != prod) {
7788+ xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
7789+ cons++;
7790+ }
7791+
7792+ mb();
7793+ intf->in_cons = cons;
7794+
7795+ notify_daemon();
7796+
7797+ xencons_tx();
7798+
7799+ return IRQ_HANDLED;
7800+}
7801+
7802+int xencons_ring_init(void)
7803+{
7804+ int irq;
7805+
7806+ if (xencons_irq)
7807+ unbind_from_irqhandler(xencons_irq, NULL);
7808+ xencons_irq = 0;
7809+
7810+ if (!is_running_on_xen() ||
7811+ is_initial_xendomain() ||
7812+ !xen_start_info->console.domU.evtchn)
7813+ return -ENODEV;
7814+
7815+ irq = bind_caller_port_to_irqhandler(
7816+ xen_start_info->console.domU.evtchn,
7817+ handle_input, 0, "xencons", NULL);
7818+ if (irq < 0) {
7819+ printk(KERN_ERR "XEN console request irq failed %i\n", irq);
7820+ return irq;
7821+ }
7822+
7823+ xencons_irq = irq;
7824+
7825+ /* In case we have in-flight data after save/restore... */
7826+ notify_daemon();
7827+
7828+ return 0;
7829+}
7830+
7831+void xencons_resume(void)
7832+{
7833+ (void)xencons_ring_init();
7834+}
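
xencons_ring_send() and handle_input() above implement a single-producer/single-consumer shared ring: each side reads both free-running indices, issues a barrier, moves the data, then publishes its own index and notifies the peer through the event channel. A self-contained toy model of that protocol follows; the names are invented, and __sync_synchronize() stands in for the kernel's mb()/wmb() (a full barrier, so stronger than strictly required where the real code only needs wmb()).

#include <stdio.h>

/* Toy model of the xencons shared ring.  In the real driver the ring lives
 * in a page shared with the console backend and the indices are free-running;
 * MASK_RING_IDX() wraps them into the buffer. */
#define RING_SIZE 16u
#define MASK_RING_IDX(idx) ((idx) & (RING_SIZE - 1))

struct toy_ring {
    char buf[RING_SIZE];
    unsigned int cons, prod;
};

static unsigned int ring_send(struct toy_ring *r, const char *data, unsigned int len)
{
    unsigned int sent = 0;
    unsigned int cons = r->cons, prod = r->prod;

    __sync_synchronize();                /* read indices before touching data */
    while (sent < len && prod - cons < RING_SIZE)
        r->buf[MASK_RING_IDX(prod++)] = data[sent++];
    __sync_synchronize();                /* data must be visible before the index */
    r->prod = prod;
    /* ...the real driver now kicks the peer via notify_remote_via_evtchn() */
    return sent;
}

static void ring_drain(struct toy_ring *r)
{
    unsigned int cons = r->cons, prod = r->prod;

    __sync_synchronize();
    while (cons != prod)
        putchar(r->buf[MASK_RING_IDX(cons++)]);
    __sync_synchronize();
    r->cons = cons;
}

int main(void)
{
    struct toy_ring r = { .cons = 0, .prod = 0 };

    printf("sent %u bytes\n", ring_send(&r, "xen console\n", 12));
    ring_drain(&r);
    return 0;
}
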
7835Index: head-2008-11-25/drivers/xen/core/Makefile
7836===================================================================
7837--- /dev/null 1970-01-01 00:00:00.000000000 +0000
7838+++ head-2008-11-25/drivers/xen/core/Makefile 2008-07-21 11:00:33.000000000 +0200
7839@@ -0,0 +1,14 @@
7840+#
7841+# Makefile for the linux kernel.
7842+#
7843+
7844+obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o firmware.o
7845+
7846+obj-$(CONFIG_PCI) += pci.o
7847+obj-$(CONFIG_PROC_FS) += xen_proc.o
7848+obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor_sysfs.o
7849+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
7850+obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
7851+obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o
7852+obj-$(CONFIG_KEXEC) += machine_kexec.o
7853+obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
7854Index: head-2008-11-25/drivers/xen/core/cpu_hotplug.c
7855===================================================================
7856--- /dev/null 1970-01-01 00:00:00.000000000 +0000
7857+++ head-2008-11-25/drivers/xen/core/cpu_hotplug.c 2008-01-21 11:15:26.000000000 +0100
7858@@ -0,0 +1,173 @@
7859+#include <linux/init.h>
7860+#include <linux/kernel.h>
7861+#include <linux/sched.h>
7862+#include <linux/notifier.h>
7863+#include <linux/cpu.h>
7864+#include <xen/cpu_hotplug.h>
7865+#include <xen/xenbus.h>
7866+
7867+/*
7868+ * Set of CPUs that remote admin software will allow us to bring online.
7869+ * Notified to us via xenbus.
7870+ */
7871+static cpumask_t xenbus_allowed_cpumask;
7872+
7873+/* Set of CPUs that local admin will allow us to bring online. */
7874+static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
7875+
7876+static int local_cpu_hotplug_request(void)
7877+{
7878+ /*
7879+ * We assume a CPU hotplug request comes from local admin if it is made
7880+ * via a userspace process (i.e., one with a real mm_struct).
7881+ */
7882+ return (current->mm != NULL);
7883+}
7884+
7885+static void vcpu_hotplug(unsigned int cpu)
7886+{
7887+ int err;
7888+ char dir[32], state[32];
7889+
7890+ if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
7891+ return;
7892+
7893+ sprintf(dir, "cpu/%u", cpu);
7894+ err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
7895+ if (err != 1) {
7896+ printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
7897+ return;
7898+ }
7899+
7900+ if (strcmp(state, "online") == 0) {
7901+ cpu_set(cpu, xenbus_allowed_cpumask);
7902+ (void)cpu_up(cpu);
7903+ } else if (strcmp(state, "offline") == 0) {
7904+ cpu_clear(cpu, xenbus_allowed_cpumask);
7905+ (void)cpu_down(cpu);
7906+ } else {
7907+ printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
7908+ state, cpu);
7909+ }
7910+}
7911+
7912+static void handle_vcpu_hotplug_event(
7913+ struct xenbus_watch *watch, const char **vec, unsigned int len)
7914+{
7915+ unsigned int cpu;
7916+ char *cpustr;
7917+ const char *node = vec[XS_WATCH_PATH];
7918+
7919+ if ((cpustr = strstr(node, "cpu/")) != NULL) {
7920+ sscanf(cpustr, "cpu/%u", &cpu);
7921+ vcpu_hotplug(cpu);
7922+ }
7923+}
7924+
7925+static int smpboot_cpu_notify(struct notifier_block *notifier,
7926+ unsigned long action, void *hcpu)
7927+{
7928+ unsigned int cpu = (long)hcpu;
7929+
7930+ /*
7931+ * We do this in a callback notifier rather than __cpu_disable()
7932+ * because local_cpu_hotplug_request() does not work in the latter
7933+ * as it's always executed from within a stopmachine kthread.
7934+ */
7935+ if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
7936+ cpu_clear(cpu, local_allowed_cpumask);
7937+
7938+ return NOTIFY_OK;
7939+}
7940+
7941+static int setup_cpu_watcher(struct notifier_block *notifier,
7942+ unsigned long event, void *data)
7943+{
7944+ unsigned int i;
7945+
7946+ static struct xenbus_watch cpu_watch = {
7947+ .node = "cpu",
7948+ .callback = handle_vcpu_hotplug_event,
7949+ .flags = XBWF_new_thread };
7950+ (void)register_xenbus_watch(&cpu_watch);
7951+
7952+ if (!is_initial_xendomain()) {
7953+ for_each_possible_cpu(i)
7954+ vcpu_hotplug(i);
7955+ printk(KERN_INFO "Brought up %ld CPUs\n",
7956+ (long)num_online_cpus());
7957+ }
7958+
7959+ return NOTIFY_DONE;
7960+}
7961+
7962+static int __init setup_vcpu_hotplug_event(void)
7963+{
7964+ static struct notifier_block hotplug_cpu = {
7965+ .notifier_call = smpboot_cpu_notify };
7966+ static struct notifier_block xsn_cpu = {
7967+ .notifier_call = setup_cpu_watcher };
7968+
7969+ if (!is_running_on_xen())
7970+ return -ENODEV;
7971+
7972+ register_cpu_notifier(&hotplug_cpu);
7973+ register_xenstore_notifier(&xsn_cpu);
7974+
7975+ return 0;
7976+}
7977+
7978+arch_initcall(setup_vcpu_hotplug_event);
7979+
7980+int smp_suspend(void)
7981+{
7982+ unsigned int cpu;
7983+ int err;
7984+
7985+ for_each_online_cpu(cpu) {
7986+ if (cpu == 0)
7987+ continue;
7988+ err = cpu_down(cpu);
7989+ if (err) {
7990+ printk(KERN_CRIT "Failed to take all CPUs "
7991+ "down: %d.\n", err);
7992+ for_each_possible_cpu(cpu)
7993+ vcpu_hotplug(cpu);
7994+ return err;
7995+ }
7996+ }
7997+
7998+ return 0;
7999+}
8000+
8001+void smp_resume(void)
8002+{
8003+ unsigned int cpu;
8004+
8005+ for_each_possible_cpu(cpu)
8006+ vcpu_hotplug(cpu);
8007+}
8008+
8009+int cpu_up_check(unsigned int cpu)
8010+{
8011+ int rc = 0;
8012+
8013+ if (local_cpu_hotplug_request()) {
8014+ cpu_set(cpu, local_allowed_cpumask);
8015+ if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
8016+ printk("%s: attempt to bring up CPU %u disallowed by "
8017+ "remote admin.\n", __FUNCTION__, cpu);
8018+ rc = -EBUSY;
8019+ }
8020+ } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
8021+ !cpu_isset(cpu, xenbus_allowed_cpumask)) {
8022+ rc = -EBUSY;
8023+ }
8024+
8025+ return rc;
8026+}
8027+
8028+void init_xenbus_allowed_cpumask(void)
8029+{
8030+ xenbus_allowed_cpumask = cpu_present_map;
8031+}
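
handle_vcpu_hotplug_event() above recovers the CPU number from the watched xenstore path by locating the "cpu/" component and sscanf-ing the index; vcpu_hotplug() then re-reads that CPU's "availability" node and brings the CPU up or down. The parsing itself is plain libc; a standalone check with a representative (illustrative) watch path:

#include <stdio.h>
#include <string.h>

/* Same parsing as handle_vcpu_hotplug_event(): find "cpu/" in the watched
 * node path and pull out the CPU index.  The path below is an example; the
 * real one arrives with the xenstore watch callback. */
int main(void)
{
    const char *node = "cpu/3/availability";
    const char *cpustr = strstr(node, "cpu/");
    unsigned int cpu;

    if (cpustr && sscanf(cpustr, "cpu/%u", &cpu) == 1)
        printf("hotplug event for CPU %u\n", cpu);
    return 0;
}
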
8032Index: head-2008-11-25/drivers/xen/core/evtchn.c
8033===================================================================
8034--- /dev/null 1970-01-01 00:00:00.000000000 +0000
8035+++ head-2008-11-25/drivers/xen/core/evtchn.c 2008-11-10 11:44:21.000000000 +0100
8036@@ -0,0 +1,1140 @@
8037+/******************************************************************************
8038+ * evtchn.c
8039+ *
8040+ * Communication via Xen event channels.
8041+ *
8042+ * Copyright (c) 2002-2005, K A Fraser
8043+ *
8044+ * This program is free software; you can redistribute it and/or
8045+ * modify it under the terms of the GNU General Public License version 2
8046+ * as published by the Free Software Foundation; or, when distributed
8047+ * separately from the Linux kernel or incorporated into other
8048+ * software packages, subject to the following license:
8049+ *
8050+ * Permission is hereby granted, free of charge, to any person obtaining a copy
8051+ * of this source file (the "Software"), to deal in the Software without
8052+ * restriction, including without limitation the rights to use, copy, modify,
8053+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
8054+ * and to permit persons to whom the Software is furnished to do so, subject to
8055+ * the following conditions:
8056+ *
8057+ * The above copyright notice and this permission notice shall be included in
8058+ * all copies or substantial portions of the Software.
8059+ *
8060+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
8061+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
8062+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
8063+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
8064+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
8065+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
8066+ * IN THE SOFTWARE.
8067+ */
8068+
8069+#include <linux/module.h>
8070+#include <linux/irq.h>
8071+#include <linux/interrupt.h>
8072+#include <linux/sched.h>
8073+#include <linux/kernel_stat.h>
8074+#include <linux/version.h>
8075+#include <asm/atomic.h>
8076+#include <asm/system.h>
8077+#include <asm/ptrace.h>
8078+#include <asm/synch_bitops.h>
8079+#include <xen/evtchn.h>
8080+#include <xen/interface/event_channel.h>
8081+#include <xen/interface/physdev.h>
8082+#include <asm/hypervisor.h>
8083+#include <linux/mc146818rtc.h> /* RTC_IRQ */
8084+
8085+/*
8086+ * This lock protects updates to the following mapping and reference-count
8087+ * arrays. The lock does not need to be acquired to read the mapping tables.
8088+ */
8089+static DEFINE_SPINLOCK(irq_mapping_update_lock);
8090+
8091+/* IRQ <-> event-channel mappings. */
8092+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
8093+ [0 ... NR_EVENT_CHANNELS-1] = -1 };
8094+
8095+/* Packed IRQ information: binding type, sub-type index, and event channel. */
8096+static u32 irq_info[NR_IRQS];
8097+
8098+/* Binding types. */
8099+enum {
8100+ IRQT_UNBOUND,
8101+ IRQT_PIRQ,
8102+ IRQT_VIRQ,
8103+ IRQT_IPI,
8104+ IRQT_LOCAL_PORT,
8105+ IRQT_CALLER_PORT,
8106+ _IRQT_COUNT
8107+};
8108+
8109+#define _IRQT_BITS 4
8110+#define _EVTCHN_BITS 12
8111+#define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS)
8112+
8113+/* Constructor for packed IRQ information. */
8114+static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn)
8115+{
8116+ BUILD_BUG_ON(_IRQT_COUNT > (1U << _IRQT_BITS));
8117+
8118+ BUILD_BUG_ON(NR_PIRQS > (1U << _INDEX_BITS));
8119+ BUILD_BUG_ON(NR_VIRQS > (1U << _INDEX_BITS));
8120+ BUILD_BUG_ON(NR_IPIS > (1U << _INDEX_BITS));
8121+ BUG_ON(index >> _INDEX_BITS);
8122+
8123+ BUILD_BUG_ON(NR_EVENT_CHANNELS > (1U << _EVTCHN_BITS));
8124+
8125+ return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn);
8126+}
8127+
8128+/* Convenient shorthand for packed representation of an unbound IRQ. */
8129+#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
8130+
8131+/*
8132+ * Accessors for packed IRQ information.
8133+ */
8134+
8135+static inline unsigned int evtchn_from_irq(int irq)
8136+{
8137+ return irq_info[irq] & ((1U << _EVTCHN_BITS) - 1);
8138+}
8139+
8140+static inline unsigned int index_from_irq(int irq)
8141+{
8142+ return (irq_info[irq] >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1);
8143+}
8144+
8145+static inline unsigned int type_from_irq(int irq)
8146+{
8147+ return irq_info[irq] >> (32 - _IRQT_BITS);
8148+}
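
mk_irq_info() and the three accessors above pack an IRQ's binding type, sub-type index, and event channel into one 32-bit word: 4 type bits at the top (_IRQT_BITS), 12 event-channel bits at the bottom (_EVTCHN_BITS), and the remaining 16 bits for the index. A standalone round-trip of the same layout, useful for convincing yourself that the accessors invert the constructor:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same bit layout as irq_info[]: | 4 type bits | 16 index bits | 12 evtchn bits | */
#define IRQT_BITS    4
#define EVTCHN_BITS  12
#define INDEX_BITS   (32 - IRQT_BITS - EVTCHN_BITS)

static uint32_t mk_info(uint32_t type, uint32_t index, uint32_t evtchn)
{
    return (type << (32 - IRQT_BITS)) | (index << EVTCHN_BITS) | evtchn;
}

static uint32_t evtchn_of(uint32_t info) { return info & ((1u << EVTCHN_BITS) - 1); }
static uint32_t index_of(uint32_t info)  { return (info >> EVTCHN_BITS) & ((1u << INDEX_BITS) - 1); }
static uint32_t type_of(uint32_t info)   { return info >> (32 - IRQT_BITS); }

int main(void)
{
    /* e.g. a VIRQ binding (IRQT_VIRQ is 2 in the enum above): virq 5 on event channel 42 */
    uint32_t info = mk_info(2, 5, 42);

    printf("info = 0x%08x\n", (unsigned)info);
    assert(type_of(info) == 2 && index_of(info) == 5 && evtchn_of(info) == 42);
    return 0;
}

The BUILD_BUG_ON()s in mk_irq_info() are what keep this scheme honest: they fail the build if NR_PIRQS, NR_VIRQS, NR_IPIS or NR_EVENT_CHANNELS ever outgrow their bit fields.
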
8149+
8150+/* IRQ <-> VIRQ mapping. */
8151+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
8152+
8153+/* IRQ <-> IPI mapping. */
8154+#ifndef NR_IPIS
8155+#define NR_IPIS 1
8156+#endif
8157+DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
8158+
8159+/* Reference counts for bindings to IRQs. */
8160+static int irq_bindcount[NR_IRQS];
8161+
8162+/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
8163+static DECLARE_BITMAP(pirq_needs_eoi, NR_PIRQS);
8164+
8165+#ifdef CONFIG_SMP
8166+
8167+static u8 cpu_evtchn[NR_EVENT_CHANNELS];
8168+static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
8169+
8170+static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
8171+ unsigned int idx)
8172+{
8173+ return (sh->evtchn_pending[idx] &
8174+ cpu_evtchn_mask[cpu][idx] &
8175+ ~sh->evtchn_mask[idx]);
8176+}
8177+
8178+static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
8179+{
8180+ shared_info_t *s = HYPERVISOR_shared_info;
8181+ int irq = evtchn_to_irq[chn];
8182+
8183+ BUG_ON(!test_bit(chn, s->evtchn_mask));
8184+
8185+ if (irq != -1)
8186+ set_native_irq_info(irq, cpumask_of_cpu(cpu));
8187+
8188+ clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]);
8189+ set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
8190+ cpu_evtchn[chn] = cpu;
8191+}
8192+
8193+static void init_evtchn_cpu_bindings(void)
8194+{
8195+ int i;
8196+
8197+ /* By default all event channels notify CPU#0. */
8198+ for (i = 0; i < NR_IRQS; i++)
8199+ set_native_irq_info(i, cpumask_of_cpu(0));
8200+
8201+ memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
8202+ memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
8203+}
8204+
8205+static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
8206+{
8207+ return cpu_evtchn[evtchn];
8208+}
8209+
8210+#else
8211+
8212+static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
8213+ unsigned int idx)
8214+{
8215+ return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]);
8216+}
8217+
8218+static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
8219+{
8220+}
8221+
8222+static void init_evtchn_cpu_bindings(void)
8223+{
8224+}
8225+
8226+static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
8227+{
8228+ return 0;
8229+}
8230+
8231+#endif
8232+
8233+/* Upcall to generic IRQ layer. */
8234+#ifdef CONFIG_X86
8235+extern fastcall unsigned int do_IRQ(struct pt_regs *regs);
8236+void __init xen_init_IRQ(void);
8237+void __init init_IRQ(void)
8238+{
8239+ irq_ctx_init(0);
8240+ xen_init_IRQ();
8241+}
8242+#if defined (__i386__)
8243+static inline void exit_idle(void) {}
8244+#define IRQ_REG orig_eax
8245+#elif defined (__x86_64__)
8246+#include <asm/idle.h>
8247+#define IRQ_REG orig_rax
8248+#endif
8249+#define do_IRQ(irq, regs) do { \
8250+ (regs)->IRQ_REG = ~(irq); \
8251+ do_IRQ((regs)); \
8252+} while (0)
8253+#endif
8254+
8255+/* Xen will never allocate port zero for any purpose. */
8256+#define VALID_EVTCHN(chn) ((chn) != 0)
8257+
8258+/*
8259+ * Force a proper event-channel callback from Xen after clearing the
8260+ * callback mask. We do this in a very simple manner, by making a call
8261+ * down into Xen. The pending flag will be checked by Xen on return.
8262+ */
8263+void force_evtchn_callback(void)
8264+{
8265+ VOID(HYPERVISOR_xen_version(0, NULL));
8266+}
8267+/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
8268+EXPORT_SYMBOL(force_evtchn_callback);
8269+
8270+static DEFINE_PER_CPU(unsigned int, upcall_count) = { 0 };
8271+static DEFINE_PER_CPU(unsigned int, last_processed_l1i) = { BITS_PER_LONG - 1 };
8272+static DEFINE_PER_CPU(unsigned int, last_processed_l2i) = { BITS_PER_LONG - 1 };
8273+
8274+/* NB. Interrupts are disabled on entry. */
8275+asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
8276+{
8277+ unsigned long l1, l2;
8278+ unsigned long masked_l1, masked_l2;
8279+ unsigned int l1i, l2i, port, count;
8280+ int irq;
8281+ unsigned int cpu = smp_processor_id();
8282+ shared_info_t *s = HYPERVISOR_shared_info;
8283+ vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
8284+
8285+ exit_idle();
8286+ irq_enter();
8287+
8288+ do {
8289+ /* Avoid a callback storm when we reenable delivery. */
8290+ vcpu_info->evtchn_upcall_pending = 0;
8291+
8292+ /* Nested invocations bail immediately. */
8293+ if (unlikely(per_cpu(upcall_count, cpu)++))
8294+ break;
8295+
8296+#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
8297+ /* Clear master flag /before/ clearing selector flag. */
8298+ wmb();
8299+#endif
8300+ l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
8301+
8302+ l1i = per_cpu(last_processed_l1i, cpu);
8303+ l2i = per_cpu(last_processed_l2i, cpu);
8304+
8305+ while (l1 != 0) {
8306+
8307+ l1i = (l1i + 1) % BITS_PER_LONG;
8308+ masked_l1 = l1 & ((~0UL) << l1i);
8309+
8310+ if (masked_l1 == 0) { /* if we masked out all events, wrap around to the beginning */
8311+ l1i = BITS_PER_LONG - 1;
8312+ l2i = BITS_PER_LONG - 1;
8313+ continue;
8314+ }
8315+ l1i = __ffs(masked_l1);
8316+
8317+ do {
8318+ l2 = active_evtchns(cpu, s, l1i);
8319+
8320+ l2i = (l2i + 1) % BITS_PER_LONG;
8321+ masked_l2 = l2 & ((~0UL) << l2i);
8322+
8323+ if (masked_l2 == 0) { /* if we masked out all events, move on */
8324+ l2i = BITS_PER_LONG - 1;
8325+ break;
8326+ }
8327+
8328+ l2i = __ffs(masked_l2);
8329+
8330+ /* process port */
8331+ port = (l1i * BITS_PER_LONG) + l2i;
8332+ if ((irq = evtchn_to_irq[port]) != -1)
8333+ do_IRQ(irq, regs);
8334+ else
8335+ evtchn_device_upcall(port);
8336+
8337+ /* if this is the final port processed, we'll pick up here+1 next time */
8338+ per_cpu(last_processed_l1i, cpu) = l1i;
8339+ per_cpu(last_processed_l2i, cpu) = l2i;
8340+
8341+ } while (l2i != BITS_PER_LONG - 1);
8342+
8343+ l2 = active_evtchns(cpu, s, l1i);
8344+ if (l2 == 0) /* we handled all ports, so we can clear the selector bit */
8345+ l1 &= ~(1UL << l1i);
8346+
8347+ }
8348+
8349+ /* If there were nested callbacks then we have more to do. */
8350+ count = per_cpu(upcall_count, cpu);
8351+ per_cpu(upcall_count, cpu) = 0;
8352+ } while (unlikely(count != 1));
8353+
8354+ irq_exit();
8355+}
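
evtchn_do_upcall() above walks a two-level pending bitmap (the per-VCPU selector word, then one word of event channels per selector bit) and resumes each level just past the bit it serviced last, so a constantly re-raised low-numbered port cannot starve higher-numbered ones. A simplified single-level model of that fairness trick (the real code applies it at both levels and re-reads the pending words from the shared-info page):

#include <stdio.h>
#include <strings.h>    /* ffs() */

/* Scan a pending bitmap, starting each pass just after the last bit serviced. */
#define NBITS 32

static unsigned int last_serviced = NBITS - 1;

static void service(unsigned int port)
{
    printf("servicing port %u\n", port);
}

static void scan(unsigned int pending)
{
    while (pending) {
        unsigned int start = (last_serviced + 1) % NBITS;
        unsigned int masked = pending & (~0u << start);

        if (!masked) {                   /* wrapped: resume from bit 0 */
            last_serviced = NBITS - 1;
            continue;
        }
        unsigned int bit = (unsigned int)ffs((int)masked) - 1;

        service(bit);
        pending &= ~(1u << bit);         /* the real code clears the channel's pending bit */
        last_serviced = bit;
    }
}

int main(void)
{
    scan((1u << 3) | (1u << 7) | (1u << 20));
    scan((1u << 3) | (1u << 7));         /* starts after bit 20, wraps, then services 3 and 7 */
    return 0;
}
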
8356+
8357+static int find_unbound_irq(void)
8358+{
8359+ static int warned;
8360+ int irq;
8361+
8362+ for (irq = DYNIRQ_BASE; irq < (DYNIRQ_BASE + NR_DYNIRQS); irq++)
8363+ if (irq_bindcount[irq] == 0)
8364+ return irq;
8365+
8366+ if (!warned) {
8367+ warned = 1;
8368+ printk(KERN_WARNING "No available IRQ to bind to: "
8369+ "increase NR_DYNIRQS.\n");
8370+ }
8371+
8372+ return -ENOSPC;
8373+}
8374+
8375+static int bind_caller_port_to_irq(unsigned int caller_port)
8376+{
8377+ int irq;
8378+
8379+ spin_lock(&irq_mapping_update_lock);
8380+
8381+ if ((irq = evtchn_to_irq[caller_port]) == -1) {
8382+ if ((irq = find_unbound_irq()) < 0)
8383+ goto out;
8384+
8385+ evtchn_to_irq[caller_port] = irq;
8386+ irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port);
8387+ }
8388+
8389+ irq_bindcount[irq]++;
8390+
8391+ out:
8392+ spin_unlock(&irq_mapping_update_lock);
8393+ return irq;
8394+}
8395+
8396+static int bind_local_port_to_irq(unsigned int local_port)
8397+{
8398+ int irq;
8399+
8400+ spin_lock(&irq_mapping_update_lock);
8401+
8402+ BUG_ON(evtchn_to_irq[local_port] != -1);
8403+
8404+ if ((irq = find_unbound_irq()) < 0) {
8405+ struct evtchn_close close = { .port = local_port };
8406+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
8407+ BUG();
8408+ goto out;
8409+ }
8410+
8411+ evtchn_to_irq[local_port] = irq;
8412+ irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
8413+ irq_bindcount[irq]++;
8414+
8415+ out:
8416+ spin_unlock(&irq_mapping_update_lock);
8417+ return irq;
8418+}
8419+
8420+static int bind_listening_port_to_irq(unsigned int remote_domain)
8421+{
8422+ struct evtchn_alloc_unbound alloc_unbound;
8423+ int err;
8424+
8425+ alloc_unbound.dom = DOMID_SELF;
8426+ alloc_unbound.remote_dom = remote_domain;
8427+
8428+ err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
8429+ &alloc_unbound);
8430+
8431+ return err ? : bind_local_port_to_irq(alloc_unbound.port);
8432+}
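
The return statement in bind_listening_port_to_irq() above (and again in bind_interdomain_evtchn_to_irq() just below) relies on the GCC "conditional with omitted operand" extension: "x ? : y" evaluates to x when x is non-zero and to y otherwise, evaluating x only once. Here that means a non-zero hypercall error is returned as-is, and only on success is the freshly allocated port bound to an IRQ. A two-line illustration:

#include <stdio.h>

/* "x ?: y" is GNU C shorthand for "x ? x : y", with x evaluated once. */
static int fallback(void) { return 42; }

int main(void)
{
    int err = 0;
    printf("%d\n", err ?: fallback());   /* err is zero, so prints 42 */
    err = -5;
    printf("%d\n", err ?: fallback());   /* non-zero err short-circuits: prints -5 */
    return 0;
}
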
8433+
8434+static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
8435+ unsigned int remote_port)
8436+{
8437+ struct evtchn_bind_interdomain bind_interdomain;
8438+ int err;
8439+
8440+ bind_interdomain.remote_dom = remote_domain;
8441+ bind_interdomain.remote_port = remote_port;
8442+
8443+ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
8444+ &bind_interdomain);
8445+
8446+ return err ? : bind_local_port_to_irq(bind_interdomain.local_port);
8447+}
8448+
8449+static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
8450+{
8451+ struct evtchn_bind_virq bind_virq;
8452+ int evtchn, irq;
8453+
8454+ spin_lock(&irq_mapping_update_lock);
8455+
8456+ if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
8457+ if ((irq = find_unbound_irq()) < 0)
8458+ goto out;
8459+
8460+ bind_virq.virq = virq;
8461+ bind_virq.vcpu = cpu;
8462+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
8463+ &bind_virq) != 0)
8464+ BUG();
8465+ evtchn = bind_virq.port;
8466+
8467+ evtchn_to_irq[evtchn] = irq;
8468+ irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
8469+
8470+ per_cpu(virq_to_irq, cpu)[virq] = irq;
8471+
8472+ bind_evtchn_to_cpu(evtchn, cpu);
8473+ }
8474+
8475+ irq_bindcount[irq]++;
8476+
8477+ out:
8478+ spin_unlock(&irq_mapping_update_lock);
8479+ return irq;
8480+}
8481+
8482+static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
8483+{
8484+ struct evtchn_bind_ipi bind_ipi;
8485+ int evtchn, irq;
8486+
8487+ spin_lock(&irq_mapping_update_lock);
8488+
8489+ if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {
8490+ if ((irq = find_unbound_irq()) < 0)
8491+ goto out;
8492+
8493+ bind_ipi.vcpu = cpu;
8494+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
8495+ &bind_ipi) != 0)
8496+ BUG();
8497+ evtchn = bind_ipi.port;
8498+
8499+ evtchn_to_irq[evtchn] = irq;
8500+ irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
8501+
8502+ per_cpu(ipi_to_irq, cpu)[ipi] = irq;
8503+
8504+ bind_evtchn_to_cpu(evtchn, cpu);
8505+ }
8506+
8507+ irq_bindcount[irq]++;
8508+
8509+ out:
8510+ spin_unlock(&irq_mapping_update_lock);
8511+ return irq;
8512+}
8513+
8514+static void unbind_from_irq(unsigned int irq)
8515+{
8516+ struct evtchn_close close;
8517+ unsigned int cpu;
8518+ int evtchn = evtchn_from_irq(irq);
8519+
8520+ spin_lock(&irq_mapping_update_lock);
8521+
8522+ if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
8523+ close.port = evtchn;
8524+ if ((type_from_irq(irq) != IRQT_CALLER_PORT) &&
8525+ HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
8526+ BUG();
8527+
8528+ switch (type_from_irq(irq)) {
8529+ case IRQT_VIRQ:
8530+ per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
8531+ [index_from_irq(irq)] = -1;
8532+ break;
8533+ case IRQT_IPI:
8534+ per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
8535+ [index_from_irq(irq)] = -1;
8536+ break;
8537+ default:
8538+ break;
8539+ }
8540+
8541+ /* Closed ports are implicitly re-bound to VCPU0. */
8542+ bind_evtchn_to_cpu(evtchn, 0);
8543+
8544+ evtchn_to_irq[evtchn] = -1;
8545+ irq_info[irq] = IRQ_UNBOUND;
8546+
8547+ /* Zap stats across IRQ changes of use. */
8548+ for_each_possible_cpu(cpu)
8549+ kstat_cpu(cpu).irqs[irq] = 0;
8550+ }
8551+
8552+ spin_unlock(&irq_mapping_update_lock);
8553+}
8554+
8555+int bind_caller_port_to_irqhandler(
8556+ unsigned int caller_port,
8557+ irqreturn_t (*handler)(int, void *, struct pt_regs *),
8558+ unsigned long irqflags,
8559+ const char *devname,
8560+ void *dev_id)
8561+{
8562+ int irq, retval;
8563+
8564+ irq = bind_caller_port_to_irq(caller_port);
8565+ if (irq < 0)
8566+ return irq;
8567+
8568+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
8569+ if (retval != 0) {
8570+ unbind_from_irq(irq);
8571+ return retval;
8572+ }
8573+
8574+ return irq;
8575+}
8576+EXPORT_SYMBOL_GPL(bind_caller_port_to_irqhandler);
8577+
8578+int bind_listening_port_to_irqhandler(
8579+ unsigned int remote_domain,
8580+ irqreturn_t (*handler)(int, void *, struct pt_regs *),
8581+ unsigned long irqflags,
8582+ const char *devname,
8583+ void *dev_id)
8584+{
8585+ int irq, retval;
8586+
8587+ irq = bind_listening_port_to_irq(remote_domain);
8588+ if (irq < 0)
8589+ return irq;
8590+
8591+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
8592+ if (retval != 0) {
8593+ unbind_from_irq(irq);
8594+ return retval;
8595+ }
8596+
8597+ return irq;
8598+}
8599+EXPORT_SYMBOL_GPL(bind_listening_port_to_irqhandler);
8600+
8601+int bind_interdomain_evtchn_to_irqhandler(
8602+ unsigned int remote_domain,
8603+ unsigned int remote_port,
8604+ irqreturn_t (*handler)(int, void *, struct pt_regs *),
8605+ unsigned long irqflags,
8606+ const char *devname,
8607+ void *dev_id)
8608+{
8609+ int irq, retval;
8610+
8611+ irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
8612+ if (irq < 0)
8613+ return irq;
8614+
8615+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
8616+ if (retval != 0) {
8617+ unbind_from_irq(irq);
8618+ return retval;
8619+ }
8620+
8621+ return irq;
8622+}
8623+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
8624+
8625+int bind_virq_to_irqhandler(
8626+ unsigned int virq,
8627+ unsigned int cpu,
8628+ irqreturn_t (*handler)(int, void *, struct pt_regs *),
8629+ unsigned long irqflags,
8630+ const char *devname,
8631+ void *dev_id)
8632+{
8633+ int irq, retval;
8634+
8635+ irq = bind_virq_to_irq(virq, cpu);
8636+ if (irq < 0)
8637+ return irq;
8638+
8639+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
8640+ if (retval != 0) {
8641+ unbind_from_irq(irq);
8642+ return retval;
8643+ }
8644+
8645+ return irq;
8646+}
8647+EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
8648+
8649+int bind_ipi_to_irqhandler(
8650+ unsigned int ipi,
8651+ unsigned int cpu,
8652+ irqreturn_t (*handler)(int, void *, struct pt_regs *),
8653+ unsigned long irqflags,
8654+ const char *devname,
8655+ void *dev_id)
8656+{
8657+ int irq, retval;
8658+
8659+ irq = bind_ipi_to_irq(ipi, cpu);
8660+ if (irq < 0)
8661+ return irq;
8662+
8663+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
8664+ if (retval != 0) {
8665+ unbind_from_irq(irq);
8666+ return retval;
8667+ }
8668+
8669+ return irq;
8670+}
8671+EXPORT_SYMBOL_GPL(bind_ipi_to_irqhandler);
8672+
8673+void unbind_from_irqhandler(unsigned int irq, void *dev_id)
8674+{
8675+ free_irq(irq, dev_id);
8676+ unbind_from_irq(irq);
8677+}
8678+EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
8679+
8680+#ifdef CONFIG_SMP
8681+void rebind_evtchn_to_cpu(int port, unsigned int cpu)
8682+{
8683+ struct evtchn_bind_vcpu ebv = { .port = port, .vcpu = cpu };
8684+ int masked;
8685+
8686+ masked = test_and_set_evtchn_mask(port);
8687+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv) == 0)
8688+ bind_evtchn_to_cpu(port, cpu);
8689+ if (!masked)
8690+ unmask_evtchn(port);
8691+}
8692+
8693+static void rebind_irq_to_cpu(unsigned int irq, unsigned int tcpu)
8694+{
8695+ int evtchn = evtchn_from_irq(irq);
8696+
8697+ if (VALID_EVTCHN(evtchn))
8698+ rebind_evtchn_to_cpu(evtchn, tcpu);
8699+}
8700+
8701+static void set_affinity_irq(unsigned int irq, cpumask_t dest)
8702+{
8703+ unsigned tcpu = first_cpu(dest);
8704+ rebind_irq_to_cpu(irq, tcpu);
8705+}
8706+#endif
8707+
8708+int resend_irq_on_evtchn(unsigned int irq)
8709+{
8710+ int masked, evtchn = evtchn_from_irq(irq);
8711+ shared_info_t *s = HYPERVISOR_shared_info;
8712+
8713+ if (!VALID_EVTCHN(evtchn))
8714+ return 1;
8715+
8716+ masked = test_and_set_evtchn_mask(evtchn);
8717+ synch_set_bit(evtchn, s->evtchn_pending);
8718+ if (!masked)
8719+ unmask_evtchn(evtchn);
8720+
8721+ return 1;
8722+}
8723+
8724+/*
8725+ * Interface to generic handling in irq.c
8726+ */
8727+
8728+static unsigned int startup_dynirq(unsigned int irq)
8729+{
8730+ int evtchn = evtchn_from_irq(irq);
8731+
8732+ if (VALID_EVTCHN(evtchn))
8733+ unmask_evtchn(evtchn);
8734+ return 0;
8735+}
8736+
8737+static void shutdown_dynirq(unsigned int irq)
8738+{
8739+ int evtchn = evtchn_from_irq(irq);
8740+
8741+ if (VALID_EVTCHN(evtchn))
8742+ mask_evtchn(evtchn);
8743+}
8744+
8745+static void enable_dynirq(unsigned int irq)
8746+{
8747+ int evtchn = evtchn_from_irq(irq);
8748+
8749+ if (VALID_EVTCHN(evtchn))
8750+ unmask_evtchn(evtchn);
8751+}
8752+
8753+static void disable_dynirq(unsigned int irq)
8754+{
8755+ int evtchn = evtchn_from_irq(irq);
8756+
8757+ if (VALID_EVTCHN(evtchn))
8758+ mask_evtchn(evtchn);
8759+}
8760+
8761+static void ack_dynirq(unsigned int irq)
8762+{
8763+ int evtchn = evtchn_from_irq(irq);
8764+
8765+ move_native_irq(irq);
8766+
8767+ if (VALID_EVTCHN(evtchn)) {
8768+ mask_evtchn(evtchn);
8769+ clear_evtchn(evtchn);
8770+ }
8771+}
8772+
8773+static void end_dynirq(unsigned int irq)
8774+{
8775+ int evtchn = evtchn_from_irq(irq);
8776+
8777+ if (VALID_EVTCHN(evtchn) && !(irq_desc[irq].status & IRQ_DISABLED))
8778+ unmask_evtchn(evtchn);
8779+}
8780+
8781+static struct hw_interrupt_type dynirq_type = {
8782+ .typename = "Dynamic-irq",
8783+ .startup = startup_dynirq,
8784+ .shutdown = shutdown_dynirq,
8785+ .enable = enable_dynirq,
8786+ .disable = disable_dynirq,
8787+ .ack = ack_dynirq,
8788+ .end = end_dynirq,
8789+#ifdef CONFIG_SMP
8790+ .set_affinity = set_affinity_irq,
8791+#endif
8792+ .retrigger = resend_irq_on_evtchn,
8793+};
8794+
8795+static inline void pirq_unmask_notify(int irq)
8796+{
8797+ struct physdev_eoi eoi = { .irq = evtchn_get_xen_pirq(irq) };
8798+ if (unlikely(test_bit(irq - PIRQ_BASE, pirq_needs_eoi)))
8799+ VOID(HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi));
8800+}
8801+
8802+static inline void pirq_query_unmask(int irq)
8803+{
8804+ struct physdev_irq_status_query irq_status;
8805+ irq_status.irq = evtchn_get_xen_pirq(irq);
8806+ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
8807+ irq_status.flags = 0;
8808+ clear_bit(irq - PIRQ_BASE, pirq_needs_eoi);
8809+ if (irq_status.flags & XENIRQSTAT_needs_eoi)
8810+ set_bit(irq - PIRQ_BASE, pirq_needs_eoi);
8811+}
8812+
8813+/*
8814+ * On startup, if there is no action associated with the IRQ then we are
8815+ * probing. In this case we should not share with others as it will confuse us.
8816+ */
8817+#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL)
8818+
8819+static unsigned int startup_pirq(unsigned int irq)
8820+{
8821+ struct evtchn_bind_pirq bind_pirq;
8822+ int evtchn = evtchn_from_irq(irq);
8823+
8824+ if (VALID_EVTCHN(evtchn))
8825+ goto out;
8826+
8827+ bind_pirq.pirq = evtchn_get_xen_pirq(irq);
8828+ /* NB. We are happy to share unless we are probing. */
8829+ bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE;
8830+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) {
8831+ if (!probing_irq(irq))
8832+ printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
8833+ irq);
8834+ return 0;
8835+ }
8836+ evtchn = bind_pirq.port;
8837+
8838+ pirq_query_unmask(irq);
8839+
8840+ evtchn_to_irq[evtchn] = irq;
8841+ bind_evtchn_to_cpu(evtchn, 0);
8842+ irq_info[irq] = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
8843+
8844+ out:
8845+ unmask_evtchn(evtchn);
8846+ pirq_unmask_notify(irq);
8847+
8848+ return 0;
8849+}
8850+
8851+static void shutdown_pirq(unsigned int irq)
8852+{
8853+ struct evtchn_close close;
8854+ int evtchn = evtchn_from_irq(irq);
8855+
8856+ if (!VALID_EVTCHN(evtchn))
8857+ return;
8858+
8859+ mask_evtchn(evtchn);
8860+
8861+ close.port = evtchn;
8862+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
8863+ BUG();
8864+
8865+ bind_evtchn_to_cpu(evtchn, 0);
8866+ evtchn_to_irq[evtchn] = -1;
8867+ irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
8868+}
8869+
8870+static void enable_pirq(unsigned int irq)
8871+{
8872+ startup_pirq(irq);
8873+}
8874+
8875+static void disable_pirq(unsigned int irq)
8876+{
8877+}
8878+
8879+static void ack_pirq(unsigned int irq)
8880+{
8881+ int evtchn = evtchn_from_irq(irq);
8882+
8883+ move_native_irq(irq);
8884+
8885+ if (VALID_EVTCHN(evtchn)) {
8886+ mask_evtchn(evtchn);
8887+ clear_evtchn(evtchn);
8888+ }
8889+}
8890+
8891+static void end_pirq(unsigned int irq)
8892+{
8893+ int evtchn = evtchn_from_irq(irq);
8894+
8895+ if ((irq_desc[irq].status & (IRQ_DISABLED|IRQ_PENDING)) ==
8896+ (IRQ_DISABLED|IRQ_PENDING)) {
8897+ shutdown_pirq(irq);
8898+ } else if (VALID_EVTCHN(evtchn)) {
8899+ unmask_evtchn(evtchn);
8900+ pirq_unmask_notify(irq);
8901+ }
8902+}
8903+
8904+static struct hw_interrupt_type pirq_type = {
8905+ .typename = "Phys-irq",
8906+ .startup = startup_pirq,
8907+ .shutdown = shutdown_pirq,
8908+ .enable = enable_pirq,
8909+ .disable = disable_pirq,
8910+ .ack = ack_pirq,
8911+ .end = end_pirq,
8912+#ifdef CONFIG_SMP
8913+ .set_affinity = set_affinity_irq,
8914+#endif
8915+ .retrigger = resend_irq_on_evtchn,
8916+};
8917+
8918+int irq_ignore_unhandled(unsigned int irq)
8919+{
8920+ struct physdev_irq_status_query irq_status = { .irq = irq };
8921+
8922+ if (!is_running_on_xen())
8923+ return 0;
8924+
8925+ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
8926+ return 0;
8927+ return !!(irq_status.flags & XENIRQSTAT_shared);
8928+}
8929+
8930+void notify_remote_via_irq(int irq)
8931+{
8932+ int evtchn = evtchn_from_irq(irq);
8933+
8934+ if (VALID_EVTCHN(evtchn))
8935+ notify_remote_via_evtchn(evtchn);
8936+}
8937+EXPORT_SYMBOL_GPL(notify_remote_via_irq);
8938+
8939+int irq_to_evtchn_port(int irq)
8940+{
8941+ return evtchn_from_irq(irq);
8942+}
8943+EXPORT_SYMBOL_GPL(irq_to_evtchn_port);
8944+
8945+void mask_evtchn(int port)
8946+{
8947+ shared_info_t *s = HYPERVISOR_shared_info;
8948+ synch_set_bit(port, s->evtchn_mask);
8949+}
8950+EXPORT_SYMBOL_GPL(mask_evtchn);
8951+
8952+void unmask_evtchn(int port)
8953+{
8954+ shared_info_t *s = HYPERVISOR_shared_info;
8955+ unsigned int cpu = smp_processor_id();
8956+ vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
8957+
8958+ BUG_ON(!irqs_disabled());
8959+
8960+ /* Slow path (hypercall) if this is a non-local port. */
8961+ if (unlikely(cpu != cpu_from_evtchn(port))) {
8962+ struct evtchn_unmask unmask = { .port = port };
8963+ VOID(HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask));
8964+ return;
8965+ }
8966+
8967+ synch_clear_bit(port, s->evtchn_mask);
8968+
8969+ /* Did we miss an interrupt 'edge'? Re-fire if so. */
8970+ if (synch_test_bit(port, s->evtchn_pending) &&
8971+ !synch_test_and_set_bit(port / BITS_PER_LONG,
8972+ &vcpu_info->evtchn_pending_sel))
8973+ vcpu_info->evtchn_upcall_pending = 1;
8974+}
8975+EXPORT_SYMBOL_GPL(unmask_evtchn);
8976+
8977+void disable_all_local_evtchn(void)
8978+{
8979+ unsigned i, cpu = smp_processor_id();
8980+ shared_info_t *s = HYPERVISOR_shared_info;
8981+
8982+ for (i = 0; i < NR_EVENT_CHANNELS; ++i)
8983+ if (cpu_from_evtchn(i) == cpu)
8984+ synch_set_bit(i, &s->evtchn_mask[0]);
8985+}
8986+
8987+static void restore_cpu_virqs(unsigned int cpu)
8988+{
8989+ struct evtchn_bind_virq bind_virq;
8990+ int virq, irq, evtchn;
8991+
8992+ for (virq = 0; virq < NR_VIRQS; virq++) {
8993+ if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
8994+ continue;
8995+
8996+ BUG_ON(irq_info[irq] != mk_irq_info(IRQT_VIRQ, virq, 0));
8997+
8998+ /* Get a new binding from Xen. */
8999+ bind_virq.virq = virq;
9000+ bind_virq.vcpu = cpu;
9001+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
9002+ &bind_virq) != 0)
9003+ BUG();
9004+ evtchn = bind_virq.port;
9005+
9006+ /* Record the new mapping. */
9007+ evtchn_to_irq[evtchn] = irq;
9008+ irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
9009+ bind_evtchn_to_cpu(evtchn, cpu);
9010+
9011+ /* Ready for use. */
9012+ unmask_evtchn(evtchn);
9013+ }
9014+}
9015+
9016+static void restore_cpu_ipis(unsigned int cpu)
9017+{
9018+ struct evtchn_bind_ipi bind_ipi;
9019+ int ipi, irq, evtchn;
9020+
9021+ for (ipi = 0; ipi < NR_IPIS; ipi++) {
9022+ if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
9023+ continue;
9024+
9025+ BUG_ON(irq_info[irq] != mk_irq_info(IRQT_IPI, ipi, 0));
9026+
9027+ /* Get a new binding from Xen. */
9028+ bind_ipi.vcpu = cpu;
9029+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
9030+ &bind_ipi) != 0)
9031+ BUG();
9032+ evtchn = bind_ipi.port;
9033+
9034+ /* Record the new mapping. */
9035+ evtchn_to_irq[evtchn] = irq;
9036+ irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
9037+ bind_evtchn_to_cpu(evtchn, cpu);
9038+
9039+ /* Ready for use. */
9040+ unmask_evtchn(evtchn);
9041+
9042+ }
9043+}
9044+
9045+void irq_resume(void)
9046+{
9047+ unsigned int cpu, irq, evtchn;
9048+
9049+ init_evtchn_cpu_bindings();
9050+
9051+ /* New event-channel space is not 'live' yet. */
9052+ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
9053+ mask_evtchn(evtchn);
9054+
9055+ /* Check that no PIRQs are still bound. */
9056+ for (irq = PIRQ_BASE; irq < (PIRQ_BASE + NR_PIRQS); irq++)
9057+ BUG_ON(irq_info[irq] != IRQ_UNBOUND);
9058+
9059+ /* No IRQ <-> event-channel mappings. */
9060+ for (irq = 0; irq < NR_IRQS; irq++)
9061+ irq_info[irq] &= ~((1U << _EVTCHN_BITS) - 1);
9062+ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
9063+ evtchn_to_irq[evtchn] = -1;
9064+
9065+ for_each_possible_cpu(cpu) {
9066+ restore_cpu_virqs(cpu);
9067+ restore_cpu_ipis(cpu);
9068+ }
9069+
9070+}
9071+
9072+#if defined(CONFIG_X86_IO_APIC)
9073+#define identity_mapped_irq(irq) (!IO_APIC_IRQ((irq) - PIRQ_BASE))
9074+#elif defined(CONFIG_X86)
9075+#define identity_mapped_irq(irq) (((irq) - PIRQ_BASE) < 16)
9076+#else
9077+#define identity_mapped_irq(irq) (1)
9078+#endif
9079+
9080+void evtchn_register_pirq(int irq)
9081+{
9082+ BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE > NR_PIRQS);
9083+ if (identity_mapped_irq(irq))
9084+ return;
9085+ irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0);
9086+ irq_desc[irq].chip = &pirq_type;
9087+}
9088+
9089+int evtchn_map_pirq(int irq, int xen_pirq)
9090+{
9091+ if (irq < 0) {
9092+ static DEFINE_SPINLOCK(irq_alloc_lock);
9093+
9094+ irq = PIRQ_BASE + NR_PIRQS - 1;
9095+ spin_lock(&irq_alloc_lock);
9096+ do {
9097+ if (identity_mapped_irq(irq))
9098+ continue;
9099+ if (!index_from_irq(irq)) {
9100+ BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
9101+ irq_info[irq] = mk_irq_info(IRQT_PIRQ,
9102+ xen_pirq, 0);
9103+ break;
9104+ }
9105+ } while (--irq >= PIRQ_BASE);
9106+ spin_unlock(&irq_alloc_lock);
9107+ if (irq < PIRQ_BASE)
9108+ return -ENOSPC;
9109+ irq_desc[irq].chip = &pirq_type;
9110+ } else if (!xen_pirq) {
9111+ if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
9112+ return -EINVAL;
9113+ irq_desc[irq].chip = &no_irq_type;
9114+ irq_info[irq] = IRQ_UNBOUND;
9115+ return 0;
9116+ } else if (type_from_irq(irq) != IRQT_PIRQ
9117+ || index_from_irq(irq) != xen_pirq) {
9118+ printk(KERN_ERR "IRQ#%d is already mapped to %d:%u - "
9119+ "cannot map to PIRQ#%u\n",
9120+ irq, type_from_irq(irq), index_from_irq(irq), xen_pirq);
9121+ return -EINVAL;
9122+ }
9123+ return index_from_irq(irq) ? irq : -EINVAL;
9124+}
9125+
9126+int evtchn_get_xen_pirq(int irq)
9127+{
9128+ if (identity_mapped_irq(irq))
9129+ return irq;
9130+ BUG_ON(type_from_irq(irq) != IRQT_PIRQ);
9131+ return index_from_irq(irq);
9132+}
9133+
9134+void __init xen_init_IRQ(void)
9135+{
9136+ unsigned int i;
9137+
9138+ init_evtchn_cpu_bindings();
9139+
9140+ /* No event channels are 'live' right now. */
9141+ for (i = 0; i < NR_EVENT_CHANNELS; i++)
9142+ mask_evtchn(i);
9143+
9144+ /* No IRQ -> event-channel mappings. */
9145+ for (i = 0; i < NR_IRQS; i++)
9146+ irq_info[i] = IRQ_UNBOUND;
9147+
9148+ /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
9149+ for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
9150+ irq_bindcount[i] = 0;
9151+
9152+ irq_desc[i].status = IRQ_DISABLED|IRQ_NOPROBE;
9153+ irq_desc[i].action = NULL;
9154+ irq_desc[i].depth = 1;
9155+ irq_desc[i].chip = &dynirq_type;
9156+ }
9157+
9158+ /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
9159+ for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_PIRQS); i++) {
9160+ irq_bindcount[i] = 1;
9161+
9162+ if (!identity_mapped_irq(i))
9163+ continue;
9164+
9165+#ifdef RTC_IRQ
9166+ /* If not domain 0, force our RTC driver to fail its probe. */
9167+ if (i - PIRQ_BASE == RTC_IRQ && !is_initial_xendomain())
9168+ continue;
9169+#endif
9170+
9171+ irq_desc[i].status = IRQ_DISABLED;
9172+ irq_desc[i].action = NULL;
9173+ irq_desc[i].depth = 1;
9174+ irq_desc[i].chip = &pirq_type;
9175+ }
9176+}
9177Index: head-2008-11-25/drivers/xen/core/features.c
9178===================================================================
9179--- /dev/null 1970-01-01 00:00:00.000000000 +0000
9180+++ head-2008-11-25/drivers/xen/core/features.c 2007-06-12 13:13:44.000000000 +0200
9181@@ -0,0 +1,34 @@
9182+/******************************************************************************
9183+ * features.c
9184+ *
9185+ * Xen feature flags.
9186+ *
9187+ * Copyright (c) 2006, Ian Campbell, XenSource Inc.
9188+ */
9189+#include <linux/types.h>
9190+#include <linux/cache.h>
9191+#include <linux/module.h>
9192+#include <asm/hypervisor.h>
9193+#include <xen/features.h>
9194+
9195+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
9196+#include <xen/platform-compat.h>
9197+#endif
9198+
9199+u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
9200+/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
9201+EXPORT_SYMBOL(xen_features);
9202+
9203+void setup_xen_features(void)
9204+{
9205+ xen_feature_info_t fi;
9206+ int i, j;
9207+
9208+ for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
9209+ fi.submap_idx = i;
9210+ if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
9211+ break;
9212+ for (j=0; j<32; j++)
9213+ xen_features[i*32+j] = !!(fi.submap & 1<<j);
9214+ }
9215+}
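
setup_xen_features() above fetches up to XENFEAT_NR_SUBMAPS 32-bit submaps from the hypervisor and expands each one into a byte per feature, so a later feature test is a plain array lookup. The unpacking is ordinary bit arithmetic; a self-contained version, with an invented submap value standing in for the XENVER_get_features hypercall:

#include <stdio.h>
#include <stdint.h>

/* Mirror of the unpacking loop in setup_xen_features(): one byte per feature
 * bit.  The submap value is made up; the real one comes from
 * HYPERVISOR_xen_version(XENVER_get_features, ...). */
#define NR_SUBMAPS 1
static uint8_t features[NR_SUBMAPS * 32];

int main(void)
{
    uint32_t submap = 0x00000005;        /* pretend features 0 and 2 are advertised */

    for (int j = 0; j < 32; j++)
        features[j] = !!(submap & (1u << j));

    for (int f = 0; f < 4; f++)
        printf("feature %d: %s\n", f, features[f] ? "present" : "absent");
    return 0;
}
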
9216Index: head-2008-11-25/drivers/xen/core/firmware.c
9217===================================================================
9218--- /dev/null 1970-01-01 00:00:00.000000000 +0000
9219+++ head-2008-11-25/drivers/xen/core/firmware.c 2007-06-22 09:08:06.000000000 +0200
9220@@ -0,0 +1,74 @@
9221+#include <linux/kernel.h>
9222+#include <linux/errno.h>
9223+#include <linux/init.h>
9224+#include <linux/edd.h>
9225+#include <video/edid.h>
9226+#include <xen/interface/platform.h>
9227+#include <asm/hypervisor.h>
9228+
9229+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
9230+void __init copy_edd(void)
9231+{
9232+ int ret;
9233+ struct xen_platform_op op;
9234+
9235+ if (!is_initial_xendomain())
9236+ return;
9237+
9238+ op.cmd = XENPF_firmware_info;
9239+
9240+ op.u.firmware_info.type = XEN_FW_DISK_INFO;
9241+ for (op.u.firmware_info.index = 0;
9242+ edd.edd_info_nr < EDDMAXNR;
9243+ op.u.firmware_info.index++) {
9244+ struct edd_info *info = edd.edd_info + edd.edd_info_nr;
9245+
9246+ info->params.length = sizeof(info->params);
9247+ set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
9248+ &info->params);
9249+ ret = HYPERVISOR_platform_op(&op);
9250+ if (ret)
9251+ break;
9252+
9253+#define C(x) info->x = op.u.firmware_info.u.disk_info.x
9254+ C(device);
9255+ C(version);
9256+ C(interface_support);
9257+ C(legacy_max_cylinder);
9258+ C(legacy_max_head);
9259+ C(legacy_sectors_per_track);
9260+#undef C
9261+
9262+ edd.edd_info_nr++;
9263+ }
9264+
9265+ op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
9266+ for (op.u.firmware_info.index = 0;
9267+ edd.mbr_signature_nr < EDD_MBR_SIG_MAX;
9268+ op.u.firmware_info.index++) {
9269+ ret = HYPERVISOR_platform_op(&op);
9270+ if (ret)
9271+ break;
9272+ edd.mbr_signature[edd.mbr_signature_nr++] =
9273+ op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
9274+ }
9275+}
9276+#endif
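
copy_edd() above uses a short-lived C(x) macro so each EDD field is copied with a single token: the macro argument is substituted as the member name on both sides of the assignment, and the macro is #undef'd as soon as the block is done. The same pattern in miniature, with made-up structures:

#include <stdio.h>

/* Field-copy macro in the style of copy_edd()'s C(x). */
struct fw_src { int device; int version; };
struct fw_dst { int device; int version; };

int main(void)
{
    struct fw_src s = { .device = 0x80, .version = 0x30 };
    struct fw_dst d;

#define C(x) d.x = s.x
    C(device);
    C(version);
#undef C

    printf("device=0x%x version=0x%x\n", d.device, d.version);
    return 0;
}
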
9277+
9278+void __init copy_edid(void)
9279+{
9280+#if defined(CONFIG_FIRMWARE_EDID) && defined(CONFIG_X86)
9281+ struct xen_platform_op op;
9282+
9283+ if (!is_initial_xendomain())
9284+ return;
9285+
9286+ op.cmd = XENPF_firmware_info;
9287+ op.u.firmware_info.index = 0;
9288+ op.u.firmware_info.type = XEN_FW_VBEDDC_INFO;
9289+ set_xen_guest_handle(op.u.firmware_info.u.vbeddc_info.edid,
9290+ edid_info.dummy);
9291+ if (HYPERVISOR_platform_op(&op) != 0)
9292+ memset(edid_info.dummy, 0x13, sizeof(edid_info.dummy));
9293+#endif
9294+}
9295Index: head-2008-11-25/drivers/xen/core/gnttab.c
9296===================================================================
9297--- /dev/null 1970-01-01 00:00:00.000000000 +0000
9298+++ head-2008-11-25/drivers/xen/core/gnttab.c 2008-11-04 11:13:10.000000000 +0100
9299@@ -0,0 +1,772 @@
9300+/******************************************************************************
9301+ * gnttab.c
9302+ *
9303+ * Granting foreign access to our memory reservation.
9304+ *
9305+ * Copyright (c) 2005-2006, Christopher Clark
9306+ * Copyright (c) 2004-2005, K A Fraser
9307+ *
9308+ * This program is free software; you can redistribute it and/or
9309+ * modify it under the terms of the GNU General Public License version 2
9310+ * as published by the Free Software Foundation; or, when distributed
9311+ * separately from the Linux kernel or incorporated into other
9312+ * software packages, subject to the following license:
9313+ *
9314+ * Permission is hereby granted, free of charge, to any person obtaining a copy
9315+ * of this source file (the "Software"), to deal in the Software without
9316+ * restriction, including without limitation the rights to use, copy, modify,
9317+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
9318+ * and to permit persons to whom the Software is furnished to do so, subject to
9319+ * the following conditions:
9320+ *
9321+ * The above copyright notice and this permission notice shall be included in
9322+ * all copies or substantial portions of the Software.
9323+ *
9324+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
9325+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
9326+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
9327+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
9328+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
9329+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
9330+ * IN THE SOFTWARE.
9331+ */
9332+
9333+#include <linux/module.h>
9334+#include <linux/sched.h>
9335+#include <linux/mm.h>
9336+#include <linux/seqlock.h>
9337+#include <xen/interface/xen.h>
9338+#include <xen/gnttab.h>
9339+#include <asm/pgtable.h>
9340+#include <asm/uaccess.h>
9341+#include <asm/synch_bitops.h>
9342+#include <asm/io.h>
9343+#include <xen/interface/memory.h>
9344+#include <xen/driver_util.h>
9345+#include <asm/gnttab_dma.h>
9346+
9347+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
9348+#include <xen/platform-compat.h>
9349+#endif
9350+
9351+/* External tools reserve first few grant table entries. */
9352+#define NR_RESERVED_ENTRIES 8
9353+#define GNTTAB_LIST_END 0xffffffff
9354+#define ENTRIES_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
9355+
9356+static grant_ref_t **gnttab_list;
9357+static unsigned int nr_grant_frames;
9358+static unsigned int boot_max_nr_grant_frames;
9359+static int gnttab_free_count;
9360+static grant_ref_t gnttab_free_head;
9361+static DEFINE_SPINLOCK(gnttab_list_lock);
9362+
9363+static struct grant_entry *shared;
9364+
9365+static struct gnttab_free_callback *gnttab_free_callback_list;
9366+
9367+static int gnttab_expand(unsigned int req_entries);
9368+
9369+#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
9370+#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
9371+
9372+#define nr_freelist_frames(grant_frames) \
9373+ (((grant_frames) * ENTRIES_PER_GRANT_FRAME + RPP - 1) / RPP)
9374+
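The RPP / gnttab_entry() macros above implement a two-level free list: a grant reference index selects a free-list page and a slot within it. A minimal standalone sketch of that indexing, assuming 4 KiB pages and a 4-byte grant_ref_t (the real values are architecture dependent):

/* sketch only -- not part of the patch */
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096
#define SKETCH_REF_SIZE  4                      /* assumed sizeof(grant_ref_t) */
#define SKETCH_RPP       (SKETCH_PAGE_SIZE / SKETCH_REF_SIZE)

int main(void)
{
	unsigned int entry = 2500;              /* arbitrary grant reference */

	/* mirrors gnttab_list[entry / RPP][entry % RPP] in the driver */
	printf("entry %u -> free-list page %u, slot %u\n",
	       entry, entry / SKETCH_RPP, entry % SKETCH_RPP);
	return 0;
}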
9375+static int get_free_entries(int count)
9376+{
9377+ unsigned long flags;
9378+ int ref, rc;
9379+ grant_ref_t head;
9380+
9381+ spin_lock_irqsave(&gnttab_list_lock, flags);
9382+
9383+ if ((gnttab_free_count < count) &&
9384+ ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
9385+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
9386+ return rc;
9387+ }
9388+
9389+ ref = head = gnttab_free_head;
9390+ gnttab_free_count -= count;
9391+ while (count-- > 1)
9392+ head = gnttab_entry(head);
9393+ gnttab_free_head = gnttab_entry(head);
9394+ gnttab_entry(head) = GNTTAB_LIST_END;
9395+
9396+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
9397+
9398+ return ref;
9399+}
9400+
9401+#define get_free_entry() get_free_entries(1)
9402+
9403+static void do_free_callbacks(void)
9404+{
9405+ struct gnttab_free_callback *callback, *next;
9406+
9407+ callback = gnttab_free_callback_list;
9408+ gnttab_free_callback_list = NULL;
9409+
9410+ while (callback != NULL) {
9411+ next = callback->next;
9412+ if (gnttab_free_count >= callback->count) {
9413+ callback->next = NULL;
9414+ callback->queued = 0;
9415+ callback->fn(callback->arg);
9416+ } else {
9417+ callback->next = gnttab_free_callback_list;
9418+ gnttab_free_callback_list = callback;
9419+ }
9420+ callback = next;
9421+ }
9422+}
9423+
9424+static inline void check_free_callbacks(void)
9425+{
9426+ if (unlikely(gnttab_free_callback_list))
9427+ do_free_callbacks();
9428+}
9429+
9430+static void put_free_entry(grant_ref_t ref)
9431+{
9432+ unsigned long flags;
9433+ spin_lock_irqsave(&gnttab_list_lock, flags);
9434+ gnttab_entry(ref) = gnttab_free_head;
9435+ gnttab_free_head = ref;
9436+ gnttab_free_count++;
9437+ check_free_callbacks();
9438+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
9439+}
9440+
9441+/*
9442+ * Public grant-issuing interface functions
9443+ */
9444+
9445+int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
9446+ int flags)
9447+{
9448+ int ref;
9449+
9450+ if (unlikely((ref = get_free_entry()) < 0))
9451+ return -ENOSPC;
9452+
9453+ shared[ref].frame = frame;
9454+ shared[ref].domid = domid;
9455+ wmb();
9456+ BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing));
9457+ shared[ref].flags = GTF_permit_access | flags;
9458+
9459+ return ref;
9460+}
9461+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
9462+
9463+void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
9464+ unsigned long frame, int flags)
9465+{
9466+ shared[ref].frame = frame;
9467+ shared[ref].domid = domid;
9468+ wmb();
9469+ BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing));
9470+ shared[ref].flags = GTF_permit_access | flags;
9471+}
9472+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
9473+
9474+
9475+int gnttab_query_foreign_access(grant_ref_t ref)
9476+{
9477+ u16 nflags;
9478+
9479+ nflags = shared[ref].flags;
9480+
9481+ return (nflags & (GTF_reading|GTF_writing));
9482+}
9483+EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
9484+
9485+int gnttab_end_foreign_access_ref(grant_ref_t ref)
9486+{
9487+ u16 flags, nflags;
9488+
9489+ nflags = shared[ref].flags;
9490+ do {
9491+ if ((flags = nflags) & (GTF_reading|GTF_writing)) {
9492+ printk(KERN_DEBUG "WARNING: g.e. still in use!\n");
9493+ return 0;
9494+ }
9495+ } while ((nflags = synch_cmpxchg_subword(&shared[ref].flags, flags, 0)) !=
9496+ flags);
9497+
9498+ return 1;
9499+}
9500+EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
9501+
9502+void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page)
9503+{
9504+ if (gnttab_end_foreign_access_ref(ref)) {
9505+ put_free_entry(ref);
9506+ if (page != 0)
9507+ free_page(page);
9508+ } else {
9509+ /* XXX This needs to be fixed so that the ref and page are
9510+ placed on a list to be freed up later. */
9511+ printk(KERN_DEBUG
9512+ "WARNING: leaking g.e. and page still in use!\n");
9513+ }
9514+}
9515+EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
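A minimal usage sketch of the grant/end pair above, as a split-driver frontend or backend might call it. The helper names and error handling are illustrative assumptions; the sketch relies only on the headers this file already includes (xen/gnttab.h, asm/maddr.h for virt_to_mfn):

/* sketch only -- not part of the patch */
static int sketch_share_page(domid_t otherend_id, void *page_addr,
			     grant_ref_t *ref_out)
{
	int ref;

	/* 0 == read/write; a read-only flag from the Xen interface
	 * headers could be passed instead */
	ref = gnttab_grant_foreign_access(otherend_id,
					  virt_to_mfn(page_addr), 0);
	if (ref < 0)
		return ref;		/* -ENOSPC: no free grant references */

	*ref_out = ref;
	return 0;
}

static void sketch_unshare_page(grant_ref_t ref, unsigned long page)
{
	/* revokes the grant; frees the page unless the peer still maps it */
	gnttab_end_foreign_access(ref, page);
}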
9516+
9517+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
9518+{
9519+ int ref;
9520+
9521+ if (unlikely((ref = get_free_entry()) < 0))
9522+ return -ENOSPC;
9523+ gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
9524+
9525+ return ref;
9526+}
9527+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
9528+
9529+void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
9530+ unsigned long pfn)
9531+{
9532+ shared[ref].frame = pfn;
9533+ shared[ref].domid = domid;
9534+ wmb();
9535+ shared[ref].flags = GTF_accept_transfer;
9536+}
9537+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
9538+
9539+unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
9540+{
9541+ unsigned long frame;
9542+ u16 flags;
9543+
9544+ /*
9545+	 * If a transfer has not yet started, try to reclaim the grant
9546+ * reference and return failure (== 0).
9547+ */
9548+ while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
9549+ if (synch_cmpxchg_subword(&shared[ref].flags, flags, 0) == flags)
9550+ return 0;
9551+ cpu_relax();
9552+ }
9553+
9554+ /* If a transfer is in progress then wait until it is completed. */
9555+ while (!(flags & GTF_transfer_completed)) {
9556+ flags = shared[ref].flags;
9557+ cpu_relax();
9558+ }
9559+
9560+ /* Read the frame number /after/ reading completion status. */
9561+ rmb();
9562+ frame = shared[ref].frame;
9563+ BUG_ON(frame == 0);
9564+
9565+ return frame;
9566+}
9567+EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
9568+
9569+unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
9570+{
9571+ unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
9572+ put_free_entry(ref);
9573+ return frame;
9574+}
9575+EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
9576+
9577+void gnttab_free_grant_reference(grant_ref_t ref)
9578+{
9579+ put_free_entry(ref);
9580+}
9581+EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
9582+
9583+void gnttab_free_grant_references(grant_ref_t head)
9584+{
9585+ grant_ref_t ref;
9586+ unsigned long flags;
9587+ int count = 1;
9588+ if (head == GNTTAB_LIST_END)
9589+ return;
9590+ spin_lock_irqsave(&gnttab_list_lock, flags);
9591+ ref = head;
9592+ while (gnttab_entry(ref) != GNTTAB_LIST_END) {
9593+ ref = gnttab_entry(ref);
9594+ count++;
9595+ }
9596+ gnttab_entry(ref) = gnttab_free_head;
9597+ gnttab_free_head = head;
9598+ gnttab_free_count += count;
9599+ check_free_callbacks();
9600+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
9601+}
9602+EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
9603+
9604+int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
9605+{
9606+ int h = get_free_entries(count);
9607+
9608+ if (h < 0)
9609+ return -ENOSPC;
9610+
9611+ *head = h;
9612+
9613+ return 0;
9614+}
9615+EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
9616+
9617+int gnttab_empty_grant_references(const grant_ref_t *private_head)
9618+{
9619+ return (*private_head == GNTTAB_LIST_END);
9620+}
9621+EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
9622+
9623+int gnttab_claim_grant_reference(grant_ref_t *private_head)
9624+{
9625+ grant_ref_t g = *private_head;
9626+ if (unlikely(g == GNTTAB_LIST_END))
9627+ return -ENOSPC;
9628+ *private_head = gnttab_entry(g);
9629+ return g;
9630+}
9631+EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
9632+
9633+void gnttab_release_grant_reference(grant_ref_t *private_head,
9634+ grant_ref_t release)
9635+{
9636+ gnttab_entry(release) = *private_head;
9637+ *private_head = release;
9638+}
9639+EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
9640+
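The alloc/claim/release helpers above support a batched allocation pattern: reserve a batch of references up front, claim them one at a time from a private head, and hand unused ones back. A hedged sketch of that pattern (function name and batch size are illustrative):

/* sketch only -- not part of the patch */
static int sketch_use_gref_batch(void)
{
	grant_ref_t head;
	int ref, err;

	err = gnttab_alloc_grant_references(16, &head);
	if (err)
		return err;				/* -ENOSPC */

	ref = gnttab_claim_grant_reference(&head);
	if (ref >= 0) {
		/* use "ref" with gnttab_grant_foreign_access_ref() here */
		gnttab_release_grant_reference(&head, ref);
	}

	gnttab_free_grant_references(head);		/* return the whole batch */
	return 0;
}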
9641+void gnttab_request_free_callback(struct gnttab_free_callback *callback,
9642+ void (*fn)(void *), void *arg, u16 count)
9643+{
9644+ unsigned long flags;
9645+ spin_lock_irqsave(&gnttab_list_lock, flags);
9646+ if (callback->queued)
9647+ goto out;
9648+ callback->fn = fn;
9649+ callback->arg = arg;
9650+ callback->count = count;
9651+ callback->queued = 1;
9652+ callback->next = gnttab_free_callback_list;
9653+ gnttab_free_callback_list = callback;
9654+ check_free_callbacks();
9655+out:
9656+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
9657+}
9658+EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
9659+
9660+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
9661+{
9662+ struct gnttab_free_callback **pcb;
9663+ unsigned long flags;
9664+
9665+ spin_lock_irqsave(&gnttab_list_lock, flags);
9666+ for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
9667+ if (*pcb == callback) {
9668+ *pcb = callback->next;
9669+ callback->queued = 0;
9670+ break;
9671+ }
9672+ }
9673+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
9674+}
9675+EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
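The free-callback interface above lets a driver be notified once enough grant references are available again. A sketch of typical use, with a hypothetical driver structure ("struct sketch_dev"); it relies only on the gnttab_request_free_callback() signature defined above:

/* sketch only -- not part of the patch */
struct sketch_dev {
	struct gnttab_free_callback gref_cb;
	/* ... driver state ... */
};

static void sketch_grefs_available(void *arg)
{
	struct sketch_dev *dev = arg;
	/* e.g. re-kick a queue that stalled for lack of grant refs */
	(void)dev;
}

static void sketch_wait_for_grefs(struct sketch_dev *dev, u16 needed)
{
	/* re-queuing an already-queued callback is a no-op (callback->queued) */
	gnttab_request_free_callback(&dev->gref_cb, sketch_grefs_available,
				     dev, needed);
}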
9676+
9677+static int grow_gnttab_list(unsigned int more_frames)
9678+{
9679+ unsigned int new_nr_grant_frames, extra_entries, i;
9680+ unsigned int nr_glist_frames, new_nr_glist_frames;
9681+
9682+ new_nr_grant_frames = nr_grant_frames + more_frames;
9683+ extra_entries = more_frames * ENTRIES_PER_GRANT_FRAME;
9684+
9685+ nr_glist_frames = nr_freelist_frames(nr_grant_frames);
9686+ new_nr_glist_frames = nr_freelist_frames(new_nr_grant_frames);
9687+ for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
9688+ gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
9689+ if (!gnttab_list[i])
9690+ goto grow_nomem;
9691+ }
9692+
9693+ for (i = ENTRIES_PER_GRANT_FRAME * nr_grant_frames;
9694+ i < ENTRIES_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
9695+ gnttab_entry(i) = i + 1;
9696+
9697+ gnttab_entry(i) = gnttab_free_head;
9698+ gnttab_free_head = ENTRIES_PER_GRANT_FRAME * nr_grant_frames;
9699+ gnttab_free_count += extra_entries;
9700+
9701+ nr_grant_frames = new_nr_grant_frames;
9702+
9703+ check_free_callbacks();
9704+
9705+ return 0;
9706+
9707+grow_nomem:
9708+ for ( ; i >= nr_glist_frames; i--)
9709+ free_page((unsigned long) gnttab_list[i]);
9710+ return -ENOMEM;
9711+}
9712+
9713+static unsigned int __max_nr_grant_frames(void)
9714+{
9715+ struct gnttab_query_size query;
9716+ int rc;
9717+
9718+ query.dom = DOMID_SELF;
9719+
9720+ rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
9721+ if ((rc < 0) || (query.status != GNTST_okay))
9722+ return 4; /* Legacy max supported number of frames */
9723+
9724+ return query.max_nr_frames;
9725+}
9726+
9727+static inline unsigned int max_nr_grant_frames(void)
9728+{
9729+ unsigned int xen_max = __max_nr_grant_frames();
9730+
9731+ if (xen_max > boot_max_nr_grant_frames)
9732+ return boot_max_nr_grant_frames;
9733+ return xen_max;
9734+}
9735+
9736+#ifdef CONFIG_XEN
9737+
9738+static DEFINE_SEQLOCK(gnttab_dma_lock);
9739+
9740+#ifdef CONFIG_X86
9741+static int map_pte_fn(pte_t *pte, struct page *pmd_page,
9742+ unsigned long addr, void *data)
9743+{
9744+ unsigned long **frames = (unsigned long **)data;
9745+
9746+ set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
9747+ (*frames)++;
9748+ return 0;
9749+}
9750+
9751+static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
9752+ unsigned long addr, void *data)
9753+{
9754+
9755+ set_pte_at(&init_mm, addr, pte, __pte(0));
9756+ return 0;
9757+}
9758+
9759+void *arch_gnttab_alloc_shared(unsigned long *frames)
9760+{
9761+ struct vm_struct *area;
9762+ area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
9763+ BUG_ON(area == NULL);
9764+ return area->addr;
9765+}
9766+#endif /* CONFIG_X86 */
9767+
9768+static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
9769+{
9770+ struct gnttab_setup_table setup;
9771+ unsigned long *frames;
9772+ unsigned int nr_gframes = end_idx + 1;
9773+ int rc;
9774+
9775+ frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
9776+ if (!frames)
9777+ return -ENOMEM;
9778+
9779+ setup.dom = DOMID_SELF;
9780+ setup.nr_frames = nr_gframes;
9781+ set_xen_guest_handle(setup.frame_list, frames);
9782+
9783+ rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
9784+ if (rc == -ENOSYS) {
9785+ kfree(frames);
9786+ return -ENOSYS;
9787+ }
9788+
9789+ BUG_ON(rc || setup.status);
9790+
9791+ if (shared == NULL)
9792+ shared = arch_gnttab_alloc_shared(frames);
9793+
9794+#ifdef CONFIG_X86
9795+ rc = apply_to_page_range(&init_mm, (unsigned long)shared,
9796+ PAGE_SIZE * nr_gframes,
9797+ map_pte_fn, &frames);
9798+ BUG_ON(rc);
9799+ frames -= nr_gframes; /* adjust after map_pte_fn() */
9800+#endif /* CONFIG_X86 */
9801+
9802+ kfree(frames);
9803+
9804+ return 0;
9805+}
9806+
9807+static void gnttab_page_free(struct page *page)
9808+{
9809+ ClearPageForeign(page);
9810+ gnttab_reset_grant_page(page);
9811+ put_page(page);
9812+}
9813+
9814+/*
9815+ * Must not be called with IRQs off. This should only be used on the
9816+ * slow path.
9817+ *
9818+ * Copy a foreign granted page to local memory.
9819+ */
9820+int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
9821+{
9822+ struct gnttab_unmap_and_replace unmap;
9823+ mmu_update_t mmu;
9824+ struct page *page;
9825+ struct page *new_page;
9826+ void *new_addr;
9827+ void *addr;
9828+ paddr_t pfn;
9829+ maddr_t mfn;
9830+ maddr_t new_mfn;
9831+ int err;
9832+
9833+ page = *pagep;
9834+ if (!get_page_unless_zero(page))
9835+ return -ENOENT;
9836+
9837+ err = -ENOMEM;
9838+ new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
9839+ if (!new_page)
9840+ goto out;
9841+
9842+ new_addr = page_address(new_page);
9843+ addr = page_address(page);
9844+ memcpy(new_addr, addr, PAGE_SIZE);
9845+
9846+ pfn = page_to_pfn(page);
9847+ mfn = pfn_to_mfn(pfn);
9848+ new_mfn = virt_to_mfn(new_addr);
9849+
9850+ write_seqlock(&gnttab_dma_lock);
9851+
9852+ /* Make seq visible before checking page_mapped. */
9853+ smp_mb();
9854+
9855+ /* Has the page been DMA-mapped? */
9856+ if (unlikely(page_mapped(page))) {
9857+ write_sequnlock(&gnttab_dma_lock);
9858+ put_page(new_page);
9859+ err = -EBUSY;
9860+ goto out;
9861+ }
9862+
9863+ if (!xen_feature(XENFEAT_auto_translated_physmap))
9864+ set_phys_to_machine(pfn, new_mfn);
9865+
9866+ gnttab_set_replace_op(&unmap, (unsigned long)addr,
9867+ (unsigned long)new_addr, ref);
9868+
9869+ err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
9870+ &unmap, 1);
9871+ BUG_ON(err);
9872+ BUG_ON(unmap.status);
9873+
9874+ write_sequnlock(&gnttab_dma_lock);
9875+
9876+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
9877+ set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
9878+
9879+ mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
9880+ mmu.val = pfn;
9881+ err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
9882+ BUG_ON(err);
9883+ }
9884+
9885+ new_page->mapping = page->mapping;
9886+ new_page->index = page->index;
9887+ set_bit(PG_foreign, &new_page->flags);
9888+ *pagep = new_page;
9889+
9890+ SetPageForeign(page, gnttab_page_free);
9891+ page->mapping = NULL;
9892+
9893+out:
9894+ put_page(page);
9895+ return err;
9896+}
9897+EXPORT_SYMBOL_GPL(gnttab_copy_grant_page);
9898+
9899+void gnttab_reset_grant_page(struct page *page)
9900+{
9901+ init_page_count(page);
9902+ reset_page_mapcount(page);
9903+}
9904+EXPORT_SYMBOL_GPL(gnttab_reset_grant_page);
9905+
9906+/*
9907+ * Keep track of foreign pages marked as PageForeign so that we don't
9908+ * return them to the remote domain prematurely.
9909+ *
9910+ * PageForeign pages are pinned down by increasing their mapcount.
9911+ *
9912+ * All other pages are simply returned as is.
9913+ */
9914+void __gnttab_dma_map_page(struct page *page)
9915+{
9916+ unsigned int seq;
9917+
9918+ if (!is_running_on_xen() || !PageForeign(page))
9919+ return;
9920+
9921+ do {
9922+ seq = read_seqbegin(&gnttab_dma_lock);
9923+
9924+ if (gnttab_dma_local_pfn(page))
9925+ break;
9926+
9927+ atomic_set(&page->_mapcount, 0);
9928+
9929+ /* Make _mapcount visible before read_seqretry. */
9930+ smp_mb();
9931+ } while (unlikely(read_seqretry(&gnttab_dma_lock, seq)));
9932+}
9933+
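gnttab_copy_grant_page() (writer) and __gnttab_dma_map_page() (reader) above follow the standard <linux/seqlock.h> pattern: the writer replaces the mapping under write_seqlock(), the reader retries until it observes a consistent snapshot. A stripped-down sketch of that pattern, independent of grant tables:

/* sketch only -- not part of the patch */
static DEFINE_SEQLOCK(sketch_lock);
static unsigned long sketch_state;

static void sketch_writer(unsigned long new_state)
{
	write_seqlock(&sketch_lock);
	sketch_state = new_state;	/* replace the page / update the mapping */
	write_sequnlock(&sketch_lock);
}

static unsigned long sketch_reader(void)
{
	unsigned int seq;
	unsigned long snapshot;

	do {
		seq = read_seqbegin(&sketch_lock);
		snapshot = sketch_state;	/* pin the page / bump the mapcount */
	} while (read_seqretry(&sketch_lock, seq));

	return snapshot;
}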
9934+int gnttab_resume(void)
9935+{
9936+ if (max_nr_grant_frames() < nr_grant_frames)
9937+ return -ENOSYS;
9938+ return gnttab_map(0, nr_grant_frames - 1);
9939+}
9940+
9941+int gnttab_suspend(void)
9942+{
9943+#ifdef CONFIG_X86
9944+ apply_to_page_range(&init_mm, (unsigned long)shared,
9945+ PAGE_SIZE * nr_grant_frames,
9946+ unmap_pte_fn, NULL);
9947+#endif
9948+ return 0;
9949+}
9950+
9951+#else /* !CONFIG_XEN */
9952+
9953+#include <platform-pci.h>
9954+
9955+static unsigned long resume_frames;
9956+
9957+static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
9958+{
9959+ struct xen_add_to_physmap xatp;
9960+ unsigned int i = end_idx;
9961+
9962+ /* Loop backwards, so that the first hypercall has the largest index,
9963+ * ensuring that the table will grow only once.
9964+ */
9965+ do {
9966+ xatp.domid = DOMID_SELF;
9967+ xatp.idx = i;
9968+ xatp.space = XENMAPSPACE_grant_table;
9969+ xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
9970+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
9971+ BUG();
9972+ } while (i-- > start_idx);
9973+
9974+ return 0;
9975+}
9976+
9977+int gnttab_resume(void)
9978+{
9979+ unsigned int max_nr_gframes, nr_gframes;
9980+
9981+ nr_gframes = nr_grant_frames;
9982+ max_nr_gframes = max_nr_grant_frames();
9983+ if (max_nr_gframes < nr_gframes)
9984+ return -ENOSYS;
9985+
9986+ if (!resume_frames) {
9987+ resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
9988+ shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
9989+ if (shared == NULL) {
9990+ printk("error to ioremap gnttab share frames\n");
9991+ return -1;
9992+ }
9993+ }
9994+
9995+ gnttab_map(0, nr_gframes - 1);
9996+
9997+ return 0;
9998+}
9999+
10000+#endif /* !CONFIG_XEN */
10001+
10002+static int gnttab_expand(unsigned int req_entries)
10003+{
10004+ int rc;
10005+ unsigned int cur, extra;
10006+
10007+ cur = nr_grant_frames;
10008+ extra = ((req_entries + (ENTRIES_PER_GRANT_FRAME-1)) /
10009+ ENTRIES_PER_GRANT_FRAME);
10010+ if (cur + extra > max_nr_grant_frames())
10011+ return -ENOSPC;
10012+
10013+ if ((rc = gnttab_map(cur, cur + extra - 1)) == 0)
10014+ rc = grow_gnttab_list(extra);
10015+
10016+ return rc;
10017+}
10018+
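The rounding in gnttab_expand() above converts a request for N extra entries into whole extra grant frames. A small worked example, assuming 4 KiB pages and an 8-byte grant_entry_t (512 entries per frame; the real values are architecture dependent):

/* sketch only -- not part of the patch */
#include <stdio.h>

int main(void)
{
	unsigned int entries_per_frame = 4096 / 8;	/* 512 */
	unsigned int req_entries = 600;
	unsigned int extra = (req_entries + entries_per_frame - 1) /
			     entries_per_frame;		/* rounds up to 2 frames */

	printf("%u extra entries -> %u extra grant frames\n",
	       req_entries, extra);
	return 0;
}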
10019+int __devinit gnttab_init(void)
10020+{
10021+ int i;
10022+ unsigned int max_nr_glist_frames, nr_glist_frames;
10023+ unsigned int nr_init_grefs;
10024+
10025+ if (!is_running_on_xen())
10026+ return -ENODEV;
10027+
10028+ nr_grant_frames = 1;
10029+ boot_max_nr_grant_frames = __max_nr_grant_frames();
10030+
10031+ /* Determine the maximum number of frames required for the
10032+ * grant reference free list on the current hypervisor.
10033+ */
10034+ max_nr_glist_frames = nr_freelist_frames(boot_max_nr_grant_frames);
10035+
10036+ gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
10037+ GFP_KERNEL);
10038+ if (gnttab_list == NULL)
10039+ return -ENOMEM;
10040+
10041+ nr_glist_frames = nr_freelist_frames(nr_grant_frames);
10042+ for (i = 0; i < nr_glist_frames; i++) {
10043+ gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
10044+ if (gnttab_list[i] == NULL)
10045+ goto ini_nomem;
10046+ }
10047+
10048+ if (gnttab_resume() < 0)
10049+ return -ENODEV;
10050+
10051+ nr_init_grefs = nr_grant_frames * ENTRIES_PER_GRANT_FRAME;
10052+
10053+ for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
10054+ gnttab_entry(i) = i + 1;
10055+
10056+ gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
10057+ gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
10058+ gnttab_free_head = NR_RESERVED_ENTRIES;
10059+
10060+ return 0;
10061+
10062+ ini_nomem:
10063+ for (i--; i >= 0; i--)
10064+ free_page((unsigned long)gnttab_list[i]);
10065+ kfree(gnttab_list);
10066+ return -ENOMEM;
10067+}
10068+
10069+#ifdef CONFIG_XEN
10070+core_initcall(gnttab_init);
10071+#endif
10072Index: head-2008-11-25/drivers/xen/core/hypervisor_sysfs.c
10073===================================================================
10074--- /dev/null 1970-01-01 00:00:00.000000000 +0000
10075+++ head-2008-11-25/drivers/xen/core/hypervisor_sysfs.c 2007-07-10 09:42:30.000000000 +0200
10076@@ -0,0 +1,57 @@
10077+/*
10078+ * copyright (c) 2006 IBM Corporation
10079+ * Authored by: Mike D. Day <ncmike@us.ibm.com>
10080+ *
10081+ * This program is free software; you can redistribute it and/or modify
10082+ * it under the terms of the GNU General Public License version 2 as
10083+ * published by the Free Software Foundation.
10084+ */
10085+
10086+#include <linux/kernel.h>
10087+#include <linux/module.h>
10088+#include <linux/kobject.h>
10089+#include <xen/hypervisor_sysfs.h>
10090+#include <asm/hypervisor.h>
10091+
10092+static ssize_t hyp_sysfs_show(struct kobject *kobj,
10093+ struct attribute *attr,
10094+ char *buffer)
10095+{
10096+ struct hyp_sysfs_attr *hyp_attr;
10097+ hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
10098+ if (hyp_attr->show)
10099+ return hyp_attr->show(hyp_attr, buffer);
10100+ return 0;
10101+}
10102+
10103+static ssize_t hyp_sysfs_store(struct kobject *kobj,
10104+ struct attribute *attr,
10105+ const char *buffer,
10106+ size_t len)
10107+{
10108+ struct hyp_sysfs_attr *hyp_attr;
10109+ hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
10110+ if (hyp_attr->store)
10111+ return hyp_attr->store(hyp_attr, buffer, len);
10112+ return 0;
10113+}
10114+
10115+static struct sysfs_ops hyp_sysfs_ops = {
10116+ .show = hyp_sysfs_show,
10117+ .store = hyp_sysfs_store,
10118+};
10119+
10120+static struct kobj_type hyp_sysfs_kobj_type = {
10121+ .sysfs_ops = &hyp_sysfs_ops,
10122+};
10123+
10124+static int __init hypervisor_subsys_init(void)
10125+{
10126+ if (!is_running_on_xen())
10127+ return -ENODEV;
10128+
10129+ hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type;
10130+ return 0;
10131+}
10132+
10133+device_initcall(hypervisor_subsys_init);
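A sketch of how a read-only hypervisor attribute might be wired up against the show/store dispatch above. The exact layout of struct hyp_sysfs_attr lives in xen/hypervisor_sysfs.h; the sketch relies only on the members the wrappers above use (.attr, .show, .store) and the call shapes they imply:

/* sketch only -- not part of the patch */
static ssize_t sketch_version_show(struct hyp_sysfs_attr *attr, char *buffer)
{
	return sprintf(buffer, "xen\n");
}

static struct hyp_sysfs_attr sketch_version_attr = {
	.attr  = { .name = "sketch_version", .mode = 0444 },
	.show  = sketch_version_show,
	.store = NULL,
};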
10134Index: head-2008-11-25/drivers/xen/core/machine_kexec.c
10135===================================================================
10136--- /dev/null 1970-01-01 00:00:00.000000000 +0000
10137+++ head-2008-11-25/drivers/xen/core/machine_kexec.c 2008-10-13 13:43:45.000000000 +0200
10138@@ -0,0 +1,222 @@
10139+/*
10140+ * drivers/xen/core/machine_kexec.c
10141+ * handle transition of Linux booting another kernel
10142+ */
10143+
10144+#include <linux/kexec.h>
10145+#include <xen/interface/kexec.h>
10146+#include <linux/mm.h>
10147+#include <linux/bootmem.h>
10148+
10149+extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki,
10150+ struct kimage *image);
10151+extern int machine_kexec_setup_resources(struct resource *hypervisor,
10152+ struct resource *phys_cpus,
10153+ int nr_phys_cpus);
10154+extern void machine_kexec_register_resources(struct resource *res);
10155+
10156+static int __initdata xen_max_nr_phys_cpus;
10157+static struct resource xen_hypervisor_res;
10158+static struct resource *xen_phys_cpus;
10159+
10160+size_t vmcoreinfo_size_xen;
10161+unsigned long paddr_vmcoreinfo_xen;
10162+
10163+void __init xen_machine_kexec_setup_resources(void)
10164+{
10165+ xen_kexec_range_t range;
10166+ struct resource *res;
10167+ int k = 0;
10168+ int rc;
10169+
10170+ if (!is_initial_xendomain())
10171+ return;
10172+
10173+ /* determine maximum number of physical cpus */
10174+
10175+ while (1) {
10176+ memset(&range, 0, sizeof(range));
10177+ range.range = KEXEC_RANGE_MA_CPU;
10178+ range.nr = k;
10179+
10180+ if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
10181+ break;
10182+
10183+ k++;
10184+ }
10185+
10186+ if (k == 0)
10187+ return;
10188+
10189+ xen_max_nr_phys_cpus = k;
10190+
10191+ /* allocate xen_phys_cpus */
10192+
10193+ xen_phys_cpus = alloc_bootmem_low(k * sizeof(struct resource));
10194+ BUG_ON(xen_phys_cpus == NULL);
10195+
10196+ /* fill in xen_phys_cpus with per-cpu crash note information */
10197+
10198+ for (k = 0; k < xen_max_nr_phys_cpus; k++) {
10199+ memset(&range, 0, sizeof(range));
10200+ range.range = KEXEC_RANGE_MA_CPU;
10201+ range.nr = k;
10202+
10203+ if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
10204+ goto err;
10205+
10206+ res = xen_phys_cpus + k;
10207+
10208+ memset(res, 0, sizeof(*res));
10209+ res->name = "Crash note";
10210+ res->start = range.start;
10211+ res->end = range.start + range.size - 1;
10212+ res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
10213+ }
10214+
10215+ /* fill in xen_hypervisor_res with hypervisor machine address range */
10216+
10217+ memset(&range, 0, sizeof(range));
10218+ range.range = KEXEC_RANGE_MA_XEN;
10219+
10220+ if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
10221+ goto err;
10222+
10223+ xen_hypervisor_res.name = "Hypervisor code and data";
10224+ xen_hypervisor_res.start = range.start;
10225+ xen_hypervisor_res.end = range.start + range.size - 1;
10226+ xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
10227+
10228+ /* fill in crashk_res if range is reserved by hypervisor */
10229+
10230+ memset(&range, 0, sizeof(range));
10231+ range.range = KEXEC_RANGE_MA_CRASH;
10232+
10233+ if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
10234+ goto err;
10235+
10236+ if (range.size) {
10237+ crashk_res.start = range.start;
10238+ crashk_res.end = range.start + range.size - 1;
10239+ }
10240+
10241+ /* get physical address of vmcoreinfo */
10242+ memset(&range, 0, sizeof(range));
10243+ range.range = KEXEC_RANGE_MA_VMCOREINFO;
10244+
10245+ rc = HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range);
10246+
10247+ if (rc == 0) {
10248+ /* Hypercall succeeded */
10249+ vmcoreinfo_size_xen = range.size;
10250+ paddr_vmcoreinfo_xen = range.start;
10251+
10252+ } else {
10253+ /* Hypercall failed.
10254+ * Indicate not to create sysfs file by resetting globals
10255+ */
10256+ vmcoreinfo_size_xen = 0;
10257+ paddr_vmcoreinfo_xen = 0;
10258+
10259+ /* The KEXEC_CMD_kexec_get_range hypercall did not implement
10260+ * KEXEC_RANGE_MA_VMCOREINFO until Xen 3.3.
10261+ * Do not bail out if it fails for this reason.
10262+ */
10263+ if (rc != -EINVAL)
10264+ return;
10265+ }
10266+
10267+ if (machine_kexec_setup_resources(&xen_hypervisor_res, xen_phys_cpus,
10268+ xen_max_nr_phys_cpus))
10269+ goto err;
10270+
10271+ return;
10272+
10273+ err:
10274+ /*
10275+ * It isn't possible to free xen_phys_cpus this early in the
10276+ * boot. Failure at this stage is unexpected and the amount of
10277+ * memory is small, so we tolerate the potential leak.
10278+ */
10279+ xen_max_nr_phys_cpus = 0;
10280+ return;
10281+}
10282+
10283+void __init xen_machine_kexec_register_resources(struct resource *res)
10284+{
10285+ request_resource(res, &xen_hypervisor_res);
10286+ machine_kexec_register_resources(res);
10287+}
10288+
10289+static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
10290+{
10291+ machine_kexec_setup_load_arg(xki, image);
10292+
10293+ xki->indirection_page = image->head;
10294+ xki->start_address = image->start;
10295+}
10296+
10297+/*
10298+ * Load the image into xen so xen can kdump itself
10299+ * This might have been done in prepare, but prepare
10300+ * is currently called too early. It might make sense
10301+ * to move prepare, but for now, just add an extra hook.
10302+ */
10303+int xen_machine_kexec_load(struct kimage *image)
10304+{
10305+ xen_kexec_load_t xkl;
10306+
10307+ memset(&xkl, 0, sizeof(xkl));
10308+ xkl.type = image->type;
10309+ setup_load_arg(&xkl.image, image);
10310+ return HYPERVISOR_kexec_op(KEXEC_CMD_kexec_load, &xkl);
10311+}
10312+
10313+/*
10314+ * Unload the image that was stored by machine_kexec_load()
10315+ * This might have been done in machine_kexec_cleanup() but it
10316+ * is called too late, and it's possible xen could try to kdump
10317+ * using resources that have been freed.
10318+ */
10319+void xen_machine_kexec_unload(struct kimage *image)
10320+{
10321+ xen_kexec_load_t xkl;
10322+
10323+ memset(&xkl, 0, sizeof(xkl));
10324+ xkl.type = image->type;
10325+ WARN_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_unload, &xkl));
10326+}
10327+
10328+/*
10329+ * Do not allocate memory (or fail in any way) in machine_kexec().
10330+ * We are past the point of no return, committed to rebooting now.
10331+ *
10332+ * This has the hypervisor move to the preferred reboot CPU,
10333+ * stop all CPUs and kexec. That is, it combines machine_shutdown()
10334+ * and machine_kexec() in Linux kexec terms.
10335+ */
10336+NORET_TYPE void machine_kexec(struct kimage *image)
10337+{
10338+ xen_kexec_exec_t xke;
10339+
10340+ memset(&xke, 0, sizeof(xke));
10341+ xke.type = image->type;
10342+ VOID(HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &xke));
10343+ panic("KEXEC_CMD_kexec hypercall should not return\n");
10344+}
10345+
10346+void machine_shutdown(void)
10347+{
10348+ /* do nothing */
10349+}
10350+
10351+
10352+/*
10353+ * Local variables:
10354+ * c-file-style: "linux"
10355+ * indent-tabs-mode: t
10356+ * c-indent-level: 8
10357+ * c-basic-offset: 8
10358+ * tab-width: 8
10359+ * End:
10360+ */
10361Index: head-2008-11-25/drivers/xen/core/machine_reboot.c
10362===================================================================
10363--- /dev/null 1970-01-01 00:00:00.000000000 +0000
10364+++ head-2008-11-25/drivers/xen/core/machine_reboot.c 2008-09-01 12:07:31.000000000 +0200
10365@@ -0,0 +1,247 @@
10366+#include <linux/version.h>
10367+#include <linux/kernel.h>
10368+#include <linux/mm.h>
10369+#include <linux/unistd.h>
10370+#include <linux/module.h>
10371+#include <linux/reboot.h>
10372+#include <linux/sysrq.h>
10373+#include <linux/stringify.h>
10374+#include <linux/stop_machine.h>
10375+#include <asm/irq.h>
10376+#include <asm/mmu_context.h>
10377+#include <xen/evtchn.h>
10378+#include <asm/hypervisor.h>
10379+#include <xen/xenbus.h>
10380+#include <linux/cpu.h>
10381+#include <xen/gnttab.h>
10382+#include <xen/xencons.h>
10383+#include <xen/cpu_hotplug.h>
10384+#include <xen/interface/vcpu.h>
10385+
10386+#if defined(__i386__) || defined(__x86_64__)
10387+
10388+/*
10389+ * Power off function, if any
10390+ */
10391+void (*pm_power_off)(void);
10392+EXPORT_SYMBOL(pm_power_off);
10393+
10394+void machine_emergency_restart(void)
10395+{
10396+ /* We really want to get pending console data out before we die. */
10397+ xencons_force_flush();
10398+ HYPERVISOR_shutdown(SHUTDOWN_reboot);
10399+}
10400+
10401+void machine_restart(char * __unused)
10402+{
10403+ machine_emergency_restart();
10404+}
10405+
10406+void machine_halt(void)
10407+{
10408+ machine_power_off();
10409+}
10410+
10411+void machine_power_off(void)
10412+{
10413+ /* We really want to get pending console data out before we die. */
10414+ xencons_force_flush();
10415+ if (pm_power_off)
10416+ pm_power_off();
10417+ HYPERVISOR_shutdown(SHUTDOWN_poweroff);
10418+}
10419+
10420+int reboot_thru_bios = 0; /* for dmi_scan.c */
10421+EXPORT_SYMBOL(machine_restart);
10422+EXPORT_SYMBOL(machine_halt);
10423+EXPORT_SYMBOL(machine_power_off);
10424+
10425+static void pre_suspend(void)
10426+{
10427+ HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
10428+ WARN_ON(HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
10429+ __pte_ma(0), 0));
10430+
10431+ xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
10432+ xen_start_info->console.domU.mfn =
10433+ mfn_to_pfn(xen_start_info->console.domU.mfn);
10434+}
10435+
10436+static void post_suspend(int suspend_cancelled)
10437+{
10438+ int i, j, k, fpp;
10439+ unsigned long shinfo_mfn;
10440+ extern unsigned long max_pfn;
10441+ extern unsigned long *pfn_to_mfn_frame_list_list;
10442+ extern unsigned long *pfn_to_mfn_frame_list[];
10443+
10444+ if (suspend_cancelled) {
10445+ xen_start_info->store_mfn =
10446+ pfn_to_mfn(xen_start_info->store_mfn);
10447+ xen_start_info->console.domU.mfn =
10448+ pfn_to_mfn(xen_start_info->console.domU.mfn);
10449+ } else {
10450+#ifdef CONFIG_SMP
10451+ cpu_initialized_map = cpu_online_map;
10452+#endif
10453+ }
10454+
10455+ shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT;
10456+ if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
10457+ pfn_pte_ma(shinfo_mfn, PAGE_KERNEL),
10458+ 0))
10459+ BUG();
10460+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
10461+
10462+ memset(empty_zero_page, 0, PAGE_SIZE);
10463+
10464+ fpp = PAGE_SIZE/sizeof(unsigned long);
10465+ for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
10466+ if ((j % fpp) == 0) {
10467+ k++;
10468+ pfn_to_mfn_frame_list_list[k] =
10469+ virt_to_mfn(pfn_to_mfn_frame_list[k]);
10470+ j = 0;
10471+ }
10472+ pfn_to_mfn_frame_list[k][j] =
10473+ virt_to_mfn(&phys_to_machine_mapping[i]);
10474+ }
10475+ HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
10476+ HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
10477+ virt_to_mfn(pfn_to_mfn_frame_list_list);
10478+}
10479+
10480+#else /* !(defined(__i386__) || defined(__x86_64__)) */
10481+
10482+#ifndef HAVE_XEN_PRE_SUSPEND
10483+#define xen_pre_suspend() ((void)0)
10484+#endif
10485+
10486+#ifndef HAVE_XEN_POST_SUSPEND
10487+#define xen_post_suspend(x) ((void)0)
10488+#endif
10489+
10490+#define switch_idle_mm() ((void)0)
10491+#define mm_pin_all() ((void)0)
10492+#define pre_suspend() xen_pre_suspend()
10493+#define post_suspend(x) xen_post_suspend(x)
10494+
10495+#endif
10496+
10497+struct suspend {
10498+ int fast_suspend;
10499+ void (*resume_notifier)(int);
10500+};
10501+
10502+static int take_machine_down(void *_suspend)
10503+{
10504+ struct suspend *suspend = _suspend;
10505+ int suspend_cancelled, err;
10506+ extern void time_resume(void);
10507+
10508+ if (suspend->fast_suspend) {
10509+ BUG_ON(!irqs_disabled());
10510+ } else {
10511+ BUG_ON(irqs_disabled());
10512+
10513+ for (;;) {
10514+ err = smp_suspend();
10515+ if (err)
10516+ return err;
10517+
10518+ xenbus_suspend();
10519+ preempt_disable();
10520+
10521+ if (num_online_cpus() == 1)
10522+ break;
10523+
10524+ preempt_enable();
10525+ xenbus_suspend_cancel();
10526+ }
10527+
10528+ local_irq_disable();
10529+ }
10530+
10531+ mm_pin_all();
10532+ gnttab_suspend();
10533+ pre_suspend();
10534+
10535+ /*
10536+ * This hypercall returns 1 if suspend was cancelled or the domain was
10537+ * merely checkpointed, and 0 if it is resuming in a new domain.
10538+ */
10539+ suspend_cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
10540+
10541+ suspend->resume_notifier(suspend_cancelled);
10542+ post_suspend(suspend_cancelled);
10543+ gnttab_resume();
10544+ if (!suspend_cancelled) {
10545+ irq_resume();
10546+#ifdef __x86_64__
10547+ /*
10548+ * Older versions of Xen do not save/restore the user %cr3.
10549+ * We do it here just in case, but there's no need if we are
10550+ * in fast-suspend mode as that implies a new enough Xen.
10551+ */
10552+ if (!suspend->fast_suspend)
10553+ xen_new_user_pt(__pa(__user_pgd(
10554+ current->active_mm->pgd)));
10555+#endif
10556+ }
10557+ time_resume();
10558+
10559+ if (!suspend->fast_suspend)
10560+ local_irq_enable();
10561+
10562+ return suspend_cancelled;
10563+}
10564+
10565+int __xen_suspend(int fast_suspend, void (*resume_notifier)(int))
10566+{
10567+ int err, suspend_cancelled;
10568+ struct suspend suspend;
10569+
10570+ BUG_ON(smp_processor_id() != 0);
10571+ BUG_ON(in_interrupt());
10572+
10573+#if defined(__i386__) || defined(__x86_64__)
10574+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
10575+ printk(KERN_WARNING "Cannot suspend in "
10576+ "auto_translated_physmap mode.\n");
10577+ return -EOPNOTSUPP;
10578+ }
10579+#endif
10580+
10581+ /* If we are definitely UP then 'slow mode' is actually faster. */
10582+ if (num_possible_cpus() == 1)
10583+ fast_suspend = 0;
10584+
10585+ suspend.fast_suspend = fast_suspend;
10586+ suspend.resume_notifier = resume_notifier;
10587+
10588+ if (fast_suspend) {
10589+ xenbus_suspend();
10590+ err = stop_machine_run(take_machine_down, &suspend, 0);
10591+ if (err < 0)
10592+ xenbus_suspend_cancel();
10593+ } else {
10594+ err = take_machine_down(&suspend);
10595+ }
10596+
10597+ if (err < 0)
10598+ return err;
10599+
10600+ suspend_cancelled = err;
10601+ if (!suspend_cancelled) {
10602+ xencons_resume();
10603+ xenbus_resume();
10604+ } else {
10605+ xenbus_suspend_cancel();
10606+ }
10607+
10608+ if (!fast_suspend)
10609+ smp_resume();
10610+
10611+ return 0;
10612+}
10613Index: head-2008-11-25/drivers/xen/core/pci.c
10614===================================================================
10615--- /dev/null 1970-01-01 00:00:00.000000000 +0000
10616+++ head-2008-11-25/drivers/xen/core/pci.c 2008-11-10 11:44:21.000000000 +0100
10617@@ -0,0 +1,59 @@
10618+/*
10619+ * vim:shiftwidth=8:noexpandtab
10620+ */
10621+
10622+#include <linux/kernel.h>
10623+#include <linux/init.h>
10624+#include <linux/pci.h>
10625+#include <xen/interface/physdev.h>
10626+
10627+static int (*pci_bus_probe)(struct device *dev);
10628+static int (*pci_bus_remove)(struct device *dev);
10629+
10630+static int pci_bus_probe_wrapper(struct device *dev)
10631+{
10632+ int r;
10633+ struct pci_dev *pci_dev = to_pci_dev(dev);
10634+ struct physdev_manage_pci manage_pci;
10635+ manage_pci.bus = pci_dev->bus->number;
10636+ manage_pci.devfn = pci_dev->devfn;
10637+
10638+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci);
10639+ if (r && r != -ENOSYS)
10640+ return r;
10641+
10642+ r = pci_bus_probe(dev);
10643+ return r;
10644+}
10645+
10646+static int pci_bus_remove_wrapper(struct device *dev)
10647+{
10648+ int r;
10649+ struct pci_dev *pci_dev = to_pci_dev(dev);
10650+ struct physdev_manage_pci manage_pci;
10651+ manage_pci.bus = pci_dev->bus->number;
10652+ manage_pci.devfn = pci_dev->devfn;
10653+
10654+ r = pci_bus_remove(dev);
10655+ /* dev and pci_dev are no longer valid!! */
10656+
10657+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
10658+ &manage_pci));
10659+ return r;
10660+}
10661+
10662+static int __init hook_pci_bus(void)
10663+{
10664+ if (!is_running_on_xen() || !is_initial_xendomain())
10665+ return 0;
10666+
10667+ pci_bus_probe = pci_bus_type.probe;
10668+ pci_bus_type.probe = pci_bus_probe_wrapper;
10669+
10670+ pci_bus_remove = pci_bus_type.remove;
10671+ pci_bus_type.remove = pci_bus_remove_wrapper;
10672+
10673+ return 0;
10674+}
10675+
10676+core_initcall(hook_pci_bus);
10677Index: head-2008-11-25/drivers/xen/core/reboot.c
10678===================================================================
10679--- /dev/null 1970-01-01 00:00:00.000000000 +0000
10680+++ head-2008-11-25/drivers/xen/core/reboot.c 2008-08-07 12:44:36.000000000 +0200
10681@@ -0,0 +1,335 @@
10682+#define __KERNEL_SYSCALLS__
10683+#include <linux/version.h>
10684+#include <linux/kernel.h>
10685+#include <linux/unistd.h>
10686+#include <linux/module.h>
10687+#include <linux/reboot.h>
10688+#include <linux/sysrq.h>
10689+#include <asm/hypervisor.h>
10690+#include <xen/xenbus.h>
10691+#include <xen/evtchn.h>
10692+#include <linux/kmod.h>
10693+#include <linux/slab.h>
10694+#include <linux/workqueue.h>
10695+
10696+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
10697+#include <xen/platform-compat.h>
10698+#endif
10699+
10700+MODULE_LICENSE("Dual BSD/GPL");
10701+
10702+#define SHUTDOWN_INVALID -1
10703+#define SHUTDOWN_POWEROFF 0
10704+#define SHUTDOWN_SUSPEND 2
10705+#define SHUTDOWN_RESUMING 3
10706+#define SHUTDOWN_HALT 4
10707+
10708+/* Ignore multiple shutdown requests. */
10709+static int shutting_down = SHUTDOWN_INVALID;
10710+
10711+/* Was last suspend request cancelled? */
10712+static int suspend_cancelled;
10713+
10714+/* Can we leave APs online when we suspend? */
10715+static int fast_suspend;
10716+
10717+static void __shutdown_handler(void *unused);
10718+static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
10719+
10720+static int setup_suspend_evtchn(void);
10721+
10722+int __xen_suspend(int fast_suspend, void (*resume_notifier)(int));
10723+
10724+static int shutdown_process(void *__unused)
10725+{
10726+ static char *envp[] = { "HOME=/", "TERM=linux",
10727+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
10728+ static char *poweroff_argv[] = { "/sbin/poweroff", NULL };
10729+
10730+ extern asmlinkage long sys_reboot(int magic1, int magic2,
10731+ unsigned int cmd, void *arg);
10732+
10733+ if ((shutting_down == SHUTDOWN_POWEROFF) ||
10734+ (shutting_down == SHUTDOWN_HALT)) {
10735+ if (call_usermodehelper("/sbin/poweroff", poweroff_argv,
10736+ envp, 0) < 0) {
10737+#ifdef CONFIG_XEN
10738+ sys_reboot(LINUX_REBOOT_MAGIC1,
10739+ LINUX_REBOOT_MAGIC2,
10740+ LINUX_REBOOT_CMD_POWER_OFF,
10741+ NULL);
10742+#endif /* CONFIG_XEN */
10743+ }
10744+ }
10745+
10746+ shutting_down = SHUTDOWN_INVALID; /* could try again */
10747+
10748+ return 0;
10749+}
10750+
10751+static void xen_resume_notifier(int _suspend_cancelled)
10752+{
10753+ int old_state = xchg(&shutting_down, SHUTDOWN_RESUMING);
10754+ BUG_ON(old_state != SHUTDOWN_SUSPEND);
10755+ suspend_cancelled = _suspend_cancelled;
10756+}
10757+
10758+static int xen_suspend(void *__unused)
10759+{
10760+ int err, old_state;
10761+
10762+ daemonize("suspend");
10763+ err = set_cpus_allowed(current, cpumask_of_cpu(0));
10764+ if (err) {
10765+ printk(KERN_ERR "Xen suspend can't run on CPU0 (%d)\n", err);
10766+ goto fail;
10767+ }
10768+
10769+ do {
10770+ err = __xen_suspend(fast_suspend, xen_resume_notifier);
10771+ if (err) {
10772+ printk(KERN_ERR "Xen suspend failed (%d)\n", err);
10773+ goto fail;
10774+ }
10775+ if (!suspend_cancelled)
10776+ setup_suspend_evtchn();
10777+ old_state = cmpxchg(
10778+ &shutting_down, SHUTDOWN_RESUMING, SHUTDOWN_INVALID);
10779+ } while (old_state == SHUTDOWN_SUSPEND);
10780+
10781+ switch (old_state) {
10782+ case SHUTDOWN_INVALID:
10783+ case SHUTDOWN_SUSPEND:
10784+ BUG();
10785+ case SHUTDOWN_RESUMING:
10786+ break;
10787+ default:
10788+ schedule_work(&shutdown_work);
10789+ break;
10790+ }
10791+
10792+ return 0;
10793+
10794+ fail:
10795+ old_state = xchg(&shutting_down, SHUTDOWN_INVALID);
10796+ BUG_ON(old_state != SHUTDOWN_SUSPEND);
10797+ return 0;
10798+}
10799+
10800+static void switch_shutdown_state(int new_state)
10801+{
10802+ int prev_state, old_state = SHUTDOWN_INVALID;
10803+
10804+ /* We only drive shutdown_state into an active state. */
10805+ if (new_state == SHUTDOWN_INVALID)
10806+ return;
10807+
10808+ do {
10809+ /* We drop this transition if already in an active state. */
10810+ if ((old_state != SHUTDOWN_INVALID) &&
10811+ (old_state != SHUTDOWN_RESUMING))
10812+ return;
10813+ /* Attempt to transition. */
10814+ prev_state = old_state;
10815+ old_state = cmpxchg(&shutting_down, old_state, new_state);
10816+ } while (old_state != prev_state);
10817+
10818+ /* Either we kick off the work, or we leave it to xen_suspend(). */
10819+ if (old_state == SHUTDOWN_INVALID)
10820+ schedule_work(&shutdown_work);
10821+ else
10822+ BUG_ON(old_state != SHUTDOWN_RESUMING);
10823+}
10824+
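switch_shutdown_state() above only ever drives the state machine from an idle state (SHUTDOWN_INVALID or SHUTDOWN_RESUMING) into an active one; concurrent requests are dropped by the cmpxchg loop. The transition rule, written out as a plain predicate for clarity (illustration only, using the constants defined at the top of this file):

/* sketch only -- not part of the patch */
static int sketch_may_transition(int current_state, int new_state)
{
	if (new_state == SHUTDOWN_INVALID)
		return 0;	/* we never drive the state back to idle here */

	return current_state == SHUTDOWN_INVALID ||
	       current_state == SHUTDOWN_RESUMING;
}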
10825+static void __shutdown_handler(void *unused)
10826+{
10827+ int err;
10828+
10829+ err = kernel_thread((shutting_down == SHUTDOWN_SUSPEND) ?
10830+ xen_suspend : shutdown_process,
10831+ NULL, CLONE_FS | CLONE_FILES);
10832+
10833+ if (err < 0) {
10834+ printk(KERN_WARNING "Error creating shutdown process (%d): "
10835+ "retrying...\n", -err);
10836+ schedule_delayed_work(&shutdown_work, HZ/2);
10837+ }
10838+}
10839+
10840+static void shutdown_handler(struct xenbus_watch *watch,
10841+ const char **vec, unsigned int len)
10842+{
10843+ extern void ctrl_alt_del(void);
10844+ char *str;
10845+ struct xenbus_transaction xbt;
10846+ int err, new_state = SHUTDOWN_INVALID;
10847+
10848+ if ((shutting_down != SHUTDOWN_INVALID) &&
10849+ (shutting_down != SHUTDOWN_RESUMING))
10850+ return;
10851+
10852+ again:
10853+ err = xenbus_transaction_start(&xbt);
10854+ if (err)
10855+ return;
10856+
10857+ str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
10858+ /* Ignore read errors and empty reads. */
10859+ if (XENBUS_IS_ERR_READ(str)) {
10860+ xenbus_transaction_end(xbt, 1);
10861+ return;
10862+ }
10863+
10864+ xenbus_write(xbt, "control", "shutdown", "");
10865+
10866+ err = xenbus_transaction_end(xbt, 0);
10867+ if (err == -EAGAIN) {
10868+ kfree(str);
10869+ goto again;
10870+ }
10871+
10872+ if (strcmp(str, "poweroff") == 0)
10873+ new_state = SHUTDOWN_POWEROFF;
10874+ else if (strcmp(str, "reboot") == 0)
10875+ ctrl_alt_del();
10876+ else if (strcmp(str, "suspend") == 0)
10877+ new_state = SHUTDOWN_SUSPEND;
10878+ else if (strcmp(str, "halt") == 0)
10879+ new_state = SHUTDOWN_HALT;
10880+ else
10881+ printk("Ignoring shutdown request: %s\n", str);
10882+
10883+ switch_shutdown_state(new_state);
10884+
10885+ kfree(str);
10886+}
10887+
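shutdown_handler() above uses the usual xenbus read-modify-write pattern: start a transaction, read, write, commit, and retry the whole block when the commit returns -EAGAIN. A stripped-down sketch of that pattern (the key name below is a placeholder, not taken from the patch):

/* sketch only -- not part of the patch */
static void sketch_xenbus_rmw(void)
{
	struct xenbus_transaction xbt;
	int err;

 again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return;

	/* read and/or write nodes under "control" here */
	xenbus_write(xbt, "control", "sketch-key", "");

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
}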
10888+static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
10889+ unsigned int len)
10890+{
10891+ char sysrq_key = '\0';
10892+ struct xenbus_transaction xbt;
10893+ int err;
10894+
10895+ again:
10896+ err = xenbus_transaction_start(&xbt);
10897+ if (err)
10898+ return;
10899+ if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
10900+ printk(KERN_ERR "Unable to read sysrq code in "
10901+ "control/sysrq\n");
10902+ xenbus_transaction_end(xbt, 1);
10903+ return;
10904+ }
10905+
10906+ if (sysrq_key != '\0')
10907+ xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
10908+
10909+ err = xenbus_transaction_end(xbt, 0);
10910+ if (err == -EAGAIN)
10911+ goto again;
10912+
10913+#ifdef CONFIG_MAGIC_SYSRQ
10914+ if (sysrq_key != '\0')
10915+ handle_sysrq(sysrq_key, NULL, NULL);
10916+#endif
10917+}
10918+
10919+static struct xenbus_watch shutdown_watch = {
10920+ .node = "control/shutdown",
10921+ .callback = shutdown_handler
10922+};
10923+
10924+static struct xenbus_watch sysrq_watch = {
10925+ .node = "control/sysrq",
10926+ .callback = sysrq_handler
10927+};
10928+
10929+static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs *ptregs)
10930+{
10931+ switch_shutdown_state(SHUTDOWN_SUSPEND);
10932+ return IRQ_HANDLED;
10933+}
10934+
10935+static int setup_suspend_evtchn(void)
10936+{
10937+ static int irq;
10938+ int port;
10939+ char portstr[16];
10940+
10941+ if (irq > 0)
10942+ unbind_from_irqhandler(irq, NULL);
10943+
10944+ irq = bind_listening_port_to_irqhandler(0, suspend_int, 0, "suspend",
10945+ NULL);
10946+ if (irq <= 0)
10947+ return -1;
10948+
10949+ port = irq_to_evtchn_port(irq);
10950+ printk(KERN_INFO "suspend: event channel %d\n", port);
10951+ sprintf(portstr, "%d", port);
10952+ xenbus_write(XBT_NIL, "device/suspend", "event-channel", portstr);
10953+
10954+ return 0;
10955+}
10956+
10957+static int setup_shutdown_watcher(void)
10958+{
10959+ int err;
10960+
10961+ xenbus_scanf(XBT_NIL, "control",
10962+ "platform-feature-multiprocessor-suspend",
10963+ "%d", &fast_suspend);
10964+
10965+ err = register_xenbus_watch(&shutdown_watch);
10966+ if (err) {
10967+ printk(KERN_ERR "Failed to set shutdown watcher\n");
10968+ return err;
10969+ }
10970+
10971+ err = register_xenbus_watch(&sysrq_watch);
10972+ if (err) {
10973+ printk(KERN_ERR "Failed to set sysrq watcher\n");
10974+ return err;
10975+ }
10976+
10977+ /* suspend event channel */
10978+ err = setup_suspend_evtchn();
10979+ if (err) {
10980+ printk(KERN_ERR "Failed to register suspend event channel\n");
10981+ return err;
10982+ }
10983+
10984+ return 0;
10985+}
10986+
10987+#ifdef CONFIG_XEN
10988+
10989+static int shutdown_event(struct notifier_block *notifier,
10990+ unsigned long event,
10991+ void *data)
10992+{
10993+ setup_shutdown_watcher();
10994+ return NOTIFY_DONE;
10995+}
10996+
10997+static int __init setup_shutdown_event(void)
10998+{
10999+ static struct notifier_block xenstore_notifier = {
11000+ .notifier_call = shutdown_event
11001+ };
11002+ register_xenstore_notifier(&xenstore_notifier);
11003+
11004+ return 0;
11005+}
11006+
11007+subsys_initcall(setup_shutdown_event);
11008+
11009+#else /* !defined(CONFIG_XEN) */
11010+
11011+int xen_reboot_init(void)
11012+{
11013+ return setup_shutdown_watcher();
11014+}
11015+
11016+#endif /* !defined(CONFIG_XEN) */
11017Index: head-2008-11-25/drivers/xen/core/smpboot.c
11018===================================================================
11019--- /dev/null 1970-01-01 00:00:00.000000000 +0000
11020+++ head-2008-11-25/drivers/xen/core/smpboot.c 2008-03-06 08:54:32.000000000 +0100
11021@@ -0,0 +1,464 @@
11022+/*
11023+ * Xen SMP booting functions
11024+ *
11025+ * See arch/i386/kernel/smpboot.c for copyright and credits for derived
11026+ * portions of this file.
11027+ */
11028+
11029+#include <linux/module.h>
11030+#include <linux/init.h>
11031+#include <linux/kernel.h>
11032+#include <linux/mm.h>
11033+#include <linux/sched.h>
11034+#include <linux/kernel_stat.h>
11035+#include <linux/smp_lock.h>
11036+#include <linux/irq.h>
11037+#include <linux/bootmem.h>
11038+#include <linux/notifier.h>
11039+#include <linux/cpu.h>
11040+#include <linux/percpu.h>
11041+#include <asm/desc.h>
11042+#include <asm/arch_hooks.h>
11043+#include <asm/pgalloc.h>
11044+#include <xen/evtchn.h>
11045+#include <xen/interface/vcpu.h>
11046+#include <xen/cpu_hotplug.h>
11047+#include <xen/xenbus.h>
11048+
11049+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
11050+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
11051+
11052+extern int local_setup_timer(unsigned int cpu);
11053+extern void local_teardown_timer(unsigned int cpu);
11054+
11055+extern void hypervisor_callback(void);
11056+extern void failsafe_callback(void);
11057+extern void system_call(void);
11058+extern void smp_trap_init(trap_info_t *);
11059+
11060+/* Number of siblings per CPU package */
11061+int smp_num_siblings = 1;
11062+
11063+cpumask_t cpu_online_map;
11064+EXPORT_SYMBOL(cpu_online_map);
11065+cpumask_t cpu_possible_map;
11066+EXPORT_SYMBOL(cpu_possible_map);
11067+cpumask_t cpu_initialized_map;
11068+
11069+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
11070+EXPORT_SYMBOL(cpu_data);
11071+
11072+#ifdef CONFIG_HOTPLUG_CPU
11073+DEFINE_PER_CPU(int, cpu_state) = { 0 };
11074+#endif
11075+
11076+static DEFINE_PER_CPU(int, resched_irq);
11077+static DEFINE_PER_CPU(int, callfunc_irq);
11078+static char resched_name[NR_CPUS][15];
11079+static char callfunc_name[NR_CPUS][15];
11080+
11081+u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
11082+
11083+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
11084+cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
11085+EXPORT_SYMBOL(cpu_core_map);
11086+
11087+#if defined(__i386__)
11088+u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
11089+EXPORT_SYMBOL(x86_cpu_to_apicid);
11090+#elif !defined(CONFIG_X86_IO_APIC)
11091+unsigned int maxcpus = NR_CPUS;
11092+#endif
11093+
11094+void __init prefill_possible_map(void)
11095+{
11096+ int i, rc;
11097+
11098+ for_each_possible_cpu(i)
11099+ if (i != smp_processor_id())
11100+ return;
11101+
11102+ for (i = 0; i < NR_CPUS; i++) {
11103+ rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
11104+ if (rc >= 0)
11105+ cpu_set(i, cpu_possible_map);
11106+ }
11107+}
11108+
11109+void __init smp_alloc_memory(void)
11110+{
11111+}
11112+
11113+static inline void
11114+set_cpu_sibling_map(unsigned int cpu)
11115+{
11116+ cpu_data[cpu].phys_proc_id = cpu;
11117+ cpu_data[cpu].cpu_core_id = 0;
11118+
11119+ cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
11120+ cpu_core_map[cpu] = cpumask_of_cpu(cpu);
11121+
11122+ cpu_data[cpu].booted_cores = 1;
11123+}
11124+
11125+static void
11126+remove_siblinginfo(unsigned int cpu)
11127+{
11128+ cpu_data[cpu].phys_proc_id = BAD_APICID;
11129+ cpu_data[cpu].cpu_core_id = BAD_APICID;
11130+
11131+ cpus_clear(cpu_sibling_map[cpu]);
11132+ cpus_clear(cpu_core_map[cpu]);
11133+
11134+ cpu_data[cpu].booted_cores = 0;
11135+}
11136+
11137+static int __cpuinit xen_smp_intr_init(unsigned int cpu)
11138+{
11139+ int rc;
11140+
11141+ per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
11142+
11143+ sprintf(resched_name[cpu], "resched%u", cpu);
11144+ rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
11145+ cpu,
11146+ smp_reschedule_interrupt,
11147+ SA_INTERRUPT,
11148+ resched_name[cpu],
11149+ NULL);
11150+ if (rc < 0)
11151+ goto fail;
11152+ per_cpu(resched_irq, cpu) = rc;
11153+
11154+ sprintf(callfunc_name[cpu], "callfunc%u", cpu);
11155+ rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
11156+ cpu,
11157+ smp_call_function_interrupt,
11158+ SA_INTERRUPT,
11159+ callfunc_name[cpu],
11160+ NULL);
11161+ if (rc < 0)
11162+ goto fail;
11163+ per_cpu(callfunc_irq, cpu) = rc;
11164+
11165+ if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0))
11166+ goto fail;
11167+
11168+ return 0;
11169+
11170+ fail:
11171+ if (per_cpu(resched_irq, cpu) >= 0)
11172+ unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
11173+ if (per_cpu(callfunc_irq, cpu) >= 0)
11174+ unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
11175+ return rc;
11176+}
11177+
11178+#ifdef CONFIG_HOTPLUG_CPU
11179+static void xen_smp_intr_exit(unsigned int cpu)
11180+{
11181+ if (cpu != 0)
11182+ local_teardown_timer(cpu);
11183+
11184+ unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
11185+ unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
11186+}
11187+#endif
11188+
11189+void __cpuinit cpu_bringup(void)
11190+{
11191+ cpu_init();
11192+ identify_cpu(cpu_data + smp_processor_id());
11193+ touch_softlockup_watchdog();
11194+ preempt_disable();
11195+ local_irq_enable();
11196+}
11197+
11198+static void __cpuinit cpu_bringup_and_idle(void)
11199+{
11200+ cpu_bringup();
11201+ cpu_idle();
11202+}
11203+
11204+static void __cpuinit cpu_initialize_context(unsigned int cpu)
11205+{
11206+ /* vcpu_guest_context_t is too large to allocate on the stack.
11207+ * Hence we allocate statically and protect it with a lock */
11208+ static vcpu_guest_context_t ctxt;
11209+ static DEFINE_SPINLOCK(ctxt_lock);
11210+
11211+ struct task_struct *idle = idle_task(cpu);
11212+#ifdef __x86_64__
11213+ struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
11214+#else
11215+ struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
11216+#endif
11217+
11218+ if (cpu_test_and_set(cpu, cpu_initialized_map))
11219+ return;
11220+
11221+ spin_lock(&ctxt_lock);
11222+
11223+ memset(&ctxt, 0, sizeof(ctxt));
11224+
11225+ ctxt.flags = VGCF_IN_KERNEL;
11226+ ctxt.user_regs.ds = __USER_DS;
11227+ ctxt.user_regs.es = __USER_DS;
11228+ ctxt.user_regs.fs = 0;
11229+ ctxt.user_regs.gs = 0;
11230+ ctxt.user_regs.ss = __KERNEL_DS;
11231+ ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
11232+ ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
11233+
11234+ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
11235+
11236+ smp_trap_init(ctxt.trap_ctxt);
11237+
11238+ ctxt.ldt_ents = 0;
11239+
11240+ ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
11241+ ctxt.gdt_ents = gdt_descr->size / 8;
11242+
11243+#ifdef __i386__
11244+ ctxt.user_regs.cs = __KERNEL_CS;
11245+ ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
11246+
11247+ ctxt.kernel_ss = __KERNEL_DS;
11248+ ctxt.kernel_sp = idle->thread.esp0;
11249+
11250+ ctxt.event_callback_cs = __KERNEL_CS;
11251+ ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
11252+ ctxt.failsafe_callback_cs = __KERNEL_CS;
11253+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
11254+
11255+ ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
11256+#else /* __x86_64__ */
11257+ ctxt.user_regs.cs = __KERNEL_CS;
11258+ ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
11259+
11260+ ctxt.kernel_ss = __KERNEL_DS;
11261+ ctxt.kernel_sp = idle->thread.rsp0;
11262+
11263+ ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
11264+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
11265+ ctxt.syscall_callback_eip = (unsigned long)system_call;
11266+
11267+ ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
11268+
11269+ ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
11270+#endif
11271+
11272+ if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt))
11273+ BUG();
11274+
11275+ spin_unlock(&ctxt_lock);
11276+}
11277+
11278+void __init smp_prepare_cpus(unsigned int max_cpus)
11279+{
11280+ unsigned int cpu;
11281+ struct task_struct *idle;
11282+ int apicid, acpiid;
11283+ struct vcpu_get_physid cpu_id;
11284+#ifdef __x86_64__
11285+ struct desc_ptr *gdt_descr;
11286+#else
11287+ struct Xgt_desc_struct *gdt_descr;
11288+#endif
11289+
11290+ apicid = 0;
11291+ if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) {
11292+ apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
11293+ acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
11294+#ifdef CONFIG_ACPI
11295+ if (acpiid != 0xff)
11296+ x86_acpiid_to_apicid[acpiid] = apicid;
11297+#endif
11298+ }
11299+ boot_cpu_data.apicid = apicid;
11300+ cpu_data[0] = boot_cpu_data;
11301+
11302+ cpu_2_logical_apicid[0] = apicid;
11303+ x86_cpu_to_apicid[0] = apicid;
11304+
11305+ current_thread_info()->cpu = 0;
11306+
11307+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
11308+ cpus_clear(cpu_sibling_map[cpu]);
11309+ cpus_clear(cpu_core_map[cpu]);
11310+ }
11311+
11312+ set_cpu_sibling_map(0);
11313+
11314+ if (xen_smp_intr_init(0))
11315+ BUG();
11316+
11317+ cpu_initialized_map = cpumask_of_cpu(0);
11318+
11319+ /* Restrict the possible_map according to max_cpus. */
11320+ while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
11321+ for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
11322+ continue;
11323+ cpu_clear(cpu, cpu_possible_map);
11324+ }
11325+
11326+ for_each_possible_cpu (cpu) {
11327+ if (cpu == 0)
11328+ continue;
11329+
11330+#ifdef __x86_64__
11331+ gdt_descr = &cpu_gdt_descr[cpu];
11332+#else
11333+ gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
11334+#endif
11335+ gdt_descr->address = get_zeroed_page(GFP_KERNEL);
11336+ if (unlikely(!gdt_descr->address)) {
11337+ printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
11338+ cpu);
11339+ continue;
11340+ }
11341+ gdt_descr->size = GDT_SIZE;
11342+ memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
11343+ make_page_readonly(
11344+ (void *)gdt_descr->address,
11345+ XENFEAT_writable_descriptor_tables);
11346+
11347+ apicid = cpu;
11348+ if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
11349+ apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
11350+ acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
11351+#ifdef CONFIG_ACPI
11352+ if (acpiid != 0xff)
11353+ x86_acpiid_to_apicid[acpiid] = apicid;
11354+#endif
11355+ }
11356+ cpu_data[cpu] = boot_cpu_data;
11357+ cpu_data[cpu].apicid = apicid;
11358+
11359+ cpu_2_logical_apicid[cpu] = apicid;
11360+ x86_cpu_to_apicid[cpu] = apicid;
11361+
11362+ idle = fork_idle(cpu);
11363+ if (IS_ERR(idle))
11364+ panic("failed fork for CPU %d", cpu);
11365+
11366+#ifdef __x86_64__
11367+ cpu_pda(cpu)->pcurrent = idle;
11368+ cpu_pda(cpu)->cpunumber = cpu;
11369+ clear_ti_thread_flag(idle->thread_info, TIF_FORK);
11370+#endif
11371+
11372+ irq_ctx_init(cpu);
11373+
11374+#ifdef CONFIG_HOTPLUG_CPU
11375+ if (is_initial_xendomain())
11376+ cpu_set(cpu, cpu_present_map);
11377+#else
11378+ cpu_set(cpu, cpu_present_map);
11379+#endif
11380+ }
11381+
11382+ init_xenbus_allowed_cpumask();
11383+
11384+#ifdef CONFIG_X86_IO_APIC
11385+ /*
11386+ * Here we can be sure that there is an IO-APIC in the system. Let's
11387+ * go and set it up:
11388+ */
11389+ if (!skip_ioapic_setup && nr_ioapics)
11390+ setup_IO_APIC();
11391+#endif
11392+}
11393+
11394+void __devinit smp_prepare_boot_cpu(void)
11395+{
11396+ prefill_possible_map();
11397+}
11398+
11399+#ifdef CONFIG_HOTPLUG_CPU
11400+
11401+/*
11402+ * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
11403+ * But do it early enough to catch critical for_each_present_cpu() loops
11404+ * in i386-specific code.
11405+ */
11406+static int __init initialize_cpu_present_map(void)
11407+{
11408+ cpu_present_map = cpu_possible_map;
11409+ return 0;
11410+}
11411+core_initcall(initialize_cpu_present_map);
11412+
11413+int __cpu_disable(void)
11414+{
11415+ cpumask_t map = cpu_online_map;
11416+ unsigned int cpu = smp_processor_id();
11417+
11418+ if (cpu == 0)
11419+ return -EBUSY;
11420+
11421+ remove_siblinginfo(cpu);
11422+
11423+ cpu_clear(cpu, map);
11424+ fixup_irqs(map);
11425+ cpu_clear(cpu, cpu_online_map);
11426+
11427+ return 0;
11428+}
11429+
11430+void __cpu_die(unsigned int cpu)
11431+{
11432+ while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
11433+ current->state = TASK_UNINTERRUPTIBLE;
11434+ schedule_timeout(HZ/10);
11435+ }
11436+
11437+ xen_smp_intr_exit(cpu);
11438+
11439+ if (num_online_cpus() == 1)
11440+ alternatives_smp_switch(0);
11441+}
11442+
11443+#endif /* CONFIG_HOTPLUG_CPU */
11444+
11445+int __cpuinit __cpu_up(unsigned int cpu)
11446+{
11447+ int rc;
11448+
11449+ rc = cpu_up_check(cpu);
11450+ if (rc)
11451+ return rc;
11452+
11453+ cpu_initialize_context(cpu);
11454+
11455+ if (num_online_cpus() == 1)
11456+ alternatives_smp_switch(1);
11457+
11458+ /* This must be done before setting cpu_online_map */
11459+ set_cpu_sibling_map(cpu);
11460+ wmb();
11461+
11462+ rc = xen_smp_intr_init(cpu);
11463+ if (rc) {
11464+ remove_siblinginfo(cpu);
11465+ return rc;
11466+ }
11467+
11468+ cpu_set(cpu, cpu_online_map);
11469+
11470+ rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
11471+ BUG_ON(rc);
11472+
11473+ return 0;
11474+}
11475+
11476+void __init smp_cpus_done(unsigned int max_cpus)
11477+{
11478+}
11479+
11480+#ifndef CONFIG_X86_LOCAL_APIC
11481+int setup_profiling_timer(unsigned int multiplier)
11482+{
11483+ return -EINVAL;
11484+}
11485+#endif
11486Index: head-2008-11-25/drivers/xen/core/xen_proc.c
11487===================================================================
11488--- /dev/null 1970-01-01 00:00:00.000000000 +0000
11489+++ head-2008-11-25/drivers/xen/core/xen_proc.c 2007-06-12 13:13:44.000000000 +0200
11490@@ -0,0 +1,23 @@
11491+
11492+#include <linux/module.h>
11493+#include <linux/proc_fs.h>
11494+#include <xen/xen_proc.h>
11495+
11496+static struct proc_dir_entry *xen_base;
11497+
11498+struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
11499+{
11500+ if ( xen_base == NULL )
11501+ if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL )
11502+ panic("Couldn't create /proc/xen");
11503+ return create_proc_entry(name, mode, xen_base);
11504+}
11505+
11506+EXPORT_SYMBOL_GPL(create_xen_proc_entry);
11507+
11508+void remove_xen_proc_entry(const char *name)
11509+{
11510+ remove_proc_entry(name, xen_base);
11511+}
11512+
11513+EXPORT_SYMBOL_GPL(remove_xen_proc_entry);
11514Index: head-2008-11-25/drivers/xen/core/xen_sysfs.c
11515===================================================================
11516--- /dev/null 1970-01-01 00:00:00.000000000 +0000
11517+++ head-2008-11-25/drivers/xen/core/xen_sysfs.c 2008-10-29 09:55:56.000000000 +0100
11518@@ -0,0 +1,427 @@
11519+/*
11520+ * copyright (c) 2006 IBM Corporation
11521+ * Authored by: Mike D. Day <ncmike@us.ibm.com>
11522+ *
11523+ * This program is free software; you can redistribute it and/or modify
11524+ * it under the terms of the GNU General Public License version 2 as
11525+ * published by the Free Software Foundation.
11526+ */
11527+
11528+#include <linux/err.h>
11529+#include <linux/kernel.h>
11530+#include <linux/module.h>
11531+#include <linux/init.h>
11532+#include <asm/hypervisor.h>
11533+#include <xen/features.h>
11534+#include <xen/hypervisor_sysfs.h>
11535+#include <xen/xenbus.h>
11536+#include <xen/interface/kexec.h>
11537+
11538+MODULE_LICENSE("GPL");
11539+MODULE_AUTHOR("Mike D. Day <ncmike@us.ibm.com>");
11540+
11541+static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
11542+{
11543+ return sprintf(buffer, "xen\n");
11544+}
11545+
11546+HYPERVISOR_ATTR_RO(type);
11547+
11548+static int __init xen_sysfs_type_init(void)
11549+{
11550+ return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
11551+}
11552+
11553+static void xen_sysfs_type_destroy(void)
11554+{
11555+ sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
11556+}
11557+
11558+/* xen version attributes */
11559+static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
11560+{
11561+ int version = HYPERVISOR_xen_version(XENVER_version, NULL);
11562+ if (version)
11563+ return sprintf(buffer, "%d\n", version >> 16);
11564+ return -ENODEV;
11565+}
11566+
11567+HYPERVISOR_ATTR_RO(major);
11568+
11569+static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
11570+{
11571+ int version = HYPERVISOR_xen_version(XENVER_version, NULL);
11572+ if (version)
11573+ return sprintf(buffer, "%d\n", version & 0xff);
11574+ return -ENODEV;
11575+}
11576+
11577+HYPERVISOR_ATTR_RO(minor);
11578+
11579+static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
11580+{
11581+ int ret = -ENOMEM;
11582+ char *extra;
11583+
11584+ extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
11585+ if (extra) {
11586+ ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
11587+ if (!ret)
11588+ ret = sprintf(buffer, "%s\n", extra);
11589+ kfree(extra);
11590+ }
11591+
11592+ return ret;
11593+}
11594+
11595+HYPERVISOR_ATTR_RO(extra);
11596+
11597+static struct attribute *version_attrs[] = {
11598+ &major_attr.attr,
11599+ &minor_attr.attr,
11600+ &extra_attr.attr,
11601+ NULL
11602+};
11603+
11604+static struct attribute_group version_group = {
11605+ .name = "version",
11606+ .attrs = version_attrs,
11607+};
11608+
11609+static int __init xen_sysfs_version_init(void)
11610+{
11611+ return sysfs_create_group(&hypervisor_subsys.kset.kobj,
11612+ &version_group);
11613+}
11614+
11615+static void xen_sysfs_version_destroy(void)
11616+{
11617+ sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group);
11618+}
11619+
11620+/* UUID */
11621+
11622+static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
11623+{
11624+ char *vm, *val;
11625+ int ret;
11626+ extern int xenstored_ready;
11627+
11628+ if (!xenstored_ready)
11629+ return -EBUSY;
11630+
11631+ vm = xenbus_read(XBT_NIL, "vm", "", NULL);
11632+ if (IS_ERR(vm))
11633+ return PTR_ERR(vm);
11634+ val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
11635+ kfree(vm);
11636+ if (IS_ERR(val))
11637+ return PTR_ERR(val);
11638+ ret = sprintf(buffer, "%s\n", val);
11639+ kfree(val);
11640+ return ret;
11641+}
11642+
11643+HYPERVISOR_ATTR_RO(uuid);
11644+
11645+static int __init xen_sysfs_uuid_init(void)
11646+{
11647+ return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
11648+}
11649+
11650+static void xen_sysfs_uuid_destroy(void)
11651+{
11652+ sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
11653+}
11654+
11655+/* xen compilation attributes */
11656+
11657+static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
11658+{
11659+ int ret = -ENOMEM;
11660+ struct xen_compile_info *info;
11661+
11662+ info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
11663+ if (info) {
11664+ ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
11665+ if (!ret)
11666+ ret = sprintf(buffer, "%s\n", info->compiler);
11667+ kfree(info);
11668+ }
11669+
11670+ return ret;
11671+}
11672+
11673+HYPERVISOR_ATTR_RO(compiler);
11674+
11675+static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
11676+{
11677+ int ret = -ENOMEM;
11678+ struct xen_compile_info *info;
11679+
11680+ info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
11681+ if (info) {
11682+ ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
11683+ if (!ret)
11684+ ret = sprintf(buffer, "%s\n", info->compile_by);
11685+ kfree(info);
11686+ }
11687+
11688+ return ret;
11689+}
11690+
11691+HYPERVISOR_ATTR_RO(compiled_by);
11692+
11693+static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
11694+{
11695+ int ret = -ENOMEM;
11696+ struct xen_compile_info *info;
11697+
11698+ info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
11699+ if (info) {
11700+ ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
11701+ if (!ret)
11702+ ret = sprintf(buffer, "%s\n", info->compile_date);
11703+ kfree(info);
11704+ }
11705+
11706+ return ret;
11707+}
11708+
11709+HYPERVISOR_ATTR_RO(compile_date);
11710+
11711+static struct attribute *xen_compile_attrs[] = {
11712+ &compiler_attr.attr,
11713+ &compiled_by_attr.attr,
11714+ &compile_date_attr.attr,
11715+ NULL
11716+};
11717+
11718+static struct attribute_group xen_compilation_group = {
11719+ .name = "compilation",
11720+ .attrs = xen_compile_attrs,
11721+};
11722+
11723+static int __init xen_compilation_init(void)
11724+{
11725+ return sysfs_create_group(&hypervisor_subsys.kset.kobj,
11726+ &xen_compilation_group);
11727+}
11728+
11729+static void xen_compilation_destroy(void)
11730+{
11731+ sysfs_remove_group(&hypervisor_subsys.kset.kobj,
11732+ &xen_compilation_group);
11733+}
11734+
11735+/* xen properties info */
11736+
11737+static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
11738+{
11739+ int ret = -ENOMEM;
11740+ char *caps;
11741+
11742+ caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
11743+ if (caps) {
11744+ ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
11745+ if (!ret)
11746+ ret = sprintf(buffer, "%s\n", caps);
11747+ kfree(caps);
11748+ }
11749+
11750+ return ret;
11751+}
11752+
11753+HYPERVISOR_ATTR_RO(capabilities);
11754+
11755+static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
11756+{
11757+ int ret = -ENOMEM;
11758+ char *cset;
11759+
11760+ cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
11761+ if (cset) {
11762+ ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
11763+ if (!ret)
11764+ ret = sprintf(buffer, "%s\n", cset);
11765+ kfree(cset);
11766+ }
11767+
11768+ return ret;
11769+}
11770+
11771+HYPERVISOR_ATTR_RO(changeset);
11772+
11773+static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
11774+{
11775+ int ret = -ENOMEM;
11776+ struct xen_platform_parameters *parms;
11777+
11778+ parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
11779+ if (parms) {
11780+ ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
11781+ parms);
11782+ if (!ret)
11783+ ret = sprintf(buffer, "%lx\n", parms->virt_start);
11784+ kfree(parms);
11785+ }
11786+
11787+ return ret;
11788+}
11789+
11790+HYPERVISOR_ATTR_RO(virtual_start);
11791+
11792+static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
11793+{
11794+ int ret;
11795+
11796+ ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
11797+ if (ret > 0)
11798+ ret = sprintf(buffer, "%x\n", ret);
11799+
11800+ return ret;
11801+}
11802+
11803+HYPERVISOR_ATTR_RO(pagesize);
11804+
11805+/* eventually there will be several more features to export */
11806+static ssize_t xen_feature_show(int index, char *buffer)
11807+{
11808+ int ret = -ENOMEM;
11809+ struct xen_feature_info *info;
11810+
11811+ info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL);
11812+ if (info) {
11813+ info->submap_idx = index;
11814+ ret = HYPERVISOR_xen_version(XENVER_get_features, info);
11815+ if (!ret)
11816+ ret = sprintf(buffer, "%d\n", info->submap);
11817+ kfree(info);
11818+ }
11819+
11820+ return ret;
11821+}
11822+
11823+static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer)
11824+{
11825+ return xen_feature_show(XENFEAT_writable_page_tables, buffer);
11826+}
11827+
11828+HYPERVISOR_ATTR_RO(writable_pt);
11829+
11830+static struct attribute *xen_properties_attrs[] = {
11831+ &capabilities_attr.attr,
11832+ &changeset_attr.attr,
11833+ &virtual_start_attr.attr,
11834+ &pagesize_attr.attr,
11835+ &writable_pt_attr.attr,
11836+ NULL
11837+};
11838+
11839+static struct attribute_group xen_properties_group = {
11840+ .name = "properties",
11841+ .attrs = xen_properties_attrs,
11842+};
11843+
11844+static int __init xen_properties_init(void)
11845+{
11846+ return sysfs_create_group(&hypervisor_subsys.kset.kobj,
11847+ &xen_properties_group);
11848+}
11849+
11850+static void xen_properties_destroy(void)
11851+{
11852+ sysfs_remove_group(&hypervisor_subsys.kset.kobj,
11853+ &xen_properties_group);
11854+}
11855+
11856+#ifdef CONFIG_KEXEC
11857+
11858+extern size_t vmcoreinfo_size_xen;
11859+extern unsigned long paddr_vmcoreinfo_xen;
11860+
11861+static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page)
11862+{
11863+ return sprintf(page, "%lx %zx\n",
11864+ paddr_vmcoreinfo_xen, vmcoreinfo_size_xen);
11865+}
11866+
11867+HYPERVISOR_ATTR_RO(vmcoreinfo);
11868+
11869+static int __init xen_sysfs_vmcoreinfo_init(void)
11870+{
11871+ return sysfs_create_file(&hypervisor_subsys.kset.kobj,
11872+ &vmcoreinfo_attr.attr);
11873+}
11874+
11875+static void xen_sysfs_vmcoreinfo_destroy(void)
11876+{
11877+ sysfs_remove_file(&hypervisor_subsys.kset.kobj, &vmcoreinfo_attr.attr);
11878+}
11879+
11880+#endif
11881+
11882+static int __init hyper_sysfs_init(void)
11883+{
11884+ int ret;
11885+
11886+ if (!is_running_on_xen())
11887+ return -ENODEV;
11888+
11889+ ret = xen_sysfs_type_init();
11890+ if (ret)
11891+ goto out;
11892+ ret = xen_sysfs_version_init();
11893+ if (ret)
11894+ goto version_out;
11895+ ret = xen_compilation_init();
11896+ if (ret)
11897+ goto comp_out;
11898+ ret = xen_sysfs_uuid_init();
11899+ if (ret)
11900+ goto uuid_out;
11901+ ret = xen_properties_init();
11902+ if (ret)
11903+ goto prop_out;
11904+#ifdef CONFIG_KEXEC
11905+ if (vmcoreinfo_size_xen != 0) {
11906+ ret = xen_sysfs_vmcoreinfo_init();
11907+ if (ret)
11908+ goto vmcoreinfo_out;
11909+ }
11910+#endif
11911+
11912+ goto out;
11913+
11914+#ifdef CONFIG_KEXEC
11915+vmcoreinfo_out:
11916+#endif
11917+ xen_properties_destroy();
11918+prop_out:
11919+ xen_sysfs_uuid_destroy();
11920+uuid_out:
11921+ xen_compilation_destroy();
11922+comp_out:
11923+ xen_sysfs_version_destroy();
11924+version_out:
11925+ xen_sysfs_type_destroy();
11926+out:
11927+ return ret;
11928+}
11929+
11930+static void __exit hyper_sysfs_exit(void)
11931+{
11932+#ifdef CONFIG_KEXEC
11933+ if (vmcoreinfo_size_xen != 0)
11934+ xen_sysfs_vmcoreinfo_destroy();
11935+#endif
11936+ xen_properties_destroy();
11937+ xen_compilation_destroy();
11938+ xen_sysfs_uuid_destroy();
11939+ xen_sysfs_version_destroy();
11940+ xen_sysfs_type_destroy();
11941+
11942+}
11943+
11944+module_init(hyper_sysfs_init);
11945+module_exit(hyper_sysfs_exit);
11946Index: head-2008-11-25/drivers/xen/core/xencomm.c
11947===================================================================
11948--- /dev/null 1970-01-01 00:00:00.000000000 +0000
11949+++ head-2008-11-25/drivers/xen/core/xencomm.c 2007-11-12 08:41:05.000000000 +0100
11950@@ -0,0 +1,229 @@
11951+/*
11952+ * This program is free software; you can redistribute it and/or modify
11953+ * it under the terms of the GNU General Public License as published by
11954+ * the Free Software Foundation; either version 2 of the License, or
11955+ * (at your option) any later version.
11956+ *
11957+ * This program is distributed in the hope that it will be useful,
11958+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11959+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11960+ * GNU General Public License for more details.
11961+ *
11962+ * You should have received a copy of the GNU General Public License
11963+ * along with this program; if not, write to the Free Software
11964+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
11965+ *
11966+ * Copyright (C) IBM Corp. 2006
11967+ *
11968+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
11969+ */
11970+
11971+#include <linux/gfp.h>
11972+#include <linux/mm.h>
11973+#include <asm/page.h>
11974+#include <xen/xencomm.h>
11975+#include <xen/interface/xen.h>
11976+#ifdef __ia64__
11977+#include <asm/xen/xencomm.h> /* for is_kern_addr() */
11978+#endif
11979+
11980+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
11981+#include <xen/platform-compat.h>
11982+#endif
11983+
11984+static int xencomm_init(struct xencomm_desc *desc,
11985+ void *buffer, unsigned long bytes)
11986+{
11987+ unsigned long recorded = 0;
11988+ int i = 0;
11989+
11990+ while ((recorded < bytes) && (i < desc->nr_addrs)) {
11991+ unsigned long vaddr = (unsigned long)buffer + recorded;
11992+ unsigned long paddr;
11993+ int offset;
11994+ int chunksz;
11995+
11996+ offset = vaddr % PAGE_SIZE; /* handle partial pages */
11997+ chunksz = min(PAGE_SIZE - offset, bytes - recorded);
11998+
11999+ paddr = xencomm_vtop(vaddr);
12000+ if (paddr == ~0UL) {
12001+ printk("%s: couldn't translate vaddr %lx\n",
12002+ __func__, vaddr);
12003+ return -EINVAL;
12004+ }
12005+
12006+ desc->address[i++] = paddr;
12007+ recorded += chunksz;
12008+ }
12009+
12010+ if (recorded < bytes) {
12011+ printk("%s: could only translate %ld of %ld bytes\n",
12012+ __func__, recorded, bytes);
12013+ return -ENOSPC;
12014+ }
12015+
12016+ /* mark remaining addresses invalid (just for safety) */
12017+ while (i < desc->nr_addrs)
12018+ desc->address[i++] = XENCOMM_INVALID;
12019+
12020+ desc->magic = XENCOMM_MAGIC;
12021+
12022+ return 0;
12023+}
12024+
12025+static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask,
12026+ void *buffer, unsigned long bytes)
12027+{
12028+ struct xencomm_desc *desc;
12029+ unsigned long buffer_ulong = (unsigned long)buffer;
12030+ unsigned long start = buffer_ulong & PAGE_MASK;
12031+ unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK;
12032+ unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT;
12033+ unsigned long size = sizeof(*desc) +
12034+ sizeof(desc->address[0]) * nr_addrs;
12035+
12036+ /*
12037+ * slab allocator returns at least sizeof(void*) aligned pointer.
12038+ * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might
12039+ * cross page boundary.
12040+ */
12041+ if (sizeof(*desc) > sizeof(void*)) {
12042+ unsigned long order = get_order(size);
12043+ desc = (struct xencomm_desc *)__get_free_pages(gfp_mask,
12044+ order);
12045+ if (desc == NULL)
12046+ return NULL;
12047+
12048+ desc->nr_addrs =
12049+ ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) /
12050+ sizeof(*desc->address);
12051+ } else {
12052+ desc = kmalloc(size, gfp_mask);
12053+ if (desc == NULL)
12054+ return NULL;
12055+
12056+ desc->nr_addrs = nr_addrs;
12057+ }
12058+ return desc;
12059+}
12060+
12061+void xencomm_free(struct xencomm_handle *desc)
12062+{
12063+ if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) {
12064+ struct xencomm_desc *desc__ = (struct xencomm_desc*)desc;
12065+ if (sizeof(*desc__) > sizeof(void*)) {
12066+ unsigned long size = sizeof(*desc__) +
12067+ sizeof(desc__->address[0]) * desc__->nr_addrs;
12068+ unsigned long order = get_order(size);
12069+ free_pages((unsigned long)__va(desc), order);
12070+ } else
12071+ kfree(__va(desc));
12072+ }
12073+}
12074+
12075+static int xencomm_create(void *buffer, unsigned long bytes, struct xencomm_desc **ret, gfp_t gfp_mask)
12076+{
12077+ struct xencomm_desc *desc;
12078+ int rc;
12079+
12080+ pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes);
12081+
12082+ if (bytes == 0) {
12083+ /* don't create a descriptor; Xen recognizes NULL. */
12084+ BUG_ON(buffer != NULL);
12085+ *ret = NULL;
12086+ return 0;
12087+ }
12088+
12089+ BUG_ON(buffer == NULL); /* 'bytes' is non-zero */
12090+
12091+ desc = xencomm_alloc(gfp_mask, buffer, bytes);
12092+ if (!desc) {
12093+ printk("%s failure\n", "xencomm_alloc");
12094+ return -ENOMEM;
12095+ }
12096+
12097+ rc = xencomm_init(desc, buffer, bytes);
12098+ if (rc) {
12099+ printk("%s failure: %d\n", "xencomm_init", rc);
12100+ xencomm_free((struct xencomm_handle *)__pa(desc));
12101+ return rc;
12102+ }
12103+
12104+ *ret = desc;
12105+ return 0;
12106+}
12107+
12108+/* check if memory address is physically contiguous, i.e. outside the VMALLOC region */
12109+static int is_phys_contiguous(unsigned long addr)
12110+{
12111+ if (!is_kernel_addr(addr))
12112+ return 0;
12113+
12114+ return (addr < VMALLOC_START) || (addr >= VMALLOC_END);
12115+}
12116+
12117+static struct xencomm_handle *xencomm_create_inline(void *ptr)
12118+{
12119+ unsigned long paddr;
12120+
12121+ BUG_ON(!is_phys_contiguous((unsigned long)ptr));
12122+
12123+ paddr = (unsigned long)xencomm_pa(ptr);
12124+ BUG_ON(paddr & XENCOMM_INLINE_FLAG);
12125+ return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
12126+}
12127+
12128+/* "mini" routine, for stack-based communications: */
12129+static int xencomm_create_mini(void *buffer,
12130+ unsigned long bytes, struct xencomm_mini *xc_desc,
12131+ struct xencomm_desc **ret)
12132+{
12133+ int rc = 0;
12134+ struct xencomm_desc *desc;
12135+ BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0);
12136+
12137+ desc = (void *)xc_desc;
12138+
12139+ desc->nr_addrs = XENCOMM_MINI_ADDRS;
12140+
12141+ if (!(rc = xencomm_init(desc, buffer, bytes)))
12142+ *ret = desc;
12143+
12144+ return rc;
12145+}
12146+
12147+struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes)
12148+{
12149+ int rc;
12150+ struct xencomm_desc *desc;
12151+
12152+ if (is_phys_contiguous((unsigned long)ptr))
12153+ return xencomm_create_inline(ptr);
12154+
12155+ rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL);
12156+
12157+ if (rc || desc == NULL)
12158+ return NULL;
12159+
12160+ return xencomm_pa(desc);
12161+}
12162+
12163+struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes,
12164+ struct xencomm_mini *xc_desc)
12165+{
12166+ int rc;
12167+ struct xencomm_desc *desc = NULL;
12168+
12169+ if (is_phys_contiguous((unsigned long)ptr))
12170+ return xencomm_create_inline(ptr);
12171+
12172+ rc = xencomm_create_mini(ptr, bytes, xc_desc,
12173+ &desc);
12174+
12175+ if (rc)
12176+ return NULL;
12177+
12178+ return xencomm_pa(desc);
12179+}
12180Index: head-2008-11-25/drivers/xen/evtchn/Makefile
12181===================================================================
12182--- /dev/null 1970-01-01 00:00:00.000000000 +0000
12183+++ head-2008-11-25/drivers/xen/evtchn/Makefile 2007-06-12 13:13:44.000000000 +0200
12184@@ -0,0 +1,2 @@
12185+
12186+obj-y := evtchn.o
12187Index: head-2008-11-25/drivers/xen/evtchn/evtchn.c
12188===================================================================
12189--- /dev/null 1970-01-01 00:00:00.000000000 +0000
12190+++ head-2008-11-25/drivers/xen/evtchn/evtchn.c 2008-08-07 12:44:36.000000000 +0200
12191@@ -0,0 +1,560 @@
12192+/******************************************************************************
12193+ * evtchn.c
12194+ *
12195+ * Driver for receiving and demuxing event-channel signals.
12196+ *
12197+ * Copyright (c) 2004-2005, K A Fraser
12198+ * Multi-process extensions Copyright (c) 2004, Steven Smith
12199+ *
12200+ * This program is free software; you can redistribute it and/or
12201+ * modify it under the terms of the GNU General Public License version 2
12202+ * as published by the Free Software Foundation; or, when distributed
12203+ * separately from the Linux kernel or incorporated into other
12204+ * software packages, subject to the following license:
12205+ *
12206+ * Permission is hereby granted, free of charge, to any person obtaining a copy
12207+ * of this source file (the "Software"), to deal in the Software without
12208+ * restriction, including without limitation the rights to use, copy, modify,
12209+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
12210+ * and to permit persons to whom the Software is furnished to do so, subject to
12211+ * the following conditions:
12212+ *
12213+ * The above copyright notice and this permission notice shall be included in
12214+ * all copies or substantial portions of the Software.
12215+ *
12216+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12217+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
12218+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
12219+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
12220+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
12221+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
12222+ * IN THE SOFTWARE.
12223+ */
12224+
12225+#include <linux/module.h>
12226+#include <linux/kernel.h>
12227+#include <linux/sched.h>
12228+#include <linux/slab.h>
12229+#include <linux/string.h>
12230+#include <linux/errno.h>
12231+#include <linux/fs.h>
12232+#include <linux/errno.h>
12233+#include <linux/miscdevice.h>
12234+#include <linux/major.h>
12235+#include <linux/proc_fs.h>
12236+#include <linux/stat.h>
12237+#include <linux/poll.h>
12238+#include <linux/irq.h>
12239+#include <linux/init.h>
12240+#include <linux/gfp.h>
12241+#include <linux/mutex.h>
12242+#include <linux/cpu.h>
12243+#include <xen/evtchn.h>
12244+#include <xen/public/evtchn.h>
12245+
12246+struct per_user_data {
12247+ /* Notification ring, accessed via /dev/xen/evtchn. */
12248+#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
12249+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
12250+ evtchn_port_t *ring;
12251+ unsigned int ring_cons, ring_prod, ring_overflow;
12252+ struct mutex ring_cons_mutex; /* protect against concurrent readers */
12253+
12254+ /* Processes wait on this queue when ring is empty. */
12255+ wait_queue_head_t evtchn_wait;
12256+ struct fasync_struct *evtchn_async_queue;
12257+
12258+ int bind_cpu;
12259+ int nr_event_wrong_delivery;
12260+};
12261+
12262+/* Who's bound to each port? */
12263+static struct per_user_data *port_user[NR_EVENT_CHANNELS];
12264+static spinlock_t port_user_lock;
12265+
12266+void evtchn_device_upcall(int port)
12267+{
12268+ struct per_user_data *u;
12269+
12270+ spin_lock(&port_user_lock);
12271+
12272+ mask_evtchn(port);
12273+ clear_evtchn(port);
12274+
12275+ if ((u = port_user[port]) != NULL) {
12276+ if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
12277+ u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
12278+ wmb(); /* Ensure ring contents visible */
12279+ if (u->ring_cons == u->ring_prod++) {
12280+ wake_up_interruptible(&u->evtchn_wait);
12281+ kill_fasync(&u->evtchn_async_queue,
12282+ SIGIO, POLL_IN);
12283+ }
12284+ } else {
12285+ u->ring_overflow = 1;
12286+ }
12287+ }
12288+
12289+ spin_unlock(&port_user_lock);
12290+}
12291+
12292+static void evtchn_check_wrong_delivery(struct per_user_data *u)
12293+{
12294+ evtchn_port_t port;
12295+ unsigned int current_cpu = smp_processor_id();
12296+
12297+ /* Delivered to correct CPU? All is good. */
12298+ if (u->bind_cpu == current_cpu) {
12299+ u->nr_event_wrong_delivery = 0;
12300+ return;
12301+ }
12302+
12303+ /* Tolerate up to 100 consecutive misdeliveries. */
12304+ if (++u->nr_event_wrong_delivery < 100)
12305+ return;
12306+
12307+ spin_lock_irq(&port_user_lock);
12308+
12309+ for (port = 0; port < NR_EVENT_CHANNELS; port++)
12310+ if (port_user[port] == u)
12311+ rebind_evtchn_to_cpu(port, current_cpu);
12312+
12313+ u->bind_cpu = current_cpu;
12314+ u->nr_event_wrong_delivery = 0;
12315+
12316+ spin_unlock_irq(&port_user_lock);
12317+}
12318+
12319+static ssize_t evtchn_read(struct file *file, char __user *buf,
12320+ size_t count, loff_t *ppos)
12321+{
12322+ int rc;
12323+ unsigned int c, p, bytes1 = 0, bytes2 = 0;
12324+ struct per_user_data *u = file->private_data;
12325+
12326+ /* Whole number of ports. */
12327+ count &= ~(sizeof(evtchn_port_t)-1);
12328+
12329+ if (count == 0)
12330+ return 0;
12331+
12332+ if (count > PAGE_SIZE)
12333+ count = PAGE_SIZE;
12334+
12335+ for (;;) {
12336+ mutex_lock(&u->ring_cons_mutex);
12337+
12338+ rc = -EFBIG;
12339+ if (u->ring_overflow)
12340+ goto unlock_out;
12341+
12342+ if ((c = u->ring_cons) != (p = u->ring_prod))
12343+ break;
12344+
12345+ mutex_unlock(&u->ring_cons_mutex);
12346+
12347+ if (file->f_flags & O_NONBLOCK)
12348+ return -EAGAIN;
12349+
12350+ rc = wait_event_interruptible(
12351+ u->evtchn_wait, u->ring_cons != u->ring_prod);
12352+ if (rc)
12353+ return rc;
12354+ }
12355+
12356+ /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
12357+ if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
12358+ bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
12359+ sizeof(evtchn_port_t);
12360+ bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
12361+ } else {
12362+ bytes1 = (p - c) * sizeof(evtchn_port_t);
12363+ bytes2 = 0;
12364+ }
12365+
12366+ /* Truncate chunks according to caller's maximum byte count. */
12367+ if (bytes1 > count) {
12368+ bytes1 = count;
12369+ bytes2 = 0;
12370+ } else if ((bytes1 + bytes2) > count) {
12371+ bytes2 = count - bytes1;
12372+ }
12373+
12374+ rc = -EFAULT;
12375+ rmb(); /* Ensure that we see the port before we copy it. */
12376+ if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
12377+ ((bytes2 != 0) &&
12378+ copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
12379+ goto unlock_out;
12380+
12381+ evtchn_check_wrong_delivery(u);
12382+
12383+ u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
12384+ rc = bytes1 + bytes2;
12385+
12386+ unlock_out:
12387+ mutex_unlock(&u->ring_cons_mutex);
12388+ return rc;
12389+}
12390+
12391+static ssize_t evtchn_write(struct file *file, const char __user *buf,
12392+ size_t count, loff_t *ppos)
12393+{
12394+ int rc, i;
12395+ evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
12396+ struct per_user_data *u = file->private_data;
12397+
12398+ if (kbuf == NULL)
12399+ return -ENOMEM;
12400+
12401+ /* Whole number of ports. */
12402+ count &= ~(sizeof(evtchn_port_t)-1);
12403+
12404+ rc = 0;
12405+ if (count == 0)
12406+ goto out;
12407+
12408+ if (count > PAGE_SIZE)
12409+ count = PAGE_SIZE;
12410+
12411+ rc = -EFAULT;
12412+ if (copy_from_user(kbuf, buf, count) != 0)
12413+ goto out;
12414+
12415+ spin_lock_irq(&port_user_lock);
12416+ for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
12417+ if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
12418+ unmask_evtchn(kbuf[i]);
12419+ spin_unlock_irq(&port_user_lock);
12420+
12421+ rc = count;
12422+
12423+ out:
12424+ free_page((unsigned long)kbuf);
12425+ return rc;
12426+}
12427+
12428+static unsigned int next_bind_cpu(cpumask_t map)
12429+{
12430+ static unsigned int bind_cpu;
12431+ bind_cpu = next_cpu(bind_cpu, map);
12432+ if (bind_cpu >= NR_CPUS)
12433+ bind_cpu = first_cpu(map);
12434+ return bind_cpu;
12435+}
12436+
12437+static void evtchn_bind_to_user(struct per_user_data *u, int port)
12438+{
12439+ spin_lock_irq(&port_user_lock);
12440+
12441+ BUG_ON(port_user[port] != NULL);
12442+ port_user[port] = u;
12443+
12444+ if (u->bind_cpu == -1)
12445+ u->bind_cpu = next_bind_cpu(cpu_online_map);
12446+
12447+ rebind_evtchn_to_cpu(port, u->bind_cpu);
12448+
12449+ unmask_evtchn(port);
12450+
12451+ spin_unlock_irq(&port_user_lock);
12452+}
12453+
12454+static long evtchn_ioctl(struct file *file,
12455+ unsigned int cmd, unsigned long arg)
12456+{
12457+ int rc;
12458+ struct per_user_data *u = file->private_data;
12459+ void __user *uarg = (void __user *) arg;
12460+
12461+ switch (cmd) {
12462+ case IOCTL_EVTCHN_BIND_VIRQ: {
12463+ struct ioctl_evtchn_bind_virq bind;
12464+ struct evtchn_bind_virq bind_virq;
12465+
12466+ rc = -EFAULT;
12467+ if (copy_from_user(&bind, uarg, sizeof(bind)))
12468+ break;
12469+
12470+ bind_virq.virq = bind.virq;
12471+ bind_virq.vcpu = 0;
12472+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
12473+ &bind_virq);
12474+ if (rc != 0)
12475+ break;
12476+
12477+ rc = bind_virq.port;
12478+ evtchn_bind_to_user(u, rc);
12479+ break;
12480+ }
12481+
12482+ case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
12483+ struct ioctl_evtchn_bind_interdomain bind;
12484+ struct evtchn_bind_interdomain bind_interdomain;
12485+
12486+ rc = -EFAULT;
12487+ if (copy_from_user(&bind, uarg, sizeof(bind)))
12488+ break;
12489+
12490+ bind_interdomain.remote_dom = bind.remote_domain;
12491+ bind_interdomain.remote_port = bind.remote_port;
12492+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
12493+ &bind_interdomain);
12494+ if (rc != 0)
12495+ break;
12496+
12497+ rc = bind_interdomain.local_port;
12498+ evtchn_bind_to_user(u, rc);
12499+ break;
12500+ }
12501+
12502+ case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
12503+ struct ioctl_evtchn_bind_unbound_port bind;
12504+ struct evtchn_alloc_unbound alloc_unbound;
12505+
12506+ rc = -EFAULT;
12507+ if (copy_from_user(&bind, uarg, sizeof(bind)))
12508+ break;
12509+
12510+ alloc_unbound.dom = DOMID_SELF;
12511+ alloc_unbound.remote_dom = bind.remote_domain;
12512+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
12513+ &alloc_unbound);
12514+ if (rc != 0)
12515+ break;
12516+
12517+ rc = alloc_unbound.port;
12518+ evtchn_bind_to_user(u, rc);
12519+ break;
12520+ }
12521+
12522+ case IOCTL_EVTCHN_UNBIND: {
12523+ struct ioctl_evtchn_unbind unbind;
12524+ struct evtchn_close close;
12525+ int ret;
12526+
12527+ rc = -EFAULT;
12528+ if (copy_from_user(&unbind, uarg, sizeof(unbind)))
12529+ break;
12530+
12531+ rc = -EINVAL;
12532+ if (unbind.port >= NR_EVENT_CHANNELS)
12533+ break;
12534+
12535+ spin_lock_irq(&port_user_lock);
12536+
12537+ rc = -ENOTCONN;
12538+ if (port_user[unbind.port] != u) {
12539+ spin_unlock_irq(&port_user_lock);
12540+ break;
12541+ }
12542+
12543+ port_user[unbind.port] = NULL;
12544+ mask_evtchn(unbind.port);
12545+ rebind_evtchn_to_cpu(unbind.port, 0);
12546+
12547+ spin_unlock_irq(&port_user_lock);
12548+
12549+ close.port = unbind.port;
12550+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
12551+ BUG_ON(ret);
12552+
12553+ rc = 0;
12554+ break;
12555+ }
12556+
12557+ case IOCTL_EVTCHN_NOTIFY: {
12558+ struct ioctl_evtchn_notify notify;
12559+
12560+ rc = -EFAULT;
12561+ if (copy_from_user(&notify, uarg, sizeof(notify)))
12562+ break;
12563+
12564+ if (notify.port >= NR_EVENT_CHANNELS) {
12565+ rc = -EINVAL;
12566+ } else if (port_user[notify.port] != u) {
12567+ rc = -ENOTCONN;
12568+ } else {
12569+ notify_remote_via_evtchn(notify.port);
12570+ rc = 0;
12571+ }
12572+ break;
12573+ }
12574+
12575+ case IOCTL_EVTCHN_RESET: {
12576+ /* Initialise the ring to empty. Clear errors. */
12577+ mutex_lock(&u->ring_cons_mutex);
12578+ spin_lock_irq(&port_user_lock);
12579+ u->ring_cons = u->ring_prod = u->ring_overflow = 0;
12580+ spin_unlock_irq(&port_user_lock);
12581+ mutex_unlock(&u->ring_cons_mutex);
12582+ rc = 0;
12583+ break;
12584+ }
12585+
12586+ default:
12587+ rc = -ENOSYS;
12588+ break;
12589+ }
12590+
12591+ return rc;
12592+}
12593+
12594+static unsigned int evtchn_poll(struct file *file, poll_table *wait)
12595+{
12596+ unsigned int mask = POLLOUT | POLLWRNORM;
12597+ struct per_user_data *u = file->private_data;
12598+
12599+ poll_wait(file, &u->evtchn_wait, wait);
12600+ if (u->ring_cons != u->ring_prod)
12601+ mask |= POLLIN | POLLRDNORM;
12602+ if (u->ring_overflow)
12603+ mask = POLLERR;
12604+ return mask;
12605+}
12606+
12607+static int evtchn_fasync(int fd, struct file *filp, int on)
12608+{
12609+ struct per_user_data *u = filp->private_data;
12610+ return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
12611+}
12612+
12613+static int evtchn_open(struct inode *inode, struct file *filp)
12614+{
12615+ struct per_user_data *u;
12616+
12617+ if ((u = kmalloc(sizeof(*u), GFP_KERNEL)) == NULL)
12618+ return -ENOMEM;
12619+
12620+ memset(u, 0, sizeof(*u));
12621+ init_waitqueue_head(&u->evtchn_wait);
12622+
12623+ u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
12624+ if (u->ring == NULL) {
12625+ kfree(u);
12626+ return -ENOMEM;
12627+ }
12628+
12629+ mutex_init(&u->ring_cons_mutex);
12630+
12631+ filp->private_data = u;
12632+
12633+ u->bind_cpu = -1;
12634+
12635+ return 0;
12636+}
12637+
12638+static int evtchn_release(struct inode *inode, struct file *filp)
12639+{
12640+ int i;
12641+ struct per_user_data *u = filp->private_data;
12642+ struct evtchn_close close;
12643+
12644+ spin_lock_irq(&port_user_lock);
12645+
12646+ free_page((unsigned long)u->ring);
12647+
12648+ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
12649+ int ret;
12650+ if (port_user[i] != u)
12651+ continue;
12652+
12653+ port_user[i] = NULL;
12654+ mask_evtchn(i);
12655+ rebind_evtchn_to_cpu(i, 0);
12656+
12657+ close.port = i;
12658+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
12659+ BUG_ON(ret);
12660+ }
12661+
12662+ spin_unlock_irq(&port_user_lock);
12663+
12664+ kfree(u);
12665+
12666+ return 0;
12667+}
12668+
12669+static const struct file_operations evtchn_fops = {
12670+ .owner = THIS_MODULE,
12671+ .read = evtchn_read,
12672+ .write = evtchn_write,
12673+ .unlocked_ioctl = evtchn_ioctl,
12674+ .poll = evtchn_poll,
12675+ .fasync = evtchn_fasync,
12676+ .open = evtchn_open,
12677+ .release = evtchn_release,
12678+};
12679+
12680+static struct miscdevice evtchn_miscdev = {
12681+ .minor = MISC_DYNAMIC_MINOR,
12682+ .name = "evtchn",
12683+ .fops = &evtchn_fops,
12684+};
12685+
12686+static int __cpuinit evtchn_cpu_notify(struct notifier_block *nfb,
12687+ unsigned long action, void *hcpu)
12688+{
12689+ int hotcpu = (unsigned long)hcpu;
12690+ cpumask_t map = cpu_online_map;
12691+ int port, newcpu;
12692+ struct per_user_data *u;
12693+
12694+ switch (action) {
12695+ case CPU_DOWN_PREPARE:
12696+ cpu_clear(hotcpu, map);
12697+ spin_lock_irq(&port_user_lock);
12698+ for (port = 0; port < NR_EVENT_CHANNELS; port++) {
12699+ if ((u = port_user[port]) != NULL &&
12700+ u->bind_cpu == hotcpu &&
12701+ (newcpu = next_bind_cpu(map)) < NR_CPUS) {
12702+ rebind_evtchn_to_cpu(port, newcpu);
12703+ u->bind_cpu = newcpu;
12704+ }
12705+ }
12706+ spin_unlock_irq(&port_user_lock);
12707+ break;
12708+ default:
12709+ return NOTIFY_DONE;
12710+ }
12711+ return NOTIFY_OK;
12712+}
12713+
12714+static struct notifier_block __cpuinitdata evtchn_cpu_nfb = {
12715+ .notifier_call = evtchn_cpu_notify
12716+};
12717+
12718+static int __init evtchn_init(void)
12719+{
12720+ int err;
12721+
12722+ if (!is_running_on_xen())
12723+ return -ENODEV;
12724+
12725+ spin_lock_init(&port_user_lock);
12726+ memset(port_user, 0, sizeof(port_user));
12727+
12728+ /* Create '/dev/misc/evtchn'. */
12729+ err = misc_register(&evtchn_miscdev);
12730+ if (err != 0) {
12731+ printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
12732+ return err;
12733+ }
12734+
12735+ register_cpu_notifier(&evtchn_cpu_nfb);
12736+
12737+ printk("Event-channel device installed.\n");
12738+
12739+ return 0;
12740+}
12741+
12742+static void __exit evtchn_cleanup(void)
12743+{
12744+ misc_deregister(&evtchn_miscdev);
12745+ unregister_cpu_notifier(&evtchn_cpu_nfb);
12746+}
12747+
12748+module_init(evtchn_init);
12749+module_exit(evtchn_cleanup);
12750+
12751+MODULE_LICENSE("Dual BSD/GPL");
12752Index: head-2008-11-25/drivers/xen/fbfront/Makefile
12753===================================================================
12754--- /dev/null 1970-01-01 00:00:00.000000000 +0000
12755+++ head-2008-11-25/drivers/xen/fbfront/Makefile 2007-06-12 13:13:45.000000000 +0200
12756@@ -0,0 +1,2 @@
12757+obj-$(CONFIG_XEN_FRAMEBUFFER) := xenfb.o
12758+obj-$(CONFIG_XEN_KEYBOARD) += xenkbd.o
12759Index: head-2008-11-25/drivers/xen/fbfront/xenfb.c
12760===================================================================
12761--- /dev/null 1970-01-01 00:00:00.000000000 +0000
12762+++ head-2008-11-25/drivers/xen/fbfront/xenfb.c 2008-11-25 12:22:34.000000000 +0100
12763@@ -0,0 +1,887 @@
12764+/*
12765+ * linux/drivers/video/xenfb.c -- Xen para-virtual frame buffer device
12766+ *
12767+ * Copyright (C) 2005-2006 Anthony Liguori <aliguori@us.ibm.com>
12768+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
12769+ *
12770+ * Based on linux/drivers/video/q40fb.c
12771+ *
12772+ * This file is subject to the terms and conditions of the GNU General Public
12773+ * License. See the file COPYING in the main directory of this archive for
12774+ * more details.
12775+ */
12776+
12777+/*
12778+ * TODO:
12779+ *
12780+ * Switch to grant tables when they become capable of dealing with the
12781+ * frame buffer.
12782+ */
12783+
12784+#include <linux/kernel.h>
12785+#include <linux/errno.h>
12786+#include <linux/fb.h>
12787+#include <linux/module.h>
12788+#include <linux/vmalloc.h>
12789+#include <linux/mm.h>
12790+#include <linux/mutex.h>
12791+#include <asm/hypervisor.h>
12792+#include <xen/evtchn.h>
12793+#include <xen/interface/io/fbif.h>
12794+#include <xen/interface/io/protocols.h>
12795+#include <xen/xenbus.h>
12796+#include <linux/kthread.h>
12797+
12798+struct xenfb_mapping
12799+{
12800+ struct list_head link;
12801+ struct vm_area_struct *vma;
12802+ atomic_t map_refs;
12803+ int faults;
12804+ struct xenfb_info *info;
12805+};
12806+
12807+struct xenfb_info
12808+{
12809+ struct task_struct *kthread;
12810+ wait_queue_head_t wq;
12811+
12812+ unsigned char *fb;
12813+ struct fb_info *fb_info;
12814+ struct timer_list refresh;
12815+ int dirty;
12816+ int x1, y1, x2, y2; /* dirty rectangle,
12817+ protected by dirty_lock */
12818+ spinlock_t dirty_lock;
12819+ struct mutex mm_lock;
12820+ int nr_pages;
12821+ struct page **pages;
12822+ struct list_head mappings; /* protected by mm_lock */
12823+
12824+ int irq;
12825+ struct xenfb_page *page;
12826+ unsigned long *mfns;
12827+ int update_wanted; /* XENFB_TYPE_UPDATE wanted */
12828+ int feature_resize; /* Backend has resize feature */
12829+ struct xenfb_resize resize;
12830+ int resize_dpy;
12831+ spinlock_t resize_lock;
12832+
12833+ struct xenbus_device *xbdev;
12834+};
12835+
12836+/*
12837+ * There are three locks:
12838+ * spinlock resize_lock protecting resize_dpy and resize
12839+ * spinlock dirty_lock protecting the dirty rectangle
12840+ * mutex mm_lock protecting mappings.
12841+ *
12842+ * How the dirty and mapping locks work together
12843+ *
12844+ * The problem is that dirty rectangle and mappings aren't
12845+ * independent: the dirty rectangle must cover all faulted pages in
12846+ * mappings. We need to prove that our locking maintains this
12847+ * invariant.
12848+ *
12849+ * There are several kinds of critical regions:
12850+ *
12851+ * 1. Holding only dirty_lock: xenfb_refresh(). May run in
12852+ * interrupts. Extends the dirty rectangle. Trivially preserves
12853+ * invariant.
12854+ *
12855+ * 2. Holding only mm_lock: xenfb_mmap() and xenfb_vm_close(). Touch
12856+ * only mappings. The former creates unfaulted pages. Preserves
12857+ * invariant. The latter removes pages. Preserves invariant.
12858+ *
12859+ * 3. Holding both locks: xenfb_vm_nopage(). Extends the dirty
12860+ * rectangle and updates mappings consistently. Preserves
12861+ * invariant.
12862+ *
12863+ * 4. The ugliest one: xenfb_update_screen(). Clear the dirty
12864+ * rectangle and update mappings consistently.
12865+ *
12866+ * We can't simply hold both locks, because zap_page_range() cannot
12867+ * be called with a spinlock held.
12868+ *
12869+ * Therefore, we first clear the dirty rectangle with both locks
12870+ * held. Then we unlock dirty_lock and update the mappings.
12871+ * Critical regions that hold only dirty_lock may interfere with
12872+ * that. This can only be region 1: xenfb_refresh(). But that
12873+ * just extends the dirty rectangle, which can't harm the
12874+ * invariant.
12875+ *
12876+ * But FIXME: the invariant is too weak. It misses that the fault
12877+ * record in mappings must be consistent with the mapping of pages in
12878+ * the associated address space! do_no_page() updates the PTE after
12879+ * xenfb_vm_nopage() returns, i.e. outside the critical region. This
12880+ * allows the following race:
12881+ *
12882+ * X writes to some address in the Xen frame buffer
12883+ * Fault - call do_no_page()
12884+ * call xenfb_vm_nopage()
12885+ * grab mm_lock
12886+ * map->faults++;
12887+ * release mm_lock
12888+ * return back to do_no_page()
12889+ * (preempted, or SMP)
12890+ * Xen worker thread runs.
12891+ * grab mm_lock
12892+ * look at mappings
12893+ * find this mapping, zaps its pages (but page not in pte yet)
12894+ * clear map->faults
12895+ * releases mm_lock
12896+ * (back to X process)
12897+ * put page in X's pte
12898+ *
12899+ * Oh well, we won't be updating the writes to this page anytime soon.
12900+ */
12901+#define MB_ (1024*1024)
12902+#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8)
12903+
12904+enum {KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT};
12905+static int video[KPARAM_CNT] = {2, XENFB_WIDTH, XENFB_HEIGHT};
12906+module_param_array(video, int, NULL, 0);
12907+MODULE_PARM_DESC(video,
12908+ "Size of video memory in MB and width,height in pixels, default = (2,800,600)");
12909+
12910+static int xenfb_fps = 20;
12911+
12912+static int xenfb_remove(struct xenbus_device *);
12913+static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *);
12914+static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
12915+static void xenfb_disconnect_backend(struct xenfb_info *);
12916+
12917+static void xenfb_send_event(struct xenfb_info *info,
12918+ union xenfb_out_event *event)
12919+{
12920+ __u32 prod;
12921+
12922+ prod = info->page->out_prod;
12923+ /* caller ensures !xenfb_queue_full() */
12924+ mb(); /* ensure ring space available */
12925+ XENFB_OUT_RING_REF(info->page, prod) = *event;
12926+ wmb(); /* ensure ring contents visible */
12927+ info->page->out_prod = prod + 1;
12928+
12929+ notify_remote_via_irq(info->irq);
12930+}
12931+
12932+static void xenfb_do_update(struct xenfb_info *info,
12933+ int x, int y, int w, int h)
12934+{
12935+ union xenfb_out_event event;
12936+
12937+ memset(&event, 0, sizeof(event));
12938+ event.type = XENFB_TYPE_UPDATE;
12939+ event.update.x = x;
12940+ event.update.y = y;
12941+ event.update.width = w;
12942+ event.update.height = h;
12943+
12944+ /* caller ensures !xenfb_queue_full() */
12945+ xenfb_send_event(info, &event);
12946+}
12947+
12948+static void xenfb_do_resize(struct xenfb_info *info)
12949+{
12950+ union xenfb_out_event event;
12951+
12952+ memset(&event, 0, sizeof(event));
12953+ event.resize = info->resize;
12954+
12955+ /* caller ensures !xenfb_queue_full() */
12956+ xenfb_send_event(info, &event);
12957+}
12958+
12959+static int xenfb_queue_full(struct xenfb_info *info)
12960+{
12961+ __u32 cons, prod;
12962+
12963+ prod = info->page->out_prod;
12964+ cons = info->page->out_cons;
12965+ return prod - cons == XENFB_OUT_RING_LEN;
12966+}
12967+
12968+static void xenfb_update_screen(struct xenfb_info *info)
12969+{
12970+ unsigned long flags;
12971+ int y1, y2, x1, x2;
12972+ struct xenfb_mapping *map;
12973+
12974+ if (!info->update_wanted)
12975+ return;
12976+ if (xenfb_queue_full(info))
12977+ return;
12978+
12979+ mutex_lock(&info->mm_lock);
12980+
12981+ spin_lock_irqsave(&info->dirty_lock, flags);
12982+ y1 = info->y1;
12983+ y2 = info->y2;
12984+ x1 = info->x1;
12985+ x2 = info->x2;
12986+ info->x1 = info->y1 = INT_MAX;
12987+ info->x2 = info->y2 = 0;
12988+ spin_unlock_irqrestore(&info->dirty_lock, flags);
12989+
12990+ list_for_each_entry(map, &info->mappings, link) {
12991+ if (!map->faults)
12992+ continue;
12993+ zap_page_range(map->vma, map->vma->vm_start,
12994+ map->vma->vm_end - map->vma->vm_start, NULL);
12995+ map->faults = 0;
12996+ }
12997+
12998+ mutex_unlock(&info->mm_lock);
12999+
13000+ if (x2 < x1 || y2 < y1) {
13001+ printk("xenfb_update_screen bogus rect %d %d %d %d\n",
13002+ x1, x2, y1, y2);
13003+ WARN_ON(1);
13004+ }
13005+ xenfb_do_update(info, x1, y1, x2 - x1, y2 - y1);
13006+}
13007+
13008+static void xenfb_handle_resize_dpy(struct xenfb_info *info)
13009+{
13010+ unsigned long flags;
13011+
13012+ spin_lock_irqsave(&info->resize_lock, flags);
13013+ if (info->resize_dpy) {
13014+ if (!xenfb_queue_full(info)) {
13015+ info->resize_dpy = 0;
13016+ xenfb_do_resize(info);
13017+ }
13018+ }
13019+ spin_unlock_irqrestore(&info->resize_lock, flags);
13020+}
13021+
13022+static int xenfb_thread(void *data)
13023+{
13024+ struct xenfb_info *info = data;
13025+
13026+ while (!kthread_should_stop()) {
13027+ xenfb_handle_resize_dpy(info);
13028+ if (info->dirty) {
13029+ info->dirty = 0;
13030+ xenfb_update_screen(info);
13031+ }
13032+ wait_event_interruptible(info->wq,
13033+ kthread_should_stop() || info->dirty);
13034+ try_to_freeze();
13035+ }
13036+ return 0;
13037+}
13038+
13039+static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green,
13040+ unsigned blue, unsigned transp,
13041+ struct fb_info *info)
13042+{
13043+ u32 v;
13044+
13045+ if (regno > info->cmap.len)
13046+ return 1;
13047+
13048+ red >>= (16 - info->var.red.length);
13049+ green >>= (16 - info->var.green.length);
13050+ blue >>= (16 - info->var.blue.length);
13051+
13052+ v = (red << info->var.red.offset) |
13053+ (green << info->var.green.offset) |
13054+ (blue << info->var.blue.offset);
13055+
13056+ /* FIXME is this sane? check against xxxfb_setcolreg()! */
13057+ switch (info->var.bits_per_pixel) {
13058+ case 16:
13059+ case 24:
13060+ case 32:
13061+ ((u32 *)info->pseudo_palette)[regno] = v;
13062+ break;
13063+ }
13064+
13065+ return 0;
13066+}
13067+
13068+static void xenfb_timer(unsigned long data)
13069+{
13070+ struct xenfb_info *info = (struct xenfb_info *)data;
13071+ wake_up(&info->wq);
13072+}
13073+
13074+static void __xenfb_refresh(struct xenfb_info *info,
13075+ int x1, int y1, int w, int h)
13076+{
13077+ int y2, x2;
13078+
13079+ y2 = y1 + h;
13080+ x2 = x1 + w;
13081+
13082+ if (info->y1 > y1)
13083+ info->y1 = y1;
13084+ if (info->y2 < y2)
13085+ info->y2 = y2;
13086+ if (info->x1 > x1)
13087+ info->x1 = x1;
13088+ if (info->x2 < x2)
13089+ info->x2 = x2;
13090+ info->dirty = 1;
13091+
13092+ if (timer_pending(&info->refresh))
13093+ return;
13094+
13095+ mod_timer(&info->refresh, jiffies + HZ/xenfb_fps);
13096+}
13097+
13098+static void xenfb_refresh(struct xenfb_info *info,
13099+ int x1, int y1, int w, int h)
13100+{
13101+ unsigned long flags;
13102+
13103+ spin_lock_irqsave(&info->dirty_lock, flags);
13104+ __xenfb_refresh(info, x1, y1, w, h);
13105+ spin_unlock_irqrestore(&info->dirty_lock, flags);
13106+}
13107+
13108+static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
13109+{
13110+ struct xenfb_info *info = p->par;
13111+
13112+ cfb_fillrect(p, rect);
13113+ xenfb_refresh(info, rect->dx, rect->dy, rect->width, rect->height);
13114+}
13115+
13116+static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image)
13117+{
13118+ struct xenfb_info *info = p->par;
13119+
13120+ cfb_imageblit(p, image);
13121+ xenfb_refresh(info, image->dx, image->dy, image->width, image->height);
13122+}
13123+
13124+static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
13125+{
13126+ struct xenfb_info *info = p->par;
13127+
13128+ cfb_copyarea(p, area);
13129+ xenfb_refresh(info, area->dx, area->dy, area->width, area->height);
13130+}
13131+
13132+static void xenfb_vm_open(struct vm_area_struct *vma)
13133+{
13134+ struct xenfb_mapping *map = vma->vm_private_data;
13135+ atomic_inc(&map->map_refs);
13136+}
13137+
13138+static void xenfb_vm_close(struct vm_area_struct *vma)
13139+{
13140+ struct xenfb_mapping *map = vma->vm_private_data;
13141+ struct xenfb_info *info = map->info;
13142+
13143+ mutex_lock(&info->mm_lock);
13144+ if (atomic_dec_and_test(&map->map_refs)) {
13145+ list_del(&map->link);
13146+ kfree(map);
13147+ }
13148+ mutex_unlock(&info->mm_lock);
13149+}
13150+
13151+static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
13152+ unsigned long vaddr, int *type)
13153+{
13154+ struct xenfb_mapping *map = vma->vm_private_data;
13155+ struct xenfb_info *info = map->info;
13156+ int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;
13157+ unsigned long flags;
13158+ struct page *page;
13159+ int y1, y2;
13160+
13161+ if (pgnr >= info->nr_pages)
13162+ return NOPAGE_SIGBUS;
13163+
13164+ mutex_lock(&info->mm_lock);
13165+ spin_lock_irqsave(&info->dirty_lock, flags);
13166+ page = info->pages[pgnr];
13167+ get_page(page);
13168+ map->faults++;
13169+
13170+ y1 = pgnr * PAGE_SIZE / info->fb_info->fix.line_length;
13171+ y2 = (pgnr * PAGE_SIZE + PAGE_SIZE - 1) / info->fb_info->fix.line_length;
13172+ if (y2 > info->fb_info->var.yres)
13173+ y2 = info->fb_info->var.yres;
13174+ __xenfb_refresh(info, 0, y1, info->fb_info->var.xres, y2 - y1);
13175+ spin_unlock_irqrestore(&info->dirty_lock, flags);
13176+ mutex_unlock(&info->mm_lock);
13177+
13178+ if (type)
13179+ *type = VM_FAULT_MINOR;
13180+
13181+ return page;
13182+}
13183+
13184+static struct vm_operations_struct xenfb_vm_ops = {
13185+ .open = xenfb_vm_open,
13186+ .close = xenfb_vm_close,
13187+ .nopage = xenfb_vm_nopage,
13188+};
13189+
13190+static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma)
13191+{
13192+ struct xenfb_info *info = fb_info->par;
13193+ struct xenfb_mapping *map;
13194+ int map_pages;
13195+
13196+ if (!(vma->vm_flags & VM_WRITE))
13197+ return -EINVAL;
13198+ if (!(vma->vm_flags & VM_SHARED))
13199+ return -EINVAL;
13200+ if (vma->vm_pgoff != 0)
13201+ return -EINVAL;
13202+
13203+ map_pages = (vma->vm_end - vma->vm_start + PAGE_SIZE-1) >> PAGE_SHIFT;
13204+ if (map_pages > info->nr_pages)
13205+ return -EINVAL;
13206+
13207+ map = kzalloc(sizeof(*map), GFP_KERNEL);
13208+ if (map == NULL)
13209+ return -ENOMEM;
13210+
13211+ map->vma = vma;
13212+ map->faults = 0;
13213+ map->info = info;
13214+ atomic_set(&map->map_refs, 1);
13215+
13216+ mutex_lock(&info->mm_lock);
13217+ list_add(&map->link, &info->mappings);
13218+ mutex_unlock(&info->mm_lock);
13219+
13220+ vma->vm_ops = &xenfb_vm_ops;
13221+ vma->vm_flags |= (VM_DONTEXPAND | VM_RESERVED);
13222+ vma->vm_private_data = map;
13223+
13224+ return 0;
13225+}
13226+
13227+static int
13228+xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
13229+{
13230+ struct xenfb_info *xenfb_info;
13231+ int required_mem_len;
13232+
13233+ xenfb_info = info->par;
13234+
13235+ if (!xenfb_info->feature_resize) {
13236+ if (var->xres == video[KPARAM_WIDTH] &&
13237+ var->yres == video[KPARAM_HEIGHT] &&
13238+ var->bits_per_pixel == xenfb_info->page->depth) {
13239+ return 0;
13240+ }
13241+ return -EINVAL;
13242+ }
13243+
13244+ /* Can't resize past initial width and height */
13245+ if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT])
13246+ return -EINVAL;
13247+
13248+ required_mem_len = var->xres * var->yres * (xenfb_info->page->depth / 8);
13249+ if (var->bits_per_pixel == xenfb_info->page->depth &&
13250+ var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) &&
13251+ required_mem_len <= info->fix.smem_len) {
13252+ var->xres_virtual = var->xres;
13253+ var->yres_virtual = var->yres;
13254+ return 0;
13255+ }
13256+ return -EINVAL;
13257+}
13258+
13259+static int xenfb_set_par(struct fb_info *info)
13260+{
13261+ struct xenfb_info *xenfb_info;
13262+ unsigned long flags;
13263+
13264+ xenfb_info = info->par;
13265+
13266+ spin_lock_irqsave(&xenfb_info->resize_lock, flags);
13267+ xenfb_info->resize.type = XENFB_TYPE_RESIZE;
13268+ xenfb_info->resize.width = info->var.xres;
13269+ xenfb_info->resize.height = info->var.yres;
13270+ xenfb_info->resize.stride = info->fix.line_length;
13271+ xenfb_info->resize.depth = info->var.bits_per_pixel;
13272+ xenfb_info->resize.offset = 0;
13273+ xenfb_info->resize_dpy = 1;
13274+ spin_unlock_irqrestore(&xenfb_info->resize_lock, flags);
13275+ return 0;
13276+}
13277+
13278+static struct fb_ops xenfb_fb_ops = {
13279+ .owner = THIS_MODULE,
13280+ .fb_setcolreg = xenfb_setcolreg,
13281+ .fb_fillrect = xenfb_fillrect,
13282+ .fb_copyarea = xenfb_copyarea,
13283+ .fb_imageblit = xenfb_imageblit,
13284+ .fb_mmap = xenfb_mmap,
13285+ .fb_check_var = xenfb_check_var,
13286+ .fb_set_par = xenfb_set_par,
13287+};
13288+
13289+static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
13290+ struct pt_regs *regs)
13291+{
13292+ /*
13293+	 * No incoming events are recognized; simply ignore them all.
13294+	 * If you need to recognize some, see xenkbd's input_handler()
13295+ * for how to do that.
13296+ */
13297+ struct xenfb_info *info = dev_id;
13298+ struct xenfb_page *page = info->page;
13299+
13300+ if (page->in_cons != page->in_prod) {
13301+ info->page->in_cons = info->page->in_prod;
13302+ notify_remote_via_irq(info->irq);
13303+ }
13304+ return IRQ_HANDLED;
13305+}
13306+
13307+static unsigned long vmalloc_to_mfn(void *address)
13308+{
13309+ return pfn_to_mfn(vmalloc_to_pfn(address));
13310+}
13311+
13312+static int __devinit xenfb_probe(struct xenbus_device *dev,
13313+ const struct xenbus_device_id *id)
13314+{
13315+ struct xenfb_info *info;
13316+ struct fb_info *fb_info;
13317+ int fb_size;
13318+ int val;
13319+ int ret;
13320+
13321+ info = kzalloc(sizeof(*info), GFP_KERNEL);
13322+ if (info == NULL) {
13323+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
13324+ return -ENOMEM;
13325+ }
13326+
13327+ /* Limit kernel param videoram amount to what is in xenstore */
13328+ if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) {
13329+ if (val < video[KPARAM_MEM])
13330+ video[KPARAM_MEM] = val;
13331+ }
13332+
13333+ /* If requested res does not fit in available memory, use default */
13334+ fb_size = video[KPARAM_MEM] * MB_;
13335+ if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH/8 > fb_size) {
13336+ video[KPARAM_WIDTH] = XENFB_WIDTH;
13337+ video[KPARAM_HEIGHT] = XENFB_HEIGHT;
13338+ fb_size = XENFB_DEFAULT_FB_LEN;
13339+ }
13340+
13341+ dev->dev.driver_data = info;
13342+ info->xbdev = dev;
13343+ info->irq = -1;
13344+ info->x1 = info->y1 = INT_MAX;
13345+ spin_lock_init(&info->dirty_lock);
13346+ spin_lock_init(&info->resize_lock);
13347+ mutex_init(&info->mm_lock);
13348+ init_waitqueue_head(&info->wq);
13349+ init_timer(&info->refresh);
13350+ info->refresh.function = xenfb_timer;
13351+ info->refresh.data = (unsigned long)info;
13352+ INIT_LIST_HEAD(&info->mappings);
13353+
13354+ info->fb = vmalloc(fb_size);
13355+ if (info->fb == NULL)
13356+ goto error_nomem;
13357+ memset(info->fb, 0, fb_size);
13358+
13359+ info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
13360+
13361+ info->pages = kmalloc(sizeof(struct page *) * info->nr_pages,
13362+ GFP_KERNEL);
13363+ if (info->pages == NULL)
13364+ goto error_nomem;
13365+
13366+ info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
13367+ if (!info->mfns)
13368+ goto error_nomem;
13369+
13370+ /* set up shared page */
13371+ info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
13372+ if (!info->page)
13373+ goto error_nomem;
13374+
13375+ fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL);
13376+ /* see fishy hackery below */
13377+ if (fb_info == NULL)
13378+ goto error_nomem;
13379+
13380+ /* FIXME fishy hackery */
13381+ fb_info->pseudo_palette = fb_info->par;
13382+ fb_info->par = info;
13383+ /* /FIXME */
13384+ fb_info->screen_base = info->fb;
13385+
13386+ fb_info->fbops = &xenfb_fb_ops;
13387+ fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH];
13388+ fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT];
13389+ fb_info->var.bits_per_pixel = XENFB_DEPTH;
13390+
13391+ fb_info->var.red = (struct fb_bitfield){16, 8, 0};
13392+ fb_info->var.green = (struct fb_bitfield){8, 8, 0};
13393+ fb_info->var.blue = (struct fb_bitfield){0, 8, 0};
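	/*
	 * With these bitfields and the default XENFB_DEPTH the frame
	 * buffer is packed XRGB: red in bits 23..16, green in 15..8,
	 * blue in 7..0, top byte unused; a pixel is effectively
	 * ((r << 16) | (g << 8) | b) for 8-bit colour components.
	 */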
13394+
13395+ fb_info->var.activate = FB_ACTIVATE_NOW;
13396+ fb_info->var.height = -1;
13397+ fb_info->var.width = -1;
13398+ fb_info->var.vmode = FB_VMODE_NONINTERLACED;
13399+
13400+ fb_info->fix.visual = FB_VISUAL_TRUECOLOR;
13401+ fb_info->fix.line_length = fb_info->var.xres * (XENFB_DEPTH / 8);
13402+ fb_info->fix.smem_start = 0;
13403+ fb_info->fix.smem_len = fb_size;
13404+ strcpy(fb_info->fix.id, "xen");
13405+ fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
13406+ fb_info->fix.accel = FB_ACCEL_NONE;
13407+
13408+ fb_info->flags = FBINFO_FLAG_DEFAULT;
13409+
13410+ ret = fb_alloc_cmap(&fb_info->cmap, 256, 0);
13411+ if (ret < 0) {
13412+ framebuffer_release(fb_info);
13413+ xenbus_dev_fatal(dev, ret, "fb_alloc_cmap");
13414+ goto error;
13415+ }
13416+
13417+ xenfb_init_shared_page(info, fb_info);
13418+
13419+ ret = register_framebuffer(fb_info);
13420+ if (ret) {
13421+		fb_dealloc_cmap(&fb_info->cmap);	/* info->fb_info is not set yet */
13422+ framebuffer_release(fb_info);
13423+ xenbus_dev_fatal(dev, ret, "register_framebuffer");
13424+ goto error;
13425+ }
13426+ info->fb_info = fb_info;
13427+
13428+ ret = xenfb_connect_backend(dev, info);
13429+ if (ret < 0)
13430+ goto error;
13431+
13432+ /* FIXME should this be delayed until backend XenbusStateConnected? */
13433+ info->kthread = kthread_run(xenfb_thread, info, "xenfb thread");
13434+ if (IS_ERR(info->kthread)) {
13435+ ret = PTR_ERR(info->kthread);
13436+ info->kthread = NULL;
13437+		xenbus_dev_fatal(dev, ret, "kthread_run xenfb_thread");
13438+ goto error;
13439+ }
13440+
13441+ return 0;
13442+
13443+ error_nomem:
13444+ ret = -ENOMEM;
13445+ xenbus_dev_fatal(dev, ret, "allocating device memory");
13446+ error:
13447+ xenfb_remove(dev);
13448+ return ret;
13449+}
13450+
13451+static int xenfb_resume(struct xenbus_device *dev)
13452+{
13453+ struct xenfb_info *info = dev->dev.driver_data;
13454+
13455+ xenfb_disconnect_backend(info);
13456+ xenfb_init_shared_page(info, info->fb_info);
13457+ return xenfb_connect_backend(dev, info);
13458+}
13459+
13460+static int xenfb_remove(struct xenbus_device *dev)
13461+{
13462+ struct xenfb_info *info = dev->dev.driver_data;
13463+
13464+ del_timer(&info->refresh);
13465+ if (info->kthread)
13466+ kthread_stop(info->kthread);
13467+ xenfb_disconnect_backend(info);
13468+ if (info->fb_info) {
13469+ unregister_framebuffer(info->fb_info);
13470+ fb_dealloc_cmap(&info->fb_info->cmap);
13471+ framebuffer_release(info->fb_info);
13472+ }
13473+ free_page((unsigned long)info->page);
13474+ vfree(info->mfns);
13475+ kfree(info->pages);
13476+ vfree(info->fb);
13477+ kfree(info);
13478+
13479+ return 0;
13480+}
13481+
13482+static void xenfb_init_shared_page(struct xenfb_info *info,
13483+ struct fb_info * fb_info)
13484+{
13485+ int i;
13486+ int epd = PAGE_SIZE / sizeof(info->mfns[0]);
13487+
13488+ for (i = 0; i < info->nr_pages; i++)
13489+ info->pages[i] = vmalloc_to_page(info->fb + i * PAGE_SIZE);
13490+
13491+ for (i = 0; i < info->nr_pages; i++)
13492+ info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE);
13493+
13494+ for (i = 0; i * epd < info->nr_pages; i++)
13495+ info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]);
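	/*
	 * Illustrative sizing: with 4 KiB pages and 8-byte unsigned longs,
	 * epd = 4096 / 8 = 512, so each pd[] entry names one page holding
	 * 512 mfns and thus covers 512 * 4 KiB = 2 MiB of frame buffer; a
	 * 32-bit build (4-byte longs) gets epd = 1024, i.e. 4 MiB per entry.
	 */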
13496+
13497+ info->page->width = fb_info->var.xres;
13498+ info->page->height = fb_info->var.yres;
13499+ info->page->depth = fb_info->var.bits_per_pixel;
13500+ info->page->line_length = fb_info->fix.line_length;
13501+ info->page->mem_length = fb_info->fix.smem_len;
13502+ info->page->in_cons = info->page->in_prod = 0;
13503+ info->page->out_cons = info->page->out_prod = 0;
13504+}
13505+
13506+static int xenfb_connect_backend(struct xenbus_device *dev,
13507+ struct xenfb_info *info)
13508+{
13509+ int ret;
13510+ struct xenbus_transaction xbt;
13511+
13512+ ret = bind_listening_port_to_irqhandler(
13513+ dev->otherend_id, xenfb_event_handler, 0, "xenfb", info);
13514+ if (ret < 0) {
13515+ xenbus_dev_fatal(dev, ret,
13516+ "bind_listening_port_to_irqhandler");
13517+ return ret;
13518+ }
13519+ info->irq = ret;
13520+
13521+ again:
13522+ ret = xenbus_transaction_start(&xbt);
13523+ if (ret) {
13524+ xenbus_dev_fatal(dev, ret, "starting transaction");
13525+ return ret;
13526+ }
13527+ ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
13528+ virt_to_mfn(info->page));
13529+ if (ret)
13530+ goto error_xenbus;
13531+ ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
13532+ irq_to_evtchn_port(info->irq));
13533+ if (ret)
13534+ goto error_xenbus;
13535+ ret = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
13536+ XEN_IO_PROTO_ABI_NATIVE);
13537+ if (ret)
13538+ goto error_xenbus;
13539+ ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1");
13540+ if (ret)
13541+ goto error_xenbus;
13542+ ret = xenbus_transaction_end(xbt, 0);
13543+ if (ret) {
13544+ if (ret == -EAGAIN)
13545+ goto again;
13546+ xenbus_dev_fatal(dev, ret, "completing transaction");
13547+ return ret;
13548+ }
13549+
13550+ xenbus_switch_state(dev, XenbusStateInitialised);
13551+ return 0;
13552+
13553+ error_xenbus:
13554+ xenbus_transaction_end(xbt, 1);
13555+ xenbus_dev_fatal(dev, ret, "writing xenstore");
13556+ return ret;
13557+}
13558+
13559+static void xenfb_disconnect_backend(struct xenfb_info *info)
13560+{
13561+ if (info->irq >= 0)
13562+ unbind_from_irqhandler(info->irq, info);
13563+ info->irq = -1;
13564+}
13565+
13566+static void xenfb_backend_changed(struct xenbus_device *dev,
13567+ enum xenbus_state backend_state)
13568+{
13569+ struct xenfb_info *info = dev->dev.driver_data;
13570+ int val;
13571+
13572+ switch (backend_state) {
13573+ case XenbusStateInitialising:
13574+ case XenbusStateInitialised:
13575+ case XenbusStateReconfiguring:
13576+ case XenbusStateReconfigured:
13577+ case XenbusStateUnknown:
13578+ case XenbusStateClosed:
13579+ break;
13580+
13581+ case XenbusStateInitWait:
13582+ InitWait:
13583+ xenbus_switch_state(dev, XenbusStateConnected);
13584+ break;
13585+
13586+ case XenbusStateConnected:
13587+ /*
13588+ * Work around xenbus race condition: If backend goes
13589+ * through InitWait to Connected fast enough, we can
13590+ * get Connected twice here.
13591+ */
13592+ if (dev->state != XenbusStateConnected)
13593+ goto InitWait; /* no InitWait seen yet, fudge it */
13594+
13595+ if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
13596+ "request-update", "%d", &val) < 0)
13597+ val = 0;
13598+ if (val)
13599+ info->update_wanted = 1;
13600+
13601+ if (xenbus_scanf(XBT_NIL, dev->otherend,
13602+ "feature-resize", "%d", &val) < 0)
13603+ val = 0;
13604+ info->feature_resize = val;
13605+ break;
13606+
13607+ case XenbusStateClosing:
13608+		/* FIXME is this safe in any dev->state? */
13609+ xenbus_frontend_closed(dev);
13610+ break;
13611+ }
13612+}
13613+
13614+static const struct xenbus_device_id xenfb_ids[] = {
13615+ { "vfb" },
13616+ { "" }
13617+};
13618+MODULE_ALIAS("xen:vfb");
13619+
13620+static struct xenbus_driver xenfb_driver = {
13621+ .name = "vfb",
13622+ .owner = THIS_MODULE,
13623+ .ids = xenfb_ids,
13624+ .probe = xenfb_probe,
13625+ .remove = xenfb_remove,
13626+ .resume = xenfb_resume,
13627+ .otherend_changed = xenfb_backend_changed,
13628+};
13629+
13630+static int __init xenfb_init(void)
13631+{
13632+ if (!is_running_on_xen())
13633+ return -ENODEV;
13634+
13635+ /* Nothing to do if running in dom0. */
13636+ if (is_initial_xendomain())
13637+ return -ENODEV;
13638+
13639+ return xenbus_register_frontend(&xenfb_driver);
13640+}
13641+
13642+static void __exit xenfb_cleanup(void)
13643+{
13644+ return xenbus_unregister_driver(&xenfb_driver);
13645+}
13646+
13647+module_init(xenfb_init);
13648+module_exit(xenfb_cleanup);
13649+
13650+MODULE_LICENSE("GPL");
13651Index: head-2008-11-25/drivers/xen/fbfront/xenkbd.c
13652===================================================================
13653--- /dev/null 1970-01-01 00:00:00.000000000 +0000
13654+++ head-2008-11-25/drivers/xen/fbfront/xenkbd.c 2008-04-02 12:34:02.000000000 +0200
13655@@ -0,0 +1,354 @@
13656+/*
13657+ * linux/drivers/input/keyboard/xenkbd.c -- Xen para-virtual input device
13658+ *
13659+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
13660+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
13661+ *
13662+ * Based on linux/drivers/input/mouse/sermouse.c
13663+ *
13664+ * This file is subject to the terms and conditions of the GNU General Public
13665+ * License. See the file COPYING in the main directory of this archive for
13666+ * more details.
13667+ */
13668+
13669+/*
13670+ * TODO:
13671+ *
13672+ * Switch to grant tables together with xenfb.c.
13673+ */
13674+
13675+#include <linux/kernel.h>
13676+#include <linux/errno.h>
13677+#include <linux/module.h>
13678+#include <linux/input.h>
13679+#include <asm/hypervisor.h>
13680+#include <xen/evtchn.h>
13681+#include <xen/interface/io/fbif.h>
13682+#include <xen/interface/io/kbdif.h>
13683+#include <xen/xenbus.h>
13684+
13685+struct xenkbd_info
13686+{
13687+ struct input_dev *kbd;
13688+ struct input_dev *ptr;
13689+ struct xenkbd_page *page;
13690+ int irq;
13691+ struct xenbus_device *xbdev;
13692+ char phys[32];
13693+};
13694+
13695+static int xenkbd_remove(struct xenbus_device *);
13696+static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *);
13697+static void xenkbd_disconnect_backend(struct xenkbd_info *);
13698+
13699+/*
13700+ * Note: if you need to send out events, see xenfb_do_update() for how
13701+ * to do that.
13702+ */
13703+
13704+static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
13705+{
13706+ struct xenkbd_info *info = dev_id;
13707+ struct xenkbd_page *page = info->page;
13708+ __u32 cons, prod;
13709+
13710+ prod = page->in_prod;
13711+ if (prod == page->in_cons)
13712+ return IRQ_HANDLED;
13713+ rmb(); /* ensure we see ring contents up to prod */
13714+ for (cons = page->in_cons; cons != prod; cons++) {
13715+ union xenkbd_in_event *event;
13716+ struct input_dev *dev;
13717+ event = &XENKBD_IN_RING_REF(page, cons);
13718+
13719+ dev = info->ptr;
13720+ switch (event->type) {
13721+ case XENKBD_TYPE_MOTION:
13722+ if (event->motion.rel_z)
13723+ input_report_rel(dev, REL_WHEEL,
13724+ -event->motion.rel_z);
13725+ input_report_rel(dev, REL_X, event->motion.rel_x);
13726+ input_report_rel(dev, REL_Y, event->motion.rel_y);
13727+ break;
13728+ case XENKBD_TYPE_KEY:
13729+ dev = NULL;
13730+ if (test_bit(event->key.keycode, info->kbd->keybit))
13731+ dev = info->kbd;
13732+ if (test_bit(event->key.keycode, info->ptr->keybit))
13733+ dev = info->ptr;
13734+ if (dev)
13735+ input_report_key(dev, event->key.keycode,
13736+ event->key.pressed);
13737+ else
13738+				printk(KERN_WARNING "xenkbd: unhandled keycode 0x%x\n",
13739+ event->key.keycode);
13740+ break;
13741+ case XENKBD_TYPE_POS:
13742+ if (event->pos.rel_z)
13743+ input_report_rel(dev, REL_WHEEL,
13744+ -event->pos.rel_z);
13745+ input_report_abs(dev, ABS_X, event->pos.abs_x);
13746+ input_report_abs(dev, ABS_Y, event->pos.abs_y);
13747+ break;
13748+ }
13749+ if (dev)
13750+ input_sync(dev);
13751+ }
13752+ mb(); /* ensure we got ring contents */
13753+ page->in_cons = cons;
13754+ notify_remote_via_irq(info->irq);
13755+
13756+ return IRQ_HANDLED;
13757+}
13758+
13759+int __devinit xenkbd_probe(struct xenbus_device *dev,
13760+ const struct xenbus_device_id *id)
13761+{
13762+ int ret, i;
13763+ struct xenkbd_info *info;
13764+ struct input_dev *kbd, *ptr;
13765+
13766+ info = kzalloc(sizeof(*info), GFP_KERNEL);
13767+ if (!info) {
13768+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
13769+ return -ENOMEM;
13770+ }
13771+ dev->dev.driver_data = info;
13772+	info->xbdev = dev;
	info->irq = -1;	/* not connected yet; avoids unbinding IRQ 0 on an early error path */
13773+ snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename);
13774+
13775+ info->page = (void *)__get_free_page(GFP_KERNEL);
13776+ if (!info->page)
13777+ goto error_nomem;
13778+ info->page->in_cons = info->page->in_prod = 0;
13779+ info->page->out_cons = info->page->out_prod = 0;
13780+
13781+ /* keyboard */
13782+ kbd = input_allocate_device();
13783+ if (!kbd)
13784+ goto error_nomem;
13785+ kbd->name = "Xen Virtual Keyboard";
13786+ kbd->phys = info->phys;
13787+ kbd->id.bustype = BUS_PCI;
13788+ kbd->id.vendor = 0x5853;
13789+ kbd->id.product = 0xffff;
13790+ kbd->evbit[0] = BIT(EV_KEY);
13791+ for (i = KEY_ESC; i < KEY_UNKNOWN; i++)
13792+ set_bit(i, kbd->keybit);
13793+ for (i = KEY_OK; i < KEY_MAX; i++)
13794+ set_bit(i, kbd->keybit);
13795+
13796+ ret = input_register_device(kbd);
13797+ if (ret) {
13798+ input_free_device(kbd);
13799+ xenbus_dev_fatal(dev, ret, "input_register_device(kbd)");
13800+ goto error;
13801+ }
13802+ info->kbd = kbd;
13803+
13804+ /* pointing device */
13805+ ptr = input_allocate_device();
13806+ if (!ptr)
13807+ goto error_nomem;
13808+ ptr->name = "Xen Virtual Pointer";
13809+ ptr->phys = info->phys;
13810+ ptr->id.bustype = BUS_PCI;
13811+ ptr->id.vendor = 0x5853;
13812+ ptr->id.product = 0xfffe;
13813+ ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
13814+ for (i = BTN_LEFT; i <= BTN_TASK; i++)
13815+ set_bit(i, ptr->keybit);
13816+ ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL);
13817+ input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
13818+ input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
13819+
13820+ ret = input_register_device(ptr);
13821+ if (ret) {
13822+ input_free_device(ptr);
13823+ xenbus_dev_fatal(dev, ret, "input_register_device(ptr)");
13824+ goto error;
13825+ }
13826+ info->ptr = ptr;
13827+
13828+ ret = xenkbd_connect_backend(dev, info);
13829+ if (ret < 0)
13830+ goto error;
13831+
13832+ return 0;
13833+
13834+ error_nomem:
13835+ ret = -ENOMEM;
13836+ xenbus_dev_fatal(dev, ret, "allocating device memory");
13837+ error:
13838+ xenkbd_remove(dev);
13839+ return ret;
13840+}
13841+
13842+static int xenkbd_resume(struct xenbus_device *dev)
13843+{
13844+ struct xenkbd_info *info = dev->dev.driver_data;
13845+
13846+ xenkbd_disconnect_backend(info);
13847+ info->page->in_cons = info->page->in_prod = 0;
13848+ info->page->out_cons = info->page->out_prod = 0;
13849+ return xenkbd_connect_backend(dev, info);
13850+}
13851+
13852+static int xenkbd_remove(struct xenbus_device *dev)
13853+{
13854+ struct xenkbd_info *info = dev->dev.driver_data;
13855+
13856+ xenkbd_disconnect_backend(info);
13857+	if (info->kbd) input_unregister_device(info->kbd);
13858+	if (info->ptr) input_unregister_device(info->ptr);
13859+ free_page((unsigned long)info->page);
13860+ kfree(info);
13861+ return 0;
13862+}
13863+
13864+static int xenkbd_connect_backend(struct xenbus_device *dev,
13865+ struct xenkbd_info *info)
13866+{
13867+ int ret;
13868+ struct xenbus_transaction xbt;
13869+
13870+ ret = bind_listening_port_to_irqhandler(
13871+ dev->otherend_id, input_handler, 0, "xenkbd", info);
13872+ if (ret < 0) {
13873+ xenbus_dev_fatal(dev, ret,
13874+ "bind_listening_port_to_irqhandler");
13875+ return ret;
13876+ }
13877+ info->irq = ret;
13878+
13879+ again:
13880+ ret = xenbus_transaction_start(&xbt);
13881+ if (ret) {
13882+ xenbus_dev_fatal(dev, ret, "starting transaction");
13883+ return ret;
13884+ }
13885+ ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
13886+ virt_to_mfn(info->page));
13887+ if (ret)
13888+ goto error_xenbus;
13889+ ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
13890+ irq_to_evtchn_port(info->irq));
13891+ if (ret)
13892+ goto error_xenbus;
13893+ ret = xenbus_transaction_end(xbt, 0);
13894+ if (ret) {
13895+ if (ret == -EAGAIN)
13896+ goto again;
13897+ xenbus_dev_fatal(dev, ret, "completing transaction");
13898+ return ret;
13899+ }
13900+
13901+ xenbus_switch_state(dev, XenbusStateInitialised);
13902+ return 0;
13903+
13904+ error_xenbus:
13905+ xenbus_transaction_end(xbt, 1);
13906+ xenbus_dev_fatal(dev, ret, "writing xenstore");
13907+ return ret;
13908+}
13909+
13910+static void xenkbd_disconnect_backend(struct xenkbd_info *info)
13911+{
13912+ if (info->irq >= 0)
13913+ unbind_from_irqhandler(info->irq, info);
13914+ info->irq = -1;
13915+}
13916+
13917+static void xenkbd_backend_changed(struct xenbus_device *dev,
13918+ enum xenbus_state backend_state)
13919+{
13920+ struct xenkbd_info *info = dev->dev.driver_data;
13921+ int ret, val;
13922+
13923+ switch (backend_state) {
13924+ case XenbusStateInitialising:
13925+ case XenbusStateInitialised:
13926+ case XenbusStateReconfiguring:
13927+ case XenbusStateReconfigured:
13928+ case XenbusStateUnknown:
13929+ case XenbusStateClosed:
13930+ break;
13931+
13932+ case XenbusStateInitWait:
13933+ InitWait:
13934+ ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
13935+ "feature-abs-pointer", "%d", &val);
13936+ if (ret < 0)
13937+ val = 0;
13938+ if (val) {
13939+ ret = xenbus_printf(XBT_NIL, info->xbdev->nodename,
13940+ "request-abs-pointer", "1");
13941+ if (ret)
13942+ ; /* FIXME */
13943+ }
13944+ xenbus_switch_state(dev, XenbusStateConnected);
13945+ break;
13946+
13947+ case XenbusStateConnected:
13948+ /*
13949+ * Work around xenbus race condition: If backend goes
13950+ * through InitWait to Connected fast enough, we can
13951+ * get Connected twice here.
13952+ */
13953+ if (dev->state != XenbusStateConnected)
13954+ goto InitWait; /* no InitWait seen yet, fudge it */
13955+
13956+ /* Set input abs params to match backend screen res */
13957+ if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
13958+ "width", "%d", &val) > 0 )
13959+ input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0);
13960+
13961+ if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
13962+ "height", "%d", &val) > 0 )
13963+ input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0);
13964+
13965+ break;
13966+
13967+ case XenbusStateClosing:
13968+ xenbus_frontend_closed(dev);
13969+ break;
13970+ }
13971+}
13972+
13973+static const struct xenbus_device_id xenkbd_ids[] = {
13974+ { "vkbd" },
13975+ { "" }
13976+};
13977+MODULE_ALIAS("xen:vkbd");
13978+
13979+static struct xenbus_driver xenkbd_driver = {
13980+ .name = "vkbd",
13981+ .owner = THIS_MODULE,
13982+ .ids = xenkbd_ids,
13983+ .probe = xenkbd_probe,
13984+ .remove = xenkbd_remove,
13985+ .resume = xenkbd_resume,
13986+ .otherend_changed = xenkbd_backend_changed,
13987+};
13988+
13989+static int __init xenkbd_init(void)
13990+{
13991+ if (!is_running_on_xen())
13992+ return -ENODEV;
13993+
13994+ /* Nothing to do if running in dom0. */
13995+ if (is_initial_xendomain())
13996+ return -ENODEV;
13997+
13998+ return xenbus_register_frontend(&xenkbd_driver);
13999+}
14000+
14001+static void __exit xenkbd_cleanup(void)
14002+{
14003+ return xenbus_unregister_driver(&xenkbd_driver);
14004+}
14005+
14006+module_init(xenkbd_init);
14007+module_exit(xenkbd_cleanup);
14008+
14009+MODULE_LICENSE("GPL");
14010Index: head-2008-11-25/drivers/xen/gntdev/Makefile
14011===================================================================
14012--- /dev/null 1970-01-01 00:00:00.000000000 +0000
14013+++ head-2008-11-25/drivers/xen/gntdev/Makefile 2008-01-07 13:19:18.000000000 +0100
14014@@ -0,0 +1 @@
14015+obj-$(CONFIG_XEN_GRANT_DEV) := gntdev.o
14016Index: head-2008-11-25/drivers/xen/gntdev/gntdev.c
14017===================================================================
14018--- /dev/null 1970-01-01 00:00:00.000000000 +0000
14019+++ head-2008-11-25/drivers/xen/gntdev/gntdev.c 2008-07-21 11:00:33.000000000 +0200
14020@@ -0,0 +1,1074 @@
14021+/******************************************************************************
14022+ * gntdev.c
14023+ *
14024+ * Device for accessing (in user-space) pages that have been granted by other
14025+ * domains.
14026+ *
14027+ * Copyright (c) 2006-2007, D G Murray.
14028+ *
14029+ * This program is distributed in the hope that it will be useful,
14030+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14031+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14032+ * GNU General Public License for more details.
14033+ *
14034+ * You should have received a copy of the GNU General Public License
14035+ * along with this program; if not, write to the Free Software
14036+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14037+ */
14038+
14039+#include <asm/atomic.h>
14040+#include <linux/module.h>
14041+#include <linux/kernel.h>
14042+#include <linux/init.h>
14043+#include <linux/fs.h>
14044+#include <linux/device.h>
14045+#include <linux/mm.h>
14046+#include <linux/mman.h>
14047+#include <asm/uaccess.h>
14048+#include <asm/io.h>
14049+#include <xen/gnttab.h>
14050+#include <asm/hypervisor.h>
14051+#include <xen/balloon.h>
14052+#include <xen/evtchn.h>
14053+#include <xen/driver_util.h>
14054+
14055+#include <linux/types.h>
14056+#include <xen/public/gntdev.h>
14057+
14058+
14059+#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@cl.cam.ac.uk>"
14060+#define DRIVER_DESC "User-space granted page access driver"
14061+
14062+MODULE_LICENSE("GPL");
14063+MODULE_AUTHOR(DRIVER_AUTHOR);
14064+MODULE_DESCRIPTION(DRIVER_DESC);
14065+
14066+#define MAX_GRANTS_LIMIT 1024
14067+#define DEFAULT_MAX_GRANTS 128
14068+
14069+/* A slot can be in one of three states:
14070+ *
14071+ * 0. GNTDEV_SLOT_INVALID:
14072+ * This slot is not associated with a grant reference, and is therefore free
14073+ * to be overwritten by a new grant reference.
14074+ *
14075+ * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
14076+ * This slot is associated with a grant reference (via the
14077+ * IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
14078+ *
14079+ * 2. GNTDEV_SLOT_MAPPED:
14080+ * This slot is associated with a grant reference, and has been mmap()-ed.
14081+ */
14082+typedef enum gntdev_slot_state {
14083+ GNTDEV_SLOT_INVALID = 0,
14084+ GNTDEV_SLOT_NOT_YET_MAPPED,
14085+ GNTDEV_SLOT_MAPPED
14086+} gntdev_slot_state_t;
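/*
 * Slot lifecycle, as implemented below: IOCTL_GNTDEV_MAP_GRANT_REF moves a
 * slot from INVALID to NOT_YET_MAPPED (add_grant_reference[s]()), a
 * successful gntdev_mmap() makes it MAPPED, gntdev_clear_pte() drops it back
 * to NOT_YET_MAPPED when the mapping is torn down, and
 * IOCTL_GNTDEV_UNMAP_GRANT_REF finally returns it to the free list as
 * INVALID.
 */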
14087+
14088+#define GNTDEV_INVALID_HANDLE -1
14089+#define GNTDEV_FREE_LIST_INVALID -1
14090+/* Each opened instance of gntdev is associated with a list of grants,
14091+ * represented by an array of elements of the following type,
14092+ * gntdev_grant_info_t.
14093+ */
14094+typedef struct gntdev_grant_info {
14095+ gntdev_slot_state_t state;
14096+ union {
14097+ uint32_t free_list_index;
14098+ struct {
14099+ domid_t domid;
14100+ grant_ref_t ref;
14101+ grant_handle_t kernel_handle;
14102+ grant_handle_t user_handle;
14103+ uint64_t dev_bus_addr;
14104+ } valid;
14105+ } u;
14106+} gntdev_grant_info_t;
14107+
14108+/* Private data structure, which is stored in the file pointer for files
14109+ * associated with this device.
14110+ */
14111+typedef struct gntdev_file_private_data {
14112+
14113+ /* Array of grant information. */
14114+ gntdev_grant_info_t *grants;
14115+ uint32_t grants_size;
14116+
14117+ /* Read/write semaphore used to protect the grants array. */
14118+ struct rw_semaphore grants_sem;
14119+
14120+ /* An array of indices of free slots in the grants array.
14121+ * N.B. An entry in this list may temporarily have the value
14122+ * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
14123+ * from the list by the contiguous allocator, but the list has not yet
14124+ * been compressed. However, this is not visible across invocations of
14125+ * the device.
14126+ */
14127+ int32_t *free_list;
14128+
14129+ /* The number of free slots in the grants array. */
14130+ uint32_t free_list_size;
14131+
14132+ /* Read/write semaphore used to protect the free list. */
14133+ struct rw_semaphore free_list_sem;
14134+
14135+ /* Index of the next slot after the most recent contiguous allocation,
14136+ * for use in a next-fit allocator.
14137+ */
14138+ uint32_t next_fit_index;
14139+
14140+ /* Used to map grants into the kernel, before mapping them into user
14141+ * space.
14142+ */
14143+ struct page **foreign_pages;
14144+
14145+} gntdev_file_private_data_t;
14146+
14147+/* Module lifecycle operations. */
14148+static int __init gntdev_init(void);
14149+static void __exit gntdev_exit(void);
14150+
14151+module_init(gntdev_init);
14152+module_exit(gntdev_exit);
14153+
14154+/* File operations. */
14155+static int gntdev_open(struct inode *inode, struct file *flip);
14156+static int gntdev_release(struct inode *inode, struct file *flip);
14157+static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
14158+static long gntdev_ioctl(struct file *flip,
14159+ unsigned int cmd, unsigned long arg);
14160+
14161+static const struct file_operations gntdev_fops = {
14162+ .owner = THIS_MODULE,
14163+ .open = gntdev_open,
14164+ .release = gntdev_release,
14165+ .mmap = gntdev_mmap,
14166+ .unlocked_ioctl = gntdev_ioctl
14167+};
14168+
14169+/* VM operations. */
14170+static void gntdev_vma_close(struct vm_area_struct *vma);
14171+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
14172+ pte_t *ptep, int is_fullmm);
14173+
14174+static struct vm_operations_struct gntdev_vmops = {
14175+ .close = gntdev_vma_close,
14176+ .zap_pte = gntdev_clear_pte
14177+};
14178+
14179+/* Global variables. */
14180+
14181+/* The driver major number, for use when unregistering the driver. */
14182+static int gntdev_major;
14183+
14184+#define GNTDEV_NAME "gntdev"
14185+
14186+/* Memory mapping functions
14187+ * ------------------------
14188+ *
14189+ * Every granted page is mapped into both kernel and user space, and the two
14190+ * following functions return the respective virtual addresses of these pages.
14191+ *
14192+ * When shadow paging is disabled, the granted page is mapped directly into
14193+ * user space; when it is enabled, it is mapped into the kernel and remapped
14194+ * into user space using vm_insert_page() (see gntdev_mmap(), below).
14195+ */
14196+
14197+/* Returns the virtual address (in user space) of the @page_index'th page
14198+ * in the given VM area.
14199+ */
14200+static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
14201+ int page_index)
14202+{
14203+ return (unsigned long) vma->vm_start + (page_index << PAGE_SHIFT);
14204+}
14205+
14206+/* Returns the virtual address (in kernel space) of the @slot_index'th page
14207+ * mapped by the gntdev instance that owns the given private data struct.
14208+ */
14209+static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
14210+ int slot_index)
14211+{
14212+ unsigned long pfn;
14213+ void *kaddr;
14214+ pfn = page_to_pfn(priv->foreign_pages[slot_index]);
14215+ kaddr = pfn_to_kaddr(pfn);
14216+ return (unsigned long) kaddr;
14217+}
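/*
 * Putting the two together (illustrative): a slot s mmap()-ed with offset
 * s << PAGE_SHIFT appears in user space at
 * vma->vm_start + (s - vma->vm_pgoff) * PAGE_SIZE, while get_kernel_vaddr()
 * yields the kernel direct-map address of the page backing that slot.
 */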
14218+
14219+/* Helper functions. */
14220+
14221+/* Adds information about a grant reference to the list of grants in the file's
14222+ * private data structure. Returns non-zero on failure. On success, sets the
14223+ * value of *offset to the offset that should be mmap()-ed in order to map the
14224+ * grant reference.
14225+ */
14226+static int add_grant_reference(struct file *flip,
14227+ struct ioctl_gntdev_grant_ref *op,
14228+ uint64_t *offset)
14229+{
14230+ gntdev_file_private_data_t *private_data
14231+ = (gntdev_file_private_data_t *) flip->private_data;
14232+
14233+ uint32_t slot_index;
14234+
14235+ if (unlikely(private_data->free_list_size == 0)) {
14236+ return -ENOMEM;
14237+ }
14238+
14239+ slot_index = private_data->free_list[--private_data->free_list_size];
14240+ private_data->free_list[private_data->free_list_size]
14241+ = GNTDEV_FREE_LIST_INVALID;
14242+
14243+ /* Copy the grant information into file's private data. */
14244+ private_data->grants[slot_index].state = GNTDEV_SLOT_NOT_YET_MAPPED;
14245+ private_data->grants[slot_index].u.valid.domid = op->domid;
14246+ private_data->grants[slot_index].u.valid.ref = op->ref;
14247+
14248+ /* The offset is calculated as the index of the chosen entry in the
14249+ * file's private data's array of grant information. This is then
14250+ * shifted to give an offset into the virtual "file address space".
14251+ */
14252+ *offset = slot_index << PAGE_SHIFT;
14253+
14254+ return 0;
14255+}
14256+
14257+/* Adds the @count grant references to the contiguous range in the slot array
14258+ * beginning at @first_slot. It is assumed that @first_slot was returned by a
14259+ * previous invocation of find_contiguous_free_range() during the same
14260+ * ioctl call, while the grant and free-list semaphores are still held.
14261+ */
14262+static int add_grant_references(struct file *flip,
14263+ int count,
14264+ struct ioctl_gntdev_grant_ref *ops,
14265+ uint32_t first_slot)
14266+{
14267+ gntdev_file_private_data_t *private_data
14268+ = (gntdev_file_private_data_t *) flip->private_data;
14269+ int i;
14270+
14271+ for (i = 0; i < count; ++i) {
14272+
14273+ /* First, mark the slot's entry in the free list as invalid. */
14274+ int free_list_index =
14275+ private_data->grants[first_slot+i].u.free_list_index;
14276+ private_data->free_list[free_list_index] =
14277+ GNTDEV_FREE_LIST_INVALID;
14278+
14279+ /* Now, update the slot. */
14280+ private_data->grants[first_slot+i].state =
14281+ GNTDEV_SLOT_NOT_YET_MAPPED;
14282+ private_data->grants[first_slot+i].u.valid.domid =
14283+ ops[i].domid;
14284+ private_data->grants[first_slot+i].u.valid.ref = ops[i].ref;
14285+ }
14286+
14287+ return 0;
14288+}
14289+
14290+/* Scans through the free list for @flip, removing entries that are marked as
14291+ * GNTDEV_FREE_LIST_INVALID. This will reduce the recorded size of the free list to
14292+ * the number of valid entries.
14293+ */
14294+static void compress_free_list(struct file *flip)
14295+{
14296+ gntdev_file_private_data_t *private_data
14297+ = (gntdev_file_private_data_t *) flip->private_data;
14298+ int i, j = 0, old_size, slot_index;
14299+
14300+ old_size = private_data->free_list_size;
14301+ for (i = 0; i < old_size; ++i) {
14302+ if (private_data->free_list[i] != GNTDEV_FREE_LIST_INVALID) {
14303+ if (i > j) {
14304+ slot_index = private_data->free_list[i];
14305+ private_data->free_list[j] = slot_index;
14306+ private_data->grants[slot_index].u
14307+ .free_list_index = j;
14308+ private_data->free_list[i]
14309+ = GNTDEV_FREE_LIST_INVALID;
14310+ }
14311+ ++j;
14312+ } else {
14313+ --private_data->free_list_size;
14314+ }
14315+ }
14316+}
14317+
14318+/* Searches the grant array in the private data of @flip for a range of
14319+ * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state.
14320+ *
14321+ * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
14322+ */
14323+static int find_contiguous_free_range(struct file *flip,
14324+ uint32_t num_slots)
14325+{
14326+ gntdev_file_private_data_t *private_data
14327+ = (gntdev_file_private_data_t *) flip->private_data;
14328+
14329+ int i;
14330+ int start_index = private_data->next_fit_index;
14331+ int range_start = 0, range_length;
14332+
14333+ if (private_data->free_list_size < num_slots) {
14334+ return -ENOMEM;
14335+ }
14336+
14337+ /* First search from the start_index to the end of the array. */
14338+ range_length = 0;
14339+ for (i = start_index; i < private_data->grants_size; ++i) {
14340+ if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
14341+ if (range_length == 0) {
14342+ range_start = i;
14343+ }
14344+ ++range_length;
14345+ if (range_length == num_slots) {
14346+ return range_start;
14347+ }
14348+ }
14349+ }
14350+
14351+ /* Now search from the start of the array to the start_index. */
14352+ range_length = 0;
14353+ for (i = 0; i < start_index; ++i) {
14354+ if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
14355+ if (range_length == 0) {
14356+ range_start = i;
14357+ }
14358+ ++range_length;
14359+ if (range_length == num_slots) {
14360+ return range_start;
14361+ }
14362+ }
14363+ }
14364+
14365+ return -ENOMEM;
14366+}
14367+
14368+static int init_private_data(gntdev_file_private_data_t *priv,
14369+ uint32_t max_grants)
14370+{
14371+ int i;
14372+
14373+ /* Allocate space for the kernel-mapping of granted pages. */
14374+ priv->foreign_pages =
14375+ alloc_empty_pages_and_pagevec(max_grants);
14376+ if (!priv->foreign_pages)
14377+ goto nomem_out;
14378+
14379+ /* Allocate the grant list and free-list. */
14380+ priv->grants = kmalloc(max_grants * sizeof(gntdev_grant_info_t),
14381+ GFP_KERNEL);
14382+ if (!priv->grants)
14383+ goto nomem_out2;
14384+ priv->free_list = kmalloc(max_grants * sizeof(int32_t), GFP_KERNEL);
14385+ if (!priv->free_list)
14386+ goto nomem_out3;
14387+
14388+ /* Initialise the free-list, which contains all slots at first. */
14389+ for (i = 0; i < max_grants; ++i) {
14390+ priv->free_list[max_grants - i - 1] = i;
14391+ priv->grants[i].state = GNTDEV_SLOT_INVALID;
14392+ priv->grants[i].u.free_list_index = max_grants - i - 1;
14393+ }
14394+ priv->grants_size = max_grants;
14395+ priv->free_list_size = max_grants;
14396+ priv->next_fit_index = 0;
14397+
14398+ return 0;
14399+
14400+nomem_out3:
14401+ kfree(priv->grants);
14402+nomem_out2:
14403+ free_empty_pages_and_pagevec(priv->foreign_pages, max_grants);
14404+nomem_out:
14405+ return -ENOMEM;
14406+
14407+}
14408+
14409+/* Interface functions. */
14410+
14411+/* Initialises the driver. Called when the module is loaded. */
14412+static int __init gntdev_init(void)
14413+{
14414+ struct class *class;
14415+ struct class_device *device;
14416+
14417+ if (!is_running_on_xen()) {
14418+ printk(KERN_ERR "You must be running Xen to use gntdev\n");
14419+ return -ENODEV;
14420+ }
14421+
14422+ gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
14423+ if (gntdev_major < 0)
14424+ {
14425+ printk(KERN_ERR "Could not register gntdev device\n");
14426+ return -ENOMEM;
14427+		return gntdev_major;
14428+
14429+ /* Note that if the sysfs code fails, we will still initialise the
14430+ * device, and output the major number so that the device can be
14431+ * created manually using mknod.
14432+ */
14433+ if ((class = get_xen_class()) == NULL) {
14434+ printk(KERN_ERR "Error setting up xen_class\n");
14435+ printk(KERN_ERR "gntdev created with major number = %d\n",
14436+ gntdev_major);
14437+ return 0;
14438+ }
14439+
14440+ device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
14441+ NULL, GNTDEV_NAME);
14442+ if (IS_ERR(device)) {
14443+ printk(KERN_ERR "Error creating gntdev device in xen_class\n");
14444+ printk(KERN_ERR "gntdev created with major number = %d\n",
14445+ gntdev_major);
14446+ return 0;
14447+ }
14448+
14449+ return 0;
14450+}
14451+
14452+/* Cleans up and unregisters the driver. Called when the driver is unloaded.
14453+ */
14454+static void __exit gntdev_exit(void)
14455+{
14456+ struct class *class;
14457+ if ((class = get_xen_class()) != NULL)
14458+ class_device_destroy(class, MKDEV(gntdev_major, 0));
14459+ unregister_chrdev(gntdev_major, GNTDEV_NAME);
14460+}
14461+
14462+/* Called when the device is opened. */
14463+static int gntdev_open(struct inode *inode, struct file *flip)
14464+{
14465+ gntdev_file_private_data_t *private_data;
14466+
14467+ try_module_get(THIS_MODULE);
14468+
14469+ /* Allocate space for the per-instance private data. */
14470+ private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
14471+ if (!private_data)
14472+ goto nomem_out;
14473+
14474+ /* These will be lazily initialised by init_private_data. */
14475+ private_data->grants = NULL;
14476+ private_data->free_list = NULL;
14477+ private_data->foreign_pages = NULL;
14478+
14479+ init_rwsem(&private_data->grants_sem);
14480+ init_rwsem(&private_data->free_list_sem);
14481+
14482+ flip->private_data = private_data;
14483+
14484+ return 0;
14485+
14486+nomem_out:
14487+	module_put(THIS_MODULE);
	return -ENOMEM;
14488+}
14489+
14490+/* Called when the device is closed.
14491+ */
14492+static int gntdev_release(struct inode *inode, struct file *flip)
14493+{
14494+ if (flip->private_data) {
14495+ gntdev_file_private_data_t *private_data =
14496+ (gntdev_file_private_data_t *) flip->private_data;
14497+ if (private_data->foreign_pages)
14498+ free_empty_pages_and_pagevec
14499+ (private_data->foreign_pages,
14500+ private_data->grants_size);
14501+		kfree(private_data->grants);
14502+		kfree(private_data->free_list);
14505+ kfree(private_data);
14506+ }
14507+ module_put(THIS_MODULE);
14508+ return 0;
14509+}
14510+
14511+/* Called when an attempt is made to mmap() the device. The private data from
14512+ * @flip contains the list of grant references that can be mapped. The vm_pgoff
14513+ * field of @vma contains the index into that list that refers to the grant
14514+ * reference that will be mapped. Only mappings that are a multiple of
14515+ * PAGE_SIZE are handled.
14516+ */
14517+static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma)
14518+{
14519+ struct gnttab_map_grant_ref op;
14520+ unsigned long slot_index = vma->vm_pgoff;
14521+ unsigned long kernel_vaddr, user_vaddr;
14522+ uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
14523+ uint64_t ptep;
14524+ int ret;
14525+ int flags;
14526+ int i;
14527+ struct page *page;
14528+ gntdev_file_private_data_t *private_data = flip->private_data;
14529+
14530+ if (unlikely(!private_data)) {
14531+ printk(KERN_ERR "File's private data is NULL.\n");
14532+ return -EINVAL;
14533+ }
14534+
14535+ /* Test to make sure that the grants array has been initialised. */
14536+ down_read(&private_data->grants_sem);
14537+ if (unlikely(!private_data->grants)) {
14538+ up_read(&private_data->grants_sem);
14539+ printk(KERN_ERR "Attempted to mmap before ioctl.\n");
14540+ return -EINVAL;
14541+ }
14542+ up_read(&private_data->grants_sem);
14543+
14544+ if (unlikely((size <= 0) ||
14545+ (size + slot_index) > private_data->grants_size)) {
14546+		printk(KERN_ERR "Invalid number of pages or offset "
14547+		       "(num_pages = %u, first_slot = %lu).\n",
14548+ size, slot_index);
14549+ return -ENXIO;
14550+ }
14551+
14552+ if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
14553+ printk(KERN_ERR "Writable mappings must be shared.\n");
14554+ return -EINVAL;
14555+ }
14556+
14557+ /* Slots must be in the NOT_YET_MAPPED state. */
14558+ down_write(&private_data->grants_sem);
14559+ for (i = 0; i < size; ++i) {
14560+ if (private_data->grants[slot_index + i].state !=
14561+ GNTDEV_SLOT_NOT_YET_MAPPED) {
14562+ printk(KERN_ERR "Slot (index = %ld) is in the wrong "
14563+ "state (%d).\n", slot_index + i,
14564+ private_data->grants[slot_index + i].state);
14565+ up_write(&private_data->grants_sem);
14566+ return -EINVAL;
14567+ }
14568+ }
14569+
14570+ /* Install the hook for unmapping. */
14571+ vma->vm_ops = &gntdev_vmops;
14572+
14573+ /* The VM area contains pages from another VM. */
14574+ vma->vm_flags |= VM_FOREIGN;
14575+ vma->vm_private_data = kzalloc(size * sizeof(struct page *),
14576+ GFP_KERNEL);
14577+ if (vma->vm_private_data == NULL) {
14578+ printk(KERN_ERR "Couldn't allocate mapping structure for VM "
14579+ "area.\n");
14580+ return -ENOMEM;
14581+ }
14582+
14583+ /* This flag prevents Bad PTE errors when the memory is unmapped. */
14584+ vma->vm_flags |= VM_RESERVED;
14585+
14586+ /* This flag prevents this VM area being copied on a fork(). A better
14587+ * behaviour might be to explicitly carry out the appropriate mappings
14588+ * on fork(), but I don't know if there's a hook for this.
14589+ */
14590+ vma->vm_flags |= VM_DONTCOPY;
14591+
14592+#ifdef CONFIG_X86
14593+ /* This flag ensures that the page tables are not unpinned before the
14594+ * VM area is unmapped. Therefore Xen still recognises the PTE as
14595+ * belonging to an L1 pagetable, and the grant unmap operation will
14596+ * succeed, even if the process does not exit cleanly.
14597+ */
14598+ vma->vm_mm->context.has_foreign_mappings = 1;
14599+#endif
14600+
14601+ for (i = 0; i < size; ++i) {
14602+
14603+ flags = GNTMAP_host_map;
14604+ if (!(vma->vm_flags & VM_WRITE))
14605+ flags |= GNTMAP_readonly;
14606+
14607+ kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
14608+ user_vaddr = get_user_vaddr(vma, i);
14609+ page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT);
14610+
14611+ gnttab_set_map_op(&op, kernel_vaddr, flags,
14612+ private_data->grants[slot_index+i]
14613+ .u.valid.ref,
14614+ private_data->grants[slot_index+i]
14615+ .u.valid.domid);
14616+
14617+ /* Carry out the mapping of the grant reference. */
14618+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
14619+ &op, 1);
14620+ BUG_ON(ret);
14621+ if (op.status) {
14622+ printk(KERN_ERR "Error mapping the grant reference "
14623+ "into the kernel (%d). domid = %d; ref = %d\n",
14624+ op.status,
14625+ private_data->grants[slot_index+i]
14626+ .u.valid.domid,
14627+ private_data->grants[slot_index+i]
14628+ .u.valid.ref);
14629+ goto undo_map_out;
14630+ }
14631+
14632+ /* Store a reference to the page that will be mapped into user
14633+ * space.
14634+ */
14635+ ((struct page **) vma->vm_private_data)[i] = page;
14636+
14637+ /* Mark mapped page as reserved. */
14638+ SetPageReserved(page);
14639+
14640+ /* Record the grant handle, for use in the unmap operation. */
14641+ private_data->grants[slot_index+i].u.valid.kernel_handle =
14642+ op.handle;
14643+ private_data->grants[slot_index+i].u.valid.dev_bus_addr =
14644+ op.dev_bus_addr;
14645+
14646+ private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED;
14647+ private_data->grants[slot_index+i].u.valid.user_handle =
14648+ GNTDEV_INVALID_HANDLE;
14649+
14650+ /* Now perform the mapping to user space. */
14651+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
14652+
14653+ /* NOT USING SHADOW PAGE TABLES. */
14654+ /* In this case, we map the grant(s) straight into user
14655+ * space.
14656+ */
14657+
14658+ /* Get the machine address of the PTE for the user
14659+ * page.
14660+ */
14661+ if ((ret = create_lookup_pte_addr(vma->vm_mm,
14662+ vma->vm_start
14663+ + (i << PAGE_SHIFT),
14664+ &ptep)))
14665+ {
14666+ printk(KERN_ERR "Error obtaining PTE pointer "
14667+ "(%d).\n", ret);
14668+ goto undo_map_out;
14669+ }
14670+
14671+ /* Configure the map operation. */
14672+
14673+ /* The reference is to be used by host CPUs. */
14674+ flags = GNTMAP_host_map;
14675+
14676+ /* Specifies a user space mapping. */
14677+ flags |= GNTMAP_application_map;
14678+
14679+ /* The map request contains the machine address of the
14680+ * PTE to update.
14681+ */
14682+ flags |= GNTMAP_contains_pte;
14683+
14684+ if (!(vma->vm_flags & VM_WRITE))
14685+ flags |= GNTMAP_readonly;
14686+
14687+ gnttab_set_map_op(&op, ptep, flags,
14688+ private_data->grants[slot_index+i]
14689+ .u.valid.ref,
14690+ private_data->grants[slot_index+i]
14691+ .u.valid.domid);
14692+
14693+ /* Carry out the mapping of the grant reference. */
14694+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
14695+ &op, 1);
14696+ BUG_ON(ret);
14697+ if (op.status) {
14698+ printk(KERN_ERR "Error mapping the grant "
14699+ "reference into user space (%d). domid "
14700+ "= %d; ref = %d\n", op.status,
14701+ private_data->grants[slot_index+i].u
14702+ .valid.domid,
14703+ private_data->grants[slot_index+i].u
14704+ .valid.ref);
14705+ goto undo_map_out;
14706+ }
14707+
14708+ /* Record the grant handle, for use in the unmap
14709+ * operation.
14710+ */
14711+ private_data->grants[slot_index+i].u.
14712+ valid.user_handle = op.handle;
14713+
14714+ /* Update p2m structure with the new mapping. */
14715+ set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
14716+ FOREIGN_FRAME(private_data->
14717+ grants[slot_index+i]
14718+ .u.valid.dev_bus_addr
14719+ >> PAGE_SHIFT));
14720+ } else {
14721+ /* USING SHADOW PAGE TABLES. */
14722+ /* In this case, we simply insert the page into the VM
14723+ * area. */
14724+ ret = vm_insert_page(vma, user_vaddr, page);
14725+ }
14726+
14727+ }
14728+
14729+ up_write(&private_data->grants_sem);
14730+ return 0;
14731+
14732+undo_map_out:
14733+ /* If we have a mapping failure, the unmapping will be taken care of
14734+ * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
14735+	 * All we need to do here is free vma->vm_private_data.
14736+ */
14737+ kfree(vma->vm_private_data);
14738+
14739+ /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
14740+ * to NULL on failure. However, we need this in gntdev_clear_pte() to
14741+ * unmap the grants. Therefore, we smuggle a reference to the file's
14742+ * private data in the VM area's private data pointer.
14743+ */
14744+ vma->vm_private_data = private_data;
14745+
14746+ up_write(&private_data->grants_sem);
14747+
14748+ return -ENOMEM;
14749+}
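For orientation, here is a minimal user-space sketch of the call sequence this
handler expects: reserve a slot with IOCTL_GNTDEV_MAP_GRANT_REF, then mmap()
the returned index as the file offset. The device node name /dev/gntdev, the
header path and the absence of error reporting are assumptions made for
illustration; this program is a sketch and not part of the patch.

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>
	#include <xen/public/gntdev.h>	/* user-space copy of the ioctl ABI (assumed path) */

	int main(int argc, char *argv[])
	{
		struct ioctl_gntdev_map_grant_ref op;
		void *addr;
		int fd;

		if (argc != 3) {
			fprintf(stderr, "usage: %s <domid> <gntref>\n", argv[0]);
			return 1;
		}
		fd = open("/dev/gntdev", O_RDWR);	/* node name assumed */
		if (fd < 0)
			return 1;
		op.count = 1;
		op.refs[0].domid = atoi(argv[1]);
		op.refs[0].ref = atoi(argv[2]);
		if (ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &op) < 0)
			return 1;
		/* op.index is slot << PAGE_SHIFT, i.e. the offset to mmap() */
		addr = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
			    MAP_SHARED, fd, op.index);
		if (addr == MAP_FAILED)
			return 1;
		printf("granted page mapped at %p\n", addr);
		/* keep fd open while the mapping is in use; munmap() to tear it down */
		return 0;
	}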
14750+
14751+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
14752+ pte_t *ptep, int is_fullmm)
14753+{
14754+ int slot_index, ret;
14755+ pte_t copy;
14756+ struct gnttab_unmap_grant_ref op;
14757+ gntdev_file_private_data_t *private_data;
14758+
14759+ /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
14760+ * to NULL on failure. However, we need this in gntdev_clear_pte() to
14761+ * unmap the grants. Therefore, we smuggle a reference to the file's
14762+ * private data in the VM area's private data pointer.
14763+ */
14764+ if (vma->vm_file) {
14765+ private_data = (gntdev_file_private_data_t *)
14766+ vma->vm_file->private_data;
14767+ } else if (vma->vm_private_data) {
14768+ private_data = (gntdev_file_private_data_t *)
14769+ vma->vm_private_data;
14770+ } else {
14771+ private_data = NULL; /* gcc warning */
14772+ BUG();
14773+ }
14774+
14775+ /* Copy the existing value of the PTE for returning. */
14776+ copy = *ptep;
14777+
14778+ /* Calculate the grant relating to this PTE. */
14779+ slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
14780+
14781+ /* Only unmap grants if the slot has been mapped. This could be being
14782+ * called from a failing mmap().
14783+ */
14784+ if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) {
14785+
14786+ /* First, we clear the user space mapping, if it has been made.
14787+ */
14788+ if (private_data->grants[slot_index].u.valid.user_handle !=
14789+ GNTDEV_INVALID_HANDLE &&
14790+ !xen_feature(XENFEAT_auto_translated_physmap)) {
14791+ /* NOT USING SHADOW PAGE TABLES. */
14792+ gnttab_set_unmap_op(&op, virt_to_machine(ptep),
14793+ GNTMAP_contains_pte,
14794+ private_data->grants[slot_index]
14795+ .u.valid.user_handle);
14796+ ret = HYPERVISOR_grant_table_op(
14797+ GNTTABOP_unmap_grant_ref, &op, 1);
14798+ BUG_ON(ret);
14799+ if (op.status)
14800+				printk(KERN_WARNING "User unmap grant status = %d\n",
14801+ op.status);
14802+ } else {
14803+ /* USING SHADOW PAGE TABLES. */
14804+ pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
14805+ }
14806+
14807+ /* Finally, we unmap the grant from kernel space. */
14808+ gnttab_set_unmap_op(&op,
14809+ get_kernel_vaddr(private_data, slot_index),
14810+ GNTMAP_host_map,
14811+ private_data->grants[slot_index].u.valid
14812+ .kernel_handle);
14813+ ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
14814+ &op, 1);
14815+ BUG_ON(ret);
14816+ if (op.status)
14817+			printk(KERN_WARNING "Kernel unmap grant status = %d\n", op.status);
14818+
14819+
14820+ /* Return slot to the not-yet-mapped state, so that it may be
14821+ * mapped again, or removed by a subsequent ioctl.
14822+ */
14823+ private_data->grants[slot_index].state =
14824+ GNTDEV_SLOT_NOT_YET_MAPPED;
14825+
14826+ /* Invalidate the physical to machine mapping for this page. */
14827+ set_phys_to_machine(__pa(get_kernel_vaddr(private_data,
14828+ slot_index))
14829+ >> PAGE_SHIFT, INVALID_P2M_ENTRY);
14830+
14831+ } else {
14832+ pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
14833+ }
14834+
14835+ return copy;
14836+}
14837+
14838+/* "Destructor" for a VM area.
14839+ */
14840+static void gntdev_vma_close(struct vm_area_struct *vma)
14841+{
14842+	kfree(vma->vm_private_data);
14843+}
14845+
14846+/* Called when an ioctl is made on the device.
14847+ */
14848+static long gntdev_ioctl(struct file *flip,
14849+ unsigned int cmd, unsigned long arg)
14850+{
14851+ int rc = 0;
14852+ gntdev_file_private_data_t *private_data =
14853+ (gntdev_file_private_data_t *) flip->private_data;
14854+
14855+ /* On the first invocation, we will lazily initialise the grant array
14856+ * and free-list.
14857+ */
14858+ if (unlikely(!private_data->grants)
14859+ && likely(cmd != IOCTL_GNTDEV_SET_MAX_GRANTS)) {
14860+ down_write(&private_data->grants_sem);
14861+
14862+ if (unlikely(private_data->grants)) {
14863+ up_write(&private_data->grants_sem);
14864+ goto private_data_initialised;
14865+ }
14866+
14867+ /* Just use the default. Setting to a non-default is handled
14868+ * in the ioctl switch.
14869+ */
14870+ rc = init_private_data(private_data, DEFAULT_MAX_GRANTS);
14871+
14872+ up_write(&private_data->grants_sem);
14873+
14874+ if (rc) {
14875+			printk(KERN_ERR "Initialising gntdev private data "
14876+ "failed.\n");
14877+ return rc;
14878+ }
14879+ }
14880+
14881+private_data_initialised:
14882+ switch (cmd) {
14883+ case IOCTL_GNTDEV_MAP_GRANT_REF:
14884+ {
14885+ struct ioctl_gntdev_map_grant_ref op;
14886+ down_write(&private_data->grants_sem);
14887+ down_write(&private_data->free_list_sem);
14888+
14889+ if ((rc = copy_from_user(&op, (void __user *) arg,
14890+ sizeof(op)))) {
14891+ rc = -EFAULT;
14892+ goto map_out;
14893+ }
14894+ if (unlikely(op.count <= 0)) {
14895+ rc = -EINVAL;
14896+ goto map_out;
14897+ }
14898+
14899+ if (op.count == 1) {
14900+ if ((rc = add_grant_reference(flip, &op.refs[0],
14901+ &op.index)) < 0) {
14902+ printk(KERN_ERR "Adding grant reference "
14903+ "failed (%d).\n", rc);
14904+ goto map_out;
14905+ }
14906+ } else {
14907+ struct ioctl_gntdev_grant_ref *refs, *u;
14908+ refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
14909+ if (!refs) {
14910+ rc = -ENOMEM;
14911+ goto map_out;
14912+ }
14913+ u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
14914+ if ((rc = copy_from_user(refs,
14915+ (void __user *)u,
14916+ sizeof(*refs) * op.count))) {
14917+ printk(KERN_ERR "Copying refs from user failed"
14918+ " (%d).\n", rc);
14919+ rc = -EINVAL;
14920+ goto map_out;
14921+ }
14922+ if ((rc = find_contiguous_free_range(flip, op.count))
14923+ < 0) {
14924+ printk(KERN_ERR "Finding contiguous range "
14925+ "failed (%d).\n", rc);
14926+ kfree(refs);
14927+ goto map_out;
14928+ }
14929+ op.index = rc << PAGE_SHIFT;
14930+ if ((rc = add_grant_references(flip, op.count,
14931+ refs, rc))) {
14932+ printk(KERN_ERR "Adding grant references "
14933+ "failed (%d).\n", rc);
14934+ kfree(refs);
14935+ goto map_out;
14936+ }
14937+ compress_free_list(flip);
14938+ kfree(refs);
14939+ }
14940+ if ((rc = copy_to_user((void __user *) arg,
14941+ &op,
14942+ sizeof(op)))) {
14943+ printk(KERN_ERR "Copying result back to user failed "
14944+ "(%d)\n", rc);
14945+ rc = -EFAULT;
14946+ goto map_out;
14947+ }
14948+ map_out:
14949+ up_write(&private_data->grants_sem);
14950+ up_write(&private_data->free_list_sem);
14951+ return rc;
14952+ }
14953+ case IOCTL_GNTDEV_UNMAP_GRANT_REF:
14954+ {
14955+ struct ioctl_gntdev_unmap_grant_ref op;
14956+ int i, start_index;
14957+
14958+ down_write(&private_data->grants_sem);
14959+ down_write(&private_data->free_list_sem);
14960+
14961+ if ((rc = copy_from_user(&op,
14962+ (void __user *) arg,
14963+ sizeof(op)))) {
14964+ rc = -EFAULT;
14965+ goto unmap_out;
14966+ }
14967+
14968+ start_index = op.index >> PAGE_SHIFT;
14969+
14970+ /* First, check that all pages are in the NOT_YET_MAPPED
14971+ * state.
14972+ */
14973+ for (i = 0; i < op.count; ++i) {
14974+ if (unlikely
14975+ (private_data->grants[start_index + i].state
14976+ != GNTDEV_SLOT_NOT_YET_MAPPED)) {
14977+ if (private_data->grants[start_index + i].state
14978+ == GNTDEV_SLOT_INVALID) {
14979+ printk(KERN_ERR
14980+ "Tried to remove an invalid "
14981+ "grant at offset 0x%x.\n",
14982+ (start_index + i)
14983+ << PAGE_SHIFT);
14984+ rc = -EINVAL;
14985+ } else {
14986+ printk(KERN_ERR
14987+ "Tried to remove a grant which "
14988+ "is currently mmap()-ed at "
14989+ "offset 0x%x.\n",
14990+ (start_index + i)
14991+ << PAGE_SHIFT);
14992+ rc = -EBUSY;
14993+ }
14994+ goto unmap_out;
14995+ }
14996+ }
14997+
14998+ /* Unmap pages and add them to the free list.
14999+ */
15000+ for (i = 0; i < op.count; ++i) {
15001+ private_data->grants[start_index+i].state =
15002+ GNTDEV_SLOT_INVALID;
15003+ private_data->grants[start_index+i].u.free_list_index =
15004+ private_data->free_list_size;
15005+ private_data->free_list[private_data->free_list_size] =
15006+ start_index + i;
15007+ ++private_data->free_list_size;
15008+ }
15009+
15010+ unmap_out:
15011+ up_write(&private_data->grants_sem);
15012+ up_write(&private_data->free_list_sem);
15013+ return rc;
15014+ }
15015+ case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
15016+ {
15017+ struct ioctl_gntdev_get_offset_for_vaddr op;
15018+ struct vm_area_struct *vma;
15019+ unsigned long vaddr;
15020+
15021+ if ((rc = copy_from_user(&op,
15022+ (void __user *) arg,
15023+ sizeof(op)))) {
15024+ rc = -EFAULT;
15025+ goto get_offset_out;
15026+ }
15027+ vaddr = (unsigned long)op.vaddr;
15028+
15029+ down_read(&current->mm->mmap_sem);
15030+ vma = find_vma(current->mm, vaddr);
15031+ if (vma == NULL) {
15032+ rc = -EFAULT;
15033+ goto get_offset_unlock_out;
15034+ }
15035+ if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) {
15036+ printk(KERN_ERR "The vaddr specified does not belong "
15037+ "to a gntdev instance: %#lx\n", vaddr);
15038+ rc = -EFAULT;
15039+ goto get_offset_unlock_out;
15040+ }
15041+ if (vma->vm_start != vaddr) {
15042+ printk(KERN_ERR "The vaddr specified in an "
15043+ "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
15044+ "the start of the VM area. vma->vm_start = "
15045+ "%#lx; vaddr = %#lx\n",
15046+ vma->vm_start, vaddr);
15047+ rc = -EFAULT;
15048+ goto get_offset_unlock_out;
15049+ }
15050+ op.offset = vma->vm_pgoff << PAGE_SHIFT;
15051+ op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
15052+ up_read(&current->mm->mmap_sem);
15053+ if ((rc = copy_to_user((void __user *) arg,
15054+ &op,
15055+ sizeof(op)))) {
15056+ rc = -EFAULT;
15057+ goto get_offset_out;
15058+ }
15059+ goto get_offset_out;
15060+ get_offset_unlock_out:
15061+ up_read(&current->mm->mmap_sem);
15062+ get_offset_out:
15063+ return rc;
15064+ }
15065+ case IOCTL_GNTDEV_SET_MAX_GRANTS:
15066+ {
15067+ struct ioctl_gntdev_set_max_grants op;
15068+ if ((rc = copy_from_user(&op,
15069+ (void __user *) arg,
15070+ sizeof(op)))) {
15071+ rc = -EFAULT;
15072+ goto set_max_out;
15073+ }
15074+ down_write(&private_data->grants_sem);
15075+ if (private_data->grants) {
15076+ rc = -EBUSY;
15077+ goto set_max_unlock_out;
15078+ }
15079+ if (op.count > MAX_GRANTS_LIMIT) {
15080+ rc = -EINVAL;
15081+ goto set_max_unlock_out;
15082+ }
15083+ rc = init_private_data(private_data, op.count);
15084+ set_max_unlock_out:
15085+ up_write(&private_data->grants_sem);
15086+ set_max_out:
15087+ return rc;
15088+ }
15089+ default:
15090+ return -ENOIOCTLCMD;
15091+ }
15092+
15093+ return 0;
15094+}
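/*
 * Editor's sketch, not part of the original patch: a minimal user-space user
 * of the ioctls handled above, under the assumption that the public header
 * added by this patch (public/gntdev.h) is installed as <xen/public/gntdev.h>
 * and that the character device shows up as /dev/gntdev.  The field names
 * (refs[].domid/ref, index, count) mirror the ones gntdev_ioctl() reads and
 * writes above.  Note that IOCTL_GNTDEV_SET_MAX_GRANTS, if wanted, has to be
 * the very first ioctl on the file descriptor: once the grant array has been
 * lazily initialised it can no longer be resized (-EBUSY).
 */
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <xen/public/gntdev.h>	/* assumed install path of public/gntdev.h */

/* Map one page granted by 'domid' under grant reference 'gref', read from it,
 * then tear everything down again.  Returns 0 on success, -1 on error. */
static int gntdev_map_one(uint32_t domid, uint32_t gref)
{
	struct ioctl_gntdev_map_grant_ref map = { .count = 1 };
	struct ioctl_gntdev_unmap_grant_ref unmap;
	long psz = sysconf(_SC_PAGESIZE);
	void *addr;
	int rc = -1;
	int fd = open("/dev/gntdev", O_RDWR);	/* device node name is an assumption */

	if (fd < 0)
		return -1;

	map.refs[0].domid = domid;
	map.refs[0].ref = gref;
	if (ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) < 0)
		goto out_close;

	/* map.index is the file offset the driver allocated for this grant;
	 * mmap()ing the device at that offset makes the foreign page visible. */
	addr = mmap(NULL, psz, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
		    (off_t)map.index);
	if (addr == MAP_FAILED)
		goto out_unmap_grant;

	{
		char c = *(volatile char *)addr;	/* ... use the shared page ... */
		(void)c;
	}
	munmap(addr, psz);
	rc = 0;

out_unmap_grant:
	/* The grant may only be released once it is no longer mmap()-ed;
	 * otherwise the unmap ioctl above reports -EBUSY. */
	unmap.index = map.index;
	unmap.count = 1;
	ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap);
out_close:
	close(fd);
	return rc;
}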
15095Index: head-2008-11-25/drivers/xen/netback/Makefile
15096===================================================================
15097--- /dev/null 1970-01-01 00:00:00.000000000 +0000
15098+++ head-2008-11-25/drivers/xen/netback/Makefile 2007-07-12 08:54:23.000000000 +0200
15099@@ -0,0 +1,5 @@
15100+obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
15101+obj-$(CONFIG_XEN_NETDEV_LOOPBACK) += netloop.o
15102+
15103+netbk-y := netback.o xenbus.o interface.o accel.o
15104+netloop-y := loopback.o
15105Index: head-2008-11-25/drivers/xen/netback/accel.c
15106===================================================================
15107--- /dev/null 1970-01-01 00:00:00.000000000 +0000
15108+++ head-2008-11-25/drivers/xen/netback/accel.c 2008-01-07 13:19:18.000000000 +0100
15109@@ -0,0 +1,269 @@
15110+/******************************************************************************
15111+ * drivers/xen/netback/accel.c
15112+ *
15113+ * Interface between backend virtual network device and accelerated plugin.
15114+ *
15115+ * Copyright (C) 2007 Solarflare Communications, Inc
15116+ *
15117+ * This program is free software; you can redistribute it and/or
15118+ * modify it under the terms of the GNU General Public License version 2
15119+ * as published by the Free Software Foundation; or, when distributed
15120+ * separately from the Linux kernel or incorporated into other
15121+ * software packages, subject to the following license:
15122+ *
15123+ * Permission is hereby granted, free of charge, to any person obtaining a copy
15124+ * of this source file (the "Software"), to deal in the Software without
15125+ * restriction, including without limitation the rights to use, copy, modify,
15126+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
15127+ * and to permit persons to whom the Software is furnished to do so, subject to
15128+ * the following conditions:
15129+ *
15130+ * The above copyright notice and this permission notice shall be included in
15131+ * all copies or substantial portions of the Software.
15132+ *
15133+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15134+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15135+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15136+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15137+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
15138+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15139+ * IN THE SOFTWARE.
15140+ */
15141+
15142+#include <linux/list.h>
15143+#include <asm/atomic.h>
15144+#include <xen/xenbus.h>
15145+#include <linux/mutex.h>
15146+
15147+#include "common.h"
15148+
15149+#if 0
15150+#undef DPRINTK
15151+#define DPRINTK(fmt, args...) \
15152+ printk("netback/accel (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
15153+#endif
15154+
15155+/*
15156+ * A list of available netback accelerator plugin modules (each list
15157+ * entry is of type struct netback_accelerator)
15158+ */
15159+static struct list_head accelerators_list;
15160+/* Lock used to protect access to accelerators_list */
15161+DEFINE_MUTEX(accelerators_mutex);
15162+
15163+/*
15164+ * Compare a backend to an accelerator, and decide if they are
15165+ * compatible (i.e. if the accelerator should be used by the
15166+ * backend)
15167+ */
15168+static int match_accelerator(struct xenbus_device *xendev,
15169+ struct backend_info *be,
15170+ struct netback_accelerator *accelerator)
15171+{
15172+ int rc = 0;
15173+ char *eth_name = xenbus_read(XBT_NIL, xendev->nodename, "accel", NULL);
15174+
15175+ if (IS_ERR(eth_name)) {
15176+ /* Probably means not present */
15177+ DPRINTK("%s: no match due to xenbus_read accel error %d\n",
15178+ __FUNCTION__, PTR_ERR(eth_name));
15179+ return 0;
15180+ } else {
15181+ if (!strcmp(eth_name, accelerator->eth_name))
15182+ rc = 1;
15183+ kfree(eth_name);
15184+ return rc;
15185+ }
15186+}
15187+
15188+
15189+static void do_probe(struct backend_info *be,
15190+ struct netback_accelerator *accelerator,
15191+ struct xenbus_device *xendev)
15192+{
15193+ be->accelerator = accelerator;
15194+ atomic_inc(&be->accelerator->use_count);
15195+ if (be->accelerator->hooks->probe(xendev) != 0) {
15196+ atomic_dec(&be->accelerator->use_count);
15197+ module_put(be->accelerator->hooks->owner);
15198+ be->accelerator = NULL;
15199+ }
15200+}
15201+
15202+
15203+/*
15204+ * Notify suitable backends that a new accelerator is available and
15205+ * connected. This will also notify the accelerator plugin module
15206+ * that it is being used for a device through the probe hook.
15207+ */
15208+static int netback_accelerator_probe_backend(struct device *dev, void *arg)
15209+{
15210+ struct netback_accelerator *accelerator =
15211+ (struct netback_accelerator *)arg;
15212+ struct xenbus_device *xendev = to_xenbus_device(dev);
15213+
15214+ if (!strcmp("vif", xendev->devicetype)) {
15215+ struct backend_info *be = xendev->dev.driver_data;
15216+
15217+ if (match_accelerator(xendev, be, accelerator) &&
15218+ try_module_get(accelerator->hooks->owner)) {
15219+ do_probe(be, accelerator, xendev);
15220+ }
15221+ }
15222+ return 0;
15223+}
15224+
15225+
15226+/*
15227+ * Notify suitable backends that an accelerator is unavailable.
15228+ */
15229+static int netback_accelerator_remove_backend(struct device *dev, void *arg)
15230+{
15231+ struct xenbus_device *xendev = to_xenbus_device(dev);
15232+ struct netback_accelerator *accelerator =
15233+ (struct netback_accelerator *)arg;
15234+
15235+ if (!strcmp("vif", xendev->devicetype)) {
15236+ struct backend_info *be = xendev->dev.driver_data;
15237+
15238+ if (be->accelerator == accelerator) {
15239+ be->accelerator->hooks->remove(xendev);
15240+ atomic_dec(&be->accelerator->use_count);
15241+ module_put(be->accelerator->hooks->owner);
15242+ be->accelerator = NULL;
15243+ }
15244+ }
15245+ return 0;
15246+}
15247+
15248+
15249+
15250+/*
15251+ * Entry point for a netback accelerator plugin module. Called to
15252+ * advertise its presence, and connect to any suitable backends.
15253+ */
15254+int netback_connect_accelerator(unsigned version, int id, const char *eth_name,
15255+ struct netback_accel_hooks *hooks)
15256+{
15257+ struct netback_accelerator *new_accelerator;
15258+ unsigned eth_name_len;
15259+
15260+ if (version != NETBACK_ACCEL_VERSION) {
15261+ if (version > NETBACK_ACCEL_VERSION) {
15262+ /* Caller has higher version number, leave it
15263+ up to them to decide whether to continue.
15264+ They can call again with a lower number if
15265+ they're happy to be compatible with us */
15266+ return NETBACK_ACCEL_VERSION;
15267+ } else {
15268+ /* We have a more recent version than caller.
15269+ Currently reject, but may in future be able
15270+ to be backwards compatible */
15271+ return -EPROTO;
15272+ }
15273+ }
15274+
15275+ new_accelerator =
15276+ kmalloc(sizeof(struct netback_accelerator), GFP_KERNEL);
15277+ if (!new_accelerator) {
15278+ DPRINTK("%s: failed to allocate memory for accelerator\n",
15279+ __FUNCTION__);
15280+ return -ENOMEM;
15281+ }
15282+
15283+ new_accelerator->id = id;
15284+
15285+ eth_name_len = strlen(eth_name)+1;
15286+ new_accelerator->eth_name = kmalloc(eth_name_len, GFP_KERNEL);
15287+ if (!new_accelerator->eth_name) {
15288+ DPRINTK("%s: failed to allocate memory for eth_name string\n",
15289+ __FUNCTION__);
15290+ kfree(new_accelerator);
15291+ return -ENOMEM;
15292+ }
15293+ strlcpy(new_accelerator->eth_name, eth_name, eth_name_len);
15294+
15295+ new_accelerator->hooks = hooks;
15296+
15297+ atomic_set(&new_accelerator->use_count, 0);
15298+
15299+ mutex_lock(&accelerators_mutex);
15300+ list_add(&new_accelerator->link, &accelerators_list);
15301+
15302+ /* tell existing backends about new plugin */
15303+ xenbus_for_each_backend(new_accelerator,
15304+ netback_accelerator_probe_backend);
15305+
15306+ mutex_unlock(&accelerators_mutex);
15307+
15308+ return 0;
15309+
15310+}
15311+EXPORT_SYMBOL_GPL(netback_connect_accelerator);
15312+
15313+
15314+/*
15315+ * Disconnect an accelerator plugin module that has previously been
15316+ * connected.
15317+ */
15318+void netback_disconnect_accelerator(int id, const char *eth_name)
15319+{
15320+ struct netback_accelerator *accelerator, *next;
15321+
15322+ mutex_lock(&accelerators_mutex);
15323+ list_for_each_entry_safe(accelerator, next, &accelerators_list, link) {
15324+ if (!strcmp(eth_name, accelerator->eth_name)) {
15325+ xenbus_for_each_backend
15326+ (accelerator, netback_accelerator_remove_backend);
15327+ BUG_ON(atomic_read(&accelerator->use_count) != 0);
15328+ list_del(&accelerator->link);
15329+ kfree(accelerator->eth_name);
15330+ kfree(accelerator);
15331+ break;
15332+ }
15333+ }
15334+ mutex_unlock(&accelerators_mutex);
15335+}
15336+EXPORT_SYMBOL_GPL(netback_disconnect_accelerator);
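/*
 * Editor's sketch, not part of the original patch: the rough shape of an
 * accelerator plugin module driving the two entry points exported above.
 * The probe/remove bodies, the plugin id and the "eth0" device name are
 * placeholders; struct netback_accel_hooks and NETBACK_ACCEL_VERSION come
 * from netback/common.h further down in this patch (a real plugin would
 * include whatever header ships those definitions to it).
 */
#include <linux/module.h>
#include <linux/errno.h>
#include "common.h"	/* simplification: supplies the hooks struct and version */

static int example_accel_probe(struct xenbus_device *dev)
{
	/* Set up acceleration for this vif backend; return 0 on success. */
	return 0;
}

static int example_accel_remove(struct xenbus_device *dev)
{
	/* Tear down any per-device state created in probe(). */
	return 0;
}

static struct netback_accel_hooks example_hooks = {
	.owner  = THIS_MODULE,
	.probe  = example_accel_probe,
	.remove = example_accel_remove,
};

static int __init example_accel_init(void)
{
	int rc = netback_connect_accelerator(NETBACK_ACCEL_VERSION, 1,
					     "eth0", &example_hooks);

	/*
	 * A positive return is netback's own (lower) supported version; a
	 * real plugin could retry with that version if it can speak it.
	 */
	if (rc > 0)
		return -EPROTO;
	return rc;
}

static void __exit example_accel_exit(void)
{
	netback_disconnect_accelerator(1, "eth0");
}

module_init(example_accel_init);
module_exit(example_accel_exit);
MODULE_LICENSE("GPL");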
15337+
15338+
15339+void netback_probe_accelerators(struct backend_info *be,
15340+ struct xenbus_device *dev)
15341+{
15342+ struct netback_accelerator *accelerator;
15343+
15344+ /*
15345+ * Check list of accelerators to see if any is suitable, and
15346+ * use it if it is.
15347+ */
15348+ mutex_lock(&accelerators_mutex);
15349+ list_for_each_entry(accelerator, &accelerators_list, link) {
15350+ if (match_accelerator(dev, be, accelerator) &&
15351+ try_module_get(accelerator->hooks->owner)) {
15352+ do_probe(be, accelerator, dev);
15353+ break;
15354+ }
15355+ }
15356+ mutex_unlock(&accelerators_mutex);
15357+}
15358+
15359+
15360+void netback_remove_accelerators(struct backend_info *be,
15361+ struct xenbus_device *dev)
15362+{
15363+ mutex_lock(&accelerators_mutex);
15364+ /* Notify the accelerator (if any) of this device's removal */
15365+ if (be->accelerator != NULL) {
15366+ be->accelerator->hooks->remove(dev);
15367+ atomic_dec(&be->accelerator->use_count);
15368+ module_put(be->accelerator->hooks->owner);
15369+ be->accelerator = NULL;
15370+ }
15371+ mutex_unlock(&accelerators_mutex);
15372+}
15373+
15374+
15375+void netif_accel_init(void)
15376+{
15377+ INIT_LIST_HEAD(&accelerators_list);
15378+}
15379Index: head-2008-11-25/drivers/xen/netback/common.h
15380===================================================================
15381--- /dev/null 1970-01-01 00:00:00.000000000 +0000
15382+++ head-2008-11-25/drivers/xen/netback/common.h 2008-01-07 13:19:18.000000000 +0100
15383@@ -0,0 +1,217 @@
15384+/******************************************************************************
15385+ * arch/xen/drivers/netif/backend/common.h
15386+ *
15387+ * This program is free software; you can redistribute it and/or
15388+ * modify it under the terms of the GNU General Public License version 2
15389+ * as published by the Free Software Foundation; or, when distributed
15390+ * separately from the Linux kernel or incorporated into other
15391+ * software packages, subject to the following license:
15392+ *
15393+ * Permission is hereby granted, free of charge, to any person obtaining a copy
15394+ * of this source file (the "Software"), to deal in the Software without
15395+ * restriction, including without limitation the rights to use, copy, modify,
15396+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
15397+ * and to permit persons to whom the Software is furnished to do so, subject to
15398+ * the following conditions:
15399+ *
15400+ * The above copyright notice and this permission notice shall be included in
15401+ * all copies or substantial portions of the Software.
15402+ *
15403+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15404+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15405+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15406+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15407+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
15408+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15409+ * IN THE SOFTWARE.
15410+ */
15411+
15412+#ifndef __NETIF__BACKEND__COMMON_H__
15413+#define __NETIF__BACKEND__COMMON_H__
15414+
15415+#include <linux/version.h>
15416+#include <linux/module.h>
15417+#include <linux/interrupt.h>
15418+#include <linux/slab.h>
15419+#include <linux/ip.h>
15420+#include <linux/in.h>
15421+#include <linux/netdevice.h>
15422+#include <linux/etherdevice.h>
15423+#include <linux/wait.h>
15424+#include <xen/evtchn.h>
15425+#include <xen/interface/io/netif.h>
15426+#include <asm/io.h>
15427+#include <asm/pgalloc.h>
15428+#include <xen/interface/grant_table.h>
15429+#include <xen/gnttab.h>
15430+#include <xen/driver_util.h>
15431+#include <xen/xenbus.h>
15432+
15433+#define DPRINTK(_f, _a...) \
15434+ pr_debug("(file=%s, line=%d) " _f, \
15435+ __FILE__ , __LINE__ , ## _a )
15436+#define IPRINTK(fmt, args...) \
15437+ printk(KERN_INFO "xen_net: " fmt, ##args)
15438+#define WPRINTK(fmt, args...) \
15439+ printk(KERN_WARNING "xen_net: " fmt, ##args)
15440+
15441+typedef struct netif_st {
15442+ /* Unique identifier for this interface. */
15443+ domid_t domid;
15444+ unsigned int handle;
15445+
15446+ u8 fe_dev_addr[6];
15447+
15448+ /* Physical parameters of the comms window. */
15449+ grant_handle_t tx_shmem_handle;
15450+ grant_ref_t tx_shmem_ref;
15451+ grant_handle_t rx_shmem_handle;
15452+ grant_ref_t rx_shmem_ref;
15453+ unsigned int irq;
15454+
15455+ /* The shared rings and indexes. */
15456+ netif_tx_back_ring_t tx;
15457+ netif_rx_back_ring_t rx;
15458+ struct vm_struct *tx_comms_area;
15459+ struct vm_struct *rx_comms_area;
15460+
15461+ /* Set of features that can be turned on in dev->features. */
15462+ int features;
15463+
15464+ /* Internal feature information. */
15465+ u8 can_queue:1; /* can queue packets for receiver? */
15466+ u8 copying_receiver:1; /* copy packets to receiver? */
15467+
15468+ /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
15469+ RING_IDX rx_req_cons_peek;
15470+
15471+ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
15472+ unsigned long credit_bytes;
15473+ unsigned long credit_usec;
15474+ unsigned long remaining_credit;
15475+ struct timer_list credit_timeout;
15476+
15477+ /* Enforce draining of the transmit queue. */
15478+ struct timer_list tx_queue_timeout;
15479+
15480+ /* Miscellaneous private stuff. */
15481+ struct list_head list; /* scheduling list */
15482+ atomic_t refcnt;
15483+ struct net_device *dev;
15484+ struct net_device_stats stats;
15485+
15486+ unsigned int carrier;
15487+
15488+ wait_queue_head_t waiting_to_free;
15489+} netif_t;
15490+
15491+/*
15492+ * Implement our own carrier flag: the network stack's version causes delays
15493+ * when the carrier is re-enabled (in particular, dev_activate() may not
15494+ * immediately be called, which can cause packet loss; also the etherbridge
15495+ * can be rather lazy in activating its port).
15496+ */
15497+#define netback_carrier_on(netif) ((netif)->carrier = 1)
15498+#define netback_carrier_off(netif) ((netif)->carrier = 0)
15499+#define netback_carrier_ok(netif) ((netif)->carrier)
15500+
15501+enum {
15502+ NETBK_DONT_COPY_SKB,
15503+ NETBK_DELAYED_COPY_SKB,
15504+ NETBK_ALWAYS_COPY_SKB,
15505+};
15506+
15507+extern int netbk_copy_skb_mode;
15508+
15509+/* Function pointers into netback accelerator plugin modules */
15510+struct netback_accel_hooks {
15511+ struct module *owner;
15512+ int (*probe)(struct xenbus_device *dev);
15513+ int (*remove)(struct xenbus_device *dev);
15514+};
15515+
15516+/* Structure to track the state of a netback accelerator plugin */
15517+struct netback_accelerator {
15518+ struct list_head link;
15519+ int id;
15520+ char *eth_name;
15521+ atomic_t use_count;
15522+ struct netback_accel_hooks *hooks;
15523+};
15524+
15525+struct backend_info {
15526+ struct xenbus_device *dev;
15527+ netif_t *netif;
15528+ enum xenbus_state frontend_state;
15529+
15530+ /* State relating to the netback accelerator */
15531+ void *netback_accel_priv;
15532+ /* The accelerator that this backend is currently using */
15533+ struct netback_accelerator *accelerator;
15534+};
15535+
15536+#define NETBACK_ACCEL_VERSION 0x00010001
15537+
15538+/*
15539+ * Connect an accelerator plugin module to netback. Returns zero on
15540+ * success, < 0 on error, > 0 (with highest version number supported)
15541+ * if version mismatch.
15542+ */
15543+extern int netback_connect_accelerator(unsigned version,
15544+ int id, const char *eth_name,
15545+ struct netback_accel_hooks *hooks);
15546+/* Disconnect a previously connected accelerator plugin module */
15547+extern void netback_disconnect_accelerator(int id, const char *eth_name);
15548+
15549+
15550+extern
15551+void netback_probe_accelerators(struct backend_info *be,
15552+ struct xenbus_device *dev);
15553+extern
15554+void netback_remove_accelerators(struct backend_info *be,
15555+ struct xenbus_device *dev);
15556+extern
15557+void netif_accel_init(void);
15558+
15559+
15560+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
15561+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
15562+
15563+void netif_disconnect(netif_t *netif);
15564+
15565+netif_t *netif_alloc(domid_t domid, unsigned int handle);
15566+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
15567+ unsigned long rx_ring_ref, unsigned int evtchn);
15568+
15569+#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
15570+#define netif_put(_b) \
15571+ do { \
15572+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
15573+ wake_up(&(_b)->waiting_to_free); \
15574+ } while (0)
15575+
15576+void netif_xenbus_init(void);
15577+
15578+#define netif_schedulable(netif) \
15579+ (netif_running((netif)->dev) && netback_carrier_ok(netif))
15580+
15581+void netif_schedule_work(netif_t *netif);
15582+void netif_deschedule_work(netif_t *netif);
15583+
15584+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
15585+struct net_device_stats *netif_be_get_stats(struct net_device *dev);
15586+irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
15587+
15588+static inline int netbk_can_queue(struct net_device *dev)
15589+{
15590+ netif_t *netif = netdev_priv(dev);
15591+ return netif->can_queue;
15592+}
15593+
15594+static inline int netbk_can_sg(struct net_device *dev)
15595+{
15596+ netif_t *netif = netdev_priv(dev);
15597+ return netif->features & NETIF_F_SG;
15598+}
15599+
15600+#endif /* __NETIF__BACKEND__COMMON_H__ */
15601Index: head-2008-11-25/drivers/xen/netback/interface.c
15602===================================================================
15603--- /dev/null 1970-01-01 00:00:00.000000000 +0000
15604+++ head-2008-11-25/drivers/xen/netback/interface.c 2007-06-12 13:13:45.000000000 +0200
15605@@ -0,0 +1,336 @@
15606+/******************************************************************************
15607+ * arch/xen/drivers/netif/backend/interface.c
15608+ *
15609+ * Network-device interface management.
15610+ *
15611+ * Copyright (c) 2004-2005, Keir Fraser
15612+ *
15613+ * This program is free software; you can redistribute it and/or
15614+ * modify it under the terms of the GNU General Public License version 2
15615+ * as published by the Free Software Foundation; or, when distributed
15616+ * separately from the Linux kernel or incorporated into other
15617+ * software packages, subject to the following license:
15618+ *
15619+ * Permission is hereby granted, free of charge, to any person obtaining a copy
15620+ * of this source file (the "Software"), to deal in the Software without
15621+ * restriction, including without limitation the rights to use, copy, modify,
15622+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
15623+ * and to permit persons to whom the Software is furnished to do so, subject to
15624+ * the following conditions:
15625+ *
15626+ * The above copyright notice and this permission notice shall be included in
15627+ * all copies or substantial portions of the Software.
15628+ *
15629+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15630+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15631+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15632+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15633+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
15634+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15635+ * IN THE SOFTWARE.
15636+ */
15637+
15638+#include "common.h"
15639+#include <linux/ethtool.h>
15640+#include <linux/rtnetlink.h>
15641+
15642+/*
15643+ * Module parameter 'queue_length':
15644+ *
15645+ * Enables queuing in the network stack when a client has run out of receive
15646+ * descriptors. Although this feature can improve receive bandwidth by avoiding
15647+ * packet loss, it can also result in packets sitting in the 'tx_queue' for
15648+ * unbounded time. This is bad if those packets hold onto foreign resources.
15649+ * For example, consider a packet that holds onto resources belonging to the
15650+ * guest for which it is queued (e.g., packet received on vif1.0, destined for
15651+ * vif1.1 which is not activated in the guest): in this situation the guest
15652+ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
15653+ * run a timer (tx_queue_timeout) to drain the queue when the interface is
15654+ * blocked.
15655+ */
15656+static unsigned long netbk_queue_length = 32;
15657+module_param_named(queue_length, netbk_queue_length, ulong, 0);
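/*
 * Editor's note, not part of the original patch: as a module parameter this
 * would typically be set at load time, e.g. "modprobe netbk queue_length=0"
 * to disable backend queuing entirely, or "netbk.queue_length=0" on the
 * kernel command line when the driver is built in; the module name "netbk"
 * comes from the netback Makefile above.
 */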
15658+
15659+static void __netif_up(netif_t *netif)
15660+{
15661+ enable_irq(netif->irq);
15662+ netif_schedule_work(netif);
15663+}
15664+
15665+static void __netif_down(netif_t *netif)
15666+{
15667+ disable_irq(netif->irq);
15668+ netif_deschedule_work(netif);
15669+}
15670+
15671+static int net_open(struct net_device *dev)
15672+{
15673+ netif_t *netif = netdev_priv(dev);
15674+ if (netback_carrier_ok(netif)) {
15675+ __netif_up(netif);
15676+ netif_start_queue(dev);
15677+ }
15678+ return 0;
15679+}
15680+
15681+static int net_close(struct net_device *dev)
15682+{
15683+ netif_t *netif = netdev_priv(dev);
15684+ if (netback_carrier_ok(netif))
15685+ __netif_down(netif);
15686+ netif_stop_queue(dev);
15687+ return 0;
15688+}
15689+
15690+static int netbk_change_mtu(struct net_device *dev, int mtu)
15691+{
15692+ int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
15693+
15694+ if (mtu > max)
15695+ return -EINVAL;
15696+ dev->mtu = mtu;
15697+ return 0;
15698+}
15699+
15700+static int netbk_set_sg(struct net_device *dev, u32 data)
15701+{
15702+ if (data) {
15703+ netif_t *netif = netdev_priv(dev);
15704+
15705+ if (!(netif->features & NETIF_F_SG))
15706+ return -ENOSYS;
15707+ }
15708+
15709+ return ethtool_op_set_sg(dev, data);
15710+}
15711+
15712+static int netbk_set_tso(struct net_device *dev, u32 data)
15713+{
15714+ if (data) {
15715+ netif_t *netif = netdev_priv(dev);
15716+
15717+ if (!(netif->features & NETIF_F_TSO))
15718+ return -ENOSYS;
15719+ }
15720+
15721+ return ethtool_op_set_tso(dev, data);
15722+}
15723+
15724+static struct ethtool_ops network_ethtool_ops =
15725+{
15726+ .get_tx_csum = ethtool_op_get_tx_csum,
15727+ .set_tx_csum = ethtool_op_set_tx_csum,
15728+ .get_sg = ethtool_op_get_sg,
15729+ .set_sg = netbk_set_sg,
15730+ .get_tso = ethtool_op_get_tso,
15731+ .set_tso = netbk_set_tso,
15732+ .get_link = ethtool_op_get_link,
15733+};
15734+
15735+netif_t *netif_alloc(domid_t domid, unsigned int handle)
15736+{
15737+ int err = 0;
15738+ struct net_device *dev;
15739+ netif_t *netif;
15740+ char name[IFNAMSIZ] = {};
15741+
15742+ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
15743+ dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
15744+ if (dev == NULL) {
15745+ DPRINTK("Could not create netif: out of memory\n");
15746+ return ERR_PTR(-ENOMEM);
15747+ }
15748+
15749+ netif = netdev_priv(dev);
15750+ memset(netif, 0, sizeof(*netif));
15751+ netif->domid = domid;
15752+ netif->handle = handle;
15753+ atomic_set(&netif->refcnt, 1);
15754+ init_waitqueue_head(&netif->waiting_to_free);
15755+ netif->dev = dev;
15756+
15757+ netback_carrier_off(netif);
15758+
15759+ netif->credit_bytes = netif->remaining_credit = ~0UL;
15760+ netif->credit_usec = 0UL;
15761+ init_timer(&netif->credit_timeout);
15762+ /* Initialize 'expires' now: it's used to track the credit window. */
15763+ netif->credit_timeout.expires = jiffies;
15764+
15765+ init_timer(&netif->tx_queue_timeout);
15766+
15767+ dev->hard_start_xmit = netif_be_start_xmit;
15768+ dev->get_stats = netif_be_get_stats;
15769+ dev->open = net_open;
15770+ dev->stop = net_close;
15771+ dev->change_mtu = netbk_change_mtu;
15772+ dev->features = NETIF_F_IP_CSUM;
15773+
15774+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
15775+
15776+ dev->tx_queue_len = netbk_queue_length;
15777+
15778+ /*
15779+ * Initialise a dummy MAC address. We choose the numerically
15780+ * largest non-broadcast address to prevent the address getting
15781+ * stolen by an Ethernet bridge for STP purposes.
15782+ * (FE:FF:FF:FF:FF:FF)
15783+ */
15784+ memset(dev->dev_addr, 0xFF, ETH_ALEN);
15785+ dev->dev_addr[0] &= ~0x01;
15786+
15787+ rtnl_lock();
15788+ err = register_netdevice(dev);
15789+ rtnl_unlock();
15790+ if (err) {
15791+ DPRINTK("Could not register new net device %s: err=%d\n",
15792+ dev->name, err);
15793+ free_netdev(dev);
15794+ return ERR_PTR(err);
15795+ }
15796+
15797+ DPRINTK("Successfully created netif\n");
15798+ return netif;
15799+}
15800+
15801+static int map_frontend_pages(
15802+ netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
15803+{
15804+ struct gnttab_map_grant_ref op;
15805+
15806+ gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
15807+ GNTMAP_host_map, tx_ring_ref, netif->domid);
15808+
15809+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
15810+ BUG();
15811+
15812+ if (op.status) {
15813+ DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
15814+ return op.status;
15815+ }
15816+
15817+ netif->tx_shmem_ref = tx_ring_ref;
15818+ netif->tx_shmem_handle = op.handle;
15819+
15820+ gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
15821+ GNTMAP_host_map, rx_ring_ref, netif->domid);
15822+
15823+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
15824+ BUG();
15825+
15826+ if (op.status) {
15827+ DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
15828+ return op.status;
15829+ }
15830+
15831+ netif->rx_shmem_ref = rx_ring_ref;
15832+ netif->rx_shmem_handle = op.handle;
15833+
15834+ return 0;
15835+}
15836+
15837+static void unmap_frontend_pages(netif_t *netif)
15838+{
15839+ struct gnttab_unmap_grant_ref op;
15840+
15841+ gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
15842+ GNTMAP_host_map, netif->tx_shmem_handle);
15843+
15844+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
15845+ BUG();
15846+
15847+ gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
15848+ GNTMAP_host_map, netif->rx_shmem_handle);
15849+
15850+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
15851+ BUG();
15852+}
15853+
15854+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
15855+ unsigned long rx_ring_ref, unsigned int evtchn)
15856+{
15857+ int err = -ENOMEM;
15858+ netif_tx_sring_t *txs;
15859+ netif_rx_sring_t *rxs;
15860+
15861+ /* Already connected through? */
15862+ if (netif->irq)
15863+ return 0;
15864+
15865+ netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
15866+ if (netif->tx_comms_area == NULL)
15867+ return -ENOMEM;
15868+ netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
15869+ if (netif->rx_comms_area == NULL)
15870+ goto err_rx;
15871+
15872+ err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
15873+ if (err)
15874+ goto err_map;
15875+
15876+ err = bind_interdomain_evtchn_to_irqhandler(
15877+ netif->domid, evtchn, netif_be_int, 0,
15878+ netif->dev->name, netif);
15879+ if (err < 0)
15880+ goto err_hypervisor;
15881+ netif->irq = err;
15882+ disable_irq(netif->irq);
15883+
15884+ txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
15885+ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
15886+
15887+ rxs = (netif_rx_sring_t *)
15888+ ((char *)netif->rx_comms_area->addr);
15889+ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
15890+
15891+ netif->rx_req_cons_peek = 0;
15892+
15893+ netif_get(netif);
15894+
15895+ rtnl_lock();
15896+ netback_carrier_on(netif);
15897+ if (netif_running(netif->dev))
15898+ __netif_up(netif);
15899+ rtnl_unlock();
15900+
15901+ return 0;
15902+err_hypervisor:
15903+ unmap_frontend_pages(netif);
15904+err_map:
15905+ free_vm_area(netif->rx_comms_area);
15906+err_rx:
15907+ free_vm_area(netif->tx_comms_area);
15908+ return err;
15909+}
15910+
15911+void netif_disconnect(netif_t *netif)
15912+{
15913+ if (netback_carrier_ok(netif)) {
15914+ rtnl_lock();
15915+ netback_carrier_off(netif);
15916+ netif_carrier_off(netif->dev); /* discard queued packets */
15917+ if (netif_running(netif->dev))
15918+ __netif_down(netif);
15919+ rtnl_unlock();
15920+ netif_put(netif);
15921+ }
15922+
15923+ atomic_dec(&netif->refcnt);
15924+ wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
15925+
15926+ del_timer_sync(&netif->credit_timeout);
15927+ del_timer_sync(&netif->tx_queue_timeout);
15928+
15929+ if (netif->irq)
15930+ unbind_from_irqhandler(netif->irq, netif);
15931+
15932+ unregister_netdev(netif->dev);
15933+
15934+ if (netif->tx.sring) {
15935+ unmap_frontend_pages(netif);
15936+ free_vm_area(netif->tx_comms_area);
15937+ free_vm_area(netif->rx_comms_area);
15938+ }
15939+
15940+ free_netdev(netif->dev);
15941+}
15942Index: head-2008-11-25/drivers/xen/netback/loopback.c
15943===================================================================
15944--- /dev/null 1970-01-01 00:00:00.000000000 +0000
15945+++ head-2008-11-25/drivers/xen/netback/loopback.c 2007-08-06 15:10:49.000000000 +0200
15946@@ -0,0 +1,324 @@
15947+/******************************************************************************
15948+ * netback/loopback.c
15949+ *
15950+ * A two-interface loopback device to emulate a local netfront-netback
15951+ * connection. This ensures that local packet delivery looks identical
15952+ * to inter-domain delivery. Most importantly, packets delivered locally
15953+ * originating from other domains will get *copied* when they traverse this
15954+ * driver. This prevents unbounded delays in socket-buffer queues from
15955+ * causing the netback driver to "seize up".
15956+ *
15957+ * This driver creates a symmetric pair of loopback interfaces with names
15958+ * vif0.0 and veth0. The intention is that 'vif0.0' is bound to an Ethernet
15959+ * bridge, just like a proper netback interface, while a local IP interface
15960+ * is configured on 'veth0'.
15961+ *
15962+ * As with a real netback interface, vif0.0 is configured with a suitable
15963+ * dummy MAC address. No default is provided for veth0: a reasonable strategy
15964+ * is to transfer eth0's MAC address to veth0, and give eth0 a dummy address
15965+ * (to avoid confusing the Etherbridge).
15966+ *
15967+ * Copyright (c) 2005 K A Fraser
15968+ *
15969+ * This program is free software; you can redistribute it and/or
15970+ * modify it under the terms of the GNU General Public License version 2
15971+ * as published by the Free Software Foundation; or, when distributed
15972+ * separately from the Linux kernel or incorporated into other
15973+ * software packages, subject to the following license:
15974+ *
15975+ * Permission is hereby granted, free of charge, to any person obtaining a copy
15976+ * of this source file (the "Software"), to deal in the Software without
15977+ * restriction, including without limitation the rights to use, copy, modify,
15978+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
15979+ * and to permit persons to whom the Software is furnished to do so, subject to
15980+ * the following conditions:
15981+ *
15982+ * The above copyright notice and this permission notice shall be included in
15983+ * all copies or substantial portions of the Software.
15984+ *
15985+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15986+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15987+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15988+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15989+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
15990+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15991+ * IN THE SOFTWARE.
15992+ */
15993+
15994+#include <linux/module.h>
15995+#include <linux/netdevice.h>
15996+#include <linux/inetdevice.h>
15997+#include <linux/etherdevice.h>
15998+#include <linux/skbuff.h>
15999+#include <linux/ethtool.h>
16000+#include <net/dst.h>
16001+#include <net/xfrm.h> /* secpath_reset() */
16002+#include <asm/hypervisor.h> /* is_initial_xendomain() */
16003+
16004+static int nloopbacks = -1;
16005+module_param(nloopbacks, int, 0);
16006+MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
16007+
16008+struct net_private {
16009+ struct net_device *loopback_dev;
16010+ struct net_device_stats stats;
16011+};
16012+
16013+static int loopback_open(struct net_device *dev)
16014+{
16015+ struct net_private *np = netdev_priv(dev);
16016+ memset(&np->stats, 0, sizeof(np->stats));
16017+ netif_start_queue(dev);
16018+ return 0;
16019+}
16020+
16021+static int loopback_close(struct net_device *dev)
16022+{
16023+ netif_stop_queue(dev);
16024+ return 0;
16025+}
16026+
16027+#ifdef CONFIG_X86
16028+static int is_foreign(unsigned long pfn)
16029+{
16030+ /* NB. Play it safe for auto-translation mode. */
16031+ return (xen_feature(XENFEAT_auto_translated_physmap) ||
16032+ (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT));
16033+}
16034+#else
16035+/* How to detect a foreign mapping? Play it safe. */
16036+#define is_foreign(pfn) (1)
16037+#endif
16038+
16039+static int skb_remove_foreign_references(struct sk_buff *skb)
16040+{
16041+ struct page *page;
16042+ unsigned long pfn;
16043+ int i, off;
16044+ char *vaddr;
16045+
16046+ BUG_ON(skb_shinfo(skb)->frag_list);
16047+
16048+ if (skb_cloned(skb) &&
16049+ unlikely(pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
16050+ return 0;
16051+
16052+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
16053+ pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page);
16054+ if (!is_foreign(pfn))
16055+ continue;
16056+
16057+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
16058+ if (unlikely(!page))
16059+ return 0;
16060+
16061+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
16062+ off = skb_shinfo(skb)->frags[i].page_offset;
16063+ memcpy(page_address(page) + off,
16064+ vaddr + off,
16065+ skb_shinfo(skb)->frags[i].size);
16066+ kunmap_skb_frag(vaddr);
16067+
16068+ put_page(skb_shinfo(skb)->frags[i].page);
16069+ skb_shinfo(skb)->frags[i].page = page;
16070+ }
16071+
16072+ return 1;
16073+}
16074+
16075+static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
16076+{
16077+ struct net_private *np = netdev_priv(dev);
16078+
16079+ if (!skb_remove_foreign_references(skb)) {
16080+ np->stats.tx_dropped++;
16081+ dev_kfree_skb(skb);
16082+ return 0;
16083+ }
16084+
16085+ dst_release(skb->dst);
16086+ skb->dst = NULL;
16087+
16088+ skb_orphan(skb);
16089+
16090+ np->stats.tx_bytes += skb->len;
16091+ np->stats.tx_packets++;
16092+
16093+ /* Switch to loopback context. */
16094+ dev = np->loopback_dev;
16095+ np = netdev_priv(dev);
16096+
16097+ np->stats.rx_bytes += skb->len;
16098+ np->stats.rx_packets++;
16099+
16100+ if (skb->ip_summed == CHECKSUM_HW) {
16101+ /* Defer checksum calculation. */
16102+ skb->proto_csum_blank = 1;
16103+ /* Must be a local packet: assert its integrity. */
16104+ skb->proto_data_valid = 1;
16105+ }
16106+
16107+ skb->ip_summed = skb->proto_data_valid ?
16108+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
16109+
16110+ skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */
16111+ skb->protocol = eth_type_trans(skb, dev);
16112+ skb->dev = dev;
16113+ dev->last_rx = jiffies;
16114+
16115+ /* Flush netfilter context: rx'ed skbuffs not expected to have any. */
16116+ nf_reset(skb);
16117+ secpath_reset(skb);
16118+
16119+ netif_rx(skb);
16120+
16121+ return 0;
16122+}
16123+
16124+static struct net_device_stats *loopback_get_stats(struct net_device *dev)
16125+{
16126+ struct net_private *np = netdev_priv(dev);
16127+ return &np->stats;
16128+}
16129+
16130+static struct ethtool_ops network_ethtool_ops =
16131+{
16132+ .get_tx_csum = ethtool_op_get_tx_csum,
16133+ .set_tx_csum = ethtool_op_set_tx_csum,
16134+ .get_sg = ethtool_op_get_sg,
16135+ .set_sg = ethtool_op_set_sg,
16136+ .get_tso = ethtool_op_get_tso,
16137+ .set_tso = ethtool_op_set_tso,
16138+ .get_link = ethtool_op_get_link,
16139+};
16140+
16141+/*
16142+ * Nothing to do here. Virtual interface is point-to-point and the
16143+ * physical interface is probably promiscuous anyway.
16144+ */
16145+static void loopback_set_multicast_list(struct net_device *dev)
16146+{
16147+}
16148+
16149+static void loopback_construct(struct net_device *dev, struct net_device *lo)
16150+{
16151+ struct net_private *np = netdev_priv(dev);
16152+
16153+ np->loopback_dev = lo;
16154+
16155+ dev->open = loopback_open;
16156+ dev->stop = loopback_close;
16157+ dev->hard_start_xmit = loopback_start_xmit;
16158+ dev->get_stats = loopback_get_stats;
16159+ dev->set_multicast_list = loopback_set_multicast_list;
16160+ dev->change_mtu = NULL; /* allow arbitrary mtu */
16161+
16162+ dev->tx_queue_len = 0;
16163+
16164+ dev->features = (NETIF_F_HIGHDMA |
16165+ NETIF_F_LLTX |
16166+ NETIF_F_TSO |
16167+ NETIF_F_SG |
16168+ NETIF_F_IP_CSUM);
16169+
16170+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
16171+
16172+ /*
16173+ * We do not set a jumbo MTU on the interface. Otherwise the network
16174+ * stack will try to send large packets that will get dropped by the
16175+ * Ethernet bridge (unless the physical Ethernet interface is
16176+ * configured to transfer jumbo packets). If a larger MTU is desired
16177+ * then the system administrator can specify it using the 'ifconfig'
16178+ * command.
16179+ */
16180+ /*dev->mtu = 16*1024;*/
16181+}
16182+
16183+static int __init make_loopback(int i)
16184+{
16185+ struct net_device *dev1, *dev2;
16186+ char dev_name[IFNAMSIZ];
16187+ int err = -ENOMEM;
16188+
16189+ sprintf(dev_name, "vif0.%d", i);
16190+ dev1 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
16191+ if (!dev1)
16192+ return err;
16193+
16194+ sprintf(dev_name, "veth%d", i);
16195+ dev2 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
16196+ if (!dev2)
16197+ goto fail_netdev2;
16198+
16199+ loopback_construct(dev1, dev2);
16200+ loopback_construct(dev2, dev1);
16201+
16202+ /*
16203+ * Initialise a dummy MAC address for the 'dummy backend' interface. We
16204+ * choose the numerically largest non-broadcast address to prevent the
16205+ * address getting stolen by an Ethernet bridge for STP purposes.
16206+ */
16207+ memset(dev1->dev_addr, 0xFF, ETH_ALEN);
16208+ dev1->dev_addr[0] &= ~0x01;
16209+
16210+ if ((err = register_netdev(dev1)) != 0)
16211+ goto fail;
16212+
16213+ if ((err = register_netdev(dev2)) != 0) {
16214+ unregister_netdev(dev1);
16215+ goto fail;
16216+ }
16217+
16218+ return 0;
16219+
16220+ fail:
16221+ free_netdev(dev2);
16222+ fail_netdev2:
16223+ free_netdev(dev1);
16224+ return err;
16225+}
16226+
16227+static void __exit clean_loopback(int i)
16228+{
16229+ struct net_device *dev1, *dev2;
16230+ char dev_name[IFNAMSIZ];
16231+
16232+ sprintf(dev_name, "vif0.%d", i);
16233+ dev1 = dev_get_by_name(dev_name);
16234+ sprintf(dev_name, "veth%d", i);
16235+ dev2 = dev_get_by_name(dev_name);
16236+ if (dev1 && dev2) {
16237+ unregister_netdev(dev2);
16238+ unregister_netdev(dev1);
16239+ free_netdev(dev2);
16240+ free_netdev(dev1);
16241+ }
16242+}
16243+
16244+static int __init loopback_init(void)
16245+{
16246+ int i, err = 0;
16247+
16248+ if (nloopbacks == -1)
16249+ nloopbacks = is_initial_xendomain() ? 4 : 0;
16250+
16251+ for (i = 0; i < nloopbacks; i++)
16252+ if ((err = make_loopback(i)) != 0)
16253+ break;
16254+
16255+ return err;
16256+}
16257+
16258+module_init(loopback_init);
16259+
16260+static void __exit loopback_exit(void)
16261+{
16262+ int i;
16263+
16264+ for (i = nloopbacks; i-- > 0; )
16265+ clean_loopback(i);
16266+}
16267+
16268+module_exit(loopback_exit);
16269+
16270+MODULE_LICENSE("Dual BSD/GPL");
16271Index: head-2008-11-25/drivers/xen/netback/netback.c
16272===================================================================
16273--- /dev/null 1970-01-01 00:00:00.000000000 +0000
16274+++ head-2008-11-25/drivers/xen/netback/netback.c 2008-02-20 09:32:49.000000000 +0100
16275@@ -0,0 +1,1614 @@
16276+/******************************************************************************
16277+ * drivers/xen/netback/netback.c
16278+ *
16279+ * Back-end of the driver for virtual network devices. This portion of the
16280+ * driver exports a 'unified' network-device interface that can be accessed
16281+ * by any operating system that implements a compatible front end. A
16282+ * reference front-end implementation can be found in:
16283+ * drivers/xen/netfront/netfront.c
16284+ *
16285+ * Copyright (c) 2002-2005, K A Fraser
16286+ *
16287+ * This program is free software; you can redistribute it and/or
16288+ * modify it under the terms of the GNU General Public License version 2
16289+ * as published by the Free Software Foundation; or, when distributed
16290+ * separately from the Linux kernel or incorporated into other
16291+ * software packages, subject to the following license:
16292+ *
16293+ * Permission is hereby granted, free of charge, to any person obtaining a copy
16294+ * of this source file (the "Software"), to deal in the Software without
16295+ * restriction, including without limitation the rights to use, copy, modify,
16296+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16297+ * and to permit persons to whom the Software is furnished to do so, subject to
16298+ * the following conditions:
16299+ *
16300+ * The above copyright notice and this permission notice shall be included in
16301+ * all copies or substantial portions of the Software.
16302+ *
16303+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16304+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16305+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16306+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16307+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
16308+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
16309+ * IN THE SOFTWARE.
16310+ */
16311+
16312+#include "common.h"
16313+#include <xen/balloon.h>
16314+#include <xen/interface/memory.h>
16315+
16316+/*define NETBE_DEBUG_INTERRUPT*/
16317+
16318+/* extra field used in struct page */
16319+#define netif_page_index(pg) (*(long *)&(pg)->mapping)
16320+
16321+struct netbk_rx_meta {
16322+ skb_frag_t frag;
16323+ int id;
16324+ u8 copy:1;
16325+};
16326+
16327+struct netbk_tx_pending_inuse {
16328+ struct list_head list;
16329+ unsigned long alloc_time;
16330+};
16331+
16332+static void netif_idx_release(u16 pending_idx);
16333+static void netif_page_release(struct page *page);
16334+static void make_tx_response(netif_t *netif,
16335+ netif_tx_request_t *txp,
16336+ s8 st);
16337+static netif_rx_response_t *make_rx_response(netif_t *netif,
16338+ u16 id,
16339+ s8 st,
16340+ u16 offset,
16341+ u16 size,
16342+ u16 flags);
16343+
16344+static void net_tx_action(unsigned long unused);
16345+static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
16346+
16347+static void net_rx_action(unsigned long unused);
16348+static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
16349+
16350+static struct timer_list net_timer;
16351+static struct timer_list netbk_tx_pending_timer;
16352+
16353+#define MAX_PENDING_REQS 256
16354+
16355+static struct sk_buff_head rx_queue;
16356+
16357+static struct page **mmap_pages;
16358+static inline unsigned long idx_to_pfn(unsigned int idx)
16359+{
16360+ return page_to_pfn(mmap_pages[idx]);
16361+}
16362+
16363+static inline unsigned long idx_to_kaddr(unsigned int idx)
16364+{
16365+ return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
16366+}
16367+
16368+#define PKT_PROT_LEN 64
16369+
16370+static struct pending_tx_info {
16371+ netif_tx_request_t req;
16372+ netif_t *netif;
16373+} pending_tx_info[MAX_PENDING_REQS];
16374+static u16 pending_ring[MAX_PENDING_REQS];
16375+typedef unsigned int PEND_RING_IDX;
16376+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
16377+static PEND_RING_IDX pending_prod, pending_cons;
16378+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
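/*
 * Editor's note, not part of the original patch: pending_prod and
 * pending_cons are free-running indices into pending_ring[], which holds the
 * indices of currently free pending-request slots (the dealloc_ring below
 * batches freed entries before returning them here).  (pending_prod -
 * pending_cons) is therefore the number of free slots and NR_PENDING_REQS
 * the number in use.  For example, with MAX_PENDING_REQS = 256,
 * pending_prod = 400 and pending_cons = 300 there are 100 free slots, 156
 * requests in flight, and MASK_PEND_IDX(300) = 300 & 255 = 44 selects the
 * ring entry to consume next.
 */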
16379+
16380+/* Freed TX SKBs get batched on this ring before return to pending_ring. */
16381+static u16 dealloc_ring[MAX_PENDING_REQS];
16382+static PEND_RING_IDX dealloc_prod, dealloc_cons;
16383+
16384+/* Doubly-linked list of in-use pending entries. */
16385+static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
16386+static LIST_HEAD(pending_inuse_head);
16387+
16388+static struct sk_buff_head tx_queue;
16389+
16390+static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
16391+static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
16392+static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
16393+
16394+static struct list_head net_schedule_list;
16395+static spinlock_t net_schedule_list_lock;
16396+
16397+#define MAX_MFN_ALLOC 64
16398+static unsigned long mfn_list[MAX_MFN_ALLOC];
16399+static unsigned int alloc_index = 0;
16400+
16401+/* Setting this allows the safe use of this driver without netloop. */
16402+static int MODPARM_copy_skb = 1;
16403+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
16404+MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
16405+
16406+int netbk_copy_skb_mode;
16407+
16408+static inline unsigned long alloc_mfn(void)
16409+{
16410+ BUG_ON(alloc_index == 0);
16411+ return mfn_list[--alloc_index];
16412+}
16413+
16414+static int check_mfn(int nr)
16415+{
16416+ struct xen_memory_reservation reservation = {
16417+ .extent_order = 0,
16418+ .domid = DOMID_SELF
16419+ };
16420+ int rc;
16421+
16422+ if (likely(alloc_index >= nr))
16423+ return 0;
16424+
16425+ set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
16426+ reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
16427+ rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
16428+ if (likely(rc > 0))
16429+ alloc_index += rc;
16430+
16431+ return alloc_index >= nr ? 0 : -ENOMEM;
16432+}
16433+
16434+static inline void maybe_schedule_tx_action(void)
16435+{
16436+ smp_mb();
16437+ if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
16438+ !list_empty(&net_schedule_list))
16439+ tasklet_schedule(&net_tx_tasklet);
16440+}
16441+
16442+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
16443+{
16444+ struct skb_shared_info *ninfo;
16445+ struct sk_buff *nskb;
16446+ unsigned long offset;
16447+ int ret;
16448+ int len;
16449+ int headlen;
16450+
16451+ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
16452+
16453+ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
16454+ if (unlikely(!nskb))
16455+ goto err;
16456+
16457+ skb_reserve(nskb, 16 + NET_IP_ALIGN);
16458+ headlen = nskb->end - nskb->data;
16459+ if (headlen > skb_headlen(skb))
16460+ headlen = skb_headlen(skb);
16461+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
16462+ BUG_ON(ret);
16463+
16464+ ninfo = skb_shinfo(nskb);
16465+ ninfo->gso_size = skb_shinfo(skb)->gso_size;
16466+ ninfo->gso_type = skb_shinfo(skb)->gso_type;
16467+
16468+ offset = headlen;
16469+ len = skb->len - headlen;
16470+
16471+ nskb->len = skb->len;
16472+ nskb->data_len = len;
16473+ nskb->truesize += len;
16474+
16475+ while (len) {
16476+ struct page *page;
16477+ int copy;
16478+ int zero;
16479+
16480+ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
16481+ dump_stack();
16482+ goto err_free;
16483+ }
16484+
16485+ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
16486+ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
16487+
16488+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
16489+ if (unlikely(!page))
16490+ goto err_free;
16491+
16492+ ret = skb_copy_bits(skb, offset, page_address(page), copy);
16493+ BUG_ON(ret);
16494+
16495+ ninfo->frags[ninfo->nr_frags].page = page;
16496+ ninfo->frags[ninfo->nr_frags].page_offset = 0;
16497+ ninfo->frags[ninfo->nr_frags].size = copy;
16498+ ninfo->nr_frags++;
16499+
16500+ offset += copy;
16501+ len -= copy;
16502+ }
16503+
16504+ offset = nskb->data - skb->data;
16505+
16506+ nskb->h.raw = skb->h.raw + offset;
16507+ nskb->nh.raw = skb->nh.raw + offset;
16508+ nskb->mac.raw = skb->mac.raw + offset;
16509+
16510+ return nskb;
16511+
16512+ err_free:
16513+ kfree_skb(nskb);
16514+ err:
16515+ return NULL;
16516+}
16517+
16518+static inline int netbk_max_required_rx_slots(netif_t *netif)
16519+{
16520+ if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
16521+ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
16522+ return 1; /* all in one */
16523+}
16524+
16525+static inline int netbk_queue_full(netif_t *netif)
16526+{
16527+ RING_IDX peek = netif->rx_req_cons_peek;
16528+ RING_IDX needed = netbk_max_required_rx_slots(netif);
16529+
16530+ return ((netif->rx.sring->req_prod - peek) < needed) ||
16531+ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
16532+}
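/*
 * Editor's note, not part of the original patch: rx_req_cons_peek counts rx
 * requests already claimed by packets queued on rx_queue, so the two tests
 * above check (a) that the frontend has posted enough unclaimed requests and
 * (b) that enough response slots remain in the ring.  With SG/TSO negotiated
 * a single skb may need up to MAX_SKB_FRAGS + 2 slots (header, extra_info
 * and one per fragment); without them it needs just one.
 */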
16533+
16534+static void tx_queue_callback(unsigned long data)
16535+{
16536+ netif_t *netif = (netif_t *)data;
16537+ if (netif_schedulable(netif))
16538+ netif_wake_queue(netif->dev);
16539+}
16540+
16541+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
16542+{
16543+ netif_t *netif = netdev_priv(dev);
16544+
16545+ BUG_ON(skb->dev != dev);
16546+
16547+ /* Drop the packet if the target domain has no receive buffers. */
16548+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
16549+ goto drop;
16550+
16551+ /*
16552+ * Copy the packet here if it's destined for a flipping interface
16553+ * but isn't flippable (e.g. extra references to data).
16554+ * XXX For now we also copy skbuffs whose head crosses a page
16555+ * boundary, because netbk_gop_skb can't handle them.
16556+ */
16557+ if (!netif->copying_receiver ||
16558+ ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
16559+ struct sk_buff *nskb = netbk_copy_skb(skb);
16560+ if ( unlikely(nskb == NULL) )
16561+ goto drop;
16562+ /* Copy only the header fields we use in this driver. */
16563+ nskb->dev = skb->dev;
16564+ nskb->ip_summed = skb->ip_summed;
16565+ nskb->proto_data_valid = skb->proto_data_valid;
16566+ dev_kfree_skb(skb);
16567+ skb = nskb;
16568+ }
16569+
16570+ netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
16571+ !!skb_shinfo(skb)->gso_size;
16572+ netif_get(netif);
16573+
16574+ if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
16575+ netif->rx.sring->req_event = netif->rx_req_cons_peek +
16576+ netbk_max_required_rx_slots(netif);
16577+ mb(); /* request notification /then/ check & stop the queue */
16578+ if (netbk_queue_full(netif)) {
16579+ netif_stop_queue(dev);
16580+ /*
16581+ * Schedule 500ms timeout to restart the queue, thus
16582+ * ensuring that an inactive queue will be drained.
16583+ * Packets will immediately be dropped until more
16584+ * receive buffers become available (see
16585+ * netbk_queue_full() check above).
16586+ */
16587+ netif->tx_queue_timeout.data = (unsigned long)netif;
16588+ netif->tx_queue_timeout.function = tx_queue_callback;
16589+ __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
16590+ }
16591+ }
16592+
16593+ skb_queue_tail(&rx_queue, skb);
16594+ tasklet_schedule(&net_rx_tasklet);
16595+
16596+ return 0;
16597+
16598+ drop:
16599+ netif->stats.tx_dropped++;
16600+ dev_kfree_skb(skb);
16601+ return 0;
16602+}
16603+
16604+#if 0
16605+static void xen_network_done_notify(void)
16606+{
16607+ static struct net_device *eth0_dev = NULL;
16608+ if (unlikely(eth0_dev == NULL))
16609+ eth0_dev = __dev_get_by_name("eth0");
16610+ netif_rx_schedule(eth0_dev);
16611+}
16612+/*
16613+ * Add following to poll() function in NAPI driver (Tigon3 is example):
16614+ * if ( xen_network_done() )
16615+ * tg3_enable_ints(tp);
16616+ */
16617+int xen_network_done(void)
16618+{
16619+ return skb_queue_empty(&rx_queue);
16620+}
16621+#endif
16622+
16623+struct netrx_pending_operations {
16624+ unsigned trans_prod, trans_cons;
16625+ unsigned mmu_prod, mmu_mcl;
16626+ unsigned mcl_prod, mcl_cons;
16627+ unsigned copy_prod, copy_cons;
16628+ unsigned meta_prod, meta_cons;
16629+ mmu_update_t *mmu;
16630+ gnttab_transfer_t *trans;
16631+ gnttab_copy_t *copy;
16632+ multicall_entry_t *mcl;
16633+ struct netbk_rx_meta *meta;
16634+};
16635+
16636+/* Set up the grant operations for this fragment. If it's a flipping
16637+ interface, we also set up the unmap request from here. */
16638+static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
16639+ int i, struct netrx_pending_operations *npo,
16640+ struct page *page, unsigned long size,
16641+ unsigned long offset)
16642+{
16643+ mmu_update_t *mmu;
16644+ gnttab_transfer_t *gop;
16645+ gnttab_copy_t *copy_gop;
16646+ multicall_entry_t *mcl;
16647+ netif_rx_request_t *req;
16648+ unsigned long old_mfn, new_mfn;
16649+
16650+ old_mfn = virt_to_mfn(page_address(page));
16651+
16652+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
16653+ if (netif->copying_receiver) {
16654+ /* The fragment needs to be copied rather than
16655+ flipped. */
16656+ meta->copy = 1;
16657+ copy_gop = npo->copy + npo->copy_prod++;
16658+ copy_gop->flags = GNTCOPY_dest_gref;
16659+ if (PageForeign(page)) {
16660+ struct pending_tx_info *src_pend =
16661+ &pending_tx_info[netif_page_index(page)];
16662+ copy_gop->source.domid = src_pend->netif->domid;
16663+ copy_gop->source.u.ref = src_pend->req.gref;
16664+ copy_gop->flags |= GNTCOPY_source_gref;
16665+ } else {
16666+ copy_gop->source.domid = DOMID_SELF;
16667+ copy_gop->source.u.gmfn = old_mfn;
16668+ }
16669+ copy_gop->source.offset = offset;
16670+ copy_gop->dest.domid = netif->domid;
16671+ copy_gop->dest.offset = 0;
16672+ copy_gop->dest.u.ref = req->gref;
16673+ copy_gop->len = size;
16674+ } else {
16675+ meta->copy = 0;
16676+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
16677+ new_mfn = alloc_mfn();
16678+
16679+ /*
16680+ * Set the new P2M table entry before
16681+ * reassigning the old data page. Heed the
16682+ * comment in pgtable-2level.h:pte_page(). :-)
16683+ */
16684+ set_phys_to_machine(page_to_pfn(page), new_mfn);
16685+
16686+ mcl = npo->mcl + npo->mcl_prod++;
16687+ MULTI_update_va_mapping(mcl,
16688+ (unsigned long)page_address(page),
16689+ pfn_pte_ma(new_mfn, PAGE_KERNEL),
16690+ 0);
16691+
16692+ mmu = npo->mmu + npo->mmu_prod++;
16693+ mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
16694+ MMU_MACHPHYS_UPDATE;
16695+ mmu->val = page_to_pfn(page);
16696+ }
16697+
16698+ gop = npo->trans + npo->trans_prod++;
16699+ gop->mfn = old_mfn;
16700+ gop->domid = netif->domid;
16701+ gop->ref = req->gref;
16702+ }
16703+ return req->id;
16704+}
16705+
16706+static void netbk_gop_skb(struct sk_buff *skb,
16707+ struct netrx_pending_operations *npo)
16708+{
16709+ netif_t *netif = netdev_priv(skb->dev);
16710+ int nr_frags = skb_shinfo(skb)->nr_frags;
16711+ int i;
16712+ int extra;
16713+ struct netbk_rx_meta *head_meta, *meta;
16714+
16715+ head_meta = npo->meta + npo->meta_prod++;
16716+ head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
16717+ head_meta->frag.size = skb_shinfo(skb)->gso_size;
16718+ extra = !!head_meta->frag.size + 1;
16719+
16720+ for (i = 0; i < nr_frags; i++) {
16721+ meta = npo->meta + npo->meta_prod++;
16722+ meta->frag = skb_shinfo(skb)->frags[i];
16723+ meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
16724+ meta->frag.page,
16725+ meta->frag.size,
16726+ meta->frag.page_offset);
16727+ }
16728+
16729+ /*
16730+ * This must occur at the end to ensure that we don't trash skb_shinfo
16731+ * until we're done. We know that the head doesn't cross a page
16732+ * boundary because such packets get copied in netif_be_start_xmit.
16733+ */
16734+ head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
16735+ virt_to_page(skb->data),
16736+ skb_headlen(skb),
16737+ offset_in_page(skb->data));
16738+
16739+ netif->rx.req_cons += nr_frags + extra;
16740+}
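+/*
+ * Slot layout per SKB (illustrative): the header uses the request at
+ * req_cons + 0; when gso_size is non-zero, extra == 2 and the slot at
+ * req_cons + 1 is reserved for the GSO extra_info response; fragment i then
+ * uses the request at req_cons + i + extra, which is why the fragments above
+ * pass "i + extra" to netbk_gop_frag() while the header passes 0.
+ */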
16741+
16742+static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
16743+{
16744+ int i;
16745+
16746+ for (i = 0; i < nr_frags; i++)
16747+ put_page(meta[i].frag.page);
16748+}
16749+
16750+/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
16751+ used to set up the operations on the top of
16752+ netrx_pending_operations, which have since been done. Check that
16753+ they didn't give any errors and advance over them. */
16754+static int netbk_check_gop(int nr_frags, domid_t domid,
16755+ struct netrx_pending_operations *npo)
16756+{
16757+ multicall_entry_t *mcl;
16758+ gnttab_transfer_t *gop;
16759+ gnttab_copy_t *copy_op;
16760+ int status = NETIF_RSP_OKAY;
16761+ int i;
16762+
16763+ for (i = 0; i <= nr_frags; i++) {
16764+ if (npo->meta[npo->meta_cons + i].copy) {
16765+ copy_op = npo->copy + npo->copy_cons++;
16766+ if (copy_op->status != GNTST_okay) {
16767+ DPRINTK("Bad status %d from copy to DOM%d.\n",
16768+ copy_op->status, domid);
16769+ status = NETIF_RSP_ERROR;
16770+ }
16771+ } else {
16772+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
16773+ mcl = npo->mcl + npo->mcl_cons++;
16774+ /* The update_va_mapping() must not fail. */
16775+ BUG_ON(mcl->result != 0);
16776+ }
16777+
16778+ gop = npo->trans + npo->trans_cons++;
16779+ /* Check the reassignment error code. */
16780+ if (gop->status != 0) {
16781+ DPRINTK("Bad status %d from grant transfer to DOM%u\n",
16782+ gop->status, domid);
16783+ /*
16784+ * Page no longer belongs to us unless
16785+ * GNTST_bad_page, but that should be
16786+ * a fatal error anyway.
16787+ */
16788+ BUG_ON(gop->status == GNTST_bad_page);
16789+ status = NETIF_RSP_ERROR;
16790+ }
16791+ }
16792+ }
16793+
16794+ return status;
16795+}
16796+
16797+static void netbk_add_frag_responses(netif_t *netif, int status,
16798+ struct netbk_rx_meta *meta, int nr_frags)
16799+{
16800+ int i;
16801+ unsigned long offset;
16802+
16803+ for (i = 0; i < nr_frags; i++) {
16804+ int id = meta[i].id;
16805+ int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
16806+
16807+ if (meta[i].copy)
16808+ offset = 0;
16809+ else
16810+ offset = meta[i].frag.page_offset;
16811+ make_rx_response(netif, id, status, offset,
16812+ meta[i].frag.size, flags);
16813+ }
16814+}
16815+
16816+static void net_rx_action(unsigned long unused)
16817+{
16818+ netif_t *netif = NULL;
16819+ s8 status;
16820+ u16 id, irq, flags;
16821+ netif_rx_response_t *resp;
16822+ multicall_entry_t *mcl;
16823+ struct sk_buff_head rxq;
16824+ struct sk_buff *skb;
16825+ int notify_nr = 0;
16826+ int ret;
16827+ int nr_frags;
16828+ int count;
16829+ unsigned long offset;
16830+
16831+ /*
16832+ * Putting hundreds of bytes on the stack is considered rude.
16833+ * Static works because a tasklet can only be on one CPU at any time.
16834+ */
16835+ static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
16836+ static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
16837+ static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
16838+ static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
16839+ static unsigned char rx_notify[NR_IRQS];
16840+ static u16 notify_list[NET_RX_RING_SIZE];
16841+ static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
16842+
16843+ struct netrx_pending_operations npo = {
16844+  .mmu = rx_mmu,
16845+  .trans = grant_trans_op,
16846+  .copy = grant_copy_op,
16847+  .mcl = rx_mcl,
16848+  .meta = meta};
16849+
16850+ skb_queue_head_init(&rxq);
16851+
16852+ count = 0;
16853+
16854+ while ((skb = skb_dequeue(&rx_queue)) != NULL) {
16855+ nr_frags = skb_shinfo(skb)->nr_frags;
16856+ *(int *)skb->cb = nr_frags;
16857+
16858+ if (!xen_feature(XENFEAT_auto_translated_physmap) &&
16859+ !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
16860+ check_mfn(nr_frags + 1)) {
16861+ /* Memory squeeze? Back off for an arbitrary while. */
16862+   if (net_ratelimit())
16863+ WPRINTK("Memory squeeze in netback "
16864+ "driver.\n");
16865+ mod_timer(&net_timer, jiffies + HZ);
16866+ skb_queue_head(&rx_queue, skb);
16867+ break;
16868+ }
16869+
16870+ netbk_gop_skb(skb, &npo);
16871+
16872+ count += nr_frags + 1;
16873+
16874+ __skb_queue_tail(&rxq, skb);
16875+
16876+ /* Filled the batch queue? */
16877+ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
16878+ break;
16879+ }
16880+
16881+ BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
16882+
16883+ npo.mmu_mcl = npo.mcl_prod;
16884+ if (npo.mcl_prod) {
16885+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
16886+ BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
16887+ mcl = npo.mcl + npo.mcl_prod++;
16888+
16889+ BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
16890+ mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
16891+
16892+ mcl->op = __HYPERVISOR_mmu_update;
16893+ mcl->args[0] = (unsigned long)rx_mmu;
16894+ mcl->args[1] = npo.mmu_prod;
16895+ mcl->args[2] = 0;
16896+ mcl->args[3] = DOMID_SELF;
16897+ }
16898+
16899+ if (npo.trans_prod) {
16900+ BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
16901+ mcl = npo.mcl + npo.mcl_prod++;
16902+ mcl->op = __HYPERVISOR_grant_table_op;
16903+ mcl->args[0] = GNTTABOP_transfer;
16904+ mcl->args[1] = (unsigned long)grant_trans_op;
16905+ mcl->args[2] = npo.trans_prod;
16906+ }
16907+
16908+ if (npo.copy_prod) {
16909+ BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
16910+ mcl = npo.mcl + npo.mcl_prod++;
16911+ mcl->op = __HYPERVISOR_grant_table_op;
16912+ mcl->args[0] = GNTTABOP_copy;
16913+ mcl->args[1] = (unsigned long)grant_copy_op;
16914+ mcl->args[2] = npo.copy_prod;
16915+ }
16916+
16917+ /* Nothing to do? */
16918+ if (!npo.mcl_prod)
16919+ return;
16920+
16921+ BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
16922+
16923+ ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
16924+ BUG_ON(ret != 0);
16925+ /* The mmu_machphys_update() must not fail. */
16926+ BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
16927+
16928+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
16929+ nr_frags = *(int *)skb->cb;
16930+
16931+ netif = netdev_priv(skb->dev);
16932+ /* We can't rely on skb_release_data to release the
16933+ pages used by fragments for us, since it tries to
16934+ touch the pages in the fraglist. If we're in
16935+ flipping mode, that doesn't work. In copying mode,
16936+ we still have access to all of the pages, and so
16937+ it's safe to let release_data deal with it. */
16938+ /* (Freeing the fragments is safe since we copy
16939+ non-linear skbs destined for flipping interfaces) */
16940+ if (!netif->copying_receiver) {
16941+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
16942+ skb_shinfo(skb)->frag_list = NULL;
16943+ skb_shinfo(skb)->nr_frags = 0;
16944+ netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
16945+ }
16946+
16947+ netif->stats.tx_bytes += skb->len;
16948+ netif->stats.tx_packets++;
16949+
16950+ status = netbk_check_gop(nr_frags, netif->domid, &npo);
16951+
16952+ id = meta[npo.meta_cons].id;
16953+ flags = nr_frags ? NETRXF_more_data : 0;
16954+
16955+ if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
16956+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
16957+ else if (skb->proto_data_valid) /* remote but checksummed? */
16958+ flags |= NETRXF_data_validated;
16959+
16960+ if (meta[npo.meta_cons].copy)
16961+ offset = 0;
16962+ else
16963+ offset = offset_in_page(skb->data);
16964+ resp = make_rx_response(netif, id, status, offset,
16965+ skb_headlen(skb), flags);
16966+
16967+ if (meta[npo.meta_cons].frag.size) {
16968+ struct netif_extra_info *gso =
16969+ (struct netif_extra_info *)
16970+ RING_GET_RESPONSE(&netif->rx,
16971+ netif->rx.rsp_prod_pvt++);
16972+
16973+ resp->flags |= NETRXF_extra_info;
16974+
16975+ gso->u.gso.size = meta[npo.meta_cons].frag.size;
16976+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
16977+ gso->u.gso.pad = 0;
16978+ gso->u.gso.features = 0;
16979+
16980+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
16981+ gso->flags = 0;
16982+ }
16983+
16984+ netbk_add_frag_responses(netif, status,
16985+ meta + npo.meta_cons + 1,
16986+ nr_frags);
16987+
16988+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
16989+ irq = netif->irq;
16990+ if (ret && !rx_notify[irq]) {
16991+ rx_notify[irq] = 1;
16992+ notify_list[notify_nr++] = irq;
16993+ }
16994+
16995+ if (netif_queue_stopped(netif->dev) &&
16996+ netif_schedulable(netif) &&
16997+ !netbk_queue_full(netif))
16998+ netif_wake_queue(netif->dev);
16999+
17000+ netif_put(netif);
17001+ dev_kfree_skb(skb);
17002+ npo.meta_cons += nr_frags + 1;
17003+ }
17004+
17005+ while (notify_nr != 0) {
17006+ irq = notify_list[--notify_nr];
17007+ rx_notify[irq] = 0;
17008+ notify_remote_via_irq(irq);
17009+ }
17010+
17011+ /* More work to do? */
17012+ if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
17013+ tasklet_schedule(&net_rx_tasklet);
17014+#if 0
17015+ else
17016+ xen_network_done_notify();
17017+#endif
17018+}
17019+
17020+static void net_alarm(unsigned long unused)
17021+{
17022+ tasklet_schedule(&net_rx_tasklet);
17023+}
17024+
17025+static void netbk_tx_pending_timeout(unsigned long unused)
17026+{
17027+ tasklet_schedule(&net_tx_tasklet);
17028+}
17029+
17030+struct net_device_stats *netif_be_get_stats(struct net_device *dev)
17031+{
17032+ netif_t *netif = netdev_priv(dev);
17033+ return &netif->stats;
17034+}
17035+
17036+static int __on_net_schedule_list(netif_t *netif)
17037+{
17038+ return netif->list.next != NULL;
17039+}
17040+
17041+static void remove_from_net_schedule_list(netif_t *netif)
17042+{
17043+ spin_lock_irq(&net_schedule_list_lock);
17044+ if (likely(__on_net_schedule_list(netif))) {
17045+ list_del(&netif->list);
17046+ netif->list.next = NULL;
17047+ netif_put(netif);
17048+ }
17049+ spin_unlock_irq(&net_schedule_list_lock);
17050+}
17051+
17052+static void add_to_net_schedule_list_tail(netif_t *netif)
17053+{
17054+ if (__on_net_schedule_list(netif))
17055+ return;
17056+
17057+ spin_lock_irq(&net_schedule_list_lock);
17058+ if (!__on_net_schedule_list(netif) &&
17059+ likely(netif_schedulable(netif))) {
17060+ list_add_tail(&netif->list, &net_schedule_list);
17061+ netif_get(netif);
17062+ }
17063+ spin_unlock_irq(&net_schedule_list_lock);
17064+}
17065+
17066+/*
17067+ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
17068+ * If this driver is pipelining transmit requests then we can be very
17069+ * aggressive in avoiding new-packet notifications -- frontend only needs to
17070+ * send a notification if there are no outstanding unreceived responses.
17071+ * If we may be buffering transmit requests for any reason then we must be rather
17072+ * more conservative and treat this as the final check for pending work.
17073+ */
17074+void netif_schedule_work(netif_t *netif)
17075+{
17076+ int more_to_do;
17077+
17078+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
17079+ more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
17080+#else
17081+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
17082+#endif
17083+
17084+ if (more_to_do) {
17085+ add_to_net_schedule_list_tail(netif);
17086+ maybe_schedule_tx_action();
17087+ }
17088+}
17089+
17090+void netif_deschedule_work(netif_t *netif)
17091+{
17092+ remove_from_net_schedule_list(netif);
17093+}
17094+
17095+
17096+static void tx_add_credit(netif_t *netif)
17097+{
17098+ unsigned long max_burst, max_credit;
17099+
17100+ /*
17101+ * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
17102+ * Otherwise the interface can seize up due to insufficient credit.
17103+ */
17104+ max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
17105+ max_burst = min(max_burst, 131072UL);
17106+ max_burst = max(max_burst, netif->credit_bytes);
17107+
17108+ /* Take care that adding a new chunk of credit doesn't wrap to zero. */
17109+ max_credit = netif->remaining_credit + netif->credit_bytes;
17110+ if (max_credit < netif->remaining_credit)
17111+ max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
17112+
17113+ netif->remaining_credit = min(max_credit, max_burst);
17114+}
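+/*
+ * Worked example (illustrative values): with credit_bytes == 10000 and a
+ * 60000-byte request at the head of the ring, max_burst becomes
+ * min(60000, 131072) == 60000.  Starting from remaining_credit == 0, each
+ * replenish adds credit_bytes, so remaining_credit grows by 10000 per call
+ * until it is capped at max_burst, at which point the large request fits.
+ */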
17115+
17116+static void tx_credit_callback(unsigned long data)
17117+{
17118+ netif_t *netif = (netif_t *)data;
17119+ tx_add_credit(netif);
17120+ netif_schedule_work(netif);
17121+}
17122+
17123+static inline int copy_pending_req(PEND_RING_IDX pending_idx)
17124+{
17125+ return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
17126+ &mmap_pages[pending_idx]);
17127+}
17128+
17129+static inline void net_tx_action_dealloc(void)
17130+{
17131+ struct netbk_tx_pending_inuse *inuse, *n;
17132+ gnttab_unmap_grant_ref_t *gop;
17133+ u16 pending_idx;
17134+ PEND_RING_IDX dc, dp;
17135+ netif_t *netif;
17136+ int ret;
17137+ LIST_HEAD(list);
17138+
17139+ dc = dealloc_cons;
17140+ gop = tx_unmap_ops;
17141+
17142+ /*
17143+ * Free up any grants we have finished using
17144+ */
17145+ do {
17146+ dp = dealloc_prod;
17147+
17148+ /* Ensure we see all indices enqueued by netif_idx_release(). */
17149+ smp_rmb();
17150+
17151+ while (dc != dp) {
17152+ unsigned long pfn;
17153+
17154+ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
17155+ list_move_tail(&pending_inuse[pending_idx].list, &list);
17156+
17157+ pfn = idx_to_pfn(pending_idx);
17158+ /* Already unmapped? */
17159+ if (!phys_to_machine_mapping_valid(pfn))
17160+ continue;
17161+
17162+ gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
17163+ GNTMAP_host_map,
17164+ grant_tx_handle[pending_idx]);
17165+ gop++;
17166+ }
17167+
17168+ if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
17169+ list_empty(&pending_inuse_head))
17170+ break;
17171+
17172+ /* Copy any entries that have been pending for too long. */
17173+ list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
17174+ if (time_after(inuse->alloc_time + HZ / 2, jiffies))
17175+ break;
17176+
17177+ switch (copy_pending_req(inuse - pending_inuse)) {
17178+ case 0:
17179+ list_move_tail(&inuse->list, &list);
17180+ continue;
17181+ case -EBUSY:
17182+ list_del_init(&inuse->list);
17183+ continue;
17184+ case -ENOENT:
17185+ continue;
17186+ }
17187+
17188+ break;
17189+ }
17190+ } while (dp != dealloc_prod);
17191+
17192+ dealloc_cons = dc;
17193+
17194+ ret = HYPERVISOR_grant_table_op(
17195+ GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
17196+ BUG_ON(ret);
17197+
17198+ list_for_each_entry_safe(inuse, n, &list, list) {
17199+ pending_idx = inuse - pending_inuse;
17200+
17201+ netif = pending_tx_info[pending_idx].netif;
17202+
17203+ make_tx_response(netif, &pending_tx_info[pending_idx].req,
17204+ NETIF_RSP_OKAY);
17205+
17206+ /* Ready for next use. */
17207+ gnttab_reset_grant_page(mmap_pages[pending_idx]);
17208+
17209+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
17210+
17211+ netif_put(netif);
17212+
17213+ list_del_init(&inuse->list);
17214+ }
17215+}
17216+
17217+static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
17218+{
17219+ RING_IDX cons = netif->tx.req_cons;
17220+
17221+ do {
17222+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
17223+ if (cons >= end)
17224+ break;
17225+ txp = RING_GET_REQUEST(&netif->tx, cons++);
17226+ } while (1);
17227+ netif->tx.req_cons = cons;
17228+ netif_schedule_work(netif);
17229+ netif_put(netif);
17230+}
17231+
17232+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
17233+ netif_tx_request_t *txp, int work_to_do)
17234+{
17235+ RING_IDX cons = netif->tx.req_cons;
17236+ int frags = 0;
17237+
17238+ if (!(first->flags & NETTXF_more_data))
17239+ return 0;
17240+
17241+ do {
17242+ if (frags >= work_to_do) {
17243+ DPRINTK("Need more frags\n");
17244+ return -frags;
17245+ }
17246+
17247+ if (unlikely(frags >= MAX_SKB_FRAGS)) {
17248+ DPRINTK("Too many frags\n");
17249+ return -frags;
17250+ }
17251+
17252+ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
17253+ sizeof(*txp));
17254+ if (txp->size > first->size) {
17255+ DPRINTK("Frags galore\n");
17256+ return -frags;
17257+ }
17258+
17259+ first->size -= txp->size;
17260+ frags++;
17261+
17262+ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
17263+ DPRINTK("txp->offset: %x, size: %u\n",
17264+ txp->offset, txp->size);
17265+ return -frags;
17266+ }
17267+ } while ((txp++)->flags & NETTXF_more_data);
17268+
17269+ return frags;
17270+}
17271+
17272+static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
17273+ struct sk_buff *skb,
17274+ netif_tx_request_t *txp,
17275+ gnttab_map_grant_ref_t *mop)
17276+{
17277+ struct skb_shared_info *shinfo = skb_shinfo(skb);
17278+ skb_frag_t *frags = shinfo->frags;
17279+ unsigned long pending_idx = *((u16 *)skb->data);
17280+ int i, start;
17281+
17282+ /* Skip first skb fragment if it is on same page as header fragment. */
17283+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
17284+
17285+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
17286+ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
17287+
17288+ gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
17289+ GNTMAP_host_map | GNTMAP_readonly,
17290+ txp->gref, netif->domid);
17291+
17292+ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
17293+ netif_get(netif);
17294+ pending_tx_info[pending_idx].netif = netif;
17295+ frags[i].page = (void *)pending_idx;
17296+ }
17297+
17298+ return mop;
17299+}
17300+
17301+static int netbk_tx_check_mop(struct sk_buff *skb,
17302+ gnttab_map_grant_ref_t **mopp)
17303+{
17304+ gnttab_map_grant_ref_t *mop = *mopp;
17305+ int pending_idx = *((u16 *)skb->data);
17306+ netif_t *netif = pending_tx_info[pending_idx].netif;
17307+ netif_tx_request_t *txp;
17308+ struct skb_shared_info *shinfo = skb_shinfo(skb);
17309+ int nr_frags = shinfo->nr_frags;
17310+ int i, err, start;
17311+
17312+ /* Check status of header. */
17313+ err = mop->status;
17314+ if (unlikely(err)) {
17315+ txp = &pending_tx_info[pending_idx].req;
17316+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
17317+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
17318+ netif_put(netif);
17319+ } else {
17320+ set_phys_to_machine(
17321+ __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
17322+ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
17323+ grant_tx_handle[pending_idx] = mop->handle;
17324+ }
17325+
17326+ /* Skip first skb fragment if it is on same page as header fragment. */
17327+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
17328+
17329+ for (i = start; i < nr_frags; i++) {
17330+ int j, newerr;
17331+
17332+ pending_idx = (unsigned long)shinfo->frags[i].page;
17333+
17334+ /* Check error status: if okay then remember grant handle. */
17335+ newerr = (++mop)->status;
17336+ if (likely(!newerr)) {
17337+ set_phys_to_machine(
17338+ __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
17339+ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
17340+ grant_tx_handle[pending_idx] = mop->handle;
17341+ /* Had a previous error? Invalidate this fragment. */
17342+ if (unlikely(err))
17343+ netif_idx_release(pending_idx);
17344+ continue;
17345+ }
17346+
17347+ /* Error on this fragment: respond to client with an error. */
17348+ txp = &pending_tx_info[pending_idx].req;
17349+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
17350+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
17351+ netif_put(netif);
17352+
17353+ /* Not the first error? Preceding frags already invalidated. */
17354+ if (err)
17355+ continue;
17356+
17357+ /* First error: invalidate header and preceding fragments. */
17358+ pending_idx = *((u16 *)skb->data);
17359+ netif_idx_release(pending_idx);
17360+ for (j = start; j < i; j++) {
17361+   pending_idx = (unsigned long)shinfo->frags[j].page;
17362+ netif_idx_release(pending_idx);
17363+ }
17364+
17365+ /* Remember the error: invalidate all subsequent fragments. */
17366+ err = newerr;
17367+ }
17368+
17369+ *mopp = mop + 1;
17370+ return err;
17371+}
17372+
17373+static void netbk_fill_frags(struct sk_buff *skb)
17374+{
17375+ struct skb_shared_info *shinfo = skb_shinfo(skb);
17376+ int nr_frags = shinfo->nr_frags;
17377+ int i;
17378+
17379+ for (i = 0; i < nr_frags; i++) {
17380+ skb_frag_t *frag = shinfo->frags + i;
17381+ netif_tx_request_t *txp;
17382+ unsigned long pending_idx;
17383+
17384+ pending_idx = (unsigned long)frag->page;
17385+
17386+ pending_inuse[pending_idx].alloc_time = jiffies;
17387+ list_add_tail(&pending_inuse[pending_idx].list,
17388+ &pending_inuse_head);
17389+
17390+ txp = &pending_tx_info[pending_idx].req;
17391+ frag->page = virt_to_page(idx_to_kaddr(pending_idx));
17392+ frag->size = txp->size;
17393+ frag->page_offset = txp->offset;
17394+
17395+ skb->len += txp->size;
17396+ skb->data_len += txp->size;
17397+ skb->truesize += txp->size;
17398+ }
17399+}
17400+
17401+int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
17402+ int work_to_do)
17403+{
17404+ struct netif_extra_info extra;
17405+ RING_IDX cons = netif->tx.req_cons;
17406+
17407+ do {
17408+ if (unlikely(work_to_do-- <= 0)) {
17409+ DPRINTK("Missing extra info\n");
17410+ return -EBADR;
17411+ }
17412+
17413+ memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
17414+ sizeof(extra));
17415+ if (unlikely(!extra.type ||
17416+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
17417+ netif->tx.req_cons = ++cons;
17418+ DPRINTK("Invalid extra type: %d\n", extra.type);
17419+ return -EINVAL;
17420+ }
17421+
17422+ memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
17423+ netif->tx.req_cons = ++cons;
17424+ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
17425+
17426+ return work_to_do;
17427+}
17428+
17429+static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
17430+{
17431+ if (!gso->u.gso.size) {
17432+ DPRINTK("GSO size must not be zero.\n");
17433+ return -EINVAL;
17434+ }
17435+
17436+ /* Currently only TCPv4 segmentation offload is supported. */
17437+ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
17438+ DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
17439+ return -EINVAL;
17440+ }
17441+
17442+ skb_shinfo(skb)->gso_size = gso->u.gso.size;
17443+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
17444+
17445+ /* Header must be checked, and gso_segs computed. */
17446+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
17447+ skb_shinfo(skb)->gso_segs = 0;
17448+
17449+ return 0;
17450+}
17451+
17452+/* Called after netfront has transmitted */
17453+static void net_tx_action(unsigned long unused)
17454+{
17455+ struct list_head *ent;
17456+ struct sk_buff *skb;
17457+ netif_t *netif;
17458+ netif_tx_request_t txreq;
17459+ netif_tx_request_t txfrags[MAX_SKB_FRAGS];
17460+ struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
17461+ u16 pending_idx;
17462+ RING_IDX i;
17463+ gnttab_map_grant_ref_t *mop;
17464+ unsigned int data_len;
17465+ int ret, work_to_do;
17466+
17467+ if (dealloc_cons != dealloc_prod)
17468+ net_tx_action_dealloc();
17469+
17470+ mop = tx_map_ops;
17471+ while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
17472+ !list_empty(&net_schedule_list)) {
17473+ /* Get a netif from the list with work to do. */
17474+ ent = net_schedule_list.next;
17475+ netif = list_entry(ent, netif_t, list);
17476+ netif_get(netif);
17477+ remove_from_net_schedule_list(netif);
17478+
17479+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
17480+ if (!work_to_do) {
17481+ netif_put(netif);
17482+ continue;
17483+ }
17484+
17485+ i = netif->tx.req_cons;
17486+ rmb(); /* Ensure that we see the request before we copy it. */
17487+ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
17488+
17489+ /* Credit-based scheduling. */
17490+ if (txreq.size > netif->remaining_credit) {
17491+ unsigned long now = jiffies;
17492+ unsigned long next_credit =
17493+ netif->credit_timeout.expires +
17494+ msecs_to_jiffies(netif->credit_usec / 1000);
17495+
17496+ /* Timer could already be pending in rare cases. */
17497+ if (timer_pending(&netif->credit_timeout)) {
17498+ netif_put(netif);
17499+ continue;
17500+ }
17501+
17502+ /* Passed the point where we can replenish credit? */
17503+ if (time_after_eq(now, next_credit)) {
17504+ netif->credit_timeout.expires = now;
17505+ tx_add_credit(netif);
17506+ }
17507+
17508+ /* Still too big to send right now? Set a callback. */
17509+ if (txreq.size > netif->remaining_credit) {
17510+ netif->credit_timeout.data =
17511+ (unsigned long)netif;
17512+ netif->credit_timeout.function =
17513+ tx_credit_callback;
17514+ __mod_timer(&netif->credit_timeout,
17515+ next_credit);
17516+ netif_put(netif);
17517+ continue;
17518+ }
17519+ }
17520+ netif->remaining_credit -= txreq.size;
17521+
17522+ work_to_do--;
17523+ netif->tx.req_cons = ++i;
17524+
17525+ memset(extras, 0, sizeof(extras));
17526+ if (txreq.flags & NETTXF_extra_info) {
17527+ work_to_do = netbk_get_extras(netif, extras,
17528+ work_to_do);
17529+ i = netif->tx.req_cons;
17530+ if (unlikely(work_to_do < 0)) {
17531+ netbk_tx_err(netif, &txreq, i);
17532+ continue;
17533+ }
17534+ }
17535+
17536+ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
17537+ if (unlikely(ret < 0)) {
17538+ netbk_tx_err(netif, &txreq, i - ret);
17539+ continue;
17540+ }
17541+ i += ret;
17542+
17543+ if (unlikely(txreq.size < ETH_HLEN)) {
17544+ DPRINTK("Bad packet size: %d\n", txreq.size);
17545+ netbk_tx_err(netif, &txreq, i);
17546+ continue;
17547+ }
17548+
17549+  /* The payload must not cross a page boundary, as it cannot be fragmented. */
17550+ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
17551+ DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
17552+ txreq.offset, txreq.size,
17553+ (txreq.offset &~PAGE_MASK) + txreq.size);
17554+ netbk_tx_err(netif, &txreq, i);
17555+ continue;
17556+ }
17557+
17558+ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
17559+
17560+ data_len = (txreq.size > PKT_PROT_LEN &&
17561+ ret < MAX_SKB_FRAGS) ?
17562+ PKT_PROT_LEN : txreq.size;
17563+
17564+ skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
17565+ GFP_ATOMIC | __GFP_NOWARN);
17566+ if (unlikely(skb == NULL)) {
17567+ DPRINTK("Can't allocate a skb in start_xmit.\n");
17568+ netbk_tx_err(netif, &txreq, i);
17569+ break;
17570+ }
17571+
17572+ /* Packets passed to netif_rx() must have some headroom. */
17573+ skb_reserve(skb, 16 + NET_IP_ALIGN);
17574+
17575+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
17576+ struct netif_extra_info *gso;
17577+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
17578+
17579+ if (netbk_set_skb_gso(skb, gso)) {
17580+ kfree_skb(skb);
17581+ netbk_tx_err(netif, &txreq, i);
17582+ continue;
17583+ }
17584+ }
17585+
17586+ gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
17587+ GNTMAP_host_map | GNTMAP_readonly,
17588+ txreq.gref, netif->domid);
17589+ mop++;
17590+
17591+ memcpy(&pending_tx_info[pending_idx].req,
17592+ &txreq, sizeof(txreq));
17593+ pending_tx_info[pending_idx].netif = netif;
17594+ *((u16 *)skb->data) = pending_idx;
17595+
17596+ __skb_put(skb, data_len);
17597+
17598+ skb_shinfo(skb)->nr_frags = ret;
17599+ if (data_len < txreq.size) {
17600+ skb_shinfo(skb)->nr_frags++;
17601+ skb_shinfo(skb)->frags[0].page =
17602+ (void *)(unsigned long)pending_idx;
17603+ } else {
17604+ /* Discriminate from any valid pending_idx value. */
17605+ skb_shinfo(skb)->frags[0].page = (void *)~0UL;
17606+ }
17607+
17608+ __skb_queue_tail(&tx_queue, skb);
17609+
17610+ pending_cons++;
17611+
17612+ mop = netbk_get_requests(netif, skb, txfrags, mop);
17613+
17614+ netif->tx.req_cons = i;
17615+ netif_schedule_work(netif);
17616+
17617+ if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
17618+ break;
17619+ }
17620+
17621+ if (mop == tx_map_ops)
17622+ return;
17623+
17624+ ret = HYPERVISOR_grant_table_op(
17625+ GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
17626+ BUG_ON(ret);
17627+
17628+ mop = tx_map_ops;
17629+ while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
17630+ netif_tx_request_t *txp;
17631+
17632+ pending_idx = *((u16 *)skb->data);
17633+ netif = pending_tx_info[pending_idx].netif;
17634+ txp = &pending_tx_info[pending_idx].req;
17635+
17636+ /* Check the remap error code. */
17637+ if (unlikely(netbk_tx_check_mop(skb, &mop))) {
17638+ DPRINTK("netback grant failed.\n");
17639+ skb_shinfo(skb)->nr_frags = 0;
17640+ kfree_skb(skb);
17641+ continue;
17642+ }
17643+
17644+ data_len = skb->len;
17645+ memcpy(skb->data,
17646+ (void *)(idx_to_kaddr(pending_idx)|txp->offset),
17647+ data_len);
17648+ if (data_len < txp->size) {
17649+ /* Append the packet payload as a fragment. */
17650+ txp->offset += data_len;
17651+ txp->size -= data_len;
17652+ } else {
17653+ /* Schedule a response immediately. */
17654+ netif_idx_release(pending_idx);
17655+ }
17656+
17657+ /*
17658+ * Old frontends do not assert data_validated but we
17659+ * can infer it from csum_blank so test both flags.
17660+ */
17661+ if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
17662+ skb->ip_summed = CHECKSUM_UNNECESSARY;
17663+ skb->proto_data_valid = 1;
17664+ } else {
17665+ skb->ip_summed = CHECKSUM_NONE;
17666+ skb->proto_data_valid = 0;
17667+ }
17668+ skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
17669+
17670+ netbk_fill_frags(skb);
17671+
17672+ skb->dev = netif->dev;
17673+ skb->protocol = eth_type_trans(skb, skb->dev);
17674+
17675+ netif->stats.rx_bytes += skb->len;
17676+ netif->stats.rx_packets++;
17677+
17678+ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
17679+ unlikely(skb_linearize(skb))) {
17680+ DPRINTK("Can't linearize skb in net_tx_action.\n");
17681+ kfree_skb(skb);
17682+ continue;
17683+ }
17684+
17685+ netif_rx(skb);
17686+ netif->dev->last_rx = jiffies;
17687+ }
17688+
17689+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
17690+ !list_empty(&pending_inuse_head)) {
17691+ struct netbk_tx_pending_inuse *oldest;
17692+
17693+ oldest = list_entry(pending_inuse_head.next,
17694+ struct netbk_tx_pending_inuse, list);
17695+ mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
17696+ }
17697+}
17698+
17699+static void netif_idx_release(u16 pending_idx)
17700+{
17701+ static DEFINE_SPINLOCK(_lock);
17702+ unsigned long flags;
17703+
17704+ spin_lock_irqsave(&_lock, flags);
17705+ dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
17706+ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
17707+ smp_wmb();
17708+ dealloc_prod++;
17709+ spin_unlock_irqrestore(&_lock, flags);
17710+
17711+ tasklet_schedule(&net_tx_tasklet);
17712+}
17713+
17714+static void netif_page_release(struct page *page)
17715+{
17716+ netif_idx_release(netif_page_index(page));
17717+}
17718+
17719+irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
17720+{
17721+ netif_t *netif = dev_id;
17722+
17723+ add_to_net_schedule_list_tail(netif);
17724+ maybe_schedule_tx_action();
17725+
17726+ if (netif_schedulable(netif) && !netbk_queue_full(netif))
17727+ netif_wake_queue(netif->dev);
17728+
17729+ return IRQ_HANDLED;
17730+}
17731+
17732+static void make_tx_response(netif_t *netif,
17733+ netif_tx_request_t *txp,
17734+ s8 st)
17735+{
17736+ RING_IDX i = netif->tx.rsp_prod_pvt;
17737+ netif_tx_response_t *resp;
17738+ int notify;
17739+
17740+ resp = RING_GET_RESPONSE(&netif->tx, i);
17741+ resp->id = txp->id;
17742+ resp->status = st;
17743+
17744+ if (txp->flags & NETTXF_extra_info)
17745+ RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
17746+
17747+ netif->tx.rsp_prod_pvt = ++i;
17748+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
17749+ if (notify)
17750+ notify_remote_via_irq(netif->irq);
17751+
17752+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
17753+ if (i == netif->tx.req_cons) {
17754+ int more_to_do;
17755+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
17756+ if (more_to_do)
17757+ add_to_net_schedule_list_tail(netif);
17758+ }
17759+#endif
17760+}
17761+
17762+static netif_rx_response_t *make_rx_response(netif_t *netif,
17763+ u16 id,
17764+ s8 st,
17765+ u16 offset,
17766+ u16 size,
17767+ u16 flags)
17768+{
17769+ RING_IDX i = netif->rx.rsp_prod_pvt;
17770+ netif_rx_response_t *resp;
17771+
17772+ resp = RING_GET_RESPONSE(&netif->rx, i);
17773+ resp->offset = offset;
17774+ resp->flags = flags;
17775+ resp->id = id;
17776+ resp->status = (s16)size;
17777+ if (st < 0)
17778+ resp->status = (s16)st;
17779+
17780+ netif->rx.rsp_prod_pvt = ++i;
17781+
17782+ return resp;
17783+}
17784+
17785+#ifdef NETBE_DEBUG_INTERRUPT
17786+static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
17787+{
17788+ struct list_head *ent;
17789+ netif_t *netif;
17790+ int i = 0;
17791+
17792+ printk(KERN_ALERT "netif_schedule_list:\n");
17793+ spin_lock_irq(&net_schedule_list_lock);
17794+
17795+ list_for_each (ent, &net_schedule_list) {
17796+ netif = list_entry(ent, netif_t, list);
17797+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
17798+ "rx_resp_prod=%08x\n",
17799+ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
17800+ printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
17801+ netif->tx.req_cons, netif->tx.rsp_prod_pvt);
17802+ printk(KERN_ALERT " shared(rx_req_prod=%08x "
17803+ "rx_resp_prod=%08x\n",
17804+ netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
17805+ printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
17806+ netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
17807+ printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
17808+ netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
17809+ i++;
17810+ }
17811+
17812+ spin_unlock_irq(&net_schedule_list_lock);
17813+ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
17814+
17815+ return IRQ_HANDLED;
17816+}
17817+#endif
17818+
17819+static int __init netback_init(void)
17820+{
17821+ int i;
17822+ struct page *page;
17823+
17824+ if (!is_running_on_xen())
17825+ return -ENODEV;
17826+
17827+ /* We can increase reservation by this much in net_rx_action(). */
17828+ balloon_update_driver_allowance(NET_RX_RING_SIZE);
17829+
17830+ skb_queue_head_init(&rx_queue);
17831+ skb_queue_head_init(&tx_queue);
17832+
17833+ init_timer(&net_timer);
17834+ net_timer.data = 0;
17835+ net_timer.function = net_alarm;
17836+
17837+ init_timer(&netbk_tx_pending_timer);
17838+ netbk_tx_pending_timer.data = 0;
17839+ netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
17840+
17841+ mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
17842+ if (mmap_pages == NULL) {
17843+ printk("%s: out of memory\n", __FUNCTION__);
17844+ return -ENOMEM;
17845+ }
17846+
17847+ for (i = 0; i < MAX_PENDING_REQS; i++) {
17848+ page = mmap_pages[i];
17849+ SetPageForeign(page, netif_page_release);
17850+ netif_page_index(page) = i;
17851+ INIT_LIST_HEAD(&pending_inuse[i].list);
17852+ }
17853+
17854+ pending_cons = 0;
17855+ pending_prod = MAX_PENDING_REQS;
17856+ for (i = 0; i < MAX_PENDING_REQS; i++)
17857+ pending_ring[i] = i;
17858+
17859+ spin_lock_init(&net_schedule_list_lock);
17860+ INIT_LIST_HEAD(&net_schedule_list);
17861+
17862+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
17863+ if (MODPARM_copy_skb) {
17864+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
17865+ NULL, 0))
17866+ netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
17867+ else
17868+ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
17869+ }
17870+
17871+ netif_accel_init();
17872+
17873+ netif_xenbus_init();
17874+
17875+#ifdef NETBE_DEBUG_INTERRUPT
17876+ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
17877+ 0,
17878+ netif_be_dbg,
17879+ SA_SHIRQ,
17880+ "net-be-dbg",
17881+ &netif_be_dbg);
17882+#endif
17883+
17884+ return 0;
17885+}
17886+
17887+module_init(netback_init);
17888+
17889+MODULE_LICENSE("Dual BSD/GPL");
17890Index: head-2008-11-25/drivers/xen/netback/xenbus.c
17891===================================================================
17892--- /dev/null 1970-01-01 00:00:00.000000000 +0000
17893+++ head-2008-11-25/drivers/xen/netback/xenbus.c 2008-09-01 12:07:31.000000000 +0200
17894@@ -0,0 +1,454 @@
17895+/* Xenbus code for netif backend
17896+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
17897+ Copyright (C) 2005 XenSource Ltd
17898+
17899+ This program is free software; you can redistribute it and/or modify
17900+ it under the terms of the GNU General Public License as published by
17901+ the Free Software Foundation; either version 2 of the License, or
17902+ (at your option) any later version.
17903+
17904+ This program is distributed in the hope that it will be useful,
17905+ but WITHOUT ANY WARRANTY; without even the implied warranty of
17906+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17907+ GNU General Public License for more details.
17908+
17909+ You should have received a copy of the GNU General Public License
17910+ along with this program; if not, write to the Free Software
17911+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17912+*/
17913+
17914+#include <stdarg.h>
17915+#include <linux/module.h>
17916+#include <xen/xenbus.h>
17917+#include "common.h"
17918+
17919+#if 0
17920+#undef DPRINTK
17921+#define DPRINTK(fmt, args...) \
17922+ printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
17923+#endif
17924+
17925+
17926+static int connect_rings(struct backend_info *);
17927+static void connect(struct backend_info *);
17928+static void backend_create_netif(struct backend_info *be);
17929+
17930+static int netback_remove(struct xenbus_device *dev)
17931+{
17932+ struct backend_info *be = dev->dev.driver_data;
17933+
17934+ netback_remove_accelerators(be, dev);
17935+
17936+ if (be->netif) {
17937+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
17938+ netif_disconnect(be->netif);
17939+ be->netif = NULL;
17940+ }
17941+ kfree(be);
17942+ dev->dev.driver_data = NULL;
17943+ return 0;
17944+}
17945+
17946+
17947+/**
17948+ * Entry point to this code when a new device is created. Allocate the basic
17949+ * structures and switch to InitWait.
17950+ */
17951+static int netback_probe(struct xenbus_device *dev,
17952+ const struct xenbus_device_id *id)
17953+{
17954+ const char *message;
17955+ struct xenbus_transaction xbt;
17956+ int err;
17957+ int sg;
17958+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
17959+ GFP_KERNEL);
17960+ if (!be) {
17961+ xenbus_dev_fatal(dev, -ENOMEM,
17962+ "allocating backend structure");
17963+ return -ENOMEM;
17964+ }
17965+
17966+ be->dev = dev;
17967+ dev->dev.driver_data = be;
17968+
17969+ sg = 1;
17970+ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
17971+ sg = 0;
17972+
17973+ do {
17974+ err = xenbus_transaction_start(&xbt);
17975+ if (err) {
17976+ xenbus_dev_fatal(dev, err, "starting transaction");
17977+ goto fail;
17978+ }
17979+
17980+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
17981+ if (err) {
17982+ message = "writing feature-sg";
17983+ goto abort_transaction;
17984+ }
17985+
17986+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
17987+ "%d", sg);
17988+ if (err) {
17989+ message = "writing feature-gso-tcpv4";
17990+ goto abort_transaction;
17991+ }
17992+
17993+ /* We support rx-copy path. */
17994+ err = xenbus_printf(xbt, dev->nodename,
17995+ "feature-rx-copy", "%d", 1);
17996+ if (err) {
17997+ message = "writing feature-rx-copy";
17998+ goto abort_transaction;
17999+ }
18000+
18001+ /*
18002+ * We don't support rx-flip path (except old guests who don't
18003+ * grok this feature flag).
18004+ */
18005+ err = xenbus_printf(xbt, dev->nodename,
18006+ "feature-rx-flip", "%d", 0);
18007+ if (err) {
18008+ message = "writing feature-rx-flip";
18009+ goto abort_transaction;
18010+ }
18011+
18012+ err = xenbus_transaction_end(xbt, 0);
18013+ } while (err == -EAGAIN);
18014+
18015+ if (err) {
18016+ xenbus_dev_fatal(dev, err, "completing transaction");
18017+ goto fail;
18018+ }
18019+
18020+ netback_probe_accelerators(be, dev);
18021+
18022+ err = xenbus_switch_state(dev, XenbusStateInitWait);
18023+ if (err)
18024+ goto fail;
18025+
18026+ /* This kicks hotplug scripts, so do it immediately. */
18027+ backend_create_netif(be);
18028+
18029+ return 0;
18030+
18031+abort_transaction:
18032+ xenbus_transaction_end(xbt, 1);
18033+ xenbus_dev_fatal(dev, err, "%s", message);
18034+fail:
18035+ DPRINTK("failed");
18036+ netback_remove(dev);
18037+ return err;
18038+}
18039+
18040+
18041+/**
18042+ * Handle the creation of the hotplug script environment. We add the script
18043+ * and vif variables to the environment, for the benefit of the vif-* hotplug
18044+ * scripts.
18045+ */
18046+static int netback_uevent(struct xenbus_device *xdev, char **envp,
18047+ int num_envp, char *buffer, int buffer_size)
18048+{
18049+ struct backend_info *be = xdev->dev.driver_data;
18050+ netif_t *netif = be->netif;
18051+ int i = 0, length = 0;
18052+ char *val;
18053+
18054+ DPRINTK("netback_uevent");
18055+
18056+ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
18057+ if (IS_ERR(val)) {
18058+ int err = PTR_ERR(val);
18059+ xenbus_dev_fatal(xdev, err, "reading script");
18060+ return err;
18061+ }
18062+ else {
18063+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
18064+ &length, "script=%s", val);
18065+ kfree(val);
18066+ }
18067+
18068+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
18069+ "vif=%s", netif->dev->name);
18070+
18071+ envp[i] = NULL;
18072+
18073+ return 0;
18074+}
18075+
18076+
18077+static void backend_create_netif(struct backend_info *be)
18078+{
18079+ int err;
18080+ long handle;
18081+ struct xenbus_device *dev = be->dev;
18082+
18083+ if (be->netif != NULL)
18084+ return;
18085+
18086+ err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
18087+ if (err != 1) {
18088+ xenbus_dev_fatal(dev, err, "reading handle");
18089+ return;
18090+ }
18091+
18092+ be->netif = netif_alloc(dev->otherend_id, handle);
18093+ if (IS_ERR(be->netif)) {
18094+ err = PTR_ERR(be->netif);
18095+ be->netif = NULL;
18096+ xenbus_dev_fatal(dev, err, "creating interface");
18097+ return;
18098+ }
18099+
18100+ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
18101+}
18102+
18103+
18104+/**
18105+ * Callback received when the frontend's state changes.
18106+ */
18107+static void frontend_changed(struct xenbus_device *dev,
18108+ enum xenbus_state frontend_state)
18109+{
18110+ struct backend_info *be = dev->dev.driver_data;
18111+
18112+ DPRINTK("%s", xenbus_strstate(frontend_state));
18113+
18114+ be->frontend_state = frontend_state;
18115+
18116+ switch (frontend_state) {
18117+ case XenbusStateInitialising:
18118+ if (dev->state == XenbusStateClosed) {
18119+ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
18120+ __FUNCTION__, dev->nodename);
18121+ xenbus_switch_state(dev, XenbusStateInitWait);
18122+ }
18123+ break;
18124+
18125+ case XenbusStateInitialised:
18126+ break;
18127+
18128+ case XenbusStateConnected:
18129+ if (dev->state == XenbusStateConnected)
18130+ break;
18131+ backend_create_netif(be);
18132+ if (be->netif)
18133+ connect(be);
18134+ break;
18135+
18136+ case XenbusStateClosing:
18137+ if (be->netif) {
18138+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
18139+ netif_disconnect(be->netif);
18140+ be->netif = NULL;
18141+ }
18142+ xenbus_switch_state(dev, XenbusStateClosing);
18143+ break;
18144+
18145+ case XenbusStateClosed:
18146+ xenbus_switch_state(dev, XenbusStateClosed);
18147+ if (xenbus_dev_is_online(dev))
18148+ break;
18149+ /* fall through if not online */
18150+ case XenbusStateUnknown:
18151+ device_unregister(&dev->dev);
18152+ break;
18153+
18154+ default:
18155+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
18156+ frontend_state);
18157+ break;
18158+ }
18159+}
18160+
18161+
18162+static void xen_net_read_rate(struct xenbus_device *dev,
18163+ unsigned long *bytes, unsigned long *usec)
18164+{
18165+ char *s, *e;
18166+ unsigned long b, u;
18167+ char *ratestr;
18168+
18169+ /* Default to unlimited bandwidth. */
18170+ *bytes = ~0UL;
18171+ *usec = 0;
18172+
18173+ ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
18174+ if (IS_ERR(ratestr))
18175+ return;
18176+
18177+ s = ratestr;
18178+ b = simple_strtoul(s, &e, 10);
18179+ if ((s == e) || (*e != ','))
18180+ goto fail;
18181+
18182+ s = e + 1;
18183+ u = simple_strtoul(s, &e, 10);
18184+ if ((s == e) || (*e != '\0'))
18185+ goto fail;
18186+
18187+ *bytes = b;
18188+ *usec = u;
18189+
18190+ kfree(ratestr);
18191+ return;
18192+
18193+ fail:
18194+ WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
18195+ kfree(ratestr);
18196+}
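+/*
+ * The "rate" node holds "<bytes>,<usec>"; e.g. (illustrative) rate =
+ * "10000000,50000" replenishes 10000000 bytes of credit every 50000 usec.
+ */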
18197+
18198+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
18199+{
18200+ char *s, *e, *macstr;
18201+ int i;
18202+
18203+ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
18204+ if (IS_ERR(macstr))
18205+ return PTR_ERR(macstr);
18206+
18207+ for (i = 0; i < ETH_ALEN; i++) {
18208+ mac[i] = simple_strtoul(s, &e, 16);
18209+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
18210+ kfree(macstr);
18211+ return -ENOENT;
18212+ }
18213+ s = e+1;
18214+ }
18215+
18216+ kfree(macstr);
18217+ return 0;
18218+}
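+/*
+ * The "mac" node is a colon-separated hex string; e.g. (illustrative)
+ * mac = "00:16:3e:5a:12:34" yields mac[] = { 0x00, 0x16, 0x3e, 0x5a, 0x12, 0x34 }.
+ */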
18219+
18220+static void connect(struct backend_info *be)
18221+{
18222+ int err;
18223+ struct xenbus_device *dev = be->dev;
18224+
18225+ err = connect_rings(be);
18226+ if (err)
18227+ return;
18228+
18229+ err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
18230+ if (err) {
18231+ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
18232+ return;
18233+ }
18234+
18235+ xen_net_read_rate(dev, &be->netif->credit_bytes,
18236+ &be->netif->credit_usec);
18237+ be->netif->remaining_credit = be->netif->credit_bytes;
18238+
18239+ xenbus_switch_state(dev, XenbusStateConnected);
18240+
18241+ netif_wake_queue(be->netif->dev);
18242+}
18243+
18244+
18245+static int connect_rings(struct backend_info *be)
18246+{
18247+ struct xenbus_device *dev = be->dev;
18248+ unsigned long tx_ring_ref, rx_ring_ref;
18249+ unsigned int evtchn, rx_copy;
18250+ int err;
18251+ int val;
18252+
18253+ DPRINTK("");
18254+
18255+ err = xenbus_gather(XBT_NIL, dev->otherend,
18256+ "tx-ring-ref", "%lu", &tx_ring_ref,
18257+ "rx-ring-ref", "%lu", &rx_ring_ref,
18258+ "event-channel", "%u", &evtchn, NULL);
18259+ if (err) {
18260+ xenbus_dev_fatal(dev, err,
18261+ "reading %s/ring-ref and event-channel",
18262+ dev->otherend);
18263+ return err;
18264+ }
18265+
18266+ err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
18267+ &rx_copy);
18268+ if (err == -ENOENT) {
18269+ err = 0;
18270+ rx_copy = 0;
18271+ }
18272+ if (err < 0) {
18273+ xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
18274+ dev->otherend);
18275+ return err;
18276+ }
18277+ be->netif->copying_receiver = !!rx_copy;
18278+
18279+ if (be->netif->dev->tx_queue_len != 0) {
18280+ if (xenbus_scanf(XBT_NIL, dev->otherend,
18281+ "feature-rx-notify", "%d", &val) < 0)
18282+ val = 0;
18283+ if (val)
18284+ be->netif->can_queue = 1;
18285+ else
18286+ /* Must be non-zero for pfifo_fast to work. */
18287+ be->netif->dev->tx_queue_len = 1;
18288+ }
18289+
18290+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
18291+ val = 0;
18292+ if (val) {
18293+ be->netif->features |= NETIF_F_SG;
18294+ be->netif->dev->features |= NETIF_F_SG;
18295+ }
18296+
18297+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
18298+ &val) < 0)
18299+ val = 0;
18300+ if (val) {
18301+ be->netif->features |= NETIF_F_TSO;
18302+ be->netif->dev->features |= NETIF_F_TSO;
18303+ }
18304+
18305+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
18306+ "%d", &val) < 0)
18307+ val = 0;
18308+ if (val) {
18309+ be->netif->features &= ~NETIF_F_IP_CSUM;
18310+ be->netif->dev->features &= ~NETIF_F_IP_CSUM;
18311+ }
18312+
18313+ /* Map the shared frame, irq etc. */
18314+ err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
18315+ if (err) {
18316+ xenbus_dev_fatal(dev, err,
18317+ "mapping shared-frames %lu/%lu port %u",
18318+ tx_ring_ref, rx_ring_ref, evtchn);
18319+ return err;
18320+ }
18321+ return 0;
18322+}
18323+
18324+
18325+/* ** Driver Registration ** */
18326+
18327+
18328+static const struct xenbus_device_id netback_ids[] = {
18329+ { "vif" },
18330+ { "" }
18331+};
18332+
18333+
18334+static struct xenbus_driver netback = {
18335+ .name = "vif",
18336+ .owner = THIS_MODULE,
18337+ .ids = netback_ids,
18338+ .probe = netback_probe,
18339+ .remove = netback_remove,
18340+ .uevent = netback_uevent,
18341+ .otherend_changed = frontend_changed,
18342+};
18343+
18344+
18345+void netif_xenbus_init(void)
18346+{
18347+ xenbus_register_backend(&netback);
18348+}
18349Index: head-2008-11-25/drivers/xen/netfront/Makefile
18350===================================================================
18351--- /dev/null 1970-01-01 00:00:00.000000000 +0000
18352+++ head-2008-11-25/drivers/xen/netfront/Makefile 2007-07-12 08:54:23.000000000 +0200
18353@@ -0,0 +1,4 @@
18354+
18355+obj-$(CONFIG_XEN_NETDEV_FRONTEND) := xennet.o
18356+
18357+xennet-objs := netfront.o accel.o
18358Index: head-2008-11-25/drivers/xen/netfront/accel.c
18359===================================================================
18360--- /dev/null 1970-01-01 00:00:00.000000000 +0000
18361+++ head-2008-11-25/drivers/xen/netfront/accel.c 2008-08-07 12:44:36.000000000 +0200
18362@@ -0,0 +1,824 @@
18363+/******************************************************************************
18364+ * Virtual network driver for conversing with remote driver backends.
18365+ *
18366+ * Copyright (C) 2007 Solarflare Communications, Inc.
18367+ *
18368+ * This program is free software; you can redistribute it and/or
18369+ * modify it under the terms of the GNU General Public License version 2
18370+ * as published by the Free Software Foundation; or, when distributed
18371+ * separately from the Linux kernel or incorporated into other
18372+ * software packages, subject to the following license:
18373+ *
18374+ * Permission is hereby granted, free of charge, to any person obtaining a copy
18375+ * of this source file (the "Software"), to deal in the Software without
18376+ * restriction, including without limitation the rights to use, copy, modify,
18377+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18378+ * and to permit persons to whom the Software is furnished to do so, subject to
18379+ * the following conditions:
18380+ *
18381+ * The above copyright notice and this permission notice shall be included in
18382+ * all copies or substantial portions of the Software.
18383+ *
18384+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18385+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18386+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18387+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18388+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18389+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
18390+ * IN THE SOFTWARE.
18391+ */
18392+
18393+#include <linux/netdevice.h>
18394+#include <linux/skbuff.h>
18395+#include <linux/list.h>
18396+#include <linux/mutex.h>
18397+#include <asm/hypervisor.h>
18398+#include <xen/xenbus.h>
18399+
18400+#include "netfront.h"
18401+
18402+#define DPRINTK(fmt, args...) \
18403+ pr_debug("netfront/accel (%s:%d) " fmt, \
18404+ __FUNCTION__, __LINE__, ##args)
18405+#define IPRINTK(fmt, args...) \
18406+ printk(KERN_INFO "netfront/accel: " fmt, ##args)
18407+#define WPRINTK(fmt, args...) \
18408+ printk(KERN_WARNING "netfront/accel: " fmt, ##args)
18409+
18410+static int netfront_remove_accelerator(struct netfront_info *np,
18411+ struct xenbus_device *dev);
18412+static int netfront_load_accelerator(struct netfront_info *np,
18413+ struct xenbus_device *dev,
18414+ const char *frontend);
18415+
18416+/*
18417+ * List of all netfront accelerator plugin modules available. Each
18418+ * list entry is of type struct netfront_accelerator.
18419+ */
18420+static struct list_head accelerators_list;
18421+
18422+/* Lock to protect access to accelerators_list */
18423+static spinlock_t accelerators_lock;
18424+
18425+/* Workqueue to process acceleration configuration changes */
18426+struct workqueue_struct *accel_watch_workqueue;
18427+
18428+/* Mutex to prevent concurrent loads and suspends, etc. */
18429+DEFINE_MUTEX(accelerator_mutex);
18430+
18431+void netif_init_accel(void)
18432+{
18433+ INIT_LIST_HEAD(&accelerators_list);
18434+ spin_lock_init(&accelerators_lock);
18435+
18436+ accel_watch_workqueue = create_workqueue("net_accel");
18437+}
18438+
18439+void netif_exit_accel(void)
18440+{
18441+ struct netfront_accelerator *accelerator, *tmp;
18442+ unsigned long flags;
18443+
18444+ flush_workqueue(accel_watch_workqueue);
18445+ destroy_workqueue(accel_watch_workqueue);
18446+
18447+ spin_lock_irqsave(&accelerators_lock, flags);
18448+
18449+ list_for_each_entry_safe(accelerator, tmp, &accelerators_list, link) {
18450+ BUG_ON(!list_empty(&accelerator->vif_states));
18451+
18452+ list_del(&accelerator->link);
18453+ kfree(accelerator->frontend);
18454+ kfree(accelerator);
18455+ }
18456+
18457+ spin_unlock_irqrestore(&accelerators_lock, flags);
18458+}
18459+
18460+
18461+/*
18462+ * Watch the configured accelerator and change plugin if it's modified
18463+ */
18464+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
18465+static void accel_watch_work(struct work_struct *context)
18466+#else
18467+static void accel_watch_work(void *context)
18468+#endif
18469+{
18470+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
18471+ struct netfront_accel_vif_state *vif_state =
18472+ container_of(context, struct netfront_accel_vif_state,
18473+ accel_work);
18474+#else
18475+ struct netfront_accel_vif_state *vif_state =
18476+ (struct netfront_accel_vif_state *)context;
18477+#endif
18478+ struct netfront_info *np = vif_state->np;
18479+ char *accel_frontend;
18480+ int accel_len, rc = -1;
18481+
18482+ mutex_lock(&accelerator_mutex);
18483+
18484+ accel_frontend = xenbus_read(XBT_NIL, np->xbdev->otherend,
18485+ "accel-frontend", &accel_len);
18486+ if (IS_ERR(accel_frontend)) {
18487+ accel_frontend = NULL;
18488+ netfront_remove_accelerator(np, np->xbdev);
18489+ } else {
18490+ /* If this is the first time, request the accelerator,
18491+ otherwise only request one if it has changed */
18492+ if (vif_state->accel_frontend == NULL) {
18493+ rc = netfront_load_accelerator(np, np->xbdev,
18494+ accel_frontend);
18495+ } else {
18496+ if (strncmp(vif_state->accel_frontend, accel_frontend,
18497+ accel_len)) {
18498+ netfront_remove_accelerator(np, np->xbdev);
18499+ rc = netfront_load_accelerator(np, np->xbdev,
18500+ accel_frontend);
18501+ }
18502+ }
18503+ }
18504+
18505+ /* Get rid of previous state and replace with the new name */
18506+ if (vif_state->accel_frontend != NULL)
18507+ kfree(vif_state->accel_frontend);
18508+ vif_state->accel_frontend = accel_frontend;
18509+
18510+ mutex_unlock(&accelerator_mutex);
18511+
18512+ if (rc == 0) {
18513+ DPRINTK("requesting module %s\n", accel_frontend);
18514+ request_module("%s", accel_frontend);
18515+ /*
18516+ * Module should now call netfront_accelerator_loaded() once
18517+ * it's up and running, and we can continue from there
18518+ */
18519+ }
18520+}
18521+
18522+
18523+static void accel_watch_changed(struct xenbus_watch *watch,
18524+ const char **vec, unsigned int len)
18525+{
18526+ struct netfront_accel_vif_state *vif_state =
18527+ container_of(watch, struct netfront_accel_vif_state,
18528+ accel_watch);
18529+ queue_work(accel_watch_workqueue, &vif_state->accel_work);
18530+}
18531+
18532+
18533+void netfront_accelerator_add_watch(struct netfront_info *np)
18534+{
18535+ int err;
18536+
18537+ /* Check we're not trying to overwrite an existing watch */
18538+ BUG_ON(np->accel_vif_state.accel_watch.node != NULL);
18539+
18540+ /* Get a watch on the accelerator plugin */
18541+ err = xenbus_watch_path2(np->xbdev, np->xbdev->otherend,
18542+ "accel-frontend",
18543+ &np->accel_vif_state.accel_watch,
18544+ accel_watch_changed);
18545+ if (err) {
18546+ DPRINTK("%s: Failed to register accel watch: %d\n",
18547+ __FUNCTION__, err);
18548+ np->accel_vif_state.accel_watch.node = NULL;
18549+ }
18550+}
18551+
18552+
18553+static
18554+void netfront_accelerator_remove_watch(struct netfront_info *np)
18555+{
18556+ struct netfront_accel_vif_state *vif_state = &np->accel_vif_state;
18557+
18558+ /* Get rid of watch on accelerator plugin */
18559+ if (vif_state->accel_watch.node != NULL) {
18560+ unregister_xenbus_watch(&vif_state->accel_watch);
18561+ kfree(vif_state->accel_watch.node);
18562+ vif_state->accel_watch.node = NULL;
18563+
18564+ flush_workqueue(accel_watch_workqueue);
18565+
18566+ /* Clean up any state left from watch */
18567+ if (vif_state->accel_frontend != NULL) {
18568+ kfree(vif_state->accel_frontend);
18569+ vif_state->accel_frontend = NULL;
18570+ }
18571+ }
18572+}
18573+
18574+
18575+/*
18576+ * Initialise the accel_vif_state field in the netfront state
18577+ */
18578+void init_accelerator_vif(struct netfront_info *np,
18579+ struct xenbus_device *dev)
18580+{
18581+ np->accelerator = NULL;
18582+
18583+ /* It's assumed that these things don't change */
18584+ np->accel_vif_state.np = np;
18585+ np->accel_vif_state.dev = dev;
18586+
18587+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
18588+ INIT_WORK(&np->accel_vif_state.accel_work, accel_watch_work);
18589+#else
18590+ INIT_WORK(&np->accel_vif_state.accel_work, accel_watch_work,
18591+ &np->accel_vif_state);
18592+#endif
18593+}
18594+
18595+
18596+/*
18597+ * Compare a frontend description string against an accelerator to see
18598+ * if they match. Would ultimately be nice to replace the string with
18599+ * a unique numeric identifier for each accelerator.
18600+ */
18601+static int match_accelerator(const char *frontend,
18602+ struct netfront_accelerator *accelerator)
18603+{
18604+ return strcmp(frontend, accelerator->frontend) == 0;
18605+}
18606+
18607+
18608+/*
18609+ * Add a frontend vif to the list of vifs that is using a netfront
18610+ * accelerator plugin module.
18611+ */
18612+static void add_accelerator_vif(struct netfront_accelerator *accelerator,
18613+ struct netfront_info *np)
18614+{
18615+ unsigned long flags;
18616+
18617+ /* Need lock to write list */
18618+ spin_lock_irqsave(&accelerator->vif_states_lock, flags);
18619+
18620+ if (np->accelerator == NULL) {
18621+ np->accelerator = accelerator;
18622+
18623+ list_add(&np->accel_vif_state.link, &accelerator->vif_states);
18624+ } else {
18625+ /*
18626+ * May get here legitimately if suspend_cancel is
18627+ * called, but in that case configuration should not
18628+ * have changed
18629+ */
18630+ BUG_ON(np->accelerator != accelerator);
18631+ }
18632+
18633+ spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
18634+}
18635+
18636+
18637+/*
18638+ * Initialise the state to track an accelerator plugin module.
18639+ */
18640+static int init_accelerator(const char *frontend,
18641+ struct netfront_accelerator **result,
18642+ struct netfront_accel_hooks *hooks)
18643+{
18644+ struct netfront_accelerator *accelerator =
18645+ kmalloc(sizeof(struct netfront_accelerator), GFP_KERNEL);
18646+ unsigned long flags;
18647+ int frontend_len;
18648+
18649+ if (!accelerator) {
18650+ DPRINTK("no memory for accelerator\n");
18651+ return -ENOMEM;
18652+ }
18653+
18654+ frontend_len = strlen(frontend) + 1;
18655+ accelerator->frontend = kmalloc(frontend_len, GFP_KERNEL);
18656+ if (!accelerator->frontend) {
18657+ DPRINTK("no memory for accelerator\n");
18658+ kfree(accelerator);
18659+ return -ENOMEM;
18660+ }
18661+ strlcpy(accelerator->frontend, frontend, frontend_len);
18662+
18663+ INIT_LIST_HEAD(&accelerator->vif_states);
18664+ spin_lock_init(&accelerator->vif_states_lock);
18665+
18666+ accelerator->hooks = hooks;
18667+
18668+ spin_lock_irqsave(&accelerators_lock, flags);
18669+ list_add(&accelerator->link, &accelerators_list);
18670+ spin_unlock_irqrestore(&accelerators_lock, flags);
18671+
18672+ *result = accelerator;
18673+
18674+ return 0;
18675+}
18676+
18677+
18678+/*
18679+ * Modify the hooks stored in the per-vif state to match that in the
18680+ * netfront accelerator's state.
18681+ */
18682+static void
18683+accelerator_set_vif_state_hooks(struct netfront_accel_vif_state *vif_state)
18684+{
18685+ /* This function must be called with the vif_states_lock held */
18686+
18687+ DPRINTK("%p\n",vif_state);
18688+
18689+ /* Make sure there are no data path operations going on */
18690+ netif_poll_disable(vif_state->np->netdev);
18691+ netif_tx_lock_bh(vif_state->np->netdev);
18692+
18693+ vif_state->hooks = vif_state->np->accelerator->hooks;
18694+
18695+ netif_tx_unlock_bh(vif_state->np->netdev);
18696+ netif_poll_enable(vif_state->np->netdev);
18697+}
18698+
18699+
18700+static void accelerator_probe_new_vif(struct netfront_info *np,
18701+ struct xenbus_device *dev,
18702+ struct netfront_accelerator *accelerator)
18703+{
18704+ struct netfront_accel_hooks *hooks;
18705+ unsigned long flags;
18706+
18707+ DPRINTK("\n");
18708+
18709+ /* Include this frontend device on the accelerator's list */
18710+ add_accelerator_vif(accelerator, np);
18711+
18712+ hooks = accelerator->hooks;
18713+
18714+ if (hooks) {
18715+ if (hooks->new_device(np->netdev, dev) == 0) {
18716+ spin_lock_irqsave
18717+ (&accelerator->vif_states_lock, flags);
18718+
18719+ accelerator_set_vif_state_hooks(&np->accel_vif_state);
18720+
18721+ spin_unlock_irqrestore
18722+ (&accelerator->vif_states_lock, flags);
18723+ }
18724+ }
18725+
18726+ return;
18727+}
18728+
18729+
18730+/*
18731+ * Request that a particular netfront accelerator plugin is loaded.
18732+ * Usually called as a result of the vif configuration specifying
18733+ * which one to use. Must be called with accelerator_mutex held
18734+ */
18735+static int netfront_load_accelerator(struct netfront_info *np,
18736+ struct xenbus_device *dev,
18737+ const char *frontend)
18738+{
18739+ struct netfront_accelerator *accelerator;
18740+ int rc = 0;
18741+
18742+ DPRINTK(" %s\n", frontend);
18743+
18744+ /*
18745+ * Look at list of loaded accelerators to see if the requested
18746+ * one is already there
18747+ */
18748+ list_for_each_entry(accelerator, &accelerators_list, link) {
18749+ if (match_accelerator(frontend, accelerator)) {
18750+ accelerator_probe_new_vif(np, dev, accelerator);
18751+ return 0;
18752+ }
18753+ }
18754+
18755+ /* Couldn't find it, so create a new one and load the module */
18756+ if ((rc = init_accelerator(frontend, &accelerator, NULL)) < 0) {
18757+ return rc;
18758+ }
18759+
18760+ /* Include this frontend device on the accelerator's list */
18761+ add_accelerator_vif(accelerator, np);
18762+
18763+ return rc;
18764+}
18765+
18766+
18767+/*
18768+ * Go through all the netfront vifs and see if they have requested
18769+ * this accelerator. Notify the accelerator plugin of the relevant
18770+ * device if so. Called when an accelerator plugin module is first
18771+ * loaded and connects to netfront.
18772+ */
18773+static void
18774+accelerator_probe_vifs(struct netfront_accelerator *accelerator,
18775+ struct netfront_accel_hooks *hooks)
18776+{
18777+ struct netfront_accel_vif_state *vif_state, *tmp;
18778+ unsigned long flags;
18779+
18780+ DPRINTK("%p\n", accelerator);
18781+
18782+ /*
18783+ * Store the hooks for future calls to probe a new device, and
18784+ * to wire into the vif_state once the accelerator plugin is
18785+ * ready to accelerate each vif
18786+ */
18787+ BUG_ON(hooks == NULL);
18788+ accelerator->hooks = hooks;
18789+
18790+ /*
18791+	 * We currently hold accelerator_mutex, so we don't need
18792+	 * vif_states_lock to read the list.
18793+ */
18794+ list_for_each_entry_safe(vif_state, tmp, &accelerator->vif_states,
18795+ link) {
18796+ struct netfront_info *np = vif_state->np;
18797+
18798+ if (hooks->new_device(np->netdev, vif_state->dev) == 0) {
18799+ spin_lock_irqsave
18800+ (&accelerator->vif_states_lock, flags);
18801+
18802+ accelerator_set_vif_state_hooks(vif_state);
18803+
18804+ spin_unlock_irqrestore
18805+ (&accelerator->vif_states_lock, flags);
18806+ }
18807+ }
18808+}
18809+
18810+
18811+/*
18812+ * Called by the netfront accelerator plugin module when it has loaded
18813+ */
18814+int netfront_accelerator_loaded(int version, const char *frontend,
18815+ struct netfront_accel_hooks *hooks)
18816+{
18817+ struct netfront_accelerator *accelerator;
18818+
18819+ if (is_initial_xendomain())
18820+ return -EINVAL;
18821+
18822+ if (version != NETFRONT_ACCEL_VERSION) {
18823+ if (version > NETFRONT_ACCEL_VERSION) {
18824+ /* Caller has higher version number, leave it
18825+ up to them to decide whether to continue.
18826+ They can re-call with a lower number if
18827+ they're happy to be compatible with us */
18828+ return NETFRONT_ACCEL_VERSION;
18829+ } else {
18830+ /* We have a more recent version than caller.
18831+ Currently reject, but may in future be able
18832+ to be backwardly compatible */
18833+ return -EPROTO;
18834+ }
18835+ }
18836+
18837+ mutex_lock(&accelerator_mutex);
18838+
18839+ /*
18840+ * Look through list of accelerators to see if it has already
18841+ * been requested
18842+ */
18843+ list_for_each_entry(accelerator, &accelerators_list, link) {
18844+ if (match_accelerator(frontend, accelerator)) {
18845+ accelerator_probe_vifs(accelerator, hooks);
18846+ goto out;
18847+ }
18848+ }
18849+
18850+ /*
18851+ * If it wasn't in the list, add it now so that when it is
18852+ * requested the caller will find it
18853+ */
18854+ DPRINTK("Couldn't find matching accelerator (%s)\n",
18855+ frontend);
18856+
18857+ init_accelerator(frontend, &accelerator, hooks);
18858+
18859+ out:
18860+ mutex_unlock(&accelerator_mutex);
18861+ return 0;
18862+}
18863+EXPORT_SYMBOL_GPL(netfront_accelerator_loaded);
18864+
18865+
18866+/*
18867+ * Remove the hooks from a single vif state.
18868+ */
18869+static void
18870+accelerator_remove_single_hook(struct netfront_accelerator *accelerator,
18871+ struct netfront_accel_vif_state *vif_state)
18872+{
18873+ /* Make sure there are no data path operations going on */
18874+ netif_poll_disable(vif_state->np->netdev);
18875+ netif_tx_lock_bh(vif_state->np->netdev);
18876+
18877+ /*
18878+ * Remove the hooks, but leave the vif_state on the
18879+ * accelerator's list as that signifies this vif is
18880+ * interested in using that accelerator if it becomes
18881+ * available again
18882+ */
18883+ vif_state->hooks = NULL;
18884+
18885+ netif_tx_unlock_bh(vif_state->np->netdev);
18886+ netif_poll_enable(vif_state->np->netdev);
18887+}
18888+
18889+
18890+/*
18891+ * Safely remove the accelerator function hooks from a netfront state.
18892+ */
18893+static void accelerator_remove_hooks(struct netfront_accelerator *accelerator)
18894+{
18895+ struct netfront_accel_hooks *hooks;
18896+ struct netfront_accel_vif_state *vif_state, *tmp;
18897+ unsigned long flags;
18898+
18899+ /* Mutex is held so don't need vif_states_lock to iterate list */
18900+ list_for_each_entry_safe(vif_state, tmp,
18901+ &accelerator->vif_states,
18902+ link) {
18903+ spin_lock_irqsave(&accelerator->vif_states_lock, flags);
18904+
18905+		if (vif_state->hooks) {
18906+ hooks = vif_state->hooks;
18907+
18908+ /* Last chance to get statistics from the accelerator */
18909+ hooks->get_stats(vif_state->np->netdev,
18910+ &vif_state->np->stats);
18911+
18912+ spin_unlock_irqrestore(&accelerator->vif_states_lock,
18913+ flags);
18914+
18915+ accelerator_remove_single_hook(accelerator, vif_state);
18916+
18917+ accelerator->hooks->remove(vif_state->dev);
18918+ } else {
18919+ spin_unlock_irqrestore(&accelerator->vif_states_lock,
18920+ flags);
18921+ }
18922+ }
18923+
18924+ accelerator->hooks = NULL;
18925+}
18926+
18927+
18928+/*
18929+ * Called by a netfront accelerator when it is unloaded. This safely
18930+ * removes the hooks into the plugin and blocks until all devices have
18931+ * finished using it, so on return it is safe to unload.
18932+ */
18933+void netfront_accelerator_stop(const char *frontend)
18934+{
18935+ struct netfront_accelerator *accelerator;
18936+ unsigned long flags;
18937+
18938+ mutex_lock(&accelerator_mutex);
18939+ spin_lock_irqsave(&accelerators_lock, flags);
18940+
18941+ list_for_each_entry(accelerator, &accelerators_list, link) {
18942+ if (match_accelerator(frontend, accelerator)) {
18943+ spin_unlock_irqrestore(&accelerators_lock, flags);
18944+
18945+ accelerator_remove_hooks(accelerator);
18946+
18947+ goto out;
18948+ }
18949+ }
18950+ spin_unlock_irqrestore(&accelerators_lock, flags);
18951+ out:
18952+ mutex_unlock(&accelerator_mutex);
18953+}
18954+EXPORT_SYMBOL_GPL(netfront_accelerator_stop);
18955+
18956+
18957+/* Helper for call_remove and do_suspend */
18958+static int do_remove(struct netfront_info *np, struct xenbus_device *dev,
18959+ unsigned long *lock_flags)
18960+{
18961+ struct netfront_accelerator *accelerator = np->accelerator;
18962+ struct netfront_accel_hooks *hooks;
18963+ int rc = 0;
18964+
18965+ if (np->accel_vif_state.hooks) {
18966+ hooks = np->accel_vif_state.hooks;
18967+
18968+ /* Last chance to get statistics from the accelerator */
18969+ hooks->get_stats(np->netdev, &np->stats);
18970+
18971+ spin_unlock_irqrestore(&accelerator->vif_states_lock,
18972+ *lock_flags);
18973+
18974+ /*
18975+ * Try and do the opposite of accelerator_probe_new_vif
18976+ * to ensure there's no state pointing back at the
18977+ * netdev
18978+ */
18979+ accelerator_remove_single_hook(accelerator,
18980+ &np->accel_vif_state);
18981+
18982+ rc = accelerator->hooks->remove(dev);
18983+
18984+ spin_lock_irqsave(&accelerator->vif_states_lock, *lock_flags);
18985+ }
18986+
18987+ return rc;
18988+}
18989+
18990+
18991+static int netfront_remove_accelerator(struct netfront_info *np,
18992+ struct xenbus_device *dev)
18993+{
18994+ struct netfront_accelerator *accelerator;
18995+ struct netfront_accel_vif_state *tmp_vif_state;
18996+ unsigned long flags;
18997+ int rc = 0;
18998+
18999+ /* Check that we've got a device that was accelerated */
19000+ if (np->accelerator == NULL)
19001+ return rc;
19002+
19003+ accelerator = np->accelerator;
19004+
19005+ spin_lock_irqsave(&accelerator->vif_states_lock, flags);
19006+
19007+ list_for_each_entry(tmp_vif_state, &accelerator->vif_states,
19008+ link) {
19009+ if (tmp_vif_state == &np->accel_vif_state) {
19010+ list_del(&np->accel_vif_state.link);
19011+ break;
19012+ }
19013+ }
19014+
19015+ rc = do_remove(np, dev, &flags);
19016+
19017+ np->accelerator = NULL;
19018+
19019+ spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
19020+
19021+ return rc;
19022+}
19023+
19024+
19025+int netfront_accelerator_call_remove(struct netfront_info *np,
19026+ struct xenbus_device *dev)
19027+{
19028+ int rc;
19029+ netfront_accelerator_remove_watch(np);
19030+ mutex_lock(&accelerator_mutex);
19031+ rc = netfront_remove_accelerator(np, dev);
19032+ mutex_unlock(&accelerator_mutex);
19033+ return rc;
19034+}
19035+
19036+
19037+int netfront_accelerator_suspend(struct netfront_info *np,
19038+ struct xenbus_device *dev)
19039+{
19040+ unsigned long flags;
19041+ int rc = 0;
19042+
19043+ netfront_accelerator_remove_watch(np);
19044+
19045+ mutex_lock(&accelerator_mutex);
19046+
19047+ /* Check that we've got a device that was accelerated */
19048+ if (np->accelerator == NULL)
19049+ goto out;
19050+
19051+ /*
19052+ * Call the remove accelerator hook, but leave the vif_state
19053+ * on the accelerator's list in case there is a suspend_cancel.
19054+ */
19055+ spin_lock_irqsave(&np->accelerator->vif_states_lock, flags);
19056+
19057+ rc = do_remove(np, dev, &flags);
19058+
19059+ spin_unlock_irqrestore(&np->accelerator->vif_states_lock, flags);
19060+ out:
19061+ mutex_unlock(&accelerator_mutex);
19062+ return rc;
19063+}
19064+
19065+
19066+int netfront_accelerator_suspend_cancel(struct netfront_info *np,
19067+ struct xenbus_device *dev)
19068+{
19069+ /*
19070+ * Setting the watch will cause it to fire and probe the
19071+ * accelerator, so no need to call accelerator_probe_new_vif()
19072+ * directly here
19073+ */
19074+ if (dev->state == XenbusStateConnected)
19075+ netfront_accelerator_add_watch(np);
19076+ return 0;
19077+}
19078+
19079+
19080+void netfront_accelerator_resume(struct netfront_info *np,
19081+ struct xenbus_device *dev)
19082+{
19083+ struct netfront_accel_vif_state *accel_vif_state = NULL;
19084+ spinlock_t *vif_states_lock;
19085+ unsigned long flags;
19086+
19087+ mutex_lock(&accelerator_mutex);
19088+
19089+ /* Check that we've got a device that was accelerated */
19090+	if (np->accelerator == NULL)
19091+ goto out;
19092+
19093+ /* Find the vif_state from the accelerator's list */
19094+ list_for_each_entry(accel_vif_state, &np->accelerator->vif_states,
19095+ link) {
19096+ if (accel_vif_state->dev == dev) {
19097+ BUG_ON(accel_vif_state != &np->accel_vif_state);
19098+
19099+ vif_states_lock = &np->accelerator->vif_states_lock;
19100+ spin_lock_irqsave(vif_states_lock, flags);
19101+
19102+ /*
19103+ * Remove it from the accelerator's list so
19104+ * state is consistent for probing new vifs
19105+ * when they get connected
19106+ */
19107+ list_del(&accel_vif_state->link);
19108+ np->accelerator = NULL;
19109+
19110+ spin_unlock_irqrestore(vif_states_lock, flags);
19111+
19112+ break;
19113+ }
19114+ }
19115+
19116+ out:
19117+ mutex_unlock(&accelerator_mutex);
19118+ return;
19119+}
19120+
19121+
19122+int netfront_check_accelerator_queue_ready(struct net_device *dev,
19123+ struct netfront_info *np)
19124+{
19125+ struct netfront_accelerator *accelerator;
19126+ struct netfront_accel_hooks *hooks;
19127+ int rc = 1;
19128+ unsigned long flags;
19129+
19130+ accelerator = np->accelerator;
19131+
19132+ /* Call the check_ready accelerator hook. */
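+	/* hooks and np->accelerator are tested again under vif_states_lock
+	 * in case the accelerator is removed between the check above and
+	 * taking the lock. */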
19133+ if (np->accel_vif_state.hooks && accelerator) {
19134+ spin_lock_irqsave(&accelerator->vif_states_lock, flags);
19135+ hooks = np->accel_vif_state.hooks;
19136+ if (hooks && np->accelerator == accelerator)
19137+ rc = np->accel_vif_state.hooks->check_ready(dev);
19138+ spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
19139+ }
19140+
19141+ return rc;
19142+}
19143+
19144+
19145+void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np,
19146+ struct net_device *dev)
19147+{
19148+ struct netfront_accelerator *accelerator;
19149+ struct netfront_accel_hooks *hooks;
19150+ unsigned long flags;
19151+
19152+ accelerator = np->accelerator;
19153+
19154+ /* Call the stop_napi_interrupts accelerator hook. */
19155+ if (np->accel_vif_state.hooks && accelerator != NULL) {
19156+ spin_lock_irqsave(&accelerator->vif_states_lock, flags);
19157+ hooks = np->accel_vif_state.hooks;
19158+ if (hooks && np->accelerator == accelerator)
19159+ np->accel_vif_state.hooks->stop_napi_irq(dev);
19160+ spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
19161+ }
19162+}
19163+
19164+
19165+int netfront_accelerator_call_get_stats(struct netfront_info *np,
19166+ struct net_device *dev)
19167+{
19168+ struct netfront_accelerator *accelerator;
19169+ struct netfront_accel_hooks *hooks;
19170+ unsigned long flags;
19171+ int rc = 0;
19172+
19173+ accelerator = np->accelerator;
19174+
19175+ /* Call the get_stats accelerator hook. */
19176+ if (np->accel_vif_state.hooks && accelerator != NULL) {
19177+ spin_lock_irqsave(&accelerator->vif_states_lock, flags);
19178+ hooks = np->accel_vif_state.hooks;
19179+ if (hooks && np->accelerator == accelerator)
19180+ rc = np->accel_vif_state.hooks->get_stats(dev,
19181+ &np->stats);
19182+ spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
19183+ }
19184+ return rc;
19185+}
19186+
19187Index: head-2008-11-25/drivers/xen/netfront/netfront.c
19188===================================================================
19189--- /dev/null 1970-01-01 00:00:00.000000000 +0000
19190+++ head-2008-11-25/drivers/xen/netfront/netfront.c 2008-07-21 11:00:33.000000000 +0200
19191@@ -0,0 +1,2240 @@
19192+/******************************************************************************
19193+ * Virtual network driver for conversing with remote driver backends.
19194+ *
19195+ * Copyright (c) 2002-2005, K A Fraser
19196+ * Copyright (c) 2005, XenSource Ltd
19197+ * Copyright (C) 2007 Solarflare Communications, Inc.
19198+ *
19199+ * This program is free software; you can redistribute it and/or
19200+ * modify it under the terms of the GNU General Public License version 2
19201+ * as published by the Free Software Foundation; or, when distributed
19202+ * separately from the Linux kernel or incorporated into other
19203+ * software packages, subject to the following license:
19204+ *
19205+ * Permission is hereby granted, free of charge, to any person obtaining a copy
19206+ * of this source file (the "Software"), to deal in the Software without
19207+ * restriction, including without limitation the rights to use, copy, modify,
19208+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19209+ * and to permit persons to whom the Software is furnished to do so, subject to
19210+ * the following conditions:
19211+ *
19212+ * The above copyright notice and this permission notice shall be included in
19213+ * all copies or substantial portions of the Software.
19214+ *
19215+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19216+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19217+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19218+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19219+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19220+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19221+ * IN THE SOFTWARE.
19222+ */
19223+
19224+#include <linux/module.h>
19225+#include <linux/version.h>
19226+#include <linux/kernel.h>
19227+#include <linux/sched.h>
19228+#include <linux/slab.h>
19229+#include <linux/string.h>
19230+#include <linux/errno.h>
19231+#include <linux/netdevice.h>
19232+#include <linux/inetdevice.h>
19233+#include <linux/etherdevice.h>
19234+#include <linux/skbuff.h>
19235+#include <linux/init.h>
19236+#include <linux/bitops.h>
19237+#include <linux/ethtool.h>
19238+#include <linux/in.h>
19239+#include <linux/if_ether.h>
19240+#include <linux/io.h>
19241+#include <linux/moduleparam.h>
19242+#include <net/sock.h>
19243+#include <net/pkt_sched.h>
19244+#include <net/arp.h>
19245+#include <net/route.h>
19246+#include <asm/uaccess.h>
19247+#include <xen/evtchn.h>
19248+#include <xen/xenbus.h>
19249+#include <xen/interface/io/netif.h>
19250+#include <xen/interface/memory.h>
19251+#include <xen/balloon.h>
19252+#include <asm/page.h>
19253+#include <asm/maddr.h>
19254+#include <asm/uaccess.h>
19255+#include <xen/interface/grant_table.h>
19256+#include <xen/gnttab.h>
19257+
19258+struct netfront_cb {
19259+ struct page *page;
19260+ unsigned offset;
19261+};
19262+
19263+#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
19264+
19265+#include "netfront.h"
19266+
19267+/*
19268+ * Mutually-exclusive module options to select receive data path:
19269+ * rx_copy : Packets are copied by network backend into local memory
19270+ * rx_flip : Page containing packet data is transferred to our ownership
19271+ * For fully-virtualised guests there is no option - copying must be used.
19272+ * For paravirtualised guests, flipping is the default.
19273+ */
19274+#ifdef CONFIG_XEN
19275+static int MODPARM_rx_copy = 0;
19276+module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
19277+MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
19278+static int MODPARM_rx_flip = 0;
19279+module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
19280+MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
19281+#else
19282+static const int MODPARM_rx_copy = 1;
19283+static const int MODPARM_rx_flip = 0;
19284+#endif
19285+
19286+#define RX_COPY_THRESHOLD 256
19287+
19288+/* If we don't have GSO, fake things up so that we never try to use it. */
19289+#if defined(NETIF_F_GSO)
19290+#define HAVE_GSO 1
19291+#define HAVE_TSO 1 /* TSO is a subset of GSO */
19292+#define HAVE_CSUM_OFFLOAD 1
19293+static inline void dev_disable_gso_features(struct net_device *dev)
19294+{
19295+ /* Turn off all GSO bits except ROBUST. */
19296+ dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
19297+ dev->features |= NETIF_F_GSO_ROBUST;
19298+}
19299+#elif defined(NETIF_F_TSO)
19300+#define HAVE_GSO 0
19301+#define HAVE_TSO 1
19302+
19303+/* Some older kernels cannot cope with incorrect checksums,
19304+ * particularly in netfilter. I'm not sure there is 100% correlation
19305+ * with the presence of NETIF_F_TSO but it appears to be a good first
19306+ * approximation.
19307+ */
19308+#define HAVE_CSUM_OFFLOAD 0
19309+
19310+#define gso_size tso_size
19311+#define gso_segs tso_segs
19312+static inline void dev_disable_gso_features(struct net_device *dev)
19313+{
19314+ /* Turn off all TSO bits. */
19315+ dev->features &= ~NETIF_F_TSO;
19316+}
19317+static inline int skb_is_gso(const struct sk_buff *skb)
19318+{
19319+ return skb_shinfo(skb)->tso_size;
19320+}
19321+static inline int skb_gso_ok(struct sk_buff *skb, int features)
19322+{
19323+ return (features & NETIF_F_TSO);
19324+}
19325+
19326+static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
19327+{
19328+ return skb_is_gso(skb) &&
19329+ (!skb_gso_ok(skb, dev->features) ||
19330+ unlikely(skb->ip_summed != CHECKSUM_HW));
19331+}
19332+#else
19333+#define HAVE_GSO 0
19334+#define HAVE_TSO 0
19335+#define HAVE_CSUM_OFFLOAD 0
19336+#define netif_needs_gso(dev, skb) 0
19337+#define dev_disable_gso_features(dev) ((void)0)
19338+#define ethtool_op_set_tso(dev, data) (-ENOSYS)
19339+#endif
19340+
19341+#define GRANT_INVALID_REF 0
19342+
19343+struct netfront_rx_info {
19344+ struct netif_rx_response rx;
19345+ struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
19346+};
19347+
19348+/*
19349+ * Implement our own carrier flag: the network stack's version causes delays
19350+ * when the carrier is re-enabled (in particular, dev_activate() may not
19351+ * immediately be called, which can cause packet loss).
19352+ */
19353+#define netfront_carrier_on(netif) ((netif)->carrier = 1)
19354+#define netfront_carrier_off(netif) ((netif)->carrier = 0)
19355+#define netfront_carrier_ok(netif) ((netif)->carrier)
19356+
19357+/*
19358+ * Helpers for acquiring/freeing slots in tx_skbs[].
19359+ */
19360+
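+/*
+ * The free list is threaded through tx_skbs[] itself: entry 0 holds the
+ * index of the first free slot, and each free slot stores the index of the
+ * next free slot, cast to a pointer.
+ */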
19361+static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
19362+{
19363+ list[id] = list[0];
19364+ list[0] = (void *)(unsigned long)id;
19365+}
19366+
19367+static inline unsigned short get_id_from_freelist(struct sk_buff **list)
19368+{
19369+ unsigned int id = (unsigned int)(unsigned long)list[0];
19370+ list[0] = list[id];
19371+ return id;
19372+}
19373+
19374+static inline int xennet_rxidx(RING_IDX idx)
19375+{
19376+ return idx & (NET_RX_RING_SIZE - 1);
19377+}
19378+
19379+static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
19380+ RING_IDX ri)
19381+{
19382+ int i = xennet_rxidx(ri);
19383+ struct sk_buff *skb = np->rx_skbs[i];
19384+ np->rx_skbs[i] = NULL;
19385+ return skb;
19386+}
19387+
19388+static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
19389+ RING_IDX ri)
19390+{
19391+ int i = xennet_rxidx(ri);
19392+ grant_ref_t ref = np->grant_rx_ref[i];
19393+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
19394+ return ref;
19395+}
19396+
19397+#define DPRINTK(fmt, args...) \
19398+ pr_debug("netfront (%s:%d) " fmt, \
19399+ __FUNCTION__, __LINE__, ##args)
19400+#define IPRINTK(fmt, args...) \
19401+ printk(KERN_INFO "netfront: " fmt, ##args)
19402+#define WPRINTK(fmt, args...) \
19403+ printk(KERN_WARNING "netfront: " fmt, ##args)
19404+
19405+static int setup_device(struct xenbus_device *, struct netfront_info *);
19406+static struct net_device *create_netdev(struct xenbus_device *);
19407+
19408+static void end_access(int, void *);
19409+static void netif_disconnect_backend(struct netfront_info *);
19410+
19411+static int network_connect(struct net_device *);
19412+static void network_tx_buf_gc(struct net_device *);
19413+static void network_alloc_rx_buffers(struct net_device *);
19414+static void send_fake_arp(struct net_device *);
19415+
19416+static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
19417+
19418+#ifdef CONFIG_SYSFS
19419+static int xennet_sysfs_addif(struct net_device *netdev);
19420+static void xennet_sysfs_delif(struct net_device *netdev);
19421+#else /* !CONFIG_SYSFS */
19422+#define xennet_sysfs_addif(dev) (0)
19423+#define xennet_sysfs_delif(dev) do { } while(0)
19424+#endif
19425+
19426+static inline int xennet_can_sg(struct net_device *dev)
19427+{
19428+ return dev->features & NETIF_F_SG;
19429+}
19430+
19431+/**
19432+ * Entry point to this code when a new device is created. Allocate the basic
19433+ * structures and the ring buffers for communication with the backend, and
19434+ * inform the backend of the appropriate details for those.
19435+ */
19436+static int __devinit netfront_probe(struct xenbus_device *dev,
19437+ const struct xenbus_device_id *id)
19438+{
19439+ int err;
19440+ struct net_device *netdev;
19441+ struct netfront_info *info;
19442+
19443+ netdev = create_netdev(dev);
19444+ if (IS_ERR(netdev)) {
19445+ err = PTR_ERR(netdev);
19446+ xenbus_dev_fatal(dev, err, "creating netdev");
19447+ return err;
19448+ }
19449+
19450+ info = netdev_priv(netdev);
19451+ dev->dev.driver_data = info;
19452+
19453+ err = register_netdev(info->netdev);
19454+ if (err) {
19455+ printk(KERN_WARNING "%s: register_netdev err=%d\n",
19456+ __FUNCTION__, err);
19457+ goto fail;
19458+ }
19459+
19460+ err = xennet_sysfs_addif(info->netdev);
19461+ if (err) {
19462+ unregister_netdev(info->netdev);
19463+ printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
19464+ __FUNCTION__, err);
19465+ goto fail;
19466+ }
19467+
19468+ return 0;
19469+
19470+ fail:
19471+ free_netdev(netdev);
19472+ dev->dev.driver_data = NULL;
19473+ return err;
19474+}
19475+
19476+static int __devexit netfront_remove(struct xenbus_device *dev)
19477+{
19478+ struct netfront_info *info = dev->dev.driver_data;
19479+
19480+ DPRINTK("%s\n", dev->nodename);
19481+
19482+ netfront_accelerator_call_remove(info, dev);
19483+
19484+ netif_disconnect_backend(info);
19485+
19486+ del_timer_sync(&info->rx_refill_timer);
19487+
19488+ xennet_sysfs_delif(info->netdev);
19489+
19490+ unregister_netdev(info->netdev);
19491+
19492+ free_netdev(info->netdev);
19493+
19494+ return 0;
19495+}
19496+
19497+
19498+static int netfront_suspend(struct xenbus_device *dev)
19499+{
19500+ struct netfront_info *info = dev->dev.driver_data;
19501+ return netfront_accelerator_suspend(info, dev);
19502+}
19503+
19504+
19505+static int netfront_suspend_cancel(struct xenbus_device *dev)
19506+{
19507+ struct netfront_info *info = dev->dev.driver_data;
19508+ return netfront_accelerator_suspend_cancel(info, dev);
19509+}
19510+
19511+
19512+/**
19513+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
19514+ * driver restart. We tear down our netif structure and recreate it, but
19515+ * leave the device-layer structures intact so that this is transparent to the
19516+ * rest of the kernel.
19517+ */
19518+static int netfront_resume(struct xenbus_device *dev)
19519+{
19520+ struct netfront_info *info = dev->dev.driver_data;
19521+
19522+ DPRINTK("%s\n", dev->nodename);
19523+
19524+ netfront_accelerator_resume(info, dev);
19525+
19526+ netif_disconnect_backend(info);
19527+ return 0;
19528+}
19529+
19530+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
19531+{
19532+ char *s, *e, *macstr;
19533+ int i;
19534+
19535+ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
19536+ if (IS_ERR(macstr))
19537+ return PTR_ERR(macstr);
19538+
19539+ for (i = 0; i < ETH_ALEN; i++) {
19540+ mac[i] = simple_strtoul(s, &e, 16);
19541+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
19542+ kfree(macstr);
19543+ return -ENOENT;
19544+ }
19545+ s = e+1;
19546+ }
19547+
19548+ kfree(macstr);
19549+ return 0;
19550+}
19551+
19552+/* Common code used when first setting up, and when resuming. */
19553+static int talk_to_backend(struct xenbus_device *dev,
19554+ struct netfront_info *info)
19555+{
19556+ const char *message;
19557+ struct xenbus_transaction xbt;
19558+ int err;
19559+
19560+ /* Read mac only in the first setup. */
19561+ if (!is_valid_ether_addr(info->mac)) {
19562+ err = xen_net_read_mac(dev, info->mac);
19563+ if (err) {
19564+ xenbus_dev_fatal(dev, err, "parsing %s/mac",
19565+ dev->nodename);
19566+ goto out;
19567+ }
19568+ }
19569+
19570+ /* Create shared ring, alloc event channel. */
19571+ err = setup_device(dev, info);
19572+ if (err)
19573+ goto out;
19574+
19575+ /* This will load an accelerator if one is configured when the
19576+ * watch fires */
19577+ netfront_accelerator_add_watch(info);
19578+
19579+again:
19580+ err = xenbus_transaction_start(&xbt);
19581+ if (err) {
19582+ xenbus_dev_fatal(dev, err, "starting transaction");
19583+ goto destroy_ring;
19584+ }
19585+
19586+ err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
19587+ info->tx_ring_ref);
19588+ if (err) {
19589+ message = "writing tx ring-ref";
19590+ goto abort_transaction;
19591+ }
19592+ err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
19593+ info->rx_ring_ref);
19594+ if (err) {
19595+ message = "writing rx ring-ref";
19596+ goto abort_transaction;
19597+ }
19598+ err = xenbus_printf(xbt, dev->nodename,
19599+ "event-channel", "%u",
19600+ irq_to_evtchn_port(info->irq));
19601+ if (err) {
19602+ message = "writing event-channel";
19603+ goto abort_transaction;
19604+ }
19605+
19606+ err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
19607+ info->copying_receiver);
19608+ if (err) {
19609+ message = "writing request-rx-copy";
19610+ goto abort_transaction;
19611+ }
19612+
19613+ err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
19614+ if (err) {
19615+ message = "writing feature-rx-notify";
19616+ goto abort_transaction;
19617+ }
19618+
19619+ err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload",
19620+ "%d", !HAVE_CSUM_OFFLOAD);
19621+ if (err) {
19622+ message = "writing feature-no-csum-offload";
19623+ goto abort_transaction;
19624+ }
19625+
19626+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
19627+ if (err) {
19628+ message = "writing feature-sg";
19629+ goto abort_transaction;
19630+ }
19631+
19632+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d",
19633+ HAVE_TSO);
19634+ if (err) {
19635+ message = "writing feature-gso-tcpv4";
19636+ goto abort_transaction;
19637+ }
19638+
19639+ err = xenbus_transaction_end(xbt, 0);
19640+ if (err) {
19641+ if (err == -EAGAIN)
19642+ goto again;
19643+ xenbus_dev_fatal(dev, err, "completing transaction");
19644+ goto destroy_ring;
19645+ }
19646+
19647+ return 0;
19648+
19649+ abort_transaction:
19650+ xenbus_transaction_end(xbt, 1);
19651+ xenbus_dev_fatal(dev, err, "%s", message);
19652+ destroy_ring:
19653+ netfront_accelerator_call_remove(info, dev);
19654+ netif_disconnect_backend(info);
19655+ out:
19656+ return err;
19657+}
19658+
19659+static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
19660+{
19661+ struct netif_tx_sring *txs;
19662+ struct netif_rx_sring *rxs;
19663+ int err;
19664+ struct net_device *netdev = info->netdev;
19665+
19666+ info->tx_ring_ref = GRANT_INVALID_REF;
19667+ info->rx_ring_ref = GRANT_INVALID_REF;
19668+ info->rx.sring = NULL;
19669+ info->tx.sring = NULL;
19670+ info->irq = 0;
19671+
19672+ txs = (struct netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
19673+ if (!txs) {
19674+ err = -ENOMEM;
19675+ xenbus_dev_fatal(dev, err, "allocating tx ring page");
19676+ goto fail;
19677+ }
19678+ SHARED_RING_INIT(txs);
19679+ FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
19680+
19681+ err = xenbus_grant_ring(dev, virt_to_mfn(txs));
19682+ if (err < 0) {
19683+ free_page((unsigned long)txs);
19684+ goto fail;
19685+ }
19686+ info->tx_ring_ref = err;
19687+
19688+ rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
19689+ if (!rxs) {
19690+ err = -ENOMEM;
19691+ xenbus_dev_fatal(dev, err, "allocating rx ring page");
19692+ goto fail;
19693+ }
19694+ SHARED_RING_INIT(rxs);
19695+ FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
19696+
19697+ err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
19698+ if (err < 0) {
19699+ free_page((unsigned long)rxs);
19700+ goto fail;
19701+ }
19702+ info->rx_ring_ref = err;
19703+
19704+ memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
19705+
19706+ err = bind_listening_port_to_irqhandler(
19707+ dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name,
19708+ netdev);
19709+ if (err < 0)
19710+ goto fail;
19711+ info->irq = err;
19712+
19713+ return 0;
19714+
19715+ fail:
19716+ return err;
19717+}
19718+
19719+/**
19720+ * Callback received when the backend's state changes.
19721+ */
19722+static void backend_changed(struct xenbus_device *dev,
19723+ enum xenbus_state backend_state)
19724+{
19725+ struct netfront_info *np = dev->dev.driver_data;
19726+ struct net_device *netdev = np->netdev;
19727+
19728+ DPRINTK("%s\n", xenbus_strstate(backend_state));
19729+
19730+ switch (backend_state) {
19731+ case XenbusStateInitialising:
19732+ case XenbusStateInitialised:
19733+ case XenbusStateConnected:
19734+ case XenbusStateReconfiguring:
19735+ case XenbusStateReconfigured:
19736+ case XenbusStateUnknown:
19737+ case XenbusStateClosed:
19738+ break;
19739+
19740+ case XenbusStateInitWait:
19741+ if (dev->state != XenbusStateInitialising)
19742+ break;
19743+ if (network_connect(netdev) != 0)
19744+ break;
19745+ xenbus_switch_state(dev, XenbusStateConnected);
19746+ send_fake_arp(netdev);
19747+ break;
19748+
19749+ case XenbusStateClosing:
19750+ xenbus_frontend_closed(dev);
19751+ break;
19752+ }
19753+}
19754+
19755+/** Send a packet on a net device to encourage switches to learn the
19756+ * MAC. We send a gratuitous ARP reply.
19757+ *
19758+ * @param dev device
19759+ * @return 0 on success, error code otherwise
19760+ */
19761+static void send_fake_arp(struct net_device *dev)
19762+{
19763+#ifdef CONFIG_INET
19764+ struct sk_buff *skb;
19765+ u32 src_ip, dst_ip;
19766+
19767+ dst_ip = INADDR_BROADCAST;
19768+ src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
19769+
19770+ /* No IP? Then nothing to do. */
19771+ if (src_ip == 0)
19772+ return;
19773+
19774+ skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
19775+ dst_ip, dev, src_ip,
19776+ /*dst_hw*/ NULL, /*src_hw*/ NULL,
19777+ /*target_hw*/ dev->dev_addr);
19778+ if (skb == NULL)
19779+ return;
19780+
19781+ dev_queue_xmit(skb);
19782+#endif
19783+}
19784+
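+/*
+ * True when enough of the TX_MAX_TARGET request budget remains unused to
+ * take a worst-case packet: a linear head, MAX_SKB_FRAGS fragments and a
+ * possible extra-info slot.
+ */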
19785+static inline int netfront_tx_slot_available(struct netfront_info *np)
19786+{
19787+ return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
19788+ (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
19789+}
19790+
19791+
19792+static inline void network_maybe_wake_tx(struct net_device *dev)
19793+{
19794+ struct netfront_info *np = netdev_priv(dev);
19795+
19796+ if (unlikely(netif_queue_stopped(dev)) &&
19797+ netfront_tx_slot_available(np) &&
19798+ likely(netif_running(dev)) &&
19799+ netfront_check_accelerator_queue_ready(dev, np))
19800+ netif_wake_queue(dev);
19801+}
19802+
19803+
19804+int netfront_check_queue_ready(struct net_device *dev)
19805+{
19806+ struct netfront_info *np = netdev_priv(dev);
19807+
19808+ return unlikely(netif_queue_stopped(dev)) &&
19809+ netfront_tx_slot_available(np) &&
19810+ likely(netif_running(dev));
19811+}
19812+EXPORT_SYMBOL(netfront_check_queue_ready);
19813+
19814+
19815+static int network_open(struct net_device *dev)
19816+{
19817+ struct netfront_info *np = netdev_priv(dev);
19818+
19819+ memset(&np->stats, 0, sizeof(np->stats));
19820+
19821+ spin_lock_bh(&np->rx_lock);
19822+ if (netfront_carrier_ok(np)) {
19823+ network_alloc_rx_buffers(dev);
19824+ np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
19825+ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){
19826+ netfront_accelerator_call_stop_napi_irq(np, dev);
19827+
19828+ netif_rx_schedule(dev);
19829+ }
19830+ }
19831+ spin_unlock_bh(&np->rx_lock);
19832+
19833+ network_maybe_wake_tx(dev);
19834+
19835+ return 0;
19836+}
19837+
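+/*
+ * Reclaim transmit slots for responses the backend has produced: end each
+ * grant reference, free the corresponding skb and return its id to the
+ * free list.
+ */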
19838+static void network_tx_buf_gc(struct net_device *dev)
19839+{
19840+ RING_IDX cons, prod;
19841+ unsigned short id;
19842+ struct netfront_info *np = netdev_priv(dev);
19843+ struct sk_buff *skb;
19844+
19845+ BUG_ON(!netfront_carrier_ok(np));
19846+
19847+ do {
19848+ prod = np->tx.sring->rsp_prod;
19849+ rmb(); /* Ensure we see responses up to 'rp'. */
19850+
19851+ for (cons = np->tx.rsp_cons; cons != prod; cons++) {
19852+ struct netif_tx_response *txrsp;
19853+
19854+ txrsp = RING_GET_RESPONSE(&np->tx, cons);
19855+ if (txrsp->status == NETIF_RSP_NULL)
19856+ continue;
19857+
19858+ id = txrsp->id;
19859+ skb = np->tx_skbs[id];
19860+ if (unlikely(gnttab_query_foreign_access(
19861+ np->grant_tx_ref[id]) != 0)) {
19862+ printk(KERN_ALERT "network_tx_buf_gc: warning "
19863+ "-- grant still in use by backend "
19864+ "domain.\n");
19865+ BUG();
19866+ }
19867+ gnttab_end_foreign_access_ref(np->grant_tx_ref[id]);
19868+ gnttab_release_grant_reference(
19869+ &np->gref_tx_head, np->grant_tx_ref[id]);
19870+ np->grant_tx_ref[id] = GRANT_INVALID_REF;
19871+ add_id_to_freelist(np->tx_skbs, id);
19872+ dev_kfree_skb_irq(skb);
19873+ }
19874+
19875+ np->tx.rsp_cons = prod;
19876+
19877+ /*
19878+ * Set a new event, then check for race with update of tx_cons.
19879+ * Note that it is essential to schedule a callback, no matter
19880+ * how few buffers are pending. Even if there is space in the
19881+ * transmit ring, higher layers may be blocked because too much
19882+ * data is outstanding: in such cases notification from Xen is
19883+ * likely to be the only kick that we'll get.
19884+ */
19885+ np->tx.sring->rsp_event =
19886+ prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
19887+ mb();
19888+ } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
19889+
19890+ network_maybe_wake_tx(dev);
19891+}
19892+
19893+static void rx_refill_timeout(unsigned long data)
19894+{
19895+ struct net_device *dev = (struct net_device *)data;
19896+ struct netfront_info *np = netdev_priv(dev);
19897+
19898+ netfront_accelerator_call_stop_napi_irq(np, dev);
19899+
19900+ netif_rx_schedule(dev);
19901+}
19902+
19903+static void network_alloc_rx_buffers(struct net_device *dev)
19904+{
19905+ unsigned short id;
19906+ struct netfront_info *np = netdev_priv(dev);
19907+ struct sk_buff *skb;
19908+ struct page *page;
19909+ int i, batch_target, notify;
19910+ RING_IDX req_prod = np->rx.req_prod_pvt;
19911+ struct xen_memory_reservation reservation;
19912+ grant_ref_t ref;
19913+ unsigned long pfn;
19914+ void *vaddr;
19915+ int nr_flips;
19916+ netif_rx_request_t *req;
19917+
19918+ if (unlikely(!netfront_carrier_ok(np)))
19919+ return;
19920+
19921+ /*
19922+ * Allocate skbuffs greedily, even though we batch updates to the
19923+ * receive ring. This creates a less bursty demand on the memory
19924+ * allocator, so should reduce the chance of failed allocation requests
19925+	 * both for ourselves and for other kernel subsystems.
19926+ */
19927+ batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
19928+ for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
19929+ /*
19930+ * Allocate an skb and a page. Do not use __dev_alloc_skb as
19931+ * that will allocate page-sized buffers which is not
19932+ * necessary here.
19933+ * 16 bytes added as necessary headroom for netif_receive_skb.
19934+ */
19935+ skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN,
19936+ GFP_ATOMIC | __GFP_NOWARN);
19937+ if (unlikely(!skb))
19938+ goto no_skb;
19939+
19940+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
19941+ if (!page) {
19942+ kfree_skb(skb);
19943+no_skb:
19944+ /* Any skbuffs queued for refill? Force them out. */
19945+ if (i != 0)
19946+ goto refill;
19947+ /* Could not allocate any skbuffs. Try again later. */
19948+ mod_timer(&np->rx_refill_timer,
19949+ jiffies + (HZ/10));
19950+ break;
19951+ }
19952+
19953+ skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */
19954+ skb_shinfo(skb)->frags[0].page = page;
19955+ skb_shinfo(skb)->nr_frags = 1;
19956+ __skb_queue_tail(&np->rx_batch, skb);
19957+ }
19958+
19959+ /* Is the batch large enough to be worthwhile? */
19960+ if (i < (np->rx_target/2)) {
19961+ if (req_prod > np->rx.sring->req_prod)
19962+ goto push;
19963+ return;
19964+ }
19965+
19966+ /* Adjust our fill target if we risked running out of buffers. */
19967+ if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
19968+ ((np->rx_target *= 2) > np->rx_max_target))
19969+ np->rx_target = np->rx_max_target;
19970+
19971+ refill:
19972+ for (nr_flips = i = 0; ; i++) {
19973+ if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
19974+ break;
19975+
19976+ skb->dev = dev;
19977+
19978+ id = xennet_rxidx(req_prod + i);
19979+
19980+ BUG_ON(np->rx_skbs[id]);
19981+ np->rx_skbs[id] = skb;
19982+
19983+ ref = gnttab_claim_grant_reference(&np->gref_rx_head);
19984+ BUG_ON((signed short)ref < 0);
19985+ np->grant_rx_ref[id] = ref;
19986+
19987+ pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
19988+ vaddr = page_address(skb_shinfo(skb)->frags[0].page);
19989+
19990+ req = RING_GET_REQUEST(&np->rx, req_prod + i);
19991+ if (!np->copying_receiver) {
19992+ gnttab_grant_foreign_transfer_ref(ref,
19993+ np->xbdev->otherend_id,
19994+ pfn);
19995+ np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
19996+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
19997+ /* Remove this page before passing
19998+ * back to Xen. */
19999+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
20000+ MULTI_update_va_mapping(np->rx_mcl+i,
20001+ (unsigned long)vaddr,
20002+ __pte(0), 0);
20003+ }
20004+ nr_flips++;
20005+ } else {
20006+ gnttab_grant_foreign_access_ref(ref,
20007+ np->xbdev->otherend_id,
20008+ pfn_to_mfn(pfn),
20009+ 0);
20010+ }
20011+
20012+ req->id = id;
20013+ req->gref = ref;
20014+ }
20015+
20016+	if (nr_flips != 0) {
20017+		/* Tell the balloon driver what is going on. */
20018+ balloon_update_driver_allowance(i);
20019+
20020+ set_xen_guest_handle(reservation.extent_start,
20021+ np->rx_pfn_array);
20022+ reservation.nr_extents = nr_flips;
20023+ reservation.extent_order = 0;
20024+ reservation.address_bits = 0;
20025+ reservation.domid = DOMID_SELF;
20026+
20027+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
20028+ /* After all PTEs have been zapped, flush the TLB. */
20029+ np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
20030+ UVMF_TLB_FLUSH|UVMF_ALL;
20031+
20032+ /* Give away a batch of pages. */
20033+ np->rx_mcl[i].op = __HYPERVISOR_memory_op;
20034+ np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
20035+ np->rx_mcl[i].args[1] = (unsigned long)&reservation;
20036+
20037+ /* Zap PTEs and give away pages in one big
20038+ * multicall. */
20039+ if (unlikely(HYPERVISOR_multicall(np->rx_mcl, i+1)))
20040+ BUG();
20041+
20042+ /* Check return status of HYPERVISOR_memory_op(). */
20043+ if (unlikely(np->rx_mcl[i].result != i))
20044+ panic("Unable to reduce memory reservation\n");
20045+ while (nr_flips--)
20046+ BUG_ON(np->rx_mcl[nr_flips].result);
20047+ } else {
20048+ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
20049+ &reservation) != i)
20050+ panic("Unable to reduce memory reservation\n");
20051+ }
20052+ } else {
20053+ wmb();
20054+ }
20055+
20056+ /* Above is a suitable barrier to ensure backend will see requests. */
20057+ np->rx.req_prod_pvt = req_prod + i;
20058+ push:
20059+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
20060+ if (notify)
20061+ notify_remote_via_irq(np->irq);
20062+}
20063+
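+/*
+ * Queue tx requests for the remainder of the packet: any linear data beyond
+ * the first page, followed by one request per fragment, granting the backend
+ * read-only access to each page.
+ */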
20064+static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
20065+ struct netif_tx_request *tx)
20066+{
20067+ struct netfront_info *np = netdev_priv(dev);
20068+ char *data = skb->data;
20069+ unsigned long mfn;
20070+ RING_IDX prod = np->tx.req_prod_pvt;
20071+ int frags = skb_shinfo(skb)->nr_frags;
20072+ unsigned int offset = offset_in_page(data);
20073+ unsigned int len = skb_headlen(skb);
20074+ unsigned int id;
20075+ grant_ref_t ref;
20076+ int i;
20077+
20078+ while (len > PAGE_SIZE - offset) {
20079+ tx->size = PAGE_SIZE - offset;
20080+ tx->flags |= NETTXF_more_data;
20081+ len -= tx->size;
20082+ data += tx->size;
20083+ offset = 0;
20084+
20085+ id = get_id_from_freelist(np->tx_skbs);
20086+ np->tx_skbs[id] = skb_get(skb);
20087+ tx = RING_GET_REQUEST(&np->tx, prod++);
20088+ tx->id = id;
20089+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
20090+ BUG_ON((signed short)ref < 0);
20091+
20092+ mfn = virt_to_mfn(data);
20093+ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
20094+ mfn, GTF_readonly);
20095+
20096+ tx->gref = np->grant_tx_ref[id] = ref;
20097+ tx->offset = offset;
20098+ tx->size = len;
20099+ tx->flags = 0;
20100+ }
20101+
20102+ for (i = 0; i < frags; i++) {
20103+ skb_frag_t *frag = skb_shinfo(skb)->frags + i;
20104+
20105+ tx->flags |= NETTXF_more_data;
20106+
20107+ id = get_id_from_freelist(np->tx_skbs);
20108+ np->tx_skbs[id] = skb_get(skb);
20109+ tx = RING_GET_REQUEST(&np->tx, prod++);
20110+ tx->id = id;
20111+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
20112+ BUG_ON((signed short)ref < 0);
20113+
20114+ mfn = pfn_to_mfn(page_to_pfn(frag->page));
20115+ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
20116+ mfn, GTF_readonly);
20117+
20118+ tx->gref = np->grant_tx_ref[id] = ref;
20119+ tx->offset = frag->page_offset;
20120+ tx->size = frag->size;
20121+ tx->flags = 0;
20122+ }
20123+
20124+ np->tx.req_prod_pvt = prod;
20125+}
20126+
20127+static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
20128+{
20129+ unsigned short id;
20130+ struct netfront_info *np = netdev_priv(dev);
20131+ struct netif_tx_request *tx;
20132+ struct netif_extra_info *extra;
20133+ char *data = skb->data;
20134+ RING_IDX i;
20135+ grant_ref_t ref;
20136+ unsigned long mfn;
20137+ int notify;
20138+ int frags = skb_shinfo(skb)->nr_frags;
20139+ unsigned int offset = offset_in_page(data);
20140+ unsigned int len = skb_headlen(skb);
20141+
20142+ /* Check the fast path, if hooks are available */
20143+ if (np->accel_vif_state.hooks &&
20144+ np->accel_vif_state.hooks->start_xmit(skb, dev)) {
20145+ /* Fast path has sent this packet */
20146+ return 0;
20147+ }
20148+
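+	/* Count the ring slots the linear data will occupy; it may span
+	 * several pages, each of which needs its own request. */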
20149+ frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
20150+ if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
20151+ printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
20152+ frags);
20153+ dump_stack();
20154+ goto drop;
20155+ }
20156+
20157+ spin_lock_irq(&np->tx_lock);
20158+
20159+ if (unlikely(!netfront_carrier_ok(np) ||
20160+ (frags > 1 && !xennet_can_sg(dev)) ||
20161+ netif_needs_gso(dev, skb))) {
20162+ spin_unlock_irq(&np->tx_lock);
20163+ goto drop;
20164+ }
20165+
20166+ i = np->tx.req_prod_pvt;
20167+
20168+ id = get_id_from_freelist(np->tx_skbs);
20169+ np->tx_skbs[id] = skb;
20170+
20171+ tx = RING_GET_REQUEST(&np->tx, i);
20172+
20173+ tx->id = id;
20174+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
20175+ BUG_ON((signed short)ref < 0);
20176+ mfn = virt_to_mfn(data);
20177+ gnttab_grant_foreign_access_ref(
20178+ ref, np->xbdev->otherend_id, mfn, GTF_readonly);
20179+ tx->gref = np->grant_tx_ref[id] = ref;
20180+ tx->offset = offset;
20181+ tx->size = len;
20182+
20183+ tx->flags = 0;
20184+ extra = NULL;
20185+
20186+ if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
20187+ tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
20188+#ifdef CONFIG_XEN
20189+ if (skb->proto_data_valid) /* remote but checksummed? */
20190+ tx->flags |= NETTXF_data_validated;
20191+#endif
20192+
20193+#if HAVE_TSO
20194+ if (skb_shinfo(skb)->gso_size) {
20195+ struct netif_extra_info *gso = (struct netif_extra_info *)
20196+ RING_GET_REQUEST(&np->tx, ++i);
20197+
20198+ if (extra)
20199+ extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
20200+ else
20201+ tx->flags |= NETTXF_extra_info;
20202+
20203+ gso->u.gso.size = skb_shinfo(skb)->gso_size;
20204+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
20205+ gso->u.gso.pad = 0;
20206+ gso->u.gso.features = 0;
20207+
20208+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
20209+ gso->flags = 0;
20210+ extra = gso;
20211+ }
20212+#endif
20213+
20214+ np->tx.req_prod_pvt = i + 1;
20215+
20216+ xennet_make_frags(skb, dev, tx);
20217+ tx->size = skb->len;
20218+
20219+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
20220+ if (notify)
20221+ notify_remote_via_irq(np->irq);
20222+
20223+ np->stats.tx_bytes += skb->len;
20224+ np->stats.tx_packets++;
20225+ dev->trans_start = jiffies;
20226+
20227+ /* Note: It is not safe to access skb after network_tx_buf_gc()! */
20228+ network_tx_buf_gc(dev);
20229+
20230+ if (!netfront_tx_slot_available(np))
20231+ netif_stop_queue(dev);
20232+
20233+ spin_unlock_irq(&np->tx_lock);
20234+
20235+ return 0;
20236+
20237+ drop:
20238+ np->stats.tx_dropped++;
20239+ dev_kfree_skb(skb);
20240+ return 0;
20241+}
20242+
20243+static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
20244+{
20245+ struct net_device *dev = dev_id;
20246+ struct netfront_info *np = netdev_priv(dev);
20247+ unsigned long flags;
20248+
20249+ spin_lock_irqsave(&np->tx_lock, flags);
20250+
20251+ if (likely(netfront_carrier_ok(np))) {
20252+ network_tx_buf_gc(dev);
20253+ /* Under tx_lock: protects access to rx shared-ring indexes. */
20254+ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
20255+ netfront_accelerator_call_stop_napi_irq(np, dev);
20256+
20257+ netif_rx_schedule(dev);
20258+ dev->last_rx = jiffies;
20259+ }
20260+ }
20261+
20262+ spin_unlock_irqrestore(&np->tx_lock, flags);
20263+
20264+ return IRQ_HANDLED;
20265+}
20266+
20267+static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
20268+ grant_ref_t ref)
20269+{
20270+ int new = xennet_rxidx(np->rx.req_prod_pvt);
20271+
20272+ BUG_ON(np->rx_skbs[new]);
20273+ np->rx_skbs[new] = skb;
20274+ np->grant_rx_ref[new] = ref;
20275+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
20276+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
20277+ np->rx.req_prod_pvt++;
20278+}
20279+
20280+int xennet_get_extras(struct netfront_info *np,
20281+ struct netif_extra_info *extras, RING_IDX rp)
20282+
20283+{
20284+ struct netif_extra_info *extra;
20285+ RING_IDX cons = np->rx.rsp_cons;
20286+ int err = 0;
20287+
20288+ do {
20289+ struct sk_buff *skb;
20290+ grant_ref_t ref;
20291+
20292+ if (unlikely(cons + 1 == rp)) {
20293+ if (net_ratelimit())
20294+ WPRINTK("Missing extra info\n");
20295+ err = -EBADR;
20296+ break;
20297+ }
20298+
20299+ extra = (struct netif_extra_info *)
20300+ RING_GET_RESPONSE(&np->rx, ++cons);
20301+
20302+ if (unlikely(!extra->type ||
20303+ extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
20304+ if (net_ratelimit())
20305+ WPRINTK("Invalid extra type: %d\n",
20306+ extra->type);
20307+ err = -EINVAL;
20308+ } else {
20309+ memcpy(&extras[extra->type - 1], extra,
20310+ sizeof(*extra));
20311+ }
20312+
20313+ skb = xennet_get_rx_skb(np, cons);
20314+ ref = xennet_get_rx_ref(np, cons);
20315+ xennet_move_rx_slot(np, skb, ref);
20316+ } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
20317+
20318+ np->rx.rsp_cons = cons;
20319+ return err;
20320+}
20321+
20322+static int xennet_get_responses(struct netfront_info *np,
20323+ struct netfront_rx_info *rinfo, RING_IDX rp,
20324+ struct sk_buff_head *list,
20325+ int *pages_flipped_p)
20326+{
20327+ int pages_flipped = *pages_flipped_p;
20328+ struct mmu_update *mmu;
20329+ struct multicall_entry *mcl;
20330+ struct netif_rx_response *rx = &rinfo->rx;
20331+ struct netif_extra_info *extras = rinfo->extras;
20332+ RING_IDX cons = np->rx.rsp_cons;
20333+ struct sk_buff *skb = xennet_get_rx_skb(np, cons);
20334+ grant_ref_t ref = xennet_get_rx_ref(np, cons);
20335+ int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
20336+ int frags = 1;
20337+ int err = 0;
20338+ unsigned long ret;
20339+
20340+ if (rx->flags & NETRXF_extra_info) {
20341+ err = xennet_get_extras(np, extras, rp);
20342+ cons = np->rx.rsp_cons;
20343+ }
20344+
20345+ for (;;) {
20346+ unsigned long mfn;
20347+
20348+ if (unlikely(rx->status < 0 ||
20349+ rx->offset + rx->status > PAGE_SIZE)) {
20350+ if (net_ratelimit())
20351+ WPRINTK("rx->offset: %x, size: %u\n",
20352+ rx->offset, rx->status);
20353+ xennet_move_rx_slot(np, skb, ref);
20354+ err = -EINVAL;
20355+ goto next;
20356+ }
20357+
20358+ /*
20359+ * This definitely indicates a bug, either in this driver or in
20360+ * the backend driver. In future this should flag the bad
20361+	 * situation to the system controller to reboot the backend.
20362+ */
20363+ if (ref == GRANT_INVALID_REF) {
20364+ if (net_ratelimit())
20365+ WPRINTK("Bad rx response id %d.\n", rx->id);
20366+ err = -EINVAL;
20367+ goto next;
20368+ }
20369+
20370+ if (!np->copying_receiver) {
20371+ /* Memory pressure, insufficient buffer
20372+ * headroom, ... */
20373+ if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
20374+ if (net_ratelimit())
20375+ WPRINTK("Unfulfilled rx req "
20376+ "(id=%d, st=%d).\n",
20377+ rx->id, rx->status);
20378+ xennet_move_rx_slot(np, skb, ref);
20379+ err = -ENOMEM;
20380+ goto next;
20381+ }
20382+
20383+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
20384+ /* Remap the page. */
20385+ struct page *page =
20386+ skb_shinfo(skb)->frags[0].page;
20387+ unsigned long pfn = page_to_pfn(page);
20388+ void *vaddr = page_address(page);
20389+
20390+ mcl = np->rx_mcl + pages_flipped;
20391+ mmu = np->rx_mmu + pages_flipped;
20392+
20393+ MULTI_update_va_mapping(mcl,
20394+ (unsigned long)vaddr,
20395+ pfn_pte_ma(mfn,
20396+ PAGE_KERNEL),
20397+ 0);
20398+ mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
20399+ | MMU_MACHPHYS_UPDATE;
20400+ mmu->val = pfn;
20401+
20402+ set_phys_to_machine(pfn, mfn);
20403+ }
20404+ pages_flipped++;
20405+ } else {
20406+ ret = gnttab_end_foreign_access_ref(ref);
20407+ BUG_ON(!ret);
20408+ }
20409+
20410+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
20411+
20412+ __skb_queue_tail(list, skb);
20413+
20414+next:
20415+ if (!(rx->flags & NETRXF_more_data))
20416+ break;
20417+
20418+ if (cons + frags == rp) {
20419+ if (net_ratelimit())
20420+ WPRINTK("Need more frags\n");
20421+ err = -ENOENT;
20422+ break;
20423+ }
20424+
20425+ rx = RING_GET_RESPONSE(&np->rx, cons + frags);
20426+ skb = xennet_get_rx_skb(np, cons + frags);
20427+ ref = xennet_get_rx_ref(np, cons + frags);
20428+ frags++;
20429+ }
20430+
20431+ if (unlikely(frags > max)) {
20432+ if (net_ratelimit())
20433+ WPRINTK("Too many frags\n");
20434+ err = -E2BIG;
20435+ }
20436+
20437+ if (unlikely(err))
20438+ np->rx.rsp_cons = cons + frags;
20439+
20440+ *pages_flipped_p = pages_flipped;
20441+
20442+ return err;
20443+}
20444+
20445+static RING_IDX xennet_fill_frags(struct netfront_info *np,
20446+ struct sk_buff *skb,
20447+ struct sk_buff_head *list)
20448+{
20449+ struct skb_shared_info *shinfo = skb_shinfo(skb);
20450+ int nr_frags = shinfo->nr_frags;
20451+ RING_IDX cons = np->rx.rsp_cons;
20452+ skb_frag_t *frag = shinfo->frags + nr_frags;
20453+ struct sk_buff *nskb;
20454+
20455+ while ((nskb = __skb_dequeue(list))) {
20456+ struct netif_rx_response *rx =
20457+ RING_GET_RESPONSE(&np->rx, ++cons);
20458+
20459+ frag->page = skb_shinfo(nskb)->frags[0].page;
20460+ frag->page_offset = rx->offset;
20461+ frag->size = rx->status;
20462+
20463+ skb->data_len += rx->status;
20464+
20465+ skb_shinfo(nskb)->nr_frags = 0;
20466+ kfree_skb(nskb);
20467+
20468+ frag++;
20469+ nr_frags++;
20470+ }
20471+
20472+ shinfo->nr_frags = nr_frags;
20473+ return cons;
20474+}
20475+
20476+static int xennet_set_skb_gso(struct sk_buff *skb,
20477+ struct netif_extra_info *gso)
20478+{
20479+ if (!gso->u.gso.size) {
20480+ if (net_ratelimit())
20481+ WPRINTK("GSO size must not be zero.\n");
20482+ return -EINVAL;
20483+ }
20484+
20485+ /* Currently only TCPv4 S.O. is supported. */
20486+ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
20487+ if (net_ratelimit())
20488+ WPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
20489+ return -EINVAL;
20490+ }
20491+
20492+#if HAVE_TSO
20493+ skb_shinfo(skb)->gso_size = gso->u.gso.size;
20494+#if HAVE_GSO
20495+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
20496+
20497+ /* Header must be checked, and gso_segs computed. */
20498+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
20499+#endif
20500+ skb_shinfo(skb)->gso_segs = 0;
20501+
20502+ return 0;
20503+#else
20504+ if (net_ratelimit())
20505+ WPRINTK("GSO unsupported by this kernel.\n");
20506+ return -EINVAL;
20507+#endif
20508+}
20509+
20510+static int netif_poll(struct net_device *dev, int *pbudget)
20511+{
20512+ struct netfront_info *np = netdev_priv(dev);
20513+ struct sk_buff *skb;
20514+ struct netfront_rx_info rinfo;
20515+ struct netif_rx_response *rx = &rinfo.rx;
20516+ struct netif_extra_info *extras = rinfo.extras;
20517+ RING_IDX i, rp;
20518+ struct multicall_entry *mcl;
20519+ int work_done, budget, more_to_do = 1, accel_more_to_do = 1;
20520+ struct sk_buff_head rxq;
20521+ struct sk_buff_head errq;
20522+ struct sk_buff_head tmpq;
20523+ unsigned long flags;
20524+ unsigned int len;
20525+ int pages_flipped = 0;
20526+ int err;
20527+
20528+ spin_lock(&np->rx_lock); /* no need for spin_lock_bh() in ->poll() */
20529+
20530+ if (unlikely(!netfront_carrier_ok(np))) {
20531+ spin_unlock(&np->rx_lock);
20532+ return 0;
20533+ }
20534+
20535+ skb_queue_head_init(&rxq);
20536+ skb_queue_head_init(&errq);
20537+ skb_queue_head_init(&tmpq);
20538+
20539+ if ((budget = *pbudget) > dev->quota)
20540+ budget = dev->quota;
20541+ rp = np->rx.sring->rsp_prod;
20542+ rmb(); /* Ensure we see queued responses up to 'rp'. */
20543+
20544+ i = np->rx.rsp_cons;
20545+ work_done = 0;
20546+ while ((i != rp) && (work_done < budget)) {
20547+ memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
20548+ memset(extras, 0, sizeof(rinfo.extras));
20549+
20550+ err = xennet_get_responses(np, &rinfo, rp, &tmpq,
20551+ &pages_flipped);
20552+
20553+ if (unlikely(err)) {
20554+err:
20555+ while ((skb = __skb_dequeue(&tmpq)))
20556+ __skb_queue_tail(&errq, skb);
20557+ np->stats.rx_errors++;
20558+ i = np->rx.rsp_cons;
20559+ continue;
20560+ }
20561+
20562+ skb = __skb_dequeue(&tmpq);
20563+
20564+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
20565+ struct netif_extra_info *gso;
20566+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
20567+
20568+ if (unlikely(xennet_set_skb_gso(skb, gso))) {
20569+ __skb_queue_head(&tmpq, skb);
20570+ np->rx.rsp_cons += skb_queue_len(&tmpq);
20571+ goto err;
20572+ }
20573+ }
20574+
20575+ NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
20576+ NETFRONT_SKB_CB(skb)->offset = rx->offset;
20577+
20578+ len = rx->status;
20579+ if (len > RX_COPY_THRESHOLD)
20580+ len = RX_COPY_THRESHOLD;
20581+ skb_put(skb, len);
20582+
20583+ if (rx->status > len) {
20584+ skb_shinfo(skb)->frags[0].page_offset =
20585+ rx->offset + len;
20586+ skb_shinfo(skb)->frags[0].size = rx->status - len;
20587+ skb->data_len = rx->status - len;
20588+ } else {
20589+ skb_shinfo(skb)->frags[0].page = NULL;
20590+ skb_shinfo(skb)->nr_frags = 0;
20591+ }
20592+
20593+ i = xennet_fill_frags(np, skb, &tmpq);
20594+
20595+ /*
20596+	 * Truesize must approximate the size of true data plus
20597+ * any supervisor overheads. Adding hypervisor overheads
20598+ * has been shown to significantly reduce achievable
20599+ * bandwidth with the default receive buffer size. It is
20600+ * therefore not wise to account for it here.
20601+ *
20602+ * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
20603+ * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
20604+ * add the size of the data pulled in xennet_fill_frags().
20605+ *
20606+ * We also adjust for any unused space in the main data
20607+ * area by subtracting (RX_COPY_THRESHOLD - len). This is
20608+ * especially important with drivers which split incoming
20609+ * packets into header and data, using only 66 bytes of
20610+ * the main data area (see the e1000 driver for example.)
20611+	 * On such systems, without this last adjustment, our
20612+	 * achievable receive throughput using the standard receive
20613+ * buffer size was cut by 25%(!!!).
20614+ */
20615+ skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
20616+ skb->len += skb->data_len;
20617+
20618+ /*
20619+ * Old backends do not assert data_validated but we
20620+ * can infer it from csum_blank so test both flags.
20621+ */
20622+ if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank))
20623+ skb->ip_summed = CHECKSUM_UNNECESSARY;
20624+ else
20625+ skb->ip_summed = CHECKSUM_NONE;
20626+#ifdef CONFIG_XEN
20627+ skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE);
20628+ skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
20629+#endif
20630+ np->stats.rx_packets++;
20631+ np->stats.rx_bytes += skb->len;
20632+
20633+ __skb_queue_tail(&rxq, skb);
20634+
20635+ np->rx.rsp_cons = ++i;
20636+ work_done++;
20637+ }
20638+
20639+ if (pages_flipped) {
20640+ /* Some pages are no longer absent... */
20641+ balloon_update_driver_allowance(-pages_flipped);
20642+
20643+ /* Do all the remapping work and M2P updates. */
20644+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
20645+ mcl = np->rx_mcl + pages_flipped;
20646+ mcl->op = __HYPERVISOR_mmu_update;
20647+ mcl->args[0] = (unsigned long)np->rx_mmu;
20648+ mcl->args[1] = pages_flipped;
20649+ mcl->args[2] = 0;
20650+ mcl->args[3] = DOMID_SELF;
20651+ err = HYPERVISOR_multicall_check(np->rx_mcl,
20652+ pages_flipped + 1,
20653+ NULL);
20654+ BUG_ON(err);
20655+ }
20656+ }
20657+
20658+ while ((skb = __skb_dequeue(&errq)))
20659+ kfree_skb(skb);
20660+
20661+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
20662+ struct page *page = NETFRONT_SKB_CB(skb)->page;
20663+ void *vaddr = page_address(page);
20664+ unsigned offset = NETFRONT_SKB_CB(skb)->offset;
20665+
20666+ memcpy(skb->data, vaddr + offset, skb_headlen(skb));
20667+
20668+ if (page != skb_shinfo(skb)->frags[0].page)
20669+ __free_page(page);
20670+
20671+ /* Ethernet work: Delayed to here as it peeks the header. */
20672+ skb->protocol = eth_type_trans(skb, dev);
20673+
20674+ /* Pass it up. */
20675+ netif_receive_skb(skb);
20676+ dev->last_rx = jiffies;
20677+ }
20678+
20679+ /* If we get a callback with very few responses, reduce fill target. */
20680+ /* NB. Note exponential increase, linear decrease. */
20681+ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
20682+ ((3*np->rx_target) / 4)) &&
20683+ (--np->rx_target < np->rx_min_target))
20684+ np->rx_target = np->rx_min_target;
20685+
20686+ network_alloc_rx_buffers(dev);
20687+
20688+ if (work_done < budget) {
20689+ /* there's some spare capacity, try the accelerated path */
20690+ int accel_budget = budget - work_done;
20691+ int accel_budget_start = accel_budget;
20692+
20693+ if (np->accel_vif_state.hooks) {
20694+ accel_more_to_do =
20695+ np->accel_vif_state.hooks->netdev_poll
20696+ (dev, &accel_budget);
20697+ work_done += (accel_budget_start - accel_budget);
20698+ } else
20699+ accel_more_to_do = 0;
20700+ }
20701+
20702+ *pbudget -= work_done;
20703+ dev->quota -= work_done;
20704+
20705+ if (work_done < budget) {
20706+ local_irq_save(flags);
20707+
20708+ RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
20709+
20710+ if (!more_to_do && !accel_more_to_do &&
20711+ np->accel_vif_state.hooks) {
20712+ /*
20713+ * Slow path has nothing more to do, see if
20714+ * fast path is likewise
20715+ */
20716+ accel_more_to_do =
20717+ np->accel_vif_state.hooks->start_napi_irq(dev);
20718+ }
20719+
20720+ if (!more_to_do && !accel_more_to_do)
20721+ __netif_rx_complete(dev);
20722+
20723+ local_irq_restore(flags);
20724+ }
20725+
20726+ spin_unlock(&np->rx_lock);
20727+
20728+ return more_to_do | accel_more_to_do;
20729+}
20730+
20731+static void netif_release_tx_bufs(struct netfront_info *np)
20732+{
20733+ struct sk_buff *skb;
20734+ int i;
20735+
20736+ for (i = 1; i <= NET_TX_RING_SIZE; i++) {
20737+ if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
20738+ continue;
20739+
20740+ skb = np->tx_skbs[i];
20741+ gnttab_end_foreign_access_ref(np->grant_tx_ref[i]);
20742+ gnttab_release_grant_reference(
20743+ &np->gref_tx_head, np->grant_tx_ref[i]);
20744+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
20745+ add_id_to_freelist(np->tx_skbs, i);
20746+ dev_kfree_skb_irq(skb);
20747+ }
20748+}
20749+
20750+static void netif_release_rx_bufs_flip(struct netfront_info *np)
20751+{
20752+ struct mmu_update *mmu = np->rx_mmu;
20753+ struct multicall_entry *mcl = np->rx_mcl;
20754+ struct sk_buff_head free_list;
20755+ struct sk_buff *skb;
20756+ unsigned long mfn;
20757+ int xfer = 0, noxfer = 0, unused = 0;
20758+ int id, ref, rc;
20759+
20760+ skb_queue_head_init(&free_list);
20761+
20762+ spin_lock_bh(&np->rx_lock);
20763+
20764+ for (id = 0; id < NET_RX_RING_SIZE; id++) {
20765+ if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
20766+ unused++;
20767+ continue;
20768+ }
20769+
20770+ skb = np->rx_skbs[id];
20771+ mfn = gnttab_end_foreign_transfer_ref(ref);
20772+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
20773+ np->grant_rx_ref[id] = GRANT_INVALID_REF;
20774+ add_id_to_freelist(np->rx_skbs, id);
20775+
20776+ if (0 == mfn) {
20777+ struct page *page = skb_shinfo(skb)->frags[0].page;
20778+ balloon_release_driver_page(page);
20779+ skb_shinfo(skb)->nr_frags = 0;
20780+ dev_kfree_skb(skb);
20781+ noxfer++;
20782+ continue;
20783+ }
20784+
20785+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
20786+ /* Remap the page. */
20787+ struct page *page = skb_shinfo(skb)->frags[0].page;
20788+ unsigned long pfn = page_to_pfn(page);
20789+ void *vaddr = page_address(page);
20790+
20791+ MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
20792+ pfn_pte_ma(mfn, PAGE_KERNEL),
20793+ 0);
20794+ mcl++;
20795+ mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
20796+ | MMU_MACHPHYS_UPDATE;
20797+ mmu->val = pfn;
20798+ mmu++;
20799+
20800+ set_phys_to_machine(pfn, mfn);
20801+ }
20802+ __skb_queue_tail(&free_list, skb);
20803+ xfer++;
20804+ }
20805+
20806+ DPRINTK("%s: %d xfer, %d noxfer, %d unused\n",
20807+ __FUNCTION__, xfer, noxfer, unused);
20808+
20809+ if (xfer) {
20810+ /* Some pages are no longer absent... */
20811+ balloon_update_driver_allowance(-xfer);
20812+
20813+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
20814+ /* Do all the remapping work and M2P updates. */
20815+ mcl->op = __HYPERVISOR_mmu_update;
20816+ mcl->args[0] = (unsigned long)np->rx_mmu;
20817+ mcl->args[1] = mmu - np->rx_mmu;
20818+ mcl->args[2] = 0;
20819+ mcl->args[3] = DOMID_SELF;
20820+ mcl++;
20821+ rc = HYPERVISOR_multicall_check(
20822+ np->rx_mcl, mcl - np->rx_mcl, NULL);
20823+ BUG_ON(rc);
20824+ }
20825+ }
20826+
20827+ while ((skb = __skb_dequeue(&free_list)) != NULL)
20828+ dev_kfree_skb(skb);
20829+
20830+ spin_unlock_bh(&np->rx_lock);
20831+}
20832+
20833+static void netif_release_rx_bufs_copy(struct netfront_info *np)
20834+{
20835+ struct sk_buff *skb;
20836+ int i, ref;
20837+ int busy = 0, inuse = 0;
20838+
20839+ spin_lock_bh(&np->rx_lock);
20840+
20841+ for (i = 0; i < NET_RX_RING_SIZE; i++) {
20842+ ref = np->grant_rx_ref[i];
20843+
20844+ if (ref == GRANT_INVALID_REF)
20845+ continue;
20846+
20847+ inuse++;
20848+
20849+ skb = np->rx_skbs[i];
20850+
20851+ if (!gnttab_end_foreign_access_ref(ref))
20852+ {
20853+ busy++;
20854+ continue;
20855+ }
20856+
20857+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
20858+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
20859+ add_id_to_freelist(np->rx_skbs, i);
20860+
20861+ dev_kfree_skb(skb);
20862+ }
20863+
20864+ if (busy)
20865+ DPRINTK("%s: Unable to release %d of %d inuse grant references out of %ld total.\n",
20866+ __FUNCTION__, busy, inuse, NET_RX_RING_SIZE);
20867+
20868+ spin_unlock_bh(&np->rx_lock);
20869+}
20870+
20871+static int network_close(struct net_device *dev)
20872+{
20873+ struct netfront_info *np = netdev_priv(dev);
20874+ netif_stop_queue(np->netdev);
20875+ return 0;
20876+}
20877+
20878+
20879+static struct net_device_stats *network_get_stats(struct net_device *dev)
20880+{
20881+ struct netfront_info *np = netdev_priv(dev);
20882+
20883+ netfront_accelerator_call_get_stats(np, dev);
20884+ return &np->stats;
20885+}
20886+
20887+static int xennet_set_mac_address(struct net_device *dev, void *p)
20888+{
20889+ struct netfront_info *np = netdev_priv(dev);
20890+ struct sockaddr *addr = p;
20891+
20892+ if (netif_running(dev))
20893+ return -EBUSY;
20894+
20895+ if (!is_valid_ether_addr(addr->sa_data))
20896+ return -EADDRNOTAVAIL;
20897+
20898+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
20899+ memcpy(np->mac, addr->sa_data, ETH_ALEN);
20900+
20901+ return 0;
20902+}
20903+
20904+static int xennet_change_mtu(struct net_device *dev, int mtu)
20905+{
20906+ int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
20907+
20908+ if (mtu > max)
20909+ return -EINVAL;
20910+ dev->mtu = mtu;
20911+ return 0;
20912+}
20913+
20914+static int xennet_set_sg(struct net_device *dev, u32 data)
20915+{
20916+ if (data) {
20917+ struct netfront_info *np = netdev_priv(dev);
20918+ int val;
20919+
20920+ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
20921+ "%d", &val) < 0)
20922+ val = 0;
20923+ if (!val)
20924+ return -ENOSYS;
20925+ } else if (dev->mtu > ETH_DATA_LEN)
20926+ dev->mtu = ETH_DATA_LEN;
20927+
20928+ return ethtool_op_set_sg(dev, data);
20929+}
20930+
20931+static int xennet_set_tso(struct net_device *dev, u32 data)
20932+{
20933+ if (data) {
20934+ struct netfront_info *np = netdev_priv(dev);
20935+ int val;
20936+
20937+ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
20938+ "feature-gso-tcpv4", "%d", &val) < 0)
20939+ val = 0;
20940+ if (!val)
20941+ return -ENOSYS;
20942+ }
20943+
20944+ return ethtool_op_set_tso(dev, data);
20945+}
20946+
20947+static void xennet_set_features(struct net_device *dev)
20948+{
20949+ dev_disable_gso_features(dev);
20950+ xennet_set_sg(dev, 0);
20951+
20952+ /* We need checksum offload to enable scatter/gather and TSO. */
20953+ if (!(dev->features & NETIF_F_IP_CSUM))
20954+ return;
20955+
20956+ if (xennet_set_sg(dev, 1))
20957+ return;
20958+
20959+ /* Before 2.6.9 TSO seems to be unreliable so do not enable it
20960+ * on older kernels.
20961+ */
20962+ if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9))
20963+ xennet_set_tso(dev, 1);
20964+}
20965+
20966+static int network_connect(struct net_device *dev)
20967+{
20968+ struct netfront_info *np = netdev_priv(dev);
20969+ int i, requeue_idx, err;
20970+ struct sk_buff *skb;
20971+ grant_ref_t ref;
20972+ netif_rx_request_t *req;
20973+ unsigned int feature_rx_copy, feature_rx_flip;
20974+
20975+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
20976+ "feature-rx-copy", "%u", &feature_rx_copy);
20977+ if (err != 1)
20978+ feature_rx_copy = 0;
20979+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
20980+ "feature-rx-flip", "%u", &feature_rx_flip);
20981+ if (err != 1)
20982+ feature_rx_flip = 1;
20983+
20984+ /*
20985+ * Copy packets on receive path if:
20986+	 * (a) This was requested by the user, and the backend supports it; or
20987+ * (b) Flipping was requested, but this is unsupported by the backend.
20988+ */
20989+ np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
20990+ (MODPARM_rx_flip && !feature_rx_flip));
20991+
20992+ err = talk_to_backend(np->xbdev, np);
20993+ if (err)
20994+ return err;
20995+
20996+ xennet_set_features(dev);
20997+
20998+ DPRINTK("device %s has %sing receive path.\n",
20999+ dev->name, np->copying_receiver ? "copy" : "flipp");
21000+
21001+ spin_lock_bh(&np->rx_lock);
21002+ spin_lock_irq(&np->tx_lock);
21003+
21004+ /*
21005+ * Recovery procedure:
21006+ * NB. Freelist index entries are always going to be less than
21007+ * PAGE_OFFSET, whereas pointers to skbs will always be equal or
21008+ * greater than PAGE_OFFSET: we use this property to distinguish
21009+ * them.
21010+ */
21011+
21012+ /* Step 1: Discard all pending TX packet fragments. */
21013+ netif_release_tx_bufs(np);
21014+
21015+ /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
21016+ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
21017+ if (!np->rx_skbs[i])
21018+ continue;
21019+
21020+ skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
21021+ ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
21022+ req = RING_GET_REQUEST(&np->rx, requeue_idx);
21023+
21024+ if (!np->copying_receiver) {
21025+ gnttab_grant_foreign_transfer_ref(
21026+ ref, np->xbdev->otherend_id,
21027+ page_to_pfn(skb_shinfo(skb)->frags->page));
21028+ } else {
21029+ gnttab_grant_foreign_access_ref(
21030+ ref, np->xbdev->otherend_id,
21031+ pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
21032+ frags->page)),
21033+ 0);
21034+ }
21035+ req->gref = ref;
21036+ req->id = requeue_idx;
21037+
21038+ requeue_idx++;
21039+ }
21040+
21041+ np->rx.req_prod_pvt = requeue_idx;
21042+
21043+ /*
21044+ * Step 3: All public and private state should now be sane. Get
21045+ * ready to start sending and receiving packets and give the driver
21046+ * domain a kick because we've probably just requeued some
21047+ * packets.
21048+ */
21049+ netfront_carrier_on(np);
21050+ notify_remote_via_irq(np->irq);
21051+ network_tx_buf_gc(dev);
21052+ network_alloc_rx_buffers(dev);
21053+
21054+ spin_unlock_irq(&np->tx_lock);
21055+ spin_unlock_bh(&np->rx_lock);
21056+
21057+ return 0;
21058+}
21059+
21060+static void netif_uninit(struct net_device *dev)
21061+{
21062+ struct netfront_info *np = netdev_priv(dev);
21063+ netif_release_tx_bufs(np);
21064+ if (np->copying_receiver)
21065+ netif_release_rx_bufs_copy(np);
21066+ else
21067+ netif_release_rx_bufs_flip(np);
21068+ gnttab_free_grant_references(np->gref_tx_head);
21069+ gnttab_free_grant_references(np->gref_rx_head);
21070+}
21071+
21072+static struct ethtool_ops network_ethtool_ops =
21073+{
21074+ .get_tx_csum = ethtool_op_get_tx_csum,
21075+ .set_tx_csum = ethtool_op_set_tx_csum,
21076+ .get_sg = ethtool_op_get_sg,
21077+ .set_sg = xennet_set_sg,
21078+#if HAVE_TSO
21079+ .get_tso = ethtool_op_get_tso,
21080+ .set_tso = xennet_set_tso,
21081+#endif
21082+ .get_link = ethtool_op_get_link,
21083+};
21084+
21085+#ifdef CONFIG_SYSFS
21086+static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
21087+{
21088+ struct net_device *netdev = container_of(cd, struct net_device,
21089+ class_dev);
21090+ struct netfront_info *info = netdev_priv(netdev);
21091+
21092+ return sprintf(buf, "%u\n", info->rx_min_target);
21093+}
21094+
21095+static ssize_t store_rxbuf_min(struct class_device *cd,
21096+ const char *buf, size_t len)
21097+{
21098+ struct net_device *netdev = container_of(cd, struct net_device,
21099+ class_dev);
21100+ struct netfront_info *np = netdev_priv(netdev);
21101+ char *endp;
21102+ unsigned long target;
21103+
21104+ if (!capable(CAP_NET_ADMIN))
21105+ return -EPERM;
21106+
21107+ target = simple_strtoul(buf, &endp, 0);
21108+ if (endp == buf)
21109+ return -EBADMSG;
21110+
21111+ if (target < RX_MIN_TARGET)
21112+ target = RX_MIN_TARGET;
21113+ if (target > RX_MAX_TARGET)
21114+ target = RX_MAX_TARGET;
21115+
21116+ spin_lock_bh(&np->rx_lock);
21117+ if (target > np->rx_max_target)
21118+ np->rx_max_target = target;
21119+ np->rx_min_target = target;
21120+ if (target > np->rx_target)
21121+ np->rx_target = target;
21122+
21123+ network_alloc_rx_buffers(netdev);
21124+
21125+ spin_unlock_bh(&np->rx_lock);
21126+ return len;
21127+}
21128+
21129+static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
21130+{
21131+ struct net_device *netdev = container_of(cd, struct net_device,
21132+ class_dev);
21133+ struct netfront_info *info = netdev_priv(netdev);
21134+
21135+ return sprintf(buf, "%u\n", info->rx_max_target);
21136+}
21137+
21138+static ssize_t store_rxbuf_max(struct class_device *cd,
21139+ const char *buf, size_t len)
21140+{
21141+ struct net_device *netdev = container_of(cd, struct net_device,
21142+ class_dev);
21143+ struct netfront_info *np = netdev_priv(netdev);
21144+ char *endp;
21145+ unsigned long target;
21146+
21147+ if (!capable(CAP_NET_ADMIN))
21148+ return -EPERM;
21149+
21150+ target = simple_strtoul(buf, &endp, 0);
21151+ if (endp == buf)
21152+ return -EBADMSG;
21153+
21154+ if (target < RX_MIN_TARGET)
21155+ target = RX_MIN_TARGET;
21156+ if (target > RX_MAX_TARGET)
21157+ target = RX_MAX_TARGET;
21158+
21159+ spin_lock_bh(&np->rx_lock);
21160+ if (target < np->rx_min_target)
21161+ np->rx_min_target = target;
21162+ np->rx_max_target = target;
21163+ if (target < np->rx_target)
21164+ np->rx_target = target;
21165+
21166+ network_alloc_rx_buffers(netdev);
21167+
21168+ spin_unlock_bh(&np->rx_lock);
21169+ return len;
21170+}
21171+
21172+static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
21173+{
21174+ struct net_device *netdev = container_of(cd, struct net_device,
21175+ class_dev);
21176+ struct netfront_info *info = netdev_priv(netdev);
21177+
21178+ return sprintf(buf, "%u\n", info->rx_target);
21179+}
21180+
21181+static const struct class_device_attribute xennet_attrs[] = {
21182+ __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
21183+ __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
21184+ __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
21185+};
21186+
21187+static int xennet_sysfs_addif(struct net_device *netdev)
21188+{
21189+ int i;
21190+ int error = 0;
21191+
21192+ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
21193+ error = class_device_create_file(&netdev->class_dev,
21194+ &xennet_attrs[i]);
21195+ if (error)
21196+ goto fail;
21197+ }
21198+ return 0;
21199+
21200+ fail:
21201+ while (--i >= 0)
21202+ class_device_remove_file(&netdev->class_dev,
21203+ &xennet_attrs[i]);
21204+ return error;
21205+}
21206+
21207+static void xennet_sysfs_delif(struct net_device *netdev)
21208+{
21209+ int i;
21210+
21211+ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
21212+ class_device_remove_file(&netdev->class_dev,
21213+ &xennet_attrs[i]);
21214+ }
21215+}
21216+
21217+#endif /* CONFIG_SYSFS */
21218+
21219+
21220+/*
21221+ * Nothing to do here. Virtual interface is point-to-point and the
21222+ * physical interface is probably promiscuous anyway.
21223+ */
21224+static void network_set_multicast_list(struct net_device *dev)
21225+{
21226+}
21227+
21228+static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
21229+{
21230+ int i, err = 0;
21231+ struct net_device *netdev = NULL;
21232+ struct netfront_info *np = NULL;
21233+
21234+ netdev = alloc_etherdev(sizeof(struct netfront_info));
21235+ if (!netdev) {
21236+ printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
21237+ __FUNCTION__);
21238+ return ERR_PTR(-ENOMEM);
21239+ }
21240+
21241+ np = netdev_priv(netdev);
21242+ np->xbdev = dev;
21243+
21244+ spin_lock_init(&np->tx_lock);
21245+ spin_lock_init(&np->rx_lock);
21246+
21247+ init_accelerator_vif(np, dev);
21248+
21249+ skb_queue_head_init(&np->rx_batch);
21250+ np->rx_target = RX_DFL_MIN_TARGET;
21251+ np->rx_min_target = RX_DFL_MIN_TARGET;
21252+ np->rx_max_target = RX_MAX_TARGET;
21253+
21254+ init_timer(&np->rx_refill_timer);
21255+ np->rx_refill_timer.data = (unsigned long)netdev;
21256+ np->rx_refill_timer.function = rx_refill_timeout;
21257+
21258+ /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
21259+ for (i = 0; i <= NET_TX_RING_SIZE; i++) {
21260+ np->tx_skbs[i] = (void *)((unsigned long) i+1);
21261+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
21262+ }
21263+
21264+ for (i = 0; i < NET_RX_RING_SIZE; i++) {
21265+ np->rx_skbs[i] = NULL;
21266+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
21267+ }
21268+
21269+ /* A grant for every tx ring slot */
21270+ if (gnttab_alloc_grant_references(TX_MAX_TARGET,
21271+ &np->gref_tx_head) < 0) {
21272+ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
21273+ err = -ENOMEM;
21274+ goto exit;
21275+ }
21276+ /* A grant for every rx ring slot */
21277+ if (gnttab_alloc_grant_references(RX_MAX_TARGET,
21278+ &np->gref_rx_head) < 0) {
21279+ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
21280+ err = -ENOMEM;
21281+ goto exit_free_tx;
21282+ }
21283+
21284+ netdev->open = network_open;
21285+ netdev->hard_start_xmit = network_start_xmit;
21286+ netdev->stop = network_close;
21287+ netdev->get_stats = network_get_stats;
21288+ netdev->poll = netif_poll;
21289+ netdev->set_multicast_list = network_set_multicast_list;
21290+ netdev->uninit = netif_uninit;
21291+ netdev->set_mac_address = xennet_set_mac_address;
21292+ netdev->change_mtu = xennet_change_mtu;
21293+ netdev->weight = 64;
21294+ netdev->features = NETIF_F_IP_CSUM;
21295+
21296+ SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
21297+ SET_MODULE_OWNER(netdev);
21298+ SET_NETDEV_DEV(netdev, &dev->dev);
21299+
21300+ np->netdev = netdev;
21301+
21302+ netfront_carrier_off(np);
21303+
21304+ return netdev;
21305+
21306+ exit_free_tx:
21307+ gnttab_free_grant_references(np->gref_tx_head);
21308+ exit:
21309+ free_netdev(netdev);
21310+ return ERR_PTR(err);
21311+}
21312+
21313+#ifdef CONFIG_INET
21314+/*
21315+ * We use this notifier to send out a fake ARP reply to reset switches and
21316+ * router ARP caches when an IP interface is brought up on a VIF.
21317+ */
21318+static int
21319+inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
21320+{
21321+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
21322+ struct net_device *dev = ifa->ifa_dev->dev;
21323+
21324+ /* UP event and is it one of our devices? */
21325+ if (event == NETDEV_UP && dev->open == network_open)
21326+ send_fake_arp(dev);
21327+
21328+ return NOTIFY_DONE;
21329+}
21330+
21331+static struct notifier_block notifier_inetdev = {
21332+ .notifier_call = inetdev_notify,
21333+ .next = NULL,
21334+ .priority = 0
21335+};
21336+#endif
21337+
21338+
21339+static void netif_disconnect_backend(struct netfront_info *info)
21340+{
21341+ /* Stop old i/f to prevent errors whilst we rebuild the state. */
21342+ spin_lock_bh(&info->rx_lock);
21343+ spin_lock_irq(&info->tx_lock);
21344+ netfront_carrier_off(info);
21345+ spin_unlock_irq(&info->tx_lock);
21346+ spin_unlock_bh(&info->rx_lock);
21347+
21348+ if (info->irq)
21349+ unbind_from_irqhandler(info->irq, info->netdev);
21350+ info->irq = 0;
21351+
21352+ end_access(info->tx_ring_ref, info->tx.sring);
21353+ end_access(info->rx_ring_ref, info->rx.sring);
21354+ info->tx_ring_ref = GRANT_INVALID_REF;
21355+ info->rx_ring_ref = GRANT_INVALID_REF;
21356+ info->tx.sring = NULL;
21357+ info->rx.sring = NULL;
21358+}
21359+
21360+
21361+static void end_access(int ref, void *page)
21362+{
21363+ if (ref != GRANT_INVALID_REF)
21364+ gnttab_end_foreign_access(ref, (unsigned long)page);
21365+}
21366+
21367+
21368+/* ** Driver registration ** */
21369+
21370+
21371+static const struct xenbus_device_id netfront_ids[] = {
21372+ { "vif" },
21373+ { "" }
21374+};
21375+MODULE_ALIAS("xen:vif");
21376+
21377+
21378+static struct xenbus_driver netfront_driver = {
21379+ .name = "vif",
21380+ .owner = THIS_MODULE,
21381+ .ids = netfront_ids,
21382+ .probe = netfront_probe,
21383+ .remove = __devexit_p(netfront_remove),
21384+ .suspend = netfront_suspend,
21385+ .suspend_cancel = netfront_suspend_cancel,
21386+ .resume = netfront_resume,
21387+ .otherend_changed = backend_changed,
21388+};
21389+
21390+
21391+static int __init netif_init(void)
21392+{
21393+ if (!is_running_on_xen())
21394+ return -ENODEV;
21395+
21396+#ifdef CONFIG_XEN
21397+ if (MODPARM_rx_flip && MODPARM_rx_copy) {
21398+ WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
21399+ return -EINVAL;
21400+ }
21401+
21402+ if (!MODPARM_rx_flip && !MODPARM_rx_copy)
21403+ MODPARM_rx_flip = 1; /* Default is to flip. */
21404+#endif
21405+
21406+ netif_init_accel();
21407+
21408+ IPRINTK("Initialising virtual ethernet driver.\n");
21409+
21410+#ifdef CONFIG_INET
21411+ (void)register_inetaddr_notifier(&notifier_inetdev);
21412+#endif
21413+
21414+ return xenbus_register_frontend(&netfront_driver);
21415+}
21416+module_init(netif_init);
21417+
21418+
21419+static void __exit netif_exit(void)
21420+{
21421+#ifdef CONFIG_INET
21422+ unregister_inetaddr_notifier(&notifier_inetdev);
21423+#endif
21424+
21425+ netif_exit_accel();
21426+
21427+ return xenbus_unregister_driver(&netfront_driver);
21428+}
21429+module_exit(netif_exit);
21430+
21431+MODULE_LICENSE("Dual BSD/GPL");
21432Index: head-2008-11-25/drivers/xen/netfront/netfront.h
21433===================================================================
21434--- /dev/null 1970-01-01 00:00:00.000000000 +0000
21435+++ head-2008-11-25/drivers/xen/netfront/netfront.h 2008-01-07 13:19:18.000000000 +0100
21436@@ -0,0 +1,274 @@
21437+/******************************************************************************
21438+ * Virtual network driver for conversing with remote driver backends.
21439+ *
21440+ * Copyright (c) 2002-2005, K A Fraser
21441+ * Copyright (c) 2005, XenSource Ltd
21442+ * Copyright (C) 2007 Solarflare Communications, Inc.
21443+ *
21444+ * This program is free software; you can redistribute it and/or
21445+ * modify it under the terms of the GNU General Public License version 2
21446+ * as published by the Free Software Foundation; or, when distributed
21447+ * separately from the Linux kernel or incorporated into other
21448+ * software packages, subject to the following license:
21449+ *
21450+ * Permission is hereby granted, free of charge, to any person obtaining a copy
21451+ * of this source file (the "Software"), to deal in the Software without
21452+ * restriction, including without limitation the rights to use, copy, modify,
21453+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
21454+ * and to permit persons to whom the Software is furnished to do so, subject to
21455+ * the following conditions:
21456+ *
21457+ * The above copyright notice and this permission notice shall be included in
21458+ * all copies or substantial portions of the Software.
21459+ *
21460+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21461+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21462+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21463+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21464+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21465+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21466+ * IN THE SOFTWARE.
21467+ */
21468+
21469+#ifndef NETFRONT_H
21470+#define NETFRONT_H
21471+
21472+#include <xen/interface/io/netif.h>
21473+#include <linux/netdevice.h>
21474+#include <linux/skbuff.h>
21475+#include <linux/list.h>
21476+
21477+#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
21478+#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
21479+
21480+#include <xen/xenbus.h>
21481+
21482+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
21483+#include <xen/platform-compat.h>
21484+#endif
21485+
21486+/*
21487+ * Function pointer table for hooks into a network acceleration
21488+ * plugin. These are called at appropriate points from the netfront
21489+ * driver
21490+ */
21491+struct netfront_accel_hooks {
21492+ /*
21493+ * new_device: Accelerator hook to ask the plugin to support a
21494+ * new network interface
21495+ */
21496+ int (*new_device)(struct net_device *net_dev, struct xenbus_device *dev);
21497+ /*
21498+ * remove: Opposite of new_device
21499+ */
21500+ int (*remove)(struct xenbus_device *dev);
21501+ /*
21502+ * The net_device is being polled, check the accelerated
21503+ * hardware for any pending packets
21504+ */
21505+ int (*netdev_poll)(struct net_device *dev, int *pbudget);
21506+ /*
21507+ * start_xmit: Used to give the accelerated plugin the option
21508+	 * of sending a packet. Returns non-zero if it has done so, or
21509+	 * zero to decline and force the packet onto the normal send
21510+	 * path
21511+ */
21512+ int (*start_xmit)(struct sk_buff *skb, struct net_device *dev);
21513+ /*
21514+	 * start/stop_napi_irq: Used by netfront to indicate
21515+	 * when NAPI interrupts should be enabled or disabled
21516+ */
21517+ int (*start_napi_irq)(struct net_device *dev);
21518+ void (*stop_napi_irq)(struct net_device *dev);
21519+ /*
21520+ * Called before re-enabling the TX queue to check the fast
21521+ * path has slots too
21522+ */
21523+ int (*check_ready)(struct net_device *dev);
21524+ /*
21525+ * Get the fastpath network statistics
21526+ */
21527+ int (*get_stats)(struct net_device *dev,
21528+ struct net_device_stats *stats);
21529+};
21530+
21531+
21532+/* Supported version of the API/protocol for communication between
21533+   netfront and the acceleration plugin */
21534+#define NETFRONT_ACCEL_VERSION 0x00010003
21535+
21536+/*
21537+ * Per-netfront device state for the accelerator. This is used to
21538+ * allow efficient per-netfront device access to the accelerator
21539+ * hooks
21540+ */
21541+struct netfront_accel_vif_state {
21542+ struct list_head link;
21543+
21544+ struct xenbus_device *dev;
21545+ struct netfront_info *np;
21546+ struct netfront_accel_hooks *hooks;
21547+
21548+ /* Watch on the accelerator configuration value */
21549+ struct xenbus_watch accel_watch;
21550+ /* Work item to process change in accelerator */
21551+ struct work_struct accel_work;
21552+ /* The string from xenbus last time accel_watch fired */
21553+ char *accel_frontend;
21554+};
21555+
21556+/*
21557+ * Per-accelerator state stored in netfront. These form a list that
21558+ * is used to track which devices are accelerated by which plugins,
21559+ * and what plugins are available/have been requested
21560+ */
21561+struct netfront_accelerator {
21562+ /* Used to make a list */
21563+ struct list_head link;
21564+ /* ID of the accelerator */
21565+ int id;
21566+ /*
21567+ * String describing the accelerator. Currently this is the
21568+ * name of the accelerator module. This is provided by the
21569+ * backend accelerator through xenstore
21570+ */
21571+ char *frontend;
21572+ /* The hooks into the accelerator plugin module */
21573+ struct netfront_accel_hooks *hooks;
21574+
21575+ /*
21576+ * List of per-netfront device state (struct
21577+ * netfront_accel_vif_state) for each netfront device that is
21578+ * using this accelerator
21579+ */
21580+ struct list_head vif_states;
21581+ spinlock_t vif_states_lock;
21582+};
21583+
21584+struct netfront_info {
21585+ struct list_head list;
21586+ struct net_device *netdev;
21587+
21588+ struct net_device_stats stats;
21589+
21590+ struct netif_tx_front_ring tx;
21591+ struct netif_rx_front_ring rx;
21592+
21593+ spinlock_t tx_lock;
21594+ spinlock_t rx_lock;
21595+
21596+ unsigned int irq;
21597+ unsigned int copying_receiver;
21598+ unsigned int carrier;
21599+
21600+ /* Receive-ring batched refills. */
21601+#define RX_MIN_TARGET 8
21602+#define RX_DFL_MIN_TARGET 64
21603+#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
21604+ unsigned rx_min_target, rx_max_target, rx_target;
21605+ struct sk_buff_head rx_batch;
21606+
21607+ struct timer_list rx_refill_timer;
21608+
21609+ /*
21610+ * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs
21611+ * is an index into a chain of free entries.
21612+ */
21613+ struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
21614+ struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
21615+
21616+#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
21617+ grant_ref_t gref_tx_head;
21618+ grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
21619+ grant_ref_t gref_rx_head;
21620+ grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
21621+
21622+ struct xenbus_device *xbdev;
21623+ int tx_ring_ref;
21624+ int rx_ring_ref;
21625+ u8 mac[ETH_ALEN];
21626+
21627+ unsigned long rx_pfn_array[NET_RX_RING_SIZE];
21628+ struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
21629+ struct mmu_update rx_mmu[NET_RX_RING_SIZE];
21630+
21631+ /* Private pointer to state internal to accelerator module */
21632+ void *accel_priv;
21633+ /* The accelerator used by this netfront device */
21634+ struct netfront_accelerator *accelerator;
21635+ /* The accelerator state for this netfront device */
21636+ struct netfront_accel_vif_state accel_vif_state;
21637+};
21638+
21639+
21640+/* Exported Functions */
21641+
21642+/*
21643+ * Called by an accelerator plugin module when it has loaded.
21644+ *
21645+ * frontend: the string describing the accelerator, currently the module name
21646+ * hooks: the hooks for netfront to use to call into the accelerator
21647+ * version: the version of API between frontend and plugin requested
21648+ *
21649+ * return: 0 on success, <0 on error, >0 (with version supported) on
21650+ * version mismatch
21651+ */
21652+extern int netfront_accelerator_loaded(int version, const char *frontend,
21653+ struct netfront_accel_hooks *hooks);
21654+
21655+/*
21656+ * Called by an accelerator plugin module when it is about to unload.
21657+ *
21658+ * frontend: the string describing the accelerator. Must match the
21659+ * one passed to netfront_accelerator_loaded()
21660+ */
21661+extern void netfront_accelerator_stop(const char *frontend);
21662+
21663+/*
21664+ * Called by an accelerator before waking the net device's TX queue to
21665+ * ensure the slow path has available slots. Returns true if OK to
21666+ * wake, false if still busy
21667+ */
21668+extern int netfront_check_queue_ready(struct net_device *net_dev);
21669+
21670+
21671+/* Internal-to-netfront Functions */
21672+
21673+/*
21674+ * Call into accelerator and check to see if it has tx space before we
21675+ * wake the net device's TX queue. Returns true if OK to wake, false
21676+ * if still busy
21677+ */
21678+extern
21679+int netfront_check_accelerator_queue_ready(struct net_device *dev,
21680+ struct netfront_info *np);
21681+extern
21682+int netfront_accelerator_call_remove(struct netfront_info *np,
21683+ struct xenbus_device *dev);
21684+extern
21685+int netfront_accelerator_suspend(struct netfront_info *np,
21686+ struct xenbus_device *dev);
21687+extern
21688+int netfront_accelerator_suspend_cancel(struct netfront_info *np,
21689+ struct xenbus_device *dev);
21690+extern
21691+void netfront_accelerator_resume(struct netfront_info *np,
21692+ struct xenbus_device *dev);
21693+extern
21694+void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np,
21695+ struct net_device *dev);
21696+extern
21697+int netfront_accelerator_call_get_stats(struct netfront_info *np,
21698+ struct net_device *dev);
21699+extern
21700+void netfront_accelerator_add_watch(struct netfront_info *np);
21701+
21702+extern
21703+void netif_init_accel(void);
21704+extern
21705+void netif_exit_accel(void);
21706+
21707+extern
21708+void init_accelerator_vif(struct netfront_info *np,
21709+ struct xenbus_device *dev);
21710+#endif /* NETFRONT_H */
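/*
 * A minimal sketch, not part of this patch, of how a hypothetical
 * accelerator plugin module could register with netfront through the
 * API declared above. The module name "example_accel" and the stub
 * hooks are placeholders; a real plugin would implement every member
 * of struct netfront_accel_hooks, not just the two shown here.
 */
#include <linux/module.h>
#include "netfront.h"

static int example_poll(struct net_device *dev, int *pbudget)
{
	return 0;	/* nothing accelerated to deliver */
}

static int example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	return 0;	/* decline; force the packet onto the normal send path */
}

static struct netfront_accel_hooks example_hooks = {
	.netdev_poll = example_poll,
	.start_xmit  = example_xmit,
};

static int __init example_accel_init(void)
{
	/* Returns 0 on success, <0 on error, >0 on a version mismatch. */
	return netfront_accelerator_loaded(NETFRONT_ACCEL_VERSION,
					   "example_accel", &example_hooks);
}

static void __exit example_accel_exit(void)
{
	/* Must pass the same frontend string that was used when loading. */
	netfront_accelerator_stop("example_accel");
}

module_init(example_accel_init);
module_exit(example_accel_exit);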
21711Index: head-2008-11-25/drivers/xen/pciback/Makefile
21712===================================================================
21713--- /dev/null 1970-01-01 00:00:00.000000000 +0000
21714+++ head-2008-11-25/drivers/xen/pciback/Makefile 2008-07-21 11:00:33.000000000 +0200
21715@@ -0,0 +1,17 @@
21716+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
21717+
21718+pciback-y := pci_stub.o pciback_ops.o xenbus.o
21719+pciback-y += conf_space.o conf_space_header.o \
21720+ conf_space_capability.o \
21721+ conf_space_capability_vpd.o \
21722+ conf_space_capability_pm.o \
21723+ conf_space_quirks.o
21724+pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
21725+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
21726+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
21727+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
21728+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
21729+
21730+ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
21731+EXTRA_CFLAGS += -DDEBUG
21732+endif
21733Index: head-2008-11-25/drivers/xen/pciback/conf_space.c
21734===================================================================
21735--- /dev/null 1970-01-01 00:00:00.000000000 +0000
21736+++ head-2008-11-25/drivers/xen/pciback/conf_space.c 2008-10-29 09:55:56.000000000 +0100
21737@@ -0,0 +1,426 @@
21738+/*
21739+ * PCI Backend - Functions for creating a virtual configuration space for
21740+ * exported PCI Devices.
21741+ * It's dangerous to allow PCI Driver Domains to change their
21742+ * device's resources (memory, i/o ports, interrupts). We need to
21743+ * restrict changes to certain PCI Configuration registers:
21744+ * BARs, INTERRUPT_PIN, most registers in the header...
21745+ *
21746+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
21747+ */
21748+
21749+#include <linux/kernel.h>
21750+#include <linux/pci.h>
21751+#include "pciback.h"
21752+#include "conf_space.h"
21753+#include "conf_space_quirks.h"
21754+
21755+#define DEFINE_PCI_CONFIG(op,size,type) \
21756+int pciback_##op##_config_##size \
21757+(struct pci_dev *dev, int offset, type value, void *data) \
21758+{ \
21759+ return pci_##op##_config_##size (dev, offset, value); \
21760+}
21761+
21762+DEFINE_PCI_CONFIG(read, byte, u8 *)
21763+DEFINE_PCI_CONFIG(read, word, u16 *)
21764+DEFINE_PCI_CONFIG(read, dword, u32 *)
21765+
21766+DEFINE_PCI_CONFIG(write, byte, u8)
21767+DEFINE_PCI_CONFIG(write, word, u16)
21768+DEFINE_PCI_CONFIG(write, dword, u32)
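/*
 * For reference, DEFINE_PCI_CONFIG(read, byte, u8 *) above expands to a
 * thin wrapper of the form below; the other read/write variants are
 * analogous:
 *
 *	int pciback_read_config_byte
 *	(struct pci_dev *dev, int offset, u8 *value, void *data)
 *	{
 *		return pci_read_config_byte(dev, offset, value);
 *	}
 */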
21769+
21770+static int conf_space_read(struct pci_dev *dev,
21771+ const struct config_field_entry *entry,
21772+ int offset, u32 *value)
21773+{
21774+ int ret = 0;
21775+ const struct config_field *field = entry->field;
21776+
21777+ *value = 0;
21778+
21779+ switch (field->size) {
21780+ case 1:
21781+ if (field->u.b.read)
21782+ ret = field->u.b.read(dev, offset, (u8 *) value,
21783+ entry->data);
21784+ break;
21785+ case 2:
21786+ if (field->u.w.read)
21787+ ret = field->u.w.read(dev, offset, (u16 *) value,
21788+ entry->data);
21789+ break;
21790+ case 4:
21791+ if (field->u.dw.read)
21792+ ret = field->u.dw.read(dev, offset, value, entry->data);
21793+ break;
21794+ }
21795+ return ret;
21796+}
21797+
21798+static int conf_space_write(struct pci_dev *dev,
21799+ const struct config_field_entry *entry,
21800+ int offset, u32 value)
21801+{
21802+ int ret = 0;
21803+ const struct config_field *field = entry->field;
21804+
21805+ switch (field->size) {
21806+ case 1:
21807+ if (field->u.b.write)
21808+ ret = field->u.b.write(dev, offset, (u8) value,
21809+ entry->data);
21810+ break;
21811+ case 2:
21812+ if (field->u.w.write)
21813+ ret = field->u.w.write(dev, offset, (u16) value,
21814+ entry->data);
21815+ break;
21816+ case 4:
21817+ if (field->u.dw.write)
21818+ ret = field->u.dw.write(dev, offset, value,
21819+ entry->data);
21820+ break;
21821+ }
21822+ return ret;
21823+}
21824+
21825+static inline u32 get_mask(int size)
21826+{
21827+ if (size == 1)
21828+ return 0xff;
21829+ else if (size == 2)
21830+ return 0xffff;
21831+ else
21832+ return 0xffffffff;
21833+}
21834+
21835+static inline int valid_request(int offset, int size)
21836+{
21837+ /* Validate request (no un-aligned requests) */
21838+ if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
21839+ return 1;
21840+ return 0;
21841+}
21842+
21843+static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
21844+ int offset)
21845+{
21846+ if (offset >= 0) {
21847+ new_val_mask <<= (offset * 8);
21848+ new_val <<= (offset * 8);
21849+ } else {
21850+ new_val_mask >>= (offset * -8);
21851+ new_val >>= (offset * -8);
21852+ }
21853+ val = (val & ~new_val_mask) | (new_val & new_val_mask);
21854+
21855+ return val;
21856+}
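/*
 * Worked example (hypothetical request): a 1-byte guest write of 0xab at
 * config offset 0x06 that overlaps a 4-byte virtual field at 0x04 reaches
 * merge_value() as merge_value(tmp_val, 0xab, 0xff, 0x06 - 0x04). The mask
 * and the new value are shifted left by 2 * 8 = 16 bits, so the result is
 * (tmp_val & ~0x00ff0000) | 0x00ab0000, i.e. only the addressed byte of
 * the field changes.
 */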
21857+
21858+static int pcibios_err_to_errno(int err)
21859+{
21860+ switch (err) {
21861+ case PCIBIOS_SUCCESSFUL:
21862+ return XEN_PCI_ERR_success;
21863+ case PCIBIOS_DEVICE_NOT_FOUND:
21864+ return XEN_PCI_ERR_dev_not_found;
21865+ case PCIBIOS_BAD_REGISTER_NUMBER:
21866+ return XEN_PCI_ERR_invalid_offset;
21867+ case PCIBIOS_FUNC_NOT_SUPPORTED:
21868+ return XEN_PCI_ERR_not_implemented;
21869+ case PCIBIOS_SET_FAILED:
21870+ return XEN_PCI_ERR_access_denied;
21871+ }
21872+ return err;
21873+}
21874+
21875+int pciback_config_read(struct pci_dev *dev, int offset, int size,
21876+ u32 * ret_val)
21877+{
21878+ int err = 0;
21879+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
21880+ const struct config_field_entry *cfg_entry;
21881+ const struct config_field *field;
21882+ int req_start, req_end, field_start, field_end;
21883+ /* if read fails for any reason, return 0 (as if device didn't respond) */
21884+ u32 value = 0, tmp_val;
21885+
21886+ if (unlikely(verbose_request))
21887+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
21888+ pci_name(dev), size, offset);
21889+
21890+ if (!valid_request(offset, size)) {
21891+ err = XEN_PCI_ERR_invalid_offset;
21892+ goto out;
21893+ }
21894+
21895+ /* Get the real value first, then modify as appropriate */
21896+ switch (size) {
21897+ case 1:
21898+ err = pci_read_config_byte(dev, offset, (u8 *) & value);
21899+ break;
21900+ case 2:
21901+ err = pci_read_config_word(dev, offset, (u16 *) & value);
21902+ break;
21903+ case 4:
21904+ err = pci_read_config_dword(dev, offset, &value);
21905+ break;
21906+ }
21907+
21908+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
21909+ field = cfg_entry->field;
21910+
21911+ req_start = offset;
21912+ req_end = offset + size;
21913+ field_start = OFFSET(cfg_entry);
21914+ field_end = OFFSET(cfg_entry) + field->size;
21915+
21916+ if ((req_start >= field_start && req_start < field_end)
21917+ || (req_end > field_start && req_end <= field_end)) {
21918+ err = conf_space_read(dev, cfg_entry, field_start,
21919+ &tmp_val);
21920+ if (err)
21921+ goto out;
21922+
21923+ value = merge_value(value, tmp_val,
21924+ get_mask(field->size),
21925+ field_start - req_start);
21926+ }
21927+ }
21928+
21929+ out:
21930+ if (unlikely(verbose_request))
21931+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
21932+ pci_name(dev), size, offset, value);
21933+
21934+ *ret_val = value;
21935+ return pcibios_err_to_errno(err);
21936+}
21937+
21938+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
21939+{
21940+ int err = 0, handled = 0;
21941+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
21942+ const struct config_field_entry *cfg_entry;
21943+ const struct config_field *field;
21944+ u32 tmp_val;
21945+ int req_start, req_end, field_start, field_end;
21946+
21947+ if (unlikely(verbose_request))
21948+ printk(KERN_DEBUG
21949+ "pciback: %s: write request %d bytes at 0x%x = %x\n",
21950+ pci_name(dev), size, offset, value);
21951+
21952+ if (!valid_request(offset, size))
21953+ return XEN_PCI_ERR_invalid_offset;
21954+
21955+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
21956+ field = cfg_entry->field;
21957+
21958+ req_start = offset;
21959+ req_end = offset + size;
21960+ field_start = OFFSET(cfg_entry);
21961+ field_end = OFFSET(cfg_entry) + field->size;
21962+
21963+ if ((req_start >= field_start && req_start < field_end)
21964+ || (req_end > field_start && req_end <= field_end)) {
21965+ tmp_val = 0;
21966+
21967+ err = pciback_config_read(dev, field_start,
21968+ field->size, &tmp_val);
21969+ if (err)
21970+ break;
21971+
21972+ tmp_val = merge_value(tmp_val, value, get_mask(size),
21973+ req_start - field_start);
21974+
21975+ err = conf_space_write(dev, cfg_entry, field_start,
21976+ tmp_val);
21977+
21978+ /* handled is set true here, but not every byte
21979+ * may have been written! Properly detecting if
21980+ * every byte is handled is unnecessary as the
21981+ * flag is used to detect devices that need
21982+ * special helpers to work correctly.
21983+ */
21984+ handled = 1;
21985+ }
21986+ }
21987+
21988+ if (!handled && !err) {
21989+		/* By default, anything not specifically handled above is
21990+ * read-only. The permissive flag changes this behavior so
21991+ * that anything not specifically handled above is writable.
21992+ * This means that some fields may still be read-only because
21993+ * they have entries in the config_field list that intercept
21994+ * the write and do nothing. */
21995+ if (dev_data->permissive) {
21996+ switch (size) {
21997+ case 1:
21998+ err = pci_write_config_byte(dev, offset,
21999+ (u8) value);
22000+ break;
22001+ case 2:
22002+ err = pci_write_config_word(dev, offset,
22003+ (u16) value);
22004+ break;
22005+ case 4:
22006+ err = pci_write_config_dword(dev, offset,
22007+ (u32) value);
22008+ break;
22009+ }
22010+ } else if (!dev_data->warned_on_write) {
22011+ dev_data->warned_on_write = 1;
22012+ dev_warn(&dev->dev, "Driver tried to write to a "
22013+ "read-only configuration space field at offset "
22014+ "0x%x, size %d. This may be harmless, but if "
22015+ "you have problems with your device:\n"
22016+ "1) see permissive attribute in sysfs\n"
22017+ "2) report problems to the xen-devel "
22018+ "mailing list along with details of your "
22019+ "device obtained from lspci.\n", offset, size);
22020+ }
22021+ }
22022+
22023+ return pcibios_err_to_errno(err);
22024+}
22025+
22026+void pciback_config_free_dyn_fields(struct pci_dev *dev)
22027+{
22028+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
22029+ struct config_field_entry *cfg_entry, *t;
22030+ const struct config_field *field;
22031+
22032+ dev_dbg(&dev->dev,
22033+ "free-ing dynamically allocated virtual configuration space fields\n");
22034+
22035+ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
22036+ field = cfg_entry->field;
22037+
22038+ if (field->clean) {
22039+ field->clean((struct config_field *)field);
22040+
22041+ if (cfg_entry->data)
22042+ kfree(cfg_entry->data);
22043+
22044+ list_del(&cfg_entry->list);
22045+ kfree(cfg_entry);
22046+ }
22047+
22048+ }
22049+}
22050+
22051+void pciback_config_reset_dev(struct pci_dev *dev)
22052+{
22053+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
22054+ const struct config_field_entry *cfg_entry;
22055+ const struct config_field *field;
22056+
22057+ dev_dbg(&dev->dev, "resetting virtual configuration space\n");
22058+
22059+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
22060+ field = cfg_entry->field;
22061+
22062+ if (field->reset)
22063+ field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
22064+ }
22065+}
22066+
22067+void pciback_config_free_dev(struct pci_dev *dev)
22068+{
22069+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
22070+ struct config_field_entry *cfg_entry, *t;
22071+ const struct config_field *field;
22072+
22073+	dev_dbg(&dev->dev, "freeing virtual configuration space fields\n");
22074+
22075+ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
22076+ list_del(&cfg_entry->list);
22077+
22078+ field = cfg_entry->field;
22079+
22080+ if (field->release)
22081+ field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
22082+
22083+ kfree(cfg_entry);
22084+ }
22085+}
22086+
22087+int pciback_config_add_field_offset(struct pci_dev *dev,
22088+ const struct config_field *field,
22089+ unsigned int base_offset)
22090+{
22091+ int err = 0;
22092+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
22093+ struct config_field_entry *cfg_entry;
22094+ void *tmp;
22095+
22096+ cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
22097+ if (!cfg_entry) {
22098+ err = -ENOMEM;
22099+ goto out;
22100+ }
22101+
22102+ cfg_entry->data = NULL;
22103+ cfg_entry->field = field;
22104+ cfg_entry->base_offset = base_offset;
22105+
22106+ /* silently ignore duplicate fields */
22107+	err = pciback_field_is_dup(dev, OFFSET(cfg_entry));
22108+ if (err)
22109+ goto out;
22110+
22111+ if (field->init) {
22112+ tmp = field->init(dev, OFFSET(cfg_entry));
22113+
22114+ if (IS_ERR(tmp)) {
22115+ err = PTR_ERR(tmp);
22116+ goto out;
22117+ }
22118+
22119+ cfg_entry->data = tmp;
22120+ }
22121+
22122+ dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
22123+ OFFSET(cfg_entry));
22124+ list_add_tail(&cfg_entry->list, &dev_data->config_fields);
22125+
22126+ out:
22127+ if (err)
22128+ kfree(cfg_entry);
22129+
22130+ return err;
22131+}
22132+
22133+/* This sets up the device's virtual configuration space to keep track of
22134+ * certain registers (like the base address registers (BARs)) so that we can
22135+ * keep the client from manipulating them directly.
22136+ */
22137+int pciback_config_init_dev(struct pci_dev *dev)
22138+{
22139+ int err = 0;
22140+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
22141+
22142+ dev_dbg(&dev->dev, "initializing virtual configuration space\n");
22143+
22144+ INIT_LIST_HEAD(&dev_data->config_fields);
22145+
22146+ err = pciback_config_header_add_fields(dev);
22147+ if (err)
22148+ goto out;
22149+
22150+ err = pciback_config_capability_add_fields(dev);
22151+ if (err)
22152+ goto out;
22153+
22154+ err = pciback_config_quirks_init(dev);
22155+
22156+ out:
22157+ return err;
22158+}
22159+
22160+int pciback_config_init(void)
22161+{
22162+ return pciback_config_capability_init();
22163+}
22164Index: head-2008-11-25/drivers/xen/pciback/conf_space.h
22165===================================================================
22166--- /dev/null 1970-01-01 00:00:00.000000000 +0000
22167+++ head-2008-11-25/drivers/xen/pciback/conf_space.h 2008-10-29 09:55:56.000000000 +0100
22168@@ -0,0 +1,126 @@
22169+/*
22170+ * PCI Backend - Common data structures for overriding the configuration space
22171+ *
22172+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22173+ */
22174+
22175+#ifndef __XEN_PCIBACK_CONF_SPACE_H__
22176+#define __XEN_PCIBACK_CONF_SPACE_H__
22177+
22178+#include <linux/list.h>
22179+#include <linux/err.h>
22180+
22181+/* conf_field_init can return an errno in a ptr with ERR_PTR() */
22182+typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
22183+typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
22184+typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
22185+
22186+typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
22187+ void *data);
22188+typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
22189+ void *data);
22190+typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
22191+ void *data);
22192+typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
22193+ void *data);
22194+typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
22195+ void *data);
22196+typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
22197+ void *data);
22198+
22199+/* These describe the fields within the configuration space whose
22200+ * reads and writes we intercept so that we can control the values
22201+ * the guest sees and is allowed to set.
22202+ */
22203+struct config_field {
22204+ unsigned int offset;
22205+ unsigned int size;
22206+ unsigned int mask;
22207+ conf_field_init init;
22208+ conf_field_reset reset;
22209+ conf_field_free release;
22210+ void (*clean) (struct config_field * field);
22211+ union {
22212+ struct {
22213+ conf_dword_write write;
22214+ conf_dword_read read;
22215+ } dw;
22216+ struct {
22217+ conf_word_write write;
22218+ conf_word_read read;
22219+ } w;
22220+ struct {
22221+ conf_byte_write write;
22222+ conf_byte_read read;
22223+ } b;
22224+ } u;
22225+ struct list_head list;
22226+};
22227+
22228+struct config_field_entry {
22229+ struct list_head list;
22230+ const struct config_field *field;
22231+ unsigned int base_offset;
22232+ void *data;
22233+};
22234+
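+/* Absolute configuration space offset of a field: the capability (or header)
+ * base offset plus the field's offset relative to that base.
+ */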
22235+#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
22236+
22237+/* Add fields to a device - the add_fields helpers expect a pointer to
22238+ * the first entry in an array whose end is marked by an entry with size == 0
22239+ */
22240+int pciback_config_add_field_offset(struct pci_dev *dev,
22241+ const struct config_field *field,
22242+ unsigned int offset);
22243+
22244+static inline int pciback_config_add_field(struct pci_dev *dev,
22245+ const struct config_field *field)
22246+{
22247+ return pciback_config_add_field_offset(dev, field, 0);
22248+}
22249+
22250+static inline int pciback_config_add_fields(struct pci_dev *dev,
22251+ const struct config_field *field)
22252+{
22253+ int i, err = 0;
22254+ for (i = 0; field[i].size != 0; i++) {
22255+ err = pciback_config_add_field(dev, &field[i]);
22256+ if (err)
22257+ break;
22258+ }
22259+ return err;
22260+}
22261+
22262+static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
22263+ const struct config_field *field,
22264+ unsigned int offset)
22265+{
22266+ int i, err = 0;
22267+ for (i = 0; field[i].size != 0; i++) {
22268+ err = pciback_config_add_field_offset(dev, &field[i], offset);
22269+ if (err)
22270+ break;
22271+ }
22272+ return err;
22273+}
22274+
22275+/* Read/Write the real configuration space */
22276+int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
22277+ void *data);
22278+int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
22279+ void *data);
22280+int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
22281+ void *data);
22282+int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
22283+ void *data);
22284+int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
22285+ void *data);
22286+int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
22287+ void *data);
22288+
22289+int pciback_config_capability_init(void);
22290+
22291+int pciback_config_header_add_fields(struct pci_dev *dev);
22292+int pciback_config_capability_add_fields(struct pci_dev *dev);
22293+
22294+#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
22295Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability.c
22296===================================================================
22297--- /dev/null 1970-01-01 00:00:00.000000000 +0000
22298+++ head-2008-11-25/drivers/xen/pciback/conf_space_capability.c 2008-10-29 09:55:56.000000000 +0100
22299@@ -0,0 +1,69 @@
22300+/*
22301+ * PCI Backend - Handles the virtual fields found on the capability lists
22302+ * in the configuration space.
22303+ *
22304+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22305+ */
22306+
22307+#include <linux/kernel.h>
22308+#include <linux/pci.h>
22309+#include "pciback.h"
22310+#include "conf_space.h"
22311+#include "conf_space_capability.h"
22312+
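+/* Capability overlays registered at init time; see
+ * pciback_config_capability_init() below.
+ */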
22313+static LIST_HEAD(capabilities);
22314+
22315+static const struct config_field caplist_header[] = {
22316+ {
22317+ .offset = PCI_CAP_LIST_ID,
22318+ .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
22319+ .u.w.read = pciback_read_config_word,
22320+ .u.w.write = NULL,
22321+ },
22322+ {}
22323+};
22324+
22325+static inline void register_capability(struct pciback_config_capability *cap)
22326+{
22327+ list_add_tail(&cap->cap_list, &capabilities);
22328+}
22329+
22330+int pciback_config_capability_add_fields(struct pci_dev *dev)
22331+{
22332+ int err = 0;
22333+ struct pciback_config_capability *cap;
22334+ int cap_offset;
22335+
22336+ list_for_each_entry(cap, &capabilities, cap_list) {
22337+ cap_offset = pci_find_capability(dev, cap->capability);
22338+ if (cap_offset) {
22339+ dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
22340+ cap->capability, cap_offset);
22341+
22342+ err = pciback_config_add_fields_offset(dev,
22343+ caplist_header,
22344+ cap_offset);
22345+ if (err)
22346+ goto out;
22347+ err = pciback_config_add_fields_offset(dev,
22348+ cap->fields,
22349+ cap_offset);
22350+ if (err)
22351+ goto out;
22352+ }
22353+ }
22354+
22355+ out:
22356+ return err;
22357+}
22358+
22359+extern struct pciback_config_capability pciback_config_capability_vpd;
22360+extern struct pciback_config_capability pciback_config_capability_pm;
22361+
22362+int pciback_config_capability_init(void)
22363+{
22364+ register_capability(&pciback_config_capability_vpd);
22365+ register_capability(&pciback_config_capability_pm);
22366+
22367+ return 0;
22368+}
22369Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability.h
22370===================================================================
22371--- /dev/null 1970-01-01 00:00:00.000000000 +0000
22372+++ head-2008-11-25/drivers/xen/pciback/conf_space_capability.h 2008-10-29 09:55:56.000000000 +0100
22373@@ -0,0 +1,23 @@
22374+/*
22375+ * PCI Backend - Data structures for special overlays for structures on
22376+ * the capability list.
22377+ *
22378+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22379+ */
22380+
22381+#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
22382+#define __PCIBACK_CONFIG_CAPABILITY_H__
22383+
22384+#include <linux/pci.h>
22385+#include <linux/list.h>
22386+
22387+struct pciback_config_capability {
22388+ struct list_head cap_list;
22389+
22390+ int capability;
22391+
22392+ /* If the device has the capability found above, add these fields */
22393+ const struct config_field *fields;
22394+};
22395+
22396+#endif
22397Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability_msi.c
22398===================================================================
22399--- /dev/null 1970-01-01 00:00:00.000000000 +0000
22400+++ head-2008-11-25/drivers/xen/pciback/conf_space_capability_msi.c 2008-09-15 13:40:15.000000000 +0200
22401@@ -0,0 +1,79 @@
22402+/*
22403+ * PCI Backend - Configuration space overlay for the MSI capability
22404+ */
22405+#include <linux/pci.h>
22406+#include <linux/slab.h>
22407+#include "conf_space.h"
22408+#include "conf_space_capability.h"
22409+#include <xen/interface/io/pciif.h>
22410+#include "pciback.h"
22411+
22412+int pciback_enable_msi(struct pciback_device *pdev,
22413+ struct pci_dev *dev, struct xen_pci_op *op)
22414+{
22415+ int otherend = pdev->xdev->otherend_id;
22416+ int status;
22417+
22418+ status = pci_enable_msi(dev);
22419+
22420+ if (status) {
22421+		printk(KERN_ERR "pciback: error enabling MSI for guest %x, status %d\n", otherend, status);
22422+ op->value = 0;
22423+ return XEN_PCI_ERR_op_failed;
22424+ }
22425+
22426+ op->value = dev->irq;
22427+ return 0;
22428+}
22429+
22430+int pciback_disable_msi(struct pciback_device *pdev,
22431+ struct pci_dev *dev, struct xen_pci_op *op)
22432+{
22433+ pci_disable_msi(dev);
22434+
22435+ op->value = dev->irq;
22436+ return 0;
22437+}
22438+
22439+int pciback_enable_msix(struct pciback_device *pdev,
22440+ struct pci_dev *dev, struct xen_pci_op *op)
22441+{
22442+ int i, result;
22443+ struct msix_entry *entries;
22444+
22445+ if (op->value > SH_INFO_MAX_VEC)
22446+ return -EINVAL;
22447+
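+	/* Copy the guest-supplied MSI-X entries into a local array for
+	 * pci_enable_msix(), then copy the assigned vectors back out.
+	 */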
22448+ entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
22449+ if (entries == NULL)
22450+ return -ENOMEM;
22451+
22452+ for (i = 0; i < op->value; i++) {
22453+ entries[i].entry = op->msix_entries[i].entry;
22454+ entries[i].vector = op->msix_entries[i].vector;
22455+ }
22456+
22457+ result = pci_enable_msix(dev, entries, op->value);
22458+
22459+ for (i = 0; i < op->value; i++) {
22460+ op->msix_entries[i].entry = entries[i].entry;
22461+ op->msix_entries[i].vector = entries[i].vector;
22462+ }
22463+
22464+ kfree(entries);
22465+
22466+ op->value = result;
22467+
22468+ return result;
22469+}
22470+
22471+int pciback_disable_msix(struct pciback_device *pdev,
22472+ struct pci_dev *dev, struct xen_pci_op *op)
22473+{
22474+
22475+ pci_disable_msix(dev);
22476+
22477+ op->value = dev->irq;
22478+ return 0;
22479+}
22480+
22481Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability_pm.c
22482===================================================================
22483--- /dev/null 1970-01-01 00:00:00.000000000 +0000
22484+++ head-2008-11-25/drivers/xen/pciback/conf_space_capability_pm.c 2008-10-29 09:55:56.000000000 +0100
22485@@ -0,0 +1,126 @@
22486+/*
22487+ * PCI Backend - Configuration space overlay for power management
22488+ *
22489+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22490+ */
22491+
22492+#include <linux/pci.h>
22493+#include "conf_space.h"
22494+#include "conf_space_capability.h"
22495+
22496+static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
22497+ void *data)
22498+{
22499+ int err;
22500+ u16 real_value;
22501+
22502+ err = pci_read_config_word(dev, offset, &real_value);
22503+ if (err)
22504+ goto out;
22505+
22506+ *value = real_value & ~PCI_PM_CAP_PME_MASK;
22507+
22508+ out:
22509+ return err;
22510+}
22511+
22512+/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
22513+ * Can't allow driver domain to enable PMEs - they're shared */
22514+#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
22515+
22516+static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
22517+ void *data)
22518+{
22519+ int err;
22520+ u16 old_value;
22521+ pci_power_t new_state, old_state;
22522+
22523+ err = pci_read_config_word(dev, offset, &old_value);
22524+ if (err)
22525+ goto out;
22526+
22527+ old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
22528+ new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
22529+
22530+ new_value &= PM_OK_BITS;
22531+ if ((old_value & PM_OK_BITS) != new_value) {
22532+ new_value = (old_value & ~PM_OK_BITS) | new_value;
22533+ err = pci_write_config_word(dev, offset, new_value);
22534+ if (err)
22535+ goto out;
22536+ }
22537+
22538+ /* Let pci core handle the power management change */
22539+ dev_dbg(&dev->dev, "set power state to %x\n", new_state);
22540+ err = pci_set_power_state(dev, new_state);
22541+ if (err) {
22542+ err = PCIBIOS_SET_FAILED;
22543+ goto out;
22544+ }
22545+
22546+ /*
22547+ * Device may lose PCI config info on D3->D0 transition. This
22548+ * is a problem for some guests which will not reset BARs. Even
22549+ * those that have a go will be foiled by our BAR-write handler
22550+ * which will discard the write! Since Linux won't re-init
22551+ * the config space automatically in all cases, we do it here.
22552+ * Future: Should we re-initialise all first 64 bytes of config space?
22553+ */
22554+ if (new_state == PCI_D0 &&
22555+ (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
22556+ !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
22557+ pci_restore_bars(dev);
22558+
22559+ out:
22560+ return err;
22561+}
22562+
22563+/* Ensure PMEs are disabled */
22564+static void *pm_ctrl_init(struct pci_dev *dev, int offset)
22565+{
22566+ int err;
22567+ u16 value;
22568+
22569+ err = pci_read_config_word(dev, offset, &value);
22570+ if (err)
22571+ goto out;
22572+
22573+ if (value & PCI_PM_CTRL_PME_ENABLE) {
22574+ value &= ~PCI_PM_CTRL_PME_ENABLE;
22575+ err = pci_write_config_word(dev, offset, value);
22576+ }
22577+
22578+ out:
22579+ return ERR_PTR(err);
22580+}
22581+
22582+static const struct config_field caplist_pm[] = {
22583+ {
22584+ .offset = PCI_PM_PMC,
22585+ .size = 2,
22586+ .u.w.read = pm_caps_read,
22587+ },
22588+ {
22589+ .offset = PCI_PM_CTRL,
22590+ .size = 2,
22591+ .init = pm_ctrl_init,
22592+ .u.w.read = pciback_read_config_word,
22593+ .u.w.write = pm_ctrl_write,
22594+ },
22595+ {
22596+ .offset = PCI_PM_PPB_EXTENSIONS,
22597+ .size = 1,
22598+ .u.b.read = pciback_read_config_byte,
22599+ },
22600+ {
22601+ .offset = PCI_PM_DATA_REGISTER,
22602+ .size = 1,
22603+ .u.b.read = pciback_read_config_byte,
22604+ },
22605+ {}
22606+};
22607+
22608+struct pciback_config_capability pciback_config_capability_pm = {
22609+ .capability = PCI_CAP_ID_PM,
22610+ .fields = caplist_pm,
22611+};
22612Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability_vpd.c
22613===================================================================
22614--- /dev/null 1970-01-01 00:00:00.000000000 +0000
22615+++ head-2008-11-25/drivers/xen/pciback/conf_space_capability_vpd.c 2008-10-29 09:55:56.000000000 +0100
22616@@ -0,0 +1,40 @@
22617+/*
22618+ * PCI Backend - Configuration space overlay for Vital Product Data
22619+ *
22620+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22621+ */
22622+
22623+#include <linux/pci.h>
22624+#include "conf_space.h"
22625+#include "conf_space_capability.h"
22626+
22627+static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
22628+ void *data)
22629+{
22630+ /* Disallow writes to the vital product data */
22631+ if (value & PCI_VPD_ADDR_F)
22632+ return PCIBIOS_SET_FAILED;
22633+ else
22634+ return pci_write_config_word(dev, offset, value);
22635+}
22636+
22637+static const struct config_field caplist_vpd[] = {
22638+ {
22639+ .offset = PCI_VPD_ADDR,
22640+ .size = 2,
22641+ .u.w.read = pciback_read_config_word,
22642+ .u.w.write = vpd_address_write,
22643+ },
22644+ {
22645+ .offset = PCI_VPD_DATA,
22646+ .size = 4,
22647+ .u.dw.read = pciback_read_config_dword,
22648+ .u.dw.write = NULL,
22649+ },
22650+ {}
22651+};
22652+
22653+struct pciback_config_capability pciback_config_capability_vpd = {
22654+ .capability = PCI_CAP_ID_VPD,
22655+ .fields = caplist_vpd,
22656+};
22657Index: head-2008-11-25/drivers/xen/pciback/conf_space_header.c
22658===================================================================
22659--- /dev/null 1970-01-01 00:00:00.000000000 +0000
22660+++ head-2008-11-25/drivers/xen/pciback/conf_space_header.c 2008-10-29 09:55:56.000000000 +0100
22661@@ -0,0 +1,317 @@
22662+/*
22663+ * PCI Backend - Handles the virtual fields in the configuration space headers.
22664+ *
22665+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22666+ */
22667+
22668+#include <linux/kernel.h>
22669+#include <linux/pci.h>
22670+#include "pciback.h"
22671+#include "conf_space.h"
22672+
22673+struct pci_bar_info {
22674+ u32 val;
22675+ u32 len_val;
22676+ int which;
22677+};
22678+
22679+#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
22680+#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
22681+
22682+static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
22683+{
22684+ int err;
22685+
22686+ if (!dev->is_enabled && is_enable_cmd(value)) {
22687+ if (unlikely(verbose_request))
22688+ printk(KERN_DEBUG "pciback: %s: enable\n",
22689+ pci_name(dev));
22690+ err = pci_enable_device(dev);
22691+ if (err)
22692+ return err;
22693+ } else if (dev->is_enabled && !is_enable_cmd(value)) {
22694+ if (unlikely(verbose_request))
22695+ printk(KERN_DEBUG "pciback: %s: disable\n",
22696+ pci_name(dev));
22697+ pci_disable_device(dev);
22698+ }
22699+
22700+ if (!dev->is_busmaster && is_master_cmd(value)) {
22701+ if (unlikely(verbose_request))
22702+ printk(KERN_DEBUG "pciback: %s: set bus master\n",
22703+ pci_name(dev));
22704+ pci_set_master(dev);
22705+ }
22706+
22707+ if (value & PCI_COMMAND_INVALIDATE) {
22708+ if (unlikely(verbose_request))
22709+ printk(KERN_DEBUG
22710+ "pciback: %s: enable memory-write-invalidate\n",
22711+ pci_name(dev));
22712+ err = pci_set_mwi(dev);
22713+ if (err) {
22714+ printk(KERN_WARNING
22715+ "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
22716+ pci_name(dev), err);
22717+ value &= ~PCI_COMMAND_INVALIDATE;
22718+ }
22719+ }
22720+
22721+ return pci_write_config_word(dev, offset, value);
22722+}
22723+
22724+static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
22725+{
22726+ struct pci_bar_info *bar = data;
22727+
22728+ if (unlikely(!bar)) {
22729+ printk(KERN_WARNING "pciback: driver data not found for %s\n",
22730+ pci_name(dev));
22731+ return XEN_PCI_ERR_op_failed;
22732+ }
22733+
22734+ /* A write to obtain the length must happen as a 32-bit write.
22735+ * This does not (yet) support writing individual bytes
22736+ */
22737+ if (value == ~PCI_ROM_ADDRESS_ENABLE)
22738+ bar->which = 1;
22739+ else {
22740+ u32 tmpval;
22741+ pci_read_config_dword(dev, offset, &tmpval);
22742+ if (tmpval != bar->val && value == bar->val) {
22743+ /* Allow restoration of bar value. */
22744+ pci_write_config_dword(dev, offset, bar->val);
22745+ }
22746+ bar->which = 0;
22747+ }
22748+
22749+ /* Do we need to support enabling/disabling the rom address here? */
22750+
22751+ return 0;
22752+}
22753+
22754+/* For the BARs, only allow writes which write ~0 or
22755+ * the correct resource information
22756+ * (Needed for when the driver probes the resource usage)
22757+ */
22758+static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
22759+{
22760+ struct pci_bar_info *bar = data;
22761+
22762+ if (unlikely(!bar)) {
22763+ printk(KERN_WARNING "pciback: driver data not found for %s\n",
22764+ pci_name(dev));
22765+ return XEN_PCI_ERR_op_failed;
22766+ }
22767+
22768+ /* A write to obtain the length must happen as a 32-bit write.
22769+ * This does not (yet) support writing individual bytes
22770+ */
22771+ if (value == ~0)
22772+ bar->which = 1;
22773+ else {
22774+ u32 tmpval;
22775+ pci_read_config_dword(dev, offset, &tmpval);
22776+ if (tmpval != bar->val && value == bar->val) {
22777+ /* Allow restoration of bar value. */
22778+ pci_write_config_dword(dev, offset, bar->val);
22779+ }
22780+ bar->which = 0;
22781+ }
22782+
22783+ return 0;
22784+}
22785+
22786+static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
22787+{
22788+ struct pci_bar_info *bar = data;
22789+
22790+ if (unlikely(!bar)) {
22791+ printk(KERN_WARNING "pciback: driver data not found for %s\n",
22792+ pci_name(dev));
22793+ return XEN_PCI_ERR_op_failed;
22794+ }
22795+
22796+ *value = bar->which ? bar->len_val : bar->val;
22797+
22798+ return 0;
22799+}
22800+
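+/* Standard BAR sizing sequence: save the current value, write the all-ones
+ * mask, read back the size/flags word, then restore the original value.
+ */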
22801+static inline void read_dev_bar(struct pci_dev *dev,
22802+ struct pci_bar_info *bar_info, int offset,
22803+ u32 len_mask)
22804+{
22805+ pci_read_config_dword(dev, offset, &bar_info->val);
22806+ pci_write_config_dword(dev, offset, len_mask);
22807+ pci_read_config_dword(dev, offset, &bar_info->len_val);
22808+ pci_write_config_dword(dev, offset, bar_info->val);
22809+}
22810+
22811+static void *bar_init(struct pci_dev *dev, int offset)
22812+{
22813+ struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
22814+
22815+ if (!bar)
22816+ return ERR_PTR(-ENOMEM);
22817+
22818+ read_dev_bar(dev, bar, offset, ~0);
22819+ bar->which = 0;
22820+
22821+ return bar;
22822+}
22823+
22824+static void *rom_init(struct pci_dev *dev, int offset)
22825+{
22826+ struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
22827+
22828+ if (!bar)
22829+ return ERR_PTR(-ENOMEM);
22830+
22831+ read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
22832+ bar->which = 0;
22833+
22834+ return bar;
22835+}
22836+
22837+static void bar_reset(struct pci_dev *dev, int offset, void *data)
22838+{
22839+ struct pci_bar_info *bar = data;
22840+
22841+ bar->which = 0;
22842+}
22843+
22844+static void bar_release(struct pci_dev *dev, int offset, void *data)
22845+{
22846+ kfree(data);
22847+}
22848+
22849+static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
22850+ void *data)
22851+{
22852+ *value = (u8) dev->irq;
22853+
22854+ return 0;
22855+}
22856+
22857+static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
22858+{
22859+ u8 cur_value;
22860+ int err;
22861+
22862+ err = pci_read_config_byte(dev, offset, &cur_value);
22863+ if (err)
22864+ goto out;
22865+
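+	/* Only pass the write through if it merely starts a BIST or leaves
+	 * every bit other than the start bit unchanged.
+	 */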
22866+ if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
22867+ || value == PCI_BIST_START)
22868+ err = pci_write_config_byte(dev, offset, value);
22869+
22870+ out:
22871+ return err;
22872+}
22873+
22874+static const struct config_field header_common[] = {
22875+ {
22876+ .offset = PCI_COMMAND,
22877+ .size = 2,
22878+ .u.w.read = pciback_read_config_word,
22879+ .u.w.write = command_write,
22880+ },
22881+ {
22882+ .offset = PCI_INTERRUPT_LINE,
22883+ .size = 1,
22884+ .u.b.read = interrupt_read,
22885+ },
22886+ {
22887+ .offset = PCI_INTERRUPT_PIN,
22888+ .size = 1,
22889+ .u.b.read = pciback_read_config_byte,
22890+ },
22891+ {
22892+ /* Any side effects of letting driver domain control cache line? */
22893+ .offset = PCI_CACHE_LINE_SIZE,
22894+ .size = 1,
22895+ .u.b.read = pciback_read_config_byte,
22896+ .u.b.write = pciback_write_config_byte,
22897+ },
22898+ {
22899+ .offset = PCI_LATENCY_TIMER,
22900+ .size = 1,
22901+ .u.b.read = pciback_read_config_byte,
22902+ },
22903+ {
22904+ .offset = PCI_BIST,
22905+ .size = 1,
22906+ .u.b.read = pciback_read_config_byte,
22907+ .u.b.write = bist_write,
22908+ },
22909+ {}
22910+};
22911+
22912+#define CFG_FIELD_BAR(reg_offset) \
22913+ { \
22914+ .offset = reg_offset, \
22915+ .size = 4, \
22916+ .init = bar_init, \
22917+ .reset = bar_reset, \
22918+ .release = bar_release, \
22919+ .u.dw.read = bar_read, \
22920+ .u.dw.write = bar_write, \
22921+ }
22922+
22923+#define CFG_FIELD_ROM(reg_offset) \
22924+ { \
22925+ .offset = reg_offset, \
22926+ .size = 4, \
22927+ .init = rom_init, \
22928+ .reset = bar_reset, \
22929+ .release = bar_release, \
22930+ .u.dw.read = bar_read, \
22931+ .u.dw.write = rom_write, \
22932+ }
22933+
22934+static const struct config_field header_0[] = {
22935+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
22936+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
22937+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
22938+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
22939+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
22940+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
22941+ CFG_FIELD_ROM(PCI_ROM_ADDRESS),
22942+ {}
22943+};
22944+
22945+static const struct config_field header_1[] = {
22946+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
22947+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
22948+ CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
22949+ {}
22950+};
22951+
22952+int pciback_config_header_add_fields(struct pci_dev *dev)
22953+{
22954+ int err;
22955+
22956+ err = pciback_config_add_fields(dev, header_common);
22957+ if (err)
22958+ goto out;
22959+
22960+ switch (dev->hdr_type) {
22961+ case PCI_HEADER_TYPE_NORMAL:
22962+ err = pciback_config_add_fields(dev, header_0);
22963+ break;
22964+
22965+ case PCI_HEADER_TYPE_BRIDGE:
22966+ err = pciback_config_add_fields(dev, header_1);
22967+ break;
22968+
22969+ default:
22970+ err = -EINVAL;
22971+ printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
22972+ pci_name(dev), dev->hdr_type);
22973+ break;
22974+ }
22975+
22976+ out:
22977+ return err;
22978+}
22979Index: head-2008-11-25/drivers/xen/pciback/conf_space_quirks.c
22980===================================================================
22981--- /dev/null 1970-01-01 00:00:00.000000000 +0000
22982+++ head-2008-11-25/drivers/xen/pciback/conf_space_quirks.c 2007-06-12 13:13:45.000000000 +0200
22983@@ -0,0 +1,126 @@
22984+/*
22985+ * PCI Backend - Handle special overlays for broken devices.
22986+ *
22987+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22988+ * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
22989+ */
22990+
22991+#include <linux/kernel.h>
22992+#include <linux/pci.h>
22993+#include "pciback.h"
22994+#include "conf_space.h"
22995+#include "conf_space_quirks.h"
22996+
22997+LIST_HEAD(pciback_quirks);
22998+
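+/* Find the quirk entry registered for this device; returns NULL (after
+ * logging a debug message) if none matches.
+ */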
22999+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
23000+{
23001+ struct pciback_config_quirk *tmp_quirk;
23002+
23003+ list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list)
23004+ if (pci_match_id(&tmp_quirk->devid, dev))
23005+ goto out;
23006+ tmp_quirk = NULL;
23007+ printk(KERN_DEBUG
23008+ "quirk didn't match any device pciback knows about\n");
23009+ out:
23010+ return tmp_quirk;
23011+}
23012+
23013+static inline void register_quirk(struct pciback_config_quirk *quirk)
23014+{
23015+ list_add_tail(&quirk->quirks_list, &pciback_quirks);
23016+}
23017+
23018+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
23019+{
23020+ int ret = 0;
23021+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
23022+ struct config_field_entry *cfg_entry;
23023+
23024+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
23025+		if (OFFSET(cfg_entry) == reg) {
23026+ ret = 1;
23027+ break;
23028+ }
23029+ }
23030+ return ret;
23031+}
23032+
23033+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
23034+ *field)
23035+{
23036+ int err = 0;
23037+
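+	/* Hook up the pass-through read/write handlers that match the
+	 * field's width before adding it to the virtual config space.
+	 */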
23038+ switch (field->size) {
23039+ case 1:
23040+ field->u.b.read = pciback_read_config_byte;
23041+ field->u.b.write = pciback_write_config_byte;
23042+ break;
23043+ case 2:
23044+ field->u.w.read = pciback_read_config_word;
23045+ field->u.w.write = pciback_write_config_word;
23046+ break;
23047+ case 4:
23048+ field->u.dw.read = pciback_read_config_dword;
23049+ field->u.dw.write = pciback_write_config_dword;
23050+ break;
23051+ default:
23052+ err = -EINVAL;
23053+ goto out;
23054+ }
23055+
23056+ pciback_config_add_field(dev, field);
23057+
23058+ out:
23059+ return err;
23060+}
23061+
23062+int pciback_config_quirks_init(struct pci_dev *dev)
23063+{
23064+ struct pciback_config_quirk *quirk;
23065+ int ret = 0;
23066+
23067+ quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
23068+ if (!quirk) {
23069+ ret = -ENOMEM;
23070+ goto out;
23071+ }
23072+
23073+ quirk->devid.vendor = dev->vendor;
23074+ quirk->devid.device = dev->device;
23075+ quirk->devid.subvendor = dev->subsystem_vendor;
23076+ quirk->devid.subdevice = dev->subsystem_device;
23077+ quirk->devid.class = 0;
23078+ quirk->devid.class_mask = 0;
23079+ quirk->devid.driver_data = 0UL;
23080+
23081+ quirk->pdev = dev;
23082+
23083+ register_quirk(quirk);
23084+ out:
23085+ return ret;
23086+}
23087+
23088+void pciback_config_field_free(struct config_field *field)
23089+{
23090+ kfree(field);
23091+}
23092+
23093+int pciback_config_quirk_release(struct pci_dev *dev)
23094+{
23095+ struct pciback_config_quirk *quirk;
23096+ int ret = 0;
23097+
23098+ quirk = pciback_find_quirk(dev);
23099+ if (!quirk) {
23100+ ret = -ENXIO;
23101+ goto out;
23102+ }
23103+
23104+ list_del(&quirk->quirks_list);
23105+ kfree(quirk);
23106+
23107+ out:
23108+ return ret;
23109+}
23110Index: head-2008-11-25/drivers/xen/pciback/conf_space_quirks.h
23111===================================================================
23112--- /dev/null 1970-01-01 00:00:00.000000000 +0000
23113+++ head-2008-11-25/drivers/xen/pciback/conf_space_quirks.h 2007-06-12 13:13:45.000000000 +0200
23114@@ -0,0 +1,35 @@
23115+/*
23116+ * PCI Backend - Data structures for special overlays for broken devices.
23117+ *
23118+ * Ryan Wilson <hap9@epoch.ncsc.mil>
23119+ * Chris Bookholt <hap10@epoch.ncsc.mil>
23120+ */
23121+
23122+#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
23123+#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
23124+
23125+#include <linux/pci.h>
23126+#include <linux/list.h>
23127+
23128+struct pciback_config_quirk {
23129+ struct list_head quirks_list;
23130+ struct pci_device_id devid;
23131+ struct pci_dev *pdev;
23132+};
23133+
23134+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);
23135+
23136+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
23137+ *field);
23138+
23139+int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);
23140+
23141+int pciback_config_quirks_init(struct pci_dev *dev);
23142+
23143+void pciback_config_field_free(struct config_field *field);
23144+
23145+int pciback_config_quirk_release(struct pci_dev *dev);
23146+
23147+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);
23148+
23149+#endif
23150Index: head-2008-11-25/drivers/xen/pciback/controller.c
23151===================================================================
23152--- /dev/null 1970-01-01 00:00:00.000000000 +0000
23153+++ head-2008-11-25/drivers/xen/pciback/controller.c 2008-02-26 10:54:11.000000000 +0100
23154@@ -0,0 +1,408 @@
23155+/*
23156+ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
23157+ * Alex Williamson <alex.williamson@hp.com>
23158+ *
23159+ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI
23160+ * controllers. Devices under the same PCI controller are exposed on the
23161+ * same virtual domain:bus. Within a bus, device slots are virtualized
23162+ * to compact the bus.
23163+ *
23164+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23165+ * This program is free software; you can redistribute it and/or modify
23166+ * it under the terms of the GNU General Public License as published by
23167+ * the Free Software Foundation; either version 2 of the License, or
23168+ * (at your option) any later version.
23169+ *
23170+ * This program is distributed in the hope that it will be useful,
23171+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23172+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23173+ * GNU General Public License for more details.
23174+ *
23175+ * You should have received a copy of the GNU General Public License
23176+ * along with this program; if not, write to the Free Software
23177+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23178+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23179+ */
23180+
23181+#include <linux/acpi.h>
23182+#include <linux/list.h>
23183+#include <linux/pci.h>
23184+#include <linux/spinlock.h>
23185+#include "pciback.h"
23186+
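+/* Limits on the virtual topology exposed to the guest: bus numbers per
+ * domain and device slots per bus.
+ */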
23187+#define PCI_MAX_BUSSES 255
23188+#define PCI_MAX_SLOTS 32
23189+
23190+struct controller_dev_entry {
23191+ struct list_head list;
23192+ struct pci_dev *dev;
23193+ unsigned int devfn;
23194+};
23195+
23196+struct controller_list_entry {
23197+ struct list_head list;
23198+ struct pci_controller *controller;
23199+ unsigned int domain;
23200+ unsigned int bus;
23201+ unsigned int next_devfn;
23202+ struct list_head dev_list;
23203+};
23204+
23205+struct controller_dev_data {
23206+ struct list_head list;
23207+ unsigned int next_domain;
23208+ unsigned int next_bus;
23209+ spinlock_t lock;
23210+};
23211+
23212+struct walk_info {
23213+ struct pciback_device *pdev;
23214+ int resource_count;
23215+ int root_num;
23216+};
23217+
23218+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
23219+ unsigned int domain, unsigned int bus,
23220+ unsigned int devfn)
23221+{
23222+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
23223+ struct controller_dev_entry *dev_entry;
23224+ struct controller_list_entry *cntrl_entry;
23225+ struct pci_dev *dev = NULL;
23226+ unsigned long flags;
23227+
23228+ spin_lock_irqsave(&dev_data->lock, flags);
23229+
23230+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
23231+ if (cntrl_entry->domain != domain ||
23232+ cntrl_entry->bus != bus)
23233+ continue;
23234+
23235+ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
23236+ if (devfn == dev_entry->devfn) {
23237+ dev = dev_entry->dev;
23238+ goto found;
23239+ }
23240+ }
23241+ }
23242+found:
23243+ spin_unlock_irqrestore(&dev_data->lock, flags);
23244+
23245+ return dev;
23246+}
23247+
23248+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
23249+ int devid, publish_pci_dev_cb publish_cb)
23250+{
23251+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
23252+ struct controller_dev_entry *dev_entry;
23253+ struct controller_list_entry *cntrl_entry;
23254+ struct pci_controller *dev_controller = PCI_CONTROLLER(dev);
23255+ unsigned long flags;
23256+ int ret = 0, found = 0;
23257+
23258+ spin_lock_irqsave(&dev_data->lock, flags);
23259+
23260+ /* Look to see if we already have a domain:bus for this controller */
23261+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
23262+ if (cntrl_entry->controller == dev_controller) {
23263+ found = 1;
23264+ break;
23265+ }
23266+ }
23267+
23268+ if (!found) {
23269+ cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC);
23270+ if (!cntrl_entry) {
23271+ ret = -ENOMEM;
23272+ goto out;
23273+ }
23274+
23275+ cntrl_entry->controller = dev_controller;
23276+ cntrl_entry->next_devfn = PCI_DEVFN(0, 0);
23277+
23278+ cntrl_entry->domain = dev_data->next_domain;
23279+ cntrl_entry->bus = dev_data->next_bus++;
23280+ if (dev_data->next_bus > PCI_MAX_BUSSES) {
23281+ dev_data->next_domain++;
23282+ dev_data->next_bus = 0;
23283+ }
23284+
23285+ INIT_LIST_HEAD(&cntrl_entry->dev_list);
23286+
23287+ list_add_tail(&cntrl_entry->list, &dev_data->list);
23288+ }
23289+
23290+ if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) {
23291+ /*
23292+ * While it seems unlikely, this can actually happen if
23293+ * a controller has P2P bridges under it.
23294+ */
23295+ xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x "
23296+ "is full, no room to export %04x:%02x:%02x.%x",
23297+ cntrl_entry->domain, cntrl_entry->bus,
23298+ pci_domain_nr(dev->bus), dev->bus->number,
23299+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
23300+ ret = -ENOSPC;
23301+ goto out;
23302+ }
23303+
23304+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC);
23305+ if (!dev_entry) {
23306+ if (list_empty(&cntrl_entry->dev_list)) {
23307+ list_del(&cntrl_entry->list);
23308+ kfree(cntrl_entry);
23309+ }
23310+ ret = -ENOMEM;
23311+ goto out;
23312+ }
23313+
23314+ dev_entry->dev = dev;
23315+ dev_entry->devfn = cntrl_entry->next_devfn;
23316+
23317+ list_add_tail(&dev_entry->list, &cntrl_entry->dev_list);
23318+
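+	/* Advance to the next virtual slot; each exported device occupies
+	 * function 0 of its own slot on the virtual bus.
+	 */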
23319+ cntrl_entry->next_devfn += PCI_DEVFN(1, 0);
23320+
23321+out:
23322+ spin_unlock_irqrestore(&dev_data->lock, flags);
23323+
23324+ /* TODO: Publish virtual domain:bus:slot.func here. */
23325+
23326+ return ret;
23327+}
23328+
23329+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
23330+{
23331+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
23332+ struct controller_list_entry *cntrl_entry;
23333+ struct controller_dev_entry *dev_entry = NULL;
23334+ struct pci_dev *found_dev = NULL;
23335+ unsigned long flags;
23336+
23337+ spin_lock_irqsave(&dev_data->lock, flags);
23338+
23339+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
23340+ if (cntrl_entry->controller != PCI_CONTROLLER(dev))
23341+ continue;
23342+
23343+ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
23344+ if (dev_entry->dev == dev) {
23345+ found_dev = dev_entry->dev;
23346+ break;
23347+ }
23348+ }
23349+ }
23350+
23351+ if (!found_dev) {
23352+ spin_unlock_irqrestore(&dev_data->lock, flags);
23353+ return;
23354+ }
23355+
23356+ list_del(&dev_entry->list);
23357+ kfree(dev_entry);
23358+
23359+ if (list_empty(&cntrl_entry->dev_list)) {
23360+ list_del(&cntrl_entry->list);
23361+ kfree(cntrl_entry);
23362+ }
23363+
23364+ spin_unlock_irqrestore(&dev_data->lock, flags);
23365+ pcistub_put_pci_dev(found_dev);
23366+}
23367+
23368+int pciback_init_devices(struct pciback_device *pdev)
23369+{
23370+ struct controller_dev_data *dev_data;
23371+
23372+ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
23373+ if (!dev_data)
23374+ return -ENOMEM;
23375+
23376+ spin_lock_init(&dev_data->lock);
23377+
23378+ INIT_LIST_HEAD(&dev_data->list);
23379+
23380+ /* Starting domain:bus numbers */
23381+ dev_data->next_domain = 0;
23382+ dev_data->next_bus = 0;
23383+
23384+ pdev->pci_dev_data = dev_data;
23385+
23386+ return 0;
23387+}
23388+
23389+static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
23390+{
23391+ struct walk_info *info = data;
23392+ struct acpi_resource_address64 addr;
23393+ acpi_status status;
23394+ int i, len, err;
23395+ char str[32], tmp[3];
23396+ unsigned char *ptr, *buf;
23397+
23398+ status = acpi_resource_to_address64(res, &addr);
23399+
23400+ /* Do we care about this range? Let's check. */
23401+ if (!ACPI_SUCCESS(status) ||
23402+ !(addr.resource_type == ACPI_MEMORY_RANGE ||
23403+ addr.resource_type == ACPI_IO_RANGE) ||
23404+ !addr.address_length || addr.producer_consumer != ACPI_PRODUCER)
23405+ return AE_OK;
23406+
23407+ /*
23408+ * Furthermore, we really only care to tell the guest about
23409+ * address ranges that require address translation of some sort.
23410+ */
23411+ if (!(addr.resource_type == ACPI_MEMORY_RANGE &&
23412+ addr.info.mem.translation) &&
23413+ !(addr.resource_type == ACPI_IO_RANGE &&
23414+ addr.info.io.translation))
23415+ return AE_OK;
23416+
23417+ /* Store the resource in xenbus for the guest */
23418+ len = snprintf(str, sizeof(str), "root-%d-resource-%d",
23419+ info->root_num, info->resource_count);
23420+ if (unlikely(len >= (sizeof(str) - 1)))
23421+ return AE_OK;
23422+
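+	/* Two hex characters per byte of the resource, plus a trailing NUL. */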
23423+ buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL);
23424+ if (!buf)
23425+ return AE_OK;
23426+
23427+ /* Clean out resource_source */
23428+ res->data.address64.resource_source.index = 0xFF;
23429+ res->data.address64.resource_source.string_length = 0;
23430+ res->data.address64.resource_source.string_ptr = NULL;
23431+
23432+ ptr = (unsigned char *)res;
23433+
23434+ /* Turn the acpi_resource into an ASCII byte stream */
23435+ for (i = 0; i < sizeof(*res); i++) {
23436+ snprintf(tmp, sizeof(tmp), "%02x", ptr[i]);
23437+ strncat(buf, tmp, 2);
23438+ }
23439+
23440+ err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename,
23441+ str, "%s", buf);
23442+
23443+ if (!err)
23444+ info->resource_count++;
23445+
23446+ kfree(buf);
23447+
23448+ return AE_OK;
23449+}
23450+
23451+int pciback_publish_pci_roots(struct pciback_device *pdev,
23452+ publish_pci_root_cb publish_root_cb)
23453+{
23454+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
23455+ struct controller_list_entry *cntrl_entry;
23456+ int i, root_num, len, err = 0;
23457+ unsigned int domain, bus;
23458+ char str[64];
23459+ struct walk_info info;
23460+
23461+ spin_lock(&dev_data->lock);
23462+
23463+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
23464+ /* First publish all the domain:bus info */
23465+ err = publish_root_cb(pdev, cntrl_entry->domain,
23466+ cntrl_entry->bus);
23467+ if (err)
23468+ goto out;
23469+
23470+ /*
23471+ * Now figure out which root-%d this belongs to
23472+ * so we can associate resources with it.
23473+ */
23474+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
23475+ "root_num", "%d", &root_num);
23476+
23477+ if (err != 1)
23478+ goto out;
23479+
23480+ for (i = 0; i < root_num; i++) {
23481+ len = snprintf(str, sizeof(str), "root-%d", i);
23482+ if (unlikely(len >= (sizeof(str) - 1))) {
23483+ err = -ENOMEM;
23484+ goto out;
23485+ }
23486+
23487+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
23488+ str, "%x:%x", &domain, &bus);
23489+ if (err != 2)
23490+ goto out;
23491+
23492+ /* Is this the one we just published? */
23493+ if (domain == cntrl_entry->domain &&
23494+ bus == cntrl_entry->bus)
23495+ break;
23496+ }
23497+
23498+ if (i == root_num)
23499+ goto out;
23500+
23501+ info.pdev = pdev;
23502+ info.resource_count = 0;
23503+ info.root_num = i;
23504+
23505+ /* Let ACPI do the heavy lifting on decoding resources */
23506+ acpi_walk_resources(cntrl_entry->controller->acpi_handle,
23507+ METHOD_NAME__CRS, write_xenbus_resource,
23508+ &info);
23509+
23510+		/* No resources. OK. On to the next one. */
23511+ if (!info.resource_count)
23512+ continue;
23513+
23514+ /* Store the number of resources we wrote for this root-%d */
23515+ len = snprintf(str, sizeof(str), "root-%d-resources", i);
23516+ if (unlikely(len >= (sizeof(str) - 1))) {
23517+ err = -ENOMEM;
23518+ goto out;
23519+ }
23520+
23521+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
23522+ "%d", info.resource_count);
23523+ if (err)
23524+ goto out;
23525+ }
23526+
23527+ /* Finally, write some magic to synchronize with the guest. */
23528+ len = snprintf(str, sizeof(str), "root-resource-magic");
23529+ if (unlikely(len >= (sizeof(str) - 1))) {
23530+ err = -ENOMEM;
23531+ goto out;
23532+ }
23533+
23534+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
23535+ "%lx", (sizeof(struct acpi_resource) * 2) + 1);
23536+
23537+out:
23538+ spin_unlock(&dev_data->lock);
23539+
23540+ return err;
23541+}
23542+
23543+void pciback_release_devices(struct pciback_device *pdev)
23544+{
23545+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
23546+ struct controller_list_entry *cntrl_entry, *c;
23547+ struct controller_dev_entry *dev_entry, *d;
23548+
23549+ list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) {
23550+ list_for_each_entry_safe(dev_entry, d,
23551+ &cntrl_entry->dev_list, list) {
23552+ list_del(&dev_entry->list);
23553+ pcistub_put_pci_dev(dev_entry->dev);
23554+ kfree(dev_entry);
23555+ }
23556+ list_del(&cntrl_entry->list);
23557+ kfree(cntrl_entry);
23558+ }
23559+
23560+ kfree(dev_data);
23561+ pdev->pci_dev_data = NULL;
23562+}
23563Index: head-2008-11-25/drivers/xen/pciback/passthrough.c
23564===================================================================
23565--- /dev/null 1970-01-01 00:00:00.000000000 +0000
23566+++ head-2008-11-25/drivers/xen/pciback/passthrough.c 2008-04-02 12:34:02.000000000 +0200
23567@@ -0,0 +1,166 @@
23568+/*
23569+ * PCI Backend - Provides restricted access to the real PCI bus topology
23570+ * to the frontend
23571+ *
23572+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
23573+ */
23574+
23575+#include <linux/list.h>
23576+#include <linux/pci.h>
23577+#include <linux/spinlock.h>
23578+#include "pciback.h"
23579+
23580+struct passthrough_dev_data {
23581+ /* Access to dev_list must be protected by lock */
23582+ struct list_head dev_list;
23583+ spinlock_t lock;
23584+};
23585+
23586+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
23587+ unsigned int domain, unsigned int bus,
23588+ unsigned int devfn)
23589+{
23590+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
23591+ struct pci_dev_entry *dev_entry;
23592+ struct pci_dev *dev = NULL;
23593+ unsigned long flags;
23594+
23595+ spin_lock_irqsave(&dev_data->lock, flags);
23596+
23597+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
23598+ if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
23599+ && bus == (unsigned int)dev_entry->dev->bus->number
23600+ && devfn == dev_entry->dev->devfn) {
23601+ dev = dev_entry->dev;
23602+ break;
23603+ }
23604+ }
23605+
23606+ spin_unlock_irqrestore(&dev_data->lock, flags);
23607+
23608+ return dev;
23609+}
23610+
23611+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
23612+ int devid, publish_pci_dev_cb publish_cb)
23613+{
23614+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
23615+ struct pci_dev_entry *dev_entry;
23616+ unsigned long flags;
23617+ unsigned int domain, bus, devfn;
23618+ int err;
23619+
23620+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
23621+ if (!dev_entry)
23622+ return -ENOMEM;
23623+ dev_entry->dev = dev;
23624+
23625+ spin_lock_irqsave(&dev_data->lock, flags);
23626+ list_add_tail(&dev_entry->list, &dev_data->dev_list);
23627+ spin_unlock_irqrestore(&dev_data->lock, flags);
23628+
23629+ /* Publish this device. */
23630+ domain = (unsigned int)pci_domain_nr(dev->bus);
23631+ bus = (unsigned int)dev->bus->number;
23632+ devfn = dev->devfn;
23633+ err = publish_cb(pdev, domain, bus, devfn, devid);
23634+
23635+ return err;
23636+}
23637+
23638+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
23639+{
23640+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
23641+ struct pci_dev_entry *dev_entry, *t;
23642+ struct pci_dev *found_dev = NULL;
23643+ unsigned long flags;
23644+
23645+ spin_lock_irqsave(&dev_data->lock, flags);
23646+
23647+ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
23648+ if (dev_entry->dev == dev) {
23649+ list_del(&dev_entry->list);
23650+ found_dev = dev_entry->dev;
23651+ kfree(dev_entry);
23652+ }
23653+ }
23654+
23655+ spin_unlock_irqrestore(&dev_data->lock, flags);
23656+
23657+ if (found_dev)
23658+ pcistub_put_pci_dev(found_dev);
23659+}
23660+
23661+int pciback_init_devices(struct pciback_device *pdev)
23662+{
23663+ struct passthrough_dev_data *dev_data;
23664+
23665+ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
23666+ if (!dev_data)
23667+ return -ENOMEM;
23668+
23669+ spin_lock_init(&dev_data->lock);
23670+
23671+ INIT_LIST_HEAD(&dev_data->dev_list);
23672+
23673+ pdev->pci_dev_data = dev_data;
23674+
23675+ return 0;
23676+}
23677+
23678+int pciback_publish_pci_roots(struct pciback_device *pdev,
23679+ publish_pci_root_cb publish_root_cb)
23680+{
23681+ int err = 0;
23682+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
23683+ struct pci_dev_entry *dev_entry, *e;
23684+ struct pci_dev *dev;
23685+ int found;
23686+ unsigned int domain, bus;
23687+
23688+ spin_lock(&dev_data->lock);
23689+
23690+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
23691+ /* Only publish this device as a root if none of its
23692+ * parent bridges are exported
23693+ */
23694+ found = 0;
23695+ dev = dev_entry->dev->bus->self;
23696+ for (; !found && dev != NULL; dev = dev->bus->self) {
23697+ list_for_each_entry(e, &dev_data->dev_list, list) {
23698+ if (dev == e->dev) {
23699+ found = 1;
23700+ break;
23701+ }
23702+ }
23703+ }
23704+
23705+ domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
23706+ bus = (unsigned int)dev_entry->dev->bus->number;
23707+
23708+ if (!found) {
23709+ err = publish_root_cb(pdev, domain, bus);
23710+ if (err)
23711+ break;
23712+ }
23713+ }
23714+
23715+ spin_unlock(&dev_data->lock);
23716+
23717+ return err;
23718+}
23719+
23720+void pciback_release_devices(struct pciback_device *pdev)
23721+{
23722+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
23723+ struct pci_dev_entry *dev_entry, *t;
23724+
23725+ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
23726+ list_del(&dev_entry->list);
23727+ pcistub_put_pci_dev(dev_entry->dev);
23728+ kfree(dev_entry);
23729+ }
23730+
23731+ kfree(dev_data);
23732+ pdev->pci_dev_data = NULL;
23733+}
23734Index: head-2008-11-25/drivers/xen/pciback/pci_stub.c
23735===================================================================
23736--- /dev/null 1970-01-01 00:00:00.000000000 +0000
23737+++ head-2008-11-25/drivers/xen/pciback/pci_stub.c 2008-10-29 09:55:56.000000000 +0100
23738@@ -0,0 +1,948 @@
23739+/*
23740+ * PCI Stub Driver - Grabs devices in backend to be exported later
23741+ *
23742+ * Ryan Wilson <hap9@epoch.ncsc.mil>
23743+ * Chris Bookholt <hap10@epoch.ncsc.mil>
23744+ */
23745+#include <linux/module.h>
23746+#include <linux/init.h>
23747+#include <linux/list.h>
23748+#include <linux/spinlock.h>
23749+#include <linux/kref.h>
23750+#include <asm/atomic.h>
23751+#include "pciback.h"
23752+#include "conf_space.h"
23753+#include "conf_space_quirks.h"
23754+
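+/* Devices the stub driver should seize for export, supplied through the
+ * "hide" module parameter.
+ */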
23755+static char *pci_devs_to_hide = NULL;
23756+module_param_named(hide, pci_devs_to_hide, charp, 0444);
23757+
23758+struct pcistub_device_id {
23759+ struct list_head slot_list;
23760+ int domain;
23761+ unsigned char bus;
23762+ unsigned int devfn;
23763+};
23764+static LIST_HEAD(pcistub_device_ids);
23765+static DEFINE_SPINLOCK(device_ids_lock);
23766+
23767+struct pcistub_device {
23768+ struct kref kref;
23769+ struct list_head dev_list;
23770+ spinlock_t lock;
23771+
23772+ struct pci_dev *dev;
23773+ struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */
23774+};
23775+
23776+/* Access to pcistub_devices & seized_devices lists and the initialize_devices
23777+ * flag must be locked with pcistub_devices_lock
23778+ */
23779+static DEFINE_SPINLOCK(pcistub_devices_lock);
23780+static LIST_HEAD(pcistub_devices);
23781+
23782+/* wait for device_initcall before initializing our devices
23783+ * (see pcistub_init_devices_late)
23784+ */
23785+static int initialize_devices = 0;
23786+static LIST_HEAD(seized_devices);
23787+
23788+static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
23789+{
23790+ struct pcistub_device *psdev;
23791+
23792+ dev_dbg(&dev->dev, "pcistub_device_alloc\n");
23793+
23794+ psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
23795+ if (!psdev)
23796+ return NULL;
23797+
23798+ psdev->dev = pci_dev_get(dev);
23799+ if (!psdev->dev) {
23800+ kfree(psdev);
23801+ return NULL;
23802+ }
23803+
23804+ kref_init(&psdev->kref);
23805+ spin_lock_init(&psdev->lock);
23806+
23807+ return psdev;
23808+}
23809+
23810+/* Don't call this directly as it's called by pcistub_device_put */
23811+static void pcistub_device_release(struct kref *kref)
23812+{
23813+ struct pcistub_device *psdev;
23814+
23815+ psdev = container_of(kref, struct pcistub_device, kref);
23816+
23817+ dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
23818+
23819+ /* Clean-up the device */
23820+ pciback_reset_device(psdev->dev);
23821+ pciback_config_free_dyn_fields(psdev->dev);
23822+ pciback_config_free_dev(psdev->dev);
23823+ kfree(pci_get_drvdata(psdev->dev));
23824+ pci_set_drvdata(psdev->dev, NULL);
23825+
23826+ pci_dev_put(psdev->dev);
23827+
23828+ kfree(psdev);
23829+}
23830+
23831+static inline void pcistub_device_get(struct pcistub_device *psdev)
23832+{
23833+ kref_get(&psdev->kref);
23834+}
23835+
23836+static inline void pcistub_device_put(struct pcistub_device *psdev)
23837+{
23838+ kref_put(&psdev->kref, pcistub_device_release);
23839+}
23840+
23841+static struct pcistub_device *pcistub_device_find(int domain, int bus,
23842+ int slot, int func)
23843+{
23844+ struct pcistub_device *psdev = NULL;
23845+ unsigned long flags;
23846+
23847+ spin_lock_irqsave(&pcistub_devices_lock, flags);
23848+
23849+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
23850+ if (psdev->dev != NULL
23851+ && domain == pci_domain_nr(psdev->dev->bus)
23852+ && bus == psdev->dev->bus->number
23853+ && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
23854+ pcistub_device_get(psdev);
23855+ goto out;
23856+ }
23857+ }
23858+
23859+ /* didn't find it */
23860+ psdev = NULL;
23861+
23862+ out:
23863+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
23864+ return psdev;
23865+}
23866+
23867+static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
23868+ struct pcistub_device *psdev)
23869+{
23870+ struct pci_dev *pci_dev = NULL;
23871+ unsigned long flags;
23872+
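+	/* Atomically claim the stub device for this pciback instance; hand
+	 * out the pci_dev only if no other instance already owns it.
+	 */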
23873+ pcistub_device_get(psdev);
23874+
23875+ spin_lock_irqsave(&psdev->lock, flags);
23876+ if (!psdev->pdev) {
23877+ psdev->pdev = pdev;
23878+ pci_dev = psdev->dev;
23879+ }
23880+ spin_unlock_irqrestore(&psdev->lock, flags);
23881+
23882+ if (!pci_dev)
23883+ pcistub_device_put(psdev);
23884+
23885+ return pci_dev;
23886+}
23887+
23888+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
23889+ int domain, int bus,
23890+ int slot, int func)
23891+{
23892+ struct pcistub_device *psdev;
23893+ struct pci_dev *found_dev = NULL;
23894+ unsigned long flags;
23895+
23896+ spin_lock_irqsave(&pcistub_devices_lock, flags);
23897+
23898+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
23899+ if (psdev->dev != NULL
23900+ && domain == pci_domain_nr(psdev->dev->bus)
23901+ && bus == psdev->dev->bus->number
23902+ && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
23903+ found_dev = pcistub_device_get_pci_dev(pdev, psdev);
23904+ break;
23905+ }
23906+ }
23907+
23908+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
23909+ return found_dev;
23910+}
23911+
23912+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
23913+ struct pci_dev *dev)
23914+{
23915+ struct pcistub_device *psdev;
23916+ struct pci_dev *found_dev = NULL;
23917+ unsigned long flags;
23918+
23919+ spin_lock_irqsave(&pcistub_devices_lock, flags);
23920+
23921+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
23922+ if (psdev->dev == dev) {
23923+ found_dev = pcistub_device_get_pci_dev(pdev, psdev);
23924+ break;
23925+ }
23926+ }
23927+
23928+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
23929+ return found_dev;
23930+}
23931+
23932+void pcistub_put_pci_dev(struct pci_dev *dev)
23933+{
23934+ struct pcistub_device *psdev, *found_psdev = NULL;
23935+ unsigned long flags;
23936+
23937+ spin_lock_irqsave(&pcistub_devices_lock, flags);
23938+
23939+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
23940+ if (psdev->dev == dev) {
23941+ found_psdev = psdev;
23942+ break;
23943+ }
23944+ }
23945+
23946+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
23947+
23948+ /* Cleanup our device
23949+ * (so it's ready for the next domain)
23950+ */
23951+ pciback_reset_device(found_psdev->dev);
23952+ pciback_config_free_dyn_fields(found_psdev->dev);
23953+ pciback_config_reset_dev(found_psdev->dev);
23954+
23955+ spin_lock_irqsave(&found_psdev->lock, flags);
23956+ found_psdev->pdev = NULL;
23957+ spin_unlock_irqrestore(&found_psdev->lock, flags);
23958+
23959+ pcistub_device_put(found_psdev);
23960+}
23961+
23962+static int __devinit pcistub_match_one(struct pci_dev *dev,
23963+ struct pcistub_device_id *pdev_id)
23964+{
23965+ /* Match the specified device by domain, bus, slot, func and also if
23966+ * any of the device's parent bridges match.
23967+ */
23968+ for (; dev != NULL; dev = dev->bus->self) {
23969+ if (pci_domain_nr(dev->bus) == pdev_id->domain
23970+ && dev->bus->number == pdev_id->bus
23971+ && dev->devfn == pdev_id->devfn)
23972+ return 1;
23973+
23974+ /* Sometimes topmost bridge links to itself. */
23975+ if (dev == dev->bus->self)
23976+ break;
23977+ }
23978+
23979+ return 0;
23980+}
23981+
23982+static int __devinit pcistub_match(struct pci_dev *dev)
23983+{
23984+ struct pcistub_device_id *pdev_id;
23985+ unsigned long flags;
23986+ int found = 0;
23987+
23988+ spin_lock_irqsave(&device_ids_lock, flags);
23989+ list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
23990+ if (pcistub_match_one(dev, pdev_id)) {
23991+ found = 1;
23992+ break;
23993+ }
23994+ }
23995+ spin_unlock_irqrestore(&device_ids_lock, flags);
23996+
23997+ return found;
23998+}
23999+
24000+static int __devinit pcistub_init_device(struct pci_dev *dev)
24001+{
24002+ struct pciback_dev_data *dev_data;
24003+ int err = 0;
24004+
24005+ dev_dbg(&dev->dev, "initializing...\n");
24006+
24007+	/* The PCI backend is not intended to be a module (or to work with
24008+	 * removable PCI devices) yet. If it were, pciback_config_free()
24009+	 * would need to be called somewhere to free the memory allocated
24010+	 * here, followed by kfree(pci_get_drvdata(psdev->dev)).
24011+ */
24012+ dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
24013+ if (!dev_data) {
24014+ err = -ENOMEM;
24015+ goto out;
24016+ }
24017+ pci_set_drvdata(dev, dev_data);
24018+
24019+ dev_dbg(&dev->dev, "initializing config\n");
24020+ err = pciback_config_init_dev(dev);
24021+ if (err)
24022+ goto out;
24023+
24024+ /* HACK: Force device (& ACPI) to determine what IRQ it's on - we
24025+ * must do this here because pcibios_enable_device may specify
24026+ * the pci device's true irq (and possibly its other resources)
24027+ * if they differ from what's in the configuration space.
24028+ * This makes the assumption that the device's resources won't
24029+ * change after this point (otherwise this code may break!)
24030+ */
24031+ dev_dbg(&dev->dev, "enabling device\n");
24032+ err = pci_enable_device(dev);
24033+ if (err)
24034+ goto config_release;
24035+
24036+ /* Now disable the device (this also ensures some private device
24037+	 * data is set up before we export)
24038+ */
24039+ dev_dbg(&dev->dev, "reset device\n");
24040+ pciback_reset_device(dev);
24041+
24042+ return 0;
24043+
24044+ config_release:
24045+ pciback_config_free_dev(dev);
24046+
24047+ out:
24048+ pci_set_drvdata(dev, NULL);
24049+ kfree(dev_data);
24050+ return err;
24051+}
24052+
24053+/*
24054+ * Because some initialization still happens on
24055+ * devices during fs_initcall, we need to defer
24056+ * full initialization of our devices until
24057+ * device_initcall.
24058+ */
24059+static int __init pcistub_init_devices_late(void)
24060+{
24061+ struct pcistub_device *psdev;
24062+ unsigned long flags;
24063+ int err = 0;
24064+
24065+ pr_debug("pciback: pcistub_init_devices_late\n");
24066+
24067+ spin_lock_irqsave(&pcistub_devices_lock, flags);
24068+
24069+ while (!list_empty(&seized_devices)) {
24070+ psdev = container_of(seized_devices.next,
24071+ struct pcistub_device, dev_list);
24072+ list_del(&psdev->dev_list);
24073+
24074+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24075+
24076+ err = pcistub_init_device(psdev->dev);
24077+ if (err) {
24078+ dev_err(&psdev->dev->dev,
24079+ "error %d initializing device\n", err);
24080+ kfree(psdev);
24081+ psdev = NULL;
24082+ }
24083+
24084+ spin_lock_irqsave(&pcistub_devices_lock, flags);
24085+
24086+ if (psdev)
24087+ list_add_tail(&psdev->dev_list, &pcistub_devices);
24088+ }
24089+
24090+ initialize_devices = 1;
24091+
24092+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24093+
24094+ return 0;
24095+}
24096+
24097+static int __devinit pcistub_seize(struct pci_dev *dev)
24098+{
24099+ struct pcistub_device *psdev;
24100+ unsigned long flags;
24101+ int err = 0;
24102+
24103+ psdev = pcistub_device_alloc(dev);
24104+ if (!psdev)
24105+ return -ENOMEM;
24106+
24107+ spin_lock_irqsave(&pcistub_devices_lock, flags);
24108+
24109+ if (initialize_devices) {
24110+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24111+
24112+ /* don't want irqs disabled when calling pcistub_init_device */
24113+ err = pcistub_init_device(psdev->dev);
24114+
24115+ spin_lock_irqsave(&pcistub_devices_lock, flags);
24116+
24117+ if (!err)
24118+ list_add(&psdev->dev_list, &pcistub_devices);
24119+ } else {
24120+ dev_dbg(&dev->dev, "deferring initialization\n");
24121+ list_add(&psdev->dev_list, &seized_devices);
24122+ }
24123+
24124+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24125+
24126+ if (err)
24127+ pcistub_device_put(psdev);
24128+
24129+ return err;
24130+}
24131+
24132+static int __devinit pcistub_probe(struct pci_dev *dev,
24133+ const struct pci_device_id *id)
24134+{
24135+ int err = 0;
24136+
24137+ dev_dbg(&dev->dev, "probing...\n");
24138+
24139+ if (pcistub_match(dev)) {
24140+
24141+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
24142+ && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
24143+ dev_err(&dev->dev, "can't export pci devices that "
24144+ "don't have a normal (0) or bridge (1) "
24145+ "header type!\n");
24146+ err = -ENODEV;
24147+ goto out;
24148+ }
24149+
24150+ dev_info(&dev->dev, "seizing device\n");
24151+ err = pcistub_seize(dev);
24152+ } else
24153+ /* Didn't find the device */
24154+ err = -ENODEV;
24155+
24156+ out:
24157+ return err;
24158+}
24159+
24160+static void pcistub_remove(struct pci_dev *dev)
24161+{
24162+ struct pcistub_device *psdev, *found_psdev = NULL;
24163+ unsigned long flags;
24164+
24165+ dev_dbg(&dev->dev, "removing\n");
24166+
24167+ spin_lock_irqsave(&pcistub_devices_lock, flags);
24168+
24169+ pciback_config_quirk_release(dev);
24170+
24171+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
24172+ if (psdev->dev == dev) {
24173+ found_psdev = psdev;
24174+ break;
24175+ }
24176+ }
24177+
24178+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24179+
24180+ if (found_psdev) {
24181+ dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
24182+ found_psdev->pdev);
24183+
24184+ if (found_psdev->pdev) {
24185+ printk(KERN_WARNING "pciback: ****** removing device "
24186+ "%s while still in-use! ******\n",
24187+ pci_name(found_psdev->dev));
24188+ printk(KERN_WARNING "pciback: ****** driver domain may "
24189+ "still access this device's i/o resources!\n");
24190+			printk(KERN_WARNING "pciback: ****** shut down the driver "
24191+			       "domain before binding the device\n");
24192+ printk(KERN_WARNING "pciback: ****** to other drivers "
24193+ "or domains\n");
24194+
24195+ pciback_release_pci_dev(found_psdev->pdev,
24196+ found_psdev->dev);
24197+ }
24198+
24199+ spin_lock_irqsave(&pcistub_devices_lock, flags);
24200+ list_del(&found_psdev->dev_list);
24201+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24202+
24203+ /* the final put for releasing from the list */
24204+ pcistub_device_put(found_psdev);
24205+ }
24206+}
24207+
24208+static const struct pci_device_id pcistub_ids[] = {
24209+ {
24210+ .vendor = PCI_ANY_ID,
24211+ .device = PCI_ANY_ID,
24212+ .subvendor = PCI_ANY_ID,
24213+ .subdevice = PCI_ANY_ID,
24214+ },
24215+ {0,},
24216+};
24217+
24218+/*
24219+ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
24220+ * for a normal device. I don't want it to be loaded automatically.
24221+ */
24222+
24223+static struct pci_driver pciback_pci_driver = {
24224+ .name = "pciback",
24225+ .id_table = pcistub_ids,
24226+ .probe = pcistub_probe,
24227+ .remove = pcistub_remove,
24228+};
24229+
24230+static inline int str_to_slot(const char *buf, int *domain, int *bus,
24231+ int *slot, int *func)
24232+{
24233+ int err;
24234+
24235+ err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
24236+ if (err == 4)
24237+ return 0;
24238+ else if (err < 0)
24239+ return -EINVAL;
24240+
24241+ /* try again without domain */
24242+ *domain = 0;
24243+ err = sscanf(buf, " %x:%x.%x", bus, slot, func);
24244+ if (err == 3)
24245+ return 0;
24246+
24247+ return -EINVAL;
24248+}
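+
+/* str_to_slot() accepts either a fully qualified slot specification
+ * including the PCI domain (e.g. "0000:01:0a.0") or the short form
+ * without it (e.g. "01:0a.0"), in which case the domain defaults to 0.
+ */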
24249+
24250+static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
24251+ *slot, int *func, int *reg, int *size, int *mask)
24252+{
24253+ int err;
24254+
24255+ err =
24256+ sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
24257+ func, reg, size, mask);
24258+ if (err == 7)
24259+ return 0;
24260+ return -EINVAL;
24261+}
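+
+/* str_to_quirk() expects a slot specification followed by a register
+ * description, e.g. "0000:01:0a.0-00000040:2:0000ffff" (illustrative
+ * values) to expose config register 0x40 with size 2 and write mask
+ * 0xffff.
+ */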
24262+
24263+static int pcistub_device_id_add(int domain, int bus, int slot, int func)
24264+{
24265+ struct pcistub_device_id *pci_dev_id;
24266+ unsigned long flags;
24267+
24268+ pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
24269+ if (!pci_dev_id)
24270+ return -ENOMEM;
24271+
24272+ pci_dev_id->domain = domain;
24273+ pci_dev_id->bus = bus;
24274+ pci_dev_id->devfn = PCI_DEVFN(slot, func);
24275+
24276+ pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
24277+ domain, bus, slot, func);
24278+
24279+ spin_lock_irqsave(&device_ids_lock, flags);
24280+ list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
24281+ spin_unlock_irqrestore(&device_ids_lock, flags);
24282+
24283+ return 0;
24284+}
24285+
24286+static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
24287+{
24288+ struct pcistub_device_id *pci_dev_id, *t;
24289+ int devfn = PCI_DEVFN(slot, func);
24290+ int err = -ENOENT;
24291+ unsigned long flags;
24292+
24293+ spin_lock_irqsave(&device_ids_lock, flags);
24294+ list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
24295+
24296+ if (pci_dev_id->domain == domain
24297+ && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
24298+ /* Don't break; here because it's possible the same
24299+ * slot could be in the list more than once
24300+ */
24301+ list_del(&pci_dev_id->slot_list);
24302+ kfree(pci_dev_id);
24303+
24304+ err = 0;
24305+
24306+ pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
24307+ "seize list\n", domain, bus, slot, func);
24308+ }
24309+ }
24310+ spin_unlock_irqrestore(&device_ids_lock, flags);
24311+
24312+ return err;
24313+}
24314+
24315+static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
24316+ int size, int mask)
24317+{
24318+ int err = 0;
24319+ struct pcistub_device *psdev;
24320+ struct pci_dev *dev;
24321+ struct config_field *field;
24322+
24323+ psdev = pcistub_device_find(domain, bus, slot, func);
24324+ if (!psdev || !psdev->dev) {
24325+ err = -ENODEV;
24326+ goto out;
24327+ }
24328+ dev = psdev->dev;
24329+
24330+ field = kzalloc(sizeof(*field), GFP_ATOMIC);
24331+ if (!field) {
24332+ err = -ENOMEM;
24333+ goto out;
24334+ }
24335+
24336+ field->offset = reg;
24337+ field->size = size;
24338+ field->mask = mask;
24339+ field->init = NULL;
24340+ field->reset = NULL;
24341+ field->release = NULL;
24342+ field->clean = pciback_config_field_free;
24343+
24344+ err = pciback_config_quirks_add_field(dev, field);
24345+ if (err)
24346+ kfree(field);
24347+ out:
24348+ return err;
24349+}
24350+
24351+static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
24352+ size_t count)
24353+{
24354+ int domain, bus, slot, func;
24355+ int err;
24356+
24357+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
24358+ if (err)
24359+ goto out;
24360+
24361+ err = pcistub_device_id_add(domain, bus, slot, func);
24362+
24363+ out:
24364+ if (!err)
24365+ err = count;
24366+ return err;
24367+}
24368+
24369+DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
24370+
24371+static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
24372+ size_t count)
24373+{
24374+ int domain, bus, slot, func;
24375+ int err;
24376+
24377+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
24378+ if (err)
24379+ goto out;
24380+
24381+ err = pcistub_device_id_remove(domain, bus, slot, func);
24382+
24383+ out:
24384+ if (!err)
24385+ err = count;
24386+ return err;
24387+}
24388+
24389+DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
24390+
24391+static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
24392+{
24393+ struct pcistub_device_id *pci_dev_id;
24394+ size_t count = 0;
24395+ unsigned long flags;
24396+
24397+ spin_lock_irqsave(&device_ids_lock, flags);
24398+ list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
24399+ if (count >= PAGE_SIZE)
24400+ break;
24401+
24402+ count += scnprintf(buf + count, PAGE_SIZE - count,
24403+ "%04x:%02x:%02x.%01x\n",
24404+ pci_dev_id->domain, pci_dev_id->bus,
24405+ PCI_SLOT(pci_dev_id->devfn),
24406+ PCI_FUNC(pci_dev_id->devfn));
24407+ }
24408+ spin_unlock_irqrestore(&device_ids_lock, flags);
24409+
24410+ return count;
24411+}
24412+
24413+DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
24414+
24415+static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
24416+ size_t count)
24417+{
24418+ int domain, bus, slot, func, reg, size, mask;
24419+ int err;
24420+
24421+ err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
24422+ &mask);
24423+ if (err)
24424+ goto out;
24425+
24426+ err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
24427+
24428+ out:
24429+ if (!err)
24430+ err = count;
24431+ return err;
24432+}
24433+
24434+static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
24435+{
24436+ int count = 0;
24437+ unsigned long flags;
24438+ extern struct list_head pciback_quirks;
24439+ struct pciback_config_quirk *quirk;
24440+ struct pciback_dev_data *dev_data;
24441+ const struct config_field *field;
24442+ const struct config_field_entry *cfg_entry;
24443+
24444+ spin_lock_irqsave(&device_ids_lock, flags);
24445+ list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
24446+ if (count >= PAGE_SIZE)
24447+ goto out;
24448+
24449+ count += scnprintf(buf + count, PAGE_SIZE - count,
24450+ "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
24451+ quirk->pdev->bus->number,
24452+ PCI_SLOT(quirk->pdev->devfn),
24453+ PCI_FUNC(quirk->pdev->devfn),
24454+ quirk->devid.vendor, quirk->devid.device,
24455+ quirk->devid.subvendor,
24456+ quirk->devid.subdevice);
24457+
24458+ dev_data = pci_get_drvdata(quirk->pdev);
24459+
24460+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
24461+ field = cfg_entry->field;
24462+ if (count >= PAGE_SIZE)
24463+ goto out;
24464+
24465+ count += scnprintf(buf + count, PAGE_SIZE - count,
24466+ "\t\t%08x:%01x:%08x\n",
24467+ cfg_entry->base_offset + field->offset,
24468+ field->size, field->mask);
24469+ }
24470+ }
24471+
24472+ out:
24473+ spin_unlock_irqrestore(&device_ids_lock, flags);
24474+
24475+ return count;
24476+}
24477+
24478+DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
24479+
24480+static ssize_t permissive_add(struct device_driver *drv, const char *buf,
24481+ size_t count)
24482+{
24483+ int domain, bus, slot, func;
24484+ int err;
24485+ struct pcistub_device *psdev;
24486+ struct pciback_dev_data *dev_data;
24487+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
24488+ if (err)
24489+ goto out;
24490+ psdev = pcistub_device_find(domain, bus, slot, func);
24491+ if (!psdev) {
24492+ err = -ENODEV;
24493+ goto out;
24494+ }
24495+ if (!psdev->dev) {
24496+ err = -ENODEV;
24497+ goto release;
24498+ }
24499+ dev_data = pci_get_drvdata(psdev->dev);
24500+ /* the driver data for a device should never be null at this point */
24501+ if (!dev_data) {
24502+ err = -ENXIO;
24503+ goto release;
24504+ }
24505+ if (!dev_data->permissive) {
24506+ dev_data->permissive = 1;
24507+ /* Let user know that what they're doing could be unsafe */
24508+ dev_warn(&psdev->dev->dev,
24509+ "enabling permissive mode configuration space accesses!\n");
24510+ dev_warn(&psdev->dev->dev,
24511+ "permissive mode is potentially unsafe!\n");
24512+ }
24513+ release:
24514+ pcistub_device_put(psdev);
24515+ out:
24516+ if (!err)
24517+ err = count;
24518+ return err;
24519+}
24520+
24521+static ssize_t permissive_show(struct device_driver *drv, char *buf)
24522+{
24523+ struct pcistub_device *psdev;
24524+ struct pciback_dev_data *dev_data;
24525+ size_t count = 0;
24526+ unsigned long flags;
24527+ spin_lock_irqsave(&pcistub_devices_lock, flags);
24528+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
24529+ if (count >= PAGE_SIZE)
24530+ break;
24531+ if (!psdev->dev)
24532+ continue;
24533+ dev_data = pci_get_drvdata(psdev->dev);
24534+ if (!dev_data || !dev_data->permissive)
24535+ continue;
24536+ count +=
24537+ scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
24538+ pci_name(psdev->dev));
24539+ }
24540+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24541+ return count;
24542+}
24543+
24544+DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
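+
+/* The driver attributes defined above (new_slot, remove_slot, slots,
+ * quirks and permissive) are created in pcistub_init() and typically
+ * appear under /sys/bus/pci/drivers/pciback/.  For example, writing a
+ * slot specification such as "0000:01:0a.0" (hypothetical) to new_slot
+ * adds that device to the seize list.
+ */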
24545+
24546+#ifdef CONFIG_PCI_MSI
24547+
24548+int pciback_get_owner(struct pci_dev *dev)
24549+{
24550+ struct pcistub_device *psdev;
24551+
24552+ psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
24553+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
24554+
24555+ if (!psdev || !psdev->pdev)
24556+ return -1;
24557+
24558+ return psdev->pdev->xdev->otherend_id;
24559+}
24560+#endif
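+
+/* pciback_get_owner() is registered as the MSI owner callback in
+ * pcistub_init(); it maps a pci_dev to the domid of the frontend that
+ * currently owns it (psdev->pdev->xdev->otherend_id), or -1 if the
+ * device is not assigned to any domain.
+ */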
24561+
24562+static void pcistub_exit(void)
24563+{
24564+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
24565+ driver_remove_file(&pciback_pci_driver.driver,
24566+ &driver_attr_remove_slot);
24567+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
24568+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
24569+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
24570+
24571+ pci_unregister_driver(&pciback_pci_driver);
24572+ WARN_ON(unregister_msi_get_owner(pciback_get_owner));
24573+}
24574+
24575+static int __init pcistub_init(void)
24576+{
24577+ int pos = 0;
24578+ int err = 0;
24579+ int domain, bus, slot, func;
24580+ int parsed;
24581+
24582+ if (pci_devs_to_hide && *pci_devs_to_hide) {
24583+ do {
24584+ parsed = 0;
24585+
24586+ err = sscanf(pci_devs_to_hide + pos,
24587+ " (%x:%x:%x.%x) %n",
24588+ &domain, &bus, &slot, &func, &parsed);
24589+ if (err != 4) {
24590+ domain = 0;
24591+ err = sscanf(pci_devs_to_hide + pos,
24592+ " (%x:%x.%x) %n",
24593+ &bus, &slot, &func, &parsed);
24594+ if (err != 3)
24595+ goto parse_error;
24596+ }
24597+
24598+ err = pcistub_device_id_add(domain, bus, slot, func);
24599+ if (err)
24600+ goto out;
24601+
24602+ /* if parsed<=0, we've reached the end of the string */
24603+ pos += parsed;
24604+ } while (parsed > 0 && pci_devs_to_hide[pos]);
24605+ }
24606+
24607+ /* If we're the first PCI Device Driver to register, we're the
24608+ * first one to get offered PCI devices as they become
24609+ * available (and thus we can be the first to grab them)
24610+ */
24611+ err = pci_register_driver(&pciback_pci_driver);
24612+ if (err < 0)
24613+ goto out;
24614+
24615+ err = driver_create_file(&pciback_pci_driver.driver,
24616+ &driver_attr_new_slot);
24617+ if (!err)
24618+ err = driver_create_file(&pciback_pci_driver.driver,
24619+ &driver_attr_remove_slot);
24620+ if (!err)
24621+ err = driver_create_file(&pciback_pci_driver.driver,
24622+ &driver_attr_slots);
24623+ if (!err)
24624+ err = driver_create_file(&pciback_pci_driver.driver,
24625+ &driver_attr_quirks);
24626+ if (!err)
24627+ err = driver_create_file(&pciback_pci_driver.driver,
24628+ &driver_attr_permissive);
24629+
24630+ if (!err)
24631+ err = register_msi_get_owner(pciback_get_owner);
24632+ if (err)
24633+ pcistub_exit();
24634+
24635+ out:
24636+ return err;
24637+
24638+ parse_error:
24639+ printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
24640+ pci_devs_to_hide + pos);
24641+ return -EINVAL;
24642+}
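+
+/* pci_devs_to_hide is parsed above as a sequence of parenthesised slot
+ * specifications, e.g. "(0000:01:0a.0)(02:1f.0)" (illustrative values);
+ * as with str_to_slot(), the domain part is optional and defaults to 0.
+ */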
24643+
24644+#ifndef MODULE
24645+/*
24646+ * fs_initcall happens before device_initcall
24647+ * so pciback *should* get called first (because we
24648+ * want to claim any device before other drivers
24649+ * get a chance, by being the first pci device
24650+ * driver to register)
24651+ */
24652+fs_initcall(pcistub_init);
24653+#endif
24654+
24655+static int __init pciback_init(void)
24656+{
24657+ int err;
24658+
24659+ err = pciback_config_init();
24660+ if (err)
24661+ return err;
24662+
24663+#ifdef MODULE
24664+ err = pcistub_init();
24665+ if (err < 0)
24666+ return err;
24667+#endif
24668+
24669+ pcistub_init_devices_late();
24670+ err = pciback_xenbus_register();
24671+ if (err)
24672+ pcistub_exit();
24673+
24674+ return err;
24675+}
24676+
24677+static void __exit pciback_cleanup(void)
24678+{
24679+ pciback_xenbus_unregister();
24680+ pcistub_exit();
24681+}
24682+
24683+module_init(pciback_init);
24684+module_exit(pciback_cleanup);
24685+
24686+MODULE_LICENSE("Dual BSD/GPL");
24687Index: head-2008-11-25/drivers/xen/pciback/pciback.h
24688===================================================================
24689--- /dev/null 1970-01-01 00:00:00.000000000 +0000
24690+++ head-2008-11-25/drivers/xen/pciback/pciback.h 2008-07-21 11:00:33.000000000 +0200
24691@@ -0,0 +1,111 @@
24692+/*
24693+ * PCI Backend Common Data Structures & Function Declarations
24694+ *
24695+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
24696+ */
24697+#ifndef __XEN_PCIBACK_H__
24698+#define __XEN_PCIBACK_H__
24699+
24700+#include <linux/pci.h>
24701+#include <linux/interrupt.h>
24702+#include <xen/xenbus.h>
24703+#include <linux/list.h>
24704+#include <linux/spinlock.h>
24705+#include <linux/workqueue.h>
24706+#include <asm/atomic.h>
24707+#include <xen/interface/io/pciif.h>
24708+
24709+struct pci_dev_entry {
24710+ struct list_head list;
24711+ struct pci_dev *dev;
24712+};
24713+
24714+#define _PDEVF_op_active (0)
24715+#define PDEVF_op_active (1<<(_PDEVF_op_active))
24716+
24717+struct pciback_device {
24718+ void *pci_dev_data;
24719+ spinlock_t dev_lock;
24720+
24721+ struct xenbus_device *xdev;
24722+
24723+ struct xenbus_watch be_watch;
24724+ u8 be_watching;
24725+
24726+ int evtchn_irq;
24727+
24728+ struct vm_struct *sh_area;
24729+ struct xen_pci_sharedinfo *sh_info;
24730+
24731+ unsigned long flags;
24732+
24733+ struct work_struct op_work;
24734+};
24735+
24736+struct pciback_dev_data {
24737+ struct list_head config_fields;
24738+ int permissive;
24739+ int warned_on_write;
24740+};
24741+
24742+/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
24743+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
24744+ int domain, int bus,
24745+ int slot, int func);
24746+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
24747+ struct pci_dev *dev);
24748+void pcistub_put_pci_dev(struct pci_dev *dev);
24749+
24750+/* Ensure a device is turned off or reset */
24751+void pciback_reset_device(struct pci_dev *pdev);
24752+
24753+/* Access a virtual configuration space for a PCI device */
24754+int pciback_config_init(void);
24755+int pciback_config_init_dev(struct pci_dev *dev);
24756+void pciback_config_free_dyn_fields(struct pci_dev *dev);
24757+void pciback_config_reset_dev(struct pci_dev *dev);
24758+void pciback_config_free_dev(struct pci_dev *dev);
24759+int pciback_config_read(struct pci_dev *dev, int offset, int size,
24760+ u32 * ret_val);
24761+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
24762+
24763+/* Handle requests for specific devices from the frontend */
24764+typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
24765+ unsigned int domain, unsigned int bus,
24766+ unsigned int devfn, unsigned int devid);
24767+typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
24768+ unsigned int domain, unsigned int bus);
24769+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
24770+ int devid, publish_pci_dev_cb publish_cb);
24771+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
24772+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
24773+ unsigned int domain, unsigned int bus,
24774+ unsigned int devfn);
24775+int pciback_init_devices(struct pciback_device *pdev);
24776+int pciback_publish_pci_roots(struct pciback_device *pdev,
24777+ publish_pci_root_cb cb);
24778+void pciback_release_devices(struct pciback_device *pdev);
24779+
24780+/* Handles events from front-end */
24781+irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
24782+void pciback_do_op(void *data);
24783+
24784+int pciback_xenbus_register(void);
24785+void pciback_xenbus_unregister(void);
24786+
24787+#ifdef CONFIG_PCI_MSI
24788+int pciback_enable_msi(struct pciback_device *pdev,
24789+ struct pci_dev *dev, struct xen_pci_op *op);
24790+
24791+int pciback_disable_msi(struct pciback_device *pdev,
24792+ struct pci_dev *dev, struct xen_pci_op *op);
24793+
24794+
24795+int pciback_enable_msix(struct pciback_device *pdev,
24796+ struct pci_dev *dev, struct xen_pci_op *op);
24797+
24798+int pciback_disable_msix(struct pciback_device *pdev,
24799+ struct pci_dev *dev, struct xen_pci_op *op);
24800+#endif
24801+extern int verbose_request;
24802+#endif
24803Index: head-2008-11-25/drivers/xen/pciback/pciback_ops.c
24804===================================================================
24805--- /dev/null 1970-01-01 00:00:00.000000000 +0000
24806+++ head-2008-11-25/drivers/xen/pciback/pciback_ops.c 2008-07-21 11:00:33.000000000 +0200
24807@@ -0,0 +1,117 @@
24808+/*
24809+ * PCI Backend Operations - respond to PCI requests from Frontend
24810+ *
24811+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
24812+ */
24813+#include <linux/module.h>
24814+#include <asm/bitops.h>
24815+#include <xen/evtchn.h>
24816+#include "pciback.h"
24817+
24818+int verbose_request = 0;
24819+module_param(verbose_request, int, 0644);
24820+
24821+/* Ensure a device is "turned off" and ready to be exported.
24822+ * (Also see pciback_config_reset to ensure virtual configuration space is
24823+ * ready to be re-exported)
24824+ */
24825+void pciback_reset_device(struct pci_dev *dev)
24826+{
24827+ u16 cmd;
24828+
24829+ /* Disable devices (but not bridges) */
24830+ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
24831+ pci_disable_device(dev);
24832+
24833+ pci_write_config_word(dev, PCI_COMMAND, 0);
24834+
24835+ dev->is_enabled = 0;
24836+ dev->is_busmaster = 0;
24837+ } else {
24838+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
24839+ if (cmd & (PCI_COMMAND_INVALIDATE)) {
24840+ cmd &= ~(PCI_COMMAND_INVALIDATE);
24841+ pci_write_config_word(dev, PCI_COMMAND, cmd);
24842+
24843+ dev->is_busmaster = 0;
24844+ }
24845+ }
24846+}
24847+
24848+static inline void test_and_schedule_op(struct pciback_device *pdev)
24849+{
24850+ /* Check that frontend is requesting an operation and that we are not
24851+ * already processing a request */
24852+ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
24853+ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
24854+ schedule_work(&pdev->op_work);
24855+}
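+
+/* Two flags drive the request handshake: _XEN_PCIF_active lives in the
+ * shared page and is set by the frontend when it posts an operation,
+ * while _PDEVF_op_active is pciback's local guard that keeps the work
+ * item from being scheduled twice for the same request.
+ */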
24856+
24857+/* Performing the configuration space reads/writes must not be done in atomic
24858+ * context because some of the pci_* functions can sleep (mostly due to ACPI
24859+ * use of semaphores). This function is intended to be called from a work
24860+ * queue in process context taking a struct pciback_device as a parameter */
24861+void pciback_do_op(void *data)
24862+{
24863+ struct pciback_device *pdev = data;
24864+ struct pci_dev *dev;
24865+ struct xen_pci_op *op = &pdev->sh_info->op;
24866+
24867+ dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
24868+
24869+ if (dev == NULL)
24870+ op->err = XEN_PCI_ERR_dev_not_found;
24871+ else
24872+ {
24873+ switch (op->cmd)
24874+ {
24875+ case XEN_PCI_OP_conf_read:
24876+ op->err = pciback_config_read(dev,
24877+ op->offset, op->size, &op->value);
24878+ break;
24879+ case XEN_PCI_OP_conf_write:
24880+ op->err = pciback_config_write(dev,
24881+ op->offset, op->size, op->value);
24882+ break;
24883+#ifdef CONFIG_PCI_MSI
24884+ case XEN_PCI_OP_enable_msi:
24885+ op->err = pciback_enable_msi(pdev, dev, op);
24886+ break;
24887+ case XEN_PCI_OP_disable_msi:
24888+ op->err = pciback_disable_msi(pdev, dev, op);
24889+ break;
24890+ case XEN_PCI_OP_enable_msix:
24891+ op->err = pciback_enable_msix(pdev, dev, op);
24892+ break;
24893+ case XEN_PCI_OP_disable_msix:
24894+ op->err = pciback_disable_msix(pdev, dev, op);
24895+ break;
24896+#endif
24897+ default:
24898+ op->err = XEN_PCI_ERR_not_implemented;
24899+ break;
24900+ }
24901+ }
24902+ /* Tell the driver domain that we're done. */
24903+ wmb();
24904+ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
24905+ notify_remote_via_irq(pdev->evtchn_irq);
24906+
24907+ /* Mark that we're done. */
24908+ smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
24909+ clear_bit(_PDEVF_op_active, &pdev->flags);
24910+ smp_mb__after_clear_bit(); /* /before/ final check for work */
24911+
24912+ /* Check to see if the driver domain tried to start another request in
24913+ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. */
24914+ test_and_schedule_op(pdev);
24915+}
24916+
24917+irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
24918+{
24919+ struct pciback_device *pdev = dev_id;
24920+
24921+ test_and_schedule_op(pdev);
24922+
24923+ return IRQ_HANDLED;
24924+}
24925Index: head-2008-11-25/drivers/xen/pciback/slot.c
24926===================================================================
24927--- /dev/null 1970-01-01 00:00:00.000000000 +0000
24928+++ head-2008-11-25/drivers/xen/pciback/slot.c 2008-02-26 10:54:11.000000000 +0100
24929@@ -0,0 +1,157 @@
24930+/*
24931+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
24932+ * to the frontend
24933+ *
24934+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> (vpci.c)
24935+ * Author: Tristan Gingold <tristan.gingold@bull.net>, from vpci.c
24936+ */
24937+
24938+#include <linux/list.h>
24939+#include <linux/slab.h>
24940+#include <linux/pci.h>
24941+#include <linux/spinlock.h>
24942+#include "pciback.h"
24943+
24944+/* There are at most 32 slots in a pci bus. */
24945+#define PCI_SLOT_MAX 32
24946+
24947+#define PCI_BUS_NBR 2
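+
+/* This "slot" backend exposes a fixed topology of PCI_BUS_NBR virtual
+ * buses with PCI_SLOT_MAX slots each (function 0 only); each exported
+ * device is placed in the first free virtual slot.
+ */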
24948+
24949+struct slot_dev_data {
24950+ /* Access to dev_list must be protected by lock */
24951+ struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
24952+ spinlock_t lock;
24953+};
24954+
24955+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
24956+ unsigned int domain, unsigned int bus,
24957+ unsigned int devfn)
24958+{
24959+ struct pci_dev *dev = NULL;
24960+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
24961+ unsigned long flags;
24962+
24963+ if (domain != 0 || PCI_FUNC(devfn) != 0)
24964+ return NULL;
24965+
24966+ if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
24967+ return NULL;
24968+
24969+ spin_lock_irqsave(&slot_dev->lock, flags);
24970+ dev = slot_dev->slots[bus][PCI_SLOT(devfn)];
24971+ spin_unlock_irqrestore(&slot_dev->lock, flags);
24972+
24973+ return dev;
24974+}
24975+
24976+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
24977+ int devid, publish_pci_dev_cb publish_cb)
24978+{
24979+ int err = 0, slot, bus;
24980+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
24981+ unsigned long flags;
24982+
24983+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
24984+ err = -EFAULT;
24985+ xenbus_dev_fatal(pdev->xdev, err,
24986+ "Can't export bridges on the virtual PCI bus");
24987+ goto out;
24988+ }
24989+
24990+ spin_lock_irqsave(&slot_dev->lock, flags);
24991+
24992+ /* Assign to a new slot on the virtual PCI bus */
24993+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
24994+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
24995+ if (slot_dev->slots[bus][slot] == NULL) {
24996+ printk(KERN_INFO
24997+ "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
24998+ pci_name(dev), slot, bus);
24999+ slot_dev->slots[bus][slot] = dev;
25000+ goto unlock;
25001+ }
25002+ }
25003+
25004+ err = -ENOMEM;
25005+ xenbus_dev_fatal(pdev->xdev, err,
25006+ "No more space on root virtual PCI bus");
25007+
25008+ unlock:
25009+ spin_unlock_irqrestore(&slot_dev->lock, flags);
25010+
25011+ /* Publish this device. */
25012+ if(!err)
25013+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
25014+
25015+ out:
25016+ return err;
25017+}
25018+
25019+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
25020+{
25021+ int slot, bus;
25022+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
25023+ struct pci_dev *found_dev = NULL;
25024+ unsigned long flags;
25025+
25026+ spin_lock_irqsave(&slot_dev->lock, flags);
25027+
25028+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
25029+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25030+ if (slot_dev->slots[bus][slot] == dev) {
25031+ slot_dev->slots[bus][slot] = NULL;
25032+ found_dev = dev;
25033+ goto out;
25034+ }
25035+ }
25036+
25037+ out:
25038+ spin_unlock_irqrestore(&slot_dev->lock, flags);
25039+
25040+ if (found_dev)
25041+ pcistub_put_pci_dev(found_dev);
25042+}
25043+
25044+int pciback_init_devices(struct pciback_device *pdev)
25045+{
25046+ int slot, bus;
25047+ struct slot_dev_data *slot_dev;
25048+
25049+ slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL);
25050+ if (!slot_dev)
25051+ return -ENOMEM;
25052+
25053+ spin_lock_init(&slot_dev->lock);
25054+
25055+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
25056+ for (slot = 0; slot < PCI_SLOT_MAX; slot++)
25057+ slot_dev->slots[bus][slot] = NULL;
25058+
25059+ pdev->pci_dev_data = slot_dev;
25060+
25061+ return 0;
25062+}
25063+
25064+int pciback_publish_pci_roots(struct pciback_device *pdev,
25065+ publish_pci_root_cb publish_cb)
25066+{
25067+ /* The Virtual PCI bus has only one root */
25068+ return publish_cb(pdev, 0, 0);
25069+}
25070+
25071+void pciback_release_devices(struct pciback_device *pdev)
25072+{
25073+ int slot, bus;
25074+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
25075+ struct pci_dev *dev;
25076+
25077+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
25078+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25079+ dev = slot_dev->slots[bus][slot];
25080+ if (dev != NULL)
25081+ pcistub_put_pci_dev(dev);
25082+ }
25083+
25084+ kfree(slot_dev);
25085+ pdev->pci_dev_data = NULL;
25086+}
25087Index: head-2008-11-25/drivers/xen/pciback/vpci.c
25088===================================================================
25089--- /dev/null 1970-01-01 00:00:00.000000000 +0000
25090+++ head-2008-11-25/drivers/xen/pciback/vpci.c 2008-02-26 10:54:11.000000000 +0100
25091@@ -0,0 +1,212 @@
25092+/*
25093+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
25094+ * to the frontend
25095+ *
25096+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
25097+ */
25098+
25099+#include <linux/list.h>
25100+#include <linux/slab.h>
25101+#include <linux/pci.h>
25102+#include <linux/spinlock.h>
25103+#include "pciback.h"
25104+
25105+#define PCI_SLOT_MAX 32
25106+
25107+struct vpci_dev_data {
25108+ /* Access to dev_list must be protected by lock */
25109+ struct list_head dev_list[PCI_SLOT_MAX];
25110+ spinlock_t lock;
25111+};
25112+
25113+static inline struct list_head *list_first(struct list_head *head)
25114+{
25115+ return head->next;
25116+}
25117+
25118+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
25119+ unsigned int domain, unsigned int bus,
25120+ unsigned int devfn)
25121+{
25122+ struct pci_dev_entry *entry;
25123+ struct pci_dev *dev = NULL;
25124+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
25125+ unsigned long flags;
25126+
25127+ if (domain != 0 || bus != 0)
25128+ return NULL;
25129+
25130+ if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
25131+ spin_lock_irqsave(&vpci_dev->lock, flags);
25132+
25133+ list_for_each_entry(entry,
25134+ &vpci_dev->dev_list[PCI_SLOT(devfn)],
25135+ list) {
25136+ if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
25137+ dev = entry->dev;
25138+ break;
25139+ }
25140+ }
25141+
25142+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
25143+ }
25144+ return dev;
25145+}
25146+
25147+static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
25148+{
25149+ if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
25150+ && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
25151+ return 1;
25152+
25153+ return 0;
25154+}
25155+
25156+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
25157+ int devid, publish_pci_dev_cb publish_cb)
25158+{
25159+ int err = 0, slot, func;
25160+ struct pci_dev_entry *t, *dev_entry;
25161+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
25162+ unsigned long flags;
25163+
25164+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
25165+ err = -EFAULT;
25166+ xenbus_dev_fatal(pdev->xdev, err,
25167+ "Can't export bridges on the virtual PCI bus");
25168+ goto out;
25169+ }
25170+
25171+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
25172+ if (!dev_entry) {
25173+ err = -ENOMEM;
25174+ xenbus_dev_fatal(pdev->xdev, err,
25175+ "Error adding entry to virtual PCI bus");
25176+ goto out;
25177+ }
25178+
25179+ dev_entry->dev = dev;
25180+
25181+ spin_lock_irqsave(&vpci_dev->lock, flags);
25182+
25183+ /* Keep multi-function devices together on the virtual PCI bus */
25184+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25185+ if (!list_empty(&vpci_dev->dev_list[slot])) {
25186+ t = list_entry(list_first(&vpci_dev->dev_list[slot]),
25187+ struct pci_dev_entry, list);
25188+
25189+ if (match_slot(dev, t->dev)) {
25190+ pr_info("pciback: vpci: %s: "
25191+ "assign to virtual slot %d func %d\n",
25192+ pci_name(dev), slot,
25193+ PCI_FUNC(dev->devfn));
25194+ list_add_tail(&dev_entry->list,
25195+ &vpci_dev->dev_list[slot]);
25196+ func = PCI_FUNC(dev->devfn);
25197+ goto unlock;
25198+ }
25199+ }
25200+ }
25201+
25202+ /* Assign to a new slot on the virtual PCI bus */
25203+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25204+ if (list_empty(&vpci_dev->dev_list[slot])) {
25205+ printk(KERN_INFO
25206+ "pciback: vpci: %s: assign to virtual slot %d\n",
25207+ pci_name(dev), slot);
25208+ list_add_tail(&dev_entry->list,
25209+ &vpci_dev->dev_list[slot]);
25210+ func = PCI_FUNC(dev->devfn);
25211+ goto unlock;
25212+ }
25213+ }
25214+
25215+ err = -ENOMEM;
25216+ xenbus_dev_fatal(pdev->xdev, err,
25217+ "No more space on root virtual PCI bus");
25218+
25219+ unlock:
25220+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
25221+
25222+ /* Publish this device. */
25223+ if(!err)
25224+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
25225+
25226+ out:
25227+ return err;
25228+}
25229+
25230+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
25231+{
25232+ int slot;
25233+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
25234+ struct pci_dev *found_dev = NULL;
25235+ unsigned long flags;
25236+
25237+ spin_lock_irqsave(&vpci_dev->lock, flags);
25238+
25239+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25240+ struct pci_dev_entry *e, *tmp;
25241+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
25242+ list) {
25243+ if (e->dev == dev) {
25244+ list_del(&e->list);
25245+ found_dev = e->dev;
25246+ kfree(e);
25247+ goto out;
25248+ }
25249+ }
25250+ }
25251+
25252+ out:
25253+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
25254+
25255+ if (found_dev)
25256+ pcistub_put_pci_dev(found_dev);
25257+}
25258+
25259+int pciback_init_devices(struct pciback_device *pdev)
25260+{
25261+ int slot;
25262+ struct vpci_dev_data *vpci_dev;
25263+
25264+ vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
25265+ if (!vpci_dev)
25266+ return -ENOMEM;
25267+
25268+ spin_lock_init(&vpci_dev->lock);
25269+
25270+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25271+ INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
25272+ }
25273+
25274+ pdev->pci_dev_data = vpci_dev;
25275+
25276+ return 0;
25277+}
25278+
25279+int pciback_publish_pci_roots(struct pciback_device *pdev,
25280+ publish_pci_root_cb publish_cb)
25281+{
25282+ /* The Virtual PCI bus has only one root */
25283+ return publish_cb(pdev, 0, 0);
25284+}
25285+
25286+void pciback_release_devices(struct pciback_device *pdev)
25287+{
25288+ int slot;
25289+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
25290+
25291+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25292+ struct pci_dev_entry *e, *tmp;
25293+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
25294+ list) {
25295+ list_del(&e->list);
25296+ pcistub_put_pci_dev(e->dev);
25297+ kfree(e);
25298+ }
25299+ }
25300+
25301+ kfree(vpci_dev);
25302+ pdev->pci_dev_data = NULL;
25303+}
25304Index: head-2008-11-25/drivers/xen/pciback/xenbus.c
25305===================================================================
25306--- /dev/null 1970-01-01 00:00:00.000000000 +0000
25307+++ head-2008-11-25/drivers/xen/pciback/xenbus.c 2008-07-21 11:00:33.000000000 +0200
25308@@ -0,0 +1,704 @@
25309+/*
25310+ * PCI Backend Xenbus Setup - handles setup with frontend and xend
25311+ *
25312+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
25313+ */
25314+#include <linux/module.h>
25315+#include <linux/init.h>
25316+#include <linux/list.h>
25317+#include <linux/vmalloc.h>
25318+#include <xen/xenbus.h>
25319+#include <xen/evtchn.h>
25320+#include "pciback.h"
25321+
25322+#define INVALID_EVTCHN_IRQ (-1)
25323+
25324+static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
25325+{
25326+ struct pciback_device *pdev;
25327+
25328+ pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
25329+ if (pdev == NULL)
25330+ goto out;
25331+ dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
25332+
25333+ pdev->xdev = xdev;
25334+ xdev->dev.driver_data = pdev;
25335+
25336+ spin_lock_init(&pdev->dev_lock);
25337+
25338+ pdev->sh_area = NULL;
25339+ pdev->sh_info = NULL;
25340+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
25341+ pdev->be_watching = 0;
25342+
25343+ INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
25344+
25345+ if (pciback_init_devices(pdev)) {
25346+ kfree(pdev);
25347+ pdev = NULL;
25348+ }
25349+ out:
25350+ return pdev;
25351+}
25352+
25353+static void pciback_disconnect(struct pciback_device *pdev)
25354+{
25355+ spin_lock(&pdev->dev_lock);
25356+
25357+ /* Ensure the guest can't trigger our handler before removing devices */
25358+ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
25359+ unbind_from_irqhandler(pdev->evtchn_irq, pdev);
25360+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
25361+ }
25362+
25363+ /* If the driver domain started an op, make sure we complete it or
25364+ * delete it before releasing the shared memory */
25365+ cancel_delayed_work(&pdev->op_work);
25366+ flush_scheduled_work();
25367+
25368+ if (pdev->sh_info != NULL) {
25369+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
25370+ pdev->sh_info = NULL;
25371+ }
25372+
25373+ spin_unlock(&pdev->dev_lock);
25374+}
25375+
25376+static void free_pdev(struct pciback_device *pdev)
25377+{
25378+ if (pdev->be_watching)
25379+ unregister_xenbus_watch(&pdev->be_watch);
25380+
25381+ pciback_disconnect(pdev);
25382+
25383+ pciback_release_devices(pdev);
25384+
25385+ pdev->xdev->dev.driver_data = NULL;
25386+ pdev->xdev = NULL;
25387+
25388+ kfree(pdev);
25389+}
25390+
25391+static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
25392+ int remote_evtchn)
25393+{
25394+ int err = 0;
25395+ struct vm_struct *area;
25396+
25397+ dev_dbg(&pdev->xdev->dev,
25398+ "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
25399+ gnt_ref, remote_evtchn);
25400+
25401+ area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
25402+ if (IS_ERR(area)) {
25403+ err = PTR_ERR(area);
25404+ goto out;
25405+ }
25406+ pdev->sh_area = area;
25407+ pdev->sh_info = area->addr;
25408+
25409+ err = bind_interdomain_evtchn_to_irqhandler(
25410+ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
25411+ SA_SAMPLE_RANDOM, "pciback", pdev);
25412+ if (err < 0) {
25413+ xenbus_dev_fatal(pdev->xdev, err,
25414+ "Error binding event channel to IRQ");
25415+ goto out;
25416+ }
25417+ pdev->evtchn_irq = err;
25418+ err = 0;
25419+
25420+ dev_dbg(&pdev->xdev->dev, "Attached!\n");
25421+ out:
25422+ return err;
25423+}
25424+
25425+static int pciback_attach(struct pciback_device *pdev)
25426+{
25427+ int err = 0;
25428+ int gnt_ref, remote_evtchn;
25429+ char *magic = NULL;
25430+
25431+ spin_lock(&pdev->dev_lock);
25432+
25433+ /* Make sure we only do this setup once */
25434+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
25435+ XenbusStateInitialised)
25436+ goto out;
25437+
25438+ /* Wait for frontend to state that it has published the configuration */
25439+ if (xenbus_read_driver_state(pdev->xdev->otherend) !=
25440+ XenbusStateInitialised)
25441+ goto out;
25442+
25443+ dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
25444+
25445+ err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
25446+ "pci-op-ref", "%u", &gnt_ref,
25447+ "event-channel", "%u", &remote_evtchn,
25448+ "magic", NULL, &magic, NULL);
25449+ if (err) {
25450+ /* If configuration didn't get read correctly, wait longer */
25451+ xenbus_dev_fatal(pdev->xdev, err,
25452+ "Error reading configuration from frontend");
25453+ goto out;
25454+ }
25455+
25456+ if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
25457+ xenbus_dev_fatal(pdev->xdev, -EFAULT,
25458+ "version mismatch (%s/%s) with pcifront - "
25459+ "halting pciback",
25460+ magic, XEN_PCI_MAGIC);
25461+ goto out;
25462+ }
25463+
25464+ err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
25465+ if (err)
25466+ goto out;
25467+
25468+ dev_dbg(&pdev->xdev->dev, "Connecting...\n");
25469+
25470+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
25471+ if (err)
25472+ xenbus_dev_fatal(pdev->xdev, err,
25473+ "Error switching to connected state!");
25474+
25475+ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
25476+ out:
25477+ spin_unlock(&pdev->dev_lock);
25478+
25479+ if (magic)
25480+ kfree(magic);
25481+
25482+ return err;
25483+}
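+
+/* pciback_attach() runs once both ends have reached Initialised: it
+ * reads the grant reference, event channel and magic string published
+ * by pcifront, maps the shared page, binds the event channel and then
+ * switches the backend to Connected.
+ */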
25484+
25485+static int pciback_publish_pci_dev(struct pciback_device *pdev,
25486+ unsigned int domain, unsigned int bus,
25487+ unsigned int devfn, unsigned int devid)
25488+{
25489+ int err;
25490+ int len;
25491+ char str[64];
25492+
25493+ len = snprintf(str, sizeof(str), "vdev-%d", devid);
25494+ if (unlikely(len >= (sizeof(str) - 1))) {
25495+ err = -ENOMEM;
25496+ goto out;
25497+ }
25498+
25499+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
25500+ "%04x:%02x:%02x.%02x", domain, bus,
25501+ PCI_SLOT(devfn), PCI_FUNC(devfn));
25502+
25503+ out:
25504+ return err;
25505+}
25506+
25507+static int pciback_export_device(struct pciback_device *pdev,
25508+ int domain, int bus, int slot, int func,
25509+ int devid)
25510+{
25511+ struct pci_dev *dev;
25512+ int err = 0;
25513+
25514+ dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
25515+ domain, bus, slot, func);
25516+
25517+ dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
25518+ if (!dev) {
25519+ err = -EINVAL;
25520+ xenbus_dev_fatal(pdev->xdev, err,
25521+ "Couldn't locate PCI device "
25522+ "(%04x:%02x:%02x.%01x)! "
25523+ "perhaps already in-use?",
25524+ domain, bus, slot, func);
25525+ goto out;
25526+ }
25527+
25528+ err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev);
25529+ if (err)
25530+ goto out;
25531+
25532+ /* TODO: It'd be nice to export a bridge and have all of its children
25533+ * get exported with it. This may be best done in xend (which will
25534+ * have to calculate resource usage anyway) but we probably want to
25535+	 * put something in here to ensure that if a bridge is given to a
25536+	 * driver domain, all devices under that bridge are not given to
25537+	 * other driver domains (since whoever controls the bridge can
25538+	 * disable it and stop the other devices from working).
25539+ */
25540+ out:
25541+ return err;
25542+}
25543+
25544+static int pciback_remove_device(struct pciback_device *pdev,
25545+ int domain, int bus, int slot, int func)
25546+{
25547+ int err = 0;
25548+ struct pci_dev *dev;
25549+
25550+ dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
25551+ domain, bus, slot, func);
25552+
25553+ dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
25554+ if (!dev) {
25555+ err = -EINVAL;
25556+ dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
25557+ "(%04x:%02x:%02x.%01x)! not owned by this domain\n",
25558+ domain, bus, slot, func);
25559+ goto out;
25560+ }
25561+
25562+ pciback_release_pci_dev(pdev, dev);
25563+
25564+ out:
25565+ return err;
25566+}
25567+
25568+static int pciback_publish_pci_root(struct pciback_device *pdev,
25569+ unsigned int domain, unsigned int bus)
25570+{
25571+ unsigned int d, b;
25572+ int i, root_num, len, err;
25573+ char str[64];
25574+
25575+ dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
25576+
25577+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
25578+ "root_num", "%d", &root_num);
25579+ if (err == 0 || err == -ENOENT)
25580+ root_num = 0;
25581+ else if (err < 0)
25582+ goto out;
25583+
25584+ /* Verify that we haven't already published this pci root */
25585+ for (i = 0; i < root_num; i++) {
25586+ len = snprintf(str, sizeof(str), "root-%d", i);
25587+ if (unlikely(len >= (sizeof(str) - 1))) {
25588+ err = -ENOMEM;
25589+ goto out;
25590+ }
25591+
25592+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
25593+ str, "%x:%x", &d, &b);
25594+ if (err < 0)
25595+ goto out;
25596+ if (err != 2) {
25597+ err = -EINVAL;
25598+ goto out;
25599+ }
25600+
25601+ if (d == domain && b == bus) {
25602+ err = 0;
25603+ goto out;
25604+ }
25605+ }
25606+
25607+ len = snprintf(str, sizeof(str), "root-%d", root_num);
25608+ if (unlikely(len >= (sizeof(str) - 1))) {
25609+ err = -ENOMEM;
25610+ goto out;
25611+ }
25612+
25613+ dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
25614+ root_num, domain, bus);
25615+
25616+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
25617+ "%04x:%02x", domain, bus);
25618+ if (err)
25619+ goto out;
25620+
25621+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
25622+ "root_num", "%d", (root_num + 1));
25623+
25624+ out:
25625+ return err;
25626+}
25627+
25628+static int pciback_reconfigure(struct pciback_device *pdev)
25629+{
25630+ int err = 0;
25631+ int num_devs;
25632+ int domain, bus, slot, func;
25633+ int substate;
25634+ int i, len;
25635+ char state_str[64];
25636+ char dev_str[64];
25637+
25638+ spin_lock(&pdev->dev_lock);
25639+
25640+ dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
25641+
25642+ /* Make sure we only reconfigure once */
25643+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
25644+ XenbusStateReconfiguring)
25645+ goto out;
25646+
25647+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
25648+ &num_devs);
25649+ if (err != 1) {
25650+ if (err >= 0)
25651+ err = -EINVAL;
25652+ xenbus_dev_fatal(pdev->xdev, err,
25653+ "Error reading number of devices");
25654+ goto out;
25655+ }
25656+
25657+ for (i = 0; i < num_devs; i++) {
25658+ len = snprintf(state_str, sizeof(state_str), "state-%d", i);
25659+ if (unlikely(len >= (sizeof(state_str) - 1))) {
25660+ err = -ENOMEM;
25661+ xenbus_dev_fatal(pdev->xdev, err,
25662+ "String overflow while reading "
25663+ "configuration");
25664+ goto out;
25665+ }
25666+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
25667+ "%d", &substate);
25668+ if (err != 1)
25669+ substate = XenbusStateUnknown;
25670+
25671+ switch (substate) {
25672+ case XenbusStateInitialising:
25673+ dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
25674+
25675+ len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
25676+ if (unlikely(len >= (sizeof(dev_str) - 1))) {
25677+ err = -ENOMEM;
25678+ xenbus_dev_fatal(pdev->xdev, err,
25679+ "String overflow while "
25680+ "reading configuration");
25681+ goto out;
25682+ }
25683+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
25684+ dev_str, "%x:%x:%x.%x",
25685+ &domain, &bus, &slot, &func);
25686+ if (err < 0) {
25687+ xenbus_dev_fatal(pdev->xdev, err,
25688+ "Error reading device "
25689+ "configuration");
25690+ goto out;
25691+ }
25692+ if (err != 4) {
25693+ err = -EINVAL;
25694+ xenbus_dev_fatal(pdev->xdev, err,
25695+ "Error parsing pci device "
25696+ "configuration");
25697+ goto out;
25698+ }
25699+
25700+ err = pciback_export_device(pdev, domain, bus, slot,
25701+ func, i);
25702+ if (err)
25703+ goto out;
25704+
25705+ /* Publish pci roots. */
25706+ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
25707+ if (err) {
25708+ xenbus_dev_fatal(pdev->xdev, err,
25709+ "Error while publish PCI root"
25710+ "buses for frontend");
25711+ goto out;
25712+ }
25713+
25714+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
25715+ state_str, "%d",
25716+ XenbusStateInitialised);
25717+ if (err) {
25718+ xenbus_dev_fatal(pdev->xdev, err,
25719+ "Error switching substate of "
25720+ "dev-%d\n", i);
25721+ goto out;
25722+ }
25723+ break;
25724+
25725+ case XenbusStateClosing:
25726+ dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
25727+
25728+ len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
25729+ if (unlikely(len >= (sizeof(dev_str) - 1))) {
25730+ err = -ENOMEM;
25731+ xenbus_dev_fatal(pdev->xdev, err,
25732+ "String overflow while "
25733+ "reading configuration");
25734+ goto out;
25735+ }
25736+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
25737+ dev_str, "%x:%x:%x.%x",
25738+ &domain, &bus, &slot, &func);
25739+ if (err < 0) {
25740+ xenbus_dev_fatal(pdev->xdev, err,
25741+ "Error reading device "
25742+ "configuration");
25743+ goto out;
25744+ }
25745+ if (err != 4) {
25746+ err = -EINVAL;
25747+ xenbus_dev_fatal(pdev->xdev, err,
25748+ "Error parsing pci device "
25749+ "configuration");
25750+ goto out;
25751+ }
25752+
25753+ err = pciback_remove_device(pdev, domain, bus, slot,
25754+ func);
25755+ if(err)
25756+ goto out;
25757+
25758+ /* TODO: If at some point we implement support for pci
25759+ * root hot-remove on pcifront side, we'll need to
25760+ * remove unnecessary xenstore nodes of pci roots here.
25761+ */
25762+
25763+ break;
25764+
25765+ default:
25766+ break;
25767+ }
25768+ }
25769+
25770+ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
25771+ if (err) {
25772+ xenbus_dev_fatal(pdev->xdev, err,
25773+ "Error switching to reconfigured state!");
25774+ goto out;
25775+ }
25776+
25777+ out:
25778+ spin_unlock(&pdev->dev_lock);
25779+
25780+ return 0;
25781+}
25782+
25783+static void pciback_frontend_changed(struct xenbus_device *xdev,
25784+ enum xenbus_state fe_state)
25785+{
25786+ struct pciback_device *pdev = xdev->dev.driver_data;
25787+
25788+ dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
25789+
25790+ switch (fe_state) {
25791+ case XenbusStateInitialised:
25792+ pciback_attach(pdev);
25793+ break;
25794+
25795+ case XenbusStateReconfiguring:
25796+ pciback_reconfigure(pdev);
25797+ break;
25798+
25799+ case XenbusStateConnected:
25800+		/* pcifront switched its state from reconfiguring to connected;
25801+		 * mirror that here by switching to the connected state.
25802+ */
25803+ xenbus_switch_state(xdev, XenbusStateConnected);
25804+ break;
25805+
25806+ case XenbusStateClosing:
25807+ pciback_disconnect(pdev);
25808+ xenbus_switch_state(xdev, XenbusStateClosing);
25809+ break;
25810+
25811+ case XenbusStateClosed:
25812+ pciback_disconnect(pdev);
25813+ xenbus_switch_state(xdev, XenbusStateClosed);
25814+ if (xenbus_dev_is_online(xdev))
25815+ break;
25816+ /* fall through if not online */
25817+ case XenbusStateUnknown:
25818+ dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
25819+ device_unregister(&xdev->dev);
25820+ break;
25821+
25822+ default:
25823+ break;
25824+ }
25825+}
25826+
25827+static int pciback_setup_backend(struct pciback_device *pdev)
25828+{
25829+ /* Get configuration from xend (if available now) */
25830+ int domain, bus, slot, func;
25831+ int err = 0;
25832+ int i, num_devs;
25833+ char dev_str[64];
25834+ char state_str[64];
25835+
25836+ spin_lock(&pdev->dev_lock);
25837+
25838+ /* It's possible we could get the call to setup twice, so make sure
25839+ * we're not already connected.
25840+ */
25841+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
25842+ XenbusStateInitWait)
25843+ goto out;
25844+
25845+ dev_dbg(&pdev->xdev->dev, "getting be setup\n");
25846+
25847+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
25848+ &num_devs);
25849+ if (err != 1) {
25850+ if (err >= 0)
25851+ err = -EINVAL;
25852+ xenbus_dev_fatal(pdev->xdev, err,
25853+ "Error reading number of devices");
25854+ goto out;
25855+ }
25856+
25857+ for (i = 0; i < num_devs; i++) {
25858+ int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
25859+ if (unlikely(l >= (sizeof(dev_str) - 1))) {
25860+ err = -ENOMEM;
25861+ xenbus_dev_fatal(pdev->xdev, err,
25862+ "String overflow while reading "
25863+ "configuration");
25864+ goto out;
25865+ }
25866+
25867+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
25868+ "%x:%x:%x.%x", &domain, &bus, &slot, &func);
25869+ if (err < 0) {
25870+ xenbus_dev_fatal(pdev->xdev, err,
25871+ "Error reading device configuration");
25872+ goto out;
25873+ }
25874+ if (err != 4) {
25875+ err = -EINVAL;
25876+ xenbus_dev_fatal(pdev->xdev, err,
25877+ "Error parsing pci device "
25878+ "configuration");
25879+ goto out;
25880+ }
25881+
25882+ err = pciback_export_device(pdev, domain, bus, slot, func, i);
25883+ if (err)
25884+ goto out;
25885+
25886+ /* Switch substate of this device. */
25887+ l = snprintf(state_str, sizeof(state_str), "state-%d", i);
25888+ if (unlikely(l >= (sizeof(state_str) - 1))) {
25889+ err = -ENOMEM;
25890+ xenbus_dev_fatal(pdev->xdev, err,
25891+ "String overflow while reading "
25892+ "configuration");
25893+ goto out;
25894+ }
25895+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
25896+ "%d", XenbusStateInitialised);
25897+ if (err) {
25898+ xenbus_dev_fatal(pdev->xdev, err, "Error switching "
25899+ "substate of dev-%d\n", i);
25900+ goto out;
25901+ }
25902+ }
25903+
25904+ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
25905+ if (err) {
25906+ xenbus_dev_fatal(pdev->xdev, err,
25907+ "Error while publishing PCI root buses "
25908+ "for frontend");
25909+ goto out;
25910+ }
25911+
25912+ err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
25913+ if (err)
25914+ xenbus_dev_fatal(pdev->xdev, err,
25915+ "Error switching to initialised state!");
25916+
25917+ out:
25918+ spin_unlock(&pdev->dev_lock);
25919+
25920+ if (!err)
25921+ /* see if pcifront is already configured (if not, we'll wait) */
25922+ pciback_attach(pdev);
25923+
25924+ return err;
25925+}
25926+
25927+static void pciback_be_watch(struct xenbus_watch *watch,
25928+ const char **vec, unsigned int len)
25929+{
25930+ struct pciback_device *pdev =
25931+ container_of(watch, struct pciback_device, be_watch);
25932+
25933+ switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
25934+ case XenbusStateInitWait:
25935+ pciback_setup_backend(pdev);
25936+ break;
25937+
25938+ default:
25939+ break;
25940+ }
25941+}
25942+
25943+static int pciback_xenbus_probe(struct xenbus_device *dev,
25944+ const struct xenbus_device_id *id)
25945+{
25946+ int err = 0;
25947+ struct pciback_device *pdev = alloc_pdev(dev);
25948+
25949+ if (pdev == NULL) {
25950+ err = -ENOMEM;
25951+ xenbus_dev_fatal(dev, err,
25952+ "Error allocating pciback_device struct");
25953+ goto out;
25954+ }
25955+
25956+ /* wait for xend to configure us */
25957+ err = xenbus_switch_state(dev, XenbusStateInitWait);
25958+ if (err)
25959+ goto out;
25960+
25961+ /* watch the backend node for backend configuration information */
25962+ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
25963+ pciback_be_watch);
25964+ if (err)
25965+ goto out;
25966+ pdev->be_watching = 1;
25967+
25968+ /* We need to force a call to our callback here in case
25969+ * xend already configured us!
25970+ */
25971+ pciback_be_watch(&pdev->be_watch, NULL, 0);
25972+
25973+ out:
25974+ return err;
25975+}
25976+
25977+static int pciback_xenbus_remove(struct xenbus_device *dev)
25978+{
25979+ struct pciback_device *pdev = dev->dev.driver_data;
25980+
25981+ if (pdev != NULL)
25982+ free_pdev(pdev);
25983+
25984+ return 0;
25985+}
25986+
25987+static const struct xenbus_device_id xenpci_ids[] = {
25988+ {"pci"},
25989+ {{0}},
25990+};
25991+
25992+static struct xenbus_driver xenbus_pciback_driver = {
25993+ .name = "pciback",
25994+ .owner = THIS_MODULE,
25995+ .ids = xenpci_ids,
25996+ .probe = pciback_xenbus_probe,
25997+ .remove = pciback_xenbus_remove,
25998+ .otherend_changed = pciback_frontend_changed,
25999+};
26000+
26001+int __init pciback_xenbus_register(void)
26002+{
26003+ if (!is_running_on_xen())
26004+ return -ENODEV;
26005+
26006+ return xenbus_register_backend(&xenbus_pciback_driver);
26007+}
26008+
26009+void __exit pciback_xenbus_unregister(void)
26010+{
26011+ xenbus_unregister_driver(&xenbus_pciback_driver);
26012+}
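
The xenbus logic above is driven entirely by nodes the toolstack writes under the backend's own xenstore path: pciback_setup_backend() reads num_devs and dev-N, exports each device, answers with a per-device state-N substate and the published root buses, and pciback_reconfigure() additionally consumes dev-N/vdev-N entries for hot-plug and hot-unplug. A plausible layout for one passed-through device is sketched below; only the key names and formats are taken from the code above, the path prefix follows the conventional backend/pci/<domid>/<devid> scheme and the concrete values are illustrative.

    backend/pci/1/0/num_devs = "1"
    backend/pci/1/0/dev-0    = "0000:00:1d.0"   (parsed with "%x:%x:%x.%x")
    backend/pci/1/0/state-0  = "3"              (XenbusStateInitialised, written back by pciback)
    backend/pci/1/0/root_num = "1"
    backend/pci/1/0/root-0   = "0000:00"        (read by pcifront with "%x:%x")
    backend/pci/1/0/vdev-0   = "0000:00:1d.0"   (only present while a device is being detached)
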
26013Index: head-2008-11-25/drivers/xen/pcifront/Makefile
26014===================================================================
26015--- /dev/null 1970-01-01 00:00:00.000000000 +0000
26016+++ head-2008-11-25/drivers/xen/pcifront/Makefile 2007-06-12 13:13:45.000000000 +0200
26017@@ -0,0 +1,7 @@
26018+obj-y += pcifront.o
26019+
26020+pcifront-y := pci_op.o xenbus.o pci.o
26021+
26022+ifeq ($(CONFIG_XEN_PCIDEV_FE_DEBUG),y)
26023+EXTRA_CFLAGS += -DDEBUG
26024+endif
26025Index: head-2008-11-25/drivers/xen/pcifront/pci.c
26026===================================================================
26027--- /dev/null 1970-01-01 00:00:00.000000000 +0000
26028+++ head-2008-11-25/drivers/xen/pcifront/pci.c 2007-06-12 13:13:45.000000000 +0200
26029@@ -0,0 +1,46 @@
26030+/*
26031+ * PCI Frontend Operations - ensure only one PCI frontend runs at a time
26032+ *
26033+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
26034+ */
26035+#include <linux/module.h>
26036+#include <linux/init.h>
26037+#include <linux/pci.h>
26038+#include <linux/spinlock.h>
26039+#include "pcifront.h"
26040+
26041+DEFINE_SPINLOCK(pcifront_dev_lock);
26042+static struct pcifront_device *pcifront_dev = NULL;
26043+
26044+int pcifront_connect(struct pcifront_device *pdev)
26045+{
26046+ int err = 0;
26047+
26048+ spin_lock(&pcifront_dev_lock);
26049+
26050+ if (!pcifront_dev) {
26051+ dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
26052+ pcifront_dev = pdev;
26053+ }
26054+ else {
26055+ dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
26056+ err = -EEXIST;
26057+ }
26058+
26059+ spin_unlock(&pcifront_dev_lock);
26060+
26061+ return err;
26062+}
26063+
26064+void pcifront_disconnect(struct pcifront_device *pdev)
26065+{
26066+ spin_lock(&pcifront_dev_lock);
26067+
26068+ if (pdev == pcifront_dev) {
26069+ dev_info(&pdev->xdev->dev,
26070+ "Disconnecting PCI Frontend Buses\n");
26071+ pcifront_dev = NULL;
26072+ }
26073+
26074+ spin_unlock(&pcifront_dev_lock);
26075+}
26076Index: head-2008-11-25/drivers/xen/pcifront/pci_op.c
26077===================================================================
26078--- /dev/null 1970-01-01 00:00:00.000000000 +0000
26079+++ head-2008-11-25/drivers/xen/pcifront/pci_op.c 2008-07-21 11:00:33.000000000 +0200
26080@@ -0,0 +1,551 @@
26081+/*
26082+ * PCI Frontend Operations - Communicates with the backend (pciback)
26083+ *
26084+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
26085+ */
26086+#include <linux/module.h>
26087+#include <linux/version.h>
26088+#include <linux/init.h>
26089+#include <linux/pci.h>
26090+#include <linux/spinlock.h>
26091+#include <linux/time.h>
26092+#include <xen/evtchn.h>
26093+#include "pcifront.h"
26094+
26095+static int verbose_request = 0;
26096+module_param(verbose_request, int, 0644);
26097+
26098+#ifdef __ia64__
26099+static void pcifront_init_sd(struct pcifront_sd *sd,
26100+ unsigned int domain, unsigned int bus,
26101+ struct pcifront_device *pdev)
26102+{
26103+ int err, i, j, k, len, root_num, res_count;
26104+ struct acpi_resource res;
26105+ unsigned int d, b, byte;
26106+ unsigned long magic;
26107+ char str[64], tmp[3];
26108+ unsigned char *buf, *bufp;
26109+ u8 *ptr;
26110+
26111+ memset(sd, 0, sizeof(*sd));
26112+
26113+ sd->segment = domain;
26114+ sd->node = -1; /* Revisit for NUMA */
26115+ sd->platform_data = pdev;
26116+
26117+ /* Look for resources for this controller in xenbus. */
26118+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "root_num",
26119+ "%d", &root_num);
26120+ if (err != 1)
26121+ return;
26122+
26123+ for (i = 0; i < root_num; i++) {
26124+ len = snprintf(str, sizeof(str), "root-%d", i);
26125+ if (unlikely(len >= (sizeof(str) - 1)))
26126+ return;
26127+
26128+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
26129+ str, "%x:%x", &d, &b);
26130+ if (err != 2)
26131+ return;
26132+
26133+ if (d == domain && b == bus)
26134+ break;
26135+ }
26136+
26137+ if (i == root_num)
26138+ return;
26139+
26140+ len = snprintf(str, sizeof(str), "root-resource-magic");
26141+
26142+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
26143+ str, "%lx", &magic);
26144+
26145+ if (err != 1)
26146+ return; /* No resources, nothing to do */
26147+
26148+ if (magic != (sizeof(res) * 2) + 1) {
26149+ printk(KERN_WARNING "pcifront: resource magic mismatch\n");
26150+ return;
26151+ }
26152+
26153+ len = snprintf(str, sizeof(str), "root-%d-resources", i);
26154+ if (unlikely(len >= (sizeof(str) - 1)))
26155+ return;
26156+
26157+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
26158+ str, "%d", &res_count);
26159+
26160+ if (err != 1)
26161+ return; /* No resources, nothing to do */
26162+
26163+ sd->window = kzalloc(sizeof(*sd->window) * res_count, GFP_KERNEL);
26164+ if (!sd->window)
26165+ return;
26166+
26167+ /* magic is also the size of the byte stream in xenbus */
26168+ buf = kmalloc(magic, GFP_KERNEL);
26169+ if (!buf) {
26170+ kfree(sd->window);
26171+ sd->window = NULL;
26172+ return;
26173+ }
26174+
26175+ /* Read the resources out of xenbus */
26176+ for (j = 0; j < res_count; j++) {
26177+ memset(&res, 0, sizeof(res));
26178+ memset(buf, 0, magic);
26179+
26180+ len = snprintf(str, sizeof(str), "root-%d-resource-%d", i, j);
26181+ if (unlikely(len >= (sizeof(str) - 1)))
26182+ break; /* bail out of the loop so buf is still freed below */
26183+
26184+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
26185+ "%s", buf);
26186+ if (err != 1) {
26187+ printk(KERN_WARNING "pcifront: error reading "
26188+ "resource %d on bus %04x:%02x\n",
26189+ j, domain, bus);
26190+ continue;
26191+ }
26192+
26193+ bufp = buf;
26194+ ptr = (u8 *)&res;
26195+ memset(tmp, 0, sizeof(tmp));
26196+
26197+ /* Copy ASCII byte stream into structure */
26198+ for (k = 0; k < magic - 1; k += 2) {
26199+ memcpy(tmp, bufp, 2);
26200+ bufp += 2;
26201+
26202+ sscanf(tmp, "%02x", &byte);
26203+ *ptr = byte;
26204+ ptr++;
26205+ }
26206+
26207+ xen_add_resource(sd, domain, bus, &res);
26208+ sd->windows++;
26209+ }
26210+ kfree(buf);
26211+}
26212+#endif
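
The ia64-only helper above reconstructs struct acpi_resource values that the backend serialized into xenbus as an ASCII hex stream, two characters per byte (which is why magic must equal sizeof(res) * 2 + 1, the extra byte being the terminating NUL). A minimal, self-contained userspace sketch of that decode step is shown below; the function name and the sample input are illustrative, only the tmp/sscanf pattern mirrors the loop above.

    #include <stdio.h>
    #include <string.h>

    /* Decode out_len bytes from a stream of two ASCII hex digits per byte. */
    static void decode_hex_stream(const char *buf, unsigned char *out, size_t out_len)
    {
        char tmp[3] = { 0 };
        unsigned int byte;
        size_t i;

        for (i = 0; i < out_len; i++) {
            memcpy(tmp, buf + 2 * i, 2);   /* two hex digits ...   */
            sscanf(tmp, "%02x", &byte);    /* ... become one byte  */
            out[i] = byte;
        }
    }

    int main(void)
    {
        unsigned char out[3];

        decode_hex_stream("0a1bff", out, sizeof(out));
        printf("%02x %02x %02x\n", out[0], out[1], out[2]);   /* prints: 0a 1b ff */
        return 0;
    }
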
26213+
26214+static int errno_to_pcibios_err(int errno)
26215+{
26216+ switch (errno) {
26217+ case XEN_PCI_ERR_success:
26218+ return PCIBIOS_SUCCESSFUL;
26219+
26220+ case XEN_PCI_ERR_dev_not_found:
26221+ return PCIBIOS_DEVICE_NOT_FOUND;
26222+
26223+ case XEN_PCI_ERR_invalid_offset:
26224+ case XEN_PCI_ERR_op_failed:
26225+ return PCIBIOS_BAD_REGISTER_NUMBER;
26226+
26227+ case XEN_PCI_ERR_not_implemented:
26228+ return PCIBIOS_FUNC_NOT_SUPPORTED;
26229+
26230+ case XEN_PCI_ERR_access_denied:
26231+ return PCIBIOS_SET_FAILED;
26232+ }
26233+ return errno;
26234+}
26235+
26236+static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
26237+{
26238+ int err = 0;
26239+ struct xen_pci_op *active_op = &pdev->sh_info->op;
26240+ unsigned long irq_flags;
26241+ evtchn_port_t port = pdev->evtchn;
26242+ s64 ns, ns_timeout;
26243+ struct timeval tv;
26244+
26245+ spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
26246+
26247+ memcpy(active_op, op, sizeof(struct xen_pci_op));
26248+
26249+ /* Go */
26250+ wmb();
26251+ set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
26252+ notify_remote_via_evtchn(port);
26253+
26254+ /*
26255+ * We set a poll timeout of 3 seconds but give up on return after
26256+ * 2 seconds. It is better to time out too late rather than too early
26257+ * (in the latter case we end up continually re-executing poll() with a
26258+ * timeout in the past). 1s difference gives plenty of slack for error.
26259+ */
26260+ do_gettimeofday(&tv);
26261+ ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
26262+
26263+ clear_evtchn(port);
26264+
26265+ while (test_bit(_XEN_PCIF_active,
26266+ (unsigned long *)&pdev->sh_info->flags)) {
26267+ if (HYPERVISOR_poll(&port, 1, jiffies + 3*HZ))
26268+ BUG();
26269+ clear_evtchn(port);
26270+ do_gettimeofday(&tv);
26271+ ns = timeval_to_ns(&tv);
26272+ if (ns > ns_timeout) {
26273+ dev_err(&pdev->xdev->dev,
26274+ "pciback not responding!!!\n");
26275+ clear_bit(_XEN_PCIF_active,
26276+ (unsigned long *)&pdev->sh_info->flags);
26277+ err = XEN_PCI_ERR_dev_not_found;
26278+ goto out;
26279+ }
26280+ }
26281+
26282+ memcpy(op, active_op, sizeof(struct xen_pci_op));
26283+
26284+ err = op->err;
26285+ out:
26286+ spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
26287+ return err;
26288+}
26289+
26290+/* Access to this function is spinlocked in drivers/pci/access.c */
26291+static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
26292+ int where, int size, u32 * val)
26293+{
26294+ int err = 0;
26295+ struct xen_pci_op op = {
26296+ .cmd = XEN_PCI_OP_conf_read,
26297+ .domain = pci_domain_nr(bus),
26298+ .bus = bus->number,
26299+ .devfn = devfn,
26300+ .offset = where,
26301+ .size = size,
26302+ };
26303+ struct pcifront_sd *sd = bus->sysdata;
26304+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
26305+
26306+ if (verbose_request)
26307+ dev_info(&pdev->xdev->dev,
26308+ "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n",
26309+ pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
26310+ PCI_FUNC(devfn), where, size);
26311+
26312+ err = do_pci_op(pdev, &op);
26313+
26314+ if (likely(!err)) {
26315+ if (verbose_request)
26316+ dev_info(&pdev->xdev->dev, "read got back value %x\n",
26317+ op.value);
26318+
26319+ *val = op.value;
26320+ } else if (err == -ENODEV) {
26321+ /* No device here, pretend that it just returned 0 */
26322+ err = 0;
26323+ *val = 0;
26324+ }
26325+
26326+ return errno_to_pcibios_err(err);
26327+}
26328+
26329+/* Access to this function is spinlocked in drivers/pci/access.c */
26330+static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
26331+ int where, int size, u32 val)
26332+{
26333+ struct xen_pci_op op = {
26334+ .cmd = XEN_PCI_OP_conf_write,
26335+ .domain = pci_domain_nr(bus),
26336+ .bus = bus->number,
26337+ .devfn = devfn,
26338+ .offset = where,
26339+ .size = size,
26340+ .value = val,
26341+ };
26342+ struct pcifront_sd *sd = bus->sysdata;
26343+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
26344+
26345+ if (verbose_request)
26346+ dev_info(&pdev->xdev->dev,
26347+ "write dev=%04x:%02x:%02x.%01x - "
26348+ "offset %x size %d val %x\n",
26349+ pci_domain_nr(bus), bus->number,
26350+ PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
26351+
26352+ return errno_to_pcibios_err(do_pci_op(pdev, &op));
26353+}
26354+
26355+struct pci_ops pcifront_bus_ops = {
26356+ .read = pcifront_bus_read,
26357+ .write = pcifront_bus_write,
26358+};
26359+
26360+#ifdef CONFIG_PCI_MSI
26361+int pci_frontend_enable_msix(struct pci_dev *dev,
26362+ struct msix_entry *entries,
26363+ int nvec)
26364+{
26365+ int err;
26366+ int i;
26367+ struct xen_pci_op op = {
26368+ .cmd = XEN_PCI_OP_enable_msix,
26369+ .domain = pci_domain_nr(dev->bus),
26370+ .bus = dev->bus->number,
26371+ .devfn = dev->devfn,
26372+ .value = nvec,
26373+ };
26374+ struct pcifront_sd *sd = dev->bus->sysdata;
26375+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
26376+
26377+ if (nvec > SH_INFO_MAX_VEC) {
26378+ printk("too many vectors (%x) for pci frontend\n", nvec);
26379+ return -EINVAL;
26380+ }
26381+
26382+ for (i = 0; i < nvec; i++) {
26383+ op.msix_entries[i].entry = entries[i].entry;
26384+ op.msix_entries[i].vector = entries[i].vector;
26385+ }
26386+
26387+ err = do_pci_op(pdev, &op);
26388+
26389+ if (!err) {
26390+ if (!op.value) {
26391+ /* we get the result */
26392+ for ( i = 0; i < nvec; i++)
26393+ entries[i].vector = op.msix_entries[i].vector;
26394+ return 0;
26395+ }
26396+ else {
26397+ printk("enable msix returned value %x\n", op.value);
26398+ return op.value;
26399+ }
26400+ }
26401+ else {
26402+ printk("enable msix returned err %x\n", err);
26403+ return err;
26404+ }
26405+}
26406+
26407+void pci_frontend_disable_msix(struct pci_dev* dev)
26408+{
26409+ int err;
26410+ struct xen_pci_op op = {
26411+ .cmd = XEN_PCI_OP_disable_msix,
26412+ .domain = pci_domain_nr(dev->bus),
26413+ .bus = dev->bus->number,
26414+ .devfn = dev->devfn,
26415+ };
26416+ struct pcifront_sd *sd = dev->bus->sysdata;
26417+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
26418+
26419+ err = do_pci_op(pdev, &op);
26420+
26421+ /* What should we do on error? */
26422+ if (err)
26423+ printk("pci_disable_msix returned err %x\n", err);
26424+}
26425+
26426+int pci_frontend_enable_msi(struct pci_dev *dev)
26427+{
26428+ int err;
26429+ struct xen_pci_op op = {
26430+ .cmd = XEN_PCI_OP_enable_msi,
26431+ .domain = pci_domain_nr(dev->bus),
26432+ .bus = dev->bus->number,
26433+ .devfn = dev->devfn,
26434+ };
26435+ struct pcifront_sd *sd = dev->bus->sysdata;
26436+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
26437+
26438+ err = do_pci_op(pdev, &op);
26439+ if (likely(!err)) {
26440+ dev->irq = op.value;
26441+ }
26442+ else {
26443+ printk("pci frontend enable msi failed for dev %x:%x \n",
26444+ op.bus, op.devfn);
26445+ err = -EINVAL;
26446+ }
26447+ return err;
26448+}
26449+
26450+void pci_frontend_disable_msi(struct pci_dev* dev)
26451+{
26452+ int err;
26453+ struct xen_pci_op op = {
26454+ .cmd = XEN_PCI_OP_disable_msi,
26455+ .domain = pci_domain_nr(dev->bus),
26456+ .bus = dev->bus->number,
26457+ .devfn = dev->devfn,
26458+ };
26459+ struct pcifront_sd *sd = dev->bus->sysdata;
26460+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
26461+
26462+ err = do_pci_op(pdev, &op);
26463+ if (err == XEN_PCI_ERR_dev_not_found) {
26464+ /* XXX No response from backend, what shall we do? */
26465+ printk("no response from backend for disable MSI\n");
26466+ return;
26467+ }
26468+ if (likely(!err))
26469+ dev->irq = op.value;
26470+ else
26471+ /* how can pciback notify us of failure? */
26472+ printk("got unexpected response from backend\n");
26473+}
26474+#endif /* CONFIG_PCI_MSI */
26475+
26476+/* Claim resources for the PCI frontend as-is, backend won't allow changes */
26477+static void pcifront_claim_resource(struct pci_dev *dev, void *data)
26478+{
26479+ struct pcifront_device *pdev = data;
26480+ int i;
26481+ struct resource *r;
26482+
26483+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
26484+ r = &dev->resource[i];
26485+
26486+ if (!r->parent && r->start && r->flags) {
26487+ dev_dbg(&pdev->xdev->dev, "claiming resource %s/%d\n",
26488+ pci_name(dev), i);
26489+ pci_claim_resource(dev, i);
26490+ }
26491+ }
26492+}
26493+
26494+int __devinit pcifront_scan_root(struct pcifront_device *pdev,
26495+ unsigned int domain, unsigned int bus)
26496+{
26497+ struct pci_bus *b;
26498+ struct pcifront_sd *sd = NULL;
26499+ struct pci_bus_entry *bus_entry = NULL;
26500+ int err = 0;
26501+
26502+#ifndef CONFIG_PCI_DOMAINS
26503+ if (domain != 0) {
26504+ dev_err(&pdev->xdev->dev,
26505+ "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
26506+ dev_err(&pdev->xdev->dev,
26507+ "Please compile with CONFIG_PCI_DOMAINS\n");
26508+ err = -EINVAL;
26509+ goto err_out;
26510+ }
26511+#endif
26512+
26513+ dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
26514+ domain, bus);
26515+
26516+ bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
26517+ sd = kmalloc(sizeof(*sd), GFP_KERNEL);
26518+ if (!bus_entry || !sd) {
26519+ err = -ENOMEM;
26520+ goto err_out;
26521+ }
26522+ pcifront_init_sd(sd, domain, bus, pdev);
26523+
26524+ b = pci_scan_bus_parented(&pdev->xdev->dev, bus,
26525+ &pcifront_bus_ops, sd);
26526+ if (!b) {
26527+ dev_err(&pdev->xdev->dev,
26528+ "Error creating PCI Frontend Bus!\n");
26529+ err = -ENOMEM;
26530+ goto err_out;
26531+ }
26532+
26533+ pcifront_setup_root_resources(b, sd);
26534+ bus_entry->bus = b;
26535+
26536+ list_add(&bus_entry->list, &pdev->root_buses);
26537+
26538+ /* Claim resources before going "live" with our devices */
26539+ pci_walk_bus(b, pcifront_claim_resource, pdev);
26540+
26541+ pci_bus_add_devices(b);
26542+
26543+ return 0;
26544+
26545+ err_out:
26546+ kfree(bus_entry);
26547+ kfree(sd);
26548+
26549+ return err;
26550+}
26551+
26552+int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
26553+ unsigned int domain, unsigned int bus)
26554+{
26555+ struct pci_bus *b;
26556+ struct pci_dev *d;
26557+ unsigned int devfn;
26558+
26559+#ifndef CONFIG_PCI_DOMAINS
26560+ if (domain != 0) {
26561+ dev_err(&pdev->xdev->dev,
26562+ "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
26563+ dev_err(&pdev->xdev->dev,
26564+ "Please compile with CONFIG_PCI_DOMAINS\n");
26565+ return -EINVAL;
26566+ }
26567+#endif
26568+
26569+ dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
26570+ domain, bus);
26571+
26572+ b = pci_find_bus(domain, bus);
26573+ if(!b)
26574+ /* If the bus is unknown, create it. */
26575+ return pcifront_scan_root(pdev, domain, bus);
26576+
26577+ /* Rescan the bus for newly attached functions and add them.
26578+ * We omit handling of PCI bridge attachment because pciback prevents
26579+ * bridges from being exported.
26580+ */
26581+ for (devfn = 0; devfn < 0x100; devfn++) {
26582+ d = pci_get_slot(b, devfn);
26583+ if(d) {
26584+ /* Device is already known. */
26585+ pci_dev_put(d);
26586+ continue;
26587+ }
26588+
26589+ d = pci_scan_single_device(b, devfn);
26590+ if (d) {
26591+ dev_info(&pdev->xdev->dev, "New device on "
26592+ "%04x:%02x:%02x.%02x found.\n", domain, bus,
26593+ PCI_SLOT(devfn), PCI_FUNC(devfn));
26594+ pci_bus_add_device(d);
26595+ }
26596+ }
26597+
26598+ return 0;
26599+}
26600+
26601+static void free_root_bus_devs(struct pci_bus *bus)
26602+{
26603+ struct pci_dev *dev;
26604+
26605+ while (!list_empty(&bus->devices)) {
26606+ dev = container_of(bus->devices.next, struct pci_dev,
26607+ bus_list);
26608+ dev_dbg(&dev->dev, "removing device\n");
26609+ pci_remove_bus_device(dev);
26610+ }
26611+}
26612+
26613+void pcifront_free_roots(struct pcifront_device *pdev)
26614+{
26615+ struct pci_bus_entry *bus_entry, *t;
26616+
26617+ dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
26618+
26619+ list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
26620+ list_del(&bus_entry->list);
26621+
26622+ free_root_bus_devs(bus_entry->bus);
26623+
26624+ kfree(bus_entry->bus->sysdata);
26625+
26626+ device_unregister(bus_entry->bus->bridge);
26627+ pci_remove_bus(bus_entry->bus);
26628+
26629+ kfree(bus_entry);
26630+ }
26631+}
26632Index: head-2008-11-25/drivers/xen/pcifront/pcifront.h
26633===================================================================
26634--- /dev/null 1970-01-01 00:00:00.000000000 +0000
26635+++ head-2008-11-25/drivers/xen/pcifront/pcifront.h 2008-02-26 10:54:11.000000000 +0100
26636@@ -0,0 +1,42 @@
26637+/*
26638+ * PCI Frontend - Common data structures & function declarations
26639+ *
26640+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
26641+ */
26642+#ifndef __XEN_PCIFRONT_H__
26643+#define __XEN_PCIFRONT_H__
26644+
26645+#include <linux/spinlock.h>
26646+#include <linux/pci.h>
26647+#include <xen/xenbus.h>
26648+#include <xen/interface/io/pciif.h>
26649+#include <xen/pcifront.h>
26650+
26651+struct pci_bus_entry {
26652+ struct list_head list;
26653+ struct pci_bus *bus;
26654+};
26655+
26656+struct pcifront_device {
26657+ struct xenbus_device *xdev;
26658+ struct list_head root_buses;
26659+ spinlock_t dev_lock;
26660+
26661+ int evtchn;
26662+ int gnt_ref;
26663+
26664+ /* Lock this when doing any operations in sh_info */
26665+ spinlock_t sh_info_lock;
26666+ struct xen_pci_sharedinfo *sh_info;
26667+};
26668+
26669+int pcifront_connect(struct pcifront_device *pdev);
26670+void pcifront_disconnect(struct pcifront_device *pdev);
26671+
26672+int pcifront_scan_root(struct pcifront_device *pdev,
26673+ unsigned int domain, unsigned int bus);
26674+int pcifront_rescan_root(struct pcifront_device *pdev,
26675+ unsigned int domain, unsigned int bus);
26676+void pcifront_free_roots(struct pcifront_device *pdev);
26677+
26678+#endif /* __XEN_PCIFRONT_H__ */
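
The declarations above cover the whole lifecycle of a frontend instance, and the xenbus glue in the next file calls them in a fixed order. The condensed sketch below shows that order for a single root bus; the example_* function names are hypothetical, and error handling plus the xenstore parsing are elided. pcifront_try_connect()/pcifront_try_disconnect() further down are the authoritative versions.

    /* Sketch only: compiles against the declarations in this header. */
    static int example_bring_up(struct pcifront_device *pdev,
                                unsigned int domain, unsigned int bus)
    {
        int err;

        err = pcifront_connect(pdev);          /* claim the single frontend slot    */
        if (err)
            return err;

        err = pcifront_scan_root(pdev, domain, bus);  /* one call per root-%d node  */
        if (err)
            return err;

        return xenbus_switch_state(pdev->xdev, XenbusStateConnected);
    }

    static void example_tear_down(struct pcifront_device *pdev)
    {
        pcifront_free_roots(pdev);             /* drop devices and root buses       */
        pcifront_disconnect(pdev);             /* release the frontend slot         */
    }
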
26679Index: head-2008-11-25/drivers/xen/pcifront/xenbus.c
26680===================================================================
26681--- /dev/null 1970-01-01 00:00:00.000000000 +0000
26682+++ head-2008-11-25/drivers/xen/pcifront/xenbus.c 2008-07-21 11:00:33.000000000 +0200
26683@@ -0,0 +1,455 @@
26684+/*
26685+ * PCI Frontend Xenbus Setup - handles setup with backend (imports page/evtchn)
26686+ *
26687+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
26688+ */
26689+#include <linux/module.h>
26690+#include <linux/init.h>
26691+#include <linux/mm.h>
26692+#include <xen/xenbus.h>
26693+#include <xen/gnttab.h>
26694+#include "pcifront.h"
26695+
26696+#ifndef __init_refok
26697+#define __init_refok
26698+#endif
26699+
26700+#define INVALID_GRANT_REF (0)
26701+#define INVALID_EVTCHN (-1)
26702+
26703+static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
26704+{
26705+ struct pcifront_device *pdev;
26706+
26707+ pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
26708+ if (pdev == NULL)
26709+ goto out;
26710+
26711+ pdev->sh_info =
26712+ (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL);
26713+ if (pdev->sh_info == NULL) {
26714+ kfree(pdev);
26715+ pdev = NULL;
26716+ goto out;
26717+ }
26718+ pdev->sh_info->flags = 0;
26719+
26720+ xdev->dev.driver_data = pdev;
26721+ pdev->xdev = xdev;
26722+
26723+ INIT_LIST_HEAD(&pdev->root_buses);
26724+
26725+ spin_lock_init(&pdev->dev_lock);
26726+ spin_lock_init(&pdev->sh_info_lock);
26727+
26728+ pdev->evtchn = INVALID_EVTCHN;
26729+ pdev->gnt_ref = INVALID_GRANT_REF;
26730+
26731+ dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
26732+ pdev, pdev->sh_info);
26733+ out:
26734+ return pdev;
26735+}
26736+
26737+static void free_pdev(struct pcifront_device *pdev)
26738+{
26739+ dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
26740+
26741+ pcifront_free_roots(pdev);
26742+
26743+ if (pdev->evtchn != INVALID_EVTCHN)
26744+ xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
26745+
26746+ if (pdev->gnt_ref != INVALID_GRANT_REF)
26747+ gnttab_end_foreign_access(pdev->gnt_ref,
26748+ (unsigned long)pdev->sh_info);
26749+
26750+ pdev->xdev->dev.driver_data = NULL;
26751+
26752+ kfree(pdev);
26753+}
26754+
26755+static int pcifront_publish_info(struct pcifront_device *pdev)
26756+{
26757+ int err = 0;
26758+ struct xenbus_transaction trans;
26759+
26760+ err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
26761+ if (err < 0)
26762+ goto out;
26763+
26764+ pdev->gnt_ref = err;
26765+
26766+ err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
26767+ if (err)
26768+ goto out;
26769+
26770+ do_publish:
26771+ err = xenbus_transaction_start(&trans);
26772+ if (err) {
26773+ xenbus_dev_fatal(pdev->xdev, err,
26774+ "Error writing configuration for backend "
26775+ "(start transaction)");
26776+ goto out;
26777+ }
26778+
26779+ err = xenbus_printf(trans, pdev->xdev->nodename,
26780+ "pci-op-ref", "%u", pdev->gnt_ref);
26781+ if (!err)
26782+ err = xenbus_printf(trans, pdev->xdev->nodename,
26783+ "event-channel", "%u", pdev->evtchn);
26784+ if (!err)
26785+ err = xenbus_printf(trans, pdev->xdev->nodename,
26786+ "magic", XEN_PCI_MAGIC);
26787+
26788+ if (err) {
26789+ xenbus_transaction_end(trans, 1);
26790+ xenbus_dev_fatal(pdev->xdev, err,
26791+ "Error writing configuration for backend");
26792+ goto out;
26793+ } else {
26794+ err = xenbus_transaction_end(trans, 0);
26795+ if (err == -EAGAIN)
26796+ goto do_publish;
26797+ else if (err) {
26798+ xenbus_dev_fatal(pdev->xdev, err,
26799+ "Error completing transaction "
26800+ "for backend");
26801+ goto out;
26802+ }
26803+ }
26804+
26805+ xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
26806+
26807+ dev_dbg(&pdev->xdev->dev, "publishing successful!\n");
26808+
26809+ out:
26810+ return err;
26811+}
26812+
26813+static int __devinit pcifront_try_connect(struct pcifront_device *pdev)
26814+{
26815+ int err = -EFAULT;
26816+ int i, num_roots, len;
26817+ char str[64];
26818+ unsigned int domain, bus;
26819+
26820+ spin_lock(&pdev->dev_lock);
26821+
26822+ /* Only connect once */
26823+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
26824+ XenbusStateInitialised)
26825+ goto out;
26826+
26827+ err = pcifront_connect(pdev);
26828+ if (err) {
26829+ xenbus_dev_fatal(pdev->xdev, err,
26830+ "Error connecting PCI Frontend");
26831+ goto out;
26832+ }
26833+
26834+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
26835+ "root_num", "%d", &num_roots);
26836+ if (err == -ENOENT) {
26837+ xenbus_dev_error(pdev->xdev, err,
26838+ "No PCI Roots found, trying 0000:00");
26839+ err = pcifront_scan_root(pdev, 0, 0);
26840+ num_roots = 0;
26841+ } else if (err != 1) {
26842+ if (err == 0)
26843+ err = -EINVAL;
26844+ xenbus_dev_fatal(pdev->xdev, err,
26845+ "Error reading number of PCI roots");
26846+ goto out;
26847+ }
26848+
26849+ for (i = 0; i < num_roots; i++) {
26850+ len = snprintf(str, sizeof(str), "root-%d", i);
26851+ if (unlikely(len >= (sizeof(str) - 1))) {
26852+ err = -ENOMEM;
26853+ goto out;
26854+ }
26855+
26856+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
26857+ "%x:%x", &domain, &bus);
26858+ if (err != 2) {
26859+ if (err >= 0)
26860+ err = -EINVAL;
26861+ xenbus_dev_fatal(pdev->xdev, err,
26862+ "Error reading PCI root %d", i);
26863+ goto out;
26864+ }
26865+
26866+ err = pcifront_scan_root(pdev, domain, bus);
26867+ if (err) {
26868+ xenbus_dev_fatal(pdev->xdev, err,
26869+ "Error scanning PCI root %04x:%02x",
26870+ domain, bus);
26871+ goto out;
26872+ }
26873+ }
26874+
26875+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
26876+ if (err)
26877+ goto out;
26878+
26879+ out:
26880+ spin_unlock(&pdev->dev_lock);
26881+ return err;
26882+}
26883+
26884+static int pcifront_try_disconnect(struct pcifront_device *pdev)
26885+{
26886+ int err = 0;
26887+ enum xenbus_state prev_state;
26888+
26889+ spin_lock(&pdev->dev_lock);
26890+
26891+ prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
26892+
26893+ if (prev_state >= XenbusStateClosing)
26894+ goto out;
26895+
26896+ if(prev_state == XenbusStateConnected) {
26897+ pcifront_free_roots(pdev);
26898+ pcifront_disconnect(pdev);
26899+ }
26900+
26901+ err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
26902+
26903+ out:
26904+ spin_unlock(&pdev->dev_lock);
26905+
26906+ return err;
26907+}
26908+
26909+static int __devinit pcifront_attach_devices(struct pcifront_device *pdev)
26910+{
26911+ int err = -EFAULT;
26912+ int i, num_roots, len;
26913+ unsigned int domain, bus;
26914+ char str[64];
26915+
26916+ spin_lock(&pdev->dev_lock);
26917+
26918+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
26919+ XenbusStateReconfiguring)
26920+ goto out;
26921+
26922+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
26923+ "root_num", "%d", &num_roots);
26924+ if (err == -ENOENT) {
26925+ xenbus_dev_error(pdev->xdev, err,
26926+ "No PCI Roots found, trying 0000:00");
26927+ err = pcifront_rescan_root(pdev, 0, 0);
26928+ num_roots = 0;
26929+ } else if (err != 1) {
26930+ if (err == 0)
26931+ err = -EINVAL;
26932+ xenbus_dev_fatal(pdev->xdev, err,
26933+ "Error reading number of PCI roots");
26934+ goto out;
26935+ }
26936+
26937+ for (i = 0; i < num_roots; i++) {
26938+ len = snprintf(str, sizeof(str), "root-%d", i);
26939+ if (unlikely(len >= (sizeof(str) - 1))) {
26940+ err = -ENOMEM;
26941+ goto out;
26942+ }
26943+
26944+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
26945+ "%x:%x", &domain, &bus);
26946+ if (err != 2) {
26947+ if (err >= 0)
26948+ err = -EINVAL;
26949+ xenbus_dev_fatal(pdev->xdev, err,
26950+ "Error reading PCI root %d", i);
26951+ goto out;
26952+ }
26953+
26954+ err = pcifront_rescan_root(pdev, domain, bus);
26955+ if (err) {
26956+ xenbus_dev_fatal(pdev->xdev, err,
26957+ "Error scanning PCI root %04x:%02x",
26958+ domain, bus);
26959+ goto out;
26960+ }
26961+ }
26962+
26963+ xenbus_switch_state(pdev->xdev, XenbusStateConnected);
26964+
26965+ out:
26966+ spin_unlock(&pdev->dev_lock);
26967+ return err;
26968+}
26969+
26970+static int pcifront_detach_devices(struct pcifront_device *pdev)
26971+{
26972+ int err = 0;
26973+ int i, num_devs;
26974+ unsigned int domain, bus, slot, func;
26975+ struct pci_bus *pci_bus;
26976+ struct pci_dev *pci_dev;
26977+ char str[64];
26978+
26979+ spin_lock(&pdev->dev_lock);
26980+
26981+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
26982+ XenbusStateConnected)
26983+ goto out;
26984+
26985+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
26986+ &num_devs);
26987+ if (err != 1) {
26988+ if (err >= 0)
26989+ err = -EINVAL;
26990+ xenbus_dev_fatal(pdev->xdev, err,
26991+ "Error reading number of PCI devices");
26992+ goto out;
26993+ }
26994+
26995+ /* Find devices being detached and remove them. */
26996+ for (i = 0; i < num_devs; i++) {
26997+ int l, state;
26998+ l = snprintf(str, sizeof(str), "state-%d", i);
26999+ if (unlikely(l >= (sizeof(str) - 1))) {
27000+ err = -ENOMEM;
27001+ goto out;
27002+ }
27003+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d",
27004+ &state);
27005+ if (err != 1)
27006+ state = XenbusStateUnknown;
27007+
27008+ if (state != XenbusStateClosing)
27009+ continue;
27010+
27011+ /* Remove device. */
27012+ l = snprintf(str, sizeof(str), "vdev-%d", i);
27013+ if (unlikely(l >= (sizeof(str) - 1))) {
27014+ err = -ENOMEM;
27015+ goto out;
27016+ }
27017+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
27018+ "%x:%x:%x.%x", &domain, &bus, &slot, &func);
27019+ if (err != 4) {
27020+ if (err >= 0)
27021+ err = -EINVAL;
27022+ xenbus_dev_fatal(pdev->xdev, err,
27023+ "Error reading PCI device %d", i);
27024+ goto out;
27025+ }
27026+
27027+ pci_bus = pci_find_bus(domain, bus);
27028+ if(!pci_bus) {
27029+ dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n",
27030+ domain, bus);
27031+ continue;
27032+ }
27033+ pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func));
27034+ if(!pci_dev) {
27035+ dev_dbg(&pdev->xdev->dev,
27036+ "Cannot get PCI device %04x:%02x:%02x.%02x\n",
27037+ domain, bus, slot, func);
27038+ continue;
27039+ }
27040+ pci_remove_bus_device(pci_dev);
27041+ pci_dev_put(pci_dev);
27042+
27043+ dev_dbg(&pdev->xdev->dev,
27044+ "PCI device %04x:%02x:%02x.%02x removed.\n",
27045+ domain, bus, slot, func);
27046+ }
27047+
27048+ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
27049+
27050+ out:
27051+ spin_unlock(&pdev->dev_lock);
27052+ return err;
27053+}
27054+
27055+static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
27056+ enum xenbus_state be_state)
27057+{
27058+ struct pcifront_device *pdev = xdev->dev.driver_data;
27059+
27060+ switch (be_state) {
27061+ case XenbusStateUnknown:
27062+ case XenbusStateInitialising:
27063+ case XenbusStateInitWait:
27064+ case XenbusStateInitialised:
27065+ case XenbusStateClosed:
27066+ break;
27067+
27068+ case XenbusStateConnected:
27069+ pcifront_try_connect(pdev);
27070+ break;
27071+
27072+ case XenbusStateClosing:
27073+ dev_warn(&xdev->dev, "backend going away!\n");
27074+ pcifront_try_disconnect(pdev);
27075+ break;
27076+
27077+ case XenbusStateReconfiguring:
27078+ pcifront_detach_devices(pdev);
27079+ break;
27080+
27081+ case XenbusStateReconfigured:
27082+ pcifront_attach_devices(pdev);
27083+ break;
27084+ }
27085+}
27086+
27087+static int pcifront_xenbus_probe(struct xenbus_device *xdev,
27088+ const struct xenbus_device_id *id)
27089+{
27090+ int err = 0;
27091+ struct pcifront_device *pdev = alloc_pdev(xdev);
27092+
27093+ if (pdev == NULL) {
27094+ err = -ENOMEM;
27095+ xenbus_dev_fatal(xdev, err,
27096+ "Error allocating pcifront_device struct");
27097+ goto out;
27098+ }
27099+
27100+ err = pcifront_publish_info(pdev);
27101+
27102+ out:
27103+ return err;
27104+}
27105+
27106+static int pcifront_xenbus_remove(struct xenbus_device *xdev)
27107+{
27108+ if (xdev->dev.driver_data)
27109+ free_pdev(xdev->dev.driver_data);
27110+
27111+ return 0;
27112+}
27113+
27114+static const struct xenbus_device_id xenpci_ids[] = {
27115+ {"pci"},
27116+ {{0}},
27117+};
27118+MODULE_ALIAS("xen:pci");
27119+
27120+static struct xenbus_driver xenbus_pcifront_driver = {
27121+ .name = "pcifront",
27122+ .owner = THIS_MODULE,
27123+ .ids = xenpci_ids,
27124+ .probe = pcifront_xenbus_probe,
27125+ .remove = pcifront_xenbus_remove,
27126+ .otherend_changed = pcifront_backend_changed,
27127+};
27128+
27129+static int __init pcifront_init(void)
27130+{
27131+ if (!is_running_on_xen())
27132+ return -ENODEV;
27133+
27134+ return xenbus_register_frontend(&xenbus_pcifront_driver);
27135+}
27136+
27137+/* Initialize after the Xen PCI Frontend Stub is initialized */
27138+subsys_initcall(pcifront_init);
27139Index: head-2008-11-25/drivers/xen/privcmd/Makefile
27140===================================================================
27141--- /dev/null 1970-01-01 00:00:00.000000000 +0000
27142+++ head-2008-11-25/drivers/xen/privcmd/Makefile 2007-07-10 09:42:30.000000000 +0200
27143@@ -0,0 +1,3 @@
27144+
27145+obj-y += privcmd.o
27146+obj-$(CONFIG_COMPAT) += compat_privcmd.o
27147Index: head-2008-11-25/drivers/xen/privcmd/compat_privcmd.c
27148===================================================================
27149--- /dev/null 1970-01-01 00:00:00.000000000 +0000
27150+++ head-2008-11-25/drivers/xen/privcmd/compat_privcmd.c 2007-07-10 09:42:30.000000000 +0200
27151@@ -0,0 +1,73 @@
27152+/*
27153+ * This program is free software; you can redistribute it and/or modify
27154+ * it under the terms of the GNU General Public License as published by
27155+ * the Free Software Foundation; either version 2 of the License, or
27156+ * (at your option) any later version.
27157+ *
27158+ * This program is distributed in the hope that it will be useful,
27159+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27160+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27161+ * GNU General Public License for more details.
27162+ *
27163+ * You should have received a copy of the GNU General Public License
27164+ * along with this program; if not, write to the Free Software
27165+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27166+ *
27167+ * Copyright (C) IBM Corp. 2006
27168+ *
27169+ * Authors: Jimi Xenidis <jimix@watson.ibm.com>
27170+ */
27171+
27172+#include <linux/config.h>
27173+#include <linux/compat.h>
27174+#include <linux/ioctl.h>
27175+#include <linux/syscalls.h>
27176+#include <asm/hypervisor.h>
27177+#include <asm/uaccess.h>
27178+#include <xen/public/privcmd.h>
27179+#include <xen/compat_ioctl.h>
27180+
27181+int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg)
27182+{
27183+ int ret;
27184+
27185+ switch (cmd) {
27186+ case IOCTL_PRIVCMD_MMAP_32: {
27187+ struct privcmd_mmap *p;
27188+ struct privcmd_mmap_32 *p32;
27189+ struct privcmd_mmap_32 n32;
27190+
27191+ p32 = compat_ptr(arg);
27192+ p = compat_alloc_user_space(sizeof(*p));
27193+ if (copy_from_user(&n32, p32, sizeof(n32)) ||
27194+ put_user(n32.num, &p->num) ||
27195+ put_user(n32.dom, &p->dom) ||
27196+ put_user(compat_ptr(n32.entry), &p->entry))
27197+ return -EFAULT;
27198+
27199+ ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAP, (unsigned long)p);
27200+ }
27201+ break;
27202+ case IOCTL_PRIVCMD_MMAPBATCH_32: {
27203+ struct privcmd_mmapbatch *p;
27204+ struct privcmd_mmapbatch_32 *p32;
27205+ struct privcmd_mmapbatch_32 n32;
27206+
27207+ p32 = compat_ptr(arg);
27208+ p = compat_alloc_user_space(sizeof(*p));
27209+ if (copy_from_user(&n32, p32, sizeof(n32)) ||
27210+ put_user(n32.num, &p->num) ||
27211+ put_user(n32.dom, &p->dom) ||
27212+ put_user(n32.addr, &p->addr) ||
27213+ put_user(compat_ptr(n32.arr), &p->arr))
27214+ return -EFAULT;
27215+
27216+ ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, (unsigned long)p);
27217+ }
27218+ break;
27219+ default:
27220+ ret = -EINVAL;
27221+ break;
27222+ }
27223+ return ret;
27224+}
27225Index: head-2008-11-25/drivers/xen/privcmd/privcmd.c
27226===================================================================
27227--- /dev/null 1970-01-01 00:00:00.000000000 +0000
27228+++ head-2008-11-25/drivers/xen/privcmd/privcmd.c 2008-07-21 11:00:33.000000000 +0200
27229@@ -0,0 +1,356 @@
27230+/******************************************************************************
27231+ * privcmd.c
27232+ *
27233+ * Interface to privileged domain-0 commands.
27234+ *
27235+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
27236+ */
27237+
27238+#include <linux/kernel.h>
27239+#include <linux/sched.h>
27240+#include <linux/slab.h>
27241+#include <linux/string.h>
27242+#include <linux/errno.h>
27243+#include <linux/mm.h>
27244+#include <linux/mman.h>
27245+#include <linux/swap.h>
27246+#include <linux/smp_lock.h>
27247+#include <linux/highmem.h>
27248+#include <linux/pagemap.h>
27249+#include <linux/seq_file.h>
27250+#include <asm/hypervisor.h>
27251+
27252+#include <asm/pgalloc.h>
27253+#include <asm/pgtable.h>
27254+#include <asm/uaccess.h>
27255+#include <asm/tlb.h>
27256+#include <asm/hypervisor.h>
27257+#include <xen/public/privcmd.h>
27258+#include <xen/interface/xen.h>
27259+#include <xen/xen_proc.h>
27260+#include <xen/features.h>
27261+
27262+static struct proc_dir_entry *privcmd_intf;
27263+static struct proc_dir_entry *capabilities_intf;
27264+
27265+#ifndef HAVE_ARCH_PRIVCMD_MMAP
27266+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
27267+#endif
27268+
27269+static long privcmd_ioctl(struct file *file,
27270+ unsigned int cmd, unsigned long data)
27271+{
27272+ int ret = -ENOSYS;
27273+ void __user *udata = (void __user *) data;
27274+
27275+ switch (cmd) {
27276+ case IOCTL_PRIVCMD_HYPERCALL: {
27277+ privcmd_hypercall_t hypercall;
27278+
27279+ if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
27280+ return -EFAULT;
27281+
27282+#if defined(__i386__)
27283+ if (hypercall.op >= (PAGE_SIZE >> 5))
27284+ break;
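		/*
		 * The bound above mirrors the hypercall page layout relied on by
		 * the asm below: each hypercall stub is 32 bytes (the op index is
		 * shifted left by 5 and added to hypercall_page), so a 4096-byte
		 * page holds PAGE_SIZE >> 5 == 128 entries and the call target is
		 * hypercall_page + op * 32.
		 */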
27285+ __asm__ __volatile__ (
27286+ "pushl %%ebx; pushl %%ecx; pushl %%edx; "
27287+ "pushl %%esi; pushl %%edi; "
27288+ "movl 8(%%eax),%%ebx ;"
27289+ "movl 16(%%eax),%%ecx ;"
27290+ "movl 24(%%eax),%%edx ;"
27291+ "movl 32(%%eax),%%esi ;"
27292+ "movl 40(%%eax),%%edi ;"
27293+ "movl (%%eax),%%eax ;"
27294+ "shll $5,%%eax ;"
27295+ "addl $hypercall_page,%%eax ;"
27296+ "call *%%eax ;"
27297+ "popl %%edi; popl %%esi; popl %%edx; "
27298+ "popl %%ecx; popl %%ebx"
27299+ : "=a" (ret) : "0" (&hypercall) : "memory" );
27300+#elif defined (__x86_64__)
27301+ if (hypercall.op < (PAGE_SIZE >> 5)) {
27302+ long ign1, ign2, ign3;
27303+ __asm__ __volatile__ (
27304+ "movq %8,%%r10; movq %9,%%r8;"
27305+ "shll $5,%%eax ;"
27306+ "addq $hypercall_page,%%rax ;"
27307+ "call *%%rax"
27308+ : "=a" (ret), "=D" (ign1),
27309+ "=S" (ign2), "=d" (ign3)
27310+ : "0" ((unsigned int)hypercall.op),
27311+ "1" (hypercall.arg[0]),
27312+ "2" (hypercall.arg[1]),
27313+ "3" (hypercall.arg[2]),
27314+ "g" (hypercall.arg[3]),
27315+ "g" (hypercall.arg[4])
27316+ : "r8", "r10", "memory" );
27317+ }
27318+#else
27319+ ret = privcmd_hypercall(&hypercall);
27320+#endif
27321+ }
27322+ break;
27323+
27324+ case IOCTL_PRIVCMD_MMAP: {
27325+#define MMAP_NR_PER_PAGE (int)((PAGE_SIZE-sizeof(struct list_head))/sizeof(privcmd_mmap_entry_t))
27326+ privcmd_mmap_t mmapcmd;
27327+ privcmd_mmap_entry_t *msg;
27328+ privcmd_mmap_entry_t __user *p;
27329+ struct mm_struct *mm = current->mm;
27330+ struct vm_area_struct *vma;
27331+ unsigned long va;
27332+ int i, rc;
27333+ LIST_HEAD(pagelist);
27334+ struct list_head *l,*l2;
27335+
27336+ if (!is_initial_xendomain())
27337+ return -EPERM;
27338+
27339+ if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
27340+ return -EFAULT;
27341+
27342+ p = mmapcmd.entry;
27343+ for (i = 0; i < mmapcmd.num;) {
27344+ int nr = min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
27345+
27346+ rc = -ENOMEM;
27347+ l = (struct list_head *) __get_free_page(GFP_KERNEL);
27348+ if (l == NULL)
27349+ goto mmap_out;
27350+
27351+ INIT_LIST_HEAD(l);
27352+ list_add_tail(l, &pagelist);
27353+ msg = (privcmd_mmap_entry_t*)(l + 1);
27354+
27355+ rc = -EFAULT;
27356+ if (copy_from_user(msg, p, nr*sizeof(*msg)))
27357+ goto mmap_out;
27358+ i += nr;
27359+ p += nr;
27360+ }
27361+
27362+ l = pagelist.next;
27363+ msg = (privcmd_mmap_entry_t*)(l + 1);
27364+
27365+ down_write(&mm->mmap_sem);
27366+
27367+ vma = find_vma(mm, msg->va);
27368+ rc = -EINVAL;
27369+ if (!vma || (msg->va != vma->vm_start) ||
27370+ !privcmd_enforce_singleshot_mapping(vma))
27371+ goto mmap_out;
27372+
27373+ va = vma->vm_start;
27374+
27375+ i = 0;
27376+ list_for_each(l, &pagelist) {
27377+ int nr = i + min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
27378+
27379+ msg = (privcmd_mmap_entry_t*)(l + 1);
27380+ while (i<nr) {
27381+
27382+ /* Do not allow range to wrap the address space. */
27383+ rc = -EINVAL;
27384+ if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
27385+ ((unsigned long)(msg->npages << PAGE_SHIFT) >= -va))
27386+ goto mmap_out;
27387+
27388+ /* Range chunks must be contiguous in va space. */
27389+ if ((msg->va != va) ||
27390+ ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
27391+ goto mmap_out;
27392+
27393+ if ((rc = direct_remap_pfn_range(
27394+ vma,
27395+ msg->va & PAGE_MASK,
27396+ msg->mfn,
27397+ msg->npages << PAGE_SHIFT,
27398+ vma->vm_page_prot,
27399+ mmapcmd.dom)) < 0)
27400+ goto mmap_out;
27401+
27402+ va += msg->npages << PAGE_SHIFT;
27403+ msg++;
27404+ i++;
27405+ }
27406+ }
27407+
27408+ rc = 0;
27409+
27410+ mmap_out:
27411+ up_write(&mm->mmap_sem);
27412+ list_for_each_safe(l,l2,&pagelist)
27413+ free_page((unsigned long)l);
27414+ ret = rc;
27415+ }
27416+#undef MMAP_NR_PER_PAGE
27417+ break;
27418+
27419+ case IOCTL_PRIVCMD_MMAPBATCH: {
27420+#define MMAPBATCH_NR_PER_PAGE (unsigned long)((PAGE_SIZE-sizeof(struct list_head))/sizeof(unsigned long))
27421+ privcmd_mmapbatch_t m;
27422+ struct mm_struct *mm = current->mm;
27423+ struct vm_area_struct *vma;
27424+ xen_pfn_t __user *p;
27425+ unsigned long addr, *mfn, nr_pages;
27426+ int i;
27427+ LIST_HEAD(pagelist);
27428+ struct list_head *l, *l2;
27429+
27430+ if (!is_initial_xendomain())
27431+ return -EPERM;
27432+
27433+ if (copy_from_user(&m, udata, sizeof(m)))
27434+ return -EFAULT;
27435+
27436+ nr_pages = m.num;
27437+ if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
27438+ return -EINVAL;
27439+
27440+ p = m.arr;
27441+ for (i=0; i<nr_pages; ) {
27442+ int nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
27443+
27444+ ret = -ENOMEM;
27445+ l = (struct list_head *)__get_free_page(GFP_KERNEL);
27446+ if (l == NULL)
27447+ goto mmapbatch_out;
27448+
27449+ INIT_LIST_HEAD(l);
27450+ list_add_tail(l, &pagelist);
27451+
27452+ mfn = (unsigned long*)(l + 1);
27453+ ret = -EFAULT;
27454+ if (copy_from_user(mfn, p, nr*sizeof(*mfn)))
27455+ goto mmapbatch_out;
27456+
27457+ i += nr; p+= nr;
27458+ }
27459+
27460+ down_write(&mm->mmap_sem);
27461+
27462+ vma = find_vma(mm, m.addr);
27463+ ret = -EINVAL;
27464+ if (!vma ||
27465+ (m.addr != vma->vm_start) ||
27466+ ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
27467+ !privcmd_enforce_singleshot_mapping(vma)) {
27468+ up_write(&mm->mmap_sem);
27469+ goto mmapbatch_out;
27470+ }
27471+
27472+ p = m.arr;
27473+ addr = m.addr;
27474+ i = 0;
27475+ ret = 0;
27476+ list_for_each(l, &pagelist) {
27477+ int nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
27478+ mfn = (unsigned long *)(l + 1);
27479+
27480+ while (i<nr) {
27481+ if(direct_remap_pfn_range(vma, addr & PAGE_MASK,
27482+ *mfn, PAGE_SIZE,
27483+ vma->vm_page_prot, m.dom) < 0) {
27484+ *mfn |= 0xf0000000U;
27485+ ret++;
27486+ }
27487+ mfn++; i++; addr += PAGE_SIZE;
27488+ }
27489+ }
27490+
27491+ up_write(&mm->mmap_sem);
27492+ if (ret > 0) {
27493+ p = m.arr;
27494+ i = 0;
27495+ ret = 0;
27496+ list_for_each(l, &pagelist) {
27497+ int nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
27498+ mfn = (unsigned long *)(l + 1);
27499+ if (copy_to_user(p, mfn, nr*sizeof(*mfn)))
27500+ ret = -EFAULT;
27501+ i += nr; p += nr;
27502+ }
27503+ }
27504+ mmapbatch_out:
27505+ list_for_each_safe(l,l2,&pagelist)
27506+ free_page((unsigned long)l);
27507+#undef MMAPBATCH_NR_PER_PAGE
27508+ }
27509+ break;
27510+
27511+ default:
27512+ ret = -EINVAL;
27513+ break;
27514+ }
27515+
27516+ return ret;
27517+}
27518+
27519+#ifndef HAVE_ARCH_PRIVCMD_MMAP
27520+static struct page *privcmd_nopage(struct vm_area_struct *vma,
27521+ unsigned long address,
27522+ int *type)
27523+{
27524+ return NOPAGE_SIGBUS;
27525+}
27526+
27527+static struct vm_operations_struct privcmd_vm_ops = {
27528+ .nopage = privcmd_nopage
27529+};
27530+
27531+static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
27532+{
27533+ /* Unsupported for auto-translate guests. */
27534+ if (xen_feature(XENFEAT_auto_translated_physmap))
27535+ return -ENOSYS;
27536+
27537+ /* DONTCOPY is essential for Xen as copy_page_range is broken. */
27538+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
27539+ vma->vm_ops = &privcmd_vm_ops;
27540+ vma->vm_private_data = NULL;
27541+
27542+ return 0;
27543+}
27544+
27545+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
27546+{
27547+ return (xchg(&vma->vm_private_data, (void *)1) == NULL);
27548+}
27549+#endif
27550+
27551+static const struct file_operations privcmd_file_ops = {
27552+ .unlocked_ioctl = privcmd_ioctl,
27553+ .mmap = privcmd_mmap,
27554+};
27555+
27556+static int capabilities_read(char *page, char **start, off_t off,
27557+ int count, int *eof, void *data)
27558+{
27559+ int len = 0;
27560+ *page = 0;
27561+
27562+ if (is_initial_xendomain())
27563+ len = sprintf( page, "control_d\n" );
27564+
27565+ *eof = 1;
27566+ return len;
27567+}
27568+
27569+static int __init privcmd_init(void)
27570+{
27571+ if (!is_running_on_xen())
27572+ return -ENODEV;
27573+
27574+ privcmd_intf = create_xen_proc_entry("privcmd", 0400);
27575+ if (privcmd_intf != NULL)
27576+ privcmd_intf->proc_fops = &privcmd_file_ops;
27577+
27578+ capabilities_intf = create_xen_proc_entry("capabilities", 0400 );
27579+ if (capabilities_intf != NULL)
27580+ capabilities_intf->read_proc = capabilities_read;
27581+
27582+ return 0;
27583+}
27584+
27585+__initcall(privcmd_init);
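
privcmd_init() above exposes the ioctl interface through a "privcmd" entry created with create_xen_proc_entry(), which the Xen management tools in dom0 use to issue hypercalls and foreign-page mappings from userspace. Below is a minimal sketch of driving IOCTL_PRIVCMD_HYPERCALL; the /proc/xen/privcmd path, the userspace header location and the __HYPERVISOR_xen_version number follow the usual Xen conventions rather than anything in this patch, while the privcmd_hypercall_t fields (.op, .arg[]) match their use in privcmd_ioctl() above. It needs to run as root inside a Xen domain with this driver loaded.

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <xen/sys/privcmd.h>          /* privcmd_hypercall_t, IOCTL_PRIVCMD_HYPERCALL */

    #define __HYPERVISOR_xen_version 17   /* sub-op 0 = XENVER_version */

    int main(void)
    {
        privcmd_hypercall_t hc;
        int fd, ret;

        fd = open("/proc/xen/privcmd", O_RDWR);
        if (fd < 0) {
            perror("open /proc/xen/privcmd");
            return 1;
        }

        memset(&hc, 0, sizeof(hc));
        hc.op = __HYPERVISOR_xen_version;
        hc.arg[0] = 0;                    /* XENVER_version            */
        hc.arg[1] = 0;                    /* no argument buffer needed */

        ret = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hc);
        if (ret < 0)
            perror("IOCTL_PRIVCMD_HYPERCALL");
        else
            printf("Xen version %d.%d\n", ret >> 16, ret & 0xffff);

        close(fd);
        return 0;
    }
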
27586Index: head-2008-11-25/drivers/xen/scsiback/Makefile
27587===================================================================
27588--- /dev/null 1970-01-01 00:00:00.000000000 +0000
27589+++ head-2008-11-25/drivers/xen/scsiback/Makefile 2008-07-21 11:00:33.000000000 +0200
27590@@ -0,0 +1,4 @@
27591+obj-$(CONFIG_XEN_SCSI_BACKEND) := xen-scsibk.o
27592+
27593+xen-scsibk-y := interface.o scsiback.o xenbus.o translate.o emulate.o
27594+
27595Index: head-2008-11-25/drivers/xen/scsiback/common.h
27596===================================================================
27597--- /dev/null 1970-01-01 00:00:00.000000000 +0000
27598+++ head-2008-11-25/drivers/xen/scsiback/common.h 2008-07-21 11:00:33.000000000 +0200
27599@@ -0,0 +1,181 @@
27600+/*
27601+ * Copyright (c) 2008, FUJITSU Limited
27602+ *
27603+ * Based on the blkback driver code.
27604+ *
27605+ * This program is free software; you can redistribute it and/or
27606+ * modify it under the terms of the GNU General Public License version 2
27607+ * as published by the Free Software Foundation; or, when distributed
27608+ * separately from the Linux kernel or incorporated into other
27609+ * software packages, subject to the following license:
27610+ *
27611+ * Permission is hereby granted, free of charge, to any person obtaining a copy
27612+ * of this source file (the "Software"), to deal in the Software without
27613+ * restriction, including without limitation the rights to use, copy, modify,
27614+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
27615+ * and to permit persons to whom the Software is furnished to do so, subject to
27616+ * the following conditions:
27617+ *
27618+ * The above copyright notice and this permission notice shall be included in
27619+ * all copies or substantial portions of the Software.
27620+ *
27621+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27622+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27623+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27624+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27625+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27626+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27627+ * IN THE SOFTWARE.
27628+ */
27629+
27630+#ifndef __SCSIIF__BACKEND__COMMON_H__
27631+#define __SCSIIF__BACKEND__COMMON_H__
27632+
27633+#include <linux/version.h>
27634+#include <linux/module.h>
27635+#include <linux/interrupt.h>
27636+#include <linux/slab.h>
27637+#include <linux/vmalloc.h>
27638+#include <linux/wait.h>
27639+#include <linux/sched.h>
27640+#include <linux/kthread.h>
27641+#include <linux/blkdev.h>
27642+#include <linux/list.h>
27643+#include <linux/kthread.h>
27644+#include <scsi/scsi.h>
27645+#include <scsi/scsi_cmnd.h>
27646+#include <scsi/scsi_host.h>
27647+#include <scsi/scsi_device.h>
27648+#include <scsi/scsi_dbg.h>
27649+#include <scsi/scsi_eh.h>
27650+#include <asm/io.h>
27651+#include <asm/setup.h>
27652+#include <asm/pgalloc.h>
27653+#include <asm/delay.h>
27654+#include <xen/evtchn.h>
27655+#include <asm/hypervisor.h>
27656+#include <xen/gnttab.h>
27657+#include <xen/driver_util.h>
27658+#include <xen/xenbus.h>
27659+#include <xen/interface/io/ring.h>
27660+#include <xen/interface/grant_table.h>
27661+#include <xen/interface/io/vscsiif.h>
27662+
27663+
27664+#define DPRINTK(_f, _a...) \
27665+ pr_debug("(file=%s, line=%d) " _f, \
27666+ __FILE__ , __LINE__ , ## _a )
27667+
27668+struct ids_tuple {
27669+ unsigned int hst; /* host */
27670+ unsigned int chn; /* channel */
27671+ unsigned int tgt; /* target */
27672+ unsigned int lun; /* LUN */
27673+};
27674+
27675+struct v2p_entry {
27676+ struct ids_tuple v; /* translate from */
27677+ struct scsi_device *sdev; /* translate to */
27678+ struct list_head l;
27679+};
27680+
27681+struct vscsibk_info {
27682+ struct xenbus_device *dev;
27683+
27684+ domid_t domid;
27685+ unsigned int evtchn;
27686+ unsigned int irq;
27687+
27688+ struct vscsiif_back_ring ring;
27689+ struct vm_struct *ring_area;
27690+ grant_handle_t shmem_handle;
27691+ grant_ref_t shmem_ref;
27692+
27693+ spinlock_t ring_lock;
27694+ atomic_t nr_unreplied_reqs;
27695+
27696+ spinlock_t v2p_lock;
27697+ struct list_head v2p_entry_lists;
27698+
27699+ struct task_struct *kthread;
27700+ wait_queue_head_t waiting_to_free;
27701+ wait_queue_head_t wq;
27702+ unsigned int waiting_reqs;
27703+ struct page **mmap_pages;
27704+
27705+};
27706+
27707+typedef struct {
27708+ unsigned char act;
27709+ struct vscsibk_info *info;
27710+ struct scsi_device *sdev;
27711+
27712+ uint16_t rqid;
27713+
27714+ uint8_t nr_segments;
27715+ uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE];
27716+ uint8_t cmd_len;
27717+
27718+ uint8_t sc_data_direction;
27719+ uint16_t timeout_per_command;
27720+
27721+ uint32_t request_bufflen;
27722+ struct scatterlist *sgl;
27723+ grant_ref_t gref[VSCSIIF_SG_TABLESIZE];
27724+
27725+ int32_t rslt;
27726+ uint32_t resid;
27727+ uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
27728+
27729+ struct list_head free_list;
27730+} pending_req_t;
27731+
27732+
27733+
27734+#define scsiback_get(_b) (atomic_inc(&(_b)->nr_unreplied_reqs))
27735+#define scsiback_put(_b) \
27736+ do { \
27737+ if (atomic_dec_and_test(&(_b)->nr_unreplied_reqs)) \
27738+ wake_up(&(_b)->waiting_to_free);\
27739+ } while (0)
27740+
27741+#define VSCSIIF_TIMEOUT (900*HZ)
27742+
27743+
27744+irqreturn_t scsiback_intr(int, void *, struct pt_regs *);
27745+int scsiback_init_sring(struct vscsibk_info *info,
27746+ unsigned long ring_ref, unsigned int evtchn);
27747+int scsiback_schedule(void *data);
27748+
27749+
27750+struct vscsibk_info *vscsibk_info_alloc(domid_t domid);
27751+void scsiback_free(struct vscsibk_info *info);
27752+void scsiback_disconnect(struct vscsibk_info *info);
27753+int __init scsiback_interface_init(void);
27754+void scsiback_interface_exit(void);
27755+int scsiback_xenbus_init(void);
27756+void scsiback_xenbus_unregister(void);
27757+
27758+void scsiback_init_translation_table(struct vscsibk_info *info);
27759+
27760+int scsiback_add_translation_entry(struct vscsibk_info *info,
27761+ struct scsi_device *sdev, struct ids_tuple *v);
27762+
27763+int scsiback_del_translation_entry(struct vscsibk_info *info,
27764+ struct ids_tuple *v);
27765+struct scsi_device *scsiback_do_translation(struct vscsibk_info *info,
27766+ struct ids_tuple *v);
27767+void scsiback_release_translation_entry(struct vscsibk_info *info);
27768+
27769+
27770+void scsiback_cmd_exec(pending_req_t *pending_req);
27771+void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
27772+ uint32_t resid, pending_req_t *pending_req);
27773+void scsiback_fast_flush_area(pending_req_t *req);
27774+
27775+void scsiback_rsp_emulation(pending_req_t *pending_req);
27776+void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req);
27777+void scsiback_emulation_init(void);
27778+
27779+
27780+#endif /* __SCSIIF__BACKEND__COMMON_H__ */
27781Index: head-2008-11-25/drivers/xen/scsiback/emulate.c
27782===================================================================
27783--- /dev/null 1970-01-01 00:00:00.000000000 +0000
27784+++ head-2008-11-25/drivers/xen/scsiback/emulate.c 2008-08-07 12:44:36.000000000 +0200
27785@@ -0,0 +1,454 @@
27786+/*
27787+ * Xen SCSI backend driver
27788+ *
27789+ * Copyright (c) 2008, FUJITSU Limited
27790+ *
27791+ * This program is free software; you can redistribute it and/or
27792+ * modify it under the terms of the GNU General Public License version 2
27793+ * as published by the Free Software Foundation; or, when distributed
27794+ * separately from the Linux kernel or incorporated into other
27795+ * software packages, subject to the following license:
27796+ *
27797+ * Permission is hereby granted, free of charge, to any person obtaining a copy
27798+ * of this source file (the "Software"), to deal in the Software without
27799+ * restriction, including without limitation the rights to use, copy, modify,
27800+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
27801+ * and to permit persons to whom the Software is furnished to do so, subject to
27802+ * the following conditions:
27803+ *
27804+ * The above copyright notice and this permission notice shall be included in
27805+ * all copies or substantial portions of the Software.
27806+ *
27807+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27808+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27809+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27810+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27811+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27812+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27813+ * IN THE SOFTWARE.
27814+ */
27815+
27816+#include <scsi/scsi.h>
27817+#include <scsi/scsi_cmnd.h>
27818+#include <scsi/scsi_device.h>
27819+#include "common.h"
27820+
27821+/* The following SCSI commands are not defined in scsi/scsi.h */
27822+#define EXTENDED_COPY 0x83 /* EXTENDED COPY command */
27823+#define REPORT_ALIASES 0xa3 /* REPORT ALIASES command */
27824+#define CHANGE_ALIASES 0xa4 /* CHANGE ALIASES command */
27825+#define SET_PRIORITY 0xa4 /* SET PRIORITY command */
27826+
27827+
27828+/*
27829+ Bitmap used to control emulation.
27830+ (Bits 3 to 7 are reserved for future use.)
27831+*/
27832+#define VSCSIIF_NEED_CMD_EXEC 0x01 /* If this bit is set, cmd exec */
27833+ /* is required. */
27834+#define VSCSIIF_NEED_EMULATE_REQBUF 0x02 /* If this bit is set, need */
27835+ /* emulation request buff before */
27836+ /* cmd exec. */
27837+#define VSCSIIF_NEED_EMULATE_RSPBUF 0x04 /* If this bit is set, need */
27838+ /* emulation resp buff after */
27839+ /* cmd exec. */
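+
+/*
+ * These bits are combined per op_code in scsiback_emulation_init(): e.g.
+ * TEST_UNIT_READY is given only VSCSIIF_NEED_CMD_EXEC (plain pass-through),
+ * while REPORT_LUNS is given REQBUF|RSPBUF only (fully emulated, the native
+ * driver is never called).
+ */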
27840+
27841+/* Additional Sense Code (ASC) used */
27842+#define NO_ADDITIONAL_SENSE 0x0
27843+#define LOGICAL_UNIT_NOT_READY 0x4
27844+#define UNRECOVERED_READ_ERR 0x11
27845+#define PARAMETER_LIST_LENGTH_ERR 0x1a
27846+#define INVALID_OPCODE 0x20
27847+#define ADDR_OUT_OF_RANGE 0x21
27848+#define INVALID_FIELD_IN_CDB 0x24
27849+#define INVALID_FIELD_IN_PARAM_LIST 0x26
27850+#define POWERON_RESET 0x29
27851+#define SAVING_PARAMS_UNSUP 0x39
27852+#define THRESHOLD_EXCEEDED 0x5d
27853+#define LOW_POWER_COND_ON 0x5e
27854+
27855+
27856+
27857+/* Number of SCSI op_codes */
27858+#define VSCSI_MAX_SCSI_OP_CODE 256
27859+static unsigned char bitmap[VSCSI_MAX_SCSI_OP_CODE];
27860+
27861+
27862+
27863+/*
27864+ Emulation routines for each SCSI op_code.
27865+*/
27866+static void (*pre_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *);
27867+static void (*post_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *);
27868+
27869+
27870+static const int check_condition_result =
27871+ (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
27872+
27873+static void scsiback_mk_sense_buffer(uint8_t *data, uint8_t key,
27874+ uint8_t asc, uint8_t asq)
27875+{
27876+ data[0] = 0x70; /* fixed, current */
27877+ data[2] = key;
27878+ data[7] = 0xa; /* implies 18 byte sense buffer */
27879+ data[12] = asc;
27880+ data[13] = asq;
27881+}
27882+
27883+static void resp_not_supported_cmd(pending_req_t *pending_req, void *data)
27884+{
27885+ scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST,
27886+ INVALID_OPCODE, 0);
27887+ pending_req->resid = 0;
27888+ pending_req->rslt = check_condition_result;
27889+}
27890+
27891+
27892+static int __copy_to_sg(struct scatterlist *sg, unsigned int nr_sg,
27893+ void *buf, unsigned int buflen)
27894+{
27895+ void *from = buf;
27896+ void *to;
27897+ unsigned int from_rest = buflen;
27898+ unsigned int to_capa;
27899+ unsigned int copy_size = 0;
27900+ unsigned int i;
27901+ unsigned long pfn;
27902+
27903+ for (i = 0; i < nr_sg; i++) {
27904+ if (sg->page == NULL) {
27905+ printk(KERN_WARNING "%s: NULL page in "
27906+ "scatterlist\n", __FUNCTION__);
27907+ return -ENOMEM;
27908+ }
27909+
27910+ to_capa = sg->length;
27911+ copy_size = min_t(unsigned int, to_capa, from_rest);
27912+
27913+ pfn = page_to_pfn(sg->page);
27914+ to = pfn_to_kaddr(pfn) + (sg->offset);
27915+ memcpy(to, from, copy_size);
27916+
27917+ from_rest -= copy_size;
27918+ if (from_rest == 0) {
27919+ return 0;
27920+ }
27921+
27922+ sg++;
27923+ from += copy_size;
27924+ }
27925+
27926+ printk(KERN_WARNING "%s: no space in scatterlist\n",
27927+ __FUNCTION__);
27928+ return -ENOMEM;
27929+}
27930+
27931+static int __copy_from_sg(struct scatterlist *sg, unsigned int nr_sg,
27932+ void *buf, unsigned int buflen)
27933+{
27934+ void *from;
27935+ void *to = buf;
27936+ unsigned int from_rest;
27937+ unsigned int to_capa = buflen;
27938+ unsigned int copy_size;
27939+ unsigned int i;
27940+ unsigned long pfn;
27941+
27942+ for (i = 0; i < nr_sg; i++) {
27943+ if (sg->page == NULL) {
27944+ printk(KERN_WARNING "%s: NULL page in "
27945+ "scatterlist\n", __FUNCTION__);
27946+ return -ENOMEM;
27947+ }
27948+
27949+ from_rest = sg->length;
27950+ if ((from_rest > 0) && (to_capa < from_rest)) {
27951+ printk(KERN_WARNING
27952+ "%s: no space in destination buffer\n",
27953+ __FUNCTION__);
27954+ return -ENOMEM;
27955+ }
27956+ copy_size = from_rest;
27957+
27958+ pfn = page_to_pfn(sg->page);
27959+ from = pfn_to_kaddr(pfn) + (sg->offset);
27960+ memcpy(to, from, copy_size);
27961+
27962+ to_capa -= copy_size;
27963+
27964+ sg++;
27965+ to += copy_size;
27966+ }
27967+
27968+ return 0;
27969+}
27970+
27971+static int __nr_luns_under_host(struct vscsibk_info *info)
27972+{
27973+ struct v2p_entry *entry;
27974+ struct list_head *head = &(info->v2p_entry_lists);
27975+ unsigned long flags;
27976+ int lun_cnt = 0;
27977+
27978+ spin_lock_irqsave(&info->v2p_lock, flags);
27979+ list_for_each_entry(entry, head, l) {
27980+ lun_cnt++;
27981+ }
27982+ spin_unlock_irqrestore(&info->v2p_lock, flags);
27983+
27984+ return (lun_cnt);
27985+}
27986+
27987+
27988+/* REPORT LUNS defines */
27989+#define VSCSI_REPORT_LUNS_HEADER 8
27990+#define VSCSI_REPORT_LUNS_RETRY 3
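+
+/*
+ * REPORT LUNS response layout (per SPC): an 8-byte header whose first four
+ * bytes carry the LUN list length in big-endian order, followed by one
+ * 8-byte struct scsi_lun entry per LUN. __report_luns() below fills
+ * buff[2]/buff[3] of that length field and the entries starting at &buff[8].
+ */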
27991+
27992+/* quoted from scsi_debug.c/resp_report_luns() */
27993+static void __report_luns(pending_req_t *pending_req, void *data)
27994+{
27995+ struct vscsibk_info *info = pending_req->info;
27996+ unsigned int channel = pending_req->sdev->channel;
27997+ unsigned int target = pending_req->sdev->id;
27998+ unsigned int nr_seg = pending_req->nr_segments;
27999+ unsigned char *cmd = (unsigned char *)pending_req->cmnd;
28000+
28001+ unsigned char *buff = NULL;
28002+ unsigned int alloc_len;
28003+ unsigned int alloc_luns = 0;
28004+ unsigned int req_bufflen = 0;
28005+ unsigned int actual_len = 0;
28006+ unsigned int retry_cnt = 0;
28007+ int select_report = (int)cmd[2];
28008+ int i, lun_cnt = 0, lun, upper, err = 0;
28009+
28010+ struct v2p_entry *entry;
28011+ struct list_head *head = &(info->v2p_entry_lists);
28012+ unsigned long flags;
28013+
28014+ struct scsi_lun *one_lun;
28015+
28016+ req_bufflen = cmd[9] + (cmd[8] << 8) + (cmd[7] << 16) + (cmd[6] << 24);
28017+ if ((req_bufflen < 4) || (select_report != 0))
28018+ goto fail;
28019+
28020+ alloc_luns = __nr_luns_under_host(info);
28021+ alloc_len = sizeof(struct scsi_lun) * alloc_luns
28022+ + VSCSI_REPORT_LUNS_HEADER;
28023+retry:
28024+ if ((buff = kmalloc(alloc_len, GFP_KERNEL)) == NULL) {
28025+ printk(KERN_ERR "scsiback:%s kmalloc err\n", __FUNCTION__);
28026+ goto fail;
28027+ }
28028+
28029+ memset(buff, 0, alloc_len);
28030+
28031+ one_lun = (struct scsi_lun *) &buff[8];
28032+ spin_lock_irqsave(&info->v2p_lock, flags);
28033+ list_for_each_entry(entry, head, l) {
28034+ if ((entry->v.chn == channel) &&
28035+ (entry->v.tgt == target)) {
28036+
28037+ /* check overflow */
28038+ if (lun_cnt >= alloc_luns) {
28039+ spin_unlock_irqrestore(&info->v2p_lock,
28040+ flags);
28041+
28042+ if (retry_cnt < VSCSI_REPORT_LUNS_RETRY) {
28043+ retry_cnt++;
28044+ if (buff)
28045+ kfree(buff);
28046+ goto retry;
28047+ }
28048+
28049+ goto fail;
28050+ }
28051+
28052+ lun = entry->v.lun;
28053+ upper = (lun >> 8) & 0x3f;
28054+ if (upper)
28055+ one_lun[lun_cnt].scsi_lun[0] = upper;
28056+ one_lun[lun_cnt].scsi_lun[1] = lun & 0xff;
28057+ lun_cnt++;
28058+ }
28059+ }
28060+
28061+ spin_unlock_irqrestore(&info->v2p_lock, flags);
28062+
28063+ buff[2] = ((sizeof(struct scsi_lun) * lun_cnt) >> 8) & 0xff;
28064+ buff[3] = (sizeof(struct scsi_lun) * lun_cnt) & 0xff;
28065+
28066+ actual_len = lun_cnt * sizeof(struct scsi_lun)
28067+ + VSCSI_REPORT_LUNS_HEADER;
28068+ req_bufflen = 0;
28069+ for (i = 0; i < nr_seg; i++)
28070+ req_bufflen += pending_req->sgl[i].length;
28071+
28072+ err = __copy_to_sg(pending_req->sgl, nr_seg, buff,
28073+ min(req_bufflen, actual_len));
28074+ if (err)
28075+ goto fail;
28076+
28077+ memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
28078+ pending_req->rslt = 0x00;
28079+ pending_req->resid = req_bufflen - min(req_bufflen, actual_len);
28080+
28081+ kfree(buff);
28082+ return;
28083+
28084+fail:
28085+ scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST,
28086+ INVALID_FIELD_IN_CDB, 0);
28087+ pending_req->rslt = check_condition_result;
28088+ pending_req->resid = 0;
28089+ if (buff)
28090+ kfree(buff);
28091+ return;
28092+}
28093+
28094+
28095+
28096+int __pre_do_emulation(pending_req_t *pending_req, void *data)
28097+{
28098+ uint8_t op_code = pending_req->cmnd[0];
28099+
28100+ if ((bitmap[op_code] & VSCSIIF_NEED_EMULATE_REQBUF) &&
28101+ pre_function[op_code] != NULL) {
28102+ pre_function[op_code](pending_req, data);
28103+ }
28104+
28105+ /*
28106+ 0: no native driver call is needed; the caller should respond immediately.
28107+ 1: no emulation is needed, or the native driver should be called
28108+ after modifying the request buffer.
28109+ */
28110+ return !!(bitmap[op_code] & VSCSIIF_NEED_CMD_EXEC);
28111+}
28112+
28113+void scsiback_rsp_emulation(pending_req_t *pending_req)
28114+{
28115+ uint8_t op_code = pending_req->cmnd[0];
28116+
28117+ if ((bitmap[op_code] & VSCSIIF_NEED_EMULATE_RSPBUF) &&
28118+ post_function[op_code] != NULL) {
28119+ post_function[op_code](pending_req, NULL);
28120+ }
28121+
28122+ return;
28123+}
28124+
28125+
28126+void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req)
28127+{
28128+ if (__pre_do_emulation(pending_req, NULL)) {
28129+ scsiback_cmd_exec(pending_req);
28130+ }
28131+ else {
28132+ scsiback_fast_flush_area(pending_req);
28133+ scsiback_do_resp_with_sense(pending_req->sense_buffer,
28134+ pending_req->rslt, pending_req->resid, pending_req);
28135+ }
28136+}
28137+
28138+
28139+/*
28140+ The following functions are not intended to be customized.
28141+*/
28142+void scsiback_emulation_init(void)
28143+{
28144+ int i;
28145+
28146+ /* Initialize to default state */
28147+ for (i = 0; i < VSCSI_MAX_SCSI_OP_CODE; i++) {
28148+ bitmap[i] = (VSCSIIF_NEED_EMULATE_REQBUF |
28149+ VSCSIIF_NEED_EMULATE_RSPBUF);
28150+ pre_function[i] = resp_not_supported_cmd;
28151+ post_function[i] = NULL;
28152+ /* i.e. by default an op_code is rejected:
28153+ - the pre-emulation handler reports an unsupported command
28154+ - the native driver is never called
28155+ - no post-emulation is performed
28156+ */
28157+ }
28158+
28159+ /*
28160+ Register the appropriate functions below as needed.
28161+ (See scsi/scsi.h for the definitions of the SCSI op_codes.)
28162+ */
28163+
28164+ /*
28165+ The following commands are not emulated (plain pass-through to the native driver).
28166+ */
28167+ bitmap[TEST_UNIT_READY] = VSCSIIF_NEED_CMD_EXEC;
28168+ pre_function[TEST_UNIT_READY] = NULL;
28169+ post_function[TEST_UNIT_READY] = NULL;
28170+
28171+ bitmap[REZERO_UNIT] = VSCSIIF_NEED_CMD_EXEC;
28172+ pre_function[REZERO_UNIT] = NULL;
28173+ post_function[REZERO_UNIT] = NULL;
28174+
28175+ bitmap[REQUEST_SENSE] = VSCSIIF_NEED_CMD_EXEC;
28176+ pre_function[REQUEST_SENSE] = NULL;
28177+ post_function[REQUEST_SENSE] = NULL;
28178+
28179+ bitmap[FORMAT_UNIT] = VSCSIIF_NEED_CMD_EXEC;
28180+ pre_function[FORMAT_UNIT] = NULL;
28181+ post_function[FORMAT_UNIT] = NULL;
28182+
28183+ bitmap[READ_BLOCK_LIMITS] = VSCSIIF_NEED_CMD_EXEC;
28184+ pre_function[READ_BLOCK_LIMITS] = NULL;
28185+ post_function[READ_BLOCK_LIMITS] = NULL;
28186+
28187+ bitmap[READ_6] = VSCSIIF_NEED_CMD_EXEC;
28188+ pre_function[READ_6] = NULL;
28189+ post_function[READ_6] = NULL;
28190+
28191+ bitmap[WRITE_6] = VSCSIIF_NEED_CMD_EXEC;
28192+ pre_function[WRITE_6] = NULL;
28193+ post_function[WRITE_6] = NULL;
28194+
28195+ bitmap[WRITE_FILEMARKS] = VSCSIIF_NEED_CMD_EXEC;
28196+ pre_function[WRITE_FILEMARKS] = NULL;
28197+ post_function[WRITE_FILEMARKS] = NULL;
28198+
28199+ bitmap[SPACE] = VSCSIIF_NEED_CMD_EXEC;
28200+ pre_function[SPACE] = NULL;
28201+ post_function[SPACE] = NULL;
28202+
28203+ bitmap[INQUIRY] = VSCSIIF_NEED_CMD_EXEC;
28204+ pre_function[INQUIRY] = NULL;
28205+ post_function[INQUIRY] = NULL;
28206+
28207+ bitmap[ERASE] = VSCSIIF_NEED_CMD_EXEC;
28208+ pre_function[ERASE] = NULL;
28209+ post_function[ERASE] = NULL;
28210+
28211+ bitmap[MODE_SENSE] = VSCSIIF_NEED_CMD_EXEC;
28212+ pre_function[MODE_SENSE] = NULL;
28213+ post_function[MODE_SENSE] = NULL;
28214+
28215+ bitmap[SEND_DIAGNOSTIC] = VSCSIIF_NEED_CMD_EXEC;
28216+ pre_function[SEND_DIAGNOSTIC] = NULL;
28217+ post_function[SEND_DIAGNOSTIC] = NULL;
28218+
28219+ bitmap[READ_CAPACITY] = VSCSIIF_NEED_CMD_EXEC;
28220+ pre_function[READ_CAPACITY] = NULL;
28221+ post_function[READ_CAPACITY] = NULL;
28222+
28223+ bitmap[READ_10] = VSCSIIF_NEED_CMD_EXEC;
28224+ pre_function[READ_10] = NULL;
28225+ post_function[READ_10] = NULL;
28226+
28227+ bitmap[WRITE_10] = VSCSIIF_NEED_CMD_EXEC;
28228+ pre_function[WRITE_10] = NULL;
28229+ post_function[WRITE_10] = NULL;
28230+
28231+ /*
28232+ This command is fully emulated; the native driver is never called.
28233+ */
28234+ pre_function[REPORT_LUNS] = __report_luns;
28235+ bitmap[REPORT_LUNS] = (VSCSIIF_NEED_EMULATE_REQBUF |
28236+ VSCSIIF_NEED_EMULATE_RSPBUF);
28237+
28238+ return;
28239+}
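+
+/*
+ A minimal sketch (not part of this driver) of how a further op_code could
+ be registered with the tables above; __sanitize_mode_select() is a
+ hypothetical pre-emulation handler:
+
+	bitmap[MODE_SELECT]        = (VSCSIIF_NEED_EMULATE_REQBUF |
+	                              VSCSIIF_NEED_CMD_EXEC);
+	pre_function[MODE_SELECT]  = __sanitize_mode_select;
+	post_function[MODE_SELECT] = NULL;
+
+ With REQBUF set, scsiback_req_emulation_or_cmdexec() runs the handler
+ first; with CMD_EXEC set, the command is then passed to the native driver.
+*/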
28240Index: head-2008-11-25/drivers/xen/scsiback/interface.c
28241===================================================================
28242--- /dev/null 1970-01-01 00:00:00.000000000 +0000
28243+++ head-2008-11-25/drivers/xen/scsiback/interface.c 2008-07-21 11:00:33.000000000 +0200
28244@@ -0,0 +1,182 @@
28245+/*
28246+ * interface management.
28247+ *
28248+ * Copyright (c) 2008, FUJITSU Limited
28249+ *
28250+ * Based on the blkback driver code.
28251+ *
28252+ * This program is free software; you can redistribute it and/or
28253+ * modify it under the terms of the GNU General Public License version 2
28254+ * as published by the Free Software Foundation; or, when distributed
28255+ * separately from the Linux kernel or incorporated into other
28256+ * software packages, subject to the following license:
28257+ *
28258+ * Permission is hereby granted, free of charge, to any person obtaining a copy
28259+ * of this source file (the "Software"), to deal in the Software without
28260+ * restriction, including without limitation the rights to use, copy, modify,
28261+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
28262+ * and to permit persons to whom the Software is furnished to do so, subject to
28263+ * the following conditions:
28264+ *
28265+ * The above copyright notice and this permission notice shall be included in
28266+ * all copies or substantial portions of the Software.
28267+ *
28268+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28269+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28270+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28271+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28272+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28273+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28274+ * IN THE SOFTWARE.
28275+ */
28276+
28277+#include <scsi/scsi.h>
28278+#include <scsi/scsi_host.h>
28279+#include <scsi/scsi_device.h>
28280+#include "common.h"
28281+
28282+#include <xen/evtchn.h>
28283+#include <linux/kthread.h>
28284+
28285+
28286+static kmem_cache_t *scsiback_cachep;
28287+
28288+struct vscsibk_info *vscsibk_info_alloc(domid_t domid)
28289+{
28290+ struct vscsibk_info *info;
28291+
28292+ info = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL);
28293+ if (!info)
28294+ return ERR_PTR(-ENOMEM);
28295+
28296+ memset(info, 0, sizeof(*info));
28297+ info->domid = domid;
28298+ spin_lock_init(&info->ring_lock);
28299+ atomic_set(&info->nr_unreplied_reqs, 0);
28300+ init_waitqueue_head(&info->wq);
28301+ init_waitqueue_head(&info->waiting_to_free);
28302+
28303+ return info;
28304+}
28305+
28306+static int map_frontend_page( struct vscsibk_info *info,
28307+ unsigned long ring_ref)
28308+{
28309+ struct gnttab_map_grant_ref op;
28310+ int err;
28311+
28312+ gnttab_set_map_op(&op, (unsigned long)info->ring_area->addr,
28313+ GNTMAP_host_map, ring_ref,
28314+ info->domid);
28315+
28316+ err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
28317+ BUG_ON(err);
28318+
28319+ if (op.status) {
28320+ printk(KERN_ERR "scsiback: Grant table operation failure !\n");
28321+ return op.status;
28322+ }
28323+
28324+ info->shmem_ref = ring_ref;
28325+ info->shmem_handle = op.handle;
28326+
28327+ return (GNTST_okay);
28328+}
28329+
28330+static void unmap_frontend_page(struct vscsibk_info *info)
28331+{
28332+ struct gnttab_unmap_grant_ref op;
28333+ int err;
28334+
28335+ gnttab_set_unmap_op(&op, (unsigned long)info->ring_area->addr,
28336+ GNTMAP_host_map, info->shmem_handle);
28337+
28338+ err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
28339+ BUG_ON(err);
28340+
28341+}
28342+
28343+int scsiback_init_sring(struct vscsibk_info *info,
28344+ unsigned long ring_ref, unsigned int evtchn)
28345+{
28346+ struct vscsiif_sring *sring;
28347+ int err;
28348+
28349+ if (info->irq) {
28350+ printk(KERN_ERR "scsiback: Already connected through?\n");
28351+ return -1;
28352+ }
28353+
28354+ info->ring_area = alloc_vm_area(PAGE_SIZE);
28355+ if (!info->ring_area)
28356+ return -ENOMEM;
28357+
28358+ err = map_frontend_page(info, ring_ref);
28359+ if (err)
28360+ goto free_vm;
28361+
28362+ sring = (struct vscsiif_sring *) info->ring_area->addr;
28363+ BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
28364+
28365+ err = bind_interdomain_evtchn_to_irqhandler(
28366+ info->domid, evtchn,
28367+ scsiback_intr, 0, "vscsiif-backend", info);
28368+
28369+ if (err < 0)
28370+ goto unmap_page;
28371+
28372+ info->irq = err;
28373+
28374+ return 0;
28375+
28376+unmap_page:
28377+ unmap_frontend_page(info);
28378+free_vm:
28379+ free_vm_area(info->ring_area);
28380+
28381+ return err;
28382+}
28383+
28384+void scsiback_disconnect(struct vscsibk_info *info)
28385+{
28386+ if (info->kthread) {
28387+ kthread_stop(info->kthread);
28388+ info->kthread = NULL;
28389+ }
28390+
28391+ wait_event(info->waiting_to_free,
28392+ atomic_read(&info->nr_unreplied_reqs) == 0);
28393+
28394+ if (info->irq) {
28395+ unbind_from_irqhandler(info->irq, info);
28396+ info->irq = 0;
28397+ }
28398+
28399+ if (info->ring.sring) {
28400+ unmap_frontend_page(info);
28401+ free_vm_area(info->ring_area);
28402+ info->ring.sring = NULL;
28403+ }
28404+}
28405+
28406+void scsiback_free(struct vscsibk_info *info)
28407+{
28408+ kmem_cache_free(scsiback_cachep, info);
28409+}
28410+
28411+int __init scsiback_interface_init(void)
28412+{
28413+ scsiback_cachep = kmem_cache_create("vscsiif_cache",
28414+ sizeof(struct vscsibk_info), 0, 0, NULL, NULL);
28415+ if (!scsiback_cachep) {
28416+ printk(KERN_ERR "scsiback: can't init scsi cache\n");
28417+ return -ENOMEM;
28418+ }
28419+
28420+ return 0;
28421+}
28422+
28423+void scsiback_interface_exit(void)
28424+{
28425+ kmem_cache_destroy(scsiback_cachep);
28426+}
28427Index: head-2008-11-25/drivers/xen/scsiback/scsiback.c
28428===================================================================
28429--- /dev/null 1970-01-01 00:00:00.000000000 +0000
28430+++ head-2008-11-25/drivers/xen/scsiback/scsiback.c 2008-07-21 11:00:33.000000000 +0200
28431@@ -0,0 +1,717 @@
28432+/*
28433+ * Xen SCSI backend driver
28434+ *
28435+ * Copyright (c) 2008, FUJITSU Limited
28436+ *
28437+ * Based on the blkback driver code.
28438+ *
28439+ * This program is free software; you can redistribute it and/or
28440+ * modify it under the terms of the GNU General Public License version 2
28441+ * as published by the Free Software Foundation; or, when distributed
28442+ * separately from the Linux kernel or incorporated into other
28443+ * software packages, subject to the following license:
28444+ *
28445+ * Permission is hereby granted, free of charge, to any person obtaining a copy
28446+ * of this source file (the "Software"), to deal in the Software without
28447+ * restriction, including without limitation the rights to use, copy, modify,
28448+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
28449+ * and to permit persons to whom the Software is furnished to do so, subject to
28450+ * the following conditions:
28451+ *
28452+ * The above copyright notice and this permission notice shall be included in
28453+ * all copies or substantial portions of the Software.
28454+ *
28455+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28456+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28457+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28458+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28459+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28460+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28461+ * IN THE SOFTWARE.
28462+ */
28463+
28464+#include <linux/spinlock.h>
28465+#include <linux/kthread.h>
28466+#include <linux/list.h>
28467+#include <linux/delay.h>
28468+#include <xen/balloon.h>
28469+#include <asm/hypervisor.h>
28470+#include <scsi/scsi.h>
28471+#include <scsi/scsi_cmnd.h>
28472+#include <scsi/scsi_host.h>
28473+#include <scsi/scsi_device.h>
28474+#include <scsi/scsi_dbg.h>
28475+#include <scsi/scsi_eh.h>
28476+
28477+#include "common.h"
28478+
28479+
28480+struct list_head pending_free;
28481+DEFINE_SPINLOCK(pending_free_lock);
28482+DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
28483+
28484+int vscsiif_reqs = VSCSIIF_BACK_MAX_PENDING_REQS;
28485+module_param_named(reqs, vscsiif_reqs, int, 0);
28486+MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate");
28487+
28488+static unsigned int log_print_stat = 0;
28489+module_param(log_print_stat, int, 0644);
28490+
28491+#define SCSIBACK_INVALID_HANDLE (~0)
28492+
28493+static pending_req_t *pending_reqs;
28494+static struct page **pending_pages;
28495+static grant_handle_t *pending_grant_handles;
28496+
28497+static int vaddr_pagenr(pending_req_t *req, int seg)
28498+{
28499+ return (req - pending_reqs) * VSCSIIF_SG_TABLESIZE + seg;
28500+}
28501+
28502+static unsigned long vaddr(pending_req_t *req, int seg)
28503+{
28504+ unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
28505+ return (unsigned long)pfn_to_kaddr(pfn);
28506+}
28507+
28508+#define pending_handle(_req, _seg) \
28509+ (pending_grant_handles[vaddr_pagenr(_req, _seg)])
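+
+/*
+ * Each pending request owns VSCSIIF_SG_TABLESIZE consecutive slots in
+ * pending_pages and pending_grant_handles; vaddr_pagenr() flattens a
+ * (request, segment) pair into an index into those arrays, and
+ * vaddr()/pending_handle() resolve a segment's kernel address and grant
+ * handle from that index.
+ */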
28510+
28511+
28512+void scsiback_fast_flush_area(pending_req_t *req)
28513+{
28514+ struct gnttab_unmap_grant_ref unmap[VSCSIIF_SG_TABLESIZE];
28515+ unsigned int i, invcount = 0;
28516+ grant_handle_t handle;
28517+ int err;
28518+
28519+ if (req->nr_segments) {
28520+ for (i = 0; i < req->nr_segments; i++) {
28521+ handle = pending_handle(req, i);
28522+ if (handle == SCSIBACK_INVALID_HANDLE)
28523+ continue;
28524+ gnttab_set_unmap_op(&unmap[i], vaddr(req, i),
28525+ GNTMAP_host_map, handle);
28526+ pending_handle(req, i) = SCSIBACK_INVALID_HANDLE;
28527+ invcount++;
28528+ }
28529+
28530+ err = HYPERVISOR_grant_table_op(
28531+ GNTTABOP_unmap_grant_ref, unmap, invcount);
28532+ BUG_ON(err);
28533+ kfree(req->sgl);
28534+ }
28535+
28536+ return;
28537+}
28538+
28539+
28540+static pending_req_t * alloc_req(struct vscsibk_info *info)
28541+{
28542+ pending_req_t *req = NULL;
28543+ unsigned long flags;
28544+
28545+ spin_lock_irqsave(&pending_free_lock, flags);
28546+ if (!list_empty(&pending_free)) {
28547+ req = list_entry(pending_free.next, pending_req_t, free_list);
28548+ list_del(&req->free_list);
28549+ }
28550+ spin_unlock_irqrestore(&pending_free_lock, flags);
28551+ return req;
28552+}
28553+
28554+
28555+static void free_req(pending_req_t *req)
28556+{
28557+ unsigned long flags;
28558+ int was_empty;
28559+
28560+ spin_lock_irqsave(&pending_free_lock, flags);
28561+ was_empty = list_empty(&pending_free);
28562+ list_add(&req->free_list, &pending_free);
28563+ spin_unlock_irqrestore(&pending_free_lock, flags);
28564+ if (was_empty)
28565+ wake_up(&pending_free_wq);
28566+}
28567+
28568+
28569+static void scsiback_notify_work(struct vscsibk_info *info)
28570+{
28571+ info->waiting_reqs = 1;
28572+ wake_up(&info->wq);
28573+}
28574+
28575+void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
28576+ uint32_t resid, pending_req_t *pending_req)
28577+{
28578+ vscsiif_response_t *ring_res;
28579+ struct vscsibk_info *info = pending_req->info;
28580+ int notify;
28581+ int more_to_do = 1;
28582+ unsigned long flags;
28583+
28584+ DPRINTK("%s\n",__FUNCTION__);
28585+
28586+ spin_lock_irqsave(&info->ring_lock, flags);
28587+
28588+ ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
28589+ info->ring.rsp_prod_pvt++;
28590+
28591+ ring_res->rslt = result;
28592+ ring_res->rqid = pending_req->rqid;
28593+
28594+ if (sense_buffer != NULL) {
28595+ memcpy(ring_res->sense_buffer, sense_buffer,
28596+ VSCSIIF_SENSE_BUFFERSIZE);
28597+ ring_res->sense_len = VSCSIIF_SENSE_BUFFERSIZE;
28598+ } else {
28599+ ring_res->sense_len = 0;
28600+ }
28601+
28602+ ring_res->residual_len = resid;
28603+
28604+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info->ring, notify);
28605+ if (info->ring.rsp_prod_pvt == info->ring.req_cons) {
28606+ RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do);
28607+ } else if (RING_HAS_UNCONSUMED_REQUESTS(&info->ring)) {
28608+ more_to_do = 1;
28609+ }
28610+
28611+ spin_unlock_irqrestore(&info->ring_lock, flags);
28612+
28613+ if (more_to_do)
28614+ scsiback_notify_work(info);
28615+
28616+ if (notify)
28617+ notify_remote_via_irq(info->irq);
28618+
28619+ free_req(pending_req);
28620+}
28621+
28622+static void scsiback_print_status(char *sense_buffer, int errors,
28623+ pending_req_t *pending_req)
28624+{
28625+ struct scsi_device *sdev = pending_req->sdev;
28626+
28627+ printk(KERN_ERR "scsiback: %d:%d:%d:%d ",sdev->host->host_no,
28628+ sdev->channel, sdev->id, sdev->lun);
28629+ printk(KERN_ERR "status = 0x%02x, message = 0x%02x, host = 0x%02x, driver = 0x%02x\n",
28630+ status_byte(errors), msg_byte(errors),
28631+ host_byte(errors), driver_byte(errors));
28632+
28633+ printk(KERN_ERR "scsiback: cmnd[0]=0x%02X\n",
28634+ pending_req->cmnd[0]);
28635+
28636+ if (CHECK_CONDITION & status_byte(errors))
28637+ __scsi_print_sense("scsiback", sense_buffer, SCSI_SENSE_BUFFERSIZE);
28638+}
28639+
28640+
28641+static void scsiback_cmd_done(struct request *req, int errors)
28642+{
28643+ pending_req_t *pending_req = req->end_io_data;
28644+ unsigned char *sense_buffer;
28645+ unsigned int resid;
28646+
28647+ sense_buffer = req->sense;
28648+ resid = req->data_len;
28649+
28650+ if (errors != 0) {
28651+ if (log_print_stat)
28652+ scsiback_print_status(sense_buffer, errors, pending_req);
28653+ }
28654+
28655+ scsiback_rsp_emulation(pending_req);
28656+
28657+ scsiback_fast_flush_area(pending_req);
28658+ scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
28659+ scsiback_put(pending_req->info);
28660+
28661+ __blk_put_request(req->q, req);
28662+}
28663+
28664+
28665+static int scsiback_gnttab_data_map(vscsiif_request_t *ring_req,
28666+ pending_req_t *pending_req)
28667+{
28668+ u32 flags;
28669+ int write;
28670+ int i, err = 0;
28671+ unsigned int data_len = 0;
28672+ struct gnttab_map_grant_ref map[VSCSIIF_SG_TABLESIZE];
28673+ struct vscsibk_info *info = pending_req->info;
28674+
28675+ int data_dir = (int)pending_req->sc_data_direction;
28676+ unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
28677+
28678+ write = (data_dir == DMA_TO_DEVICE);
28679+
28680+ if (nr_segments) {
28681+ /* sgl is freed in scsiback_fast_flush_area() */
28682+ pending_req->sgl = kmalloc(sizeof(struct scatterlist) * nr_segments,
28683+ GFP_KERNEL);
28684+ if (!pending_req->sgl) {
28685+ printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__);
28686+ return -ENOMEM;
28687+ }
28688+
28689+ for (i = 0; i < nr_segments; i++) {
28690+ flags = GNTMAP_host_map;
28691+ if (write)
28692+ flags |= GNTMAP_readonly;
28693+ gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
28694+ ring_req->seg[i].gref,
28695+ info->domid);
28696+ }
28697+
28698+ err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nr_segments);
28699+ BUG_ON(err);
28700+
28701+ for (i = 0; i < nr_segments; i++) {
28702+ if (unlikely(map[i].status != 0)) {
28703+ printk(KERN_ERR "scsiback: invalid buffer -- could not remap it\n");
28704+ map[i].handle = SCSIBACK_INVALID_HANDLE;
28705+ err |= 1;
28706+ }
28707+
28708+ pending_handle(pending_req, i) = map[i].handle;
28709+
28710+ if (err)
28711+ continue;
28712+
28713+ set_phys_to_machine(__pa(vaddr(
28714+ pending_req, i)) >> PAGE_SHIFT,
28715+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
28716+
28717+ pending_req->sgl[i].page = virt_to_page(vaddr(pending_req, i));
28718+ pending_req->sgl[i].offset = ring_req->seg[i].offset;
28719+ pending_req->sgl[i].length = ring_req->seg[i].length;
28720+ data_len += pending_req->sgl[i].length;
28721+
28722+ barrier();
28723+ if (pending_req->sgl[i].offset >= PAGE_SIZE ||
28724+ pending_req->sgl[i].length > PAGE_SIZE ||
28725+ pending_req->sgl[i].offset + pending_req->sgl[i].length > PAGE_SIZE)
28726+ err |= 1;
28727+
28728+ }
28729+
28730+ if (err)
28731+ goto fail_flush;
28732+ }
28733+
28734+ pending_req->request_bufflen = data_len;
28735+
28736+ return 0;
28737+
28738+fail_flush:
28739+ scsiback_fast_flush_area(pending_req);
28740+ return -ENOMEM;
28741+}
28742+
28743+/* quoted from scsi_lib.c/scsi_merge_bio */
28744+static int scsiback_merge_bio(struct request *rq, struct bio *bio)
28745+{
28746+ struct request_queue *q = rq->q;
28747+
28748+ bio->bi_flags &= ~(1 << BIO_SEG_VALID);
28749+ if (rq_data_dir(rq) == WRITE)
28750+ bio->bi_rw |= (1 << BIO_RW);
28751+
28752+ blk_queue_bounce(q, &bio);
28753+
28754+ if (!rq->bio)
28755+ blk_rq_bio_prep(q, rq, bio);
28756+ else if (!q->back_merge_fn(q, rq, bio))
28757+ return -EINVAL;
28758+ else {
28759+ rq->biotail->bi_next = bio;
28760+ rq->biotail = bio;
28761+ rq->hard_nr_sectors += bio_sectors(bio);
28762+ rq->nr_sectors = rq->hard_nr_sectors;
28763+ }
28764+
28765+ return 0;
28766+}
28767+
28768+
28769+/* quoted from scsi_lib.c/scsi_bi_endio */
28770+static int scsiback_bi_endio(struct bio *bio, unsigned int bytes_done, int error)
28771+{
28772+ if (bio->bi_size)
28773+ return 1;
28774+
28775+ bio_put(bio);
28776+ return 0;
28777+}
28778+
28779+
28780+
28781+/* quoted from scsi_lib.c/scsi_req_map_sg */
28782+static int request_map_sg(struct request *rq, pending_req_t *pending_req, unsigned int count)
28783+{
28784+ struct request_queue *q = rq->q;
28785+ int nr_pages;
28786+ unsigned int nsegs = count;
28787+
28788+ unsigned int data_len = 0, len, bytes, off;
28789+ struct page *page;
28790+ struct bio *bio = NULL;
28791+ int i, err, nr_vecs = 0;
28792+
28793+ for (i = 0; i < nsegs; i++) {
28794+ page = pending_req->sgl[i].page;
28795+ off = (unsigned int)pending_req->sgl[i].offset;
28796+ len = (unsigned int)pending_req->sgl[i].length;
28797+ data_len += len;
28798+
28799+ nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT;
28800+ while (len > 0) {
28801+ bytes = min_t(unsigned int, len, PAGE_SIZE - off);
28802+
28803+ if (!bio) {
28804+ nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
28805+ nr_pages -= nr_vecs;
28806+ bio = bio_alloc(GFP_KERNEL, nr_vecs);
28807+ if (!bio) {
28808+ err = -ENOMEM;
28809+ goto free_bios;
28810+ }
28811+ bio->bi_end_io = scsiback_bi_endio;
28812+ }
28813+
28814+ if (bio_add_pc_page(q, bio, page, bytes, off) !=
28815+ bytes) {
28816+ bio_put(bio);
28817+ err = -EINVAL;
28818+ goto free_bios;
28819+ }
28820+
28821+ if (bio->bi_vcnt >= nr_vecs) {
28822+ err = scsiback_merge_bio(rq, bio);
28823+ if (err) {
28824+ bio_endio(bio, bio->bi_size, 0);
28825+ goto free_bios;
28826+ }
28827+ bio = NULL;
28828+ }
28829+
28830+ page++;
28831+ len -= bytes;
28832+ off = 0;
28833+ }
28834+ }
28835+
28836+ rq->buffer = rq->data = NULL;
28837+ rq->data_len = data_len;
28838+
28839+ return 0;
28840+
28841+free_bios:
28842+ while ((bio = rq->bio) != NULL) {
28843+ rq->bio = bio->bi_next;
28844+ /*
28845+ * call endio instead of bio_put in case it was bounced
28846+ */
28847+ bio_endio(bio, bio->bi_size, 0);
28848+ }
28849+
28850+ return err;
28851+}
28852+
28853+
28854+void scsiback_cmd_exec(pending_req_t *pending_req)
28855+{
28856+ int cmd_len = (int)pending_req->cmd_len;
28857+ int data_dir = (int)pending_req->sc_data_direction;
28858+ unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
28859+ unsigned int timeout;
28860+ struct request *rq;
28861+ int write;
28862+
28863+ DPRINTK("%s\n",__FUNCTION__);
28864+
28865+ /* make sure the backend does not time out earlier than the frontend */
28866+ if (pending_req->timeout_per_command)
28867+ timeout = pending_req->timeout_per_command * HZ;
28868+ else
28869+ timeout = VSCSIIF_TIMEOUT;
28870+
28871+ write = (data_dir == DMA_TO_DEVICE);
28872+ rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
28873+
28874+ rq->flags |= REQ_BLOCK_PC;
28875+ rq->cmd_len = cmd_len;
28876+ memcpy(rq->cmd, pending_req->cmnd, cmd_len);
28877+
28878+ memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
28879+ rq->sense = pending_req->sense_buffer;
28880+ rq->sense_len = 0;
28881+
28882+ /* not allowed to retry in backend. */
28883+ rq->retries = 0;
28884+ rq->timeout = timeout;
28885+ rq->end_io_data = pending_req;
28886+
28887+ if (nr_segments) {
28888+
28889+ if (request_map_sg(rq, pending_req, nr_segments)) {
28890+ printk(KERN_ERR "scsiback: SG Request Map Error\n");
28891+ return;
28892+ }
28893+ }
28894+
28895+ scsiback_get(pending_req->info);
28896+ blk_execute_rq_nowait(rq->q, NULL, rq, 1, scsiback_cmd_done);
28897+
28898+ return;
28899+}
28900+
28901+
28902+static void scsiback_device_reset_exec(pending_req_t *pending_req)
28903+{
28904+ struct vscsibk_info *info = pending_req->info;
28905+ int err;
28906+ struct scsi_device *sdev = pending_req->sdev;
28907+
28908+ scsiback_get(info);
28909+ err = scsi_reset_provider(sdev, SCSI_TRY_RESET_DEVICE);
28910+
28911+ scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
28912+ scsiback_put(info);
28913+
28914+ return;
28915+}
28916+
28917+
28918+irqreturn_t scsiback_intr(int irq, void *dev_id, struct pt_regs *regs)
28919+{
28920+ scsiback_notify_work((struct vscsibk_info *)dev_id);
28921+ return IRQ_HANDLED;
28922+}
28923+
28924+static int prepare_pending_reqs(struct vscsibk_info *info,
28925+ vscsiif_request_t *ring_req, pending_req_t *pending_req)
28926+{
28927+ struct scsi_device *sdev;
28928+ struct ids_tuple vir;
28929+ int err = -EINVAL;
28930+
28931+ DPRINTK("%s\n",__FUNCTION__);
28932+
28933+ pending_req->rqid = ring_req->rqid;
28934+ pending_req->act = ring_req->act;
28935+
28936+ pending_req->info = info;
28937+
28938+ vir.chn = ring_req->channel;
28939+ vir.tgt = ring_req->id;
28940+ vir.lun = ring_req->lun;
28941+
28942+ rmb();
28943+ sdev = scsiback_do_translation(info, &vir);
28944+ if (!sdev) {
28945+ pending_req->sdev = NULL;
28946+ DPRINTK("scsiback: doesn't exist.\n");
28947+ err = -ENODEV;
28948+ goto invalid_value;
28949+ }
28950+ pending_req->sdev = sdev;
28951+
28952+ /* request range check from frontend */
28953+ pending_req->sc_data_direction = ring_req->sc_data_direction;
28954+ barrier();
28955+ if ((pending_req->sc_data_direction != DMA_BIDIRECTIONAL) &&
28956+ (pending_req->sc_data_direction != DMA_TO_DEVICE) &&
28957+ (pending_req->sc_data_direction != DMA_FROM_DEVICE) &&
28958+ (pending_req->sc_data_direction != DMA_NONE)) {
28959+ DPRINTK("scsiback: invalid parameter data_dir = %d\n",
28960+ pending_req->sc_data_direction);
28961+ err = -EINVAL;
28962+ goto invalid_value;
28963+ }
28964+
28965+ pending_req->nr_segments = ring_req->nr_segments;
28966+ barrier();
28967+ if (pending_req->nr_segments > VSCSIIF_SG_TABLESIZE) {
28968+ DPRINTK("scsiback: invalid parameter nr_seg = %d\n",
28969+ pending_req->nr_segments);
28970+ err = -EINVAL;
28971+ goto invalid_value;
28972+ }
28973+
28974+ pending_req->cmd_len = ring_req->cmd_len;
28975+ barrier();
28976+ if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) {
28977+ DPRINTK("scsiback: invalid parameter cmd_len = %d\n",
28978+ pending_req->cmd_len);
28979+ err = -EINVAL;
28980+ goto invalid_value;
28981+ }
28982+ memcpy(pending_req->cmnd, ring_req->cmnd, pending_req->cmd_len);
28983+
28984+ pending_req->timeout_per_command = ring_req->timeout_per_command;
28985+
28986+ if(scsiback_gnttab_data_map(ring_req, pending_req)) {
28987+ DPRINTK("scsiback: invalid buffer\n");
28988+ err = -EINVAL;
28989+ goto invalid_value;
28990+ }
28991+
28992+ return 0;
28993+
28994+invalid_value:
28995+ return err;
28996+}
28997+
28998+
28999+static int scsiback_do_cmd_fn(struct vscsibk_info *info)
29000+{
29001+ struct vscsiif_back_ring *ring = &info->ring;
29002+ vscsiif_request_t *ring_req;
29003+
29004+ pending_req_t *pending_req;
29005+ RING_IDX rc, rp;
29006+ int err, more_to_do = 0;
29007+
29008+ DPRINTK("%s\n",__FUNCTION__);
29009+
29010+ rc = ring->req_cons;
29011+ rp = ring->sring->req_prod;
29012+ rmb();
29013+
29014+ while (rc != rp) {
29015+ if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
29016+ break;
29017+ pending_req = alloc_req(info);
29018+ if (NULL == pending_req) {
29019+ more_to_do = 1;
29020+ break;
29021+ }
29022+
29023+ ring_req = RING_GET_REQUEST(ring, rc);
29024+ ring->req_cons = ++rc;
29025+
29026+ err = prepare_pending_reqs(info, ring_req,
29027+ pending_req);
29028+ if (err == -EINVAL) {
29029+ scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
29030+ 0, pending_req);
29031+ continue;
29032+ } else if (err == -ENODEV) {
29033+ scsiback_do_resp_with_sense(NULL, (DID_NO_CONNECT << 16),
29034+ 0, pending_req);
29035+ continue;
29036+ }
29037+
29038+ if (pending_req->act == VSCSIIF_ACT_SCSI_CDB) {
29039+ scsiback_req_emulation_or_cmdexec(pending_req);
29040+ } else if (pending_req->act == VSCSIIF_ACT_SCSI_RESET) {
29041+ scsiback_device_reset_exec(pending_req);
29042+ } else {
29043+ printk(KERN_ERR "scsiback: invalid parameter for request\n");
29044+ scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
29045+ 0, pending_req);
29046+ continue;
29047+ }
29048+ }
29049+
29050+ if (RING_HAS_UNCONSUMED_REQUESTS(ring))
29051+ more_to_do = 1;
29052+
29053+ /* Yield point for this unbounded loop. */
29054+ cond_resched();
29055+
29056+ return more_to_do;
29057+}
29058+
29059+
29060+int scsiback_schedule(void *data)
29061+{
29062+ struct vscsibk_info *info = (struct vscsibk_info *)data;
29063+
29064+ DPRINTK("%s\n",__FUNCTION__);
29065+
29066+ while (!kthread_should_stop()) {
29067+ wait_event_interruptible(
29068+ info->wq,
29069+ info->waiting_reqs || kthread_should_stop());
29070+ wait_event_interruptible(
29071+ pending_free_wq,
29072+ !list_empty(&pending_free) || kthread_should_stop());
29073+
29074+ info->waiting_reqs = 0;
29075+ smp_mb();
29076+
29077+ if (scsiback_do_cmd_fn(info))
29078+ info->waiting_reqs = 1;
29079+ }
29080+
29081+ return 0;
29082+}
29083+
29084+
29085+static int __init scsiback_init(void)
29086+{
29087+ int i, mmap_pages;
29088+
29089+ if (!is_running_on_xen())
29090+ return -ENODEV;
29091+
29092+ mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE;
29093+
29094+ pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
29095+ vscsiif_reqs, GFP_KERNEL);
29096+ pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
29097+ mmap_pages, GFP_KERNEL);
29098+ pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
29099+
29100+ if (!pending_reqs || !pending_grant_handles || !pending_pages)
29101+ goto out_of_memory;
29102+
29103+ for (i = 0; i < mmap_pages; i++)
29104+ pending_grant_handles[i] = SCSIBACK_INVALID_HANDLE;
29105+
29106+ if (scsiback_interface_init() < 0)
29107+ goto out_of_kmem;
29108+
29109+ memset(pending_reqs, 0, sizeof(pending_reqs[0]) * vscsiif_reqs);
29110+ INIT_LIST_HEAD(&pending_free);
29111+
29112+ for (i = 0; i < vscsiif_reqs; i++)
29113+ list_add_tail(&pending_reqs[i].free_list, &pending_free);
29114+
29115+ if (scsiback_xenbus_init())
29116+ goto out_of_xenbus;
29117+
29118+ scsiback_emulation_init();
29119+
29120+ return 0;
29121+
29122+out_of_xenbus:
29123+ scsiback_xenbus_unregister();
29124+out_of_kmem:
29125+ scsiback_interface_exit();
29126+out_of_memory:
29127+ kfree(pending_reqs);
29128+ kfree(pending_grant_handles);
29129+ free_empty_pages_and_pagevec(pending_pages, mmap_pages);
29130+ printk(KERN_ERR "scsiback: %s: out of memory\n", __FUNCTION__);
29131+ return -ENOMEM;
29132+}
29133+
29134+static void __exit scsiback_exit(void)
29135+{
29136+ scsiback_xenbus_unregister();
29137+ scsiback_interface_exit();
29138+ kfree(pending_reqs);
29139+ kfree(pending_grant_handles);
29140+ free_empty_pages_and_pagevec(pending_pages, (vscsiif_reqs * VSCSIIF_SG_TABLESIZE));
29141+
29142+}
29143+
29144+module_init(scsiback_init);
29145+module_exit(scsiback_exit);
29146+
29147+MODULE_DESCRIPTION("Xen SCSI backend driver");
29148+MODULE_LICENSE("Dual BSD/GPL");
29149Index: head-2008-11-25/drivers/xen/scsiback/translate.c
29150===================================================================
29151--- /dev/null 1970-01-01 00:00:00.000000000 +0000
29152+++ head-2008-11-25/drivers/xen/scsiback/translate.c 2008-07-21 11:00:33.000000000 +0200
29153@@ -0,0 +1,168 @@
29154+/*
29155+ * Xen SCSI backend driver
29156+ *
29157+ * Copyright (c) 2008, FUJITSU Limited
29158+ *
29159+ * This program is free software; you can redistribute it and/or
29160+ * modify it under the terms of the GNU General Public License version 2
29161+ * as published by the Free Software Foundation; or, when distributed
29162+ * separately from the Linux kernel or incorporated into other
29163+ * software packages, subject to the following license:
29164+ *
29165+ * Permission is hereby granted, free of charge, to any person obtaining a copy
29166+ * of this source file (the "Software"), to deal in the Software without
29167+ * restriction, including without limitation the rights to use, copy, modify,
29168+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
29169+ * and to permit persons to whom the Software is furnished to do so, subject to
29170+ * the following conditions:
29171+ *
29172+ * The above copyright notice and this permission notice shall be included in
29173+ * all copies or substantial portions of the Software.
29174+ *
29175+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29176+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29177+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29178+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29179+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29180+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29181+ * IN THE SOFTWARE.
29182+ */
29183+
29184+#include <linux/list.h>
29185+#include <linux/gfp.h>
29186+
29187+#include "common.h"
29188+
29189+/*
29190+ Initialize the translation entry list
29191+*/
29192+void scsiback_init_translation_table(struct vscsibk_info *info)
29193+{
29194+ INIT_LIST_HEAD(&info->v2p_entry_lists);
29195+ spin_lock_init(&info->v2p_lock);
29196+}
29197+
29198+
29199+/*
29200+ Add a new translation entry
29201+*/
29202+int scsiback_add_translation_entry(struct vscsibk_info *info,
29203+ struct scsi_device *sdev, struct ids_tuple *v)
29204+{
29205+ int err = 0;
29206+ struct v2p_entry *entry;
29207+ struct v2p_entry *new;
29208+ struct list_head *head = &(info->v2p_entry_lists);
29209+ unsigned long flags;
29210+
29211+ spin_lock_irqsave(&info->v2p_lock, flags);
29212+
29213+ /* Check for a duplicate assignment to the same virtual ID */
29214+ list_for_each_entry(entry, head, l) {
29215+ if ((entry->v.chn == v->chn) &&
29216+ (entry->v.tgt == v->tgt) &&
29217+ (entry->v.lun == v->lun)) {
29218+ printk(KERN_WARNING "scsiback: Virtual ID is already used. "
29219+ "Assignment was not performed.\n");
29220+ err = -EEXIST;
29221+ goto out;
29222+ }
29223+
29224+ }
29225+
29226+ /* Create a new translation entry and add to the list */
29227+ if ((new = kmalloc(sizeof(struct v2p_entry), GFP_ATOMIC)) == NULL) {
29228+ printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__);
29229+ err = -ENOMEM;
29230+ goto out;
29231+ }
29232+ new->v = *v;
29233+ new->sdev = sdev;
29234+ list_add_tail(&new->l, head);
29235+
29236+out:
29237+ spin_unlock_irqrestore(&info->v2p_lock, flags);
29238+ return err;
29239+}
29240+
29241+
29242+/*
29243+ Delete the specified translation entry
29244+*/
29245+int scsiback_del_translation_entry(struct vscsibk_info *info,
29246+ struct ids_tuple *v)
29247+{
29248+ struct v2p_entry *entry;
29249+ struct list_head *head = &(info->v2p_entry_lists);
29250+ unsigned long flags;
29251+
29252+ spin_lock_irqsave(&info->v2p_lock, flags);
29253+ /* Find the specified translation entry */
29254+ list_for_each_entry(entry, head, l) {
29255+ if ((entry->v.chn == v->chn) &&
29256+ (entry->v.tgt == v->tgt) &&
29257+ (entry->v.lun == v->lun)) {
29258+ goto found;
29259+ }
29260+ }
29261+
29262+ spin_unlock_irqrestore(&info->v2p_lock, flags);
29263+ return 1;
29264+
29265+found:
29266+ /* Delete the specified translation entry */
29267+ scsi_device_put(entry->sdev);
29268+ list_del(&entry->l);
29269+ kfree(entry);
29270+
29271+ spin_unlock_irqrestore(&info->v2p_lock, flags);
29272+ return 0;
29273+}
29274+
29275+
29276+/*
29277+ Perform virtual to physical translation
29278+*/
29279+struct scsi_device *scsiback_do_translation(struct vscsibk_info *info,
29280+ struct ids_tuple *v)
29281+{
29282+ struct v2p_entry *entry;
29283+ struct list_head *head = &(info->v2p_entry_lists);
29284+ struct scsi_device *sdev = NULL;
29285+ unsigned long flags;
29286+
29287+ spin_lock_irqsave(&info->v2p_lock, flags);
29288+ list_for_each_entry(entry, head, l) {
29289+ if ((entry->v.chn == v->chn) &&
29290+ (entry->v.tgt == v->tgt) &&
29291+ (entry->v.lun == v->lun)) {
29292+ sdev = entry->sdev;
29293+ goto out;
29294+ }
29295+ }
29296+out:
29297+ spin_unlock_irqrestore(&info->v2p_lock, flags);
29298+ return sdev;
29299+}
29300+
29301+
29302+/*
29303+ Release all translation entries
29304+*/
29305+void scsiback_release_translation_entry(struct vscsibk_info *info)
29306+{
29307+ struct v2p_entry *entry, *tmp;
29308+ struct list_head *head = &(info->v2p_entry_lists);
29309+ unsigned long flags;
29310+
29311+ spin_lock_irqsave(&info->v2p_lock, flags);
29312+ list_for_each_entry_safe(entry, tmp, head, l) {
29313+ scsi_device_put(entry->sdev);
29314+ list_del(&entry->l);
29315+ kfree(entry);
29316+ }
29317+
29318+ spin_unlock_irqrestore(&info->v2p_lock, flags);
29319+ return;
29320+
29321+}
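+
+/*
+ An illustrative sketch of how these entry points are used elsewhere in the
+ driver (the variable names here are assumptions):
+
+	scsiback_init_translation_table(info);              - at probe time
+	err  = scsiback_add_translation_entry(info, sdev, &vir);
+	sdev = scsiback_do_translation(info, &vir);          - per request
+	scsiback_del_translation_entry(info, &vir);          - on LUN removal
+	scsiback_release_translation_entry(info);            - on backend teardown
+*/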
29322Index: head-2008-11-25/drivers/xen/scsiback/xenbus.c
29323===================================================================
29324--- /dev/null 1970-01-01 00:00:00.000000000 +0000
29325+++ head-2008-11-25/drivers/xen/scsiback/xenbus.c 2008-07-21 11:00:33.000000000 +0200
29326@@ -0,0 +1,368 @@
29327+/*
29328+ * Xen SCSI backend driver
29329+ *
29330+ * Copyright (c) 2008, FUJITSU Limited
29331+ *
29332+ * Based on the blkback driver code.
29333+ *
29334+ * This program is free software; you can redistribute it and/or
29335+ * modify it under the terms of the GNU General Public License version 2
29336+ * as published by the Free Software Foundation; or, when distributed
29337+ * separately from the Linux kernel or incorporated into other
29338+ * software packages, subject to the following license:
29339+ *
29340+ * Permission is hereby granted, free of charge, to any person obtaining a copy
29341+ * of this source file (the "Software"), to deal in the Software without
29342+ * restriction, including without limitation the rights to use, copy, modify,
29343+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
29344+ * and to permit persons to whom the Software is furnished to do so, subject to
29345+ * the following conditions:
29346+ *
29347+ * The above copyright notice and this permission notice shall be included in
29348+ * all copies or substantial portions of the Software.
29349+ *
29350+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29351+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29352+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29353+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29354+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29355+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29356+ * IN THE SOFTWARE.
29357+ */
29358+
29359+#include <stdarg.h>
29360+#include <linux/module.h>
29361+#include <linux/kthread.h>
29362+#include <scsi/scsi.h>
29363+#include <scsi/scsi_host.h>
29364+#include <scsi/scsi_device.h>
29365+
29366+#include "common.h"
29367+
29368+struct backend_info
29369+{
29370+ struct xenbus_device *dev;
29371+ struct vscsibk_info *info;
29372+};
29373+
29374+
29375+static int __vscsiif_name(struct backend_info *be, char *buf)
29376+{
29377+ struct xenbus_device *dev = be->dev;
29378+ unsigned int domid, id;
29379+
29380+ sscanf(dev->nodename, "backend/vscsi/%u/%u", &domid, &id);
29381+ snprintf(buf, TASK_COMM_LEN, "vscsi.%u.%u", be->info->domid, id);
29382+
29383+ return 0;
29384+}
29385+
29386+static int scsiback_map(struct backend_info *be)
29387+{
29388+ struct xenbus_device *dev = be->dev;
29389+ unsigned long ring_ref;
29390+ unsigned int evtchn;
29391+ int err;
29392+ char name[TASK_COMM_LEN];
29393+
29394+ err = xenbus_gather(XBT_NIL, dev->otherend,
29395+ "ring-ref", "%lu", &ring_ref,
29396+ "event-channel", "%u", &evtchn, NULL);
29397+ if (err) {
29398+ xenbus_dev_fatal(dev, err, "reading %s ring", dev->otherend);
29399+ return err;
29400+ }
29401+
29402+ err = scsiback_init_sring(be->info, ring_ref, evtchn);
29403+ if (err)
29404+ return err;
29405+
29406+ err = __vscsiif_name(be, name);
29407+ if (err) {
29408+ xenbus_dev_error(dev, err, "get scsiback dev name");
29409+ return err;
29410+ }
29411+
29412+ be->info->kthread = kthread_run(scsiback_schedule, be->info, name);
29413+ if (IS_ERR(be->info->kthread)) {
29414+ err = PTR_ERR(be->info->kthread);
29415+ be->info->kthread = NULL;
29416+ xenbus_dev_error(be->dev, err, "start vscsiif");
29417+ return err;
29418+ }
29419+
29420+ return 0;
29421+}
29422+
29423+
29424+struct scsi_device *scsiback_get_scsi_device(struct ids_tuple *phy)
29425+{
29426+ struct Scsi_Host *shost;
29427+ struct scsi_device *sdev = NULL;
29428+
29429+ shost = scsi_host_lookup(phy->hst);
29430+ if (IS_ERR(shost)) {
29431+ printk(KERN_ERR "scsiback: host%d doesn't exist.\n",
29432+ phy->hst);
29433+ return NULL;
29434+ }
29435+ sdev = scsi_device_lookup(shost, phy->chn, phy->tgt, phy->lun);
29436+ if (!sdev) {
29437+ printk(KERN_ERR "scsiback: %d:%d:%d:%d doesn't exist.\n",
29438+ phy->hst, phy->chn, phy->tgt, phy->lun);
29439+ scsi_host_put(shost);
29440+ return NULL;
29441+ }
29442+
29443+ scsi_host_put(shost);
29444+ return (sdev);
29445+}
29446+
29447+#define VSCSIBACK_OP_ADD_OR_DEL_LUN 1
29448+#define VSCSIBACK_OP_UPDATEDEV_STATE 2
29449+
29450+
29451+static void scsiback_do_lun_hotplug(struct backend_info *be, int op)
29452+{
29453+ int i, err = 0;
29454+ struct ids_tuple phy, vir;
29455+ int device_state;
29456+ char str[64], state_str[64];
29457+ char **dir;
29458+ unsigned int dir_n = 0;
29459+ struct xenbus_device *dev = be->dev;
29460+ struct scsi_device *sdev;
29461+
29462+ dir = xenbus_directory(XBT_NIL, dev->nodename, "vscsi-devs", &dir_n);
29463+ if (IS_ERR(dir))
29464+ return;
29465+
29466+ for (i = 0; i < dir_n; i++) {
29467+
29468+ /* read status */
29469+ snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]);
29470+ err = xenbus_scanf(XBT_NIL, dev->nodename, state_str, "%u",
29471+ &device_state);
29472+ if (XENBUS_EXIST_ERR(err))
29473+ continue;
29474+
29475+ /* physical SCSI device */
29476+ snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", dir[i]);
29477+ err = xenbus_scanf(XBT_NIL, dev->nodename, str,
29478+ "%u:%u:%u:%u", &phy.hst, &phy.chn, &phy.tgt, &phy.lun);
29479+ if (XENBUS_EXIST_ERR(err)) {
29480+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
29481+ "%d", XenbusStateClosed);
29482+ continue;
29483+ }
29484+
29485+ /* virtual SCSI device */
29486+ snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
29487+ err = xenbus_scanf(XBT_NIL, dev->nodename, str,
29488+ "%u:%u:%u:%u", &vir.hst, &vir.chn, &vir.tgt, &vir.lun);
29489+ if (XENBUS_EXIST_ERR(err)) {
29490+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
29491+ "%d", XenbusStateClosed);
29492+ continue;
29493+ }
29494+
29495+ switch (op) {
29496+ case VSCSIBACK_OP_ADD_OR_DEL_LUN:
29497+ if (device_state == XenbusStateInitialising) {
29498+ sdev = scsiback_get_scsi_device(&phy);
29499+ if (!sdev)
29500+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
29501+ "%d", XenbusStateClosed);
29502+ else {
29503+ err = scsiback_add_translation_entry(be->info, sdev, &vir);
29504+ if (!err) {
29505+ if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
29506+ "%d", XenbusStateInitialised)) {
29507+ printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
29508+ scsiback_del_translation_entry(be->info, &vir);
29509+ }
29510+ } else {
29511+ scsi_device_put(sdev);
29512+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
29513+ "%d", XenbusStateClosed);
29514+ }
29515+ }
29516+ }
29517+
29518+ if (device_state == XenbusStateClosing) {
29519+ if (!scsiback_del_translation_entry(be->info, &vir)) {
29520+ if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
29521+ "%d", XenbusStateClosed))
29522+ printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
29523+ }
29524+ }
29525+ break;
29526+
29527+ case VSCSIBACK_OP_UPDATEDEV_STATE:
29528+ if (device_state == XenbusStateInitialised) {
29529+ /* modify vscsi-devs/dev-x/state */
29530+ if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
29531+ "%d", XenbusStateConnected)) {
29532+ printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
29533+ scsiback_del_translation_entry(be->info, &vir);
29534+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
29535+ "%d", XenbusStateClosed);
29536+ }
29537+ }
29538+ break;
29539+		/* Additional handling can be added here when necessary. */
29540+ default:
29541+ break;
29542+ }
29543+ }
29544+
29545+ kfree(dir);
29546+	return;
29547+}
29548+
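/*
 * Illustrative note (not part of the original patch): the loop above scans a
 * xenstore layout of roughly the following shape under the backend's
 * nodename; the device name "dev-0" and the values shown are hypothetical.
 *
 *	vscsi-devs/dev-0/p-dev  = "0:0:1:0"   physical host:chn:tgt:lun
 *	vscsi-devs/dev-0/v-dev  = "0:0:0:0"   virtual  host:chn:tgt:lun
 *	vscsi-devs/dev-0/state  = "1"         XenbusState of this LUN entry
 *
 * Depending on 'op' and the per-LUN state, the entry is either translated
 * and advanced to Initialised/Connected, or torn down and marked Closed.
 */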
29549+
29550+static void scsiback_frontend_changed(struct xenbus_device *dev,
29551+ enum xenbus_state frontend_state)
29552+{
29553+ struct backend_info *be = dev->dev.driver_data;
29554+ int err;
29555+
29556+ switch (frontend_state) {
29557+ case XenbusStateInitialising:
29558+ break;
29559+ case XenbusStateInitialised:
29560+ err = scsiback_map(be);
29561+ if (err)
29562+ break;
29563+
29564+ scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN);
29565+ xenbus_switch_state(dev, XenbusStateConnected);
29566+
29567+ break;
29568+ case XenbusStateConnected:
29569+
29570+ scsiback_do_lun_hotplug(be, VSCSIBACK_OP_UPDATEDEV_STATE);
29571+
29572+ if (dev->state == XenbusStateConnected)
29573+ break;
29574+
29575+ xenbus_switch_state(dev, XenbusStateConnected);
29576+
29577+ break;
29578+
29579+ case XenbusStateClosing:
29580+ scsiback_disconnect(be->info);
29581+ xenbus_switch_state(dev, XenbusStateClosing);
29582+ break;
29583+
29584+ case XenbusStateClosed:
29585+ xenbus_switch_state(dev, XenbusStateClosed);
29586+ if (xenbus_dev_is_online(dev))
29587+ break;
29588+ /* fall through if not online */
29589+ case XenbusStateUnknown:
29590+ device_unregister(&dev->dev);
29591+ break;
29592+
29593+ case XenbusStateReconfiguring:
29594+ scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN);
29595+
29596+ xenbus_switch_state(dev, XenbusStateReconfigured);
29597+
29598+ break;
29599+
29600+ default:
29601+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
29602+ frontend_state);
29603+ break;
29604+ }
29605+}
29606+
29607+
29608+static int scsiback_remove(struct xenbus_device *dev)
29609+{
29610+ struct backend_info *be = dev->dev.driver_data;
29611+
29612+ if (be->info) {
29613+ scsiback_disconnect(be->info);
29614+ scsiback_release_translation_entry(be->info);
29615+ scsiback_free(be->info);
29616+ be->info = NULL;
29617+ }
29618+
29619+ kfree(be);
29620+ dev->dev.driver_data = NULL;
29621+
29622+ return 0;
29623+}
29624+
29625+
29626+static int scsiback_probe(struct xenbus_device *dev,
29627+ const struct xenbus_device_id *id)
29628+{
29629+ int err;
29630+
29631+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
29632+ GFP_KERNEL);
29633+
29634+ DPRINTK("%p %d\n", dev, dev->otherend_id);
29635+
29636+ if (!be) {
29637+ xenbus_dev_fatal(dev, -ENOMEM,
29638+ "allocating backend structure");
29639+ return -ENOMEM;
29640+ }
29641+ be->dev = dev;
29642+ dev->dev.driver_data = be;
29643+
29644+ be->info = vscsibk_info_alloc(dev->otherend_id);
29645+ if (IS_ERR(be->info)) {
29646+ err = PTR_ERR(be->info);
29647+ be->info = NULL;
29648+ xenbus_dev_fatal(dev, err, "creating scsihost interface");
29649+ goto fail;
29650+ }
29651+
29652+ be->info->dev = dev;
29653+ be->info->irq = 0;
29654+
29655+ scsiback_init_translation_table(be->info);
29656+
29657+ err = xenbus_switch_state(dev, XenbusStateInitWait);
29658+ if (err)
29659+ goto fail;
29660+
29661+ return 0;
29662+
29663+
29664+fail:
29665+ printk(KERN_WARNING "scsiback: %s failed\n",__FUNCTION__);
29666+ scsiback_remove(dev);
29667+
29668+ return err;
29669+}
29670+
29671+
29672+static struct xenbus_device_id scsiback_ids[] = {
29673+ { "vscsi" },
29674+ { "" }
29675+};
29676+
29677+static struct xenbus_driver scsiback = {
29678+ .name = "vscsi",
29679+ .owner = THIS_MODULE,
29680+ .ids = scsiback_ids,
29681+ .probe = scsiback_probe,
29682+ .remove = scsiback_remove,
29683+ .otherend_changed = scsiback_frontend_changed
29684+};
29685+
29686+int scsiback_xenbus_init(void)
29687+{
29688+ return xenbus_register_backend(&scsiback);
29689+}
29690+
29691+void scsiback_xenbus_unregister(void)
29692+{
29693+ xenbus_unregister_driver(&scsiback);
29694+}
29695Index: head-2008-11-25/drivers/xen/scsifront/Makefile
29696===================================================================
29697--- /dev/null 1970-01-01 00:00:00.000000000 +0000
29698+++ head-2008-11-25/drivers/xen/scsifront/Makefile 2008-07-21 11:00:33.000000000 +0200
29699@@ -0,0 +1,3 @@
29700+
29701+obj-$(CONFIG_XEN_SCSI_FRONTEND) := xenscsi.o
29702+xenscsi-objs := scsifront.o xenbus.o
29703Index: head-2008-11-25/drivers/xen/scsifront/common.h
29704===================================================================
29705--- /dev/null 1970-01-01 00:00:00.000000000 +0000
29706+++ head-2008-11-25/drivers/xen/scsifront/common.h 2008-07-21 11:00:33.000000000 +0200
29707@@ -0,0 +1,129 @@
29708+/*
29709+ * Xen SCSI frontend driver
29710+ *
29711+ * Copyright (c) 2008, FUJITSU Limited
29712+ *
29713+ * This program is free software; you can redistribute it and/or
29714+ * modify it under the terms of the GNU General Public License version 2
29715+ * as published by the Free Software Foundation; or, when distributed
29716+ * separately from the Linux kernel or incorporated into other
29717+ * software packages, subject to the following license:
29718+ *
29719+ * Permission is hereby granted, free of charge, to any person obtaining a copy
29720+ * of this source file (the "Software"), to deal in the Software without
29721+ * restriction, including without limitation the rights to use, copy, modify,
29722+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
29723+ * and to permit persons to whom the Software is furnished to do so, subject to
29724+ * the following conditions:
29725+ *
29726+ * The above copyright notice and this permission notice shall be included in
29727+ * all copies or substantial portions of the Software.
29728+ *
29729+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29730+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29731+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29732+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29733+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29734+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29735+ * IN THE SOFTWARE.
29736+ */
29737+
29738+#ifndef __XEN_DRIVERS_SCSIFRONT_H__
29739+#define __XEN_DRIVERS_SCSIFRONT_H__
29740+
29741+#include <linux/version.h>
29742+#include <linux/module.h>
29743+#include <linux/kernel.h>
29744+#include <linux/device.h>
29745+#include <linux/kthread.h>
29746+#include <linux/wait.h>
29747+#include <linux/interrupt.h>
29748+#include <linux/spinlock.h>
29749+#include <linux/sched.h>
29750+#include <linux/blkdev.h>
29751+#include <scsi/scsi_cmnd.h>
29752+#include <scsi/scsi_device.h>
29753+#include <scsi/scsi.h>
29754+#include <scsi/scsi_host.h>
29755+#include <xen/xenbus.h>
29756+#include <xen/gnttab.h>
29757+#include <xen/evtchn.h>
29758+#include <xen/interface/xen.h>
29759+#include <xen/interface/io/ring.h>
29760+#include <xen/interface/io/vscsiif.h>
29761+#include <asm/delay.h>
29762+
29763+
29764+#define GRANT_INVALID_REF 0
29765+#define VSCSI_IN_ABORT 1
29766+#define VSCSI_IN_RESET 2
29767+
29768+/* tuning parameters */
29769+#define VSCSIIF_DEFAULT_CMD_PER_LUN 10
29770+#define VSCSIIF_MAX_TARGET 64
29771+#define VSCSIIF_MAX_LUN 255
29772+
29773+#define VSCSIIF_RING_SIZE \
29774+ __RING_SIZE((struct vscsiif_sring *)0, PAGE_SIZE)
29775+#define VSCSIIF_MAX_REQS VSCSIIF_RING_SIZE
29776+
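/*
 * Sizing note (illustrative, not from the original patch): __RING_SIZE()
 * yields the largest power-of-two number of request/response slots that
 * fit in one shared page, so VSCSIIF_MAX_REQS equals the ring size and the
 * shadow[] array in struct vscsifrnt_info has one slot per possible rqid.
 * A hypothetical compile-time check of that assumption would be
 * BUILD_BUG_ON(VSCSIIF_MAX_REQS > VSCSIIF_RING_SIZE), placed inside any
 * frontend function.
 */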
29777+struct vscsifrnt_shadow {
29778+ uint16_t next_free;
29779+
29780+ /* command between backend and frontend
29781+ * VSCSIIF_ACT_SCSI_CDB or VSCSIIF_ACT_SCSI_RESET */
29782+ unsigned char act;
29783+
29784+ /* do reset function */
29785+ wait_queue_head_t wq_reset; /* reset work queue */
29786+ int wait_reset; /* reset work queue condition */
29787+ int32_t rslt_reset; /* reset response status */
29788+				/* (SUCCESS or FAILED) */
29789+
29790+ /* for DMA_TO_DEVICE(1), DMA_FROM_DEVICE(2), DMA_NONE(3)
29791+ requests */
29792+ unsigned int sc_data_direction;
29793+
29794+	/* Number of scatter-gather segments */
29795+ unsigned int nr_segments;
29796+
29797+	/* the struct scsi_cmnd issued by the midlayer, stored as an unsigned long */
29798+ unsigned long req_scsi_cmnd;
29799+ int gref[VSCSIIF_SG_TABLESIZE];
29800+};
29801+
29802+struct vscsifrnt_info {
29803+ struct xenbus_device *dev;
29804+
29805+ struct Scsi_Host *host;
29806+
29807+ spinlock_t io_lock;
29808+ spinlock_t shadow_lock;
29809+ unsigned int evtchn;
29810+ unsigned int irq;
29811+
29812+ grant_ref_t ring_ref;
29813+ struct vscsiif_front_ring ring;
29814+ struct vscsiif_response ring_res;
29815+
29816+ struct vscsifrnt_shadow shadow[VSCSIIF_MAX_REQS];
29817+ uint32_t shadow_free;
29818+
29819+ struct task_struct *kthread;
29820+ wait_queue_head_t wq;
29821+ unsigned int waiting_resp;
29822+
29823+};
29824+
29825+#define DPRINTK(_f, _a...) \
29826+ pr_debug("(file=%s, line=%d) " _f, \
29827+ __FILE__ , __LINE__ , ## _a )
29828+
29829+int scsifront_xenbus_init(void);
29830+void scsifront_xenbus_unregister(void);
29831+int scsifront_schedule(void *data);
29832+irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs);
29833+int scsifront_cmd_done(struct vscsifrnt_info *info);
29834+
29835+
29836+#endif /* __XEN_DRIVERS_SCSIFRONT_H__ */
29837Index: head-2008-11-25/drivers/xen/scsifront/scsifront.c
29838===================================================================
29839--- /dev/null 1970-01-01 00:00:00.000000000 +0000
29840+++ head-2008-11-25/drivers/xen/scsifront/scsifront.c 2008-07-21 11:00:33.000000000 +0200
29841@@ -0,0 +1,511 @@
29842+/*
29843+ * Xen SCSI frontend driver
29844+ *
29845+ * Copyright (c) 2008, FUJITSU Limited
29846+ *
29847+ * This program is free software; you can redistribute it and/or
29848+ * modify it under the terms of the GNU General Public License version 2
29849+ * as published by the Free Software Foundation; or, when distributed
29850+ * separately from the Linux kernel or incorporated into other
29851+ * software packages, subject to the following license:
29852+ *
29853+ * Permission is hereby granted, free of charge, to any person obtaining a copy
29854+ * of this source file (the "Software"), to deal in the Software without
29855+ * restriction, including without limitation the rights to use, copy, modify,
29856+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
29857+ * and to permit persons to whom the Software is furnished to do so, subject to
29858+ * the following conditions:
29859+ *
29860+ * The above copyright notice and this permission notice shall be included in
29861+ * all copies or substantial portions of the Software.
29862+ *
29863+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29864+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29865+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29866+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29867+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29868+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29869+ * IN THE SOFTWARE.
29870+ */
29871+
29872+
29873+#include <linux/version.h>
29874+#include "common.h"
29875+
29876+static int get_id_from_freelist(struct vscsifrnt_info *info)
29877+{
29878+ unsigned long flags;
29879+ uint32_t free;
29880+
29881+ spin_lock_irqsave(&info->shadow_lock, flags);
29882+
29883+ free = info->shadow_free;
29884+ BUG_ON(free > VSCSIIF_MAX_REQS);
29885+ info->shadow_free = info->shadow[free].next_free;
29886+ info->shadow[free].next_free = 0x0fff;
29887+
29888+ info->shadow[free].wait_reset = 0;
29889+
29890+ spin_unlock_irqrestore(&info->shadow_lock, flags);
29891+
29892+ return free;
29893+}
29894+
29895+static void add_id_to_freelist(struct vscsifrnt_info *info, uint32_t id)
29896+{
29897+ unsigned long flags;
29898+
29899+ spin_lock_irqsave(&info->shadow_lock, flags);
29900+
29901+ info->shadow[id].next_free = info->shadow_free;
29902+ info->shadow[id].req_scsi_cmnd = 0;
29903+ info->shadow_free = id;
29904+
29905+ spin_unlock_irqrestore(&info->shadow_lock, flags);
29906+}
29907+
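/*
 * Free-list note (illustrative, not part of the original patch):
 * info->shadow_free holds the index of the first free shadow entry and each
 * free entry's next_free field points at the next one; the sentinel 0x0fff
 * marks the end of the chain (set up in scsifront_probe()).
 * get_id_from_freelist() pops the head and add_id_to_freelist() pushes a
 * completed id back, both under shadow_lock so ids can be recycled from any
 * context.
 */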
29908+
29909+struct vscsiif_request * scsifront_pre_request(struct vscsifrnt_info *info)
29910+{
29911+ struct vscsiif_front_ring *ring = &(info->ring);
29912+ vscsiif_request_t *ring_req;
29913+ uint32_t id;
29914+
29915+ ring_req = RING_GET_REQUEST(&(info->ring), ring->req_prod_pvt);
29916+
29917+ ring->req_prod_pvt++;
29918+
29919+	id = get_id_from_freelist(info);	/* id is echoed back in the response */
29920+ ring_req->rqid = (uint16_t)id;
29921+
29922+ return ring_req;
29923+}
29924+
29925+
29926+static void scsifront_notify_work(struct vscsifrnt_info *info)
29927+{
29928+ info->waiting_resp = 1;
29929+ wake_up(&info->wq);
29930+}
29931+
29932+
29933+static void scsifront_do_request(struct vscsifrnt_info *info)
29934+{
29935+ struct vscsiif_front_ring *ring = &(info->ring);
29936+ unsigned int irq = info->irq;
29937+ int notify;
29938+
29939+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
29940+ if (notify)
29941+ notify_remote_via_irq(irq);
29942+}
29943+
29944+irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs)
29945+{
29946+ scsifront_notify_work((struct vscsifrnt_info *)dev_id);
29947+ return IRQ_HANDLED;
29948+}
29949+
29950+
29951+static void scsifront_gnttab_done(struct vscsifrnt_shadow *s, uint32_t id)
29952+{
29953+ int i;
29954+
29955+ if (s->sc_data_direction == DMA_NONE)
29956+ return;
29957+
29958+ if (s->nr_segments) {
29959+ for (i = 0; i < s->nr_segments; i++) {
29960+ if (unlikely(gnttab_query_foreign_access(
29961+ s->gref[i]) != 0)) {
29962+ printk(KERN_ALERT "scsifront: "
29963+ "grant still in use by backend.\n");
29964+ BUG();
29965+ }
29966+ gnttab_end_foreign_access(s->gref[i], 0UL);
29967+ }
29968+ }
29969+
29970+ return;
29971+}
29972+
29973+
29974+static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
29975+ vscsiif_response_t *ring_res)
29976+{
29977+ struct scsi_cmnd *sc;
29978+ uint32_t id;
29979+ uint8_t sense_len;
29980+
29981+ id = ring_res->rqid;
29982+ sc = (struct scsi_cmnd *)info->shadow[id].req_scsi_cmnd;
29983+
29984+ if (sc == NULL)
29985+ BUG();
29986+
29987+ scsifront_gnttab_done(&info->shadow[id], id);
29988+ add_id_to_freelist(info, id);
29989+
29990+ sc->result = ring_res->rslt;
29991+ sc->resid = ring_res->residual_len;
29992+
29993+ if (ring_res->sense_len > VSCSIIF_SENSE_BUFFERSIZE)
29994+ sense_len = VSCSIIF_SENSE_BUFFERSIZE;
29995+ else
29996+ sense_len = ring_res->sense_len;
29997+
29998+ if (sense_len)
29999+ memcpy(sc->sense_buffer, ring_res->sense_buffer, sense_len);
30000+
30001+ sc->scsi_done(sc);
30002+
30003+ return;
30004+}
30005+
30006+
30007+static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
30008+ vscsiif_response_t *ring_res)
30009+{
30010+ uint16_t id = ring_res->rqid;
30011+ unsigned long flags;
30012+
30013+ spin_lock_irqsave(&info->shadow_lock, flags);
30014+ info->shadow[id].wait_reset = 1;
30015+ info->shadow[id].rslt_reset = ring_res->rslt;
30016+ spin_unlock_irqrestore(&info->shadow_lock, flags);
30017+
30018+ wake_up(&(info->shadow[id].wq_reset));
30019+}
30020+
30021+
30022+int scsifront_cmd_done(struct vscsifrnt_info *info)
30023+{
30024+ vscsiif_response_t *ring_res;
30025+
30026+ RING_IDX i, rp;
30027+ int more_to_do = 0;
30028+ unsigned long flags;
30029+
30030+ spin_lock_irqsave(&info->io_lock, flags);
30031+
30032+ rp = info->ring.sring->rsp_prod;
30033+ rmb();
30034+ for (i = info->ring.rsp_cons; i != rp; i++) {
30035+
30036+ ring_res = RING_GET_RESPONSE(&info->ring, i);
30037+
30038+ if (info->shadow[ring_res->rqid].act == VSCSIIF_ACT_SCSI_CDB)
30039+ scsifront_cdb_cmd_done(info, ring_res);
30040+ else
30041+ scsifront_sync_cmd_done(info, ring_res);
30042+ }
30043+
30044+ info->ring.rsp_cons = i;
30045+
30046+ if (i != info->ring.req_prod_pvt) {
30047+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
30048+ } else {
30049+ info->ring.sring->rsp_event = i + 1;
30050+ }
30051+
30052+ spin_unlock_irqrestore(&info->io_lock, flags);
30053+
30054+
30055+ /* Yield point for this unbounded loop. */
30056+ cond_resched();
30057+
30058+ return more_to_do;
30059+}
30060+
30061+
30062+
30063+
30064+int scsifront_schedule(void *data)
30065+{
30066+ struct vscsifrnt_info *info = (struct vscsifrnt_info *)data;
30067+
30068+ while (!kthread_should_stop()) {
30069+ wait_event_interruptible(
30070+ info->wq,
30071+ info->waiting_resp || kthread_should_stop());
30072+
30073+ info->waiting_resp = 0;
30074+ smp_mb();
30075+
30076+ if (scsifront_cmd_done(info))
30077+ info->waiting_resp = 1;
30078+ }
30079+
30080+ return 0;
30081+}
30082+
30083+
30084+
30085+static int map_data_for_request(struct vscsifrnt_info *info,
30086+ struct scsi_cmnd *sc, vscsiif_request_t *ring_req, uint32_t id)
30087+{
30088+ grant_ref_t gref_head;
30089+ struct page *page;
30090+ int err, i, ref, ref_cnt = 0;
30091+ int write = (sc->sc_data_direction == DMA_TO_DEVICE);
30092+ int nr_pages, off, len, bytes;
30093+ unsigned long buffer_pfn;
30094+ unsigned int data_len = 0;
30095+
30096+ if (sc->sc_data_direction == DMA_NONE)
30097+ return 0;
30098+
30099+ err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head);
30100+ if (err) {
30101+ printk(KERN_ERR "scsifront: gnttab_alloc_grant_references() error\n");
30102+ return -ENOMEM;
30103+ }
30104+
30105+ if (sc->use_sg) {
30106+		/* adapted from scsi_req_map_sg() in scsi_lib.c */
30107+ struct scatterlist *sg = (struct scatterlist *)sc->request_buffer;
30108+ nr_pages = (sc->request_bufflen + sg[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
30109+
30110+ if (nr_pages > VSCSIIF_SG_TABLESIZE) {
30111+ printk(KERN_ERR "scsifront: Unable to map request_buffer for command!\n");
30112+ ref_cnt = (-E2BIG);
30113+ goto big_to_sg;
30114+ }
30115+
30116+ for (i = 0; i < sc->use_sg; i++) {
30117+ page = sg[i].page;
30118+ off = sg[i].offset;
30119+ len = sg[i].length;
30120+ data_len += len;
30121+
30122+ buffer_pfn = page_to_phys(page) >> PAGE_SHIFT;
30123+
30124+ while (len > 0) {
30125+ bytes = min_t(unsigned int, len, PAGE_SIZE - off);
30126+
30127+ ref = gnttab_claim_grant_reference(&gref_head);
30128+ BUG_ON(ref == -ENOSPC);
30129+
30130+ gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id,
30131+ buffer_pfn, write);
30132+
30133+ info->shadow[id].gref[ref_cnt] = ref;
30134+ ring_req->seg[ref_cnt].gref = ref;
30135+ ring_req->seg[ref_cnt].offset = (uint16_t)off;
30136+ ring_req->seg[ref_cnt].length = (uint16_t)bytes;
30137+
30138+ buffer_pfn++;
30139+ len -= bytes;
30140+ off = 0;
30141+ ref_cnt++;
30142+ }
30143+ }
30144+ } else if (sc->request_bufflen) {
30145+ unsigned long end = ((unsigned long)sc->request_buffer
30146+ + sc->request_bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
30147+ unsigned long start = (unsigned long)sc->request_buffer >> PAGE_SHIFT;
30148+
30149+ page = virt_to_page(sc->request_buffer);
30150+ nr_pages = end - start;
30151+ len = sc->request_bufflen;
30152+
30153+ if (nr_pages > VSCSIIF_SG_TABLESIZE) {
30154+ ref_cnt = (-E2BIG);
30155+ goto big_to_sg;
30156+ }
30157+
30158+ buffer_pfn = page_to_phys(page) >> PAGE_SHIFT;
30159+
30160+ off = offset_in_page((unsigned long)sc->request_buffer);
30161+ for (i = 0; i < nr_pages; i++) {
30162+ bytes = PAGE_SIZE - off;
30163+
30164+ if (bytes > len)
30165+ bytes = len;
30166+
30167+ ref = gnttab_claim_grant_reference(&gref_head);
30168+ BUG_ON(ref == -ENOSPC);
30169+
30170+ gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id,
30171+ buffer_pfn, write);
30172+
30173+ info->shadow[id].gref[i] = ref;
30174+ ring_req->seg[i].gref = ref;
30175+ ring_req->seg[i].offset = (uint16_t)off;
30176+ ring_req->seg[i].length = (uint16_t)bytes;
30177+
30178+ buffer_pfn++;
30179+ len -= bytes;
30180+ off = 0;
30181+ ref_cnt++;
30182+ }
30183+ }
30184+
30185+big_to_sg:
30186+
30187+ gnttab_free_grant_references(gref_head);
30188+
30189+ return ref_cnt;
30190+}
30191+
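/*
 * Illustrative sketch (not part of the original patch): the grant loops in
 * map_data_for_request() split each buffer into page-sized ring segments.
 * The hypothetical helper below mirrors that arithmetic and shows how many
 * segments a buffer starting at 'off' within its first page will consume.
 */
static unsigned int vscsifrnt_count_segments(unsigned int off, unsigned int len)
{
	unsigned int segs = 0;

	while (len > 0) {
		/* each segment covers at most the rest of the current page */
		unsigned int bytes = min_t(unsigned int, len, PAGE_SIZE - off);

		len -= bytes;
		off = 0;
		segs++;
	}

	return segs;
}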
30192+static int scsifront_queuecommand(struct scsi_cmnd *sc,
30193+ void (*done)(struct scsi_cmnd *))
30194+{
30195+ struct vscsifrnt_info *info =
30196+ (struct vscsifrnt_info *) sc->device->host->hostdata;
30197+ vscsiif_request_t *ring_req;
30198+ int ref_cnt;
30199+ uint16_t rqid;
30200+
30201+ if (RING_FULL(&info->ring)) {
30202+ goto out_host_busy;
30203+ }
30204+
30205+ sc->scsi_done = done;
30206+ sc->result = 0;
30207+
30208+ ring_req = scsifront_pre_request(info);
30209+ rqid = ring_req->rqid;
30210+ ring_req->act = VSCSIIF_ACT_SCSI_CDB;
30211+
30212+ ring_req->id = sc->device->id;
30213+ ring_req->lun = sc->device->lun;
30214+ ring_req->channel = sc->device->channel;
30215+ ring_req->cmd_len = sc->cmd_len;
30216+
30217+ BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
30218+
30219+	if (sc->cmd_len)
30220+ memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
30221+ else
30222+ memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
30223+
30224+ ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
30225+ ring_req->timeout_per_command = (sc->timeout_per_command / HZ);
30226+
30227+ info->shadow[rqid].req_scsi_cmnd = (unsigned long)sc;
30228+ info->shadow[rqid].sc_data_direction = sc->sc_data_direction;
30229+ info->shadow[rqid].act = ring_req->act;
30230+
30231+ ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
30232+ if (ref_cnt < 0) {
30233+ add_id_to_freelist(info, rqid);
30234+ if (ref_cnt == (-ENOMEM))
30235+ goto out_host_busy;
30236+ else {
30237+ sc->result = (DID_ERROR << 16);
30238+ goto out_fail_command;
30239+ }
30240+ }
30241+
30242+ ring_req->nr_segments = (uint8_t)ref_cnt;
30243+ info->shadow[rqid].nr_segments = ref_cnt;
30244+
30245+ scsifront_do_request(info);
30246+
30247+ return 0;
30248+
30249+out_host_busy:
30250+ return SCSI_MLQUEUE_HOST_BUSY;
30251+
30252+out_fail_command:
30253+ done(sc);
30254+ return 0;
30255+}
30256+
30257+
30258+static int scsifront_eh_abort_handler(struct scsi_cmnd *sc)
30259+{
30260+ return (FAILED);
30261+}
30262+
30263+/* vscsi supports only device_reset, because each LUN is presented as its own device */
30264+static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
30265+{
30266+ struct Scsi_Host *host = sc->device->host;
30267+ struct vscsifrnt_info *info =
30268+ (struct vscsifrnt_info *) sc->device->host->hostdata;
30269+
30270+ vscsiif_request_t *ring_req;
30271+ uint16_t rqid;
30272+ int err;
30273+
30274+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
30275+ spin_lock_irq(host->host_lock);
30276+#endif
30277+
30278+ ring_req = scsifront_pre_request(info);
30279+ ring_req->act = VSCSIIF_ACT_SCSI_RESET;
30280+
30281+ rqid = ring_req->rqid;
30282+ info->shadow[rqid].act = VSCSIIF_ACT_SCSI_RESET;
30283+
30284+ ring_req->channel = sc->device->channel;
30285+ ring_req->id = sc->device->id;
30286+ ring_req->lun = sc->device->lun;
30287+ ring_req->cmd_len = sc->cmd_len;
30288+
30289+ if ( sc->cmd_len )
30290+ memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
30291+ else
30292+ memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
30293+
30294+ ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
30295+ ring_req->timeout_per_command = (sc->timeout_per_command / HZ);
30296+ ring_req->nr_segments = 0;
30297+
30298+ scsifront_do_request(info);
30299+
30300+ spin_unlock_irq(host->host_lock);
30301+ wait_event_interruptible(info->shadow[rqid].wq_reset,
30302+ info->shadow[rqid].wait_reset);
30303+ spin_lock_irq(host->host_lock);
30304+
30305+ err = info->shadow[rqid].rslt_reset;
30306+
30307+ add_id_to_freelist(info, rqid);
30308+
30309+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
30310+ spin_unlock_irq(host->host_lock);
30311+#endif
30312+ return (err);
30313+}
30314+
30315+
30316+struct scsi_host_template scsifront_sht = {
30317+ .module = THIS_MODULE,
30318+ .name = "Xen SCSI frontend driver",
30319+ .queuecommand = scsifront_queuecommand,
30320+ .eh_abort_handler = scsifront_eh_abort_handler,
30321+ .eh_device_reset_handler= scsifront_dev_reset_handler,
30322+ .cmd_per_lun = VSCSIIF_DEFAULT_CMD_PER_LUN,
30323+ .can_queue = VSCSIIF_MAX_REQS,
30324+ .this_id = -1,
30325+ .sg_tablesize = VSCSIIF_SG_TABLESIZE,
30326+ .use_clustering = DISABLE_CLUSTERING,
30327+ .proc_name = "scsifront",
30328+};
30329+
30330+
30331+static int __init scsifront_init(void)
30332+{
30333+ int err;
30334+
30335+ if (!is_running_on_xen())
30336+ return -ENODEV;
30337+
30338+ err = scsifront_xenbus_init();
30339+
30340+ return err;
30341+}
30342+
30343+static void __exit scsifront_exit(void)
30344+{
30345+ scsifront_xenbus_unregister();
30346+}
30347+
30348+module_init(scsifront_init);
30349+module_exit(scsifront_exit);
30350+
30351+MODULE_DESCRIPTION("Xen SCSI frontend driver");
30352+MODULE_LICENSE("GPL");
30353Index: head-2008-11-25/drivers/xen/scsifront/xenbus.c
30354===================================================================
30355--- /dev/null 1970-01-01 00:00:00.000000000 +0000
30356+++ head-2008-11-25/drivers/xen/scsifront/xenbus.c 2008-07-21 11:00:33.000000000 +0200
30357@@ -0,0 +1,421 @@
30358+/*
30359+ * Xen SCSI frontend driver
30360+ *
30361+ * Copyright (c) 2008, FUJITSU Limited
30362+ *
30363+ * This program is free software; you can redistribute it and/or
30364+ * modify it under the terms of the GNU General Public License version 2
30365+ * as published by the Free Software Foundation; or, when distributed
30366+ * separately from the Linux kernel or incorporated into other
30367+ * software packages, subject to the following license:
30368+ *
30369+ * Permission is hereby granted, free of charge, to any person obtaining a copy
30370+ * of this source file (the "Software"), to deal in the Software without
30371+ * restriction, including without limitation the rights to use, copy, modify,
30372+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
30373+ * and to permit persons to whom the Software is furnished to do so, subject to
30374+ * the following conditions:
30375+ *
30376+ * The above copyright notice and this permission notice shall be included in
30377+ * all copies or substantial portions of the Software.
30378+ *
30379+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30380+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30381+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30382+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30383+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30384+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30385+ * IN THE SOFTWARE.
30386+ */
30387+
30388+
30389+#include <linux/version.h>
30390+#include "common.h"
30391+
30392+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
30393+ #define DEFAULT_TASK_COMM_LEN 16
30394+#else
30395+ #define DEFAULT_TASK_COMM_LEN TASK_COMM_LEN
30396+#endif
30397+
30398+extern struct scsi_host_template scsifront_sht;
30399+
30400+static void scsifront_free(struct vscsifrnt_info *info)
30401+{
30402+ struct Scsi_Host *host = info->host;
30403+
30404+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
30405+ if (host->shost_state != SHOST_DEL) {
30406+#else
30407+ if (!test_bit(SHOST_DEL, &host->shost_state)) {
30408+#endif
30409+ scsi_remove_host(info->host);
30410+ }
30411+
30412+ if (info->ring_ref != GRANT_INVALID_REF) {
30413+ gnttab_end_foreign_access(info->ring_ref,
30414+ (unsigned long)info->ring.sring);
30415+ info->ring_ref = GRANT_INVALID_REF;
30416+ info->ring.sring = NULL;
30417+ }
30418+
30419+ if (info->irq)
30420+ unbind_from_irqhandler(info->irq, info);
30421+ info->irq = 0;
30422+
30423+ scsi_host_put(info->host);
30424+}
30425+
30426+
30427+static int scsifront_alloc_ring(struct vscsifrnt_info *info)
30428+{
30429+ struct xenbus_device *dev = info->dev;
30430+ struct vscsiif_sring *sring;
30431+ int err = -ENOMEM;
30432+
30433+
30434+ info->ring_ref = GRANT_INVALID_REF;
30435+
30436+ /***** Frontend to Backend ring start *****/
30437+ sring = (struct vscsiif_sring *) __get_free_page(GFP_KERNEL);
30438+ if (!sring) {
30439+		xenbus_dev_fatal(dev, err, "failed to allocate shared ring (Front to Back)");
30440+ return err;
30441+ }
30442+ SHARED_RING_INIT(sring);
30443+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
30444+
30445+ err = xenbus_grant_ring(dev, virt_to_mfn(sring));
30446+ if (err < 0) {
30447+ free_page((unsigned long) sring);
30448+ info->ring.sring = NULL;
30449+		xenbus_dev_fatal(dev, err, "failed to grant shared ring (Front to Back)");
30450+ goto free_sring;
30451+ }
30452+ info->ring_ref = err;
30453+
30454+ err = bind_listening_port_to_irqhandler(
30455+ dev->otherend_id, scsifront_intr,
30456+ SA_SAMPLE_RANDOM, "scsifront", info);
30457+
30458+ if (err <= 0) {
30459+ xenbus_dev_fatal(dev, err, "bind_listening_port_to_irqhandler");
30460+ goto free_sring;
30461+ }
30462+ info->irq = err;
30463+
30464+ return 0;
30465+
30466+/* free resource */
30467+free_sring:
30468+ scsifront_free(info);
30469+
30470+ return err;
30471+}
30472+
30473+
30474+static int scsifront_init_ring(struct vscsifrnt_info *info)
30475+{
30476+ struct xenbus_device *dev = info->dev;
30477+ struct xenbus_transaction xbt;
30478+ int err;
30479+
30480+ DPRINTK("%s\n",__FUNCTION__);
30481+
30482+ err = scsifront_alloc_ring(info);
30483+ if (err)
30484+ return err;
30485+ DPRINTK("%u %u\n", info->ring_ref, info->evtchn);
30486+
30487+again:
30488+ err = xenbus_transaction_start(&xbt);
30489+ if (err) {
30490+ xenbus_dev_fatal(dev, err, "starting transaction");
30491+ }
30492+
30493+ err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u",
30494+ info->ring_ref);
30495+ if (err) {
30496+ xenbus_dev_fatal(dev, err, "%s", "writing ring-ref");
30497+ goto fail;
30498+ }
30499+
30500+ err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
30501+ irq_to_evtchn_port(info->irq));
30502+
30503+ if (err) {
30504+ xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
30505+ goto fail;
30506+ }
30507+
30508+ err = xenbus_transaction_end(xbt, 0);
30509+ if (err) {
30510+ if (err == -EAGAIN)
30511+ goto again;
30512+ xenbus_dev_fatal(dev, err, "completing transaction");
30513+ goto free_sring;
30514+ }
30515+
30516+ return 0;
30517+
30518+fail:
30519+ xenbus_transaction_end(xbt, 1);
30520+free_sring:
30521+ /* free resource */
30522+ scsifront_free(info);
30523+
30524+ return err;
30525+}
30526+
30527+
30528+static int scsifront_probe(struct xenbus_device *dev,
30529+ const struct xenbus_device_id *id)
30530+{
30531+ struct vscsifrnt_info *info;
30532+ struct Scsi_Host *host;
30533+ int i, err = -ENOMEM;
30534+ char name[DEFAULT_TASK_COMM_LEN];
30535+
30536+ host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
30537+ if (!host) {
30538+		xenbus_dev_fatal(dev, err, "failed to allocate scsi host");
30539+ return err;
30540+ }
30541+ info = (struct vscsifrnt_info *) host->hostdata;
30542+ info->host = host;
30543+
30544+
30545+ dev->dev.driver_data = info;
30546+ info->dev = dev;
30547+
30548+ for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
30549+ info->shadow[i].next_free = i + 1;
30550+ init_waitqueue_head(&(info->shadow[i].wq_reset));
30551+ info->shadow[i].wait_reset = 0;
30552+ }
30553+ info->shadow[VSCSIIF_MAX_REQS - 1].next_free = 0x0fff;
30554+
30555+ err = scsifront_init_ring(info);
30556+ if (err) {
30557+ scsi_host_put(host);
30558+ return err;
30559+ }
30560+
30561+ init_waitqueue_head(&info->wq);
30562+ spin_lock_init(&info->io_lock);
30563+ spin_lock_init(&info->shadow_lock);
30564+
30565+ snprintf(name, DEFAULT_TASK_COMM_LEN, "vscsiif.%d", info->host->host_no);
30566+
30567+ info->kthread = kthread_run(scsifront_schedule, info, name);
30568+ if (IS_ERR(info->kthread)) {
30569+ err = PTR_ERR(info->kthread);
30570+ info->kthread = NULL;
30571+ printk(KERN_ERR "scsifront: kthread start err %d\n", err);
30572+ goto free_sring;
30573+ }
30574+
30575+ host->max_id = VSCSIIF_MAX_TARGET;
30576+ host->max_channel = 0;
30577+ host->max_lun = VSCSIIF_MAX_LUN;
30578+ host->max_sectors = (VSCSIIF_SG_TABLESIZE - 1) * PAGE_SIZE / 512;
30579+
30580+ err = scsi_add_host(host, &dev->dev);
30581+ if (err) {
30582+		printk(KERN_ERR "scsifront: failed to add scsi host %d\n", err);
30583+ goto free_sring;
30584+ }
30585+
30586+ xenbus_switch_state(dev, XenbusStateInitialised);
30587+
30588+ return 0;
30589+
30590+free_sring:
30591+ /* free resource */
30592+ scsifront_free(info);
30593+ return err;
30594+}
30595+
30596+static int scsifront_remove(struct xenbus_device *dev)
30597+{
30598+ struct vscsifrnt_info *info = dev->dev.driver_data;
30599+
30600+ DPRINTK("%s: %s removed\n",__FUNCTION__ ,dev->nodename);
30601+
30602+ if (info->kthread) {
30603+ kthread_stop(info->kthread);
30604+ info->kthread = NULL;
30605+ }
30606+
30607+ scsifront_free(info);
30608+
30609+ return 0;
30610+}
30611+
30612+
30613+static int scsifront_disconnect(struct vscsifrnt_info *info)
30614+{
30615+ struct xenbus_device *dev = info->dev;
30616+ struct Scsi_Host *host = info->host;
30617+
30618+ DPRINTK("%s: %s disconnect\n",__FUNCTION__ ,dev->nodename);
30619+
30620+	/*
30621+	 * By the time this function runs, all of the frontend's devices
30622+	 * have already been deleted, so there is no need to block I/O
30623+	 * before calling scsi_remove_host().
30624+	 */
30625+
30626+ scsi_remove_host(host);
30627+ xenbus_frontend_closed(dev);
30628+
30629+ return 0;
30630+}
30631+
30632+#define VSCSIFRONT_OP_ADD_LUN 1
30633+#define VSCSIFRONT_OP_DEL_LUN 2
30634+
30635+static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
30636+{
30637+ struct xenbus_device *dev = info->dev;
30638+ int i, err = 0;
30639+ char str[64], state_str[64];
30640+ char **dir;
30641+ unsigned int dir_n = 0;
30642+ unsigned int device_state;
30643+ unsigned int hst, chn, tgt, lun;
30644+ struct scsi_device *sdev;
30645+
30646+ dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n);
30647+ if (IS_ERR(dir))
30648+ return;
30649+
30650+ for (i = 0; i < dir_n; i++) {
30651+ /* read status */
30652+ snprintf(str, sizeof(str), "vscsi-devs/%s/state", dir[i]);
30653+ err = xenbus_scanf(XBT_NIL, dev->otherend, str, "%u",
30654+ &device_state);
30655+ if (XENBUS_EXIST_ERR(err))
30656+ continue;
30657+
30658+ /* virtual SCSI device */
30659+ snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
30660+ err = xenbus_scanf(XBT_NIL, dev->otherend, str,
30661+ "%u:%u:%u:%u", &hst, &chn, &tgt, &lun);
30662+ if (XENBUS_EXIST_ERR(err))
30663+ continue;
30664+
30665+ /* front device state path */
30666+ snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]);
30667+
30668+ switch (op) {
30669+ case VSCSIFRONT_OP_ADD_LUN:
30670+ if (device_state == XenbusStateInitialised) {
30671+ sdev = scsi_device_lookup(info->host, chn, tgt, lun);
30672+ if (sdev) {
30673+ printk(KERN_ERR "scsifront: Device already in use.\n");
30674+ scsi_device_put(sdev);
30675+ xenbus_printf(XBT_NIL, dev->nodename,
30676+ state_str, "%d", XenbusStateClosed);
30677+ } else {
30678+ scsi_add_device(info->host, chn, tgt, lun);
30679+ xenbus_printf(XBT_NIL, dev->nodename,
30680+ state_str, "%d", XenbusStateConnected);
30681+ }
30682+ }
30683+ break;
30684+ case VSCSIFRONT_OP_DEL_LUN:
30685+ if (device_state == XenbusStateClosing) {
30686+ sdev = scsi_device_lookup(info->host, chn, tgt, lun);
30687+ if (sdev) {
30688+ scsi_remove_device(sdev);
30689+ scsi_device_put(sdev);
30690+ xenbus_printf(XBT_NIL, dev->nodename,
30691+ state_str, "%d", XenbusStateClosed);
30692+ }
30693+ }
30694+ break;
30695+ default:
30696+ break;
30697+ }
30698+ }
30699+
30700+ kfree(dir);
30701+ return;
30702+}
30703+
30704+
30705+
30706+
30707+static void scsifront_backend_changed(struct xenbus_device *dev,
30708+ enum xenbus_state backend_state)
30709+{
30710+ struct vscsifrnt_info *info = dev->dev.driver_data;
30711+
30712+ DPRINTK("%p %u %u\n", dev, dev->state, backend_state);
30713+
30714+ switch (backend_state) {
30715+ case XenbusStateUnknown:
30716+ case XenbusStateInitialising:
30717+ case XenbusStateInitWait:
30718+ case XenbusStateClosed:
30719+ break;
30720+
30721+ case XenbusStateInitialised:
30722+ break;
30723+
30724+ case XenbusStateConnected:
30725+ if (xenbus_read_driver_state(dev->nodename) ==
30726+ XenbusStateInitialised) {
30727+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
30728+ }
30729+
30730+ if (dev->state == XenbusStateConnected)
30731+ break;
30732+
30733+ xenbus_switch_state(dev, XenbusStateConnected);
30734+ break;
30735+
30736+ case XenbusStateClosing:
30737+ scsifront_disconnect(info);
30738+ break;
30739+
30740+ case XenbusStateReconfiguring:
30741+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN);
30742+ xenbus_switch_state(dev, XenbusStateReconfiguring);
30743+ break;
30744+
30745+ case XenbusStateReconfigured:
30746+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
30747+ xenbus_switch_state(dev, XenbusStateConnected);
30748+ break;
30749+ }
30750+}
30751+
30752+
30753+static struct xenbus_device_id scsifront_ids[] = {
30754+ { "vscsi" },
30755+ { "" }
30756+};
30757+
30758+
30759+static struct xenbus_driver scsifront_driver = {
30760+ .name = "vscsi",
30761+ .owner = THIS_MODULE,
30762+ .ids = scsifront_ids,
30763+ .probe = scsifront_probe,
30764+ .remove = scsifront_remove,
30765+/* .resume = scsifront_resume, */
30766+ .otherend_changed = scsifront_backend_changed,
30767+};
30768+
30769+int scsifront_xenbus_init(void)
30770+{
30771+ return xenbus_register_frontend(&scsifront_driver);
30772+}
30773+
30774+void scsifront_xenbus_unregister(void)
30775+{
30776+ xenbus_unregister_driver(&scsifront_driver);
30777+}
30778+
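/*
 * Illustrative summary of the xenbus handshake implemented by this file and
 * by the scsiback xenbus code (simplified; not part of the original patch):
 *
 *	frontend                               backend
 *	probe: alloc ring -> Initialised       probe -> InitWait
 *	                                       sees Initialised: map ring,
 *	                                       add LUNs -> Connected
 *	sees Connected: scsi_add_device()
 *	per LUN -> Connected
 *
 * Later LUN hotplug is negotiated through Reconfiguring/Reconfigured.
 */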
30779Index: head-2008-11-25/drivers/xen/sfc_netback/Makefile
30780===================================================================
30781--- /dev/null 1970-01-01 00:00:00.000000000 +0000
30782+++ head-2008-11-25/drivers/xen/sfc_netback/Makefile 2008-02-26 10:54:11.000000000 +0100
30783@@ -0,0 +1,12 @@
30784+EXTRA_CFLAGS += -Idrivers/xen/sfc_netback -Idrivers/xen/sfc_netutil -Idrivers/xen/netback -Idrivers/net/sfc
30785+EXTRA_CFLAGS += -D__ci_driver__
30786+EXTRA_CFLAGS += -DEFX_USE_KCOMPAT
30787+EXTRA_CFLAGS += -Werror
30788+
30789+ifdef GCOV
30790+EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
30791+endif
30792+
30793+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_BACKEND) := sfc_netback.o
30794+
30795+sfc_netback-objs := accel.o accel_fwd.o accel_msg.o accel_solarflare.o accel_xenbus.o accel_debugfs.o
30796Index: head-2008-11-25/drivers/xen/sfc_netback/accel.c
30797===================================================================
30798--- /dev/null 1970-01-01 00:00:00.000000000 +0000
30799+++ head-2008-11-25/drivers/xen/sfc_netback/accel.c 2008-02-26 10:54:11.000000000 +0100
30800@@ -0,0 +1,129 @@
30801+/****************************************************************************
30802+ * Solarflare driver for Xen network acceleration
30803+ *
30804+ * Copyright 2006-2008: Solarflare Communications Inc,
30805+ * 9501 Jeronimo Road, Suite 250,
30806+ * Irvine, CA 92618, USA
30807+ *
30808+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
30809+ *
30810+ * This program is free software; you can redistribute it and/or modify it
30811+ * under the terms of the GNU General Public License version 2 as published
30812+ * by the Free Software Foundation, incorporated herein by reference.
30813+ *
30814+ * This program is distributed in the hope that it will be useful,
30815+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30816+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30817+ * GNU General Public License for more details.
30818+ *
30819+ * You should have received a copy of the GNU General Public License
30820+ * along with this program; if not, write to the Free Software
30821+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30822+ ****************************************************************************
30823+ */
30824+
30825+#include "accel.h"
30826+#include "accel_msg_iface.h"
30827+#include "accel_solarflare.h"
30828+
30829+#include <linux/notifier.h>
30830+
30831+#ifdef EFX_GCOV
30832+#include "gcov.h"
30833+#endif
30834+
30835+static int netback_accel_netdev_event(struct notifier_block *nb,
30836+ unsigned long event, void *ptr)
30837+{
30838+ struct net_device *net_dev = (struct net_device *)ptr;
30839+ struct netback_accel *bend;
30840+
30841+ if ((event == NETDEV_UP) || (event == NETDEV_DOWN)) {
30842+ mutex_lock(&bend_list_mutex);
30843+ bend = bend_list;
30844+ while (bend != NULL) {
30845+ mutex_lock(&bend->bend_mutex);
30846+ /*
30847+ * This happens when the shared pages have
30848+ * been unmapped, but the bend not yet removed
30849+			 * been unmapped, but the bend has not yet been
30850+			 * removed from the list
30851+ if (bend->shared_page == NULL)
30852+ goto next;
30853+
30854+ if (bend->net_dev->ifindex == net_dev->ifindex)
30855+ netback_accel_set_interface_state
30856+ (bend, event == NETDEV_UP);
30857+
30858+ next:
30859+ mutex_unlock(&bend->bend_mutex);
30860+ bend = bend->next_bend;
30861+ }
30862+ mutex_unlock(&bend_list_mutex);
30863+ }
30864+
30865+ return NOTIFY_DONE;
30866+}
30867+
30868+
30869+static struct notifier_block netback_accel_netdev_notifier = {
30870+ .notifier_call = netback_accel_netdev_event,
30871+};
30872+
30873+
30874+unsigned sfc_netback_max_pages = NETBACK_ACCEL_DEFAULT_MAX_BUF_PAGES;
30875+module_param_named(max_pages, sfc_netback_max_pages, uint, 0644);
30876+MODULE_PARM_DESC(max_pages,
30877+ "The number of buffer pages to enforce on each guest");
30878+
30879+/* Initialise the subsystems needed for the accelerated fast path */
30880+static int __init netback_accel_init(void)
30881+{
30882+ int rc = 0;
30883+
30884+#ifdef EFX_GCOV
30885+ gcov_provider_init(THIS_MODULE);
30886+#endif
30887+
30888+ rc = netback_accel_init_fwd();
30889+
30890+ if (rc == 0)
30891+ netback_accel_debugfs_init();
30892+
30893+ if (rc == 0)
30894+ rc = netback_accel_sf_init();
30895+
30896+ if (rc == 0)
30897+ rc = register_netdevice_notifier
30898+ (&netback_accel_netdev_notifier);
30899+
30900+	/*
30901+	 * TODO: if no device was found, should the resources allocated for
30902+	 * the acceleration subsystem be cleaned up here?
30903+	 */
30904+
30905+ return rc;
30906+}
30907+
30908+module_init(netback_accel_init);
30909+
30910+static void __exit netback_accel_exit(void)
30911+{
30912+ unregister_netdevice_notifier(&netback_accel_netdev_notifier);
30913+
30914+ netback_accel_sf_shutdown();
30915+
30916+ netback_accel_shutdown_bends();
30917+
30918+ netback_accel_debugfs_fini();
30919+
30920+ netback_accel_shutdown_fwd();
30921+
30922+#ifdef EFX_GCOV
30923+ gcov_provider_fini(THIS_MODULE);
30924+#endif
30925+}
30926+
30927+module_exit(netback_accel_exit);
30928+
30929+MODULE_LICENSE("GPL");
30930Index: head-2008-11-25/drivers/xen/sfc_netback/accel.h
30931===================================================================
30932--- /dev/null 1970-01-01 00:00:00.000000000 +0000
30933+++ head-2008-11-25/drivers/xen/sfc_netback/accel.h 2008-02-26 10:54:11.000000000 +0100
30934@@ -0,0 +1,393 @@
30935+/****************************************************************************
30936+ * Solarflare driver for Xen network acceleration
30937+ *
30938+ * Copyright 2006-2008: Solarflare Communications Inc,
30939+ * 9501 Jeronimo Road, Suite 250,
30940+ * Irvine, CA 92618, USA
30941+ *
30942+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
30943+ *
30944+ * This program is free software; you can redistribute it and/or modify it
30945+ * under the terms of the GNU General Public License version 2 as published
30946+ * by the Free Software Foundation, incorporated herein by reference.
30947+ *
30948+ * This program is distributed in the hope that it will be useful,
30949+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30950+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30951+ * GNU General Public License for more details.
30952+ *
30953+ * You should have received a copy of the GNU General Public License
30954+ * along with this program; if not, write to the Free Software
30955+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30956+ ****************************************************************************
30957+ */
30958+
30959+#ifndef NETBACK_ACCEL_H
30960+#define NETBACK_ACCEL_H
30961+
30962+#include <linux/slab.h>
30963+#include <linux/ip.h>
30964+#include <linux/tcp.h>
30965+#include <linux/udp.h>
30966+#include <linux/in.h>
30967+#include <linux/netdevice.h>
30968+#include <linux/etherdevice.h>
30969+#include <linux/mutex.h>
30970+#include <linux/wait.h>
30971+
30972+#include <xen/xenbus.h>
30973+
30974+#include "accel_shared_fifo.h"
30975+#include "accel_msg_iface.h"
30976+#include "accel_util.h"
30977+
30978+/**************************************************************************
30979+ * Datatypes
30980+ **************************************************************************/
30981+
30982+#define NETBACK_ACCEL_DEFAULT_MAX_FILTERS (8)
30983+#define NETBACK_ACCEL_DEFAULT_MAX_MCASTS (8)
30984+#define NETBACK_ACCEL_DEFAULT_MAX_BUF_PAGES (384)
30985+/* Variable to store module parameter for max_buf_pages */
30986+extern unsigned sfc_netback_max_pages;
30987+
30988+#define NETBACK_ACCEL_STATS 1
30989+
30990+#if NETBACK_ACCEL_STATS
30991+#define NETBACK_ACCEL_STATS_OP(x) x
30992+#else
30993+#define NETBACK_ACCEL_STATS_OP(x)
30994+#endif
30995+
30996+/*! Statistics for a given backend */
30997+struct netback_accel_stats {
30998+ /*! Number of eventq wakeup events */
30999+ u64 evq_wakeups;
31000+ /*! Number of eventq timeout events */
31001+ u64 evq_timeouts;
31002+ /*! Number of filters used */
31003+ u32 num_filters;
31004+ /*! Number of buffer pages registered */
31005+ u32 num_buffer_pages;
31006+};
31007+
31008+
31009+/* Debug fs nodes for each of the above stats */
31010+struct netback_accel_dbfs {
31011+ struct dentry *evq_wakeups;
31012+ struct dentry *evq_timeouts;
31013+ struct dentry *num_filters;
31014+ struct dentry *num_buffer_pages;
31015+};
31016+
31017+
31018+/*! Resource limits for a given NIC */
31019+struct netback_accel_limits {
31020+ int max_filters; /*!< Max. number of filters to use. */
31021+ int max_mcasts; /*!< Max. number of mcast subscriptions */
31022+ int max_buf_pages; /*!< Max. number of pages of NIC buffers */
31023+};
31024+
31025+
31026+/*! The state for an instance of the back end driver. */
31027+struct netback_accel {
31028+ /*! mutex to protect this state */
31029+ struct mutex bend_mutex;
31030+
31031+ /*! Watches on xenstore */
31032+ struct xenbus_watch domu_accel_watch;
31033+ struct xenbus_watch config_accel_watch;
31034+
31035+ /*! Pointer to whatever device cookie ties us in to the hypervisor */
31036+ void *hdev_data;
31037+
31038+ /*! FIFO indices. Next page is msg FIFOs */
31039+ struct net_accel_shared_page *shared_page;
31040+
31041+ /*! Defer control message processing */
31042+ struct work_struct handle_msg;
31043+
31044+	/*! Identifies the other-end VM and interface. */
31045+ int far_end;
31046+ int vif_num;
31047+
31048+ /*!< To unmap the shared pages */
31049+ void *sh_pages_unmap;
31050+
31051+ /* Resource tracking */
31052+ /*! Limits on H/W & Dom0 resources */
31053+ struct netback_accel_limits quotas;
31054+
31055+ /* Hardware resources */
31056+ /*! The H/W type of associated NIC */
31057+ enum net_accel_hw_type hw_type;
31058+ /*! State of allocation */
31059+ int hw_state;
31060+ /*! Index into ci_driver.nics[] for this interface */
31061+ int nic_index;
31062+ /*! How to set up the acceleration for this hardware */
31063+ int (*accel_setup)(struct netback_accel *);
31064+ /*! And how to stop it. */
31065+ void (*accel_shutdown)(struct netback_accel *);
31066+
31067+ /*! The physical/real net_dev for this interface */
31068+ struct net_device *net_dev;
31069+
31070+	/*! Magic pointer to locate state in the forwarding table */
31071+ void *fwd_priv;
31072+
31073+ /*! Message FIFO */
31074+ sh_msg_fifo2 to_domU;
31075+ /*! Message FIFO */
31076+ sh_msg_fifo2 from_domU;
31077+
31078+ /*! General notification channel id */
31079+ int msg_channel;
31080+ /*! General notification channel irq */
31081+ int msg_channel_irq;
31082+
31083+ /*! Event channel id dedicated to network packet interrupts. */
31084+ int net_channel;
31085+ /*! Event channel irq dedicated to network packets interrupts */
31086+ int net_channel_irq;
31087+
31088+ /*! The MAC address the frontend goes by. */
31089+ u8 mac[ETH_ALEN];
31090+ /*! Driver name of associated NIC */
31091+ char *nicname;
31092+
31093+ /*! Array of pointers to buffer pages mapped */
31094+ grant_handle_t *buffer_maps;
31095+ u64 *buffer_addrs;
31096+ /*! Index into buffer_maps */
31097+ int buffer_maps_index;
31098+ /*! Max number of pages that domU is allowed/will request to map */
31099+ int max_pages;
31100+
31101+ /*! Pointer to hardware specific private area */
31102+ void *accel_hw_priv;
31103+
31104+ /*! Wait queue for changes in accelstate. */
31105+ wait_queue_head_t state_wait_queue;
31106+
31107+ /*! Current state of the frontend according to the xenbus
31108+ * watch. */
31109+ XenbusState frontend_state;
31110+
31111+ /*! Current state of this backend. */
31112+ XenbusState backend_state;
31113+
31114+ /*! Non-zero if the backend is being removed. */
31115+ int removing;
31116+
31117+ /*! Non-zero if the setup_vnic has been called. */
31118+ int vnic_is_setup;
31119+
31120+#if NETBACK_ACCEL_STATS
31121+ struct netback_accel_stats stats;
31122+#endif
31123+#if defined(CONFIG_DEBUG_FS)
31124+ char *dbfs_dir_name;
31125+ struct dentry *dbfs_dir;
31126+ struct netback_accel_dbfs dbfs;
31127+#endif
31128+
31129+ /*! List */
31130+ struct netback_accel *next_bend;
31131+};
31132+
31133+
31134+/*
31135+ * Values for netback_accel.hw_state. States of resource allocation
31136+ * we can go through
31137+ */
31138+/*! No hardware has yet been allocated. */
31139+#define NETBACK_ACCEL_RES_NONE (0)
31140+/*! Hardware has been allocated. */
31141+#define NETBACK_ACCEL_RES_ALLOC (1)
31142+#define NETBACK_ACCEL_RES_FILTER (2)
31143+#define NETBACK_ACCEL_RES_HWINFO (3)
31144+
31145+/*! Filtering specification. This assumes that for VNIC support we
31146+ * will always want wildcard entries, so only specifies the
31147+ * destination IP/port
31148+ */
31149+struct netback_accel_filter_spec {
31150+ /*! Internal, used to access efx_vi API */
31151+ void *filter_handle;
31152+
31153+ /*! Destination IP in network order */
31154+ u32 destip_be;
31155+ /*! Destination port in network order */
31156+ u16 destport_be;
31157+ /*! Mac address */
31158+ u8 mac[ETH_ALEN];
31159+ /*! TCP or UDP */
31160+ u8 proto;
31161+};
31162+
31163+
31164+/**************************************************************************
31165+ * From accel.c
31166+ **************************************************************************/
31167+
31168+/*! \brief Start up all the acceleration plugins
31169+ *
31170+ * \return 0 on success, an errno on failure
31171+ */
31172+extern int netback_accel_init_accel(void);
31173+
31174+/*! \brief Shut down all the acceleration plugins
31175+ */
31176+extern void netback_accel_shutdown_accel(void);
31177+
31178+
31179+/**************************************************************************
31180+ * From accel_fwd.c
31181+ **************************************************************************/
31182+
31183+/*! \brief Init the forwarding infrastructure
31184+ * \return 0 on success, or -ENOMEM if it couldn't get memory for the
31185+ * forward table
31186+ */
31187+extern int netback_accel_init_fwd(void);
31188+
31189+/*! \brief Shut down the forwarding and free memory. */
31190+extern void netback_accel_shutdown_fwd(void);
31191+
31192+/*! Initialise each NIC port's forwarding table */
31193+extern void *netback_accel_init_fwd_port(void);
31194+extern void netback_accel_shutdown_fwd_port(void *fwd_priv);
31195+
31196+/*! \brief Add an entry to the forwarding table.
31197+ * \param mac : MAC address, used as hash key
31198+ * \param context : value to associate with the key (can be NULL, see
31199+ *                   netback_accel_fwd_set_context)
31200+ * \return 0 on success, -ENOMEM if the table was full and could not be grown
31201+ */
31202+extern int netback_accel_fwd_add(const __u8 *mac, void *context,
31203+ void *fwd_priv);
31204+
31205+/*! \brief Remove an entry from the forwarding table.
31206+ * \param mac : the MAC address to remove
31207+ * \return nothing: it is not an error if the mac was not in the table
31208+ */
31209+extern void netback_accel_fwd_remove(const __u8 *mac, void *fwd_priv);
31210+
31211+/*! \brief Set the context pointer for an existing fwd table entry.
31212+ * \param mac : key that is already present in the table
31213+ * \param context : new value to associate with key
31214+ * \return 0 on success, -ENOENT if mac not present in table.
31215+ */
31216+extern int netback_accel_fwd_set_context(const __u8 *mac, void *context,
31217+ void *fwd_priv);
31218+
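/*
 * Illustrative usage sketch (hypothetical, not part of this header): a
 * backend that has learned a guest MAC address would typically do
 * something like
 *
 *	void *port = netback_accel_init_fwd_port();
 *
 *	if (netback_accel_fwd_add(mac, bend, port) == 0)
 *		netback_accel_fwd_set_context(mac, new_context, port);
 *	...
 *	netback_accel_fwd_remove(mac, port);
 *	netback_accel_shutdown_fwd_port(port);
 *
 * where 'bend' and 'new_context' are whatever per-VNIC state the caller
 * wants to look up by MAC later.
 */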
31219+/**************************************************************************
31220+ * From accel_msg.c
31221+ **************************************************************************/
31222+
31223+
31224+/*! \brief Send the start-of-day message that handshakes with the VNIC
31225+ * and tells it its MAC address.
31226+ *
31227+ * \param bend The back end driver data structure
31228+ * \param version The version of communication to use, e.g. NET_ACCEL_MSG_VERSION
31229+ */
31230+extern void netback_accel_msg_tx_hello(struct netback_accel *bend,
31231+ unsigned version);
31232+
31233+/*! \brief Send a "there's a new local mac address" message
31234+ *
31235+ * \param bend The back end driver data structure for the vnic to send
31236+ * the message to
31237+ * \param mac Pointer to the new mac address
31238+ */
31239+extern void netback_accel_msg_tx_new_localmac(struct netback_accel *bend,
31240+ const void *mac);
31241+
31242+/*! \brief Send a "a mac address that was local has gone away" message
31243+ *
31244+ * \param bend The back end driver data structure for the vnic to send
31245+ * the message to
31246+ * \param mac Pointer to the old mac address
31247+ */
31248+extern void netback_accel_msg_tx_old_localmac(struct netback_accel *bend,
31249+ const void *mac);
31250+
31251+extern void netback_accel_set_interface_state(struct netback_accel *bend,
31252+ int up);
31253+
31254+/*! \brief Process the message queue for a bend that has just
31255+ * interrupted.
31256+ *
31257+ * Demultiplexes an interrupt from the frontend driver, taking
31258+ * messages from the FIFO and acting on them.
31259+ *
31260+ * \param bend The back end driver data structure
31261+ */
31262+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
31263+extern void netback_accel_msg_rx_handler(struct work_struct *arg);
31264+#else
31265+extern void netback_accel_msg_rx_handler(void *bend_void);
31266+#endif
31267+
31268+/**************************************************************************
31269+ * From accel_xenbus.c
31270+ **************************************************************************/
31271+/*! List of all the bends currently in existence. */
31272+extern struct netback_accel *bend_list;
31273+extern struct mutex bend_list_mutex;
31274+
31275+/*! \brief Probe a new network interface. */
31276+extern int netback_accel_probe(struct xenbus_device *dev);
31277+
31278+/*! \brief Remove a network interface. */
31279+extern int netback_accel_remove(struct xenbus_device *dev);
31280+
31281+/*! \brief Shutdown all accelerator backends */
31282+extern void netback_accel_shutdown_bends(void);
31283+
31284+/*! \brief Initiate the xenbus state teardown handshake */
31285+extern void netback_accel_set_closing(struct netback_accel *bend);
31286+
31287+/**************************************************************************
31288+ * From accel_debugfs.c
31289+ **************************************************************************/
31290+/*! Global statistics */
31291+struct netback_accel_global_stats {
31292+ /*! Number of TX packets seen through driverlink */
31293+ u64 dl_tx_packets;
31294+ /*! Number of TX packets seen through driverlink we didn't like */
31295+ u64 dl_tx_bad_packets;
31296+ /*! Number of RX packets seen through driverlink */
31297+ u64 dl_rx_packets;
31298+ /*! Number of mac addresses we are forwarding to */
31299+ u32 num_fwds;
31300+};
31301+
31302+/*! Debug fs entries for each of the above stats */
31303+struct netback_accel_global_dbfs {
31304+ struct dentry *dl_tx_packets;
31305+ struct dentry *dl_tx_bad_packets;
31306+ struct dentry *dl_rx_packets;
31307+ struct dentry *num_fwds;
31308+};
31309+
31310+#if NETBACK_ACCEL_STATS
31311+extern struct netback_accel_global_stats global_stats;
31312+#endif
31313+
31314+/*! \brief Initialise the debugfs root and populate with global stats */
31315+extern void netback_accel_debugfs_init(void);
31316+
31317+/*! \brief Remove our debugfs root directory */
31318+extern void netback_accel_debugfs_fini(void);
31319+
31320+/*! \brief Add per-bend statistics to debug fs */
31321+extern int netback_accel_debugfs_create(struct netback_accel *bend);
31322+/*! \brief Remove per-bend statistics from debug fs */
31323+extern int netback_accel_debugfs_remove(struct netback_accel *bend);
31324+
31325+#endif /* NETBACK_ACCEL_H */
31326+
31327+
31328Index: head-2008-11-25/drivers/xen/sfc_netback/accel_debugfs.c
31329===================================================================
31330--- /dev/null 1970-01-01 00:00:00.000000000 +0000
31331+++ head-2008-11-25/drivers/xen/sfc_netback/accel_debugfs.c 2008-02-26 10:54:11.000000000 +0100
31332@@ -0,0 +1,148 @@
31333+/****************************************************************************
31334+ * Solarflare driver for Xen network acceleration
31335+ *
31336+ * Copyright 2006-2008: Solarflare Communications Inc,
31337+ * 9501 Jeronimo Road, Suite 250,
31338+ * Irvine, CA 92618, USA
31339+ *
31340+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
31341+ *
31342+ * This program is free software; you can redistribute it and/or modify it
31343+ * under the terms of the GNU General Public License version 2 as published
31344+ * by the Free Software Foundation, incorporated herein by reference.
31345+ *
31346+ * This program is distributed in the hope that it will be useful,
31347+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31348+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31349+ * GNU General Public License for more details.
31350+ *
31351+ * You should have received a copy of the GNU General Public License
31352+ * along with this program; if not, write to the Free Software
31353+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31354+ ****************************************************************************
31355+ */
31356+
31357+#include <linux/fs.h>
31358+#include <linux/debugfs.h>
31359+
31360+#include "accel.h"
31361+
31362+#if defined(CONFIG_DEBUG_FS)
31363+static struct dentry *sfc_debugfs_root = NULL;
31364+#endif
31365+
31366+#if NETBACK_ACCEL_STATS
31367+struct netback_accel_global_stats global_stats;
31368+#if defined(CONFIG_DEBUG_FS)
31369+static struct netback_accel_global_dbfs global_dbfs;
31370+#endif
31371+#endif
31372+
31373+void netback_accel_debugfs_init(void)
31374+{
31375+#if defined(CONFIG_DEBUG_FS)
31376+ sfc_debugfs_root = debugfs_create_dir("sfc_netback", NULL);
31377+ if (sfc_debugfs_root == NULL)
31378+ return;
31379+
31380+ global_dbfs.num_fwds = debugfs_create_u32
31381+ ("num_fwds", S_IRUSR | S_IRGRP | S_IROTH,
31382+ sfc_debugfs_root, &global_stats.num_fwds);
31383+ global_dbfs.dl_tx_packets = debugfs_create_u64
31384+ ("dl_tx_packets", S_IRUSR | S_IRGRP | S_IROTH,
31385+ sfc_debugfs_root, &global_stats.dl_tx_packets);
31386+ global_dbfs.dl_rx_packets = debugfs_create_u64
31387+ ("dl_rx_packets", S_IRUSR | S_IRGRP | S_IROTH,
31388+ sfc_debugfs_root, &global_stats.dl_rx_packets);
31389+ global_dbfs.dl_tx_bad_packets = debugfs_create_u64
31390+ ("dl_tx_bad_packets", S_IRUSR | S_IRGRP | S_IROTH,
31391+ sfc_debugfs_root, &global_stats.dl_tx_bad_packets);
31392+#endif
31393+}
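/*
 * Note: with debugfs mounted in its usual location, the counters created
 * above show up as read-only files under /sys/kernel/debug/sfc_netback/
 * (for example dl_tx_packets and num_fwds); the exact path depends on
 * where debugfs is mounted on the running system.
 */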
31394+
31395+
31396+void netback_accel_debugfs_fini(void)
31397+{
31398+#if defined(CONFIG_DEBUG_FS)
31399+ debugfs_remove(global_dbfs.num_fwds);
31400+ debugfs_remove(global_dbfs.dl_tx_packets);
31401+ debugfs_remove(global_dbfs.dl_rx_packets);
31402+ debugfs_remove(global_dbfs.dl_tx_bad_packets);
31403+
31404+ debugfs_remove(sfc_debugfs_root);
31405+#endif
31406+}
31407+
31408+
31409+int netback_accel_debugfs_create(struct netback_accel *bend)
31410+{
31411+#if defined(CONFIG_DEBUG_FS)
31412+ /* Smallest length is 7 ("vif0.0" plus the trailing '\0') */
31413+ int length = 7, temp;
31414+
31415+ if (sfc_debugfs_root == NULL)
31416+ return -ENOENT;
31417+
31418+ /* Work out length of string representation of far_end and vif_num */
31419+ temp = bend->far_end;
31420+ while (temp > 9) {
31421+ length++;
31422+ temp = temp / 10;
31423+ }
31424+ temp = bend->vif_num;
31425+ while (temp > 9) {
31426+ length++;
31427+ temp = temp / 10;
31428+ }
31429+
31430+ bend->dbfs_dir_name = kmalloc(length, GFP_KERNEL);
31431+ if (bend->dbfs_dir_name == NULL)
31432+ return -ENOMEM;
31433+ sprintf(bend->dbfs_dir_name, "vif%d.%d", bend->far_end, bend->vif_num);
31434+
31435+ bend->dbfs_dir = debugfs_create_dir(bend->dbfs_dir_name,
31436+ sfc_debugfs_root);
31437+ if (bend->dbfs_dir == NULL) {
31438+ kfree(bend->dbfs_dir_name);
31439+ return -ENOMEM;
31440+ }
31441+
31442+#if NETBACK_ACCEL_STATS
31443+ bend->dbfs.evq_wakeups = debugfs_create_u64
31444+ ("evq_wakeups", S_IRUSR | S_IRGRP | S_IROTH,
31445+ bend->dbfs_dir, &bend->stats.evq_wakeups);
31446+ bend->dbfs.evq_timeouts = debugfs_create_u64
31447+ ("evq_timeouts", S_IRUSR | S_IRGRP | S_IROTH,
31448+ bend->dbfs_dir, &bend->stats.evq_timeouts);
31449+ bend->dbfs.num_filters = debugfs_create_u32
31450+ ("num_filters", S_IRUSR | S_IRGRP | S_IROTH,
31451+ bend->dbfs_dir, &bend->stats.num_filters);
31452+ bend->dbfs.num_buffer_pages = debugfs_create_u32
31453+ ("num_buffer_pages", S_IRUSR | S_IRGRP | S_IROTH,
31454+ bend->dbfs_dir, &bend->stats.num_buffer_pages);
31455+#endif
31456+#endif
31457+ return 0;
31458+}
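/*
 * The per-bend directory created above is named after the interface,
 * e.g. "vif1.0" for far_end 1 and vif_num 0, and (when stats are enabled)
 * holds evq_wakeups, evq_timeouts, num_filters and num_buffer_pages.
 */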
31459+
31460+
31461+int netback_accel_debugfs_remove(struct netback_accel *bend)
31462+{
31463+#if defined(CONFIG_DEBUG_FS)
31464+ if (bend->dbfs_dir != NULL) {
31465+#if NETBACK_ACCEL_STATS
31466+ debugfs_remove(bend->dbfs.evq_wakeups);
31467+ debugfs_remove(bend->dbfs.evq_timeouts);
31468+ debugfs_remove(bend->dbfs.num_filters);
31469+ debugfs_remove(bend->dbfs.num_buffer_pages);
31470+#endif
31471+ debugfs_remove(bend->dbfs_dir);
31472+ }
31473+
31474+ if (bend->dbfs_dir_name)
31475+ kfree(bend->dbfs_dir_name);
31476+#endif
31477+ return 0;
31478+}
31479+
31480+
31481Index: head-2008-11-25/drivers/xen/sfc_netback/accel_fwd.c
31482===================================================================
31483--- /dev/null 1970-01-01 00:00:00.000000000 +0000
31484+++ head-2008-11-25/drivers/xen/sfc_netback/accel_fwd.c 2008-04-02 12:34:02.000000000 +0200
31485@@ -0,0 +1,420 @@
31486+/****************************************************************************
31487+ * Solarflare driver for Xen network acceleration
31488+ *
31489+ * Copyright 2006-2008: Solarflare Communications Inc,
31490+ * 9501 Jeronimo Road, Suite 250,
31491+ * Irvine, CA 92618, USA
31492+ *
31493+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
31494+ *
31495+ * This program is free software; you can redistribute it and/or modify it
31496+ * under the terms of the GNU General Public License version 2 as published
31497+ * by the Free Software Foundation, incorporated herein by reference.
31498+ *
31499+ * This program is distributed in the hope that it will be useful,
31500+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31501+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31502+ * GNU General Public License for more details.
31503+ *
31504+ * You should have received a copy of the GNU General Public License
31505+ * along with this program; if not, write to the Free Software
31506+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31507+ ****************************************************************************
31508+ */
31509+
31510+#include "accel.h"
31511+#include "accel_cuckoo_hash.h"
31512+#include "accel_util.h"
31513+#include "accel_solarflare.h"
31514+
31515+#include "driverlink_api.h"
31516+
31517+#include <linux/if_arp.h>
31518+#include <linux/skbuff.h>
31519+#include <linux/list.h>
31520+
31521+/* State stored in the forward table */
31522+struct fwd_struct {
31523+ struct list_head link; /* Forms list */
31524+ void * context;
31525+ __u8 valid;
31526+ __u8 mac[ETH_ALEN];
31527+};
31528+
31529+/* Max value we support */
31530+#define NUM_FWDS_BITS 8
31531+#define NUM_FWDS (1 << NUM_FWDS_BITS)
31532+#define FWD_MASK (NUM_FWDS - 1)
31533+
31534+struct port_fwd {
31535+ /* Make a list */
31536+ struct list_head link;
31537+ /* Hash table to store the fwd_structs */
31538+ cuckoo_hash_table fwd_hash_table;
31539+ /* The array of fwd_structs */
31540+ struct fwd_struct *fwd_array;
31541+ /* Linked list of entries in use. */
31542+ struct list_head fwd_list;
31543+ /* Could do something clever with a reader/writer lock. */
31544+ spinlock_t fwd_lock;
31545+ /* Make find_free_entry() a bit faster by caching this */
31546+ int last_free_index;
31547+};
31548+
31549+/*
31550+ * This list is unlocked as it is only modified from the driverlink
31551+ * probe and remove callbacks, which are themselves serialised. It could
31552+ * be removed entirely as it is never iterated, but it is useful for debug.
31553+ */
31554+static struct list_head port_fwds;
31555+
31556+
31557+/* Search the fwd_array for an unused entry */
31558+static int fwd_find_free_entry(struct port_fwd *fwd_set)
31559+{
31560+ int index = fwd_set->last_free_index;
31561+
31562+ do {
31563+ if (!fwd_set->fwd_array[index].valid) {
31564+ fwd_set->last_free_index = index;
31565+ return index;
31566+ }
31567+ index++;
31568+ if (index >= NUM_FWDS)
31569+ index = 0;
31570+ } while (index != fwd_set->last_free_index);
31571+
31572+ return -ENOMEM;
31573+}
31574+
31575+
31576+/* Look up a MAC in the hash table. Caller should hold table lock. */
31577+static inline struct fwd_struct *fwd_find_entry(const __u8 *mac,
31578+ struct port_fwd *fwd_set)
31579+{
31580+ cuckoo_hash_value value;
31581+ cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
31582+
31583+ if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table,
31584+ (cuckoo_hash_key *)(&key),
31585+ &value)) {
31586+ struct fwd_struct *fwd = &fwd_set->fwd_array[value];
31587+ DPRINTK_ON(memcmp(fwd->mac, mac, ETH_ALEN) != 0);
31588+ return fwd;
31589+ }
31590+
31591+ return NULL;
31592+}
31593+
31594+
31595+/* Initialise each NIC port's forwarding table */
31596+void *netback_accel_init_fwd_port(void)
31597+{
31598+ struct port_fwd *fwd_set;
31599+
31600+ fwd_set = kzalloc(sizeof(struct port_fwd), GFP_KERNEL);
31601+ if (fwd_set == NULL) {
31602+ return NULL;
31603+ }
31604+
31605+ spin_lock_init(&fwd_set->fwd_lock);
31606+
31607+ fwd_set->fwd_array = kzalloc(sizeof (struct fwd_struct) * NUM_FWDS,
31608+ GFP_KERNEL);
31609+ if (fwd_set->fwd_array == NULL) {
31610+ kfree(fwd_set);
31611+ return NULL;
31612+ }
31613+
31614+ if (cuckoo_hash_init(&fwd_set->fwd_hash_table, NUM_FWDS_BITS, 8) != 0) {
31615+ kfree(fwd_set->fwd_array);
31616+ kfree(fwd_set);
31617+ return NULL;
31618+ }
31619+
31620+ INIT_LIST_HEAD(&fwd_set->fwd_list);
31621+
31622+ list_add(&fwd_set->link, &port_fwds);
31623+
31624+ return fwd_set;
31625+}
31626+
31627+
31628+void netback_accel_shutdown_fwd_port(void *fwd_priv)
31629+{
31630+ struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31631+
31632+ BUG_ON(fwd_priv == NULL);
31633+
31634+ BUG_ON(list_empty(&port_fwds));
31635+ list_del(&fwd_set->link);
31636+
31637+ BUG_ON(!list_empty(&fwd_set->fwd_list));
31638+
31639+ cuckoo_hash_destroy(&fwd_set->fwd_hash_table);
31640+ kfree(fwd_set->fwd_array);
31641+ kfree(fwd_set);
31642+}
31643+
31644+
31645+int netback_accel_init_fwd()
31646+{
31647+ INIT_LIST_HEAD(&port_fwds);
31648+ return 0;
31649+}
31650+
31651+
31652+void netback_accel_shutdown_fwd()
31653+{
31654+ BUG_ON(!list_empty(&port_fwds));
31655+}
31656+
31657+
31658+/*
31659+ * Add an entry to the forwarding table. Returns -ENOMEM if no
31660+ * space.
31661+ */
31662+int netback_accel_fwd_add(const __u8 *mac, void *context, void *fwd_priv)
31663+{
31664+ struct fwd_struct *fwd;
31665+ int rc = 0, index;
31666+ unsigned long flags;
31667+ cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
31668+ struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31669+
31670+ BUG_ON(fwd_priv == NULL);
31671+
31672+ DPRINTK("Adding mac " MAC_FMT "\n", MAC_ARG(mac));
31673+
31674+ spin_lock_irqsave(&fwd_set->fwd_lock, flags);
31675+
31676+ if ((rc = fwd_find_free_entry(fwd_set)) < 0 ) {
31677+ spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31678+ return rc;
31679+ }
31680+
31681+ index = rc;
31682+
31683+ /* Shouldn't already be in the table */
31684+ if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table,
31685+ (cuckoo_hash_key *)(&key), &rc) != 0) {
31686+ spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31687+ EPRINTK("MAC address " MAC_FMT " already accelerated.\n",
31688+ MAC_ARG(mac));
31689+ return -EEXIST;
31690+ }
31691+
31692+ if ((rc = cuckoo_hash_add(&fwd_set->fwd_hash_table,
31693+ (cuckoo_hash_key *)(&key), index, 1)) == 0) {
31694+ fwd = &fwd_set->fwd_array[index];
31695+ fwd->valid = 1;
31696+ fwd->context = context;
31697+ memcpy(fwd->mac, mac, ETH_ALEN);
31698+ list_add(&fwd->link, &fwd_set->fwd_list);
31699+ NETBACK_ACCEL_STATS_OP(global_stats.num_fwds++);
31700+ }
31701+
31702+ spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31703+
31704+ /*
31705+ * No need to tell frontend that this mac address is local -
31706+ * it should auto-discover through packets on fastpath what is
31707+ * local and what is not, and just being on same server
31708+ * doesn't make it local (it could be on a different
31709+ * bridge)
31710+ */
31711+
31712+ return rc;
31713+}
31714+
31715+
31716+/* Remove an entry from the forwarding table. */
31717+void netback_accel_fwd_remove(const __u8 *mac, void *fwd_priv)
31718+{
31719+ struct fwd_struct *fwd;
31720+ unsigned long flags;
31721+ cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
31722+ struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31723+
31724+ DPRINTK("Removing mac " MAC_FMT "\n", MAC_ARG(mac));
31725+
31726+ BUG_ON(fwd_priv == NULL);
31727+
31728+ spin_lock_irqsave(&fwd_set->fwd_lock, flags);
31729+
31730+ fwd = fwd_find_entry(mac, fwd_set);
31731+ if (fwd != NULL) {
31732+ BUG_ON(list_empty(&fwd_set->fwd_list));
31733+ list_del(&fwd->link);
31734+
31735+ fwd->valid = 0;
31736+ cuckoo_hash_remove(&fwd_set->fwd_hash_table,
31737+ (cuckoo_hash_key *)(&key));
31738+ NETBACK_ACCEL_STATS_OP(global_stats.num_fwds--);
31739+ }
31740+ spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31741+
31742+ /*
31743+ * No need to tell frontend that this is no longer present -
31744+ * the frontend is currently only interested in remote
31745+ * addresses and it works these out (mostly) by itself
31746+ */
31747+}
31748+
31749+
31750+/* Set the context pointer for a hash table entry. */
31751+int netback_accel_fwd_set_context(const __u8 *mac, void *context,
31752+ void *fwd_priv)
31753+{
31754+ struct fwd_struct *fwd;
31755+ unsigned long flags;
31756+ int rc = -ENOENT;
31757+ struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31758+
31759+ BUG_ON(fwd_priv == NULL);
31760+
31761+ spin_lock_irqsave(&fwd_set->fwd_lock, flags);
31762+ fwd = fwd_find_entry(mac, fwd_set);
31763+ if (fwd != NULL) {
31764+ fwd->context = context;
31765+ rc = 0;
31766+ }
31767+ spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31768+ return rc;
31769+}
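/*
 * A rough illustrative sketch of the forwarding-table lifecycle built from
 * the functions above, roughly as the driverlink glue in accel_solarflare.c
 * drives it; the function name and MAC value are hypothetical and error
 * handling is elided.
 */
static void example_fwd_lifecycle(struct netback_accel *bend)
{
	static const __u8 mac[ETH_ALEN] = { 0x00, 0x0f, 0x53, 0x01, 0x02, 0x03 };
	void *fwd_priv = netback_accel_init_fwd_port();

	if (fwd_priv == NULL)
		return;

	/* Accelerate a frontend MAC; the context can be attached later */
	if (netback_accel_fwd_add(mac, NULL, fwd_priv) == 0) {
		/* Once the VNIC handshake completes, attach the bend */
		netback_accel_fwd_set_context(mac, bend, fwd_priv);

		/* ... fast-path bookkeeping then runs through
		 * netback_accel_rx_packet() and netback_accel_tx_packet() ... */

		netback_accel_fwd_remove(mac, fwd_priv);
	}

	netback_accel_shutdown_fwd_port(fwd_priv);
}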
31770+
31771+
31772+/**************************************************************************
31773+ * Process a received packet
31774+ **************************************************************************/
31775+
31776+/*
31777+ * Returns whether or not we have a match in our forward table for
31778+ * this skb. Must be called with the appropriate fwd_lock already held.
31779+ */
31780+static struct netback_accel *for_a_vnic(struct netback_pkt_buf *skb,
31781+ struct port_fwd *fwd_set)
31782+{
31783+ struct fwd_struct *fwd;
31784+ struct netback_accel *retval = NULL;
31785+
31786+ fwd = fwd_find_entry(skb->mac.raw, fwd_set);
31787+ if (fwd != NULL)
31788+ retval = fwd->context;
31789+ return retval;
31790+}
31791+
31792+
31793+static inline int packet_is_arp_reply(struct sk_buff *skb)
31794+{
31795+ return skb->protocol == ntohs(ETH_P_ARP)
31796+ && skb->nh.arph->ar_op == ntohs(ARPOP_REPLY);
31797+}
31798+
31799+
31800+static inline void hdr_to_filt(struct ethhdr *ethhdr, struct iphdr *ip,
31801+ struct netback_accel_filter_spec *spec)
31802+{
31803+ spec->proto = ip->protocol;
31804+ spec->destip_be = ip->daddr;
31805+ memcpy(spec->mac, ethhdr->h_source, ETH_ALEN);
31806+
31807+ if (ip->protocol == IPPROTO_TCP) {
31808+ struct tcphdr *tcp = (struct tcphdr *)((char *)ip + 4 * ip->ihl);
31809+ spec->destport_be = tcp->dest;
31810+ } else {
31811+ struct udphdr *udp = (struct udphdr *)((char *)ip + 4 * ip->ihl);
31812+ EPRINTK_ON(ip->protocol != IPPROTO_UDP);
31813+ spec->destport_be = udp->dest;
31814+ }
31815+}
31816+
31817+
31818+static inline int netback_accel_can_filter(struct netback_pkt_buf *skb)
31819+{
31820+ return (skb->protocol == htons(ETH_P_IP) &&
31821+ ((skb->nh.iph->protocol == IPPROTO_TCP) ||
31822+ (skb->nh.iph->protocol == IPPROTO_UDP)));
31823+}
31824+
31825+
31826+static inline void netback_accel_filter_packet(struct netback_accel *bend,
31827+ struct netback_pkt_buf *skb)
31828+{
31829+ struct netback_accel_filter_spec fs;
31830+ struct ethhdr *eh = (struct ethhdr *)(skb->mac.raw);
31831+
31832+ hdr_to_filt(eh, skb->nh.iph, &fs);
31833+
31834+ netback_accel_filter_check_add(bend, &fs);
31835+}
31836+
31837+
31838+/*
31839+ * Receive a packet and do something appropriate with it. Ownership of
31840+ * the packet is never claimed here, so it also continues down the
31841+ * normal slow path. This is verging on Solarflare specific.
31842+ */
31843+void netback_accel_rx_packet(struct netback_pkt_buf *skb, void *fwd_priv)
31844+{
31845+ struct netback_accel *bend;
31846+ struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31847+ unsigned long flags;
31848+
31849+ BUG_ON(fwd_priv == NULL);
31850+
31851+ /* Checking for bcast is cheaper so do that first */
31852+ if (is_broadcast_ether_addr(skb->mac.raw)) {
31853+ /* pass through the slow path by not claiming ownership */
31854+ return;
31855+ } else if (is_multicast_ether_addr(skb->mac.raw)) {
31856+ /* pass through the slow path by not claiming ownership */
31857+ return;
31858+ } else {
31859+ /* It is unicast */
31860+ spin_lock_irqsave(&fwd_set->fwd_lock, flags);
31861+ /* We insert filter to pass it off to a VNIC */
31862+ if ((bend = for_a_vnic(skb, fwd_set)) != NULL)
31863+ if (netback_accel_can_filter(skb))
31864+ netback_accel_filter_packet(bend, skb);
31865+ spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31866+ }
31867+ return;
31868+}
31869+
31870+
31871+void netback_accel_tx_packet(struct sk_buff *skb, void *fwd_priv)
31872+{
31873+ __u8 *mac;
31874+ unsigned long flags;
31875+ struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31876+ struct fwd_struct *fwd;
31877+
31878+ BUG_ON(fwd_priv == NULL);
31879+
31880+ if (is_broadcast_ether_addr(skb->mac.raw) && packet_is_arp_reply(skb)) {
31881+ /*
31882+ * update our fast path forwarding to reflect this
31883+ * gratuitous ARP
31884+ */
31885+ mac = skb->mac.raw+ETH_ALEN;
31886+
31887+ DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n",
31888+ __FUNCTION__, MAC_ARG(mac));
31889+
31890+ spin_lock_irqsave(&fwd_set->fwd_lock, flags);
31891+ /*
31892+ * Might not be local, but let's tell them all it is,
31893+ * and they can restore the fastpath if they continue
31894+ * to get packets that way
31895+ */
31896+ list_for_each_entry(fwd, &fwd_set->fwd_list, link) {
31897+ struct netback_accel *bend = fwd->context;
31898+ if (bend != NULL)
31899+ netback_accel_msg_tx_new_localmac(bend, mac);
31900+ }
31901+
31902+ spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31903+ }
31904+ return;
31905+}
31906Index: head-2008-11-25/drivers/xen/sfc_netback/accel_msg.c
31907===================================================================
31908--- /dev/null 1970-01-01 00:00:00.000000000 +0000
31909+++ head-2008-11-25/drivers/xen/sfc_netback/accel_msg.c 2008-02-20 09:32:49.000000000 +0100
31910@@ -0,0 +1,392 @@
31911+/****************************************************************************
31912+ * Solarflare driver for Xen network acceleration
31913+ *
31914+ * Copyright 2006-2008: Solarflare Communications Inc,
31915+ * 9501 Jeronimo Road, Suite 250,
31916+ * Irvine, CA 92618, USA
31917+ *
31918+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
31919+ *
31920+ * This program is free software; you can redistribute it and/or modify it
31921+ * under the terms of the GNU General Public License version 2 as published
31922+ * by the Free Software Foundation, incorporated herein by reference.
31923+ *
31924+ * This program is distributed in the hope that it will be useful,
31925+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31926+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31927+ * GNU General Public License for more details.
31928+ *
31929+ * You should have received a copy of the GNU General Public License
31930+ * along with this program; if not, write to the Free Software
31931+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31932+ ****************************************************************************
31933+ */
31934+
31935+#include <xen/evtchn.h>
31936+
31937+#include "accel.h"
31938+#include "accel_msg_iface.h"
31939+#include "accel_util.h"
31940+#include "accel_solarflare.h"
31941+
31942+/* Send a HELLO to front end to start things off */
31943+void netback_accel_msg_tx_hello(struct netback_accel *bend, unsigned version)
31944+{
31945+ unsigned long lock_state;
31946+ struct net_accel_msg *msg =
31947+ net_accel_msg_start_send(bend->shared_page,
31948+ &bend->to_domU, &lock_state);
31949+ /* The queue _cannot_ be full, we're the first users. */
31950+ EPRINTK_ON(msg == NULL);
31951+
31952+ if (msg != NULL) {
31953+ net_accel_msg_init(msg, NET_ACCEL_MSG_HELLO);
31954+ msg->u.hello.version = version;
31955+ msg->u.hello.max_pages = bend->quotas.max_buf_pages;
31956+ VPRINTK("Sending hello to channel %d\n", bend->msg_channel);
31957+ net_accel_msg_complete_send_notify(bend->shared_page,
31958+ &bend->to_domU,
31959+ &lock_state,
31960+ bend->msg_channel_irq);
31961+ }
31962+}
31963+
31964+/* Send a local mac message to vnic */
31965+static void netback_accel_msg_tx_localmac(struct netback_accel *bend,
31966+ int type, const void *mac)
31967+{
31968+ unsigned long lock_state;
31969+ struct net_accel_msg *msg;
31970+
31971+ BUG_ON(bend == NULL || mac == NULL);
31972+
31973+ VPRINTK("Sending local mac message: " MAC_FMT "\n",
31974+ MAC_ARG((const char *)mac));
31975+
31976+ msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU,
31977+ &lock_state);
31978+
31979+ if (msg != NULL) {
31980+ net_accel_msg_init(msg, NET_ACCEL_MSG_LOCALMAC);
31981+ msg->u.localmac.flags = type;
31982+ memcpy(msg->u.localmac.mac, mac, ETH_ALEN);
31983+ net_accel_msg_complete_send_notify(bend->shared_page,
31984+ &bend->to_domU,
31985+ &lock_state,
31986+ bend->msg_channel_irq);
31987+ } else {
31988+ /*
31989+ * TODO: if this happens we may leave a domU
31990+ * fastpathing packets when they should be delivered
31991+ * locally. The solution is to get the domU to time out
31992+ * entries in its fastpath lookup table when it receives
31993+ * no RX traffic on them.
31994+ */
31995+ EPRINTK("%s: saw full queue, may need ARP timer to recover\n",
31996+ __FUNCTION__);
31997+ }
31998+}
31999+
32000+/* Send an add local mac message to vnic */
32001+void netback_accel_msg_tx_new_localmac(struct netback_accel *bend,
32002+ const void *mac)
32003+{
32004+ netback_accel_msg_tx_localmac(bend, NET_ACCEL_MSG_ADD, mac);
32005+}
32006+
32007+
32008+static int netback_accel_msg_rx_buffer_map(struct netback_accel *bend,
32009+ struct net_accel_msg *msg)
32010+{
32011+ int log2_pages, rc;
32012+
32013+ /* Can only allocate in power of two */
32014+ log2_pages = log2_ge(msg->u.mapbufs.pages, 0);
32015+ if (msg->u.mapbufs.pages != pow2(log2_pages)) {
32016+ EPRINTK("%s: Can only alloc bufs in power of 2 sizes (%d)\n",
32017+ __FUNCTION__, msg->u.mapbufs.pages);
32018+ rc = -EINVAL;
32019+ goto err_out;
32020+ }
32021+
32022+ /*
32023+ * Sanity. Assumes NET_ACCEL_MSG_MAX_PAGE_REQ is same for
32024+ * both directions/domains
32025+ */
32026+ if (msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ) {
32027+ EPRINTK("%s: too many pages in a single message: %d %d\n",
32028+ __FUNCTION__, msg->u.mapbufs.pages,
32029+ NET_ACCEL_MSG_MAX_PAGE_REQ);
32030+ rc = -EINVAL;
32031+ goto err_out;
32032+ }
32033+
32034+ if ((rc = netback_accel_add_buffers(bend, msg->u.mapbufs.pages,
32035+ log2_pages, msg->u.mapbufs.grants,
32036+ &msg->u.mapbufs.buf)) < 0) {
32037+ goto err_out;
32038+ }
32039+
32040+ msg->id |= NET_ACCEL_MSG_REPLY;
32041+
32042+ return 0;
32043+
32044+ err_out:
32045+ EPRINTK("%s: err_out\n", __FUNCTION__);
32046+ msg->id |= NET_ACCEL_MSG_ERROR | NET_ACCEL_MSG_REPLY;
32047+ return rc;
32048+}
32049+
32050+
32051+/* Hint from frontend that one of our filters is out of date */
32052+static int netback_accel_process_fastpath(struct netback_accel *bend,
32053+ struct net_accel_msg *msg)
32054+{
32055+ struct netback_accel_filter_spec spec;
32056+
32057+ if (msg->u.fastpath.flags & NET_ACCEL_MSG_REMOVE) {
32058+ /*
32059+ * Would be nice to BUG() this but would leave us
32060+ * vulnerable to naughty frontend
32061+ */
32062+ EPRINTK_ON(msg->u.fastpath.flags & NET_ACCEL_MSG_ADD);
32063+
32064+ memcpy(spec.mac, msg->u.fastpath.mac, ETH_ALEN);
32065+ spec.destport_be = msg->u.fastpath.port;
32066+ spec.destip_be = msg->u.fastpath.ip;
32067+ spec.proto = msg->u.fastpath.proto;
32068+
32069+ netback_accel_filter_remove_spec(bend, &spec);
32070+ }
32071+
32072+ return 0;
32073+}
32074+
32075+
32076+/* Flow control for message queues */
32077+inline void set_queue_not_full(struct netback_accel *bend)
32078+{
32079+ if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B,
32080+ (unsigned long *)&bend->shared_page->aflags))
32081+ notify_remote_via_irq(bend->msg_channel_irq);
32082+ else
32083+ VPRINTK("queue not full bit already set, not signalling\n");
32084+}
32085+
32086+
32087+/* Flow control for message queues */
32088+inline void set_queue_full(struct netback_accel *bend)
32089+{
32090+ if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B,
32091+ (unsigned long *)&bend->shared_page->aflags))
32092+ notify_remote_via_irq(bend->msg_channel_irq);
32093+ else
32094+ VPRINTK("queue full bit already set, not signalling\n");
32095+}
32096+
32097+
32098+void netback_accel_set_interface_state(struct netback_accel *bend, int up)
32099+{
32100+ bend->shared_page->net_dev_up = up;
32101+ if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B,
32102+ (unsigned long *)&bend->shared_page->aflags))
32103+ notify_remote_via_irq(bend->msg_channel_irq);
32104+ else
32105+ VPRINTK("interface up/down bit already set, not signalling\n");
32106+}
32107+
32108+
32109+static int check_rx_hello_version(unsigned version)
32110+{
32111+ /* Should only happen if there's been a version mismatch */
32112+ BUG_ON(version == NET_ACCEL_MSG_VERSION);
32113+
32114+ if (version > NET_ACCEL_MSG_VERSION) {
32115+ /* Newer protocol, we must refuse */
32116+ return -EPROTO;
32117+ }
32118+
32119+ if (version < NET_ACCEL_MSG_VERSION) {
32120+ /*
32121+ * We are newer, so have discretion to accept if we
32122+ * wish. For now however, just reject
32123+ */
32124+ return -EPROTO;
32125+ }
32126+
32127+ return -EINVAL;
32128+}
32129+
32130+
32131+static int process_rx_msg(struct netback_accel *bend,
32132+ struct net_accel_msg *msg)
32133+{
32134+ int err = 0;
32135+
32136+ switch (msg->id) {
32137+ case NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_HELLO:
32138+ /* Reply to a HELLO; mark ourselves as connected */
32139+ DPRINTK("got Hello reply, version %.8x\n",
32140+ msg->u.hello.version);
32141+
32142+ /*
32143+ * Check that we've not successfully done this
32144+ * already. NB no check at the moment that this reply
32145+ * comes after we've actually sent a HELLO as that's
32146+ * not possible with the current code structure
32147+ */
32148+ if (bend->hw_state != NETBACK_ACCEL_RES_NONE)
32149+ return -EPROTO;
32150+
32151+ /* Store max_pages for accel_setup */
32152+ if (msg->u.hello.max_pages > bend->quotas.max_buf_pages) {
32153+ EPRINTK("More pages than quota allows (%d > %d)\n",
32154+ msg->u.hello.max_pages,
32155+ bend->quotas.max_buf_pages);
32156+ /* Force it down to the quota */
32157+ msg->u.hello.max_pages = bend->quotas.max_buf_pages;
32158+ }
32159+ bend->max_pages = msg->u.hello.max_pages;
32160+
32161+ /* Set up the hardware visible to the other end */
32162+ err = bend->accel_setup(bend);
32163+ if (err) {
32164+ /* This is fatal */
32165+ DPRINTK("Hello gave accel_setup error %d\n", err);
32166+ netback_accel_set_closing(bend);
32167+ } else {
32168+ /*
32169+ * Now add the context so that packet
32170+ * forwarding will commence
32171+ */
32172+ netback_accel_fwd_set_context(bend->mac, bend,
32173+ bend->fwd_priv);
32174+ }
32175+ break;
32176+ case NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_ERROR:
32177+ EPRINTK("got Hello error, versions us:%.8x them:%.8x\n",
32178+ NET_ACCEL_MSG_VERSION, msg->u.hello.version);
32179+
32180+ if (bend->hw_state != NETBACK_ACCEL_RES_NONE)
32181+ return -EPROTO;
32182+
32183+ if (msg->u.hello.version != NET_ACCEL_MSG_VERSION) {
32184+ /* Error is due to version mismatch */
32185+ err = check_rx_hello_version(msg->u.hello.version);
32186+ if (err == 0) {
32187+ /*
32188+ * It's OK to be compatible, send
32189+ * another hello with compatible version
32190+ */
32191+ netback_accel_msg_tx_hello
32192+ (bend, msg->u.hello.version);
32193+ } else {
32194+ /*
32195+ * Tell frontend that we're not going to
32196+ * send another HELLO by going to Closing.
32197+ */
32198+ netback_accel_set_closing(bend);
32199+ }
32200+ }
32201+ break;
32202+ case NET_ACCEL_MSG_MAPBUF:
32203+ VPRINTK("Got mapped buffers request %d\n",
32204+ msg->u.mapbufs.reqid);
32205+
32206+ if (bend->hw_state == NETBACK_ACCEL_RES_NONE)
32207+ return -EPROTO;
32208+
32209+ /*
32210+ * Frontend wants a buffer table entry for the
32211+ * supplied pages
32212+ */
32213+ err = netback_accel_msg_rx_buffer_map(bend, msg);
32214+ if (net_accel_msg_reply_notify(bend->shared_page,
32215+ bend->msg_channel_irq,
32216+ &bend->to_domU, msg)) {
32217+ /*
32218+ * This is fatal as we can't tell the frontend
32219+ * about the problem through the message
32220+ * queue, so we would otherwise deadlock.
32221+ */
32222+ netback_accel_set_closing(bend);
32223+ }
32224+ break;
32225+ case NET_ACCEL_MSG_FASTPATH:
32226+ DPRINTK("Got fastpath request\n");
32227+
32228+ if (bend->hw_state == NETBACK_ACCEL_RES_NONE)
32229+ return -EPROTO;
32230+
32231+ err = netback_accel_process_fastpath(bend, msg);
32232+ break;
32233+ default:
32234+ EPRINTK("Huh? Message code is %x\n", msg->id);
32235+ err = -EPROTO;
32236+ break;
32237+ }
32238+ return err;
32239+}
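/*
 * For reference, the overall message flow handled above runs roughly as
 * follows (see also accel_solarflare.c): the backend opens with a HELLO,
 * the frontend's HELLO reply triggers accel_setup(), which allocates and
 * grants the hardware resources and answers with a SETHW message; the
 * frontend then asks for buffer mappings with MAPBUF requests, and later
 * FASTPATH messages hint that one of our filters has gone stale and
 * should be removed.
 */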
32240+
32241+
32242+/* Demultiplex an IRQ from the frontend driver. */
32243+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
32244+void netback_accel_msg_rx_handler(struct work_struct *arg)
32245+#else
32246+void netback_accel_msg_rx_handler(void *bend_void)
32247+#endif
32248+{
32249+ struct net_accel_msg msg;
32250+ int err, queue_was_full = 0;
32251+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
32252+ struct netback_accel *bend =
32253+ container_of(arg, struct netback_accel, handle_msg);
32254+#else
32255+ struct netback_accel *bend = (struct netback_accel *)bend_void;
32256+#endif
32257+
32258+ mutex_lock(&bend->bend_mutex);
32259+
32260+ /*
32261+ * This happens when the shared pages have been unmapped but
32262+ * the workqueue has not been flushed yet.
32263+ */
32264+ if (bend->shared_page == NULL)
32265+ goto done;
32266+
32267+ if ((bend->shared_page->aflags &
32268+ NET_ACCEL_MSG_AFLAGS_TO_DOM0_MASK) != 0) {
32269+ if (bend->shared_page->aflags &
32270+ NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL) {
32271+ /* We've been told there may now be space. */
32272+ clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B,
32273+ (unsigned long *)&bend->shared_page->aflags);
32274+ }
32275+
32276+ if (bend->shared_page->aflags &
32277+ NET_ACCEL_MSG_AFLAGS_QUEUEUFULL) {
32278+ clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B,
32279+ (unsigned long *)&bend->shared_page->aflags);
32280+ queue_was_full = 1;
32281+ }
32282+ }
32283+
32284+ while ((err = net_accel_msg_recv(bend->shared_page, &bend->from_domU,
32285+ &msg)) == 0) {
32286+ err = process_rx_msg(bend, &msg);
32287+
32288+ if (err != 0) {
32289+ EPRINTK("%s: Error %d\n", __FUNCTION__, err);
32290+ goto err;
32291+ }
32292+ }
32293+
32294+ err:
32295+ /* There will be space now if we can make any. */
32296+ if (queue_was_full)
32297+ set_queue_not_full(bend);
32298+ done:
32299+ mutex_unlock(&bend->bend_mutex);
32300+
32301+ return;
32302+}
32303Index: head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.c
32304===================================================================
32305--- /dev/null 1970-01-01 00:00:00.000000000 +0000
32306+++ head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.c 2008-02-20 09:32:49.000000000 +0100
32307@@ -0,0 +1,1253 @@
32308+/****************************************************************************
32309+ * Solarflare driver for Xen network acceleration
32310+ *
32311+ * Copyright 2006-2008: Solarflare Communications Inc,
32312+ * 9501 Jeronimo Road, Suite 250,
32313+ * Irvine, CA 92618, USA
32314+ *
32315+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
32316+ *
32317+ * This program is free software; you can redistribute it and/or modify it
32318+ * under the terms of the GNU General Public License version 2 as published
32319+ * by the Free Software Foundation, incorporated herein by reference.
32320+ *
32321+ * This program is distributed in the hope that it will be useful,
32322+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32323+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32324+ * GNU General Public License for more details.
32325+ *
32326+ * You should have received a copy of the GNU General Public License
32327+ * along with this program; if not, write to the Free Software
32328+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
32329+ ****************************************************************************
32330+ */
32331+
32332+#include "common.h"
32333+
32334+#include "accel.h"
32335+#include "accel_solarflare.h"
32336+#include "accel_msg_iface.h"
32337+#include "accel_util.h"
32338+
32339+#include "accel_cuckoo_hash.h"
32340+
32341+#include "ci/driver/resource/efx_vi.h"
32342+
32343+#include "ci/efrm/nic_table.h"
32344+#include "ci/efhw/public.h"
32345+
32346+#include <xen/evtchn.h>
32347+#include <xen/driver_util.h>
32348+#include <linux/list.h>
32349+#include <linux/mutex.h>
32350+
32351+#include "driverlink_api.h"
32352+
32353+#define SF_XEN_RX_USR_BUF_SIZE 2048
32354+
32355+struct falcon_bend_accel_priv {
32356+ struct efx_vi_state *efx_vih;
32357+
32358+ /*! Array of pointers to dma_map state, used so VNIC can
32359+ * request their removal in a single message
32360+ */
32361+ struct efx_vi_dma_map_state **dma_maps;
32362+ /*! Index into dma_maps */
32363+ int dma_maps_index;
32364+
32365+ /*! Serialises access to filters */
32366+ spinlock_t filter_lock;
32367+ /*! Bitmap of which filters are free */
32368+ unsigned long free_filters;
32369+ /*! Used for index normalisation */
32370+ u32 filter_idx_mask;
32371+ struct netback_accel_filter_spec *fspecs;
32372+ cuckoo_hash_table filter_hash_table;
32373+
32374+ u32 txdmaq_gnt;
32375+ u32 rxdmaq_gnt;
32376+ u32 doorbell_gnt;
32377+ u32 evq_rptr_gnt;
32378+ u32 evq_mem_gnts[EF_HW_FALCON_EVQ_PAGES];
32379+ u32 evq_npages;
32380+};
32381+
32382+/* Forward declaration */
32383+static int netback_accel_filter_init(struct netback_accel *);
32384+static void netback_accel_filter_shutdown(struct netback_accel *);
32385+
32386+/**************************************************************************
32387+ *
32388+ * Driverlink stuff
32389+ *
32390+ **************************************************************************/
32391+
32392+struct driverlink_port {
32393+ struct list_head link;
32394+ enum net_accel_hw_type type;
32395+ struct net_device *net_dev;
32396+ struct efx_dl_device *efx_dl_dev;
32397+ int nic_index;
32398+ void *fwd_priv;
32399+};
32400+
32401+static struct list_head dl_ports;
32402+
32403+/* This mutex protects global state, such as the dl_ports list */
32404+DEFINE_MUTEX(accel_mutex);
32405+
32406+static int init_done = 0;
32407+
32408+/* The DL callbacks */
32409+
32410+
32411+#if defined(EFX_USE_FASTCALL)
32412+static enum efx_veto fastcall
32413+#else
32414+static enum efx_veto
32415+#endif
32416+bend_dl_tx_packet(struct efx_dl_device *efx_dl_dev,
32417+ struct sk_buff *skb)
32418+{
32419+ struct driverlink_port *port = efx_dl_dev->priv;
32420+
32421+ BUG_ON(port == NULL);
32422+
32423+ NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++);
32424+ if (skb->mac.raw != NULL)
32425+ netback_accel_tx_packet(skb, port->fwd_priv);
32426+ else {
32427+ DPRINTK("Ignoring packet with missing mac address\n");
32428+ NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_bad_packets++);
32429+ }
32430+ return EFX_ALLOW_PACKET;
32431+}
32432+
32433+/* EFX_USE_FASTCALL */
32434+#if defined(EFX_USE_FASTCALL)
32435+static enum efx_veto fastcall
32436+#else
32437+static enum efx_veto
32438+#endif
32439+bend_dl_rx_packet(struct efx_dl_device *efx_dl_dev,
32440+ const char *pkt_buf, int pkt_len)
32441+{
32442+ struct driverlink_port *port = efx_dl_dev->priv;
32443+ struct netback_pkt_buf pkt;
32444+ struct ethhdr *eh;
32445+
32446+ BUG_ON(port == NULL);
32447+
32448+ pkt.mac.raw = (char *)pkt_buf;
32449+ pkt.nh.raw = (char *)pkt_buf + ETH_HLEN;
32450+ eh = (struct ethhdr *)pkt_buf;
32451+ pkt.protocol = eh->h_proto;
32452+
32453+ NETBACK_ACCEL_STATS_OP(global_stats.dl_rx_packets++);
32454+ netback_accel_rx_packet(&pkt, port->fwd_priv);
32455+ return EFX_ALLOW_PACKET;
32456+}
32457+
32458+
32459+/* Callbacks we'd like to get from the netdriver through driverlink */
32460+struct efx_dl_callbacks bend_dl_callbacks =
32461+ {
32462+ .tx_packet = bend_dl_tx_packet,
32463+ .rx_packet = bend_dl_rx_packet,
32464+ };
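/*
 * Every packet the net driver sends or receives passes through these
 * driverlink hooks; they only update forwarding and filter state via
 * netback_accel_tx_packet() and netback_accel_rx_packet() and always
 * return EFX_ALLOW_PACKET, so the normal slow path is never blocked.
 */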
32465+
32466+
32467+static struct netback_accel_hooks accel_hooks = {
32468+ THIS_MODULE,
32469+ &netback_accel_probe,
32470+ &netback_accel_remove
32471+};
32472+
32473+
32474+/*
32475+ * Handy helper which, given an efx_dl_device, works out which
32476+ * efab_nic_t index into efrm_nic_table.nic[] it corresponds to
32477+ */
32478+static int efx_device_to_efab_nic_index(struct efx_dl_device *efx_dl_dev)
32479+{
32480+ int i;
32481+
32482+ for (i = 0; i < EFHW_MAX_NR_DEVS; i++) {
32483+ struct efhw_nic *nic = efrm_nic_table.nic[i];
32484+
32485+ /*
32486+ * It's possible for the nic structure not to have
32487+ * been initialised if the resource driver failed its
32488+ * driverlink probe.
32489+ */
32490+ if (nic == NULL || nic->net_driver_dev == NULL)
32491+ continue;
32492+
32493+ /* Work out if these are talking about the same NIC */
32494+ if (nic->net_driver_dev->pci_dev == efx_dl_dev->pci_dev)
32495+ return i;
32496+ }
32497+
32498+ return -1;
32499+}
32500+
32501+
32502+/* Driver link probe - register our callbacks */
32503+static int bend_dl_probe(struct efx_dl_device *efx_dl_dev,
32504+ const struct net_device *net_dev,
32505+ const struct efx_dl_device_info *dev_info,
32506+ const char* silicon_rev)
32507+{
32508+ int rc;
32509+ enum net_accel_hw_type type;
32510+ struct driverlink_port *port;
32511+
32512+ DPRINTK("%s: %s\n", __FUNCTION__, silicon_rev);
32513+
32514+ if (strcmp(silicon_rev, "falcon/a1") == 0)
32515+ type = NET_ACCEL_MSG_HWTYPE_FALCON_A;
32516+ else if (strcmp(silicon_rev, "falcon/b0") == 0)
32517+ type = NET_ACCEL_MSG_HWTYPE_FALCON_B;
32518+ else {
32519+ EPRINTK("%s: unsupported silicon %s\n", __FUNCTION__,
32520+ silicon_rev);
32521+ rc = -EINVAL;
32522+ goto fail1;
32523+ }
32524+
32525+ port = kmalloc(sizeof(struct driverlink_port), GFP_KERNEL);
32526+ if (port == NULL) {
32527+ EPRINTK("%s: no memory for dl probe\n", __FUNCTION__);
32528+ rc = -ENOMEM;
32529+ goto fail1;
32530+ }
32531+
32532+ port->efx_dl_dev = efx_dl_dev;
32533+ efx_dl_dev->priv = port;
32534+
32535+ port->nic_index = efx_device_to_efab_nic_index(efx_dl_dev);
32536+ if (port->nic_index < 0) {
32537+ /*
32538+ * This can happen in theory if the resource driver
32539+ * failed to initialise properly
32540+ */
32541+ EPRINTK("%s: nic structure not found\n", __FUNCTION__);
32542+ rc = -EINVAL;
32543+ goto fail2;
32544+ }
32545+
32546+ port->fwd_priv = netback_accel_init_fwd_port();
32547+ if (port->fwd_priv == NULL) {
32548+ EPRINTK("%s: failed to set up forwarding for port\n",
32549+ __FUNCTION__);
32550+ rc = -ENOMEM;
32551+ goto fail2;
32552+ }
32553+
32554+ rc = efx_dl_register_callbacks(efx_dl_dev, &bend_dl_callbacks);
32555+ if (rc != 0) {
32556+ EPRINTK("%s: register_callbacks failed\n", __FUNCTION__);
32557+ goto fail3;
32558+ }
32559+
32560+ port->type = type;
32561+ port->net_dev = (struct net_device *)net_dev;
32562+
32563+ mutex_lock(&accel_mutex);
32564+ list_add(&port->link, &dl_ports);
32565+ mutex_unlock(&accel_mutex);
32566+
32567+ rc = netback_connect_accelerator(NETBACK_ACCEL_VERSION, 0,
32568+ port->net_dev->name, &accel_hooks);
32569+
32570+ if (rc < 0) {
32571+ EPRINTK("Xen netback accelerator version mismatch\n");
32572+ goto fail4;
32573+ } else if (rc > 0) {
32574+ /*
32575+ * In future we may want to add backwards compatibility
32576+ * and accept certain subsets of previous versions.
32577+ */
32578+ EPRINTK("Xen netback accelerator version mismatch\n");
32579+ goto fail4;
32580+ }
32581+
32582+ return 0;
32583+
32584+ fail4:
32585+ mutex_lock(&accel_mutex);
32586+ list_del(&port->link);
32587+ mutex_unlock(&accel_mutex);
32588+
32589+ efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks);
32590+ fail3:
32591+ netback_accel_shutdown_fwd_port(port->fwd_priv);
32592+ fail2:
32593+ efx_dl_dev->priv = NULL;
32594+ kfree(port);
32595+ fail1:
32596+ return rc;
32597+}
32598+
32599+
32600+static void bend_dl_remove(struct efx_dl_device *efx_dl_dev)
32601+{
32602+ struct driverlink_port *port;
32603+
32604+ DPRINTK("Unregistering driverlink callbacks.\n");
32605+
32606+ mutex_lock(&accel_mutex);
32607+
32608+ port = (struct driverlink_port *)efx_dl_dev->priv;
32609+
32610+ BUG_ON(list_empty(&dl_ports));
32611+ BUG_ON(port == NULL);
32612+ BUG_ON(port->efx_dl_dev != efx_dl_dev);
32613+
32614+ netback_disconnect_accelerator(0, port->net_dev->name);
32615+
32616+ list_del(&port->link);
32617+
32618+ mutex_unlock(&accel_mutex);
32619+
32620+ efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks);
32621+ netback_accel_shutdown_fwd_port(port->fwd_priv);
32622+
32623+ efx_dl_dev->priv = NULL;
32624+ kfree(port);
32625+
32626+ return;
32627+}
32628+
32629+
32630+static struct efx_dl_driver bend_dl_driver =
32631+ {
32632+ .name = "SFC Xen backend",
32633+ .probe = bend_dl_probe,
32634+ .remove = bend_dl_remove,
32635+ };
32636+
32637+
32638+int netback_accel_sf_init(void)
32639+{
32640+ int rc, nic_i;
32641+ struct efhw_nic *nic;
32642+
32643+ INIT_LIST_HEAD(&dl_ports);
32644+
32645+ rc = efx_dl_register_driver(&bend_dl_driver);
32646+ /* If we couldn't find the NET driver, give up */
32647+ if (rc == -ENOENT)
32648+ return rc;
32649+
32650+ if (rc == 0) {
32651+ EFRM_FOR_EACH_NIC(nic_i, nic)
32652+ falcon_nic_set_rx_usr_buf_size(nic,
32653+ SF_XEN_RX_USR_BUF_SIZE);
32654+ }
32655+
32656+ init_done = (rc == 0);
32657+ return rc;
32658+}
32659+
32660+
32661+void netback_accel_sf_shutdown(void)
32662+{
32663+ if (!init_done)
32664+ return;
32665+ DPRINTK("Unregistering driverlink driver\n");
32666+
32667+ /*
32668+ * This will trigger removal callbacks for all the devices, which
32669+ * will unregister their callbacks, disconnect from netfront, etc.
32670+ */
32671+ efx_dl_unregister_driver(&bend_dl_driver);
32672+}
32673+
32674+
32675+int netback_accel_sf_hwtype(struct netback_accel *bend)
32676+{
32677+ struct driverlink_port *port;
32678+
32679+ mutex_lock(&accel_mutex);
32680+
32681+ list_for_each_entry(port, &dl_ports, link) {
32682+ if (strcmp(bend->nicname, port->net_dev->name) == 0) {
32683+ bend->hw_type = port->type;
32684+ bend->accel_setup = netback_accel_setup_vnic_hw;
32685+ bend->accel_shutdown = netback_accel_shutdown_vnic_hw;
32686+ bend->fwd_priv = port->fwd_priv;
32687+ /* This is just needed to pass to efx_vi_alloc */
32688+ bend->nic_index = port->nic_index;
32689+ bend->net_dev = port->net_dev;
32690+ mutex_unlock(&accel_mutex);
32691+ return 0;
32692+ }
32693+ }
32694+
32695+ mutex_unlock(&accel_mutex);
32696+
32697+ EPRINTK("Failed to identify backend device '%s' with a NIC\n",
32698+ bend->nicname);
32699+
32700+ return -ENOENT;
32701+}
32702+
32703+
32704+/****************************************************************************
32705+ * Resource management code
32706+ ***************************************************************************/
32707+
32708+static int alloc_page_state(struct netback_accel *bend, int max_pages)
32709+{
32710+ struct falcon_bend_accel_priv *accel_hw_priv;
32711+
32712+ if (max_pages < 0 || max_pages > bend->quotas.max_buf_pages) {
32713+ EPRINTK("%s: invalid max_pages: %d\n", __FUNCTION__, max_pages);
32714+ return -EINVAL;
32715+ }
32716+
32717+ accel_hw_priv = kzalloc(sizeof(struct falcon_bend_accel_priv),
32718+ GFP_KERNEL);
32719+ if (accel_hw_priv == NULL) {
32720+ EPRINTK("%s: no memory for accel_hw_priv\n", __FUNCTION__);
32721+ return -ENOMEM;
32722+ }
32723+
32724+ accel_hw_priv->dma_maps = kzalloc
32725+ (sizeof(struct efx_vi_dma_map_state **) *
32726+ (max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ), GFP_KERNEL);
32727+ if (accel_hw_priv->dma_maps == NULL) {
32728+ EPRINTK("%s: no memory for dma_maps\n", __FUNCTION__);
32729+ kfree(accel_hw_priv);
32730+ return -ENOMEM;
32731+ }
32732+
32733+ bend->buffer_maps = kzalloc(sizeof(struct vm_struct *) * max_pages,
32734+ GFP_KERNEL);
32735+ if (bend->buffer_maps == NULL) {
32736+ EPRINTK("%s: no memory for buffer_maps\n", __FUNCTION__);
32737+ kfree(accel_hw_priv->dma_maps);
32738+ kfree(accel_hw_priv);
32739+ return -ENOMEM;
32740+ }
32741+
32742+ bend->buffer_addrs = kzalloc(sizeof(u64) * max_pages, GFP_KERNEL);
32743+ if (bend->buffer_addrs == NULL) {
32744+ kfree(bend->buffer_maps);
32745+ kfree(accel_hw_priv->dma_maps);
32746+ kfree(accel_hw_priv);
32747+ return -ENOMEM;
32748+ }
32749+
32750+ bend->accel_hw_priv = accel_hw_priv;
32751+
32752+ return 0;
32753+}
32754+
32755+
32756+static int free_page_state(struct netback_accel *bend)
32757+{
32758+ struct falcon_bend_accel_priv *accel_hw_priv;
32759+
32760+ DPRINTK("%s: %p\n", __FUNCTION__, bend);
32761+
32762+ accel_hw_priv = bend->accel_hw_priv;
32763+
32764+ if (accel_hw_priv) {
32765+ kfree(accel_hw_priv->dma_maps);
32766+ kfree(bend->buffer_maps);
32767+ kfree(bend->buffer_addrs);
32768+ kfree(accel_hw_priv);
32769+ bend->accel_hw_priv = NULL;
32770+ bend->max_pages = 0;
32771+ }
32772+
32773+ return 0;
32774+}
32775+
32776+
32777+/* The timeout event callback for the event q */
32778+static void bend_evq_timeout(void *context, int is_timeout)
32779+{
32780+ struct netback_accel *bend = (struct netback_accel *)context;
32781+ if (is_timeout) {
32782+ /* Pass event to vnic front end driver */
32783+ VPRINTK("timeout event to %d\n", bend->net_channel);
32784+ NETBACK_ACCEL_STATS_OP(bend->stats.evq_timeouts++);
32785+ notify_remote_via_irq(bend->net_channel_irq);
32786+ } else {
32787+ /* It's a wakeup event, used by Falcon */
32788+ VPRINTK("wakeup to %d\n", bend->net_channel);
32789+ NETBACK_ACCEL_STATS_OP(bend->stats.evq_wakeups++);
32790+ notify_remote_via_irq(bend->net_channel_irq);
32791+ }
32792+}
32793+
32794+
32795+/*
32796+ * Create the eventq and associated gubbins for communication with the
32797+ * front end vnic driver
32798+ */
32799+static int ef_get_vnic(struct netback_accel *bend)
32800+{
32801+ struct falcon_bend_accel_priv *accel_hw_priv;
32802+ int rc = 0;
32803+
32804+ BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_NONE);
32805+
32806+ /* Allocate page related state and accel_hw_priv */
32807+ rc = alloc_page_state(bend, bend->max_pages);
32808+ if (rc != 0) {
32809+ EPRINTK("Failed to allocate page state: %d\n", rc);
32810+ return rc;
32811+ }
32812+
32813+ accel_hw_priv = bend->accel_hw_priv;
32814+
32815+ rc = efx_vi_alloc(&accel_hw_priv->efx_vih, bend->nic_index);
32816+ if (rc != 0) {
32817+ EPRINTK("%s: efx_vi_alloc failed %d\n", __FUNCTION__, rc);
32818+ free_page_state(bend);
32819+ return rc;
32820+ }
32821+
32822+ rc = efx_vi_eventq_register_callback(accel_hw_priv->efx_vih,
32823+ bend_evq_timeout,
32824+ bend);
32825+ if (rc != 0) {
32826+ EPRINTK("%s: register_callback failed %d\n", __FUNCTION__, rc);
32827+ efx_vi_free(accel_hw_priv->efx_vih);
32828+ free_page_state(bend);
32829+ return rc;
32830+ }
32831+
32832+ bend->hw_state = NETBACK_ACCEL_RES_ALLOC;
32833+
32834+ return 0;
32835+}
32836+
32837+
32838+static void ef_free_vnic(struct netback_accel *bend)
32839+{
32840+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
32841+
32842+ BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_ALLOC);
32843+
32844+ efx_vi_eventq_kill_callback(accel_hw_priv->efx_vih);
32845+
32846+ DPRINTK("Hardware is freeable. Will proceed.\n");
32847+
32848+ efx_vi_free(accel_hw_priv->efx_vih);
32849+ accel_hw_priv->efx_vih = NULL;
32850+
32851+ VPRINTK("Free page state...\n");
32852+ free_page_state(bend);
32853+
32854+ bend->hw_state = NETBACK_ACCEL_RES_NONE;
32855+}
32856+
32857+
32858+static inline void ungrant_or_crash(grant_ref_t gntref, int domain) {
32859+ if (net_accel_ungrant_page(gntref) == -EBUSY)
32860+ net_accel_shutdown_remote(domain);
32861+}
32862+
32863+
32864+static void netback_accel_release_hwinfo(struct netback_accel *bend)
32865+{
32866+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
32867+ int i;
32868+
32869+ DPRINTK("Remove dma q grants %d %d\n", accel_hw_priv->txdmaq_gnt,
32870+ accel_hw_priv->rxdmaq_gnt);
32871+ ungrant_or_crash(accel_hw_priv->txdmaq_gnt, bend->far_end);
32872+ ungrant_or_crash(accel_hw_priv->rxdmaq_gnt, bend->far_end);
32873+
32874+ DPRINTK("Remove doorbell grant %d\n", accel_hw_priv->doorbell_gnt);
32875+ ungrant_or_crash(accel_hw_priv->doorbell_gnt, bend->far_end);
32876+
32877+ if (bend->hw_type == NET_ACCEL_MSG_HWTYPE_FALCON_A) {
32878+ DPRINTK("Remove rptr grant %d\n", accel_hw_priv->evq_rptr_gnt);
32879+ ungrant_or_crash(accel_hw_priv->evq_rptr_gnt, bend->far_end);
32880+ }
32881+
32882+ for (i = 0; i < accel_hw_priv->evq_npages; i++) {
32883+ DPRINTK("Remove evq grant %d\n", accel_hw_priv->evq_mem_gnts[i]);
32884+ ungrant_or_crash(accel_hw_priv->evq_mem_gnts[i], bend->far_end);
32885+ }
32886+
32887+ bend->hw_state = NETBACK_ACCEL_RES_FILTER;
32888+
32889+ return;
32890+}
32891+
32892+
32893+static int ef_bend_hwinfo_falcon_common(struct netback_accel *bend,
32894+ struct net_accel_hw_falcon_b *hwinfo)
32895+{
32896+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
32897+ struct efx_vi_hw_resource_metadata res_mdata;
32898+ struct efx_vi_hw_resource res_array[EFX_VI_HW_RESOURCE_MAXSIZE];
32899+ int rc, len = EFX_VI_HW_RESOURCE_MAXSIZE, i, pfn = 0;
32900+ unsigned long txdmaq_pfn = 0, rxdmaq_pfn = 0;
32901+
32902+ rc = efx_vi_hw_resource_get_phys(accel_hw_priv->efx_vih, &res_mdata,
32903+ res_array, &len);
32904+ if (rc != 0) {
32905+ DPRINTK("%s: resource_get_phys returned %d\n",
32906+ __FUNCTION__, rc);
32907+ return rc;
32908+ }
32909+
32910+ if (res_mdata.version != 0)
32911+ return -EPROTO;
32912+
32913+ hwinfo->nic_arch = res_mdata.nic_arch;
32914+ hwinfo->nic_variant = res_mdata.nic_variant;
32915+ hwinfo->nic_revision = res_mdata.nic_revision;
32916+
32917+ hwinfo->evq_order = res_mdata.evq_order;
32918+ hwinfo->evq_offs = res_mdata.evq_offs;
32919+ hwinfo->evq_capacity = res_mdata.evq_capacity;
32920+ hwinfo->instance = res_mdata.instance;
32921+ hwinfo->rx_capacity = res_mdata.rx_capacity;
32922+ hwinfo->tx_capacity = res_mdata.tx_capacity;
32923+
32924+ VPRINTK("evq_order %d evq_offs %d evq_cap %d inst %d rx_cap %d tx_cap %d\n",
32925+ hwinfo->evq_order, hwinfo->evq_offs, hwinfo->evq_capacity,
32926+ hwinfo->instance, hwinfo->rx_capacity, hwinfo->tx_capacity);
32927+
32928+ for (i = 0; i < len; i++) {
32929+ struct efx_vi_hw_resource *res = &(res_array[i]);
32930+ switch (res->type) {
32931+ case EFX_VI_HW_RESOURCE_TXDMAQ:
32932+ txdmaq_pfn = page_to_pfn(virt_to_page(res->address));
32933+ break;
32934+ case EFX_VI_HW_RESOURCE_RXDMAQ:
32935+ rxdmaq_pfn = page_to_pfn(virt_to_page(res->address));
32936+ break;
32937+ case EFX_VI_HW_RESOURCE_EVQTIMER:
32938+ break;
32939+ case EFX_VI_HW_RESOURCE_EVQRPTR:
32940+ case EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET:
32941+ hwinfo->evq_rptr = res->address;
32942+ break;
32943+ case EFX_VI_HW_RESOURCE_EVQMEMKVA:
32944+ accel_hw_priv->evq_npages = 1 << res_mdata.evq_order;
32945+ pfn = page_to_pfn(virt_to_page(res->address));
32946+ break;
32947+ case EFX_VI_HW_RESOURCE_BELLPAGE:
32948+ hwinfo->doorbell_mfn = res->address;
32949+ break;
32950+ default:
32951+ EPRINTK("%s: Unknown hardware resource type %d\n",
32952+ __FUNCTION__, res->type);
32953+ break;
32954+ }
32955+ }
32956+
32957+ VPRINTK("Passing txdmaq page pfn %lx\n", txdmaq_pfn);
32958+ accel_hw_priv->txdmaq_gnt = hwinfo->txdmaq_gnt =
32959+ net_accel_grant_page(bend->hdev_data, pfn_to_mfn(txdmaq_pfn),
32960+ 0);
32961+
32962+ VPRINTK("Passing rxdmaq page pfn %lx\n", rxdmaq_pfn);
32963+ accel_hw_priv->rxdmaq_gnt = hwinfo->rxdmaq_gnt =
32964+ net_accel_grant_page(bend->hdev_data, pfn_to_mfn(rxdmaq_pfn),
32965+ 0);
32966+
32967+ VPRINTK("Passing doorbell page mfn %x\n", hwinfo->doorbell_mfn);
32968+ /* Make the relevant H/W pages mappable by the far end */
32969+ accel_hw_priv->doorbell_gnt = hwinfo->doorbell_gnt =
32970+ net_accel_grant_page(bend->hdev_data, hwinfo->doorbell_mfn, 1);
32971+
32972+ /* Now do the same for the memory pages */
32973+ /* Convert the page + length we got back for the evq to grants. */
32974+ for (i = 0; i < accel_hw_priv->evq_npages; i++) {
32975+ accel_hw_priv->evq_mem_gnts[i] = hwinfo->evq_mem_gnts[i] =
32976+ net_accel_grant_page(bend->hdev_data, pfn_to_mfn(pfn), 0);
32977+ VPRINTK("Got grant %u for evq pfn %x\n", hwinfo->evq_mem_gnts[i],
32978+ pfn);
32979+ pfn++;
32980+ }
32981+
32982+ return 0;
32983+}
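/*
 * To summarise the grants set up above: the TX/RX DMA queue pages and the
 * event queue pages are granted with the final argument 0 and the doorbell
 * page with 1 (presumably read-only versus writable mappings), so the
 * frontend can ring the hardware doorbell directly; Falcon A additionally
 * grants the event queue read-pointer page in ef_bend_hwinfo_falcon_a()
 * below.
 */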
32984+
32985+
32986+static int ef_bend_hwinfo_falcon_a(struct netback_accel *bend,
32987+ struct net_accel_hw_falcon_a *hwinfo)
32988+{
32989+ int rc;
32990+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
32991+
32992+ if ((rc = ef_bend_hwinfo_falcon_common(bend, &hwinfo->common)) != 0)
32993+ return rc;
32994+
32995+ /*
32996+ * Note that unlike the above, where the message field is the
32997+ * page number, here evq_rptr is the entire address because
32998+ * it is currently a pointer into the densely mapped timer page.
32999+ */
33000+ VPRINTK("Passing evq_rptr pfn %x for rptr %x\n",
33001+ hwinfo->common.evq_rptr >> PAGE_SHIFT,
33002+ hwinfo->common.evq_rptr);
33003+ rc = net_accel_grant_page(bend->hdev_data,
33004+ hwinfo->common.evq_rptr >> PAGE_SHIFT, 0);
33005+ if (rc < 0)
33006+ return rc;
33007+
33008+ accel_hw_priv->evq_rptr_gnt = hwinfo->evq_rptr_gnt = rc;
33009+ VPRINTK("evq_rptr_gnt got %d\n", hwinfo->evq_rptr_gnt);
33010+
33011+ return 0;
33012+}
33013+
33014+
33015+static int ef_bend_hwinfo_falcon_b(struct netback_accel *bend,
33016+ struct net_accel_hw_falcon_b *hwinfo)
33017+{
33018+ return ef_bend_hwinfo_falcon_common(bend, hwinfo);
33019+}
33020+
33021+
33022+/*
33023+ * Fill in the message with a description of the hardware resources, based on
33024+ * the H/W type
33025+ */
33026+static int netback_accel_hwinfo(struct netback_accel *bend,
33027+ struct net_accel_msg_hw *msgvi)
33028+{
33029+ int rc = 0;
33030+
33031+ BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_FILTER);
33032+
33033+ msgvi->type = bend->hw_type;
33034+ switch (bend->hw_type) {
33035+ case NET_ACCEL_MSG_HWTYPE_FALCON_A:
33036+ rc = ef_bend_hwinfo_falcon_a(bend, &msgvi->resources.falcon_a);
33037+ break;
33038+ case NET_ACCEL_MSG_HWTYPE_FALCON_B:
33039+ rc = ef_bend_hwinfo_falcon_b(bend, &msgvi->resources.falcon_b);
33040+ break;
33041+ case NET_ACCEL_MSG_HWTYPE_NONE:
33042+ /* Nothing to do. The slow path should just work. */
33043+ break;
33044+ }
33045+
33046+ if (rc == 0)
33047+ bend->hw_state = NETBACK_ACCEL_RES_HWINFO;
33048+
33049+ return rc;
33050+}
33051+
33052+
33053+/* Allocate hardware resources and make them available to the client domain */
33054+int netback_accel_setup_vnic_hw(struct netback_accel *bend)
33055+{
33056+ struct net_accel_msg msg;
33057+ int err;
33058+
33059+ /* Allocate the event queue, VI and so on. */
33060+ err = ef_get_vnic(bend);
33061+ if (err) {
33062+ EPRINTK("Failed to allocate hardware resource for bend:"
33063+ "error %d\n", err);
33064+ return err;
33065+ }
33066+
33067+ /* Set up the filter management */
33068+ err = netback_accel_filter_init(bend);
33069+ if (err) {
33070+ EPRINTK("Filter setup failed, error %d", err);
33071+ ef_free_vnic(bend);
33072+ return err;
33073+ }
33074+
33075+ net_accel_msg_init(&msg, NET_ACCEL_MSG_SETHW);
33076+
33077+ /*
33078+ * Extract the low-level hardware info we will actually pass to the
33079+ * other end, and set up the grants/ioremap permissions needed
33080+ */
33081+ err = netback_accel_hwinfo(bend, &msg.u.hw);
33082+
33083+ if (err != 0) {
33084+ netback_accel_filter_shutdown(bend);
33085+ ef_free_vnic(bend);
33086+ return err;
33087+ }
33088+
33089+ /* Send the message, this is a reply to a hello-reply */
33090+ err = net_accel_msg_reply_notify(bend->shared_page,
33091+ bend->msg_channel_irq,
33092+ &bend->to_domU, &msg);
33093+
33094+ /*
33095+ * The message should succeed as it's logically a reply and we
33096+ * guarantee space for replies, but a misbehaving frontend
33097+ * could still cause the send to fail, so be tolerant
33098+ */
33099+ if (err != 0) {
33100+ netback_accel_release_hwinfo(bend);
33101+ netback_accel_filter_shutdown(bend);
33102+ ef_free_vnic(bend);
33103+ }
33104+
33105+ return err;
33106+}
33107+
33108+
33109+/* Free hardware resources */
33110+void netback_accel_shutdown_vnic_hw(struct netback_accel *bend)
33111+{
33112+ /*
33113+ * Only try to release resources if accel_hw_priv was set up,
33114+ * otherwise there is nothing to do as we're on "null-op"
33115+ * acceleration
33116+ */
33117+ switch (bend->hw_state) {
33118+ case NETBACK_ACCEL_RES_HWINFO:
33119+ VPRINTK("Release hardware resources\n");
33120+ netback_accel_release_hwinfo(bend);
33121+ /* deliberate drop through */
33122+ case NETBACK_ACCEL_RES_FILTER:
33123+ VPRINTK("Free filters...\n");
33124+ netback_accel_filter_shutdown(bend);
33125+ /* deliberate drop through */
33126+ case NETBACK_ACCEL_RES_ALLOC:
33127+ VPRINTK("Free vnic...\n");
33128+ ef_free_vnic(bend);
33129+ /* deliberate drop through */
33130+ case NETBACK_ACCEL_RES_NONE:
33131+ break;
33132+ default:
33133+ BUG();
33134+ }
33135+}
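+
+/*
+ * Teardown summary by entry state (an illustrative reading of the
+ * fall-through above, not a separate code path):
+ *
+ *   NETBACK_ACCEL_RES_HWINFO: release hwinfo grants, then filters, then vnic
+ *   NETBACK_ACCEL_RES_FILTER: release filters, then vnic
+ *   NETBACK_ACCEL_RES_ALLOC:  release vnic only
+ *   NETBACK_ACCEL_RES_NONE:   nothing to do
+ */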
33136+
33137+/**************************************************************************
33138+ *
33139+ * Buffer table stuff
33140+ *
33141+ **************************************************************************/
33142+
33143+/*
33144+ * Undo any allocation that netback_accel_msg_rx_buffer_map() has made
33145+ * if it fails half way through
33146+ */
33147+static inline void buffer_map_cleanup(struct netback_accel *bend, int i)
33148+{
33149+ while (i > 0) {
33150+ i--;
33151+ bend->buffer_maps_index--;
33152+ net_accel_unmap_device_page(bend->hdev_data,
33153+ bend->buffer_maps[bend->buffer_maps_index],
33154+ bend->buffer_addrs[bend->buffer_maps_index]);
33155+ }
33156+}
33157+
33158+
33159+int netback_accel_add_buffers(struct netback_accel *bend, int pages, int log2_pages,
33160+ u32 *grants, u32 *buf_addr_out)
33161+{
33162+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33163+ unsigned long long addr_array[NET_ACCEL_MSG_MAX_PAGE_REQ];
33164+ int rc, i, index;
33165+ u64 dev_bus_addr;
33166+
33167+ /* Make sure we can't overflow the dma_maps array */
33168+ if (accel_hw_priv->dma_maps_index >=
33169+ bend->max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ) {
33170+ EPRINTK("%s: too many buffer table allocations: %d %d\n",
33171+ __FUNCTION__, accel_hw_priv->dma_maps_index,
33172+ bend->max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ);
33173+ return -EINVAL;
33174+ }
33175+
33176+ /* Make sure we can't overflow the buffer_maps array */
33177+ if (bend->buffer_maps_index + pages > bend->max_pages) {
33178+ EPRINTK("%s: too many pages mapped: %d + %d > %d\n",
33179+ __FUNCTION__, bend->buffer_maps_index,
33180+ pages, bend->max_pages);
33181+ return -EINVAL;
33182+ }
33183+
33184+ for (i = 0; i < pages; i++) {
33185+ VPRINTK("%s: mapping page %d\n", __FUNCTION__, i);
33186+ rc = net_accel_map_device_page
33187+ (bend->hdev_data, grants[i],
33188+ &bend->buffer_maps[bend->buffer_maps_index],
33189+ &dev_bus_addr);
33190+
33191+ if (rc != 0) {
33192+ EPRINTK("error in net_accel_map_device_page\n");
33193+ buffer_map_cleanup(bend, i);
33194+ return rc;
33195+ }
33196+
33197+ bend->buffer_addrs[bend->buffer_maps_index] = dev_bus_addr;
33198+
33199+ bend->buffer_maps_index++;
33200+
33201+ addr_array[i] = dev_bus_addr;
33202+ }
33203+
33204+ VPRINTK("%s: mapping dma addresses to vih %p\n", __FUNCTION__,
33205+ accel_hw_priv->efx_vih);
33206+
33207+ index = accel_hw_priv->dma_maps_index;
33208+ if ((rc = efx_vi_dma_map_addrs(accel_hw_priv->efx_vih, addr_array, pages,
33209+ &(accel_hw_priv->dma_maps[index]))) < 0) {
33210+ EPRINTK("error in dma_map_pages\n");
33211+ buffer_map_cleanup(bend, i);
33212+ return rc;
33213+ }
33214+
33215+ accel_hw_priv->dma_maps_index++;
33216+ NETBACK_ACCEL_STATS_OP(bend->stats.num_buffer_pages += pages);
33217+
33218+ //DPRINTK("%s: getting map address\n", __FUNCTION__);
33219+
33220+ *buf_addr_out = efx_vi_dma_get_map_addr(accel_hw_priv->efx_vih,
33221+ accel_hw_priv->dma_maps[index]);
33222+
33223+ //DPRINTK("%s: done\n", __FUNCTION__);
33224+
33225+ return 0;
33226+}
33227+
33228+
33229+int netback_accel_remove_buffers(struct netback_accel *bend)
33230+{
33231+ /* Only try to free buffers if accel_hw_priv was setup */
33232+ if (bend->hw_state != NETBACK_ACCEL_RES_NONE) {
33233+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33234+ int i;
33235+
33236+ efx_vi_reset(accel_hw_priv->efx_vih);
33237+
33238+ while (accel_hw_priv->dma_maps_index > 0) {
33239+ accel_hw_priv->dma_maps_index--;
33240+ i = accel_hw_priv->dma_maps_index;
33241+ efx_vi_dma_unmap_addrs(accel_hw_priv->efx_vih,
33242+ accel_hw_priv->dma_maps[i]);
33243+ }
33244+
33245+ while (bend->buffer_maps_index > 0) {
33246+ VPRINTK("Unmapping granted buffer %d\n",
33247+ bend->buffer_maps_index);
33248+ bend->buffer_maps_index--;
33249+ i = bend->buffer_maps_index;
33250+ net_accel_unmap_device_page(bend->hdev_data,
33251+ bend->buffer_maps[i],
33252+ bend->buffer_addrs[i]);
33253+ }
33254+
33255+ NETBACK_ACCEL_STATS_OP(bend->stats.num_buffer_pages = 0);
33256+ }
33257+
33258+ return 0;
33259+}
33260+
33261+/**************************************************************************
33262+ *
33263+ * Filter stuff
33264+ *
33265+ **************************************************************************/
33266+
33267+static int netback_accel_filter_init(struct netback_accel *bend)
33268+{
33269+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33270+ int i, rc;
33271+
33272+ BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_ALLOC);
33273+
33274+ spin_lock_init(&accel_hw_priv->filter_lock);
33275+
33276+ if ((rc = cuckoo_hash_init(&accel_hw_priv->filter_hash_table,
33277+ 5 /* space for 32 filters */, 8)) != 0) {
33278+ EPRINTK("Failed to initialise filter hash table\n");
33279+ return rc;
33280+ }
33281+
33282+ accel_hw_priv->fspecs = kzalloc(sizeof(struct netback_accel_filter_spec) *
33283+ bend->quotas.max_filters,
33284+ GFP_KERNEL);
33285+
33286+ if (accel_hw_priv->fspecs == NULL) {
33287+ EPRINTK("No memory for filter specs.\n");
33288+ cuckoo_hash_destroy(&accel_hw_priv->filter_hash_table);
33289+ return -ENOMEM;
33290+ }
33291+
33292+ for (i = 0; i < bend->quotas.max_filters; i++) {
33293+ accel_hw_priv->free_filters |= (1 << i);
33294+ }
33295+
33296+ /* Base mask on highest set bit in max_filters */
33297+ accel_hw_priv->filter_idx_mask = (1 << fls(bend->quotas.max_filters)) - 1;
33298+ VPRINTK("filter setup: max is %x mask is %x\n",
33299+ bend->quotas.max_filters, accel_hw_priv->filter_idx_mask);
33300+
33301+ bend->hw_state = NETBACK_ACCEL_RES_FILTER;
33302+
33303+ return 0;
33304+}
33305+
33306+
33307+static inline void make_filter_key(cuckoo_hash_ip_key *key,
33308+ struct netback_accel_filter_spec *filt)
33309+
33310+{
33311+ key->local_ip = filt->destip_be;
33312+ key->local_port = filt->destport_be;
33313+ key->proto = filt->proto;
33314+}
33315+
33316+
33317+static inline
33318+void netback_accel_free_filter(struct falcon_bend_accel_priv *accel_hw_priv,
33319+ int filter)
33320+{
33321+ cuckoo_hash_ip_key filter_key;
33322+
33323+ if (!(accel_hw_priv->free_filters & (1 << filter))) {
33324+ efx_vi_filter_stop(accel_hw_priv->efx_vih,
33325+ accel_hw_priv->fspecs[filter].filter_handle);
33326+ make_filter_key(&filter_key, &(accel_hw_priv->fspecs[filter]));
33327+ if (cuckoo_hash_remove(&accel_hw_priv->filter_hash_table,
33328+ (cuckoo_hash_key *)&filter_key)) {
33329+ EPRINTK("%s: Couldn't find filter to remove from table\n",
33330+ __FUNCTION__);
33331+ BUG();
33332+ }
33333+ }
33334+}
33335+
33336+
33337+static void netback_accel_filter_shutdown(struct netback_accel *bend)
33338+{
33339+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33340+ int i;
33341+ unsigned long flags;
33342+
33343+ BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_FILTER);
33344+
33345+ spin_lock_irqsave(&accel_hw_priv->filter_lock, flags);
33346+
33347+ BUG_ON(accel_hw_priv->fspecs == NULL);
33348+
33349+ for (i = 0; i < bend->quotas.max_filters; i++) {
33350+ netback_accel_free_filter(accel_hw_priv, i);
33351+ }
33352+
33353+ kfree(accel_hw_priv->fspecs);
33354+ accel_hw_priv->fspecs = NULL;
33355+ accel_hw_priv->free_filters = 0;
33356+
33357+ cuckoo_hash_destroy(&accel_hw_priv->filter_hash_table);
33358+
33359+ spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags);
33360+
33361+ bend->hw_state = NETBACK_ACCEL_RES_ALLOC;
33362+}
33363+
33364+
33365+/*! Suggest a filter to replace when we want to insert a new one and have
33366+ * none free.
33367+ */
33368+static unsigned get_victim_filter(struct netback_accel *bend)
33369+{
33370+ /*
33371+ * We could attempt to get really clever, and may do at some
33372+ * point, but random replacement is v. cheap and low on
33373+ * pathological worst cases.
33374+ */
33375+ unsigned index, cycles;
33376+
33377+ rdtscl(cycles);
33378+
33379+ /*
33380+ * Some doubt about the quality of the bottom few bits, so
33381+ * throw 'em away
33382+ */
33383+ index = (cycles >> 4) & ((struct falcon_bend_accel_priv *)
33384+ bend->accel_hw_priv)->filter_idx_mask;
33385+ /*
33386+ * We don't enforce that the number of filters is a power of
33387+ * two, but the masking gets us to within one subtraction of a
33388+ * valid index
33389+ */
33390+ if (index >= bend->quotas.max_filters)
33391+ index -= bend->quotas.max_filters;
33392+ DPRINTK("backend %s->%d has no free filters. Filter %d will be evicted\n",
33393+ bend->nicname, bend->far_end, index);
33394+ return index;
33395+}
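+
+/*
+ * Worked example (illustrative): with max_filters == 12, fls(12) == 4, so
+ * filter_idx_mask == 0xf and the masked TSC bits give an index in 0..15;
+ * the single subtraction above folds 12..15 back onto 0..3.  With a
+ * power-of-two quota such as 32 (fls == 6, mask == 0x3f) every index in
+ * 0..31 is reachable with equal probability.
+ */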
33396+
33397+
33398+/* Add a filter for the specified IP/port to the backend */
33399+int
33400+netback_accel_filter_check_add(struct netback_accel *bend,
33401+ struct netback_accel_filter_spec *filt)
33402+{
33403+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33404+ struct netback_accel_filter_spec *fs;
33405+ unsigned filter_index;
33406+ unsigned long flags;
33407+ int rc, recycling = 0;
33408+ cuckoo_hash_ip_key filter_key, evict_key;
33409+
33410+ BUG_ON(filt->proto != IPPROTO_TCP && filt->proto != IPPROTO_UDP);
33411+
33412+ DPRINTK("Will add %s filter for dst ip %08x and dst port %d\n",
33413+ (filt->proto == IPPROTO_TCP) ? "TCP" : "UDP",
33414+ be32_to_cpu(filt->destip_be), be16_to_cpu(filt->destport_be));
33415+
33416+ spin_lock_irqsave(&accel_hw_priv->filter_lock, flags);
33417+ /*
33418+ * Check to see if we're already filtering this IP address and
33419+ * port. Happens if you insert a filter mid-stream as there
33420+ * are many packets backed up to be delivered to dom0 already
33421+ */
33422+ make_filter_key(&filter_key, filt);
33423+ if (cuckoo_hash_lookup(&accel_hw_priv->filter_hash_table,
33424+ (cuckoo_hash_key *)(&filter_key),
33425+ &filter_index)) {
33426+ DPRINTK("Found matching filter %d already in table\n",
33427+ filter_index);
33428+ rc = -1;
33429+ goto out;
33430+ }
33431+
33432+ if (accel_hw_priv->free_filters == 0) {
33433+ filter_index = get_victim_filter(bend);
33434+ recycling = 1;
33435+ } else {
33436+ filter_index = __ffs(accel_hw_priv->free_filters);
33437+ clear_bit(filter_index, &accel_hw_priv->free_filters);
33438+ }
33439+
33440+ fs = &accel_hw_priv->fspecs[filter_index];
33441+
33442+ if (recycling) {
33443+ DPRINTK("Removing filter index %d handle %p\n", filter_index,
33444+ fs->filter_handle);
33445+
33446+ if ((rc = efx_vi_filter_stop(accel_hw_priv->efx_vih,
33447+ fs->filter_handle)) != 0) {
33448+ EPRINTK("Couldn't clear NIC filter table entry %d\n", rc);
33449+ }
33450+
33451+ make_filter_key(&evict_key, fs);
33452+ if (cuckoo_hash_remove(&accel_hw_priv->filter_hash_table,
33453+ (cuckoo_hash_key *)&evict_key)) {
33454+ EPRINTK("Couldn't find filter to remove from table\n");
33455+ BUG();
33456+ }
33457+ NETBACK_ACCEL_STATS_OP(bend->stats.num_filters--);
33458+ }
33459+
33460+ /* Update the filter spec with new details */
33461+ *fs = *filt;
33462+
33463+ if ((rc = cuckoo_hash_add(&accel_hw_priv->filter_hash_table,
33464+ (cuckoo_hash_key *)&filter_key, filter_index,
33465+ 1)) != 0) {
33466+ EPRINTK("Error (%d) adding filter to table\n", rc);
33467+ accel_hw_priv->free_filters |= (1 << filter_index);
33468+ goto out;
33469+ }
33470+
33471+ rc = efx_vi_filter(accel_hw_priv->efx_vih, filt->proto, filt->destip_be,
33472+ filt->destport_be,
33473+ (struct filter_resource_t **)&fs->filter_handle);
33474+
33475+ if (rc != 0) {
33476+ EPRINTK("Hardware filter insertion failed. Error %d\n", rc);
33477+ accel_hw_priv->free_filters |= (1 << filter_index);
33478+ cuckoo_hash_remove(&accel_hw_priv->filter_hash_table,
33479+ (cuckoo_hash_key *)&filter_key);
33480+ rc = -1;
33481+ goto out;
33482+ }
33483+
33484+ NETBACK_ACCEL_STATS_OP(bend->stats.num_filters++);
33485+
33486+ VPRINTK("%s: success index %d handle %p\n", __FUNCTION__, filter_index,
33487+ fs->filter_handle);
33488+
33489+ rc = filter_index;
33490+ out:
33491+ spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags);
33492+ return rc;
33493+}
33494+
33495+
33496+/* Remove a filter entry for the specific device and IP/port */
33497+static void netback_accel_filter_remove(struct netback_accel *bend,
33498+ int filter_index)
33499+{
33500+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33501+
33502+ BUG_ON(accel_hw_priv->free_filters & (1 << filter_index));
33503+ netback_accel_free_filter(accel_hw_priv, filter_index);
33504+ accel_hw_priv->free_filters |= (1 << filter_index);
33505+}
33506+
33507+
33508+/* Remove a filter entry for the specific device and IP/port */
33509+void netback_accel_filter_remove_spec(struct netback_accel *bend,
33510+ struct netback_accel_filter_spec *filt)
33511+{
33512+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33513+ unsigned filter_found;
33514+ unsigned long flags;
33515+ cuckoo_hash_ip_key filter_key;
33516+ struct netback_accel_filter_spec *fs;
33517+
33518+ if (filt->proto == IPPROTO_TCP) {
33519+ DPRINTK("Remove TCP filter for dst ip %08x and dst port %d\n",
33520+ be32_to_cpu(filt->destip_be),
33521+ be16_to_cpu(filt->destport_be));
33522+ } else if (filt->proto == IPPROTO_UDP) {
33523+ DPRINTK("Remove UDP filter for dst ip %08x and dst port %d\n",
33524+ be32_to_cpu(filt->destip_be),
33525+ be16_to_cpu(filt->destport_be));
33526+ } else {
33527+ /*
33528+ * This could be provoked by an evil frontend, so we can't
33529+ * BUG(); it is harmless because the checks below will fail
33530+ */
33531+ DPRINTK("Non-TCP/UDP filter dst ip %08x and dst port %d\n",
33532+ be32_to_cpu(filt->destip_be),
33533+ be16_to_cpu(filt->destport_be));
33534+ }
33535+
33536+ spin_lock_irqsave(&accel_hw_priv->filter_lock, flags);
33537+
33538+ make_filter_key(&filter_key, filt);
33539+ if (!cuckoo_hash_lookup(&accel_hw_priv->filter_hash_table,
33540+ (cuckoo_hash_key *)(&filter_key),
33541+ &filter_found)) {
33542+ EPRINTK("Couldn't find matching filter already in table\n");
33543+ goto out;
33544+ }
33545+
33546+ /* Do a full check to make sure we've not had a hash collision */
33547+ fs = &accel_hw_priv->fspecs[filter_found];
33548+ if (fs->destip_be == filt->destip_be &&
33549+ fs->destport_be == filt->destport_be &&
33550+ fs->proto == filt->proto &&
33551+ !memcmp(fs->mac, filt->mac, ETH_ALEN)) {
33552+ netback_accel_filter_remove(bend, filter_found);
33553+ } else {
33554+ EPRINTK("Entry in hash table does not match filter spec\n");
33555+ goto out;
33556+ }
33557+
33558+ out:
33559+ spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags);
33560+}
33561Index: head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.h
33562===================================================================
33563--- /dev/null 1970-01-01 00:00:00.000000000 +0000
33564+++ head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.h 2008-02-20 09:32:49.000000000 +0100
33565@@ -0,0 +1,88 @@
33566+/****************************************************************************
33567+ * Solarflare driver for Xen network acceleration
33568+ *
33569+ * Copyright 2006-2008: Solarflare Communications Inc,
33570+ * 9501 Jeronimo Road, Suite 250,
33571+ * Irvine, CA 92618, USA
33572+ *
33573+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
33574+ *
33575+ * This program is free software; you can redistribute it and/or modify it
33576+ * under the terms of the GNU General Public License version 2 as published
33577+ * by the Free Software Foundation, incorporated herein by reference.
33578+ *
33579+ * This program is distributed in the hope that it will be useful,
33580+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33581+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33582+ * GNU General Public License for more details.
33583+ *
33584+ * You should have received a copy of the GNU General Public License
33585+ * along with this program; if not, write to the Free Software
33586+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33587+ ****************************************************************************
33588+ */
33589+
33590+#ifndef NETBACK_ACCEL_SOLARFLARE_H
33591+#define NETBACK_ACCEL_SOLARFLARE_H
33592+
33593+#include "accel.h"
33594+#include "accel_msg_iface.h"
33595+
33596+#include "driverlink_api.h"
33597+
33598+#define MAX_NICS 5
33599+#define MAX_PORTS 2
33600+
33601+
33602+extern int netback_accel_sf_init(void);
33603+extern void netback_accel_sf_shutdown(void);
33604+extern int netback_accel_sf_hwtype(struct netback_accel *bend);
33605+
33606+extern int netback_accel_sf_char_init(void);
33607+extern void netback_accel_sf_char_shutdown(void);
33608+
33609+extern int netback_accel_setup_vnic_hw(struct netback_accel *bend);
33610+extern void netback_accel_shutdown_vnic_hw(struct netback_accel *bend);
33611+
33612+extern int netback_accel_add_buffers(struct netback_accel *bend, int pages,
33613+ int log2_pages, u32 *grants,
33614+ u32 *buf_addr_out);
33615+extern int netback_accel_remove_buffers(struct netback_accel *bend);
33616+
33617+
33618+/* Add a filter for the specified IP/port to the backend */
33619+extern int
33620+netback_accel_filter_check_add(struct netback_accel *bend,
33621+ struct netback_accel_filter_spec *filt);
33622+/* Remove a filter entry for the specific device and IP/port */
33623+extern
33624+void netback_accel_filter_remove_index(struct netback_accel *bend,
33625+ int filter_index);
33626+extern
33627+void netback_accel_filter_remove_spec(struct netback_accel *bend,
33628+ struct netback_accel_filter_spec *filt);
33629+
33630+/* This is designed to look a bit like a skb */
33631+struct netback_pkt_buf {
33632+ union {
33633+ unsigned char *raw;
33634+ } mac;
33635+ union {
33636+ struct iphdr *iph;
33637+ struct arphdr *arph;
33638+ unsigned char *raw;
33639+ } nh;
33640+ int protocol;
33641+};
33642+
33643+/*! \brief Handle a received packet: insert fast path filters as necessary
33644+ * \param skb The packet buffer
33645+ */
33646+extern void netback_accel_rx_packet(struct netback_pkt_buf *skb, void *fwd_priv);
33647+
33648+/*! \brief Handle a transmitted packet: update fast path filters as necessary
33649+ * \param skb The packet buffer
33650+ */
33651+extern void netback_accel_tx_packet(struct sk_buff *skb, void *fwd_priv);
33652+
33653+#endif /* NETBACK_ACCEL_SOLARFLARE_H */
33654Index: head-2008-11-25/drivers/xen/sfc_netback/accel_xenbus.c
33655===================================================================
33656--- /dev/null 1970-01-01 00:00:00.000000000 +0000
33657+++ head-2008-11-25/drivers/xen/sfc_netback/accel_xenbus.c 2008-02-26 10:54:11.000000000 +0100
33658@@ -0,0 +1,831 @@
33659+/****************************************************************************
33660+ * Solarflare driver for Xen network acceleration
33661+ *
33662+ * Copyright 2006-2008: Solarflare Communications Inc,
33663+ * 9501 Jeronimo Road, Suite 250,
33664+ * Irvine, CA 92618, USA
33665+ *
33666+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
33667+ *
33668+ * This program is free software; you can redistribute it and/or modify it
33669+ * under the terms of the GNU General Public License version 2 as published
33670+ * by the Free Software Foundation, incorporated herein by reference.
33671+ *
33672+ * This program is distributed in the hope that it will be useful,
33673+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33674+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33675+ * GNU General Public License for more details.
33676+ *
33677+ * You should have received a copy of the GNU General Public License
33678+ * along with this program; if not, write to the Free Software
33679+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33680+ ****************************************************************************
33681+ */
33682+
33683+#include <xen/evtchn.h>
33684+#include <linux/mutex.h>
33685+
33686+/* drivers/xen/netback/common.h */
33687+#include "common.h"
33688+
33689+#include "accel.h"
33690+#include "accel_solarflare.h"
33691+#include "accel_util.h"
33692+
33693+#define NODENAME_PATH_FMT "backend/vif/%d/%d"
33694+
33695+#define NETBACK_ACCEL_FROM_XENBUS_DEVICE(_dev) (struct netback_accel *) \
33696+ ((struct backend_info *)(_dev)->dev.driver_data)->netback_accel_priv
33697+
33698+/* List of all the bends currently in existence. */
33699+struct netback_accel *bend_list = NULL;
33700+DEFINE_MUTEX(bend_list_mutex);
33701+
33702+/* Put in bend_list. Must hold bend_list_mutex */
33703+static void link_bend(struct netback_accel *bend)
33704+{
33705+ bend->next_bend = bend_list;
33706+ bend_list = bend;
33707+}
33708+
33709+/* Remove from bend_list, Must hold bend_list_mutex */
33710+static void unlink_bend(struct netback_accel *bend)
33711+{
33712+ struct netback_accel *tmp = bend_list;
33713+ struct netback_accel *prev = NULL;
33714+ while (tmp != NULL) {
33715+ if (tmp == bend) {
33716+ if (prev != NULL)
33717+ prev->next_bend = bend->next_bend;
33718+ else
33719+ bend_list = bend->next_bend;
33720+ return;
33721+ }
33722+ prev = tmp;
33723+ tmp = tmp->next_bend;
33724+ }
33725+}
33726+
33727+
33728+/* Demultiplex a message IRQ from the frontend driver. */
33729+static irqreturn_t msgirq_from_frontend(int irq, void *context,
33730+ struct pt_regs *unused)
33731+{
33732+ struct xenbus_device *dev = context;
33733+ struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
33734+ VPRINTK("irq %d from device %s\n", irq, dev->nodename);
33735+ schedule_work(&bend->handle_msg);
33736+ return IRQ_HANDLED;
33737+}
33738+
33739+
33740+/*
33741+ * Demultiplex an IRQ from the frontend driver. This is never used
33742+ * functionally, but we need it to pass to the bind function, and may
33743+ * get called spuriously
33744+ */
33745+static irqreturn_t netirq_from_frontend(int irq, void *context,
33746+ struct pt_regs *unused)
33747+{
33748+ VPRINTK("netirq %d from device %s\n", irq,
33749+ ((struct xenbus_device *)context)->nodename);
33750+
33751+ return IRQ_HANDLED;
33752+}
33753+
33754+
33755+/* Read the limits values of the xenbus structure. */
33756+static
33757+void cfg_hw_quotas(struct xenbus_device *dev, struct netback_accel *bend)
33758+{
33759+ int err = xenbus_gather
33760+ (XBT_NIL, dev->nodename,
33761+ "limits/max-filters", "%d", &bend->quotas.max_filters,
33762+ "limits/max-buf-pages", "%d", &bend->quotas.max_buf_pages,
33763+ "limits/max-mcasts", "%d", &bend->quotas.max_mcasts,
33764+ NULL);
33765+ if (err) {
33766+ /*
33767+ * TODO what if they have previously been set by the
33768+ * user? This will overwrite with defaults. Maybe
33769+ * not what we want to do, but useful in startup
33770+ * case
33771+ */
33772+ DPRINTK("Failed to read quotas from xenbus, using defaults\n");
33773+ bend->quotas.max_filters = NETBACK_ACCEL_DEFAULT_MAX_FILTERS;
33774+ bend->quotas.max_buf_pages = sfc_netback_max_pages;
33775+ bend->quotas.max_mcasts = NETBACK_ACCEL_DEFAULT_MAX_MCASTS;
33776+ }
33777+
33778+ return;
33779+}
33780+
33781+
33782+static void bend_config_accel_change(struct xenbus_watch *watch,
33783+ const char **vec, unsigned int len)
33784+{
33785+ struct netback_accel *bend;
33786+
33787+ bend = container_of(watch, struct netback_accel, config_accel_watch);
33788+
33789+ mutex_lock(&bend->bend_mutex);
33790+ if (bend->config_accel_watch.node != NULL) {
33791+ struct xenbus_device *dev =
33792+ (struct xenbus_device *)bend->hdev_data;
33793+ DPRINTK("Watch matched, got dev %p otherend %p\n",
33794+ dev, dev->otherend);
33795+ if (!xenbus_exists(XBT_NIL, watch->node, "")) {
33796+ DPRINTK("Ignoring watch as otherend seems invalid\n");
33797+ goto out;
33798+ }
33799+
33800+ cfg_hw_quotas(dev, bend);
33801+ }
33802+ out:
33803+ mutex_unlock(&bend->bend_mutex);
33804+ return;
33805+}
33806+
33807+
33808+/*
33809+ * Setup watch on "limits" in the backend vif info to know when
33810+ * configuration has been set
33811+ */
33812+static int setup_config_accel_watch(struct xenbus_device *dev,
33813+ struct netback_accel *bend)
33814+{
33815+ int err;
33816+
33817+ VPRINTK("Setting watch on %s/%s\n", dev->nodename, "limits");
33818+
33819+ err = xenbus_watch_path2(dev, dev->nodename, "limits",
33820+ &bend->config_accel_watch,
33821+ bend_config_accel_change);
33822+
33823+ if (err) {
33824+ EPRINTK("%s: Failed to register xenbus watch: %d\n",
33825+ __FUNCTION__, err);
33826+ bend->config_accel_watch.node = NULL;
33827+ return err;
33828+ }
33829+ return 0;
33830+}
33831+
33832+
33833+static int
33834+cfg_frontend_info(struct xenbus_device *dev, struct netback_accel *bend,
33835+ int *grants)
33836+{
33837+ /* Get some info from xenbus on the event channel and shmem grant */
33838+ int err = xenbus_gather(XBT_NIL, dev->otherend,
33839+ "accel-msg-channel", "%u", &bend->msg_channel,
33840+ "accel-ctrl-page", "%d", &(grants[0]),
33841+ "accel-msg-page", "%d", &(grants[1]),
33842+ "accel-net-channel", "%u", &bend->net_channel,
33843+ NULL);
33844+ if (err)
33845+ EPRINTK("failed to read event channels or shmem grant: %d\n",
33846+ err);
33847+ else
33848+ DPRINTK("got event chan %d and net chan %d from frontend\n",
33849+ bend->msg_channel, bend->net_channel);
33850+ return err;
33851+}
33852+
33853+
33854+/* Setup all the comms needed to chat with the front end driver */
33855+static int setup_vnic(struct xenbus_device *dev)
33856+{
33857+ struct netback_accel *bend;
33858+ int grants[2], err, msgs_per_queue;
33859+
33860+ bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
33861+
33862+ err = cfg_frontend_info(dev, bend, grants);
33863+ if (err)
33864+ goto fail1;
33865+
33866+ /*
33867+ * If we get here, the frontend is Connected and the configuration
33868+ * options are available. All is well.
33869+ */
33870+
33871+ /* Get the hardware quotas for the VNIC in question. */
33872+ cfg_hw_quotas(dev, bend);
33873+
33874+ /* Set up the deferred work handlers */
33875+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
33876+ INIT_WORK(&bend->handle_msg,
33877+ netback_accel_msg_rx_handler);
33878+#else
33879+ INIT_WORK(&bend->handle_msg,
33880+ netback_accel_msg_rx_handler,
33881+ (void*)bend);
33882+#endif
33883+
33884+ /* Request the frontend mac */
33885+ err = net_accel_xen_net_read_mac(dev, bend->mac);
33886+ if (err)
33887+ goto fail2;
33888+
33889+ /* Set up the shared page. */
33890+ bend->shared_page = net_accel_map_grants_contig(dev, grants, 2,
33891+ &bend->sh_pages_unmap);
33892+
33893+ if (bend->shared_page == NULL) {
33894+ EPRINTK("failed to map shared page for %s\n", dev->otherend);
33895+ err = -ENOMEM;
33896+ goto fail2;
33897+ }
33898+
33899+ /* Initialise the shared page(s) used for comms */
33900+ net_accel_msg_init_page(bend->shared_page, PAGE_SIZE,
33901+ bend->net_dev->flags & IFF_UP);
33902+
33903+ msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg);
33904+
33905+ net_accel_msg_init_queue
33906+ (&bend->to_domU, &bend->shared_page->queue0,
33907+ (struct net_accel_msg *)((__u8*)bend->shared_page + PAGE_SIZE),
33908+ msgs_per_queue);
33909+
33910+ net_accel_msg_init_queue
33911+ (&bend->from_domU, &bend->shared_page->queue1,
33912+ (struct net_accel_msg *)((__u8*)bend->shared_page +
33913+ (3 * PAGE_SIZE / 2)),
33914+ msgs_per_queue);
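+
+	/*
+	 * Resulting layout of the two mapped pages (sketch, as implied by the
+	 * offsets above): page 0 carries the shared control structure with the
+	 * queue0/queue1 descriptors; page 1 carries the message rings, to-domU
+	 * messages in its first half and from-domU messages in its second
+	 * half, each ring holding msgs_per_queue entries.
+	 */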
33915+
33916+ /* Bind the message event channel to a handler
33917+ *
33918+ * Note that we will probably get a spurious interrupt when we
33919+ * do this, so it must not be done until we have set up
33920+ * everything we need to handle it.
33921+ */
33922+ err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id,
33923+ bend->msg_channel,
33924+ msgirq_from_frontend,
33925+ 0,
33926+ "netback_accel",
33927+ dev);
33928+ if (err < 0) {
33929+ EPRINTK("failed to bind event channel: %d\n", err);
33930+ goto fail3;
33931+ }
33932+ else
33933+ bend->msg_channel_irq = err;
33934+
33935+ /* TODO: No need to bind this evtchn to an irq. */
33936+ err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id,
33937+ bend->net_channel,
33938+ netirq_from_frontend,
33939+ 0,
33940+ "netback_accel",
33941+ dev);
33942+ if (err < 0) {
33943+ EPRINTK("failed to bind net channel: %d\n", err);
33944+ goto fail4;
33945+ }
33946+ else
33947+ bend->net_channel_irq = err;
33948+
33949+ /*
33950+ * Grab ourselves an entry in the forwarding hash table. We do
33951+ * this now so we don't have the embarrassment of sorting out
33952+ * an allocation failure while at IRQ. Because we pass NULL as
33953+ * the context, the actual hash lookup will succeed for this
33954+ * NIC, but the check for somewhere to forward to will
33955+ * fail. This is necessary to prevent forwarding before
33956+ * hardware resources are set up
33957+ */
33958+ err = netback_accel_fwd_add(bend->mac, NULL, bend->fwd_priv);
33959+ if (err) {
33960+ EPRINTK("failed to add to fwd hash table\n");
33961+ goto fail5;
33962+ }
33963+
33964+ /*
33965+ * Say hello to frontend. Important to do this straight after
33966+ * obtaining the message queue as otherwise we are vulnerable
33967+ * to an evil frontend sending a HELLO-REPLY before we've sent
33968+ * the HELLO and confusing us
33969+ */
33970+ netback_accel_msg_tx_hello(bend, NET_ACCEL_MSG_VERSION);
33971+ return 0;
33972+
33973+ fail5:
33974+ unbind_from_irqhandler(bend->net_channel_irq, dev);
33975+ fail4:
33976+ unbind_from_irqhandler(bend->msg_channel_irq, dev);
33977+ fail3:
33978+ net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap);
33979+ bend->shared_page = NULL;
33980+ bend->sh_pages_unmap = NULL;
33981+ fail2:
33982+ fail1:
33983+ return err;
33984+}
33985+
33986+
33987+static int read_nicname(struct xenbus_device *dev, struct netback_accel *bend)
33988+{
33989+ int len;
33990+
33991+ /* nic name used to select interface used for acceleration */
33992+ bend->nicname = xenbus_read(XBT_NIL, dev->nodename, "accel", &len);
33993+ if (IS_ERR(bend->nicname))
33994+ return PTR_ERR(bend->nicname);
33995+
33996+ return 0;
33997+}
33998+
33999+static const char *frontend_name = "sfc_netfront";
34000+
34001+static int publish_frontend_name(struct xenbus_device *dev)
34002+{
34003+ struct xenbus_transaction tr;
34004+ int err;
34005+
34006+ /* Publish the name of the frontend driver */
34007+ do {
34008+ err = xenbus_transaction_start(&tr);
34009+ if (err != 0) {
34010+ EPRINTK("%s: transaction start failed\n", __FUNCTION__);
34011+ return err;
34012+ }
34013+ err = xenbus_printf(tr, dev->nodename, "accel-frontend",
34014+ "%s", frontend_name);
34015+ if (err != 0) {
34016+ EPRINTK("%s: xenbus_printf failed\n", __FUNCTION__);
34017+ xenbus_transaction_end(tr, 1);
34018+ return err;
34019+ }
34020+ err = xenbus_transaction_end(tr, 0);
34021+ } while (err == -EAGAIN);
34022+
34023+ if (err != 0) {
34024+ EPRINTK("failed to end frontend name transaction\n");
34025+ return err;
34026+ }
34027+ return 0;
34028+}
34029+
34030+
34031+static int unpublish_frontend_name(struct xenbus_device *dev)
34032+{
34033+ struct xenbus_transaction tr;
34034+ int err;
34035+
34036+ do {
34037+ err = xenbus_transaction_start(&tr);
34038+ if (err != 0)
34039+ break;
34040+ err = xenbus_rm(tr, dev->nodename, "accel-frontend");
34041+ if (err != 0) {
34042+ xenbus_transaction_end(tr, 1);
34043+ break;
34044+ }
34045+ err = xenbus_transaction_end(tr, 0);
34046+ } while (err == -EAGAIN);
34047+
34048+ return err;
34049+}
34050+
34051+
34052+static void cleanup_vnic(struct netback_accel *bend)
34053+{
34054+ struct xenbus_device *dev;
34055+
34056+ dev = (struct xenbus_device *)bend->hdev_data;
34057+
34058+ DPRINTK("%s: bend %p dev %p\n", __FUNCTION__, bend, dev);
34059+
34060+ DPRINTK("%s: Remove %p's mac from fwd table...\n",
34061+ __FUNCTION__, bend);
34062+ netback_accel_fwd_remove(bend->mac, bend->fwd_priv);
34063+
34064+ /* Free buffer table allocations */
34065+ netback_accel_remove_buffers(bend);
34066+
34067+ DPRINTK("%s: Release hardware resources...\n", __FUNCTION__);
34068+ if (bend->accel_shutdown)
34069+ bend->accel_shutdown(bend);
34070+
34071+ if (bend->net_channel_irq) {
34072+ unbind_from_irqhandler(bend->net_channel_irq, dev);
34073+ bend->net_channel_irq = 0;
34074+ }
34075+
34076+ if (bend->msg_channel_irq) {
34077+ unbind_from_irqhandler(bend->msg_channel_irq, dev);
34078+ bend->msg_channel_irq = 0;
34079+ }
34080+
34081+ if (bend->sh_pages_unmap) {
34082+ DPRINTK("%s: Unmap grants %p\n", __FUNCTION__,
34083+ bend->sh_pages_unmap);
34084+ net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap);
34085+ bend->sh_pages_unmap = NULL;
34086+ bend->shared_page = NULL;
34087+ }
34088+}
34089+
34090+
34091+/*************************************************************************/
34092+
34093+/*
34094+ * The following code handles accelstate changes between the frontend
34095+ * and the backend. It calls setup_vnic and cleanup_vnic in matching
34096+ * pairs in response to transitions.
34097+ *
34098+ * Valid state transitions for Dom0 are as follows:
34099+ *
34100+ * Closed->Init on probe or in response to Init from domU
34101+ * Closed->Closing on error/remove
34102+ *
34103+ * Init->Connected in response to Connected from domU
34104+ * Init->Closing on error/remove or in response to Closing from domU
34105+ *
34106+ * Connected->Closing on error/remove or in response to Closing from domU
34107+ *
34108+ * Closing->Closed in response to Closed from domU
34109+ *
34110+ */
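+
+/*
+ * Example (a typical accelerated lifecycle, assuming a well-behaved domU):
+ *
+ *   probe                    Closed    -> Init
+ *   domU reports Connected   Init      -> Connected  (setup_vnic)
+ *   domU reports Closing     Connected -> Closing
+ *   domU reports Closed      Closing   -> Closed     (cleanup_vnic)
+ */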
34111+
34112+
34113+static void netback_accel_frontend_changed(struct xenbus_device *dev,
34114+ XenbusState frontend_state)
34115+{
34116+ struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
34117+ XenbusState backend_state;
34118+
34119+ DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n",
34120+ __FUNCTION__, xenbus_strstate(bend->frontend_state),
34121+ xenbus_strstate(frontend_state), dev->nodename, dev->otherend);
34122+
34123+ /*
34124+ * Ignore duplicate state changes. This can happen if the
34125+ * frontend changes state twice in quick succession and the
34126+ * first watch fires in the backend after the second
34127+ * transition has completed.
34128+ */
34129+ if (bend->frontend_state == frontend_state)
34130+ return;
34131+
34132+ bend->frontend_state = frontend_state;
34133+ backend_state = bend->backend_state;
34134+
34135+ switch (frontend_state) {
34136+ case XenbusStateInitialising:
34137+ if (backend_state == XenbusStateClosed &&
34138+ !bend->removing)
34139+ backend_state = XenbusStateInitialising;
34140+ break;
34141+
34142+ case XenbusStateConnected:
34143+ if (backend_state == XenbusStateInitialising) {
34144+ if (!bend->vnic_is_setup &&
34145+ setup_vnic(dev) == 0) {
34146+ bend->vnic_is_setup = 1;
34147+ backend_state = XenbusStateConnected;
34148+ } else {
34149+ backend_state = XenbusStateClosing;
34150+ }
34151+ }
34152+ break;
34153+
34154+ case XenbusStateInitWait:
34155+ case XenbusStateInitialised:
34156+ default:
34157+ DPRINTK("Unknown state %s (%d) from frontend.\n",
34158+ xenbus_strstate(frontend_state), frontend_state);
34159+ /* Unknown state. Fall through. */
34160+ case XenbusStateClosing:
34161+ if (backend_state != XenbusStateClosed)
34162+ backend_state = XenbusStateClosing;
34163+
34164+ /*
34165+ * The bend will now persist (with watches active) in
34166+ * case the frontend comes back again, e.g. after
34167+ * frontend module reload or suspend/resume
34168+ */
34169+
34170+ break;
34171+
34172+ case XenbusStateUnknown:
34173+ case XenbusStateClosed:
34174+ if (bend->vnic_is_setup) {
34175+ bend->vnic_is_setup = 0;
34176+ cleanup_vnic(bend);
34177+ }
34178+
34179+ if (backend_state == XenbusStateClosing)
34180+ backend_state = XenbusStateClosed;
34181+ break;
34182+ }
34183+
34184+ if (backend_state != bend->backend_state) {
34185+ DPRINTK("Switching from state %s (%d) to %s (%d)\n",
34186+ xenbus_strstate(bend->backend_state),
34187+ bend->backend_state,
34188+ xenbus_strstate(backend_state), backend_state);
34189+ bend->backend_state = backend_state;
34190+ net_accel_update_state(dev, backend_state);
34191+ }
34192+
34193+ wake_up(&bend->state_wait_queue);
34194+}
34195+
34196+
34197+/* accelstate on the frontend's xenbus node has changed */
34198+static void bend_domu_accel_change(struct xenbus_watch *watch,
34199+ const char **vec, unsigned int len)
34200+{
34201+ int state;
34202+ struct netback_accel *bend;
34203+
34204+ bend = container_of(watch, struct netback_accel, domu_accel_watch);
34205+ if (bend->domu_accel_watch.node != NULL) {
34206+ struct xenbus_device *dev =
34207+ (struct xenbus_device *)bend->hdev_data;
34208+ VPRINTK("Watch matched, got dev %p otherend %p\n",
34209+ dev, dev->otherend);
34210+ /*
34211+ * dev->otherend != NULL check to protect against
34212+ * watch firing when domain goes away and we haven't
34213+ * yet cleaned up
34214+ */
34215+ if (!dev->otherend ||
34216+ !xenbus_exists(XBT_NIL, watch->node, "") ||
34217+ strncmp(dev->otherend, vec[XS_WATCH_PATH],
34218+ strlen(dev->otherend))) {
34219+ DPRINTK("Ignoring watch as otherend seems invalid\n");
34220+ return;
34221+ }
34222+
34223+ mutex_lock(&bend->bend_mutex);
34224+
34225+ xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d",
34226+ &state);
34227+ netback_accel_frontend_changed(dev, state);
34228+
34229+ mutex_unlock(&bend->bend_mutex);
34230+ }
34231+}
34232+
34233+/* Setup watch on frontend's accelstate */
34234+static int setup_domu_accel_watch(struct xenbus_device *dev,
34235+ struct netback_accel *bend)
34236+{
34237+ int err;
34238+
34239+ VPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate");
34240+
34241+ err = xenbus_watch_path2(dev, dev->otherend, "accelstate",
34242+ &bend->domu_accel_watch,
34243+ bend_domu_accel_change);
34244+ if (err) {
34245+ EPRINTK("%s: Failed to register xenbus watch: %d\n",
34246+ __FUNCTION__, err);
34247+ goto fail;
34248+ }
34249+ return 0;
34250+ fail:
34251+ bend->domu_accel_watch.node = NULL;
34252+ return err;
34253+}
34254+
34255+
34256+int netback_accel_probe(struct xenbus_device *dev)
34257+{
34258+ struct netback_accel *bend;
34259+ struct backend_info *binfo;
34260+ int err;
34261+
34262+ DPRINTK("%s: passed device %s\n", __FUNCTION__, dev->nodename);
34263+
34264+ /* Allocate structure to store all our state... */
34265+ bend = kzalloc(sizeof(struct netback_accel), GFP_KERNEL);
34266+ if (bend == NULL) {
34267+ DPRINTK("%s: no memory for bend\n", __FUNCTION__);
34268+ return -ENOMEM;
34269+ }
34270+
34271+ mutex_init(&bend->bend_mutex);
34272+
34273+ mutex_lock(&bend->bend_mutex);
34274+
34275+ /* ...and store it where we can get at it */
34276+ binfo = (struct backend_info *) dev->dev.driver_data;
34277+ binfo->netback_accel_priv = bend;
34278+ /* And vice-versa */
34279+ bend->hdev_data = dev;
34280+
34281+ DPRINTK("%s: Adding bend %p to list\n", __FUNCTION__, bend);
34282+
34283+ init_waitqueue_head(&bend->state_wait_queue);
34284+ bend->vnic_is_setup = 0;
34285+ bend->frontend_state = XenbusStateUnknown;
34286+ bend->backend_state = XenbusStateClosed;
34287+ bend->removing = 0;
34288+
34289+ sscanf(dev->nodename, NODENAME_PATH_FMT, &bend->far_end,
34290+ &bend->vif_num);
34291+
34292+ err = read_nicname(dev, bend);
34293+ if (err) {
34294+ /*
34295+ * Technically not an error, just means we're not
34296+ * supposed to accelerate this
34297+ */
34298+ DPRINTK("failed to get device name\n");
34299+ goto fail_nicname;
34300+ }
34301+
34302+ /*
34303+ * Look up the device name in the list of NICs provided by
34304+ * driverlink to get the hardware type.
34305+ */
34306+ err = netback_accel_sf_hwtype(bend);
34307+ if (err) {
34308+ /*
34309+ * Technically not an error, just means we're not
34310+ * supposed to accelerate this, probably belongs to
34311+ * some other backend
34312+ */
34313+ DPRINTK("failed to match device name\n");
34314+ goto fail_init_type;
34315+ }
34316+
34317+ err = publish_frontend_name(dev);
34318+ if (err)
34319+ goto fail_publish;
34320+
34321+ err = netback_accel_debugfs_create(bend);
34322+ if (err)
34323+ goto fail_debugfs;
34324+
34325+ mutex_unlock(&bend->bend_mutex);
34326+
34327+ err = setup_config_accel_watch(dev, bend);
34328+ if (err)
34329+ goto fail_config_watch;
34330+
34331+ err = setup_domu_accel_watch(dev, bend);
34332+ if (err)
34333+ goto fail_domu_watch;
34334+
34335+ /*
34336+ * Indicate to the other end that we're ready to start unless
34337+ * the watch has already fired.
34338+ */
34339+ mutex_lock(&bend->bend_mutex);
34340+ if (bend->backend_state == XenbusStateClosed) {
34341+ bend->backend_state = XenbusStateInitialising;
34342+ net_accel_update_state(dev, XenbusStateInitialising);
34343+ }
34344+ mutex_unlock(&bend->bend_mutex);
34345+
34346+ mutex_lock(&bend_list_mutex);
34347+ link_bend(bend);
34348+ mutex_unlock(&bend_list_mutex);
34349+
34350+ return 0;
34351+
34352+fail_domu_watch:
34353+
34354+ unregister_xenbus_watch(&bend->config_accel_watch);
34355+ kfree(bend->config_accel_watch.node);
34356+fail_config_watch:
34357+
34358+ /*
34359+ * Flush the scheduled work queue before freeing bend to get
34360+ * rid of any pending netback_accel_msg_rx_handler()
34361+ */
34362+ flush_scheduled_work();
34363+
34364+ mutex_lock(&bend->bend_mutex);
34365+ net_accel_update_state(dev, XenbusStateUnknown);
34366+ netback_accel_debugfs_remove(bend);
34367+fail_debugfs:
34368+
34369+ unpublish_frontend_name(dev);
34370+fail_publish:
34371+
34372+ /* No need to reverse netback_accel_sf_hwtype. */
34373+fail_init_type:
34374+
34375+ kfree(bend->nicname);
34376+fail_nicname:
34377+ binfo->netback_accel_priv = NULL;
34378+ mutex_unlock(&bend->bend_mutex);
34379+ kfree(bend);
34380+ return err;
34381+}
34382+
34383+
34384+int netback_accel_remove(struct xenbus_device *dev)
34385+{
34386+ struct backend_info *binfo;
34387+ struct netback_accel *bend;
34388+ int frontend_state;
34389+
34390+ binfo = (struct backend_info *) dev->dev.driver_data;
34391+ bend = (struct netback_accel *) binfo->netback_accel_priv;
34392+
34393+ DPRINTK("%s: dev %p bend %p\n", __FUNCTION__, dev, bend);
34394+
34395+ BUG_ON(bend == NULL);
34396+
34397+ mutex_lock(&bend_list_mutex);
34398+ unlink_bend(bend);
34399+ mutex_unlock(&bend_list_mutex);
34400+
34401+ mutex_lock(&bend->bend_mutex);
34402+
34403+ /* Reject any requests to connect. */
34404+ bend->removing = 1;
34405+
34406+ /*
34407+ * Switch to closing to tell the other end that we're going
34408+ * away.
34409+ */
34410+ if (bend->backend_state != XenbusStateClosing) {
34411+ bend->backend_state = XenbusStateClosing;
34412+ net_accel_update_state(dev, XenbusStateClosing);
34413+ }
34414+
34415+ frontend_state = (int)XenbusStateUnknown;
34416+ xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d",
34417+ &frontend_state);
34418+
34419+ mutex_unlock(&bend->bend_mutex);
34420+
34421+ /*
34422+ * Wait until this end goes to the closed state. This happens
34423+ * in response to the other end going to the closed state.
34424+ * Don't bother doing this if the other end is already closed
34425+ * because if it is then there is nothing to do.
34426+ */
34427+ if (frontend_state != (int)XenbusStateClosed &&
34428+ frontend_state != (int)XenbusStateUnknown)
34429+ wait_event(bend->state_wait_queue,
34430+ bend->backend_state == XenbusStateClosed);
34431+
34432+ unregister_xenbus_watch(&bend->domu_accel_watch);
34433+ kfree(bend->domu_accel_watch.node);
34434+
34435+ unregister_xenbus_watch(&bend->config_accel_watch);
34436+ kfree(bend->config_accel_watch.node);
34437+
34438+ /*
34439+ * Flush the scheduled work queue before freeing bend to get
34440+ * rid of any pending netback_accel_msg_rx_handler()
34441+ */
34442+ flush_scheduled_work();
34443+
34444+ mutex_lock(&bend->bend_mutex);
34445+
34446+ /* Tear down the vnic if it was set up. */
34447+ if (bend->vnic_is_setup) {
34448+ bend->vnic_is_setup = 0;
34449+ cleanup_vnic(bend);
34450+ }
34451+
34452+ bend->backend_state = XenbusStateUnknown;
34453+ net_accel_update_state(dev, XenbusStateUnknown);
34454+
34455+ netback_accel_debugfs_remove(bend);
34456+
34457+ unpublish_frontend_name(dev);
34458+
34459+ kfree(bend->nicname);
34460+
34461+ binfo->netback_accel_priv = NULL;
34462+
34463+ mutex_unlock(&bend->bend_mutex);
34464+
34465+ kfree(bend);
34466+
34467+ return 0;
34468+}
34469+
34470+
34471+void netback_accel_shutdown_bends(void)
34472+{
34473+ mutex_lock(&bend_list_mutex);
34474+ /*
34475+ * I think we should have had a remove callback for all
34476+ * interfaces before being allowed to unload the module
34477+ */
34478+ BUG_ON(bend_list != NULL);
34479+ mutex_unlock(&bend_list_mutex);
34480+}
34481+
34482+
34483+void netback_accel_set_closing(struct netback_accel *bend)
34484+{
34485+
34486+ bend->backend_state = XenbusStateClosing;
34487+ net_accel_update_state((struct xenbus_device *)bend->hdev_data,
34488+ XenbusStateClosing);
34489+}
34490Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat.h
34491===================================================================
34492--- /dev/null 1970-01-01 00:00:00.000000000 +0000
34493+++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat.h 2008-02-20 09:32:49.000000000 +0100
34494@@ -0,0 +1,53 @@
34495+/****************************************************************************
34496+ * Copyright 2002-2005: Level 5 Networks Inc.
34497+ * Copyright 2005-2008: Solarflare Communications Inc,
34498+ * 9501 Jeronimo Road, Suite 250,
34499+ * Irvine, CA 92618, USA
34500+ *
34501+ * Maintained by Solarflare Communications
34502+ * <linux-xen-drivers@solarflare.com>
34503+ * <onload-dev@solarflare.com>
34504+ *
34505+ * This program is free software; you can redistribute it and/or modify it
34506+ * under the terms of the GNU General Public License version 2 as published
34507+ * by the Free Software Foundation, incorporated herein by reference.
34508+ *
34509+ * This program is distributed in the hope that it will be useful,
34510+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34511+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34512+ * GNU General Public License for more details.
34513+ *
34514+ * You should have received a copy of the GNU General Public License
34515+ * along with this program; if not, write to the Free Software
34516+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34517+ ****************************************************************************
34518+ */
34519+
34520+/*
34521+ * \author djr
34522+ * \brief Compatibility layer. Provides definitions of fundamental
34523+ * types and definitions that are used throughout CI source
34524+ * code. It does not introduce any link time dependencies,
34525+ * or include any unnecessary system headers.
34526+ */
34527+/*! \cidoxg_include_ci */
34528+
34529+#ifndef __CI_COMPAT_H__
34530+#define __CI_COMPAT_H__
34531+
34532+#ifdef __cplusplus
34533+extern "C" {
34534+#endif
34535+
34536+#include <ci/compat/primitive.h>
34537+#include <ci/compat/sysdep.h>
34538+#include <ci/compat/utils.h>
34539+
34540+
34541+#ifdef __cplusplus
34542+}
34543+#endif
34544+
34545+#endif /* __CI_COMPAT_H__ */
34546+
34547+/*! \cidoxg_end */
34548Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc.h
34549===================================================================
34550--- /dev/null 1970-01-01 00:00:00.000000000 +0000
34551+++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc.h 2008-02-20 09:32:49.000000000 +0100
34552@@ -0,0 +1,158 @@
34553+/****************************************************************************
34554+ * Copyright 2002-2005: Level 5 Networks Inc.
34555+ * Copyright 2005-2008: Solarflare Communications Inc,
34556+ * 9501 Jeronimo Road, Suite 250,
34557+ * Irvine, CA 92618, USA
34558+ *
34559+ * Maintained by Solarflare Communications
34560+ * <linux-xen-drivers@solarflare.com>
34561+ * <onload-dev@solarflare.com>
34562+ *
34563+ * This program is free software; you can redistribute it and/or modify it
34564+ * under the terms of the GNU General Public License version 2 as published
34565+ * by the Free Software Foundation, incorporated herein by reference.
34566+ *
34567+ * This program is distributed in the hope that it will be useful,
34568+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34569+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34570+ * GNU General Public License for more details.
34571+ *
34572+ * You should have received a copy of the GNU General Public License
34573+ * along with this program; if not, write to the Free Software
34574+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34575+ ****************************************************************************
34576+ */
34577+
34578+/*! \cidoxg_include_ci_compat */
34579+
34580+#ifndef __CI_COMPAT_GCC_H__
34581+#define __CI_COMPAT_GCC_H__
34582+
34583+
34584+#define CI_HAVE_INT64
34585+
34586+
34587+#if defined(__linux__) && defined(__KERNEL__)
34588+
34589+# include <linux/types.h>
34590+
34591+typedef __u64 ci_uint64;
34592+typedef __s64 ci_int64;
34593+# if BITS_PER_LONG == 32
34594+typedef __s32 ci_ptr_arith_t;
34595+typedef __u32 ci_uintptr_t;
34596+# else
34597+typedef __s64 ci_ptr_arith_t;
34598+typedef __u64 ci_uintptr_t;
34599+# endif
34600+
34601+
34602+/* it's not obvious to me why the below is wrong for x86_64, but
34603+ * gcc seems to complain on this platform
34604+ */
34605+# if defined(__ia64__)
34606+# define CI_PRId64 "ld"
34607+# define CI_PRIi64 "li"
34608+# define CI_PRIo64 "lo"
34609+# define CI_PRIu64 "lu"
34610+# define CI_PRIx64 "lx"
34611+# define CI_PRIX64 "lX"
34612+# else
34613+# define CI_PRId64 "lld"
34614+# define CI_PRIi64 "lli"
34615+# define CI_PRIo64 "llo"
34616+# define CI_PRIu64 "llu"
34617+# define CI_PRIx64 "llx"
34618+# define CI_PRIX64 "llX"
34619+# endif
34620+
34621+# define CI_PRId32 "d"
34622+# define CI_PRIi32 "i"
34623+# define CI_PRIo32 "o"
34624+# define CI_PRIu32 "u"
34625+# define CI_PRIx32 "x"
34626+# define CI_PRIX32 "X"
34627+
34628+#else
34629+
34630+# include <stdint.h>
34631+# include <inttypes.h>
34632+
34633+typedef uint64_t ci_uint64;
34634+typedef int64_t ci_int64;
34635+typedef intptr_t ci_ptr_arith_t;
34636+typedef uintptr_t ci_uintptr_t;
34637+
34638+# define CI_PRId64 PRId64
34639+# define CI_PRIi64 PRIi64
34640+# define CI_PRIo64 PRIo64
34641+# define CI_PRIu64 PRIu64
34642+# define CI_PRIx64 PRIx64
34643+# define CI_PRIX64 PRIX64
34644+
34645+# define CI_PRId32 PRId32
34646+# define CI_PRIi32 PRIi32
34647+# define CI_PRIo32 PRIo32
34648+# define CI_PRIu32 PRIu32
34649+# define CI_PRIx32 PRIx32
34650+# define CI_PRIX32 PRIX32
34651+
34652+#endif
34653+
34654+
34655+typedef ci_uint64 ci_fixed_descriptor_t;
34656+
34657+#define from_fixed_descriptor(desc) ((ci_uintptr_t)(desc))
34658+#define to_fixed_descriptor(desc) ((ci_fixed_descriptor_t)(ci_uintptr_t)(desc))
34659+
34660+
34661+#if __GNUC__ >= 3 && !defined(__cplusplus)
34662+/*
34663+** Checks that [p_mbr] has the same type as [&c_type::mbr_name].
34664+*/
34665+# define CI_CONTAINER(c_type, mbr_name, p_mbr) \
34666+ __builtin_choose_expr( \
34667+ __builtin_types_compatible_p(__typeof__(&((c_type*)0)->mbr_name), \
34668+ __typeof__(p_mbr)), \
34669+ __CI_CONTAINER(c_type, mbr_name, p_mbr), (void)0)
34670+
34671+# define ci_restrict __restrict__
34672+#endif
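+
+/* Usage sketch for CI_CONTAINER (illustrative; __CI_CONTAINER itself is
+ * expected to be the usual offsetof-based container-of helper defined in a
+ * sibling compat header):
+ *
+ *   struct pkt { int len; double ts; };
+ *
+ *   double *pts = &some_pkt->ts;
+ *   struct pkt *p = CI_CONTAINER(struct pkt, ts, pts);
+ *
+ * If [pts] does not have the type of [&struct pkt::ts], the expression
+ * degrades to (void)0 and the assignment fails to compile.
+ */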
34673+
34674+
34675+#if !defined(__KERNEL__) || defined(__unix__)
34676+#define CI_HAVE_NPRINTF 1
34677+#endif
34678+
34679+
34680+/* At what version was this introduced? */
34681+#if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91)
34682+# define CI_LIKELY(t) __builtin_expect((t), 1)
34683+# define CI_UNLIKELY(t) __builtin_expect((t), 0)
34684+#endif
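+
+/* Usage sketch: branch-prediction hints for rarely taken paths, e.g.
+ *
+ *   if (CI_UNLIKELY(buf == NULL))
+ *           return -ENOMEM;
+ *
+ * The hints are only defined when the gcc version check above passes, so
+ * portable callers need their own fallback when the macros are absent.
+ */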
34685+
34686+/**********************************************************************
34687+ * Attributes
34688+ */
34689+#if __GNUC__ >= 3 && defined(NDEBUG)
34690+# define CI_HF __attribute__((visibility("hidden")))
34691+# define CI_HV __attribute__((visibility("hidden")))
34692+#else
34693+# define CI_HF
34694+# define CI_HV
34695+#endif
34696+
34697+#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
34698+# define ci_noinline static __attribute__((__noinline__))
34699+/* (Linux 2.6 defines its own "noinline", so we use the "__noinline__" form) */
34700+#else
34701+# define ci_noinline static
34702+#endif
34703+
34704+#define CI_ALIGN(x) __attribute__ ((aligned (x)))
34705+
34706+#define CI_PRINTF_LIKE(a,b) __attribute__((format(printf,a,b)))
34707+
34708+#endif /* __CI_COMPAT_GCC_H__ */
34709+
34710+/*! \cidoxg_end */
34711Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc_x86.h
34712===================================================================
34713--- /dev/null 1970-01-01 00:00:00.000000000 +0000
34714+++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc_x86.h 2008-02-20 09:32:49.000000000 +0100
34715@@ -0,0 +1,115 @@
34716+/****************************************************************************
34717+ * Copyright 2002-2005: Level 5 Networks Inc.
34718+ * Copyright 2005-2008: Solarflare Communications Inc,
34719+ * 9501 Jeronimo Road, Suite 250,
34720+ * Irvine, CA 92618, USA
34721+ *
34722+ * Maintained by Solarflare Communications
34723+ * <linux-xen-drivers@solarflare.com>
34724+ * <onload-dev@solarflare.com>
34725+ *
34726+ * This program is free software; you can redistribute it and/or modify it
34727+ * under the terms of the GNU General Public License version 2 as published
34728+ * by the Free Software Foundation, incorporated herein by reference.
34729+ *
34730+ * This program is distributed in the hope that it will be useful,
34731+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34732+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34733+ * GNU General Public License for more details.
34734+ *
34735+ * You should have received a copy of the GNU General Public License
34736+ * along with this program; if not, write to the Free Software
34737+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34738+ ****************************************************************************
34739+ */
34740+
34741+/*! \cidoxg_include_ci_compat */
34742+
34743+#ifndef __CI_COMPAT_GCC_X86_H__
34744+#define __CI_COMPAT_GCC_X86_H__
34745+
34746+/*
34747+** The facts:
34748+**
34749+** SSE sfence
34750+** SSE2 lfence, mfence, pause
34751+*/
34752+
34753+/*
34754+ Barriers to enforce ordering with respect to:
34755+
34756+ normal memory use: ci_wmb, ci_rmb, ci_mb
34757+ IO bus access use: ci_wiob, ci_riob, ci_iob
34758+*/
34759+#if defined(__x86_64__)
34760+# define ci_x86_mb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
34761+#else
34762+# define ci_x86_mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
34763+#endif
34764+
34765+/* ?? Measure the latency impact of sfence on a modern processor before we
34766+ decide how to integrate with respect to write-combining. */
34767+
34768+/* DJR: I don't think we need to add "memory" here. It means the asm does
34769+** something to memory that GCC doesn't understand. But all this does is
34770+** commit changes that GCC thinks have already happened. NB. GCC will not
34771+** reorder across a __volatile__ __asm__ anyway.
34772+*/
34773+#define ci_gcc_fence() __asm__ __volatile__ ("")
34774+
34775+#if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
34776+# define ci_x86_sfence() __asm__ __volatile__ ("sfence")
34777+# define ci_x86_lfence() __asm__ __volatile__ ("lfence")
34778+# define ci_x86_mfence() __asm__ __volatile__ ("mfence")
34779+#else
34780+# define ci_x86_sfence() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8")
34781+# define ci_x86_lfence() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xE8")
34782+# define ci_x86_mfence() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF0")
34783+#endif
34784+
34785+
34786+/* x86 processors up to the P4 Xeon store in order unless executing streaming
34787+ extensions or using write-combining.
34788+
34789+ Hence we do not define ci_wmb to use sfence by default. The requirement is
34790+ that we do not use write-combining to memory, and any code which uses SSE
34791+ extensions must call sfence directly.
34792+
34793+ We need to track non-Intel clones which may support out-of-order stores.
34794+
34795+*/
34796+
34797+#if CI_CPU_OOS
34798+# if CI_CPU_HAS_SSE
34799+# define ci_wmb() ci_x86_sfence()
34800+# else
34801+# define ci_wmb() ci_x86_mb()
34802+# endif
34803+#else
34804+# define ci_wmb() ci_gcc_fence()
34805+#endif
34806+
34807+#if CI_CPU_HAS_SSE2
34808+# define ci_rmb() ci_x86_lfence()
34809+# define ci_mb() ci_x86_mfence()
34810+# define ci_riob() ci_x86_lfence()
34811+# define ci_wiob() ci_x86_sfence()
34812+# define ci_iob() ci_x86_mfence()
34813+#else
34814+# if CI_CPU_HAS_SSE
34815+# define ci_wiob() ci_x86_sfence()
34816+# else
34817+# define ci_wiob() ci_x86_mb()
34818+# endif
34819+# define ci_rmb() ci_x86_mb()
34820+# define ci_mb() ci_x86_mb()
34821+# define ci_riob() ci_x86_mb()
34822+# define ci_iob() ci_x86_mb()
34823+#endif
34824+
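To make the intent of ci_wmb()/ci_rmb() concrete, here is a minimal single-producer/single-consumer sketch (illustrative only; the ring variables are invented for this example). The producer publishes the payload before the index; the consumer reads the index before the payload:

/* Assumed single-producer, single-consumer ring; size is a power of two. */
#define EX_RING_SIZE 16
static volatile unsigned ex_ring[EX_RING_SIZE];
static volatile unsigned ex_write_idx;

/* Producer: write the payload, then make it visible before the index. */
static void example_produce(unsigned value)
{
    ex_ring[ex_write_idx % EX_RING_SIZE] = value;
    ci_wmb();                      /* order: payload before index update */
    ex_write_idx = ex_write_idx + 1;
}

/* Consumer: read the index, then order that read before the payload read. */
static int example_consume(unsigned read_idx, unsigned *out)
{
    if( read_idx == ex_write_idx )
        return 0;                  /* nothing new to read yet */
    ci_rmb();                      /* order: index read before payload read */
    *out = ex_ring[read_idx % EX_RING_SIZE];
    return 1;
}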
34825+typedef unsigned long ci_phys_addr_t;
34826+#define ci_phys_addr_fmt "%lx"
34827+
34828+#endif /* __CI_COMPAT_GCC_X86_H__ */
34829+
34830+/*! \cidoxg_end */
34831Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/primitive.h
34832===================================================================
34833--- /dev/null 1970-01-01 00:00:00.000000000 +0000
34834+++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/primitive.h 2008-02-20 09:32:49.000000000 +0100
34835@@ -0,0 +1,77 @@
34836+/****************************************************************************
34837+ * Copyright 2002-2005: Level 5 Networks Inc.
34838+ * Copyright 2005-2008: Solarflare Communications Inc,
34839+ * 9501 Jeronimo Road, Suite 250,
34840+ * Irvine, CA 92618, USA
34841+ *
34842+ * Maintained by Solarflare Communications
34843+ * <linux-xen-drivers@solarflare.com>
34844+ * <onload-dev@solarflare.com>
34845+ *
34846+ * This program is free software; you can redistribute it and/or modify it
34847+ * under the terms of the GNU General Public License version 2 as published
34848+ * by the Free Software Foundation, incorporated herein by reference.
34849+ *
34850+ * This program is distributed in the hope that it will be useful,
34851+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34852+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34853+ * GNU General Public License for more details.
34854+ *
34855+ * You should have received a copy of the GNU General Public License
34856+ * along with this program; if not, write to the Free Software
34857+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34858+ ****************************************************************************
34859+ */
34860+/*! \cidoxg_include_ci_compat */
34861+
34862+#ifndef __CI_COMPAT_PRIMITIVE_H__
34863+#define __CI_COMPAT_PRIMITIVE_H__
34864+
34865+
34866+/**********************************************************************
34867+ * Primitive types.
34868+ */
34869+
34870+typedef unsigned char ci_uint8;
34871+typedef char ci_int8;
34872+
34873+typedef unsigned short ci_uint16;
34874+typedef short ci_int16;
34875+
34876+typedef unsigned int ci_uint32;
34877+typedef int ci_int32;
34878+
34879+/* 64-bit support is platform dependent. */
34880+
34881+
34882+/**********************************************************************
34883+ * Other fancy types.
34884+ */
34885+
34886+typedef ci_uint8 ci_octet;
34887+
34888+typedef enum {
34889+ CI_FALSE = 0,
34890+ CI_TRUE
34891+} ci_boolean_t;
34892+
34893+
34894+/**********************************************************************
34895+ * Some nice types you'd always assumed were standards.
34896+ * (Really, they are SYSV "standards".)
34897+ */
34898+
34899+#ifdef _WIN32
34900+typedef unsigned long ulong;
34901+typedef unsigned int uint;
34902+typedef char* caddr_t;
34903+#elif defined(__linux__) && defined(__KERNEL__)
34904+#include <linux/types.h>
34905+#elif defined(__linux__)
34906+#include <sys/types.h>
34907+#endif
34908+
34909+
34910+#endif /* __CI_COMPAT_PRIMITIVE_H__ */
34911+
34912+/*! \cidoxg_end */
34913Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/sysdep.h
34914===================================================================
34915--- /dev/null 1970-01-01 00:00:00.000000000 +0000
34916+++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/sysdep.h 2008-02-20 09:32:49.000000000 +0100
34917@@ -0,0 +1,166 @@
34918+/****************************************************************************
34919+ * Copyright 2002-2005: Level 5 Networks Inc.
34920+ * Copyright 2005-2008: Solarflare Communications Inc,
34921+ * 9501 Jeronimo Road, Suite 250,
34922+ * Irvine, CA 92618, USA
34923+ *
34924+ * Maintained by Solarflare Communications
34925+ * <linux-xen-drivers@solarflare.com>
34926+ * <onload-dev@solarflare.com>
34927+ *
34928+ * This program is free software; you can redistribute it and/or modify it
34929+ * under the terms of the GNU General Public License version 2 as published
34930+ * by the Free Software Foundation, incorporated herein by reference.
34931+ *
34932+ * This program is distributed in the hope that it will be useful,
34933+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34934+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34935+ * GNU General Public License for more details.
34936+ *
34937+ * You should have received a copy of the GNU General Public License
34938+ * along with this program; if not, write to the Free Software
34939+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34940+ ****************************************************************************
34941+ */
34942+
34943+/*! \cidoxg_include_ci_compat */
34944+
34945+#ifndef __CI_COMPAT_SYSDEP_H__
34946+#define __CI_COMPAT_SYSDEP_H__
34947+
34948+
34949+/**********************************************************************
34950+ * Platform definition fixups.
34951+ */
34952+
34953+#if defined(__ci_ul_driver__) && !defined(__ci_driver__)
34954+# define __ci_driver__
34955+#endif
34956+
34957+#if defined(__ci_driver__) && !defined(__ci_ul_driver__) && \
34958+ !defined(__KERNEL__)
34959+# define __KERNEL__
34960+#endif
34961+
34962+
34963+/**********************************************************************
34964+ * Sanity checks (no cheating!)
34965+ */
34966+
34967+#if defined(__KERNEL__) && !defined(__ci_driver__)
34968+# error Insane.
34969+#endif
34970+
34971+#if defined(__KERNEL__) && defined(__ci_ul_driver__)
34972+# error Madness.
34973+#endif
34974+
34975+#if defined(__unix__) && defined(_WIN32)
34976+# error Strange.
34977+#endif
34978+
34979+#if defined(__GNUC__) && defined(_MSC_VER)
34980+# error Crazy.
34981+#endif
34982+
34983+
34984+/**********************************************************************
34985+ * Compiler and processor dependencies.
34986+ */
34987+
34988+#if defined(__GNUC__)
34989+
34990+# include <ci/compat/gcc.h>
34991+
34992+# if defined(__i386__)
34993+# include <ci/compat/x86.h>
34994+# include <ci/compat/gcc_x86.h>
34995+# elif defined(__x86_64__)
34996+# include <ci/compat/x86_64.h>
34997+# include <ci/compat/gcc_x86.h>
34998+# elif defined(__PPC__)
34999+# include <ci/compat/ppc.h>
35000+# include <ci/compat/gcc_ppc.h>
35001+# elif defined(__ia64__)
35002+# include <ci/compat/ia64.h>
35003+# include <ci/compat/gcc_ia64.h>
35004+# else
35005+# error Unknown processor - GNU C
35006+# endif
35007+
35008+#elif defined(_MSC_VER)
35009+
35010+# include <ci/compat/msvc.h>
35011+
35012+# if defined(__i386__)
35013+# include <ci/compat/x86.h>
35014+# include <ci/compat/msvc_x86.h>
35015+# elif defined(__x86_64__)
35016+# include <ci/compat/x86_64.h>
35017+# include <ci/compat/msvc_x86_64.h>
35018+# else
35019+# error Unknown processor MSC
35020+# endif
35021+
35022+#elif defined(__PGI)
35023+
35024+# include <ci/compat/x86.h>
35025+# include <ci/compat/pg_x86.h>
35026+
35027+#elif defined(__INTEL_COMPILER)
35028+
35029+/* Intel compilers v7 claim to be very gcc compatible. */
35030+# if __INTEL_COMPILER >= 700
35031+# include <ci/compat/gcc.h>
35032+# include <ci/compat/x86.h>
35033+# include <ci/compat/gcc_x86.h>
35034+# else
35035+# error Old Intel compiler not supported. Yet.
35036+# endif
35037+
35038+#else
35039+# error Unknown compiler.
35040+#endif
35041+
35042+
35043+/**********************************************************************
35044+ * Misc stuff (that probably shouldn't be here).
35045+ */
35046+
35047+#ifdef __sun
35048+# ifdef __KERNEL__
35049+# define _KERNEL
35050+# define _SYSCALL32
35051+# ifdef _LP64
35052+# define _SYSCALL32_IMPL
35053+# endif
35054+# else
35055+# define _REENTRANT
35056+# endif
35057+#endif
35058+
35059+
35060+/**********************************************************************
35061+ * Defaults for anything left undefined.
35062+ */
35063+
35064+#ifndef CI_LIKELY
35065+# define CI_LIKELY(t) (t)
35066+# define CI_UNLIKELY(t) (t)
35067+#endif
35068+
35069+#ifndef ci_restrict
35070+# define ci_restrict
35071+#endif
35072+
35073+#ifndef ci_inline
35074+# define ci_inline static inline
35075+#endif
35076+
35077+#ifndef ci_noinline
35078+# define ci_noinline static
35079+#endif
35080+
35081+#endif /* __CI_COMPAT_SYSDEP_H__ */
35082+
35083+/*! \cidoxg_end */
35084Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/utils.h
35085===================================================================
35086--- /dev/null 1970-01-01 00:00:00.000000000 +0000
35087+++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/utils.h 2008-02-20 09:32:49.000000000 +0100
35088@@ -0,0 +1,269 @@
35089+/****************************************************************************
35090+ * Copyright 2002-2005: Level 5 Networks Inc.
35091+ * Copyright 2005-2008: Solarflare Communications Inc,
35092+ * 9501 Jeronimo Road, Suite 250,
35093+ * Irvine, CA 92618, USA
35094+ *
35095+ * Maintained by Solarflare Communications
35096+ * <linux-xen-drivers@solarflare.com>
35097+ * <onload-dev@solarflare.com>
35098+ *
35099+ * This program is free software; you can redistribute it and/or modify it
35100+ * under the terms of the GNU General Public License version 2 as published
35101+ * by the Free Software Foundation, incorporated herein by reference.
35102+ *
35103+ * This program is distributed in the hope that it will be useful,
35104+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35105+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35106+ * GNU General Public License for more details.
35107+ *
35108+ * You should have received a copy of the GNU General Public License
35109+ * along with this program; if not, write to the Free Software
35110+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35111+ ****************************************************************************
35112+ */
35113+
35114+/*
35115+ * \author djr
35116+ * \brief Handy utility macros.
35117+ * \date 2003/01/17
35118+ */
35119+
35120+/*! \cidoxg_include_ci_compat */
35121+
35122+#ifndef __CI_COMPAT_UTILS_H__
35123+#define __CI_COMPAT_UTILS_H__
35124+
35125+
35126+/**********************************************************************
35127+ * Alignment -- [align] must be a power of 2.
35128+ **********************************************************************/
35129+
35130+ /*! Align forward onto next boundary. */
35131+
35132+#define CI_ALIGN_FWD(p, align) (((p)+(align)-1u) & ~((align)-1u))
35133+
35134+
35135+ /*! Align back onto prev boundary. */
35136+
35137+#define CI_ALIGN_BACK(p, align) ((p) & ~((align)-1u))
35138+
35139+
35140+ /*! How far to next boundary? */
35141+
35142+#define CI_ALIGN_NEEDED(p, align, signed_t) (-(signed_t)(p) & ((align)-1u))
35143+
35144+
35145+ /*! How far beyond prev boundary? */
35146+
35147+#define CI_OFFSET(p, align) ((p) & ((align)-1u))
35148+
35149+
35150+ /*! Does object fit in gap before next boundary? */
35151+
35152+#define CI_FITS(p, size, align, signed_t) \
35153+ (CI_ALIGN_NEEDED((p) + 1, (align), signed_t) + 1 >= (size))
35154+
35155+
35156+ /*! Align forward onto next boundary. */
35157+
35158+#define CI_PTR_ALIGN_FWD(p, align) \
35159+ ((char*) CI_ALIGN_FWD(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))))
35160+
35161+ /*! Align back onto prev boundary. */
35162+
35163+#define CI_PTR_ALIGN_BACK(p, align) \
35164+ ((char*) CI_ALIGN_BACK(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))))
35165+
35166+ /*! How far to next boundary? */
35167+
35168+#define CI_PTR_ALIGN_NEEDED(p, align) \
35169+ CI_ALIGN_NEEDED(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)), \
35170+ ci_ptr_arith_t)
35171+
35172+ /*! How far to next boundary? NZ = not zero, i.e. gives align if on boundary */
35173+
35174+#define CI_PTR_ALIGN_NEEDED_NZ(p, align) \
35175+ ((align) - (((char*)p) - \
35176+ ((char*) CI_ALIGN_BACK(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))))))
35177+
35178+ /*! How far beyond prev boundary? */
35179+
35180+#define CI_PTR_OFFSET(p, align) \
35181+ CI_OFFSET(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)))
35182+
35183+
35184+ /* Same as CI_ALIGN_FWD and CI_ALIGN_BACK. */
35185+
35186+#define CI_ROUND_UP(i, align) (((i)+(align)-1u) & ~((align)-1u))
35187+
35188+#define CI_ROUND_DOWN(i, align) ((i) & ~((align)-1u))
35189+
35190+
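A small illustration of the alignment helpers above (not part of the patch; the values are chosen only to show the arithmetic, and [align] must be a power of two):

static void example_alignment(void)
{
    unsigned long start = 0x1003;
    unsigned long up    = CI_ALIGN_FWD(start, 8ul);   /* 0x1008 */
    unsigned long down  = CI_ALIGN_BACK(start, 8ul);  /* 0x1000 */
    unsigned long offs  = CI_OFFSET(start, 8ul);      /* 0x3    */
    (void)up; (void)down; (void)offs;
}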
35191+/**********************************************************************
35192+ * Byte-order
35193+ **********************************************************************/
35194+
35195+/* These are not flags. They are enumeration values for use with
35196+ * CI_MY_BYTE_ORDER. */
35197+#define CI_BIG_ENDIAN 1
35198+#define CI_LITTLE_ENDIAN 0
35199+
35200+/*
35201+** Note that these byte-swapping primitives may leave junk in bits above
35202+** the range they operate on.
35203+**
35204+** The CI_BSWAP_nn() routines require that bits above [nn] are zero. Use
35205+** CI_BSWAPM_nn(x) if this cannot be guaranteed.
35206+*/
35207+
35208+/* ?? May be able to improve on some of these with inline assembler on some
35209+** platforms.
35210+*/
35211+
35212+#define CI_BSWAP_16(v) ((((v) & 0xff) << 8) | ((v) >> 8))
35213+#define CI_BSWAPM_16(v) ((((v) & 0xff) << 8) | (((v) & 0xff00) >> 8))
35214+
35215+#define CI_BSWAP_32(v) (((v) >> 24) | \
35216+ (((v) & 0x00ff0000) >> 8) | \
35217+ (((v) & 0x0000ff00) << 8) | \
35218+ ((v) << 24))
35219+#define CI_BSWAPM_32(v) ((((v) & 0xff000000) >> 24) | \
35220+ (((v) & 0x00ff0000) >> 8) | \
35221+ (((v) & 0x0000ff00) << 8) | \
35222+ ((v) << 24))
35223+
35224+#define CI_BSWAP_64(v) (((v) >> 56) | \
35225+ (((v) & 0x00ff000000000000) >> 40) | \
35226+ (((v) & 0x0000ff0000000000) >> 24) | \
35227+ (((v) & 0x000000ff00000000) >> 8) | \
35228+ (((v) & 0x00000000ff000000) << 8) | \
35229+ (((v) & 0x0000000000ff0000) << 24) | \
35230+ (((v) & 0x000000000000ff00) << 40) | \
35231+ ((v) << 56))
35232+
35233+# define CI_BSWAPPED_16_IF(c,v) ((c) ? CI_BSWAP_16(v) : (v))
35234+# define CI_BSWAPPED_32_IF(c,v) ((c) ? CI_BSWAP_32(v) : (v))
35235+# define CI_BSWAPPED_64_IF(c,v) ((c) ? CI_BSWAP_64(v) : (v))
35236+# define CI_BSWAP_16_IF(c,v) do{ if((c)) (v) = CI_BSWAP_16(v); }while(0)
35237+# define CI_BSWAP_32_IF(c,v) do{ if((c)) (v) = CI_BSWAP_32(v); }while(0)
35238+# define CI_BSWAP_64_IF(c,v) do{ if((c)) (v) = CI_BSWAP_64(v); }while(0)
35239+
35240+#if (CI_MY_BYTE_ORDER == CI_LITTLE_ENDIAN)
35241+# define CI_BSWAP_LE16(v) (v)
35242+# define CI_BSWAP_LE32(v) (v)
35243+# define CI_BSWAP_LE64(v) (v)
35244+# define CI_BSWAP_BE16(v) CI_BSWAP_16(v)
35245+# define CI_BSWAP_BE32(v) CI_BSWAP_32(v)
35246+# define CI_BSWAP_BE64(v) CI_BSWAP_64(v)
35247+# define CI_BSWAPM_LE16(v) (v)
35248+# define CI_BSWAPM_LE32(v) (v)
35249+# define CI_BSWAPM_LE64(v) (v)
35250+# define CI_BSWAPM_BE16(v) CI_BSWAPM_16(v)
35251+# define CI_BSWAPM_BE32(v) CI_BSWAPM_32(v)
35252+#elif (CI_MY_BYTE_ORDER == CI_BIG_ENDIAN)
35253+# define CI_BSWAP_BE16(v) (v)
35254+# define CI_BSWAP_BE32(v) (v)
35255+# define CI_BSWAP_BE64(v) (v)
35256+# define CI_BSWAP_LE16(v) CI_BSWAP_16(v)
35257+# define CI_BSWAP_LE32(v) CI_BSWAP_32(v)
35258+# define CI_BSWAP_LE64(v) CI_BSWAP_64(v)
35259+# define CI_BSWAPM_BE16(v) (v)
35260+# define CI_BSWAPM_BE32(v) (v)
35261+# define CI_BSWAPM_BE64(v) (v)
35262+# define CI_BSWAPM_LE16(v) CI_BSWAPM_16(v)
35263+# define CI_BSWAPM_LE32(v) CI_BSWAPM_32(v)
35264+#else
35265+# error Bad endian.
35266+#endif
35267+
35268+
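A brief, hedged illustration of the byte-order helpers (the value is arbitrary). CI_BSWAP_BE32 is a no-op on big-endian hosts and a byte swap on little-endian ones, so it converts between host order and big-endian (network) order:

static ci_uint32 example_to_wire(ci_uint32 host_val)
{
    /* e.g. host_val = 0x0a000001 becomes 0x0100000a on a little-endian
     * host, and is returned unchanged on a big-endian host. */
    return CI_BSWAP_BE32(host_val);
}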
35269+/**********************************************************************
35270+ * Get pointer to struct from pointer to member
35271+ **********************************************************************/
35272+
35273+#define CI_MEMBER_OFFSET(c_type, mbr_name) \
35274+ ((ci_uint32) (ci_uintptr_t)(&((c_type*)0)->mbr_name))
35275+
35276+#define CI_MEMBER_SIZE(c_type, mbr_name) \
35277+ sizeof(((c_type*)0)->mbr_name)
35278+
35279+#define __CI_CONTAINER(c_type, mbr_name, p_mbr) \
35280+ ( (c_type*) ((char*)(p_mbr) - CI_MEMBER_OFFSET(c_type, mbr_name)) )
35281+
35282+#ifndef CI_CONTAINER
35283+# define CI_CONTAINER(t,m,p) __CI_CONTAINER(t,m,p)
35284+#endif
35285+
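For illustration, here is how the container-of helpers above are typically used (a sketch only; the struct and field names are invented, and the usual ci compat headers are assumed to be included). Given a pointer to an embedded member, CI_CONTAINER recovers the enclosing structure:

struct example_link { struct example_link *next; };

struct example_item {
    int                 value;
    struct example_link link;      /* embedded list linkage */
};

/* Given a pointer to [link], get back to the owning example_item. */
static struct example_item *example_item_of(struct example_link *l)
{
    return CI_CONTAINER(struct example_item, link, l);
}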
35286+
35287+/**********************************************************************
35288+ * Structure member initialiser.
35289+ **********************************************************************/
35290+
35291+#ifndef CI_STRUCT_MBR
35292+# define CI_STRUCT_MBR(name, val) .name = val
35293+#endif
35294+
35295+
35296+/**********************************************************************
35297+ * min / max
35298+ **********************************************************************/
35299+
35300+#define CI_MIN(x,y) (((x) < (y)) ? (x) : (y))
35301+#define CI_MAX(x,y) (((x) > (y)) ? (x) : (y))
35302+
35303+/**********************************************************************
35304+ * abs
35305+ **********************************************************************/
35306+
35307+#define CI_ABS(x) (((x) < 0) ? -(x) : (x))
35308+
35309+/**********************************************************************
35310+ * Conditional debugging
35311+ **********************************************************************/
35312+
35313+#ifdef NDEBUG
35314+# define CI_DEBUG(x)
35315+# define CI_NDEBUG(x) x
35316+# define CI_IF_DEBUG(y,n) (n)
35317+# define CI_DEBUG_ARG(x)
35318+#else
35319+# define CI_DEBUG(x) x
35320+# define CI_NDEBUG(x)
35321+# define CI_IF_DEBUG(y,n) (y)
35322+# define CI_DEBUG_ARG(x) ,x
35323+#endif
35324+
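A hedged sketch of the conditional-debug helpers above (invented function; printk is assumed to be available, i.e. a kernel build). In debug builds the extra argument and the logging are compiled in; with NDEBUG both vanish:

void example_log(int code  CI_DEBUG_ARG(const char *where))
{
    CI_DEBUG(printk("%s: ", where);)   /* debug builds only */
    printk("code=%d\n", code);
}

/* Call sites pass the extra argument the same way:
 *   example_log(5  CI_DEBUG_ARG("init"));
 */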
35325+#ifdef __KERNEL__
35326+#define CI_KERNEL_ARG(x) ,x
35327+#else
35328+#define CI_KERNEL_ARG(x)
35329+#endif
35330+
35331+#ifdef _WIN32
35332+# define CI_KERNEL_ARG_WIN(x) CI_KERNEL_ARG(x)
35333+# define CI_ARG_WIN(x) ,x
35334+#else
35335+# define CI_KERNEL_ARG_WIN(x)
35336+# define CI_ARG_WIN(x)
35337+#endif
35338+
35339+#ifdef __unix__
35340+# define CI_KERNEL_ARG_UNIX(x) CI_KERNEL_ARG(x)
35341+# define CI_ARG_UNIX(x) ,x
35342+#else
35343+# define CI_KERNEL_ARG_UNIX(x)
35344+# define CI_ARG_UNIX(x)
35345+#endif
35346+
35347+#ifdef __linux__
35348+# define CI_KERNEL_ARG_LINUX(x) CI_KERNEL_ARG(x)
35349+# define CI_ARG_LINUX(x) ,x
35350+#else
35351+# define CI_KERNEL_ARG_LINUX(x)
35352+# define CI_ARG_LINUX(x)
35353+#endif
35354+
35355+
35356+#endif /* __CI_COMPAT_UTILS_H__ */
35357+/*! \cidoxg_end */
35358Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86.h
35359===================================================================
35360--- /dev/null 1970-01-01 00:00:00.000000000 +0000
35361+++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86.h 2008-02-20 09:32:49.000000000 +0100
35362@@ -0,0 +1,48 @@
35363+/****************************************************************************
35364+ * Copyright 2002-2005: Level 5 Networks Inc.
35365+ * Copyright 2005-2008: Solarflare Communications Inc,
35366+ * 9501 Jeronimo Road, Suite 250,
35367+ * Irvine, CA 92618, USA
35368+ *
35369+ * Maintained by Solarflare Communications
35370+ * <linux-xen-drivers@solarflare.com>
35371+ * <onload-dev@solarflare.com>
35372+ *
35373+ * This program is free software; you can redistribute it and/or modify it
35374+ * under the terms of the GNU General Public License version 2 as published
35375+ * by the Free Software Foundation, incorporated herein by reference.
35376+ *
35377+ * This program is distributed in the hope that it will be useful,
35378+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35379+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35380+ * GNU General Public License for more details.
35381+ *
35382+ * You should have received a copy of the GNU General Public License
35383+ * along with this program; if not, write to the Free Software
35384+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35385+ ****************************************************************************
35386+ */
35387+
35388+/*! \cidoxg_include_ci_compat */
35389+
35390+#ifndef __CI_COMPAT_X86_H__
35391+#define __CI_COMPAT_X86_H__
35392+
35393+
35394+#define CI_MY_BYTE_ORDER CI_LITTLE_ENDIAN
35395+
35396+#define CI_WORD_SIZE 4
35397+#define CI_PTR_SIZE 4
35398+
35399+#define CI_PAGE_SIZE 4096
35400+#define CI_PAGE_SHIFT 12
35401+#define CI_PAGE_MASK (~(CI_PAGE_SIZE - 1))
35402+
35403+#define CI_CPU_HAS_SSE 1 /* SSE extensions supported */
35404+#define CI_CPU_HAS_SSE2 0 /* SSE2 extensions supported */
35405+#define CI_CPU_OOS 0 /* CPU does out of order stores */
35406+
35407+
35408+#endif /* __CI_COMPAT_X86_H__ */
35409+
35410+/*! \cidoxg_end */
35411Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86_64.h
35412===================================================================
35413--- /dev/null 1970-01-01 00:00:00.000000000 +0000
35414+++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86_64.h 2008-02-20 09:32:49.000000000 +0100
35415@@ -0,0 +1,54 @@
35416+/****************************************************************************
35417+ * Copyright 2002-2005: Level 5 Networks Inc.
35418+ * Copyright 2005-2008: Solarflare Communications Inc,
35419+ * 9501 Jeronimo Road, Suite 250,
35420+ * Irvine, CA 92618, USA
35421+ *
35422+ * Maintained by Solarflare Communications
35423+ * <linux-xen-drivers@solarflare.com>
35424+ * <onload-dev@solarflare.com>
35425+ *
35426+ * This program is free software; you can redistribute it and/or modify it
35427+ * under the terms of the GNU General Public License version 2 as published
35428+ * by the Free Software Foundation, incorporated herein by reference.
35429+ *
35430+ * This program is distributed in the hope that it will be useful,
35431+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35432+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35433+ * GNU General Public License for more details.
35434+ *
35435+ * You should have received a copy of the GNU General Public License
35436+ * along with this program; if not, write to the Free Software
35437+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35438+ ****************************************************************************
35439+ */
35440+
35441+/*
35442+ * \author djr
35443+ * \brief Arch stuff for AMD x86_64.
35444+ * \date 2004/08/17
35445+ */
35446+
35447+/*! \cidoxg_include_ci_compat */
35448+#ifndef __CI_COMPAT_X86_64_H__
35449+#define __CI_COMPAT_X86_64_H__
35450+
35451+
35452+#define CI_MY_BYTE_ORDER CI_LITTLE_ENDIAN
35453+
35454+#define CI_WORD_SIZE 8
35455+#define CI_PTR_SIZE 8
35456+
35457+#define CI_PAGE_SIZE 4096
35458+#define CI_PAGE_SHIFT 12
35459+#define CI_PAGE_MASK (~(CI_PAGE_SIZE - 1))
35460+
35461+#define CI_CPU_HAS_SSE 1 /* SSE extensions supported */
35462+
35463+/* SSE2 disabled while investigating BUG1060 */
35464+#define CI_CPU_HAS_SSE2 0 /* SSE2 extensions supported */
35465+#define CI_CPU_OOS 0 /* CPU does out of order stores */
35466+
35467+
35468+#endif /* __CI_COMPAT_X86_64_H__ */
35469+/*! \cidoxg_end */
35470Index: head-2008-11-25/drivers/xen/sfc_netback/ci/driver/resource/efx_vi.h
35471===================================================================
35472--- /dev/null 1970-01-01 00:00:00.000000000 +0000
35473+++ head-2008-11-25/drivers/xen/sfc_netback/ci/driver/resource/efx_vi.h 2008-02-20 09:32:49.000000000 +0100
35474@@ -0,0 +1,276 @@
35475+/****************************************************************************
35476+ * Driver for Solarflare network controllers -
35477+ * resource management for Xen backend, OpenOnload, etc
35478+ * (including support for SFE4001 10GBT NIC)
35479+ *
35480+ * This file contains public EFX VI API to Solarflare resource manager.
35481+ *
35482+ * Copyright 2005-2007: Solarflare Communications Inc,
35483+ * 9501 Jeronimo Road, Suite 250,
35484+ * Irvine, CA 92618, USA
35485+ *
35486+ * Developed and maintained by Solarflare Communications:
35487+ * <linux-xen-drivers@solarflare.com>
35488+ * <onload-dev@solarflare.com>
35489+ *
35490+ *
35491+ * This program is free software; you can redistribute it and/or modify it
35492+ * under the terms of the GNU General Public License version 2 as published
35493+ * by the Free Software Foundation, incorporated herein by reference.
35494+ *
35495+ * This program is distributed in the hope that it will be useful,
35496+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35497+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35498+ * GNU General Public License for more details.
35499+ *
35500+ * You should have received a copy of the GNU General Public License
35501+ * along with this program; if not, write to the Free Software
35502+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35503+ ****************************************************************************
35504+ */
35505+
35506+#ifndef __CI_DRIVER_RESOURCE_EFX_VI_H__
35507+#define __CI_DRIVER_RESOURCE_EFX_VI_H__
35508+
35509+/* Default size of event queue in the efx_vi resource. Copied from
35510+ * CI_CFG_NETIF_EVENTQ_SIZE */
35511+#define EFX_VI_EVENTQ_SIZE_DEFAULT 1024
35512+
35513+extern int efx_vi_eventq_size;
35514+
35515+/**************************************************************************
35516+ * efx_vi_state types, allocation and free
35517+ **************************************************************************/
35518+
35519+/*! Handle for referring to an efx_vi */
35520+struct efx_vi_state;
35521+
35522+/*!
35523+ * Allocate an efx_vi, including event queue and pt_endpoint
35524+ *
35525+ * \param vih_out Pointer to a handle that is set on success
35526+ * \param nic_index Index of NIC to apply this resource to
35527+ * \return Zero on success (and vih_out set), non-zero on failure.
35528+ */
35529+extern int
35530+efx_vi_alloc(struct efx_vi_state **vih_out, int nic_index);
35531+
35532+/*!
35533+ * Free a previously allocated efx_vi
35534+ *
35535+ * \param vih The handle of the efx_vi to free
35536+ */
35537+extern void
35538+efx_vi_free(struct efx_vi_state *vih);
35539+
35540+/*!
35541+ * Reset a previously allocated efx_vi
35542+ *
35543+ * \param vih The handle of the efx_vi to reset
35544+ */
35545+extern void
35546+efx_vi_reset(struct efx_vi_state *vih);
35547+
35548+/**************************************************************************
35549+ * efx_vi_eventq types and functions
35550+ **************************************************************************/
35551+
35552+/*!
35553+ * Register a function to receive callbacks when event queue timeouts
35554+ * or wakeups occur. Only one function per efx_vi can be registered
35555+ * at once.
35556+ *
35557+ * \param vih The handle to identify the efx_vi
35558+ * \param callback The function to callback
35559+ * \param context An argument to pass to the callback function
35560+ * \return Zero on success, non-zero on failure.
35561+ */
35562+extern int
35563+efx_vi_eventq_register_callback(struct efx_vi_state *vih,
35564+ void (*callback)(void *context, int is_timeout),
35565+ void *context);
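A minimal usage sketch for the callback registration above (illustrative only; the callback body and the counter are invented). Note that only one callback may be registered per efx_vi at a time:

static int example_wakeup_count;

static void example_evq_callback(void *context, int is_timeout)
{
    int *counter = context;            /* the pointer passed at registration */
    if (!is_timeout)
        (*counter)++;                  /* count wakeup events */
}

/* After efx_vi_alloc() has produced [vih]: */
static int example_register(struct efx_vi_state *vih)
{
    return efx_vi_eventq_register_callback(vih, example_evq_callback,
                                            &example_wakeup_count);
}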
35566+
35567+/*!
35568+ * Remove the current eventq timeout or wakeup callback function
35569+ *
35570+ * \param vih The handle to identify the efx_vi
35571+ * \return Zero on success, non-zero on failure
35572+ */
35573+extern int
35574+efx_vi_eventq_kill_callback(struct efx_vi_state *vih);
35575+
35576+/**************************************************************************
35577+ * efx_vi_dma_map types and functions
35578+ **************************************************************************/
35579+
35580+/*!
35581+ * Handle for referring to a DMA mapping
35582+ */
35583+struct efx_vi_dma_map_state;
35584+
35585+/*!
35586+ * Map a list of buffer pages so they are registered with the hardware
35587+ *
35588+ * \param vih The handle to identify the efx_vi
35589+ * \param pages An array of page pointers to map
35590+ * \param n_pages Length of the page pointer array. Must be a power of two.
35591+ * \param dmh_out Set on success to a handle used to refer to this mapping
35592+ * \return Zero on success, non-zero on failure.
35593+ */
35594+extern int
35595+efx_vi_dma_map_pages(struct efx_vi_state *vih, struct page **pages,
35596+ int n_pages, struct efx_vi_dma_map_state **dmh_out);
35597+extern int
35598+efx_vi_dma_map_addrs(struct efx_vi_state *vih,
35599+ unsigned long long *dev_bus_addrs, int n_pages,
35600+ struct efx_vi_dma_map_state **dmh_out);
35601+
35602+/*!
35603+ * Unmap a previously mapped set of pages so they are no longer registered
35604+ * with the hardware.
35605+ *
35606+ * \param vih The handle to identify the efx_vi
35607+ * \param dmh The handle to identify the dma mapping
35608+ */
35609+extern void
35610+efx_vi_dma_unmap_pages(struct efx_vi_state *vih,
35611+ struct efx_vi_dma_map_state *dmh);
35612+extern void
35613+efx_vi_dma_unmap_addrs(struct efx_vi_state *vih,
35614+ struct efx_vi_dma_map_state *dmh);
35615+
35616+/*!
35617+ * Retrieve the buffer address of the mapping
35618+ *
35619+ * \param vih The handle to identify the efx_vi
35620+ * \param dmh The handle to identify the buffer mapping
35621+ * \return The buffer address on success, or zero on failure
35622+ */
35623+extern unsigned
35624+efx_vi_dma_get_map_addr(struct efx_vi_state *vih,
35625+ struct efx_vi_dma_map_state *dmh);
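A hedged sketch of the map/use/unmap flow described by the functions above (page allocation omitted; n_pages must be a power of two per the comment, and the buffer address is used only as a placeholder here):

static int example_map_and_use(struct efx_vi_state *vih, struct page **pages)
{
    struct efx_vi_dma_map_state *dmh;
    unsigned buf_addr;

    /* n_pages (here 4) must be a power of two. */
    if (efx_vi_dma_map_pages(vih, pages, 4, &dmh) != 0)
        return -1;

    buf_addr = efx_vi_dma_get_map_addr(vih, dmh);
    if (buf_addr == 0) {               /* zero indicates failure, per above */
        efx_vi_dma_unmap_pages(vih, dmh);
        return -1;
    }

    /* ... program DMA descriptors using buf_addr ... */

    efx_vi_dma_unmap_pages(vih, dmh);
    return 0;
}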
35626+
35627+/**************************************************************************
35628+ * efx_vi filter functions
35629+ **************************************************************************/
35630+
35631+#define EFX_VI_STATIC_FILTERS 32
35632+
35633+/*! Handle to refer to a filter instance */
35634+struct filter_resource_t;
35635+
35636+/*!
35637+ * Allocate and add a filter
35638+ *
35639+ * \param vih The handle to identify the efx_vi
35640+ * \param protocol The protocol of the new filter: UDP or TCP
35641+ * \param ip_addr_be32 The local ip address of the filter
35642+ * \param port_le16 The local port of the filter
35643+ * \param fh_out Set on success to be a handle to refer to this filter
35644+ * \return Zero on success, non-zero on failure.
35645+ */
35646+extern int
35647+efx_vi_filter(struct efx_vi_state *vih, int protocol, unsigned ip_addr_be32,
35648+ int port_le16, struct filter_resource_t **fh_out);
35649+
35650+/*!
35651+ * Remove a filter and free resources associated with it
35652+ *
35653+ * \param vih The handle to identify the efx_vi
35654+ * \param fh The handle to identify the filter
35655+ * \return Zero on success, non-zero on failure
35656+ */
35657+extern int
35658+efx_vi_filter_stop(struct efx_vi_state *vih, struct filter_resource_t *fh);
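For illustration only, adding and later removing a UDP filter might look like the sketch below. IPPROTO_UDP is an assumption: the header only says "UDP or TCP", so the protocol constant actually expected should be checked against the implementation.

static int example_add_udp_filter(struct efx_vi_state *vih,
                                  unsigned ip_addr_be32, int port_le16,
                                  struct filter_resource_t **fh_out)
{
    /* ip_addr_be32 is big-endian and port_le16 little-endian, per the
     * parameter names above.  IPPROTO_UDP assumed for the protocol. */
    return efx_vi_filter(vih, IPPROTO_UDP, ip_addr_be32, port_le16, fh_out);
}

/* Later: efx_vi_filter_stop(vih, fh) removes the filter and frees it. */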
35659+
35660+/**************************************************************************
35661+ * efx_vi hw resources types and functions
35662+ **************************************************************************/
35663+
35664+/*! Constants for the type field in efx_vi_hw_resource */
35665+#define EFX_VI_HW_RESOURCE_TXDMAQ 0x0 /* PFN of TX DMA Q */
35666+#define EFX_VI_HW_RESOURCE_RXDMAQ 0x1 /* PFN of RX DMA Q */
35667+#define EFX_VI_HW_RESOURCE_TXBELL 0x2 /* PFN of TX Doorbell (EF1) */
35668+#define EFX_VI_HW_RESOURCE_RXBELL 0x3 /* PFN of RX Doorbell (EF1) */
35669+#define EFX_VI_HW_RESOURCE_EVQTIMER 0x4 /* Address of event q timer */
35670+
35671+/* Address of event q pointer (EF1) */
35672+#define EFX_VI_HW_RESOURCE_EVQPTR 0x5
35673+/* Address of register pointer (Falcon A) */
35674+#define EFX_VI_HW_RESOURCE_EVQRPTR 0x6
35675+/* Offset of register pointer (Falcon B) */
35676+#define EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET 0x7
35677+/* Address of mem KVA */
35678+#define EFX_VI_HW_RESOURCE_EVQMEMKVA 0x8
35679+/* PFN of doorbell page (Falcon) */
35680+#define EFX_VI_HW_RESOURCE_BELLPAGE 0x9
35681+
35682+/*! How large an array to allocate for the get_() functions - smaller
35683+ than the total number of constants as some are mutually exclusive */
35684+#define EFX_VI_HW_RESOURCE_MAXSIZE 0x7
35685+
35686+/*! Constants for the mem_type field in efx_vi_hw_resource */
35687+#define EFX_VI_HW_RESOURCE_IOBUFFER 0 /* Host memory */
35688+#define EFX_VI_HW_RESOURCE_PERIPHERAL 1 /* Card memory/registers */
35689+
35690+/*!
35691+ * Data structure providing information on a hardware resource mapping
35692+ */
35693+struct efx_vi_hw_resource {
35694+ u8 type; /*!< What this resource represents */
35695+ u8 mem_type; /*!< What type of memory is it in, eg,
35696+ * host or iomem */
35697+ u8 more_to_follow; /*!< Is this part of a multi-region resource */
35698+ u32 length; /*!< Length of the resource in bytes */
35699+ unsigned long address; /*!< Address of this resource */
35700+};
35701+
35702+/*!
35703+ * Metadata concerning the list of hardware resource mappings
35704+ */
35705+struct efx_vi_hw_resource_metadata {
35706+ int version;
35707+ int evq_order;
35708+ int evq_offs;
35709+ int evq_capacity;
35710+ int instance;
35711+ unsigned rx_capacity;
35712+ unsigned tx_capacity;
35713+ int nic_arch;
35714+ int nic_revision;
35715+ char nic_variant;
35716+};
35717+
35718+/*!
35719+ * Obtain a list of hardware resource mappings, using virtual addresses
35720+ *
35721+ * \param vih The handle to identify the efx_vi
35722+ * \param mdata Pointer to a structure to receive the metadata
35723+ * \param hw_res_array An array to receive the list of hardware resources
35724+ * \param length The length of hw_res_array. Updated on success to contain
35725+ * the number of entries in the supplied array that were used.
35726+ * \return Zero on success, non-zero on failure
35727+ */
35728+extern int
35729+efx_vi_hw_resource_get_virt(struct efx_vi_state *vih,
35730+ struct efx_vi_hw_resource_metadata *mdata,
35731+ struct efx_vi_hw_resource *hw_res_array,
35732+ int *length);
35733+
35734+/*!
35735+ * Obtain a list of hardware resource mappings, using physical addresses
35736+ *
35737+ * \param vih The handle to identify the efx_vi
35738+ * \param mdata Pointer to a structure to receive the metadata
35739+ * \param hw_res_array An array to receive the list of hardware resources
35740+ * \param length The length of hw_res_array. Updated on success to contain
35741+ * the number of entries in the supplied array that were used.
35742+ * \return Zero on success, non-zero on failure
35743+ */
35744+extern int
35745+efx_vi_hw_resource_get_phys(struct efx_vi_state *vih,
35746+ struct efx_vi_hw_resource_metadata *mdata,
35747+ struct efx_vi_hw_resource *hw_res_array,
35748+ int *length);
35749+
35750+#endif /* __CI_DRIVER_RESOURCE_EFX_VI_H__ */
35751Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common.h
35752===================================================================
35753--- /dev/null 1970-01-01 00:00:00.000000000 +0000
35754+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common.h 2008-02-20 09:32:49.000000000 +0100
35755@@ -0,0 +1,102 @@
35756+/****************************************************************************
35757+ * Driver for Solarflare network controllers -
35758+ * resource management for Xen backend, OpenOnload, etc
35759+ * (including support for SFE4001 10GBT NIC)
35760+ *
35761+ * This file provides API of the efhw library which may be used both from
35762+ * the kernel and from the user-space code.
35763+ *
35764+ * Copyright 2005-2007: Solarflare Communications Inc,
35765+ * 9501 Jeronimo Road, Suite 250,
35766+ * Irvine, CA 92618, USA
35767+ *
35768+ * Developed and maintained by Solarflare Communications:
35769+ * <linux-xen-drivers@solarflare.com>
35770+ * <onload-dev@solarflare.com>
35771+ *
35772+ * Certain parts of the driver were implemented by
35773+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
35774+ * OKTET Labs Ltd, Russia,
35775+ * http://oktetlabs.ru, <info@oktetlabs.ru>
35776+ * by request of Solarflare Communications
35777+ *
35778+ *
35779+ * This program is free software; you can redistribute it and/or modify it
35780+ * under the terms of the GNU General Public License version 2 as published
35781+ * by the Free Software Foundation, incorporated herein by reference.
35782+ *
35783+ * This program is distributed in the hope that it will be useful,
35784+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35785+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35786+ * GNU General Public License for more details.
35787+ *
35788+ * You should have received a copy of the GNU General Public License
35789+ * along with this program; if not, write to the Free Software
35790+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35791+ ****************************************************************************
35792+ */
35793+
35794+#ifndef __CI_EFHW_COMMON_H__
35795+#define __CI_EFHW_COMMON_H__
35796+
35797+#include <ci/efhw/common_sysdep.h>
35798+
35799+enum efhw_arch {
35800+ EFHW_ARCH_FALCON,
35801+ EFHW_ARCH_SIENA,
35802+};
35803+
35804+typedef uint32_t efhw_buffer_addr_t;
35805+#define EFHW_BUFFER_ADDR_FMT "[ba:%"PRIx32"]"
35806+
35807+/*! Comment? */
35808+typedef union {
35809+ uint64_t u64;
35810+ struct {
35811+ uint32_t a;
35812+ uint32_t b;
35813+ } opaque;
35814+ struct {
35815+ uint32_t code;
35816+ uint32_t status;
35817+ } ev1002;
35818+} efhw_event_t;
35819+
35820+/* Flags for TX/RX queues */
35821+#define EFHW_VI_JUMBO_EN 0x01 /*! scatter RX over multiple desc */
35822+#define EFHW_VI_ISCSI_RX_HDIG_EN 0x02 /*! iscsi rx header digest */
35823+#define EFHW_VI_ISCSI_TX_HDIG_EN 0x04 /*! iscsi tx header digest */
35824+#define EFHW_VI_ISCSI_RX_DDIG_EN 0x08 /*! iscsi rx data digest */
35825+#define EFHW_VI_ISCSI_TX_DDIG_EN 0x10 /*! iscsi tx data digest */
35826+#define EFHW_VI_TX_PHYS_ADDR_EN 0x20 /*! TX physical address mode */
35827+#define EFHW_VI_RX_PHYS_ADDR_EN 0x40 /*! RX physical address mode */
35828+#define EFHW_VI_RM_WITH_INTERRUPT 0x80 /*! VI with an interrupt */
35829+#define EFHW_VI_TX_IP_CSUM_DIS 0x100 /*! disable IP checksum generation */
35830+#define EFHW_VI_TX_TCPUDP_CSUM_DIS 0x200 /*! disable TCP/UDP checksum
35831+ generation */
35832+#define EFHW_VI_TX_TCPUDP_ONLY 0x400 /*! drop non-tcp/udp packets */
35833+
35834+/* Types of hardware filter */
35835+/* Each of these values implicitly selects scatter filters on B0 - OR in
35836+ EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK if a non-scatter filter is required */
35837+#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD (0) /* dest host only */
35838+#define EFHW_IP_FILTER_TYPE_UDP_FULL (1) /* dest host and port */
35839+#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD (2) /* dest based filter */
35840+#define EFHW_IP_FILTER_TYPE_TCP_FULL (3) /* src filter */
35841+/* Same again, but with RSS (for B0 only) */
35842+#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD_RSS_B0 (4)
35843+#define EFHW_IP_FILTER_TYPE_UDP_FULL_RSS_B0 (5)
35844+#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD_RSS_B0 (6)
35845+#define EFHW_IP_FILTER_TYPE_TCP_FULL_RSS_B0 (7)
35846+
35847+#define EFHW_IP_FILTER_TYPE_FULL_MASK (0x1) /* Mask for full / wildcard */
35848+#define EFHW_IP_FILTER_TYPE_TCP_MASK (0x2) /* Mask for TCP type */
35849+#define EFHW_IP_FILTER_TYPE_RSS_B0_MASK (0x4) /* Mask for B0 RSS enable */
35850+#define EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK (0x8) /* Mask for B0 SCATTER dsbl */
35851+
35852+#define EFHW_IP_FILTER_TYPE_MASK (0xffff) /* Mask of types above */
35853+
35854+#define EFHW_IP_FILTER_BROADCAST (0x10000) /* driverlink filter
35855+ support */
35856+
35857+#endif /* __CI_EFHW_COMMON_H__ */
35858Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common_sysdep.h
35859===================================================================
35860--- /dev/null 1970-01-01 00:00:00.000000000 +0000
35861+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common_sysdep.h 2008-02-20 09:32:49.000000000 +0100
35862@@ -0,0 +1,67 @@
35863+/****************************************************************************
35864+ * Driver for Solarflare network controllers -
35865+ * resource management for Xen backend, OpenOnload, etc
35866+ * (including support for SFE4001 10GBT NIC)
35867+ *
35868+ * This file provides version-independent Linux kernel API for
35869+ * userland-to-kernel interfaces.
35870+ * Only kernels >=2.6.9 are supported.
35871+ *
35872+ * Copyright 2005-2007: Solarflare Communications Inc,
35873+ * 9501 Jeronimo Road, Suite 250,
35874+ * Irvine, CA 92618, USA
35875+ *
35876+ * Developed and maintained by Solarflare Communications:
35877+ * <linux-xen-drivers@solarflare.com>
35878+ * <onload-dev@solarflare.com>
35879+ *
35880+ * Certain parts of the driver were implemented by
35881+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
35882+ * OKTET Labs Ltd, Russia,
35883+ * http://oktetlabs.ru, <info@oktetlabs.ru>
35884+ * by request of Solarflare Communications
35885+ *
35886+ *
35887+ * This program is free software; you can redistribute it and/or modify it
35888+ * under the terms of the GNU General Public License version 2 as published
35889+ * by the Free Software Foundation, incorporated herein by reference.
35890+ *
35891+ * This program is distributed in the hope that it will be useful,
35892+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35893+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35894+ * GNU General Public License for more details.
35895+ *
35896+ * You should have received a copy of the GNU General Public License
35897+ * along with this program; if not, write to the Free Software
35898+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35899+ ****************************************************************************
35900+ */
35901+
35902+#ifndef __CI_EFHW_COMMON_LINUX_H__
35903+#define __CI_EFHW_COMMON_LINUX_H__
35904+
35905+#include <linux/types.h>
35906+#include <linux/version.h>
35907+
35908+/* Dirty hack, but Linux kernel does not provide DMA_ADDR_T_FMT */
35909+#if BITS_PER_LONG == 64 || defined(CONFIG_HIGHMEM64G)
35910+#define DMA_ADDR_T_FMT "%llx"
35911+#else
35912+#define DMA_ADDR_T_FMT "%x"
35913+#endif
35914+
35915+/* Linux kernel also does not provide PRIx32... Sigh. */
35916+#define PRIx32 "x"
35917+#define PRIx64 "llx"
35918+
35919+
35920+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
35921+enum {
35922+ false = 0,
35923+ true = 1
35924+};
35925+
35926+typedef _Bool bool;
35927+#endif /* LINUX_VERSION_CODE < 2.6.19 */
35928+
35929+#endif /* __CI_EFHW_COMMON_LINUX_H__ */
35930Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/debug.h
35931===================================================================
35932--- /dev/null 1970-01-01 00:00:00.000000000 +0000
35933+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/debug.h 2008-02-20 09:32:49.000000000 +0100
35934@@ -0,0 +1,84 @@
35935+/****************************************************************************
35936+ * Driver for Solarflare network controllers -
35937+ * resource management for Xen backend, OpenOnload, etc
35938+ * (including support for SFE4001 10GBT NIC)
35939+ *
35940+ * This file provides debug-related API for efhw library using Linux kernel
35941+ * primitives.
35942+ *
35943+ * Copyright 2005-2007: Solarflare Communications Inc,
35944+ * 9501 Jeronimo Road, Suite 250,
35945+ * Irvine, CA 92618, USA
35946+ *
35947+ * Developed and maintained by Solarflare Communications:
35948+ * <linux-xen-drivers@solarflare.com>
35949+ * <onload-dev@solarflare.com>
35950+ *
35951+ * Certain parts of the driver were implemented by
35952+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
35953+ * OKTET Labs Ltd, Russia,
35954+ * http://oktetlabs.ru, <info@oktetlabs.ru>
35955+ * by request of Solarflare Communications
35956+ *
35957+ *
35958+ * This program is free software; you can redistribute it and/or modify it
35959+ * under the terms of the GNU General Public License version 2 as published
35960+ * by the Free Software Foundation, incorporated herein by reference.
35961+ *
35962+ * This program is distributed in the hope that it will be useful,
35963+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35964+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35965+ * GNU General Public License for more details.
35966+ *
35967+ * You should have received a copy of the GNU General Public License
35968+ * along with this program; if not, write to the Free Software
35969+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35970+ ****************************************************************************
35971+ */
35972+
35973+#ifndef __CI_EFHW_DEBUG_LINUX_H__
35974+#define __CI_EFHW_DEBUG_LINUX_H__
35975+
35976+#define EFHW_PRINTK_PREFIX "[sfc efhw] "
35977+
35978+#define EFHW_PRINTK(level, fmt, ...) \
35979+ printk(level EFHW_PRINTK_PREFIX fmt "\n", __VA_ARGS__)
35980+
35981+/* The following macros should be used with non-zero format parameters
35982+ * due to __VA_ARGS__ limitations. Use "%s" with __FUNCTION__ if you can't
35983+ * find better parameters. */
35984+#define EFHW_ERR(fmt, ...) EFHW_PRINTK(KERN_ERR, fmt, __VA_ARGS__)
35985+#define EFHW_WARN(fmt, ...) EFHW_PRINTK(KERN_WARNING, fmt, __VA_ARGS__)
35986+#define EFHW_NOTICE(fmt, ...) EFHW_PRINTK(KERN_NOTICE, fmt, __VA_ARGS__)
35987+#if 0 && !defined(NDEBUG)
35988+#define EFHW_TRACE(fmt, ...) EFHW_PRINTK(KERN_DEBUG, fmt, __VA_ARGS__)
35989+#else
35990+#define EFHW_TRACE(fmt, ...)
35991+#endif
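A quick usage note for the logging macros above (sketch only; these calls would sit inside driver code). Because of the __VA_ARGS__ restriction mentioned in the comment, every call needs at least one argument after the format string:

/* Correct: at least one argument follows the format string. */
EFHW_ERR("%s: failed to allocate event queue", __FUNCTION__);
EFHW_NOTICE("%s: using %d buffer table entries", __FUNCTION__, 32);

/* Would not compile with these macros: EFHW_ERR("no trailing arguments"); */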
35992+
35993+#ifndef NDEBUG
35994+#define EFHW_ASSERT(cond) BUG_ON((cond) == 0)
35995+#define EFHW_DO_DEBUG(expr) expr
35996+#else
35997+#define EFHW_ASSERT(cond)
35998+#define EFHW_DO_DEBUG(expr)
35999+#endif
36000+
36001+#define EFHW_TEST(expr) \
36002+ do { \
36003+ if (unlikely(!(expr))) \
36004+ BUG(); \
36005+ } while (0)
36006+
36007+/* Build time asserts. We paste the line number into the type name
36008+ * so that the macro can be used more than once per file even if the
36009+ * compiler objects to multiple identical typedefs. Collisions
36010+ * between uses in different header files are still possible. */
36011+#ifndef EFHW_BUILD_ASSERT
36012+#define __EFHW_BUILD_ASSERT_NAME(_x) __EFHW_BUILD_ASSERT_ILOATHECPP(_x)
36013+#define __EFHW_BUILD_ASSERT_ILOATHECPP(_x) __EFHW_BUILD_ASSERT__ ##_x
36014+#define EFHW_BUILD_ASSERT(e) \
36015+ typedef char __EFHW_BUILD_ASSERT_NAME(__LINE__)[(e) ? 1 : -1]
36016+#endif
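An illustrative use of the build-time assert above (not part of the patch; it assumes efhw_event_t from ci/efhw/common.h is in scope). The typedef only compiles if the condition is true, so a false condition produces a negative array size error at compile time:

/* Fails to compile if efhw_event_t ever stops being 8 bytes wide. */
EFHW_BUILD_ASSERT(sizeof(efhw_event_t) == 8);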
36017+
36018+#endif /* __CI_EFHW_DEBUG_LINUX_H__ */
36019Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_config.h
36020===================================================================
36021--- /dev/null 1970-01-01 00:00:00.000000000 +0000
36022+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_config.h 2008-02-20 09:32:49.000000000 +0100
36023@@ -0,0 +1,43 @@
36024+/****************************************************************************
36025+ * Driver for Solarflare network controllers -
36026+ * resource management for Xen backend, OpenOnload, etc
36027+ * (including support for SFE4001 10GBT NIC)
36028+ *
36029+ * This file provides some limits used in both kernel and userland code.
36030+ *
36031+ * Copyright 2005-2007: Solarflare Communications Inc,
36032+ * 9501 Jeronimo Road, Suite 250,
36033+ * Irvine, CA 92618, USA
36034+ *
36035+ * Developed and maintained by Solarflare Communications:
36036+ * <linux-xen-drivers@solarflare.com>
36037+ * <onload-dev@solarflare.com>
36038+ *
36039+ * Certain parts of the driver were implemented by
36040+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36041+ * OKTET Labs Ltd, Russia,
36042+ * http://oktetlabs.ru, <info@oktetlabs.ru>
36043+ * by request of Solarflare Communications
36044+ *
36045+ *
36046+ * This program is free software; you can redistribute it and/or modify it
36047+ * under the terms of the GNU General Public License version 2 as published
36048+ * by the Free Software Foundation, incorporated herein by reference.
36049+ *
36050+ * This program is distributed in the hope that it will be useful,
36051+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36052+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36053+ * GNU General Public License for more details.
36054+ *
36055+ * You should have received a copy of the GNU General Public License
36056+ * along with this program; if not, write to the Free Software
36057+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36058+ ****************************************************************************
36059+ */
36060+
36061+#ifndef __CI_EFHW_EFAB_CONFIG_H__
36062+#define __CI_EFHW_EFAB_CONFIG_H__
36063+
36064+#define EFHW_MAX_NR_DEVS 5 /* max number of efhw devices supported */
36065+
36066+#endif /* __CI_EFHW_EFAB_CONFIG_H__ */
36067Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_types.h
36068===================================================================
36069--- /dev/null 1970-01-01 00:00:00.000000000 +0000
36070+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_types.h 2008-02-20 09:32:49.000000000 +0100
36071@@ -0,0 +1,342 @@
36072+/****************************************************************************
36073+ * Driver for Solarflare network controllers -
36074+ * resource management for Xen backend, OpenOnload, etc
36075+ * (including support for SFE4001 10GBT NIC)
36076+ *
36077+ * This file provides struct efhw_nic and some related types.
36078+ *
36079+ * Copyright 2005-2007: Solarflare Communications Inc,
36080+ * 9501 Jeronimo Road, Suite 250,
36081+ * Irvine, CA 92618, USA
36082+ *
36083+ * Developed and maintained by Solarflare Communications:
36084+ * <linux-xen-drivers@solarflare.com>
36085+ * <onload-dev@solarflare.com>
36086+ *
36087+ * Certain parts of the driver were implemented by
36088+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36089+ * OKTET Labs Ltd, Russia,
36090+ * http://oktetlabs.ru, <info@oktetlabs.ru>
36091+ * by request of Solarflare Communications
36092+ *
36093+ *
36094+ * This program is free software; you can redistribute it and/or modify it
36095+ * under the terms of the GNU General Public License version 2 as published
36096+ * by the Free Software Foundation, incorporated herein by reference.
36097+ *
36098+ * This program is distributed in the hope that it will be useful,
36099+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36100+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36101+ * GNU General Public License for more details.
36102+ *
36103+ * You should have received a copy of the GNU General Public License
36104+ * along with this program; if not, write to the Free Software
36105+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36106+ ****************************************************************************
36107+ */
36108+
36109+#ifndef __CI_EFHW_EFAB_TYPES_H__
36110+#define __CI_EFHW_EFAB_TYPES_H__
36111+
36112+#include <ci/efhw/efhw_config.h>
36113+#include <ci/efhw/hardware_sysdep.h>
36114+#include <ci/efhw/iopage_types.h>
36115+#include <ci/efhw/sysdep.h>
36116+
36117+/*--------------------------------------------------------------------
36118+ *
36119+ * hardware limits used in the types
36120+ *
36121+ *--------------------------------------------------------------------*/
36122+
36123+#define EFHW_KEVENTQ_MAX 8
36124+
36125+/*--------------------------------------------------------------------
36126+ *
36127+ * forward type declarations
36128+ *
36129+ *--------------------------------------------------------------------*/
36130+
36131+struct efhw_nic;
36132+
36133+/*--------------------------------------------------------------------
36134+ *
36135+ * Managed interface
36136+ *
36137+ *--------------------------------------------------------------------*/
36138+
36139+struct efhw_buffer_table_allocation{
36140+ unsigned base;
36141+ unsigned order;
36142+};
36143+
36144+struct eventq_resource_hardware {
36145+ /*! iobuffer allocated for eventq - can be larger than eventq */
36146+ efhw_iopages_t iobuff;
36147+ unsigned iobuff_off;
36148+ struct efhw_buffer_table_allocation buf_tbl_alloc;
36149+ int capacity; /*!< capacity of event queue */
36150+};
36151+
36152+/*--------------------------------------------------------------------
36153+ *
36154+ * event queues and event driven callbacks
36155+ *
36156+ *--------------------------------------------------------------------*/
36157+
36158+struct efhw_keventq {
36159+ volatile int lock;
36160+ caddr_t evq_base;
36161+ int32_t evq_ptr;
36162+ uint32_t evq_mask;
36163+ unsigned instance;
36164+ struct eventq_resource_hardware hw;
36165+ struct efhw_ev_handler *ev_handlers;
36166+};
36167+
36168+/**********************************************************************
36169+ * Portable HW interface. ***************************************
36170+ **********************************************************************/
36171+
36172+/*--------------------------------------------------------------------
36173+ *
36174+ * EtherFabric Functional units - configuration and control
36175+ *
36176+ *--------------------------------------------------------------------*/
36177+
36178+struct efhw_func_ops {
36179+
36180+ /*-------------- Initialisation ------------ */
36181+
36182+ /*! close down all hardware functional units - leaves NIC in a safe
36183+ state for driver unload */
36184+ void (*close_hardware) (struct efhw_nic *nic);
36185+
36186+ /*! initialise all hardware functional units */
36187+ int (*init_hardware) (struct efhw_nic *nic,
36188+ struct efhw_ev_handler *,
36189+ const uint8_t *mac_addr);
36190+
36191+ /*-------------- Interrupt support ------------ */
36192+
36193+ /*! Main interrupt routine
36194+ ** This function returns,
36195+ ** - zero, if the IRQ was not generated by EF1
36196+ ** - non-zero, if EF1 was the source of the IRQ
36197+ **
36198+ **
36199+ ** opaque is an OS-provided pointer for use by the OS callbacks,
36200+ ** e.g. on Windows it indicates that a DPC has been scheduled
36201+ */
36202+ int (*interrupt) (struct efhw_nic *nic);
36203+
36204+ /*! Enable given interrupt mask for the given IRQ unit */
36205+ void (*interrupt_enable) (struct efhw_nic *nic, uint idx);
36206+
36207+ /*! Disable given interrupt mask for the given IRQ unit */
36208+ void (*interrupt_disable) (struct efhw_nic *nic, uint idx);
36209+
36210+ /*! Set interrupt moderation strategy for the given IRQ unit
36211+ ** val is in usec
36212+ */
36213+ void (*set_interrupt_moderation)(struct efhw_nic *nic,
36214+ uint idx, uint val);
36215+
36216+ /*-------------- Event support ------------ */
36217+
36218+ /*! Enable the given event queue
36219+ Depending on the underlying implementation (EF1 or Falcon), either
36220+ a q_base_addr in host memory or a buffer base id should be
36221+ provided.
36222+ */
36223+ void (*event_queue_enable) (struct efhw_nic *nic,
36224+ uint evq, /* event queue index */
36225+ uint evq_size, /* units of #entries */
36226+ dma_addr_t q_base_addr, uint buf_base_id);
36227+
36228+ /*! Disable the given event queue (and any associated timer) */
36229+ void (*event_queue_disable) (struct efhw_nic *nic, uint evq,
36230+ int timer_only);
36231+
36232+ /*! request wakeup from the NIC on a given event Q */
36233+ void (*wakeup_request) (struct efhw_nic *nic, dma_addr_t q_base_addr,
36234+ int next_i, int evq);
36235+
36236+ /*! Push a SW event on a given eventQ */
36237+ void (*sw_event) (struct efhw_nic *nic, int data, int evq);
36238+
36239+ /*-------------- Filter support ------------ */
36240+
36241+ /*! Setup a given filter - The software can request a filter_i,
36242+ * but some EtherFabric implementations will override with
36243+ * a more suitable index
36244+ */
36245+ int (*ipfilter_set) (struct efhw_nic *nic, int type,
36246+ int *filter_i, int dmaq,
36247+ unsigned saddr_be32, unsigned sport_be16,
36248+ unsigned daddr_be32, unsigned dport_be16);
36249+
36250+ /*! Attach a given filter to a DMAQ */
36251+ void (*ipfilter_attach) (struct efhw_nic *nic, int filter_idx,
36252+ int dmaq_idx);
36253+
36254+ /*! Detach a filter from its DMAQ */
36255+ void (*ipfilter_detach) (struct efhw_nic *nic, int filter_idx);
36256+
36257+ /*! Clear down a given filter */
36258+ void (*ipfilter_clear) (struct efhw_nic *nic, int filter_idx);
36259+
36260+ /*-------------- DMA support ------------ */
36261+
36262+ /*! Initialise NIC state for a given TX DMAQ */
36263+ void (*dmaq_tx_q_init) (struct efhw_nic *nic,
36264+ uint dmaq, uint evq, uint owner, uint tag,
36265+ uint dmaq_size, uint buf_idx, uint flags);
36266+
36267+ /*! Initialise NIC state for a given RX DMAQ */
36268+ void (*dmaq_rx_q_init) (struct efhw_nic *nic,
36269+ uint dmaq, uint evq, uint owner, uint tag,
36270+ uint dmaq_size, uint buf_idx, uint flags);
36271+
36272+ /*! Disable a given TX DMAQ */
36273+ void (*dmaq_tx_q_disable) (struct efhw_nic *nic, uint dmaq);
36274+
36275+ /*! Disable a given RX DMAQ */
36276+ void (*dmaq_rx_q_disable) (struct efhw_nic *nic, uint dmaq);
36277+
36278+ /*! Flush a given TX DMA channel */
36279+ int (*flush_tx_dma_channel) (struct efhw_nic *nic, uint dmaq);
36280+
36281+ /*! Flush a given RX DMA channel */
36282+ int (*flush_rx_dma_channel) (struct efhw_nic *nic, uint dmaq);
36283+
36284+ /*-------------- Buffer table Support ------------ */
36285+
36286+ /*! Initialise a buffer table page */
36287+ void (*buffer_table_set) (struct efhw_nic *nic,
36288+ dma_addr_t dma_addr,
36289+ uint bufsz, uint region,
36290+ int own_id, int buffer_id);
36291+
36292+ /*! Initialise a block of buffer table pages */
36293+ void (*buffer_table_set_n) (struct efhw_nic *nic, int buffer_id,
36294+ dma_addr_t dma_addr,
36295+ uint bufsz, uint region,
36296+ int n_pages, int own_id);
36297+
36298+ /*! Clear a block of buffer table pages */
36299+ void (*buffer_table_clear) (struct efhw_nic *nic, int buffer_id,
36300+ int num);
36301+
36302+ /*! Commit a buffer table update */
36303+ void (*buffer_table_commit) (struct efhw_nic *nic);
36304+
36305+};
36306+
36307+
36308+/*----------------------------------------------------------------------------
36309+ *
36310+ * NIC type
36311+ *
36312+ *---------------------------------------------------------------------------*/
36313+
36314+struct efhw_device_type {
36315+ int arch; /* enum efhw_arch */
36316+ char variant; /* 'A', 'B', ... */
36317+ int revision; /* 0, 1, ... */
36318+};
36319+
36320+
36321+/*----------------------------------------------------------------------------
36322+ *
36323+ * EtherFabric NIC instance - nic.c for HW independent functions
36324+ *
36325+ *---------------------------------------------------------------------------*/
36326+
36327+/*! EtherFabric NIC instance. */
36328+struct efhw_nic {
36329+ /*! zero-based index into the efrm_nic_table.nic array */
36330+ volatile int index;
36331+ int ifindex; /*!< OS level nic index */
36332+#ifdef HAS_NET_NAMESPACE
36333+ struct net *nd_net;
36334+#endif
36335+
36336+ struct efhw_device_type devtype;
36337+
36338+ /*! Options that can be set by user. */
36339+ unsigned options;
36340+# define NIC_OPT_EFTEST 0x1 /* owner is an eftest app */
36341+
36342+# define NIC_OPT_DEFAULT 0
36343+
36344+ /*! Internal flags that indicate hardware properties at runtime. */
36345+ unsigned flags;
36346+# define NIC_FLAG_NO_INTERRUPT 0x01 /* to be set at init time only */
36347+# define NIC_FLAG_TRY_MSI 0x02
36348+# define NIC_FLAG_MSI 0x04
36349+# define NIC_FLAG_OS_IRQ_EN 0x08
36350+# define NIC_FLAG_10G 0x10
36351+
36352+ unsigned mtu; /*!< MAC MTU (includes MAC hdr) */
36353+
36354+ /* hardware resources */
36355+
36356+ /*! I/O address of the start of the bar */
36357+ efhw_ioaddr_t bar_ioaddr;
36358+
36359+ /*! Bar number of control aperture. */
36360+ unsigned ctr_ap_bar;
36361+ /*! Length of control aperture in bytes. */
36362+ unsigned ctr_ap_bytes;
36363+
36364+ uint8_t mac_addr[ETH_ALEN]; /*!< mac address */
36365+
36366+ /*! EtherFabric Functional Units -- functions */
36367+ const struct efhw_func_ops *efhw_func;
36368+
36369+ /* Value read from FPGA version register. Zero for asic. */
36370+ unsigned fpga_version;
36371+
36372+ /*! This lock protects a number of misc NIC resources. It should
36373+ * only be used for things that can be at the bottom of the lock
36374+ * order, i.e. you must not attempt to grab any other lock while
36375+ * holding this one.
36376+ */
36377+ spinlock_t *reg_lock;
36378+ spinlock_t the_reg_lock;
36379+
36380+ int buf_commit_outstanding; /*!< outstanding buffer commits */
36381+
36382+ /*! interrupt callbacks (hard-irq) */
36383+ void (*irq_handler) (struct efhw_nic *, int unit);
36384+
36385+ /*! event queues per driver */
36386+ struct efhw_keventq evq[EFHW_KEVENTQ_MAX];
36387+
36388+/* for marking when we are not using an IRQ unit
36389+ - 0 is a valid offset to an IRQ unit on EF1! */
36390+#define EFHW_IRQ_UNIT_UNUSED 0xffff
36391+ /*! interrupt unit in use */
36392+ unsigned int irq_unit[EFHW_KEVENTQ_MAX];
36393+ efhw_iopage_t irq_iobuff; /*!< Falcon SYSERR interrupt */
36394+
36395+ /* The new driverlink infrastructure. */
36396+ struct efx_dl_device *net_driver_dev;
36397+ struct efx_dlfilt_cb_s *dlfilter_cb;
36398+
36399+ /*! Bit masks of the sizes of event queues and dma queues supported
36400+ * by the nic. */
36401+ unsigned evq_sizes;
36402+ unsigned rxq_sizes;
36403+ unsigned txq_sizes;
36404+
36405+ /* Size of filter table (including odd and even banks). */
36406+ unsigned filter_tbl_size;
36407+};
36408+
36409+
36410+#define EFHW_KVA(nic) ((nic)->bar_ioaddr)
36411+
36412+
36413+#endif /* __CI_EFHW_EFHW_TYPES_H__ */
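
As a minimal usage sketch (illustrative only; the helper name and parameter values below are hypothetical), hardware-independent code is expected to program the NIC exclusively through the efhw_func ops table declared above, so it never needs to know whether the device is EF1 or Falcon:

#include <ci/efhw/efhw_types.h>

static void my_enable_evq(struct efhw_nic *nic, uint evq, uint n_entries,
			  dma_addr_t q_base_addr, uint buf_base_id)
{
	/* Dispatch through the per-NIC function table; the EF1 and Falcon
	 * backends each install their own implementation of this hook. */
	nic->efhw_func->event_queue_enable(nic, evq, n_entries,
					   q_base_addr, buf_base_id);
}

As the comment on event_queue_enable notes, a real caller would supply a host q_base_addr on EF1 and a buffer-table base id on Falcon.
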
36414Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/hardware_sysdep.h
36415===================================================================
36416--- /dev/null 1970-01-01 00:00:00.000000000 +0000
36417+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/hardware_sysdep.h 2008-02-20 09:32:49.000000000 +0100
36418@@ -0,0 +1,84 @@
36419+/****************************************************************************
36420+ * Driver for Solarflare network controllers -
36421+ * resource management for Xen backend, OpenOnload, etc
36422+ * (including support for SFE4001 10GBT NIC)
36423+ *
36424+ * This file provides version-independent Linux kernel API for header files
36425+ * with hardware-related definitions (in ci/driver/efab/hardware*).
36426+ * Only kernels >=2.6.9 are supported.
36427+ *
36428+ * Copyright 2005-2007: Solarflare Communications Inc,
36429+ * 9501 Jeronimo Road, Suite 250,
36430+ * Irvine, CA 92618, USA
36431+ *
36432+ * Developed and maintained by Solarflare Communications:
36433+ * <linux-xen-drivers@solarflare.com>
36434+ * <onload-dev@solarflare.com>
36435+ *
36436+ * Certain parts of the driver were implemented by
36437+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36438+ * OKTET Labs Ltd, Russia,
36439+ * http://oktetlabs.ru, <info@oktetlabs.ru>
36440+ * by request of Solarflare Communications
36441+ *
36442+ *
36443+ * This program is free software; you can redistribute it and/or modify it
36444+ * under the terms of the GNU General Public License version 2 as published
36445+ * by the Free Software Foundation, incorporated herein by reference.
36446+ *
36447+ * This program is distributed in the hope that it will be useful,
36448+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36449+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36450+ * GNU General Public License for more details.
36451+ *
36452+ * You should have received a copy of the GNU General Public License
36453+ * along with this program; if not, write to the Free Software
36454+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36455+ ****************************************************************************
36456+ */
36457+
36458+#ifndef __CI_EFHW_HARDWARE_LINUX_H__
36459+#define __CI_EFHW_HARDWARE_LINUX_H__
36460+
36461+#include <asm/io.h>
36462+
36463+#ifdef __LITTLE_ENDIAN
36464+#define EFHW_IS_LITTLE_ENDIAN
36465+#elif __BIG_ENDIAN
36466+#define EFHW_IS_BIG_ENDIAN
36467+#else
36468+#error Unknown endianness
36469+#endif
36470+
36471+#ifndef mmiowb
36472+ #if defined(__i386__) || defined(__x86_64__)
36473+ #define mmiowb()
36474+ #elif defined(__ia64__)
36475+ #ifndef ia64_mfa
36476+ #define ia64_mfa() asm volatile ("mf.a" ::: "memory")
36477+ #endif
36478+ #define mmiowb ia64_mfa
36479+ #else
36480+ #error "Need definition for mmiowb()"
36481+ #endif
36482+#endif
36483+
36484+typedef char *efhw_ioaddr_t;
36485+
36486+#ifndef readq
36487+static inline uint64_t __readq(void __iomem *addr)
36488+{
36489+ return *(volatile uint64_t *)addr;
36490+}
36491+#define readq(x) __readq(x)
36492+#endif
36493+
36494+#ifndef writeq
36495+static inline void __writeq(uint64_t v, void __iomem *addr)
36496+{
36497+ *(volatile uint64_t *)addr = v;
36498+}
36499+#define writeq(val, addr) __writeq((val), (addr))
36500+#endif
36501+
36502+#endif /* __CI_EFHW_HARDWARE_LINUX_H__ */
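
A brief sketch (illustrative only, assuming an already-mapped BAR; the helper name is hypothetical) of how the fallbacks above are meant to be used for 64-bit register access on kernels without native readq()/writeq():

#include <ci/efhw/hardware_sysdep.h>

static uint64_t my_rw_reg64(efhw_ioaddr_t bar, unsigned offset, uint64_t val)
{
	writeq(val, bar + offset);	/* falls back to __writeq() if needed */
	mmiowb();			/* order the write against later MMIO */
	return readq(bar + offset);
}
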
36503Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/iopage_types.h
36504===================================================================
36505--- /dev/null 1970-01-01 00:00:00.000000000 +0000
36506+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/iopage_types.h 2008-02-20 09:32:49.000000000 +0100
36507@@ -0,0 +1,188 @@
36508+/****************************************************************************
36509+ * Driver for Solarflare network controllers -
36510+ * resource management for Xen backend, OpenOnload, etc
36511+ * (including support for SFE4001 10GBT NIC)
36512+ *
36513+ * This file provides efhw_page_t and efhw_iopage_t for Linux kernel.
36514+ *
36515+ * Copyright 2005-2007: Solarflare Communications Inc,
36516+ * 9501 Jeronimo Road, Suite 250,
36517+ * Irvine, CA 92618, USA
36518+ *
36519+ * Developed and maintained by Solarflare Communications:
36520+ * <linux-xen-drivers@solarflare.com>
36521+ * <onload-dev@solarflare.com>
36522+ *
36523+ * Certain parts of the driver were implemented by
36524+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36525+ * OKTET Labs Ltd, Russia,
36526+ * http://oktetlabs.ru, <info@oktetlabs.ru>
36527+ * by request of Solarflare Communications
36528+ *
36529+ *
36530+ * This program is free software; you can redistribute it and/or modify it
36531+ * under the terms of the GNU General Public License version 2 as published
36532+ * by the Free Software Foundation, incorporated herein by reference.
36533+ *
36534+ * This program is distributed in the hope that it will be useful,
36535+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36536+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36537+ * GNU General Public License for more details.
36538+ *
36539+ * You should have received a copy of the GNU General Public License
36540+ * along with this program; if not, write to the Free Software
36541+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36542+ ****************************************************************************
36543+ */
36544+
36545+#ifndef __CI_EFHW_IOPAGE_LINUX_H__
36546+#define __CI_EFHW_IOPAGE_LINUX_H__
36547+
36548+#include <linux/gfp.h>
36549+#include <linux/hardirq.h>
36550+#include <ci/efhw/debug.h>
36551+
36552+/*--------------------------------------------------------------------
36553+ *
36554+ * efhw_page_t: A single page of memory. Directly mapped in the driver,
36555+ * and can be mapped to userlevel.
36556+ *
36557+ *--------------------------------------------------------------------*/
36558+
36559+typedef struct {
36560+ unsigned long kva;
36561+} efhw_page_t;
36562+
36563+static inline int efhw_page_alloc(efhw_page_t *p)
36564+{
36565+ p->kva = __get_free_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL);
36566+ return p->kva ? 0 : -ENOMEM;
36567+}
36568+
36569+static inline int efhw_page_alloc_zeroed(efhw_page_t *p)
36570+{
36571+ p->kva = get_zeroed_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL);
36572+ return p->kva ? 0 : -ENOMEM;
36573+}
36574+
36575+static inline void efhw_page_free(efhw_page_t *p)
36576+{
36577+ free_page(p->kva);
36578+ EFHW_DO_DEBUG(memset(p, 0, sizeof(*p)));
36579+}
36580+
36581+static inline char *efhw_page_ptr(efhw_page_t *p)
36582+{
36583+ return (char *)p->kva;
36584+}
36585+
36586+static inline unsigned efhw_page_pfn(efhw_page_t *p)
36587+{
36588+ return (unsigned)(__pa(p->kva) >> PAGE_SHIFT);
36589+}
36590+
36591+static inline void efhw_page_mark_invalid(efhw_page_t *p)
36592+{
36593+ p->kva = 0;
36594+}
36595+
36596+static inline int efhw_page_is_valid(efhw_page_t *p)
36597+{
36598+ return p->kva != 0;
36599+}
36600+
36601+static inline void efhw_page_init_from_va(efhw_page_t *p, void *va)
36602+{
36603+ p->kva = (unsigned long)va;
36604+}
36605+
36606+/*--------------------------------------------------------------------
36607+ *
36608+ * efhw_iopage_t: A single page of memory. Directly mapped in the driver,
36609+ * and can be mapped to userlevel. Can also be accessed by the NIC.
36610+ *
36611+ *--------------------------------------------------------------------*/
36612+
36613+typedef struct {
36614+ efhw_page_t p;
36615+ dma_addr_t dma_addr;
36616+} efhw_iopage_t;
36617+
36618+static inline dma_addr_t efhw_iopage_dma_addr(efhw_iopage_t *p)
36619+{
36620+ return p->dma_addr;
36621+}
36622+
36623+#define efhw_iopage_ptr(iop) efhw_page_ptr(&(iop)->p)
36624+#define efhw_iopage_pfn(iop) efhw_page_pfn(&(iop)->p)
36625+#define efhw_iopage_mark_invalid(iop) efhw_page_mark_invalid(&(iop)->p)
36626+#define efhw_iopage_is_valid(iop) efhw_page_is_valid(&(iop)->p)
36627+
36628+/*--------------------------------------------------------------------
36629+ *
36630+ * efhw_iopages_t: A set of pages that are contiguous in physical memory.
36631+ * Directly mapped in the driver, and can be mapped to userlevel. Can also
36632+ * be accessed by the NIC.
36633+ *
36634+ * NB. The O/S may be unwilling to allocate many, or even any of these. So
36635+ * only use this type where the NIC really needs a physically contiguous
36636+ * buffer.
36637+ *
36638+ *--------------------------------------------------------------------*/
36639+
36640+typedef struct {
36641+ caddr_t kva;
36642+ unsigned order;
36643+ dma_addr_t dma_addr;
36644+} efhw_iopages_t;
36645+
36646+static inline caddr_t efhw_iopages_ptr(efhw_iopages_t *p)
36647+{
36648+ return p->kva;
36649+}
36650+
36651+static inline unsigned efhw_iopages_pfn(efhw_iopages_t *p)
36652+{
36653+ return (unsigned)(__pa(p->kva) >> PAGE_SHIFT);
36654+}
36655+
36656+static inline dma_addr_t efhw_iopages_dma_addr(efhw_iopages_t *p)
36657+{
36658+ return p->dma_addr;
36659+}
36660+
36661+static inline unsigned efhw_iopages_size(efhw_iopages_t *p)
36662+{
36663+ return 1u << (p->order + PAGE_SHIFT);
36664+}
36665+
36666+/* efhw_iopage_t <-> efhw_iopages_t conversions for handling physically
36667+ * contiguous allocations in iobufsets for iSCSI. This allows the
36668+ * essential information about contiguous allocations from
36669+ * efhw_iopages_alloc() to be saved away in the efhw_iopage_t array in an
36670+ * iobufset. (Changing the iobufset resource to use a union type would
36671+ * involve a lot of code changes, and make the iobufset's metadata larger
36672+ * which could be bad as it's supposed to fit into a single page on some
36673+ * platforms.)
36674+ */
36675+static inline void
36676+efhw_iopage_init_from_iopages(efhw_iopage_t *iopage,
36677+ efhw_iopages_t *iopages, unsigned pageno)
36678+{
36679+ iopage->p.kva = ((unsigned long)efhw_iopages_ptr(iopages))
36680+ + (pageno * PAGE_SIZE);
36681+ iopage->dma_addr = efhw_iopages_dma_addr(iopages) +
36682+ (pageno * PAGE_SIZE);
36683+}
36684+
36685+static inline void
36686+efhw_iopages_init_from_iopage(efhw_iopages_t *iopages,
36687+ efhw_iopage_t *iopage, unsigned order)
36688+{
36689+ iopages->kva = (caddr_t) efhw_iopage_ptr(iopage);
36690+ EFHW_ASSERT(iopages->kva);
36691+ iopages->order = order;
36692+ iopages->dma_addr = efhw_iopage_dma_addr(iopage);
36693+}
36694+
36695+#endif /* __CI_EFHW_IOPAGE_LINUX_H__ */
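
Two short usage sketches (illustrative only; the function names are hypothetical): one for the plain efhw_page_t allocator, and one for the efhw_iopages_t to efhw_iopage_t conversion described above, which records each page of a physically contiguous allocation separately:

#include <ci/efhw/iopage_types.h>

static int my_page_example(void)
{
	efhw_page_t pg;
	int rc = efhw_page_alloc_zeroed(&pg);	/* GFP flags chosen by context */
	if (rc < 0)
		return rc;			/* -ENOMEM */
	efhw_page_ptr(&pg)[0] = 0x5a;
	efhw_page_free(&pg);
	return 0;
}

static void my_split_iopages(efhw_iopages_t *pages, efhw_iopage_t *out)
{
	unsigned i, n = efhw_iopages_size(pages) >> PAGE_SHIFT;
	/* Save the per-page kva and DMA address of a contiguous block. */
	for (i = 0; i < n; ++i)
		efhw_iopage_init_from_iopages(&out[i], pages, i);
}
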
36696Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/public.h
36697===================================================================
36698--- /dev/null 1970-01-01 00:00:00.000000000 +0000
36699+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/public.h 2008-02-20 09:32:49.000000000 +0100
36700@@ -0,0 +1,83 @@
36701+/****************************************************************************
36702+ * Driver for Solarflare network controllers -
36703+ * resource management for Xen backend, OpenOnload, etc
36704+ * (including support for SFE4001 10GBT NIC)
36705+ *
36706+ * This file provides public API of efhw library exported from the SFC
36707+ * resource driver.
36708+ *
36709+ * Copyright 2005-2007: Solarflare Communications Inc,
36710+ * 9501 Jeronimo Road, Suite 250,
36711+ * Irvine, CA 92618, USA
36712+ *
36713+ * Developed and maintained by Solarflare Communications:
36714+ * <linux-xen-drivers@solarflare.com>
36715+ * <onload-dev@solarflare.com>
36716+ *
36717+ * Certain parts of the driver were implemented by
36718+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36719+ * OKTET Labs Ltd, Russia,
36720+ * http://oktetlabs.ru, <info@oktetlabs.ru>
36721+ * by request of Solarflare Communications
36722+ *
36723+ *
36724+ * This program is free software; you can redistribute it and/or modify it
36725+ * under the terms of the GNU General Public License version 2 as published
36726+ * by the Free Software Foundation, incorporated herein by reference.
36727+ *
36728+ * This program is distributed in the hope that it will be useful,
36729+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36730+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36731+ * GNU General Public License for more details.
36732+ *
36733+ * You should have received a copy of the GNU General Public License
36734+ * along with this program; if not, write to the Free Software
36735+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36736+ ****************************************************************************
36737+ */
36738+
36739+#ifndef __CI_EFHW_PUBLIC_H__
36740+#define __CI_EFHW_PUBLIC_H__
36741+
36742+#include <ci/efhw/common.h>
36743+#include <ci/efhw/efhw_types.h>
36744+
36745+/*! Returns true if we have some EtherFabric functional units -
36746+ whether configured or not */
36747+static inline int efhw_nic_have_functional_units(struct efhw_nic *nic)
36748+{
36749+ return nic->efhw_func != 0;
36750+}
36751+
36752+/*! Returns true if the EtherFabric functional units have been configured */
36753+static inline int efhw_nic_have_hw(struct efhw_nic *nic)
36754+{
36755+ return efhw_nic_have_functional_units(nic) && (EFHW_KVA(nic) != 0);
36756+}
36757+
36758+/*! Helper function to allocate the iobuffer needed by an eventq
36759+ * - it ensures the eventq has the correct alignment for the NIC
36760+ *
36761+ * \param nic NIC that owns the event queue
36762+ * \param evq_instance Event-queue instance (index)
36763+ * \param buf_bytes Requested size of eventq
36764+ * \return < 0 if iobuffer allocation fails
36765+ */
36766+int efhw_nic_event_queue_alloc_iobuffer(struct efhw_nic *nic,
36767+ struct eventq_resource_hardware *h,
36768+ int evq_instance, unsigned buf_bytes);
36769+
36770+extern void falcon_nic_set_rx_usr_buf_size(struct efhw_nic *,
36771+ int rx_usr_buf_size);
36772+
36773+extern void
36774+falcon_nic_rx_filter_ctl_set(struct efhw_nic *nic, uint32_t tcp_full,
36775+ uint32_t tcp_wild,
36776+ uint32_t udp_full, uint32_t udp_wild);
36777+
36778+extern void
36779+falcon_nic_rx_filter_ctl_get(struct efhw_nic *nic, uint32_t *tcp_full,
36780+ uint32_t *tcp_wild,
36781+ uint32_t *udp_full, uint32_t *udp_wild);
36782+
36783+#endif /* __CI_EFHW_PUBLIC_H__ */
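
A short sketch (illustrative only; the function name and the +1 adjustment are hypothetical) of the intended call pattern: check that the functional units are configured, then read-modify-write the Falcon RX filter search limits through the accessors declared above:

#include <ci/efhw/public.h>

static void my_bump_udp_wild_limit(struct efhw_nic *nic)
{
	uint32_t tcp_full, tcp_wild, udp_full, udp_wild;

	if (!efhw_nic_have_hw(nic))
		return;		/* no configured hardware to touch */
	falcon_nic_rx_filter_ctl_get(nic, &tcp_full, &tcp_wild,
				     &udp_full, &udp_wild);
	falcon_nic_rx_filter_ctl_set(nic, tcp_full, tcp_wild,
				     udp_full, udp_wild + 1);
}
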
36784Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/sysdep.h
36785===================================================================
36786--- /dev/null 1970-01-01 00:00:00.000000000 +0000
36787+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/sysdep.h 2008-02-20 09:32:49.000000000 +0100
36788@@ -0,0 +1,72 @@
36789+/****************************************************************************
36790+ * Driver for Solarflare network controllers -
36791+ * resource management for Xen backend, OpenOnload, etc
36792+ * (including support for SFE4001 10GBT NIC)
36793+ *
36794+ * This file provides version-independent Linux kernel API for efhw library.
36795+ * Only kernels >=2.6.9 are supported.
36796+ *
36797+ * Copyright 2005-2007: Solarflare Communications Inc,
36798+ * 9501 Jeronimo Road, Suite 250,
36799+ * Irvine, CA 92618, USA
36800+ *
36801+ * Developed and maintained by Solarflare Communications:
36802+ * <linux-xen-drivers@solarflare.com>
36803+ * <onload-dev@solarflare.com>
36804+ *
36805+ * Certain parts of the driver were implemented by
36806+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36807+ * OKTET Labs Ltd, Russia,
36808+ * http://oktetlabs.ru, <info@oktetlabs.ru>
36809+ * by request of Solarflare Communications
36810+ *
36811+ *
36812+ * This program is free software; you can redistribute it and/or modify it
36813+ * under the terms of the GNU General Public License version 2 as published
36814+ * by the Free Software Foundation, incorporated herein by reference.
36815+ *
36816+ * This program is distributed in the hope that it will be useful,
36817+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36818+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36819+ * GNU General Public License for more details.
36820+ *
36821+ * You should have received a copy of the GNU General Public License
36822+ * along with this program; if not, write to the Free Software
36823+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36824+ ****************************************************************************
36825+ */
36826+
36827+#ifndef __CI_EFHW_SYSDEP_LINUX_H__
36828+#define __CI_EFHW_SYSDEP_LINUX_H__
36829+
36830+#include <linux/version.h>
36831+#include <linux/module.h>
36832+#include <linux/spinlock.h>
36833+#include <linux/delay.h>
36834+#include <linux/if_ether.h>
36835+
36836+#include <linux/netdevice.h> /* necessary for etherdevice.h on some kernels */
36837+#include <linux/etherdevice.h>
36838+
36839+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21)
36840+static inline int is_local_ether_addr(const u8 *addr)
36841+{
36842+ return (0x02 & addr[0]);
36843+}
36844+#endif
36845+
36846+typedef unsigned long irq_flags_t;
36847+
36848+#define spin_lock_destroy(l_) do {} while (0)
36849+
36850+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
36851+#define HAS_NET_NAMESPACE
36852+#endif
36853+
36854+/* Oddly, Linux defines round_up() only for x86, in an x86-specific
36855+ * header.  Note the alignment must be a power of two. */
36856+#ifndef round_up
36857+#define round_up(x, y) (((x) + (y) - 1) & ~((y)-1))
36858+#endif
36859+
36860+#endif /* __CI_EFHW_SYSDEP_LINUX_H__ */
36861Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/nic_table.h
36862===================================================================
36863--- /dev/null 1970-01-01 00:00:00.000000000 +0000
36864+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/nic_table.h 2008-02-20 09:32:49.000000000 +0100
36865@@ -0,0 +1,98 @@
36866+/****************************************************************************
36867+ * Driver for Solarflare network controllers -
36868+ * resource management for Xen backend, OpenOnload, etc
36869+ * (including support for SFE4001 10GBT NIC)
36870+ *
36871+ * This file provides public API for NIC table.
36872+ *
36873+ * Copyright 2005-2007: Solarflare Communications Inc,
36874+ * 9501 Jeronimo Road, Suite 250,
36875+ * Irvine, CA 92618, USA
36876+ *
36877+ * Developed and maintained by Solarflare Communications:
36878+ * <linux-xen-drivers@solarflare.com>
36879+ * <onload-dev@solarflare.com>
36880+ *
36881+ * Certain parts of the driver were implemented by
36882+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36883+ * OKTET Labs Ltd, Russia,
36884+ * http://oktetlabs.ru, <info@oktetlabs.ru>
36885+ * by request of Solarflare Communications
36886+ *
36887+ *
36888+ * This program is free software; you can redistribute it and/or modify it
36889+ * under the terms of the GNU General Public License version 2 as published
36890+ * by the Free Software Foundation, incorporated herein by reference.
36891+ *
36892+ * This program is distributed in the hope that it will be useful,
36893+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36894+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36895+ * GNU General Public License for more details.
36896+ *
36897+ * You should have received a copy of the GNU General Public License
36898+ * along with this program; if not, write to the Free Software
36899+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36900+ ****************************************************************************
36901+ */
36902+
36903+#ifndef __CI_EFRM_NIC_TABLE_H__
36904+#define __CI_EFRM_NIC_TABLE_H__
36905+
36906+#include <ci/efhw/efhw_types.h>
36907+#include <ci/efrm/sysdep.h>
36908+
36909+/*--------------------------------------------------------------------
36910+ *
36911+ * struct efrm_nic_table - top level driver object keeping all NICs -
36912+ * implemented in driver_object.c
36913+ *
36914+ *--------------------------------------------------------------------*/
36915+
36916+/*! Table of all NICs known to the resource driver. */
36917+struct efrm_nic_table {
36918+ /*! nics attached to this driver */
36919+ struct efhw_nic *nic[EFHW_MAX_NR_DEVS];
36920+ /*! pointer to an arbitrary struct efhw_nic if one exists;
36921+ * for code which does not care which NIC it uses but
36922+ * still needs one. Note you cannot assume nic[0] exists. */
36923+ struct efhw_nic *a_nic;
36924+ uint32_t nic_count; /*!< number of nics attached to this driver */
36925+ spinlock_t lock; /*!< lock for table modifications */
36926+ atomic_t ref_count; /*!< refcount for users of nic table */
36927+};
36928+
36929+/* Resource driver structures used by other drivers as well */
36930+extern struct efrm_nic_table efrm_nic_table;
36931+
36932+static inline void efrm_nic_table_hold(void)
36933+{
36934+ atomic_inc(&efrm_nic_table.ref_count);
36935+}
36936+
36937+static inline void efrm_nic_table_rele(void)
36938+{
36939+ atomic_dec(&efrm_nic_table.ref_count);
36940+}
36941+
36942+static inline int efrm_nic_table_held(void)
36943+{
36944+ return (atomic_read(&efrm_nic_table.ref_count) != 0);
36945+}
36946+
36947+/* Run the following code block once per registered NIC, with _nic set to
36948+ * each NIC in turn (and _nic_i to its index).
36949+ * DO NOT "break" out of this loop early. */
36950+#define EFRM_FOR_EACH_NIC(_nic_i, _nic) \
36951+ for ((_nic_i) = (efrm_nic_table_hold(), 0); \
36952+ (_nic_i) < EFHW_MAX_NR_DEVS || (efrm_nic_table_rele(), 0); \
36953+ (_nic_i)++) \
36954+ if (((_nic) = efrm_nic_table.nic[_nic_i]))
36955+
36956+#define EFRM_FOR_EACH_NIC_IN_SET(_set, _i, _nic) \
36957+ for ((_i) = (efrm_nic_table_hold(), 0); \
36958+ (_i) < EFHW_MAX_NR_DEVS || (efrm_nic_table_rele(), 0); \
36959+ ++(_i)) \
36960+ if (((_nic) = efrm_nic_table.nic[_i]) && \
36961+ efrm_nic_set_read((_set), (_i)))
36962+
36963+#endif /* __CI_EFRM_NIC_TABLE_H__ */
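
A minimal sketch (illustrative only; the counting function is hypothetical) of walking the NIC table with the macro above. The macro takes a reference on the table for the whole walk and only drops it once the index reaches EFHW_MAX_NR_DEVS, which is why breaking out of the loop early is forbidden:

#include <ci/efrm/nic_table.h>

static unsigned my_count_10g_nics(void)
{
	struct efhw_nic *nic;
	int nic_i;
	unsigned count = 0;

	EFRM_FOR_EACH_NIC(nic_i, nic)
		if (nic->flags & NIC_FLAG_10G)
			++count;
	return count;
}
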
36964Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep.h
36965===================================================================
36966--- /dev/null 1970-01-01 00:00:00.000000000 +0000
36967+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep.h 2008-02-20 09:32:49.000000000 +0100
36968@@ -0,0 +1,54 @@
36969+/****************************************************************************
36970+ * Driver for Solarflare network controllers -
36971+ * resource management for Xen backend, OpenOnload, etc
36972+ * (including support for SFE4001 10GBT NIC)
36973+ *
36974+ * This file provides Linux-like system-independent API for efrm library.
36975+ *
36976+ * Copyright 2005-2007: Solarflare Communications Inc,
36977+ * 9501 Jeronimo Road, Suite 250,
36978+ * Irvine, CA 92618, USA
36979+ *
36980+ * Developed and maintained by Solarflare Communications:
36981+ * <linux-xen-drivers@solarflare.com>
36982+ * <onload-dev@solarflare.com>
36983+ *
36984+ * Certain parts of the driver were implemented by
36985+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36986+ * OKTET Labs Ltd, Russia,
36987+ * http://oktetlabs.ru, <info@oktetlabs.ru>
36988+ * by request of Solarflare Communications
36989+ *
36990+ *
36991+ * This program is free software; you can redistribute it and/or modify it
36992+ * under the terms of the GNU General Public License version 2 as published
36993+ * by the Free Software Foundation, incorporated herein by reference.
36994+ *
36995+ * This program is distributed in the hope that it will be useful,
36996+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36997+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36998+ * GNU General Public License for more details.
36999+ *
37000+ * You should have received a copy of the GNU General Public License
37001+ * along with this program; if not, write to the Free Software
37002+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37003+ ****************************************************************************
37004+ */
37005+
37006+#ifndef __CI_EFRM_SYSDEP_H__
37007+#define __CI_EFRM_SYSDEP_H__
37008+
37009+/* Spinlocks are defined in efhw/sysdep.h */
37010+#include <ci/efhw/sysdep.h>
37011+
37012+#if defined(__linux__) && defined(__KERNEL__)
37013+
37014+# include <ci/efrm/sysdep_linux.h>
37015+
37016+#else
37017+
37018+# include <ci/efrm/sysdep_ci2linux.h>
37019+
37020+#endif
37021+
37022+#endif /* __CI_EFRM_SYSDEP_H__ */
37023Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep_linux.h
37024===================================================================
37025--- /dev/null 1970-01-01 00:00:00.000000000 +0000
37026+++ head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep_linux.h 2008-02-20 09:32:49.000000000 +0100
37027@@ -0,0 +1,248 @@
37028+/****************************************************************************
37029+ * Driver for Solarflare network controllers -
37030+ * resource management for Xen backend, OpenOnload, etc
37031+ * (including support for SFE4001 10GBT NIC)
37032+ *
37033+ * This file provides version-independent Linux kernel API for efrm library.
37034+ * Only kernels >=2.6.9 are supported.
37035+ *
37036+ * Copyright 2005-2007: Solarflare Communications Inc,
37037+ * 9501 Jeronimo Road, Suite 250,
37038+ * Irvine, CA 92618, USA
37039+ *
37040+ * Kfifo API is partially stolen from linux-2.6.22/include/linux/kfifo.h
37041+ * Copyright (C) 2004 Stelian Pop <stelian@popies.net>
37042+ *
37043+ * Developed and maintained by Solarflare Communications:
37044+ * <linux-xen-drivers@solarflare.com>
37045+ * <onload-dev@solarflare.com>
37046+ *
37047+ * Certain parts of the driver were implemented by
37048+ * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
37049+ * OKTET Labs Ltd, Russia,
37050+ * http://oktetlabs.ru, <info@oktetlabs.ru>
37051+ * by request of Solarflare Communications
37052+ *
37053+ *
37054+ * This program is free software; you can redistribute it and/or modify it
37055+ * under the terms of the GNU General Public License version 2 as published
37056+ * by the Free Software Foundation, incorporated herein by reference.
37057+ *
37058+ * This program is distributed in the hope that it will be useful,
37059+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
37060+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37061+ * GNU General Public License for more details.
37062+ *
37063+ * You should have received a copy of the GNU General Public License
37064+ * along with this program; if not, write to the Free Software
37065+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37066+ ****************************************************************************
37067+ */
37068+
37069+#ifndef __CI_EFRM_SYSDEP_LINUX_H__
37070+#define __CI_EFRM_SYSDEP_LINUX_H__
37071+
37072+#include <linux/version.h>
37073+#include <linux/list.h>
37074+#include <linux/vmalloc.h>
37075+#include <linux/errno.h>
37076+#include <linux/string.h>
37077+#include <linux/workqueue.h>
37078+#include <linux/gfp.h>
37079+#include <linux/slab.h>
37080+#include <linux/hardirq.h>
37081+#include <linux/kernel.h>
37082+#include <linux/if_ether.h>
37083+#include <linux/completion.h>
37084+#include <linux/in.h>
37085+
37086+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
37087+/* get roundup_pow_of_two(), which was in kernel.h in early kernel versions */
37088+#include <linux/log2.h>
37089+#endif
37090+
37091+/********************************************************************
37092+ *
37093+ * List API
37094+ *
37095+ ********************************************************************/
37096+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)
37097+static inline void
37098+list_replace_init(struct list_head *old, struct list_head *new)
37099+{
37100+ new->next = old->next;
37101+ new->next->prev = new;
37102+ new->prev = old->prev;
37103+ new->prev->next = new;
37104+ INIT_LIST_HEAD(old);
37105+}
37106+#endif
37107+
37108+static inline struct list_head *list_pop(struct list_head *list)
37109+{
37110+ struct list_head *link = list->next;
37111+ list_del(link);
37112+ return link;
37113+}
37114+
37115+static inline struct list_head *list_pop_tail(struct list_head *list)
37116+{
37117+ struct list_head *link = list->prev;
37118+ list_del(link);
37119+ return link;
37120+}
37121+
37122+/********************************************************************
37123+ *
37124+ * Workqueue API
37125+ *
37126+ ********************************************************************/
37127+
37128+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
37129+#define NEED_OLD_WORK_API
37130+
37131+/**
37132+ * The old and new work function prototypes just change
37133+ * the type of the pointer in the only argument, so it's
37134+ * safe to cast one function type to the other
37135+ */
37136+typedef void (*efrm_old_work_func_t) (void *p);
37137+
37138+#undef INIT_WORK
37139+#define INIT_WORK(_work, _func) \
37140+ do { \
37141+ INIT_LIST_HEAD(&(_work)->entry); \
37142+ (_work)->pending = 0; \
37143+ PREPARE_WORK((_work), \
37144+ (efrm_old_work_func_t) (_func), \
37145+ (_work)); \
37146+ } while (0)
37147+
37148+#endif
37149+
37150+/********************************************************************
37151+ *
37152+ * Kfifo API
37153+ *
37154+ ********************************************************************/
37155+
37156+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
37157+
37158+#if !defined(RHEL_RELEASE_CODE) || (RHEL_RELEASE_CODE < 1029)
37159+typedef unsigned gfp_t;
37160+#endif
37161+
37162+#define HAS_NO_KFIFO
37163+
37164+struct kfifo {
37165+ unsigned char *buffer; /* the buffer holding the data */
37166+ unsigned int size; /* the size of the allocated buffer */
37167+ unsigned int in; /* data is added at offset (in % size) */
37168+ unsigned int out; /* data is extracted from off. (out % size) */
37169+ spinlock_t *lock; /* protects concurrent modifications */
37170+};
37171+
37172+extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
37173+ gfp_t gfp_mask, spinlock_t *lock);
37174+extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask,
37175+ spinlock_t *lock);
37176+extern void kfifo_free(struct kfifo *fifo);
37177+extern unsigned int __kfifo_put(struct kfifo *fifo,
37178+ unsigned char *buffer, unsigned int len);
37179+extern unsigned int __kfifo_get(struct kfifo *fifo,
37180+ unsigned char *buffer, unsigned int len);
37181+
37182+/**
37183+ * kfifo_put - puts some data into the FIFO
37184+ * @fifo: the fifo to be used.
37185+ * @buffer: the data to be added.
37186+ * @len: the length of the data to be added.
37187+ *
37188+ * This function copies at most @len bytes from the @buffer into
37189+ * the FIFO depending on the free space, and returns the number of
37190+ * bytes copied.
37191+ */
37192+static inline unsigned int
37193+kfifo_put(struct kfifo *fifo, unsigned char *buffer, unsigned int len)
37194+{
37195+ unsigned long flags;
37196+ unsigned int ret;
37197+
37198+ spin_lock_irqsave(fifo->lock, flags);
37199+
37200+ ret = __kfifo_put(fifo, buffer, len);
37201+
37202+ spin_unlock_irqrestore(fifo->lock, flags);
37203+
37204+ return ret;
37205+}
37206+
37207+/**
37208+ * kfifo_get - gets some data from the FIFO
37209+ * @fifo: the fifo to be used.
37210+ * @buffer: where the data must be copied.
37211+ * @len: the size of the destination buffer.
37212+ *
37213+ * This function copies at most @len bytes from the FIFO into the
37214+ * @buffer and returns the number of copied bytes.
37215+ */
37216+static inline unsigned int
37217+kfifo_get(struct kfifo *fifo, unsigned char *buffer, unsigned int len)
37218+{
37219+ unsigned long flags;
37220+ unsigned int ret;
37221+
37222+ spin_lock_irqsave(fifo->lock, flags);
37223+
37224+ ret = __kfifo_get(fifo, buffer, len);
37225+
37226+ /*
37227+ * optimization: if the FIFO is empty, set the indices to 0
37228+ * so we don't wrap the next time
37229+ */
37230+ if (fifo->in == fifo->out)
37231+ fifo->in = fifo->out = 0;
37232+
37233+ spin_unlock_irqrestore(fifo->lock, flags);
37234+
37235+ return ret;
37236+}
37237+
37238+/**
37239+ * __kfifo_len - returns the number of bytes available in the FIFO, no locking version
37240+ * @fifo: the fifo to be used.
37241+ */
37242+static inline unsigned int __kfifo_len(struct kfifo *fifo)
37243+{
37244+ return fifo->in - fifo->out;
37245+}
37246+
37247+/**
37248+ * kfifo_len - returns the number of bytes available in the FIFO
37249+ * @fifo: the fifo to be used.
37250+ */
37251+static inline unsigned int kfifo_len(struct kfifo *fifo)
37252+{
37253+ unsigned long flags;
37254+ unsigned int ret;
37255+
37256+ spin_lock_irqsave(fifo->lock, flags);
37257+
37258+ ret = __kfifo_len(fifo);
37259+
37260+ spin_unlock_irqrestore(fifo->lock, flags);
37261+
37262+ return ret;
37263+}
37264+
37265+#else
37266+#include <linux/kfifo.h>
37267+#endif
37268+
37269+static inline void kfifo_vfree(struct kfifo *fifo)
37270+{
37271+ vfree(fifo->buffer);
37272+ kfree(fifo);
37273+}
37274+
37275+#endif /* __CI_EFRM_SYSDEP_LINUX_H__ */
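
An illustrative sketch of the kfifo usage this compatibility layer supports on pre-2.6.10 kernels; on newer kernels the identical calls resolve to the stock <linux/kfifo.h>. The names below are hypothetical, and the error check covers both NULL and ERR_PTR conventions since the allocator's failure behaviour is not shown here:

#include <linux/err.h>
#include <ci/efrm/sysdep_linux.h>

static DEFINE_SPINLOCK(my_fifo_lock);

static void my_fifo_example(void)
{
	unsigned char in[4] = { 1, 2, 3, 4 }, out[4];
	struct kfifo *fifo = kfifo_alloc(64, GFP_KERNEL, &my_fifo_lock);

	if (fifo == NULL || IS_ERR(fifo))
		return;
	kfifo_put(fifo, in, sizeof(in));	/* copies up to 4 bytes in */
	kfifo_get(fifo, out, sizeof(out));	/* and back out again */
	kfifo_free(fifo);
}
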
37276Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/config.h
37277===================================================================
37278--- /dev/null 1970-01-01 00:00:00.000000000 +0000
37279+++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/config.h 2008-02-20 09:32:49.000000000 +0100
37280@@ -0,0 +1,49 @@
37281+/****************************************************************************
37282+ * Copyright 2002-2005: Level 5 Networks Inc.
37283+ * Copyright 2005-2008: Solarflare Communications Inc,
37284+ * 9501 Jeronimo Road, Suite 250,
37285+ * Irvine, CA 92618, USA
37286+ *
37287+ * Maintained by Solarflare Communications
37288+ * <linux-xen-drivers@solarflare.com>
37289+ * <onload-dev@solarflare.com>
37290+ *
37291+ * This program is free software; you can redistribute it and/or modify it
37292+ * under the terms of the GNU General Public License version 2 as published
37293+ * by the Free Software Foundation, incorporated herein by reference.
37294+ *
37295+ * This program is distributed in the hope that it will be useful,
37296+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
37297+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37298+ * GNU General Public License for more details.
37299+ *
37300+ * You should have received a copy of the GNU General Public License
37301+ * along with this program; if not, write to the Free Software
37302+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37303+ ****************************************************************************
37304+ */
37305+
37306+/*! \cidoxg_include_ci_tools */
37307+
37308+#ifndef __CI_TOOLS_CONFIG_H__
37309+#define __CI_TOOLS_CONFIG_H__
37310+
37311+
37312+/**********************************************************************
37313+ * Debugging.
37314+ */
37315+
37316+#define CI_INCLUDE_ASSERT_VALID 0
37317+
37318+/* Set non-zero to allow info about who has allocated what to appear in
37319+ * /proc/drivers/level5/mem.
37320+ * Note, however, that doing so can lead to a segfault when you unload the
37321+ * driver, and other weirdness; i.e. the code for this is not quite
37322+ * right (written by Oktet, hacked by gel), but it does work well enough to be
37323+ * useful.
37324+ */
37325+#define CI_MEMLEAK_DEBUG_ALLOC_TABLE 0
37326+
37327+
37328+#endif /* __CI_TOOLS_CONFIG_H__ */
37329+/*! \cidoxg_end */
37330Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/debug.h
37331===================================================================
37332--- /dev/null 1970-01-01 00:00:00.000000000 +0000
37333+++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/debug.h 2008-02-20 09:32:49.000000000 +0100
37334@@ -0,0 +1,336 @@
37335+/****************************************************************************
37336+ * Copyright 2002-2005: Level 5 Networks Inc.
37337+ * Copyright 2005-2008: Solarflare Communications Inc,
37338+ * 9501 Jeronimo Road, Suite 250,
37339+ * Irvine, CA 92618, USA
37340+ *
37341+ * Maintained by Solarflare Communications
37342+ * <linux-xen-drivers@solarflare.com>
37343+ * <onload-dev@solarflare.com>
37344+ *
37345+ * This program is free software; you can redistribute it and/or modify it
37346+ * under the terms of the GNU General Public License version 2 as published
37347+ * by the Free Software Foundation, incorporated herein by reference.
37348+ *
37349+ * This program is distributed in the hope that it will be useful,
37350+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
37351+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37352+ * GNU General Public License for more details.
37353+ *
37354+ * You should have received a copy of the GNU General Public License
37355+ * along with this program; if not, write to the Free Software
37356+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37357+ ****************************************************************************
37358+ */
37359+
37360+/*! \cidoxg_include_ci_tools */
37361+
37362+#ifndef __CI_TOOLS_DEBUG_H__
37363+#define __CI_TOOLS_DEBUG_H__
37364+
37365+#define CI_LOG_E(x) x /* errors */
37366+#define CI_LOG_W(x) x /* warnings */
37367+#define CI_LOG_I(x) x /* information */
37368+#define CI_LOG_V(x) x /* verbose */
37369+
37370+/* Build-time asserts. We paste the line number into the type name
37371+ * so that the macro can be used more than once per file even if the
37372+ * compiler objects to multiple identical typedefs. Collisions
37373+ * between uses in different header files are still possible. */
37374+#ifndef CI_BUILD_ASSERT
37375+#define __CI_BUILD_ASSERT_NAME(_x) __CI_BUILD_ASSERT_ILOATHECPP(_x)
37376+#define __CI_BUILD_ASSERT_ILOATHECPP(_x) __CI_BUILD_ASSERT__ ##_x
37377+#define CI_BUILD_ASSERT(e)\
37378+ typedef char __CI_BUILD_ASSERT_NAME(__LINE__)[(e)?1:-1]
37379+#endif
37380+
37381+
37382+#ifdef NDEBUG
37383+
37384+# define _ci_check(exp, file, line)
37385+# define _ci_assert2(e, x, y, file, line)
37386+# define _ci_assert(exp, file, line)
37387+# define _ci_assert_equal(exp1, exp2, file, line)
37388+# define _ci_assert_equiv(exp1, exp2, file, line)
37389+# define _ci_assert_nequal(exp1, exp2, file, line)
37390+# define _ci_assert_le(exp1, exp2, file, line)
37391+# define _ci_assert_lt(exp1, exp2, file, line)
37392+# define _ci_assert_ge(exp1, exp2, file, line)
37393+# define _ci_assert_gt(exp1, exp2, file, line)
37394+# define _ci_assert_impl(exp1, exp2, file, line)
37395+
37396+# define _ci_verify(exp, file, line) \
37397+ do { \
37398+ (void)(exp); \
37399+ } while (0)
37400+
37401+# define CI_DEBUG_TRY(exp) \
37402+ do { \
37403+ (void)(exp); \
37404+ } while (0)
37405+
37406+#define CI_TRACE(exp,fmt)
37407+#define CI_TRACE_INT(integer)
37408+#define CI_TRACE_INT32(integer)
37409+#define CI_TRACE_INT64(integer)
37410+#define CI_TRACE_UINT(integer)
37411+#define CI_TRACE_UINT32(integer)
37412+#define CI_TRACE_UINT64(integer)
37413+#define CI_TRACE_HEX(integer)
37414+#define CI_TRACE_HEX32(integer)
37415+#define CI_TRACE_HEX64(integer)
37416+#define CI_TRACE_PTR(pointer)
37417+#define CI_TRACE_STRING(string)
37418+#define CI_TRACE_MAC(mac)
37419+#define CI_TRACE_IP(ip_be32)
37420+#define CI_TRACE_ARP(arp_pkt)
37421+
37422+#else
37423+
37424+# define _CI_ASSERT_FMT "\nfrom %s:%d"
37425+
37426+# define _ci_check(exp, file, line) \
37427+ do { \
37428+ if (CI_UNLIKELY(!(exp))) \
37429+ ci_warn(("ci_check(%s)"_CI_ASSERT_FMT, #exp, \
37430+ (file), (line))); \
37431+ } while (0)
37432+
37433+/*
37434+ * NOTE: ci_fail() emits the file and line where the assert is actually
37435+ * coded.
37436+ */
37437+
37438+# define _ci_assert(exp, file, line) \
37439+ do { \
37440+ if (CI_UNLIKELY(!(exp))) \
37441+ ci_fail(("ci_assert(%s)"_CI_ASSERT_FMT, #exp, \
37442+ (file), (line))); \
37443+ } while (0)
37444+
37445+# define _ci_assert2(e, x, y, file, line) do { \
37446+ if(CI_UNLIKELY( ! (e) )) \
37447+ ci_fail(("ci_assert(%s)\nwhere [%s=%"CI_PRIx64"] " \
37448+ "[%s=%"CI_PRIx64"]\nat %s:%d\nfrom %s:%d", #e \
37449+ , #x, (ci_uint64)(ci_uintptr_t)(x) \
37450+ , #y, (ci_uint64)(ci_uintptr_t)(y), \
37451+ __FILE__, __LINE__, (file), (line))); \
37452+ } while (0)
37453+
37454+# define _ci_verify(exp, file, line) \
37455+ do { \
37456+ if (CI_UNLIKELY(!(exp))) \
37457+ ci_fail(("ci_verify(%s)"_CI_ASSERT_FMT, #exp, \
37458+ (file), (line))); \
37459+ } while (0)
37460+
37461+# define _ci_assert_equal(x, y, f, l) _ci_assert2((x)==(y), x, y, (f), (l))
37462+# define _ci_assert_nequal(x, y, f, l) _ci_assert2((x)!=(y), x, y, (f), (l))
37463+# define _ci_assert_le(x, y, f, l) _ci_assert2((x)<=(y), x, y, (f), (l))
37464+# define _ci_assert_lt(x, y, f, l) _ci_assert2((x)< (y), x, y, (f), (l))
37465+# define _ci_assert_ge(x, y, f, l) _ci_assert2((x)>=(y), x, y, (f), (l))
37466+# define _ci_assert_gt(x, y, f, l) _ci_assert2((x)> (y), x, y, (f), (l))
37467+# define _ci_assert_or(x, y, f, l) _ci_assert2((x)||(y), x, y, (f), (l))
37468+# define _ci_assert_impl(x, y, f, l) _ci_assert2(!(x) || (y), x, y, (f), (l))
37469+# define _ci_assert_equiv(x, y, f, l) _ci_assert2(!(x)== !(y), x, y, (f), (l))
37470+
37471+#define _ci_assert_equal_msg(exp1, exp2, msg, file, line) \
37472+ do { \
37473+ if (CI_UNLIKELY((exp1)!=(exp2))) \
37474+ ci_fail(("ci_assert_equal_msg(%s == %s) were " \
37475+ "(%"CI_PRIx64":%"CI_PRIx64") with msg[%c%c%c%c]" \
37476+ _CI_ASSERT_FMT, #exp1, #exp2, \
37477+ (ci_uint64)(ci_uintptr_t)(exp1), \
37478+ (ci_uint64)(ci_uintptr_t)(exp2), \
37479+ (((ci_uint32)msg) >> 24) & 0xff, \
37480+ (((ci_uint32)msg) >> 16) & 0xff, \
37481+ (((ci_uint32)msg) >> 8 ) & 0xff, \
37482+ (((ci_uint32)msg) ) & 0xff, \
37483+ (file), (line))); \
37484+ } while (0)
37485+
37486+# define CI_DEBUG_TRY(exp) CI_TRY(exp)
37487+
37488+#define CI_TRACE(exp,fmt) \
37489+ ci_log("%s:%d:%s] " #exp "=" fmt, \
37490+ __FILE__, __LINE__, __FUNCTION__, (exp))
37491+
37492+
37493+#define CI_TRACE_INT(integer) \
37494+ ci_log("%s:%d:%s] " #integer "=%d", \
37495+ __FILE__, __LINE__, __FUNCTION__, (integer))
37496+
37497+
37498+#define CI_TRACE_INT32(integer) \
37499+ ci_log("%s:%d:%s] " #integer "=%d", \
37500+ __FILE__, __LINE__, __FUNCTION__, ((ci_int32)integer))
37501+
37502+
37503+#define CI_TRACE_INT64(integer) \
37504+ ci_log("%s:%d:%s] " #integer "=%lld", \
37505+ __FILE__, __LINE__, __FUNCTION__, ((ci_int64)integer))
37506+
37507+
37508+#define CI_TRACE_UINT(integer) \
37509+ ci_log("%s:%d:%s] " #integer "=%u", \
37510+ __FILE__, __LINE__, __FUNCTION__, (integer))
37511+
37512+
37513+#define CI_TRACE_UINT32(integer) \
37514+ ci_log("%s:%d:%s] " #integer "=%ud", \
37515+ __FILE__, __LINE__, __FUNCTION__, ((ci_uint32)integer))
37516+
37517+
37518+#define CI_TRACE_UINT64(integer) \
37519+ ci_log("%s:%d:%s] " #integer "=%llu", \
37520+ __FILE__, __LINE__, __FUNCTION__, ((ci_uint64)integer))
37521+
37522+
37523+#define CI_TRACE_HEX(integer) \
37524+ ci_log("%s:%d:%s] " #integer "=0x%x", \
37525+ __FILE__, __LINE__, __FUNCTION__, (integer))
37526+
37527+
37528+#define CI_TRACE_HEX32(integer) \
37529+ ci_log("%s:%d:%s] " #integer "=0x%x", \
37530+ __FILE__, __LINE__, __FUNCTION__, ((ci_uint32)integer))
37531+
37532+
37533+#define CI_TRACE_HEX64(integer) \
37534+ ci_log("%s:%d:%s] " #integer "=0x%llx", \
37535+ __FILE__, __LINE__, __FUNCTION__, ((ci_uint64)integer))
37536+
37537+
37538+#define CI_TRACE_PTR(pointer) \
37539+ ci_log("%s:%d:%s] " #pointer "=0x%p", \
37540+ __FILE__, __LINE__, __FUNCTION__, (pointer))
37541+
37542+
37543+#define CI_TRACE_STRING(string) \
37544+ ci_log("%s:%d:%s] " #string "=%s", \
37545+ __FILE__, __LINE__, __FUNCTION__, (string))
37546+
37547+
37548+#define CI_TRACE_MAC(mac) \
37549+ ci_log("%s:%d:%s] " #mac "=" CI_MAC_PRINTF_FORMAT, \
37550+ __FILE__, __LINE__, __FUNCTION__, CI_MAC_PRINTF_ARGS(mac))
37551+
37552+
37553+#define CI_TRACE_IP(ip_be32) \
37554+ ci_log("%s:%d:%s] " #ip_be32 "=" CI_IP_PRINTF_FORMAT, __FILE__, \
37555+ __LINE__, __FUNCTION__, CI_IP_PRINTF_ARGS(&(ip_be32)))
37556+
37557+
37558+#define CI_TRACE_ARP(arp_pkt) \
37559+ ci_log("%s:%d:%s]\n"CI_ARP_PRINTF_FORMAT, \
37560+ __FILE__, __LINE__, __FUNCTION__, CI_ARP_PRINTF_ARGS(arp_pkt))
37561+
37562+#endif /* NDEBUG */
37563+
37564+#define ci_check(exp) \
37565+ _ci_check(exp, __FILE__, __LINE__)
37566+
37567+#define ci_assert(exp) \
37568+ _ci_assert(exp, __FILE__, __LINE__)
37569+
37570+#define ci_verify(exp) \
37571+ _ci_verify(exp, __FILE__, __LINE__)
37572+
37573+#define ci_assert_equal(exp1, exp2) \
37574+ _ci_assert_equal(exp1, exp2, __FILE__, __LINE__)
37575+
37576+#define ci_assert_equal_msg(exp1, exp2, msg) \
37577+ _ci_assert_equal_msg(exp1, exp2, msg, __FILE__, __LINE__)
37578+
37579+#define ci_assert_nequal(exp1, exp2) \
37580+ _ci_assert_nequal(exp1, exp2, __FILE__, __LINE__)
37581+
37582+#define ci_assert_le(exp1, exp2) \
37583+ _ci_assert_le(exp1, exp2, __FILE__, __LINE__)
37584+
37585+#define ci_assert_lt(exp1, exp2) \
37586+ _ci_assert_lt(exp1, exp2, __FILE__, __LINE__)
37587+
37588+#define ci_assert_ge(exp1, exp2) \
37589+ _ci_assert_ge(exp1, exp2, __FILE__, __LINE__)
37590+
37591+#define ci_assert_gt(exp1, exp2) \
37592+ _ci_assert_gt(exp1, exp2, __FILE__, __LINE__)
37593+
37594+#define ci_assert_impl(exp1, exp2) \
37595+ _ci_assert_impl(exp1, exp2, __FILE__, __LINE__)
37596+
37597+#define ci_assert_equiv(exp1, exp2) \
37598+ _ci_assert_equiv(exp1, exp2, __FILE__, __LINE__)
37599+
37600+
37601+#define CI_TEST(exp) \
37602+ do{ \
37603+ if( CI_UNLIKELY(!(exp)) ) \
37604+ ci_fail(("CI_TEST(%s)", #exp)); \
37605+ }while(0)
37606+
37607+
37608+#define CI_TRY(exp) \
37609+ do{ \
37610+ int _trc; \
37611+ _trc=(exp); \
37612+ if( CI_UNLIKELY(_trc < 0) ) \
37613+ ci_sys_fail(#exp, _trc); \
37614+ }while(0)
37615+
37616+
37617+#define CI_TRY_RET(exp) \
37618+ do{ \
37619+ int _trc; \
37620+ _trc=(exp); \
37621+ if( CI_UNLIKELY(_trc < 0) ) { \
37622+ ci_log("%s returned %d at %s:%d", #exp, _trc, __FILE__, __LINE__); \
37623+ return _trc; \
37624+ } \
37625+ }while(0)
37626+
37627+#define CI_LOGLEVEL_TRY_RET(logfn, exp) \
37628+ do{ \
37629+ int _trc; \
37630+ _trc=(exp); \
37631+ if( CI_UNLIKELY(_trc < 0) ) { \
37632+ logfn (ci_log("%s returned %d at %s:%d", #exp, _trc, __FILE__, __LINE__)); \
37633+ return _trc; \
37634+ } \
37635+ }while(0)
37636+
37637+
37638+#define CI_SOCK_TRY(exp) \
37639+ do{ \
37640+ ci_sock_err_t _trc; \
37641+ _trc=(exp); \
37642+ if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) \
37643+ ci_sys_fail(#exp, _trc.val); \
37644+ }while(0)
37645+
37646+
37647+#define CI_SOCK_TRY_RET(exp) \
37648+ do{ \
37649+ ci_sock_err_t _trc; \
37650+ _trc=(exp); \
37651+ if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) { \
37652+ ci_log("%s returned %d at %s:%d", #exp, _trc.val, __FILE__, __LINE__); \
37653+ return ci_sock_errcode(_trc); \
37654+ } \
37655+ }while(0)
37656+
37657+
37658+#define CI_SOCK_TRY_SOCK_RET(exp) \
37659+ do{ \
37660+ ci_sock_err_t _trc; \
37661+ _trc=(exp); \
37662+ if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) { \
37663+ ci_log("%s returned %d at %s:%d", #exp, _trc.val, __FILE__, __LINE__); \
37664+ return _trc; \
37665+ } \
37666+ }while(0)
37667+
37668+#endif /* __CI_TOOLS_DEBUG_H__ */
37669+
37670+/*! \cidoxg_end */
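
A short sketch (illustrative only; the struct and function are hypothetical) of the build-time and run-time assertions above. CI_BUILD_ASSERT() embeds __LINE__ in a typedef name so it can appear more than once per file, and the ci_assert_*() family compiles away entirely under NDEBUG:

#include <ci/tools/debug.h>

struct my_ring {
	unsigned head, tail;
	unsigned char buf[64];
};

CI_BUILD_ASSERT(sizeof(struct my_ring) <= 128);
CI_BUILD_ASSERT((sizeof(((struct my_ring *)0)->buf) & 63) == 0);

static void my_ring_push(struct my_ring *r, unsigned char byte)
{
	/* Checked only in debug builds; no code is emitted under NDEBUG. */
	ci_assert_lt(r->head - r->tail, sizeof(r->buf));
	r->buf[r->head++ & 63] = byte;
}
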
37671Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/log.h
37672===================================================================
37673--- /dev/null 1970-01-01 00:00:00.000000000 +0000
37674+++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/log.h 2008-02-20 09:32:49.000000000 +0100
37675@@ -0,0 +1,262 @@
37676+/****************************************************************************
37677+ * Copyright 2002-2005: Level 5 Networks Inc.
37678+ * Copyright 2005-2008: Solarflare Communications Inc,
37679+ * 9501 Jeronimo Road, Suite 250,
37680+ * Irvine, CA 92618, USA
37681+ *
37682+ * Maintained by Solarflare Communications
37683+ * <linux-xen-drivers@solarflare.com>
37684+ * <onload-dev@solarflare.com>
37685+ *
37686+ * This program is free software; you can redistribute it and/or modify it
37687+ * under the terms of the GNU General Public License version 2 as published
37688+ * by the Free Software Foundation, incorporated herein by reference.
37689+ *
37690+ * This program is distributed in the hope that it will be useful,
37691+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
37692+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37693+ * GNU General Public License for more details.
37694+ *
37695+ * You should have received a copy of the GNU General Public License
37696+ * along with this program; if not, write to the Free Software
37697+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37698+ ****************************************************************************
37699+ */
37700+
37701+/*
37702+ * \author djr
37703+ * \brief Functions for logging and pretty-printing.
37704+ * \date 2002/08/07
37705+ */
37706+
37707+/*! \cidoxg_include_ci_tools */
37708+
37709+#ifndef __CI_TOOLS_LOG_H__
37710+#define __CI_TOOLS_LOG_H__
37711+
37712+#include <stdarg.h>
37713+
37714+
37715+/**********************************************************************
37716+ * Logging.
37717+ */
37718+
37719+/* size of internal log buffer */
37720+#define CI_LOG_MAX_LINE 512
37721+/* uses of ci_log must ensure that all trace messages are shorter than this */
37722+#define CI_LOG_MAX_MSG_LENGTH (CI_LOG_MAX_LINE-50)
37723+
37724+extern void ci_vlog(const char* fmt, va_list args) CI_HF;
37725+extern void ci_log(const char* fmt, ...) CI_PRINTF_LIKE(1,2) CI_HF;
37726+
37727+ /*! Set the prefix for log messages.
37728+ **
37729+ ** Uses the storage pointed to by \em prefix. Therefore \em prefix must
37730+ ** be allocated on the heap, or statically.
37731+ */
37732+extern void ci_set_log_prefix(const char* prefix) CI_HF;
37733+
37734+typedef void (*ci_log_fn_t)(const char* msg);
37735+extern ci_log_fn_t ci_log_fn CI_HV;
37736+
37737+/* Log functions. */
37738+extern void ci_log_null(const char* msg) CI_HF;
37739+extern void ci_log_stderr(const char* msg) CI_HF;
37740+extern void ci_log_stdout(const char* msg) CI_HF;
37741+extern void ci_log_syslog(const char* msg) CI_HF;
37742+
37743+/*! Call the following to install special logging behaviours. */
37744+extern void ci_log_buffer_till_fail(void) CI_HF;
37745+extern void ci_log_buffer_till_exit(void) CI_HF;
37746+
37747+extern void __ci_log_unique(const char* msg) CI_HF;
37748+extern ci_log_fn_t __ci_log_unique_fn CI_HV;
37749+ci_inline void ci_log_uniquify(void) {
37750+ if( ci_log_fn != __ci_log_unique ) {
37751+ __ci_log_unique_fn = ci_log_fn;
37752+ ci_log_fn = __ci_log_unique;
37753+ }
37754+}
37755+
37756+extern void ci_log_file(const char* msg) CI_HF;
37757+extern int ci_log_file_fd CI_HV;
37758+
37759+extern void __ci_log_nth(const char* msg) CI_HF;
37760+extern ci_log_fn_t __ci_log_nth_fn CI_HV;
37761+extern int ci_log_nth_n CI_HV; /* default 100 */
37762+ci_inline void ci_log_nth(void) {
37763+ if( ci_log_fn != __ci_log_nth ) {
37764+ __ci_log_nth_fn = ci_log_fn;
37765+ ci_log_fn = __ci_log_nth;
37766+ }
37767+}
37768+
37769+extern int ci_log_level CI_HV;
37770+
37771+extern int ci_log_options CI_HV;
37772+#define CI_LOG_PID 0x1
37773+#define CI_LOG_TID 0x2
37774+#define CI_LOG_TIME 0x4
37775+#define CI_LOG_DELTA 0x8
37776+
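A minimal sketch, not part of the original patch, of how a caller might combine the hooks above: install a different log sink, a static prefix, and some option flags. The name example_setup_logging is hypothetical.

    /* Editor's sketch, illustrative only. */
    static void example_setup_logging(void)
    {
      ci_set_log_prefix("[sfc] ");    /* string literal, so storage is static as required */
      ci_log_fn = ci_log_syslog;      /* route subsequent ci_log() output to syslog */
      ci_log_options |= CI_LOG_PID | CI_LOG_TIME;
    }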
37777+/**********************************************************************
37778+ * Used to define which mode we are in
37779+ */
37780+#if (defined(_WIN32) && !defined(__KERNEL__))
37781+typedef enum {
37782+ ci_log_md_NULL=0,
37783+ ci_log_md_ioctl,
37784+ ci_log_md_stderr,
37785+ ci_log_md_stdout,
37786+ ci_log_md_file,
37787+ ci_log_md_serial,
37788+ ci_log_md_syslog,
37789+ ci_log_md_pidfile
37790+} ci_log_mode_t;
37791+extern ci_log_mode_t ci_log_mode;
37792+#endif
37793+
37794+/**********************************************************************
37795+ * Pretty-printing.
37796+ */
37797+
37798+extern char ci_printable_char(char c) CI_HF;
37799+
37800+extern void (*ci_hex_dump_formatter)(char* buf, const ci_octet* s,
37801+ int i, int off, int len) CI_HV;
37802+extern void ci_hex_dump_format_octets(char*,const ci_octet*,int,int,int) CI_HF;
37803+extern void ci_hex_dump_format_dwords(char*,const ci_octet*,int,int,int) CI_HF;
37804+
37805+extern void ci_hex_dump_row(char* buf, volatile const void* s, int len,
37806+ ci_ptr_arith_t address) CI_HF;
37807+  /*!< A row contains up to 16 bytes.  Row starts at [address & 15u], so
37808+  ** [len + (address & 15u)] must be <= 16.
37809+ */
37810+
37811+extern void ci_hex_dump(ci_log_fn_t, volatile const void*,
37812+ int len, ci_ptr_arith_t address) CI_HF;
37813+
37814+extern int ci_hex_dump_to_raw(const char* src_hex, void* buf,
37815+ unsigned* addr_out_opt, int* skip) CI_HF;
37816+ /*!< Recovers raw data from a single line of a hex dump. [buf] must be at
37817+ ** least 16 bytes long. Returns the number of bytes written to [buf] (in
37818+ ** range 1 -> 16), or -1 if [src_hex] doesn't contain hex data. Does not
37819+ ** cope with missing bytes at the start of a line.
37820+ */
37821+
37822+extern int ci_format_eth_addr(char* buf, const void* eth_mac_addr,
37823+ char sep) CI_HF;
37824+ /*!< This will write 18 characters to <buf> including terminating null.
37825+ ** Returns number of bytes written excluding null. If [sep] is zero, ':'
37826+ ** is used.
37827+ */
37828+
37829+extern int ci_parse_eth_addr(void* eth_mac_addr,
37830+ const char* str, char sep) CI_HF;
37831+ /*!< If [sep] is zero, absolutely any separator is accepted (even
37832+ ** inconsistent separators). Returns 0 on success, -1 on error.
37833+ */
37834+
37835+extern int ci_format_ip4_addr(char* buf, unsigned addr_be32) CI_HF;
37836+ /*!< Formats the IP address (in network endian) in dotted-quad. Returns
37837+ ** the number of bytes written (up to 15), excluding the null. [buf]
37838+ ** must be at least 16 bytes long.
37839+ */
37840+
37841+
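Illustration only, not part of the patch: the formatting helpers above combined in one hypothetical routine. Buffer sizes follow the documented contracts (at least 16 bytes for the IPv4 string, 18 for the MAC string).

    /* Editor's sketch, illustrative only. */
    static void example_log_addrs(const void* mac, unsigned ip_be32)
    {
      char ip_str[16];
      char mac_str[18];

      ci_format_ip4_addr(ip_str, ip_be32);   /* dotted quad; input is network order */
      ci_format_eth_addr(mac_str, mac, 0);   /* sep == 0 selects ':' */
      ci_log("peer %s (%s)", mac_str, ip_str);
    }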
37842+/**********************************************************************
37843+ * Error checking.
37844+ */
37845+
37846+extern void (*ci_fail_stop_fn)(void) CI_HV;
37847+
37848+extern void ci_fail_stop(void) CI_HF;
37849+extern void ci_fail_hang(void) CI_HF;
37850+extern void ci_fail_bomb(void) CI_HF;
37851+extern void ci_backtrace(void) CI_HF;
37852+
37853+#if defined __linux__ && !defined __KERNEL__
37854+extern void ci_fail_abort (void) CI_HF;
37855+#endif
37856+
37857+#ifdef __GNUC__
37858+extern void
37859+__ci_fail(const char*, ...) CI_PRINTF_LIKE(1,2) CI_HF;
37860+#else
37861+# if _PREFAST_
37862+ extern void _declspec(noreturn) __ci_fail(const char* fmt, ...);
37863+# else
37864+ extern void __ci_fail(const char* fmt, ...);
37865+# endif
37866+
37867+#endif
37868+
37869+#define ci_warn(x) \
37870+ do{ ci_log("WARN at %s:%d", __FILE__, __LINE__); }while(0)
37871+
37872+#define ci_fail(x) \
37873+ do{ ci_log("FAIL at %s:%d", __FILE__, __LINE__); __ci_fail x; }while(0)
37874+
37875+extern void __ci_sys_fail(const char* fn, int rc,
37876+ const char* file, int line) CI_HF;
37877+#define ci_sys_fail(fn, rc) __ci_sys_fail(fn, rc, __FILE__, __LINE__)
37878+
37879+/**********************************************************************
37880+ * Logging to buffer (src/citools/log_buffer.c)
37881+ */
37882+
37883+/*! Divert ci_log() messages to the log buffer;
37884+ * normally they go to the system console */
37885+extern void ci_log_buffer_till_fail(void) CI_HF;
37886+
37887+/*! Dump the contents of the log buffer to the system console */
37888+extern void ci_log_buffer_dump(void) CI_HF;
37889+
37890+
37891+/**********************************************************************
37892+ * Some useful pretty-printing.
37893+ */
37894+
37895+#ifdef __linux__
37896+# define CI_SOCKCALL_FLAGS_FMT "%s%s%s%s%s%s%s%s%s%s%s"
37897+
37898+# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \
37899+ (((x) & MSG_OOB ) ? "OOB " :""), \
37900+ (((x) & MSG_PEEK ) ? "PEEK " :""), \
37901+ (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :""), \
37902+ (((x) & MSG_EOR ) ? "EOR " :""), \
37903+ (((x) & MSG_CTRUNC ) ? "CTRUNC " :""), \
37904+ (((x) & MSG_TRUNC ) ? "TRUNC " :""), \
37905+ (((x) & MSG_WAITALL ) ? "WAITALL " :""), \
37906+ (((x) & MSG_DONTWAIT ) ? "DONTWAIT " :""), \
37907+ (((x) & MSG_NOSIGNAL ) ? "NOSIGNAL " :""), \
37908+ (((x) & MSG_ERRQUEUE ) ? "ERRQUEUE " :""), \
37909+ (((x) & MSG_CONFIRM ) ? "CONFIRM " :"")
37910+#endif
37911+
37912+#ifdef _WIN32
37913+# define CI_SOCKCALL_FLAGS_FMT "%s%s%s"
37914+
37915+# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \
37916+ (((x) & MSG_OOB ) ? "OOB " :""), \
37917+ (((x) & MSG_PEEK ) ? "PEEK " :""), \
37918+ (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :"")
37919+#endif
37920+
37921+#ifdef __sun__
37922+# define CI_SOCKCALL_FLAGS_FMT "%s%s%s%s%s%s%s%s%s"
37923+
37924+# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \
37925+ (((x) & MSG_OOB ) ? "OOB " :""), \
37926+ (((x) & MSG_PEEK ) ? "PEEK " :""), \
37927+ (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :""), \
37928+ (((x) & MSG_EOR ) ? "EOR " :""), \
37929+ (((x) & MSG_CTRUNC ) ? "CTRUNC " :""), \
37930+ (((x) & MSG_TRUNC ) ? "TRUNC " :""), \
37931+ (((x) & MSG_WAITALL ) ? "WAITALL " :""), \
37932+ (((x) & MSG_DONTWAIT ) ? "DONTWAIT " :""), \
37933+ (((x) & MSG_NOTIFICATION) ? "NOTIFICATION" :"")
37934+#endif
37935+
37936+#endif /* __CI_TOOLS_LOG_H__ */
37937+/*! \cidoxg_end */
37938Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/gcc_x86.h
37939===================================================================
37940--- /dev/null 1970-01-01 00:00:00.000000000 +0000
37941+++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/gcc_x86.h 2008-02-20 09:32:49.000000000 +0100
37942@@ -0,0 +1,361 @@
37943+/****************************************************************************
37944+ * Copyright 2002-2005: Level 5 Networks Inc.
37945+ * Copyright 2005-2008: Solarflare Communications Inc,
37946+ * 9501 Jeronimo Road, Suite 250,
37947+ * Irvine, CA 92618, USA
37948+ *
37949+ * Maintained by Solarflare Communications
37950+ * <linux-xen-drivers@solarflare.com>
37951+ * <onload-dev@solarflare.com>
37952+ *
37953+ * This program is free software; you can redistribute it and/or modify it
37954+ * under the terms of the GNU General Public License version 2 as published
37955+ * by the Free Software Foundation, incorporated herein by reference.
37956+ *
37957+ * This program is distributed in the hope that it will be useful,
37958+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
37959+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37960+ * GNU General Public License for more details.
37961+ *
37962+ * You should have received a copy of the GNU General Public License
37963+ * along with this program; if not, write to the Free Software
37964+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37965+ ****************************************************************************
37966+ */
37967+
37968+/*! \cidoxg_include_ci_tools_platform */
37969+
37970+#ifndef __CI_TOOLS_GCC_X86_H__
37971+#define __CI_TOOLS_GCC_X86_H__
37972+
37973+
37974+/**********************************************************************
37975+ * Free-running cycle counters.
37976+ */
37977+
37978+#define CI_HAVE_FRC64
37979+#define CI_HAVE_FRC32
37980+
37981+#define ci_frc32(pval) __asm__ __volatile__("rdtsc" : "=a" (*pval) : : "edx")
37982+
37983+#if defined(__x86_64__)
37984+ci_inline void ci_frc64(ci_uint64* pval) {
37985+  /* temp fix until we figure out how to get this out in one bite */
37986+ ci_uint64 low, high;
37987+ __asm__ __volatile__("rdtsc" : "=a" (low) , "=d" (high));
37988+ *pval = (high << 32) | low;
37989+}
37990+
37991+#else
37992+#define ci_frc64(pval) __asm__ __volatile__("rdtsc" : "=A" (*pval))
37993+#endif
37994+
37995+#define ci_frc_flush() /* ?? Need a pipeline barrier. */
37996+
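A small usage sketch, not from the patch, timing a code section with the 64-bit free-running counter declared above; example_cycles_elapsed is a hypothetical name.

    /* Editor's sketch, illustrative only. */
    static inline ci_uint64 example_cycles_elapsed(void)
    {
      ci_uint64 t0, t1;
      ci_frc64(&t0);
      /* ... code under test ... */
      ci_frc64(&t1);
      return t1 - t0;                 /* elapsed TSC ticks */
    }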
37997+
37998+/**********************************************************************
37999+ * Atomic integer.
38000+ */
38001+
38002+/*
38003+** int ci_atomic_read(a) { return a->n; }
38004+** void ci_atomic_set(a, v) { a->n = v; }
38005+** void ci_atomic_inc(a) { ++a->n; }
38006+** void ci_atomic_dec(a) { --a->n; }
38007+** int ci_atomic_inc_and_test(a) { return ++a->n == 0; }
38008+** int ci_atomic_dec_and_test(a) { return --a->n == 0; }
38009+** void ci_atomic_and(a, v) { a->n &= v; }
38010+** void ci_atomic_or(a, v) { a->n |= v; }
38011+*/
38012+
38013+typedef struct { volatile ci_int32 n; } ci_atomic_t;
38014+
38015+#define CI_ATOMIC_INITIALISER(i) {(i)}
38016+
38017+static inline ci_int32 ci_atomic_read(const ci_atomic_t* a) { return a->n; }
38018+static inline void ci_atomic_set(ci_atomic_t* a, int v) { a->n = v; ci_wmb(); }
38019+
38020+static inline void ci_atomic_inc(ci_atomic_t* a)
38021+{ __asm__ __volatile__("lock; incl %0" : "+m" (a->n)); }
38022+
38023+
38024+static inline void ci_atomic_dec(ci_atomic_t* a)
38025+{ __asm__ __volatile__("lock; decl %0" : "+m" (a->n)); }
38026+
38027+static inline int ci_atomic_inc_and_test(ci_atomic_t* a) {
38028+ char r;
38029+ __asm__ __volatile__("lock; incl %0; sete %1"
38030+ : "+m" (a->n), "=qm" (r));
38031+ return r;
38032+}
38033+
38034+static inline int ci_atomic_dec_and_test(ci_atomic_t* a) {
38035+ char r;
38036+ __asm__ __volatile__("lock; decl %0; sete %1"
38037+ : "+m" (a->n), "=qm" (r));
38038+ return r;
38039+}
38040+
38041+ci_inline int
38042+ci_atomic_xadd (ci_atomic_t *a, int v) {
38043+ __asm__ ("lock xadd %0, %1" : "=r" (v), "+m" (a->n) : "0" (v));
38044+ return v;
38045+}
38046+ci_inline int
38047+ci_atomic_xchg (ci_atomic_t *a, int v) {
38048+ __asm__ ("lock xchg %0, %1" : "=r" (v), "+m" (a->n) : "0" (v));
38049+ return v;
38050+}
38051+
38052+ci_inline void ci_atomic32_or(volatile ci_uint32* p, ci_uint32 mask)
38053+{ __asm__ __volatile__("lock; orl %1, %0" : "+m" (*p) : "ir" (mask)); }
38054+
38055+ci_inline void ci_atomic32_and(volatile ci_uint32* p, ci_uint32 mask)
38056+{ __asm__ __volatile__("lock; andl %1, %0" : "+m" (*p) : "ir" (mask)); }
38057+
38058+ci_inline void ci_atomic32_add(volatile ci_uint32* p, ci_uint32 v)
38059+{ __asm__ __volatile__("lock; addl %1, %0" : "+m" (*p) : "ir" (v)); }
38060+
38061+#define ci_atomic_or(a, v) ci_atomic32_or ((ci_uint32*) &(a)->n, (v))
38062+#define ci_atomic_and(a, v) ci_atomic32_and((ci_uint32*) &(a)->n, (v))
38063+#define ci_atomic_add(a, v) ci_atomic32_add((ci_uint32*) &(a)->n, (v))
38064+
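Purely as an illustration (not part of the patch), the atomic type above used as a simple reference count; the example_* names are hypothetical.

    /* Editor's sketch, illustrative only. */
    static ci_atomic_t example_refcnt = CI_ATOMIC_INITIALISER(1);

    static inline void example_get(void)
    { ci_atomic_inc(&example_refcnt); }

    static inline void example_put(void (*release)(void))
    {
      if( ci_atomic_dec_and_test(&example_refcnt) )
        release();                    /* last reference dropped */
    }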
38065+extern int ci_glibc_uses_nptl (void) CI_HF;
38066+extern int ci_glibc_nptl_broken(void) CI_HF;
38067+extern int ci_glibc_gs_get_is_multihreaded_offset (void) CI_HF;
38068+extern int ci_glibc_gs_is_multihreaded_offset CI_HV;
38069+
38070+#if !defined(__x86_64__)
38071+#ifdef __GLIBC__
38072+/* Returns non-zero if the calling process might be multithreaded, returns 0 if
38073+ * it definitely isn't (i.e. if reimplementing this function for other
38074+ * architectures and platforms, you can safely just return 1).
38075+ */
38076+static inline int ci_is_multithreaded (void) {
38077+
38078+ while (1) {
38079+ if (ci_glibc_gs_is_multihreaded_offset >= 0) {
38080+ /* NPTL keeps a variable that tells us this hanging off gs (i.e. in thread-
38081+ * local storage); just return this
38082+ */
38083+ int r;
38084+ __asm__ __volatile__ ("movl %%gs:(%1), %0"
38085+ : "=r" (r)
38086+ : "r" (ci_glibc_gs_is_multihreaded_offset));
38087+ return r;
38088+ }
38089+
38090+ if (ci_glibc_gs_is_multihreaded_offset == -2) {
38091+ /* This means we've already determined that the libc version is NOT good
38092+ * for our funky "is multithreaded" hack
38093+ */
38094+ return 1;
38095+ }
38096+
38097+ /* If we get here, it means this is the first time the function has been
38098+ * called -- detect the libc version and go around again.
38099+ */
38100+ ci_glibc_gs_is_multihreaded_offset = ci_glibc_gs_get_is_multihreaded_offset ();
38101+
38102+ /* Go around again. We do the test here rather than at the top so that we go
38103+     * quicker in the common case.
38104+ */
38105+ }
38106+}
38107+
38108+#else /* def __GLIBC__ */
38109+
38110+#define ci_is_multithreaded() 1 /* ?? Is there a POSIX way of finding out */
38111+                                /* whether the application is single    */
38112+ /* threaded? */
38113+
38114+#endif /* def __GLIBC__ */
38115+
38116+#else /* defined __x86_64__ */
38117+
38118+static inline int ci_is_multithreaded (void) {
38119+  /* No easy way to tell on x86_64; so assume we're multithreaded */
38120+ return 1;
38121+}
38122+
38123+#endif /* defined __x86_64__ */
38124+
38125+
38126+/**********************************************************************
38127+ * Compare and swap.
38128+ */
38129+
38130+#define CI_HAVE_COMPARE_AND_SWAP
38131+
38132+ci_inline int ci_cas32_succeed(volatile ci_int32* p, ci_int32 oldval,
38133+ ci_int32 newval) {
38134+ char ret;
38135+ ci_int32 prevval;
38136+ __asm__ __volatile__("lock; cmpxchgl %3, %1; sete %0"
38137+ : "=q"(ret), "+m"(*p), "=a"(prevval)
38138+ : "r"(newval), "a"(oldval));
38139+ return ret;
38140+}
38141+
38142+ci_inline int ci_cas32_fail(volatile ci_int32* p, ci_int32 oldval,
38143+ ci_int32 newval) {
38144+ char ret;
38145+ ci_int32 prevval;
38146+ __asm__ __volatile__("lock; cmpxchgl %3, %1; setne %0"
38147+ : "=q"(ret), "+m"(*p), "=a"(prevval)
38148+ : "r"(newval), "a"(oldval));
38149+ return ret;
38150+}
38151+
38152+#ifdef __x86_64__
38153+ci_inline int ci_cas64_succeed(volatile ci_int64* p, ci_int64 oldval,
38154+ ci_int64 newval) {
38155+ char ret;
38156+ ci_int64 prevval;
38157+ __asm__ __volatile__("lock; cmpxchgq %3, %1; sete %0"
38158+ : "=q"(ret), "+m"(*p), "=a"(prevval)
38159+ : "r"(newval), "a"(oldval));
38160+ return ret;
38161+}
38162+
38163+ci_inline int ci_cas64_fail(volatile ci_int64* p, ci_int64 oldval,
38164+ ci_int64 newval) {
38165+ char ret;
38166+ ci_int64 prevval;
38167+ __asm__ __volatile__("lock; cmpxchgq %3, %1; setne %0"
38168+ : "=q"(ret), "+m"(*p), "=a"(prevval)
38169+ : "r"(newval), "a"(oldval));
38170+ return ret;
38171+}
38172+#endif
38173+
38174+ci_inline int ci_cas32u_succeed(volatile ci_uint32* p, ci_uint32 oldval, ci_uint32 newval) {
38175+ char ret;
38176+ ci_uint32 prevval;
38177+ __asm__ __volatile__("lock; cmpxchgl %3, %1; sete %0"
38178+ : "=q"(ret), "+m"(*p), "=a"(prevval)
38179+ : "r"(newval), "a"(oldval));
38180+ return ret;
38181+}
38182+
38183+ci_inline int ci_cas32u_fail(volatile ci_uint32* p, ci_uint32 oldval, ci_uint32 newval) {
38184+ char ret;
38185+ ci_uint32 prevval;
38186+ __asm__ __volatile__("lock; cmpxchgl %3, %1; setne %0"
38187+ : "=q"(ret), "+m"(*p), "=a"(prevval)
38188+ : "r"(newval), "a"(oldval));
38189+ return ret;
38190+}
38191+
38192+ci_inline int ci_cas64u_succeed(volatile ci_uint64* p, ci_uint64 oldval,
38193+ ci_uint64 newval) {
38194+ char ret;
38195+ ci_uint64 prevval;
38196+ __asm__ __volatile__("lock; cmpxchgq %3, %1; sete %0"
38197+ : "=q"(ret), "+m"(*p), "=a"(prevval)
38198+ : "r"(newval), "a"(oldval));
38199+ return ret;
38200+}
38201+
38202+ci_inline int ci_cas64u_fail(volatile ci_uint64* p, ci_uint64 oldval,
38203+ ci_uint64 newval) {
38204+ char ret;
38205+ ci_uint64 prevval;
38206+ __asm__ __volatile__("lock; cmpxchgq %3, %1; setne %0"
38207+ : "=q"(ret), "+m"(*p), "=a"(prevval)
38208+ : "r"(newval), "a"(oldval));
38209+ return ret;
38210+}
38211+
38212+#ifdef __x86_64__
38213+
38214+# define ci_cas_uintptr_succeed(p,o,n) \
38215+ ci_cas64u_succeed((volatile ci_uint64*) (p), (o), (n))
38216+# define ci_cas_uintptr_fail(p,o,n) \
38217+ ci_cas64u_fail((volatile ci_uint64*) (p), (o), (n))
38218+
38219+#else
38220+
38221+# define ci_cas_uintptr_succeed(p,o,n) \
38222+ ci_cas32u_succeed((volatile ci_uint32*) (p), (o), (n))
38223+# define ci_cas_uintptr_fail(p,o,n) \
38224+ ci_cas32u_fail((volatile ci_uint32*) (p), (o), (n))
38225+
38226+#endif
38227+
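A hedged sketch, not in the patch, of the compare-and-swap helpers above claiming a one-shot flag: exactly one caller sees a non-zero return. example_claim is a hypothetical name.

    /* Editor's sketch, illustrative only. */
    static inline int example_claim(volatile ci_int32* flag)
    {
      /* Non-zero only for the caller whose cmpxchg swaps 0 -> 1. */
      return ci_cas32_succeed(flag, 0, 1);
    }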
38228+
38229+/**********************************************************************
38230+ * Atomic bit field.
38231+ */
38232+
38233+typedef ci_uint32 ci_bits;
38234+#define CI_BITS_N 32u
38235+
38236+#define CI_BITS_DECLARE(name, n) \
38237+ ci_bits name[((n) + CI_BITS_N - 1u) / CI_BITS_N]
38238+
38239+ci_inline void ci_bits_clear_all(volatile ci_bits* b, int n_bits)
38240+{ memset((void*) b, 0, (n_bits+CI_BITS_N-1u) / CI_BITS_N * sizeof(ci_bits)); }
38241+
38242+ci_inline void ci_bit_set(volatile ci_bits* b, int i) {
38243+ __asm__ __volatile__("lock; btsl %1, %0"
38244+ : "=m" (*b)
38245+ : "Ir" (i));
38246+}
38247+
38248+ci_inline void ci_bit_clear(volatile ci_bits* b, int i) {
38249+ __asm__ __volatile__("lock; btrl %1, %0"
38250+ : "=m" (*b)
38251+ : "Ir" (i));
38252+}
38253+
38254+ci_inline int ci_bit_test(volatile ci_bits* b, int i) {
38255+ char rc;
38256+ __asm__("btl %2, %1; setc %0"
38257+ : "=r" (rc)
38258+ : "m" (*b), "Ir" (i));
38259+ return rc;
38260+}
38261+
38262+ci_inline int ci_bit_test_and_set(volatile ci_bits* b, int i) {
38263+ char rc;
38264+ __asm__ __volatile__("lock; btsl %2, %1; setc %0"
38265+ : "=r" (rc), "+m" (*b)
38266+ : "Ir" (i));
38267+ return rc;
38268+}
38269+
38270+ci_inline int ci_bit_test_and_clear(volatile ci_bits* b, int i) {
38271+ char rc;
38272+ __asm__ __volatile__("lock; btrl %2, %1; setc %0"
38273+ : "=r" (rc), "+m" (*b)
38274+ : "Ir" (i));
38275+ return rc;
38276+}
38277+
38278+/* These mask ops only work within a single ci_bits word. */
38279+#define ci_bit_mask_set(b,m) ci_atomic32_or((b), (m))
38280+#define ci_bit_mask_clear(b,m) ci_atomic32_and((b), ~(m))
38281+
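Illustration only, not part of the patch: a small slot allocator built on the bit helpers above; the example_* names are hypothetical.

    /* Editor's sketch, illustrative only. */
    CI_BITS_DECLARE(example_slots, 32);     /* one ci_bits word */

    static inline int example_alloc_slot(void)
    {
      int i;
      for( i = 0; i < 32; ++i )
        if( ! ci_bit_test_and_set(example_slots, i) )
          return i;                   /* atomically claimed slot i */
      return -1;                      /* none free */
    }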
38282+
38283+/**********************************************************************
38284+ * Misc.
38285+ */
38286+
38287+#if __GNUC__ >= 3
38288+# define ci_spinloop_pause() __asm__("pause")
38289+#else
38290+# define ci_spinloop_pause() __asm__(".byte 0xf3, 0x90")
38291+#endif
38292+
38293+
38294+#define CI_HAVE_ADDC32
38295+#define ci_add_carry32(sum, v) __asm__("addl %1, %0 ;" \
38296+ "adcl $0, %0 ;" \
38297+ : "=r" (sum) \
38298+ : "g" ((ci_uint32) v), "0" (sum))
38299+
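A minimal sketch, not from the patch, accumulating 32-bit words with the add-with-carry helper above, in the style of an Internet-checksum partial sum; example_sum_words is hypothetical.

    /* Editor's sketch, illustrative only. */
    static inline ci_uint32 example_sum_words(const ci_uint32* p, int n)
    {
      ci_uint32 sum = 0;
      while( n-- )
        ci_add_carry32(sum, *p++);    /* sum += *p, with the carry folded back in */
      return sum;
    }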
38300+
38301+#endif /* __CI_TOOLS_GCC_X86_H__ */
38302+
38303+/*! \cidoxg_end */
38304Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h
38305===================================================================
38306--- /dev/null 1970-01-01 00:00:00.000000000 +0000
38307+++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h 2008-02-20 09:32:49.000000000 +0100
38308@@ -0,0 +1,362 @@
38309+/****************************************************************************
38310+ * Copyright 2002-2005: Level 5 Networks Inc.
38311+ * Copyright 2005-2008: Solarflare Communications Inc,
38312+ * 9501 Jeronimo Road, Suite 250,
38313+ * Irvine, CA 92618, USA
38314+ *
38315+ * Maintained by Solarflare Communications
38316+ * <linux-xen-drivers@solarflare.com>
38317+ * <onload-dev@solarflare.com>
38318+ *
38319+ * This program is free software; you can redistribute it and/or modify it
38320+ * under the terms of the GNU General Public License version 2 as published
38321+ * by the Free Software Foundation, incorporated herein by reference.
38322+ *
38323+ * This program is distributed in the hope that it will be useful,
38324+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
38325+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
38326+ * GNU General Public License for more details.
38327+ *
38328+ * You should have received a copy of the GNU General Public License
38329+ * along with this program; if not, write to the Free Software
38330+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
38331+ ****************************************************************************
38332+ */
38333+
38334+
38335+/*! \cidoxg_include_ci_tools_platform */
38336+
38337+#ifndef __CI_TOOLS_LINUX_KERNEL_H__
38338+#define __CI_TOOLS_LINUX_KERNEL_H__
38339+
38340+/**********************************************************************
38341+ * Need to know the kernel version.
38342+ */
38343+
38344+#ifndef LINUX_VERSION_CODE
38345+# include <linux/version.h>
38346+# ifndef UTS_RELEASE
38347+ /* 2.6.18 onwards defines UTS_RELEASE in a separate header */
38348+# include <linux/utsrelease.h>
38349+# endif
38350+#endif
38351+
38352+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) || \
38353+ LINUX_VERSION_CODE >= KERNEL_VERSION(2,7,0)
38354+# error "Linux 2.6 required"
38355+#endif
38356+
38357+
38358+#include <linux/slab.h> /* kmalloc / kfree */
38359+#include <linux/vmalloc.h> /* vmalloc / vfree */
38360+#include <linux/interrupt.h>/* in_interrupt() */
38361+#include <linux/in.h>
38362+#include <linux/in6.h>
38363+#include <linux/spinlock.h>
38364+#include <linux/highmem.h>
38365+#include <linux/smp_lock.h>
38366+#include <linux/ctype.h>
38367+#include <linux/uio.h>
38368+#include <asm/current.h>
38369+#include <asm/errno.h>
38370+#include <asm/kmap_types.h>
38371+#include <asm/semaphore.h>
38372+
38373+#include <ci/tools/config.h>
38374+
38375+#define ci_in_irq in_irq
38376+#define ci_in_interrupt in_interrupt
38377+#define ci_in_atomic in_atomic
38378+
38379+
38380+/**********************************************************************
38381+ * Misc stuff.
38382+ */
38383+
38384+#ifdef BUG
38385+# define CI_BOMB BUG
38386+#endif
38387+
38388+ci_inline void* __ci_alloc(size_t n)
38389+{ return kmalloc(n, (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)); }
38390+
38391+ci_inline void* __ci_atomic_alloc(size_t n)
38392+{ return kmalloc(n, GFP_ATOMIC ); }
38393+
38394+ci_inline void __ci_free(void* p) { return kfree(p); }
38395+ci_inline void* __ci_vmalloc(size_t n) { return vmalloc(n); }
38396+ci_inline void __ci_vfree(void* p) { return vfree(p); }
38397+
38398+
38399+#if CI_MEMLEAK_DEBUG_ALLOC_TABLE
38400+ #define ci_alloc(s) ci_alloc_memleak_debug (s, __FILE__, __LINE__)
38401+ #define ci_atomic_alloc(s) ci_atomic_alloc_memleak_debug(s, __FILE__, __LINE__)
38402+ #define ci_free ci_free_memleak_debug
38403+ #define ci_vmalloc(s) ci_vmalloc_memleak_debug (s, __FILE__,__LINE__)
38404+ #define ci_vfree ci_vfree_memleak_debug
38405+ #define ci_alloc_fn ci_alloc_fn_memleak_debug
38406+ #define ci_vmalloc_fn ci_vmalloc_fn_memleak_debug
38407+#else /* !CI_MEMLEAK_DEBUG_ALLOC_TABLE */
38408+ #define ci_alloc_fn __ci_alloc
38409+ #define ci_vmalloc_fn __ci_vmalloc
38410+#endif
38411+
38412+#ifndef ci_alloc
38413+ #define ci_atomic_alloc __ci_atomic_alloc
38414+ #define ci_alloc __ci_alloc
38415+ #define ci_free __ci_free
38416+ #define ci_vmalloc __ci_vmalloc
38417+ #define ci_vmalloc_fn __ci_vmalloc
38418+ #define ci_vfree __ci_vfree
38419+#endif
38420+
38421+#define ci_sprintf sprintf
38422+#define ci_vsprintf vsprintf
38423+#define ci_snprintf snprintf
38424+#define ci_vsnprintf vsnprintf
38425+#define ci_sscanf sscanf
38426+
38427+
38428+#define CI_LOG_FN_DEFAULT ci_log_syslog
38429+
38430+
38431+/*--------------------------------------------------------------------
38432+ *
38433+ * irqs_disabled - needed for kmap helpers on some kernels
38434+ *
38435+ *--------------------------------------------------------------------*/
38436+#ifdef irqs_disabled
38437+# define ci_irqs_disabled irqs_disabled
38438+#else
38439+# if defined(__i386__) | defined(__x86_64__)
38440+# define ci_irqs_disabled(x) \
38441+ ({ \
38442+ unsigned long flags; \
38443+ local_save_flags(flags); \
38444+ !(flags & (1<<9)); \
38445+ })
38446+# else
38447+# error "Need to implement irqs_disabled() for your architecture"
38448+# endif
38449+#endif
38450+
38451+
38452+/**********************************************************************
38453+ * kmap helpers.
38454+ *
38455+ * Use ci_k(un)map for code paths which are not in an atomic context.
38456+ * For atomic code you need to use ci_k(un)map_in_atomic. This will grab
38457+ * one of the per-CPU kmap slots.
38458+ *
38459+ * NB in_interrupt != in_irq. If you don't know the difference then
38460+ * don't use kmap_in_atomic
38461+ *
38462+ * 2.4 allocates kmap slots by function. We are going to re-use the
38463+ * skb module's slot - we also use the same interlock
38464+ *
38465+ * 2.6 allocates kmap slots by type as well as by function. We are
38466+ * going to use the currently (2.6.10) unused SOFTIRQ slot
38467+ *
38468+ */
38469+
38470+ci_inline void* ci_kmap(struct page *page) {
38471+ CI_DEBUG(if( ci_in_atomic() | ci_in_interrupt() | ci_in_irq() ) BUG());
38472+ return kmap(page);
38473+}
38474+
38475+ci_inline void ci_kunmap(struct page *page) {
38476+ kunmap(page);
38477+}
38478+
38479+#define CI_KM_SLOT KM_SOFTIRQ0
38480+
38481+
38482+typedef struct semaphore ci_semaphore_t;
38483+
38484+ci_inline void
38485+ci_sem_init (ci_semaphore_t *sem, int val) {
38486+ sema_init (sem, val);
38487+}
38488+
38489+ci_inline void
38490+ci_sem_down (ci_semaphore_t *sem) {
38491+ down (sem);
38492+}
38493+
38494+ci_inline int
38495+ci_sem_trydown (ci_semaphore_t *sem) {
38496+ return down_trylock (sem);
38497+}
38498+
38499+ci_inline void
38500+ci_sem_up (ci_semaphore_t *sem) {
38501+ up (sem);
38502+}
38503+
38504+ci_inline int
38505+ci_sem_get_count(ci_semaphore_t *sem) {
38506+ return sem->count.counter;
38507+}
38508+
38509+ci_inline void* ci_kmap_in_atomic(struct page *page)
38510+{
38511+ CI_DEBUG(if( ci_in_irq() ) BUG());
38512+
38513+ /* iSCSI can call without in_interrupt() but with irqs_disabled()
38514+ and in a context that can't sleep, so we need to check that
38515+ too */
38516+ if(ci_in_interrupt() || ci_irqs_disabled())
38517+ return kmap_atomic(page, CI_KM_SLOT);
38518+ else
38519+ return kmap(page);
38520+}
38521+
38522+ci_inline void ci_kunmap_in_atomic(struct page *page, void* kaddr)
38523+{
38524+ CI_DEBUG(if( ci_in_irq() ) BUG());
38525+
38526+ /* iSCSI can call without in_interrupt() but with irqs_disabled()
38527+ and in a context that can't sleep, so we need to check that
38528+ too */
38529+ if(ci_in_interrupt() || ci_irqs_disabled())
38530+ kunmap_atomic(kaddr, CI_KM_SLOT);
38531+ else
38532+ kunmap(page);
38533+}
38534+
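An illustrative sketch, not part of the patch, of the helpers above used to copy out of a page from a context that may be atomic. example_copy_from_page is hypothetical and assumes memcpy() is available through the headers already included.

    /* Editor's sketch, illustrative only. */
    static inline void example_copy_from_page(void* dst, struct page* pg,
                                              unsigned off, unsigned len)
    {
      char* va = (char*) ci_kmap_in_atomic(pg);
      memcpy(dst, va + off, len);
      ci_kunmap_in_atomic(pg, va);
    }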
38535+/**********************************************************************
38536+ * spinlock implementation: used by <ci/tools/spinlock.h>
38537+ */
38538+
38539+#define CI_HAVE_SPINLOCKS
38540+
38541+typedef ci_uintptr_t ci_lock_holder_t;
38542+#define ci_lock_thisthread (ci_lock_holder_t)current
38543+#define ci_lock_no_holder (ci_lock_holder_t)NULL
38544+
38545+typedef spinlock_t ci_lock_i;
38546+typedef spinlock_t ci_irqlock_i;
38547+typedef unsigned long ci_irqlock_state_t;
38548+
38549+#define IRQLOCK_CYCLES 500000
38550+
38551+#define ci_lock_ctor_i(l) spin_lock_init(l)
38552+#define ci_lock_dtor_i(l) do{}while(0)
38553+#define ci_lock_lock_i(l) spin_lock(l)
38554+#define ci_lock_trylock_i(l) spin_trylock(l)
38555+#define ci_lock_unlock_i(l) spin_unlock(l)
38556+
38557+#define ci_irqlock_ctor_i(l) spin_lock_init(l)
38558+#define ci_irqlock_dtor_i(l) do{}while(0)
38559+#define ci_irqlock_lock_i(l,s) spin_lock_irqsave(l,*(s))
38560+#define ci_irqlock_unlock_i(l,s) spin_unlock_irqrestore(l, *(s))
38561+
38562+
38563+/**********************************************************************
38564+ * register access
38565+ */
38566+
38567+#include <asm/io.h>
38568+
38569+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
38570+typedef volatile void __iomem* ioaddr_t;
38571+#else
38572+typedef unsigned long ioaddr_t;
38573+#endif
38574+
38575+
38576+
38577+/**********************************************************************
38578+ * thread implementation -- kernel dependencies probably should be
38579+ * moved to driver/linux_kernel.h
38580+ */
38581+
38582+#define ci_linux_daemonize(name) daemonize(name)
38583+
38584+#include <linux/workqueue.h>
38585+
38586+
38587+typedef struct {
38588+ void* (*fn)(void* arg);
38589+ void* arg;
38590+ const char* name;
38591+ int thrd_id;
38592+ struct completion exit_event;
38593+ struct work_struct keventd_witem;
38594+} ci_kernel_thread_t;
38595+
38596+
38597+typedef ci_kernel_thread_t* cithread_t;
38598+
38599+
38600+extern int cithread_create(cithread_t* tid, void* (*fn)(void*), void* arg,
38601+ const char* name);
38602+extern int cithread_detach(cithread_t kt);
38603+extern int cithread_join(cithread_t kt);
38604+
38605+
38606+/* Kernel sysctl variables. */
38607+extern int sysctl_tcp_wmem[3];
38608+extern int sysctl_tcp_rmem[3];
38609+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
38610+#define LINUX_HAS_SYSCTL_MEM_MAX
38611+extern ci_uint32 sysctl_wmem_max;
38612+extern ci_uint32 sysctl_rmem_max;
38613+#endif
38614+
38615+
38616+/*--------------------------------------------------------------------
38617+ *
38618+ * ci_bigbuf_t: An abstraction of a large buffer. Needed because in the
38619+ * Linux kernel, large buffers need to be allocated with vmalloc(), whereas
38620+ * smaller buffers should use kmalloc(). This abstraction chooses the
38621+ * appropriate mechanism.
38622+ *
38623+ *--------------------------------------------------------------------*/
38624+
38625+typedef struct {
38626+ char* p;
38627+ int is_vmalloc;
38628+} ci_bigbuf_t;
38629+
38630+
38631+ci_inline int ci_bigbuf_alloc(ci_bigbuf_t* bb, size_t bytes) {
38632+ if( bytes >= CI_PAGE_SIZE && ! ci_in_atomic() ) {
38633+ bb->is_vmalloc = 1;
38634+ if( (bb->p = vmalloc(bytes)) ) return 0;
38635+ }
38636+ bb->is_vmalloc = 0;
38637+ bb->p = kmalloc(bytes, ci_in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
38638+ return bb->p ? 0 : -ENOMEM;
38639+}
38640+
38641+ci_inline void ci_bigbuf_free(ci_bigbuf_t* bb) {
38642+ if( bb->is_vmalloc ) vfree(bb->p);
38643+ else kfree(bb->p);
38644+}
38645+
38646+ci_inline char* ci_bigbuf_ptr(ci_bigbuf_t* bb)
38647+{ return bb->p; }
38648+
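A brief usage sketch, not from the patch, of the big-buffer abstraction above; example_with_bigbuf is a hypothetical name and assumes memset() is available.

    /* Editor's sketch, illustrative only. */
    static inline int example_with_bigbuf(size_t bytes)
    {
      ci_bigbuf_t bb;
      int rc = ci_bigbuf_alloc(&bb, bytes); /* vmalloc or kmalloc, as appropriate */
      if( rc < 0 )
        return rc;                          /* -ENOMEM */
      memset(ci_bigbuf_ptr(&bb), 0, bytes);
      ci_bigbuf_free(&bb);
      return 0;
    }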
38649+/**********************************************************************
38650+ * struct iovec abstraction (for Windows port)
38651+ */
38652+
38653+typedef struct iovec ci_iovec;
38654+
38655+/* Accessors for buffer/length */
38656+#define CI_IOVEC_BASE(i) ((i)->iov_base)
38657+#define CI_IOVEC_LEN(i) ((i)->iov_len)
38658+
38659+/**********************************************************************
38660+ * Signals
38661+ */
38662+
38663+ci_inline void
38664+ci_send_sig(int signum)
38665+{
38666+ send_sig(signum, current, 0);
38667+}
38668+
38669+#endif /* __CI_TOOLS_LINUX_KERNEL_H__ */
38670+/*! \cidoxg_end */
38671Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/sysdep.h
38672===================================================================
38673--- /dev/null 1970-01-01 00:00:00.000000000 +0000
38674+++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/sysdep.h 2008-02-20 09:32:49.000000000 +0100
38675@@ -0,0 +1,132 @@
38676+/****************************************************************************
38677+ * Copyright 2002-2005: Level 5 Networks Inc.
38678+ * Copyright 2005-2008: Solarflare Communications Inc,
38679+ * 9501 Jeronimo Road, Suite 250,
38680+ * Irvine, CA 92618, USA
38681+ *
38682+ * Maintained by Solarflare Communications
38683+ * <linux-xen-drivers@solarflare.com>
38684+ * <onload-dev@solarflare.com>
38685+ *
38686+ * This program is free software; you can redistribute it and/or modify it
38687+ * under the terms of the GNU General Public License version 2 as published
38688+ * by the Free Software Foundation, incorporated herein by reference.
38689+ *
38690+ * This program is distributed in the hope that it will be useful,
38691+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
38692+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
38693+ * GNU General Public License for more details.
38694+ *
38695+ * You should have received a copy of the GNU General Public License
38696+ * along with this program; if not, write to the Free Software
38697+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
38698+ ****************************************************************************
38699+ */
38700+
38701+/*! \cidoxg_include_ci_tools */
38702+
38703+#ifndef __CI_TOOLS_SYSDEP_H__
38704+#define __CI_TOOLS_SYSDEP_H__
38705+
38706+/* Make this header self-sufficient */
38707+#include <ci/compat.h>
38708+#include <ci/tools/log.h>
38709+#include <ci/tools/debug.h>
38710+
38711+
38712+/**********************************************************************
38713+ * Platform dependencies.
38714+ */
38715+
38716+#if defined(__KERNEL__)
38717+
38718+# if defined(__linux__)
38719+# include <ci/tools/platform/linux_kernel.h>
38720+# elif defined(_WIN32)
38721+# include <ci/tools/platform/win32_kernel.h>
38722+# elif defined(__sun__)
38723+# include <ci/tools/platform/sunos_kernel.h>
38724+# else
38725+# error Unknown platform.
38726+# endif
38727+
38728+#elif defined(_WIN32)
38729+
38730+# include <ci/tools/platform/win32.h>
38731+
38732+#elif defined(__unix__)
38733+
38734+# include <ci/tools/platform/unix.h>
38735+
38736+#else
38737+
38738+# error Unknown platform.
38739+
38740+#endif
38741+
38742+#if defined(__linux__)
38743+/*! Linux sendfile() support enable/disable. */
38744+# define CI_HAVE_SENDFILE /* provide sendfile i/f */
38745+
38746+# define CI_HAVE_OS_NOPAGE
38747+#endif
38748+
38749+#if defined(__sun__)
38750+# define CI_HAVE_SENDFILE /* provide sendfile i/f */
38751+# define CI_HAVE_SENDFILEV /* provide sendfilev i/f */
38752+
38753+# define CI_IOCTL_SENDFILE /* use efrm CI_SENDFILEV ioctl */
38754+#endif
38755+
38756+#if defined(_WIN32)
38757+typedef ci_uint32 ci_uerr_t; /* range of OS user-mode return codes */
38758+typedef ci_uint32 ci_kerr_t; /* range of OS kernel-mode return codes */
38759+#elif defined(__unix__)
38760+typedef ci_int32 ci_uerr_t; /* range of OS user-mode return codes */
38761+typedef ci_int32 ci_kerr_t; /* range of OS kernel-mode return codes */
38762+#endif
38763+
38764+
38765+/**********************************************************************
38766+ * Compiler and processor dependencies.
38767+ */
38768+
38769+#if defined(__GNUC__)
38770+
38771+#if defined(__i386__) || defined(__x86_64__)
38772+# include <ci/tools/platform/gcc_x86.h>
38773+#elif defined(__PPC__)
38774+# include <ci/tools/platform/gcc_ppc.h>
38775+#elif defined(__ia64__)
38776+# include <ci/tools/platform/gcc_ia64.h>
38777+#else
38778+# error Unknown processor.
38779+#endif
38780+
38781+#elif defined(_MSC_VER)
38782+
38783+#if defined(__i386__)
38784+# include <ci/tools/platform/msvc_x86.h>
38785+# elif defined(__x86_64__)
38786+# include <ci/tools/platform/msvc_x86_64.h>
38787+#else
38788+# error Unknown processor.
38789+#endif
38790+
38791+#elif defined(__PGI)
38792+
38793+# include <ci/tools/platform/pg_x86.h>
38794+
38795+#elif defined(__INTEL_COMPILER)
38796+
38797+/* Intel compilers v7 claim to be very gcc compatible. */
38798+# include <ci/tools/platform/gcc_x86.h>
38799+
38800+#else
38801+# error Unknown compiler.
38802+#endif
38803+
38804+
38805+#endif /* __CI_TOOLS_SYSDEP_H__ */
38806+
38807+/*! \cidoxg_end */
38808Index: head-2008-11-25/drivers/xen/sfc_netfront/Makefile
38809===================================================================
38810--- /dev/null 1970-01-01 00:00:00.000000000 +0000
38811+++ head-2008-11-25/drivers/xen/sfc_netfront/Makefile 2008-02-26 10:54:11.000000000 +0100
38812@@ -0,0 +1,11 @@
38813+EXTRA_CFLAGS += -Idrivers/xen/sfc_netfront -Idrivers/xen/sfc_netutil -Idrivers/xen/netfront
38814+EXTRA_CFLAGS += -D__ci_driver__
38815+EXTRA_CFLAGS += -Werror
38816+
38817+ifdef GCOV
38818+EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
38819+endif
38820+
38821+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND) := sfc_netfront.o
38822+
38823+sfc_netfront-objs := accel_msg.o accel_bufs.o accel_netfront.o accel_vi.o accel_xenbus.o accel_tso.o accel_ssr.o accel_debugfs.o falcon_event.o falcon_vi.o pt_tx.o vi_init.o
38824Index: head-2008-11-25/drivers/xen/sfc_netfront/accel.h
38825===================================================================
38826--- /dev/null 1970-01-01 00:00:00.000000000 +0000
38827+++ head-2008-11-25/drivers/xen/sfc_netfront/accel.h 2008-02-26 10:54:11.000000000 +0100
38828@@ -0,0 +1,477 @@
38829+/****************************************************************************
38830+ * Solarflare driver for Xen network acceleration
38831+ *
38832+ * Copyright 2006-2008: Solarflare Communications Inc,
38833+ * 9501 Jeronimo Road, Suite 250,
38834+ * Irvine, CA 92618, USA
38835+ *
38836+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
38837+ *
38838+ * This program is free software; you can redistribute it and/or modify it
38839+ * under the terms of the GNU General Public License version 2 as published
38840+ * by the Free Software Foundation, incorporated herein by reference.
38841+ *
38842+ * This program is distributed in the hope that it will be useful,
38843+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
38844+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
38845+ * GNU General Public License for more details.
38846+ *
38847+ * You should have received a copy of the GNU General Public License
38848+ * along with this program; if not, write to the Free Software
38849+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
38850+ ****************************************************************************
38851+ */
38852+
38853+#ifndef NETFRONT_ACCEL_H
38854+#define NETFRONT_ACCEL_H
38855+
38856+#include "accel_msg_iface.h"
38857+#include "accel_cuckoo_hash.h"
38858+#include "accel_bufs.h"
38859+
38860+#include "etherfabric/ef_vi.h"
38861+
38862+#include <xen/xenbus.h>
38863+#include <xen/evtchn.h>
38864+
38865+#include <linux/kernel.h>
38866+#include <linux/list.h>
38867+
38868+enum netfront_accel_post_status {
38869+ NETFRONT_ACCEL_STATUS_GOOD,
38870+ NETFRONT_ACCEL_STATUS_BUSY,
38871+ NETFRONT_ACCEL_STATUS_CANT
38872+};
38873+
38874+#define NETFRONT_ACCEL_STATS 1
38875+#if NETFRONT_ACCEL_STATS
38876+#define NETFRONT_ACCEL_STATS_OP(x) x
38877+#else
38878+#define NETFRONT_ACCEL_STATS_OP(x)
38879+#endif
38880+
38881+
38882+enum netfront_accel_msg_state {
38883+ NETFRONT_ACCEL_MSG_NONE = 0,
38884+ NETFRONT_ACCEL_MSG_HELLO = 1,
38885+ NETFRONT_ACCEL_MSG_HW = 2
38886+};
38887+
38888+
38889+typedef struct {
38890+ u32 in_progress;
38891+ u32 total_len;
38892+ struct sk_buff *skb;
38893+} netfront_accel_jumbo_state;
38894+
38895+
38896+struct netfront_accel_ssr_state {
38897+ /** List of tracked connections. */
38898+ struct list_head conns;
38899+
38900+ /** Free efx_ssr_conn instances. */
38901+ struct list_head free_conns;
38902+};
38903+
38904+
38905+struct netfront_accel_netdev_stats {
38906+ /* Fastpath stats. */
38907+ u32 fastpath_rx_pkts;
38908+ u32 fastpath_rx_bytes;
38909+ u32 fastpath_rx_errors;
38910+ u32 fastpath_tx_pkts;
38911+ u32 fastpath_tx_bytes;
38912+ u32 fastpath_tx_errors;
38913+};
38914+
38915+
38916+struct netfront_accel_netdev_dbfs {
38917+ struct dentry *fastpath_rx_pkts;
38918+ struct dentry *fastpath_rx_bytes;
38919+ struct dentry *fastpath_rx_errors;
38920+ struct dentry *fastpath_tx_pkts;
38921+ struct dentry *fastpath_tx_bytes;
38922+ struct dentry *fastpath_tx_errors;
38923+};
38924+
38925+
38926+struct netfront_accel_stats {
38927+ /** Fast path events */
38928+ u64 fastpath_tx_busy;
38929+
38930+ /** TX DMA queue status */
38931+ u64 fastpath_tx_completions;
38932+
38933+ /** The number of events processed. */
38934+ u64 event_count;
38935+
38936+ /** Number of frame trunc events seen on fastpath */
38937+ u64 fastpath_frm_trunc;
38938+
38939+ /** Number of no rx descriptor trunc events seen on fastpath */
38940+ u64 rx_no_desc_trunc;
38941+
38942+ /** The number of misc bad events (e.g. RX_DISCARD) processed. */
38943+ u64 bad_event_count;
38944+
38945+ /** Number of events dealt with in poll loop */
38946+ u32 events_per_poll_max;
38947+ u32 events_per_poll_tx_max;
38948+ u32 events_per_poll_rx_max;
38949+
38950+ /** Largest number of concurrently outstanding tx descriptors */
38951+ u32 fastpath_tx_pending_max;
38952+
38953+ /** The number of events since the last interrupts. */
38954+ u32 event_count_since_irq;
38955+
38956+ /** The max number of events between interrupts. */
38957+ u32 events_per_irq_max;
38958+
38959+ /** The number of interrupts. */
38960+ u64 irq_count;
38961+
38962+ /** The number of useless interrupts. */
38963+ u64 useless_irq_count;
38964+
38965+ /** The number of polls scheduled. */
38966+ u64 poll_schedule_count;
38967+
38968+ /** The number of polls called. */
38969+ u64 poll_call_count;
38970+
38971+ /** The number of rechecks. */
38972+ u64 poll_reschedule_count;
38973+
38974+ /** Number of times we've called netif_stop_queue/netif_wake_queue */
38975+ u64 queue_stops;
38976+ u64 queue_wakes;
38977+
38978+ /** SSR stats */
38979+ u64 ssr_bursts;
38980+ u64 ssr_drop_stream;
38981+ u64 ssr_misorder;
38982+ u64 ssr_slow_start;
38983+ u64 ssr_merges;
38984+ u64 ssr_too_many;
38985+ u64 ssr_new_stream;
38986+};
38987+
38988+
38989+struct netfront_accel_dbfs {
38990+ struct dentry *fastpath_tx_busy;
38991+ struct dentry *fastpath_tx_completions;
38992+ struct dentry *fastpath_tx_pending_max;
38993+ struct dentry *fastpath_frm_trunc;
38994+ struct dentry *rx_no_desc_trunc;
38995+ struct dentry *event_count;
38996+ struct dentry *bad_event_count;
38997+ struct dentry *events_per_poll_max;
38998+ struct dentry *events_per_poll_rx_max;
38999+ struct dentry *events_per_poll_tx_max;
39000+ struct dentry *event_count_since_irq;
39001+ struct dentry *events_per_irq_max;
39002+ struct dentry *irq_count;
39003+ struct dentry *useless_irq_count;
39004+ struct dentry *poll_schedule_count;
39005+ struct dentry *poll_call_count;
39006+ struct dentry *poll_reschedule_count;
39007+ struct dentry *queue_stops;
39008+ struct dentry *queue_wakes;
39009+ struct dentry *ssr_bursts;
39010+ struct dentry *ssr_drop_stream;
39011+ struct dentry *ssr_misorder;
39012+ struct dentry *ssr_slow_start;
39013+ struct dentry *ssr_merges;
39014+ struct dentry *ssr_too_many;
39015+ struct dentry *ssr_new_stream;
39016+};
39017+
39018+
39019+typedef struct netfront_accel_vnic {
39020+ struct netfront_accel_vnic *next;
39021+
39022+ struct mutex vnic_mutex;
39023+
39024+ spinlock_t tx_lock;
39025+
39026+ struct netfront_accel_bufpages bufpages;
39027+ struct netfront_accel_bufinfo *rx_bufs;
39028+ struct netfront_accel_bufinfo *tx_bufs;
39029+
39030+ /** Hardware & VI state */
39031+ ef_vi vi;
39032+
39033+ ef_vi_state *vi_state;
39034+
39035+ ef_eventq_state evq_state;
39036+
39037+ void *evq_mapping;
39038+
39039+	/** Hardware dependent state */
39040+ union {
39041+ struct {
39042+ /** Falcon A or B */
39043+ enum net_accel_hw_type type;
39044+ u32 *evq_rptr;
39045+ u32 *doorbell;
39046+ void *evq_rptr_mapping;
39047+ void *doorbell_mapping;
39048+ void *txdmaq_mapping;
39049+ void *rxdmaq_mapping;
39050+ } falcon;
39051+ } hw;
39052+
39053+ /** RX DMA queue status */
39054+ u32 rx_dma_level;
39055+
39056+ /** Number of RX descriptors waiting to be pushed to the card. */
39057+ u32 rx_dma_batched;
39058+#define NETFRONT_ACCEL_RX_DESC_BATCH 16
39059+
39060+ /**
39061+ * Hash table of remote mac addresses to decide whether to try
39062+ * fast path
39063+ */
39064+ cuckoo_hash_table fastpath_table;
39065+ spinlock_t table_lock;
39066+
39067+	/** The local MAC address of the virtual interface we're accelerating */
39068+ u8 mac[ETH_ALEN];
39069+
39070+ int rx_pkt_stride;
39071+ int rx_skb_stride;
39072+
39073+ /**
39074+ * Keep track of fragments of jumbo packets as events are
39075+ * delivered by NIC
39076+ */
39077+ netfront_accel_jumbo_state jumbo_state;
39078+
39079+ struct net_device *net_dev;
39080+
39081+ /** These two gate the enabling of fast path operations */
39082+ int frontend_ready;
39083+ int backend_netdev_up;
39084+
39085+ int irq_enabled;
39086+ spinlock_t irq_enabled_lock;
39087+
39088+ int tx_enabled;
39089+
39090+ int poll_enabled;
39091+
39092+ /** A spare slot for a TX packet. This is treated as an extension
39093+ * of the DMA queue. */
39094+ struct sk_buff *tx_skb;
39095+
39096+ /** Keep track of fragments of SSR packets */
39097+ struct netfront_accel_ssr_state ssr_state;
39098+
39099+ struct xenbus_device *dev;
39100+
39101+ /** Event channel for messages */
39102+ int msg_channel;
39103+ int msg_channel_irq;
39104+
39105+ /** Event channel for network interrupts. */
39106+ int net_channel;
39107+ int net_channel_irq;
39108+
39109+ struct net_accel_shared_page *shared_page;
39110+
39111+ grant_ref_t ctrl_page_gnt;
39112+ grant_ref_t msg_page_gnt;
39113+
39114+ /** Message Qs, 1 each way. */
39115+ sh_msg_fifo2 to_dom0;
39116+ sh_msg_fifo2 from_dom0;
39117+
39118+ enum netfront_accel_msg_state msg_state;
39119+
39120+ /** Watch on accelstate */
39121+ struct xenbus_watch backend_accel_watch;
39122+ /** Watch on frontend's MAC address */
39123+ struct xenbus_watch mac_address_watch;
39124+
39125+ /** Work to process received irq/msg */
39126+ struct work_struct msg_from_bend;
39127+
39128+ /** Wait queue for changes in accelstate. */
39129+ wait_queue_head_t state_wait_queue;
39130+
39131+ /** The current accelstate of this driver. */
39132+ XenbusState frontend_state;
39133+
39134+ /** The most recent accelstate seen by the xenbus watch. */
39135+ XenbusState backend_state;
39136+
39137+ /** Non-zero if we should reject requests to connect. */
39138+ int removing;
39139+
39140+ /** Non-zero if the domU shared state has been initialised. */
39141+ int domU_state_is_setup;
39142+
39143+ /** Non-zero if the dom0 shared state has been initialised. */
39144+ int dom0_state_is_setup;
39145+
39146+ /* Those statistics that are added to the netdev stats */
39147+ struct netfront_accel_netdev_stats netdev_stats;
39148+ struct netfront_accel_netdev_stats stats_last_read;
39149+#ifdef CONFIG_DEBUG_FS
39150+ struct netfront_accel_netdev_dbfs netdev_dbfs;
39151+#endif
39152+
39153+ /* These statistics are internal and optional */
39154+#if NETFRONT_ACCEL_STATS
39155+ struct netfront_accel_stats stats;
39156+#ifdef CONFIG_DEBUG_FS
39157+ struct netfront_accel_dbfs dbfs;
39158+#endif
39159+#endif
39160+
39161+	/** Debugfs dir for this interface */
39162+ struct dentry *dbfs_dir;
39163+} netfront_accel_vnic;
39164+
39165+
39166+/* Module parameters */
39167+extern unsigned sfc_netfront_max_pages;
39168+extern unsigned sfc_netfront_buffer_split;
39169+
39170+extern const char *frontend_name;
39171+extern struct netfront_accel_hooks accel_hooks;
39172+extern struct workqueue_struct *netfront_accel_workqueue;
39173+
39174+
39175+extern
39176+void netfront_accel_vi_ctor(netfront_accel_vnic *vnic);
39177+
39178+extern
39179+int netfront_accel_vi_init(netfront_accel_vnic *vnic,
39180+ struct net_accel_msg_hw *hw_msg);
39181+
39182+extern
39183+void netfront_accel_vi_dtor(netfront_accel_vnic *vnic);
39184+
39185+
39186+/**
39187+ * Add new buffers which have been registered with the NIC.
39188+ *
39189+ * @v vnic The vnic instance to process the response.
39190+ *
39191+ * The buffers contained in the message are added to the buffer pool.
39192+ */
39193+extern
39194+void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx);
39195+
39196+/**
39197+ * Put a packet on the tx DMA queue.
39198+ *
39199+ * @v vnic The vnic instance to accept the packet.
39200+ * @v skb A sk_buff to send.
39201+ *
39202+ * Attempt to send a packet. On success, the skb is owned by the DMA
39203+ * queue and will be released when the completion event arrives.
39204+ */
39205+extern enum netfront_accel_post_status
39206+netfront_accel_vi_tx_post(netfront_accel_vnic *vnic,
39207+ struct sk_buff *skb);
39208+
39209+
39210+/**
39211+ * Process events in response to an interrupt.
39212+ *
39213+ * @v vnic The vnic instance to poll.
39214+ * @v rx_packets The maximum number of rx packets to process.
39215+ * @ret rx_done The number of rx packets processed.
39216+ *
39217+ * The vnic will process events until there are no more events
39218+ * remaining or the specified number of rx packets has been processed.
39219+ * The split from the interrupt call is to allow Linux NAPI
39220+ * polling.
39221+ */
39222+extern
39223+int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets);
39224+
39225+
39226+/**
39227+ * Iterate over the fragments of a packet buffer.
39228+ *
39229+ * @v skb The packet buffer to examine.
39230+ * @v idx A variable name for the fragment index.
39231+ * @v data A variable name for the address of the fragment data.
39232+ * @v length A variable name for the fragment length.
39233+ * @v code A section of code to execute for each fragment.
39234+ *
39235+ * This macro iterates over the fragments in a packet buffer and
39236+ * executes the code for each of them.
39237+ */
39238+#define NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT(skb, frag_idx, \
39239+ frag_data, frag_len, \
39240+ code) \
39241+ do { \
39242+ int frag_idx; \
39243+ void *frag_data; \
39244+ unsigned int frag_len; \
39245+ \
39246+ frag_data = skb->data; \
39247+ frag_len = skb_headlen(skb); \
39248+ frag_idx = 0; \
39249+ while (1) { /* For each fragment */ \
39250+ code; \
39251+ if (frag_idx >= skb_shinfo(skb)->nr_frags) { \
39252+ break; \
39253+ } else { \
39254+ skb_frag_t *fragment; \
39255+ fragment = &skb_shinfo(skb)->frags[frag_idx]; \
39256+ frag_len = fragment->size; \
39257+ frag_data = ((void*)page_address(fragment->page) \
39258+ + fragment->page_offset); \
39259+ }; \
39260+ frag_idx++; \
39261+ } \
39262+ } while(0)
39263+
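Illustration only, not part of the patch: counting the bytes covered by an skb with the fragment-iteration macro above; example_count_bytes is hypothetical.

    /* Editor's sketch, illustrative only. */
    static inline unsigned example_count_bytes(struct sk_buff *skb)
    {
        unsigned total = 0;
        NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
            (skb, idx, data, len,
             { (void) data; (void) idx; total += len; });
        return total;
    }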
39264+static inline
39265+void netfront_accel_disable_net_interrupts(netfront_accel_vnic *vnic)
39266+{
39267+ mask_evtchn(vnic->net_channel);
39268+}
39269+
39270+static inline
39271+void netfront_accel_enable_net_interrupts(netfront_accel_vnic *vnic)
39272+{
39273+ unmask_evtchn(vnic->net_channel);
39274+}
39275+
39276+void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac,
39277+ u32 ip, u16 port, u8 protocol);
39278+
39279+/* Process an IRQ received from back end driver */
39280+irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
39281+ struct pt_regs *unused);
39282+irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
39283+ struct pt_regs *unused);
39284+
39285+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
39286+extern void netfront_accel_msg_from_bend(struct work_struct *context);
39287+#else
39288+extern void netfront_accel_msg_from_bend(void *context);
39289+#endif
39290+
39291+extern void vnic_stop_fastpath(netfront_accel_vnic *vnic);
39292+
39293+extern int netfront_accel_probe(struct net_device *net_dev,
39294+ struct xenbus_device *dev);
39295+extern int netfront_accel_remove(struct xenbus_device *dev);
39296+extern void netfront_accel_set_closing(netfront_accel_vnic *vnic);
39297+
39298+extern int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic);
39299+
39300+extern void netfront_accel_debugfs_init(void);
39301+extern void netfront_accel_debugfs_fini(void);
39302+extern int netfront_accel_debugfs_create(netfront_accel_vnic *vnic);
39303+extern int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic);
39304+
39305+#endif /* NETFRONT_ACCEL_H */
39306Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.c
39307===================================================================
39308--- /dev/null 1970-01-01 00:00:00.000000000 +0000
39309+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.c 2008-02-26 10:54:12.000000000 +0100
39310@@ -0,0 +1,393 @@
39311+/****************************************************************************
39312+ * Solarflare driver for Xen network acceleration
39313+ *
39314+ * Copyright 2006-2008: Solarflare Communications Inc,
39315+ * 9501 Jeronimo Road, Suite 250,
39316+ * Irvine, CA 92618, USA
39317+ *
39318+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
39319+ *
39320+ * This program is free software; you can redistribute it and/or modify it
39321+ * under the terms of the GNU General Public License version 2 as published
39322+ * by the Free Software Foundation, incorporated herein by reference.
39323+ *
39324+ * This program is distributed in the hope that it will be useful,
39325+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
39326+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39327+ * GNU General Public License for more details.
39328+ *
39329+ * You should have received a copy of the GNU General Public License
39330+ * along with this program; if not, write to the Free Software
39331+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
39332+ ****************************************************************************
39333+ */
39334+
39335+#include <xen/gnttab.h>
39336+
39337+#include "accel_bufs.h"
39338+#include "accel_util.h"
39339+
39340+#include "accel.h"
39341+
39342+
39343+static int
39344+netfront_accel_alloc_buf_desc_blocks(struct netfront_accel_bufinfo *manager,
39345+ int pages)
39346+{
39347+ manager->desc_blocks =
39348+ kzalloc(sizeof(struct netfront_accel_pkt_desc *) *
39349+ NETFRONT_ACCEL_BUF_NUM_BLOCKS(pages), GFP_KERNEL);
39350+ if (manager->desc_blocks == NULL) {
39351+ return -ENOMEM;
39352+ }
39353+
39354+ return 0;
39355+}
39356+
39357+static int
39358+netfront_accel_alloc_buf_lists(struct netfront_accel_bufpages *bufpages,
39359+ int pages)
39360+{
39361+ bufpages->page_list = kmalloc(pages * sizeof(void *), GFP_KERNEL);
39362+ if (bufpages->page_list == NULL) {
39363+ return -ENOMEM;
39364+ }
39365+
39366+ bufpages->grant_list = kzalloc(pages * sizeof(grant_ref_t), GFP_KERNEL);
39367+ if (bufpages->grant_list == NULL) {
39368+ kfree(bufpages->page_list);
39369+ bufpages->page_list = NULL;
39370+ return -ENOMEM;
39371+ }
39372+
39373+ return 0;
39374+}
39375+
39376+
39377+int netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages,
39378+ struct netfront_accel_bufinfo *rx_manager,
39379+ struct netfront_accel_bufinfo *tx_manager,
39380+ int pages)
39381+{
39382+ int n, rc;
39383+
39384+ if ((rc = netfront_accel_alloc_buf_desc_blocks
39385+ (rx_manager, pages - (pages / sfc_netfront_buffer_split))) < 0) {
39386+ goto rx_fail;
39387+ }
39388+
39389+ if ((rc = netfront_accel_alloc_buf_desc_blocks
39390+ (tx_manager, pages / sfc_netfront_buffer_split)) < 0) {
39391+ goto tx_fail;
39392+ }
39393+
39394+ if ((rc = netfront_accel_alloc_buf_lists(bufpages, pages)) < 0) {
39395+ goto lists_fail;
39396+ }
39397+
39398+ for (n = 0; n < pages; n++) {
39399+ void *tmp = (void*)__get_free_page(GFP_KERNEL);
39400+ if (tmp == NULL)
39401+ break;
39402+
39403+ bufpages->page_list[n] = tmp;
39404+ }
39405+
39406+ if (n != pages) {
39407+ EPRINTK("%s: not enough pages: %d != %d\n", __FUNCTION__, n,
39408+ pages);
39409+		for (n--; n >= 0; n--)
39410+ free_page((unsigned long)(bufpages->page_list[n]));
39411+ rc = -ENOMEM;
39412+ goto pages_fail;
39413+ }
39414+
39415+ bufpages->max_pages = pages;
39416+ bufpages->page_reqs = 0;
39417+
39418+ return 0;
39419+
39420+ pages_fail:
39421+ kfree(bufpages->page_list);
39422+ kfree(bufpages->grant_list);
39423+
39424+ bufpages->page_list = NULL;
39425+ bufpages->grant_list = NULL;
39426+ lists_fail:
39427+ kfree(tx_manager->desc_blocks);
39428+ tx_manager->desc_blocks = NULL;
39429+
39430+ tx_fail:
39431+ kfree(rx_manager->desc_blocks);
39432+ rx_manager->desc_blocks = NULL;
39433+ rx_fail:
39434+ return rc;
39435+}
39436+
39437+
39438+void netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages,
39439+ struct netfront_accel_bufinfo *rx_manager,
39440+ struct netfront_accel_bufinfo *tx_manager)
39441+{
39442+ int i;
39443+
39444+ for (i = 0; i < bufpages->max_pages; i++) {
39445+ if (bufpages->grant_list[i] != 0)
39446+ net_accel_ungrant_page(bufpages->grant_list[i]);
39447+ free_page((unsigned long)(bufpages->page_list[i]));
39448+ }
39449+
39450+ if (bufpages->max_pages) {
39451+ kfree(bufpages->page_list);
39452+ kfree(bufpages->grant_list);
39453+ kfree(rx_manager->desc_blocks);
39454+ kfree(tx_manager->desc_blocks);
39455+ }
39456+}
39457+
39458+
39459+/*
39460+ * Allocate memory for the buffer manager and create a lock. If no
39461+ * lock is supplied, one is allocated internally.
39462+ */
39463+struct netfront_accel_bufinfo *netfront_accel_init_bufs(spinlock_t *lock)
39464+{
39465+ struct netfront_accel_bufinfo *res = kmalloc(sizeof(*res), GFP_KERNEL);
39466+ if (res != NULL) {
39467+ res->npages = res->nused = 0;
39468+ res->first_free = -1;
39469+
39470+ if (lock == NULL) {
39471+ res->lock = kmalloc(sizeof(*res->lock), GFP_KERNEL);
39472+ if (res->lock == NULL) {
39473+ kfree(res);
39474+ return NULL;
39475+ }
39476+ spin_lock_init(res->lock);
39477+ res->internally_locked = 1;
39478+ } else {
39479+ res->lock = lock;
39480+ res->internally_locked = 0;
39481+ }
39482+
39483+ res->desc_blocks = NULL;
39484+ }
39485+
39486+ return res;
39487+}
39488+
39489+
39490+void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *bufs)
39491+{
39492+ if (bufs->internally_locked)
39493+ kfree(bufs->lock);
39494+ kfree(bufs);
39495+}
39496+
39497+
39498+int netfront_accel_buf_map_request(struct xenbus_device *dev,
39499+ struct netfront_accel_bufpages *bufpages,
39500+ struct net_accel_msg *msg,
39501+ int pages, int offset)
39502+{
39503+ int i, mfn;
39504+ int err;
39505+
39506+ net_accel_msg_init(msg, NET_ACCEL_MSG_MAPBUF);
39507+
39508+ BUG_ON(pages > NET_ACCEL_MSG_MAX_PAGE_REQ);
39509+
39510+ msg->u.mapbufs.pages = pages;
39511+
39512+ for (i = 0; i < msg->u.mapbufs.pages; i++) {
39513+ /*
39514+ * This can happen if we tried to send this message
39515+ * earlier but the queue was full.
39516+ */
39517+ if (bufpages->grant_list[offset+i] != 0) {
39518+ msg->u.mapbufs.grants[i] =
39519+ bufpages->grant_list[offset+i];
39520+ continue;
39521+ }
39522+
39523+ mfn = virt_to_mfn(bufpages->page_list[offset+i]);
39524+ VPRINTK("%s: Granting page %d, mfn %08x\n",
39525+ __FUNCTION__, i, mfn);
39526+
39527+ bufpages->grant_list[offset+i] =
39528+ net_accel_grant_page(dev, mfn, 0);
39529+ msg->u.mapbufs.grants[i] = bufpages->grant_list[offset+i];
39530+
39531+ if (msg->u.mapbufs.grants[i] < 0) {
39532+ EPRINTK("%s: Failed to grant buffer: %d\n",
39533+ __FUNCTION__, msg->u.mapbufs.grants[i]);
39534+ err = -EIO;
39535+ goto error;
39536+ }
39537+ }
39538+
39539+	/* This is interpreted on return as the offset in the page_list */
39540+ msg->u.mapbufs.reqid = offset;
39541+
39542+ return 0;
39543+
39544+error:
39545+ /* Ungrant all the pages we've successfully granted. */
39546+ for (i--; i >= 0; i--) {
39547+ net_accel_ungrant_page(bufpages->grant_list[offset+i]);
39548+ bufpages->grant_list[offset+i] = 0;
39549+ }
39550+ return err;
39551+}
39552+
39553+
39554+/* Process a response to a buffer request. */
39555+int netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages,
39556+ struct netfront_accel_bufinfo *manager,
39557+ struct net_accel_msg *msg)
39558+{
39559+ int msg_pages, page_offset, i, newtot;
39560+ int old_block_count, new_block_count;
39561+ u32 msg_buf;
39562+ unsigned long flags;
39563+
39564+ VPRINTK("%s: manager %p msg %p\n", __FUNCTION__, manager, msg);
39565+
39566+ BUG_ON(msg->id != (NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY));
39567+
39568+ msg_pages = msg->u.mapbufs.pages;
39569+ msg_buf = msg->u.mapbufs.buf;
39570+ page_offset = msg->u.mapbufs.reqid;
39571+
39572+ spin_lock_irqsave(manager->lock, flags);
39573+ newtot = manager->npages + msg_pages;
39574+ old_block_count =
39575+ (manager->npages + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >>
39576+ NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
39577+ new_block_count =
39578+ (newtot + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >>
39579+ NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
39580+
39581+ for (i = old_block_count; i < new_block_count; i++) {
39582+ struct netfront_accel_pkt_desc *block;
39583+ if (manager->desc_blocks[i] != NULL) {
39584+ VPRINTK("Not needed\n");
39585+ continue;
39586+ }
39587+ block = kzalloc(NETFRONT_ACCEL_BUFS_PER_BLOCK *
39588+ sizeof(netfront_accel_pkt_desc), GFP_ATOMIC);
39589+ if (block == NULL) {
39590+ spin_unlock_irqrestore(manager->lock, flags);
39591+ return -ENOMEM;
39592+ }
39593+ manager->desc_blocks[i] = block;
39594+ }
39595+ for (i = manager->npages; i < newtot; i++) {
39596+ int k, j = i - manager->npages;
39597+ int block_num;
39598+ int block_idx;
39599+ struct netfront_accel_pkt_desc *pkt;
39600+
39601+ block_num = i >> NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
39602+ block_idx = (NETFRONT_ACCEL_BUFS_PER_PAGE*i)
39603+ & (NETFRONT_ACCEL_BUFS_PER_BLOCK-1);
39604+
39605+ pkt = manager->desc_blocks[block_num] + block_idx;
39606+
39607+ for (k = 0; k < NETFRONT_ACCEL_BUFS_PER_PAGE; k++) {
39608+ BUG_ON(page_offset + j >= bufpages->max_pages);
39609+
39610+ pkt[k].buf_id = NETFRONT_ACCEL_BUFS_PER_PAGE * i + k;
39611+ pkt[k].pkt_kva = bufpages->page_list[page_offset + j] +
39612+ (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) * k;
39613+ pkt[k].pkt_buff_addr = msg_buf +
39614+ (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) *
39615+ (NETFRONT_ACCEL_BUFS_PER_PAGE * j + k);
39616+ pkt[k].next_free = manager->first_free;
39617+ manager->first_free = pkt[k].buf_id;
39618+ *(int*)(pkt[k].pkt_kva) = pkt[k].buf_id;
39619+
39620+ VPRINTK("buf %d desc %p kva %p buffaddr %x\n",
39621+ pkt[k].buf_id, &(pkt[k]), pkt[k].pkt_kva,
39622+ pkt[k].pkt_buff_addr);
39623+ }
39624+ }
39625+ manager->npages = newtot;
39626+ spin_unlock_irqrestore(manager->lock, flags);
39627+ VPRINTK("Added %d pages. Total is now %d\n", msg_pages,
39628+ manager->npages);
39629+ return 0;
39630+}
39631+
39632+
39633+netfront_accel_pkt_desc *
39634+netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id)
39635+{
39636+ netfront_accel_pkt_desc *pkt;
39637+ int block_num = id >> NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT;
39638+ int block_idx = id & (NETFRONT_ACCEL_BUFS_PER_BLOCK - 1);
39639+ BUG_ON(id >= manager->npages * NETFRONT_ACCEL_BUFS_PER_PAGE);
39640+ BUG_ON(block_idx >= NETFRONT_ACCEL_BUFS_PER_BLOCK);
39641+ pkt = manager->desc_blocks[block_num] + block_idx;
39642+ return pkt;
39643+}
39644+
39645+
39646+/* Allocate a buffer from the buffer manager */
39647+netfront_accel_pkt_desc *
39648+netfront_accel_buf_get(struct netfront_accel_bufinfo *manager)
39649+{
39650+ int bufno = -1;
39651+ netfront_accel_pkt_desc *buf = NULL;
39652+ unsigned long flags = 0;
39653+
39654+ /* Any spare? */
39655+ if (manager->first_free == -1)
39656+ return NULL;
39657+ /* Take lock */
39658+ if (manager->internally_locked)
39659+ spin_lock_irqsave(manager->lock, flags);
39660+ bufno = manager->first_free;
39661+ if (bufno != -1) {
39662+ buf = netfront_accel_buf_find(manager, bufno);
39663+ manager->first_free = buf->next_free;
39664+ manager->nused++;
39665+ }
39666+ /* Release lock */
39667+ if (manager->internally_locked)
39668+ spin_unlock_irqrestore(manager->lock, flags);
39669+
39670+ /* Tell the world */
39671+ VPRINTK("Allocated buffer %i, buffaddr %x\n", bufno,
39672+ buf->pkt_buff_addr);
39673+
39674+ return buf;
39675+}
39676+
39677+
39678+/* Release a buffer back to the buffer manager pool */
39679+int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager, u16 id)
39680+{
39681+ netfront_accel_pkt_desc *buf = netfront_accel_buf_find(manager, id);
39682+ unsigned long flags = 0;
39683+ unsigned was_empty = 0;
39684+ int bufno = id;
39685+
39686+ VPRINTK("Freeing buffer %i\n", id);
39687+ BUG_ON(id == (u16)-1);
39688+
39689+ if (manager->internally_locked)
39690+ spin_lock_irqsave(manager->lock, flags);
39691+
39692+ if (manager->first_free == -1)
39693+ was_empty = 1;
39694+
39695+ buf->next_free = manager->first_free;
39696+ manager->first_free = bufno;
39697+ manager->nused--;
39698+
39699+ if (manager->internally_locked)
39700+ spin_unlock_irqrestore(manager->lock, flags);
39701+
39702+ return was_empty;
39703+}
39704Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.h
39705===================================================================
39706--- /dev/null 1970-01-01 00:00:00.000000000 +0000
39707+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.h 2008-02-20 09:32:49.000000000 +0100
39708@@ -0,0 +1,181 @@
39709+/****************************************************************************
39710+ * Solarflare driver for Xen network acceleration
39711+ *
39712+ * Copyright 2006-2008: Solarflare Communications Inc,
39713+ * 9501 Jeronimo Road, Suite 250,
39714+ * Irvine, CA 92618, USA
39715+ *
39716+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
39717+ *
39718+ * This program is free software; you can redistribute it and/or modify it
39719+ * under the terms of the GNU General Public License version 2 as published
39720+ * by the Free Software Foundation, incorporated herein by reference.
39721+ *
39722+ * This program is distributed in the hope that it will be useful,
39723+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
39724+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39725+ * GNU General Public License for more details.
39726+ *
39727+ * You should have received a copy of the GNU General Public License
39728+ * along with this program; if not, write to the Free Software
39729+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
39730+ ****************************************************************************
39731+ */
39732+
39733+#ifndef NETFRONT_ACCEL_BUFS_H
39734+#define NETFRONT_ACCEL_BUFS_H
39735+
39736+#include <linux/skbuff.h>
39737+#include <linux/spinlock.h>
39738+#include <xen/xenbus.h>
39739+
39740+#include "accel_msg_iface.h"
39741+
39742+
39743+/*! Buffer descriptor structure */
39744+typedef struct netfront_accel_pkt_desc {
39745+ int buf_id;
39746+ u32 pkt_buff_addr;
39747+ void *pkt_kva;
39748+ /* This is the socket buffer currently married to this buffer */
39749+ struct sk_buff *skb;
39750+ int next_free;
39751+} netfront_accel_pkt_desc;
39752+
39753+
39754+#define NETFRONT_ACCEL_DEFAULT_BUF_PAGES (384)
39755+#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT (4)
39756+#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK \
39757+ (1 << (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT))
39758+#define NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT (1)
39759+#define NETFRONT_ACCEL_BUFS_PER_PAGE \
39760+ (1 << (NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT))
39761+#define NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT \
39762+ (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT + \
39763+ NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT)
39764+#define NETFRONT_ACCEL_BUFS_PER_BLOCK \
39765+ (1 << NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT)
39766+#define NETFRONT_ACCEL_BUF_NUM_BLOCKS(max_pages) \
39767+ (((max_pages)+NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK-1) / \
39768+ NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK)
39769+
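+/*
+ * Worked example with the defaults above: 384 pages are grouped into blocks
+ * of 16 pages, i.e. NETFRONT_ACCEL_BUF_NUM_BLOCKS(384) == 24 blocks.  Each
+ * page holds 2 buffers of PAGE_SIZE/2 bytes, so a block covers 32 buffers
+ * and buffer 'id' lives in block (id >> 5) at index (id & 31), as computed
+ * by netfront_accel_buf_find().
+ */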
39770+/*! Buffer management structure. */
39771+struct netfront_accel_bufinfo {
39772+ /* number added to this manager */
39773+ unsigned npages;
39774+ /* number currently used from this manager */
39775+ unsigned nused;
39776+
39777+ int first_free;
39778+
39779+ int internally_locked;
39780+ spinlock_t *lock;
39781+
39782+ /*
39783+ * array of pointers (length NETFRONT_ACCEL_BUF_NUM_BLOCKS) to
39784+ * pkt descs
39785+ */
39786+ struct netfront_accel_pkt_desc **desc_blocks;
39787+};
39788+
39789+
39790+struct netfront_accel_bufpages {
39791+ /* length of lists of pages/grants */
39792+ int max_pages;
39793+ /* list of pages allocated for network buffers */
39794+ void **page_list;
39795+ /* list of grants for the above pages */
39796+ grant_ref_t *grant_list;
39797+
39798+ /* number of page requests that have been made */
39799+ unsigned page_reqs;
39800+};
39801+
39802+
39803+/*! Allocate memory for the buffer manager, set up locks etc.
39804+ * Optionally takes a lock to use; if none is supplied, it allocates its own.
39805+ *
39806+ * \return pointer to netfront_accel_bufinfo structure that represents the
39807+ * buffer manager
39808+ */
39809+extern struct netfront_accel_bufinfo *
39810+netfront_accel_init_bufs(spinlock_t *lock);
39811+
39812+/*! Allocate memory for the buffers
39813+ */
39814+extern int
39815+netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages,
39816+ struct netfront_accel_bufinfo *rx_res,
39817+ struct netfront_accel_bufinfo *tx_res,
39818+ int pages);
39819+extern void
39820+netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages,
39821+ struct netfront_accel_bufinfo *rx_res,
39822+ struct netfront_accel_bufinfo *tx_res);
39823+
39824+/*! Release memory for the buffer manager, buffers, etc.
39825+ *
39826+ * \param manager pointer to netfront_accel_bufinfo structure that
39827+ * represents the buffer manager
39828+ */
39829+extern void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *manager);
39830+
39831+/*! Release a buffer.
39832+ *
39833+ * \param manager The buffer manager which owns the buffer.
39834+ * \param id The buffer identifier.
39835+ */
39836+extern int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager,
39837+ u16 id);
39838+
39839+/*! Get the packet descriptor associated with a buffer id.
39840+ *
39841+ * \param manager The buffer manager which owns the buffer.
39842+ * \param id The buffer identifier.
39843+ *
39844+ * The returned value is the packet descriptor for this buffer.
39845+ */
39846+extern netfront_accel_pkt_desc *
39847+netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id);
39848+
39849+
39850+/*! Fill out a message request for some buffers to be mapped by the
39851+ * back end driver
39852+ *
39853+ * \param manager The buffer manager
39854+ * \param msg Pointer to a net_accel_msg to complete.
39855+ * \return 0 on success
39856+ */
39857+extern int
39858+netfront_accel_buf_map_request(struct xenbus_device *dev,
39859+ struct netfront_accel_bufpages *bufpages,
39860+ struct net_accel_msg *msg,
39861+ int pages, int offset);
39862+
39863+/*! Process a response to a buffer request.
39864+ *
39865+ * Deal with a received message from the back end in response to our
39866+ * request for buffers
39867+ *
39868+ * \param manager The buffer manager
39869+ * \param msg The received message from the back end describing new
39870+ * buffers
39871+ * \return 0 on success
39872+ */
39873+extern int
39874+netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages,
39875+ struct netfront_accel_bufinfo *manager,
39876+ struct net_accel_msg *msg);
39877+
39878+
39879+/*! Allocate a buffer from the buffer manager
39880+ *
39881+ * \param manager The buffer manager data structure
39882+ * \param id On exit, the id of the buffer allocated
39883+ * \return Pointer to buffer descriptor.
39884+ */
39885+struct netfront_accel_pkt_desc *
39886+netfront_accel_buf_get(struct netfront_accel_bufinfo *manager);
39887+
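+/*
+ * Typical lifecycle (a sketch based on the declarations above): create the
+ * managers with netfront_accel_init_bufs(), size them with
+ * netfront_accel_alloc_buffer_mem(), ask the backend to map pages via
+ * netfront_accel_buf_map_request() and feed the replies to
+ * netfront_accel_add_bufs(); buffers then cycle through
+ * netfront_accel_buf_get()/netfront_accel_buf_put() until teardown with
+ * netfront_accel_free_buffer_mem() and netfront_accel_fini_bufs().
+ */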
39888+#endif /* NETFRONT_ACCEL_BUFS_H */
39889+
39890Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_debugfs.c
39891===================================================================
39892--- /dev/null 1970-01-01 00:00:00.000000000 +0000
39893+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_debugfs.c 2008-02-26 10:54:12.000000000 +0100
39894@@ -0,0 +1,211 @@
39895+/****************************************************************************
39896+ * Solarflare driver for Xen network acceleration
39897+ *
39898+ * Copyright 2006-2008: Solarflare Communications Inc,
39899+ * 9501 Jeronimo Road, Suite 250,
39900+ * Irvine, CA 92618, USA
39901+ *
39902+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
39903+ *
39904+ * This program is free software; you can redistribute it and/or modify it
39905+ * under the terms of the GNU General Public License version 2 as published
39906+ * by the Free Software Foundation, incorporated herein by reference.
39907+ *
39908+ * This program is distributed in the hope that it will be useful,
39909+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
39910+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39911+ * GNU General Public License for more details.
39912+ *
39913+ * You should have received a copy of the GNU General Public License
39914+ * along with this program; if not, write to the Free Software
39915+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
39916+ ****************************************************************************
39917+ */
39918+
39919+#include <linux/fs.h>
39920+#include <linux/debugfs.h>
39921+
39922+#include "accel.h"
39923+
39924+#if defined(CONFIG_DEBUG_FS)
39925+static struct dentry *sfc_debugfs_root = NULL;
39926+#endif
39927+
39928+void netfront_accel_debugfs_init(void)
39929+{
39930+#if defined(CONFIG_DEBUG_FS)
39931+ sfc_debugfs_root = debugfs_create_dir(frontend_name, NULL);
39932+#endif
39933+}
39934+
39935+
39936+void netfront_accel_debugfs_fini(void)
39937+{
39938+#if defined(CONFIG_DEBUG_FS)
39939+ if (sfc_debugfs_root)
39940+ debugfs_remove(sfc_debugfs_root);
39941+#endif
39942+}
39943+
39944+
39945+int netfront_accel_debugfs_create(netfront_accel_vnic *vnic)
39946+{
39947+#if defined(CONFIG_DEBUG_FS)
39948+ if (sfc_debugfs_root == NULL)
39949+ return -ENOENT;
39950+
39951+ vnic->dbfs_dir = debugfs_create_dir(vnic->net_dev->name,
39952+ sfc_debugfs_root);
39953+ if (vnic->dbfs_dir == NULL)
39954+ return -ENOMEM;
39955+
39956+ vnic->netdev_dbfs.fastpath_rx_pkts = debugfs_create_u32
39957+ ("fastpath_rx_pkts", S_IRUSR | S_IRGRP | S_IROTH,
39958+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_pkts);
39959+ vnic->netdev_dbfs.fastpath_rx_bytes = debugfs_create_u32
39960+ ("fastpath_rx_bytes", S_IRUSR | S_IRGRP | S_IROTH,
39961+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_bytes);
39962+ vnic->netdev_dbfs.fastpath_rx_errors = debugfs_create_u32
39963+ ("fastpath_rx_errors", S_IRUSR | S_IRGRP | S_IROTH,
39964+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_errors);
39965+ vnic->netdev_dbfs.fastpath_tx_pkts = debugfs_create_u32
39966+ ("fastpath_tx_pkts", S_IRUSR | S_IRGRP | S_IROTH,
39967+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_pkts);
39968+ vnic->netdev_dbfs.fastpath_tx_bytes = debugfs_create_u32
39969+ ("fastpath_tx_bytes", S_IRUSR | S_IRGRP | S_IROTH,
39970+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_bytes);
39971+ vnic->netdev_dbfs.fastpath_tx_errors = debugfs_create_u32
39972+ ("fastpath_tx_errors", S_IRUSR | S_IRGRP | S_IROTH,
39973+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_errors);
39974+
39975+#if NETFRONT_ACCEL_STATS
39976+ vnic->dbfs.irq_count = debugfs_create_u64
39977+ ("irq_count", S_IRUSR | S_IRGRP | S_IROTH,
39978+ vnic->dbfs_dir, &vnic->stats.irq_count);
39979+ vnic->dbfs.useless_irq_count = debugfs_create_u64
39980+ ("useless_irq_count", S_IRUSR | S_IRGRP | S_IROTH,
39981+ vnic->dbfs_dir, &vnic->stats.useless_irq_count);
39982+ vnic->dbfs.poll_schedule_count = debugfs_create_u64
39983+ ("poll_schedule_count", S_IRUSR | S_IRGRP | S_IROTH,
39984+ vnic->dbfs_dir, &vnic->stats.poll_schedule_count);
39985+ vnic->dbfs.poll_call_count = debugfs_create_u64
39986+ ("poll_call_count", S_IRUSR | S_IRGRP | S_IROTH,
39987+ vnic->dbfs_dir, &vnic->stats.poll_call_count);
39988+ vnic->dbfs.poll_reschedule_count = debugfs_create_u64
39989+ ("poll_reschedule_count", S_IRUSR | S_IRGRP | S_IROTH,
39990+ vnic->dbfs_dir, &vnic->stats.poll_reschedule_count);
39991+ vnic->dbfs.queue_stops = debugfs_create_u64
39992+ ("queue_stops", S_IRUSR | S_IRGRP | S_IROTH,
39993+ vnic->dbfs_dir, &vnic->stats.queue_stops);
39994+ vnic->dbfs.queue_wakes = debugfs_create_u64
39995+ ("queue_wakes", S_IRUSR | S_IRGRP | S_IROTH,
39996+ vnic->dbfs_dir, &vnic->stats.queue_wakes);
39997+ vnic->dbfs.ssr_bursts = debugfs_create_u64
39998+ ("ssr_bursts", S_IRUSR | S_IRGRP | S_IROTH,
39999+ vnic->dbfs_dir, &vnic->stats.ssr_bursts);
40000+ vnic->dbfs.ssr_drop_stream = debugfs_create_u64
40001+ ("ssr_drop_stream", S_IRUSR | S_IRGRP | S_IROTH,
40002+ vnic->dbfs_dir, &vnic->stats.ssr_drop_stream);
40003+ vnic->dbfs.ssr_misorder = debugfs_create_u64
40004+ ("ssr_misorder", S_IRUSR | S_IRGRP | S_IROTH,
40005+ vnic->dbfs_dir, &vnic->stats.ssr_misorder);
40006+ vnic->dbfs.ssr_slow_start = debugfs_create_u64
40007+ ("ssr_slow_start", S_IRUSR | S_IRGRP | S_IROTH,
40008+ vnic->dbfs_dir, &vnic->stats.ssr_slow_start);
40009+ vnic->dbfs.ssr_merges = debugfs_create_u64
40010+ ("ssr_merges", S_IRUSR | S_IRGRP | S_IROTH,
40011+ vnic->dbfs_dir, &vnic->stats.ssr_merges);
40012+ vnic->dbfs.ssr_too_many = debugfs_create_u64
40013+ ("ssr_too_many", S_IRUSR | S_IRGRP | S_IROTH,
40014+ vnic->dbfs_dir, &vnic->stats.ssr_too_many);
40015+ vnic->dbfs.ssr_new_stream = debugfs_create_u64
40016+ ("ssr_new_stream", S_IRUSR | S_IRGRP | S_IROTH,
40017+ vnic->dbfs_dir, &vnic->stats.ssr_new_stream);
40018+
40019+ vnic->dbfs.fastpath_tx_busy = debugfs_create_u64
40020+ ("fastpath_tx_busy", S_IRUSR | S_IRGRP | S_IROTH,
40021+ vnic->dbfs_dir, &vnic->stats.fastpath_tx_busy);
40022+ vnic->dbfs.fastpath_tx_completions = debugfs_create_u64
40023+ ("fastpath_tx_completions", S_IRUSR | S_IRGRP | S_IROTH,
40024+ vnic->dbfs_dir, &vnic->stats.fastpath_tx_completions);
40025+ vnic->dbfs.fastpath_tx_pending_max = debugfs_create_u32
40026+ ("fastpath_tx_pending_max", S_IRUSR | S_IRGRP | S_IROTH,
40027+ vnic->dbfs_dir, &vnic->stats.fastpath_tx_pending_max);
40028+ vnic->dbfs.event_count = debugfs_create_u64
40029+ ("event_count", S_IRUSR | S_IRGRP | S_IROTH,
40030+ vnic->dbfs_dir, &vnic->stats.event_count);
40031+ vnic->dbfs.bad_event_count = debugfs_create_u64
40032+ ("bad_event_count", S_IRUSR | S_IRGRP | S_IROTH,
40033+ vnic->dbfs_dir, &vnic->stats.bad_event_count);
40034+ vnic->dbfs.event_count_since_irq = debugfs_create_u32
40035+ ("event_count_since_irq", S_IRUSR | S_IRGRP | S_IROTH,
40036+ vnic->dbfs_dir, &vnic->stats.event_count_since_irq);
40037+ vnic->dbfs.events_per_irq_max = debugfs_create_u32
40038+ ("events_per_irq_max", S_IRUSR | S_IRGRP | S_IROTH,
40039+ vnic->dbfs_dir, &vnic->stats.events_per_irq_max);
40040+ vnic->dbfs.fastpath_frm_trunc = debugfs_create_u64
40041+ ("fastpath_frm_trunc", S_IRUSR | S_IRGRP | S_IROTH,
40042+ vnic->dbfs_dir, &vnic->stats.fastpath_frm_trunc);
40043+ vnic->dbfs.rx_no_desc_trunc = debugfs_create_u64
40044+ ("rx_no_desc_trunc", S_IRUSR | S_IRGRP | S_IROTH,
40045+ vnic->dbfs_dir, &vnic->stats.rx_no_desc_trunc);
40046+ vnic->dbfs.events_per_poll_max = debugfs_create_u32
40047+ ("events_per_poll_max", S_IRUSR | S_IRGRP | S_IROTH,
40048+ vnic->dbfs_dir, &vnic->stats.events_per_poll_max);
40049+ vnic->dbfs.events_per_poll_rx_max = debugfs_create_u32
40050+ ("events_per_poll_rx_max", S_IRUSR | S_IRGRP | S_IROTH,
40051+ vnic->dbfs_dir, &vnic->stats.events_per_poll_rx_max);
40052+ vnic->dbfs.events_per_poll_tx_max = debugfs_create_u32
40053+ ("events_per_poll_tx_max", S_IRUSR | S_IRGRP | S_IROTH,
40054+ vnic->dbfs_dir, &vnic->stats.events_per_poll_tx_max);
40055+#endif
40056+#endif
40057+ return 0;
40058+}
40059+
40060+
40061+int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic)
40062+{
40063+#if defined(CONFIG_DEBUG_FS)
40064+ if (vnic->dbfs_dir != NULL) {
40065+ debugfs_remove(vnic->netdev_dbfs.fastpath_rx_pkts);
40066+ debugfs_remove(vnic->netdev_dbfs.fastpath_rx_bytes);
40067+ debugfs_remove(vnic->netdev_dbfs.fastpath_rx_errors);
40068+ debugfs_remove(vnic->netdev_dbfs.fastpath_tx_pkts);
40069+ debugfs_remove(vnic->netdev_dbfs.fastpath_tx_bytes);
40070+ debugfs_remove(vnic->netdev_dbfs.fastpath_tx_errors);
40071+
40072+#if NETFRONT_ACCEL_STATS
40073+ debugfs_remove(vnic->dbfs.irq_count);
40074+ debugfs_remove(vnic->dbfs.useless_irq_count);
40075+ debugfs_remove(vnic->dbfs.poll_schedule_count);
40076+ debugfs_remove(vnic->dbfs.poll_call_count);
40077+ debugfs_remove(vnic->dbfs.poll_reschedule_count);
40078+ debugfs_remove(vnic->dbfs.queue_stops);
40079+ debugfs_remove(vnic->dbfs.queue_wakes);
40080+ debugfs_remove(vnic->dbfs.ssr_bursts);
40081+ debugfs_remove(vnic->dbfs.ssr_drop_stream);
40082+ debugfs_remove(vnic->dbfs.ssr_misorder);
40083+ debugfs_remove(vnic->dbfs.ssr_slow_start);
40084+ debugfs_remove(vnic->dbfs.ssr_merges);
40085+ debugfs_remove(vnic->dbfs.ssr_too_many);
40086+ debugfs_remove(vnic->dbfs.ssr_new_stream);
40087+
40088+ debugfs_remove(vnic->dbfs.fastpath_tx_busy);
40089+ debugfs_remove(vnic->dbfs.fastpath_tx_completions);
40090+ debugfs_remove(vnic->dbfs.fastpath_tx_pending_max);
40091+ debugfs_remove(vnic->dbfs.event_count);
40092+ debugfs_remove(vnic->dbfs.bad_event_count);
40093+ debugfs_remove(vnic->dbfs.event_count_since_irq);
40094+ debugfs_remove(vnic->dbfs.events_per_irq_max);
40095+ debugfs_remove(vnic->dbfs.fastpath_frm_trunc);
40096+ debugfs_remove(vnic->dbfs.rx_no_desc_trunc);
40097+ debugfs_remove(vnic->dbfs.events_per_poll_max);
40098+ debugfs_remove(vnic->dbfs.events_per_poll_rx_max);
40099+ debugfs_remove(vnic->dbfs.events_per_poll_tx_max);
40100+#endif
40101+ debugfs_remove(vnic->dbfs_dir);
40102+ }
40103+#endif
40104+ return 0;
40105+}
40106Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_msg.c
40107===================================================================
40108--- /dev/null 1970-01-01 00:00:00.000000000 +0000
40109+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_msg.c 2008-02-26 10:54:12.000000000 +0100
40110@@ -0,0 +1,566 @@
40111+/****************************************************************************
40112+ * Solarflare driver for Xen network acceleration
40113+ *
40114+ * Copyright 2006-2008: Solarflare Communications Inc,
40115+ * 9501 Jeronimo Road, Suite 250,
40116+ * Irvine, CA 92618, USA
40117+ *
40118+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
40119+ *
40120+ * This program is free software; you can redistribute it and/or modify it
40121+ * under the terms of the GNU General Public License version 2 as published
40122+ * by the Free Software Foundation, incorporated herein by reference.
40123+ *
40124+ * This program is distributed in the hope that it will be useful,
40125+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
40126+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
40127+ * GNU General Public License for more details.
40128+ *
40129+ * You should have received a copy of the GNU General Public License
40130+ * along with this program; if not, write to the Free Software
40131+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
40132+ ****************************************************************************
40133+ */
40134+
40135+#include <linux/stddef.h>
40136+#include <linux/errno.h>
40137+
40138+#include <xen/xenbus.h>
40139+
40140+#include "accel.h"
40141+#include "accel_msg_iface.h"
40142+#include "accel_util.h"
40143+#include "accel_bufs.h"
40144+
40145+#include "netfront.h" /* drivers/xen/netfront/netfront.h */
40146+
40147+static void vnic_start_interrupts(netfront_accel_vnic *vnic)
40148+{
40149+ unsigned long flags;
40150+
40151+ /* Prime our interrupt */
40152+ spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
40153+ if (!netfront_accel_vi_enable_interrupts(vnic)) {
40154+ /* Cripes, that was quick, better pass it up */
40155+ netfront_accel_disable_net_interrupts(vnic);
40156+ vnic->irq_enabled = 0;
40157+ NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++);
40158+ netif_rx_schedule(vnic->net_dev);
40159+ } else {
40160+ /*
40161+ * Nothing yet, make sure we get interrupts through
40162+ * back end
40163+ */
40164+ vnic->irq_enabled = 1;
40165+ netfront_accel_enable_net_interrupts(vnic);
40166+ }
40167+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40168+}
40169+
40170+
40171+static void vnic_stop_interrupts(netfront_accel_vnic *vnic)
40172+{
40173+ unsigned long flags;
40174+
40175+ spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
40176+ netfront_accel_disable_net_interrupts(vnic);
40177+ vnic->irq_enabled = 0;
40178+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40179+}
40180+
40181+
40182+static void vnic_start_fastpath(netfront_accel_vnic *vnic)
40183+{
40184+ struct net_device *net_dev = vnic->net_dev;
40185+ unsigned long flags;
40186+
40187+ DPRINTK("%s\n", __FUNCTION__);
40188+
40189+ spin_lock_irqsave(&vnic->tx_lock, flags);
40190+ vnic->tx_enabled = 1;
40191+ spin_unlock_irqrestore(&vnic->tx_lock, flags);
40192+
40193+ netif_poll_disable(net_dev);
40194+ vnic->poll_enabled = 1;
40195+ netif_poll_enable(net_dev);
40196+
40197+ vnic_start_interrupts(vnic);
40198+}
40199+
40200+
40201+void vnic_stop_fastpath(netfront_accel_vnic *vnic)
40202+{
40203+ struct net_device *net_dev = vnic->net_dev;
40204+ struct netfront_info *np = (struct netfront_info *)netdev_priv(net_dev);
40205+ unsigned long flags1, flags2;
40206+
40207+ DPRINTK("%s\n", __FUNCTION__);
40208+
40209+ vnic_stop_interrupts(vnic);
40210+
40211+ spin_lock_irqsave(&vnic->tx_lock, flags1);
40212+ vnic->tx_enabled = 0;
40213+ spin_lock_irqsave(&np->tx_lock, flags2);
40214+ if (vnic->tx_skb != NULL) {
40215+ dev_kfree_skb_any(vnic->tx_skb);
40216+ vnic->tx_skb = NULL;
40217+ if (netfront_check_queue_ready(net_dev)) {
40218+ netif_wake_queue(net_dev);
40219+ NETFRONT_ACCEL_STATS_OP
40220+ (vnic->stats.queue_wakes++);
40221+ }
40222+ }
40223+ spin_unlock_irqrestore(&np->tx_lock, flags2);
40224+ spin_unlock_irqrestore(&vnic->tx_lock, flags1);
40225+
40226+ /* Must prevent polls and hold lock to modify poll_enabled */
40227+ netif_poll_disable(net_dev);
40228+ spin_lock_irqsave(&vnic->irq_enabled_lock, flags1);
40229+ vnic->poll_enabled = 0;
40230+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags1);
40231+ netif_poll_enable(net_dev);
40232+}
40233+
40234+
40235+static void netfront_accel_interface_up(netfront_accel_vnic *vnic)
40236+{
40237+
40238+ if (!vnic->backend_netdev_up) {
40239+ vnic->backend_netdev_up = 1;
40240+
40241+ if (vnic->frontend_ready)
40242+ vnic_start_fastpath(vnic);
40243+ }
40244+}
40245+
40246+
40247+static void netfront_accel_interface_down(netfront_accel_vnic *vnic)
40248+{
40249+
40250+ if (vnic->backend_netdev_up) {
40251+ vnic->backend_netdev_up = 0;
40252+
40253+ if (vnic->frontend_ready)
40254+ vnic_stop_fastpath(vnic);
40255+ }
40256+}
40257+
40258+
40259+static int vnic_add_bufs(netfront_accel_vnic *vnic,
40260+ struct net_accel_msg *msg)
40261+{
40262+ int rc, offset;
40263+ struct netfront_accel_bufinfo *bufinfo;
40264+
40265+ BUG_ON(msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ);
40266+
40267+ offset = msg->u.mapbufs.reqid;
40268+
40269+ if (offset < vnic->bufpages.max_pages -
40270+ (vnic->bufpages.max_pages / sfc_netfront_buffer_split)) {
40271+ bufinfo = vnic->rx_bufs;
40272+ } else
40273+ bufinfo = vnic->tx_bufs;
40274+
40275+ /* Queue up some Rx buffers to start things off. */
40276+ if ((rc = netfront_accel_add_bufs(&vnic->bufpages, bufinfo, msg)) == 0) {
40277+ netfront_accel_vi_add_bufs(vnic, bufinfo == vnic->rx_bufs);
40278+
40279+ if (offset + msg->u.mapbufs.pages == vnic->bufpages.max_pages) {
40280+ VPRINTK("%s: got all buffers back\n", __FUNCTION__);
40281+ vnic->frontend_ready = 1;
40282+ if (vnic->backend_netdev_up)
40283+ vnic_start_fastpath(vnic);
40284+ } else {
40285+ VPRINTK("%s: got buffers back %d %d\n", __FUNCTION__,
40286+ offset, msg->u.mapbufs.pages);
40287+ }
40288+ }
40289+
40290+ return rc;
40291+}
40292+
40293+
40294+/* The largest [o] such that (1u << o) <= n. Requires n > 0. */
40295+
40296+inline unsigned log2_le(unsigned long n) {
40297+ unsigned order = 1;
40298+ while ((1ul << order) <= n) ++order;
40299+ return (order - 1);
40300+}
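+/*
+ * For example log2_le(1) == 0, log2_le(6) == 2 and log2_le(64) == 6.
+ * vnic_send_buffer_requests() below uses pow2(log2_le(remaining)) so that
+ * each mapping request covers a power-of-two number of pages, capped at
+ * NET_ACCEL_MSG_MAX_PAGE_REQ.
+ */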
40301+
40302+static int vnic_send_buffer_requests(netfront_accel_vnic *vnic,
40303+ struct netfront_accel_bufpages *bufpages)
40304+{
40305+ int pages, offset, rc = 0, sent = 0;
40306+ struct net_accel_msg msg;
40307+
40308+ while (bufpages->page_reqs < bufpages->max_pages) {
40309+ offset = bufpages->page_reqs;
40310+
40311+ pages = pow2(log2_le(bufpages->max_pages -
40312+ bufpages->page_reqs));
40313+ pages = pages < NET_ACCEL_MSG_MAX_PAGE_REQ ?
40314+ pages : NET_ACCEL_MSG_MAX_PAGE_REQ;
40315+
40316+ BUG_ON(offset < 0);
40317+ BUG_ON(pages <= 0);
40318+
40319+ rc = netfront_accel_buf_map_request(vnic->dev, bufpages,
40320+ &msg, pages, offset);
40321+ if (rc == 0) {
40322+ rc = net_accel_msg_send(vnic->shared_page,
40323+ &vnic->to_dom0, &msg);
40324+ if (rc < 0) {
40325+ VPRINTK("%s: queue full, stopping for now\n",
40326+ __FUNCTION__);
40327+ break;
40328+ }
40329+ sent++;
40330+ } else {
40331+ EPRINTK("%s: problem with grant, stopping for now\n",
40332+ __FUNCTION__);
40333+ break;
40334+ }
40335+
40336+ bufpages->page_reqs += pages;
40337+ }
40338+
40339+ if (sent)
40340+ net_accel_msg_notify(vnic->msg_channel_irq);
40341+
40342+ return rc;
40343+}
40344+
40345+
40346+/*
40347+ * In response to dom0 saying "my queue is full", we reply with this
40348+ * when it is no longer full
40349+ */
40350+inline void vnic_set_queue_not_full(netfront_accel_vnic *vnic)
40351+{
40352+
40353+	if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B,
40354+ (unsigned long *)&vnic->shared_page->aflags))
40355+ notify_remote_via_irq(vnic->msg_channel_irq);
40356+ else
40357+ VPRINTK("queue not full bit already set, not signalling\n");
40358+}
40359+
40360+/*
40361+ * Notify dom0 that the queue we want to use is full, it should
40362+ * respond by setting MSG_AFLAGS_QUEUEUNOTFULL in due course
40363+ */
40364+inline void vnic_set_queue_full(netfront_accel_vnic *vnic)
40365+{
40366+
40367+ if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B,
40368+ (unsigned long *)&vnic->shared_page->aflags))
40369+ notify_remote_via_irq(vnic->msg_channel_irq);
40370+ else
40371+ VPRINTK("queue full bit already set, not signalling\n");
40372+}
40373+
40374+
40375+static int vnic_check_hello_version(unsigned version)
40376+{
40377+ if (version > NET_ACCEL_MSG_VERSION) {
40378+ /* Newer protocol, we must refuse */
40379+ return -EPROTO;
40380+ }
40381+
40382+ if (version < NET_ACCEL_MSG_VERSION) {
40383+ /*
40384+ * We are newer, so have discretion to accept if we
40385+		 * wish. For now, however, just reject
40386+ */
40387+ return -EPROTO;
40388+ }
40389+
40390+ BUG_ON(version != NET_ACCEL_MSG_VERSION);
40391+ return 0;
40392+}
40393+
40394+
40395+static int vnic_process_hello_msg(netfront_accel_vnic *vnic,
40396+ struct net_accel_msg *msg)
40397+{
40398+ int err = 0;
40399+ unsigned pages = sfc_netfront_max_pages;
40400+
40401+ if (vnic_check_hello_version(msg->u.hello.version) < 0) {
40402+ msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY
40403+ | NET_ACCEL_MSG_ERROR;
40404+ msg->u.hello.version = NET_ACCEL_MSG_VERSION;
40405+ } else {
40406+ vnic->backend_netdev_up
40407+ = vnic->shared_page->net_dev_up;
40408+
40409+ msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY;
40410+ msg->u.hello.version = NET_ACCEL_MSG_VERSION;
40411+ if (msg->u.hello.max_pages &&
40412+ msg->u.hello.max_pages < pages)
40413+ pages = msg->u.hello.max_pages;
40414+ msg->u.hello.max_pages = pages;
40415+
40416+		/* Split pages between rx and tx as per sfc_netfront_buffer_split */
40417+ err = netfront_accel_alloc_buffer_mem(&vnic->bufpages,
40418+ vnic->rx_bufs,
40419+ vnic->tx_bufs,
40420+ pages);
40421+ if (err)
40422+ msg->id |= NET_ACCEL_MSG_ERROR;
40423+ }
40424+
40425+ /* Send reply */
40426+ net_accel_msg_reply_notify(vnic->shared_page, vnic->msg_channel_irq,
40427+ &vnic->to_dom0, msg);
40428+ return err;
40429+}
40430+
40431+
40432+static int vnic_process_localmac_msg(netfront_accel_vnic *vnic,
40433+ struct net_accel_msg *msg)
40434+{
40435+ unsigned long flags;
40436+ cuckoo_hash_mac_key key;
40437+
40438+ if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) {
40439+ DPRINTK("MAC has moved, could be local: " MAC_FMT "\n",
40440+ MAC_ARG(msg->u.localmac.mac));
40441+ key = cuckoo_mac_to_key(msg->u.localmac.mac);
40442+ spin_lock_irqsave(&vnic->table_lock, flags);
40443+ /* Try to remove it, not a big deal if not there */
40444+ cuckoo_hash_remove(&vnic->fastpath_table,
40445+ (cuckoo_hash_key *)&key);
40446+ spin_unlock_irqrestore(&vnic->table_lock, flags);
40447+ }
40448+
40449+ return 0;
40450+}
40451+
40452+
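+/*
+ * The BUG_ON()s below encode the expected handshake order: HELLO is only
+ * valid in state NETFRONT_ACCEL_MSG_NONE, SETHW only after HELLO, and
+ * MAPBUF replies or LOCALMAC updates only once NETFRONT_ACCEL_MSG_HW has
+ * been reached.
+ */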
40453+static
40454+int vnic_process_rx_msg(netfront_accel_vnic *vnic,
40455+ struct net_accel_msg *msg)
40456+{
40457+ int err;
40458+
40459+ switch (msg->id) {
40460+ case NET_ACCEL_MSG_HELLO:
40461+ /* Hello, reply with Reply */
40462+ DPRINTK("got Hello, with version %.8x\n",
40463+ msg->u.hello.version);
40464+ BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_NONE);
40465+ err = vnic_process_hello_msg(vnic, msg);
40466+ if (err == 0)
40467+ vnic->msg_state = NETFRONT_ACCEL_MSG_HELLO;
40468+ break;
40469+ case NET_ACCEL_MSG_SETHW:
40470+ /* Hardware info message */
40471+ DPRINTK("got H/W info\n");
40472+ BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HELLO);
40473+ err = netfront_accel_vi_init(vnic, &msg->u.hw);
40474+ if (err == 0)
40475+ vnic->msg_state = NETFRONT_ACCEL_MSG_HW;
40476+ break;
40477+ case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY:
40478+ VPRINTK("Got mapped buffers back\n");
40479+ BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
40480+ err = vnic_add_bufs(vnic, msg);
40481+ break;
40482+ case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_ERROR:
40483+ /* No buffers. Can't use the fast path. */
40484+ EPRINTK("Got mapped buffers error. Cannot accelerate.\n");
40485+ BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
40486+ err = -EIO;
40487+ break;
40488+ case NET_ACCEL_MSG_LOCALMAC:
40489+ /* Should be add, remove not currently used */
40490+ EPRINTK_ON(!(msg->u.localmac.flags & NET_ACCEL_MSG_ADD));
40491+ BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
40492+ err = vnic_process_localmac_msg(vnic, msg);
40493+ break;
40494+ default:
40495+ EPRINTK("Huh? Message code is 0x%x\n", msg->id);
40496+ err = -EPROTO;
40497+ break;
40498+ }
40499+
40500+ return err;
40501+}
40502+
40503+
40504+/* Process an IRQ received from back end driver */
40505+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
40506+void netfront_accel_msg_from_bend(struct work_struct *context)
40507+#else
40508+void netfront_accel_msg_from_bend(void *context)
40509+#endif
40510+{
40511+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
40512+ netfront_accel_vnic *vnic =
40513+ container_of(context, netfront_accel_vnic, msg_from_bend);
40514+#else
40515+ netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
40516+#endif
40517+ struct net_accel_msg msg;
40518+ int err, queue_was_full = 0;
40519+
40520+ mutex_lock(&vnic->vnic_mutex);
40521+
40522+ /*
40523+ * This happens when the shared pages have been unmapped but
40524+ * the workqueue has yet to be flushed
40525+ */
40526+ if (!vnic->dom0_state_is_setup)
40527+ goto unlock_out;
40528+
40529+ while ((vnic->shared_page->aflags & NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK)
40530+ != 0) {
40531+ if (vnic->shared_page->aflags &
40532+ NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL) {
40533+ /* We've been told there may now be space. */
40534+ clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B,
40535+ (unsigned long *)&vnic->shared_page->aflags);
40536+ }
40537+
40538+ if (vnic->shared_page->aflags &
40539+ NET_ACCEL_MSG_AFLAGS_QUEUE0FULL) {
40540+ /*
40541+ * There will be space at the end of this
40542+ * function if we can make any.
40543+ */
40544+ clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B,
40545+ (unsigned long *)&vnic->shared_page->aflags);
40546+ queue_was_full = 1;
40547+ }
40548+
40549+ if (vnic->shared_page->aflags &
40550+ NET_ACCEL_MSG_AFLAGS_NETUPDOWN) {
40551+ DPRINTK("%s: net interface change\n", __FUNCTION__);
40552+ clear_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B,
40553+ (unsigned long *)&vnic->shared_page->aflags);
40554+ if (vnic->shared_page->net_dev_up)
40555+ netfront_accel_interface_up(vnic);
40556+ else
40557+ netfront_accel_interface_down(vnic);
40558+ }
40559+ }
40560+
40561+ /* Pull msg out of shared memory */
40562+ while ((err = net_accel_msg_recv(vnic->shared_page, &vnic->from_dom0,
40563+ &msg)) == 0) {
40564+ err = vnic_process_rx_msg(vnic, &msg);
40565+
40566+ if (err != 0)
40567+ goto done;
40568+ }
40569+
40570+ /*
40571+ * Send any pending buffer map request messages that we can,
40572+ * and mark domU->dom0 as full if necessary.
40573+ */
40574+ if (vnic->msg_state == NETFRONT_ACCEL_MSG_HW &&
40575+ vnic->bufpages.page_reqs < vnic->bufpages.max_pages) {
40576+ if (vnic_send_buffer_requests(vnic, &vnic->bufpages) == -ENOSPC)
40577+ vnic_set_queue_full(vnic);
40578+ }
40579+
40580+ /*
40581+ * If there are no messages then this is not an error. It
40582+ * just means that we've finished processing the queue.
40583+ */
40584+ if (err == -ENOENT)
40585+ err = 0;
40586+ done:
40587+ /* We will now have made space in the dom0->domU queue if we can */
40588+ if (queue_was_full)
40589+ vnic_set_queue_not_full(vnic);
40590+
40591+ if (err != 0) {
40592+ EPRINTK("%s returned %d\n", __FUNCTION__, err);
40593+ netfront_accel_set_closing(vnic);
40594+ }
40595+
40596+ unlock_out:
40597+ mutex_unlock(&vnic->vnic_mutex);
40598+
40599+ return;
40600+}
40601+
40602+
40603+irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
40604+ struct pt_regs *unused)
40605+{
40606+ netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
40607+ VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
40608+
40609+ queue_work(netfront_accel_workqueue, &vnic->msg_from_bend);
40610+
40611+ return IRQ_HANDLED;
40612+}
40613+
40614+/* Process an interrupt received from the NIC via backend */
40615+irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
40616+ struct pt_regs *unused)
40617+{
40618+ netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
40619+ struct net_device *net_dev = vnic->net_dev;
40620+ unsigned long flags;
40621+
40622+ VPRINTK("net irq %d from device %s\n", irq, vnic->dev->nodename);
40623+
40624+ NETFRONT_ACCEL_STATS_OP(vnic->stats.irq_count++);
40625+
40626+ BUG_ON(net_dev==NULL);
40627+
40628+ spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
40629+ if (vnic->irq_enabled) {
40630+ netfront_accel_disable_net_interrupts(vnic);
40631+ vnic->irq_enabled = 0;
40632+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40633+
40634+#if NETFRONT_ACCEL_STATS
40635+ vnic->stats.poll_schedule_count++;
40636+ if (vnic->stats.event_count_since_irq >
40637+ vnic->stats.events_per_irq_max)
40638+ vnic->stats.events_per_irq_max =
40639+ vnic->stats.event_count_since_irq;
40640+ vnic->stats.event_count_since_irq = 0;
40641+#endif
40642+ netif_rx_schedule(net_dev);
40643+ }
40644+ else {
40645+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40646+ NETFRONT_ACCEL_STATS_OP(vnic->stats.useless_irq_count++);
40647+ DPRINTK("%s: irq when disabled\n", __FUNCTION__);
40648+ }
40649+
40650+ return IRQ_HANDLED;
40651+}
40652+
40653+
40654+void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac,
40655+ u32 ip, u16 port, u8 protocol)
40656+{
40657+ unsigned long lock_state;
40658+ struct net_accel_msg *msg;
40659+
40660+ msg = net_accel_msg_start_send(vnic->shared_page, &vnic->to_dom0,
40661+ &lock_state);
40662+
40663+ if (msg == NULL)
40664+ return;
40665+
40666+ net_accel_msg_init(msg, NET_ACCEL_MSG_FASTPATH);
40667+ msg->u.fastpath.flags = NET_ACCEL_MSG_REMOVE;
40668+ memcpy(msg->u.fastpath.mac, mac, ETH_ALEN);
40669+
40670+ msg->u.fastpath.port = port;
40671+ msg->u.fastpath.ip = ip;
40672+ msg->u.fastpath.proto = protocol;
40673+
40674+ net_accel_msg_complete_send_notify(vnic->shared_page, &vnic->to_dom0,
40675+ &lock_state, vnic->msg_channel_irq);
40676+}
40677Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_netfront.c
40678===================================================================
40679--- /dev/null 1970-01-01 00:00:00.000000000 +0000
40680+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_netfront.c 2008-02-26 10:54:12.000000000 +0100
40681@@ -0,0 +1,319 @@
40682+/****************************************************************************
40683+ * Solarflare driver for Xen network acceleration
40684+ *
40685+ * Copyright 2006-2008: Solarflare Communications Inc,
40686+ * 9501 Jeronimo Road, Suite 250,
40687+ * Irvine, CA 92618, USA
40688+ *
40689+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
40690+ *
40691+ * This program is free software; you can redistribute it and/or modify it
40692+ * under the terms of the GNU General Public License version 2 as published
40693+ * by the Free Software Foundation, incorporated herein by reference.
40694+ *
40695+ * This program is distributed in the hope that it will be useful,
40696+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
40697+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
40698+ * GNU General Public License for more details.
40699+ *
40700+ * You should have received a copy of the GNU General Public License
40701+ * along with this program; if not, write to the Free Software
40702+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
40703+ ****************************************************************************
40704+ */
40705+
40706+#include <linux/skbuff.h>
40707+#include <linux/netdevice.h>
40708+
40709+/* drivers/xen/netfront/netfront.h */
40710+#include "netfront.h"
40711+
40712+#include "accel.h"
40713+#include "accel_bufs.h"
40714+#include "accel_util.h"
40715+#include "accel_msg_iface.h"
40716+#include "accel_ssr.h"
40717+
40718+#ifdef EFX_GCOV
40719+#include "gcov.h"
40720+#endif
40721+
40722+#define NETFRONT_ACCEL_VNIC_FROM_NETDEV(_nd) \
40723+	((netfront_accel_vnic *)((struct netfront_info *)netdev_priv(_nd))->accel_priv)
40724+
40725+static int netfront_accel_netdev_start_xmit(struct sk_buff *skb,
40726+ struct net_device *net_dev)
40727+{
40728+ netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40729+ struct netfront_info *np =
40730+ (struct netfront_info *)netdev_priv(net_dev);
40731+ int handled, rc;
40732+ unsigned long flags1, flags2;
40733+
40734+ BUG_ON(vnic == NULL);
40735+
40736+ /* Take our tx lock and hold for the duration */
40737+ spin_lock_irqsave(&vnic->tx_lock, flags1);
40738+
40739+ if (!vnic->tx_enabled) {
40740+ rc = 0;
40741+ goto unlock_out;
40742+ }
40743+
40744+ handled = netfront_accel_vi_tx_post(vnic, skb);
40745+ if (handled == NETFRONT_ACCEL_STATUS_BUSY) {
40746+ BUG_ON(vnic->net_dev != net_dev);
40747+ DPRINTK("%s stopping queue\n", __FUNCTION__);
40748+
40749+ /* Netfront's lock protects tx_skb */
40750+ spin_lock_irqsave(&np->tx_lock, flags2);
40751+ BUG_ON(vnic->tx_skb != NULL);
40752+ vnic->tx_skb = skb;
40753+ netif_stop_queue(net_dev);
40754+ spin_unlock_irqrestore(&np->tx_lock, flags2);
40755+
40756+ NETFRONT_ACCEL_STATS_OP(vnic->stats.queue_stops++);
40757+ }
40758+
40759+ if (handled == NETFRONT_ACCEL_STATUS_CANT)
40760+ rc = 0;
40761+ else
40762+ rc = 1;
40763+
40764+unlock_out:
40765+ spin_unlock_irqrestore(&vnic->tx_lock, flags1);
40766+
40767+ return rc;
40768+}
40769+
40770+
40771+static int netfront_accel_netdev_poll(struct net_device *net_dev, int *budget)
40772+{
40773+ netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40774+ int rx_allowed = *budget, rx_done;
40775+
40776+ BUG_ON(vnic == NULL);
40777+
40778+ /* Can check this without lock as modifier excludes polls */
40779+ if (!vnic->poll_enabled)
40780+ return 0;
40781+
40782+ rx_done = netfront_accel_vi_poll(vnic, rx_allowed);
40783+ *budget -= rx_done;
40784+
40785+ NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_call_count++);
40786+
40787+ VPRINTK("%s: done %d allowed %d\n",
40788+ __FUNCTION__, rx_done, rx_allowed);
40789+
40790+ netfront_accel_ssr_end_of_burst(vnic, &vnic->ssr_state);
40791+
40792+ if (rx_done < rx_allowed) {
40793+ return 0; /* Done */
40794+ }
40795+
40796+ NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_reschedule_count++);
40797+
40798+ return 1; /* More to do. */
40799+}
40800+
40801+
40802+/*
40803+ * Process request from netfront to start napi interrupt
40804+ * mode. (i.e. enable interrupts as it's finished polling)
40805+ */
40806+static int netfront_accel_start_napi_interrupts(struct net_device *net_dev)
40807+{
40808+ netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40809+ unsigned long flags;
40810+
40811+ BUG_ON(vnic == NULL);
40812+
40813+ /*
40814+ * Can check this without lock as writer excludes poll before
40815+ * modifying
40816+ */
40817+ if (!vnic->poll_enabled)
40818+ return 0;
40819+
40820+ if (!netfront_accel_vi_enable_interrupts(vnic)) {
40821+ /*
40822+ * There was something there, tell caller we had
40823+ * something to do.
40824+ */
40825+ return 1;
40826+ }
40827+
40828+ spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
40829+ vnic->irq_enabled = 1;
40830+ netfront_accel_enable_net_interrupts(vnic);
40831+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40832+
40833+ return 0;
40834+}
40835+
40836+
40837+/*
40838+ * Process request from netfront to stop napi interrupt
40839+ * mode. (i.e. disable interrupts as it's starting to poll)
40840+ */
40841+static void netfront_accel_stop_napi_interrupts(struct net_device *net_dev)
40842+{
40843+ netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40844+ unsigned long flags;
40845+
40846+ BUG_ON(vnic == NULL);
40847+
40848+ spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
40849+
40850+ if (!vnic->poll_enabled) {
40851+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40852+ return;
40853+ }
40854+
40855+ netfront_accel_disable_net_interrupts(vnic);
40856+ vnic->irq_enabled = 0;
40857+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40858+}
40859+
40860+
40861+static int netfront_accel_check_ready(struct net_device *net_dev)
40862+{
40863+ netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40864+
40865+ BUG_ON(vnic == NULL);
40866+
40867+ /* This is protected by netfront's lock */
40868+ return vnic->tx_skb == NULL;
40869+}
40870+
40871+
40872+static int netfront_accel_get_stats(struct net_device *net_dev,
40873+ struct net_device_stats *stats)
40874+{
40875+ netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40876+ struct netfront_accel_netdev_stats now;
40877+
40878+ BUG_ON(vnic == NULL);
40879+
40880+ now.fastpath_rx_pkts = vnic->netdev_stats.fastpath_rx_pkts;
40881+ now.fastpath_rx_bytes = vnic->netdev_stats.fastpath_rx_bytes;
40882+ now.fastpath_rx_errors = vnic->netdev_stats.fastpath_rx_errors;
40883+ now.fastpath_tx_pkts = vnic->netdev_stats.fastpath_tx_pkts;
40884+ now.fastpath_tx_bytes = vnic->netdev_stats.fastpath_tx_bytes;
40885+ now.fastpath_tx_errors = vnic->netdev_stats.fastpath_tx_errors;
40886+
40887+ stats->rx_packets += (now.fastpath_rx_pkts -
40888+ vnic->stats_last_read.fastpath_rx_pkts);
40889+ stats->rx_bytes += (now.fastpath_rx_bytes -
40890+ vnic->stats_last_read.fastpath_rx_bytes);
40891+ stats->rx_errors += (now.fastpath_rx_errors -
40892+ vnic->stats_last_read.fastpath_rx_errors);
40893+ stats->tx_packets += (now.fastpath_tx_pkts -
40894+ vnic->stats_last_read.fastpath_tx_pkts);
40895+ stats->tx_bytes += (now.fastpath_tx_bytes -
40896+ vnic->stats_last_read.fastpath_tx_bytes);
40897+ stats->tx_errors += (now.fastpath_tx_errors -
40898+ vnic->stats_last_read.fastpath_tx_errors);
40899+
40900+ vnic->stats_last_read = now;
40901+
40902+ return 0;
40903+}
40904+
40905+
40906+struct netfront_accel_hooks accel_hooks = {
40907+ .new_device = &netfront_accel_probe,
40908+ .remove = &netfront_accel_remove,
40909+ .netdev_poll = &netfront_accel_netdev_poll,
40910+ .start_xmit = &netfront_accel_netdev_start_xmit,
40911+ .start_napi_irq = &netfront_accel_start_napi_interrupts,
40912+ .stop_napi_irq = &netfront_accel_stop_napi_interrupts,
40913+ .check_ready = &netfront_accel_check_ready,
40914+ .get_stats = &netfront_accel_get_stats
40915+};
40916+
40917+
40918+unsigned sfc_netfront_max_pages = NETFRONT_ACCEL_DEFAULT_BUF_PAGES;
40919+module_param_named (max_pages, sfc_netfront_max_pages, uint, 0644);
40920+MODULE_PARM_DESC(max_pages, "Number of buffer pages to request");
40921+
40922+unsigned sfc_netfront_buffer_split = 2;
40923+module_param_named (buffer_split, sfc_netfront_buffer_split, uint, 0644);
40924+MODULE_PARM_DESC(buffer_split,
40925+ "Fraction of buffers to use for TX, rest for RX");
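+/*
+ * e.g. buffer_split == 2 (the default) gives TX half of the buffer pages
+ * and RX the rest; buffer_split == 4 would give TX a quarter (see
+ * netfront_accel_alloc_buffer_mem() in accel_bufs.c).
+ */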
40926+
40927+
40928+const char *frontend_name = "sfc_netfront";
40929+
40930+struct workqueue_struct *netfront_accel_workqueue;
40931+
40932+static int __init netfront_accel_init(void)
40933+{
40934+ int rc;
40935+#ifdef EFX_GCOV
40936+ gcov_provider_init(THIS_MODULE);
40937+#endif
40938+
40939+ /*
40940+ * If we're running on dom0, netfront hasn't initialised
40941+ * itself, so we need to keep away
40942+ */
40943+ if (is_initial_xendomain())
40944+ return 0;
40945+
40946+ if (!is_pow2(sizeof(struct net_accel_msg)))
40947+ EPRINTK("%s: bad structure size\n", __FUNCTION__);
40948+
40949+ netfront_accel_workqueue = create_workqueue(frontend_name);
40950+
40951+ netfront_accel_debugfs_init();
40952+
40953+ rc = netfront_accelerator_loaded(NETFRONT_ACCEL_VERSION,
40954+ frontend_name, &accel_hooks);
40955+
40956+ if (rc < 0) {
40957+ EPRINTK("Xen netfront accelerator version mismatch\n");
40958+ return -EINVAL;
40959+ }
40960+
40961+ if (rc > 0) {
40962+ /*
40963+ * In future may want to add backwards compatibility
40964+ * and accept certain subsets of previous versions
40965+ */
40966+ EPRINTK("Xen netfront accelerator version mismatch\n");
40967+ return -EINVAL;
40968+ }
40969+
40970+ return 0;
40971+}
40972+module_init(netfront_accel_init);
40973+
40974+static void __exit netfront_accel_exit(void)
40975+{
40976+ if (is_initial_xendomain())
40977+ return;
40978+
40979+ DPRINTK("%s: unhooking\n", __FUNCTION__);
40980+
40981+ /* Unhook from normal netfront */
40982+ netfront_accelerator_stop(frontend_name);
40983+
40984+ DPRINTK("%s: done\n", __FUNCTION__);
40985+
40986+ netfront_accel_debugfs_fini();
40987+
40988+ flush_workqueue(netfront_accel_workqueue);
40989+
40990+ destroy_workqueue(netfront_accel_workqueue);
40991+
40992+#ifdef EFX_GCOV
40993+ gcov_provider_fini(THIS_MODULE);
40994+#endif
40995+ return;
40996+}
40997+module_exit(netfront_accel_exit);
40998+
40999+MODULE_LICENSE("GPL");
41000+
41001Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.c
41002===================================================================
41003--- /dev/null 1970-01-01 00:00:00.000000000 +0000
41004+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.c 2008-02-20 09:32:49.000000000 +0100
41005@@ -0,0 +1,308 @@
41006+/****************************************************************************
41007+ * Solarflare driver for Xen network acceleration
41008+ *
41009+ * Copyright 2006-2008: Solarflare Communications Inc,
41010+ * 9501 Jeronimo Road, Suite 250,
41011+ * Irvine, CA 92618, USA
41012+ *
41013+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
41014+ *
41015+ * This program is free software; you can redistribute it and/or modify it
41016+ * under the terms of the GNU General Public License version 2 as published
41017+ * by the Free Software Foundation, incorporated herein by reference.
41018+ *
41019+ * This program is distributed in the hope that it will be useful,
41020+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
41021+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41022+ * GNU General Public License for more details.
41023+ *
41024+ * You should have received a copy of the GNU General Public License
41025+ * along with this program; if not, write to the Free Software
41026+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
41027+ ****************************************************************************
41028+ */
41029+
41030+#include <linux/socket.h>
41031+#include <linux/in.h>
41032+#include <linux/ip.h>
41033+#include <linux/tcp.h>
41034+#include <linux/list.h>
41035+#include <net/ip.h>
41036+#include <net/checksum.h>
41037+
41038+#include "accel.h"
41039+#include "accel_util.h"
41040+#include "accel_bufs.h"
41041+
41042+#include "accel_ssr.h"
41043+
41044+static inline int list_valid(struct list_head *lh) {
41045+ return(lh->next != NULL);
41046+}
41047+
41048+static void netfront_accel_ssr_deliver (struct netfront_accel_vnic *vnic,
41049+ struct netfront_accel_ssr_state *st,
41050+ struct netfront_accel_ssr_conn *c);
41051+
41052+/** Construct a netfront_accel_ssr_state.
41053+ *
41054+ * @v st The SSR state (per channel per port)
41056+ */
41057+void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st) {
41058+ unsigned i;
41059+
41060+ INIT_LIST_HEAD(&st->conns);
41061+ INIT_LIST_HEAD(&st->free_conns);
41062+ for (i = 0; i < 8; ++i) {
41063+ struct netfront_accel_ssr_conn *c =
41064+ kmalloc(sizeof(*c), GFP_KERNEL);
41065+ if (c == NULL) break;
41066+ c->n_in_order_pkts = 0;
41067+ c->skb = NULL;
41068+ list_add(&c->link, &st->free_conns);
41069+ }
41070+
41071+}
41072+
41073+
41074+/** Destructor for an efx_ssr_state.
41075+ *
41076+ * @v st The SSR state (per channel per port)
41077+ */
41078+void netfront_accel_ssr_fini(netfront_accel_vnic *vnic,
41079+ struct netfront_accel_ssr_state *st) {
41080+ struct netfront_accel_ssr_conn *c;
41081+
41082+ /* Return cleanly if efx_ssr_init() not previously called */
41083+ BUG_ON(list_valid(&st->conns) != list_valid(&st->free_conns));
41084+ if (! list_valid(&st->conns))
41085+ return;
41086+
41087+ while ( ! list_empty(&st->free_conns)) {
41088+ c = list_entry(st->free_conns.prev,
41089+ struct netfront_accel_ssr_conn, link);
41090+ list_del(&c->link);
41091+ BUG_ON(c->skb != NULL);
41092+ kfree(c);
41093+ }
41094+ while ( ! list_empty(&st->conns)) {
41095+ c = list_entry(st->conns.prev,
41096+ struct netfront_accel_ssr_conn, link);
41097+ list_del(&c->link);
41098+ if (c->skb)
41099+ netfront_accel_ssr_deliver(vnic, st, c);
41100+ kfree(c);
41101+ }
41102+}
41103+
41104+
41105+/** Calc IP checksum and deliver to the OS
41106+ *
41107+ * @v st The SSR state (per channel per port)
41108+ * @v c The SSR connection state
41109+ */
41110+static void netfront_accel_ssr_deliver(netfront_accel_vnic *vnic,
41111+ struct netfront_accel_ssr_state *st,
41112+ struct netfront_accel_ssr_conn *c) {
41113+ BUG_ON(c->skb == NULL);
41114+
41115+ /*
41116+ * If we've chained packets together, recalculate the IP
41117+ * checksum.
41118+ */
41119+ if (skb_shinfo(c->skb)->frag_list) {
41120+ NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_bursts);
41121+ c->iph->check = 0;
41122+ c->iph->check = ip_fast_csum((unsigned char *) c->iph,
41123+ c->iph->ihl);
41124+ }
41125+
41126+ VPRINTK("%s: %d\n", __FUNCTION__, c->skb->len);
41127+
41128+ netif_receive_skb(c->skb);
41129+ c->skb = NULL;
41130+}
41131+
41132+
41133+/** Push held skbs down into network stack.
41134+ *
41135+ * @v st SSR state
41136+ *
41137+ * Only called if we are tracking one or more connections.
41138+ */
41139+void __netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic,
41140+ struct netfront_accel_ssr_state *st) {
41141+ struct netfront_accel_ssr_conn *c;
41142+
41143+ BUG_ON(list_empty(&st->conns));
41144+
41145+ list_for_each_entry(c, &st->conns, link)
41146+ if (c->skb)
41147+ netfront_accel_ssr_deliver(vnic, st, c);
41148+
41149+ /* Time-out connections that have received no traffic for 20ms. */
41150+ c = list_entry(st->conns.prev, struct netfront_accel_ssr_conn,
41151+ link);
41152+ if (jiffies - c->last_pkt_jiffies > (HZ / 50 + 1)) {
41153+ NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_drop_stream);
41154+ list_del(&c->link);
41155+ list_add(&c->link, &st->free_conns);
41156+ }
41157+}
41158+
41159+
41160+/** Process SKB and decide whether to dispatch it to the stack now or
41161+ * later.
41162+ *
41163+ * @v st SSR state
41164+ * @v skb              SKB to examine
41165+ * @ret rc             0 => deliver SKB to kernel now, otherwise the SKB belongs
41166+ *                     to us.
41167+ */
41168+int netfront_accel_ssr_skb(struct netfront_accel_vnic *vnic,
41169+ struct netfront_accel_ssr_state *st,
41170+ struct sk_buff *skb) {
41171+ int data_length, dont_merge;
41172+ struct netfront_accel_ssr_conn *c;
41173+ struct iphdr *iph;
41174+ struct tcphdr *th;
41175+ unsigned th_seq;
41176+
41177+ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
41178+ BUG_ON(skb->next != NULL);
41179+
41180+ /* We're not interested if it isn't TCP over IPv4. */
41181+ iph = (struct iphdr *) skb->data;
41182+ if (skb->protocol != htons(ETH_P_IP) ||
41183+ iph->protocol != IPPROTO_TCP) {
41184+ return 0;
41185+ }
41186+
41187+ /* Ignore segments that fail csum or are fragmented. */
41188+ if (unlikely((skb->ip_summed - CHECKSUM_UNNECESSARY) |
41189+ (iph->frag_off & htons(IP_MF | IP_OFFSET)))) {
41190+ return 0;
41191+ }
41192+
41193+ th = (struct tcphdr*)(skb->data + iph->ihl * 4);
41194+ data_length = ntohs(iph->tot_len) - iph->ihl * 4 - th->doff * 4;
41195+ th_seq = ntohl(th->seq);
41196+ dont_merge = (data_length == 0) | th->urg | th->syn | th->rst;
41197+
41198+ list_for_each_entry(c, &st->conns, link) {
41199+ if ((c->saddr - iph->saddr) |
41200+ (c->daddr - iph->daddr) |
41201+ (c->source - th->source) |
41202+ (c->dest - th->dest ))
41203+ continue;
41204+
41205+ /* Re-insert at head of list to reduce lookup time. */
41206+ list_del(&c->link);
41207+ list_add(&c->link, &st->conns);
41208+ c->last_pkt_jiffies = jiffies;
41209+
41210+ if (unlikely(th_seq - c->next_seq)) {
41211+ /* Out-of-order, so start counting again. */
41212+ if (c->skb)
41213+ netfront_accel_ssr_deliver(vnic, st, c);
41214+ c->n_in_order_pkts = 0;
41215+ c->next_seq = th_seq + data_length;
41216+ NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_misorder);
41217+ return 0;
41218+ }
41219+ c->next_seq = th_seq + data_length;
41220+
41221+ if (++c->n_in_order_pkts < 300) {
41222+ /* May be in slow-start, so don't merge. */
41223+ NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_slow_start);
41224+ return 0;
41225+ }
41226+
41227+ if (unlikely(dont_merge)) {
41228+ if (c->skb)
41229+ netfront_accel_ssr_deliver(vnic, st, c);
41230+ return 0;
41231+ }
41232+
41233+ if (c->skb) {
41234+ c->iph->tot_len = ntohs(c->iph->tot_len);
41235+ c->iph->tot_len += data_length;
41236+ c->iph->tot_len = htons(c->iph->tot_len);
41237+ c->th->ack_seq = th->ack_seq;
41238+ c->th->fin |= th->fin;
41239+ c->th->psh |= th->psh;
41240+ c->th->window = th->window;
41241+
41242+ /* Remove the headers from this skb. */
41243+ skb_pull(skb, skb->len - data_length);
41244+
41245+ /*
41246+ * Tack the new skb onto the head skb's frag_list.
41247+ * This is exactly the format that fragmented IP
41248+ * datagrams are reassembled into.
41249+ */
41250+ BUG_ON(skb->next != 0);
41251+ if ( ! skb_shinfo(c->skb)->frag_list)
41252+ skb_shinfo(c->skb)->frag_list = skb;
41253+ else
41254+ c->skb_tail->next = skb;
41255+ c->skb_tail = skb;
41256+ c->skb->len += skb->len;
41257+ c->skb->data_len += skb->len;
41258+ c->skb->truesize += skb->truesize;
41259+
41260+ NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_merges);
41261+
41262+ /*
41263+ * If the next packet might push this super-packet
41264+ * over the limit for an IP packet, deliver it now.
41265+ * This is slightly conservative, but close enough.
41266+ */
41267+ if (c->skb->len +
41268+ (PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)
41269+ > 16384)
41270+ netfront_accel_ssr_deliver(vnic, st, c);
41271+
41272+ return 1;
41273+ }
41274+ else {
41275+ c->iph = iph;
41276+ c->th = th;
41277+ c->skb = skb;
41278+ return 1;
41279+ }
41280+ }
41281+
41282+ /* We're not yet tracking this connection. */
41283+
41284+ if (dont_merge) {
41285+ return 0;
41286+ }
41287+
41288+ if (list_empty(&st->free_conns)) {
41289+ c = list_entry(st->conns.prev,
41290+ struct netfront_accel_ssr_conn,
41291+ link);
41292+ if (c->skb) {
41293+ NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_too_many);
41294+ return 0;
41295+ }
41296+ }
41297+ else {
41298+ c = list_entry(st->free_conns.next,
41299+ struct netfront_accel_ssr_conn,
41300+ link);
41301+ }
41302+ list_del(&c->link);
41303+ list_add(&c->link, &st->conns);
41304+ c->saddr = iph->saddr;
41305+ c->daddr = iph->daddr;
41306+ c->source = th->source;
41307+ c->dest = th->dest;
41308+ c->next_seq = th_seq + data_length;
41309+ c->n_in_order_pkts = 0;
41310+ BUG_ON(c->skb != NULL);
41311+ NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_new_stream);
41312+ return 0;
41313+}
41314Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.h
41315===================================================================
41316--- /dev/null 1970-01-01 00:00:00.000000000 +0000
41317+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.h 2008-02-20 09:32:49.000000000 +0100
41318@@ -0,0 +1,88 @@
41319+/****************************************************************************
41320+ * Solarflare driver for Xen network acceleration
41321+ *
41322+ * Copyright 2006-2008: Solarflare Communications Inc,
41323+ * 9501 Jeronimo Road, Suite 250,
41324+ * Irvine, CA 92618, USA
41325+ *
41326+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
41327+ *
41328+ * This program is free software; you can redistribute it and/or modify it
41329+ * under the terms of the GNU General Public License version 2 as published
41330+ * by the Free Software Foundation, incorporated herein by reference.
41331+ *
41332+ * This program is distributed in the hope that it will be useful,
41333+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
41334+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41335+ * GNU General Public License for more details.
41336+ *
41337+ * You should have received a copy of the GNU General Public License
41338+ * along with this program; if not, write to the Free Software
41339+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
41340+ ****************************************************************************
41341+ */
41342+
41343+#ifndef NETFRONT_ACCEL_SSR_H
41344+#define NETFRONT_ACCEL_SSR_H
41345+
41346+#include <linux/skbuff.h>
41347+#include <linux/ip.h>
41348+#include <linux/tcp.h>
41349+#include <linux/list.h>
41350+
41351+#include "accel.h"
41352+
41353+/** State for Soft Segment Reassembly (SSR). */
41354+
41355+struct netfront_accel_ssr_conn {
41356+ struct list_head link;
41357+
41358+ unsigned saddr, daddr;
41359+ unsigned short source, dest;
41360+
41361+ /** Number of in-order packets we've seen with payload. */
41362+ unsigned n_in_order_pkts;
41363+
41364+ /** Next in-order sequence number. */
41365+ unsigned next_seq;
41366+
41367+ /** Time we last saw a packet on this connection. */
41368+ unsigned long last_pkt_jiffies;
41369+
41370+ /** The SKB we are currently holding. If NULL, then all following
41371+ * fields are undefined.
41372+ */
41373+ struct sk_buff *skb;
41374+
41375+ /** The tail of the frag_list of SKBs we're holding. Only valid
41376+ * after at least one merge.
41377+ */
41378+ struct sk_buff *skb_tail;
41379+
41380+ /** The IP header of the skb we are holding. */
41381+ struct iphdr *iph;
41382+
41383+ /** The TCP header of the skb we are holding. */
41384+ struct tcphdr *th;
41385+};
41386+
41387+extern void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st);
41388+extern void netfront_accel_ssr_fini(netfront_accel_vnic *vnic,
41389+ struct netfront_accel_ssr_state *st);
41390+
41391+extern void
41392+__netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic,
41393+ struct netfront_accel_ssr_state *st);
41394+
41395+extern int netfront_accel_ssr_skb(netfront_accel_vnic *vnic,
41396+ struct netfront_accel_ssr_state *st,
41397+ struct sk_buff *skb);
41398+
41399+static inline void
41400+netfront_accel_ssr_end_of_burst (netfront_accel_vnic *vnic,
41401+ struct netfront_accel_ssr_state *st) {
41402+ if ( ! list_empty(&st->conns) )
41403+ __netfront_accel_ssr_end_of_burst(vnic, st);
41404+}
41405+
41406+#endif /* NETFRONT_ACCEL_SSR_H */
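The SSR entry points above follow a simple ownership rule: netfront_accel_ssr_skb() returns 0 when the caller should deliver the skb itself and non-zero once the SSR layer has taken the skb, while netfront_accel_ssr_end_of_burst() flushes anything still held. A minimal caller sketch, mirroring the receive-completion path in accel_vi.c further down (that code is the authoritative usage):

	/* Illustrative sketch only -- see netfront_accel_vi_rx_complete() below. */
	if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb))
		netif_receive_skb(skb);	/* SSR declined the skb; deliver it now */

	/* Presumed to run once the RX event burst has been drained: */
	netfront_accel_ssr_end_of_burst(vnic, &vnic->ssr_state);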
41407Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.c
41408===================================================================
41409--- /dev/null 1970-01-01 00:00:00.000000000 +0000
41410+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.c 2008-02-26 10:54:12.000000000 +0100
41411@@ -0,0 +1,511 @@
41412+/****************************************************************************
41413+ * Solarflare driver for Xen network acceleration
41414+ *
41415+ * Copyright 2006-2008: Solarflare Communications Inc,
41416+ * 9501 Jeronimo Road, Suite 250,
41417+ * Irvine, CA 92618, USA
41418+ *
41419+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
41420+ *
41421+ * This program is free software; you can redistribute it and/or modify it
41422+ * under the terms of the GNU General Public License version 2 as published
41423+ * by the Free Software Foundation, incorporated herein by reference.
41424+ *
41425+ * This program is distributed in the hope that it will be useful,
41426+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
41427+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41428+ * GNU General Public License for more details.
41429+ *
41430+ * You should have received a copy of the GNU General Public License
41431+ * along with this program; if not, write to the Free Software
41432+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
41433+ ****************************************************************************
41434+ */
41435+
41436+#include <linux/pci.h>
41437+#include <linux/tcp.h>
41438+#include <linux/ip.h>
41439+#include <linux/in.h>
41440+#include <linux/if_ether.h>
41441+
41442+#include "accel.h"
41443+#include "accel_util.h"
41444+
41445+#include "accel_tso.h"
41446+
41447+#define PTR_DIFF(p1, p2) ((u8*)(p1) - (u8*)(p2))
41448+#define ETH_HDR_LEN(skb) ((skb)->nh.raw - (skb)->data)
41449+#define SKB_TCP_OFF(skb) PTR_DIFF ((skb)->h.th, (skb)->data)
41450+#define SKB_IP_OFF(skb) PTR_DIFF ((skb)->nh.iph, (skb)->data)
41451+
41452+/*
41453+ * Set a maximum number of buffers in each output packet to make life
41454+ * a little simpler - if this is reached it will just move on to
41455+ * another packet
41456+ */
41457+#define ACCEL_TSO_MAX_BUFFERS (6)
41458+
41459+/** TSO State.
41460+ *
41461+ * The state used during segmentation. It is put into this data structure
41462+ * just to make it easy to pass into inline functions.
41463+ */
41464+struct netfront_accel_tso_state {
41465+ /** bytes of data we've yet to segment */
41466+ unsigned remaining_len;
41467+
41468+ /** current sequence number */
41469+ unsigned seqnum;
41470+
41471+ /** remaining space in current packet */
41472+ unsigned packet_space;
41473+
41474+ /** List of packets to be output, containing the buffers and
41475+ * iovecs to describe each packet
41476+ */
41477+ struct netfront_accel_tso_output_packet *output_packets;
41478+
41479+ /** Total number of buffers in output_packets */
41480+ unsigned buffers;
41481+
41482+ /** Total number of packets in output_packets */
41483+ unsigned packets;
41484+
41485+ /** Input Fragment Cursor.
41486+ *
41487+ * Where we are in the current fragment of the incoming SKB. These
41488+ * values get updated in place when we split a fragment over
41489+ * multiple packets.
41490+ */
41491+ struct {
41492+ /** address of current position */
41493+ void *addr;
41494+ /** remaining length */
41495+ unsigned int len;
41496+ } ifc; /* == ifc Input Fragment Cursor */
41497+
41498+ /** Parameters.
41499+ *
41500+ * These values are set once at the start of the TSO send and do
41501+ * not get changed as the routine progresses.
41502+ */
41503+ struct {
41504+ /* the number of bytes of header */
41505+ unsigned int header_length;
41506+
41507+ /* The number of bytes to put in each outgoing segment. */
41508+ int full_packet_size;
41509+
41510+ /* Current IP ID, host endian. */
41511+ unsigned ip_id;
41512+
41513+ /* Max size of each output packet payload */
41514+ int gso_size;
41515+ } p;
41516+};
41517+
41518+
41519+/**
41520+ * Verify that our various assumptions about sk_buffs and the conditions
41521+ * under which TSO will be attempted hold true.
41522+ *
41523+ * @v skb The sk_buff to check.
41524+ */
41525+static inline void tso_check_safe(struct sk_buff *skb) {
41526+ EPRINTK_ON(skb->protocol != htons (ETH_P_IP));
41527+ EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP));
41528+ EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP);
41529+ EPRINTK_ON((SKB_TCP_OFF(skb)
41530+ + (skb->h.th->doff << 2u)) > skb_headlen(skb));
41531+}
41532+
41533+
41534+
41535+/** Parse the SKB header and initialise state. */
41536+static inline void tso_start(struct netfront_accel_tso_state *st,
41537+ struct sk_buff *skb) {
41538+
41539+ /*
41540+ * All ethernet/IP/TCP headers combined size is TCP header size
41541+ * plus offset of TCP header relative to start of packet.
41542+ */
41543+ st->p.header_length = (skb->h.th->doff << 2u) + SKB_TCP_OFF(skb);
41544+ st->p.full_packet_size = (st->p.header_length
41545+ + skb_shinfo(skb)->gso_size);
41546+ st->p.gso_size = skb_shinfo(skb)->gso_size;
41547+
41548+ st->p.ip_id = htons(skb->nh.iph->id);
41549+ st->seqnum = ntohl(skb->h.th->seq);
41550+
41551+ EPRINTK_ON(skb->h.th->urg);
41552+ EPRINTK_ON(skb->h.th->syn);
41553+ EPRINTK_ON(skb->h.th->rst);
41554+
41555+ st->remaining_len = skb->len - st->p.header_length;
41556+
41557+ st->output_packets = NULL;
41558+ st->buffers = 0;
41559+ st->packets = 0;
41560+
41561+ VPRINTK("Starting new TSO: hl %d ps %d gso %d seq %x len %d\n",
41562+ st->p.header_length, st->p.full_packet_size, st->p.gso_size,
41563+ st->seqnum, skb->len);
41564+}
41565+
41566+/**
41567+ * Add another NIC mapped buffer onto an output packet
41568+ */
41569+static inline int tso_start_new_buffer(netfront_accel_vnic *vnic,
41570+ struct netfront_accel_tso_state *st,
41571+ int first)
41572+{
41573+ struct netfront_accel_tso_buffer *tso_buf;
41574+ struct netfront_accel_pkt_desc *buf;
41575+
41576+ /* Get a mapped packet buffer */
41577+ buf = netfront_accel_buf_get(vnic->tx_bufs);
41578+ if (buf == NULL) {
41579+ DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
41580+ return -1;
41581+ }
41582+
41583+ /* Store a bit of meta-data at the end */
41584+	tso_buf = (struct netfront_accel_tso_buffer *)
41585+ (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH
41586+ + sizeof(struct netfront_accel_tso_output_packet));
41587+
41588+ tso_buf->buf = buf;
41589+
41590+ tso_buf->length = 0;
41591+
41592+ if (first) {
41593+ struct netfront_accel_tso_output_packet *output_packet
41594+ = (struct netfront_accel_tso_output_packet *)
41595+ (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH);
41596+ output_packet->next = st->output_packets;
41597+ st->output_packets = output_packet;
41598+ tso_buf->next = NULL;
41599+ st->output_packets->tso_bufs = tso_buf;
41600+ st->output_packets->tso_bufs_len = 1;
41601+ } else {
41602+ tso_buf->next = st->output_packets->tso_bufs;
41603+ st->output_packets->tso_bufs = tso_buf;
41604+ st->output_packets->tso_bufs_len ++;
41605+ }
41606+
41607+ BUG_ON(st->output_packets->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
41608+
41609+ st->buffers ++;
41610+
41611+ /*
41612+	 * Store the skb context: NULL for now; the last buffer of the
41613+	 * packet will get a non-NULL skb later
41614+ */
41615+ tso_buf->buf->skb = NULL;
41616+
41617+ return 0;
41618+}
41619+
41620+
41621+/* Generate a new header, and prepare for the new packet.
41622+ *
41623+ * @v vnic VNIC
41624+ * @v skb Socket buffer
41625+ * @v st TSO state
41626+ * @ret rc 0 on success, or -1 if failed to alloc header
41627+ */
41628+
41629+static inline
41630+int tso_start_new_packet(netfront_accel_vnic *vnic,
41631+ struct sk_buff *skb,
41632+ struct netfront_accel_tso_state *st)
41633+{
41634+ struct netfront_accel_tso_buffer *tso_buf;
41635+ struct iphdr *tsoh_iph;
41636+ struct tcphdr *tsoh_th;
41637+ unsigned ip_length;
41638+
41639+ if (tso_start_new_buffer(vnic, st, 1) < 0) {
41640+ NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
41641+ return -1;
41642+ }
41643+
41644+ /* This has been set up by tso_start_new_buffer() */
41645+ tso_buf = st->output_packets->tso_bufs;
41646+
41647+ /* Copy in the header */
41648+ memcpy(tso_buf->buf->pkt_kva, skb->data, st->p.header_length);
41649+ tso_buf->length = st->p.header_length;
41650+
41651+ tsoh_th = (struct tcphdr*)
41652+ (tso_buf->buf->pkt_kva + SKB_TCP_OFF(skb));
41653+ tsoh_iph = (struct iphdr*)
41654+ (tso_buf->buf->pkt_kva + SKB_IP_OFF(skb));
41655+
41656+ /* Set to zero to encourage falcon to fill these in */
41657+ tsoh_th->check = 0;
41658+ tsoh_iph->check = 0;
41659+
41660+ tsoh_th->seq = htonl(st->seqnum);
41661+ st->seqnum += st->p.gso_size;
41662+
41663+ if (st->remaining_len > st->p.gso_size) {
41664+ /* This packet will not finish the TSO burst. */
41665+ ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
41666+ tsoh_th->fin = 0;
41667+ tsoh_th->psh = 0;
41668+ } else {
41669+ /* This packet will be the last in the TSO burst. */
41670+ ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
41671+ + st->remaining_len);
41672+ tsoh_th->fin = skb->h.th->fin;
41673+ tsoh_th->psh = skb->h.th->psh;
41674+ }
41675+
41676+ tsoh_iph->tot_len = htons(ip_length);
41677+
41678+ /* Linux leaves suitable gaps in the IP ID space for us to fill. */
41679+ tsoh_iph->id = st->p.ip_id++;
41680+ tsoh_iph->id = htons(tsoh_iph->id);
41681+
41682+ st->packet_space = st->p.gso_size;
41683+
41684+ st->packets++;
41685+
41686+ return 0;
41687+}
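To make the header rewriting in tso_start_new_packet() concrete: each new segment takes the current sequence number and then advances it by gso_size, the IP ID is bumped by one per segment, and only the final segment (remaining_len <= gso_size) carries the original FIN/PSH flags and a shortened tot_len. A worked example with illustrative numbers, not taken from this patch:

	/*
	 * Suppose gso_size = 1448 and the skb carries 3000 bytes of TCP
	 * payload starting at sequence number S with IP ID N:
	 *
	 *   segment 1: seq = S,        ip_id = N,     1448 payload bytes
	 *   segment 2: seq = S + 1448, ip_id = N + 1, 1448 payload bytes
	 *   segment 3: seq = S + 2896, ip_id = N + 2,  104 payload bytes,
	 *              tot_len derived from remaining_len, FIN/PSH copied
	 *              from the original header.
	 */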
41688+
41689+
41690+
41691+static inline void tso_get_fragment(struct netfront_accel_tso_state *st,
41692+ int len, void *addr)
41693+{
41694+ st->ifc.len = len;
41695+ st->ifc.addr = addr;
41696+ return;
41697+}
41698+
41699+
41700+static inline void tso_unwind(netfront_accel_vnic *vnic,
41701+ struct netfront_accel_tso_state *st)
41702+{
41703+ struct netfront_accel_tso_buffer *tso_buf;
41704+ struct netfront_accel_tso_output_packet *output_packet;
41705+
41706+ DPRINTK("%s\n", __FUNCTION__);
41707+
41708+ while (st->output_packets != NULL) {
41709+ output_packet = st->output_packets;
41710+ st->output_packets = output_packet->next;
41711+ while (output_packet->tso_bufs != NULL) {
41712+ tso_buf = output_packet->tso_bufs;
41713+ output_packet->tso_bufs = tso_buf->next;
41714+
41715+ st->buffers --;
41716+ output_packet->tso_bufs_len --;
41717+
41718+ netfront_accel_buf_put(vnic->tx_bufs,
41719+ tso_buf->buf->buf_id);
41720+ }
41721+ }
41722+ BUG_ON(st->buffers != 0);
41723+}
41724+
41725+
41726+
41727+static inline
41728+void tso_fill_packet_with_fragment(netfront_accel_vnic *vnic,
41729+ struct netfront_accel_tso_state *st)
41730+{
41731+ struct netfront_accel_tso_buffer *tso_buf;
41732+ int n, space;
41733+
41734+ BUG_ON(st->output_packets == NULL);
41735+ BUG_ON(st->output_packets->tso_bufs == NULL);
41736+
41737+ tso_buf = st->output_packets->tso_bufs;
41738+
41739+ if (st->ifc.len == 0) return;
41740+ if (st->packet_space == 0) return;
41741+ if (tso_buf->length == NETFRONT_ACCEL_TSO_BUF_LENGTH) return;
41742+
41743+ n = min(st->ifc.len, st->packet_space);
41744+
41745+ space = NETFRONT_ACCEL_TSO_BUF_LENGTH - tso_buf->length;
41746+ n = min(n, space);
41747+
41748+ st->packet_space -= n;
41749+ st->remaining_len -= n;
41750+ st->ifc.len -= n;
41751+
41752+ memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
41753+
41754+ tso_buf->length += n;
41755+
41756+ BUG_ON(tso_buf->length > NETFRONT_ACCEL_TSO_BUF_LENGTH);
41757+
41758+ st->ifc.addr += n;
41759+
41760+ return;
41761+}
41762+
41763+
41764+int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic,
41765+ struct sk_buff *skb)
41766+{
41767+ struct netfront_accel_tso_state state;
41768+ struct netfront_accel_tso_buffer *tso_buf = NULL;
41769+ struct netfront_accel_tso_output_packet *reversed_list = NULL;
41770+ struct netfront_accel_tso_output_packet *tmp_pkt;
41771+ ef_iovec iovecs[ACCEL_TSO_MAX_BUFFERS];
41772+ int frag_i, rc, dma_id;
41773+ skb_frag_t *f;
41774+
41775+ tso_check_safe(skb);
41776+
41777+ if (skb->ip_summed != CHECKSUM_HW)
41778+ EPRINTK("Trying to TSO send a packet without HW checksum\n");
41779+
41780+ tso_start(&state, skb);
41781+
41782+ /*
41783+ * Setup the first payload fragment. If the skb header area
41784+ * contains exactly the headers and all payload is in the frag
41785+	 * list, things are a little simpler
41786+ */
41787+ if (skb_headlen(skb) == state.p.header_length) {
41788+ /* Grab the first payload fragment. */
41789+ BUG_ON(skb_shinfo(skb)->nr_frags < 1);
41790+ frag_i = 0;
41791+ f = &skb_shinfo(skb)->frags[frag_i];
41792+ tso_get_fragment(&state, f->size,
41793+ page_address(f->page) + f->page_offset);
41794+ } else {
41795+ int hl = state.p.header_length;
41796+ tso_get_fragment(&state, skb_headlen(skb) - hl,
41797+ skb->data + hl);
41798+ frag_i = -1;
41799+ }
41800+
41801+ if (tso_start_new_packet(vnic, skb, &state) < 0) {
41802+ DPRINTK("%s: out of first start-packet memory\n",
41803+ __FUNCTION__);
41804+ goto unwind;
41805+ }
41806+
41807+ while (1) {
41808+ tso_fill_packet_with_fragment(vnic, &state);
41809+
41810+ /* Move onto the next fragment? */
41811+ if (state.ifc.len == 0) {
41812+ if (++frag_i >= skb_shinfo(skb)->nr_frags)
41813+ /* End of payload reached. */
41814+ break;
41815+ f = &skb_shinfo(skb)->frags[frag_i];
41816+ tso_get_fragment(&state, f->size,
41817+ page_address(f->page) +
41818+ f->page_offset);
41819+ }
41820+
41821+ /* Start a new buffer? */
41822+ if ((state.output_packets->tso_bufs->length ==
41823+ NETFRONT_ACCEL_TSO_BUF_LENGTH) &&
41824+ tso_start_new_buffer(vnic, &state, 0)) {
41825+ DPRINTK("%s: out of start-buffer memory\n",
41826+ __FUNCTION__);
41827+ goto unwind;
41828+ }
41829+
41830+		/* Start a new packet? */
41831+ if ((state.packet_space == 0 ||
41832+ ((state.output_packets->tso_bufs_len >=
41833+ ACCEL_TSO_MAX_BUFFERS) &&
41834+ (state.output_packets->tso_bufs->length >=
41835+ NETFRONT_ACCEL_TSO_BUF_LENGTH))) &&
41836+ tso_start_new_packet(vnic, skb, &state) < 0) {
41837+ DPRINTK("%s: out of start-packet memory\n",
41838+ __FUNCTION__);
41839+ goto unwind;
41840+ }
41841+
41842+ }
41843+
41844+ /* Check for space */
41845+ if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
41846+ DPRINTK("%s: Not enough TX space (%d)\n",
41847+ __FUNCTION__, state.buffers);
41848+ goto unwind;
41849+ }
41850+
41851+ /*
41852+ * Store the skb context in the most recent buffer (i.e. the
41853+ * last buffer that will be sent)
41854+ */
41855+ state.output_packets->tso_bufs->buf->skb = skb;
41856+
41857+ /* Reverse the list of packets as we construct it on a stack */
41858+ while (state.output_packets != NULL) {
41859+ tmp_pkt = state.output_packets;
41860+ state.output_packets = tmp_pkt->next;
41861+ tmp_pkt->next = reversed_list;
41862+ reversed_list = tmp_pkt;
41863+ }
41864+
41865+ /* Pass off to hardware */
41866+ while (reversed_list != NULL) {
41867+ tmp_pkt = reversed_list;
41868+ reversed_list = tmp_pkt->next;
41869+
41870+ BUG_ON(tmp_pkt->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
41871+ BUG_ON(tmp_pkt->tso_bufs_len == 0);
41872+
41873+ dma_id = tmp_pkt->tso_bufs->buf->buf_id;
41874+
41875+ /*
41876+ * Make an iovec of the buffers in the list, reversing
41877+ * the buffers as we go as they are constructed on a
41878+ * stack
41879+ */
41880+ tso_buf = tmp_pkt->tso_bufs;
41881+ for (frag_i = tmp_pkt->tso_bufs_len - 1;
41882+ frag_i >= 0;
41883+ frag_i--) {
41884+ iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
41885+ iovecs[frag_i].iov_len = tso_buf->length;
41886+ tso_buf = tso_buf->next;
41887+ }
41888+
41889+ rc = ef_vi_transmitv(&vnic->vi, iovecs, tmp_pkt->tso_bufs_len,
41890+ dma_id);
41891+ /*
41892+ * We checked for space already, so it really should
41893+ * succeed
41894+ */
41895+ BUG_ON(rc != 0);
41896+ }
41897+
41898+ /* Track number of tx fastpath stats */
41899+ vnic->netdev_stats.fastpath_tx_bytes += skb->len;
41900+ vnic->netdev_stats.fastpath_tx_pkts += state.packets;
41901+#if NETFRONT_ACCEL_STATS
41902+ {
41903+ unsigned n;
41904+ n = vnic->netdev_stats.fastpath_tx_pkts -
41905+ vnic->stats.fastpath_tx_completions;
41906+ if (n > vnic->stats.fastpath_tx_pending_max)
41907+ vnic->stats.fastpath_tx_pending_max = n;
41908+ }
41909+#endif
41910+
41911+ return NETFRONT_ACCEL_STATUS_GOOD;
41912+
41913+ unwind:
41914+ tso_unwind(vnic, &state);
41915+
41916+ NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
41917+
41918+ return NETFRONT_ACCEL_STATUS_BUSY;
41919+}
41920+
41921+
41922+
41923Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.h
41924===================================================================
41925--- /dev/null 1970-01-01 00:00:00.000000000 +0000
41926+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.h 2008-02-20 09:32:49.000000000 +0100
41927@@ -0,0 +1,57 @@
41928+/****************************************************************************
41929+ * Solarflare driver for Xen network acceleration
41930+ *
41931+ * Copyright 2006-2008: Solarflare Communications Inc,
41932+ * 9501 Jeronimo Road, Suite 250,
41933+ * Irvine, CA 92618, USA
41934+ *
41935+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
41936+ *
41937+ * This program is free software; you can redistribute it and/or modify it
41938+ * under the terms of the GNU General Public License version 2 as published
41939+ * by the Free Software Foundation, incorporated herein by reference.
41940+ *
41941+ * This program is distributed in the hope that it will be useful,
41942+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
41943+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41944+ * GNU General Public License for more details.
41945+ *
41946+ * You should have received a copy of the GNU General Public License
41947+ * along with this program; if not, write to the Free Software
41948+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
41949+ ****************************************************************************
41950+ */
41951+
41952+#ifndef NETFRONT_ACCEL_TSO_H
41953+#define NETFRONT_ACCEL_TSO_H
41954+
41955+#include "accel_bufs.h"
41956+
41957+/* Track the buffers used in each output packet */
41958+struct netfront_accel_tso_buffer {
41959+ struct netfront_accel_tso_buffer *next;
41960+ struct netfront_accel_pkt_desc *buf;
41961+ unsigned length;
41962+};
41963+
41964+/* Track the output packets formed from each input packet */
41965+struct netfront_accel_tso_output_packet {
41966+ struct netfront_accel_tso_output_packet *next;
41967+ struct netfront_accel_tso_buffer *tso_bufs;
41968+ unsigned tso_bufs_len;
41969+};
41970+
41971+
41972+/*
41973+ * Max available space in a buffer for data once meta-data has taken
41974+ * its place
41975+ */
41976+#define NETFRONT_ACCEL_TSO_BUF_LENGTH \
41977+ ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \
41978+ - sizeof(struct netfront_accel_tso_buffer) \
41979+ - sizeof(struct netfront_accel_tso_output_packet))
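NETFRONT_ACCEL_TSO_BUF_LENGTH is plain buffer-carving arithmetic: each packet buffer is a fixed fraction of a page and the two TSO metadata structs live at its tail (see tso_start_new_buffer() earlier). A worked example under assumed values; NETFRONT_ACCEL_BUFS_PER_PAGE comes from accel_bufs.h and is not shown in this patch, so the numbers below are illustrative only:

	/*
	 * Illustrative only: with 4096-byte pages and, say, 4 buffers per
	 * page, each buffer spans 1024 bytes.  Subtracting the two metadata
	 * structs (a few dozen bytes between them) leaves roughly 1000 bytes
	 * for the copied headers and payload.  tso_start_new_buffer() places
	 * struct netfront_accel_tso_output_packet at
	 * pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH, with
	 * struct netfront_accel_tso_buffer immediately after it.
	 */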
41980+
41981+int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic,
41982+ struct sk_buff *skb);
41983+
41984+#endif /* NETFRONT_ACCEL_TSO_H */
41985Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_vi.c
41986===================================================================
41987--- /dev/null 1970-01-01 00:00:00.000000000 +0000
41988+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_vi.c 2008-02-20 09:32:49.000000000 +0100
41989@@ -0,0 +1,1194 @@
41990+/****************************************************************************
41991+ * Solarflare driver for Xen network acceleration
41992+ *
41993+ * Copyright 2006-2008: Solarflare Communications Inc,
41994+ * 9501 Jeronimo Road, Suite 250,
41995+ * Irvine, CA 92618, USA
41996+ *
41997+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
41998+ *
41999+ * This program is free software; you can redistribute it and/or modify it
42000+ * under the terms of the GNU General Public License version 2 as published
42001+ * by the Free Software Foundation, incorporated herein by reference.
42002+ *
42003+ * This program is distributed in the hope that it will be useful,
42004+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
42005+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
42006+ * GNU General Public License for more details.
42007+ *
42008+ * You should have received a copy of the GNU General Public License
42009+ * along with this program; if not, write to the Free Software
42010+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
42011+ ****************************************************************************
42012+ */
42013+
42014+#include <linux/if_ether.h>
42015+#include <linux/ip.h>
42016+#include <net/checksum.h>
42017+#include <asm/io.h>
42018+
42019+#include "accel.h"
42020+#include "accel_util.h"
42021+#include "accel_bufs.h"
42022+#include "accel_tso.h"
42023+#include "accel_ssr.h"
42024+#include "netfront.h"
42025+
42026+#include "etherfabric/ef_vi.h"
42027+
42028+/*
42029+ * Max available space in a buffer for data once meta-data has taken
42030+ * its place
42031+ */
42032+#define NETFRONT_ACCEL_TX_BUF_LENGTH \
42033+ ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \
42034+ - sizeof(struct netfront_accel_tso_buffer))
42035+
42036+#define ACCEL_TX_MAX_BUFFERS (6)
42037+#define ACCEL_VI_POLL_EVENTS (8)
42038+
42039+static
42040+int netfront_accel_vi_init_fini(netfront_accel_vnic *vnic,
42041+ struct net_accel_msg_hw *hw_msg)
42042+{
42043+ struct ef_vi_nic_type nic_type;
42044+ struct net_accel_hw_falcon_b *hw_info;
42045+ void *io_kva, *evq_base, *rx_dma_kva, *tx_dma_kva, *doorbell_kva;
42046+ u32 *evq_gnts;
42047+ u32 evq_order;
42048+ int vi_state_size;
42049+ u8 vi_data[VI_MAPPINGS_SIZE];
42050+
42051+ if (hw_msg == NULL)
42052+ goto fini;
42053+
42054+ /* And create the local macs table lock */
42055+ spin_lock_init(&vnic->table_lock);
42056+
42057+ /* Create fastpath table, initial size 8, key length 8 */
42058+ if (cuckoo_hash_init(&vnic->fastpath_table, 3, 8)) {
42059+ EPRINTK("failed to allocate fastpath table\n");
42060+ goto fail_cuckoo;
42061+ }
42062+
42063+ vnic->hw.falcon.type = hw_msg->type;
42064+
42065+ switch (hw_msg->type) {
42066+ case NET_ACCEL_MSG_HWTYPE_FALCON_A:
42067+ hw_info = &hw_msg->resources.falcon_a.common;
42068+ /* Need the extra rptr register page on A1 */
42069+ io_kva = net_accel_map_iomem_page
42070+ (vnic->dev, hw_msg->resources.falcon_a.evq_rptr_gnt,
42071+ &vnic->hw.falcon.evq_rptr_mapping);
42072+ if (io_kva == NULL) {
42073+ EPRINTK("%s: evq_rptr permission failed\n", __FUNCTION__);
42074+ goto evq_rptr_fail;
42075+ }
42076+
42077+ vnic->hw.falcon.evq_rptr = io_kva +
42078+ (hw_info->evq_rptr & (PAGE_SIZE - 1));
42079+ break;
42080+ case NET_ACCEL_MSG_HWTYPE_FALCON_B:
42081+ hw_info = &hw_msg->resources.falcon_b;
42082+ break;
42083+ default:
42084+ goto bad_type;
42085+ }
42086+
42087+ /**** Event Queue ****/
42088+
42089+ /* Map the event queue pages */
42090+ evq_gnts = hw_info->evq_mem_gnts;
42091+ evq_order = hw_info->evq_order;
42092+
42093+ EPRINTK_ON(hw_info->evq_offs != 0);
42094+
42095+ DPRINTK("Will map evq %d pages\n", 1 << evq_order);
42096+
42097+ evq_base =
42098+ net_accel_map_grants_contig(vnic->dev, evq_gnts, 1 << evq_order,
42099+ &vnic->evq_mapping);
42100+ if (evq_base == NULL) {
42101+ EPRINTK("%s: evq_base failed\n", __FUNCTION__);
42102+ goto evq_fail;
42103+ }
42104+
42105+ /**** Doorbells ****/
42106+ /* Set up the doorbell mappings. */
42107+ doorbell_kva =
42108+ net_accel_map_iomem_page(vnic->dev, hw_info->doorbell_gnt,
42109+ &vnic->hw.falcon.doorbell_mapping);
42110+ if (doorbell_kva == NULL) {
42111+ EPRINTK("%s: doorbell permission failed\n", __FUNCTION__);
42112+ goto doorbell_fail;
42113+ }
42114+ vnic->hw.falcon.doorbell = doorbell_kva;
42115+
42116+ /* On Falcon_B we get the rptr from the doorbell page */
42117+ if (hw_msg->type == NET_ACCEL_MSG_HWTYPE_FALCON_B) {
42118+ vnic->hw.falcon.evq_rptr =
42119+ (u32 *)((char *)vnic->hw.falcon.doorbell
42120+ + hw_info->evq_rptr);
42121+ }
42122+
42123+ /**** DMA Queue ****/
42124+
42125+ /* Set up the DMA Queues from the message. */
42126+ tx_dma_kva = net_accel_map_grants_contig
42127+ (vnic->dev, &(hw_info->txdmaq_gnt), 1,
42128+ &vnic->hw.falcon.txdmaq_mapping);
42129+ if (tx_dma_kva == NULL) {
42130+ EPRINTK("%s: TX dma failed\n", __FUNCTION__);
42131+ goto tx_dma_fail;
42132+ }
42133+
42134+ rx_dma_kva = net_accel_map_grants_contig
42135+ (vnic->dev, &(hw_info->rxdmaq_gnt), 1,
42136+ &vnic->hw.falcon.rxdmaq_mapping);
42137+ if (rx_dma_kva == NULL) {
42138+ EPRINTK("%s: RX dma failed\n", __FUNCTION__);
42139+ goto rx_dma_fail;
42140+ }
42141+
42142+ /* Full confession */
42143+ DPRINTK("Mapped H/W"
42144+ " Tx DMAQ grant %x -> %p\n"
42145+ " Rx DMAQ grant %x -> %p\n"
42146+ " EVQ grant %x -> %p\n",
42147+ hw_info->txdmaq_gnt, tx_dma_kva,
42148+ hw_info->rxdmaq_gnt, rx_dma_kva,
42149+ evq_gnts[0], evq_base
42150+ );
42151+
42152+ memset(vi_data, 0, sizeof(vi_data));
42153+
42154+ /* TODO BUG11305: convert efhw_arch to ef_vi_arch
42155+ * e.g.
42156+ * arch = ef_vi_arch_from_efhw_arch(hw_info->nic_arch);
42157+ * assert(arch >= 0);
42158+ * nic_type.arch = arch;
42159+ */
42160+ nic_type.arch = (unsigned char)hw_info->nic_arch;
42161+ nic_type.variant = (char)hw_info->nic_variant;
42162+ nic_type.revision = (unsigned char)hw_info->nic_revision;
42163+
42164+ ef_vi_init_mapping_evq(vi_data, nic_type, hw_info->instance,
42165+ 1 << (evq_order + PAGE_SHIFT), evq_base,
42166+ (void *)0xdeadbeef);
42167+
42168+ ef_vi_init_mapping_vi(vi_data, nic_type, hw_info->rx_capacity,
42169+ hw_info->tx_capacity, hw_info->instance,
42170+ doorbell_kva, rx_dma_kva, tx_dma_kva, 0);
42171+
42172+ vi_state_size = ef_vi_calc_state_bytes(hw_info->rx_capacity,
42173+ hw_info->tx_capacity);
42174+ vnic->vi_state = (ef_vi_state *)kmalloc(vi_state_size, GFP_KERNEL);
42175+ if (vnic->vi_state == NULL) {
42176+ EPRINTK("%s: kmalloc for VI state failed\n", __FUNCTION__);
42177+ goto vi_state_fail;
42178+ }
42179+ ef_vi_init(&vnic->vi, vi_data, vnic->vi_state, &vnic->evq_state, 0);
42180+
42181+ ef_eventq_state_init(&vnic->vi);
42182+
42183+ ef_vi_state_init(&vnic->vi);
42184+
42185+ return 0;
42186+
42187+fini:
42188+ kfree(vnic->vi_state);
42189+ vnic->vi_state = NULL;
42190+vi_state_fail:
42191+ net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.rxdmaq_mapping);
42192+rx_dma_fail:
42193+ net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.txdmaq_mapping);
42194+tx_dma_fail:
42195+ net_accel_unmap_iomem_page(vnic->dev, vnic->hw.falcon.doorbell_mapping);
42196+ vnic->hw.falcon.doorbell = NULL;
42197+doorbell_fail:
42198+ net_accel_unmap_grants_contig(vnic->dev, vnic->evq_mapping);
42199+evq_fail:
42200+ if (vnic->hw.falcon.type == NET_ACCEL_MSG_HWTYPE_FALCON_A)
42201+ net_accel_unmap_iomem_page(vnic->dev,
42202+ vnic->hw.falcon.evq_rptr_mapping);
42203+ vnic->hw.falcon.evq_rptr = NULL;
42204+evq_rptr_fail:
42205+bad_type:
42206+ cuckoo_hash_destroy(&vnic->fastpath_table);
42207+fail_cuckoo:
42208+ return -EIO;
42209+}
42210+
42211+
42212+void netfront_accel_vi_ctor(netfront_accel_vnic *vnic)
42213+{
42214+ /* Just mark the VI as uninitialised. */
42215+ vnic->vi_state = NULL;
42216+}
42217+
42218+
42219+int netfront_accel_vi_init(netfront_accel_vnic *vnic, struct net_accel_msg_hw *hw_msg)
42220+{
42221+ BUG_ON(hw_msg == NULL);
42222+ return netfront_accel_vi_init_fini(vnic, hw_msg);
42223+}
42224+
42225+
42226+void netfront_accel_vi_dtor(netfront_accel_vnic *vnic)
42227+{
42228+ if (vnic->vi_state != NULL)
42229+ netfront_accel_vi_init_fini(vnic, NULL);
42230+}
42231+
42232+
42233+static
42234+void netfront_accel_vi_post_rx(netfront_accel_vnic *vnic, u16 id,
42235+ netfront_accel_pkt_desc *buf)
42236+{
42237+
42238+ int idx = vnic->rx_dma_batched;
42239+
42240+#if 0
42241+ VPRINTK("Posting buffer %d (0x%08x) for rx at index %d, space is %d\n",
42242+ id, buf->pkt_buff_addr, idx, ef_vi_receive_space(&vnic->vi));
42243+#endif
42244+ /* Set up a virtual buffer descriptor */
42245+ ef_vi_receive_init(&vnic->vi, buf->pkt_buff_addr, id,
42246+ /*rx_bytes=max*/0);
42247+
42248+ idx++;
42249+
42250+ vnic->rx_dma_level++;
42251+
42252+ /*
42253+	 * Only push the descriptors to the card once we have batched
42254+	 * enough of them, or if the RX ring is running low. Otherwise
42255+	 * they can sit around for a while; there will be plenty available.
42256+ */
42257+ if (idx >= NETFRONT_ACCEL_RX_DESC_BATCH ||
42258+ vnic->rx_dma_level < NETFRONT_ACCEL_RX_DESC_BATCH) {
42259+#if 0
42260+ VPRINTK("Flushing %d rx descriptors.\n", idx);
42261+#endif
42262+
42263+ /* Push buffer to hardware */
42264+ ef_vi_receive_push(&vnic->vi);
42265+
42266+ idx = 0;
42267+ }
42268+
42269+ vnic->rx_dma_batched = idx;
42270+}
42271+
42272+
42273+inline
42274+void netfront_accel_vi_post_rx_or_free(netfront_accel_vnic *vnic, u16 id,
42275+ netfront_accel_pkt_desc *buf)
42276+{
42277+
42278+ VPRINTK("%s: %d\n", __FUNCTION__, id);
42279+
42280+ if (ef_vi_receive_space(&vnic->vi) <= vnic->rx_dma_batched) {
42281+ VPRINTK("RX space is full\n");
42282+ netfront_accel_buf_put(vnic->rx_bufs, id);
42283+ return;
42284+ }
42285+
42286+ VPRINTK("Completed buffer %d is reposted\n", id);
42287+ netfront_accel_vi_post_rx(vnic, id, buf);
42288+
42289+ /*
42290+ * Let's see if there's any more to be pushed out to the NIC
42291+ * while we're here
42292+ */
42293+ while (ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
42294+ /* Try to allocate a buffer. */
42295+ buf = netfront_accel_buf_get(vnic->rx_bufs);
42296+ if (buf == NULL)
42297+ break;
42298+
42299+ /* Add it to the rx dma queue. */
42300+ netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
42301+ }
42302+}
42303+
42304+
42305+void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx)
42306+{
42307+
42308+ while (is_rx &&
42309+ ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
42310+ netfront_accel_pkt_desc *buf;
42311+
42312+ VPRINTK("%s: %d\n", __FUNCTION__, vnic->rx_dma_level);
42313+
42314+ /* Try to allocate a buffer. */
42315+ buf = netfront_accel_buf_get(vnic->rx_bufs);
42316+
42317+ if (buf == NULL)
42318+ break;
42319+
42320+ /* Add it to the rx dma queue. */
42321+ netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
42322+ }
42323+
42324+ VPRINTK("%s: done\n", __FUNCTION__);
42325+}
42326+
42327+
42328+struct netfront_accel_multi_state {
42329+ unsigned remaining_len;
42330+
42331+ unsigned buffers;
42332+
42333+ struct netfront_accel_tso_buffer *output_buffers;
42334+
42335+ /* Where we are in the current fragment of the SKB. */
42336+ struct {
42337+ /* address of current position */
42338+ void *addr;
42339+ /* remaining length */
42340+ unsigned int len;
42341+ } ifc; /* == Input Fragment Cursor */
42342+};
42343+
42344+
42345+static inline void multi_post_start(struct netfront_accel_multi_state *st,
42346+ struct sk_buff *skb)
42347+{
42348+ st->remaining_len = skb->len;
42349+ st->output_buffers = NULL;
42350+ st->buffers = 0;
42351+ st->ifc.len = skb_headlen(skb);
42352+ st->ifc.addr = skb->data;
42353+}
42354+
42355+static int multi_post_start_new_buffer(netfront_accel_vnic *vnic,
42356+ struct netfront_accel_multi_state *st)
42357+{
42358+ struct netfront_accel_tso_buffer *tso_buf;
42359+ struct netfront_accel_pkt_desc *buf;
42360+
42361+ /* Get a mapped packet buffer */
42362+ buf = netfront_accel_buf_get(vnic->tx_bufs);
42363+ if (buf == NULL) {
42364+ DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
42365+ return -1;
42366+ }
42367+
42368+ /* Store a bit of meta-data at the end */
42369+ tso_buf = (struct netfront_accel_tso_buffer *)
42370+ (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
42371+
42372+ tso_buf->buf = buf;
42373+
42374+ tso_buf->length = 0;
42375+
42376+ tso_buf->next = st->output_buffers;
42377+ st->output_buffers = tso_buf;
42378+ st->buffers++;
42379+
42380+ BUG_ON(st->buffers >= ACCEL_TX_MAX_BUFFERS);
42381+
42382+ /*
42383+	 * Store the skb context: NULL for now; the last buffer of the
42384+	 * packet will get a non-NULL skb later
42385+ */
42386+ tso_buf->buf->skb = NULL;
42387+
42388+ return 0;
42389+}
42390+
42391+
42392+static void
42393+multi_post_fill_buffer_with_fragment(netfront_accel_vnic *vnic,
42394+ struct netfront_accel_multi_state *st)
42395+{
42396+ struct netfront_accel_tso_buffer *tso_buf;
42397+ unsigned n, space;
42398+
42399+ BUG_ON(st->output_buffers == NULL);
42400+ tso_buf = st->output_buffers;
42401+
42402+ if (st->ifc.len == 0) return;
42403+ if (tso_buf->length == NETFRONT_ACCEL_TX_BUF_LENGTH) return;
42404+
42405+ BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
42406+
42407+ space = NETFRONT_ACCEL_TX_BUF_LENGTH - tso_buf->length;
42408+ n = min(st->ifc.len, space);
42409+
42410+ memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
42411+
42412+ st->remaining_len -= n;
42413+ st->ifc.len -= n;
42414+ tso_buf->length += n;
42415+ st->ifc.addr += n;
42416+
42417+ BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
42418+
42419+ return;
42420+}
42421+
42422+
42423+static inline void multi_post_unwind(netfront_accel_vnic *vnic,
42424+ struct netfront_accel_multi_state *st)
42425+{
42426+ struct netfront_accel_tso_buffer *tso_buf;
42427+
42428+ DPRINTK("%s\n", __FUNCTION__);
42429+
42430+ while (st->output_buffers != NULL) {
42431+ tso_buf = st->output_buffers;
42432+ st->output_buffers = tso_buf->next;
42433+ st->buffers--;
42434+ netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
42435+ }
42436+ BUG_ON(st->buffers != 0);
42437+}
42438+
42439+
42440+static enum netfront_accel_post_status
42441+netfront_accel_enqueue_skb_multi(netfront_accel_vnic *vnic, struct sk_buff *skb)
42442+{
42443+ struct netfront_accel_tso_buffer *tso_buf;
42444+ struct netfront_accel_multi_state state;
42445+ ef_iovec iovecs[ACCEL_TX_MAX_BUFFERS];
42446+ skb_frag_t *f;
42447+ int frag_i, rc, dma_id;
42448+
42449+ multi_post_start(&state, skb);
42450+
42451+ frag_i = -1;
42452+
42453+ if (skb->ip_summed == CHECKSUM_HW) {
42454+ /* Set to zero to encourage falcon to work it out for us */
42455+ *(u16*)(skb->h.raw + skb->csum) = 0;
42456+ }
42457+
42458+ if (multi_post_start_new_buffer(vnic, &state)) {
42459+ DPRINTK("%s: out of buffers\n", __FUNCTION__);
42460+ goto unwind;
42461+ }
42462+
42463+ while (1) {
42464+ multi_post_fill_buffer_with_fragment(vnic, &state);
42465+
42466+ /* Move onto the next fragment? */
42467+ if (state.ifc.len == 0) {
42468+ if (++frag_i >= skb_shinfo(skb)->nr_frags)
42469+ /* End of payload reached. */
42470+ break;
42471+ f = &skb_shinfo(skb)->frags[frag_i];
42472+ state.ifc.len = f->size;
42473+ state.ifc.addr = page_address(f->page) + f->page_offset;
42474+ }
42475+
42476+ /* Start a new buffer? */
42477+ if ((state.output_buffers->length ==
42478+ NETFRONT_ACCEL_TX_BUF_LENGTH) &&
42479+ multi_post_start_new_buffer(vnic, &state)) {
42480+ DPRINTK("%s: out of buffers\n", __FUNCTION__);
42481+ goto unwind;
42482+ }
42483+ }
42484+
42485+ /* Check for space */
42486+ if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
42487+ DPRINTK("%s: Not enough TX space (%d)\n", __FUNCTION__, state.buffers);
42488+ goto unwind;
42489+ }
42490+
42491+ /* Store the skb in what will be the last buffer's context */
42492+ state.output_buffers->buf->skb = skb;
42493+ /* Remember dma_id of what will be the last buffer */
42494+ dma_id = state.output_buffers->buf->buf_id;
42495+
42496+ /*
42497+ * Make an iovec of the buffers in the list, reversing the
42498+ * buffers as we go as they are constructed on a stack
42499+ */
42500+ tso_buf = state.output_buffers;
42501+ for (frag_i = state.buffers-1; frag_i >= 0; frag_i--) {
42502+ iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
42503+ iovecs[frag_i].iov_len = tso_buf->length;
42504+ tso_buf = tso_buf->next;
42505+ }
42506+
42507+ rc = ef_vi_transmitv(&vnic->vi, iovecs, state.buffers, dma_id);
42508+
42509+ /* Track number of tx fastpath stats */
42510+ vnic->netdev_stats.fastpath_tx_bytes += skb->len;
42511+ vnic->netdev_stats.fastpath_tx_pkts ++;
42512+#if NETFRONT_ACCEL_STATS
42513+ {
42514+ u32 n;
42515+ n = vnic->netdev_stats.fastpath_tx_pkts -
42516+ (u32)vnic->stats.fastpath_tx_completions;
42517+ if (n > vnic->stats.fastpath_tx_pending_max)
42518+ vnic->stats.fastpath_tx_pending_max = n;
42519+ }
42520+#endif
42521+ return NETFRONT_ACCEL_STATUS_GOOD;
42522+
42523+unwind:
42524+ multi_post_unwind(vnic, &state);
42525+
42526+ NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
42527+
42528+ return NETFRONT_ACCEL_STATUS_BUSY;
42529+}
42530+
42531+
42532+static enum netfront_accel_post_status
42533+netfront_accel_enqueue_skb_single(netfront_accel_vnic *vnic, struct sk_buff *skb)
42534+{
42535+ struct netfront_accel_tso_buffer *tso_buf;
42536+ struct netfront_accel_pkt_desc *buf;
42537+ u8 *kva;
42538+ int rc;
42539+
42540+ if (ef_vi_transmit_space(&vnic->vi) < 1) {
42541+ DPRINTK("%s: No TX space\n", __FUNCTION__);
42542+ NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
42543+ return NETFRONT_ACCEL_STATUS_BUSY;
42544+ }
42545+
42546+ buf = netfront_accel_buf_get(vnic->tx_bufs);
42547+ if (buf == NULL) {
42548+ DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
42549+ NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
42550+ return NETFRONT_ACCEL_STATUS_BUSY;
42551+ }
42552+
42553+ /* Track number of tx fastpath stats */
42554+ vnic->netdev_stats.fastpath_tx_pkts++;
42555+ vnic->netdev_stats.fastpath_tx_bytes += skb->len;
42556+
42557+#if NETFRONT_ACCEL_STATS
42558+ {
42559+ u32 n;
42560+ n = vnic->netdev_stats.fastpath_tx_pkts -
42561+ (u32)vnic->stats.fastpath_tx_completions;
42562+ if (n > vnic->stats.fastpath_tx_pending_max)
42563+ vnic->stats.fastpath_tx_pending_max = n;
42564+ }
42565+#endif
42566+
42567+ /* Store the context */
42568+ buf->skb = skb;
42569+
42570+ kva = buf->pkt_kva;
42571+
42572+ if (skb->ip_summed == CHECKSUM_HW) {
42573+ /* Set to zero to encourage falcon to work it out for us */
42574+ *(u16*)(skb->h.raw + skb->csum) = 0;
42575+ }
42576+ NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
42577+ (skb, idx, frag_data, frag_len, {
42578+ /* Copy in payload */
42579+ VPRINTK("*** Copying %d bytes to %p\n", frag_len, kva);
42580+ memcpy(kva, frag_data, frag_len);
42581+ kva += frag_len;
42582+ });
42583+
42584+ VPRINTK("%s: id %d pkt %p kva %p buff_addr 0x%08x\n", __FUNCTION__,
42585+ buf->buf_id, buf, buf->pkt_kva, buf->pkt_buff_addr);
42586+
42587+
42588+ /* Set up the TSO meta-data for a single buffer/packet */
42589+ tso_buf = (struct netfront_accel_tso_buffer *)
42590+ (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
42591+ tso_buf->next = NULL;
42592+ tso_buf->buf = buf;
42593+ tso_buf->length = skb->len;
42594+
42595+ rc = ef_vi_transmit(&vnic->vi, buf->pkt_buff_addr, skb->len,
42596+ buf->buf_id);
42597+ /* We checked for space already, so it really should succeed */
42598+ BUG_ON(rc != 0);
42599+
42600+ return NETFRONT_ACCEL_STATUS_GOOD;
42601+}
42602+
42603+
42604+enum netfront_accel_post_status
42605+netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, struct sk_buff *skb)
42606+{
42607+ struct ethhdr *pkt_eth_hdr;
42608+ struct iphdr *pkt_ipv4_hdr;
42609+ int value, try_fastpath;
42610+
42611+ /*
42612+ * This assumes that the data field points to the dest mac
42613+ * address.
42614+ */
42615+ cuckoo_hash_mac_key key = cuckoo_mac_to_key(skb->data);
42616+
42617+ /*
42618+ * NB very important that all things that could return "CANT"
42619+	 * are tested before things that return "BUSY", as if it
42620+ * returns "BUSY" it is assumed that it won't return "CANT"
42621+ * next time it is tried
42622+ */
42623+
42624+ /*
42625+ * Do a fastpath send if fast path table lookup returns true.
42626+ * We do this without the table lock and so may get the wrong
42627+ * answer, but current opinion is that's not a big problem
42628+ */
42629+ try_fastpath = cuckoo_hash_lookup(&vnic->fastpath_table,
42630+ (cuckoo_hash_key *)(&key), &value);
42631+
42632+ if (!try_fastpath) {
42633+ VPRINTK("try fast path false for mac: " MAC_FMT "\n",
42634+ MAC_ARG(skb->data));
42635+
42636+ return NETFRONT_ACCEL_STATUS_CANT;
42637+ }
42638+
42639+ /* Check to see if the packet can be sent. */
42640+ if (skb_headlen(skb) < sizeof(*pkt_eth_hdr) + sizeof(*pkt_ipv4_hdr)) {
42641+ EPRINTK("%s: Packet header is too small\n", __FUNCTION__);
42642+ return NETFRONT_ACCEL_STATUS_CANT;
42643+ }
42644+
42645+ pkt_eth_hdr = (void*)skb->data;
42646+ pkt_ipv4_hdr = (void*)(pkt_eth_hdr+1);
42647+
42648+ if (be16_to_cpu(pkt_eth_hdr->h_proto) != ETH_P_IP) {
42649+ DPRINTK("%s: Packet is not IPV4 (ether_type=0x%04x)\n", __FUNCTION__,
42650+ be16_to_cpu(pkt_eth_hdr->h_proto));
42651+ return NETFRONT_ACCEL_STATUS_CANT;
42652+ }
42653+
42654+ if (pkt_ipv4_hdr->protocol != IPPROTO_TCP &&
42655+ pkt_ipv4_hdr->protocol != IPPROTO_UDP) {
42656+ DPRINTK("%s: Packet is not TCP/UDP (ip_protocol=0x%02x)\n",
42657+ __FUNCTION__, pkt_ipv4_hdr->protocol);
42658+ return NETFRONT_ACCEL_STATUS_CANT;
42659+ }
42660+
42661+ VPRINTK("%s: %d bytes, gso %d\n", __FUNCTION__, skb->len,
42662+ skb_shinfo(skb)->gso_size);
42663+
42664+ if (skb_shinfo(skb)->gso_size) {
42665+ return netfront_accel_enqueue_skb_tso(vnic, skb);
42666+ }
42667+
42668+ if (skb->len <= NETFRONT_ACCEL_TX_BUF_LENGTH) {
42669+ return netfront_accel_enqueue_skb_single(vnic, skb);
42670+ }
42671+
42672+ return netfront_accel_enqueue_skb_multi(vnic, skb);
42673+}
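For reference, the dispatch just implemented in netfront_accel_vi_tx_post() resolves to one of four outcomes; this summary restates the code above rather than adding behaviour:

	/*
	 * TX dispatch summary (from netfront_accel_vi_tx_post() above):
	 *   - MAC not in the fastpath table, header too small, non-IPv4,
	 *     or not TCP/UDP                      -> NETFRONT_ACCEL_STATUS_CANT
	 *   - gso_size set                        -> netfront_accel_enqueue_skb_tso()
	 *   - len <= NETFRONT_ACCEL_TX_BUF_LENGTH -> netfront_accel_enqueue_skb_single()
	 *   - otherwise                           -> netfront_accel_enqueue_skb_multi()
	 */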
42674+
42675+
42676+/*
42677+ * Copy the data to required end destination. NB. len is the total new
42678+ * length of the socket buffer, not the amount of data to copy
42679+ */
42680+inline
42681+int ef_vnic_copy_to_skb(netfront_accel_vnic *vnic, struct sk_buff *skb,
42682+ struct netfront_accel_pkt_desc *buf, int len)
42683+{
42684+ int i, extra = len - skb->len;
42685+	char c = 0;
42686+ int pkt_stride = vnic->rx_pkt_stride;
42687+ int skb_stride = vnic->rx_skb_stride;
42688+ char *skb_start;
42689+
42690+ /*
42691+	 * This pulls stuff into the cache - we have seen a performance
42692+	 * benefit from this, but it is disabled by default
42693+ */
42694+ skb_start = skb->data;
42695+ if (pkt_stride) {
42696+ for (i = 0; i < len; i += pkt_stride) {
42697+ c += ((volatile char*)(buf->pkt_kva))[i];
42698+ }
42699+ }
42700+ if (skb_stride) {
42701+ for (i = skb->len; i < len ; i += skb_stride) {
42702+ c += ((volatile char*)(skb_start))[i];
42703+ }
42704+ }
42705+
42706+ if (skb_tailroom(skb) >= extra) {
42707+ memcpy(skb_put(skb, extra), buf->pkt_kva, extra);
42708+ return 0;
42709+ }
42710+
42711+ return -ENOSPC;
42712+}
42713+
42714+
42715+static void discard_jumbo_state(netfront_accel_vnic *vnic)
42716+{
42717+
42718+ if (vnic->jumbo_state.skb != NULL) {
42719+ dev_kfree_skb_any(vnic->jumbo_state.skb);
42720+
42721+ vnic->jumbo_state.skb = NULL;
42722+ }
42723+ vnic->jumbo_state.in_progress = 0;
42724+}
42725+
42726+
42727+static void netfront_accel_vi_rx_complete(netfront_accel_vnic *vnic,
42728+ struct sk_buff *skb)
42729+{
42730+ cuckoo_hash_mac_key key;
42731+ unsigned long flags;
42732+ int value;
42733+ struct net_device *net_dev;
42734+
42735+
42736+ key = cuckoo_mac_to_key(skb->data + ETH_ALEN);
42737+
42738+ /*
42739+ * If this is a MAC address that we want to do fast path TX
42740+ * to, and we don't already, add it to the fastpath table.
42741+ * The initial lookup is done without the table lock and so
42742+ * may get the wrong answer, but current opinion is that's not
42743+ * a big problem
42744+ */
42745+ if (is_valid_ether_addr(skb->data + ETH_ALEN) &&
42746+ !cuckoo_hash_lookup(&vnic->fastpath_table, (cuckoo_hash_key *)&key,
42747+ &value)) {
42748+ spin_lock_irqsave(&vnic->table_lock, flags);
42749+
42750+ cuckoo_hash_add_check(&vnic->fastpath_table,
42751+ (cuckoo_hash_key *)&key,
42752+ 1, 1);
42753+
42754+ spin_unlock_irqrestore(&vnic->table_lock, flags);
42755+ }
42756+
42757+ if (compare_ether_addr(skb->data, vnic->mac)) {
42758+ struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
42759+ u16 port;
42760+
42761+ DPRINTK("%s: saw wrong MAC address " MAC_FMT "\n",
42762+ __FUNCTION__, MAC_ARG(skb->data));
42763+
42764+ if (ip->protocol == IPPROTO_TCP) {
42765+ struct tcphdr *tcp = (struct tcphdr *)
42766+ ((char *)ip + 4 * ip->ihl);
42767+ port = tcp->dest;
42768+ } else {
42769+ struct udphdr *udp = (struct udphdr *)
42770+ ((char *)ip + 4 * ip->ihl);
42771+ EPRINTK_ON(ip->protocol != IPPROTO_UDP);
42772+ port = udp->dest;
42773+ }
42774+
42775+ netfront_accel_msg_tx_fastpath(vnic, skb->data,
42776+ ip->daddr, port,
42777+ ip->protocol);
42778+ }
42779+
42780+ net_dev = vnic->net_dev;
42781+ skb->dev = net_dev;
42782+ skb->protocol = eth_type_trans(skb, net_dev);
42783+ /* CHECKSUM_UNNECESSARY as hardware has done it already */
42784+ skb->ip_summed = CHECKSUM_UNNECESSARY;
42785+
42786+ if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb))
42787+ netif_receive_skb(skb);
42788+}
42789+
42790+
42791+static int netfront_accel_vi_poll_process_rx(netfront_accel_vnic *vnic,
42792+ ef_event *ev)
42793+{
42794+ struct netfront_accel_bufinfo *bufinfo = vnic->rx_bufs;
42795+ struct netfront_accel_pkt_desc *buf = NULL;
42796+ struct sk_buff *skb;
42797+ int id, len, sop = 0, cont = 0;
42798+
42799+ VPRINTK("Rx event.\n");
42800+ /*
42801+ * Complete the receive operation, and get the request id of
42802+ * the buffer
42803+ */
42804+ id = ef_vi_receive_done(&vnic->vi, ev);
42805+
42806+ if (id < 0 || id >= bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) {
42807+ EPRINTK("Rx packet %d is invalid\n", id);
42808+ /* Carry on round the loop if more events */
42809+ goto bad_packet;
42810+ }
42811+ /* Get our buffer descriptor */
42812+ buf = netfront_accel_buf_find(bufinfo, id);
42813+
42814+ len = EF_EVENT_RX_BYTES(*ev);
42815+
42816+ /* An RX buffer has been removed from the DMA ring. */
42817+ vnic->rx_dma_level--;
42818+
42819+ if (EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX) {
42820+ sop = EF_EVENT_RX_SOP(*ev);
42821+ cont = EF_EVENT_RX_CONT(*ev);
42822+
42823+ skb = vnic->jumbo_state.skb;
42824+
42825+ VPRINTK("Rx packet %d: %d bytes so far; sop %d; cont %d\n",
42826+ id, len, sop, cont);
42827+
42828+ if (sop) {
42829+ if (!vnic->jumbo_state.in_progress) {
42830+ vnic->jumbo_state.in_progress = 1;
42831+ BUG_ON(vnic->jumbo_state.skb != NULL);
42832+ } else {
42833+ /*
42834+				 * This fragment implies that the previous one
42835+				 * is missing its tail, but may itself be OK
42836+ */
42837+ DPRINTK("sop and in_progress => no tail\n");
42838+
42839+ /* Release the socket buffer we already had */
42840+ discard_jumbo_state(vnic);
42841+
42842+ /* Now start processing this fragment */
42843+ vnic->jumbo_state.in_progress = 1;
42844+ skb = NULL;
42845+ }
42846+ } else if (!vnic->jumbo_state.in_progress) {
42847+ DPRINTK("!sop and !in_progress => missing head\n");
42848+ goto missing_head;
42849+ }
42850+
42851+ if (!cont) {
42852+ /* Update state for next time */
42853+ vnic->jumbo_state.in_progress = 0;
42854+ vnic->jumbo_state.skb = NULL;
42855+ } else if (!vnic->jumbo_state.in_progress) {
42856+ DPRINTK("cont and !in_progress => missing head\n");
42857+ goto missing_head;
42858+ }
42859+
42860+ if (skb == NULL) {
42861+ BUG_ON(!sop);
42862+
42863+ if (!cont)
42864+ skb = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC);
42865+ else
42866+ skb = alloc_skb(vnic->net_dev->mtu+NET_IP_ALIGN,
42867+ GFP_ATOMIC);
42868+
42869+ if (skb == NULL) {
42870+ DPRINTK("%s: Couldn't get an rx skb.\n",
42871+ __FUNCTION__);
42872+ netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
42873+ /*
42874+ * Dropping this fragment means we
42875+ * should discard the rest too
42876+ */
42877+ discard_jumbo_state(vnic);
42878+
42879+ /* Carry on round the loop if more events */
42880+ return 0;
42881+ }
42882+
42883+ }
42884+
42885+ /* Copy the data to required end destination */
42886+ if (ef_vnic_copy_to_skb(vnic, skb, buf, len) != 0) {
42887+ /*
42888+ * No space in the skb - suggests > MTU packet
42889+ * received
42890+ */
42891+ EPRINTK("%s: Rx packet too large (%d)\n",
42892+ __FUNCTION__, len);
42893+ netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
42894+ discard_jumbo_state(vnic);
42895+ return 0;
42896+ }
42897+
42898+ /* Put the buffer back in the DMA queue. */
42899+ netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
42900+
42901+ if (cont) {
42902+ vnic->jumbo_state.skb = skb;
42903+
42904+ return 0;
42905+ } else {
42906+ /* Track number of rx fastpath packets */
42907+ vnic->netdev_stats.fastpath_rx_pkts++;
42908+ vnic->netdev_stats.fastpath_rx_bytes += len;
42909+
42910+ netfront_accel_vi_rx_complete(vnic, skb);
42911+
42912+ return 1;
42913+ }
42914+ } else {
42915+ BUG_ON(EF_EVENT_TYPE(*ev) != EF_EVENT_TYPE_RX_DISCARD);
42916+
42917+ if (EF_EVENT_RX_DISCARD_TYPE(*ev)
42918+ == EF_EVENT_RX_DISCARD_TRUNC) {
42919+ DPRINTK("%s: " EF_EVENT_FMT
42920+ " buffer %d FRM_TRUNC q_id %d\n",
42921+ __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
42922+ EF_EVENT_RX_DISCARD_Q_ID(*ev) );
42923+ NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_frm_trunc);
42924+ } else if (EF_EVENT_RX_DISCARD_TYPE(*ev)
42925+ == EF_EVENT_RX_DISCARD_OTHER) {
42926+ DPRINTK("%s: " EF_EVENT_FMT
42927+ " buffer %d RX_DISCARD_OTHER q_id %d\n",
42928+ __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
42929+ EF_EVENT_RX_DISCARD_Q_ID(*ev) );
42930+ /*
42931+ * Probably tail of packet for which error has
42932+ * already been logged, so don't count in
42933+ * stats
42934+ */
42935+ } else {
42936+ EPRINTK("%s: " EF_EVENT_FMT
42937+ " buffer %d rx discard type %d q_id %d\n",
42938+ __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
42939+ EF_EVENT_RX_DISCARD_TYPE(*ev),
42940+ EF_EVENT_RX_DISCARD_Q_ID(*ev) );
42941+ NETFRONT_ACCEL_STATS_OP(++vnic->stats.bad_event_count);
42942+ }
42943+ }
42944+
42945+ /* discard type drops through here */
42946+
42947+bad_packet:
42948+ /* Release the socket buffer we already had */
42949+ discard_jumbo_state(vnic);
42950+
42951+missing_head:
42952+ BUG_ON(vnic->jumbo_state.in_progress != 0);
42953+ BUG_ON(vnic->jumbo_state.skb != NULL);
42954+
42955+ if (id >= 0 && id < bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE)
42956+ /* Put the buffer back in the DMA queue. */
42957+ netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
42958+
42959+ vnic->netdev_stats.fastpath_rx_errors++;
42960+
42961+ DPRINTK("%s experienced bad packet/missing fragment error: %d \n",
42962+ __FUNCTION__, ev->rx.flags);
42963+
42964+ return 0;
42965+}
42966+
42967+
42968+static void netfront_accel_vi_not_busy(netfront_accel_vnic *vnic)
42969+{
42970+ struct netfront_info *np = ((struct netfront_info *)
42971+ netdev_priv(vnic->net_dev));
42972+ struct sk_buff *skb;
42973+ int handled;
42974+ unsigned long flags;
42975+
42976+ /*
42977+ * TODO if we could safely check tx_skb == NULL and return
42978+ * early without taking the lock, that would obviously help
42979+ * performance
42980+ */
42981+
42982+ /* Take the netfront lock which protects tx_skb. */
42983+ spin_lock_irqsave(&np->tx_lock, flags);
42984+ if (vnic->tx_skb != NULL) {
42985+ DPRINTK("%s trying to send spare buffer\n", __FUNCTION__);
42986+
42987+ skb = vnic->tx_skb;
42988+ vnic->tx_skb = NULL;
42989+
42990+ spin_unlock_irqrestore(&np->tx_lock, flags);
42991+
42992+ handled = netfront_accel_vi_tx_post(vnic, skb);
42993+
42994+ spin_lock_irqsave(&np->tx_lock, flags);
42995+
42996+ if (handled != NETFRONT_ACCEL_STATUS_BUSY) {
42997+ DPRINTK("%s restarting tx\n", __FUNCTION__);
42998+ if (netfront_check_queue_ready(vnic->net_dev)) {
42999+ netif_wake_queue(vnic->net_dev);
43000+ NETFRONT_ACCEL_STATS_OP
43001+ (vnic->stats.queue_wakes++);
43002+ }
43003+ } else {
43004+ vnic->tx_skb = skb;
43005+ }
43006+
43007+ /*
43008+		 * We should never get a CANT here, as that was checked
43009+		 * before BUSY was returned the first time round
43010+ */
43011+ BUG_ON(handled == NETFRONT_ACCEL_STATUS_CANT);
43012+ }
43013+ spin_unlock_irqrestore(&np->tx_lock, flags);
43014+}
43015+
43016+
43017+static void netfront_accel_vi_tx_complete(netfront_accel_vnic *vnic,
43018+ struct netfront_accel_tso_buffer *tso_buf,
43019+ int is_last)
43020+{
43021+ struct netfront_accel_tso_buffer *next;
43022+
43023+ /*
43024+ * We get a single completion for every call to
43025+ * ef_vi_transmitv so handle any other buffers which are part
43026+ * of the same packet
43027+ */
43028+ while (tso_buf != NULL) {
43029+ if (tso_buf->buf->skb != NULL) {
43030+ dev_kfree_skb_any(tso_buf->buf->skb);
43031+ tso_buf->buf->skb = NULL;
43032+ }
43033+
43034+ next = tso_buf->next;
43035+
43036+ netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
43037+
43038+ tso_buf = next;
43039+ }
43040+
43041+ /*
43042+	 * If this was the last one in the batch, we try to send any
43043+ * pending tx_skb. There should now be buffers and
43044+ * descriptors
43045+ */
43046+ if (is_last)
43047+ netfront_accel_vi_not_busy(vnic);
43048+}
43049+
43050+
43051+static void netfront_accel_vi_poll_process_tx(netfront_accel_vnic *vnic,
43052+ ef_event *ev)
43053+{
43054+ struct netfront_accel_pkt_desc *buf;
43055+ struct netfront_accel_tso_buffer *tso_buf;
43056+ ef_request_id ids[EF_VI_TRANSMIT_BATCH];
43057+ int i, n_ids;
43058+ unsigned long flags;
43059+
43060+ /* Get the request ids for this tx completion event. */
43061+ n_ids = ef_vi_transmit_unbundle(&vnic->vi, ev, ids);
43062+
43063+ /* Take the tx buffer spin lock and hold for the duration */
43064+ spin_lock_irqsave(&vnic->tx_lock, flags);
43065+
43066+ for (i = 0; i < n_ids; ++i) {
43067+ VPRINTK("Tx packet %d complete\n", ids[i]);
43068+ buf = netfront_accel_buf_find(vnic->tx_bufs, ids[i]);
43069+ NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_completions++);
43070+
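+		/*
+		 * The TSO metadata appears to be stashed in the same packet
+		 * buffer, just past the NETFRONT_ACCEL_TX_BUF_LENGTH data
+		 * area, by the transmit path.
+		 */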
43071+ tso_buf = (struct netfront_accel_tso_buffer *)
43072+ (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
43073+ BUG_ON(tso_buf->buf != buf);
43074+
43075+ netfront_accel_vi_tx_complete(vnic, tso_buf, i == (n_ids-1));
43076+ }
43077+
43078+ spin_unlock_irqrestore(&vnic->tx_lock, flags);
43079+}
43080+
43081+
43082+int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets)
43083+{
43084+ ef_event ev[ACCEL_VI_POLL_EVENTS];
43085+ int rx_remain = rx_packets, rc, events, i;
43086+#if NETFRONT_ACCEL_STATS
43087+ int n_evs_polled = 0, rx_evs_polled = 0, tx_evs_polled = 0;
43088+#endif
43089+ BUG_ON(rx_packets <= 0);
43090+
43091+ events = ef_eventq_poll(&vnic->vi, ev,
43092+ min(rx_remain, ACCEL_VI_POLL_EVENTS));
43093+ i = 0;
43094+ NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
43095+
43096+ VPRINTK("%s: %d events\n", __FUNCTION__, events);
43097+
43098+ /* Loop over each event */
43099+ while (events) {
43100+ VPRINTK("%s: Event "EF_EVENT_FMT", index %lu\n", __FUNCTION__,
43101+ EF_EVENT_PRI_ARG(ev[i]),
43102+ (unsigned long)(vnic->vi.evq_state->evq_ptr));
43103+
43104+ if ((EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX) ||
43105+ (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_DISCARD)) {
43106+ rc = netfront_accel_vi_poll_process_rx(vnic, &ev[i]);
43107+ rx_remain -= rc;
43108+ BUG_ON(rx_remain < 0);
43109+ NETFRONT_ACCEL_STATS_OP(rx_evs_polled++);
43110+ } else if (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX) {
43111+ netfront_accel_vi_poll_process_tx(vnic, &ev[i]);
43112+ NETFRONT_ACCEL_STATS_OP(tx_evs_polled++);
43113+ } else if (EF_EVENT_TYPE(ev[i]) ==
43114+ EF_EVENT_TYPE_RX_NO_DESC_TRUNC) {
43115+ DPRINTK("%s: RX_NO_DESC_TRUNC " EF_EVENT_FMT "\n",
43116+ __FUNCTION__, EF_EVENT_PRI_ARG(ev[i]));
43117+ discard_jumbo_state(vnic);
43118+ NETFRONT_ACCEL_STATS_OP(vnic->stats.rx_no_desc_trunc++);
43119+ } else {
43120+ EPRINTK("Unexpected event " EF_EVENT_FMT "\n",
43121+ EF_EVENT_PRI_ARG(ev[i]));
43122+ NETFRONT_ACCEL_STATS_OP(vnic->stats.bad_event_count++);
43123+ }
43124+
43125+ i++;
43126+
43127+ /* Carry on round the loop if more events and more space */
43128+ if (i == events) {
43129+ if (rx_remain == 0)
43130+ break;
43131+
43132+ events = ef_eventq_poll(&vnic->vi, ev,
43133+ min(rx_remain,
43134+ ACCEL_VI_POLL_EVENTS));
43135+ i = 0;
43136+ NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
43137+ }
43138+ }
43139+
43140+#if NETFRONT_ACCEL_STATS
43141+ vnic->stats.event_count += n_evs_polled;
43142+ vnic->stats.event_count_since_irq += n_evs_polled;
43143+ if (n_evs_polled > vnic->stats.events_per_poll_max)
43144+ vnic->stats.events_per_poll_max = n_evs_polled;
43145+ if (rx_evs_polled > vnic->stats.events_per_poll_rx_max)
43146+ vnic->stats.events_per_poll_rx_max = rx_evs_polled;
43147+ if (tx_evs_polled > vnic->stats.events_per_poll_tx_max)
43148+ vnic->stats.events_per_poll_tx_max = tx_evs_polled;
43149+#endif
43150+
43151+ return rx_packets - rx_remain;
43152+}
43153+
43154+
43155+int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic)
43156+{
43157+ u32 sw_evq_ptr;
43158+
43159+	BUG_ON(vnic == NULL);
43160+	BUG_ON(vnic->vi.evq_state == NULL);
43161+
43162+	VPRINTK("%s: checking for event on %p\n", __FUNCTION__, &vnic->vi.evq_state);
43163+
43164+ /* Do a quick check for an event. */
43165+ if (ef_eventq_has_event(&vnic->vi)) {
43166+ VPRINTK("%s: found event\n", __FUNCTION__);
43167+ return 0;
43168+ }
43169+
43170+ VPRINTK("evq_ptr=0x%08x evq_mask=0x%08x\n",
43171+ vnic->evq_state.evq_ptr, vnic->vi.evq_mask);
43172+
43173+ /* Request a wakeup from the hardware. */
43174+ sw_evq_ptr = vnic->evq_state.evq_ptr & vnic->vi.evq_mask;
43175+
43176+ BUG_ON(vnic->hw.falcon.evq_rptr == NULL);
43177+
43178+ VPRINTK("Requesting wakeup at 0x%08x, rptr %p\n", sw_evq_ptr,
43179+ vnic->hw.falcon.evq_rptr);
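+	/*
+	 * Falcon events are 8 bytes each, so the byte offset is converted
+	 * into an event index before being written to the read pointer.
+	 */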
43180+ *(volatile u32 *)(vnic->hw.falcon.evq_rptr) = (sw_evq_ptr >> 3);
43181+
43182+ return 1;
43183+}
43184Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_xenbus.c
43185===================================================================
43186--- /dev/null 1970-01-01 00:00:00.000000000 +0000
43187+++ head-2008-11-25/drivers/xen/sfc_netfront/accel_xenbus.c 2008-02-20 09:32:49.000000000 +0100
43188@@ -0,0 +1,776 @@
43189+/****************************************************************************
43190+ * Solarflare driver for Xen network acceleration
43191+ *
43192+ * Copyright 2006-2008: Solarflare Communications Inc,
43193+ * 9501 Jeronimo Road, Suite 250,
43194+ * Irvine, CA 92618, USA
43195+ *
43196+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
43197+ *
43198+ * This program is free software; you can redistribute it and/or modify it
43199+ * under the terms of the GNU General Public License version 2 as published
43200+ * by the Free Software Foundation, incorporated herein by reference.
43201+ *
43202+ * This program is distributed in the hope that it will be useful,
43203+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
43204+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43205+ * GNU General Public License for more details.
43206+ *
43207+ * You should have received a copy of the GNU General Public License
43208+ * along with this program; if not, write to the Free Software
43209+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
43210+ ****************************************************************************
43211+ */
43212+
43213+#include <linux/stddef.h>
43214+#include <linux/errno.h>
43215+
43216+#include <xen/xenbus.h>
43217+#include <xen/evtchn.h>
43218+#include <xen/gnttab.h>
43219+
43220+#include "accel.h"
43221+#include "accel_util.h"
43222+#include "accel_msg_iface.h"
43223+#include "accel_bufs.h"
43224+#include "accel_ssr.h"
43225+/* drivers/xen/netfront/netfront.h */
43226+#include "netfront.h"
43227+
43228+void netfront_accel_set_closing(netfront_accel_vnic *vnic)
43229+{
43230+
43231+ vnic->frontend_state = XenbusStateClosing;
43232+ net_accel_update_state(vnic->dev, XenbusStateClosing);
43233+}
43234+
43235+
43236+static void mac_address_change(struct xenbus_watch *watch,
43237+ const char **vec, unsigned int len)
43238+{
43239+ netfront_accel_vnic *vnic;
43240+ struct xenbus_device *dev;
43241+ int rc;
43242+
43243+ DPRINTK("%s\n", __FUNCTION__);
43244+
43245+ vnic = container_of(watch, netfront_accel_vnic,
43246+ mac_address_watch);
43247+ dev = vnic->dev;
43248+
43249+ rc = net_accel_xen_net_read_mac(dev, vnic->mac);
43250+
43251+ if (rc != 0)
43252+ EPRINTK("%s: failed to read mac (%d)\n", __FUNCTION__, rc);
43253+}
43254+
43255+
43256+static int setup_mac_address_watch(struct xenbus_device *dev,
43257+ netfront_accel_vnic *vnic)
43258+{
43259+ int err;
43260+
43261+ DPRINTK("Setting watch on %s/%s\n", dev->nodename, "mac");
43262+
43263+ err = xenbus_watch_path2(dev, dev->nodename, "mac",
43264+ &vnic->mac_address_watch,
43265+ mac_address_change);
43266+ if (err) {
43267+ EPRINTK("%s: Failed to register xenbus watch: %d\n",
43268+ __FUNCTION__, err);
43269+ goto fail;
43270+ }
43271+
43272+ return 0;
43273+ fail:
43274+ vnic->mac_address_watch.node = NULL;
43275+ return err;
43276+}
43277+
43278+
43279+/* Grant access to some pages and publish through xenbus */
43280+static int make_named_grant(struct xenbus_device *dev, void *page,
43281+ const char *name, grant_ref_t *gnt_ref)
43282+{
43283+ struct xenbus_transaction tr;
43284+ int err;
43285+ grant_ref_t gnt;
43286+
43287+ gnt = net_accel_grant_page(dev, virt_to_mfn(page), 0);
43288+ if (gnt < 0)
43289+ return gnt;
43290+
43291+ do {
43292+ err = xenbus_transaction_start(&tr);
43293+ if (err != 0) {
43294+ EPRINTK("%s: transaction start failed %d\n",
43295+ __FUNCTION__, err);
43296+ return err;
43297+ }
43298+ err = xenbus_printf(tr, dev->nodename, name, "%d", gnt);
43299+ if (err != 0) {
43300+ EPRINTK("%s: xenbus_printf failed %d\n", __FUNCTION__,
43301+ err);
43302+ xenbus_transaction_end(tr, 1);
43303+ return err;
43304+ }
43305+ err = xenbus_transaction_end(tr, 0);
43306+ } while (err == -EAGAIN);
43307+
43308+ if (err != 0) {
43309+ EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err);
43310+ return err;
43311+ }
43312+
43313+ *gnt_ref = gnt;
43314+
43315+ return 0;
43316+}
43317+
43318+
43319+static int remove_named_grant(struct xenbus_device *dev,
43320+ const char *name, grant_ref_t gnt_ref)
43321+{
43322+ struct xenbus_transaction tr;
43323+ int err;
43324+
43325+ net_accel_ungrant_page(gnt_ref);
43326+
43327+ do {
43328+ err = xenbus_transaction_start(&tr);
43329+ if (err != 0) {
43330+ EPRINTK("%s: transaction start failed %d\n",
43331+ __FUNCTION__, err);
43332+ return err;
43333+ }
43334+ err = xenbus_rm(tr, dev->nodename, name);
43335+ if (err != 0) {
43336+ EPRINTK("%s: xenbus_rm failed %d\n", __FUNCTION__,
43337+ err);
43338+ xenbus_transaction_end(tr, 1);
43339+ return err;
43340+ }
43341+ err = xenbus_transaction_end(tr, 0);
43342+ } while (err == -EAGAIN);
43343+
43344+ if (err != 0) {
43345+ EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err);
43346+ return err;
43347+ }
43348+
43349+ return 0;
43350+}
43351+
43352+
43353+static
43354+netfront_accel_vnic *netfront_accel_vnic_ctor(struct net_device *net_dev,
43355+ struct xenbus_device *dev)
43356+{
43357+ struct netfront_info *np =
43358+ (struct netfront_info *)netdev_priv(net_dev);
43359+ netfront_accel_vnic *vnic;
43360+ int err;
43361+
43362+ /*
43363+	 * A bug in earlier versions of the Xen accel plugin system meant
43364+ * you could be probed twice for the same device on suspend
43365+ * cancel. Be tolerant of that.
43366+ */
43367+ if (np->accel_priv != NULL)
43368+ return ERR_PTR(-EALREADY);
43369+
43370+ /* Alloc mem for state */
43371+ vnic = kzalloc(sizeof(netfront_accel_vnic), GFP_KERNEL);
43372+ if (vnic == NULL) {
43373+ EPRINTK("%s: no memory for vnic state\n", __FUNCTION__);
43374+ return ERR_PTR(-ENOMEM);
43375+ }
43376+
43377+ spin_lock_init(&vnic->tx_lock);
43378+
43379+ mutex_init(&vnic->vnic_mutex);
43380+ mutex_lock(&vnic->vnic_mutex);
43381+
43382+ /* Store so state can be retrieved from device */
43383+ BUG_ON(np->accel_priv != NULL);
43384+ np->accel_priv = vnic;
43385+ vnic->dev = dev;
43386+ vnic->net_dev = net_dev;
43387+ spin_lock_init(&vnic->irq_enabled_lock);
43388+ netfront_accel_ssr_init(&vnic->ssr_state);
43389+
43390+ init_waitqueue_head(&vnic->state_wait_queue);
43391+ vnic->backend_state = XenbusStateUnknown;
43392+ vnic->frontend_state = XenbusStateClosed;
43393+ vnic->removing = 0;
43394+ vnic->domU_state_is_setup = 0;
43395+ vnic->dom0_state_is_setup = 0;
43396+ vnic->poll_enabled = 0;
43397+ vnic->tx_enabled = 0;
43398+ vnic->tx_skb = NULL;
43399+
43400+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
43401+ INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend);
43402+#else
43403+ INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend, vnic);
43404+#endif
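+	/*
+	 * The INIT_WORK() prototype lost its data argument in 2.6.20,
+	 * hence the two variants above.
+	 */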
43405+
43406+ netfront_accel_debugfs_create(vnic);
43407+
43408+ mutex_unlock(&vnic->vnic_mutex);
43409+
43410+ err = net_accel_xen_net_read_mac(dev, vnic->mac);
43411+ if (err)
43412+ goto fail_mac;
43413+
43414+	/* Set up a watch on the frontend's MAC address */
43415+ err = setup_mac_address_watch(dev, vnic);
43416+ if (err)
43417+ goto fail_mac;
43418+
43419+ return vnic;
43420+
43421+fail_mac:
43422+
43423+ mutex_lock(&vnic->vnic_mutex);
43424+
43425+ netfront_accel_debugfs_remove(vnic);
43426+
43427+ netfront_accel_ssr_fini(vnic, &vnic->ssr_state);
43428+
43429+ EPRINTK_ON(vnic->tx_skb != NULL);
43430+
43431+ vnic->frontend_state = XenbusStateUnknown;
43432+ net_accel_update_state(dev, XenbusStateUnknown);
43433+
43434+ mutex_unlock(&vnic->vnic_mutex);
43435+
43436+ np->accel_priv = NULL;
43437+ kfree(vnic);
43438+
43439+ return ERR_PTR(err);
43440+}
43441+
43442+
43443+static void netfront_accel_vnic_dtor(netfront_accel_vnic *vnic)
43444+{
43445+ struct net_device *net_dev = vnic->net_dev;
43446+ struct netfront_info *np =
43447+ (struct netfront_info *)netdev_priv(net_dev);
43448+
43449+ /*
43450+	 * Now that we no longer hold the lock, it is safe to remove
43451+	 * this watch and synchronise with the completion of any
43452+	 * outstanding watches
43453+ */
43454+ DPRINTK("%s: unregistering xenbus mac watch\n", __FUNCTION__);
43455+ unregister_xenbus_watch(&vnic->mac_address_watch);
43456+ kfree(vnic->mac_address_watch.node);
43457+
43458+ flush_workqueue(netfront_accel_workqueue);
43459+
43460+ mutex_lock(&vnic->vnic_mutex);
43461+
43462+ netfront_accel_debugfs_remove(vnic);
43463+
43464+ netfront_accel_ssr_fini(vnic, &vnic->ssr_state);
43465+
43466+ EPRINTK_ON(vnic->tx_skb != NULL);
43467+
43468+ vnic->frontend_state = XenbusStateUnknown;
43469+ net_accel_update_state(vnic->dev, XenbusStateUnknown);
43470+
43471+ mutex_unlock(&vnic->vnic_mutex);
43472+
43473+ np->accel_priv = NULL;
43474+ kfree(vnic);
43475+}
43476+
43477+
43478+static int vnic_setup_domU_shared_state(struct xenbus_device *dev,
43479+ netfront_accel_vnic *vnic)
43480+{
43481+ struct xenbus_transaction tr;
43482+ int err;
43483+ int msgs_per_queue;
43484+
43485+
43486+ DPRINTK("Setting up domU shared state.\n");
43487+
43488+ msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg);
43489+
43490+ /* Allocate buffer state */
43491+ vnic->tx_bufs = netfront_accel_init_bufs(&vnic->tx_lock);
43492+ if (vnic->tx_bufs == NULL) {
43493+ err = -ENOMEM;
43494+ EPRINTK("%s: Failed to allocate tx buffers\n", __FUNCTION__);
43495+ goto fail_tx_bufs;
43496+ }
43497+
43498+ vnic->rx_bufs = netfront_accel_init_bufs(NULL);
43499+ if (vnic->rx_bufs == NULL) {
43500+ err = -ENOMEM;
43501+ EPRINTK("%s: Failed to allocate rx buffers\n", __FUNCTION__);
43502+ goto fail_rx_bufs;
43503+ }
43504+
43505+ /*
43506+ * This allocates two pages, one for the shared page and one
43507+ * for the message queue.
43508+ */
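+	/*
+	 * Page 0 is the control/shared page (holding both queue
+	 * descriptors); page 1 is split into two half-page message
+	 * arrays, from-dom0 in the first half and to-dom0 in the second.
+	 */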
43509+ vnic->shared_page = (struct net_accel_shared_page *)
43510+ __get_free_pages(GFP_KERNEL, 1);
43511+ if (vnic->shared_page == NULL) {
43512+ EPRINTK("%s: no memory for shared pages\n", __FUNCTION__);
43513+ err = -ENOMEM;
43514+ goto fail_shared_page;
43515+ }
43516+
43517+ net_accel_msg_init_queue
43518+ (&vnic->from_dom0, &vnic->shared_page->queue0,
43519+ (struct net_accel_msg *)((u8*)vnic->shared_page + PAGE_SIZE),
43520+ msgs_per_queue);
43521+
43522+ net_accel_msg_init_queue
43523+ (&vnic->to_dom0, &vnic->shared_page->queue1,
43524+ (struct net_accel_msg *)((u8*)vnic->shared_page +
43525+ (3 * PAGE_SIZE / 2)),
43526+ msgs_per_queue);
43527+
43528+ vnic->msg_state = NETFRONT_ACCEL_MSG_NONE;
43529+
43530+ err = make_named_grant(dev, vnic->shared_page, "accel-ctrl-page",
43531+ &vnic->ctrl_page_gnt);
43532+ if (err) {
43533+ EPRINTK("couldn't make ctrl-page named grant\n");
43534+ goto fail_ctrl_page_grant;
43535+ }
43536+
43537+ err = make_named_grant(dev, (u8*)vnic->shared_page + PAGE_SIZE,
43538+ "accel-msg-page", &vnic->msg_page_gnt);
43539+ if (err) {
43540+ EPRINTK("couldn't make msg-page named grant\n");
43541+ goto fail_msg_page_grant;
43542+ }
43543+
43544+ /* Create xenbus msg event channel */
43545+ err = bind_listening_port_to_irqhandler
43546+ (dev->otherend_id, netfront_accel_msg_channel_irq_from_bend,
43547+ SA_SAMPLE_RANDOM, "vnicctrl", vnic);
43548+ if (err < 0) {
43549+ EPRINTK("Couldn't bind msg event channel\n");
43550+ goto fail_msg_irq;
43551+ }
43552+ vnic->msg_channel_irq = err;
43553+ vnic->msg_channel = irq_to_evtchn_port(vnic->msg_channel_irq);
43554+
43555+ /* Create xenbus net event channel */
43556+ err = bind_listening_port_to_irqhandler
43557+ (dev->otherend_id, netfront_accel_net_channel_irq_from_bend,
43558+ SA_SAMPLE_RANDOM, "vnicfront", vnic);
43559+ if (err < 0) {
43560+ EPRINTK("Couldn't bind net event channel\n");
43561+ goto fail_net_irq;
43562+ }
43563+ vnic->net_channel_irq = err;
43564+ vnic->net_channel = irq_to_evtchn_port(vnic->net_channel_irq);
43565+ /* Want to ensure we don't get interrupts before we're ready */
43566+ netfront_accel_disable_net_interrupts(vnic);
43567+
43568+ DPRINTK("otherend %d has msg ch %u (%u) and net ch %u (%u)\n",
43569+ dev->otherend_id, vnic->msg_channel, vnic->msg_channel_irq,
43570+ vnic->net_channel, vnic->net_channel_irq);
43571+
43572+ do {
43573+ err = xenbus_transaction_start(&tr);
43574+ if (err != 0) {
43575+ EPRINTK("%s: Transaction start failed %d\n",
43576+ __FUNCTION__, err);
43577+ goto fail_transaction;
43578+ }
43579+
43580+ err = xenbus_printf(tr, dev->nodename, "accel-msg-channel",
43581+ "%u", vnic->msg_channel);
43582+ if (err != 0) {
43583+ EPRINTK("%s: event channel xenbus write failed %d\n",
43584+ __FUNCTION__, err);
43585+ xenbus_transaction_end(tr, 1);
43586+ goto fail_transaction;
43587+ }
43588+
43589+ err = xenbus_printf(tr, dev->nodename, "accel-net-channel",
43590+ "%u", vnic->net_channel);
43591+ if (err != 0) {
43592+ EPRINTK("%s: net channel xenbus write failed %d\n",
43593+ __FUNCTION__, err);
43594+ xenbus_transaction_end(tr, 1);
43595+ goto fail_transaction;
43596+ }
43597+
43598+ err = xenbus_transaction_end(tr, 0);
43599+ } while (err == -EAGAIN);
43600+
43601+ if (err != 0) {
43602+ EPRINTK("%s: Transaction end failed %d\n", __FUNCTION__, err);
43603+ goto fail_transaction;
43604+ }
43605+
43606+ DPRINTK("Completed setting up domU shared state\n");
43607+
43608+ return 0;
43609+
43610+fail_transaction:
43611+
43612+ unbind_from_irqhandler(vnic->net_channel_irq, vnic);
43613+fail_net_irq:
43614+
43615+ unbind_from_irqhandler(vnic->msg_channel_irq, vnic);
43616+fail_msg_irq:
43617+
43618+ remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt);
43619+fail_msg_page_grant:
43620+
43621+ remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt);
43622+fail_ctrl_page_grant:
43623+
43624+ free_pages((unsigned long)vnic->shared_page, 1);
43625+ vnic->shared_page = NULL;
43626+fail_shared_page:
43627+
43628+ netfront_accel_fini_bufs(vnic->rx_bufs);
43629+fail_rx_bufs:
43630+
43631+ netfront_accel_fini_bufs(vnic->tx_bufs);
43632+fail_tx_bufs:
43633+
43634+ /* Undo the memory allocation created when we got the HELLO */
43635+ netfront_accel_free_buffer_mem(&vnic->bufpages,
43636+ vnic->rx_bufs,
43637+ vnic->tx_bufs);
43638+
43639+ DPRINTK("Failed to setup domU shared state with code %d\n", err);
43640+
43641+ return err;
43642+}
43643+
43644+
43645+static void vnic_remove_domU_shared_state(struct xenbus_device *dev,
43646+ netfront_accel_vnic *vnic)
43647+{
43648+ struct xenbus_transaction tr;
43649+
43650+ /*
43651+ * Don't remove any watches because we currently hold the
43652+ * mutex and the watches take the mutex.
43653+ */
43654+
43655+ DPRINTK("%s: removing event channel irq handlers %d %d\n",
43656+ __FUNCTION__, vnic->net_channel_irq, vnic->msg_channel_irq);
43657+ do {
43658+ if (xenbus_transaction_start(&tr) != 0)
43659+ break;
43660+ xenbus_rm(tr, dev->nodename, "accel-msg-channel");
43661+ xenbus_rm(tr, dev->nodename, "accel-net-channel");
43662+ } while (xenbus_transaction_end(tr, 0) == -EAGAIN);
43663+
43664+ unbind_from_irqhandler(vnic->net_channel_irq, vnic);
43665+ unbind_from_irqhandler(vnic->msg_channel_irq, vnic);
43666+
43667+ /* ungrant pages for msg channel */
43668+ remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt);
43669+ remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt);
43670+ free_pages((unsigned long)vnic->shared_page, 1);
43671+ vnic->shared_page = NULL;
43672+
43673+ /* ungrant pages for buffers, and free buffer memory */
43674+ netfront_accel_free_buffer_mem(&vnic->bufpages,
43675+ vnic->rx_bufs,
43676+ vnic->tx_bufs);
43677+ netfront_accel_fini_bufs(vnic->rx_bufs);
43678+ netfront_accel_fini_bufs(vnic->tx_bufs);
43679+}
43680+
43681+
43682+static void vnic_setup_dom0_shared_state(struct xenbus_device *dev,
43683+ netfront_accel_vnic *vnic)
43684+{
43685+ DPRINTK("Setting up dom0 shared state\n");
43686+
43687+ netfront_accel_vi_ctor(vnic);
43688+
43689+ /*
43690+ * Message processing will be enabled when this function
43691+ * returns, but we might have missed an interrupt. Schedule a
43692+ * check just in case.
43693+ */
43694+ queue_work(netfront_accel_workqueue, &vnic->msg_from_bend);
43695+}
43696+
43697+
43698+static void vnic_remove_dom0_shared_state(struct xenbus_device *dev,
43699+ netfront_accel_vnic *vnic)
43700+{
43701+ DPRINTK("Removing dom0 shared state\n");
43702+
43703+ vnic_stop_fastpath(vnic);
43704+
43705+ netfront_accel_vi_dtor(vnic);
43706+}
43707+
43708+
43709+/*************************************************************************/
43710+
43711+/*
43712+ * The following code handles accelstate changes between the frontend
43713+ * and the backend. In response to transitions, calls the following
43714+ * functions in matching pairs:
43715+ *
43716+ * vnic_setup_domU_shared_state
43717+ * vnic_remove_domU_shared_state
43718+ *
43719+ * vnic_setup_dom0_shared_state
43720+ * vnic_remove_dom0_shared_state
43721+ *
43722+ * Valid state transitions for DomU are as follows:
43723+ *
43724+ * Closed->Init on probe or in response to Init from dom0
43725+ *
43726+ * Init->Connected in response to Init from dom0
43727+ * Init->Closing on error providing dom0 is in Init
43728+ * Init->Closed on remove or in response to Closing from dom0
43729+ *
43730+ * Connected->Closing on error/remove
43731+ * Connected->Closed in response to Closing from dom0
43732+ *
43733+ * Closing->Closed in response to Closing from dom0
43734+ *
43735+ */
43736+
43737+
43738+/* Function to deal with Xenbus accel state change in backend */
43739+static void netfront_accel_backend_accel_changed(netfront_accel_vnic *vnic,
43740+ XenbusState backend_state)
43741+{
43742+ struct xenbus_device *dev = vnic->dev;
43743+ XenbusState frontend_state;
43744+ int state;
43745+
43746+ DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n",
43747+ __FUNCTION__, xenbus_strstate(vnic->backend_state),
43748+ xenbus_strstate(backend_state), dev->nodename, dev->otherend);
43749+
43750+ /*
43751+ * Ignore duplicate state changes. This can happen if the
43752+ * backend changes state twice in quick succession and the
43753+ * first watch fires in the frontend after the second
43754+ * transition has completed.
43755+ */
43756+ if (vnic->backend_state == backend_state)
43757+ return;
43758+
43759+ vnic->backend_state = backend_state;
43760+ frontend_state = vnic->frontend_state;
43761+
43762+ switch (backend_state) {
43763+ case XenbusStateInitialising:
43764+ /*
43765+ * It's possible for us to miss the closed state from
43766+ * dom0, so do the work here.
43767+ */
43768+ if (vnic->domU_state_is_setup) {
43769+ vnic_remove_domU_shared_state(dev, vnic);
43770+ vnic->domU_state_is_setup = 0;
43771+ }
43772+
43773+ if (frontend_state != XenbusStateInitialising) {
43774+ /* Make sure the backend doesn't go away. */
43775+ frontend_state = XenbusStateInitialising;
43776+ net_accel_update_state(dev, frontend_state);
43777+ xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state);
43778+ backend_state = (XenbusState)state;
43779+ if (backend_state != XenbusStateInitialising)
43780+ break;
43781+ }
43782+
43783+ /* Start the new connection. */
43784+ if (!vnic->removing) {
43785+ BUG_ON(vnic->domU_state_is_setup);
43786+ if (vnic_setup_domU_shared_state(dev, vnic) == 0) {
43787+ vnic->domU_state_is_setup = 1;
43788+ frontend_state = XenbusStateConnected;
43789+ } else
43790+ frontend_state = XenbusStateClosing;
43791+ }
43792+ break;
43793+ case XenbusStateConnected:
43794+ if (vnic->domU_state_is_setup &&
43795+ !vnic->dom0_state_is_setup) {
43796+ vnic_setup_dom0_shared_state(dev, vnic);
43797+ vnic->dom0_state_is_setup = 1;
43798+ }
43799+ break;
43800+ default:
43801+ case XenbusStateClosing:
43802+ if (vnic->dom0_state_is_setup) {
43803+ vnic_remove_dom0_shared_state(dev, vnic);
43804+ vnic->dom0_state_is_setup = 0;
43805+ }
43806+ frontend_state = XenbusStateClosed;
43807+ break;
43808+ case XenbusStateUnknown:
43809+ case XenbusStateClosed:
43810+ if (vnic->domU_state_is_setup) {
43811+ vnic_remove_domU_shared_state(dev, vnic);
43812+ vnic->domU_state_is_setup = 0;
43813+ }
43814+ break;
43815+ }
43816+
43817+ if (frontend_state != vnic->frontend_state) {
43818+ DPRINTK("Switching from state %s (%d) to %s (%d)\n",
43819+ xenbus_strstate(vnic->frontend_state),
43820+ vnic->frontend_state,
43821+ xenbus_strstate(frontend_state), frontend_state);
43822+ vnic->frontend_state = frontend_state;
43823+ net_accel_update_state(dev, frontend_state);
43824+ }
43825+
43826+ wake_up(&vnic->state_wait_queue);
43827+}
43828+
43829+
43830+static void backend_accel_state_change(struct xenbus_watch *watch,
43831+ const char **vec, unsigned int len)
43832+{
43833+ int state;
43834+ netfront_accel_vnic *vnic;
43835+ struct xenbus_device *dev;
43836+
43837+ DPRINTK("%s\n", __FUNCTION__);
43838+
43839+ vnic = container_of(watch, struct netfront_accel_vnic,
43840+ backend_accel_watch);
43841+
43842+ mutex_lock(&vnic->vnic_mutex);
43843+
43844+ dev = vnic->dev;
43845+
43846+ state = (int)XenbusStateUnknown;
43847+ xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state);
43848+ netfront_accel_backend_accel_changed(vnic, state);
43849+
43850+ mutex_unlock(&vnic->vnic_mutex);
43851+}
43852+
43853+
43854+static int setup_dom0_accel_watch(struct xenbus_device *dev,
43855+ netfront_accel_vnic *vnic)
43856+{
43857+ int err;
43858+
43859+ DPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate");
43860+
43861+ err = xenbus_watch_path2(dev, dev->otherend, "accelstate",
43862+ &vnic->backend_accel_watch,
43863+ backend_accel_state_change);
43864+ if (err) {
43865+ EPRINTK("%s: Failed to register xenbus watch: %d\n",
43866+ __FUNCTION__, err);
43867+ goto fail;
43868+ }
43869+ return 0;
43870+ fail:
43871+ vnic->backend_accel_watch.node = NULL;
43872+ return err;
43873+}
43874+
43875+
43876+int netfront_accel_probe(struct net_device *net_dev, struct xenbus_device *dev)
43877+{
43878+ netfront_accel_vnic *vnic;
43879+ int err;
43880+
43881+ DPRINTK("Probe passed device %s\n", dev->nodename);
43882+
43883+ vnic = netfront_accel_vnic_ctor(net_dev, dev);
43884+ if (IS_ERR(vnic))
43885+ return PTR_ERR(vnic);
43886+
43887+ /*
43888+	 * Set up a watch on the backend accel state. This sets things
43889+ * going.
43890+ */
43891+ err = setup_dom0_accel_watch(dev, vnic);
43892+ if (err) {
43893+ netfront_accel_vnic_dtor(vnic);
43894+ EPRINTK("%s: probe failed with code %d\n", __FUNCTION__, err);
43895+ return err;
43896+ }
43897+
43898+ /*
43899+ * Indicate to the other end that we're ready to start unless
43900+ * the watch has already fired.
43901+ */
43902+ mutex_lock(&vnic->vnic_mutex);
43903+ VPRINTK("setup success, updating accelstate\n");
43904+ if (vnic->frontend_state == XenbusStateClosed) {
43905+ vnic->frontend_state = XenbusStateInitialising;
43906+ net_accel_update_state(dev, XenbusStateInitialising);
43907+ }
43908+ mutex_unlock(&vnic->vnic_mutex);
43909+
43910+ DPRINTK("Probe done device %s\n", dev->nodename);
43911+
43912+ return 0;
43913+}
43914+
43915+
43916+int netfront_accel_remove(struct xenbus_device *dev)
43917+{
43918+ struct netfront_info *np =
43919+ (struct netfront_info *)dev->dev.driver_data;
43920+ netfront_accel_vnic *vnic = (netfront_accel_vnic *)np->accel_priv;
43921+
43922+ DPRINTK("%s %s\n", __FUNCTION__, dev->nodename);
43923+
43924+ BUG_ON(vnic == NULL);
43925+
43926+ mutex_lock(&vnic->vnic_mutex);
43927+
43928+ /* Reject any attempts to connect. */
43929+ vnic->removing = 1;
43930+
43931+ /* Close any existing connection. */
43932+ if (vnic->frontend_state == XenbusStateConnected) {
43933+ vnic->frontend_state = XenbusStateClosing;
43934+ net_accel_update_state(dev, XenbusStateClosing);
43935+ }
43936+
43937+ mutex_unlock(&vnic->vnic_mutex);
43938+
43939+ DPRINTK("%s waiting for release of %s\n", __FUNCTION__, dev->nodename);
43940+
43941+ /*
43942+ * Wait for the xenbus watch to release the shared resources.
43943+ * This indicates that dom0 has made the transition
43944+ * Closing->Closed or that dom0 was in Closed or Init and no
43945+ * resources were mapped.
43946+ */
43947+ wait_event(vnic->state_wait_queue,
43948+ !vnic->domU_state_is_setup);
43949+
43950+ /*
43951+	 * Now that we no longer need this watch, it is safe to remove
43952+	 * it (and thereby synchronise with its completion if outstanding)
43953+ */
43954+ DPRINTK("%s: unregistering xenbus accel watch\n",
43955+ __FUNCTION__);
43956+ unregister_xenbus_watch(&vnic->backend_accel_watch);
43957+ kfree(vnic->backend_accel_watch.node);
43958+
43959+ netfront_accel_vnic_dtor(vnic);
43960+
43961+ DPRINTK("%s done %s\n", __FUNCTION__, dev->nodename);
43962+
43963+ return 0;
43964+}
43965Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon.h
43966===================================================================
43967--- /dev/null 1970-01-01 00:00:00.000000000 +0000
43968+++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon.h 2008-02-20 09:32:49.000000000 +0100
43969@@ -0,0 +1,172 @@
43970+/****************************************************************************
43971+ * Copyright 2002-2005: Level 5 Networks Inc.
43972+ * Copyright 2005-2008: Solarflare Communications Inc,
43973+ * 9501 Jeronimo Road, Suite 250,
43974+ * Irvine, CA 92618, USA
43975+ *
43976+ * Maintained by Solarflare Communications
43977+ * <linux-xen-drivers@solarflare.com>
43978+ * <onload-dev@solarflare.com>
43979+ *
43980+ * This program is free software; you can redistribute it and/or modify it
43981+ * under the terms of the GNU General Public License version 2 as published
43982+ * by the Free Software Foundation, incorporated herein by reference.
43983+ *
43984+ * This program is distributed in the hope that it will be useful,
43985+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
43986+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43987+ * GNU General Public License for more details.
43988+ *
43989+ * You should have received a copy of the GNU General Public License
43990+ * along with this program; if not, write to the Free Software
43991+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
43992+ ****************************************************************************
43993+ */
43994+
43995+/*
43996+ * \author slp
43997+ * \brief Falcon specific definitions
43998+ * \date 2004/08
43999+ */
44000+
44001+#ifndef __EF_VI_FALCON_H__
44002+#define __EF_VI_FALCON_H__
44003+
44004+#define EFHW_4K 0x00001000u
44005+#define EFHW_8K 0x00002000u
44006+
44007+/* include the autogenerated register definitions */
44008+
44009+#include "ef_vi_falcon_core.h"
44010+#include "ef_vi_falcon_desc.h"
44011+#include "ef_vi_falcon_event.h"
44012+
44013+
44014+/*----------------------------------------------------------------------------
44015+ *
44016+ * Helpers to turn bit shifts into dword shifts and check that the bit fields
44017+ * haven't overflowed the dword etc. The aim is to preserve consistency with the
44018+ * autogenerated headers - once stable we could hard code.
44019+ *
44020+ *---------------------------------------------------------------------------*/
44021+
44022+/* mask constructors */
44023+#define __FALCON_MASK(WIDTH,T) ((((T)1) << (WIDTH)) - 1)
44024+#define __EFVI_MASK32(WIDTH) __FALCON_MASK((WIDTH),uint32_t)
44025+#define __EFVI_MASK64(WIDTH) __FALCON_MASK((WIDTH),uint64_t)
44026+
44027+#define __EFVI_FALCON_MASKFIELD32(LBN, WIDTH) ((uint32_t) \
44028+ (__EFVI_MASK32(WIDTH) << (LBN)))
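+/* For example, __EFVI_MASK32(18) == 0x3ffff, and
+ * __EFVI_FALCON_MASKFIELD32(4, 2) == 0x30: a 2-bit field mask shifted up
+ * to bit position 4 (its LBN).
+ */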
44029+
44030+/* constructors for fields which span the first and second dwords */
44031+#define __LW(LBN) (32 - LBN)
44032+#define LOW(v, LBN, WIDTH) ((uint32_t) \
44033+ (((v) & __EFVI_MASK64(__LW((LBN)))) << (LBN)))
44034+#define HIGH(v, LBN, WIDTH) ((uint32_t)(((v) >> __LW((LBN))) & \
44035+ __EFVI_MASK64((WIDTH - __LW((LBN))))))
44036+/* constructors for fields within the second dword */
44037+#define __DW2(LBN) ((LBN) - 32)
44038+
44039+/* constructors for fields which span the second and third dwords */
44040+#define __LW2(LBN) (64 - LBN)
44041+#define LOW2(v, LBN, WIDTH) ((uint32_t) \
44042+ (((v) & __EFVI_MASK64(__LW2((LBN)))) << ((LBN) - 32)))
44043+#define HIGH2(v, LBN, WIDTH) ((uint32_t) \
44044+ (((v) >> __LW2((LBN))) & __EFVI_MASK64((WIDTH - __LW2((LBN))))))
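+/* For example, a field with LBN 40 and WIDTH 30 spans the second and
+ * third dwords: LOW2() places the low 24 bits of the value at bit 8 of
+ * the second dword, and HIGH2() places the remaining 6 bits at bit 0 of
+ * the third dword.
+ */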
44045+
44046+/* constructors for fields within the third dword */
44047+#define __DW3(LBN) ((LBN) - 64)
44048+
44049+
44050+/* constructors for fields which span the third and fourth dwords */
44051+#define __LW3(LBN) (96 - LBN)
44052+#define LOW3(v, LBN, WIDTH) ((uint32_t) \
44053+ (((v) & __EFVI_MASK64(__LW3((LBN)))) << ((LBN) - 64)))
44054+#define HIGH3(v, LBN, WIDTH) ((uint32_t) \
44055+ (((v) >> __LW3((LBN))) & __EFVI_MASK64((WIDTH - __LW3((LBN))))))
44056+
44057+/* constructors for fields within the fourth dword */
44058+#define __DW4(LBN) ((LBN) - 96)
44059+
44060+/* checks that the autogenerated headers are consistent with our model */
44061+#define WIDTHCHCK(a, b) ef_assert((a) == (b))
44062+#define RANGECHCK(v, WIDTH) \
44063+ ef_assert(((uint64_t)(v) & ~(__EFVI_MASK64((WIDTH)))) == 0)
44064+
44065+/* fields within the first dword */
44066+#define DWCHCK(LBN, WIDTH) ef_assert(((LBN) >= 0) &&(((LBN)+(WIDTH)) <= 32))
44067+
44068+/* fields which span the first and second dwords */
44069+#define LWCHK(LBN, WIDTH) ef_assert(WIDTH >= __LW(LBN))
44070+
44071+/*----------------------------------------------------------------------------
44072+ *
44073+ * Buffer virtual addresses (4K buffers)
44074+ *
44075+ *---------------------------------------------------------------------------*/
44076+
44077+/* Form a buffer virtual address from buffer ID and offset. If the offset
44078+** is larger than the buffer size, then the buffer indexed will be
44079+** calculated appropriately. It is the responsibility of the caller to
44080+** ensure that they have valid buffers programmed at that address.
44081+*/
44082+#define EFVI_FALCON_VADDR_4K_S (12)
44083+#define EFVI_FALCON_VADDR_M 0xfffff /* post shift mask */
44084+
44085+
44086+#define EFVI_FALCON_BUFFER_4K_ADDR(id,off) \
44087+ (((id) << EFVI_FALCON_VADDR_4K_S) + (off))
44088+
44089+#define EFVI_FALCON_BUFFER_4K_PAGE(vaddr) \
44090+ (((vaddr) >> EFVI_FALCON_VADDR_4K_S) & EFVI_FALCON_VADDR_M)
44091+
44092+#define EFVI_FALCON_BUFFER_4K_OFF(vaddr) \
44093+ ((vaddr) & __EFVI_MASK32(EFVI_FALCON_VADDR_4K_S))
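+/* For example, EFVI_FALCON_BUFFER_4K_ADDR(0x12, 0x34) == 0x12034,
+ * EFVI_FALCON_BUFFER_4K_PAGE(0x12034) == 0x12 and
+ * EFVI_FALCON_BUFFER_4K_OFF(0x12034) == 0x34.
+ */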
44094+
44095+
44096+/*----------------------------------------------------------------------------
44097+ *
44098+ * Masks
44099+ *
44100+ *---------------------------------------------------------------------------*/
44101+
44102+#define EFVI_FALCON_CLOCK_ASIC_HZ (125000)
44103+#define EFVI_FALCON_CLOCK_FPGA_HZ (62500)
44104+#define EFVI_FALCON_CLOCK_HZ EFVI_FALCON_CLOCK_ASIC_HZ
44105+
44106+
44107+/*----------------------------------------------------------------------------
44108+ *
44109+ * Timers
44110+ *
44111+ *---------------------------------------------------------------------------*/
44112+
44113+/* Event-Queue Timer granularity - measured in us
44114+   Given by: 4096 * 3 cycles * clock period */
44115+
44116+#define EFVI_FALCON_EVQTIMER_PERIOD_US ((4096 * 3 * 1000) / EFVI_FALCON_CLOCK_HZ)
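+/* e.g. with the ASIC clock this evaluates to 98 (4096 * 3 cycles at
+ * 8 ns per cycle = ~98.3 us, truncated by integer division); with the
+ * FPGA clock it would be 196.
+ */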
44117+
44118+/* mode bits */
44119+#define EFVI_FALCON_TIMER_MODE_DIS 0 /* disabled */
44120+#define EFVI_FALCON_TIMER_MODE_RUN 1 /* started counting right away */
44121+#define EFVI_FALCON_TIMER_MODE_HOLD 2 /* trigger mode (user queues) */
44122+
44123+#define EFVI_FALCON_EVQTIMER_HOLD (EFVI_FALCON_TIMER_MODE_HOLD << TIMER_MODE_LBN)
44124+#define EFVI_FALCON_EVQTIMER_RUN (EFVI_FALCON_TIMER_MODE_RUN << TIMER_MODE_LBN)
44125+#define EFVI_FALCON_EVQTIMER_DISABLE (EFVI_FALCON_TIMER_MODE_DIS << TIMER_MODE_LBN)
44126+
44127+
44128+/* ---- efhw_event_t helpers --- */
44129+
44130+#define EFVI_FALCON_EVENT_CODE(evp) \
44131+ ((evp)->u64 & EFVI_FALCON_EVENT_CODE_MASK)
44132+
44133+#define EFVI_FALCON_EVENT_SW_DATA_MASK 0x0000ffff
44134+
44135+#define __EFVI_FALCON_OPEN_MASK(WIDTH) ((((uint64_t)1) << (WIDTH)) - 1)
44136+
44137+#define EFVI_FALCON_EVENT_CODE_MASK \
44138+ (__EFVI_FALCON_OPEN_MASK(EV_CODE_WIDTH) << EV_CODE_LBN)
44139+
44140+
44141+#endif /* __EF_VI_FALCON_H__ */
44142Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_core.h
44143===================================================================
44144--- /dev/null 1970-01-01 00:00:00.000000000 +0000
44145+++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_core.h 2008-02-20 09:32:49.000000000 +0100
44146@@ -0,0 +1,1075 @@
44147+
44148+#define EFVI_FALCON_EXTENDED_P_BAR 1
44149+
44150+//////////////---- Bus Interface Unit Registers C Header ----//////////////
44151+#define IOM_IND_ADR_REG_OFST 0x0 // IO-mapped indirect access address register
44152+ #define IOM_AUTO_ADR_INC_EN_LBN 16
44153+ #define IOM_AUTO_ADR_INC_EN_WIDTH 1
44154+ #define IOM_IND_ADR_LBN 0
44155+ #define IOM_IND_ADR_WIDTH 16
44156+#define IOM_IND_DAT_REG_OFST 0x4 // IO-mapped indirect access data register
44157+ #define IOM_IND_DAT_LBN 0
44158+ #define IOM_IND_DAT_WIDTH 32
44159+#define ADR_REGION_REG_KER_OFST 0x0 // Address region register
44160+#define ADR_REGION_REG_OFST 0x0 // Address region register
44161+ #define ADR_REGION3_LBN 96
44162+ #define ADR_REGION3_WIDTH 18
44163+ #define ADR_REGION2_LBN 64
44164+ #define ADR_REGION2_WIDTH 18
44165+ #define ADR_REGION1_LBN 32
44166+ #define ADR_REGION1_WIDTH 18
44167+ #define ADR_REGION0_LBN 0
44168+ #define ADR_REGION0_WIDTH 18
44169+#define INT_EN_REG_KER_OFST 0x10 // Kernel driver Interrupt enable register
44170+ #define KER_INT_CHAR_LBN 4
44171+ #define KER_INT_CHAR_WIDTH 1
44172+ #define KER_INT_KER_LBN 3
44173+ #define KER_INT_KER_WIDTH 1
44174+ #define ILL_ADR_ERR_INT_EN_KER_LBN 2
44175+ #define ILL_ADR_ERR_INT_EN_KER_WIDTH 1
44176+ #define SRM_PERR_INT_EN_KER_LBN 1
44177+ #define SRM_PERR_INT_EN_KER_WIDTH 1
44178+ #define DRV_INT_EN_KER_LBN 0
44179+ #define DRV_INT_EN_KER_WIDTH 1
44180+#define INT_EN_REG_CHAR_OFST 0x20 // Char Driver interrupt enable register
44181+ #define CHAR_INT_CHAR_LBN 4
44182+ #define CHAR_INT_CHAR_WIDTH 1
44183+ #define CHAR_INT_KER_LBN 3
44184+ #define CHAR_INT_KER_WIDTH 1
44185+ #define ILL_ADR_ERR_INT_EN_CHAR_LBN 2
44186+ #define ILL_ADR_ERR_INT_EN_CHAR_WIDTH 1
44187+ #define SRM_PERR_INT_EN_CHAR_LBN 1
44188+ #define SRM_PERR_INT_EN_CHAR_WIDTH 1
44189+ #define DRV_INT_EN_CHAR_LBN 0
44190+ #define DRV_INT_EN_CHAR_WIDTH 1
44191+#define INT_ADR_REG_KER_OFST 0x30 // Interrupt host address for Kernel driver
44192+ #define INT_ADR_KER_LBN 0
44193+ #define INT_ADR_KER_WIDTH 64
44194+ #define DRV_INT_KER_LBN 32
44195+ #define DRV_INT_KER_WIDTH 1
44196+ #define EV_FF_HALF_INT_KER_LBN 3
44197+ #define EV_FF_HALF_INT_KER_WIDTH 1
44198+ #define EV_FF_FULL_INT_KER_LBN 2
44199+ #define EV_FF_FULL_INT_KER_WIDTH 1
44200+ #define ILL_ADR_ERR_INT_KER_LBN 1
44201+ #define ILL_ADR_ERR_INT_KER_WIDTH 1
44202+ #define SRAM_PERR_INT_KER_LBN 0
44203+ #define SRAM_PERR_INT_KER_WIDTH 1
44204+#define INT_ADR_REG_CHAR_OFST 0x40 // Interrupt host address for Char driver
44205+ #define INT_ADR_CHAR_LBN 0
44206+ #define INT_ADR_CHAR_WIDTH 64
44207+ #define DRV_INT_CHAR_LBN 32
44208+ #define DRV_INT_CHAR_WIDTH 1
44209+ #define EV_FF_HALF_INT_CHAR_LBN 3
44210+ #define EV_FF_HALF_INT_CHAR_WIDTH 1
44211+ #define EV_FF_FULL_INT_CHAR_LBN 2
44212+ #define EV_FF_FULL_INT_CHAR_WIDTH 1
44213+ #define ILL_ADR_ERR_INT_CHAR_LBN 1
44214+ #define ILL_ADR_ERR_INT_CHAR_WIDTH 1
44215+ #define SRAM_PERR_INT_CHAR_LBN 0
44216+ #define SRAM_PERR_INT_CHAR_WIDTH 1
44217+#define INT_ISR0_B0_OFST 0x90 // B0 only
44218+#define INT_ISR1_B0_OFST 0xA0
44219+#define INT_ACK_REG_KER_A1_OFST 0x50 // Kernel interrupt acknowledge register
44220+ #define RESERVED_LBN 0
44221+ #define RESERVED_WIDTH 32
44222+#define INT_ACK_REG_CHAR_A1_OFST 0x60 // CHAR interrupt acknowledge register
44223+ #define RESERVED_LBN 0
44224+ #define RESERVED_WIDTH 32
44225+//////////////---- Global CSR Registers C Header ----//////////////
44226+#define STRAP_REG_KER_OFST 0x200 // ASIC strap status register
44227+#define STRAP_REG_OFST 0x200 // ASIC strap status register
44228+ #define ONCHIP_SRAM_LBN 16
44229+ #define ONCHIP_SRAM_WIDTH 0
44230+ #define STRAP_ISCSI_EN_LBN 3
44231+ #define STRAP_ISCSI_EN_WIDTH 1
44232+ #define STRAP_PINS_LBN 0
44233+ #define STRAP_PINS_WIDTH 3
44234+#define GPIO_CTL_REG_KER_OFST 0x210 // GPIO control register
44235+#define GPIO_CTL_REG_OFST 0x210 // GPIO control register
44236+ #define GPIO_OEN_LBN 24
44237+ #define GPIO_OEN_WIDTH 4
44238+ #define GPIO_OUT_LBN 16
44239+ #define GPIO_OUT_WIDTH 4
44240+ #define GPIO_IN_LBN 8
44241+ #define GPIO_IN_WIDTH 4
44242+ #define GPIO_PWRUP_VALUE_LBN 0
44243+ #define GPIO_PWRUP_VALUE_WIDTH 4
44244+#define GLB_CTL_REG_KER_OFST 0x220 // Global control register
44245+#define GLB_CTL_REG_OFST 0x220 // Global control register
44246+ #define SWRST_LBN 0
44247+ #define SWRST_WIDTH 1
44248+#define FATAL_INTR_REG_KER_OFST 0x230 // Fatal interrupt register for Kernel
44249+ #define PCI_BUSERR_INT_KER_EN_LBN 43
44250+ #define PCI_BUSERR_INT_KER_EN_WIDTH 1
44251+ #define SRAM_OOB_INT_KER_EN_LBN 42
44252+ #define SRAM_OOB_INT_KER_EN_WIDTH 1
44253+ #define BUFID_OOB_INT_KER_EN_LBN 41
44254+ #define BUFID_OOB_INT_KER_EN_WIDTH 1
44255+ #define MEM_PERR_INT_KER_EN_LBN 40
44256+ #define MEM_PERR_INT_KER_EN_WIDTH 1
44257+ #define RBUF_OWN_INT_KER_EN_LBN 39
44258+ #define RBUF_OWN_INT_KER_EN_WIDTH 1
44259+ #define TBUF_OWN_INT_KER_EN_LBN 38
44260+ #define TBUF_OWN_INT_KER_EN_WIDTH 1
44261+ #define RDESCQ_OWN_INT_KER_EN_LBN 37
44262+ #define RDESCQ_OWN_INT_KER_EN_WIDTH 1
44263+ #define TDESCQ_OWN_INT_KER_EN_LBN 36
44264+ #define TDESCQ_OWN_INT_KER_EN_WIDTH 1
44265+ #define EVQ_OWN_INT_KER_EN_LBN 35
44266+ #define EVQ_OWN_INT_KER_EN_WIDTH 1
44267+ #define EVFF_OFLO_INT_KER_EN_LBN 34
44268+ #define EVFF_OFLO_INT_KER_EN_WIDTH 1
44269+ #define ILL_ADR_INT_KER_EN_LBN 33
44270+ #define ILL_ADR_INT_KER_EN_WIDTH 1
44271+ #define SRM_PERR_INT_KER_EN_LBN 32
44272+ #define SRM_PERR_INT_KER_EN_WIDTH 1
44273+ #define PCI_BUSERR_INT_KER_LBN 11
44274+ #define PCI_BUSERR_INT_KER_WIDTH 1
44275+ #define SRAM_OOB_INT_KER_LBN 10
44276+ #define SRAM_OOB_INT_KER_WIDTH 1
44277+ #define BUFID_OOB_INT_KER_LBN 9
44278+ #define BUFID_OOB_INT_KER_WIDTH 1
44279+ #define MEM_PERR_INT_KER_LBN 8
44280+ #define MEM_PERR_INT_KER_WIDTH 1
44281+ #define RBUF_OWN_INT_KER_LBN 7
44282+ #define RBUF_OWN_INT_KER_WIDTH 1
44283+ #define TBUF_OWN_INT_KER_LBN 6
44284+ #define TBUF_OWN_INT_KER_WIDTH 1
44285+ #define RDESCQ_OWN_INT_KER_LBN 5
44286+ #define RDESCQ_OWN_INT_KER_WIDTH 1
44287+ #define TDESCQ_OWN_INT_KER_LBN 4
44288+ #define TDESCQ_OWN_INT_KER_WIDTH 1
44289+ #define EVQ_OWN_INT_KER_LBN 3
44290+ #define EVQ_OWN_INT_KER_WIDTH 1
44291+ #define EVFF_OFLO_INT_KER_LBN 2
44292+ #define EVFF_OFLO_INT_KER_WIDTH 1
44293+ #define ILL_ADR_INT_KER_LBN 1
44294+ #define ILL_ADR_INT_KER_WIDTH 1
44295+ #define SRM_PERR_INT_KER_LBN 0
44296+ #define SRM_PERR_INT_KER_WIDTH 1
44297+#define FATAL_INTR_REG_OFST 0x240 // Fatal interrupt register for Char
44298+ #define PCI_BUSERR_INT_CHAR_EN_LBN 43
44299+ #define PCI_BUSERR_INT_CHAR_EN_WIDTH 1
44300+ #define SRAM_OOB_INT_CHAR_EN_LBN 42
44301+ #define SRAM_OOB_INT_CHAR_EN_WIDTH 1
44302+ #define BUFID_OOB_INT_CHAR_EN_LBN 41
44303+ #define BUFID_OOB_INT_CHAR_EN_WIDTH 1
44304+ #define MEM_PERR_INT_CHAR_EN_LBN 40
44305+ #define MEM_PERR_INT_CHAR_EN_WIDTH 1
44306+ #define RBUF_OWN_INT_CHAR_EN_LBN 39
44307+ #define RBUF_OWN_INT_CHAR_EN_WIDTH 1
44308+ #define TBUF_OWN_INT_CHAR_EN_LBN 38
44309+ #define TBUF_OWN_INT_CHAR_EN_WIDTH 1
44310+ #define RDESCQ_OWN_INT_CHAR_EN_LBN 37
44311+ #define RDESCQ_OWN_INT_CHAR_EN_WIDTH 1
44312+ #define TDESCQ_OWN_INT_CHAR_EN_LBN 36
44313+ #define TDESCQ_OWN_INT_CHAR_EN_WIDTH 1
44314+ #define EVQ_OWN_INT_CHAR_EN_LBN 35
44315+ #define EVQ_OWN_INT_CHAR_EN_WIDTH 1
44316+ #define EVFF_OFLO_INT_CHAR_EN_LBN 34
44317+ #define EVFF_OFLO_INT_CHAR_EN_WIDTH 1
44318+ #define ILL_ADR_INT_CHAR_EN_LBN 33
44319+ #define ILL_ADR_INT_CHAR_EN_WIDTH 1
44320+ #define SRM_PERR_INT_CHAR_EN_LBN 32
44321+ #define SRM_PERR_INT_CHAR_EN_WIDTH 1
44322+ #define FATAL_INTR_REG_EN_BITS 0xffffffffffffffffULL
44323+ #define PCI_BUSERR_INT_CHAR_LBN 11
44324+ #define PCI_BUSERR_INT_CHAR_WIDTH 1
44325+ #define SRAM_OOB_INT_CHAR_LBN 10
44326+ #define SRAM_OOB_INT_CHAR_WIDTH 1
44327+ #define BUFID_OOB_INT_CHAR_LBN 9
44328+ #define BUFID_OOB_INT_CHAR_WIDTH 1
44329+ #define MEM_PERR_INT_CHAR_LBN 8
44330+ #define MEM_PERR_INT_CHAR_WIDTH 1
44331+ #define RBUF_OWN_INT_CHAR_LBN 7
44332+ #define RBUF_OWN_INT_CHAR_WIDTH 1
44333+ #define TBUF_OWN_INT_CHAR_LBN 6
44334+ #define TBUF_OWN_INT_CHAR_WIDTH 1
44335+ #define RDESCQ_OWN_INT_CHAR_LBN 5
44336+ #define RDESCQ_OWN_INT_CHAR_WIDTH 1
44337+ #define TDESCQ_OWN_INT_CHAR_LBN 4
44338+ #define TDESCQ_OWN_INT_CHAR_WIDTH 1
44339+ #define EVQ_OWN_INT_CHAR_LBN 3
44340+ #define EVQ_OWN_INT_CHAR_WIDTH 1
44341+ #define EVFF_OFLO_INT_CHAR_LBN 2
44342+ #define EVFF_OFLO_INT_CHAR_WIDTH 1
44343+ #define ILL_ADR_INT_CHAR_LBN 1
44344+ #define ILL_ADR_INT_CHAR_WIDTH 1
44345+ #define SRM_PERR_INT_CHAR_LBN 0
44346+ #define SRM_PERR_INT_CHAR_WIDTH 1
44347+#define DP_CTRL_REG_OFST 0x250 // Datapath control register
44348+ #define FLS_EVQ_ID_LBN 0
44349+ #define FLS_EVQ_ID_WIDTH 12
44350+#define MEM_STAT_REG_KER_OFST 0x260 // Memory status register
44351+#define MEM_STAT_REG_OFST 0x260 // Memory status register
44352+ #define MEM_PERR_VEC_LBN 53
44353+ #define MEM_PERR_VEC_WIDTH 38
44354+ #define MBIST_CORR_LBN 38
44355+ #define MBIST_CORR_WIDTH 15
44356+ #define MBIST_ERR_LBN 0
44357+ #define MBIST_ERR_WIDTH 38
44358+#define DEBUG_REG_KER_OFST 0x270 // Debug register
44359+#define DEBUG_REG_OFST 0x270 // Debug register
44360+ #define DEBUG_BLK_SEL2_LBN 47
44361+ #define DEBUG_BLK_SEL2_WIDTH 3
44362+ #define DEBUG_BLK_SEL1_LBN 44
44363+ #define DEBUG_BLK_SEL1_WIDTH 3
44364+ #define DEBUG_BLK_SEL0_LBN 41
44365+ #define DEBUG_BLK_SEL0_WIDTH 3
44366+ #define MISC_DEBUG_ADDR_LBN 36
44367+ #define MISC_DEBUG_ADDR_WIDTH 5
44368+ #define SERDES_DEBUG_ADDR_LBN 31
44369+ #define SERDES_DEBUG_ADDR_WIDTH 5
44370+ #define EM_DEBUG_ADDR_LBN 26
44371+ #define EM_DEBUG_ADDR_WIDTH 5
44372+ #define SR_DEBUG_ADDR_LBN 21
44373+ #define SR_DEBUG_ADDR_WIDTH 5
44374+ #define EV_DEBUG_ADDR_LBN 16
44375+ #define EV_DEBUG_ADDR_WIDTH 5
44376+ #define RX_DEBUG_ADDR_LBN 11
44377+ #define RX_DEBUG_ADDR_WIDTH 5
44378+ #define TX_DEBUG_ADDR_LBN 6
44379+ #define TX_DEBUG_ADDR_WIDTH 5
44380+ #define BIU_DEBUG_ADDR_LBN 1
44381+ #define BIU_DEBUG_ADDR_WIDTH 5
44382+ #define DEBUG_EN_LBN 0
44383+ #define DEBUG_EN_WIDTH 1
44384+#define DRIVER_REG0_KER_OFST 0x280 // Driver scratch register 0
44385+#define DRIVER_REG0_OFST 0x280 // Driver scratch register 0
44386+ #define DRIVER_DW0_LBN 0
44387+ #define DRIVER_DW0_WIDTH 32
44388+#define DRIVER_REG1_KER_OFST 0x290 // Driver scratch register 1
44389+#define DRIVER_REG1_OFST 0x290 // Driver scratch register 1
44390+ #define DRIVER_DW1_LBN 0
44391+ #define DRIVER_DW1_WIDTH 32
44392+#define DRIVER_REG2_KER_OFST 0x2A0 // Driver scratch register 2
44393+#define DRIVER_REG2_OFST 0x2A0 // Driver scratch register 2
44394+ #define DRIVER_DW2_LBN 0
44395+ #define DRIVER_DW2_WIDTH 32
44396+#define DRIVER_REG3_KER_OFST 0x2B0 // Driver scratch register 3
44397+#define DRIVER_REG3_OFST 0x2B0 // Driver scratch register 3
44398+ #define DRIVER_DW3_LBN 0
44399+ #define DRIVER_DW3_WIDTH 32
44400+#define DRIVER_REG4_KER_OFST 0x2C0 // Driver scratch register 4
44401+#define DRIVER_REG4_OFST 0x2C0 // Driver scratch register 4
44402+ #define DRIVER_DW4_LBN 0
44403+ #define DRIVER_DW4_WIDTH 32
44404+#define DRIVER_REG5_KER_OFST 0x2D0 // Driver scratch register 5
44405+#define DRIVER_REG5_OFST 0x2D0 // Driver scratch register 5
44406+ #define DRIVER_DW5_LBN 0
44407+ #define DRIVER_DW5_WIDTH 32
44408+#define DRIVER_REG6_KER_OFST 0x2E0 // Driver scratch register 6
44409+#define DRIVER_REG6_OFST 0x2E0 // Driver scratch register 6
44410+ #define DRIVER_DW6_LBN 0
44411+ #define DRIVER_DW6_WIDTH 32
44412+#define DRIVER_REG7_KER_OFST 0x2F0 // Driver scratch register 7
44413+#define DRIVER_REG7_OFST 0x2F0 // Driver scratch register 7
44414+ #define DRIVER_DW7_LBN 0
44415+ #define DRIVER_DW7_WIDTH 32
44416+#define ALTERA_BUILD_REG_OFST 0x300 // Altera build register
44417+#define ALTERA_BUILD_REG_OFST 0x300 // Altera build register
44418+ #define ALTERA_BUILD_VER_LBN 0
44419+ #define ALTERA_BUILD_VER_WIDTH 32
44420+
44421+/* so-called CSR spare register
44422+ - contains separate parity enable bits for the various internal memory blocks */
44423+#define MEM_PARITY_ERR_EN_REG_KER 0x310
44424+#define MEM_PARITY_ALL_BLOCKS_EN_LBN 64
44425+#define MEM_PARITY_ALL_BLOCKS_EN_WIDTH 38
44426+#define MEM_PARITY_TX_DATA_EN_LBN 72
44427+#define MEM_PARITY_TX_DATA_EN_WIDTH 2
44428+
44429+//////////////---- Event & Timer Module Registers C Header ----//////////////
44430+
44431+#if EFVI_FALCON_EXTENDED_P_BAR
44432+#define EVQ_RPTR_REG_KER_OFST 0x11B00 // Event queue read pointer register
44433+#else
44434+#define EVQ_RPTR_REG_KER_OFST 0x1B00 // Event queue read pointer register
44435+#endif
44436+
44437+#define EVQ_RPTR_REG_OFST 0xFA0000 // Event queue read pointer register array.
44438+ #define EVQ_RPTR_LBN 0
44439+ #define EVQ_RPTR_WIDTH 15
44440+
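+/* Field naming convention used throughout this header: each register field
+ * is described by an <NAME>_LBN (lowest bit number) / <NAME>_WIDTH pair.
+ * Illustrative sketch only - the FALCON_FIELD_MASK64 helper below is
+ * hypothetical and not part of this header:
+ *
+ *   #define FALCON_FIELD_MASK64(lbn, width) \
+ *       ((((uint64_t)1 << (width)) - 1) << (lbn))
+ *
+ *   FALCON_FIELD_MASK64(EVQ_RPTR_LBN, EVQ_RPTR_WIDTH) == 0x7fff
+ *
+ * Fields with LBN >= 64 belong to the upper qword of a 128-bit register and
+ * need the shift applied to that half instead.
+ */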
44441+#if EFVI_FALCON_EXTENDED_P_BAR
44442+#define EVQ_PTR_TBL_KER_OFST 0x11A00 // Event queue pointer table for kernel access
44443+#else
44444+#define EVQ_PTR_TBL_KER_OFST 0x1A00 // Event queue pointer table for kernel access
44445+#endif
44446+
44447+#define EVQ_PTR_TBL_CHAR_OFST 0xF60000 // Event queue pointer table for char direct access
44448+ #define EVQ_WKUP_OR_INT_EN_LBN 39
44449+ #define EVQ_WKUP_OR_INT_EN_WIDTH 1
44450+ #define EVQ_NXT_WPTR_LBN 24
44451+ #define EVQ_NXT_WPTR_WIDTH 15
44452+ #define EVQ_EN_LBN 23
44453+ #define EVQ_EN_WIDTH 1
44454+ #define EVQ_SIZE_LBN 20
44455+ #define EVQ_SIZE_WIDTH 3
44456+ #define EVQ_BUF_BASE_ID_LBN 0
44457+ #define EVQ_BUF_BASE_ID_WIDTH 20
44458+#define TIMER_CMD_REG_KER_OFST 0x420 // Timer table for kernel access. Page-mapped
44459+#define TIMER_CMD_REG_PAGE4_OFST 0x8420 // Timer table for user-level access. Page-mapped. For lowest 1K queues.
44460+#define TIMER_CMD_REG_PAGE123K_OFST 0x1000420 // Timer table for user-level access. Page-mapped. For upper 3K queues.
44461+#define TIMER_TBL_OFST 0xF70000 // Timer table for char driver direct access
44462+ #define TIMER_MODE_LBN 12
44463+ #define TIMER_MODE_WIDTH 2
44464+ #define TIMER_VAL_LBN 0
44465+ #define TIMER_VAL_WIDTH 12
44466+ #define TIMER_MODE_INT_HLDOFF 2
44467+ #define EVQ_BUF_SIZE_LBN 0
44468+ #define EVQ_BUF_SIZE_WIDTH 1
44469+#define DRV_EV_REG_KER_OFST 0x440 // Driver generated event register
44470+#define DRV_EV_REG_OFST 0x440 // Driver generated event register
44471+ #define DRV_EV_QID_LBN 64
44472+ #define DRV_EV_QID_WIDTH 12
44473+ #define DRV_EV_DATA_LBN 0
44474+ #define DRV_EV_DATA_WIDTH 64
44475+#define EVQ_CTL_REG_KER_OFST 0x450 // Event queue control register
44476+#define EVQ_CTL_REG_OFST 0x450 // Event queue control register
44477+ #define RX_EVQ_WAKEUP_MASK_B0_LBN 15
44478+ #define RX_EVQ_WAKEUP_MASK_B0_WIDTH 6
44479+ #define EVQ_OWNERR_CTL_LBN 14
44480+ #define EVQ_OWNERR_CTL_WIDTH 1
44481+ #define EVQ_FIFO_AF_TH_LBN 8
44482+ #define EVQ_FIFO_AF_TH_WIDTH 6
44483+ #define EVQ_FIFO_NOTAF_TH_LBN 0
44484+ #define EVQ_FIFO_NOTAF_TH_WIDTH 6
44485+//////////////---- SRAM Module Registers C Header ----//////////////
44486+#define BUF_TBL_CFG_REG_KER_OFST 0x600 // Buffer table configuration register
44487+#define BUF_TBL_CFG_REG_OFST 0x600 // Buffer table configuration register
44488+ #define BUF_TBL_MODE_LBN 3
44489+ #define BUF_TBL_MODE_WIDTH 1
44490+#define SRM_RX_DC_CFG_REG_KER_OFST 0x610 // SRAM receive descriptor cache configuration register
44491+#define SRM_RX_DC_CFG_REG_OFST 0x610 // SRAM receive descriptor cache configuration register
44492+ #define SRM_RX_DC_BASE_ADR_LBN 0
44493+ #define SRM_RX_DC_BASE_ADR_WIDTH 21
44494+#define SRM_TX_DC_CFG_REG_KER_OFST 0x620 // SRAM transmit descriptor cache configuration register
44495+#define SRM_TX_DC_CFG_REG_OFST 0x620 // SRAM transmit descriptor cache configuration register
44496+ #define SRM_TX_DC_BASE_ADR_LBN 0
44497+ #define SRM_TX_DC_BASE_ADR_WIDTH 21
44498+#define SRM_CFG_REG_KER_OFST 0x630 // SRAM configuration register
44499+#define SRM_CFG_REG_OFST 0x630 // SRAM configuration register
44500+ #define SRAM_OOB_ADR_INTEN_LBN 5
44501+ #define SRAM_OOB_ADR_INTEN_WIDTH 1
44502+ #define SRAM_OOB_BUF_INTEN_LBN 4
44503+ #define SRAM_OOB_BUF_INTEN_WIDTH 1
44504+ #define SRAM_BT_INIT_EN_LBN 3
44505+ #define SRAM_BT_INIT_EN_WIDTH 1
44506+ #define SRM_NUM_BANK_LBN 2
44507+ #define SRM_NUM_BANK_WIDTH 1
44508+ #define SRM_BANK_SIZE_LBN 0
44509+ #define SRM_BANK_SIZE_WIDTH 2
44510+#define BUF_TBL_UPD_REG_KER_OFST 0x650 // Buffer table update register
44511+#define BUF_TBL_UPD_REG_OFST 0x650 // Buffer table update register
44512+ #define BUF_UPD_CMD_LBN 63
44513+ #define BUF_UPD_CMD_WIDTH 1
44514+ #define BUF_CLR_CMD_LBN 62
44515+ #define BUF_CLR_CMD_WIDTH 1
44516+ #define BUF_CLR_END_ID_LBN 32
44517+ #define BUF_CLR_END_ID_WIDTH 20
44518+ #define BUF_CLR_START_ID_LBN 0
44519+ #define BUF_CLR_START_ID_WIDTH 20
44520+#define SRM_UPD_EVQ_REG_KER_OFST 0x660 // SRAM update event queue register
44521+#define SRM_UPD_EVQ_REG_OFST 0x660 // SRAM update event queue register
44522+ #define SRM_UPD_EVQ_ID_LBN 0
44523+ #define SRM_UPD_EVQ_ID_WIDTH 12
44524+#define SRAM_PARITY_REG_KER_OFST 0x670 // SRAM parity register.
44525+#define SRAM_PARITY_REG_OFST 0x670 // SRAM parity register.
44526+ #define FORCE_SRAM_PERR_LBN 0
44527+ #define FORCE_SRAM_PERR_WIDTH 1
44528+
44529+#if EFVI_FALCON_EXTENDED_P_BAR
44530+#define BUF_HALF_TBL_KER_OFST 0x18000 // Buffer table in half buffer table mode direct access by kernel driver
44531+#else
44532+#define BUF_HALF_TBL_KER_OFST 0x8000 // Buffer table in half buffer table mode direct access by kernel driver
44533+#endif
44534+
44535+
44536+#define BUF_HALF_TBL_OFST 0x800000 // Buffer table in half buffer table mode direct access by char driver
44537+ #define BUF_ADR_HBUF_ODD_LBN 44
44538+ #define BUF_ADR_HBUF_ODD_WIDTH 20
44539+ #define BUF_OWNER_ID_HBUF_ODD_LBN 32
44540+ #define BUF_OWNER_ID_HBUF_ODD_WIDTH 12
44541+ #define BUF_ADR_HBUF_EVEN_LBN 12
44542+ #define BUF_ADR_HBUF_EVEN_WIDTH 20
44543+ #define BUF_OWNER_ID_HBUF_EVEN_LBN 0
44544+ #define BUF_OWNER_ID_HBUF_EVEN_WIDTH 12
44545+
44546+
44547+#if EFVI_FALCON_EXTENDED_P_BAR
44548+#define BUF_FULL_TBL_KER_OFST 0x18000 // Buffer table in full buffer table mode direct access by kernel driver
44549+#else
44550+#define BUF_FULL_TBL_KER_OFST 0x8000 // Buffer table in full buffer table mode direct access by kernel driver
44551+#endif
44552+
44553+
44554+
44555+
44556+#define BUF_FULL_TBL_OFST 0x800000 // Buffer table in full buffer table mode direct access by char driver
44557+ #define IP_DAT_BUF_SIZE_LBN 50
44558+ #define IP_DAT_BUF_SIZE_WIDTH 1
44559+ #define BUF_ADR_REGION_LBN 48
44560+ #define BUF_ADR_REGION_WIDTH 2
44561+ #define BUF_ADR_FBUF_LBN 14
44562+ #define BUF_ADR_FBUF_WIDTH 34
44563+ #define BUF_OWNER_ID_FBUF_LBN 0
44564+ #define BUF_OWNER_ID_FBUF_WIDTH 14
44565+#define SRM_DBG_REG_OFST 0x3000000 // SRAM debug access
44566+ #define SRM_DBG_LBN 0
44567+ #define SRM_DBG_WIDTH 64
44568+//////////////---- RX Datapath Registers C Header ----//////////////
44569+
44570+#define RX_CFG_REG_KER_OFST 0x800 // Receive configuration register
44571+#define RX_CFG_REG_OFST 0x800 // Receive configuration register
44572+
44573+#if !defined(FALCON_64K_RXFIFO) && !defined(FALCON_PRE_02020029)
44574+# if !defined(FALCON_128K_RXFIFO)
44575+# define FALCON_128K_RXFIFO
44576+# endif
44577+#endif
44578+
44579+#if defined(FALCON_128K_RXFIFO)
44580+
44581+/* new for B0 */
44582+ #define RX_TOEP_TCP_SUPPRESS_B0_LBN 48
44583+ #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
44584+ #define RX_INGR_EN_B0_LBN 47
44585+ #define RX_INGR_EN_B0_WIDTH 1
44586+ #define RX_TOEP_IPV4_B0_LBN 46
44587+ #define RX_TOEP_IPV4_B0_WIDTH 1
44588+ #define RX_HASH_ALG_B0_LBN 45
44589+ #define RX_HASH_ALG_B0_WIDTH 1
44590+ #define RX_HASH_INSERT_HDR_B0_LBN 44
44591+ #define RX_HASH_INSERT_HDR_B0_WIDTH 1
44592+/* moved for B0 */
44593+ #define RX_DESC_PUSH_EN_B0_LBN 43
44594+ #define RX_DESC_PUSH_EN_B0_WIDTH 1
44595+ #define RX_RDW_PATCH_EN_LBN 42 /* Non head of line blocking */
44596+ #define RX_RDW_PATCH_EN_WIDTH 1
44597+ #define RX_PCI_BURST_SIZE_B0_LBN 39
44598+ #define RX_PCI_BURST_SIZE_B0_WIDTH 3
44599+ #define RX_OWNERR_CTL_B0_LBN 38
44600+ #define RX_OWNERR_CTL_B0_WIDTH 1
44601+ #define RX_XON_TX_TH_B0_LBN 33
44602+ #define RX_XON_TX_TH_B0_WIDTH 5
44603+ #define RX_XOFF_TX_TH_B0_LBN 28
44604+ #define RX_XOFF_TX_TH_B0_WIDTH 5
44605+ #define RX_USR_BUF_SIZE_B0_LBN 19
44606+ #define RX_USR_BUF_SIZE_B0_WIDTH 9
44607+ #define RX_XON_MAC_TH_B0_LBN 10
44608+ #define RX_XON_MAC_TH_B0_WIDTH 9
44609+ #define RX_XOFF_MAC_TH_B0_LBN 1
44610+ #define RX_XOFF_MAC_TH_B0_WIDTH 9
44611+ #define RX_XOFF_MAC_EN_B0_LBN 0
44612+ #define RX_XOFF_MAC_EN_B0_WIDTH 1
44613+
44614+#elif !defined(FALCON_PRE_02020029)
44615+/* new for B0 */
44616+ #define RX_TOEP_TCP_SUPPRESS_B0_LBN 46
44617+ #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
44618+ #define RX_INGR_EN_B0_LBN 45
44619+ #define RX_INGR_EN_B0_WIDTH 1
44620+ #define RX_TOEP_IPV4_B0_LBN 44
44621+ #define RX_TOEP_IPV4_B0_WIDTH 1
44622+ #define RX_HASH_ALG_B0_LBN 43
44623+ #define RX_HASH_ALG_B0_WIDTH 41
44624+ #define RX_HASH_INSERT_HDR_B0_LBN 42
44625+ #define RX_HASH_INSERT_HDR_B0_WIDTH 1
44626+/* moved for B0 */
44627+ #define RX_DESC_PUSH_EN_B0_LBN 41
44628+ #define RX_DESC_PUSH_EN_B0_WIDTH 1
44629+ #define RX_PCI_BURST_SIZE_B0_LBN 37
44630+ #define RX_PCI_BURST_SIZE_B0_WIDTH 3
44631+ #define RX_OWNERR_CTL_B0_LBN 36
44632+ #define RX_OWNERR_CTL_B0_WIDTH 1
44633+ #define RX_XON_TX_TH_B0_LBN 31
44634+ #define RX_XON_TX_TH_B0_WIDTH 5
44635+ #define RX_XOFF_TX_TH_B0_LBN 26
44636+ #define RX_XOFF_TX_TH_B0_WIDTH 5
44637+ #define RX_USR_BUF_SIZE_B0_LBN 17
44638+ #define RX_USR_BUF_SIZE_B0_WIDTH 9
44639+ #define RX_XON_MAC_TH_B0_LBN 9
44640+ #define RX_XON_MAC_TH_B0_WIDTH 8
44641+ #define RX_XOFF_MAC_TH_B0_LBN 1
44642+ #define RX_XOFF_MAC_TH_B0_WIDTH 8
44643+ #define RX_XOFF_MAC_EN_B0_LBN 0
44644+ #define RX_XOFF_MAC_EN_B0_WIDTH 1
44645+
44646+#else
44647+/* new for B0 */
44648+ #define RX_TOEP_TCP_SUPPRESS_B0_LBN 44
44649+ #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
44650+ #define RX_INGR_EN_B0_LBN 43
44651+ #define RX_INGR_EN_B0_WIDTH 1
44652+ #define RX_TOEP_IPV4_B0_LBN 42
44653+ #define RX_TOEP_IPV4_B0_WIDTH 1
44654+ #define RX_HASH_ALG_B0_LBN 41
44655+ #define RX_HASH_ALG_B0_WIDTH 41
44656+ #define RX_HASH_INSERT_HDR_B0_LBN 40
44657+ #define RX_HASH_INSERT_HDR_B0_WIDTH 1
44658+/* moved for B0 */
44659+ #define RX_DESC_PUSH_EN_B0_LBN 35
44660+ #define RX_DESC_PUSH_EN_B0_WIDTH 1
44661+ #define RX_PCI_BURST_SIZE_B0_LBN 35
44662+ #define RX_PCI_BURST_SIZE_B0_WIDTH 2
44663+ #define RX_OWNERR_CTL_B0_LBN 34
44664+ #define RX_OWNERR_CTL_B0_WIDTH 1
44665+ #define RX_XON_TX_TH_B0_LBN 29
44666+ #define RX_XON_TX_TH_B0_WIDTH 5
44667+ #define RX_XOFF_TX_TH_B0_LBN 24
44668+ #define RX_XOFF_TX_TH_B0_WIDTH 5
44669+ #define RX_USR_BUF_SIZE_B0_LBN 15
44670+ #define RX_USR_BUF_SIZE_B0_WIDTH 9
44671+ #define RX_XON_MAC_TH_B0_LBN 8
44672+ #define RX_XON_MAC_TH_B0_WIDTH 7
44673+ #define RX_XOFF_MAC_TH_B0_LBN 1
44674+ #define RX_XOFF_MAC_TH_B0_WIDTH 7
44675+ #define RX_XOFF_MAC_EN_B0_LBN 0
44676+ #define RX_XOFF_MAC_EN_B0_WIDTH 1
44677+
44678+#endif
44679+
44680+/* A0/A1 */
44681+ #define RX_PUSH_EN_A1_LBN 35
44682+ #define RX_PUSH_EN_A1_WIDTH 1
44683+ #define RX_PCI_BURST_SIZE_A1_LBN 31
44684+ #define RX_PCI_BURST_SIZE_A1_WIDTH 3
44685+ #define RX_OWNERR_CTL_A1_LBN 30
44686+ #define RX_OWNERR_CTL_A1_WIDTH 1
44687+ #define RX_XON_TX_TH_A1_LBN 25
44688+ #define RX_XON_TX_TH_A1_WIDTH 5
44689+ #define RX_XOFF_TX_TH_A1_LBN 20
44690+ #define RX_XOFF_TX_TH_A1_WIDTH 5
44691+ #define RX_USR_BUF_SIZE_A1_LBN 11
44692+ #define RX_USR_BUF_SIZE_A1_WIDTH 9
44693+ #define RX_XON_MAC_TH_A1_LBN 6
44694+ #define RX_XON_MAC_TH_A1_WIDTH 5
44695+ #define RX_XOFF_MAC_TH_A1_LBN 1
44696+ #define RX_XOFF_MAC_TH_A1_WIDTH 5
44697+ #define RX_XOFF_MAC_EN_A1_LBN 0
44698+ #define RX_XOFF_MAC_EN_A1_WIDTH 1
44699+
44700+#define RX_FILTER_CTL_REG_OFST 0x810 // Receive filter control registers
44701+ #define SCATTER_ENBL_NO_MATCH_Q_B0_LBN 40
44702+ #define SCATTER_ENBL_NO_MATCH_Q_B0_WIDTH 1
44703+ #define UDP_FULL_SRCH_LIMIT_LBN 32
44704+ #define UDP_FULL_SRCH_LIMIT_WIDTH 8
44705+ #define NUM_KER_LBN 24
44706+ #define NUM_KER_WIDTH 2
44707+ #define UDP_WILD_SRCH_LIMIT_LBN 16
44708+ #define UDP_WILD_SRCH_LIMIT_WIDTH 8
44709+ #define TCP_WILD_SRCH_LIMIT_LBN 8
44710+ #define TCP_WILD_SRCH_LIMIT_WIDTH 8
44711+ #define TCP_FULL_SRCH_LIMIT_LBN 0
44712+ #define TCP_FULL_SRCH_LIMIT_WIDTH 8
44713+#define RX_FLUSH_DESCQ_REG_KER_OFST 0x820 // Receive flush descriptor queue register
44714+#define RX_FLUSH_DESCQ_REG_OFST 0x820 // Receive flush descriptor queue register
44715+ #define RX_FLUSH_DESCQ_CMD_LBN 24
44716+ #define RX_FLUSH_DESCQ_CMD_WIDTH 1
44717+ #define RX_FLUSH_EVQ_ID_LBN 12
44718+ #define RX_FLUSH_EVQ_ID_WIDTH 12
44719+ #define RX_FLUSH_DESCQ_LBN 0
44720+ #define RX_FLUSH_DESCQ_WIDTH 12
44721+#define RX_DESC_UPD_REG_KER_OFST 0x830 // Kernel receive descriptor update register. Page-mapped
44722+#define RX_DESC_UPD_REG_PAGE4_OFST 0x8830 // Char & user receive descriptor update register. Page-mapped. For lowest 1K queues.
44723+#define RX_DESC_UPD_REG_PAGE123K_OFST 0x1000830 // Char & user receive descriptor update register. Page-mapped. For upper 3K queues.
44724+ #define RX_DESC_WPTR_LBN 96
44725+ #define RX_DESC_WPTR_WIDTH 12
44726+ #define RX_DESC_PUSH_CMD_LBN 95
44727+ #define RX_DESC_PUSH_CMD_WIDTH 1
44728+ #define RX_DESC_LBN 0
44729+ #define RX_DESC_WIDTH 64
44730+ #define RX_KER_DESC_LBN 0
44731+ #define RX_KER_DESC_WIDTH 64
44732+ #define RX_USR_DESC_LBN 0
44733+ #define RX_USR_DESC_WIDTH 32
44734+#define RX_DC_CFG_REG_KER_OFST 0x840 // Receive descriptor cache configuration register
44735+#define RX_DC_CFG_REG_OFST 0x840 // Receive descriptor cache configuration register
44736+ #define RX_DC_SIZE_LBN 0
44737+ #define RX_DC_SIZE_WIDTH 2
44738+#define RX_DC_PF_WM_REG_KER_OFST 0x850 // Receive descriptor cache pre-fetch watermark register
44739+#define RX_DC_PF_WM_REG_OFST 0x850 // Receive descriptor cache pre-fetch watermark register
44740+ #define RX_DC_PF_LWM_LO_LBN 0
44741+ #define RX_DC_PF_LWM_LO_WIDTH 6
44742+
44743+#define RX_RSS_TKEY_B0_OFST 0x860 // RSS Toeplitz hash key (B0 only)
44744+
44745+#define RX_NODESC_DROP_REG 0x880
44746+ #define RX_NODESC_DROP_CNT_LBN 0
44747+ #define RX_NODESC_DROP_CNT_WIDTH 16
44748+
44749+#define XM_TX_CFG_REG_OFST 0x1230
44750+ #define XM_AUTO_PAD_LBN 5
44751+ #define XM_AUTO_PAD_WIDTH 1
44752+
44753+#define RX_FILTER_TBL0_OFST 0xF00000 // Receive filter table - even entries
44754+ #define RSS_EN_0_B0_LBN 110
44755+ #define RSS_EN_0_B0_WIDTH 1
44756+ #define SCATTER_EN_0_B0_LBN 109
44757+ #define SCATTER_EN_0_B0_WIDTH 1
44758+ #define TCP_UDP_0_LBN 108
44759+ #define TCP_UDP_0_WIDTH 1
44760+ #define RXQ_ID_0_LBN 96
44761+ #define RXQ_ID_0_WIDTH 12
44762+ #define DEST_IP_0_LBN 64
44763+ #define DEST_IP_0_WIDTH 32
44764+ #define DEST_PORT_TCP_0_LBN 48
44765+ #define DEST_PORT_TCP_0_WIDTH 16
44766+ #define SRC_IP_0_LBN 16
44767+ #define SRC_IP_0_WIDTH 32
44768+ #define SRC_TCP_DEST_UDP_0_LBN 0
44769+ #define SRC_TCP_DEST_UDP_0_WIDTH 16
44770+#define RX_FILTER_TBL1_OFST 0xF00010 // Receive filter table - odd entries
44771+ #define RSS_EN_1_B0_LBN 110
44772+ #define RSS_EN_1_B0_WIDTH 1
44773+ #define SCATTER_EN_1_B0_LBN 109
44774+ #define SCATTER_EN_1_B0_WIDTH 1
44775+ #define TCP_UDP_1_LBN 108
44776+ #define TCP_UDP_1_WIDTH 1
44777+ #define RXQ_ID_1_LBN 96
44778+ #define RXQ_ID_1_WIDTH 12
44779+ #define DEST_IP_1_LBN 64
44780+ #define DEST_IP_1_WIDTH 32
44781+ #define DEST_PORT_TCP_1_LBN 48
44782+ #define DEST_PORT_TCP_1_WIDTH 16
44783+ #define SRC_IP_1_LBN 16
44784+ #define SRC_IP_1_WIDTH 32
44785+ #define SRC_TCP_DEST_UDP_1_LBN 0
44786+ #define SRC_TCP_DEST_UDP_1_WIDTH 16
44787+
44788+#if EFVI_FALCON_EXTENDED_P_BAR
44789+#define RX_DESC_PTR_TBL_KER_OFST 0x11800 // Receive descriptor pointer kernel access
44790+#else
44791+#define RX_DESC_PTR_TBL_KER_OFST 0x1800 // Receive descriptor pointer kernel access
44792+#endif
44793+
44794+
44795+#define RX_DESC_PTR_TBL_OFST 0xF40000 // Receive descriptor pointer table
44796+ #define RX_ISCSI_DDIG_EN_LBN 88
44797+ #define RX_ISCSI_DDIG_EN_WIDTH 1
44798+ #define RX_ISCSI_HDIG_EN_LBN 87
44799+ #define RX_ISCSI_HDIG_EN_WIDTH 1
44800+ #define RX_DESC_PREF_ACT_LBN 86
44801+ #define RX_DESC_PREF_ACT_WIDTH 1
44802+ #define RX_DC_HW_RPTR_LBN 80
44803+ #define RX_DC_HW_RPTR_WIDTH 6
44804+ #define RX_DESCQ_HW_RPTR_LBN 68
44805+ #define RX_DESCQ_HW_RPTR_WIDTH 12
44806+ #define RX_DESCQ_SW_WPTR_LBN 56
44807+ #define RX_DESCQ_SW_WPTR_WIDTH 12
44808+ #define RX_DESCQ_BUF_BASE_ID_LBN 36
44809+ #define RX_DESCQ_BUF_BASE_ID_WIDTH 20
44810+ #define RX_DESCQ_EVQ_ID_LBN 24
44811+ #define RX_DESCQ_EVQ_ID_WIDTH 12
44812+ #define RX_DESCQ_OWNER_ID_LBN 10
44813+ #define RX_DESCQ_OWNER_ID_WIDTH 14
44814+ #define RX_DESCQ_LABEL_LBN 5
44815+ #define RX_DESCQ_LABEL_WIDTH 5
44816+ #define RX_DESCQ_SIZE_LBN 3
44817+ #define RX_DESCQ_SIZE_WIDTH 2
44818+ #define RX_DESCQ_TYPE_LBN 2
44819+ #define RX_DESCQ_TYPE_WIDTH 1
44820+ #define RX_DESCQ_JUMBO_LBN 1
44821+ #define RX_DESCQ_JUMBO_WIDTH 1
44822+ #define RX_DESCQ_EN_LBN 0
44823+ #define RX_DESCQ_EN_WIDTH 1
44824+
44825+
44826+#define RX_RSS_INDIR_TBL_B0_OFST 0xFB0000 // RSS indirection table (B0 only)
44827+ #define RX_RSS_INDIR_ENT_B0_LBN 0
44828+ #define RX_RSS_INDIR_ENT_B0_WIDTH 6
44829+
44830+//////////////---- TX Datapath Registers C Header ----//////////////
44831+#define TX_FLUSH_DESCQ_REG_KER_OFST 0xA00 // Transmit flush descriptor queue register
44832+#define TX_FLUSH_DESCQ_REG_OFST 0xA00 // Transmit flush descriptor queue register
44833+ #define TX_FLUSH_DESCQ_CMD_LBN 12
44834+ #define TX_FLUSH_DESCQ_CMD_WIDTH 1
44835+ #define TX_FLUSH_DESCQ_LBN 0
44836+ #define TX_FLUSH_DESCQ_WIDTH 12
44837+#define TX_DESC_UPD_REG_KER_OFST 0xA10 // Kernel transmit descriptor update register. Page-mapped
44838+#define TX_DESC_UPD_REG_PAGE4_OFST 0x8A10 // Char & user transmit descriptor update register. Page-mapped
44839+#define TX_DESC_UPD_REG_PAGE123K_OFST 0x1000A10 // Char & user transmit descriptor update register. Page-mapped
44840+ #define TX_DESC_WPTR_LBN 96
44841+ #define TX_DESC_WPTR_WIDTH 12
44842+ #define TX_DESC_PUSH_CMD_LBN 95
44843+ #define TX_DESC_PUSH_CMD_WIDTH 1
44844+ #define TX_DESC_LBN 0
44845+ #define TX_DESC_WIDTH 95
44846+ #define TX_KER_DESC_LBN 0
44847+ #define TX_KER_DESC_WIDTH 64
44848+ #define TX_USR_DESC_LBN 0
44849+ #define TX_USR_DESC_WIDTH 64
44850+#define TX_DC_CFG_REG_KER_OFST 0xA20 // Transmit descriptor cache configuration register
44851+#define TX_DC_CFG_REG_OFST 0xA20 // Transmit descriptor cache configuration register
44852+ #define TX_DC_SIZE_LBN 0
44853+ #define TX_DC_SIZE_WIDTH 2
44854+
44855+#if EFVI_FALCON_EXTENDED_P_BAR
44856+#define TX_DESC_PTR_TBL_KER_OFST 0x11900 // Transmit descriptor pointer.
44857+#else
44858+#define TX_DESC_PTR_TBL_KER_OFST 0x1900 // Transmit descriptor pointer.
44859+#endif
44860+
44861+
44862+#define TX_DESC_PTR_TBL_OFST 0xF50000 // Transmit descriptor pointer
44863+ #define TX_NON_IP_DROP_DIS_B0_LBN 91
44864+ #define TX_NON_IP_DROP_DIS_B0_WIDTH 1
44865+ #define TX_IP_CHKSM_DIS_B0_LBN 90
44866+ #define TX_IP_CHKSM_DIS_B0_WIDTH 1
44867+ #define TX_TCP_CHKSM_DIS_B0_LBN 89
44868+ #define TX_TCP_CHKSM_DIS_B0_WIDTH 1
44869+ #define TX_DESCQ_EN_LBN 88
44870+ #define TX_DESCQ_EN_WIDTH 1
44871+ #define TX_ISCSI_DDIG_EN_LBN 87
44872+ #define TX_ISCSI_DDIG_EN_WIDTH 1
44873+ #define TX_ISCSI_HDIG_EN_LBN 86
44874+ #define TX_ISCSI_HDIG_EN_WIDTH 1
44875+ #define TX_DC_HW_RPTR_LBN 80
44876+ #define TX_DC_HW_RPTR_WIDTH 6
44877+ #define TX_DESCQ_HW_RPTR_LBN 68
44878+ #define TX_DESCQ_HW_RPTR_WIDTH 12
44879+ #define TX_DESCQ_SW_WPTR_LBN 56
44880+ #define TX_DESCQ_SW_WPTR_WIDTH 12
44881+ #define TX_DESCQ_BUF_BASE_ID_LBN 36
44882+ #define TX_DESCQ_BUF_BASE_ID_WIDTH 20
44883+ #define TX_DESCQ_EVQ_ID_LBN 24
44884+ #define TX_DESCQ_EVQ_ID_WIDTH 12
44885+ #define TX_DESCQ_OWNER_ID_LBN 10
44886+ #define TX_DESCQ_OWNER_ID_WIDTH 14
44887+ #define TX_DESCQ_LABEL_LBN 5
44888+ #define TX_DESCQ_LABEL_WIDTH 5
44889+ #define TX_DESCQ_SIZE_LBN 3
44890+ #define TX_DESCQ_SIZE_WIDTH 2
44891+ #define TX_DESCQ_TYPE_LBN 1
44892+ #define TX_DESCQ_TYPE_WIDTH 2
44893+ #define TX_DESCQ_FLUSH_LBN 0
44894+ #define TX_DESCQ_FLUSH_WIDTH 1
44895+#define TX_CFG_REG_KER_OFST 0xA50 // Transmit configuration register
44896+#define TX_CFG_REG_OFST 0xA50 // Transmit configuration register
44897+ #define TX_IP_ID_P1_OFS_LBN 32
44898+ #define TX_IP_ID_P1_OFS_WIDTH 15
44899+ #define TX_IP_ID_P0_OFS_LBN 16
44900+ #define TX_IP_ID_P0_OFS_WIDTH 15
44901+ #define TX_TURBO_EN_LBN 3
44902+ #define TX_TURBO_EN_WIDTH 1
44903+ #define TX_OWNERR_CTL_LBN 2
44904+ #define TX_OWNERR_CTL_WIDTH 2
44905+ #define TX_NON_IP_DROP_DIS_LBN 1
44906+ #define TX_NON_IP_DROP_DIS_WIDTH 1
44907+ #define TX_IP_ID_REP_EN_LBN 0
44908+ #define TX_IP_ID_REP_EN_WIDTH 1
44909+#define TX_RESERVED_REG_KER_OFST 0xA80 // Transmit reserved register
44910+#define TX_RESERVED_REG_OFST 0xA80 // Transmit reserved register
44911+ #define TX_CSR_PUSH_EN_LBN 89
44912+ #define TX_CSR_PUSH_EN_WIDTH 1
44913+ #define TX_RX_SPACER_LBN 64
44914+ #define TX_RX_SPACER_WIDTH 8
44915+ #define TX_SW_EV_EN_LBN 59
44916+ #define TX_SW_EV_EN_WIDTH 1
44917+ #define TX_RX_SPACER_EN_LBN 57
44918+ #define TX_RX_SPACER_EN_WIDTH 1
44919+ #define TX_CSR_PREF_WD_TMR_LBN 24
44920+ #define TX_CSR_PREF_WD_TMR_WIDTH 16
44921+ #define TX_CSR_ONLY1TAG_LBN 21
44922+ #define TX_CSR_ONLY1TAG_WIDTH 1
44923+ #define TX_PREF_THRESHOLD_LBN 19
44924+ #define TX_PREF_THRESHOLD_WIDTH 2
44925+ #define TX_ONE_PKT_PER_Q_LBN 18
44926+ #define TX_ONE_PKT_PER_Q_WIDTH 1
44927+ #define TX_DIS_NON_IP_EV_LBN 17
44928+ #define TX_DIS_NON_IP_EV_WIDTH 1
44929+ #define TX_DMA_SPACER_LBN 8
44930+ #define TX_DMA_SPACER_WIDTH 8
44931+ #define TX_FLUSH_MIN_LEN_EN_B0_LBN 7
44932+ #define TX_FLUSH_MIN_LEN_EN_B0_WIDTH 1
44933+ #define TX_TCP_DIS_A1_LBN 7
44934+ #define TX_TCP_DIS_A1_WIDTH 1
44935+ #define TX_IP_DIS_A1_LBN 6
44936+ #define TX_IP_DIS_A1_WIDTH 1
44937+ #define TX_MAX_CPL_LBN 2
44938+ #define TX_MAX_CPL_WIDTH 2
44939+ #define TX_MAX_PREF_LBN 0
44940+ #define TX_MAX_PREF_WIDTH 2
44941+#define TX_VLAN_REG_OFST 0xAE0 // Transmit VLAN tag register
44942+ #define TX_VLAN_EN_LBN 127
44943+ #define TX_VLAN_EN_WIDTH 1
44944+ #define TX_VLAN7_PORT1_EN_LBN 125
44945+ #define TX_VLAN7_PORT1_EN_WIDTH 1
44946+ #define TX_VLAN7_PORT0_EN_LBN 124
44947+ #define TX_VLAN7_PORT0_EN_WIDTH 1
44948+ #define TX_VLAN7_LBN 112
44949+ #define TX_VLAN7_WIDTH 12
44950+ #define TX_VLAN6_PORT1_EN_LBN 109
44951+ #define TX_VLAN6_PORT1_EN_WIDTH 1
44952+ #define TX_VLAN6_PORT0_EN_LBN 108
44953+ #define TX_VLAN6_PORT0_EN_WIDTH 1
44954+ #define TX_VLAN6_LBN 96
44955+ #define TX_VLAN6_WIDTH 12
44956+ #define TX_VLAN5_PORT1_EN_LBN 93
44957+ #define TX_VLAN5_PORT1_EN_WIDTH 1
44958+ #define TX_VLAN5_PORT0_EN_LBN 92
44959+ #define TX_VLAN5_PORT0_EN_WIDTH 1
44960+ #define TX_VLAN5_LBN 80
44961+ #define TX_VLAN5_WIDTH 12
44962+ #define TX_VLAN4_PORT1_EN_LBN 77
44963+ #define TX_VLAN4_PORT1_EN_WIDTH 1
44964+ #define TX_VLAN4_PORT0_EN_LBN 76
44965+ #define TX_VLAN4_PORT0_EN_WIDTH 1
44966+ #define TX_VLAN4_LBN 64
44967+ #define TX_VLAN4_WIDTH 12
44968+ #define TX_VLAN3_PORT1_EN_LBN 61
44969+ #define TX_VLAN3_PORT1_EN_WIDTH 1
44970+ #define TX_VLAN3_PORT0_EN_LBN 60
44971+ #define TX_VLAN3_PORT0_EN_WIDTH 1
44972+ #define TX_VLAN3_LBN 48
44973+ #define TX_VLAN3_WIDTH 12
44974+ #define TX_VLAN2_PORT1_EN_LBN 45
44975+ #define TX_VLAN2_PORT1_EN_WIDTH 1
44976+ #define TX_VLAN2_PORT0_EN_LBN 44
44977+ #define TX_VLAN2_PORT0_EN_WIDTH 1
44978+ #define TX_VLAN2_LBN 32
44979+ #define TX_VLAN2_WIDTH 12
44980+ #define TX_VLAN1_PORT1_EN_LBN 29
44981+ #define TX_VLAN1_PORT1_EN_WIDTH 1
44982+ #define TX_VLAN1_PORT0_EN_LBN 28
44983+ #define TX_VLAN1_PORT0_EN_WIDTH 1
44984+ #define TX_VLAN1_LBN 16
44985+ #define TX_VLAN1_WIDTH 12
44986+ #define TX_VLAN0_PORT1_EN_LBN 13
44987+ #define TX_VLAN0_PORT1_EN_WIDTH 1
44988+ #define TX_VLAN0_PORT0_EN_LBN 12
44989+ #define TX_VLAN0_PORT0_EN_WIDTH 1
44990+ #define TX_VLAN0_LBN 0
44991+ #define TX_VLAN0_WIDTH 12
44992+#define TX_FIL_CTL_REG_OFST 0xAF0 // Transmit filter control register
44993+ #define TX_MADR1_FIL_EN_LBN 65
44994+ #define TX_MADR1_FIL_EN_WIDTH 1
44995+ #define TX_MADR0_FIL_EN_LBN 64
44996+ #define TX_MADR0_FIL_EN_WIDTH 1
44997+ #define TX_IPFIL31_PORT1_EN_LBN 63
44998+ #define TX_IPFIL31_PORT1_EN_WIDTH 1
44999+ #define TX_IPFIL31_PORT0_EN_LBN 62
45000+ #define TX_IPFIL31_PORT0_EN_WIDTH 1
45001+ #define TX_IPFIL30_PORT1_EN_LBN 61
45002+ #define TX_IPFIL30_PORT1_EN_WIDTH 1
45003+ #define TX_IPFIL30_PORT0_EN_LBN 60
45004+ #define TX_IPFIL30_PORT0_EN_WIDTH 1
45005+ #define TX_IPFIL29_PORT1_EN_LBN 59
45006+ #define TX_IPFIL29_PORT1_EN_WIDTH 1
45007+ #define TX_IPFIL29_PORT0_EN_LBN 58
45008+ #define TX_IPFIL29_PORT0_EN_WIDTH 1
45009+ #define TX_IPFIL28_PORT1_EN_LBN 57
45010+ #define TX_IPFIL28_PORT1_EN_WIDTH 1
45011+ #define TX_IPFIL28_PORT0_EN_LBN 56
45012+ #define TX_IPFIL28_PORT0_EN_WIDTH 1
45013+ #define TX_IPFIL27_PORT1_EN_LBN 55
45014+ #define TX_IPFIL27_PORT1_EN_WIDTH 1
45015+ #define TX_IPFIL27_PORT0_EN_LBN 54
45016+ #define TX_IPFIL27_PORT0_EN_WIDTH 1
45017+ #define TX_IPFIL26_PORT1_EN_LBN 53
45018+ #define TX_IPFIL26_PORT1_EN_WIDTH 1
45019+ #define TX_IPFIL26_PORT0_EN_LBN 52
45020+ #define TX_IPFIL26_PORT0_EN_WIDTH 1
45021+ #define TX_IPFIL25_PORT1_EN_LBN 51
45022+ #define TX_IPFIL25_PORT1_EN_WIDTH 1
45023+ #define TX_IPFIL25_PORT0_EN_LBN 50
45024+ #define TX_IPFIL25_PORT0_EN_WIDTH 1
45025+ #define TX_IPFIL24_PORT1_EN_LBN 49
45026+ #define TX_IPFIL24_PORT1_EN_WIDTH 1
45027+ #define TX_IPFIL24_PORT0_EN_LBN 48
45028+ #define TX_IPFIL24_PORT0_EN_WIDTH 1
45029+ #define TX_IPFIL23_PORT1_EN_LBN 47
45030+ #define TX_IPFIL23_PORT1_EN_WIDTH 1
45031+ #define TX_IPFIL23_PORT0_EN_LBN 46
45032+ #define TX_IPFIL23_PORT0_EN_WIDTH 1
45033+ #define TX_IPFIL22_PORT1_EN_LBN 45
45034+ #define TX_IPFIL22_PORT1_EN_WIDTH 1
45035+ #define TX_IPFIL22_PORT0_EN_LBN 44
45036+ #define TX_IPFIL22_PORT0_EN_WIDTH 1
45037+ #define TX_IPFIL21_PORT1_EN_LBN 43
45038+ #define TX_IPFIL21_PORT1_EN_WIDTH 1
45039+ #define TX_IPFIL21_PORT0_EN_LBN 42
45040+ #define TX_IPFIL21_PORT0_EN_WIDTH 1
45041+ #define TX_IPFIL20_PORT1_EN_LBN 41
45042+ #define TX_IPFIL20_PORT1_EN_WIDTH 1
45043+ #define TX_IPFIL20_PORT0_EN_LBN 40
45044+ #define TX_IPFIL20_PORT0_EN_WIDTH 1
45045+ #define TX_IPFIL19_PORT1_EN_LBN 39
45046+ #define TX_IPFIL19_PORT1_EN_WIDTH 1
45047+ #define TX_IPFIL19_PORT0_EN_LBN 38
45048+ #define TX_IPFIL19_PORT0_EN_WIDTH 1
45049+ #define TX_IPFIL18_PORT1_EN_LBN 37
45050+ #define TX_IPFIL18_PORT1_EN_WIDTH 1
45051+ #define TX_IPFIL18_PORT0_EN_LBN 36
45052+ #define TX_IPFIL18_PORT0_EN_WIDTH 1
45053+ #define TX_IPFIL17_PORT1_EN_LBN 35
45054+ #define TX_IPFIL17_PORT1_EN_WIDTH 1
45055+ #define TX_IPFIL17_PORT0_EN_LBN 34
45056+ #define TX_IPFIL17_PORT0_EN_WIDTH 1
45057+ #define TX_IPFIL16_PORT1_EN_LBN 33
45058+ #define TX_IPFIL16_PORT1_EN_WIDTH 1
45059+ #define TX_IPFIL16_PORT0_EN_LBN 32
45060+ #define TX_IPFIL16_PORT0_EN_WIDTH 1
45061+ #define TX_IPFIL15_PORT1_EN_LBN 31
45062+ #define TX_IPFIL15_PORT1_EN_WIDTH 1
45063+ #define TX_IPFIL15_PORT0_EN_LBN 30
45064+ #define TX_IPFIL15_PORT0_EN_WIDTH 1
45065+ #define TX_IPFIL14_PORT1_EN_LBN 29
45066+ #define TX_IPFIL14_PORT1_EN_WIDTH 1
45067+ #define TX_IPFIL14_PORT0_EN_LBN 28
45068+ #define TX_IPFIL14_PORT0_EN_WIDTH 1
45069+ #define TX_IPFIL13_PORT1_EN_LBN 27
45070+ #define TX_IPFIL13_PORT1_EN_WIDTH 1
45071+ #define TX_IPFIL13_PORT0_EN_LBN 26
45072+ #define TX_IPFIL13_PORT0_EN_WIDTH 1
45073+ #define TX_IPFIL12_PORT1_EN_LBN 25
45074+ #define TX_IPFIL12_PORT1_EN_WIDTH 1
45075+ #define TX_IPFIL12_PORT0_EN_LBN 24
45076+ #define TX_IPFIL12_PORT0_EN_WIDTH 1
45077+ #define TX_IPFIL11_PORT1_EN_LBN 23
45078+ #define TX_IPFIL11_PORT1_EN_WIDTH 1
45079+ #define TX_IPFIL11_PORT0_EN_LBN 22
45080+ #define TX_IPFIL11_PORT0_EN_WIDTH 1
45081+ #define TX_IPFIL10_PORT1_EN_LBN 21
45082+ #define TX_IPFIL10_PORT1_EN_WIDTH 1
45083+ #define TX_IPFIL10_PORT0_EN_LBN 20
45084+ #define TX_IPFIL10_PORT0_EN_WIDTH 1
45085+ #define TX_IPFIL9_PORT1_EN_LBN 19
45086+ #define TX_IPFIL9_PORT1_EN_WIDTH 1
45087+ #define TX_IPFIL9_PORT0_EN_LBN 18
45088+ #define TX_IPFIL9_PORT0_EN_WIDTH 1
45089+ #define TX_IPFIL8_PORT1_EN_LBN 17
45090+ #define TX_IPFIL8_PORT1_EN_WIDTH 1
45091+ #define TX_IPFIL8_PORT0_EN_LBN 16
45092+ #define TX_IPFIL8_PORT0_EN_WIDTH 1
45093+ #define TX_IPFIL7_PORT1_EN_LBN 15
45094+ #define TX_IPFIL7_PORT1_EN_WIDTH 1
45095+ #define TX_IPFIL7_PORT0_EN_LBN 14
45096+ #define TX_IPFIL7_PORT0_EN_WIDTH 1
45097+ #define TX_IPFIL6_PORT1_EN_LBN 13
45098+ #define TX_IPFIL6_PORT1_EN_WIDTH 1
45099+ #define TX_IPFIL6_PORT0_EN_LBN 12
45100+ #define TX_IPFIL6_PORT0_EN_WIDTH 1
45101+ #define TX_IPFIL5_PORT1_EN_LBN 11
45102+ #define TX_IPFIL5_PORT1_EN_WIDTH 1
45103+ #define TX_IPFIL5_PORT0_EN_LBN 10
45104+ #define TX_IPFIL5_PORT0_EN_WIDTH 1
45105+ #define TX_IPFIL4_PORT1_EN_LBN 9
45106+ #define TX_IPFIL4_PORT1_EN_WIDTH 1
45107+ #define TX_IPFIL4_PORT0_EN_LBN 8
45108+ #define TX_IPFIL4_PORT0_EN_WIDTH 1
45109+ #define TX_IPFIL3_PORT1_EN_LBN 7
45110+ #define TX_IPFIL3_PORT1_EN_WIDTH 1
45111+ #define TX_IPFIL3_PORT0_EN_LBN 6
45112+ #define TX_IPFIL3_PORT0_EN_WIDTH 1
45113+ #define TX_IPFIL2_PORT1_EN_LBN 5
45114+ #define TX_IPFIL2_PORT1_EN_WIDTH 1
45115+ #define TX_IPFIL2_PORT0_EN_LBN 4
45116+ #define TX_IPFIL2_PORT0_EN_WIDTH 1
45117+ #define TX_IPFIL1_PORT1_EN_LBN 3
45118+ #define TX_IPFIL1_PORT1_EN_WIDTH 1
45119+ #define TX_IPFIL1_PORT0_EN_LBN 2
45120+ #define TX_IPFIL1_PORT0_EN_WIDTH 1
45121+ #define TX_IPFIL0_PORT1_EN_LBN 1
45122+ #define TX_IPFIL0_PORT1_EN_WIDTH 1
45123+ #define TX_IPFIL0_PORT0_EN_LBN 0
45124+ #define TX_IPFIL0_PORT0_EN_WIDTH 1
45125+#define TX_IPFIL_TBL_OFST 0xB00 // Transmit IP source address filter table
45126+ #define TX_IPFIL_MASK_LBN 32
45127+ #define TX_IPFIL_MASK_WIDTH 32
45128+ #define TX_IP_SRC_ADR_LBN 0
45129+ #define TX_IP_SRC_ADR_WIDTH 32
45130+#define TX_PACE_REG_A1_OFST 0xF80000 // Transmit pace control register
45131+#define TX_PACE_REG_B0_OFST 0xA90 // Transmit pace control register
45132+ #define TX_PACE_SB_AF_LBN 19
45133+ #define TX_PACE_SB_AF_WIDTH 10
45134+ #define TX_PACE_SB_NOTAF_LBN 9
45135+ #define TX_PACE_SB_NOTAF_WIDTH 10
45136+ #define TX_PACE_FB_BASE_LBN 5
45137+ #define TX_PACE_FB_BASE_WIDTH 4
45138+ #define TX_PACE_BIN_TH_LBN 0
45139+ #define TX_PACE_BIN_TH_WIDTH 5
45140+#define TX_PACE_TBL_A1_OFST 0xF80040 // Transmit pacing table
45141+#define TX_PACE_TBL_FIRST_QUEUE_A1 4
45142+#define TX_PACE_TBL_B0_OFST 0xF80000 // Transmit pacing table
45143+#define TX_PACE_TBL_FIRST_QUEUE_B0 0
45144+ #define TX_PACE_LBN 0
45145+ #define TX_PACE_WIDTH 5
45146+
45147+//////////////---- EE/Flash Registers C Header ----//////////////
45148+#define EE_SPI_HCMD_REG_KER_OFST 0x100 // SPI host command register
45149+#define EE_SPI_HCMD_REG_OFST 0x100 // SPI host command register
45150+ #define EE_SPI_HCMD_CMD_EN_LBN 31
45151+ #define EE_SPI_HCMD_CMD_EN_WIDTH 1
45152+ #define EE_WR_TIMER_ACTIVE_LBN 28
45153+ #define EE_WR_TIMER_ACTIVE_WIDTH 1
45154+ #define EE_SPI_HCMD_SF_SEL_LBN 24
45155+ #define EE_SPI_HCMD_SF_SEL_WIDTH 1
45156+ #define EE_SPI_HCMD_DABCNT_LBN 16
45157+ #define EE_SPI_HCMD_DABCNT_WIDTH 5
45158+ #define EE_SPI_HCMD_READ_LBN 15
45159+ #define EE_SPI_HCMD_READ_WIDTH 1
45160+ #define EE_SPI_HCMD_DUBCNT_LBN 12
45161+ #define EE_SPI_HCMD_DUBCNT_WIDTH 2
45162+ #define EE_SPI_HCMD_ADBCNT_LBN 8
45163+ #define EE_SPI_HCMD_ADBCNT_WIDTH 2
45164+ #define EE_SPI_HCMD_ENC_LBN 0
45165+ #define EE_SPI_HCMD_ENC_WIDTH 8
45166+#define EE_SPI_HADR_REG_KER_OFST 0X110 // SPI host address register
45167+#define EE_SPI_HADR_REG_OFST 0X110 // SPI host address register
45168+ #define EE_SPI_HADR_DUBYTE_LBN 24
45169+ #define EE_SPI_HADR_DUBYTE_WIDTH 8
45170+ #define EE_SPI_HADR_ADR_LBN 0
45171+ #define EE_SPI_HADR_ADR_WIDTH 24
45172+#define EE_SPI_HDATA_REG_KER_OFST 0x120 // SPI host data register
45173+#define EE_SPI_HDATA_REG_OFST 0x120 // SPI host data register
45174+ #define EE_SPI_HDATA3_LBN 96
45175+ #define EE_SPI_HDATA3_WIDTH 32
45176+ #define EE_SPI_HDATA2_LBN 64
45177+ #define EE_SPI_HDATA2_WIDTH 32
45178+ #define EE_SPI_HDATA1_LBN 32
45179+ #define EE_SPI_HDATA1_WIDTH 32
45180+ #define EE_SPI_HDATA0_LBN 0
45181+ #define EE_SPI_HDATA0_WIDTH 32
45182+#define EE_BASE_PAGE_REG_KER_OFST 0x130 // Expansion ROM base mirror register
45183+#define EE_BASE_PAGE_REG_OFST 0x130 // Expansion ROM base mirror register
45184+ #define EE_EXP_ROM_WINDOW_BASE_LBN 16
45185+ #define EE_EXP_ROM_WINDOW_BASE_WIDTH 13
45186+ #define EE_EXPROM_MASK_LBN 0
45187+ #define EE_EXPROM_MASK_WIDTH 13
45188+#define EE_VPD_CFG0_REG_KER_OFST 0X140 // SPI/VPD configuration register
45189+#define EE_VPD_CFG0_REG_OFST 0X140 // SPI/VPD configuration register
45190+ #define EE_SF_FASTRD_EN_LBN 127
45191+ #define EE_SF_FASTRD_EN_WIDTH 1
45192+ #define EE_SF_CLOCK_DIV_LBN 120
45193+ #define EE_SF_CLOCK_DIV_WIDTH 7
45194+ #define EE_VPD_WIP_POLL_LBN 119
45195+ #define EE_VPD_WIP_POLL_WIDTH 1
45196+ #define EE_VPDW_LENGTH_LBN 80
45197+ #define EE_VPDW_LENGTH_WIDTH 15
45198+ #define EE_VPDW_BASE_LBN 64
45199+ #define EE_VPDW_BASE_WIDTH 15
45200+ #define EE_VPD_WR_CMD_EN_LBN 56
45201+ #define EE_VPD_WR_CMD_EN_WIDTH 8
45202+ #define EE_VPD_BASE_LBN 32
45203+ #define EE_VPD_BASE_WIDTH 24
45204+ #define EE_VPD_LENGTH_LBN 16
45205+ #define EE_VPD_LENGTH_WIDTH 13
45206+ #define EE_VPD_AD_SIZE_LBN 8
45207+ #define EE_VPD_AD_SIZE_WIDTH 5
45208+ #define EE_VPD_ACCESS_ON_LBN 5
45209+ #define EE_VPD_ACCESS_ON_WIDTH 1
45210+#define EE_VPD_SW_CNTL_REG_KER_OFST 0X150 // VPD access SW control register
45211+#define EE_VPD_SW_CNTL_REG_OFST 0X150 // VPD access SW control register
45212+ #define EE_VPD_CYCLE_PENDING_LBN 31
45213+ #define EE_VPD_CYCLE_PENDING_WIDTH 1
45214+ #define EE_VPD_CYC_WRITE_LBN 28
45215+ #define EE_VPD_CYC_WRITE_WIDTH 1
45216+ #define EE_VPD_CYC_ADR_LBN 0
45217+ #define EE_VPD_CYC_ADR_WIDTH 15
45218+#define EE_VPD_SW_DATA_REG_KER_OFST 0x160 // VPD access SW data register
45219+#define EE_VPD_SW_DATA_REG_OFST 0x160 // VPD access SW data register
45220+ #define EE_VPD_CYC_DAT_LBN 0
45221+ #define EE_VPD_CYC_DAT_WIDTH 32
45222Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_desc.h
45223===================================================================
45224--- /dev/null 1970-01-01 00:00:00.000000000 +0000
45225+++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_desc.h 2008-02-20 09:32:49.000000000 +0100
45226@@ -0,0 +1,43 @@
45227+//////////////---- Descriptors C Headers ----//////////////
45228+// Receive Kernel IP Descriptor
45229+ #define RX_KER_BUF_SIZE_LBN 48
45230+ #define RX_KER_BUF_SIZE_WIDTH 14
45231+ #define RX_KER_BUF_REGION_LBN 46
45232+ #define RX_KER_BUF_REGION_WIDTH 2
45233+ #define RX_KER_BUF_REGION0_DECODE 0
45234+ #define RX_KER_BUF_REGION1_DECODE 1
45235+ #define RX_KER_BUF_REGION2_DECODE 2
45236+ #define RX_KER_BUF_REGION3_DECODE 3
45237+ #define RX_KER_BUF_ADR_LBN 0
45238+ #define RX_KER_BUF_ADR_WIDTH 46
45239+// Receive User IP Descriptor
45240+ #define RX_USR_2BYTE_OFS_LBN 20
45241+ #define RX_USR_2BYTE_OFS_WIDTH 12
45242+ #define RX_USR_BUF_ID_LBN 0
45243+ #define RX_USR_BUF_ID_WIDTH 20
45244+// Transmit Kernel IP Descriptor
45245+ #define TX_KER_PORT_LBN 63
45246+ #define TX_KER_PORT_WIDTH 1
45247+ #define TX_KER_CONT_LBN 62
45248+ #define TX_KER_CONT_WIDTH 1
45249+ #define TX_KER_BYTE_CNT_LBN 48
45250+ #define TX_KER_BYTE_CNT_WIDTH 14
45251+ #define TX_KER_BUF_REGION_LBN 46
45252+ #define TX_KER_BUF_REGION_WIDTH 2
45253+ #define TX_KER_BUF_REGION0_DECODE 0
45254+ #define TX_KER_BUF_REGION1_DECODE 1
45255+ #define TX_KER_BUF_REGION2_DECODE 2
45256+ #define TX_KER_BUF_REGION3_DECODE 3
45257+ #define TX_KER_BUF_ADR_LBN 0
45258+ #define TX_KER_BUF_ADR_WIDTH 46
45259+// Transmit User IP Descriptor
45260+ #define TX_USR_PORT_LBN 47
45261+ #define TX_USR_PORT_WIDTH 1
45262+ #define TX_USR_CONT_LBN 46
45263+ #define TX_USR_CONT_WIDTH 1
45264+ #define TX_USR_BYTE_CNT_LBN 33
45265+ #define TX_USR_BYTE_CNT_WIDTH 13
45266+ #define TX_USR_BUF_ID_LBN 13
45267+ #define TX_USR_BUF_ID_WIDTH 20
45268+ #define TX_USR_BYTE_OFS_LBN 0
45269+ #define TX_USR_BYTE_OFS_WIDTH 13
45270Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_event.h
45271===================================================================
45272--- /dev/null 1970-01-01 00:00:00.000000000 +0000
45273+++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_event.h 2008-02-20 09:32:49.000000000 +0100
45274@@ -0,0 +1,123 @@
45275+//////////////---- Events Format C Header ----//////////////
45276+//////////////---- Event entry ----//////////////
45277+ #define EV_CODE_LBN 60
45278+ #define EV_CODE_WIDTH 4
45279+ #define RX_IP_EV_DECODE 0
45280+ #define TX_IP_EV_DECODE 2
45281+ #define DRIVER_EV_DECODE 5
45282+ #define GLOBAL_EV_DECODE 6
45283+ #define DRV_GEN_EV_DECODE 7
45284+ #define EV_DATA_LBN 0
45285+ #define EV_DATA_WIDTH 60
45286+//////////////---- Receive IP events for both Kernel & User event queues ----//////////////
45287+ #define RX_EV_PKT_OK_LBN 56
45288+ #define RX_EV_PKT_OK_WIDTH 1
45289+ #define RX_EV_BUF_OWNER_ID_ERR_LBN 54
45290+ #define RX_EV_BUF_OWNER_ID_ERR_WIDTH 1
45291+ #define RX_EV_IP_HDR_CHKSUM_ERR_LBN 52
45292+ #define RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1
45293+ #define RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51
45294+ #define RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1
45295+ #define RX_EV_ETH_CRC_ERR_LBN 50
45296+ #define RX_EV_ETH_CRC_ERR_WIDTH 1
45297+ #define RX_EV_FRM_TRUNC_LBN 49
45298+ #define RX_EV_FRM_TRUNC_WIDTH 1
45299+ #define RX_EV_DRIB_NIB_LBN 48
45300+ #define RX_EV_DRIB_NIB_WIDTH 1
45301+ #define RX_EV_TOBE_DISC_LBN 47
45302+ #define RX_EV_TOBE_DISC_WIDTH 1
45303+ #define RX_EV_PKT_TYPE_LBN 44
45304+ #define RX_EV_PKT_TYPE_WIDTH 3
45305+ #define RX_EV_PKT_TYPE_ETH_DECODE 0
45306+ #define RX_EV_PKT_TYPE_LLC_DECODE 1
45307+ #define RX_EV_PKT_TYPE_JUMBO_DECODE 2
45308+ #define RX_EV_PKT_TYPE_VLAN_DECODE 3
45309+ #define RX_EV_PKT_TYPE_VLAN_LLC_DECODE 4
45310+ #define RX_EV_PKT_TYPE_VLAN_JUMBO_DECODE 5
45311+ #define RX_EV_HDR_TYPE_LBN 42
45312+ #define RX_EV_HDR_TYPE_WIDTH 2
45313+ #define RX_EV_HDR_TYPE_TCP_IPV4_DECODE 0
45314+ #define RX_EV_HDR_TYPE_UDP_IPV4_DECODE 1
45315+ #define RX_EV_HDR_TYPE_OTHER_IP_DECODE 2
45316+ #define RX_EV_HDR_TYPE_NON_IP_DECODE 3
45317+ #define RX_EV_DESC_Q_EMPTY_LBN 41
45318+ #define RX_EV_DESC_Q_EMPTY_WIDTH 1
45319+ #define RX_EV_MCAST_HASH_MATCH_LBN 40
45320+ #define RX_EV_MCAST_HASH_MATCH_WIDTH 1
45321+ #define RX_EV_MCAST_PKT_LBN 39
45322+ #define RX_EV_MCAST_PKT_WIDTH 1
45323+ #define RX_EV_Q_LABEL_LBN 32
45324+ #define RX_EV_Q_LABEL_WIDTH 5
45325+ #define RX_JUMBO_CONT_LBN 31
45326+ #define RX_JUMBO_CONT_WIDTH 1
45327+ #define RX_SOP_LBN 15
45328+ #define RX_SOP_WIDTH 1
45329+ #define RX_PORT_LBN 30
45330+ #define RX_PORT_WIDTH 1
45331+ #define RX_EV_BYTE_CNT_LBN 16
45332+ #define RX_EV_BYTE_CNT_WIDTH 14
45333+ #define RX_iSCSI_PKT_OK_LBN 14
45334+ #define RX_iSCSI_PKT_OK_WIDTH 1
45335+ #define RX_ISCSI_DDIG_ERR_LBN 13
45336+ #define RX_ISCSI_DDIG_ERR_WIDTH 1
45337+ #define RX_ISCSI_HDIG_ERR_LBN 12
45338+ #define RX_ISCSI_HDIG_ERR_WIDTH 1
45339+ #define RX_EV_DESC_PTR_LBN 0
45340+ #define RX_EV_DESC_PTR_WIDTH 12
45341+//////////////---- Transmit IP events for both Kernel & User event queues ----//////////////
45342+ #define TX_EV_PKT_ERR_LBN 38
45343+ #define TX_EV_PKT_ERR_WIDTH 1
45344+ #define TX_EV_PKT_TOO_BIG_LBN 37
45345+ #define TX_EV_PKT_TOO_BIG_WIDTH 1
45346+ #define TX_EV_Q_LABEL_LBN 32
45347+ #define TX_EV_Q_LABEL_WIDTH 5
45348+ #define TX_EV_PORT_LBN 16
45349+ #define TX_EV_PORT_WIDTH 1
45350+ #define TX_EV_WQ_FF_FULL_LBN 15
45351+ #define TX_EV_WQ_FF_FULL_WIDTH 1
45352+ #define TX_EV_BUF_OWNER_ID_ERR_LBN 14
45353+ #define TX_EV_BUF_OWNER_ID_ERR_WIDTH 1
45354+ #define TX_EV_COMP_LBN 12
45355+ #define TX_EV_COMP_WIDTH 1
45356+ #define TX_EV_DESC_PTR_LBN 0
45357+ #define TX_EV_DESC_PTR_WIDTH 12
45358+//////////////---- Char or Kernel driver events ----//////////////
45359+ #define DRIVER_EV_SUB_CODE_LBN 56
45360+ #define DRIVER_EV_SUB_CODE_WIDTH 4
45361+ #define TX_DESCQ_FLS_DONE_EV_DECODE 0x0
45362+ #define RX_DESCQ_FLS_DONE_EV_DECODE 0x1
45363+ #define EVQ_INIT_DONE_EV_DECODE 0x2
45364+ #define EVQ_NOT_EN_EV_DECODE 0x3
45365+ #define RX_DESCQ_FLSFF_OVFL_EV_DECODE 0x4
45366+ #define SRM_UPD_DONE_EV_DECODE 0x5
45367+ #define WAKE_UP_EV_DECODE 0x6
45368+ #define TX_PKT_NON_TCP_UDP_DECODE 0x9
45369+ #define TIMER_EV_DECODE 0xA
45370+ #define RX_DSC_ERROR_EV_DECODE 0xE
45371+ #define DRIVER_EV_TX_DESCQ_ID_LBN 0
45372+ #define DRIVER_EV_TX_DESCQ_ID_WIDTH 12
45373+ #define DRIVER_EV_RX_DESCQ_ID_LBN 0
45374+ #define DRIVER_EV_RX_DESCQ_ID_WIDTH 12
45375+ #define DRIVER_EV_EVQ_ID_LBN 0
45376+ #define DRIVER_EV_EVQ_ID_WIDTH 12
45377+ #define DRIVER_TMR_ID_LBN 0
45378+ #define DRIVER_TMR_ID_WIDTH 12
45379+ #define DRIVER_EV_SRM_UPD_LBN 0
45380+ #define DRIVER_EV_SRM_UPD_WIDTH 2
45381+ #define SRM_CLR_EV_DECODE 0
45382+ #define SRM_UPD_EV_DECODE 1
45383+ #define SRM_ILLCLR_EV_DECODE 2
45384+//////////////---- Global events. Sent to both event queue 0 and 4. ----//////////////
45385+ #define XFP_PHY_INTR_LBN 10
45386+ #define XFP_PHY_INTR_WIDTH 1
45387+ #define XG_PHY_INTR_LBN 9
45388+ #define XG_PHY_INTR_WIDTH 1
45389+ #define G_PHY1_INTR_LBN 8
45390+ #define G_PHY1_INTR_WIDTH 1
45391+ #define G_PHY0_INTR_LBN 7
45392+ #define G_PHY0_INTR_WIDTH 1
45393+//////////////---- Driver generated events ----//////////////
45394+ #define DRV_GEN_EV_CODE_LBN 60
45395+ #define DRV_GEN_EV_CODE_WIDTH 4
45396+ #define DRV_GEN_EV_DATA_LBN 0
45397+ #define DRV_GEN_EV_DATA_WIDTH 60
45398Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_internal.h
45399===================================================================
45400--- /dev/null 1970-01-01 00:00:00.000000000 +0000
45401+++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_internal.h 2008-02-20 09:32:49.000000000 +0100
45402@@ -0,0 +1,256 @@
45403+/****************************************************************************
45404+ * Copyright 2002-2005: Level 5 Networks Inc.
45405+ * Copyright 2005-2008: Solarflare Communications Inc,
45406+ * 9501 Jeronimo Road, Suite 250,
45407+ * Irvine, CA 92618, USA
45408+ *
45409+ * Maintained by Solarflare Communications
45410+ * <linux-xen-drivers@solarflare.com>
45411+ * <onload-dev@solarflare.com>
45412+ *
45413+ * This program is free software; you can redistribute it and/or modify it
45414+ * under the terms of the GNU General Public License version 2 as published
45415+ * by the Free Software Foundation, incorporated herein by reference.
45416+ *
45417+ * This program is distributed in the hope that it will be useful,
45418+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
45419+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45420+ * GNU General Public License for more details.
45421+ *
45422+ * You should have received a copy of the GNU General Public License
45423+ * along with this program; if not, write to the Free Software
45424+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
45425+ ****************************************************************************
45426+ */
45427+
45428+/*
45429+ * \author djr
45430+ * \brief Really-and-truly-honestly internal stuff for libef.
45431+ * \date 2004/06/13
45432+ */
45433+
45434+/*! \cidoxg_include_ci_ul */
45435+#ifndef __CI_EF_VI_INTERNAL_H__
45436+#define __CI_EF_VI_INTERNAL_H__
45437+
45438+
45439+/* These flags share space with enum ef_vi_flags. */
45440+#define EF_VI_BUG5692_WORKAROUND 0x10000
45441+
45442+
45443+/* ***********************************************************************
45444+ * COMPILATION CONTROL FLAGS (see ef_vi.h for "workaround" controls)
45445+ */
45446+
45447+#define EF_VI_DO_MAGIC_CHECKS 1
45448+
45449+
45450+/**********************************************************************
45451+ * Headers
45452+ */
45453+
45454+#include <etherfabric/ef_vi.h>
45455+#include "sysdep.h"
45456+#include "ef_vi_falcon.h"
45457+
45458+
45459+/**********************************************************************
45460+ * Debugging.
45461+ */
45462+
45463+#ifndef NDEBUG
45464+
45465+# define _ef_assert(exp, file, line) BUG_ON(!(exp));
45466+
45467+# define _ef_assert2(exp, x, y, file, line) do { \
45468+ if (unlikely(!(exp))) \
45469+ BUG(); \
45470+ } while (0)
45471+
45472+#else
45473+
45474+# define _ef_assert(exp, file, line)
45475+# define _ef_assert2(e, x, y, file, line)
45476+
45477+#endif
45478+
45479+#define ef_assert(a) do{ _ef_assert((a),__FILE__,__LINE__); } while(0)
45480+#define ef_assert_equal(a,b) _ef_assert2((a)==(b),(a),(b),__FILE__,__LINE__)
45481+#define ef_assert_eq ef_assert_equal
45482+#define ef_assert_lt(a,b) _ef_assert2((a)<(b),(a),(b),__FILE__,__LINE__)
45483+#define ef_assert_le(a,b) _ef_assert2((a)<=(b),(a),(b),__FILE__,__LINE__)
45484+#define ef_assert_nequal(a,b) _ef_assert2((a)!=(b),(a),(b),__FILE__,__LINE__)
45485+#define ef_assert_ne ef_assert_nequal
45486+#define ef_assert_ge(a,b) _ef_assert2((a)>=(b),(a),(b),__FILE__,__LINE__)
45487+#define ef_assert_gt(a,b) _ef_assert2((a)>(b),(a),(b),__FILE__,__LINE__)
45488+
45489+/**********************************************************************
45490+ * Debug checks. ******************************************************
45491+ **********************************************************************/
45492+
45493+#ifdef NDEBUG
45494+# define EF_VI_MAGIC_SET(p, type)
45495+# define EF_VI_CHECK_VI(p)
45496+# define EF_VI_CHECK_EVENT_Q(p)
45497+# define EF_VI_CHECK_IOBUFSET(p)
45498+# define EF_VI_CHECK_FILTER(p)
45499+# define EF_VI_CHECK_SHMBUF(p)
45500+# define EF_VI_CHECK_PT_EP(p)
45501+#else
45502+# define EF_VI 0x3
45503+# define EF_EPLOCK 0x6
45504+# define EF_IOBUFSET 0x9
45505+# define EF_FILTER 0xa
45506+# define EF_SHMBUF 0x11
45507+
45508+# define EF_VI_MAGIC(p, type) \
45509+ (((unsigned)(type) << 28) | \
45510+ (((unsigned)(intptr_t)(p)) & 0x0fffffffu))
45511+
45512+# if !EF_VI_DO_MAGIC_CHECKS
45513+# define EF_VI_MAGIC_SET(p, type)
45514+# define EF_VI_MAGIC_CHECK(p, type)
45515+# else
45516+# define EF_VI_MAGIC_SET(p, type) \
45517+ do { \
45518+ (p)->magic = EF_VI_MAGIC((p), (type)); \
45519+ } while (0)
45520+
45521+# define EF_VI_MAGIC_OKAY(p, type) \
45522+ ((p)->magic == EF_VI_MAGIC((p), (type)))
45523+
45524+# define EF_VI_MAGIC_CHECK(p, type) \
45525+ ef_assert(EF_VI_MAGIC_OKAY((p), (type)))
45526+
45527+#endif /* EF_VI_DO_MAGIC_CHECKS */
45528+
45529+# define EF_VI_CHECK_VI(p) \
45530+ ef_assert(p); \
45531+ EF_VI_MAGIC_CHECK((p), EF_VI);
45532+
45533+# define EF_VI_CHECK_EVENT_Q(p) \
45534+ ef_assert(p); \
45535+ EF_VI_MAGIC_CHECK((p), EF_VI); \
45536+ ef_assert((p)->evq_base); \
45537+ ef_assert((p)->evq_mask);
45538+
45539+# define EF_VI_CHECK_PT_EP(p) \
45540+ ef_assert(p); \
45541+ EF_VI_MAGIC_CHECK((p), EF_VI); \
45542+ ef_assert((p)->ep_state);
45543+
45544+# define EF_VI_CHECK_IOBUFSET(p) \
45545+ ef_assert(p); \
45546+ EF_VI_MAGIC_CHECK((p), EF_IOBUFSET)
45547+
45548+# define EF_VI_CHECK_FILTER(p) \
45549+ ef_assert(p); \
45550+ EF_VI_MAGIC_CHECK((p), EF_FILTER);
45551+
45552+# define EF_VI_CHECK_SHMBUF(p) \
45553+ ef_assert(p); \
45554+ EF_VI_MAGIC_CHECK((p), EF_SHMBUF);
45555+
45556+#endif
45557+
45558+#ifndef NDEBUG
45559+# define EF_DRIVER_MAGIC 0x00f00ba4
45560+# define EF_ASSERT_THIS_DRIVER_VALID(driver) \
45561+ do{ ef_assert(driver); \
45562+ EF_VI_MAGIC_CHECK((driver), EF_DRIVER_MAGIC); \
45563+ ef_assert((driver)->init); }while(0)
45564+
45565+# define EF_ASSERT_DRIVER_VALID() EF_ASSERT_THIS_DRIVER_VALID(&ci_driver)
45566+#else
45567+# define EF_ASSERT_THIS_DRIVER_VALID(driver)
45568+# define EF_ASSERT_DRIVER_VALID()
45569+#endif
45570+
45571+
45572+/* *************************************
45573+ * Power of 2 FIFO
45574+ */
45575+
45576+#define EF_VI_FIFO2_M(f, x) ((x) & ((f)->fifo_mask))
45577+#define ef_vi_fifo2_valid(f) ((f) && (f)->fifo && (f)->fifo_mask > 0 && \
45578+ (f)->fifo_rd_i <= (f)->fifo_mask && \
45579+ (f)->fifo_wr_i <= (f)->fifo_mask && \
45580+ EF_VI_IS_POW2((f)->fifo_mask+1u))
45581+
45582+#define ef_vi_fifo2_init(f, cap) \
45583+ do{ ef_assert(EF_VI_IS_POW2((cap) + 1)); \
45584+ (f)->fifo_rd_i = (f)->fifo_wr_i = 0u; \
45585+ (f)->fifo_mask = (cap); \
45586+ }while(0)
45587+
45588+#define ef_vi_fifo2_is_empty(f) ((f)->fifo_rd_i == (f)->fifo_wr_i)
45589+#define ef_vi_fifo2_capacity(f) ((f)->fifo_mask)
45590+#define ef_vi_fifo2_buf_size(f) ((f)->fifo_mask + 1u)
45591+#define ef_vi_fifo2_end(f) ((f)->fifo + ef_vi_fifo2_buf_size(f))
45592+#define ef_vi_fifo2_peek(f) ((f)->fifo[(f)->fifo_rd_i])
45593+#define ef_vi_fifo2_poke(f) ((f)->fifo[(f)->fifo_wr_i])
45594+#define ef_vi_fifo2_num(f) EF_VI_FIFO2_M((f),(f)->fifo_wr_i-(f)->fifo_rd_i)
45595+
45596+#define ef_vi_fifo2_wr_prev(f) \
45597+ do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i - 1u); }while(0)
45598+#define ef_vi_fifo2_wr_next(f) \
45599+ do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i + 1u); }while(0)
45600+#define ef_vi_fifo2_rd_adv(f, n) \
45601+ do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + (n)); }while(0)
45602+#define ef_vi_fifo2_rd_prev(f) \
45603+ do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i - 1u); }while(0)
45604+#define ef_vi_fifo2_rd_next(f) \
45605+ do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + 1u); }while(0)
45606+
45607+#define ef_vi_fifo2_put(f, v) \
45608+ do{ ef_vi_fifo2_poke(f) = (v); ef_vi_fifo2_wr_next(f); }while(0)
45609+#define ef_vi_fifo2_get(f, pv) \
45610+ do{ *(pv) = ef_vi_fifo2_peek(f); ef_vi_fifo2_rd_next(f); }while(0)
45611+
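+/* Usage sketch (illustrative only): these macros assume a caller-supplied
+ * structure with 'fifo', 'fifo_mask', 'fifo_rd_i' and 'fifo_wr_i' members;
+ * the struct and item types below are hypothetical, not part of this header.
+ *
+ *   struct my_fifo { int* fifo; unsigned fifo_mask, fifo_rd_i, fifo_wr_i; };
+ *   int storage[8];                      // buffer size must be a power of 2
+ *   struct my_fifo f = { storage };
+ *   ef_vi_fifo2_init(&f, 7);             // capacity == buffer size - 1
+ *   ef_vi_fifo2_put(&f, 42);             // write at wr_i, then advance it
+ *   if( ! ef_vi_fifo2_is_empty(&f) ) {
+ *     int v;
+ *     ef_vi_fifo2_get(&f, &v);           // read at rd_i, then advance it
+ *   }
+ */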
45612+
45613+/* *********************************************************************
45614+ * Eventq handling
45615+ */
45616+
45617+typedef union {
45618+ uint64_t u64;
45619+ struct {
45620+ uint32_t a;
45621+ uint32_t b;
45622+ } opaque;
45623+} ef_vi_event;
45624+
45625+
45626+#define EF_VI_EVENT_OFFSET(q, i) \
45627+ (((q)->evq_state->evq_ptr - (i) * sizeof(ef_vi_event)) & (q)->evq_mask)
45628+
45629+#define EF_VI_EVENT_PTR(q, i) \
45630+ ((ef_vi_event*) ((q)->evq_base + EF_VI_EVENT_OFFSET((q), (i))))
45631+
45632+/* *********************************************************************
45633+ * Miscellaneous goodies
45634+ */
45635+#ifdef NDEBUG
45636+# define EF_VI_DEBUG(x)
45637+#else
45638+# define EF_VI_DEBUG(x) x
45639+#endif
45640+
45641+#define EF_VI_ROUND_UP(i, align) (((i)+(align)-1u) & ~((align)-1u))
45642+#define EF_VI_ALIGN_FWD(p, align) (((p)+(align)-1u) & ~((align)-1u))
45643+#define EF_VI_ALIGN_BACK(p, align) ((p) & ~((align)-1u))
45644+#define EF_VI_PTR_ALIGN_BACK(p, align) \
45645+ ((char*)EF_VI_ALIGN_BACK(((intptr_t)(p)), ((intptr_t)(align))))
45646+#define EF_VI_IS_POW2(x) ((x) && ! ((x) & ((x) - 1)))
45647+
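+/* Illustrative values (not part of the original header): 'align' must be a
+ * power of two for these macros to behave as intended.
+ *
+ *   EF_VI_ROUND_UP(13, 8) == 16
+ *   EF_VI_ALIGN_BACK(13, 8) == 8
+ *   EF_VI_IS_POW2(64) != 0, EF_VI_IS_POW2(0) == 0
+ */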
45648+
45649+/* ********************************************************************
45650+ */
45651+
45652+extern void falcon_vi_init(ef_vi*, void* vvis ) EF_VI_HF;
45653+extern void ef_eventq_state_init(ef_vi* evq) EF_VI_HF;
45654+extern void __ef_init(void) EF_VI_HF;
45655+
45656+
45657+#endif /* __CI_EF_VI_INTERNAL_H__ */
45658+
45659Index: head-2008-11-25/drivers/xen/sfc_netfront/etherfabric/ef_vi.h
45660===================================================================
45661--- /dev/null 1970-01-01 00:00:00.000000000 +0000
45662+++ head-2008-11-25/drivers/xen/sfc_netfront/etherfabric/ef_vi.h 2008-02-20 09:32:49.000000000 +0100
45663@@ -0,0 +1,665 @@
45664+/****************************************************************************
45665+ * Copyright 2002-2005: Level 5 Networks Inc.
45666+ * Copyright 2005-2008: Solarflare Communications Inc,
45667+ * 9501 Jeronimo Road, Suite 250,
45668+ * Irvine, CA 92618, USA
45669+ *
45670+ * Maintained by Solarflare Communications
45671+ * <linux-xen-drivers@solarflare.com>
45672+ * <onload-dev@solarflare.com>
45673+ *
45674+ * This program is free software; you can redistribute it and/or modify it
45675+ * under the terms of the GNU General Public License version 2 as published
45676+ * by the Free Software Foundation, incorporated herein by reference.
45677+ *
45678+ * This program is distributed in the hope that it will be useful,
45679+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
45680+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45681+ * GNU General Public License for more details.
45682+ *
45683+ * You should have received a copy of the GNU General Public License
45684+ * along with this program; if not, write to the Free Software
45685+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
45686+ ****************************************************************************
45687+ */
45688+
45689+/*
45690+ * \brief Virtual Interface
45691+ * \date 2007/05/16
45692+ */
45693+
45694+#ifndef __EFAB_EF_VI_H__
45695+#define __EFAB_EF_VI_H__
45696+
45697+
45698+/**********************************************************************
45699+ * Primitive types ****************************************************
45700+ **********************************************************************/
45701+
45702+/* We standardise on the types from stdint.h and synthesise these types
45703+ * for compilers/platforms that don't provide them */
45704+
45705+# include <linux/types.h>
45706+# define EF_VI_ALIGN(x) __attribute__ ((aligned (x)))
45707+# define ef_vi_inline static inline
45708+
45709+
45710+
45711+/**********************************************************************
45712+ * Types **************************************************************
45713+ **********************************************************************/
45714+
45715+typedef uint32_t ef_eventq_ptr;
45716+
45717+typedef uint64_t ef_addr;
45718+typedef char* ef_vi_ioaddr_t;
45719+
45720+/**********************************************************************
45721+ * ef_event ***********************************************************
45722+ **********************************************************************/
45723+
45724+/*! \i_ef_vi A DMA request identifier.
45725+**
45726+** This is an integer token specified by the transport and associated
45727+** with a DMA request. It is returned to the VI user with DMA completion
45728+** events. It is typically used to identify the buffer associated with
45729+** the transfer.
45730+*/
45731+typedef int ef_request_id;
45732+
45733+typedef union {
45734+ uint64_t u64[1];
45735+ uint32_t u32[2];
45736+} ef_vi_qword;
45737+
45738+typedef ef_vi_qword ef_hw_event;
45739+
45740+#define EF_REQUEST_ID_BITS 16u
45741+#define EF_REQUEST_ID_MASK ((1u << EF_REQUEST_ID_BITS) - 1u)
45742+
45743+/*! \i_ef_event An [ef_event] is a token that identifies something that
45744+** has happened. Examples include packets received, packets transmitted
45745+** and errors.
45746+*/
45747+typedef union {
45748+ struct {
45749+ ef_hw_event ev;
45750+ unsigned type :16;
45751+ } generic;
45752+ struct {
45753+ ef_hw_event ev;
45754+ unsigned type :16;
45755+ /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
45756+ unsigned q_id :16;
45757+ unsigned len :16;
45758+ unsigned flags :16;
45759+ } rx;
45760+ struct { /* This *must* have same layout as [rx]. */
45761+ ef_hw_event ev;
45762+ unsigned type :16;
45763+ /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
45764+ unsigned q_id :16;
45765+ unsigned len :16;
45766+ unsigned flags :16;
45767+ unsigned subtype :16;
45768+ } rx_discard;
45769+ struct {
45770+ ef_hw_event ev;
45771+ unsigned type :16;
45772+ /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
45773+ unsigned q_id :16;
45774+ } tx;
45775+ struct {
45776+ ef_hw_event ev;
45777+ unsigned type :16;
45778+ /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
45779+ unsigned q_id :16;
45780+ unsigned subtype :16;
45781+ } tx_error;
45782+ struct {
45783+ ef_hw_event ev;
45784+ unsigned type :16;
45785+ unsigned q_id :16;
45786+ } rx_no_desc_trunc;
45787+ struct {
45788+ ef_hw_event ev;
45789+ unsigned type :16;
45790+ unsigned data;
45791+ } sw;
45792+} ef_event;
45793+
45794+
45795+#define EF_EVENT_TYPE(e) ((e).generic.type)
45796+enum {
45797+ /** Good data was received. */
45798+ EF_EVENT_TYPE_RX,
45799+ /** Packets have been sent. */
45800+ EF_EVENT_TYPE_TX,
45801+ /** Data received and buffer consumed, but something is wrong. */
45802+ EF_EVENT_TYPE_RX_DISCARD,
45803+ /** Transmit of packet failed. */
45804+ EF_EVENT_TYPE_TX_ERROR,
45805+ /** Received packet was truncated due to lack of descriptors. */
45806+ EF_EVENT_TYPE_RX_NO_DESC_TRUNC,
45807+ /** Software generated event. */
45808+ EF_EVENT_TYPE_SW,
45809+ /** Event queue overflow. */
45810+ EF_EVENT_TYPE_OFLOW,
45811+};
45812+
45813+#define EF_EVENT_RX_BYTES(e) ((e).rx.len)
45814+#define EF_EVENT_RX_Q_ID(e) ((e).rx.q_id)
45815+#define EF_EVENT_RX_CONT(e) ((e).rx.flags & EF_EVENT_FLAG_CONT)
45816+#define EF_EVENT_RX_SOP(e) ((e).rx.flags & EF_EVENT_FLAG_SOP)
45817+#define EF_EVENT_RX_ISCSI_OKAY(e) ((e).rx.flags & EF_EVENT_FLAG_ISCSI_OK)
45818+#define EF_EVENT_FLAG_SOP 0x1
45819+#define EF_EVENT_FLAG_CONT 0x2
45820+#define EF_EVENT_FLAG_ISCSI_OK 0x4
45821+
45822+#define EF_EVENT_TX_Q_ID(e) ((e).tx.q_id)
45823+
45824+#define EF_EVENT_RX_DISCARD_Q_ID(e) ((e).rx_discard.q_id)
45825+#define EF_EVENT_RX_DISCARD_LEN(e) ((e).rx_discard.len)
45826+#define EF_EVENT_RX_DISCARD_TYPE(e) ((e).rx_discard.subtype)
45827+enum {
45828+ EF_EVENT_RX_DISCARD_CSUM_BAD,
45829+ EF_EVENT_RX_DISCARD_CRC_BAD,
45830+ EF_EVENT_RX_DISCARD_TRUNC,
45831+ EF_EVENT_RX_DISCARD_RIGHTS,
45832+ EF_EVENT_RX_DISCARD_OTHER,
45833+};
45834+
45835+#define EF_EVENT_TX_ERROR_Q_ID(e) ((e).tx_error.q_id)
45836+#define EF_EVENT_TX_ERROR_TYPE(e) ((e).tx_error.subtype)
45837+enum {
45838+ EF_EVENT_TX_ERROR_RIGHTS,
45839+ EF_EVENT_TX_ERROR_OFLOW,
45840+ EF_EVENT_TX_ERROR_2BIG,
45841+ EF_EVENT_TX_ERROR_BUS,
45842+};
45843+
45844+#define EF_EVENT_RX_NO_DESC_TRUNC_Q_ID(e) ((e).rx_no_desc_trunc.q_id)
45845+
45846+#define EF_EVENT_SW_DATA_MASK 0xffff
45847+#define EF_EVENT_SW_DATA(e) ((e).sw.data)
45848+
45849+#define EF_EVENT_FMT "[ev:%x:%08x:%08x]"
45850+#define EF_EVENT_PRI_ARG(e) (unsigned) (e).generic.type, \
45851+ (unsigned) (e).generic.ev.u32[1], \
45852+ (unsigned) (e).generic.ev.u32[0]
45853+
45854+#define EF_GET_HW_EV(e) ((e).generic.ev)
45855+#define EF_GET_HW_EV_PTR(e) (&(e).generic.ev)
45856+#define EF_GET_HW_EV_U64(e) ((e).generic.ev.u64[0])
45857+
45858+
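The format/argument macros above (EF_EVENT_FMT / EF_EVENT_PRI_ARG) are clearly intended to be used together when logging an event. A minimal illustration, where [ev] is an ef_event; the message text and the use of printk are assumptions for the sketch, not taken from this patch:

	/* e.g. after pulling an unexpected event out of a poll loop: */
	printk(KERN_DEBUG "unexpected event " EF_EVENT_FMT "\n",
	       EF_EVENT_PRI_ARG(ev));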
45859+/* ***************** */
45860+
45861+/*! Used by netif shared state. Must use types of explicit size. */
45862+typedef struct {
45863+ uint16_t rx_last_desc_ptr; /* for RX duplicates */
45864+ uint8_t bad_sop; /* bad SOP detected */
45865+ uint8_t frag_num; /* next fragment #, 0=>SOP */
45866+} ef_rx_dup_state_t;
45867+
45868+
45869+/* Max number of ports on any SF NIC. */
45870+#define EFAB_DMAQS_PER_EVQ_MAX 32
45871+
45872+typedef struct {
45873+ ef_eventq_ptr evq_ptr;
45874+ int32_t trashed;
45875+ ef_rx_dup_state_t rx_dup_state[EFAB_DMAQS_PER_EVQ_MAX];
45876+} ef_eventq_state;
45877+
45878+
45879+/*! \i_ef_base [ef_iovec] is similar to the standard [struct iovec]. An
45880+** array of these is used to designate a scatter/gather list of I/O
45881+** buffers.
45882+*/
45883+typedef struct {
45884+ ef_addr iov_base EF_VI_ALIGN(8);
45885+ unsigned iov_len;
45886+} ef_iovec;
45887+
45888+/* Falcon constants */
45889+#define TX_EV_DESC_PTR_LBN 0
45890+
45891+/**********************************************************************
45892+ * ef_iobufset ********************************************************
45893+ **********************************************************************/
45894+
45895+/*! \i_ef_bufs An [ef_iobufset] is a collection of buffers to be used
45896+** with the NIC.
45897+*/
45898+typedef struct ef_iobufset {
45899+ unsigned magic;
45900+ unsigned bufs_mmap_bytes;
45901+ unsigned bufs_handle;
45902+ int bufs_ptr_off;
45903+ ef_addr bufs_addr;
45904+ unsigned bufs_size; /* size rounded to pow2 */
45905+ int bufs_num;
45906+ int faultonaccess;
45907+} ef_iobufset;
45908+
45909+
45910+/**********************************************************************
45911+ * ef_vi **************************************************************
45912+ **********************************************************************/
45913+
45914+enum ef_vi_flags {
45915+ EF_VI_RX_SCATTER = 0x1,
45916+ EF_VI_ISCSI_RX_HDIG = 0x2,
45917+ EF_VI_ISCSI_TX_HDIG = 0x4,
45918+ EF_VI_ISCSI_RX_DDIG = 0x8,
45919+ EF_VI_ISCSI_TX_DDIG = 0x10,
45920+ EF_VI_TX_PHYS_ADDR = 0x20,
45921+ EF_VI_RX_PHYS_ADDR = 0x40,
45922+ EF_VI_TX_IP_CSUM_DIS = 0x80,
45923+ EF_VI_TX_TCPUDP_CSUM_DIS= 0x100,
45924+ EF_VI_TX_TCPUDP_ONLY = 0x200,
45925+ /* Flags in range 0xXXXX0000 are for internal use. */
45926+};
45927+
45928+typedef struct {
45929+ uint32_t added;
45930+ uint32_t removed;
45931+} ef_vi_txq_state;
45932+
45933+typedef struct {
45934+ uint32_t added;
45935+ uint32_t removed;
45936+} ef_vi_rxq_state;
45937+
45938+typedef struct {
45939+ uint32_t mask;
45940+ void* doorbell;
45941+ void* descriptors;
45942+ uint16_t* ids;
45943+ unsigned misalign_mask;
45944+} ef_vi_txq;
45945+
45946+typedef struct {
45947+ uint32_t mask;
45948+ void* doorbell;
45949+ void* descriptors;
45950+ uint16_t* ids;
45951+} ef_vi_rxq;
45952+
45953+typedef struct {
45954+ ef_eventq_state evq;
45955+ ef_vi_txq_state txq;
45956+ ef_vi_rxq_state rxq;
45957+ /* Followed by request id fifos. */
45958+} ef_vi_state;
45959+
45960+/*! \i_ef_vi A virtual interface.
45961+**
45962+** An [ef_vi] represents a virtual interface on a specific NIC. A
45963+** virtual interface is a collection of an event queue and two DMA queues
45964+** used to pass Ethernet frames between the transport implementation and
45965+** the network.
45966+*/
45967+typedef struct ef_vi {
45968+ unsigned magic;
45969+
45970+ unsigned vi_resource_id;
45971+ unsigned vi_resource_handle_hack;
45972+ unsigned vi_i;
45973+
45974+ char* vi_mem_mmap_ptr;
45975+ int vi_mem_mmap_bytes;
45976+ char* vi_io_mmap_ptr;
45977+ int vi_io_mmap_bytes;
45978+
45979+ ef_eventq_state* evq_state;
45980+ char* evq_base;
45981+ unsigned evq_mask;
45982+ ef_vi_ioaddr_t evq_timer_reg;
45983+
45984+ ef_vi_txq vi_txq;
45985+ ef_vi_rxq vi_rxq;
45986+ ef_vi_state* ep_state;
45987+ enum ef_vi_flags vi_flags;
45988+} ef_vi;
45989+
45990+
45991+enum ef_vi_arch {
45992+ EF_VI_ARCH_FALCON,
45993+};
45994+
45995+
45996+struct ef_vi_nic_type {
45997+ unsigned char arch;
45998+ char variant;
45999+ unsigned char revision;
46000+};
46001+
46002+
46003+/* This structure is opaque to the client and is used to pass mapping data
46004+ * from the resource manager to the ef_vi library for ef_vi_init().
46005+ */
46006+struct vi_mappings {
46007+ uint32_t signature;
46008+# define VI_MAPPING_VERSION 0x02 /*Byte: Increment me if struct altered*/
46009+# define VI_MAPPING_SIGNATURE (0xBA1150 + VI_MAPPING_VERSION)
46010+
46011+ struct ef_vi_nic_type nic_type;
46012+
46013+ int vi_instance;
46014+
46015+ unsigned evq_bytes;
46016+ char* evq_base;
46017+ ef_vi_ioaddr_t evq_timer_reg;
46018+
46019+ unsigned rx_queue_capacity;
46020+ ef_vi_ioaddr_t rx_dma_ef1;
46021+ char* rx_dma_falcon;
46022+ ef_vi_ioaddr_t rx_bell;
46023+
46024+ unsigned tx_queue_capacity;
46025+ ef_vi_ioaddr_t tx_dma_ef1;
46026+ char* tx_dma_falcon;
46027+ ef_vi_ioaddr_t tx_bell;
46028+};
46029+/* This is used by clients to allocate a suitably sized buffer for the
46030+ * resource manager to fill & ef_vi_init() to use. */
46031+#define VI_MAPPINGS_SIZE (sizeof(struct vi_mappings))
46032+
46033+
46034+/**********************************************************************
46035+ * ef_config **********************************************************
46036+ **********************************************************************/
46037+
46038+struct ef_config_t {
46039+ int log; /* debug logging level */
46040+};
46041+
46042+extern struct ef_config_t ef_config;
46043+
46044+
46045+/**********************************************************************
46046+ * ef_vi **************************************************************
46047+ **********************************************************************/
46048+
46049+/* Initialise [data_area] with information required to initialise an ef_vi.
46050+ * In the following, an unused param should be set to NULL. Note the case
46051+ * of [iobuf_mmap] for the falcon driver; for the normal driver this
46052+ * must be NULL.
46053+ *
46054+ * \param data_area [in,out] required, must ref at least VI_MAPPINGS_SIZE
46055+ * bytes
46056+ * \param evq_capacity [in] number of events in event queue. Specify 0 for
46057+ * no event queue.
46058+ * \param rxq_capacity [in] number of descriptors in RX DMA queue. Specify
46059+ * 0 for no RX queue.
46060+ * \param txq_capacity [in] number of descriptors in TX DMA queue. Specify
46061+ * 0 for no TX queue.
46062+ * \param mmap_info [in] mem-map info for resource
46063+ * \param io_mmap [in] ef1, required
46064+ * falcon, required
46065+ * \param iobuf_mmap [in] ef1, UL: unused
46066+ * falcon, UL: required
46067+ */
46068+extern void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type,
46069+ unsigned rxq_capacity,
46070+ unsigned txq_capacity, int instance,
46071+ void* io_mmap, void* iobuf_mmap_rx,
46072+ void* iobuf_mmap_tx, enum ef_vi_flags);
46073+
46074+
46075+extern void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type,
46076+ int instance, unsigned evq_bytes,
46077+ void* base, void* timer_reg);
46078+
46079+ef_vi_inline unsigned ef_vi_resource_id(ef_vi* vi)
46080+{
46081+ return vi->vi_resource_id;
46082+}
46083+
46084+ef_vi_inline enum ef_vi_flags ef_vi_flags(ef_vi* vi)
46085+{
46086+ return vi->vi_flags;
46087+}
46088+
46089+
46090+/**********************************************************************
46091+ * Receive interface **************************************************
46092+ **********************************************************************/
46093+
46094+/*! \i_ef_vi Returns the amount of space in the RX descriptor ring.
46095+**
46096+** \return the amount of space in the queue.
46097+*/
46098+ef_vi_inline int ef_vi_receive_space(ef_vi* vi)
46099+{
46100+ ef_vi_rxq_state* qs = &vi->ep_state->rxq;
46101+ return vi->vi_rxq.mask - (qs->added - qs->removed);
46102+}
46103+
46104+
46105+/*! \i_ef_vi Returns the fill level of the RX descriptor ring.
46106+**
46107+** \return the fill level of the queue.
46108+*/
46109+ef_vi_inline int ef_vi_receive_fill_level(ef_vi* vi)
46110+{
46111+ ef_vi_rxq_state* qs = &vi->ep_state->rxq;
46112+ return qs->added - qs->removed;
46113+}
46114+
46115+
46116+ef_vi_inline int ef_vi_receive_capacity(ef_vi* vi)
46117+{
46118+ return vi->vi_rxq.mask;
46119+}
46120+
46121+/*! \i_ef_vi Complete a receive operation.
46122+**
46123+** When a receive completion event is received, it should be passed to
46124+** this function. The request-id for the buffer that the packet was
46125+** delivered to is returned.
46126+**
46127+** After this function returns, more space may be available in the
46128+** receive queue.
46129+*/
46130+extern ef_request_id ef_vi_receive_done(const ef_vi*, const ef_event*);
46131+
46132+/*! \i_ef_vi Return the request ID indicated by a receive event.
46133+ */
46134+ef_vi_inline ef_request_id ef_vi_receive_request_id(const ef_vi* vi,
46135+ const ef_event* ef_ev)
46136+{
46137+ const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev);
46138+ return ev->u32[0] & vi->vi_rxq.mask;
46139+}
46140+
46141+
46142+/*! \i_ef_vi Form a receive descriptor.
46143+**
46144+** If \c initial_rx_bytes is zero, use a reception size at least as large
46145+** as an MTU.
46146+*/
46147+extern int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id,
46148+ int initial_rx_bytes);
46149+
46150+/*! \i_ef_vi Submit initialised receive descriptors to the NIC. */
46151+extern void ef_vi_receive_push(ef_vi* vi);
46152+
46153+/*! \i_ef_vi Post a buffer on the receive queue.
46154+**
46155+** \return 0 on success, or -EAGAIN if the receive queue is full
46156+*/
46157+extern int ef_vi_receive_post(ef_vi*, ef_addr addr,
46158+ ef_request_id dma_id);
46159+
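A minimal refill sketch using the calls declared above: initialise up to [n] descriptors, then ring the doorbell once with ef_vi_receive_push(). The caller-owned arrays of buffer addresses and request ids are assumptions; how they are produced is outside this API.

static int example_rx_refill(ef_vi* vi, const ef_addr* addrs,
                             const ef_request_id* ids, int n)
{
	int i;
	for( i = 0; i < n; ++i )
		/* 0 => default receive size (at least an MTU) */
		if( ef_vi_receive_init(vi, addrs[i], ids[i], 0) < 0 )
			break;                  /* ring full (-EAGAIN) */
	if( i )
		ef_vi_receive_push(vi);
	return i;                               /* number actually posted */
}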
46160+/**********************************************************************
46161+ * Transmit interface *************************************************
46162+ **********************************************************************/
46163+
46164+/*! \i_ef_vi Return the amount of space (in descriptors) in the transmit
46165+** queue.
46166+**
46167+** \return the amount of space in the queue (in descriptors)
46168+*/
46169+ef_vi_inline int ef_vi_transmit_space(ef_vi* vi)
46170+{
46171+ ef_vi_txq_state* qs = &vi->ep_state->txq;
46172+ return vi->vi_txq.mask - (qs->added - qs->removed);
46173+}
46174+
46175+
46176+/*! \i_ef_vi Returns the fill level of the TX descriptor ring.
46177+**
46178+** \return the fill level of the queue.
46179+*/
46180+ef_vi_inline int ef_vi_transmit_fill_level(ef_vi* vi)
46181+{
46182+ ef_vi_txq_state* qs = &vi->ep_state->txq;
46183+ return qs->added - qs->removed;
46184+}
46185+
46186+
46187+/*! \i_ef_vi Returns the total capacity of the TX descriptor ring.
46188+**
46189+** \return the capacity of the queue.
46190+*/
46191+ef_vi_inline int ef_vi_transmit_capacity(ef_vi* vi)
46192+{
46193+ return vi->vi_txq.mask;
46194+}
46195+
46196+
46197+/*! \i_ef_vi Transmit a packet.
46198+**
46199+** \param bytes must be greater than ETH_ZLEN.
46200+** \return -EAGAIN if the transmit queue is full, or 0 on success
46201+*/
46202+extern int ef_vi_transmit(ef_vi*, ef_addr, int bytes, ef_request_id dma_id);
46203+
46204+/*! \i_ef_vi Transmit a packet using a gather list.
46205+**
46206+** \param iov_len must be greater than zero
46207+** \param iov the first element must be non-zero in length (the others need not be)
46208+**
46209+** \return -EAGAIN if the queue is full, or 0 on success
46210+*/
46211+extern int ef_vi_transmitv(ef_vi*, const ef_iovec* iov, int iov_len,
46212+ ef_request_id dma_id);
46213+
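A sketch of a two-element gather list, e.g. a separately held header and payload; the split into [hdr] and [payload] is an assumption made for illustration only.

static int example_transmit_hdr_payload(ef_vi* vi,
                                        ef_addr hdr, unsigned hdr_len,
                                        ef_addr payload, unsigned payload_len,
                                        ef_request_id dma_id)
{
	ef_iovec iov[2];
	iov[0].iov_base = hdr;      iov[0].iov_len = hdr_len;
	iov[1].iov_base = payload;  iov[1].iov_len = payload_len;
	/* Returns 0 on success or -EAGAIN if the TX ring is full. */
	return ef_vi_transmitv(vi, iov, 2, dma_id);
}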
46214+/*! \i_ef_vi Initialise a DMA request.
46215+**
46216+** \return -EAGAIN if the queue is full, or 0 on success
46217+*/
46218+extern int ef_vi_transmit_init(ef_vi*, ef_addr, int bytes,
46219+ ef_request_id dma_id);
46220+
46221+/*! \i_ef_vi Initialise a DMA request from a gather list.
46222+**
46223+** \return -EAGAIN if the queue is full, or 0 on success
46224+*/
46225+extern int ef_vi_transmitv_init(ef_vi*, const ef_iovec*, int iov_len,
46226+ ef_request_id dma_id);
46227+
46228+/*! \i_ef_vi Submit DMA requests to the NIC.
46229+**
46230+** The DMA requests must have been initialised using
46231+** ef_vi_transmit_init() or ef_vi_transmitv_init().
46232+*/
46233+extern void ef_vi_transmit_push(ef_vi*);
46234+
46235+
46236+/*! \i_ef_vi Maximum number of transmit completions per transmit event. */
46237+#define EF_VI_TRANSMIT_BATCH 64
46238+
46239+/*! \i_ef_vi Determine the set of [ef_request_id]s for each DMA request
46240+** which has been completed by a given transmit completion
46241+** event.
46242+**
46243+** \param ids must point to an array of length EF_VI_TRANSMIT_BATCH
46244+** \return the number of valid [ef_request_id]s (can be zero)
46245+*/
46246+extern int ef_vi_transmit_unbundle(ef_vi* ep, const ef_event*,
46247+ ef_request_id* ids);
46248+
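A sketch of handling a TX or TX_ERROR event with the helper above. The completion callback stands in for whatever per-buffer bookkeeping the caller does; it is an assumption, not part of this API.

static void example_handle_tx_event(ef_vi* vi, const ef_event* ev,
                                    void (*complete)(ef_request_id id))
{
	ef_request_id ids[EF_VI_TRANSMIT_BATCH];
	int i, n = ef_vi_transmit_unbundle(vi, ev, ids);
	for( i = 0; i < n; ++i )
		complete(ids[i]);
}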
46249+
46250+/*! \i_ef_event Returns true if ef_eventq_poll() will return event(s). */
46251+extern int ef_eventq_has_event(ef_vi* vi);
46252+
46253+/*! \i_ef_event Returns true if there are quite a few events in the event
46254+** queue.
46255+**
46256+** This looks ahead in the event queue, so has the property that it will
46257+** not ping-pong a cache-line when it is called concurrently with events
46258+** being delivered.
46259+*/
46260+extern int ef_eventq_has_many_events(ef_vi* evq, int look_ahead);
46261+
46262+/*! Type of function to handle unknown events arriving on the event queue.
46263+** Return CI_TRUE iff the event has been handled.
46264+*/
46265+typedef int/*bool*/ ef_event_handler_fn(void* priv, ef_vi* evq, ef_event* ev);
46266+
46267+/*! Standard poll exception routine */
46268+extern int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq,
46269+ ef_event* ev);
46270+
46271+/*! \i_ef_event Retrieve events from the event queue, handle RX/TX events
46272+** and pass any others to an exception handler function
46273+**
46274+** \return The number of events retrieved.
46275+*/
46276+extern int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len,
46277+ ef_event_handler_fn *exception, void *expt_priv);
46278+
46279+/*! \i_ef_event Retrieve events from the event queue.
46280+**
46281+** \return The number of events retrieved.
46282+*/
46283+ef_vi_inline int ef_eventq_poll(ef_vi* evq, ef_event* evs, int evs_len)
46284+{
46285+ return ef_eventq_poll_evs(evq, evs, evs_len,
46286+ &ef_eventq_poll_exception, (void*)0);
46287+}
46288+
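A minimal poll-loop sketch tying the pieces together: drain a batch of events and dispatch on EF_EVENT_TYPE(). Recycling of the completed buffers is left out, since it is application-specific.

static void example_poll(ef_vi* vi)
{
	ef_event evs[16];
	int i, n = ef_eventq_poll(vi, evs, 16);

	for( i = 0; i < n; ++i ) {
		switch( EF_EVENT_TYPE(evs[i]) ) {
		case EF_EVENT_TYPE_RX:
		case EF_EVENT_TYPE_RX_DISCARD:
			/* Recover the dma_id of the buffer the frame landed
			 * in; EF_EVENT_RX_BYTES() gives its length. */
			(void) ef_vi_receive_done(vi, &evs[i]);
			break;
		case EF_EVENT_TYPE_TX:
		case EF_EVENT_TYPE_TX_ERROR:
			/* Completed dma_ids come from
			 * ef_vi_transmit_unbundle() (see above). */
			break;
		default:
			break;
		}
	}
}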
46289+/*! \i_ef_event Returns the capacity of an event queue. */
46290+ef_vi_inline int ef_eventq_capacity(ef_vi* vi)
46291+{
46292+ return (vi->evq_mask + 1u) / sizeof(ef_hw_event);
46293+}
46294+
46295+/* Returns the instance ID of [vi] */
46296+ef_vi_inline unsigned ef_vi_instance(ef_vi* vi)
46297+{ return vi->vi_i; }
46298+
46299+
46300+/**********************************************************************
46301+ * Initialisation *****************************************************
46302+ **********************************************************************/
46303+
46304+/*! Return size of state buffer of an initialised VI. */
46305+extern int ef_vi_state_bytes(ef_vi*);
46306+
46307+/*! Return size of buffer needed for VI state given sizes of RX and TX
46308+** DMA queues. Queue sizes must be legal sizes (power of 2), or 0 (no
46309+** queue).
46310+*/
46311+extern int ef_vi_calc_state_bytes(int rxq_size, int txq_size);
46312+
46313+/*! Initialise [ef_vi] from the provided resources. [vvis] must have been
46314+** created by ef_make_vi_data() & remains owned by the caller.
46315+*/
46316+extern void ef_vi_init(ef_vi*, void* vi_info, ef_vi_state* state,
46317+ ef_eventq_state* evq_state, enum ef_vi_flags);
46318+
46319+extern void ef_vi_state_init(ef_vi*);
46320+extern void ef_eventq_state_init(ef_vi*);
46321+
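The intended call order, as far as it can be inferred from this header, is roughly the following sketch; the mapping data [vm] is assumed to have been filled in already by the resource manager via ef_vi_init_mapping_evq() and ef_vi_init_mapping_vi(), and an event queue is assumed to be present.

static void example_vi_bringup(ef_vi* vi, struct vi_mappings* vm,
                               ef_vi_state* state,
                               ef_eventq_state* evq_state,
                               enum ef_vi_flags flags)
{
	ef_vi_init(vi, vm, state, evq_state, flags);
	ef_vi_state_init(vi);      /* reset TX/RX ring state and id fifos */
	ef_eventq_state_init(vi);  /* reset event queue read pointer      */
}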
46322+/*! Convert an efhw device arch to ef_vi_arch, returning -1 if not
46323+** recognised.
46324+*/
46325+extern int ef_vi_arch_from_efhw_arch(int efhw_arch);
46326+
46327+
46328+#endif /* __EFAB_EF_VI_H__ */
46329Index: head-2008-11-25/drivers/xen/sfc_netfront/falcon_event.c
46330===================================================================
46331--- /dev/null 1970-01-01 00:00:00.000000000 +0000
46332+++ head-2008-11-25/drivers/xen/sfc_netfront/falcon_event.c 2008-02-20 09:32:49.000000000 +0100
46333@@ -0,0 +1,346 @@
46334+/****************************************************************************
46335+ * Copyright 2002-2005: Level 5 Networks Inc.
46336+ * Copyright 2005-2008: Solarflare Communications Inc,
46337+ * 9501 Jeronimo Road, Suite 250,
46338+ * Irvine, CA 92618, USA
46339+ *
46340+ * Maintained by Solarflare Communications
46341+ * <linux-xen-drivers@solarflare.com>
46342+ * <onload-dev@solarflare.com>
46343+ *
46344+ * This program is free software; you can redistribute it and/or modify it
46345+ * under the terms of the GNU General Public License version 2 as published
46346+ * by the Free Software Foundation, incorporated herein by reference.
46347+ *
46348+ * This program is distributed in the hope that it will be useful,
46349+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
46350+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46351+ * GNU General Public License for more details.
46352+ *
46353+ * You should have received a copy of the GNU General Public License
46354+ * along with this program; if not, write to the Free Software
46355+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
46356+ ****************************************************************************
46357+ */
46358+
46359+/*
46360+ * \author djr
46361+ * \brief Routine to poll event queues.
46362+ * \date 2003/03/04
46363+ */
46364+
46365+/*! \cidoxg_lib_ef */
46366+#include "ef_vi_internal.h"
46367+
46368+/* Be worried about this on byteswapped machines */
46369+/* Due to crazy chipsets, we see the event words being written in
46370+** arbitrary order (bug4539). So the test for presence of an event must ensure
46371+** that both halves have changed from the null.
46372+*/
46373+# define EF_VI_IS_EVENT(evp) \
46374+ ( (((evp)->opaque.a != (uint32_t)-1) && \
46375+ ((evp)->opaque.b != (uint32_t)-1)) )
46376+
46377+
46378+#ifdef NDEBUG
46379+# define IS_DEBUG 0
46380+#else
46381+# define IS_DEBUG 1
46382+#endif
46383+
46384+
46385+/*! Check for RX events with inconsistent SOP/CONT
46386+**
46387+** Returns true if this event should be discarded
46388+*/
46389+ef_vi_inline int ef_eventq_is_rx_sop_cont_bad_efab(ef_vi* vi,
46390+ const ef_vi_qword* ev)
46391+{
46392+ ef_rx_dup_state_t* rx_dup_state;
46393+ uint8_t* bad_sop;
46394+
46395+ unsigned label = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
46396+ unsigned sop = QWORD_TEST_BIT(RX_SOP, *ev);
46397+
46398+ ef_assert(vi);
46399+ ef_assert_lt(label, EFAB_DMAQS_PER_EVQ_MAX);
46400+
46401+ rx_dup_state = &vi->evq_state->rx_dup_state[label];
46402+ bad_sop = &rx_dup_state->bad_sop;
46403+
46404+ if( ! ((vi->vi_flags & EF_VI_BUG5692_WORKAROUND) || IS_DEBUG) ) {
46405+ *bad_sop = (*bad_sop && !sop);
46406+ }
46407+ else {
46408+ unsigned cont = QWORD_TEST_BIT(RX_JUMBO_CONT, *ev);
46409+ uint8_t *frag_num = &rx_dup_state->frag_num;
46410+
46411+ /* bad_sop should latch till the next sop */
46412+ *bad_sop = (*bad_sop && !sop) || ( !!sop != (*frag_num==0) );
46413+
46414+ /* we do not check the number of bytes relative to the
46415+ * fragment number and size of the user rx buffer here
46416+ * because we don't know the size of the user rx
46417+ * buffer - we probably should perform this check in
46418+ * the nearest code calling this though.
46419+ */
46420+ *frag_num = cont ? (*frag_num + 1) : 0;
46421+ }
46422+
46423+ return *bad_sop;
46424+}
46425+
46426+
46427+ef_vi_inline int falcon_rx_check_dup(ef_vi* evq, ef_event* ev_out,
46428+ const ef_vi_qword* ev)
46429+{
46430+ unsigned q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
46431+ unsigned desc_ptr = QWORD_GET_U(RX_EV_DESC_PTR, *ev);
46432+ ef_rx_dup_state_t* rx_dup_state = &evq->evq_state->rx_dup_state[q_id];
46433+
46434+ if(likely( desc_ptr != rx_dup_state->rx_last_desc_ptr )) {
46435+ rx_dup_state->rx_last_desc_ptr = desc_ptr;
46436+ return 0;
46437+ }
46438+
46439+ rx_dup_state->rx_last_desc_ptr = desc_ptr;
46440+ rx_dup_state->bad_sop = 1;
46441+#ifndef NDEBUG
46442+ rx_dup_state->frag_num = 0;
46443+#endif
46444+ BUG_ON(!QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev));
46445+ BUG_ON( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev));
46446+ BUG_ON(QWORD_GET_U(RX_EV_BYTE_CNT, *ev) != 0);
46447+ ev_out->rx_no_desc_trunc.type = EF_EVENT_TYPE_RX_NO_DESC_TRUNC;
46448+ ev_out->rx_no_desc_trunc.q_id = q_id;
46449+ return 1;
46450+}
46451+
46452+
46453+ef_vi_inline void falcon_rx_event(ef_event* ev_out, const ef_vi_qword* ev)
46454+{
46455+ if(likely( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev) )) {
46456+ ev_out->rx.type = EF_EVENT_TYPE_RX;
46457+ ev_out->rx.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
46458+ ev_out->rx.len = QWORD_GET_U(RX_EV_BYTE_CNT, *ev);
46459+ if( QWORD_TEST_BIT(RX_SOP, *ev) )
46460+ ev_out->rx.flags = EF_EVENT_FLAG_SOP;
46461+ else
46462+ ev_out->rx.flags = 0;
46463+ if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) )
46464+ ev_out->rx.flags |= EF_EVENT_FLAG_CONT;
46465+ if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) )
46466+ ev_out->rx.flags |= EF_EVENT_FLAG_ISCSI_OK;
46467+ }
46468+ else {
46469+ ev_out->rx_discard.type = EF_EVENT_TYPE_RX_DISCARD;
46470+ ev_out->rx_discard.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
46471+ ev_out->rx_discard.len = QWORD_GET_U(RX_EV_BYTE_CNT, *ev);
46472+#if 1 /* hack for ptloop compatibility: ?? TODO purge */
46473+ if( QWORD_TEST_BIT(RX_SOP, *ev) )
46474+ ev_out->rx_discard.flags = EF_EVENT_FLAG_SOP;
46475+ else
46476+ ev_out->rx_discard.flags = 0;
46477+ if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) )
46478+ ev_out->rx_discard.flags |= EF_EVENT_FLAG_CONT;
46479+ if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) )
46480+ ev_out->rx_discard.flags |= EF_EVENT_FLAG_ISCSI_OK;
46481+#endif
46482+ /* Order matters here: more fundamental errors first. */
46483+ if( QWORD_TEST_BIT(RX_EV_BUF_OWNER_ID_ERR, *ev) )
46484+ ev_out->rx_discard.subtype =
46485+ EF_EVENT_RX_DISCARD_RIGHTS;
46486+ else if( QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev) )
46487+ ev_out->rx_discard.subtype =
46488+ EF_EVENT_RX_DISCARD_TRUNC;
46489+ else if( QWORD_TEST_BIT(RX_EV_ETH_CRC_ERR, *ev) )
46490+ ev_out->rx_discard.subtype =
46491+ EF_EVENT_RX_DISCARD_CRC_BAD;
46492+ else if( QWORD_TEST_BIT(RX_EV_IP_HDR_CHKSUM_ERR, *ev) )
46493+ ev_out->rx_discard.subtype =
46494+ EF_EVENT_RX_DISCARD_CSUM_BAD;
46495+ else if( QWORD_TEST_BIT(RX_EV_TCP_UDP_CHKSUM_ERR, *ev) )
46496+ ev_out->rx_discard.subtype =
46497+ EF_EVENT_RX_DISCARD_CSUM_BAD;
46498+ else
46499+ ev_out->rx_discard.subtype =
46500+ EF_EVENT_RX_DISCARD_OTHER;
46501+ }
46502+}
46503+
46504+
46505+ef_vi_inline void falcon_tx_event(ef_event* ev_out, const ef_vi_qword* ev)
46506+{
46507+ /* Danger danger! No matter what we ask for wrt batching, we
46508+ ** will get a batched event every 16 descriptors, and we also
46509+ ** get dma-queue-empty events. i.e. Duplicates are expected.
46510+ **
46511+ ** In addition, if it's been requested in the descriptor, we
46512+ ** get an event per descriptor. (We don't currently request
46513+ ** this).
46514+ */
46515+ if(likely( QWORD_TEST_BIT(TX_EV_COMP, *ev) )) {
46516+ ev_out->tx.type = EF_EVENT_TYPE_TX;
46517+ ev_out->tx.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev);
46518+ }
46519+ else {
46520+ ev_out->tx_error.type = EF_EVENT_TYPE_TX_ERROR;
46521+ ev_out->tx_error.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev);
46522+ if(likely( QWORD_TEST_BIT(TX_EV_BUF_OWNER_ID_ERR, *ev) ))
46523+ ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_RIGHTS;
46524+ else if(likely( QWORD_TEST_BIT(TX_EV_WQ_FF_FULL, *ev) ))
46525+ ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_OFLOW;
46526+ else if(likely( QWORD_TEST_BIT(TX_EV_PKT_TOO_BIG, *ev) ))
46527+ ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_2BIG;
46528+ else if(likely( QWORD_TEST_BIT(TX_EV_PKT_ERR, *ev) ))
46529+ ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_BUS;
46530+ }
46531+}
46532+
46533+
46534+static void mark_bad(ef_event* ev)
46535+{
46536+ ev->generic.ev.u64[0] &=~ ((uint64_t) 1u << RX_EV_PKT_OK_LBN);
46537+}
46538+
46539+
46540+int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len,
46541+ ef_event_handler_fn *exception, void *expt_priv)
46542+{
46543+ int evs_len_orig = evs_len;
46544+
46545+ EF_VI_CHECK_EVENT_Q(evq);
46546+ ef_assert(evs);
46547+ ef_assert_gt(evs_len, 0);
46548+
46549+ if(unlikely( EF_VI_IS_EVENT(EF_VI_EVENT_PTR(evq, 1)) ))
46550+ goto overflow;
46551+
46552+ do {
46553+ { /* Read the event out of the ring, then fiddle with
46554+ * copied version. Reason is that the ring is
46555+ * likely to get pushed out of cache by another
46556+ * event being delivered by hardware. */
46557+ ef_vi_event* ev = EF_VI_EVENT_PTR(evq, 0);
46558+ if( ! EF_VI_IS_EVENT(ev) )
46559+ break;
46560+ evs->generic.ev.u64[0] = cpu_to_le64 (ev->u64);
46561+ evq->evq_state->evq_ptr += sizeof(ef_vi_event);
46562+ ev->u64 = (uint64_t)(int64_t) -1;
46563+ }
46564+
46565+ /* Ugly: Exploit the fact that event code lies in top
46566+ * bits of event. */
46567+ ef_assert_ge(EV_CODE_LBN, 32u);
46568+ switch( evs->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) {
46569+ case RX_IP_EV_DECODE:
46570+ /* Look for duplicate desc_ptr: it signals
46571+ * that a jumbo frame was truncated because we
46572+ * ran out of descriptors. */
46573+ if(unlikely( falcon_rx_check_dup
46574+ (evq, evs, &evs->generic.ev) )) {
46575+ --evs_len;
46576+ ++evs;
46577+ break;
46578+ }
46579+ else {
46580+ /* Cope with FalconA1 bugs where RX
46581+ * gives inconsistent RX events. Mark
46582+ * events as bad until SOP becomes
46583+ * consistent again.
46584+ * ef_eventq_is_rx_sop_cont_bad() has
46585+ * side effects - order is important.
46586+ */
46587+ if(unlikely
46588+ (ef_eventq_is_rx_sop_cont_bad_efab
46589+ (evq, &evs->generic.ev) )) {
46590+ mark_bad(evs);
46591+ }
46592+ }
46593+ falcon_rx_event(evs, &evs->generic.ev);
46594+ --evs_len;
46595+ ++evs;
46596+ break;
46597+
46598+ case TX_IP_EV_DECODE:
46599+ falcon_tx_event(evs, &evs->generic.ev);
46600+ --evs_len;
46601+ ++evs;
46602+ break;
46603+
46604+ default:
46605+ break;
46606+ }
46607+ } while( evs_len );
46608+
46609+ return evs_len_orig - evs_len;
46610+
46611+
46612+ overflow:
46613+ evs->generic.type = EF_EVENT_TYPE_OFLOW;
46614+ evs->generic.ev.u64[0] = (uint64_t)((int64_t)-1);
46615+ return 1;
46616+}
46617+
46618+
46619+int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq, ef_event* ev)
46620+{
46621+ int /*bool*/ handled = 0;
46622+
46623+ switch( ev->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) {
46624+ case DRIVER_EV_DECODE:
46625+ if( QWORD_GET_U(DRIVER_EV_SUB_CODE, ev->generic.ev) ==
46626+ EVQ_INIT_DONE_EV_DECODE )
46627+ /* EVQ initialised event: ignore. */
46628+ handled = 1;
46629+ break;
46630+ }
46631+ return handled;
46632+}
46633+
46634+
46635+void ef_eventq_iterate(ef_vi* vi,
46636+ void (*fn)(void* arg, ef_vi*, int rel_pos,
46637+ int abs_pos, void* event),
46638+ void* arg, int stop_at_end)
46639+{
46640+ int i, size_evs = (vi->evq_mask + 1) / sizeof(ef_vi_event);
46641+
46642+ for( i = 0; i < size_evs; ++i ) {
46643+ ef_vi_event* e = EF_VI_EVENT_PTR(vi, -i);
46644+ if( EF_VI_IS_EVENT(e) )
46645+ fn(arg, vi, i,
46646+ EF_VI_EVENT_OFFSET(vi, -i) / sizeof(ef_vi_event),
46647+ e);
46648+ else if( stop_at_end )
46649+ break;
46650+ }
46651+}
46652+
46653+
46654+int ef_eventq_has_event(ef_vi* vi)
46655+{
46656+ return EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, 0));
46657+}
46658+
46659+
46660+int ef_eventq_has_many_events(ef_vi* vi, int look_ahead)
46661+{
46662+ ef_assert_ge(look_ahead, 0);
46663+ return EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, -look_ahead));
46664+}
46665+
46666+
46667+int ef_eventq_has_rx_event(ef_vi* vi)
46668+{
46669+ ef_vi_event* ev;
46670+ int i, n_evs = 0;
46671+
46672+ for( i = 0; EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, i)); --i ) {
46673+ ev = EF_VI_EVENT_PTR(vi, i);
46674+ if( EFVI_FALCON_EVENT_CODE(ev) == EF_EVENT_TYPE_RX ) n_evs++;
46675+ }
46676+ return n_evs;
46677+}
46678+
46679+/*! \cidoxg_end */
46680Index: head-2008-11-25/drivers/xen/sfc_netfront/falcon_vi.c
46681===================================================================
46682--- /dev/null 1970-01-01 00:00:00.000000000 +0000
46683+++ head-2008-11-25/drivers/xen/sfc_netfront/falcon_vi.c 2008-02-20 09:32:49.000000000 +0100
46684@@ -0,0 +1,465 @@
46685+/****************************************************************************
46686+ * Copyright 2002-2005: Level 5 Networks Inc.
46687+ * Copyright 2005-2008: Solarflare Communications Inc,
46688+ * 9501 Jeronimo Road, Suite 250,
46689+ * Irvine, CA 92618, USA
46690+ *
46691+ * Maintained by Solarflare Communications
46692+ * <linux-xen-drivers@solarflare.com>
46693+ * <onload-dev@solarflare.com>
46694+ *
46695+ * This program is free software; you can redistribute it and/or modify it
46696+ * under the terms of the GNU General Public License version 2 as published
46697+ * by the Free Software Foundation, incorporated herein by reference.
46698+ *
46699+ * This program is distributed in the hope that it will be useful,
46700+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
46701+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46702+ * GNU General Public License for more details.
46703+ *
46704+ * You should have received a copy of the GNU General Public License
46705+ * along with this program; if not, write to the Free Software
46706+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
46707+ ****************************************************************************
46708+ */
46709+
46710+/*
46711+ * \author djr, stg
46712+ * \brief Falcon-specific VI
46713+ * \date 2006/11/30
46714+ */
46715+
46716+#include "ef_vi_internal.h"
46717+
46718+
46719+#define EFVI_FALCON_DMA_TX_FRAG 1
46720+
46721+
46722+/* TX descriptor for both physical and virtual packet transfers */
46723+typedef union {
46724+ uint32_t dword[2];
46725+} ef_vi_falcon_dma_tx_buf_desc;
46726+typedef ef_vi_falcon_dma_tx_buf_desc ef_vi_falcon_dma_tx_phys_desc;
46727+
46728+
46729+/* RX descriptor for physical addressed transfers */
46730+typedef union {
46731+ uint32_t dword[2];
46732+} ef_vi_falcon_dma_rx_phys_desc;
46733+
46734+
46735+/* RX descriptor for virtual packet transfers */
46736+typedef struct {
46737+ uint32_t dword[1];
46738+} ef_vi_falcon_dma_rx_buf_desc;
46739+
46740+/* Buffer table index */
46741+typedef uint32_t ef_vi_buffer_addr_t;
46742+
46743+ef_vi_inline int64_t dma_addr_to_u46(int64_t src_dma_addr)
46744+{
46745+ return (src_dma_addr & __FALCON_MASK(46, int64_t));
46746+}
46747+
46748+/*! Setup a physical address based descriptor with a specified length */
46749+ef_vi_inline void
46750+__falcon_dma_rx_calc_ip_phys(ef_vi_dma_addr_t dest_pa,
46751+ ef_vi_falcon_dma_rx_phys_desc *desc,
46752+ int bytes)
46753+{
46754+ int region = 0; /* TODO fixme */
46755+ int64_t dest = dma_addr_to_u46(dest_pa); /* lower 46 bits */
46756+
46757+ DWCHCK(__DW2(RX_KER_BUF_SIZE_LBN), RX_KER_BUF_SIZE_WIDTH);
46758+ DWCHCK(__DW2(RX_KER_BUF_REGION_LBN),RX_KER_BUF_REGION_WIDTH);
46759+
46760+ LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH);
46761+
46762+ RANGECHCK(bytes, RX_KER_BUF_SIZE_WIDTH);
46763+ RANGECHCK(region, RX_KER_BUF_REGION_WIDTH);
46764+
46765+ ef_assert(desc);
46766+
46767+ desc->dword[1] = ((bytes << __DW2(RX_KER_BUF_SIZE_LBN)) |
46768+ (region << __DW2(RX_KER_BUF_REGION_LBN)) |
46769+ (HIGH(dest,
46770+ RX_KER_BUF_ADR_LBN,
46771+ RX_KER_BUF_ADR_WIDTH)));
46772+
46773+ desc->dword[0] = LOW(dest,
46774+ RX_KER_BUF_ADR_LBN,
46775+ RX_KER_BUF_ADR_WIDTH);
46776+}
46777+
46778+/*! Setup a virtual buffer descriptor for an IPMODE transfer */
46779+ef_vi_inline void
46780+__falcon_dma_tx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs, unsigned bytes,
46781+ int port, int frag,
46782+ ef_vi_falcon_dma_tx_buf_desc *desc)
46783+{
46784+ DWCHCK(__DW2(TX_USR_PORT_LBN), TX_USR_PORT_WIDTH);
46785+ DWCHCK(__DW2(TX_USR_CONT_LBN), TX_USR_CONT_WIDTH);
46786+ DWCHCK(__DW2(TX_USR_BYTE_CNT_LBN), TX_USR_BYTE_CNT_WIDTH);
46787+ LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH);
46788+ DWCHCK(TX_USR_BYTE_OFS_LBN, TX_USR_BYTE_OFS_WIDTH);
46789+
46790+ RANGECHCK(bytes, TX_USR_BYTE_CNT_WIDTH);
46791+ RANGECHCK(port, TX_USR_PORT_WIDTH);
46792+ RANGECHCK(frag, TX_USR_CONT_WIDTH);
46793+ RANGECHCK(buf_id, TX_USR_BUF_ID_WIDTH);
46794+ RANGECHCK(buf_ofs, TX_USR_BYTE_OFS_WIDTH);
46795+
46796+ ef_assert(desc);
46797+
46798+ desc->dword[1] = ((port << __DW2(TX_USR_PORT_LBN)) |
46799+ (frag << __DW2(TX_USR_CONT_LBN)) |
46800+ (bytes << __DW2(TX_USR_BYTE_CNT_LBN)) |
46801+ (HIGH(buf_id,
46802+ TX_USR_BUF_ID_LBN,
46803+ TX_USR_BUF_ID_WIDTH)));
46804+
46805+ desc->dword[0] = ((LOW(buf_id,
46806+ TX_USR_BUF_ID_LBN,
46807+ (TX_USR_BUF_ID_WIDTH))) |
46808+ (buf_ofs << TX_USR_BYTE_OFS_LBN));
46809+}
46810+
46811+ef_vi_inline void
46812+falcon_dma_tx_calc_ip_buf_4k(unsigned buf_vaddr, unsigned bytes,
46813+ int port, int frag,
46814+ ef_vi_falcon_dma_tx_buf_desc *desc)
46815+{
46816+ /* TODO FIXME [buf_vaddr] consists of the buffer index in the
46817+ ** high bits, and an offset in the low bits. Assumptions
46818+ ** permeate the code that these can be rolled into one 32bit
46819+ ** value, so this is currently preserved for Falcon. But we
46820+ ** should change to support 8K pages
46821+ */
46822+ unsigned buf_id = EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr);
46823+ unsigned buf_ofs = EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr);
46824+
46825+ __falcon_dma_tx_calc_ip_buf( buf_id, buf_ofs, bytes, port, frag, desc);
46826+}
46827+
46828+ef_vi_inline void
46829+falcon_dma_tx_calc_ip_buf(unsigned buf_vaddr, unsigned bytes, int port,
46830+ int frag, ef_vi_falcon_dma_tx_buf_desc *desc)
46831+{
46832+ falcon_dma_tx_calc_ip_buf_4k(buf_vaddr, bytes, port, frag, desc);
46833+}
46834+
46835+/*! Setup a virtual buffer based descriptor */
46836+ef_vi_inline void
46837+__falcon_dma_rx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs,
46838+ ef_vi_falcon_dma_rx_buf_desc *desc)
46839+{
46840+ /* check alignment of buffer offset and pack */
46841+ ef_assert((buf_ofs & 0x1) == 0);
46842+
46843+ buf_ofs >>= 1;
46844+
46845+ DWCHCK(RX_USR_2BYTE_OFS_LBN, RX_USR_2BYTE_OFS_WIDTH);
46846+ DWCHCK(RX_USR_BUF_ID_LBN, RX_USR_BUF_ID_WIDTH);
46847+
46848+ RANGECHCK(buf_ofs, RX_USR_2BYTE_OFS_WIDTH);
46849+ RANGECHCK(buf_id, RX_USR_BUF_ID_WIDTH);
46850+
46851+ ef_assert(desc);
46852+
46853+ desc->dword[0] = ((buf_ofs << RX_USR_2BYTE_OFS_LBN) |
46854+ (buf_id << RX_USR_BUF_ID_LBN));
46855+}
46856+
46857+ef_vi_inline void
46858+falcon_dma_rx_calc_ip_buf_4k(unsigned buf_vaddr,
46859+ ef_vi_falcon_dma_rx_buf_desc *desc)
46860+{
46861+ /* TODO FIXME [buf_vaddr] consists of the buffer index in the
46862+ ** high bits, and an offset in the low bits. Assumptions
46863+ ** permeate the code that these can be rolled into one 32bit
46864+ ** value, so this is currently preserved for Falcon. But we
46865+ ** should change to support 8K pages
46866+ */
46867+ unsigned buf_id = EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr);
46868+ unsigned buf_ofs = EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr);
46869+
46870+ __falcon_dma_rx_calc_ip_buf(buf_id, buf_ofs, desc);
46871+}
46872+
46873+ef_vi_inline void
46874+falcon_dma_rx_calc_ip_buf(unsigned buf_vaddr,
46875+ ef_vi_falcon_dma_rx_buf_desc *desc)
46876+{
46877+ falcon_dma_rx_calc_ip_buf_4k(buf_vaddr, desc);
46878+}
46879+
46880+
46881+ef_vi_inline ef_vi_dma_addr_t ef_physaddr(ef_addr efaddr)
46882+{
46883+ return (ef_vi_dma_addr_t) efaddr;
46884+}
46885+
46886+
46887+/*! Convert between an ef_addr and a buffer table index
46888+** Assert that this was not a physical address
46889+*/
46890+ef_vi_inline ef_vi_buffer_addr_t ef_bufaddr(ef_addr efaddr)
46891+{
46892+ ef_assert(efaddr < ((uint64_t)1 << 32) );
46893+
46894+ return (ef_vi_buffer_addr_t) efaddr;
46895+}
46896+
46897+
46898+/*! Setup a physical address based descriptor for an IPMODE transfer */
46899+ef_vi_inline void
46900+falcon_dma_tx_calc_ip_phys(ef_vi_dma_addr_t src_dma_addr, unsigned bytes,
46901+ int port, int frag,
46902+ ef_vi_falcon_dma_tx_phys_desc *desc)
46903+{
46904+
46905+ int region = 0; /* FIXME */
46906+ int64_t src = dma_addr_to_u46(src_dma_addr); /* lower 46 bits */
46907+
46908+ DWCHCK(__DW2(TX_KER_PORT_LBN), TX_KER_PORT_WIDTH);
46909+ DWCHCK(__DW2(TX_KER_CONT_LBN), TX_KER_CONT_WIDTH);
46910+ DWCHCK(__DW2(TX_KER_BYTE_CNT_LBN), TX_KER_BYTE_CNT_WIDTH);
46911+ DWCHCK(__DW2(TX_KER_BUF_REGION_LBN),TX_KER_BUF_REGION_WIDTH);
46912+
46913+ LWCHK(TX_KER_BUF_ADR_LBN, TX_KER_BUF_ADR_WIDTH);
46914+
46915+ RANGECHCK(port, TX_KER_PORT_WIDTH);
46916+ RANGECHCK(frag, TX_KER_CONT_WIDTH);
46917+ RANGECHCK(bytes, TX_KER_BYTE_CNT_WIDTH);
46918+ RANGECHCK(region, TX_KER_BUF_REGION_WIDTH);
46919+
46920+ desc->dword[1] = ((port << __DW2(TX_KER_PORT_LBN)) |
46921+ (frag << __DW2(TX_KER_CONT_LBN)) |
46922+ (bytes << __DW2(TX_KER_BYTE_CNT_LBN)) |
46923+ (region << __DW2(TX_KER_BUF_REGION_LBN)) |
46924+ (HIGH(src,
46925+ TX_KER_BUF_ADR_LBN,
46926+ TX_KER_BUF_ADR_WIDTH)));
46927+
46928+ ef_assert_equal(TX_KER_BUF_ADR_LBN, 0);
46929+ desc->dword[0] = (uint32_t) src_dma_addr;
46930+}
46931+
46932+
46933+void falcon_vi_init(ef_vi* vi, void* vvis)
46934+{
46935+ struct vi_mappings *vm = (struct vi_mappings*)vvis;
46936+ uint16_t* ids;
46937+
46938+ ef_assert(vi);
46939+ ef_assert(vvis);
46940+ ef_assert_equal(vm->signature, VI_MAPPING_SIGNATURE);
46941+ ef_assert_equal(vm->nic_type.arch, EF_VI_ARCH_FALCON);
46942+
46943+ /* Initialise masks to zero, so that ef_vi_state_init() will
46944+ ** not do any harm when we don't have DMA queues. */
46945+ vi->vi_rxq.mask = vi->vi_txq.mask = 0;
46946+
46947+ /* Used for BUG5391_WORKAROUND. */
46948+ vi->vi_txq.misalign_mask = 0;
46949+
46950+ /* Initialise doorbell addresses to a distinctive small value
46951+ ** which will cause a segfault, to trap doorbell pushes to VIs
46952+ ** without DMA queues. */
46953+ vi->vi_rxq.doorbell = vi->vi_txq.doorbell = (ef_vi_ioaddr_t)0xdb;
46954+
46955+ ids = (uint16_t*) (vi->ep_state + 1);
46956+
46957+ if( vm->tx_queue_capacity ) {
46958+ vi->vi_txq.mask = vm->tx_queue_capacity - 1;
46959+ vi->vi_txq.doorbell = vm->tx_bell + 12;
46960+ vi->vi_txq.descriptors = vm->tx_dma_falcon;
46961+ vi->vi_txq.ids = ids;
46962+ ids += vi->vi_txq.mask + 1;
46963+ /* Check that the id fifo fits in the space allocated. */
46964+ ef_assert_le((char*) (vi->vi_txq.ids + vm->tx_queue_capacity),
46965+ (char*) vi->ep_state
46966+ + ef_vi_calc_state_bytes(vm->rx_queue_capacity,
46967+ vm->tx_queue_capacity));
46968+ }
46969+ if( vm->rx_queue_capacity ) {
46970+ vi->vi_rxq.mask = vm->rx_queue_capacity - 1;
46971+ vi->vi_rxq.doorbell = vm->rx_bell + 12;
46972+ vi->vi_rxq.descriptors = vm->rx_dma_falcon;
46973+ vi->vi_rxq.ids = ids;
46974+ /* Check that the id fifo fits in the space allocated. */
46975+ ef_assert_le((char*) (vi->vi_rxq.ids + vm->rx_queue_capacity),
46976+ (char*) vi->ep_state
46977+ + ef_vi_calc_state_bytes(vm->rx_queue_capacity,
46978+ vm->tx_queue_capacity));
46979+ }
46980+
46981+ if( vm->nic_type.variant == 'A' ) {
46982+ vi->vi_txq.misalign_mask = 15; /* BUG5391_WORKAROUND */
46983+ vi->vi_flags |= EF_VI_BUG5692_WORKAROUND;
46984+ }
46985+}
46986+
46987+
46988+int ef_vi_transmitv_init(ef_vi* vi, const ef_iovec* iov, int iov_len,
46989+ ef_request_id dma_id)
46990+{
46991+ ef_vi_txq* q = &vi->vi_txq;
46992+ ef_vi_txq_state* qs = &vi->ep_state->txq;
46993+ ef_vi_falcon_dma_tx_buf_desc* dp;
46994+ unsigned len, dma_len, di;
46995+ unsigned added_save = qs->added;
46996+ ef_addr dma_addr;
46997+ unsigned last_len = 0;
46998+
46999+ ef_assert(iov_len > 0);
47000+ ef_assert(iov);
47001+ ef_assert_equal((dma_id & EF_REQUEST_ID_MASK), dma_id);
47002+ ef_assert_nequal(dma_id, 0xffff);
47003+
47004+ dma_addr = iov->iov_base;
47005+ len = iov->iov_len;
47006+
47007+ if( vi->vi_flags & EF_VI_ISCSI_TX_DDIG ) {
47008+ /* Last 4 bytes of placeholder for digest must be
47009+ * removed for h/w */
47010+ ef_assert(len > 4);
47011+ last_len = iov[iov_len - 1].iov_len;
47012+ if( last_len <= 4 ) {
47013+ ef_assert(iov_len > 1);
47014+ --iov_len;
47015+ last_len = iov[iov_len - 1].iov_len - (4 - last_len);
47016+ }
47017+ else {
47018+ last_len = iov[iov_len - 1].iov_len - 4;
47019+ }
47020+ if( iov_len == 1 )
47021+ len = last_len;
47022+ }
47023+
47024+ while( 1 ) {
47025+ if( qs->added - qs->removed >= q->mask ) {
47026+ qs->added = added_save;
47027+ return -EAGAIN;
47028+ }
47029+
47030+ dma_len = (~((unsigned) dma_addr) & 0xfff) + 1;
47031+ if( dma_len > len ) dma_len = len;
47032+ { /* BUG5391_WORKAROUND */
47033+ unsigned misalign =
47034+ (unsigned) dma_addr & q->misalign_mask;
47035+ if( misalign && dma_len + misalign > 512 )
47036+ dma_len = 512 - misalign;
47037+ }
47038+
47039+ di = qs->added++ & q->mask;
47040+ dp = (ef_vi_falcon_dma_tx_buf_desc*) q->descriptors + di;
47041+ if( vi->vi_flags & EF_VI_TX_PHYS_ADDR )
47042+ falcon_dma_tx_calc_ip_phys
47043+ (ef_physaddr(dma_addr), dma_len, /*port*/ 0,
47044+ (iov_len == 1 && dma_len == len) ? 0 :
47045+ EFVI_FALCON_DMA_TX_FRAG, dp);
47046+ else
47047+ falcon_dma_tx_calc_ip_buf
47048+ (ef_bufaddr(dma_addr), dma_len, /*port*/ 0,
47049+ (iov_len == 1 && dma_len == len) ? 0 :
47050+ EFVI_FALCON_DMA_TX_FRAG, dp);
47051+
47052+ dma_addr += dma_len;
47053+ len -= dma_len;
47054+
47055+ if( len == 0 ) {
47056+ if( --iov_len == 0 ) break;
47057+ ++iov;
47058+ dma_addr = iov->iov_base;
47059+ len = iov->iov_len;
47060+ if( (vi->vi_flags & EF_VI_ISCSI_TX_DDIG) &&
47061+ (iov_len == 1) )
47062+ len = last_len;
47063+ }
47064+ }
47065+
47066+ q->ids[di] = (uint16_t) dma_id;
47067+ return 0;
47068+}
47069+
47070+
47071+void ef_vi_transmit_push(ef_vi* vi)
47072+{
47073+ ef_vi_wiob();
47074+ writel((vi->ep_state->txq.added & vi->vi_txq.mask) <<
47075+ __DW4(TX_DESC_WPTR_LBN),
47076+ vi->vi_txq.doorbell);
47077+}
47078+
47079+
47080+/*! The value of initial_rx_bytes is used to set RX_KER_BUF_SIZE in an initial
47081+** receive descriptor here if physical addressing is being used. A value of
47082+** zero represents 16384 bytes. This is okay, because the caller must provide a
47083+** buffer that is > MTU, and the MAC should filter anything bigger than that.
47084+*/
47085+int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id,
47086+ int initial_rx_bytes)
47087+{
47088+ ef_vi_rxq* q = &vi->vi_rxq;
47089+ ef_vi_rxq_state* qs = &vi->ep_state->rxq;
47090+ unsigned di;
47091+
47092+ if( ef_vi_receive_space(vi) ) {
47093+ di = qs->added++ & q->mask;
47094+ ef_assert_equal(q->ids[di], 0xffff);
47095+ q->ids[di] = (uint16_t) dma_id;
47096+
47097+ if( ! (vi->vi_flags & EF_VI_RX_PHYS_ADDR) ) {
47098+ ef_vi_falcon_dma_rx_buf_desc* dp;
47099+ dp = (ef_vi_falcon_dma_rx_buf_desc*)
47100+ q->descriptors + di;
47101+ falcon_dma_rx_calc_ip_buf(ef_bufaddr(addr), dp);
47102+ }
47103+ else {
47104+ ef_vi_falcon_dma_rx_phys_desc* dp;
47105+ dp = (ef_vi_falcon_dma_rx_phys_desc*)
47106+ q->descriptors + di;
47107+ __falcon_dma_rx_calc_ip_phys(addr, dp,
47108+ initial_rx_bytes);
47109+ }
47110+
47111+ return 0;
47112+ }
47113+
47114+ return -EAGAIN;
47115+}
47116+
47117+
47118+void ef_vi_receive_push(ef_vi* vi)
47119+{
47120+ ef_vi_wiob();
47121+ writel ((vi->ep_state->rxq.added & vi->vi_rxq.mask) <<
47122+ __DW4(RX_DESC_WPTR_LBN),
47123+ vi->vi_rxq.doorbell);
47124+}
47125+
47126+
47127+ef_request_id ef_vi_receive_done(const ef_vi* vi, const ef_event* ef_ev)
47128+{
47129+ const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev);
47130+ unsigned di = ev->u32[0] & vi->vi_rxq.mask;
47131+ ef_request_id rq_id;
47132+
47133+ ef_assert(EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX ||
47134+ EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX_DISCARD);
47135+
47136+ /* Detect spurious / duplicate RX events. We may need to modify this
47137+ ** code so that we are robust if they happen. */
47138+ ef_assert_equal(di, vi->ep_state->rxq.removed & vi->vi_rxq.mask);
47139+
47140+ /* We only support 1 port: so events should be in order. */
47141+ ef_assert(vi->vi_rxq.ids[di] != 0xffff);
47142+
47143+ rq_id = vi->vi_rxq.ids[di];
47144+ vi->vi_rxq.ids[di] = 0xffff;
47145+ ++vi->ep_state->rxq.removed;
47146+ return rq_id;
47147+}
47148+
47149+/*! \cidoxg_end */
47150Index: head-2008-11-25/drivers/xen/sfc_netfront/pt_tx.c
47151===================================================================
47152--- /dev/null 1970-01-01 00:00:00.000000000 +0000
47153+++ head-2008-11-25/drivers/xen/sfc_netfront/pt_tx.c 2008-02-20 09:32:49.000000000 +0100
47154@@ -0,0 +1,91 @@
47155+/****************************************************************************
47156+ * Copyright 2002-2005: Level 5 Networks Inc.
47157+ * Copyright 2005-2008: Solarflare Communications Inc,
47158+ * 9501 Jeronimo Road, Suite 250,
47159+ * Irvine, CA 92618, USA
47160+ *
47161+ * Maintained by Solarflare Communications
47162+ * <linux-xen-drivers@solarflare.com>
47163+ * <onload-dev@solarflare.com>
47164+ *
47165+ * This program is free software; you can redistribute it and/or modify it
47166+ * under the terms of the GNU General Public License version 2 as published
47167+ * by the Free Software Foundation, incorporated herein by reference.
47168+ *
47169+ * This program is distributed in the hope that it will be useful,
47170+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
47171+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47172+ * GNU General Public License for more details.
47173+ *
47174+ * You should have received a copy of the GNU General Public License
47175+ * along with this program; if not, write to the Free Software
47176+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
47177+ ****************************************************************************
47178+ */
47179+
47180+/*
47181+ * \author djr
47182+ * \brief Packet-mode transmit interface.
47183+ * \date 2003/04/02
47184+ */
47185+
47186+/*! \cidoxg_lib_ef */
47187+#include "ef_vi_internal.h"
47188+
47189+
47190+int ef_vi_transmit_init(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id)
47191+{
47192+ ef_iovec iov = { base, len };
47193+ return ef_vi_transmitv_init(vi, &iov, 1, dma_id);
47194+}
47195+
47196+
47197+int ef_vi_transmit(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id)
47198+{
47199+ ef_iovec iov = { base, len };
47200+ int rc = ef_vi_transmitv_init(vi, &iov, 1, dma_id);
47201+ if( rc == 0 ) ef_vi_transmit_push(vi);
47202+ return rc;
47203+}
47204+
47205+
47206+int ef_vi_transmitv(ef_vi* vi, const ef_iovec* iov, int iov_len,
47207+ ef_request_id dma_id)
47208+{
47209+ int rc = ef_vi_transmitv_init(vi, iov, iov_len, dma_id);
47210+ if( rc == 0 ) ef_vi_transmit_push(vi);
47211+ return rc;
47212+}
47213+
47214+
47215+int ef_vi_transmit_unbundle(ef_vi* vi, const ef_event* __ev,
47216+ ef_request_id* ids)
47217+{
47218+ ef_request_id* ids_in = ids;
47219+ ef_vi_txq* q = &vi->vi_txq;
47220+ ef_vi_txq_state* qs = &vi->ep_state->txq;
47221+ const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*__ev);
47222+ unsigned i, stop = (ev->u32[0] + 1) & q->mask;
47223+
47224+ ef_assert(EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX ||
47225+ EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX_ERROR);
47226+
47227+ /* Shouldn't be batching more than 64 descriptors, and should not go
47228+ ** backwards. */
47229+ ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask), 64);
47230+ /* Should not complete more than we've posted. */
47231+ ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask),
47232+ qs->added - qs->removed);
47233+
47234+ for( i = qs->removed & q->mask; i != stop; i = ++qs->removed & q->mask )
47235+ if( q->ids[i] != 0xffff ) {
47236+ *ids++ = q->ids[i];
47237+ q->ids[i] = 0xffff;
47238+ }
47239+
47240+ ef_assert_le(ids - ids_in, EF_VI_TRANSMIT_BATCH);
47241+
47242+ return (int) (ids - ids_in);
47243+}
47244+
47245+/*! \cidoxg_end */
47246Index: head-2008-11-25/drivers/xen/sfc_netfront/sysdep.h
47247===================================================================
47248--- /dev/null 1970-01-01 00:00:00.000000000 +0000
47249+++ head-2008-11-25/drivers/xen/sfc_netfront/sysdep.h 2008-02-20 09:32:49.000000000 +0100
47250@@ -0,0 +1,184 @@
47251+/****************************************************************************
47252+ * Copyright 2002-2005: Level 5 Networks Inc.
47253+ * Copyright 2005-2008: Solarflare Communications Inc,
47254+ * 9501 Jeronimo Road, Suite 250,
47255+ * Irvine, CA 92618, USA
47256+ *
47257+ * Maintained by Solarflare Communications
47258+ * <linux-xen-drivers@solarflare.com>
47259+ * <onload-dev@solarflare.com>
47260+ *
47261+ * This program is free software; you can redistribute it and/or modify it
47262+ * under the terms of the GNU General Public License version 2 as published
47263+ * by the Free Software Foundation, incorporated herein by reference.
47264+ *
47265+ * This program is distributed in the hope that it will be useful,
47266+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
47267+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47268+ * GNU General Public License for more details.
47269+ *
47270+ * You should have received a copy of the GNU General Public License
47271+ * along with this program; if not, write to the Free Software
47272+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
47273+ ****************************************************************************
47274+ */
47275+
47276+/*
47277+ * \author stg
47278+ * \brief System dependent support for ef vi lib
47279+ * \date 2007/05/10
47280+ */
47281+
47282+/*! \cidoxg_include_ci_ul */
47283+#ifndef __CI_CIUL_SYSDEP_LINUX_H__
47284+#define __CI_CIUL_SYSDEP_LINUX_H__
47285+
47286+/**********************************************************************
47287+ * Kernel version compatibility
47288+ */
47289+
47290+#if defined(__GNUC__)
47291+
47292+/* Linux kernel doesn't have stdint.h or [u]intptr_t. */
47293+# if !defined(LINUX_VERSION_CODE)
47294+# include <linux/version.h>
47295+# endif
47296+# include <asm/io.h>
47297+
47298+/* In Linux 2.6.24, linux/types.h has uintptr_t */
47299+# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
47300+# if BITS_PER_LONG == 32
47301+ typedef __u32 uintptr_t;
47302+# else
47303+ typedef __u64 uintptr_t;
47304+# endif
47305+# endif
47306+
47307+/* But even 2.6.24 doesn't define intptr_t */
47308+# if BITS_PER_LONG == 32
47309+ typedef __s32 intptr_t;
47310+# else
47311+ typedef __s64 intptr_t;
47312+# endif
47313+
47314+# if defined(__ia64__)
47315+# define EF_VI_PRIx64 "lx"
47316+# else
47317+# define EF_VI_PRIx64 "llx"
47318+# endif
47319+
47320+# define EF_VI_HF __attribute__((visibility("hidden")))
47321+# define EF_VI_HV __attribute__((visibility("hidden")))
47322+
47323+# if defined(__i386__) || defined(__x86_64__) /* GCC x86/x64 */
47324+ typedef unsigned long long ef_vi_dma_addr_t;
47325+# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
47326+# define ef_vi_wiob() __asm__ __volatile__ ("sfence")
47327+# else
47328+# define ef_vi_wiob() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8")
47329+# endif
47330+
47331+# endif
47332+#endif
47333+
47334+#ifdef EFX_NOT_UPSTREAM
47335+
47336+/* Stuff for architectures/compilers not officially supported */
47337+
47338+#if !defined(__GNUC__)
47339+# if defined(__PPC__) /* GCC, PPC */
47340+ typedef unsigned long ef_vi_dma_addr_t;
47341+# define ef_vi_wiob() wmb()
47342+
47343+# ifdef __powerpc64__
47344+# ifdef CONFIG_SMP
47345+# define CI_SMP_SYNC "\n eieio \n" /* memory cache sync */
47346+# define CI_SMP_ISYNC "\n isync \n" /* instr cache sync */
47347+# else
47348+# define CI_SMP_SYNC
47349+# define CI_SMP_ISYNC
47350+# endif
47351+# else /* for ppc32 systems */
47352+# ifdef CONFIG_SMP
47353+# define CI_SMP_SYNC "\n eieio \n"
47354+# define CI_SMP_ISYNC "\n sync \n"
47355+# else
47356+# define CI_SMP_SYNC
47357+# define CI_SMP_ISYNC
47358+# endif
47359+# endif
47360+
47361+# elif defined(__ia64__) /* GCC, IA64 */
47362+ typedef unsigned long ef_vi_dma_addr_t;
47363+# define ef_vi_wiob() __asm__ __volatile__("mf.a": : :"memory")
47364+
47365+# else
47366+# error Unknown processor - GNU C
47367+# endif
47368+
47369+#elif defined(__PGI)
47370+# error PGI not supported
47371+
47372+#elif defined(__INTEL_COMPILER)
47373+
47374+/* Intel compilers v7 claim to be very gcc compatible. */
47375+# if __INTEL_COMPILER >= 700
47376+# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91)
47377+# define EF_VI_LIKELY(t) __builtin_expect((t), 1)
47378+# define EF_VI_UNLIKELY(t) __builtin_expect((t), 0)
47379+# endif
47380+
47381+# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
47382+# define ef_vi_wiob() __asm__ __volatile__ ("sfence")
47383+# else
47384+# define ef_vi_wiob() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8")
47385+# endif
47386+
47387+# else
47388+# error Old Intel compiler not supported.
47389+# endif
47390+
47391+#else
47392+# error Unknown compiler.
47393+#endif
47394+
47395+#endif
47396+
47397+
47398+# include <linux/errno.h>
47399+
47400+
47401+/**********************************************************************
47402+ * Extracting bit fields.
47403+ */
47404+
47405+#define _QWORD_GET_LOW(f, v) \
47406+ (((v).u32[0] >> (f##_LBN)) & ((1u << f##_WIDTH) - 1u))
47407+#define _QWORD_GET_HIGH(f, v) \
47408+ (((v).u32[1] >> (f##_LBN - 32u)) & ((1u << f##_WIDTH) - 1u))
47409+#define _QWORD_GET_ANY(f, v) \
47410+ (((v).u64[0] >> f##_LBN) & (((uint64_t) 1u << f##_WIDTH) - 1u))
47411+
47412+#define QWORD_GET(f, v) \
47413+ ((f##_LBN + f##_WIDTH) <= 32u \
47414+ ? _QWORD_GET_LOW(f, (v)) \
47415+ : ((f##_LBN >= 32u) ? _QWORD_GET_HIGH(f, (v)) : _QWORD_GET_ANY(f, (v))))
47416+
47417+#define QWORD_GET_U(f, v) ((unsigned) QWORD_GET(f, (v)))
47418+
47419+#define _QWORD_TEST_BIT_LOW(f, v) ((v).u32[0] & (1u << (f##_LBN)))
47420+#define _QWORD_TEST_BIT_HIGH(f, v) ((v).u32[1] & (1u << (f##_LBN - 32u)))
47421+
47422+#define QWORD_TEST_BIT(f, v) \
47423+ (f##_LBN < 32 ? _QWORD_TEST_BIT_LOW(f, (v)) : _QWORD_TEST_BIT_HIGH(f, (v)))
47424+
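A worked example of how the selection between the three helpers above falls out; the field layouts below are made up for illustration and are not taken from the hardware definitions.

/* For a field FOO with FOO_LBN 40, FOO_WIDTH 4 (wholly in the upper
 * dword), QWORD_GET(FOO, v) reduces to _QWORD_GET_HIGH(FOO, v), i.e.
 * ((v).u32[1] >> 8) & 0xf.  For BAR with BAR_LBN 30, BAR_WIDTH 4 (which
 * straddles bit 32), it reduces to _QWORD_GET_ANY(BAR, v), i.e.
 * ((v).u64[0] >> 30) & 0xf. */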
47425+
47426+
47427+
47428+#ifndef DECLSPEC_NORETURN
47429+/* normally defined on Windows to expand to a declaration that the
47430+ function will not return */
47431+# define DECLSPEC_NORETURN
47432+#endif
47433+
47434+#endif /* __CI_CIUL_SYSDEP_LINUX_H__ */
47435Index: head-2008-11-25/drivers/xen/sfc_netfront/vi_init.c
47436===================================================================
47437--- /dev/null 1970-01-01 00:00:00.000000000 +0000
47438+++ head-2008-11-25/drivers/xen/sfc_netfront/vi_init.c 2008-02-20 09:32:49.000000000 +0100
47439@@ -0,0 +1,183 @@
47440+/****************************************************************************
47441+ * Copyright 2002-2005: Level 5 Networks Inc.
47442+ * Copyright 2005-2008: Solarflare Communications Inc,
47443+ * 9501 Jeronimo Road, Suite 250,
47444+ * Irvine, CA 92618, USA
47445+ *
47446+ * Maintained by Solarflare Communications
47447+ * <linux-xen-drivers@solarflare.com>
47448+ * <onload-dev@solarflare.com>
47449+ *
47450+ * This program is free software; you can redistribute it and/or modify it
47451+ * under the terms of the GNU General Public License version 2 as published
47452+ * by the Free Software Foundation, incorporated herein by reference.
47453+ *
47454+ * This program is distributed in the hope that it will be useful,
47455+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
47456+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47457+ * GNU General Public License for more details.
47458+ *
47459+ * You should have received a copy of the GNU General Public License
47460+ * along with this program; if not, write to the Free Software
47461+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
47462+ ****************************************************************************
47463+ */
47464+
47465+/*
47466+ * \author djr
47467+ * \brief Initialisation of VIs.
47468+ * \date 2007/06/08
47469+ */
47470+
47471+#include "ef_vi_internal.h"
47472+
47473+#define EF_VI_STATE_BYTES(rxq_sz, txq_sz) \
47474+ (sizeof(ef_vi_state) + (rxq_sz) * sizeof(uint16_t) \
47475+ + (txq_sz) * sizeof(uint16_t))
47476+
47477+int ef_vi_calc_state_bytes(int rxq_sz, int txq_sz)
47478+{
47479+ ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz));
47480+ ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz));
47481+
47482+ return EF_VI_STATE_BYTES(rxq_sz, txq_sz);
47483+}
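As a small worked example of the sizing above (an editorial sketch, not patch code), 512-entry rx and tx rings satisfy the power-of-two assertions and yield sizeof(ef_vi_state) plus 2048 bytes of id storage:

/* Usage sketch only: size a state block for 512-entry rx and tx rings.
 * EF_VI_STATE_BYTES(512, 512) expands to
 *   sizeof(ef_vi_state) + 512*sizeof(uint16_t) + 512*sizeof(uint16_t)
 * i.e. sizeof(ef_vi_state) + 2048 bytes. */
static int example_state_bytes(void)
{
	return ef_vi_calc_state_bytes(512, 512);
}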
47484+
47485+
47486+int ef_vi_state_bytes(ef_vi* vi)
47487+{
47488+ int rxq_sz = 0, txq_sz = 0;
47489+ if( ef_vi_receive_capacity(vi) )
47490+ rxq_sz = ef_vi_receive_capacity(vi) + 1;
47491+ if( ef_vi_transmit_capacity(vi) )
47492+ txq_sz = ef_vi_transmit_capacity(vi) + 1;
47493+
47494+ ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz));
47495+ ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz));
47496+
47497+ return EF_VI_STATE_BYTES(rxq_sz, txq_sz);
47498+}
47499+
47500+
47501+void ef_eventq_state_init(ef_vi* evq)
47502+{
47503+ int j;
47504+
47505+ for (j = 0; j<EFAB_DMAQS_PER_EVQ_MAX; j++) {
47506+ ef_rx_dup_state_t *rx_dup_state =
47507+ &evq->evq_state->rx_dup_state[j];
47508+ rx_dup_state->bad_sop = 0;
47509+ rx_dup_state->rx_last_desc_ptr = -1;
47510+ rx_dup_state->frag_num = 0;
47511+ }
47512+
47513+ evq->evq_state->evq_ptr = 0;
47514+}
47515+
47516+
47517+void ef_vi_state_init(ef_vi* vi)
47518+{
47519+ ef_vi_state* state = vi->ep_state;
47520+ unsigned i;
47521+
47522+ state->txq.added = state->txq.removed = 0;
47523+ state->rxq.added = state->rxq.removed = 0;
47524+
47525+ if( vi->vi_rxq.mask )
47526+ for( i = 0; i <= vi->vi_rxq.mask; ++i )
47527+ vi->vi_rxq.ids[i] = (uint16_t) -1;
47528+ if( vi->vi_txq.mask )
47529+ for( i = 0; i <= vi->vi_txq.mask; ++i )
47530+ vi->vi_txq.ids[i] = (uint16_t) -1;
47531+}
47532+
47533+
47534+void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type nic_type,
47535+ int instance, unsigned evq_bytes, void* base,
47536+ void* timer_reg)
47537+{
47538+ struct vi_mappings* vm = (struct vi_mappings*) data_area;
47539+
47540+ vm->signature = VI_MAPPING_SIGNATURE;
47541+ vm->vi_instance = instance;
47542+ vm->nic_type = nic_type;
47543+ vm->evq_bytes = evq_bytes;
47544+ vm->evq_base = base;
47545+ vm->evq_timer_reg = timer_reg;
47546+}
47547+
47548+
47549+void ef_vi_init(ef_vi* vi, void* vvis, ef_vi_state* state,
47550+ ef_eventq_state* evq_state, enum ef_vi_flags vi_flags)
47551+{
47552+ struct vi_mappings* vm = (struct vi_mappings*) vvis;
47553+
47554+ vi->vi_i = vm->vi_instance;
47555+ vi->ep_state = state;
47556+ vi->vi_flags = vi_flags;
47557+
47558+ switch( vm->nic_type.arch ) {
47559+ case EF_VI_ARCH_FALCON:
47560+ falcon_vi_init(vi, vvis);
47561+ break;
47562+ default:
47563+ /* ?? TODO: We should return an error code. */
47564+ ef_assert(0);
47565+ break;
47566+ }
47567+
47568+ if( vm->evq_bytes ) {
47569+ vi->evq_state = evq_state;
47570+ vi->evq_mask = vm->evq_bytes - 1u;
47571+ vi->evq_base = vm->evq_base;
47572+ vi->evq_timer_reg = vm->evq_timer_reg;
47573+ }
47574+
47575+ EF_VI_MAGIC_SET(vi, EF_VI);
47576+}
47577+
47578+
47579+/* Initialise [data_area] with information required to initialise an ef_vi.
47580+ * In the following, an unused param should be set to NULL. Note the case
47581+ * marked (*) of [iobuf_mmap] for falcon/driver; for the normal driver this
47582+ * must be NULL.
47583+ *
47584+ * \param data_area [in,out] required, must ref at least VI_MAPPING_SIZE
47585+ * bytes
47586+ * \param io_mmap [in] ef1, required
47587+ * falcon, required
47588+ * \param iobuf_mmap [in] ef1, unused
47589+ * falcon, required
47590+ */
47591+void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type nic_type,
47592+ unsigned rxq_capacity, unsigned txq_capacity,
47593+ int instance, void* io_mmap,
47594+ void* iobuf_mmap_rx, void* iobuf_mmap_tx,
47595+ enum ef_vi_flags vi_flags)
47596+{
47597+ struct vi_mappings* vm = (struct vi_mappings*) data_area;
47598+ int rx_desc_bytes, rxq_bytes;
47599+
47600+ ef_assert(rxq_capacity > 0 || txq_capacity > 0);
47601+ ef_assert(vm);
47602+ ef_assert(io_mmap);
47603+ ef_assert(iobuf_mmap_rx || iobuf_mmap_tx);
47604+
47605+ vm->signature = VI_MAPPING_SIGNATURE;
47606+ vm->vi_instance = instance;
47607+ vm->nic_type = nic_type;
47608+
47609+ rx_desc_bytes = (vi_flags & EF_VI_RX_PHYS_ADDR) ? 8 : 4;
47610+ rxq_bytes = rxq_capacity * rx_desc_bytes;
47611+ rxq_bytes = (rxq_bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
47612+
47613+ if( iobuf_mmap_rx == iobuf_mmap_tx )
47614+ iobuf_mmap_tx = (char*) iobuf_mmap_rx + rxq_bytes;
47615+
47616+ vm->rx_queue_capacity = rxq_capacity;
47617+ vm->rx_dma_falcon = iobuf_mmap_rx;
47618+ vm->rx_bell = (char*) io_mmap + (RX_DESC_UPD_REG_KER_OFST & 4095);
47619+ vm->tx_queue_capacity = txq_capacity;
47620+ vm->tx_dma_falcon = iobuf_mmap_tx;
47621+ vm->tx_bell = (char*) io_mmap + (TX_DESC_UPD_REG_KER_OFST & 4095);
47622+}
47623Index: head-2008-11-25/drivers/xen/sfc_netutil/Makefile
47624===================================================================
47625--- /dev/null 1970-01-01 00:00:00.000000000 +0000
47626+++ head-2008-11-25/drivers/xen/sfc_netutil/Makefile 2008-02-26 10:54:12.000000000 +0100
47627@@ -0,0 +1,11 @@
47628+EXTRA_CFLAGS += -Idrivers/xen/sfc_netutil
47629+EXTRA_CFLAGS += -Werror
47630+
47631+ifdef GGOV
47632+EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
47633+endif
47634+
47635+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) := sfc_netutil.o
47636+
47637+sfc_netutil-objs := accel_cuckoo_hash.o accel_msg_iface.o accel_util.o
47638+
47639Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.c
47640===================================================================
47641--- /dev/null 1970-01-01 00:00:00.000000000 +0000
47642+++ head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.c 2008-02-20 09:32:49.000000000 +0100
47643@@ -0,0 +1,651 @@
47644+/****************************************************************************
47645+ * Solarflare driver for Xen network acceleration
47646+ *
47647+ * Copyright 2006-2008: Solarflare Communications Inc,
47648+ * 9501 Jeronimo Road, Suite 250,
47649+ * Irvine, CA 92618, USA
47650+ *
47651+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
47652+ *
47653+ * This program is free software; you can redistribute it and/or modify it
47654+ * under the terms of the GNU General Public License version 2 as published
47655+ * by the Free Software Foundation, incorporated herein by reference.
47656+ *
47657+ * This program is distributed in the hope that it will be useful,
47658+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
47659+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47660+ * GNU General Public License for more details.
47661+ *
47662+ * You should have received a copy of the GNU General Public License
47663+ * along with this program; if not, write to the Free Software
47664+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
47665+ ****************************************************************************
47666+ */
47667+
47668+#include <linux/types.h> /* needed for linux/random.h */
47669+#include <linux/random.h>
47670+
47671+#include "accel_cuckoo_hash.h"
47672+#include "accel_util.h"
47673+
47674+static inline int cuckoo_hash_key_compare(cuckoo_hash_table *hashtab,
47675+ cuckoo_hash_key *key1,
47676+ cuckoo_hash_key *key2)
47677+{
47678+ return !memcmp(key1, key2, hashtab->key_length);
47679+}
47680+
47681+
47682+static inline void cuckoo_hash_key_set(cuckoo_hash_key *key1,
47683+ cuckoo_hash_key *key2)
47684+{
47685+ *key1 = *key2;
47686+}
47687+
47688+
47689+/*
47690+ * Sets hash function parameters. Chooses "a" to be odd, 0 < a < 2^w
47691+ * where w is the length of the key
47692+ */
47693+static void set_hash_parameters(cuckoo_hash_table *hashtab)
47694+{
47695+ again:
47696+ hashtab->a0 = hashtab->a1 = 0;
47697+
47698+ /* Make sure random */
47699+ get_random_bytes(&hashtab->a0, hashtab->key_length);
47700+ get_random_bytes(&hashtab->a1, hashtab->key_length);
47701+
47702+ /* Make sure odd */
47703+ hashtab->a0 |= 1;
47704+ hashtab->a1 |= 1;
47705+
47706+ /* Being different is good */
47707+ if (hashtab->a0 != hashtab->a1)
47708+ return;
47709+
47710+ goto again;
47711+}
47712+
47713+int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits,
47714+ unsigned key_length)
47715+{
47716+ char *table_mem;
47717+ unsigned length = 1 << length_bits;
47718+
47719+ BUG_ON(length_bits >= sizeof(unsigned) * 8);
47720+ BUG_ON(key_length > sizeof(cuckoo_hash_key));
47721+
47722+ table_mem = kmalloc(sizeof(cuckoo_hash_entry) * 2 * length, GFP_KERNEL);
47723+
47724+ if (table_mem == NULL)
47725+ return -ENOMEM;
47726+
47727+ hashtab->length = length;
47728+ hashtab->length_bits = length_bits;
47729+ hashtab->key_length = key_length;
47730+ hashtab->entries = 0;
47731+
47732+ hashtab->table0 = (cuckoo_hash_entry *)table_mem;
47733+ hashtab->table1 = (cuckoo_hash_entry *)
47734+ (table_mem + length * sizeof(cuckoo_hash_entry));
47735+
47736+ set_hash_parameters(hashtab);
47737+
47738+ /* Zero the table */
47739+ memset(hashtab->table0, 0, length * 2 * sizeof(cuckoo_hash_entry));
47740+
47741+ return 0;
47742+}
47743+EXPORT_SYMBOL_GPL(cuckoo_hash_init);
47744+
47745+void cuckoo_hash_destroy(cuckoo_hash_table *hashtab)
47746+{
47747+ if (hashtab->table0 != NULL)
47748+ kfree(hashtab->table0);
47749+}
47750+
47751+EXPORT_SYMBOL_GPL(cuckoo_hash_destroy);
47752+
47753+/*
47754+ * This computes sizeof(cuckoo_hash) bits of hash; not all of them
47755+ * will necessarily be used, but the hash function throws away any
47756+ * that aren't
47757+ */
47758+static inline void cuckoo_compute_hash_helper(cuckoo_hash_table *hashtab,
47759+ cuckoo_hash_key *a,
47760+ cuckoo_hash_key *x,
47761+ cuckoo_hash *result)
47762+{
47763+ u64 multiply_result = 0, a_temp, x_temp;
47764+ u32 carry = 0;
47765+ u32 *a_words;
47766+ u32 *x_words;
47767+ int i;
47768+
47769+ /*
47770+ * As the mod and div operations in the function effectively
47771+ * reduce and shift the bits of the product down to just the
47772+ * third word, we need only compute that and return it as a
47773+ * result.
47774+ *
47775+ * Do enough long multiplication to get the word we need
47776+ */
47777+
47778+ /* This assumes things about the sizes of the key and hash */
47779+ BUG_ON(hashtab->key_length % sizeof(u32) != 0);
47780+ BUG_ON(sizeof(cuckoo_hash) != sizeof(u32));
47781+
47782+ a_words = (u32 *)a;
47783+ x_words = (u32 *)x;
47784+
47785+ for (i = 0; i < hashtab->key_length / sizeof(u32); i++) {
47786+ a_temp = a_words[i];
47787+ x_temp = x_words[i];
47788+
47789+ multiply_result = (a_temp * x_temp) + carry;
47790+ carry = (multiply_result >> 32) & 0xffffffff;
47791+ }
47792+
47793+ *result = multiply_result & 0xffffffff;
47794+}
47795+
47796+
47797+/*
47798+ * Want to implement (ax mod 2^w) div 2^(w-q) for odd a, 0 < a < 2^w;
47799+ * w is the length of the key, q is the length of the hash, I think.
47800+ * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf
47801+ */
47802+static cuckoo_hash cuckoo_compute_hash(cuckoo_hash_table *hashtab,
47803+ cuckoo_hash_key *key,
47804+ cuckoo_hash_key *a)
47805+{
47806+ unsigned q = hashtab->length_bits;
47807+ unsigned shift = 32 - q;
47808+ unsigned mask = ((1 << q) - 1) << shift;
47809+ cuckoo_hash hash;
47810+
47811+ cuckoo_compute_hash_helper(hashtab, a, key, &hash);
47812+
47813+ /*
47814+ * Take the top few bits to get the right length for this
47815+ * hash table
47816+ */
47817+ hash = (hash & mask) >> shift;
47818+
47819+ BUG_ON(hash >= hashtab->length);
47820+
47821+ return hash;
47822+}
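For intuition, the snippet below applies the single-word textbook form of this hash, (a*x mod 2^w) div 2^(w-q) with w = 32: multiply the key by an odd constant, let the product wrap mod 2^32, then keep the top q bits. The constant, key and q are arbitrary example values, and this is only an editorial userspace illustration of the formula, not the driver's multi-word helper above.

/* Userspace illustration of (a*x mod 2^w) div 2^(w-q) for w = 32.
 * The values of a, x and q below are arbitrary examples. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t a = 0x9E3779B1u;		/* odd multiplier, 0 < a < 2^32 */
	uint32_t x = 0x12345678u;		/* example key */
	unsigned q = 8;				/* index into a 2^8-entry table */

	uint32_t product = a * x;		/* a*x mod 2^32 (unsigned wraparound) */
	uint32_t hash = product >> (32 - q);	/* top q bits: value in [0, 2^q) */

	printf("hash = %u (table size %u)\n", hash, 1u << q);
	return 0;
}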
47823+
47824+
47825+static int cuckoo_hash_lookup0(cuckoo_hash_table *hashtab,
47826+ cuckoo_hash_key *key,
47827+ cuckoo_hash_value *value)
47828+{
47829+ cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
47830+
47831+ if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED)
47832+ && cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
47833+ key)) {
47834+ *value = hashtab->table0[hash].value;
47835+ return 1;
47836+ }
47837+
47838+ return 0;
47839+}
47840+
47841+static int cuckoo_hash_lookup1(cuckoo_hash_table *hashtab,
47842+ cuckoo_hash_key *key,
47843+ cuckoo_hash_value *value)
47844+{
47845+ cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
47846+
47847+ if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED)
47848+ && cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
47849+ key)) {
47850+ *value = hashtab->table1[hash].value;
47851+ return 1;
47852+ }
47853+
47854+ return 0;
47855+}
47856+
47857+
47858+int cuckoo_hash_lookup(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
47859+ cuckoo_hash_value *value)
47860+{
47861+ return cuckoo_hash_lookup0(hashtab, key, value)
47862+ || cuckoo_hash_lookup1(hashtab, key, value);
47863+}
47864+EXPORT_SYMBOL_GPL(cuckoo_hash_lookup);
47865+
47866+
47867+/* Transfer any active entries from "old_table" into hashtab */
47868+static int cuckoo_hash_transfer_entries(cuckoo_hash_table *hashtab,
47869+ cuckoo_hash_entry *old_table,
47870+ unsigned capacity)
47871+{
47872+ int i, rc;
47873+ cuckoo_hash_entry *entry;
47874+
47875+ hashtab->entries = 0;
47876+
47877+ for (i = 0; i < capacity; i++) {
47878+ entry = &old_table[i];
47879+ if (entry->state == CUCKOO_HASH_STATE_OCCUPIED) {
47880+ rc = cuckoo_hash_add(hashtab, &(entry->key),
47881+ entry->value, 0);
47882+ if (rc != 0) {
47883+ return rc;
47884+ }
47885+ }
47886+ }
47887+
47888+ return 0;
47889+}
47890+
47891+
47892+int cuckoo_hash_rehash(cuckoo_hash_table *hashtab)
47893+{
47894+ cuckoo_hash_entry *new_table;
47895+ cuckoo_hash_table old_hashtab;
47896+ int resize = 0, rc, rehash_count;
47897+
47898+ /*
47899+ * Store old tables so we can access the existing values and
47900+ * copy across
47901+ */
47902+ memcpy(&old_hashtab, hashtab, sizeof(cuckoo_hash_table));
47903+
47904+ /* resize if hashtable is more than half full */
47905+ if (old_hashtab.entries > old_hashtab.length &&
47906+ old_hashtab.length_bits < 32)
47907+ resize = 1;
47908+
47909+ resize:
47910+ if (resize) {
47911+ new_table = kmalloc(sizeof(cuckoo_hash_entry) * 4 * hashtab->length,
47912+ GFP_ATOMIC);
47913+ if (new_table == NULL) {
47914+ rc = -ENOMEM;
47915+ goto err;
47916+ }
47917+
47918+ hashtab->length = 2 * hashtab->length;
47919+ hashtab->length_bits++;
47920+ } else {
47921+ new_table = kmalloc(sizeof(cuckoo_hash_entry) * 2 * hashtab->length,
47922+ GFP_ATOMIC);
47923+ if (new_table == NULL) {
47924+ rc = -ENOMEM;
47925+ goto err;
47926+ }
47927+ }
47928+
47929+ /*
47930+ * Point hashtab to new memory region so we can try to
47931+ * construct new table
47932+ */
47933+ hashtab->table0 = new_table;
47934+ hashtab->table1 = (cuckoo_hash_entry *)
47935+ ((char *)new_table + hashtab->length * sizeof(cuckoo_hash_entry));
47936+
47937+ rehash_count = 0;
47938+
47939+ again:
47940+ /* Zero the new tables */
47941+ memset(new_table, 0, hashtab->length * 2 * sizeof(cuckoo_hash_entry));
47942+
47943+ /* Choose new parameters for the hash functions */
47944+ set_hash_parameters(hashtab);
47945+
47946+ /*
47947+ * Multiply old_table_length by 2 as the length refers to each
47948+ * table, and there are two of them. This assumes that they
47949+ * are arranged sequentially in memory, so assert it
47950+ */
47951+ BUG_ON(((char *)old_hashtab.table1) !=
47952+ ((char *)old_hashtab.table0 + old_hashtab.length
47953+ * sizeof(cuckoo_hash_entry)));
47954+ rc = cuckoo_hash_transfer_entries(hashtab, old_hashtab.table0,
47955+ old_hashtab.length * 2);
47956+ if (rc < 0) {
47957+ /* Problem */
47958+ if (rc == -ENOSPC) {
47959+ ++rehash_count;
47960+ if (rehash_count < CUCKOO_HASH_MAX_LOOP) {
47961+ /*
47962+ * Wanted to rehash, but rather than
47963+ * recurse we can just do it here
47964+ */
47965+ goto again;
47966+ } else {
47967+ /*
47968+ * Didn't manage to rehash, so let's
47969+ * go up a size (if we haven't already
47970+ * and there's space)
47971+ */
47972+ if (!resize && hashtab->length_bits < 32) {
47973+ resize = 1;
47974+ kfree(new_table);
47975+ goto resize;
47976+ }
47977+ else
47978+ goto err;
47979+ }
47980+ }
47981+ else
47982+ goto err;
47983+ }
47984+
47985+ /* Success, I think. Free up the old table */
47986+ kfree(old_hashtab.table0);
47987+
47988+ /* We should have put all the entries from old table in the new one */
47989+ BUG_ON(hashtab->entries != old_hashtab.entries);
47990+
47991+ return 0;
47992+ err:
47993+ EPRINTK("%s: Rehash failed, giving up\n", __FUNCTION__);
47994+ /* Some other error, give up, at least restore table to how it was */
47995+ memcpy(hashtab, &old_hashtab, sizeof(cuckoo_hash_table));
47996+ if (new_table)
47997+ kfree(new_table);
47998+ return rc;
47999+}
48000+EXPORT_SYMBOL_GPL(cuckoo_hash_rehash);
48001+
48002+
48003+static int
48004+cuckoo_hash_insert_or_displace(cuckoo_hash_entry *table, unsigned hash,
48005+ cuckoo_hash_key *key,
48006+ cuckoo_hash_value value,
48007+ cuckoo_hash_key *displaced_key,
48008+ cuckoo_hash_value *displaced_value)
48009+{
48010+ if (table[hash].state == CUCKOO_HASH_STATE_VACANT) {
48011+ cuckoo_hash_key_set(&(table[hash].key), key);
48012+ table[hash].value = value;
48013+ table[hash].state = CUCKOO_HASH_STATE_OCCUPIED;
48014+
48015+ return 1;
48016+ } else {
48017+ cuckoo_hash_key_set(displaced_key, &(table[hash].key));
48018+ *displaced_value = table[hash].value;
48019+ cuckoo_hash_key_set(&(table[hash].key), key);
48020+ table[hash].value = value;
48021+
48022+ return 0;
48023+ }
48024+}
48025+
48026+
48027+int cuckoo_hash_add(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
48028+ cuckoo_hash_value value, int can_rehash)
48029+{
48030+ cuckoo_hash hash0, hash1;
48031+ int i, rc;
48032+ cuckoo_hash_key key1, key2;
48033+
48034+ cuckoo_hash_key_set(&key1, key);
48035+
48036+ again:
48037+ i = 0;
48038+ do {
48039+ hash0 = cuckoo_compute_hash(hashtab, &key1, &hashtab->a0);
48040+ if (cuckoo_hash_insert_or_displace(hashtab->table0, hash0,
48041+ &key1, value, &key2,
48042+ &value)) {
48043+ /* Success */
48044+ hashtab->entries++;
48045+ return 0;
48046+ }
48047+
48048+ hash1 = cuckoo_compute_hash(hashtab, &key2, &hashtab->a1);
48049+ if (cuckoo_hash_insert_or_displace(hashtab->table1, hash1,
48050+ &key2, value, &key1,
48051+ &value)) {
48052+ /* Success */
48053+ hashtab->entries++;
48054+ return 0;
48055+ }
48056+ } while (++i < CUCKOO_HASH_MAX_LOOP);
48057+
48058+ if (can_rehash) {
48059+ if ((rc = cuckoo_hash_rehash(hashtab)) < 0) {
48060+ /*
48061+ * Give up - this will drop whichever
48062+ * key/value pair we have currently displaced
48063+ * on the floor
48064+ */
48065+ return rc;
48066+ }
48067+ goto again;
48068+ }
48069+
48070+ EPRINTK("%s: failed hash add\n", __FUNCTION__);
48071+ /*
48072+ * Couldn't do it - bad as we've now removed some random thing
48073+ * from the table, and will just drop it on the floor. Better
48074+ * would be to somehow revert the table to the state it was in
48075+ * at the start
48076+ */
48077+ return -ENOSPC;
48078+}
48079+EXPORT_SYMBOL_GPL(cuckoo_hash_add);
48080+
48081+
48082+int cuckoo_hash_add_check(cuckoo_hash_table *hashtab,
48083+ cuckoo_hash_key *key, cuckoo_hash_value value,
48084+ int can_rehash)
48085+{
48086+ int stored_value;
48087+
48088+ if (cuckoo_hash_lookup(hashtab, key, &stored_value))
48089+ return -EBUSY;
48090+
48091+ return cuckoo_hash_add(hashtab, key, value, can_rehash);
48092+}
48093+EXPORT_SYMBOL_GPL(cuckoo_hash_add_check);
48094+
48095+
48096+int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key)
48097+{
48098+ cuckoo_hash hash;
48099+
48100+ hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
48101+ if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
48102+ cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
48103+ key)) {
48104+ hashtab->table0[hash].state = CUCKOO_HASH_STATE_VACANT;
48105+ hashtab->entries--;
48106+ return 0;
48107+ }
48108+
48109+ hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
48110+ if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
48111+ cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
48112+ key)) {
48113+ hashtab->table1[hash].state = CUCKOO_HASH_STATE_VACANT;
48114+ hashtab->entries--;
48115+ return 0;
48116+ }
48117+
48118+ return -EINVAL;
48119+}
48120+EXPORT_SYMBOL_GPL(cuckoo_hash_remove);
48121+
48122+
48123+int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
48124+ cuckoo_hash_value value)
48125+{
48126+ cuckoo_hash hash;
48127+
48128+ hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
48129+ if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
48130+ cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
48131+ key)) {
48132+ hashtab->table0[hash].value = value;
48133+ return 0;
48134+ }
48135+
48136+ hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
48137+ if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
48138+ cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
48139+ key)) {
48140+ hashtab->table1[hash].value = value;
48141+ return 0;
48142+ }
48143+
48144+ return -EINVAL;
48145+}
48146+EXPORT_SYMBOL_GPL(cuckoo_hash_update);
48147+
48148+
48149+void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab)
48150+{
48151+ hashtab->iterate_index = 0;
48152+}
48153+EXPORT_SYMBOL_GPL(cuckoo_hash_iterate_reset);
48154+
48155+
48156+int cuckoo_hash_iterate(cuckoo_hash_table *hashtab,
48157+ cuckoo_hash_key *key, cuckoo_hash_value *value)
48158+{
48159+ unsigned index;
48160+
48161+ while (hashtab->iterate_index < hashtab->length) {
48162+ index = hashtab->iterate_index;
48163+ ++hashtab->iterate_index;
48164+ if (hashtab->table0[index].state == CUCKOO_HASH_STATE_OCCUPIED) {
48165+ *key = hashtab->table0[index].key;
48166+ *value = hashtab->table0[index].value;
48167+ return 0;
48168+ }
48169+ }
48170+
48171+ while (hashtab->iterate_index >= hashtab->length &&
48172+ hashtab->iterate_index < hashtab->length * 2) {
48173+ index = hashtab->iterate_index - hashtab->length;
48174+ ++hashtab->iterate_index;
48175+ if (hashtab->table1[index].state == CUCKOO_HASH_STATE_OCCUPIED) {
48176+ *key = hashtab->table1[index].key;
48177+ *value = hashtab->table1[index].value;
48178+ return 0;
48179+ }
48180+ }
48181+
48182+ return -ENOSPC;
48183+}
48184+EXPORT_SYMBOL_GPL(cuckoo_hash_iterate);
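A typical consumer of the iterate pair might walk every occupied entry as in the sketch below; this is an editorial usage example that assumes accel_cuckoo_hash.h from this patch and a table populated elsewhere, not code from the patch itself.

#include <linux/kernel.h>
#include "accel_cuckoo_hash.h"

/* Usage sketch only: dump every occupied entry of an existing table. */
static void example_dump_entries(cuckoo_hash_table *tab)
{
	cuckoo_hash_key key;
	cuckoo_hash_value value;

	cuckoo_hash_iterate_reset(tab);
	/* cuckoo_hash_iterate() returns 0 while entries remain, -ENOSPC when done */
	while (cuckoo_hash_iterate(tab, &key, &value) == 0)
		printk(KERN_DEBUG "entry: key=%llx value=%d\n",
		       (unsigned long long)key, value);
}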
48185+
48186+
48187+#if 0
48188+void cuckoo_hash_valid(cuckoo_hash_table *hashtab)
48189+{
48190+ int i, entry_count = 0;
48191+
48192+ for (i=0; i < hashtab->length; i++) {
48193+ EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT &&
48194+ hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED);
48195+ if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48196+ entry_count++;
48197+ EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT &&
48198+ hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED);
48199+ if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48200+ entry_count++;
48201+ }
48202+
48203+ if (entry_count != hashtab->entries) {
48204+ EPRINTK("%s: bad count\n", __FUNCTION__);
48205+ cuckoo_hash_dump(hashtab);
48206+ return;
48207+ }
48208+
48209+ for (i=0; i< hashtab->length; i++) {
48210+ if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48211+ if (i != cuckoo_compute_hash(hashtab,
48212+ &hashtab->table0[i].key,
48213+ &hashtab->a0)) {
48214+ EPRINTK("%s: Bad key table 0 index %d\n",
48215+ __FUNCTION__, i);
48216+ cuckoo_hash_dump(hashtab);
48217+ return;
48218+ }
48219+ if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48220+ if (i != cuckoo_compute_hash(hashtab,
48221+ &hashtab->table1[i].key,
48222+ &hashtab->a1)) {
48223+ EPRINTK("%s: Bad key table 1 index %d\n",
48224+ __FUNCTION__, i);
48225+ cuckoo_hash_dump(hashtab);
48226+ return;
48227+ }
48228+ }
48229+
48230+}
48231+EXPORT_SYMBOL_GPL(cuckoo_hash_valid);
48232+
48233+
48234+void cuckoo_hash_dump(cuckoo_hash_table *hashtab)
48235+{
48236+ int i, entry_count;
48237+
48238+ entry_count = 0;
48239+ for (i=0; i < hashtab->length; i++) {
48240+ EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT &&
48241+ hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED);
48242+ if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48243+ entry_count++;
48244+ EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT &&
48245+ hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED);
48246+ if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48247+ entry_count++;
48248+ }
48249+
48250+ EPRINTK("======================\n");
48251+ EPRINTK("Cuckoo hash table dump\n");
48252+ EPRINTK("======================\n");
48253+ EPRINTK("length: %d; length_bits: %d; key_length: %d\n", hashtab->length,
48254+ hashtab->length_bits, hashtab->key_length);
48255+ EPRINTK("Recorded entries: %d\n", hashtab->entries);
48256+ EPRINTK("Counted entries: %d\n", entry_count);
48257+ EPRINTK("a0: %llx; a1: %llx\n", hashtab->a0, hashtab->a1);
48258+ EPRINTK("-----------------------------------------\n");
48259+ EPRINTK("Index Occupied Key Value Index0 Index1\n");
48260+ EPRINTK("-----------------------------------------\n");
48261+ for (i=0; i< hashtab->length; i++) {
48262+ if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48263+ EPRINTK("%d %d %llx %d %d %d\n", i,
48264+ hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED,
48265+ hashtab->table0[i].key, hashtab->table0[i].value,
48266+ cuckoo_compute_hash(hashtab, &hashtab->table0[i].key,
48267+ &hashtab->a0),
48268+ cuckoo_compute_hash(hashtab, &hashtab->table0[i].key,
48269+ &hashtab->a1));
48270+ else
48271+ EPRINTK("%d %d - - - -\n", i,
48272+ hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED);
48273+
48274+ }
48275+ EPRINTK("-----------------------------------------\n");
48276+ EPRINTK("Index Occupied Key Value Index0 Index1\n");
48277+ EPRINTK("-----------------------------------------\n");
48278+ for (i=0; i< hashtab->length; i++) {
48279+ if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48280+ EPRINTK("%d %d %llx %d %d %d\n", i,
48281+ hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED,
48282+ hashtab->table1[i].key, hashtab->table1[i].value,
48283+ cuckoo_compute_hash(hashtab, &hashtab->table1[i].key,
48284+ &hashtab->a0),
48285+ cuckoo_compute_hash(hashtab, &hashtab->table1[i].key,
48286+ &hashtab->a1));
48287+ else
48288+ EPRINTK("%d %d - - - -\n", i,
48289+ hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED);
48290+ }
48291+ EPRINTK("======================\n");
48292+}
48293+EXPORT_SYMBOL_GPL(cuckoo_hash_dump);
48294+#endif
48295Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.h
48296===================================================================
48297--- /dev/null 1970-01-01 00:00:00.000000000 +0000
48298+++ head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.h 2008-02-20 09:32:49.000000000 +0100
48299@@ -0,0 +1,227 @@
48300+/****************************************************************************
48301+ * Solarflare driver for Xen network acceleration
48302+ *
48303+ * Copyright 2006-2008: Solarflare Communications Inc,
48304+ * 9501 Jeronimo Road, Suite 250,
48305+ * Irvine, CA 92618, USA
48306+ *
48307+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
48308+ *
48309+ * This program is free software; you can redistribute it and/or modify it
48310+ * under the terms of the GNU General Public License version 2 as published
48311+ * by the Free Software Foundation, incorporated herein by reference.
48312+ *
48313+ * This program is distributed in the hope that it will be useful,
48314+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
48315+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
48316+ * GNU General Public License for more details.
48317+ *
48318+ * You should have received a copy of the GNU General Public License
48319+ * along with this program; if not, write to the Free Software
48320+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
48321+ ****************************************************************************
48322+ */
48323+
48324+/*
48325+ * A cuckoo hash table consists of two sub tables. Each entry can
48326+ * hash to a position in each table. If, on entry, its position is
48327+ * found to be occupied, the existing element is moved to its other
48328+ * location. This recurses until success or a loop is found. If a
48329+ * loop is found the table is rehashed.
48330+ *
48331+ * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf
48332+ */
48333+
48334+#ifndef NET_ACCEL_CUCKOO_HASH_H
48335+#define NET_ACCEL_CUCKOO_HASH_H
48336+
48337+/*! Type used for hash table keys of ip pairs */
48338+typedef struct {
48339+ u32 local_ip;
48340+ //u32 remote_ip;
48341+ u16 local_port;
48342+ //u16 remote_port;
48343+	/* Technically only 1 bit, but use 16 so the key is a round
48344+	   number of bytes in size */
48345+ u16 proto;
48346+} cuckoo_hash_ip_key;
48347+
48348+/*! Type used for hash table keys of mac addresses */
48349+typedef u64 cuckoo_hash_mac_key;
48350+
48351+/*! This type is designed to be large enough to hold all supported key
48352+ * sizes to avoid having to malloc storage for them.
48353+ */
48354+typedef u64 cuckoo_hash_key;
48355+
48356+/*! Type used for the values stored in the hash table */
48357+typedef int cuckoo_hash_value;
48358+
48359+/*! Type used for the hash used to index the table */
48360+typedef u32 cuckoo_hash;
48361+
48362+/*! How long to spend displacing values when adding before giving up
48363+ * and rehashing */
48364+#define CUCKOO_HASH_MAX_LOOP (hashtab->length)
48365+
48366+/*! State of hash table entry */
48367+typedef enum {
48368+ CUCKOO_HASH_STATE_VACANT = 0,
48369+ CUCKOO_HASH_STATE_OCCUPIED
48370+} cuckoo_hash_state;
48371+
48372+/*! An entry in the hash table */
48373+typedef struct {
48374+ cuckoo_hash_state state;
48375+ cuckoo_hash_key key;
48376+ cuckoo_hash_value value;
48377+} cuckoo_hash_entry;
48378+
48379+/*! A cuckoo hash table */
48380+typedef struct {
48381+ /*! The length of each table (NB. there are two tables of this
48382+ * length) */
48383+ unsigned length;
48384+ /*! The length of each table in bits */
48385+ unsigned length_bits;
48386+ /*! The length of the key in bytes */
48387+ unsigned key_length;
48388+ /*! The number of entries currently stored in the table */
48389+ unsigned entries;
48390+ /*! Index into table used by cuckoo_hash_iterate */
48391+ unsigned iterate_index;
48392+
48393+ /* parameter of hash functions */
48394+ /*! The "a" parameter of the first hash function */
48395+ cuckoo_hash_key a0;
48396+ /*! The "a" parameter of the second hash function */
48397+ cuckoo_hash_key a1;
48398+
48399+ /*! The first table */
48400+ cuckoo_hash_entry *table0;
48401+ /*! The second table */
48402+ cuckoo_hash_entry *table1;
48403+} cuckoo_hash_table;
48404+
48405+/*! Initialise the cuckoo hash table
48406+ *
48407+ * \param hashtab A pointer to an uninitialised hash table structure
48408+ * \param length_bits The number of elements in each table equals
48409+ * 2**length_bits
48410+ * \param key_length The length of the key in bytes
48411+ *
48412+ * \return 0 on success, -ENOMEM if it couldn't allocate the tables
48413+ */
48414+extern
48415+int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits,
48416+ unsigned key_length);
48417+
48418+
48419+/*! Destroy a hash table
48420+ *
48421+ * \param hashtab A hash table that has previously been passed to a
48422+ * successful call of cuckoo_hash_init()
48423+ */
48424+extern
48425+void cuckoo_hash_destroy(cuckoo_hash_table *hashtab);
48426+
48427+
48428+/*! Lookup an entry in the hash table
48429+ *
48430+ * \param hashtab The hash table in which to look.
48431+ * \param key Pointer to a mac address to use as the key
48432+ * \param value On exit set to the value stored if key was present
48433+ *
48434+ * \return 0 if not present in the table, non-zero if it is (and value
48435+ * is set accordingly)
48436+ */
48437+extern
48438+int cuckoo_hash_lookup(cuckoo_hash_table *hashtab,
48439+ cuckoo_hash_key *key,
48440+ cuckoo_hash_value *value);
48441+
48442+/*! Add an entry to the hash table. Key must not be a duplicate of
48443+ * anything already in the table. If this is a risk, see
48444+ * cuckoo_hash_add_check
48445+ *
48446+ * \param hashtab The hash table to add the entry to
48447+ * \param key Pointer to a mac address to use as a key
48448+ * \param value The value to store
48449+ * \param can_rehash Flag to allow the add function to rehash the
48450+ * table if necessary
48451+ *
48452+ * \return 0 on success, non-zero on failure. -ENOSPC means it just
48453+ * couldn't find anywhere to put it - this is bad and probably means
48454+ * an entry has been dropped on the floor (but the entry you just
48455+ * tried to add may now be included)
48456+ */
48457+extern
48458+int cuckoo_hash_add(cuckoo_hash_table *hashtab,
48459+ cuckoo_hash_key *key,
48460+ cuckoo_hash_value value,
48461+ int can_rehash);
48462+
48463+/*! Same as cuckoo_hash_add but first checks to ensure entry is not
48464+ * already there
48465+ * \return -EBUSY if already there
48466+ */
48467+
48468+extern
48469+int cuckoo_hash_add_check(cuckoo_hash_table *hashtab,
48470+ cuckoo_hash_key *key,
48471+ cuckoo_hash_value value,
48472+ int can_rehash);
48473+/*! Remove an entry from the table
48474+ *
48475+ * \param hashtab The hash table to remove the entry from
48476+ * \param key The key that was used to previously add the entry
48477+ *
48478+ * \return 0 on success, -EINVAL if the entry couldn't be found
48479+ */
48480+extern
48481+int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key);
48482+
48483+
48484+/*! Helper for those using mac addresses to convert to a key for the
48485+ * hash table
48486+ */
48487+static inline cuckoo_hash_mac_key cuckoo_mac_to_key(const u8 *mac)
48488+{
48489+ return (cuckoo_hash_mac_key)(mac[0])
48490+ | (cuckoo_hash_mac_key)(mac[1]) << 8
48491+ | (cuckoo_hash_mac_key)(mac[2]) << 16
48492+ | (cuckoo_hash_mac_key)(mac[3]) << 24
48493+ | (cuckoo_hash_mac_key)(mac[4]) << 32
48494+ | (cuckoo_hash_mac_key)(mac[5]) << 40;
48495+}
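Tying the helper to the add/lookup API, one plausible pattern is sketched below; it is an editorial example that assumes this header, and the value and error handling are placeholders rather than driver code.

#include <linux/errno.h>
#include "accel_cuckoo_hash.h"

/* Usage sketch only: file a value under a MAC address and read it back. */
static int example_add_mac(cuckoo_hash_table *tab, const u8 *mac, int value)
{
	cuckoo_hash_key key = (cuckoo_hash_key)cuckoo_mac_to_key(mac);
	cuckoo_hash_value found;
	int rc;

	/* Refuse duplicates; allow the add path to rehash if displacement loops */
	rc = cuckoo_hash_add_check(tab, &key, value, 1 /* can_rehash */);
	if (rc < 0)
		return rc;

	/* Lookup returns non-zero when the key is present */
	if (!cuckoo_hash_lookup(tab, &key, &found))
		return -ENOENT;
	return (found == value) ? 0 : -EIO;
}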
48496+
48497+
48498+/*! Update an entry already in the hash table to take a new value
48499+ *
48500+ * \param hashtab The hash table containing the entry to update
48501+ * \param key Pointer to a mac address to use as a key
48502+ * \param value The value to store
48503+ *
48504+ * \return 0 on success, non-zero on failure.
48505+ */
48506+int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
48507+ cuckoo_hash_value value);
48508+
48509+
48510+/*! Go through the hash table and return all used entries (one per call)
48511+ *
48512+ * \param hashtab The hash table to iterate over
48513+ * \param key Pointer to a key to take the returned key
48514+ * \param value Pointer to a value to take the returned value
48515+ *
48516+ * \return 0 on success (key, value set), non-zero on failure.
48517+ */
48518+int cuckoo_hash_iterate(cuckoo_hash_table *hashtab,
48519+ cuckoo_hash_key *key, cuckoo_hash_value *value);
48520+void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab);
48521+
48522+/* debug, not compiled by default */
48523+void cuckoo_hash_valid(cuckoo_hash_table *hashtab);
48524+void cuckoo_hash_dump(cuckoo_hash_table *hashtab);
48525+
48526+#endif /* NET_ACCEL_CUCKOO_HASH_H */
48527Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.c
48528===================================================================
48529--- /dev/null 1970-01-01 00:00:00.000000000 +0000
48530+++ head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.c 2008-02-20 09:32:49.000000000 +0100
48531@@ -0,0 +1,301 @@
48532+/****************************************************************************
48533+ * Solarflare driver for Xen network acceleration
48534+ *
48535+ * Copyright 2006-2008: Solarflare Communications Inc,
48536+ * 9501 Jeronimo Road, Suite 250,
48537+ * Irvine, CA 92618, USA
48538+ *
48539+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
48540+ *
48541+ * This program is free software; you can redistribute it and/or modify it
48542+ * under the terms of the GNU General Public License version 2 as published
48543+ * by the Free Software Foundation, incorporated herein by reference.
48544+ *
48545+ * This program is distributed in the hope that it will be useful,
48546+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
48547+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
48548+ * GNU General Public License for more details.
48549+ *
48550+ * You should have received a copy of the GNU General Public License
48551+ * along with this program; if not, write to the Free Software
48552+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
48553+ ****************************************************************************
48554+ */
48555+
48556+#include <xen/evtchn.h>
48557+
48558+#include "accel_util.h"
48559+#include "accel_msg_iface.h"
48560+
48561+#define NET_ACCEL_MSG_Q_SIZE (1024)
48562+#define NET_ACCEL_MSG_Q_MASK (NET_ACCEL_MSG_Q_SIZE - 1)
48563+
48564+#ifdef NDEBUG
48565+#define NET_ACCEL_CHECK_MAGIC(_p, _errval)
48566+#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id)
48567+#else
48568+#define NET_ACCEL_CHECK_MAGIC(_p, _errval) \
48569+ if (_p->magic != NET_ACCEL_MSG_MAGIC) { \
48570+ printk(KERN_ERR "%s: passed invalid shared page %p!\n", \
48571+ __FUNCTION__, _p); \
48572+ return _errval; \
48573+ }
48574+#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id) \
48575+ printk(_t ": queue %d write %x read %x base %x limit %x\n", \
48576+ _id, _q->write, _q->read, _q->base, _q->limit);
48577+#endif
48578+
48579+/*
48580+ * We've been passed at least 2 pages: 1 control page and 1 or more
48581+ * data pages.
48582+ */
48583+int net_accel_msg_init_page(void *mem, int len, int up)
48584+{
48585+ struct net_accel_shared_page *shared_page =
48586+ (struct net_accel_shared_page*)mem;
48587+
48588+ if ((unsigned long)shared_page & NET_ACCEL_MSG_Q_MASK)
48589+ return -EINVAL;
48590+
48591+ shared_page->magic = NET_ACCEL_MSG_MAGIC;
48592+
48593+ shared_page->aflags = 0;
48594+
48595+ shared_page->net_dev_up = up;
48596+
48597+ return 0;
48598+}
48599+EXPORT_SYMBOL_GPL(net_accel_msg_init_page);
48600+
48601+
48602+void net_accel_msg_init_queue(sh_msg_fifo2 *queue,
48603+ struct net_accel_msg_queue *indices,
48604+ struct net_accel_msg *base, int size)
48605+{
48606+ queue->fifo = base;
48607+ spin_lock_init(&queue->lock);
48608+ sh_fifo2_init(queue, size-1, &indices->read, &indices->write);
48609+}
48610+EXPORT_SYMBOL_GPL(net_accel_msg_init_queue);
48611+
48612+
48613+static inline int _net_accel_msg_send(struct net_accel_shared_page *sp,
48614+ sh_msg_fifo2 *queue,
48615+ struct net_accel_msg *msg,
48616+ int is_reply)
48617+{
48618+ int rc = 0;
48619+ NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
48620+ rmb();
48621+ if (is_reply) {
48622+ EPRINTK_ON(sh_fifo2_is_full(queue));
48623+ sh_fifo2_put(queue, *msg);
48624+ } else {
48625+ if (sh_fifo2_not_half_full(queue)) {
48626+ sh_fifo2_put(queue, *msg);
48627+ } else {
48628+ rc = -ENOSPC;
48629+ }
48630+ }
48631+ wmb();
48632+ return rc;
48633+}
48634+
48635+/* Notify after a batch of messages have been sent */
48636+void net_accel_msg_notify(int irq)
48637+{
48638+ notify_remote_via_irq(irq);
48639+}
48640+EXPORT_SYMBOL_GPL(net_accel_msg_notify);
48641+
48642+/*
48643+ * Send a message on the specified FIFO. Returns 0 on success, -errno
48644+ * on failure. The message in msg is copied to the current slot of the
48645+ * FIFO.
48646+ */
48647+int net_accel_msg_send(struct net_accel_shared_page *sp, sh_msg_fifo2 *q,
48648+ struct net_accel_msg *msg)
48649+{
48650+ unsigned long flags;
48651+ int rc;
48652+ net_accel_msg_lock_queue(q, &flags);
48653+ rc = _net_accel_msg_send(sp, q, msg, 0);
48654+ net_accel_msg_unlock_queue(q, &flags);
48655+ return rc;
48656+}
48657+EXPORT_SYMBOL_GPL(net_accel_msg_send);
48658+
48659+
48660+/* As net_accel_msg_send but also posts a notification to the far end. */
48661+int net_accel_msg_send_notify(struct net_accel_shared_page *sp, int irq,
48662+ sh_msg_fifo2 *q, struct net_accel_msg *msg)
48663+{
48664+ unsigned long flags;
48665+ int rc;
48666+ net_accel_msg_lock_queue(q, &flags);
48667+ rc = _net_accel_msg_send(sp, q, msg, 0);
48668+ net_accel_msg_unlock_queue(q, &flags);
48669+ if (rc >= 0)
48670+ notify_remote_via_irq(irq);
48671+ return rc;
48672+}
48673+EXPORT_SYMBOL_GPL(net_accel_msg_send_notify);
48674+
48675+
48676+int net_accel_msg_reply(struct net_accel_shared_page *sp, sh_msg_fifo2 *q,
48677+ struct net_accel_msg *msg)
48678+{
48679+ unsigned long flags;
48680+ int rc;
48681+ net_accel_msg_lock_queue(q, &flags);
48682+ rc = _net_accel_msg_send(sp, q, msg, 1);
48683+ net_accel_msg_unlock_queue(q, &flags);
48684+ return rc;
48685+}
48686+EXPORT_SYMBOL_GPL(net_accel_msg_reply);
48687+
48688+
48689+/* As net_accel_msg_send but also posts a notification to the far end. */
48690+int net_accel_msg_reply_notify(struct net_accel_shared_page *sp, int irq,
48691+ sh_msg_fifo2 *q, struct net_accel_msg *msg)
48692+{
48693+ unsigned long flags;
48694+ int rc;
48695+ net_accel_msg_lock_queue(q, &flags);
48696+ rc = _net_accel_msg_send(sp, q, msg, 1);
48697+ net_accel_msg_unlock_queue(q, &flags);
48698+ if (rc >= 0)
48699+ notify_remote_via_irq(irq);
48700+ return rc;
48701+}
48702+EXPORT_SYMBOL_GPL(net_accel_msg_reply_notify);
48703+
48704+
48705+/*
48706+ * Look at a received message, if any, so a decision can be made about
48707+ * whether to read it now or not. Cookie is a bit of debug which is
48708+ * set here and checked when passed to net_accel_msg_recv_next()
48709+ */
48710+int net_accel_msg_peek(struct net_accel_shared_page *sp,
48711+ sh_msg_fifo2 *queue,
48712+ struct net_accel_msg *msg, int *cookie)
48713+{
48714+ unsigned long flags;
48715+ int rc = 0;
48716+ NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
48717+ net_accel_msg_lock_queue(queue, &flags);
48718+ rmb();
48719+ if (sh_fifo2_is_empty(queue)) {
48720+ rc = -ENOENT;
48721+ } else {
48722+ *msg = sh_fifo2_peek(queue);
48723+ *cookie = *(queue->fifo_rd_i);
48724+ }
48725+ net_accel_msg_unlock_queue(queue, &flags);
48726+ return rc;
48727+}
48728+EXPORT_SYMBOL_GPL(net_accel_msg_peek);
48729+
48730+
48731+/*
48732+ * Move the queue onto the next element, used after finished with a
48733+ * peeked msg
48734+ */
48735+int net_accel_msg_recv_next(struct net_accel_shared_page *sp,
48736+ sh_msg_fifo2 *queue, int cookie)
48737+{
48738+ unsigned long flags;
48739+ NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
48740+ net_accel_msg_lock_queue(queue, &flags);
48741+ rmb();
48742+ /* Mustn't be empty */
48743+ BUG_ON(sh_fifo2_is_empty(queue));
48744+ /*
48745+ * Check cookie matches, i.e. we're advancing over the same message
48746+ * as was got using peek
48747+ */
48748+ BUG_ON(cookie != *(queue->fifo_rd_i));
48749+ sh_fifo2_rd_next(queue);
48750+ wmb();
48751+ net_accel_msg_unlock_queue(queue, &flags);
48752+ return 0;
48753+}
48754+EXPORT_SYMBOL_GPL(net_accel_msg_recv_next);
48755+
48756+
48757+/*
48758+ * Receive a message on the specified FIFO. Returns 0 on success,
48759+ * -errno on failure.
48760+ */
48761+int net_accel_msg_recv(struct net_accel_shared_page *sp, sh_msg_fifo2 *queue,
48762+ struct net_accel_msg *msg)
48763+{
48764+ unsigned long flags;
48765+ int rc = 0;
48766+ NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
48767+ net_accel_msg_lock_queue(queue, &flags);
48768+ rmb();
48769+ if (sh_fifo2_is_empty(queue)) {
48770+ rc = -ENOENT;
48771+ } else {
48772+ sh_fifo2_get(queue, msg);
48773+ }
48774+ wmb();
48775+ net_accel_msg_unlock_queue(queue, &flags);
48776+ return rc;
48777+}
48778+EXPORT_SYMBOL_GPL(net_accel_msg_recv);
48779+
48780+
48781+/*
48782+ * Start sending a message without copying. Returns a pointer to a message
48783+ * that will be filled out in place. The queue is locked until the message
48784+ * is sent.
48785+ */
48786+struct net_accel_msg *net_accel_msg_start_send(struct net_accel_shared_page *sp,
48787+ sh_msg_fifo2 *queue, unsigned long *flags)
48788+{
48789+ struct net_accel_msg *msg;
48790+ NET_ACCEL_CHECK_MAGIC(sp, NULL);
48791+ net_accel_msg_lock_queue(queue, flags);
48792+ rmb();
48793+ if (sh_fifo2_not_half_full(queue)) {
48794+ msg = sh_fifo2_pokep(queue);
48795+ } else {
48796+ net_accel_msg_unlock_queue(queue, flags);
48797+ msg = NULL;
48798+ }
48799+ return msg;
48800+}
48801+EXPORT_SYMBOL_GPL(net_accel_msg_start_send);
48802+
48803+
48804+static inline void _msg_complete(struct net_accel_shared_page *sp,
48805+ sh_msg_fifo2 *queue,
48806+ unsigned long *flags)
48807+{
48808+ sh_fifo2_wr_next(queue);
48809+ net_accel_msg_unlock_queue(queue, flags);
48810+}
48811+
48812+/*
48813+ * Complete the sending of a message started with net_accel_msg_start_send. The
48814+ * message is implicit since the queue was locked by _start
48815+ */
48816+void net_accel_msg_complete_send(struct net_accel_shared_page *sp,
48817+ sh_msg_fifo2 *queue,
48818+ unsigned long *flags)
48819+{
48820+ _msg_complete(sp, queue, flags);
48821+}
48822+EXPORT_SYMBOL_GPL(net_accel_msg_complete_send);
48823+
48824+/* As net_accel_msg_complete_send but does the notify. */
48825+void net_accel_msg_complete_send_notify(struct net_accel_shared_page *sp,
48826+ sh_msg_fifo2 *queue,
48827+ unsigned long *flags, int irq)
48828+{
48829+ _msg_complete(sp, queue, flags);
48830+ notify_remote_via_irq(irq);
48831+}
48832+EXPORT_SYMBOL_GPL(net_accel_msg_complete_send_notify);
48833Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.h
48834===================================================================
48835--- /dev/null 1970-01-01 00:00:00.000000000 +0000
48836+++ head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.h 2008-02-20 09:32:49.000000000 +0100
48837@@ -0,0 +1,414 @@
48838+/****************************************************************************
48839+ * Solarflare driver for Xen network acceleration
48840+ *
48841+ * Copyright 2006-2008: Solarflare Communications Inc,
48842+ * 9501 Jeronimo Road, Suite 250,
48843+ * Irvine, CA 92618, USA
48844+ *
48845+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
48846+ *
48847+ * This program is free software; you can redistribute it and/or modify it
48848+ * under the terms of the GNU General Public License version 2 as published
48849+ * by the Free Software Foundation, incorporated herein by reference.
48850+ *
48851+ * This program is distributed in the hope that it will be useful,
48852+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
48853+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
48854+ * GNU General Public License for more details.
48855+ *
48856+ * You should have received a copy of the GNU General Public License
48857+ * along with this program; if not, write to the Free Software
48858+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
48859+ ****************************************************************************
48860+ */
48861+
48862+#ifndef NET_ACCEL_MSG_IFACE_H
48863+#define NET_ACCEL_MSG_IFACE_H
48864+
48865+#include <linux/ip.h>
48866+#include <linux/tcp.h>
48867+#include <linux/udp.h>
48868+#include <linux/in.h>
48869+#include <linux/netdevice.h>
48870+#include <linux/etherdevice.h>
48871+
48872+#include "accel_shared_fifo.h"
48873+
48874+#define NET_ACCEL_MSG_MAGIC (0x85465479)
48875+
48876+/*! We talk version 0.010 of the interdomain protocol */
48877+#define NET_ACCEL_MSG_VERSION (0x00001000)
48878+
48879+/*! Shared memory portion of inter-domain FIFO */
48880+struct net_accel_msg_queue {
48881+ u32 read;
48882+ u32 write;
48883+};
48884+
48885+
48886+/*
48887+ * The aflags in the following structure is used as follows:
48888+ *
48889+ * - each bit is set when one of the corresponding variables is
48890+ * changed by either end.
48891+ *
48892+ * - the end that has made the change then forwards an IRQ to the
48893+ * other
48894+ *
48895+ * - the IRQ handler deals with these bits either on the fast path, or
48896+ * for less common changes, by jumping onto the slow path.
48897+ *
48898+ * - once it has seen a change, it clears the relevant bit.
48899+ *
48900+ * aflags is accessed atomically using clear_bit, test_bit,
48901+ * test_and_set_bit etc
48902+ */
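Schematically, that handshake reduces to the pattern below; this is an editorial illustration using a local flags word as a stand-in for the shared page's aflags field, not the driver's actual producer or IRQ-handler code.

#include <linux/bitops.h>

static unsigned long example_aflags;	/* stand-in for shared_page->aflags */

/* Producer side: record the change, then kick the other end's IRQ. */
static void example_signal_netupdown(int irq)
{
	set_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B, &example_aflags);
	net_accel_msg_notify(irq);
}

/* Consumer side (from the IRQ handler): act on the change, then clear the bit. */
static void example_handle_aflags(void)
{
	if (test_and_clear_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B, &example_aflags)) {
		/* ... react to the net_dev up/down change here ... */
	}
}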
48903+
48904+/*
48905+ * The following used to signify to the other domain when the queue
48906+ * they want to use is full, and when it is no longer full. Could be
48907+ * compressed to use fewer bits but done this way for simplicity and
48908+ * clarity
48909+ */
48910+
48911+/* "dom0->domU queue" is full */
48912+#define NET_ACCEL_MSG_AFLAGS_QUEUE0FULL 0x1
48913+#define NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B 0
48914+/* "dom0->domU queue" is not full */
48915+#define NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL 0x2
48916+#define NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B 1
48917+/* "domU->dom0 queue" is full */
48918+#define NET_ACCEL_MSG_AFLAGS_QUEUEUFULL 0x4
48919+#define NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B 2
48920+/* "domU->dom0 queue" is not full */
48921+#define NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL 0x8
48922+#define NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B 3
48923+/* dom0 -> domU net_dev up/down events */
48924+#define NET_ACCEL_MSG_AFLAGS_NETUPDOWN 0x10
48925+#define NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B 4
48926+
48927+/*
48928+ * Masks used to test if there are any messages for domU and dom0
48929+ * respectively
48930+ */
48931+#define NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK \
48932+ (NET_ACCEL_MSG_AFLAGS_QUEUE0FULL | \
48933+ NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL | \
48934+ NET_ACCEL_MSG_AFLAGS_NETUPDOWN)
48935+#define NET_ACCEL_MSG_AFLAGS_TO_DOM0_MASK \
48936+ (NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL | \
48937+ NET_ACCEL_MSG_AFLAGS_QUEUEUFULL)
48938+
48939+/*! The shared data structure used for inter-VM communication. */
48940+struct net_accel_shared_page {
48941+ /*! Sanity check */
48942+ u32 magic;
48943+ /*! Used by host/Dom0 */
48944+ struct net_accel_msg_queue queue0;
48945+ /*! Used by guest/DomU */
48946+ struct net_accel_msg_queue queue1;
48947+ /*! Atomic flags, used to communicate simple state changes */
48948+ u32 aflags;
48949+ /*! State of net_dev used for acceleration */
48950+ u32 net_dev_up;
48951+};
48952+
48953+
48954+enum net_accel_hw_type {
48955+ /*! Not a virtualisable NIC: use slow path. */
48956+ NET_ACCEL_MSG_HWTYPE_NONE = 0,
48957+ /*! NIC is Falcon-based */
48958+ NET_ACCEL_MSG_HWTYPE_FALCON_A = 1,
48959+ NET_ACCEL_MSG_HWTYPE_FALCON_B = 2,
48960+};
48961+
48962+/*! The maximum number of pages used by an event queue. */
48963+#define EF_HW_FALCON_EVQ_PAGES 8
48964+
48965+struct net_accel_hw_falcon_b {
48966+ /* VI */
48967+ /*! Grant for Tx DMA Q */
48968+ u32 txdmaq_gnt;
48969+ /*! Grant for Rx DMA Q */
48970+ u32 rxdmaq_gnt;
48971+ /*! Machine frame number for Tx/Rx doorbell page */
48972+ u32 doorbell_mfn;
48973+ /*! Grant for Tx/Rx doorbell page */
48974+ u32 doorbell_gnt;
48975+
48976+ /* Event Q */
48977+ /*! Grants for the pages of the EVQ */
48978+ u32 evq_mem_gnts[EF_HW_FALCON_EVQ_PAGES];
48979+ u32 evq_offs;
48980+ /*! log2(pages in event Q) */
48981+ u32 evq_order;
48982+ /*! Capacity in events */
48983+ u32 evq_capacity;
48984+ /*! Eventq pointer register physical address */
48985+ u32 evq_rptr;
48986+ /*! Interface instance */
48987+ u32 instance;
48988+ /*! Capacity of RX queue */
48989+ u32 rx_capacity;
48990+ /*! Capacity of TX queue */
48991+ u32 tx_capacity;
48992+
48993+ /* NIC */
48994+ s32 nic_arch;
48995+ s32 nic_revision;
48996+ u8 nic_variant;
48997+};
48998+
48999+struct net_accel_hw_falcon_a {
49000+ struct net_accel_hw_falcon_b common;
49001+ u32 evq_rptr_gnt;
49002+};
49003+
49004+
49005+/*! Description of the hardware that the DomU is being given. */
49006+struct net_accel_msg_hw {
49007+ u32 type; /*!< Hardware type */
49008+ union {
49009+ struct net_accel_hw_falcon_a falcon_a;
49010+ struct net_accel_hw_falcon_b falcon_b;
49011+ } resources;
49012+};
49013+
49014+/*! Start-of-day handshake message. Dom0 fills in its version and
49015+ * sends, DomU checks, inserts its version and replies
49016+ */
49017+struct net_accel_msg_hello {
49018+ /*! Sender's version (set by each side in turn) */
49019+ u32 version;
49020+ /*! max pages allocated/allowed for buffers */
49021+ u32 max_pages;
49022+};
49023+
49024+/*! Maximum number of page requests that can fit in a message. */
49025+#define NET_ACCEL_MSG_MAX_PAGE_REQ (8)
49026+
49027+/*! Request for NIC buffers. DomU fills out pages and grants (and
49028+ * optionally reqid), dom0 fills out buf and sends the reply
49029+ */
49030+struct net_accel_msg_map_buffers {
49031+ u32 reqid; /*!< Optional request ID */
49032+ u32 pages; /*!< Number of pages to map */
49033+ u32 grants[NET_ACCEL_MSG_MAX_PAGE_REQ]; /*!< Grant ids to map */
49034+ u32 buf; /*!< NIC buffer address of pages obtained */
49035+};
49036+
49037+/*! Notification of a change to local mac address, used to filter
49038+ locally destined packets off the fast path */
49039+struct net_accel_msg_localmac {
49040+ u32 flags; /*!< Should this be added or removed? */
49041+ u8 mac[ETH_ALEN]; /*!< The mac address to filter onto slow path */
49042+};
49043+
49044+struct net_accel_msg_fastpath {
49045+ u32 flags; /*!< Should this be added or removed? */
49046+ u8 mac[ETH_ALEN];/*!< The mac address to filter onto fast path */
49047+ u16 port; /*!< The port of the connection */
49048+ u32 ip; /*!< The IP address of the connection */
49049+ u8 proto; /*!< The protocol of connection (TCP/UDP) */
49050+};
49051+
49052+/*! Values for struct net_accel_msg_localmac/fastpath.flags */
49053+#define NET_ACCEL_MSG_ADD 0x1
49054+#define NET_ACCEL_MSG_REMOVE 0x2
49055+
49056+/*! Overall message structure */
49057+struct net_accel_msg {
49058+	/*! ID specifying type of message */
49059+ u32 id;
49060+ union {
49061+ /*! handshake */
49062+ struct net_accel_msg_hello hello;
49063+ /*! hardware description */
49064+ struct net_accel_msg_hw hw;
49065+ /*! buffer map request */
49066+ struct net_accel_msg_map_buffers mapbufs;
49067+ /*! mac address of a local interface */
49068+ struct net_accel_msg_localmac localmac;
49069+ /*! address of a new fastpath connection */
49070+ struct net_accel_msg_fastpath fastpath;
49071+ /*! make the message a fixed size */
49072+ u8 pad[128 - sizeof(u32)];
49073+ } u;
49074+};
49075+
49076+
49077+#define NET_ACCEL_MSG_HW_TO_MSG(_u) container_of(_u, struct net_accel_msg, u.hw)
49078+
49079+/*! Inter-domain message FIFO */
49080+typedef struct {
49081+ struct net_accel_msg *fifo;
49082+ u32 fifo_mask;
49083+ u32 *fifo_rd_i;
49084+ u32 *fifo_wr_i;
49085+ spinlock_t lock;
49086+ u32 is_locked; /* Debug flag */
49087+} sh_msg_fifo2;
49088+
49089+
49090+#define NET_ACCEL_MSG_OFFSET_MASK PAGE_MASK
49091+
49092+/* Modifiers */
49093+#define NET_ACCEL_MSG_REPLY (0x80000000)
49094+#define NET_ACCEL_MSG_ERROR (0x40000000)
49095+
49096+/* Dom0 -> DomU and reply. Handshake/version check. */
49097+#define NET_ACCEL_MSG_HELLO (0x00000001)
49098+/* Dom0 -> DomU : hardware setup (VI info.) */
49099+#define NET_ACCEL_MSG_SETHW (0x00000002)
49100+/*
49101+ * Dom0 -> DomU. Notification of a local mac to add/remove from slow
49102+ * path filter
49103+ */
49104+#define NET_ACCEL_MSG_LOCALMAC (0x00000003)
49105+/*
49106+ * DomU -> Dom0 and reply. Request for buffer table entries for
49107+ * preallocated pages.
49108+ */
49109+#define NET_ACCEL_MSG_MAPBUF (0x00000004)
49110+/*
49111+ * Dom0 -> DomU. Notification of a local mac to add/remove from fast
49112+ * path filter
49113+ */
49114+#define NET_ACCEL_MSG_FASTPATH (0x00000005)
49115+
49116+/*! Initialise a message and set the type
49117+ * \param message : the message
49118+ * \param code : the message type
49119+ */
49120+static inline void net_accel_msg_init(struct net_accel_msg *msg, int code) {
49121+ msg->id = (u32)code;
49122+}
49123+
49124+/*! initialise a shared page structure
49125+ * \param shared_page : mapped memory in which the structure resides
49126+ * \param len : size of the message FIFO area that follows
49127+ * \param up : initial up/down state of netdev
49128+ * \return 0 or an error code
49129+ */
49130+extern int net_accel_msg_init_page(void *shared_page, int len, int up);
49131+
49132+/*! initialise a message queue
49133+ * \param queue : the message FIFO to initialise
49134+ * \param indices : the read and write indices in shared memory
49135+ * \param base : the start of the memory area for the FIFO
49136+ * \param size : the size of the FIFO in bytes
49137+ */
49138+extern void net_accel_msg_init_queue(sh_msg_fifo2 *queue,
49139+ struct net_accel_msg_queue *indices,
49140+ struct net_accel_msg *base, int size);
49141+
49142+/* Notify after a batch of messages have been sent */
49143+extern void net_accel_msg_notify(int irq);
49144+
49145+/*! Send a message on the specified FIFO. The message is copied to the
49146+ * current slot of the FIFO.
49147+ * \param sp : pointer to shared page
49148+ * \param q : pointer to message FIFO to use
49149+ * \param msg : pointer to message
49150+ * \return 0 on success, -errno on failure
49151+ */
49152+extern int net_accel_msg_send(struct net_accel_shared_page *sp,
49153+ sh_msg_fifo2 *q,
49154+ struct net_accel_msg *msg);
49155+extern int net_accel_msg_reply(struct net_accel_shared_page *sp,
49156+ sh_msg_fifo2 *q,
49157+ struct net_accel_msg *msg);
49158+
49159+/*! As net_accel_msg_send but also posts a notification to the far end. */
49160+extern int net_accel_msg_send_notify(struct net_accel_shared_page *sp,
49161+ int irq, sh_msg_fifo2 *q,
49162+ struct net_accel_msg *msg);
49163+/*! As net_accel_msg_reply but also posts a notification to the far end. */
49164+extern int net_accel_msg_reply_notify(struct net_accel_shared_page *sp,
49165+ int irq, sh_msg_fifo2 *q,
49166+ struct net_accel_msg *msg);
49167+
49168+/*! Receive a message on the specified FIFO. Returns 0 on success,
49169+ * -errno on failure.
49170+ */
49171+extern int net_accel_msg_recv(struct net_accel_shared_page *sp,
49172+ sh_msg_fifo2 *q,
49173+ struct net_accel_msg *msg);
49174+
49175+/*! Look at a received message, if any, so a decision can be made
49176+ * about whether to read it now or not. The cookie is a debug aid
49177+ * that is set here and checked when it is passed to
49178+ * net_accel_msg_recv_next()
49179+ */
49180+extern int net_accel_msg_peek(struct net_accel_shared_page *sp,
49181+ sh_msg_fifo2 *queue,
49182+ struct net_accel_msg *msg, int *cookie);
49183+/*! Move the queue onto the next element, used after finished with a
49184+ * peeked msg
49185+ */
49186+extern int net_accel_msg_recv_next(struct net_accel_shared_page *sp,
49187+ sh_msg_fifo2 *queue, int cookie);
49188+
49189+/*! Start sending a message without copying. returns a pointer to a
49190+ * message that will be filled out in place. The queue is locked
49191+ * until the message is sent.
49192+ */
49193+extern
49194+struct net_accel_msg *net_accel_msg_start_send(struct net_accel_shared_page *sp,
49195+ sh_msg_fifo2 *queue,
49196+ unsigned long *flags);
49197+
49198+
49199+/*! Complete the sending of a message started with
49200+ * net_accel_msg_start_send. The message is implicit since the queue
49201+ * was locked by _start
49202+ */
49203+extern void net_accel_msg_complete_send(struct net_accel_shared_page *sp,
49204+ sh_msg_fifo2 *queue,
49205+ unsigned long *flags);
49206+
49207+/*! As net_accel_msg_complete_send but does the notify. */
49208+extern void net_accel_msg_complete_send_notify(struct net_accel_shared_page *sp,
49209+ sh_msg_fifo2 *queue,
49210+ unsigned long *flags, int irq);
49211+
49212+/*! Lock the queue so that multiple "_locked" functions can be called
49213+ * without the queue being modified by others
49214+ */
49215+static inline
49216+void net_accel_msg_lock_queue(sh_msg_fifo2 *queue, unsigned long *flags)
49217+{
49218+ spin_lock_irqsave(&queue->lock, (*flags));
49219+ rmb();
49220+ BUG_ON(queue->is_locked);
49221+ queue->is_locked = 1;
49222+}
49223+
49224+/*! Unlock the queue */
49225+static inline
49226+void net_accel_msg_unlock_queue(sh_msg_fifo2 *queue, unsigned long *flags)
49227+{
49228+ BUG_ON(!queue->is_locked);
49229+ queue->is_locked = 0;
49230+ wmb();
49231+ spin_unlock_irqrestore(&queue->lock, (*flags));
49232+}
49233+
49234+/*! Give up without sending a message that was started with
49235+ * net_accel_msg_start_send()
49236+ */
49237+static inline
49238+void net_accel_msg_abort_send(struct net_accel_shared_page *sp,
49239+ sh_msg_fifo2 *queue, unsigned long *flags)
49240+{
49241+ net_accel_msg_unlock_queue(queue, flags);
49242+}
49243+
49244+/*! Test the queue to ensure there is sufficient space */
49245+static inline
49246+int net_accel_msg_check_space(sh_msg_fifo2 *queue, unsigned space)
49247+{
49248+ return sh_fifo2_space(queue) >= space;
49249+}
49250+
49251+#endif /* NET_ACCEL_MSG_IFACE_H */
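/*
 * A minimal usage sketch of the message FIFO API declared above, assuming
 * the shared page is already mapped and the sh_msg_fifo2 has been set up
 * with net_accel_msg_init_queue().  my_sp, my_queue, my_irq and the two
 * example_* functions are placeholder names, not symbols from this driver;
 * the peek loop also assumes the usual 0-on-success return convention.
 */
static int example_send_hello(struct net_accel_shared_page *my_sp,
			      sh_msg_fifo2 *my_queue, int my_irq)
{
	struct net_accel_msg msg;

	net_accel_msg_init(&msg, NET_ACCEL_MSG_HELLO);
	/* ... fill in msg.u.hello here ... */

	/* copy into the current FIFO slot and kick the far end */
	return net_accel_msg_send_notify(my_sp, my_irq, my_queue, &msg);
}

static void example_drain(struct net_accel_shared_page *my_sp,
			  sh_msg_fifo2 *my_queue)
{
	struct net_accel_msg msg;
	int cookie;

	/* peek first, decide, then consume the slot using the cookie */
	while (net_accel_msg_peek(my_sp, my_queue, &msg, &cookie) == 0) {
		/* ... handle msg.id / msg.u here ... */
		net_accel_msg_recv_next(my_sp, my_queue, cookie);
	}
}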
49252Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_shared_fifo.h
49253===================================================================
49254--- /dev/null 1970-01-01 00:00:00.000000000 +0000
49255+++ head-2008-11-25/drivers/xen/sfc_netutil/accel_shared_fifo.h 2008-02-20 09:32:49.000000000 +0100
49256@@ -0,0 +1,127 @@
49257+/****************************************************************************
49258+ * Solarflare driver for Xen network acceleration
49259+ *
49260+ * Copyright 2006-2008: Solarflare Communications Inc,
49261+ * 9501 Jeronimo Road, Suite 250,
49262+ * Irvine, CA 92618, USA
49263+ *
49264+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
49265+ *
49266+ * This program is free software; you can redistribute it and/or modify it
49267+ * under the terms of the GNU General Public License version 2 as published
49268+ * by the Free Software Foundation, incorporated herein by reference.
49269+ *
49270+ * This program is distributed in the hope that it will be useful,
49271+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
49272+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
49273+ * GNU General Public License for more details.
49274+ *
49275+ * You should have received a copy of the GNU General Public License
49276+ * along with this program; if not, write to the Free Software
49277+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
49278+ ****************************************************************************
49279+ */
49280+
49281+#ifndef NET_ACCEL_SHARED_FIFO_H
49282+#define NET_ACCEL_SHARED_FIFO_H
49283+
49284+/*
49285+ * This is based on fifo.h, but handles sharing between address spaces
49286+ * that don't trust each other, by splitting out the read and write
49287+ * indices. This costs at least one pointer indirection more than the
49288+ * vanilla version per access.
49289+ */
49290+
49291+typedef struct {
49292+ char* fifo;
49293+ unsigned fifo_mask;
49294+ unsigned *fifo_rd_i;
49295+ unsigned *fifo_wr_i;
49296+} sh_byte_fifo2;
49297+
49298+#define SH_FIFO2_M(f, x) ((x) & ((f)->fifo_mask))
49299+
49300+static inline unsigned log2_ge(unsigned long n, unsigned min_order) {
49301+ unsigned order = min_order;
49302+ while((1ul << order) < n) ++order;
49303+ return order;
49304+}
49305+
49306+static inline unsigned long pow2(unsigned order) {
49307+ return (1ul << order);
49308+}
49309+
49310+#define is_pow2(x) (pow2(log2_ge((x), 0)) == (x))
49311+
49312+#define sh_fifo2_valid(f) ((f) && (f)->fifo && (f)->fifo_mask > 0 && \
49313+ is_pow2((f)->fifo_mask+1u))
49314+
49315+#define sh_fifo2_init(f, cap, _rptr, _wptr) \
49316+ do { \
49317+ BUG_ON(!is_pow2((cap) + 1)); \
49318+ (f)->fifo_rd_i = _rptr; \
49319+ (f)->fifo_wr_i = _wptr; \
49320+ *(f)->fifo_rd_i = *(f)->fifo_wr_i = 0u; \
49321+ (f)->fifo_mask = (cap); \
49322+ } while(0)
49323+
49324+#define sh_fifo2_num(f) SH_FIFO2_M((f),*(f)->fifo_wr_i - *(f)->fifo_rd_i)
49325+#define sh_fifo2_space(f) SH_FIFO2_M((f),*(f)->fifo_rd_i - *(f)->fifo_wr_i-1u)
49326+#define sh_fifo2_is_empty(f) (sh_fifo2_num(f)==0)
49327+#define sh_fifo2_not_empty(f) (sh_fifo2_num(f)!=0)
49328+#define sh_fifo2_is_full(f) (sh_fifo2_space(f)==0u)
49329+#define sh_fifo2_not_full(f) (sh_fifo2_space(f)!=0u)
49330+#define sh_fifo2_buf_size(f) ((f)->fifo_mask + 1u)
49331+#define sh_fifo2_capacity(f) ((f)->fifo_mask)
49332+#define sh_fifo2_end(f) ((f)->fifo + sh_fifo2_buf_size(f))
49333+#define sh_fifo2_not_half_full(f) (sh_fifo2_space(f) > (sh_fifo2_capacity(f) >> 1))
49334+
49335+#define sh_fifo2_peek(f) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_rd_i)])
49336+#define sh_fifo2_peekp(f) ((f)->fifo + SH_FIFO2_M((f), *(f)->fifo_rd_i))
49337+#define sh_fifo2_poke(f) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_wr_i)])
49338+#define sh_fifo2_pokep(f) ((f)->fifo + SH_FIFO2_M((f), *(f)->fifo_wr_i))
49339+#define sh_fifo2_peek_i(f,i) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_rd_i+(i))])
49340+#define sh_fifo2_poke_i(f,i) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_wr_i+(i))])
49341+
49342+#define sh_fifo2_rd_next(f) \
49343+ do {*(f)->fifo_rd_i = *(f)->fifo_rd_i + 1u;} while(0)
49344+#define sh_fifo2_wr_next(f) \
49345+ do {*(f)->fifo_wr_i = *(f)->fifo_wr_i + 1u;} while(0)
49346+#define sh_fifo2_rd_adv(f, n) \
49347+ do {*(f)->fifo_rd_i = *(f)->fifo_rd_i + (n);} while(0)
49348+#define sh_fifo2_wr_adv(f, n) \
49349+ do {*(f)->fifo_wr_i = *(f)->fifo_wr_i + (n);} while(0)
49350+
49351+#define sh_fifo2_put(f, v) \
49352+ do {sh_fifo2_poke(f) = (v); wmb(); sh_fifo2_wr_next(f);} while(0)
49353+
49354+#define sh_fifo2_get(f, pv) \
49355+ do {*(pv) = sh_fifo2_peek(f); mb(); sh_fifo2_rd_next(f);} while(0)
49356+
49357+static inline unsigned sh_fifo2_contig_num(sh_byte_fifo2 *f)
49358+{
49359+ unsigned fifo_wr_i = SH_FIFO2_M(f, *f->fifo_wr_i);
49360+ unsigned fifo_rd_i = SH_FIFO2_M(f, *f->fifo_rd_i);
49361+
49362+ return (fifo_wr_i >= fifo_rd_i)
49363+ ? fifo_wr_i - fifo_rd_i
49364+		: f->fifo_mask + 1u - fifo_rd_i;
49365+}
49366+
49367+static inline unsigned sh_fifo2_contig_space(sh_byte_fifo2 *f)
49368+{
49369+ unsigned fifo_wr_i = SH_FIFO2_M(f, *f->fifo_wr_i);
49370+ unsigned fifo_rd_i = SH_FIFO2_M(f, *f->fifo_rd_i);
49371+
49372+ return (fifo_rd_i > fifo_wr_i)
49373+ ? fifo_rd_i - fifo_wr_i - 1
49374+ : (f->fifo_mask + 1u - fifo_wr_i
49375+ /*
49376+ * The last byte can't be used if the read pointer
49377+ * is at zero.
49378+ */
49379+ - (fifo_rd_i==0));
49380+}
49381+
49382+
49383+#endif /* NET_ACCEL_SHARED_FIFO_H */
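/*
 * A minimal usage sketch of the shared byte FIFO above, assuming buf points
 * at a 256-byte buffer and rd_idx/wr_idx live in memory shared with the
 * peer.  example_byte_fifo and its parameters are placeholder names; the
 * capacity passed to sh_fifo2_init() must be one less than a power of two
 * because it doubles as the index mask.
 */
static void example_byte_fifo(char *buf, unsigned *rd_idx, unsigned *wr_idx)
{
	sh_byte_fifo2 f;
	char c;

	f.fifo = buf;
	sh_fifo2_init(&f, 256 - 1, rd_idx, wr_idx);	/* mask = 0xff */

	if (sh_fifo2_not_full(&f))
		sh_fifo2_put(&f, 'x');	/* write the byte, then publish wr index */

	if (sh_fifo2_not_empty(&f))
		sh_fifo2_get(&f, &c);	/* read the byte, then advance rd index */
}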
49384Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_util.c
49385===================================================================
49386--- /dev/null 1970-01-01 00:00:00.000000000 +0000
49387+++ head-2008-11-25/drivers/xen/sfc_netutil/accel_util.c 2008-02-20 09:32:49.000000000 +0100
49388@@ -0,0 +1,333 @@
49389+/****************************************************************************
49390+ * Solarflare driver for Xen network acceleration
49391+ *
49392+ * Copyright 2006-2008: Solarflare Communications Inc,
49393+ * 9501 Jeronimo Road, Suite 250,
49394+ * Irvine, CA 92618, USA
49395+ *
49396+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
49397+ *
49398+ * This program is free software; you can redistribute it and/or modify it
49399+ * under the terms of the GNU General Public License version 2 as published
49400+ * by the Free Software Foundation, incorporated herein by reference.
49401+ *
49402+ * This program is distributed in the hope that it will be useful,
49403+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
49404+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
49405+ * GNU General Public License for more details.
49406+ *
49407+ * You should have received a copy of the GNU General Public License
49408+ * along with this program; if not, write to the Free Software
49409+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
49410+ ****************************************************************************
49411+ */
49412+
49413+#include <linux/if_ether.h>
49414+#include <asm/io.h>
49415+#include <asm/pgtable.h>
49416+#include <asm/hypercall.h>
49417+#include <xen/xenbus.h>
49418+#include <xen/driver_util.h>
49419+#include <xen/gnttab.h>
49420+
49421+#include "accel_util.h"
49422+
49423+#ifdef EFX_GCOV
49424+#include "gcov.h"
49425+
49426+static int __init net_accel_init(void)
49427+{
49428+ gcov_provider_init(THIS_MODULE);
49429+ return 0;
49430+}
49431+module_init(net_accel_init);
49432+
49433+static void __exit net_accel_exit(void)
49434+{
49435+ gcov_provider_fini(THIS_MODULE);
49436+}
49437+module_exit(net_accel_exit);
49438+#endif
49439+
49440+/* Shut down a remote domain that is misbehaving */
49441+int net_accel_shutdown_remote(int domain)
49442+{
49443+ struct sched_remote_shutdown sched_shutdown = {
49444+ .domain_id = domain,
49445+ .reason = SHUTDOWN_crash
49446+ };
49447+
49448+ EPRINTK("Crashing domain %d\n", domain);
49449+
49450+ return HYPERVISOR_sched_op(SCHEDOP_remote_shutdown, &sched_shutdown);
49451+}
49452+EXPORT_SYMBOL(net_accel_shutdown_remote);
49453+
49454+
49455+/* Based on xenbus_backend_client.c:xenbus_map_ring() */
49456+static int net_accel_map_grant(struct xenbus_device *dev, int gnt_ref,
49457+ grant_handle_t *handle, void *vaddr,
49458+ u64 *dev_bus_addr, unsigned flags)
49459+{
49460+ struct gnttab_map_grant_ref op;
49461+
49462+ gnttab_set_map_op(&op, (unsigned long)vaddr, flags,
49463+ gnt_ref, dev->otherend_id);
49464+
49465+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
49466+
49467+ if (op.status != GNTST_okay) {
49468+ xenbus_dev_error
49469+ (dev, op.status,
49470+ "failed mapping in shared page %d from domain %d\n",
49471+ gnt_ref, dev->otherend_id);
49472+ } else {
49473+ *handle = op.handle;
49474+ if (dev_bus_addr)
49475+ *dev_bus_addr = op.dev_bus_addr;
49476+ }
49477+
49478+ return op.status;
49479+}
49480+
49481+
49482+/* Based on xenbus_backend_client.c:xenbus_unmap_ring() */
49483+static int net_accel_unmap_grant(struct xenbus_device *dev,
49484+ grant_handle_t handle,
49485+ void *vaddr, u64 dev_bus_addr,
49486+ unsigned flags)
49487+{
49488+ struct gnttab_unmap_grant_ref op;
49489+
49490+ gnttab_set_unmap_op(&op, (unsigned long)vaddr, flags, handle);
49491+
49492+ if (dev_bus_addr)
49493+ op.dev_bus_addr = dev_bus_addr;
49494+
49495+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
49496+
49497+ if (op.status != GNTST_okay)
49498+ xenbus_dev_error(dev, op.status,
49499+ "failed unmapping page at handle %d error %d\n",
49500+ handle, op.status);
49501+
49502+ return op.status;
49503+}
49504+
49505+
49506+int net_accel_map_device_page(struct xenbus_device *dev,
49507+ int gnt_ref, grant_handle_t *handle,
49508+ u64 *dev_bus_addr)
49509+{
49510+ return net_accel_map_grant(dev, gnt_ref, handle, 0, dev_bus_addr,
49511+ GNTMAP_device_map);
49512+}
49513+EXPORT_SYMBOL_GPL(net_accel_map_device_page);
49514+
49515+
49516+int net_accel_unmap_device_page(struct xenbus_device *dev,
49517+ grant_handle_t handle, u64 dev_bus_addr)
49518+{
49519+ return net_accel_unmap_grant(dev, handle, 0, dev_bus_addr,
49520+ GNTMAP_device_map);
49521+}
49522+EXPORT_SYMBOL_GPL(net_accel_unmap_device_page);
49523+
49524+
49525+struct net_accel_valloc_grant_mapping {
49526+ struct vm_struct *vm;
49527+ int pages;
49528+ grant_handle_t grant_handles[0];
49529+};
49530+
49531+/* Map a series of grants into a contiguous virtual area */
49532+static void *net_accel_map_grants_valloc(struct xenbus_device *dev,
49533+ unsigned *grants, int npages,
49534+ unsigned flags, void **priv)
49535+{
49536+ struct net_accel_valloc_grant_mapping *map;
49537+ struct vm_struct *vm;
49538+ void *addr;
49539+ int i, j, rc;
49540+
49541+ vm = alloc_vm_area(PAGE_SIZE * npages);
49542+ if (vm == NULL) {
49543+ EPRINTK("No memory from alloc_vm_area.\n");
49544+ return NULL;
49545+ }
49546+ /*
49547+ * Get a structure in which we will record all the info needed
49548+ * to undo the mapping.
49549+ */
49550+ map = kzalloc(sizeof(struct net_accel_valloc_grant_mapping) +
49551+ npages * sizeof(grant_handle_t), GFP_KERNEL);
49552+ if (map == NULL) {
49553+ EPRINTK("No memory for net_accel_valloc_grant_mapping\n");
49554+ free_vm_area(vm);
49555+ return NULL;
49556+ }
49557+ map->vm = vm;
49558+ map->pages = npages;
49559+
49560+ /* Do the actual mapping */
49561+ addr = vm->addr;
49562+ for (i = 0; i < npages; i++) {
49563+ rc = net_accel_map_grant(dev, grants[i], map->grant_handles + i,
49564+ addr, NULL, flags);
49565+ if (rc != 0)
49566+ goto undo;
49567+ addr = (void*)((unsigned long)addr + PAGE_SIZE);
49568+ }
49569+
49570+ if (priv)
49571+ *priv = (void *)map;
49572+ else
49573+ kfree(map);
49574+
49575+ return vm->addr;
49576+
49577+ undo:
49578+ EPRINTK("Aborting contig map due to single map failure %d (%d of %d)\n",
49579+ rc, i+1, npages);
49580+ for (j = 0; j < i; j++) {
49581+ addr = (void*)((unsigned long)vm->addr + (j * PAGE_SIZE));
49582+ net_accel_unmap_grant(dev, map->grant_handles[j], addr, 0,
49583+ flags);
49584+ }
49585+ free_vm_area(vm);
49586+ kfree(map);
49587+ return NULL;
49588+}
49589+
49590+/* Undo the result of the mapping */
49591+static void net_accel_unmap_grants_vfree(struct xenbus_device *dev,
49592+ unsigned flags, void *priv)
49593+{
49594+ struct net_accel_valloc_grant_mapping *map =
49595+ (struct net_accel_valloc_grant_mapping *)priv;
49596+
49597+ void *addr = map->vm->addr;
49598+ int npages = map->pages;
49599+ int i;
49600+
49601+ for (i = 0; i < npages; i++) {
49602+ net_accel_unmap_grant(dev, map->grant_handles[i], addr, 0,
49603+ flags);
49604+ addr = (void*)((unsigned long)addr + PAGE_SIZE);
49605+ }
49606+ free_vm_area(map->vm);
49607+ kfree(map);
49608+}
49609+
49610+
49611+void *net_accel_map_grants_contig(struct xenbus_device *dev,
49612+ unsigned *grants, int npages,
49613+ void **priv)
49614+{
49615+ return net_accel_map_grants_valloc(dev, grants, npages,
49616+ GNTMAP_host_map, priv);
49617+}
49618+EXPORT_SYMBOL(net_accel_map_grants_contig);
49619+
49620+
49621+void net_accel_unmap_grants_contig(struct xenbus_device *dev,
49622+ void *priv)
49623+{
49624+ net_accel_unmap_grants_vfree(dev, GNTMAP_host_map, priv);
49625+}
49626+EXPORT_SYMBOL(net_accel_unmap_grants_contig);
49627+
49628+
49629+void *net_accel_map_iomem_page(struct xenbus_device *dev, int gnt_ref,
49630+ void **priv)
49631+{
49632+ return net_accel_map_grants_valloc(dev, &gnt_ref, 1,
49633+ GNTMAP_host_map, priv);
49634+}
49635+EXPORT_SYMBOL(net_accel_map_iomem_page);
49636+
49637+
49638+void net_accel_unmap_iomem_page(struct xenbus_device *dev, void *priv)
49639+{
49640+ net_accel_unmap_grants_vfree(dev, GNTMAP_host_map, priv);
49641+}
49642+EXPORT_SYMBOL(net_accel_unmap_iomem_page);
49643+
49644+
49645+int net_accel_grant_page(struct xenbus_device *dev, unsigned long mfn,
49646+ int is_iomem)
49647+{
49648+ int err = gnttab_grant_foreign_access(dev->otherend_id, mfn,
49649+ is_iomem ? GTF_PCD : 0);
49650+ if (err < 0)
49651+ xenbus_dev_error(dev, err, "failed granting access to page\n");
49652+ return err;
49653+}
49654+EXPORT_SYMBOL_GPL(net_accel_grant_page);
49655+
49656+
49657+int net_accel_ungrant_page(grant_ref_t gntref)
49658+{
49659+ if (unlikely(gnttab_query_foreign_access(gntref) != 0)) {
49660+ EPRINTK("%s: remote domain still using grant %d\n", __FUNCTION__,
49661+ gntref);
49662+ return -EBUSY;
49663+ }
49664+
49665+ gnttab_end_foreign_access(gntref, 0);
49666+ return 0;
49667+}
49668+EXPORT_SYMBOL_GPL(net_accel_ungrant_page);
49669+
49670+
49671+int net_accel_xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
49672+{
49673+ char *s, *e, *macstr;
49674+ int i;
49675+
49676+ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
49677+ if (IS_ERR(macstr))
49678+ return PTR_ERR(macstr);
49679+
49680+ for (i = 0; i < ETH_ALEN; i++) {
49681+ mac[i] = simple_strtoul(s, &e, 16);
49682+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
49683+ kfree(macstr);
49684+ return -ENOENT;
49685+ }
49686+ s = e+1;
49687+ }
49688+
49689+ kfree(macstr);
49690+ return 0;
49691+}
49692+EXPORT_SYMBOL_GPL(net_accel_xen_net_read_mac);
49693+
49694+
49695+void net_accel_update_state(struct xenbus_device *dev, int state)
49696+{
49697+ struct xenbus_transaction tr;
49698+ int err;
49699+
49700+ DPRINTK("%s: setting accelstate to %s\n", __FUNCTION__,
49701+ xenbus_strstate(state));
49702+
49703+ if (xenbus_exists(XBT_NIL, dev->nodename, "")) {
49704+ VPRINTK("%s: nodename %s\n", __FUNCTION__, dev->nodename);
49705+ again:
49706+ err = xenbus_transaction_start(&tr);
49707+ if (err == 0)
49708+ err = xenbus_printf(tr, dev->nodename, "accelstate",
49709+ "%d", state);
49710+ if (err != 0) {
49711+ xenbus_transaction_end(tr, 1);
49712+ } else {
49713+ err = xenbus_transaction_end(tr, 0);
49714+ if (err == -EAGAIN)
49715+ goto again;
49716+ }
49717+ }
49718+}
49719+EXPORT_SYMBOL_GPL(net_accel_update_state);
49720+
49721+MODULE_LICENSE("GPL");
49722Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_util.h
49723===================================================================
49724--- /dev/null 1970-01-01 00:00:00.000000000 +0000
49725+++ head-2008-11-25/drivers/xen/sfc_netutil/accel_util.h 2008-02-20 09:32:49.000000000 +0100
49726@@ -0,0 +1,127 @@
49727+/****************************************************************************
49728+ * Solarflare driver for Xen network acceleration
49729+ *
49730+ * Copyright 2006-2008: Solarflare Communications Inc,
49731+ * 9501 Jeronimo Road, Suite 250,
49732+ * Irvine, CA 92618, USA
49733+ *
49734+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
49735+ *
49736+ * This program is free software; you can redistribute it and/or modify it
49737+ * under the terms of the GNU General Public License version 2 as published
49738+ * by the Free Software Foundation, incorporated herein by reference.
49739+ *
49740+ * This program is distributed in the hope that it will be useful,
49741+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
49742+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
49743+ * GNU General Public License for more details.
49744+ *
49745+ * You should have received a copy of the GNU General Public License
49746+ * along with this program; if not, write to the Free Software
49747+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
49748+ ****************************************************************************
49749+ */
49750+
49751+#ifndef NETBACK_ACCEL_UTIL_H
49752+#define NETBACK_ACCEL_UTIL_H
49753+
49754+#ifdef DPRINTK
49755+#undef DPRINTK
49756+#endif
49757+
49758+#define FILE_LEAF strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__
49759+
49760+#if 1
49761+#define VPRINTK(_f, _a...)
49762+#else
49763+#define VPRINTK(_f, _a...) \
49764+ printk("(file=%s, line=%d) " _f, \
49765+ FILE_LEAF , __LINE__ , ## _a )
49766+#endif
49767+
49768+#if 1
49769+#define DPRINTK(_f, _a...)
49770+#else
49771+#define DPRINTK(_f, _a...) \
49772+ printk("(file=%s, line=%d) " _f, \
49773+ FILE_LEAF , __LINE__ , ## _a )
49774+#endif
49775+
49776+#define EPRINTK(_f, _a...) \
49777+ printk("(file=%s, line=%d) " _f, \
49778+ FILE_LEAF , __LINE__ , ## _a )
49779+
49780+#define EPRINTK_ON(exp) \
49781+ do { \
49782+ if (exp) \
49783+ EPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \
49784+ } while(0)
49785+
49786+#define DPRINTK_ON(exp) \
49787+ do { \
49788+ if (exp) \
49789+ DPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \
49790+ } while(0)
49791+
49792+#define MAC_FMT "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"
49793+#define MAC_ARG(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5]
49794+
49795+#include <xen/xenbus.h>
49796+
49797+/*! Map a set of pages from another domain
49798+ * \param dev The xenbus device context
49799+ * \param priv The private data returned by the mapping function
49800+ */
49801+extern
49802+void *net_accel_map_grants_contig(struct xenbus_device *dev,
49803+ unsigned *grants, int npages,
49804+ void **priv);
49805+
49806+/*! Unmap a set of pages mapped using net_accel_map_grants_contig.
49807+ * \param dev The xenbus device context
49808+ * \param priv The private data returned by the mapping function
49809+ */
49810+extern
49811+void net_accel_unmap_grants_contig(struct xenbus_device *dev, void *priv);
49812+
49813+/*! Read the MAC address of a device from xenstore */
49814+extern
49815+int net_accel_xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
49816+
49817+/*! Update the accelstate field for a device in xenstore */
49818+extern
49819+void net_accel_update_state(struct xenbus_device *dev, int state);
49820+
49821+/* These four map/unmap functions are based on
49822+ * xenbus_backend_client.c:xenbus_map_ring(). However, they are not
49823+ * used for ring buffers, but simply to map pages between domains
49824+ * or to make a page accessible to a device
49825+ */
49826+extern
49827+int net_accel_map_device_page(struct xenbus_device *dev,
49828+ int gnt_ref, grant_handle_t *handle,
49829+ u64 *dev_bus_addr);
49830+extern
49831+int net_accel_unmap_device_page(struct xenbus_device *dev,
49832+ grant_handle_t handle, u64 dev_bus_addr);
49833+extern
49834+void *net_accel_map_iomem_page(struct xenbus_device *dev, int gnt_ref,
49835+ void **priv);
49836+extern
49837+void net_accel_unmap_iomem_page(struct xenbus_device *dev, void *priv);
49838+
49839+/*! Grant a page to a remote domain */
49840+extern
49841+int net_accel_grant_page(struct xenbus_device *dev, unsigned long mfn,
49842+ int is_iomem);
49843+/*! Undo a net_accel_grant_page */
49844+extern
49845+int net_accel_ungrant_page(grant_ref_t gntref);
49846+
49847+
49848+/*! Shut down a remote domain that is misbehaving */
49849+extern
49850+int net_accel_shutdown_remote(int domain);
49851+
49852+
49853+#endif
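/*
 * A minimal usage sketch of the grant-mapping helpers declared above,
 * assuming my_grants holds grant references obtained from the frontend
 * (e.g. via xenstore).  example_map/example_unmap and my_grants are
 * placeholder names, not symbols from this driver.
 */
static void *example_map(struct xenbus_device *dev, unsigned *my_grants,
			 void **priv)
{
	/* map two granted pages into one contiguous virtual area */
	return net_accel_map_grants_contig(dev, my_grants, 2, priv);
}

static void example_unmap(struct xenbus_device *dev, void *priv)
{
	/* priv is the opaque handle the map call filled in */
	net_accel_unmap_grants_contig(dev, priv);
}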
49854Index: head-2008-11-25/drivers/xen/tpmback/Makefile
49855===================================================================
49856--- /dev/null 1970-01-01 00:00:00.000000000 +0000
49857+++ head-2008-11-25/drivers/xen/tpmback/Makefile 2007-06-12 13:13:45.000000000 +0200
49858@@ -0,0 +1,4 @@
49859+
49860+obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmbk.o
49861+
49862+tpmbk-y += tpmback.o interface.o xenbus.o
49863Index: head-2008-11-25/drivers/xen/tpmback/common.h
49864===================================================================
49865--- /dev/null 1970-01-01 00:00:00.000000000 +0000
49866+++ head-2008-11-25/drivers/xen/tpmback/common.h 2007-06-12 13:13:45.000000000 +0200
49867@@ -0,0 +1,85 @@
49868+/******************************************************************************
49869+ * drivers/xen/tpmback/common.h
49870+ */
49871+
49872+#ifndef __TPM__BACKEND__COMMON_H__
49873+#define __TPM__BACKEND__COMMON_H__
49874+
49875+#include <linux/version.h>
49876+#include <linux/module.h>
49877+#include <linux/interrupt.h>
49878+#include <linux/slab.h>
49879+#include <xen/evtchn.h>
49880+#include <xen/driver_util.h>
49881+#include <xen/interface/grant_table.h>
49882+#include <xen/interface/io/tpmif.h>
49883+#include <asm/io.h>
49884+#include <asm/pgalloc.h>
49885+
49886+#define DPRINTK(_f, _a...) \
49887+ pr_debug("(file=%s, line=%d) " _f, \
49888+ __FILE__ , __LINE__ , ## _a )
49889+
49890+struct backend_info;
49891+
49892+typedef struct tpmif_st {
49893+ struct list_head tpmif_list;
49894+ /* Unique identifier for this interface. */
49895+ domid_t domid;
49896+ unsigned int handle;
49897+
49898+ /* Physical parameters of the comms window. */
49899+ unsigned int irq;
49900+
49901+ /* The shared rings and indexes. */
49902+ tpmif_tx_interface_t *tx;
49903+ struct vm_struct *tx_area;
49904+
49905+ /* Miscellaneous private stuff. */
49906+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
49907+ int active;
49908+
49909+ struct tpmif_st *hash_next;
49910+ struct list_head list; /* scheduling list */
49911+ atomic_t refcnt;
49912+
49913+ struct backend_info *bi;
49914+
49915+ grant_handle_t shmem_handle;
49916+ grant_ref_t shmem_ref;
49917+ struct page **mmap_pages;
49918+
49919+ char devname[20];
49920+} tpmif_t;
49921+
49922+void tpmif_disconnect_complete(tpmif_t * tpmif);
49923+tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi);
49924+void tpmif_interface_init(void);
49925+void tpmif_interface_exit(void);
49926+void tpmif_schedule_work(tpmif_t * tpmif);
49927+void tpmif_deschedule_work(tpmif_t * tpmif);
49928+void tpmif_xenbus_init(void);
49929+void tpmif_xenbus_exit(void);
49930+int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
49931+irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
49932+
49933+long int tpmback_get_instance(struct backend_info *bi);
49934+
49935+int vtpm_release_packets(tpmif_t * tpmif, int send_msgs);
49936+
49937+
49938+#define tpmif_get(_b) (atomic_inc(&(_b)->refcnt))
49939+#define tpmif_put(_b) \
49940+ do { \
49941+ if (atomic_dec_and_test(&(_b)->refcnt)) \
49942+ tpmif_disconnect_complete(_b); \
49943+ } while (0)
49944+
49945+extern int num_frontends;
49946+
49947+static inline unsigned long idx_to_kaddr(tpmif_t *t, unsigned int idx)
49948+{
49949+ return (unsigned long)pfn_to_kaddr(page_to_pfn(t->mmap_pages[idx]));
49950+}
49951+
49952+#endif /* __TPM__BACKEND__COMMON_H__ */
49953Index: head-2008-11-25/drivers/xen/tpmback/interface.c
49954===================================================================
49955--- /dev/null 1970-01-01 00:00:00.000000000 +0000
49956+++ head-2008-11-25/drivers/xen/tpmback/interface.c 2008-01-21 11:15:26.000000000 +0100
49957@@ -0,0 +1,168 @@
49958+ /*****************************************************************************
49959+ * drivers/xen/tpmback/interface.c
49960+ *
49961+ * Virtual TPM interface management.
49962+ *
49963+ * Copyright (c) 2005, IBM Corporation
49964+ *
49965+ * Author: Stefan Berger, stefanb@us.ibm.com
49966+ *
49967+ * This code has been derived from drivers/xen/netback/interface.c
49968+ * Copyright (c) 2004, Keir Fraser
49969+ */
49970+
49971+#include "common.h"
49972+#include <xen/balloon.h>
49973+#include <xen/gnttab.h>
49974+
49975+static kmem_cache_t *tpmif_cachep;
49976+int num_frontends = 0;
49977+
49978+LIST_HEAD(tpmif_list);
49979+
49980+static tpmif_t *alloc_tpmif(domid_t domid, struct backend_info *bi)
49981+{
49982+ tpmif_t *tpmif;
49983+
49984+ tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL);
49985+ if (tpmif == NULL)
49986+ goto out_of_memory;
49987+
49988+ memset(tpmif, 0, sizeof (*tpmif));
49989+ tpmif->domid = domid;
49990+ tpmif->status = DISCONNECTED;
49991+ tpmif->bi = bi;
49992+ snprintf(tpmif->devname, sizeof(tpmif->devname), "tpmif%d", domid);
49993+ atomic_set(&tpmif->refcnt, 1);
49994+
49995+ tpmif->mmap_pages = alloc_empty_pages_and_pagevec(TPMIF_TX_RING_SIZE);
49996+ if (tpmif->mmap_pages == NULL)
49997+ goto out_of_memory;
49998+
49999+ list_add(&tpmif->tpmif_list, &tpmif_list);
50000+ num_frontends++;
50001+
50002+ return tpmif;
50003+
50004+ out_of_memory:
50005+ if (tpmif != NULL)
50006+ kmem_cache_free(tpmif_cachep, tpmif);
50007+ printk("%s: out of memory\n", __FUNCTION__);
50008+ return ERR_PTR(-ENOMEM);
50009+}
50010+
50011+static void free_tpmif(tpmif_t * tpmif)
50012+{
50013+ num_frontends--;
50014+ list_del(&tpmif->tpmif_list);
50015+ free_empty_pages_and_pagevec(tpmif->mmap_pages, TPMIF_TX_RING_SIZE);
50016+ kmem_cache_free(tpmif_cachep, tpmif);
50017+}
50018+
50019+tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi)
50020+{
50021+ tpmif_t *tpmif;
50022+
50023+ list_for_each_entry(tpmif, &tpmif_list, tpmif_list) {
50024+ if (tpmif->bi == bi) {
50025+ if (tpmif->domid == domid) {
50026+ tpmif_get(tpmif);
50027+ return tpmif;
50028+ } else {
50029+ return ERR_PTR(-EEXIST);
50030+ }
50031+ }
50032+ }
50033+
50034+ return alloc_tpmif(domid, bi);
50035+}
50036+
50037+static int map_frontend_page(tpmif_t *tpmif, unsigned long shared_page)
50038+{
50039+ struct gnttab_map_grant_ref op;
50040+
50041+ gnttab_set_map_op(&op, (unsigned long)tpmif->tx_area->addr,
50042+ GNTMAP_host_map, shared_page, tpmif->domid);
50043+
50044+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
50045+ BUG();
50046+
50047+ if (op.status) {
50048+ DPRINTK(" Grant table operation failure !\n");
50049+ return op.status;
50050+ }
50051+
50052+ tpmif->shmem_ref = shared_page;
50053+ tpmif->shmem_handle = op.handle;
50054+
50055+ return 0;
50056+}
50057+
50058+static void unmap_frontend_page(tpmif_t *tpmif)
50059+{
50060+ struct gnttab_unmap_grant_ref op;
50061+
50062+ gnttab_set_unmap_op(&op, (unsigned long)tpmif->tx_area->addr,
50063+ GNTMAP_host_map, tpmif->shmem_handle);
50064+
50065+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
50066+ BUG();
50067+}
50068+
50069+int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn)
50070+{
50071+ int err;
50072+
50073+ if (tpmif->irq)
50074+ return 0;
50075+
50076+ if ((tpmif->tx_area = alloc_vm_area(PAGE_SIZE)) == NULL)
50077+ return -ENOMEM;
50078+
50079+ err = map_frontend_page(tpmif, shared_page);
50080+ if (err) {
50081+ free_vm_area(tpmif->tx_area);
50082+ return err;
50083+ }
50084+
50085+ tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr;
50086+ memset(tpmif->tx, 0, PAGE_SIZE);
50087+
50088+ err = bind_interdomain_evtchn_to_irqhandler(
50089+ tpmif->domid, evtchn, tpmif_be_int, 0, tpmif->devname, tpmif);
50090+ if (err < 0) {
50091+ unmap_frontend_page(tpmif);
50092+ free_vm_area(tpmif->tx_area);
50093+ return err;
50094+ }
50095+ tpmif->irq = err;
50096+
50097+ tpmif->shmem_ref = shared_page;
50098+ tpmif->active = 1;
50099+
50100+ return 0;
50101+}
50102+
50103+void tpmif_disconnect_complete(tpmif_t *tpmif)
50104+{
50105+ if (tpmif->irq)
50106+ unbind_from_irqhandler(tpmif->irq, tpmif);
50107+
50108+ if (tpmif->tx) {
50109+ unmap_frontend_page(tpmif);
50110+ free_vm_area(tpmif->tx_area);
50111+ }
50112+
50113+ free_tpmif(tpmif);
50114+}
50115+
50116+void __init tpmif_interface_init(void)
50117+{
50118+ tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t),
50119+ 0, 0, NULL, NULL);
50120+}
50121+
50122+void __exit tpmif_interface_exit(void)
50123+{
50124+ kmem_cache_destroy(tpmif_cachep);
50125+}
50126Index: head-2008-11-25/drivers/xen/tpmback/tpmback.c
50127===================================================================
50128--- /dev/null 1970-01-01 00:00:00.000000000 +0000
50129+++ head-2008-11-25/drivers/xen/tpmback/tpmback.c 2007-06-12 13:13:45.000000000 +0200
50130@@ -0,0 +1,944 @@
50131+/******************************************************************************
50132+ * drivers/xen/tpmback/tpmback.c
50133+ *
50134+ * Copyright (c) 2005, IBM Corporation
50135+ *
50136+ * Author: Stefan Berger, stefanb@us.ibm.com
50137+ * Grant table support: Mahadevan Gomathisankaran
50138+ *
50139+ * This code has been derived from drivers/xen/netback/netback.c
50140+ * Copyright (c) 2002-2004, K A Fraser
50141+ *
50142+ */
50143+
50144+#include "common.h"
50145+#include <xen/evtchn.h>
50146+
50147+#include <linux/types.h>
50148+#include <linux/list.h>
50149+#include <linux/miscdevice.h>
50150+#include <linux/poll.h>
50151+#include <asm/uaccess.h>
50152+#include <xen/xenbus.h>
50153+#include <xen/interface/grant_table.h>
50154+#include <xen/gnttab.h>
50155+
50156+/* local data structures */
50157+struct data_exchange {
50158+ struct list_head pending_pak;
50159+ struct list_head current_pak;
50160+ unsigned int copied_so_far;
50161+ u8 has_opener:1;
50162+ u8 aborted:1;
50163+ rwlock_t pak_lock; // protects all of the previous fields
50164+ wait_queue_head_t wait_queue;
50165+};
50166+
50167+struct vtpm_resp_hdr {
50168+ uint32_t instance_no;
50169+ uint16_t tag_no;
50170+ uint32_t len_no;
50171+ uint32_t ordinal_no;
50172+} __attribute__ ((packed));
50173+
50174+struct packet {
50175+ struct list_head next;
50176+ unsigned int data_len;
50177+ u8 *data_buffer;
50178+ tpmif_t *tpmif;
50179+ u32 tpm_instance;
50180+ u8 req_tag;
50181+ u32 last_read;
50182+ u8 flags;
50183+ struct timer_list processing_timer;
50184+};
50185+
50186+enum {
50187+ PACKET_FLAG_DISCARD_RESPONSE = 1,
50188+};
50189+
50190+/* local variables */
50191+static struct data_exchange dataex;
50192+
50193+/* local function prototypes */
50194+static int _packet_write(struct packet *pak,
50195+ const char *data, size_t size, int userbuffer);
50196+static void processing_timeout(unsigned long ptr);
50197+static int packet_read_shmem(struct packet *pak,
50198+ tpmif_t * tpmif,
50199+ u32 offset,
50200+ char *buffer, int isuserbuffer, u32 left);
50201+static int vtpm_queue_packet(struct packet *pak);
50202+
50203+/***************************************************************
50204+ Buffer copying for user and kernel space buffers.
50205+***************************************************************/
50206+static inline int copy_from_buffer(void *to,
50207+ const void *from, unsigned long size,
50208+ int isuserbuffer)
50209+{
50210+ if (isuserbuffer) {
50211+ if (copy_from_user(to, (void __user *)from, size))
50212+ return -EFAULT;
50213+ } else {
50214+ memcpy(to, from, size);
50215+ }
50216+ return 0;
50217+}
50218+
50219+static inline int copy_to_buffer(void *to,
50220+ const void *from, unsigned long size,
50221+ int isuserbuffer)
50222+{
50223+ if (isuserbuffer) {
50224+ if (copy_to_user((void __user *)to, from, size))
50225+ return -EFAULT;
50226+ } else {
50227+ memcpy(to, from, size);
50228+ }
50229+ return 0;
50230+}
50231+
50232+
50233+static void dataex_init(struct data_exchange *dataex)
50234+{
50235+ INIT_LIST_HEAD(&dataex->pending_pak);
50236+ INIT_LIST_HEAD(&dataex->current_pak);
50237+ dataex->has_opener = 0;
50238+ rwlock_init(&dataex->pak_lock);
50239+ init_waitqueue_head(&dataex->wait_queue);
50240+}
50241+
50242+/***************************************************************
50243+ Packet-related functions
50244+***************************************************************/
50245+
50246+static struct packet *packet_find_instance(struct list_head *head,
50247+ u32 tpm_instance)
50248+{
50249+ struct packet *pak;
50250+ struct list_head *p;
50251+
50252+ /*
50253+ * traverse the list of packets and return the first
50254+ * one with the given instance number
50255+ */
50256+ list_for_each(p, head) {
50257+ pak = list_entry(p, struct packet, next);
50258+
50259+ if (pak->tpm_instance == tpm_instance) {
50260+ return pak;
50261+ }
50262+ }
50263+ return NULL;
50264+}
50265+
50266+static struct packet *packet_find_packet(struct list_head *head, void *packet)
50267+{
50268+ struct packet *pak;
50269+ struct list_head *p;
50270+
50271+ /*
50272+	 * traverse the list of packets and return the given
50273+	 * packet if it is still on the list
50274+ */
50275+ list_for_each(p, head) {
50276+ pak = list_entry(p, struct packet, next);
50277+
50278+ if (pak == packet) {
50279+ return pak;
50280+ }
50281+ }
50282+ return NULL;
50283+}
50284+
50285+static struct packet *packet_alloc(tpmif_t * tpmif,
50286+ u32 size, u8 req_tag, u8 flags)
50287+{
50288+ struct packet *pak = NULL;
50289+ pak = kzalloc(sizeof (struct packet), GFP_ATOMIC);
50290+ if (NULL != pak) {
50291+ if (tpmif) {
50292+ pak->tpmif = tpmif;
50293+ pak->tpm_instance = tpmback_get_instance(tpmif->bi);
50294+ tpmif_get(tpmif);
50295+ }
50296+ pak->data_len = size;
50297+ pak->req_tag = req_tag;
50298+ pak->last_read = 0;
50299+ pak->flags = flags;
50300+
50301+ /*
50302+ * cannot do tpmif_get(tpmif); bad things happen
50303+ * on the last tpmif_put()
50304+ */
50305+ init_timer(&pak->processing_timer);
50306+ pak->processing_timer.function = processing_timeout;
50307+ pak->processing_timer.data = (unsigned long)pak;
50308+ }
50309+ return pak;
50310+}
50311+
50312+static inline void packet_reset(struct packet *pak)
50313+{
50314+ pak->last_read = 0;
50315+}
50316+
50317+static void packet_free(struct packet *pak)
50318+{
50319+ if (timer_pending(&pak->processing_timer)) {
50320+ BUG();
50321+ }
50322+
50323+ if (pak->tpmif)
50324+ tpmif_put(pak->tpmif);
50325+ kfree(pak->data_buffer);
50326+ /*
50327+ * cannot do tpmif_put(pak->tpmif); bad things happen
50328+ * on the last tpmif_put()
50329+ */
50330+ kfree(pak);
50331+}
50332+
50333+
50334+/*
50335+ * Write data to the shared memory and send it to the FE.
50336+ */
50337+static int packet_write(struct packet *pak,
50338+ const char *data, size_t size, int isuserbuffer)
50339+{
50340+ int rc = 0;
50341+
50342+ if (0 != (pak->flags & PACKET_FLAG_DISCARD_RESPONSE)) {
50343+		/* Don't send a response to this packet. Just acknowledge it. */
50344+ rc = size;
50345+ } else {
50346+ rc = _packet_write(pak, data, size, isuserbuffer);
50347+ }
50348+
50349+ return rc;
50350+}
50351+
50352+int _packet_write(struct packet *pak,
50353+ const char *data, size_t size, int isuserbuffer)
50354+{
50355+ /*
50356+ * Write into the shared memory pages directly
50357+ * and send it to the front end.
50358+ */
50359+ tpmif_t *tpmif = pak->tpmif;
50360+ grant_handle_t handle;
50361+ int rc = 0;
50362+ unsigned int i = 0;
50363+ unsigned int offset = 0;
50364+
50365+ if (tpmif == NULL) {
50366+ return -EFAULT;
50367+ }
50368+
50369+ if (tpmif->status == DISCONNECTED) {
50370+ return size;
50371+ }
50372+
50373+ while (offset < size && i < TPMIF_TX_RING_SIZE) {
50374+ unsigned int tocopy;
50375+ struct gnttab_map_grant_ref map_op;
50376+ struct gnttab_unmap_grant_ref unmap_op;
50377+ tpmif_tx_request_t *tx;
50378+
50379+ tx = &tpmif->tx->ring[i].req;
50380+
50381+ if (0 == tx->addr) {
50382+ DPRINTK("ERROR: Buffer for outgoing packet NULL?! i=%d\n", i);
50383+ return 0;
50384+ }
50385+
50386+ gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
50387+ GNTMAP_host_map, tx->ref, tpmif->domid);
50388+
50389+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
50390+ &map_op, 1))) {
50391+ BUG();
50392+ }
50393+
50394+ handle = map_op.handle;
50395+
50396+ if (map_op.status) {
50397+ DPRINTK(" Grant table operation failure !\n");
50398+ return 0;
50399+ }
50400+
50401+ tocopy = min_t(size_t, size - offset, PAGE_SIZE);
50402+
50403+ if (copy_from_buffer((void *)(idx_to_kaddr(tpmif, i) |
50404+ (tx->addr & ~PAGE_MASK)),
50405+ &data[offset], tocopy, isuserbuffer)) {
50406+ tpmif_put(tpmif);
50407+ return -EFAULT;
50408+ }
50409+ tx->size = tocopy;
50410+
50411+ gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
50412+ GNTMAP_host_map, handle);
50413+
50414+ if (unlikely
50415+ (HYPERVISOR_grant_table_op
50416+ (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) {
50417+ BUG();
50418+ }
50419+
50420+ offset += tocopy;
50421+ i++;
50422+ }
50423+
50424+ rc = offset;
50425+ DPRINTK("Notifying frontend via irq %d\n", tpmif->irq);
50426+ notify_remote_via_irq(tpmif->irq);
50427+
50428+ return rc;
50429+}
50430+
50431+/*
50432+ * Read data from the shared memory and copy it directly into the
50433+ * provided buffer. Advance the read_last indicator which tells
50434+ * how many bytes have already been read.
50435+ */
50436+static int packet_read(struct packet *pak, size_t numbytes,
50437+ char *buffer, size_t buffersize, int isuserbuffer)
50438+{
50439+ tpmif_t *tpmif = pak->tpmif;
50440+
50441+ /*
50442+ * Read 'numbytes' of data from the buffer. The first 4
50443+ * bytes are the instance number in network byte order,
50444+ * after that come the data from the shared memory buffer.
50445+ */
50446+ u32 to_copy;
50447+ u32 offset = 0;
50448+ u32 room_left = buffersize;
50449+
50450+ if (pak->last_read < 4) {
50451+ /*
50452+ * copy the instance number into the buffer
50453+ */
50454+ u32 instance_no = htonl(pak->tpm_instance);
50455+ u32 last_read = pak->last_read;
50456+
50457+ to_copy = min_t(size_t, 4 - last_read, numbytes);
50458+
50459+ if (copy_to_buffer(&buffer[0],
50460+ &(((u8 *) & instance_no)[last_read]),
50461+ to_copy, isuserbuffer)) {
50462+ return -EFAULT;
50463+ }
50464+
50465+ pak->last_read += to_copy;
50466+ offset += to_copy;
50467+ room_left -= to_copy;
50468+ }
50469+
50470+ /*
50471+ * If the packet has a data buffer appended, read from it...
50472+ */
50473+
50474+ if (room_left > 0) {
50475+ if (pak->data_buffer) {
50476+ u32 to_copy = min_t(u32, pak->data_len - offset, room_left);
50477+ u32 last_read = pak->last_read - 4;
50478+
50479+ if (copy_to_buffer(&buffer[offset],
50480+ &pak->data_buffer[last_read],
50481+ to_copy, isuserbuffer)) {
50482+ return -EFAULT;
50483+ }
50484+ pak->last_read += to_copy;
50485+ offset += to_copy;
50486+ } else {
50487+ offset = packet_read_shmem(pak,
50488+ tpmif,
50489+ offset,
50490+ buffer,
50491+ isuserbuffer, room_left);
50492+ }
50493+ }
50494+ return offset;
50495+}
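/*
 * A worked view of the byte stream packet_read() produces for the reader of
 * the character device, following the layout described above:
 *
 *   bytes 0..3   vTPM instance number, network byte order (htonl)
 *   bytes 4..    the request data, either from pak->data_buffer or pulled
 *                page by page out of the shared ring by packet_read_shmem()
 *
 * A hypothetical userspace consumer (not part of this driver) would do
 * something like the following, with fd assumed to be the open vtpm device.
 */
#if 0	/* userspace-style sketch, for documentation only */
	unsigned char buf[4096];
	ssize_t n = read(fd, buf, sizeof(buf));
	if (n >= 4) {
		uint32_t instance = ntohl(*(uint32_t *)buf);
		/* buf[4]..buf[n-1] is the TPM command for that instance */
	}
#endif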
50496+
50497+static int packet_read_shmem(struct packet *pak,
50498+ tpmif_t * tpmif,
50499+ u32 offset, char *buffer, int isuserbuffer,
50500+ u32 room_left)
50501+{
50502+ u32 last_read = pak->last_read - 4;
50503+ u32 i = (last_read / PAGE_SIZE);
50504+ u32 pg_offset = last_read & (PAGE_SIZE - 1);
50505+ u32 to_copy;
50506+ grant_handle_t handle;
50507+
50508+ tpmif_tx_request_t *tx;
50509+
50510+ tx = &tpmif->tx->ring[0].req;
50511+ /*
50512+ * Start copying data at the page with index 'index'
50513+ * and within that page at offset 'offset'.
50514+ * Copy a maximum of 'room_left' bytes.
50515+ */
50516+ to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left);
50517+ while (to_copy > 0) {
50518+ void *src;
50519+ struct gnttab_map_grant_ref map_op;
50520+ struct gnttab_unmap_grant_ref unmap_op;
50521+
50522+ tx = &tpmif->tx->ring[i].req;
50523+
50524+ gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
50525+ GNTMAP_host_map, tx->ref, tpmif->domid);
50526+
50527+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
50528+ &map_op, 1))) {
50529+ BUG();
50530+ }
50531+
50532+ if (map_op.status) {
50533+ DPRINTK(" Grant table operation failure !\n");
50534+ return -EFAULT;
50535+ }
50536+
50537+ handle = map_op.handle;
50538+
50539+ if (to_copy > tx->size) {
50540+ /*
50541+ * User requests more than what's available
50542+ */
50543+ to_copy = min_t(u32, tx->size, to_copy);
50544+ }
50545+
50546+ DPRINTK("Copying from mapped memory at %08lx\n",
50547+ (unsigned long)(idx_to_kaddr(tpmif, i) |
50548+ (tx->addr & ~PAGE_MASK)));
50549+
50550+ src = (void *)(idx_to_kaddr(tpmif, i) |
50551+ ((tx->addr & ~PAGE_MASK) + pg_offset));
50552+ if (copy_to_buffer(&buffer[offset],
50553+ src, to_copy, isuserbuffer)) {
50554+ return -EFAULT;
50555+ }
50556+
50557+ DPRINTK("Data from TPM-FE of domain %d are %d %d %d %d\n",
50558+ tpmif->domid, buffer[offset], buffer[offset + 1],
50559+ buffer[offset + 2], buffer[offset + 3]);
50560+
50561+ gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
50562+ GNTMAP_host_map, handle);
50563+
50564+ if (unlikely
50565+ (HYPERVISOR_grant_table_op
50566+ (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) {
50567+ BUG();
50568+ }
50569+
50570+ offset += to_copy;
50571+ pg_offset = 0;
50572+ last_read += to_copy;
50573+ room_left -= to_copy;
50574+
50575+ to_copy = min_t(u32, PAGE_SIZE, room_left);
50576+ i++;
50577+ } /* while (to_copy > 0) */
50578+ /*
50579+ * Adjust the last_read pointer
50580+ */
50581+ pak->last_read = last_read + 4;
50582+ return offset;
50583+}
50584+
50585+/* ============================================================
50586+ * The file layer for reading data from this device
50587+ * ============================================================
50588+ */
50589+static int vtpm_op_open(struct inode *inode, struct file *f)
50590+{
50591+ int rc = 0;
50592+ unsigned long flags;
50593+
50594+ write_lock_irqsave(&dataex.pak_lock, flags);
50595+ if (dataex.has_opener == 0) {
50596+ dataex.has_opener = 1;
50597+ } else {
50598+ rc = -EPERM;
50599+ }
50600+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50601+ return rc;
50602+}
50603+
50604+static ssize_t vtpm_op_read(struct file *file,
50605+ char __user * data, size_t size, loff_t * offset)
50606+{
50607+ int ret_size = -ENODATA;
50608+ struct packet *pak = NULL;
50609+ unsigned long flags;
50610+
50611+ write_lock_irqsave(&dataex.pak_lock, flags);
50612+ if (dataex.aborted) {
50613+ dataex.aborted = 0;
50614+ dataex.copied_so_far = 0;
50615+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50616+ return -EIO;
50617+ }
50618+
50619+ if (list_empty(&dataex.pending_pak)) {
50620+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50621+ wait_event_interruptible(dataex.wait_queue,
50622+ !list_empty(&dataex.pending_pak));
50623+ write_lock_irqsave(&dataex.pak_lock, flags);
50624+ dataex.copied_so_far = 0;
50625+ }
50626+
50627+ if (!list_empty(&dataex.pending_pak)) {
50628+ unsigned int left;
50629+
50630+ pak = list_entry(dataex.pending_pak.next, struct packet, next);
50631+ left = pak->data_len - dataex.copied_so_far;
50632+ list_del(&pak->next);
50633+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50634+
50635+ DPRINTK("size given by app: %d, available: %d\n", size, left);
50636+
50637+ ret_size = min_t(size_t, size, left);
50638+
50639+ ret_size = packet_read(pak, ret_size, data, size, 1);
50640+
50641+ write_lock_irqsave(&dataex.pak_lock, flags);
50642+
50643+ if (ret_size < 0) {
50644+ del_singleshot_timer_sync(&pak->processing_timer);
50645+ packet_free(pak);
50646+ dataex.copied_so_far = 0;
50647+ } else {
50648+ DPRINTK("Copied %d bytes to user buffer\n", ret_size);
50649+
50650+ dataex.copied_so_far += ret_size;
50651+ if (dataex.copied_so_far >= pak->data_len + 4) {
50652+ DPRINTK("All data from this packet given to app.\n");
50653+ /* All data given to app */
50654+
50655+ del_singleshot_timer_sync(&pak->
50656+ processing_timer);
50657+ list_add_tail(&pak->next, &dataex.current_pak);
50658+ /*
50659+				 * The more frontends that are handled at the same time,
50660+ * the more time we give the TPM to process the request.
50661+ */
50662+ mod_timer(&pak->processing_timer,
50663+ jiffies + (num_frontends * 60 * HZ));
50664+ dataex.copied_so_far = 0;
50665+ } else {
50666+ list_add(&pak->next, &dataex.pending_pak);
50667+ }
50668+ }
50669+ }
50670+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50671+
50672+ DPRINTK("Returning result from read to app: %d\n", ret_size);
50673+
50674+ return ret_size;
50675+}
50676+
50677+/*
50678+ * Write operation - only works after a previous read operation!
50679+ */
50680+static ssize_t vtpm_op_write(struct file *file,
50681+ const char __user * data, size_t size,
50682+ loff_t * offset)
50683+{
50684+ struct packet *pak;
50685+ int rc = 0;
50686+ unsigned int off = 4;
50687+ unsigned long flags;
50688+ struct vtpm_resp_hdr vrh;
50689+
50690+ /*
50691+ * Minimum required packet size is:
50692+ * 4 bytes for instance number
50693+ * 2 bytes for tag
50694+ * 4 bytes for paramSize
50695+ * 4 bytes for the ordinal
50696+ * sum: 14 bytes
50697+ */
50698+ if (size < sizeof (vrh))
50699+ return -EFAULT;
50700+
50701+ if (copy_from_user(&vrh, data, sizeof (vrh)))
50702+ return -EFAULT;
50703+
50704+ /* malformed packet? */
50705+ if ((off + ntohl(vrh.len_no)) != size)
50706+ return -EFAULT;
50707+
50708+ write_lock_irqsave(&dataex.pak_lock, flags);
50709+ pak = packet_find_instance(&dataex.current_pak,
50710+ ntohl(vrh.instance_no));
50711+
50712+ if (pak == NULL) {
50713+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50714+ DPRINTK(KERN_ALERT "No associated packet! (inst=%d)\n",
50715+ ntohl(vrh.instance_no));
50716+ return -EFAULT;
50717+ }
50718+
50719+ del_singleshot_timer_sync(&pak->processing_timer);
50720+ list_del(&pak->next);
50721+
50722+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50723+
50724+ /*
50725+	 * The first 'off' bytes hold the instance number - skip them.
50726+ */
50727+ size -= off;
50728+
50729+ rc = packet_write(pak, &data[off], size, 1);
50730+
50731+ if (rc > 0) {
50732+		/* account for the first 4 (instance number) bytes that were skipped */
50733+ rc += off;
50734+ }
50735+ packet_free(pak);
50736+ return rc;
50737+}
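/*
 * A worked example of the size check above: the smallest accepted write is
 * sizeof(struct vtpm_resp_hdr) = 4 + 2 + 4 + 4 = 14 bytes, and
 * ntohl(vrh.len_no) must equal everything after the 4-byte instance prefix.
 * A hypothetical minimal response for instance 1 (values illustrative only)
 * would therefore be:
 *
 *   00 00 00 01   instance number (htonl(1))
 *   00 C4         TPM response tag
 *   00 00 00 0A   paramSize = 10 (covers tag through return code)
 *   00 00 00 09   return code (TPM_FAIL)
 *
 * giving size = 4 + 10 = 14, which satisfies (off + ntohl(vrh.len_no)) == size.
 */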
50738+
50739+static int vtpm_op_release(struct inode *inode, struct file *file)
50740+{
50741+ unsigned long flags;
50742+
50743+ vtpm_release_packets(NULL, 1);
50744+ write_lock_irqsave(&dataex.pak_lock, flags);
50745+ dataex.has_opener = 0;
50746+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50747+ return 0;
50748+}
50749+
50750+static unsigned int vtpm_op_poll(struct file *file,
50751+ struct poll_table_struct *pts)
50752+{
50753+ unsigned int flags = POLLOUT | POLLWRNORM;
50754+
50755+ poll_wait(file, &dataex.wait_queue, pts);
50756+ if (!list_empty(&dataex.pending_pak)) {
50757+ flags |= POLLIN | POLLRDNORM;
50758+ }
50759+ return flags;
50760+}
50761+
50762+static const struct file_operations vtpm_ops = {
50763+ .owner = THIS_MODULE,
50764+ .llseek = no_llseek,
50765+ .open = vtpm_op_open,
50766+ .read = vtpm_op_read,
50767+ .write = vtpm_op_write,
50768+ .release = vtpm_op_release,
50769+ .poll = vtpm_op_poll,
50770+};
50771+
50772+static struct miscdevice vtpms_miscdevice = {
50773+ .minor = 225,
50774+ .name = "vtpm",
50775+ .fops = &vtpm_ops,
50776+};
50777+
50778+/***************************************************************
50779+ Utility functions
50780+***************************************************************/
50781+
50782+static int tpm_send_fail_message(struct packet *pak, u8 req_tag)
50783+{
50784+ int rc;
50785+ static const unsigned char tpm_error_message_fail[] = {
50786+ 0x00, 0x00,
50787+ 0x00, 0x00, 0x00, 0x0a,
50788+ 0x00, 0x00, 0x00, 0x09 /* TPM_FAIL */
50789+ };
50790+ unsigned char buffer[sizeof (tpm_error_message_fail)];
50791+
50792+ memcpy(buffer, tpm_error_message_fail,
50793+ sizeof (tpm_error_message_fail));
50794+ /*
50795+	 * Insert the right response tag depending on the given tag.
50796+ * All response tags are '+3' to the request tag.
50797+ */
50798+ buffer[1] = req_tag + 3;
50799+
50800+ /*
50801+ * Write the data to shared memory and notify the front-end
50802+ */
50803+ rc = packet_write(pak, buffer, sizeof (buffer), 0);
50804+
50805+ return rc;
50806+}
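/*
 * The "+3" above follows the TPM 1.2 tag numbering, in which each response
 * tag is the request tag plus three in the low byte:
 *
 *   TPM_TAG_RQU_COMMAND        0x00C1  ->  TPM_TAG_RSP_COMMAND        0x00C4
 *   TPM_TAG_RQU_AUTH1_COMMAND  0x00C2  ->  TPM_TAG_RSP_AUTH1_COMMAND  0x00C5
 *   TPM_TAG_RQU_AUTH2_COMMAND  0x00C3  ->  TPM_TAG_RSP_AUTH2_COMMAND  0x00C6
 *
 * req_tag is the low byte taken from the request, so buffer[1] ends up
 * holding the low byte of the matching response tag.  (Tag names are quoted
 * from the TPM 1.2 specification, not defined in this driver.)
 */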
50807+
50808+static int _vtpm_release_packets(struct list_head *head,
50809+ tpmif_t * tpmif, int send_msgs)
50810+{
50811+ int aborted = 0;
50812+ int c = 0;
50813+ struct packet *pak;
50814+ struct list_head *pos, *tmp;
50815+
50816+ list_for_each_safe(pos, tmp, head) {
50817+ pak = list_entry(pos, struct packet, next);
50818+ c += 1;
50819+
50820+ if (tpmif == NULL || pak->tpmif == tpmif) {
50821+ int can_send = 0;
50822+
50823+ del_singleshot_timer_sync(&pak->processing_timer);
50824+ list_del(&pak->next);
50825+
50826+ if (pak->tpmif && pak->tpmif->status == CONNECTED) {
50827+ can_send = 1;
50828+ }
50829+
50830+ if (send_msgs && can_send) {
50831+ tpm_send_fail_message(pak, pak->req_tag);
50832+ }
50833+ packet_free(pak);
50834+ if (c == 1)
50835+ aborted = 1;
50836+ }
50837+ }
50838+ return aborted;
50839+}
50840+
50841+int vtpm_release_packets(tpmif_t * tpmif, int send_msgs)
50842+{
50843+ unsigned long flags;
50844+
50845+ write_lock_irqsave(&dataex.pak_lock, flags);
50846+
50847+ dataex.aborted = _vtpm_release_packets(&dataex.pending_pak,
50848+ tpmif,
50849+ send_msgs);
50850+ _vtpm_release_packets(&dataex.current_pak, tpmif, send_msgs);
50851+
50852+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50853+ return 0;
50854+}
50855+
50856+static int vtpm_queue_packet(struct packet *pak)
50857+{
50858+ int rc = 0;
50859+
50860+ if (dataex.has_opener) {
50861+ unsigned long flags;
50862+
50863+ write_lock_irqsave(&dataex.pak_lock, flags);
50864+ list_add_tail(&pak->next, &dataex.pending_pak);
50865+ /* give the TPM some time to pick up the request */
50866+ mod_timer(&pak->processing_timer, jiffies + (30 * HZ));
50867+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50868+
50869+ wake_up_interruptible(&dataex.wait_queue);
50870+ } else {
50871+ rc = -EFAULT;
50872+ }
50873+ return rc;
50874+}
50875+
50876+static int vtpm_receive(tpmif_t * tpmif, u32 size)
50877+{
50878+ int rc = 0;
50879+ unsigned char buffer[10];
50880+ __be32 *native_size;
50881+ struct packet *pak = packet_alloc(tpmif, size, 0, 0);
50882+
50883+ if (!pak)
50884+ return -ENOMEM;
50885+ /*
50886+ * Read 10 bytes from the received buffer to test its
50887+ * content for validity.
50888+ */
50889+ if (sizeof (buffer) != packet_read(pak,
50890+ sizeof (buffer), buffer,
50891+ sizeof (buffer), 0)) {
50892+ goto failexit;
50893+ }
50894+ /*
50895+ * Reset the packet read pointer so we can read all its
50896+ * contents again.
50897+ */
50898+ packet_reset(pak);
50899+
50900+ native_size = (__force __be32 *) (&buffer[4 + 2]);
50901+ /*
50902+ * Verify that the size of the packet is correct
50903+ * as indicated and that there's actually someone reading packets.
50904+ * The minimum size of the packet is '10' for tag, size indicator
50905+ * and ordinal.
50906+ */
50907+ if (size < 10 ||
50908+ be32_to_cpu(*native_size) != size ||
50909+ 0 == dataex.has_opener || tpmif->status != CONNECTED) {
50910+ rc = -EINVAL;
50911+ goto failexit;
50912+ } else {
50913+ rc = vtpm_queue_packet(pak);
50914+ if (rc < 0)
50915+ goto failexit;
50916+ }
50917+ return 0;
50918+
50919+ failexit:
50920+ if (pak) {
50921+ tpm_send_fail_message(pak, buffer[4 + 1]);
50922+ packet_free(pak);
50923+ }
50924+ return rc;
50925+}
50926+
50927+/*
50928+ * Timeout function that gets invoked when a packet has not been processed
50929+ * during the timeout period.
50930+ * The packet must be on a list when this function is invoked. This
50931+ * also means that once it's taken off a list, the timer must be
50932+ * destroyed as well.
50933+ */
50934+static void processing_timeout(unsigned long ptr)
50935+{
50936+ struct packet *pak = (struct packet *)ptr;
50937+ unsigned long flags;
50938+
50939+ write_lock_irqsave(&dataex.pak_lock, flags);
50940+ /*
50941+	 * Check whether the packet is still on one of
50942+	 * the lists before acting on it.
50943+ */
50944+ if (pak == packet_find_packet(&dataex.pending_pak, pak) ||
50945+ pak == packet_find_packet(&dataex.current_pak, pak)) {
50946+ if ((pak->flags & PACKET_FLAG_DISCARD_RESPONSE) == 0) {
50947+ tpm_send_fail_message(pak, pak->req_tag);
50948+ }
50949+ /* discard future responses */
50950+ pak->flags |= PACKET_FLAG_DISCARD_RESPONSE;
50951+ }
50952+
50953+ write_unlock_irqrestore(&dataex.pak_lock, flags);
50954+}
50955+
50956+static void tpm_tx_action(unsigned long unused);
50957+static DECLARE_TASKLET(tpm_tx_tasklet, tpm_tx_action, 0);
50958+
50959+static struct list_head tpm_schedule_list;
50960+static spinlock_t tpm_schedule_list_lock;
50961+
50962+static inline void maybe_schedule_tx_action(void)
50963+{
50964+ smp_mb();
50965+ tasklet_schedule(&tpm_tx_tasklet);
50966+}
50967+
50968+static inline int __on_tpm_schedule_list(tpmif_t * tpmif)
50969+{
50970+ return tpmif->list.next != NULL;
50971+}
50972+
50973+static void remove_from_tpm_schedule_list(tpmif_t * tpmif)
50974+{
50975+ spin_lock_irq(&tpm_schedule_list_lock);
50976+ if (likely(__on_tpm_schedule_list(tpmif))) {
50977+ list_del(&tpmif->list);
50978+ tpmif->list.next = NULL;
50979+ tpmif_put(tpmif);
50980+ }
50981+ spin_unlock_irq(&tpm_schedule_list_lock);
50982+}
50983+
50984+static void add_to_tpm_schedule_list_tail(tpmif_t * tpmif)
50985+{
50986+ if (__on_tpm_schedule_list(tpmif))
50987+ return;
50988+
50989+ spin_lock_irq(&tpm_schedule_list_lock);
50990+ if (!__on_tpm_schedule_list(tpmif) && tpmif->active) {
50991+ list_add_tail(&tpmif->list, &tpm_schedule_list);
50992+ tpmif_get(tpmif);
50993+ }
50994+ spin_unlock_irq(&tpm_schedule_list_lock);
50995+}
50996+
50997+void tpmif_schedule_work(tpmif_t * tpmif)
50998+{
50999+ add_to_tpm_schedule_list_tail(tpmif);
51000+ maybe_schedule_tx_action();
51001+}
51002+
51003+void tpmif_deschedule_work(tpmif_t * tpmif)
51004+{
51005+ remove_from_tpm_schedule_list(tpmif);
51006+}
51007+
51008+static void tpm_tx_action(unsigned long unused)
51009+{
51010+ struct list_head *ent;
51011+ tpmif_t *tpmif;
51012+ tpmif_tx_request_t *tx;
51013+
51014+ DPRINTK("%s: Getting data from front-end(s)!\n", __FUNCTION__);
51015+
51016+ while (!list_empty(&tpm_schedule_list)) {
51017+ /* Get a tpmif from the list with work to do. */
51018+ ent = tpm_schedule_list.next;
51019+ tpmif = list_entry(ent, tpmif_t, list);
51020+ tpmif_get(tpmif);
51021+ remove_from_tpm_schedule_list(tpmif);
51022+
51023+ tx = &tpmif->tx->ring[0].req;
51024+
51025+ /* pass it up */
51026+ vtpm_receive(tpmif, tx->size);
51027+
51028+ tpmif_put(tpmif);
51029+ }
51030+}
51031+
51032+irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
51033+{
51034+ tpmif_t *tpmif = (tpmif_t *) dev_id;
51035+
51036+ add_to_tpm_schedule_list_tail(tpmif);
51037+ maybe_schedule_tx_action();
51038+ return IRQ_HANDLED;
51039+}
51040+
51041+static int __init tpmback_init(void)
51042+{
51043+ int rc;
51044+
51045+ if ((rc = misc_register(&vtpms_miscdevice)) != 0) {
51046+ printk(KERN_ALERT
51047+ "Could not register misc device for TPM BE.\n");
51048+ return rc;
51049+ }
51050+
51051+ dataex_init(&dataex);
51052+
51053+ spin_lock_init(&tpm_schedule_list_lock);
51054+ INIT_LIST_HEAD(&tpm_schedule_list);
51055+
51056+ tpmif_interface_init();
51057+ tpmif_xenbus_init();
51058+
51059+ printk(KERN_ALERT "Successfully initialized TPM backend driver.\n");
51060+
51061+ return 0;
51062+}
51063+
51064+module_init(tpmback_init);
51065+
51066+void __exit tpmback_exit(void)
51067+{
51068+ vtpm_release_packets(NULL, 0);
51069+ tpmif_xenbus_exit();
51070+ tpmif_interface_exit();
51071+ misc_deregister(&vtpms_miscdevice);
51072+}
51073+
51074+MODULE_LICENSE("Dual BSD/GPL");
51075Index: head-2008-11-25/drivers/xen/tpmback/xenbus.c
51076===================================================================
51077--- /dev/null 1970-01-01 00:00:00.000000000 +0000
51078+++ head-2008-11-25/drivers/xen/tpmback/xenbus.c 2008-03-06 08:54:32.000000000 +0100
51079@@ -0,0 +1,289 @@
51080+/* Xenbus code for tpmif backend
51081+ Copyright (C) 2005 IBM Corporation
51082+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
51083+
51084+ This program is free software; you can redistribute it and/or modify
51085+ it under the terms of the GNU General Public License as published by
51086+ the Free Software Foundation; either version 2 of the License, or
51087+ (at your option) any later version.
51088+
51089+ This program is distributed in the hope that it will be useful,
51090+ but WITHOUT ANY WARRANTY; without even the implied warranty of
51091+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
51092+ GNU General Public License for more details.
51093+
51094+ You should have received a copy of the GNU General Public License
51095+ along with this program; if not, write to the Free Software
51096+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
51097+*/
51098+#include <stdarg.h>
51099+#include <linux/module.h>
51100+#include <xen/xenbus.h>
51101+#include "common.h"
51102+
51103+struct backend_info
51104+{
51105+ struct xenbus_device *dev;
51106+
51107+ /* our communications channel */
51108+ tpmif_t *tpmif;
51109+
51110+ long int frontend_id;
51111+ long int instance; // instance of TPM
51112+ u8 is_instance_set; // whether the instance number has been set
51113+
51114+ /* watch front end for changes */
51115+ struct xenbus_watch backend_watch;
51116+};
51117+
51118+static void maybe_connect(struct backend_info *be);
51119+static void connect(struct backend_info *be);
51120+static int connect_ring(struct backend_info *be);
51121+static void backend_changed(struct xenbus_watch *watch,
51122+ const char **vec, unsigned int len);
51123+static void frontend_changed(struct xenbus_device *dev,
51124+ enum xenbus_state frontend_state);
51125+
51126+long int tpmback_get_instance(struct backend_info *bi)
51127+{
51128+ long int res = -1;
51129+ if (bi && bi->is_instance_set)
51130+ res = bi->instance;
51131+ return res;
51132+}
51133+
51134+static int tpmback_remove(struct xenbus_device *dev)
51135+{
51136+ struct backend_info *be = dev->dev.driver_data;
51137+
51138+ if (!be) return 0;
51139+
51140+ if (be->backend_watch.node) {
51141+ unregister_xenbus_watch(&be->backend_watch);
51142+ kfree(be->backend_watch.node);
51143+ be->backend_watch.node = NULL;
51144+ }
51145+ if (be->tpmif) {
51146+ be->tpmif->bi = NULL;
51147+ vtpm_release_packets(be->tpmif, 0);
51148+ tpmif_put(be->tpmif);
51149+ be->tpmif = NULL;
51150+ }
51151+ kfree(be);
51152+ dev->dev.driver_data = NULL;
51153+ return 0;
51154+}
51155+
51156+static int tpmback_probe(struct xenbus_device *dev,
51157+ const struct xenbus_device_id *id)
51158+{
51159+ int err;
51160+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
51161+ GFP_KERNEL);
51162+
51163+ if (!be) {
51164+ xenbus_dev_fatal(dev, -ENOMEM,
51165+ "allocating backend structure");
51166+ return -ENOMEM;
51167+ }
51168+
51169+ be->is_instance_set = 0;
51170+ be->dev = dev;
51171+ dev->dev.driver_data = be;
51172+
51173+ err = xenbus_watch_path2(dev, dev->nodename,
51174+ "instance", &be->backend_watch,
51175+ backend_changed);
51176+ if (err) {
51177+ goto fail;
51178+ }
51179+
51180+ err = xenbus_switch_state(dev, XenbusStateInitWait);
51181+ if (err) {
51182+ goto fail;
51183+ }
51184+ return 0;
51185+fail:
51186+ tpmback_remove(dev);
51187+ return err;
51188+}
51189+
51190+
51191+static void backend_changed(struct xenbus_watch *watch,
51192+ const char **vec, unsigned int len)
51193+{
51194+ int err;
51195+ long instance;
51196+ struct backend_info *be
51197+ = container_of(watch, struct backend_info, backend_watch);
51198+ struct xenbus_device *dev = be->dev;
51199+
51200+ err = xenbus_scanf(XBT_NIL, dev->nodename,
51201+ "instance","%li", &instance);
51202+ if (XENBUS_EXIST_ERR(err)) {
51203+ return;
51204+ }
51205+
51206+ if (err != 1) {
51207+ xenbus_dev_fatal(dev, err, "reading instance");
51208+ return;
51209+ }
51210+
51211+ if (be->is_instance_set == 0) {
51212+ be->instance = instance;
51213+ be->is_instance_set = 1;
51214+ }
51215+}
51216+
51217+
51218+static void frontend_changed(struct xenbus_device *dev,
51219+ enum xenbus_state frontend_state)
51220+{
51221+ struct backend_info *be = dev->dev.driver_data;
51222+ int err;
51223+
51224+ switch (frontend_state) {
51225+ case XenbusStateInitialising:
51226+ case XenbusStateInitialised:
51227+ break;
51228+
51229+ case XenbusStateConnected:
51230+ err = connect_ring(be);
51231+ if (err) {
51232+ return;
51233+ }
51234+ maybe_connect(be);
51235+ break;
51236+
51237+ case XenbusStateClosing:
51238+ be->instance = -1;
51239+ xenbus_switch_state(dev, XenbusStateClosing);
51240+ break;
51241+
51242+ case XenbusStateUnknown: /* keep it here */
51243+ case XenbusStateClosed:
51244+ xenbus_switch_state(dev, XenbusStateClosed);
51245+ device_unregister(&be->dev->dev);
51246+ tpmback_remove(dev);
51247+ break;
51248+
51249+ default:
51250+ xenbus_dev_fatal(dev, -EINVAL,
51251+ "saw state %d at frontend",
51252+ frontend_state);
51253+ break;
51254+ }
51255+}
51256+
51257+
51258+
51259+static void maybe_connect(struct backend_info *be)
51260+{
51261+ if (be->tpmif == NULL || be->tpmif->status == CONNECTED)
51262+ return;
51263+
51264+ connect(be);
51265+}
51266+
51267+
51268+static void connect(struct backend_info *be)
51269+{
51270+ struct xenbus_transaction xbt;
51271+ int err;
51272+ struct xenbus_device *dev = be->dev;
51273+ unsigned long ready = 1;
51274+
51275+again:
51276+ err = xenbus_transaction_start(&xbt);
51277+ if (err) {
51278+ xenbus_dev_fatal(be->dev, err, "starting transaction");
51279+ return;
51280+ }
51281+
51282+ err = xenbus_printf(xbt, be->dev->nodename,
51283+ "ready", "%lu", ready);
51284+ if (err) {
51285+ xenbus_dev_fatal(be->dev, err, "writing 'ready'");
51286+ goto abort;
51287+ }
51288+
51289+ err = xenbus_transaction_end(xbt, 0);
51290+ if (err == -EAGAIN)
51291+ goto again;
51292+ if (err)
51293+ xenbus_dev_fatal(be->dev, err, "end of transaction");
51294+
51295+ err = xenbus_switch_state(dev, XenbusStateConnected);
51296+ if (!err)
51297+ be->tpmif->status = CONNECTED;
51298+ return;
51299+abort:
51300+ xenbus_transaction_end(xbt, 1);
51301+}
51302+
51303+
51304+static int connect_ring(struct backend_info *be)
51305+{
51306+ struct xenbus_device *dev = be->dev;
51307+ unsigned long ring_ref;
51308+ unsigned int evtchn;
51309+ int err;
51310+
51311+ err = xenbus_gather(XBT_NIL, dev->otherend,
51312+ "ring-ref", "%lu", &ring_ref,
51313+ "event-channel", "%u", &evtchn, NULL);
51314+ if (err) {
51315+ xenbus_dev_error(dev, err,
51316+ "reading %s/ring-ref and event-channel",
51317+ dev->otherend);
51318+ return err;
51319+ }
51320+
51321+ if (!be->tpmif) {
51322+ be->tpmif = tpmif_find(dev->otherend_id, be);
51323+ if (IS_ERR(be->tpmif)) {
51324+ err = PTR_ERR(be->tpmif);
51325+ be->tpmif = NULL;
51326+ xenbus_dev_fatal(dev,err,"creating vtpm interface");
51327+ return err;
51328+ }
51329+ }
51330+
51331+ if (be->tpmif != NULL) {
51332+ err = tpmif_map(be->tpmif, ring_ref, evtchn);
51333+ if (err) {
51334+ xenbus_dev_error(dev, err,
51335+ "mapping shared-frame %lu port %u",
51336+ ring_ref, evtchn);
51337+ return err;
51338+ }
51339+ }
51340+ return 0;
51341+}
51342+
51343+
51344+static const struct xenbus_device_id tpmback_ids[] = {
51345+ { "vtpm" },
51346+ { "" }
51347+};
51348+
51349+
51350+static struct xenbus_driver tpmback = {
51351+ .name = "vtpm",
51352+ .owner = THIS_MODULE,
51353+ .ids = tpmback_ids,
51354+ .probe = tpmback_probe,
51355+ .remove = tpmback_remove,
51356+ .otherend_changed = frontend_changed,
51357+};
51358+
51359+
51360+void tpmif_xenbus_init(void)
51361+{
51362+ xenbus_register_backend(&tpmback);
51363+}
51364+
51365+void tpmif_xenbus_exit(void)
51366+{
51367+ xenbus_unregister_driver(&tpmback);
51368+}
51369Index: head-2008-11-25/drivers/xen/util.c
51370===================================================================
51371--- /dev/null 1970-01-01 00:00:00.000000000 +0000
51372+++ head-2008-11-25/drivers/xen/util.c 2007-07-10 09:42:30.000000000 +0200
51373@@ -0,0 +1,65 @@
51374+#include <linux/mm.h>
51375+#include <linux/module.h>
51376+#include <linux/slab.h>
51377+#include <linux/vmalloc.h>
51378+#include <asm/uaccess.h>
51379+#include <xen/driver_util.h>
51380+
51381+struct class *get_xen_class(void)
51382+{
51383+ static struct class *xen_class;
51384+
51385+ if (xen_class)
51386+ return xen_class;
51387+
51388+ xen_class = class_create(THIS_MODULE, "xen");
51389+ if (IS_ERR(xen_class)) {
51390+ printk("Failed to create xen sysfs class.\n");
51391+ xen_class = NULL;
51392+ }
51393+
51394+ return xen_class;
51395+}
51396+EXPORT_SYMBOL_GPL(get_xen_class);
51397+
51398+#ifdef CONFIG_X86
51399+static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
51400+{
51401+ /* apply_to_page_range() does all the hard work. */
51402+ return 0;
51403+}
51404+
51405+struct vm_struct *alloc_vm_area(unsigned long size)
51406+{
51407+ struct vm_struct *area;
51408+
51409+ area = get_vm_area(size, VM_IOREMAP);
51410+ if (area == NULL)
51411+ return NULL;
51412+
51413+ /*
51414+ * This ensures that page tables are constructed for this region
51415+ * of kernel virtual address space and mapped into init_mm.
51416+ */
51417+ if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
51418+ area->size, f, NULL)) {
51419+ free_vm_area(area);
51420+ return NULL;
51421+ }
51422+
51423+ /* Map page directories into every address space. */
51424+ vmalloc_sync_all();
51425+
51426+ return area;
51427+}
51428+EXPORT_SYMBOL_GPL(alloc_vm_area);
51429+
51430+void free_vm_area(struct vm_struct *area)
51431+{
51432+ struct vm_struct *ret;
51433+ ret = remove_vm_area(area->addr);
51434+ BUG_ON(ret != area);
51435+ kfree(area);
51436+}
51437+EXPORT_SYMBOL_GPL(free_vm_area);
51438+#endif /* CONFIG_X86 */
51439Index: head-2008-11-25/drivers/xen/xenbus/xenbus_backend_client.c
51440===================================================================
51441--- /dev/null 1970-01-01 00:00:00.000000000 +0000
51442+++ head-2008-11-25/drivers/xen/xenbus/xenbus_backend_client.c 2007-06-12 13:13:45.000000000 +0200
51443@@ -0,0 +1,147 @@
51444+/******************************************************************************
51445+ * Backend-client-facing interface for the Xenbus driver. In other words, the
51446+ * interface between the Xenbus and the device-specific code in the backend
51447+ * driver.
51448+ *
51449+ * Copyright (C) 2005-2006 XenSource Ltd
51450+ *
51451+ * This program is free software; you can redistribute it and/or
51452+ * modify it under the terms of the GNU General Public License version 2
51453+ * as published by the Free Software Foundation; or, when distributed
51454+ * separately from the Linux kernel or incorporated into other
51455+ * software packages, subject to the following license:
51456+ *
51457+ * Permission is hereby granted, free of charge, to any person obtaining a copy
51458+ * of this source file (the "Software"), to deal in the Software without
51459+ * restriction, including without limitation the rights to use, copy, modify,
51460+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
51461+ * and to permit persons to whom the Software is furnished to do so, subject to
51462+ * the following conditions:
51463+ *
51464+ * The above copyright notice and this permission notice shall be included in
51465+ * all copies or substantial portions of the Software.
51466+ *
51467+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
51468+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51469+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
51470+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51471+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
51472+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
51473+ * IN THE SOFTWARE.
51474+ */
51475+
51476+#include <linux/err.h>
51477+#include <xen/gnttab.h>
51478+#include <xen/xenbus.h>
51479+#include <xen/driver_util.h>
51480+
51481+/* Based on Rusty Russell's skeleton driver's map_page */
51482+struct vm_struct *xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref)
51483+{
51484+ struct gnttab_map_grant_ref op;
51485+ struct vm_struct *area;
51486+
51487+ area = alloc_vm_area(PAGE_SIZE);
51488+ if (!area)
51489+ return ERR_PTR(-ENOMEM);
51490+
51491+ gnttab_set_map_op(&op, (unsigned long)area->addr, GNTMAP_host_map,
51492+ gnt_ref, dev->otherend_id);
51493+
51494+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
51495+ BUG();
51496+
51497+ if (op.status != GNTST_okay) {
51498+ free_vm_area(area);
51499+ xenbus_dev_fatal(dev, op.status,
51500+ "mapping in shared page %d from domain %d",
51501+ gnt_ref, dev->otherend_id);
51502+ BUG_ON(!IS_ERR(ERR_PTR(op.status)));
51503+ return ERR_PTR(op.status);
51504+ }
51505+
51506+ /* Stuff the handle in an unused field */
51507+ area->phys_addr = (unsigned long)op.handle;
51508+
51509+ return area;
51510+}
51511+EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
51512+
51513+
51514+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
51515+ grant_handle_t *handle, void *vaddr)
51516+{
51517+ struct gnttab_map_grant_ref op;
51518+
51519+ gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map,
51520+ gnt_ref, dev->otherend_id);
51521+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
51522+ BUG();
51523+
51524+ if (op.status != GNTST_okay) {
51525+ xenbus_dev_fatal(dev, op.status,
51526+ "mapping in shared page %d from domain %d",
51527+ gnt_ref, dev->otherend_id);
51528+ } else
51529+ *handle = op.handle;
51530+
51531+ return op.status;
51532+}
51533+EXPORT_SYMBOL_GPL(xenbus_map_ring);
51534+
51535+
51536+/* Based on Rusty Russell's skeleton driver's unmap_page */
51537+int xenbus_unmap_ring_vfree(struct xenbus_device *dev, struct vm_struct *area)
51538+{
51539+ struct gnttab_unmap_grant_ref op;
51540+
51541+ gnttab_set_unmap_op(&op, (unsigned long)area->addr, GNTMAP_host_map,
51542+ (grant_handle_t)area->phys_addr);
51543+
51544+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
51545+ BUG();
51546+
51547+ if (op.status == GNTST_okay)
51548+ free_vm_area(area);
51549+ else
51550+ xenbus_dev_error(dev, op.status,
51551+ "unmapping page at handle %d error %d",
51552+ (int16_t)area->phys_addr, op.status);
51553+
51554+ return op.status;
51555+}
51556+EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
51557+
51558+
51559+int xenbus_unmap_ring(struct xenbus_device *dev,
51560+ grant_handle_t handle, void *vaddr)
51561+{
51562+ struct gnttab_unmap_grant_ref op;
51563+
51564+ gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map,
51565+ handle);
51566+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
51567+ BUG();
51568+
51569+ if (op.status != GNTST_okay)
51570+ xenbus_dev_error(dev, op.status,
51571+ "unmapping page at handle %d error %d",
51572+ handle, op.status);
51573+
51574+ return op.status;
51575+}
51576+EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
51577+
51578+int xenbus_dev_is_online(struct xenbus_device *dev)
51579+{
51580+ int rc, val;
51581+
51582+ rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
51583+ if (rc != 1)
51584+ val = 0; /* no online node present */
51585+
51586+ return val;
51587+}
51588+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
51589+
51590+MODULE_LICENSE("Dual BSD/GPL");
51591Index: head-2008-11-25/drivers/xen/xenbus/xenbus_dev.c
51592===================================================================
51593--- /dev/null 1970-01-01 00:00:00.000000000 +0000
51594+++ head-2008-11-25/drivers/xen/xenbus/xenbus_dev.c 2008-07-21 11:00:33.000000000 +0200
51595@@ -0,0 +1,408 @@
51596+/*
51597+ * xenbus_dev.c
51598+ *
51599+ * Driver giving user-space access to the kernel's xenbus connection
51600+ * to xenstore.
51601+ *
51602+ * Copyright (c) 2005, Christian Limpach
51603+ * Copyright (c) 2005, Rusty Russell, IBM Corporation
51604+ *
51605+ * This program is free software; you can redistribute it and/or
51606+ * modify it under the terms of the GNU General Public License version 2
51607+ * as published by the Free Software Foundation; or, when distributed
51608+ * separately from the Linux kernel or incorporated into other
51609+ * software packages, subject to the following license:
51610+ *
51611+ * Permission is hereby granted, free of charge, to any person obtaining a copy
51612+ * of this source file (the "Software"), to deal in the Software without
51613+ * restriction, including without limitation the rights to use, copy, modify,
51614+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
51615+ * and to permit persons to whom the Software is furnished to do so, subject to
51616+ * the following conditions:
51617+ *
51618+ * The above copyright notice and this permission notice shall be included in
51619+ * all copies or substantial portions of the Software.
51620+ *
51621+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
51622+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51623+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
51624+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51625+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
51626+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
51627+ * IN THE SOFTWARE.
51628+ */
51629+
51630+#include <linux/kernel.h>
51631+#include <linux/errno.h>
51632+#include <linux/uio.h>
51633+#include <linux/notifier.h>
51634+#include <linux/wait.h>
51635+#include <linux/fs.h>
51636+#include <linux/poll.h>
51637+#include <linux/mutex.h>
51638+
51639+#include "xenbus_comms.h"
51640+
51641+#include <asm/uaccess.h>
51642+#include <asm/hypervisor.h>
51643+#include <xen/xenbus.h>
51644+#include <xen/xen_proc.h>
51645+#include <asm/hypervisor.h>
51646+
51647+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
51648+#include <xen/platform-compat.h>
51649+#endif
51650+
51651+struct xenbus_dev_transaction {
51652+ struct list_head list;
51653+ struct xenbus_transaction handle;
51654+};
51655+
51656+struct read_buffer {
51657+ struct list_head list;
51658+ unsigned int cons;
51659+ unsigned int len;
51660+ char msg[];
51661+};
51662+
51663+struct xenbus_dev_data {
51664+ /* In-progress transaction. */
51665+ struct list_head transactions;
51666+
51667+ /* Active watches. */
51668+ struct list_head watches;
51669+
51670+ /* Partial request. */
51671+ unsigned int len;
51672+ union {
51673+ struct xsd_sockmsg msg;
51674+ char buffer[PAGE_SIZE];
51675+ } u;
51676+
51677+ /* Response queue. */
51678+ struct list_head read_buffers;
51679+ wait_queue_head_t read_waitq;
51680+
51681+ struct mutex reply_mutex;
51682+};
51683+
51684+static struct proc_dir_entry *xenbus_dev_intf;
51685+
51686+static ssize_t xenbus_dev_read(struct file *filp,
51687+ char __user *ubuf,
51688+ size_t len, loff_t *ppos)
51689+{
51690+ struct xenbus_dev_data *u = filp->private_data;
51691+ struct read_buffer *rb;
51692+ int i, ret;
51693+
51694+ mutex_lock(&u->reply_mutex);
51695+ while (list_empty(&u->read_buffers)) {
51696+ mutex_unlock(&u->reply_mutex);
51697+ ret = wait_event_interruptible(u->read_waitq,
51698+ !list_empty(&u->read_buffers));
51699+ if (ret)
51700+ return ret;
51701+ mutex_lock(&u->reply_mutex);
51702+ }
51703+
51704+ rb = list_entry(u->read_buffers.next, struct read_buffer, list);
51705+ for (i = 0; i < len;) {
51706+ put_user(rb->msg[rb->cons], ubuf + i);
51707+ i++;
51708+ rb->cons++;
51709+ if (rb->cons == rb->len) {
51710+ list_del(&rb->list);
51711+ kfree(rb);
51712+ if (list_empty(&u->read_buffers))
51713+ break;
51714+ rb = list_entry(u->read_buffers.next,
51715+ struct read_buffer, list);
51716+ }
51717+ }
51718+ mutex_unlock(&u->reply_mutex);
51719+
51720+ return i;
51721+}
51722+
51723+static void queue_reply(struct xenbus_dev_data *u,
51724+ char *data, unsigned int len)
51725+{
51726+ struct read_buffer *rb;
51727+
51728+ if (len == 0)
51729+ return;
51730+
51731+ rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
51732+ BUG_ON(rb == NULL);
51733+
51734+ rb->cons = 0;
51735+ rb->len = len;
51736+
51737+ memcpy(rb->msg, data, len);
51738+
51739+ list_add_tail(&rb->list, &u->read_buffers);
51740+
51741+ wake_up(&u->read_waitq);
51742+}
51743+
51744+struct watch_adapter
51745+{
51746+ struct list_head list;
51747+ struct xenbus_watch watch;
51748+ struct xenbus_dev_data *dev_data;
51749+ char *token;
51750+};
51751+
51752+static void free_watch_adapter (struct watch_adapter *watch)
51753+{
51754+ kfree(watch->watch.node);
51755+ kfree(watch->token);
51756+ kfree(watch);
51757+}
51758+
51759+static void watch_fired(struct xenbus_watch *watch,
51760+ const char **vec,
51761+ unsigned int len)
51762+{
51763+ struct watch_adapter *adap =
51764+ container_of(watch, struct watch_adapter, watch);
51765+ struct xsd_sockmsg hdr;
51766+ const char *path, *token;
51767+ int path_len, tok_len, body_len, data_len = 0;
51768+
51769+ path = vec[XS_WATCH_PATH];
51770+ token = adap->token;
51771+
51772+ path_len = strlen(path) + 1;
51773+ tok_len = strlen(token) + 1;
51774+ if (len > 2)
51775+ data_len = vec[len] - vec[2] + 1;
51776+ body_len = path_len + tok_len + data_len;
51777+
51778+ hdr.type = XS_WATCH_EVENT;
51779+ hdr.len = body_len;
51780+
51781+ mutex_lock(&adap->dev_data->reply_mutex);
51782+ queue_reply(adap->dev_data, (char *)&hdr, sizeof(hdr));
51783+ queue_reply(adap->dev_data, (char *)path, path_len);
51784+ queue_reply(adap->dev_data, (char *)token, tok_len);
51785+ if (len > 2)
51786+ queue_reply(adap->dev_data, (char *)vec[2], data_len);
51787+ mutex_unlock(&adap->dev_data->reply_mutex);
51788+}
51789+
51790+static LIST_HEAD(watch_list);
51791+
51792+static ssize_t xenbus_dev_write(struct file *filp,
51793+ const char __user *ubuf,
51794+ size_t len, loff_t *ppos)
51795+{
51796+ struct xenbus_dev_data *u = filp->private_data;
51797+ struct xenbus_dev_transaction *trans = NULL;
51798+ uint32_t msg_type;
51799+ void *reply;
51800+ char *path, *token;
51801+ struct watch_adapter *watch, *tmp_watch;
51802+ int err, rc = len;
51803+
51804+ if ((len + u->len) > sizeof(u->u.buffer)) {
51805+ rc = -EINVAL;
51806+ goto out;
51807+ }
51808+
51809+ if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) {
51810+ rc = -EFAULT;
51811+ goto out;
51812+ }
51813+
51814+ u->len += len;
51815+ if ((u->len < sizeof(u->u.msg)) ||
51816+ (u->len < (sizeof(u->u.msg) + u->u.msg.len)))
51817+ return rc;
51818+
51819+ msg_type = u->u.msg.type;
51820+
51821+ switch (msg_type) {
51822+ case XS_TRANSACTION_START:
51823+ case XS_TRANSACTION_END:
51824+ case XS_DIRECTORY:
51825+ case XS_READ:
51826+ case XS_GET_PERMS:
51827+ case XS_RELEASE:
51828+ case XS_GET_DOMAIN_PATH:
51829+ case XS_WRITE:
51830+ case XS_MKDIR:
51831+ case XS_RM:
51832+ case XS_SET_PERMS:
51833+ if (msg_type == XS_TRANSACTION_START) {
51834+ trans = kmalloc(sizeof(*trans), GFP_KERNEL);
51835+ if (!trans) {
51836+ rc = -ENOMEM;
51837+ goto out;
51838+ }
51839+ }
51840+
51841+ reply = xenbus_dev_request_and_reply(&u->u.msg);
51842+ if (IS_ERR(reply)) {
51843+ kfree(trans);
51844+ rc = PTR_ERR(reply);
51845+ goto out;
51846+ }
51847+
51848+ if (msg_type == XS_TRANSACTION_START) {
51849+ trans->handle.id = simple_strtoul(reply, NULL, 0);
51850+ list_add(&trans->list, &u->transactions);
51851+ } else if (msg_type == XS_TRANSACTION_END) {
51852+ list_for_each_entry(trans, &u->transactions, list)
51853+ if (trans->handle.id == u->u.msg.tx_id)
51854+ break;
51855+ BUG_ON(&trans->list == &u->transactions);
51856+ list_del(&trans->list);
51857+ kfree(trans);
51858+ }
51859+ mutex_lock(&u->reply_mutex);
51860+ queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
51861+ queue_reply(u, (char *)reply, u->u.msg.len);
51862+ mutex_unlock(&u->reply_mutex);
51863+ kfree(reply);
51864+ break;
51865+
51866+ case XS_WATCH:
51867+ case XS_UNWATCH: {
51868+ static const char *XS_RESP = "OK";
51869+ struct xsd_sockmsg hdr;
51870+
51871+ path = u->u.buffer + sizeof(u->u.msg);
51872+ token = memchr(path, 0, u->u.msg.len);
51873+ if (token == NULL) {
51874+ rc = -EILSEQ;
51875+ goto out;
51876+ }
51877+ token++;
51878+
51879+ if (msg_type == XS_WATCH) {
51880+ watch = kzalloc(sizeof(*watch), GFP_KERNEL);
51881+ watch->watch.node = kmalloc(strlen(path)+1,
51882+ GFP_KERNEL);
51883+ strcpy((char *)watch->watch.node, path);
51884+ watch->watch.callback = watch_fired;
51885+ watch->token = kmalloc(strlen(token)+1, GFP_KERNEL);
51886+ strcpy(watch->token, token);
51887+ watch->dev_data = u;
51888+
51889+ err = register_xenbus_watch(&watch->watch);
51890+ if (err) {
51891+ free_watch_adapter(watch);
51892+ rc = err;
51893+ goto out;
51894+ }
51895+
51896+ list_add(&watch->list, &u->watches);
51897+ } else {
51898+ list_for_each_entry_safe(watch, tmp_watch,
51899+ &u->watches, list) {
51900+ if (!strcmp(watch->token, token) &&
51901+ !strcmp(watch->watch.node, path))
51902+ {
51903+ unregister_xenbus_watch(&watch->watch);
51904+ list_del(&watch->list);
51905+ free_watch_adapter(watch);
51906+ break;
51907+ }
51908+ }
51909+ }
51910+
51911+ hdr.type = msg_type;
51912+ hdr.len = strlen(XS_RESP) + 1;
51913+ mutex_lock(&u->reply_mutex);
51914+ queue_reply(u, (char *)&hdr, sizeof(hdr));
51915+ queue_reply(u, (char *)XS_RESP, hdr.len);
51916+ mutex_unlock(&u->reply_mutex);
51917+ break;
51918+ }
51919+
51920+ default:
51921+ rc = -EINVAL;
51922+ break;
51923+ }
51924+
51925+ out:
51926+ u->len = 0;
51927+ return rc;
51928+}
51929+
51930+static int xenbus_dev_open(struct inode *inode, struct file *filp)
51931+{
51932+ struct xenbus_dev_data *u;
51933+
51934+ if (xen_store_evtchn == 0)
51935+ return -ENOENT;
51936+
51937+ nonseekable_open(inode, filp);
51938+
51939+ u = kzalloc(sizeof(*u), GFP_KERNEL);
51940+ if (u == NULL)
51941+ return -ENOMEM;
51942+
51943+ INIT_LIST_HEAD(&u->transactions);
51944+ INIT_LIST_HEAD(&u->watches);
51945+ INIT_LIST_HEAD(&u->read_buffers);
51946+ init_waitqueue_head(&u->read_waitq);
51947+
51948+ mutex_init(&u->reply_mutex);
51949+
51950+ filp->private_data = u;
51951+
51952+ return 0;
51953+}
51954+
51955+static int xenbus_dev_release(struct inode *inode, struct file *filp)
51956+{
51957+ struct xenbus_dev_data *u = filp->private_data;
51958+ struct xenbus_dev_transaction *trans, *tmp;
51959+ struct watch_adapter *watch, *tmp_watch;
51960+
51961+ list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
51962+ xenbus_transaction_end(trans->handle, 1);
51963+ list_del(&trans->list);
51964+ kfree(trans);
51965+ }
51966+
51967+ list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
51968+ unregister_xenbus_watch(&watch->watch);
51969+ list_del(&watch->list);
51970+ free_watch_adapter(watch);
51971+ }
51972+
51973+ kfree(u);
51974+
51975+ return 0;
51976+}
51977+
51978+static unsigned int xenbus_dev_poll(struct file *file, poll_table *wait)
51979+{
51980+ struct xenbus_dev_data *u = file->private_data;
51981+
51982+ poll_wait(file, &u->read_waitq, wait);
51983+ if (!list_empty(&u->read_buffers))
51984+ return POLLIN | POLLRDNORM;
51985+ return 0;
51986+}
51987+
51988+static const struct file_operations xenbus_dev_file_ops = {
51989+ .read = xenbus_dev_read,
51990+ .write = xenbus_dev_write,
51991+ .open = xenbus_dev_open,
51992+ .release = xenbus_dev_release,
51993+ .poll = xenbus_dev_poll,
51994+};
51995+
51996+int xenbus_dev_init(void)
51997+{
51998+ xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400);
51999+ if (xenbus_dev_intf)
52000+ xenbus_dev_intf->proc_fops = &xenbus_dev_file_ops;
52001+
52002+ return 0;
52003+}
52004Index: head-2008-11-25/drivers/xen/xenbus/xenbus_probe_backend.c
52005===================================================================
52006--- /dev/null 1970-01-01 00:00:00.000000000 +0000
52007+++ head-2008-11-25/drivers/xen/xenbus/xenbus_probe_backend.c 2008-01-21 11:15:26.000000000 +0100
52008@@ -0,0 +1,292 @@
52009+/******************************************************************************
52010+ * Talks to Xen Store to figure out what devices we have (backend half).
52011+ *
52012+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
52013+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
52014+ * Copyright (C) 2005, 2006 XenSource Ltd
52015+ * Copyright (C) 2007 Solarflare Communications, Inc.
52016+ *
52017+ * This program is free software; you can redistribute it and/or
52018+ * modify it under the terms of the GNU General Public License version 2
52019+ * as published by the Free Software Foundation; or, when distributed
52020+ * separately from the Linux kernel or incorporated into other
52021+ * software packages, subject to the following license:
52022+ *
52023+ * Permission is hereby granted, free of charge, to any person obtaining a copy
52024+ * of this source file (the "Software"), to deal in the Software without
52025+ * restriction, including without limitation the rights to use, copy, modify,
52026+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
52027+ * and to permit persons to whom the Software is furnished to do so, subject to
52028+ * the following conditions:
52029+ *
52030+ * The above copyright notice and this permission notice shall be included in
52031+ * all copies or substantial portions of the Software.
52032+ *
52033+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
52034+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
52035+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52036+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
52037+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
52038+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
52039+ * IN THE SOFTWARE.
52040+ */
52041+
52042+#define DPRINTK(fmt, args...) \
52043+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
52044+ __FUNCTION__, __LINE__, ##args)
52045+
52046+#include <linux/kernel.h>
52047+#include <linux/err.h>
52048+#include <linux/string.h>
52049+#include <linux/ctype.h>
52050+#include <linux/fcntl.h>
52051+#include <linux/mm.h>
52052+#include <linux/notifier.h>
52053+
52054+#include <asm/io.h>
52055+#include <asm/page.h>
52056+#include <asm/maddr.h>
52057+#include <asm/pgtable.h>
52058+#include <asm/hypervisor.h>
52059+#include <xen/xenbus.h>
52060+#include <xen/xen_proc.h>
52061+#include <xen/evtchn.h>
52062+#include <xen/features.h>
52063+
52064+#include "xenbus_comms.h"
52065+#include "xenbus_probe.h"
52066+
52067+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
52068+#include <xen/platform-compat.h>
52069+#endif
52070+
52071+static int xenbus_uevent_backend(struct device *dev, char **envp,
52072+ int num_envp, char *buffer, int buffer_size);
52073+static int xenbus_probe_backend(const char *type, const char *domid);
52074+
52075+extern int read_otherend_details(struct xenbus_device *xendev,
52076+ char *id_node, char *path_node);
52077+
52078+static int read_frontend_details(struct xenbus_device *xendev)
52079+{
52080+ return read_otherend_details(xendev, "frontend-id", "frontend");
52081+}
52082+
52083+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
52084+static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
52085+{
52086+ int domid, err;
52087+ const char *devid, *type, *frontend;
52088+ unsigned int typelen;
52089+
52090+ type = strchr(nodename, '/');
52091+ if (!type)
52092+ return -EINVAL;
52093+ type++;
52094+ typelen = strcspn(type, "/");
52095+ if (!typelen || type[typelen] != '/')
52096+ return -EINVAL;
52097+
52098+ devid = strrchr(nodename, '/') + 1;
52099+
52100+ err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
52101+ "frontend", NULL, &frontend,
52102+ NULL);
52103+ if (err)
52104+ return err;
52105+ if (strlen(frontend) == 0)
52106+ err = -ERANGE;
52107+ if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
52108+ err = -ENOENT;
52109+ kfree(frontend);
52110+
52111+ if (err)
52112+ return err;
52113+
52114+ if (snprintf(bus_id, BUS_ID_SIZE,
52115+ "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
52116+ return -ENOSPC;
52117+ return 0;
52118+}
52119+
52120+static struct xen_bus_type xenbus_backend = {
52121+ .root = "backend",
52122+ .levels = 3, /* backend/type/<frontend>/<id> */
52123+ .get_bus_id = backend_bus_id,
52124+ .probe = xenbus_probe_backend,
52125+ .error = -ENODEV,
52126+ .bus = {
52127+ .name = "xen-backend",
52128+ .match = xenbus_match,
52129+ .probe = xenbus_dev_probe,
52130+ .remove = xenbus_dev_remove,
52131+// .shutdown = xenbus_dev_shutdown,
52132+ .uevent = xenbus_uevent_backend,
52133+ },
52134+ .dev = {
52135+ .bus_id = "xen-backend",
52136+ },
52137+};
52138+
52139+static int xenbus_uevent_backend(struct device *dev, char **envp,
52140+ int num_envp, char *buffer, int buffer_size)
52141+{
52142+ struct xenbus_device *xdev;
52143+ struct xenbus_driver *drv;
52144+ int i = 0;
52145+ int length = 0;
52146+
52147+ DPRINTK("");
52148+
52149+ if (dev == NULL)
52150+ return -ENODEV;
52151+
52152+ xdev = to_xenbus_device(dev);
52153+ if (xdev == NULL)
52154+ return -ENODEV;
52155+
52156+ /* stuff we want to pass to /sbin/hotplug */
52157+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
52158+ "XENBUS_TYPE=%s", xdev->devicetype);
52159+
52160+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
52161+ "XENBUS_PATH=%s", xdev->nodename);
52162+
52163+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
52164+ "XENBUS_BASE_PATH=%s", xenbus_backend.root);
52165+
52166+ /* terminate, set to next free slot, shrink available space */
52167+ envp[i] = NULL;
52168+ envp = &envp[i];
52169+ num_envp -= i;
52170+ buffer = &buffer[length];
52171+ buffer_size -= length;
52172+
52173+ if (dev->driver) {
52174+ drv = to_xenbus_driver(dev->driver);
52175+ if (drv && drv->uevent)
52176+ return drv->uevent(xdev, envp, num_envp, buffer,
52177+ buffer_size);
52178+ }
52179+
52180+ return 0;
52181+}
52182+
52183+int xenbus_register_backend(struct xenbus_driver *drv)
52184+{
52185+ drv->read_otherend_details = read_frontend_details;
52186+
52187+ return xenbus_register_driver_common(drv, &xenbus_backend);
52188+}
52189+EXPORT_SYMBOL_GPL(xenbus_register_backend);
52190+
52191+/* backend/<typename>/<frontend-uuid>/<name> */
52192+static int xenbus_probe_backend_unit(const char *dir,
52193+ const char *type,
52194+ const char *name)
52195+{
52196+ char *nodename;
52197+ int err;
52198+
52199+ nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
52200+ if (!nodename)
52201+ return -ENOMEM;
52202+
52203+ DPRINTK("%s\n", nodename);
52204+
52205+ err = xenbus_probe_node(&xenbus_backend, type, nodename);
52206+ kfree(nodename);
52207+ return err;
52208+}
52209+
52210+/* backend/<typename>/<frontend-domid> */
52211+static int xenbus_probe_backend(const char *type, const char *domid)
52212+{
52213+ char *nodename;
52214+ int err = 0;
52215+ char **dir;
52216+ unsigned int i, dir_n = 0;
52217+
52218+ DPRINTK("");
52219+
52220+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_backend.root, type, domid);
52221+ if (!nodename)
52222+ return -ENOMEM;
52223+
52224+ dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
52225+ if (IS_ERR(dir)) {
52226+ kfree(nodename);
52227+ return PTR_ERR(dir);
52228+ }
52229+
52230+ for (i = 0; i < dir_n; i++) {
52231+ err = xenbus_probe_backend_unit(nodename, type, dir[i]);
52232+ if (err)
52233+ break;
52234+ }
52235+ kfree(dir);
52236+ kfree(nodename);
52237+ return err;
52238+}
52239+
52240+static void backend_changed(struct xenbus_watch *watch,
52241+ const char **vec, unsigned int len)
52242+{
52243+ DPRINTK("");
52244+
52245+ dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
52246+}
52247+
52248+static struct xenbus_watch be_watch = {
52249+ .node = "backend",
52250+ .callback = backend_changed,
52251+};
52252+
52253+void xenbus_backend_suspend(int (*fn)(struct device *, void *))
52254+{
52255+ DPRINTK("");
52256+ if (!xenbus_backend.error)
52257+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
52258+}
52259+
52260+void xenbus_backend_resume(int (*fn)(struct device *, void *))
52261+{
52262+ DPRINTK("");
52263+ if (!xenbus_backend.error)
52264+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
52265+}
52266+
52267+void xenbus_backend_probe_and_watch(void)
52268+{
52269+ xenbus_probe_devices(&xenbus_backend);
52270+ register_xenbus_watch(&be_watch);
52271+}
52272+
52273+void xenbus_backend_bus_register(void)
52274+{
52275+ xenbus_backend.error = bus_register(&xenbus_backend.bus);
52276+ if (xenbus_backend.error)
52277+ printk(KERN_WARNING
52278+ "XENBUS: Error registering backend bus: %i\n",
52279+ xenbus_backend.error);
52280+}
52281+
52282+void xenbus_backend_device_register(void)
52283+{
52284+ if (xenbus_backend.error)
52285+ return;
52286+
52287+ xenbus_backend.error = device_register(&xenbus_backend.dev);
52288+ if (xenbus_backend.error) {
52289+ bus_unregister(&xenbus_backend.bus);
52290+ printk(KERN_WARNING
52291+ "XENBUS: Error registering backend device: %i\n",
52292+ xenbus_backend.error);
52293+ }
52294+}
52295+
52296+int xenbus_for_each_backend(void *arg, int (*fn)(struct device *, void *))
52297+{
52298+ return bus_for_each_dev(&xenbus_backend.bus, NULL, arg, fn);
52299+}
52300+EXPORT_SYMBOL_GPL(xenbus_for_each_backend);
52301Index: head-2008-11-25/drivers/xen/xenoprof/xenoprofile.c
52302===================================================================
52303--- /dev/null 1970-01-01 00:00:00.000000000 +0000
52304+++ head-2008-11-25/drivers/xen/xenoprof/xenoprofile.c 2008-09-15 13:40:15.000000000 +0200
52305@@ -0,0 +1,545 @@
52306+/**
52307+ * @file xenoprofile.c
52308+ *
52309+ * @remark Copyright 2002 OProfile authors
52310+ * @remark Read the file COPYING
52311+ *
52312+ * @author John Levon <levon@movementarian.org>
52313+ *
52314+ * Modified by Aravind Menon and Jose Renato Santos for Xen
52315+ * These modifications are:
52316+ * Copyright (C) 2005 Hewlett-Packard Co.
52317+ *
52318+ * Separated out arch-generic part
52319+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
52320+ * VA Linux Systems Japan K.K.
52321+ */
52322+
52323+#include <linux/init.h>
52324+#include <linux/notifier.h>
52325+#include <linux/smp.h>
52326+#include <linux/oprofile.h>
52327+#include <linux/sysdev.h>
52328+#include <linux/slab.h>
52329+#include <linux/interrupt.h>
52330+#include <linux/vmalloc.h>
52331+#include <asm/pgtable.h>
52332+#include <xen/evtchn.h>
52333+#include <xen/xenoprof.h>
52334+#include <xen/driver_util.h>
52335+#include <xen/interface/xen.h>
52336+#include <xen/interface/xenoprof.h>
52337+#include "../../../drivers/oprofile/cpu_buffer.h"
52338+#include "../../../drivers/oprofile/event_buffer.h"
52339+
52340+#define MAX_XENOPROF_SAMPLES 16
52341+
52342+/* sample buffers shared with Xen */
52343+static xenoprof_buf_t *xenoprof_buf[MAX_VIRT_CPUS];
52344+/* Shared buffer area */
52345+static struct xenoprof_shared_buffer shared_buffer;
52346+
52347+/* Passive sample buffers shared with Xen */
52348+static xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS];
52349+/* Passive shared buffer area */
52350+static struct xenoprof_shared_buffer p_shared_buffer[MAX_OPROF_DOMAINS];
52351+
52352+static int xenoprof_start(void);
52353+static void xenoprof_stop(void);
52354+
52355+static int xenoprof_enabled = 0;
52356+static int xenoprof_is_primary = 0;
52357+static int active_defined;
52358+
52359+extern unsigned long backtrace_depth;
52360+
52361+/* Number of buffers in shared area (one per VCPU) */
52362+static int nbuf;
52363+/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
52364+static int ovf_irq[NR_CPUS];
52365+/* cpu model type string - copied from Xen on XENOPROF_init command */
52366+static char cpu_type[XENOPROF_CPU_TYPE_SIZE];
52367+
52368+#ifdef CONFIG_PM
52369+
52370+static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
52371+{
52372+ if (xenoprof_enabled == 1)
52373+ xenoprof_stop();
52374+ return 0;
52375+}
52376+
52377+
52378+static int xenoprof_resume(struct sys_device * dev)
52379+{
52380+ if (xenoprof_enabled == 1)
52381+ xenoprof_start();
52382+ return 0;
52383+}
52384+
52385+
52386+static struct sysdev_class oprofile_sysclass = {
52387+ set_kset_name("oprofile"),
52388+ .resume = xenoprof_resume,
52389+ .suspend = xenoprof_suspend
52390+};
52391+
52392+
52393+static struct sys_device device_oprofile = {
52394+ .id = 0,
52395+ .cls = &oprofile_sysclass,
52396+};
52397+
52398+
52399+static int __init init_driverfs(void)
52400+{
52401+ int error;
52402+ if (!(error = sysdev_class_register(&oprofile_sysclass)))
52403+ error = sysdev_register(&device_oprofile);
52404+ return error;
52405+}
52406+
52407+
52408+static void exit_driverfs(void)
52409+{
52410+ sysdev_unregister(&device_oprofile);
52411+ sysdev_class_unregister(&oprofile_sysclass);
52412+}
52413+
52414+#else
52415+#define init_driverfs() do { } while (0)
52416+#define exit_driverfs() do { } while (0)
52417+#endif /* CONFIG_PM */
52418+
52419+static unsigned long long oprofile_samples;
52420+static unsigned long long p_oprofile_samples;
52421+
52422+static unsigned int pdomains;
52423+static struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS];
52424+
52425+/* Check whether the given entry is an escape code */
52426+static int xenoprof_is_escape(xenoprof_buf_t * buf, int tail)
52427+{
52428+ return (buf->event_log[tail].eip == XENOPROF_ESCAPE_CODE);
52429+}
52430+
52431+/* Get the event at the given entry */
52432+static uint8_t xenoprof_get_event(xenoprof_buf_t * buf, int tail)
52433+{
52434+ return (buf->event_log[tail].event);
52435+}
52436+
52437+static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive)
52438+{
52439+ int head, tail, size;
52440+ int tracing = 0;
52441+
52442+ head = buf->event_head;
52443+ tail = buf->event_tail;
52444+ size = buf->event_size;
52445+
52446+ while (tail != head) {
52447+ if (xenoprof_is_escape(buf, tail) &&
52448+ xenoprof_get_event(buf, tail) == XENOPROF_TRACE_BEGIN) {
52449+ tracing=1;
52450+ oprofile_add_pc(ESCAPE_CODE, buf->event_log[tail].mode,
52451+ CPU_TRACE_BEGIN);
52452+ if (!is_passive)
52453+ oprofile_samples++;
52454+ else
52455+ p_oprofile_samples++;
52456+
52457+ } else {
52458+ oprofile_add_pc(buf->event_log[tail].eip,
52459+ buf->event_log[tail].mode,
52460+ buf->event_log[tail].event);
52461+ if (!tracing) {
52462+ if (!is_passive)
52463+ oprofile_samples++;
52464+ else
52465+ p_oprofile_samples++;
52466+ }
52467+
52468+ }
52469+ tail++;
52470+ if(tail==size)
52471+ tail=0;
52472+ }
52473+ buf->event_tail = tail;
52474+}
52475+
52476+static void xenoprof_handle_passive(void)
52477+{
52478+ int i, j;
52479+ int flag_domain, flag_switch = 0;
52480+
52481+ for (i = 0; i < pdomains; i++) {
52482+ flag_domain = 0;
52483+ for (j = 0; j < passive_domains[i].nbuf; j++) {
52484+ xenoprof_buf_t *buf = p_xenoprof_buf[i][j];
52485+ if (buf->event_head == buf->event_tail)
52486+ continue;
52487+ if (!flag_domain) {
52488+ if (!oprofile_add_domain_switch(
52489+ passive_domains[i].domain_id))
52490+ goto done;
52491+ flag_domain = 1;
52492+ }
52493+ xenoprof_add_pc(buf, 1);
52494+ flag_switch = 1;
52495+ }
52496+ }
52497+done:
52498+ if (flag_switch)
52499+ oprofile_add_domain_switch(COORDINATOR_DOMAIN);
52500+}
52501+
52502+static irqreturn_t
52503+xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
52504+{
52505+ struct xenoprof_buf * buf;
52506+ static unsigned long flag;
52507+
52508+ buf = xenoprof_buf[smp_processor_id()];
52509+
52510+ xenoprof_add_pc(buf, 0);
52511+
52512+ if (xenoprof_is_primary && !test_and_set_bit(0, &flag)) {
52513+ xenoprof_handle_passive();
52514+ smp_mb__before_clear_bit();
52515+ clear_bit(0, &flag);
52516+ }
52517+
52518+ return IRQ_HANDLED;
52519+}
52520+
52521+
52522+static void unbind_virq(void)
52523+{
52524+ unsigned int i;
52525+
52526+ for_each_online_cpu(i) {
52527+ if (ovf_irq[i] >= 0) {
52528+ unbind_from_irqhandler(ovf_irq[i], NULL);
52529+ ovf_irq[i] = -1;
52530+ }
52531+ }
52532+}
52533+
52534+
52535+static int bind_virq(void)
52536+{
52537+ unsigned int i;
52538+ int result;
52539+
52540+ for_each_online_cpu(i) {
52541+ result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
52542+ i,
52543+ xenoprof_ovf_interrupt,
52544+ SA_INTERRUPT,
52545+ "xenoprof",
52546+ NULL);
52547+
52548+ if (result < 0) {
52549+ unbind_virq();
52550+ return result;
52551+ }
52552+
52553+ ovf_irq[i] = result;
52554+ }
52555+
52556+ return 0;
52557+}
52558+
52559+
52560+static void unmap_passive_list(void)
52561+{
52562+ int i;
52563+ for (i = 0; i < pdomains; i++)
52564+ xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]);
52565+ pdomains = 0;
52566+}
52567+
52568+
52569+static int map_xenoprof_buffer(int max_samples)
52570+{
52571+ struct xenoprof_get_buffer get_buffer;
52572+ struct xenoprof_buf *buf;
52573+ int ret, i;
52574+
52575+ if ( shared_buffer.buffer )
52576+ return 0;
52577+
52578+ get_buffer.max_samples = max_samples;
52579+ ret = xenoprof_arch_map_shared_buffer(&get_buffer, &shared_buffer);
52580+ if (ret)
52581+ return ret;
52582+ nbuf = get_buffer.nbuf;
52583+
52584+ for (i=0; i< nbuf; i++) {
52585+ buf = (struct xenoprof_buf*)
52586+ &shared_buffer.buffer[i * get_buffer.bufsize];
52587+ BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
52588+ xenoprof_buf[buf->vcpu_id] = buf;
52589+ }
52590+
52591+ return 0;
52592+}
52593+
52594+
52595+static int xenoprof_setup(void)
52596+{
52597+ int ret;
52598+
52599+ if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) )
52600+ return ret;
52601+
52602+ if ( (ret = bind_virq()) )
52603+ return ret;
52604+
52605+ if (xenoprof_is_primary) {
52606+ /* Define dom0 as an active domain if not done yet */
52607+ if (!active_defined) {
52608+ domid_t domid;
52609+ ret = HYPERVISOR_xenoprof_op(
52610+ XENOPROF_reset_active_list, NULL);
52611+ if (ret)
52612+ goto err;
52613+ domid = 0;
52614+ ret = HYPERVISOR_xenoprof_op(
52615+ XENOPROF_set_active, &domid);
52616+ if (ret)
52617+ goto err;
52618+ active_defined = 1;
52619+ }
52620+
52621+ if (backtrace_depth > 0) {
52622+ ret = HYPERVISOR_xenoprof_op(XENOPROF_set_backtrace,
52623+ &backtrace_depth);
52624+ if (ret)
52625+ backtrace_depth = 0;
52626+ }
52627+
52628+ ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL);
52629+ if (ret)
52630+ goto err;
52631+
52632+ xenoprof_arch_counter();
52633+ ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL);
52634+ if (ret)
52635+ goto err;
52636+ }
52637+
52638+ ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL);
52639+ if (ret)
52640+ goto err;
52641+
52642+ xenoprof_enabled = 1;
52643+ return 0;
52644+ err:
52645+ unbind_virq();
52646+ return ret;
52647+}
52648+
52649+
52650+static void xenoprof_shutdown(void)
52651+{
52652+ xenoprof_enabled = 0;
52653+
52654+ WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL));
52655+
52656+ if (xenoprof_is_primary) {
52657+ WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_release_counters,
52658+ NULL));
52659+ active_defined = 0;
52660+ }
52661+
52662+ unbind_virq();
52663+
52664+ xenoprof_arch_unmap_shared_buffer(&shared_buffer);
52665+ if (xenoprof_is_primary)
52666+ unmap_passive_list();
52667+}
52668+
52669+
52670+static int xenoprof_start(void)
52671+{
52672+ int ret = 0;
52673+
52674+ if (xenoprof_is_primary)
52675+ ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL);
52676+ if (!ret)
52677+ xenoprof_arch_start();
52678+ return ret;
52679+}
52680+
52681+
52682+static void xenoprof_stop(void)
52683+{
52684+ if (xenoprof_is_primary)
52685+ WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL));
52686+ xenoprof_arch_stop();
52687+}
52688+
52689+
52690+static int xenoprof_set_active(int * active_domains,
52691+ unsigned int adomains)
52692+{
52693+ int ret = 0;
52694+ int i;
52695+ int set_dom0 = 0;
52696+ domid_t domid;
52697+
52698+ if (!xenoprof_is_primary)
52699+ return 0;
52700+
52701+ if (adomains > MAX_OPROF_DOMAINS)
52702+ return -E2BIG;
52703+
52704+ ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
52705+ if (ret)
52706+ return ret;
52707+
52708+ for (i=0; i<adomains; i++) {
52709+ domid = active_domains[i];
52710+ if (domid != active_domains[i]) {
52711+ ret = -EINVAL;
52712+ goto out;
52713+ }
52714+ ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
52715+ if (ret)
52716+ goto out;
52717+ if (active_domains[i] == 0)
52718+ set_dom0 = 1;
52719+ }
52720+ /* dom0 must always be active but may not be in the list */
52721+ if (!set_dom0) {
52722+ domid = 0;
52723+ ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
52724+ }
52725+
52726+out:
52727+ if (ret)
52728+ WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list,
52729+ NULL));
52730+ active_defined = !ret;
52731+ return ret;
52732+}
52733+
52734+static int xenoprof_set_passive(int * p_domains,
52735+ unsigned int pdoms)
52736+{
52737+ int ret;
52738+ unsigned int i, j;
52739+ struct xenoprof_buf *buf;
52740+
52741+ if (!xenoprof_is_primary)
52742+ return 0;
52743+
52744+ if (pdoms > MAX_OPROF_DOMAINS)
52745+ return -E2BIG;
52746+
52747+ ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL);
52748+ if (ret)
52749+ return ret;
52750+ unmap_passive_list();
52751+
52752+ for (i = 0; i < pdoms; i++) {
52753+ passive_domains[i].domain_id = p_domains[i];
52754+ passive_domains[i].max_samples = 2048;
52755+ ret = xenoprof_arch_set_passive(&passive_domains[i],
52756+ &p_shared_buffer[i]);
52757+ if (ret)
52758+ goto out;
52759+ for (j = 0; j < passive_domains[i].nbuf; j++) {
52760+ buf = (struct xenoprof_buf *)
52761+ &p_shared_buffer[i].buffer[
52762+ j * passive_domains[i].bufsize];
52763+ BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
52764+ p_xenoprof_buf[i][buf->vcpu_id] = buf;
52765+ }
52766+ }
52767+
52768+ pdomains = pdoms;
52769+ return 0;
52770+
52771+out:
52772+ for (j = 0; j < i; j++)
52773+ xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]);
52774+
52775+ return ret;
52776+}
52777+
52778+
52779+/* The dummy backtrace function to keep oprofile happy.
52780+ * The real backtrace is done in Xen.
52781+ */
52782+static void xenoprof_dummy_backtrace(struct pt_regs * const regs,
52783+ unsigned int depth)
52784+{
52785+ /* this should never be called */
52786+ BUG();
52787+ return;
52788+}
52789+
52790+
52791+static struct oprofile_operations xenoprof_ops = {
52792+#ifdef HAVE_XENOPROF_CREATE_FILES
52793+ .create_files = xenoprof_create_files,
52794+#endif
52795+ .set_active = xenoprof_set_active,
52796+ .set_passive = xenoprof_set_passive,
52797+ .setup = xenoprof_setup,
52798+ .shutdown = xenoprof_shutdown,
52799+ .start = xenoprof_start,
52800+ .stop = xenoprof_stop,
52801+ .backtrace = xenoprof_dummy_backtrace
52802+};
52803+
52804+
52805+/* in order to get driverfs right */
52806+static int using_xenoprof;
52807+
52808+int __init xenoprofile_init(struct oprofile_operations * ops)
52809+{
52810+ struct xenoprof_init init;
52811+ unsigned int i;
52812+ int ret;
52813+
52814+ ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
52815+ if (!ret) {
52816+ xenoprof_arch_init_counter(&init);
52817+ xenoprof_is_primary = init.is_primary;
52818+
52819+ /* cpu_type is detected by Xen */
52820+ cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
52821+ strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
52822+ xenoprof_ops.cpu_type = cpu_type;
52823+
52824+ init_driverfs();
52825+ using_xenoprof = 1;
52826+ *ops = xenoprof_ops;
52827+
52828+ for (i=0; i<NR_CPUS; i++)
52829+ ovf_irq[i] = -1;
52830+
52831+ active_defined = 0;
52832+ }
52833+
52834+ printk(KERN_INFO "%s: ret %d, events %d, xenoprof_is_primary %d\n",
52835+ __func__, ret, init.num_events, xenoprof_is_primary);
52836+ return ret;
52837+}
52838+
52839+
52840+void xenoprofile_exit(void)
52841+{
52842+ if (using_xenoprof)
52843+ exit_driverfs();
52844+
52845+ xenoprof_arch_unmap_shared_buffer(&shared_buffer);
52846+ if (xenoprof_is_primary) {
52847+ unmap_passive_list();
52848+ WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL));
52849+ }
52850+}