1 Subject: xen3 xen-drivers
2 From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd)
3 Patch-mainline: obsolete
4 Acked-by: jbeulich@novell.com
5
6 Index: head-2008-11-25/drivers/xen/balloon/Makefile
7 ===================================================================
8 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
9 +++ head-2008-11-25/drivers/xen/balloon/Makefile 2007-06-12 13:13:44.000000000 +0200
10 @@ -0,0 +1,2 @@
11 +
12 +obj-y := balloon.o sysfs.o
13 Index: head-2008-11-25/drivers/xen/balloon/balloon.c
14 ===================================================================
15 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
16 +++ head-2008-11-25/drivers/xen/balloon/balloon.c 2008-07-21 11:00:33.000000000 +0200
17 @@ -0,0 +1,724 @@
18 +/******************************************************************************
19 + * balloon.c
20 + *
21 + * Xen balloon driver - enables returning/claiming memory to/from Xen.
22 + *
23 + * Copyright (c) 2003, B Dragovic
24 + * Copyright (c) 2003-2004, M Williamson, K Fraser
25 + * Copyright (c) 2005 Dan M. Smith, IBM Corporation
26 + *
27 + * This program is free software; you can redistribute it and/or
28 + * modify it under the terms of the GNU General Public License version 2
29 + * as published by the Free Software Foundation; or, when distributed
30 + * separately from the Linux kernel or incorporated into other
31 + * software packages, subject to the following license:
32 + *
33 + * Permission is hereby granted, free of charge, to any person obtaining a copy
34 + * of this source file (the "Software"), to deal in the Software without
35 + * restriction, including without limitation the rights to use, copy, modify,
36 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
37 + * and to permit persons to whom the Software is furnished to do so, subject to
38 + * the following conditions:
39 + *
40 + * The above copyright notice and this permission notice shall be included in
41 + * all copies or substantial portions of the Software.
42 + *
43 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
44 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
45 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
46 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
47 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
48 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
49 + * IN THE SOFTWARE.
50 + */
51 +
52 +#include <linux/kernel.h>
53 +#include <linux/module.h>
54 +#include <linux/sched.h>
55 +#include <linux/errno.h>
56 +#include <linux/mm.h>
57 +#include <linux/mman.h>
58 +#include <linux/smp_lock.h>
59 +#include <linux/pagemap.h>
60 +#include <linux/bootmem.h>
61 +#include <linux/highmem.h>
62 +#include <linux/vmalloc.h>
63 +#include <linux/mutex.h>
64 +#include <xen/xen_proc.h>
65 +#include <asm/hypervisor.h>
66 +#include <xen/balloon.h>
67 +#include <xen/interface/memory.h>
68 +#include <asm/maddr.h>
69 +#include <asm/page.h>
70 +#include <asm/pgalloc.h>
71 +#include <asm/pgtable.h>
72 +#include <asm/uaccess.h>
73 +#include <asm/tlb.h>
74 +#include <linux/highmem.h>
75 +#include <linux/list.h>
76 +#include <xen/xenbus.h>
77 +#include "common.h"
78 +
79 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
80 +#include <xen/platform-compat.h>
81 +#endif
82 +
83 +#ifdef CONFIG_PROC_FS
84 +static struct proc_dir_entry *balloon_pde;
85 +#endif
86 +
87 +static DEFINE_MUTEX(balloon_mutex);
88 +
89 +/*
90 + * Protects atomic reservation decrease/increase against concurrent increases.
91 + * Also protects non-atomic updates of current_pages and driver_pages, and
92 + * balloon lists.
93 + */
94 +DEFINE_SPINLOCK(balloon_lock);
95 +
96 +struct balloon_stats balloon_stats;
97 +
98 +/* We increase/decrease in batches which fit in a page */
99 +static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
100 +
101 +/* VM /proc information for memory */
102 +extern unsigned long totalram_pages;
103 +
104 +#ifndef MODULE
105 +extern unsigned long totalhigh_pages;
106 +#define inc_totalhigh_pages() (totalhigh_pages++)
107 +#define dec_totalhigh_pages() (totalhigh_pages--)
108 +#else
109 +#define inc_totalhigh_pages() ((void)0)
110 +#define dec_totalhigh_pages() ((void)0)
111 +#endif
112 +
113 +/* List of ballooned pages, threaded through the mem_map array. */
114 +static LIST_HEAD(ballooned_pages);
115 +
116 +/* Main work function, always executed in process context. */
117 +static void balloon_process(void *unused);
118 +static DECLARE_WORK(balloon_worker, balloon_process, NULL);
119 +static struct timer_list balloon_timer;
120 +
121 +/* When ballooning out (allocating memory to return to Xen) we don't really
122 + want the kernel to try too hard since that can trigger the oom killer. */
123 +#define GFP_BALLOON \
124 + (GFP_HIGHUSER|__GFP_NOWARN|__GFP_NORETRY|__GFP_NOMEMALLOC|__GFP_COLD)
125 +
126 +#define PAGE_TO_LIST(p) (&(p)->lru)
127 +#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
128 +#define UNLIST_PAGE(p) \
129 + do { \
130 + list_del(PAGE_TO_LIST(p)); \
131 + PAGE_TO_LIST(p)->next = NULL; \
132 + PAGE_TO_LIST(p)->prev = NULL; \
133 + } while(0)
134 +
135 +#define IPRINTK(fmt, args...) \
136 + printk(KERN_INFO "xen_mem: " fmt, ##args)
137 +#define WPRINTK(fmt, args...) \
138 + printk(KERN_WARNING "xen_mem: " fmt, ##args)
139 +
140 +/* balloon_append: add the given page to the balloon. */
141 +static void balloon_append(struct page *page)
142 +{
143 + /* Lowmem is re-populated first, so highmem pages go at list tail. */
144 + if (PageHighMem(page)) {
145 + list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
146 + bs.balloon_high++;
147 + dec_totalhigh_pages();
148 + } else {
149 + list_add(PAGE_TO_LIST(page), &ballooned_pages);
150 + bs.balloon_low++;
151 + }
152 +}
153 +
154 +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
155 +static struct page *balloon_retrieve(void)
156 +{
157 + struct page *page;
158 +
159 + if (list_empty(&ballooned_pages))
160 + return NULL;
161 +
162 + page = LIST_TO_PAGE(ballooned_pages.next);
163 + UNLIST_PAGE(page);
164 +
165 + if (PageHighMem(page)) {
166 + bs.balloon_high--;
167 + inc_totalhigh_pages();
168 + }
169 + else
170 + bs.balloon_low--;
171 +
172 + return page;
173 +}
174 +
175 +static struct page *balloon_first_page(void)
176 +{
177 + if (list_empty(&ballooned_pages))
178 + return NULL;
179 + return LIST_TO_PAGE(ballooned_pages.next);
180 +}
181 +
182 +static struct page *balloon_next_page(struct page *page)
183 +{
184 + struct list_head *next = PAGE_TO_LIST(page)->next;
185 + if (next == &ballooned_pages)
186 + return NULL;
187 + return LIST_TO_PAGE(next);
188 +}
189 +
190 +static inline void balloon_free_page(struct page *page)
191 +{
192 +#ifndef MODULE
193 + if (put_page_testzero(page))
194 + free_cold_page(page);
195 +#else
196 + /* free_cold_page() is not being exported. */
197 + __free_page(page);
198 +#endif
199 +}
200 +
201 +static void balloon_alarm(unsigned long unused)
202 +{
203 + schedule_work(&balloon_worker);
204 +}
205 +
206 +static unsigned long current_target(void)
207 +{
208 + unsigned long target = min(bs.target_pages, bs.hard_limit);
209 + if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
210 + target = bs.current_pages + bs.balloon_low + bs.balloon_high;
211 + return target;
212 +}
213 +
214 +static unsigned long minimum_target(void)
215 +{
216 +#ifndef CONFIG_XEN
217 +#define max_pfn num_physpages
218 +#endif
219 + unsigned long min_pages, curr_pages = current_target();
220 +
221 +#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
 222 + * Simple continuous piecewise linear function:
223 + * max MiB -> min MiB gradient
224 + * 0 0
225 + * 16 16
226 + * 32 24
227 + * 128 72 (1/2)
228 + * 512 168 (1/4)
229 + * 2048 360 (1/8)
230 + * 8192 552 (1/32)
231 + * 32768 1320
232 + * 131072 4392
233 + */
234 + if (max_pfn < MB2PAGES(128))
235 + min_pages = MB2PAGES(8) + (max_pfn >> 1);
236 + else if (max_pfn < MB2PAGES(512))
237 + min_pages = MB2PAGES(40) + (max_pfn >> 2);
238 + else if (max_pfn < MB2PAGES(2048))
239 + min_pages = MB2PAGES(104) + (max_pfn >> 3);
240 + else
241 + min_pages = MB2PAGES(296) + (max_pfn >> 5);
242 +#undef MB2PAGES
243 +
244 + /* Don't enforce growth */
245 + return min(min_pages, curr_pages);
246 +#ifndef CONFIG_XEN
247 +#undef max_pfn
248 +#endif
249 +}
250 +
251 +static int increase_reservation(unsigned long nr_pages)
252 +{
253 + unsigned long pfn, i, flags;
254 + struct page *page;
255 + long rc;
256 + struct xen_memory_reservation reservation = {
257 + .address_bits = 0,
258 + .extent_order = 0,
259 + .domid = DOMID_SELF
260 + };
261 +
262 + if (nr_pages > ARRAY_SIZE(frame_list))
263 + nr_pages = ARRAY_SIZE(frame_list);
264 +
265 + balloon_lock(flags);
266 +
267 + page = balloon_first_page();
268 + for (i = 0; i < nr_pages; i++) {
269 + BUG_ON(page == NULL);
 270 +		frame_list[i] = page_to_pfn(page);
271 + page = balloon_next_page(page);
272 + }
273 +
274 + set_xen_guest_handle(reservation.extent_start, frame_list);
275 + reservation.nr_extents = nr_pages;
276 + rc = HYPERVISOR_memory_op(
277 + XENMEM_populate_physmap, &reservation);
278 + if (rc < nr_pages) {
279 + if (rc > 0) {
280 + int ret;
281 +
282 + /* We hit the Xen hard limit: reprobe. */
283 + reservation.nr_extents = rc;
284 + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
285 + &reservation);
286 + BUG_ON(ret != rc);
287 + }
288 + if (rc >= 0)
289 + bs.hard_limit = (bs.current_pages + rc -
290 + bs.driver_pages);
291 + goto out;
292 + }
293 +
294 + for (i = 0; i < nr_pages; i++) {
295 + page = balloon_retrieve();
296 + BUG_ON(page == NULL);
297 +
298 + pfn = page_to_pfn(page);
299 + BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
300 + phys_to_machine_mapping_valid(pfn));
301 +
302 + set_phys_to_machine(pfn, frame_list[i]);
303 +
304 +#ifdef CONFIG_XEN
305 + /* Link back into the page tables if not highmem. */
306 + if (pfn < max_low_pfn) {
307 + int ret;
308 + ret = HYPERVISOR_update_va_mapping(
309 + (unsigned long)__va(pfn << PAGE_SHIFT),
310 + pfn_pte_ma(frame_list[i], PAGE_KERNEL),
311 + 0);
312 + BUG_ON(ret);
313 + }
314 +#endif
315 +
316 + /* Relinquish the page back to the allocator. */
317 + ClearPageReserved(page);
318 + init_page_count(page);
319 + balloon_free_page(page);
320 + }
321 +
322 + bs.current_pages += nr_pages;
323 + totalram_pages = bs.current_pages;
324 +
325 + out:
326 + balloon_unlock(flags);
327 +
328 + return 0;
329 +}
330 +
331 +static int decrease_reservation(unsigned long nr_pages)
332 +{
333 + unsigned long pfn, i, flags;
334 + struct page *page;
335 + void *v;
336 + int need_sleep = 0;
337 + int ret;
338 + struct xen_memory_reservation reservation = {
339 + .address_bits = 0,
340 + .extent_order = 0,
341 + .domid = DOMID_SELF
342 + };
343 +
344 + if (nr_pages > ARRAY_SIZE(frame_list))
345 + nr_pages = ARRAY_SIZE(frame_list);
346 +
347 + for (i = 0; i < nr_pages; i++) {
348 + if ((page = alloc_page(GFP_BALLOON)) == NULL) {
349 + nr_pages = i;
350 + need_sleep = 1;
351 + break;
352 + }
353 +
354 + pfn = page_to_pfn(page);
355 + frame_list[i] = pfn_to_mfn(pfn);
356 +
357 + if (!PageHighMem(page)) {
358 + v = phys_to_virt(pfn << PAGE_SHIFT);
359 + scrub_pages(v, 1);
360 +#ifdef CONFIG_XEN
361 + ret = HYPERVISOR_update_va_mapping(
362 + (unsigned long)v, __pte_ma(0), 0);
363 + BUG_ON(ret);
364 +#endif
365 + }
366 +#ifdef CONFIG_XEN_SCRUB_PAGES
367 + else {
368 + v = kmap(page);
369 + scrub_pages(v, 1);
370 + kunmap(page);
371 + }
372 +#endif
373 + }
374 +
375 +#ifdef CONFIG_XEN
376 + /* Ensure that ballooned highmem pages don't have kmaps. */
377 + kmap_flush_unused();
378 + flush_tlb_all();
379 +#endif
380 +
381 + balloon_lock(flags);
382 +
383 + /* No more mappings: invalidate P2M and add to balloon. */
384 + for (i = 0; i < nr_pages; i++) {
385 + pfn = mfn_to_pfn(frame_list[i]);
386 + set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
387 + balloon_append(pfn_to_page(pfn));
388 + }
389 +
390 + set_xen_guest_handle(reservation.extent_start, frame_list);
391 + reservation.nr_extents = nr_pages;
392 + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
393 + BUG_ON(ret != nr_pages);
394 +
395 + bs.current_pages -= nr_pages;
396 + totalram_pages = bs.current_pages;
397 +
398 + balloon_unlock(flags);
399 +
400 + return need_sleep;
401 +}
402 +
403 +/*
404 + * We avoid multiple worker processes conflicting via the balloon mutex.
405 + * We may of course race updates of the target counts (which are protected
406 + * by the balloon lock), or with changes to the Xen hard limit, but we will
407 + * recover from these in time.
408 + */
409 +static void balloon_process(void *unused)
410 +{
411 + int need_sleep = 0;
412 + long credit;
413 +
414 + mutex_lock(&balloon_mutex);
415 +
416 + do {
417 + credit = current_target() - bs.current_pages;
418 + if (credit > 0)
419 + need_sleep = (increase_reservation(credit) != 0);
420 + if (credit < 0)
421 + need_sleep = (decrease_reservation(-credit) != 0);
422 +
423 +#ifndef CONFIG_PREEMPT
424 + if (need_resched())
425 + schedule();
426 +#endif
427 + } while ((credit != 0) && !need_sleep);
428 +
429 + /* Schedule more work if there is some still to be done. */
430 + if (current_target() != bs.current_pages)
431 + mod_timer(&balloon_timer, jiffies + HZ);
432 +
433 + mutex_unlock(&balloon_mutex);
434 +}
435 +
436 +/* Resets the Xen limit, sets new target, and kicks off processing. */
437 +void balloon_set_new_target(unsigned long target)
438 +{
439 + /* No need for lock. Not read-modify-write updates. */
440 + bs.hard_limit = ~0UL;
441 + bs.target_pages = max(target, minimum_target());
442 + schedule_work(&balloon_worker);
443 +}
444 +
445 +static struct xenbus_watch target_watch =
446 +{
447 + .node = "memory/target"
448 +};
449 +
450 +/* React to a change in the target key */
451 +static void watch_target(struct xenbus_watch *watch,
452 + const char **vec, unsigned int len)
453 +{
454 + unsigned long long new_target;
455 + int err;
456 +
457 + err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
458 + if (err != 1) {
459 + /* This is ok (for domain0 at least) - so just return */
460 + return;
461 + }
462 +
463 + /* The given memory/target value is in KiB, so it needs converting to
464 + * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
465 + */
466 + balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
467 +}
468 +
469 +static int balloon_init_watcher(struct notifier_block *notifier,
470 + unsigned long event,
471 + void *data)
472 +{
473 + int err;
474 +
475 + err = register_xenbus_watch(&target_watch);
476 + if (err)
477 + printk(KERN_ERR "Failed to set balloon watcher\n");
478 +
479 + return NOTIFY_DONE;
480 +}
481 +
482 +#ifdef CONFIG_PROC_FS
483 +static int balloon_write(struct file *file, const char __user *buffer,
484 + unsigned long count, void *data)
485 +{
486 + char memstring[64], *endchar;
487 + unsigned long long target_bytes;
488 +
489 + if (!capable(CAP_SYS_ADMIN))
490 + return -EPERM;
491 +
492 + if (count <= 1)
493 + return -EBADMSG; /* runt */
494 + if (count > sizeof(memstring))
495 + return -EFBIG; /* too long */
496 +
497 + if (copy_from_user(memstring, buffer, count))
498 + return -EFAULT;
499 + memstring[sizeof(memstring)-1] = '\0';
500 +
501 + target_bytes = memparse(memstring, &endchar);
502 + balloon_set_new_target(target_bytes >> PAGE_SHIFT);
503 +
504 + return count;
505 +}
506 +
507 +static int balloon_read(char *page, char **start, off_t off,
508 + int count, int *eof, void *data)
509 +{
510 + int len;
511 +
512 + len = sprintf(
513 + page,
514 + "Current allocation: %8lu kB\n"
515 + "Requested target: %8lu kB\n"
516 + "Low-mem balloon: %8lu kB\n"
517 + "High-mem balloon: %8lu kB\n"
518 + "Driver pages: %8lu kB\n"
519 + "Xen hard limit: ",
520 + PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages),
521 + PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high),
522 + PAGES2KB(bs.driver_pages));
523 +
524 + if (bs.hard_limit != ~0UL)
525 + len += sprintf(page + len, "%8lu kB\n",
526 + PAGES2KB(bs.hard_limit));
527 + else
528 + len += sprintf(page + len, " ??? kB\n");
529 +
530 + *eof = 1;
531 + return len;
532 +}
533 +#endif
534 +
535 +static struct notifier_block xenstore_notifier;
536 +
537 +static int __init balloon_init(void)
538 +{
539 +#if defined(CONFIG_X86) && defined(CONFIG_XEN)
540 + unsigned long pfn;
541 + struct page *page;
542 +#endif
543 +
544 + if (!is_running_on_xen())
545 + return -ENODEV;
546 +
547 + IPRINTK("Initialising balloon driver.\n");
548 +
549 +#ifdef CONFIG_XEN
550 + bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
551 + totalram_pages = bs.current_pages;
552 +#else
553 + bs.current_pages = totalram_pages;
554 +#endif
555 + bs.target_pages = bs.current_pages;
556 + bs.balloon_low = 0;
557 + bs.balloon_high = 0;
558 + bs.driver_pages = 0UL;
559 + bs.hard_limit = ~0UL;
560 +
561 + init_timer(&balloon_timer);
562 + balloon_timer.data = 0;
563 + balloon_timer.function = balloon_alarm;
564 +
565 +#ifdef CONFIG_PROC_FS
566 + if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
567 + WPRINTK("Unable to create /proc/xen/balloon.\n");
568 + return -1;
569 + }
570 +
571 + balloon_pde->read_proc = balloon_read;
572 + balloon_pde->write_proc = balloon_write;
573 +#endif
574 + balloon_sysfs_init();
575 +
576 +#if defined(CONFIG_X86) && defined(CONFIG_XEN)
577 + /* Initialise the balloon with excess memory space. */
578 + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
579 + page = pfn_to_page(pfn);
580 + if (!PageReserved(page))
581 + balloon_append(page);
582 + }
583 +#endif
584 +
585 + target_watch.callback = watch_target;
586 + xenstore_notifier.notifier_call = balloon_init_watcher;
587 +
588 + register_xenstore_notifier(&xenstore_notifier);
589 +
590 + return 0;
591 +}
592 +
593 +subsys_initcall(balloon_init);
594 +
595 +static void __exit balloon_exit(void)
596 +{
597 + /* XXX - release balloon here */
598 + return;
599 +}
600 +
601 +module_exit(balloon_exit);
602 +
603 +void balloon_update_driver_allowance(long delta)
604 +{
605 + unsigned long flags;
606 +
607 + balloon_lock(flags);
608 + bs.driver_pages += delta;
609 + balloon_unlock(flags);
610 +}
611 +
612 +#ifdef CONFIG_XEN
613 +static int dealloc_pte_fn(
614 + pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
615 +{
616 + unsigned long mfn = pte_mfn(*pte);
617 + int ret;
618 + struct xen_memory_reservation reservation = {
619 + .nr_extents = 1,
620 + .extent_order = 0,
621 + .domid = DOMID_SELF
622 + };
623 + set_xen_guest_handle(reservation.extent_start, &mfn);
624 + set_pte_at(&init_mm, addr, pte, __pte_ma(0));
625 + set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
626 + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
627 + BUG_ON(ret != 1);
628 + return 0;
629 +}
630 +#endif
631 +
632 +struct page **alloc_empty_pages_and_pagevec(int nr_pages)
633 +{
634 + unsigned long flags;
635 + void *v;
636 + struct page *page, **pagevec;
637 + int i, ret;
638 +
639 + pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
640 + if (pagevec == NULL)
641 + return NULL;
642 +
643 + for (i = 0; i < nr_pages; i++) {
644 + page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
645 + if (page == NULL)
646 + goto err;
647 +
648 + v = page_address(page);
649 + scrub_pages(v, 1);
650 +
651 + balloon_lock(flags);
652 +
653 + if (xen_feature(XENFEAT_auto_translated_physmap)) {
654 + unsigned long gmfn = page_to_pfn(page);
655 + struct xen_memory_reservation reservation = {
656 + .nr_extents = 1,
657 + .extent_order = 0,
658 + .domid = DOMID_SELF
659 + };
660 + set_xen_guest_handle(reservation.extent_start, &gmfn);
661 + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
662 + &reservation);
663 + if (ret == 1)
664 + ret = 0; /* success */
665 + } else {
666 +#ifdef CONFIG_XEN
667 + ret = apply_to_page_range(&init_mm, (unsigned long)v,
668 + PAGE_SIZE, dealloc_pte_fn,
669 + NULL);
670 +#else
671 + /* Cannot handle non-auto translate mode. */
672 + ret = 1;
673 +#endif
674 + }
675 +
676 + if (ret != 0) {
677 + balloon_unlock(flags);
678 + balloon_free_page(page);
679 + goto err;
680 + }
681 +
682 + totalram_pages = --bs.current_pages;
683 +
684 + balloon_unlock(flags);
685 + }
686 +
687 + out:
688 + schedule_work(&balloon_worker);
689 +#ifdef CONFIG_XEN
690 + flush_tlb_all();
691 +#endif
692 + return pagevec;
693 +
694 + err:
695 + balloon_lock(flags);
696 + while (--i >= 0)
697 + balloon_append(pagevec[i]);
698 + balloon_unlock(flags);
699 + kfree(pagevec);
700 + pagevec = NULL;
701 + goto out;
702 +}
703 +
704 +void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
705 +{
706 + unsigned long flags;
707 + int i;
708 +
709 + if (pagevec == NULL)
710 + return;
711 +
712 + balloon_lock(flags);
713 + for (i = 0; i < nr_pages; i++) {
714 + BUG_ON(page_count(pagevec[i]) != 1);
715 + balloon_append(pagevec[i]);
716 + }
717 + balloon_unlock(flags);
718 +
719 + kfree(pagevec);
720 +
721 + schedule_work(&balloon_worker);
722 +}
723 +
724 +void balloon_release_driver_page(struct page *page)
725 +{
726 + unsigned long flags;
727 +
728 + balloon_lock(flags);
729 + balloon_append(page);
730 + bs.driver_pages--;
731 + balloon_unlock(flags);
732 +
733 + schedule_work(&balloon_worker);
734 +}
735 +
736 +EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
737 +EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
738 +EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
739 +EXPORT_SYMBOL_GPL(balloon_release_driver_page);
740 +
741 +MODULE_LICENSE("Dual BSD/GPL");
742 Index: head-2008-11-25/drivers/xen/balloon/common.h
743 ===================================================================
744 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
745 +++ head-2008-11-25/drivers/xen/balloon/common.h 2007-06-12 13:13:44.000000000 +0200
746 @@ -0,0 +1,58 @@
747 +/******************************************************************************
748 + * balloon/common.h
749 + *
750 + * This program is free software; you can redistribute it and/or
751 + * modify it under the terms of the GNU General Public License version 2
752 + * as published by the Free Software Foundation; or, when distributed
753 + * separately from the Linux kernel or incorporated into other
754 + * software packages, subject to the following license:
755 + *
756 + * Permission is hereby granted, free of charge, to any person obtaining a copy
757 + * of this source file (the "Software"), to deal in the Software without
758 + * restriction, including without limitation the rights to use, copy, modify,
759 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
760 + * and to permit persons to whom the Software is furnished to do so, subject to
761 + * the following conditions:
762 + *
763 + * The above copyright notice and this permission notice shall be included in
764 + * all copies or substantial portions of the Software.
765 + *
766 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
767 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
768 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
769 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
770 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
771 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
772 + * IN THE SOFTWARE.
773 + */
774 +
775 +#ifndef __XEN_BALLOON_COMMON_H__
776 +#define __XEN_BALLOON_COMMON_H__
777 +
778 +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
779 +
780 +struct balloon_stats {
781 + /* We aim for 'current allocation' == 'target allocation'. */
782 + unsigned long current_pages;
783 + unsigned long target_pages;
784 + /* We may hit the hard limit in Xen. If we do then we remember it. */
785 + unsigned long hard_limit;
786 + /*
787 + * Drivers may alter the memory reservation independently, but they
788 + * must inform the balloon driver so we avoid hitting the hard limit.
789 + */
790 + unsigned long driver_pages;
791 + /* Number of pages in high- and low-memory balloons. */
792 + unsigned long balloon_low;
793 + unsigned long balloon_high;
794 +};
795 +
796 +extern struct balloon_stats balloon_stats;
797 +#define bs balloon_stats
798 +
799 +int balloon_sysfs_init(void);
800 +void balloon_sysfs_exit(void);
801 +
802 +void balloon_set_new_target(unsigned long target);
803 +
804 +#endif /* __XEN_BALLOON_COMMON_H__ */
805 Index: head-2008-11-25/drivers/xen/balloon/sysfs.c
806 ===================================================================
807 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
808 +++ head-2008-11-25/drivers/xen/balloon/sysfs.c 2008-04-02 12:34:02.000000000 +0200
809 @@ -0,0 +1,170 @@
810 +/******************************************************************************
811 + * balloon/sysfs.c
812 + *
813 + * Xen balloon driver - sysfs interfaces.
814 + *
815 + * This program is free software; you can redistribute it and/or
816 + * modify it under the terms of the GNU General Public License version 2
817 + * as published by the Free Software Foundation; or, when distributed
818 + * separately from the Linux kernel or incorporated into other
819 + * software packages, subject to the following license:
820 + *
821 + * Permission is hereby granted, free of charge, to any person obtaining a copy
822 + * of this source file (the "Software"), to deal in the Software without
823 + * restriction, including without limitation the rights to use, copy, modify,
824 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
825 + * and to permit persons to whom the Software is furnished to do so, subject to
826 + * the following conditions:
827 + *
828 + * The above copyright notice and this permission notice shall be included in
829 + * all copies or substantial portions of the Software.
830 + *
831 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
832 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
833 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
834 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
835 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
836 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
837 + * IN THE SOFTWARE.
838 + */
839 +
840 +#include <linux/capability.h>
841 +#include <linux/errno.h>
842 +#include <linux/stat.h>
843 +#include <linux/string.h>
844 +#include <linux/sysdev.h>
845 +#include "common.h"
846 +
847 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
848 +#include <xen/platform-compat.h>
849 +#endif
850 +
851 +#define BALLOON_CLASS_NAME "xen_memory"
852 +
853 +#define BALLOON_SHOW(name, format, args...) \
854 + static ssize_t show_##name(struct sys_device *dev, \
855 + char *buf) \
856 + { \
857 + return sprintf(buf, format, ##args); \
858 + } \
859 + static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
860 +
861 +BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages));
862 +BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low));
863 +BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
864 +BALLOON_SHOW(hard_limit_kb,
865 + (bs.hard_limit!=~0UL) ? "%lu\n" : "???\n",
866 + (bs.hard_limit!=~0UL) ? PAGES2KB(bs.hard_limit) : 0);
867 +BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
868 +
869 +static ssize_t show_target_kb(struct sys_device *dev, char *buf)
870 +{
871 + return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
872 +}
873 +
874 +static ssize_t store_target_kb(struct sys_device *dev,
875 + const char *buf,
876 + size_t count)
877 +{
878 + char memstring[64], *endchar;
879 + unsigned long long target_bytes;
880 +
881 + if (!capable(CAP_SYS_ADMIN))
882 + return -EPERM;
883 +
884 + if (count <= 1)
885 + return -EBADMSG; /* runt */
886 + if (count > sizeof(memstring))
887 + return -EFBIG; /* too long */
888 + strcpy(memstring, buf);
889 +
890 + target_bytes = memparse(memstring, &endchar);
891 + balloon_set_new_target(target_bytes >> PAGE_SHIFT);
892 +
893 + return count;
894 +}
895 +
896 +static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
897 + show_target_kb, store_target_kb);
898 +
899 +static struct sysdev_attribute *balloon_attrs[] = {
900 + &attr_target_kb,
901 +};
902 +
903 +static struct attribute *balloon_info_attrs[] = {
904 + &attr_current_kb.attr,
905 + &attr_low_kb.attr,
906 + &attr_high_kb.attr,
907 + &attr_hard_limit_kb.attr,
908 + &attr_driver_kb.attr,
909 + NULL
910 +};
911 +
912 +static struct attribute_group balloon_info_group = {
913 + .name = "info",
914 + .attrs = balloon_info_attrs,
915 +};
916 +
917 +static struct sysdev_class balloon_sysdev_class = {
918 + set_kset_name(BALLOON_CLASS_NAME),
919 +};
920 +
921 +static struct sys_device balloon_sysdev;
922 +
923 +static int register_balloon(struct sys_device *sysdev)
924 +{
925 + int i, error;
926 +
927 + error = sysdev_class_register(&balloon_sysdev_class);
928 + if (error)
929 + return error;
930 +
931 + sysdev->id = 0;
932 + sysdev->cls = &balloon_sysdev_class;
933 +
934 + error = sysdev_register(sysdev);
935 + if (error) {
936 + sysdev_class_unregister(&balloon_sysdev_class);
937 + return error;
938 + }
939 +
940 + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
941 + error = sysdev_create_file(sysdev, balloon_attrs[i]);
942 + if (error)
943 + goto fail;
944 + }
945 +
946 + error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
947 + if (error)
948 + goto fail;
949 +
950 + return 0;
951 +
952 + fail:
953 + while (--i >= 0)
954 + sysdev_remove_file(sysdev, balloon_attrs[i]);
955 + sysdev_unregister(sysdev);
956 + sysdev_class_unregister(&balloon_sysdev_class);
957 + return error;
958 +}
959 +
960 +static void unregister_balloon(struct sys_device *sysdev)
961 +{
962 + int i;
963 +
964 + sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
965 + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
966 + sysdev_remove_file(sysdev, balloon_attrs[i]);
967 + sysdev_unregister(sysdev);
968 + sysdev_class_unregister(&balloon_sysdev_class);
969 +}
970 +
971 +int balloon_sysfs_init(void)
972 +{
973 + return register_balloon(&balloon_sysdev);
974 +}
975 +
976 +void balloon_sysfs_exit(void)
977 +{
978 + unregister_balloon(&balloon_sysdev);
979 +}
980 Index: head-2008-11-25/drivers/xen/blkback/Makefile
981 ===================================================================
982 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
983 +++ head-2008-11-25/drivers/xen/blkback/Makefile 2007-06-12 13:13:44.000000000 +0200
984 @@ -0,0 +1,3 @@
985 +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o
986 +
987 +blkbk-y := blkback.o xenbus.o interface.o vbd.o
988 Index: head-2008-11-25/drivers/xen/blkback/blkback.c
989 ===================================================================
990 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
991 +++ head-2008-11-25/drivers/xen/blkback/blkback.c 2008-11-10 11:44:21.000000000 +0100
992 @@ -0,0 +1,656 @@
993 +/******************************************************************************
994 + * arch/xen/drivers/blkif/backend/main.c
995 + *
996 + * Back-end of the driver for virtual block devices. This portion of the
997 + * driver exports a 'unified' block-device interface that can be accessed
998 + * by any operating system that implements a compatible front end. A
999 + * reference front-end implementation can be found in:
1000 + * arch/xen/drivers/blkif/frontend
1001 + *
1002 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
1003 + * Copyright (c) 2005, Christopher Clark
1004 + *
1005 + * This program is free software; you can redistribute it and/or
1006 + * modify it under the terms of the GNU General Public License version 2
1007 + * as published by the Free Software Foundation; or, when distributed
1008 + * separately from the Linux kernel or incorporated into other
1009 + * software packages, subject to the following license:
1010 + *
1011 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1012 + * of this source file (the "Software"), to deal in the Software without
1013 + * restriction, including without limitation the rights to use, copy, modify,
1014 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1015 + * and to permit persons to whom the Software is furnished to do so, subject to
1016 + * the following conditions:
1017 + *
1018 + * The above copyright notice and this permission notice shall be included in
1019 + * all copies or substantial portions of the Software.
1020 + *
1021 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1022 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1023 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1024 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1025 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1026 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1027 + * IN THE SOFTWARE.
1028 + */
1029 +
1030 +#include <linux/spinlock.h>
1031 +#include <linux/kthread.h>
1032 +#include <linux/list.h>
1033 +#include <linux/delay.h>
1034 +#include <xen/balloon.h>
1035 +#include <asm/hypervisor.h>
1036 +#include "common.h"
1037 +
1038 +/*
1039 + * These are rather arbitrary. They are fairly large because adjacent requests
1040 + * pulled from a communication ring are quite likely to end up being part of
1041 + * the same scatter/gather request at the disc.
1042 + *
1043 + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
1044 + *
1045 + * This will increase the chances of being able to write whole tracks.
1046 + * 64 should be enough to keep us competitive with Linux.
1047 + */
1048 +static int blkif_reqs = 64;
1049 +module_param_named(reqs, blkif_reqs, int, 0);
1050 +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
1051 +
1052 +/* Run-time switchable: /sys/module/blkback/parameters/ */
1053 +static unsigned int log_stats = 0;
1054 +static unsigned int debug_lvl = 0;
1055 +module_param(log_stats, int, 0644);
1056 +module_param(debug_lvl, int, 0644);
1057 +
1058 +/*
1059 + * Each outstanding request that we've passed to the lower device layers has a
1060 + * 'pending_req' allocated to it. Each buffer_head that completes decrements
1061 + * the pendcnt towards zero. When it hits zero, the specified domain has a
1062 + * response queued for it, with the saved 'id' passed back.
1063 + */
1064 +typedef struct {
1065 + blkif_t *blkif;
1066 + u64 id;
1067 + int nr_pages;
1068 + atomic_t pendcnt;
1069 + unsigned short operation;
1070 + int status;
1071 + struct list_head free_list;
1072 +} pending_req_t;
1073 +
1074 +static pending_req_t *pending_reqs;
1075 +static struct list_head pending_free;
1076 +static DEFINE_SPINLOCK(pending_free_lock);
1077 +static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
1078 +
1079 +#define BLKBACK_INVALID_HANDLE (~0)
1080 +
1081 +static struct page **pending_pages;
1082 +static grant_handle_t *pending_grant_handles;
1083 +
1084 +static inline int vaddr_pagenr(pending_req_t *req, int seg)
1085 +{
1086 + return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
1087 +}
1088 +
1089 +static inline unsigned long vaddr(pending_req_t *req, int seg)
1090 +{
1091 + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
1092 + return (unsigned long)pfn_to_kaddr(pfn);
1093 +}
1094 +
1095 +#define pending_handle(_req, _seg) \
1096 + (pending_grant_handles[vaddr_pagenr(_req, _seg)])
1097 +
1098 +
1099 +static int do_block_io_op(blkif_t *blkif);
1100 +static void dispatch_rw_block_io(blkif_t *blkif,
1101 + blkif_request_t *req,
1102 + pending_req_t *pending_req);
1103 +static void make_response(blkif_t *blkif, u64 id,
1104 + unsigned short op, int st);
1105 +
1106 +/******************************************************************
1107 + * misc small helpers
1108 + */
1109 +static pending_req_t* alloc_req(void)
1110 +{
1111 + pending_req_t *req = NULL;
1112 + unsigned long flags;
1113 +
1114 + spin_lock_irqsave(&pending_free_lock, flags);
1115 + if (!list_empty(&pending_free)) {
1116 + req = list_entry(pending_free.next, pending_req_t, free_list);
1117 + list_del(&req->free_list);
1118 + }
1119 + spin_unlock_irqrestore(&pending_free_lock, flags);
1120 + return req;
1121 +}
1122 +
1123 +static void free_req(pending_req_t *req)
1124 +{
1125 + unsigned long flags;
1126 + int was_empty;
1127 +
1128 + spin_lock_irqsave(&pending_free_lock, flags);
1129 + was_empty = list_empty(&pending_free);
1130 + list_add(&req->free_list, &pending_free);
1131 + spin_unlock_irqrestore(&pending_free_lock, flags);
1132 + if (was_empty)
1133 + wake_up(&pending_free_wq);
1134 +}
1135 +
1136 +static void unplug_queue(blkif_t *blkif)
1137 +{
1138 + if (blkif->plug == NULL)
1139 + return;
1140 + if (blkif->plug->unplug_fn)
1141 + blkif->plug->unplug_fn(blkif->plug);
1142 + blk_put_queue(blkif->plug);
1143 + blkif->plug = NULL;
1144 +}
1145 +
1146 +static void plug_queue(blkif_t *blkif, struct block_device *bdev)
1147 +{
1148 + request_queue_t *q = bdev_get_queue(bdev);
1149 +
1150 + if (q == blkif->plug)
1151 + return;
1152 + unplug_queue(blkif);
1153 + blk_get_queue(q);
1154 + blkif->plug = q;
1155 +}
1156 +
1157 +static void fast_flush_area(pending_req_t *req)
1158 +{
1159 + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1160 + unsigned int i, invcount = 0;
1161 + grant_handle_t handle;
1162 + int ret;
1163 +
1164 + for (i = 0; i < req->nr_pages; i++) {
1165 + handle = pending_handle(req, i);
1166 + if (handle == BLKBACK_INVALID_HANDLE)
1167 + continue;
1168 + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
1169 + GNTMAP_host_map, handle);
1170 + pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
1171 + invcount++;
1172 + }
1173 +
1174 + ret = HYPERVISOR_grant_table_op(
1175 + GNTTABOP_unmap_grant_ref, unmap, invcount);
1176 + BUG_ON(ret);
1177 +}
1178 +
1179 +/******************************************************************
1180 + * SCHEDULER FUNCTIONS
1181 + */
1182 +
1183 +static void print_stats(blkif_t *blkif)
1184 +{
1185 + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
1186 + current->comm, blkif->st_oo_req,
1187 + blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
1188 + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
1189 + blkif->st_rd_req = 0;
1190 + blkif->st_wr_req = 0;
1191 + blkif->st_oo_req = 0;
1192 +}
1193 +
1194 +int blkif_schedule(void *arg)
1195 +{
1196 + blkif_t *blkif = arg;
1197 +
1198 + blkif_get(blkif);
1199 +
1200 + if (debug_lvl)
1201 + printk(KERN_DEBUG "%s: started\n", current->comm);
1202 +
1203 + while (!kthread_should_stop()) {
1204 + if (try_to_freeze())
1205 + continue;
1206 +
1207 + wait_event_interruptible(
1208 + blkif->wq,
1209 + blkif->waiting_reqs || kthread_should_stop());
1210 + wait_event_interruptible(
1211 + pending_free_wq,
1212 + !list_empty(&pending_free) || kthread_should_stop());
1213 +
1214 + blkif->waiting_reqs = 0;
1215 + smp_mb(); /* clear flag *before* checking for work */
1216 +
1217 + if (do_block_io_op(blkif))
1218 + blkif->waiting_reqs = 1;
1219 + unplug_queue(blkif);
1220 +
1221 + if (log_stats && time_after(jiffies, blkif->st_print))
1222 + print_stats(blkif);
1223 + }
1224 +
1225 + if (log_stats)
1226 + print_stats(blkif);
1227 + if (debug_lvl)
1228 + printk(KERN_DEBUG "%s: exiting\n", current->comm);
1229 +
1230 + blkif->xenblkd = NULL;
1231 + blkif_put(blkif);
1232 +
1233 + return 0;
1234 +}
1235 +
1236 +/******************************************************************
1237 + * COMPLETION CALLBACK -- Called as bh->b_end_io()
1238 + */
1239 +
1240 +static void __end_block_io_op(pending_req_t *pending_req, int error)
1241 +{
1242 + /* An error fails the entire request. */
1243 + if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
1244 + (error == -EOPNOTSUPP)) {
1245 + DPRINTK("blkback: write barrier op failed, not supported\n");
1246 + blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
1247 + pending_req->status = BLKIF_RSP_EOPNOTSUPP;
1248 + } else if (error) {
1249 + DPRINTK("Buffer not up-to-date at end of operation, "
1250 + "error=%d\n", error);
1251 + pending_req->status = BLKIF_RSP_ERROR;
1252 + }
1253 +
1254 + if (atomic_dec_and_test(&pending_req->pendcnt)) {
1255 + fast_flush_area(pending_req);
1256 + make_response(pending_req->blkif, pending_req->id,
1257 + pending_req->operation, pending_req->status);
1258 + blkif_put(pending_req->blkif);
1259 + free_req(pending_req);
1260 + }
1261 +}
1262 +
1263 +static int end_block_io_op(struct bio *bio, unsigned int done, int error)
1264 +{
1265 + if (bio->bi_size != 0)
1266 + return 1;
1267 + __end_block_io_op(bio->bi_private, error);
1268 + bio_put(bio);
1269 + return error;
1270 +}
1271 +
1272 +
1273 +/******************************************************************************
1274 + * NOTIFICATION FROM GUEST OS.
1275 + */
1276 +
1277 +static void blkif_notify_work(blkif_t *blkif)
1278 +{
1279 + blkif->waiting_reqs = 1;
1280 + wake_up(&blkif->wq);
1281 +}
1282 +
1283 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
1284 +{
1285 + blkif_notify_work(dev_id);
1286 + return IRQ_HANDLED;
1287 +}
1288 +
1289 +
1290 +
1291 +/******************************************************************
1292 + * DOWNWARD CALLS -- These interface with the block-device layer proper.
1293 + */
1294 +
1295 +static int do_block_io_op(blkif_t *blkif)
1296 +{
1297 + blkif_back_rings_t *blk_rings = &blkif->blk_rings;
1298 + blkif_request_t req;
1299 + pending_req_t *pending_req;
1300 + RING_IDX rc, rp;
1301 + int more_to_do = 0;
1302 +
1303 + rc = blk_rings->common.req_cons;
1304 + rp = blk_rings->common.sring->req_prod;
1305 + rmb(); /* Ensure we see queued requests up to 'rp'. */
1306 +
1307 + while (rc != rp) {
1308 +
1309 + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
1310 + break;
1311 +
1312 + pending_req = alloc_req();
1313 + if (NULL == pending_req) {
1314 + blkif->st_oo_req++;
1315 + more_to_do = 1;
1316 + break;
1317 + }
1318 +
1319 + if (kthread_should_stop()) {
1320 + more_to_do = 1;
1321 + break;
1322 + }
1323 +
1324 + switch (blkif->blk_protocol) {
1325 + case BLKIF_PROTOCOL_NATIVE:
1326 + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
1327 + break;
1328 + case BLKIF_PROTOCOL_X86_32:
1329 + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
1330 + break;
1331 + case BLKIF_PROTOCOL_X86_64:
1332 + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
1333 + break;
1334 + default:
1335 + BUG();
1336 + }
1337 + blk_rings->common.req_cons = ++rc; /* before make_response() */
1338 +
1339 + /* Apply all sanity checks to /private copy/ of request. */
1340 + barrier();
1341 +
1342 + switch (req.operation) {
1343 + case BLKIF_OP_READ:
1344 + blkif->st_rd_req++;
1345 + dispatch_rw_block_io(blkif, &req, pending_req);
1346 + break;
1347 + case BLKIF_OP_WRITE_BARRIER:
1348 + blkif->st_br_req++;
1349 + /* fall through */
1350 + case BLKIF_OP_WRITE:
1351 + blkif->st_wr_req++;
1352 + dispatch_rw_block_io(blkif, &req, pending_req);
1353 + break;
1354 + default:
1355 + /* A good sign something is wrong: sleep for a while to
1356 + * avoid excessive CPU consumption by a bad guest. */
1357 + msleep(1);
1358 + DPRINTK("error: unknown block io operation [%d]\n",
1359 + req.operation);
1360 + make_response(blkif, req.id, req.operation,
1361 + BLKIF_RSP_ERROR);
1362 + free_req(pending_req);
1363 + break;
1364 + }
1365 +
1366 + /* Yield point for this unbounded loop. */
1367 + cond_resched();
1368 + }
1369 +
1370 + return more_to_do;
1371 +}
1372 +
1373 +static void dispatch_rw_block_io(blkif_t *blkif,
1374 + blkif_request_t *req,
1375 + pending_req_t *pending_req)
1376 +{
1377 + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
1378 + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1379 + struct phys_req preq;
1380 + struct {
1381 + unsigned long buf; unsigned int nsec;
1382 + } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1383 + unsigned int nseg;
1384 + struct bio *bio = NULL;
1385 + int ret, i;
1386 + int operation;
1387 +
1388 + switch (req->operation) {
1389 + case BLKIF_OP_READ:
1390 + operation = READ;
1391 + break;
1392 + case BLKIF_OP_WRITE:
1393 + operation = WRITE;
1394 + break;
1395 + case BLKIF_OP_WRITE_BARRIER:
1396 + operation = WRITE_BARRIER;
1397 + break;
1398 + default:
1399 + operation = 0; /* make gcc happy */
1400 + BUG();
1401 + }
1402 +
1403 + /* Check that number of segments is sane. */
1404 + nseg = req->nr_segments;
1405 + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
1406 + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
1407 + DPRINTK("Bad number of segments in request (%d)\n", nseg);
1408 + goto fail_response;
1409 + }
1410 +
1411 + preq.dev = req->handle;
1412 + preq.sector_number = req->sector_number;
1413 + preq.nr_sects = 0;
1414 +
1415 + pending_req->blkif = blkif;
1416 + pending_req->id = req->id;
1417 + pending_req->operation = req->operation;
1418 + pending_req->status = BLKIF_RSP_OKAY;
1419 + pending_req->nr_pages = nseg;
1420 +
1421 + for (i = 0; i < nseg; i++) {
1422 + uint32_t flags;
1423 +
1424 + seg[i].nsec = req->seg[i].last_sect -
1425 + req->seg[i].first_sect + 1;
1426 +
1427 + if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
1428 + (req->seg[i].last_sect < req->seg[i].first_sect))
1429 + goto fail_response;
1430 + preq.nr_sects += seg[i].nsec;
1431 +
1432 + flags = GNTMAP_host_map;
1433 + if (operation != READ)
1434 + flags |= GNTMAP_readonly;
1435 + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
1436 + req->seg[i].gref, blkif->domid);
1437 + }
1438 +
1439 + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
1440 + BUG_ON(ret);
1441 +
1442 + for (i = 0; i < nseg; i++) {
1443 + if (unlikely(map[i].status != 0)) {
1444 + DPRINTK("invalid buffer -- could not remap it\n");
1445 + map[i].handle = BLKBACK_INVALID_HANDLE;
1446 + ret |= 1;
1447 + }
1448 +
1449 + pending_handle(pending_req, i) = map[i].handle;
1450 +
1451 + if (ret)
1452 + continue;
1453 +
1454 + set_phys_to_machine(__pa(vaddr(
1455 + pending_req, i)) >> PAGE_SHIFT,
1456 + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
1457 + seg[i].buf = map[i].dev_bus_addr |
1458 + (req->seg[i].first_sect << 9);
1459 + }
1460 +
1461 + if (ret)
1462 + goto fail_flush;
1463 +
1464 + if (vbd_translate(&preq, blkif, operation) != 0) {
1465 + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
1466 + operation == READ ? "read" : "write",
1467 + preq.sector_number,
1468 + preq.sector_number + preq.nr_sects, preq.dev);
1469 + goto fail_flush;
1470 + }
1471 +
1472 + plug_queue(blkif, preq.bdev);
1473 + atomic_set(&pending_req->pendcnt, 1);
1474 + blkif_get(blkif);
1475 +
1476 + for (i = 0; i < nseg; i++) {
1477 + if (((int)preq.sector_number|(int)seg[i].nsec) &
1478 + ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
1479 + DPRINTK("Misaligned I/O request from domain %d",
1480 + blkif->domid);
1481 + goto fail_put_bio;
1482 + }
1483 +
1484 + while ((bio == NULL) ||
1485 + (bio_add_page(bio,
1486 + virt_to_page(vaddr(pending_req, i)),
1487 + seg[i].nsec << 9,
1488 + seg[i].buf & ~PAGE_MASK) == 0)) {
1489 + if (bio) {
1490 + atomic_inc(&pending_req->pendcnt);
1491 + submit_bio(operation, bio);
1492 + }
1493 +
1494 + bio = bio_alloc(GFP_KERNEL, nseg-i);
1495 + if (unlikely(bio == NULL))
1496 + goto fail_put_bio;
1497 +
1498 + bio->bi_bdev = preq.bdev;
1499 + bio->bi_private = pending_req;
1500 + bio->bi_end_io = end_block_io_op;
1501 + bio->bi_sector = preq.sector_number;
1502 + }
1503 +
1504 + preq.sector_number += seg[i].nsec;
1505 + }
1506 +
1507 + if (!bio) {
1508 + BUG_ON(operation != WRITE_BARRIER);
1509 + bio = bio_alloc(GFP_KERNEL, 0);
1510 + if (unlikely(bio == NULL))
1511 + goto fail_put_bio;
1512 +
1513 + bio->bi_bdev = preq.bdev;
1514 + bio->bi_private = pending_req;
1515 + bio->bi_end_io = end_block_io_op;
1516 + bio->bi_sector = -1;
1517 + }
1518 +
1519 + submit_bio(operation, bio);
1520 +
1521 + if (operation == READ)
1522 + blkif->st_rd_sect += preq.nr_sects;
1523 + else if (operation == WRITE || operation == WRITE_BARRIER)
1524 + blkif->st_wr_sect += preq.nr_sects;
1525 +
1526 + return;
1527 +
1528 + fail_flush:
1529 + fast_flush_area(pending_req);
1530 + fail_response:
1531 + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
1532 + free_req(pending_req);
1533 + msleep(1); /* back off a bit */
1534 + return;
1535 +
1536 + fail_put_bio:
1537 + __end_block_io_op(pending_req, -EINVAL);
1538 + if (bio)
1539 + bio_put(bio);
1540 + unplug_queue(blkif);
1541 + msleep(1); /* back off a bit */
1542 + return;
1543 +}
1544 +
1545 +
1546 +
1547 +/******************************************************************
1548 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
1549 + */
1550 +
1551 +
1552 +static void make_response(blkif_t *blkif, u64 id,
1553 + unsigned short op, int st)
1554 +{
1555 + blkif_response_t resp;
1556 + unsigned long flags;
1557 + blkif_back_rings_t *blk_rings = &blkif->blk_rings;
1558 + int more_to_do = 0;
1559 + int notify;
1560 +
1561 + resp.id = id;
1562 + resp.operation = op;
1563 + resp.status = st;
1564 +
1565 + spin_lock_irqsave(&blkif->blk_ring_lock, flags);
1566 + /* Place on the response ring for the relevant domain. */
1567 + switch (blkif->blk_protocol) {
1568 + case BLKIF_PROTOCOL_NATIVE:
1569 + memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
1570 + &resp, sizeof(resp));
1571 + break;
1572 + case BLKIF_PROTOCOL_X86_32:
1573 + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
1574 + &resp, sizeof(resp));
1575 + break;
1576 + case BLKIF_PROTOCOL_X86_64:
1577 + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
1578 + &resp, sizeof(resp));
1579 + break;
1580 + default:
1581 + BUG();
1582 + }
1583 + blk_rings->common.rsp_prod_pvt++;
1584 + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
1585 + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
1586 + /*
1587 + * Tail check for pending requests. Allows frontend to avoid
1588 + * notifications if requests are already in flight (lower
1589 + * overheads and promotes batching).
1590 + */
1591 + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
1592 +
1593 + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
1594 + more_to_do = 1;
1595 + }
1596 +
1597 + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
1598 +
1599 + if (more_to_do)
1600 + blkif_notify_work(blkif);
1601 + if (notify)
1602 + notify_remote_via_irq(blkif->irq);
1603 +}
1604 +
1605 +static int __init blkif_init(void)
1606 +{
1607 + int i, mmap_pages;
1608 +
1609 + if (!is_running_on_xen())
1610 + return -ENODEV;
1611 +
1612 + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
1613 +
1614 + pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
1615 + blkif_reqs, GFP_KERNEL);
1616 + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
1617 + mmap_pages, GFP_KERNEL);
1618 + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
1619 +
1620 + if (!pending_reqs || !pending_grant_handles || !pending_pages)
1621 + goto out_of_memory;
1622 +
1623 + for (i = 0; i < mmap_pages; i++)
1624 + pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
1625 +
1626 + blkif_interface_init();
1627 +
 1628 +	memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
1629 + INIT_LIST_HEAD(&pending_free);
1630 +
1631 + for (i = 0; i < blkif_reqs; i++)
1632 + list_add_tail(&pending_reqs[i].free_list, &pending_free);
1633 +
1634 + blkif_xenbus_init();
1635 +
1636 + return 0;
1637 +
1638 + out_of_memory:
1639 + kfree(pending_reqs);
1640 + kfree(pending_grant_handles);
1641 + free_empty_pages_and_pagevec(pending_pages, mmap_pages);
1642 + printk("%s: out of memory\n", __FUNCTION__);
1643 + return -ENOMEM;
1644 +}
1645 +
1646 +module_init(blkif_init);
1647 +
1648 +MODULE_LICENSE("Dual BSD/GPL");
1649 Index: head-2008-11-25/drivers/xen/blkback/common.h
1650 ===================================================================
1651 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
1652 +++ head-2008-11-25/drivers/xen/blkback/common.h 2008-05-08 14:02:04.000000000 +0200
1653 @@ -0,0 +1,139 @@
1654 +/*
1655 + * This program is free software; you can redistribute it and/or
1656 + * modify it under the terms of the GNU General Public License version 2
1657 + * as published by the Free Software Foundation; or, when distributed
1658 + * separately from the Linux kernel or incorporated into other
1659 + * software packages, subject to the following license:
1660 + *
1661 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1662 + * of this source file (the "Software"), to deal in the Software without
1663 + * restriction, including without limitation the rights to use, copy, modify,
1664 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1665 + * and to permit persons to whom the Software is furnished to do so, subject to
1666 + * the following conditions:
1667 + *
1668 + * The above copyright notice and this permission notice shall be included in
1669 + * all copies or substantial portions of the Software.
1670 + *
1671 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1672 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1673 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1674 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1675 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1676 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1677 + * IN THE SOFTWARE.
1678 + */
1679 +
1680 +#ifndef __BLKIF__BACKEND__COMMON_H__
1681 +#define __BLKIF__BACKEND__COMMON_H__
1682 +
1683 +#include <linux/version.h>
1684 +#include <linux/module.h>
1685 +#include <linux/interrupt.h>
1686 +#include <linux/slab.h>
1687 +#include <linux/blkdev.h>
1688 +#include <linux/vmalloc.h>
1689 +#include <linux/wait.h>
1690 +#include <asm/io.h>
1691 +#include <asm/setup.h>
1692 +#include <asm/pgalloc.h>
1693 +#include <xen/evtchn.h>
1694 +#include <asm/hypervisor.h>
1695 +#include <xen/blkif.h>
1696 +#include <xen/gnttab.h>
1697 +#include <xen/driver_util.h>
1698 +#include <xen/xenbus.h>
1699 +
1700 +#define DPRINTK(_f, _a...) \
1701 + pr_debug("(file=%s, line=%d) " _f, \
1702 + __FILE__ , __LINE__ , ## _a )
1703 +
1704 +struct vbd {
1705 + blkif_vdev_t handle; /* what the domain refers to this vbd as */
1706 + unsigned char readonly; /* Non-zero -> read-only */
1707 + unsigned char type; /* VDISK_xxx */
1708 + u32 pdevice; /* phys device that this vbd maps to */
1709 + struct block_device *bdev;
1710 +};
1711 +
1712 +struct backend_info;
1713 +
1714 +typedef struct blkif_st {
1715 + /* Unique identifier for this interface. */
1716 + domid_t domid;
1717 + unsigned int handle;
1718 + /* Physical parameters of the comms window. */
1719 + unsigned int irq;
1720 + /* Comms information. */
1721 + enum blkif_protocol blk_protocol;
1722 + blkif_back_rings_t blk_rings;
1723 + struct vm_struct *blk_ring_area;
1724 + /* The VBD attached to this interface. */
1725 + struct vbd vbd;
1726 + /* Back pointer to the backend_info. */
1727 + struct backend_info *be;
1728 + /* Private fields. */
1729 + spinlock_t blk_ring_lock;
1730 + atomic_t refcnt;
1731 +
1732 + wait_queue_head_t wq;
1733 + struct task_struct *xenblkd;
1734 + unsigned int waiting_reqs;
1735 + request_queue_t *plug;
1736 +
1737 + /* statistics */
1738 + unsigned long st_print;
1739 + int st_rd_req;
1740 + int st_wr_req;
1741 + int st_oo_req;
1742 + int st_br_req;
1743 + int st_rd_sect;
1744 + int st_wr_sect;
1745 +
1746 + wait_queue_head_t waiting_to_free;
1747 +
1748 + grant_handle_t shmem_handle;
1749 + grant_ref_t shmem_ref;
1750 +} blkif_t;
1751 +
1752 +blkif_t *blkif_alloc(domid_t domid);
1753 +void blkif_disconnect(blkif_t *blkif);
1754 +void blkif_free(blkif_t *blkif);
1755 +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
1756 +
1757 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
1758 +#define blkif_put(_b) \
1759 + do { \
1760 + if (atomic_dec_and_test(&(_b)->refcnt)) \
1761 + wake_up(&(_b)->waiting_to_free);\
1762 + } while (0)
1763 +
1764 +/* Create a vbd. */
1765 +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
1766 + unsigned minor, int readonly, int cdrom);
1767 +void vbd_free(struct vbd *vbd);
1768 +
1769 +unsigned long long vbd_size(struct vbd *vbd);
1770 +unsigned int vbd_info(struct vbd *vbd);
1771 +unsigned long vbd_secsize(struct vbd *vbd);
1772 +
1773 +struct phys_req {
1774 + unsigned short dev;
1775 + unsigned short nr_sects;
1776 + struct block_device *bdev;
1777 + blkif_sector_t sector_number;
1778 +};
1779 +
1780 +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
1781 +
1782 +void blkif_interface_init(void);
1783 +
1784 +void blkif_xenbus_init(void);
1785 +
1786 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
1787 +int blkif_schedule(void *arg);
1788 +
1789 +int blkback_barrier(struct xenbus_transaction xbt,
1790 + struct backend_info *be, int state);
1791 +
1792 +#endif /* __BLKIF__BACKEND__COMMON_H__ */
1793 Index: head-2008-11-25/drivers/xen/blkback/interface.c
1794 ===================================================================
1795 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
1796 +++ head-2008-11-25/drivers/xen/blkback/interface.c 2007-06-12 13:13:44.000000000 +0200
1797 @@ -0,0 +1,181 @@
1798 +/******************************************************************************
1799 + * arch/xen/drivers/blkif/backend/interface.c
1800 + *
1801 + * Block-device interface management.
1802 + *
1803 + * Copyright (c) 2004, Keir Fraser
1804 + *
1805 + * This program is free software; you can redistribute it and/or
1806 + * modify it under the terms of the GNU General Public License version 2
1807 + * as published by the Free Software Foundation; or, when distributed
1808 + * separately from the Linux kernel or incorporated into other
1809 + * software packages, subject to the following license:
1810 + *
1811 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1812 + * of this source file (the "Software"), to deal in the Software without
1813 + * restriction, including without limitation the rights to use, copy, modify,
1814 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1815 + * and to permit persons to whom the Software is furnished to do so, subject to
1816 + * the following conditions:
1817 + *
1818 + * The above copyright notice and this permission notice shall be included in
1819 + * all copies or substantial portions of the Software.
1820 + *
1821 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1822 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1823 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1824 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1825 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1826 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1827 + * IN THE SOFTWARE.
1828 + */
1829 +
1830 +#include "common.h"
1831 +#include <xen/evtchn.h>
1832 +#include <linux/kthread.h>
1833 +
1834 +static kmem_cache_t *blkif_cachep;
1835 +
1836 +blkif_t *blkif_alloc(domid_t domid)
1837 +{
1838 + blkif_t *blkif;
1839 +
1840 + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
1841 + if (!blkif)
1842 + return ERR_PTR(-ENOMEM);
1843 +
1844 + memset(blkif, 0, sizeof(*blkif));
1845 + blkif->domid = domid;
1846 + spin_lock_init(&blkif->blk_ring_lock);
1847 + atomic_set(&blkif->refcnt, 1);
1848 + init_waitqueue_head(&blkif->wq);
1849 + blkif->st_print = jiffies;
1850 + init_waitqueue_head(&blkif->waiting_to_free);
1851 +
1852 + return blkif;
1853 +}
1854 +
1855 +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
1856 +{
1857 + struct gnttab_map_grant_ref op;
1858 +
1859 + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
1860 + GNTMAP_host_map, shared_page, blkif->domid);
1861 +
1862 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
1863 + BUG();
1864 +
1865 + if (op.status) {
1866 +		DPRINTK("Grant table operation failure!\n");
1867 + return op.status;
1868 + }
1869 +
1870 + blkif->shmem_ref = shared_page;
1871 + blkif->shmem_handle = op.handle;
1872 +
1873 + return 0;
1874 +}
1875 +
1876 +static void unmap_frontend_page(blkif_t *blkif)
1877 +{
1878 + struct gnttab_unmap_grant_ref op;
1879 +
1880 + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
1881 + GNTMAP_host_map, blkif->shmem_handle);
1882 +
1883 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
1884 + BUG();
1885 +}
1886 +
1887 +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
1888 +{
1889 + int err;
1890 +
1891 + /* Already connected through? */
1892 + if (blkif->irq)
1893 + return 0;
1894 +
1895 + if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
1896 + return -ENOMEM;
1897 +
1898 + err = map_frontend_page(blkif, shared_page);
1899 + if (err) {
1900 + free_vm_area(blkif->blk_ring_area);
1901 + return err;
1902 + }
1903 +
1904 + switch (blkif->blk_protocol) {
1905 + case BLKIF_PROTOCOL_NATIVE:
1906 + {
1907 + blkif_sring_t *sring;
1908 + sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
1909 + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
1910 + break;
1911 + }
1912 + case BLKIF_PROTOCOL_X86_32:
1913 + {
1914 + blkif_x86_32_sring_t *sring_x86_32;
1915 + sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr;
1916 + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
1917 + break;
1918 + }
1919 + case BLKIF_PROTOCOL_X86_64:
1920 + {
1921 + blkif_x86_64_sring_t *sring_x86_64;
1922 + sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr;
1923 + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
1924 + break;
1925 + }
1926 + default:
1927 + BUG();
1928 + }
1929 +
1930 + err = bind_interdomain_evtchn_to_irqhandler(
1931 + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif);
1932 + if (err < 0)
1933 + {
1934 + unmap_frontend_page(blkif);
1935 + free_vm_area(blkif->blk_ring_area);
1936 + blkif->blk_rings.common.sring = NULL;
1937 + return err;
1938 + }
1939 + blkif->irq = err;
1940 +
1941 + return 0;
1942 +}
1943 +
1944 +void blkif_disconnect(blkif_t *blkif)
1945 +{
1946 + if (blkif->xenblkd) {
1947 + kthread_stop(blkif->xenblkd);
1948 + blkif->xenblkd = NULL;
1949 + }
1950 +
1951 + atomic_dec(&blkif->refcnt);
1952 + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
1953 + atomic_inc(&blkif->refcnt);
1954 +
1955 + if (blkif->irq) {
1956 + unbind_from_irqhandler(blkif->irq, blkif);
1957 + blkif->irq = 0;
1958 + }
1959 +
1960 + if (blkif->blk_rings.common.sring) {
1961 + unmap_frontend_page(blkif);
1962 + free_vm_area(blkif->blk_ring_area);
1963 + blkif->blk_rings.common.sring = NULL;
1964 + }
1965 +}
1966 +
1967 +void blkif_free(blkif_t *blkif)
1968 +{
1969 + if (!atomic_dec_and_test(&blkif->refcnt))
1970 + BUG();
1971 + kmem_cache_free(blkif_cachep, blkif);
1972 +}
1973 +
1974 +void __init blkif_interface_init(void)
1975 +{
1976 + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
1977 + 0, 0, NULL, NULL);
1978 +}
1979 Index: head-2008-11-25/drivers/xen/blkback/vbd.c
1980 ===================================================================
1981 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
1982 +++ head-2008-11-25/drivers/xen/blkback/vbd.c 2008-05-08 14:02:04.000000000 +0200
1983 @@ -0,0 +1,118 @@
1984 +/******************************************************************************
1985 + * blkback/vbd.c
1986 + *
1987 + * Routines for managing virtual block devices (VBDs).
1988 + *
1989 + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
1990 + *
1991 + * This program is free software; you can redistribute it and/or
1992 + * modify it under the terms of the GNU General Public License version 2
1993 + * as published by the Free Software Foundation; or, when distributed
1994 + * separately from the Linux kernel or incorporated into other
1995 + * software packages, subject to the following license:
1996 + *
1997 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1998 + * of this source file (the "Software"), to deal in the Software without
1999 + * restriction, including without limitation the rights to use, copy, modify,
2000 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
2001 + * and to permit persons to whom the Software is furnished to do so, subject to
2002 + * the following conditions:
2003 + *
2004 + * The above copyright notice and this permission notice shall be included in
2005 + * all copies or substantial portions of the Software.
2006 + *
2007 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2008 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2009 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2010 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2011 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2012 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2013 + * IN THE SOFTWARE.
2014 + */
2015 +
2016 +#include "common.h"
2017 +
2018 +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
2019 + (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
2020 +
2021 +unsigned long long vbd_size(struct vbd *vbd)
2022 +{
2023 + return vbd_sz(vbd);
2024 +}
2025 +
2026 +unsigned int vbd_info(struct vbd *vbd)
2027 +{
2028 + return vbd->type | (vbd->readonly?VDISK_READONLY:0);
2029 +}
2030 +
2031 +unsigned long vbd_secsize(struct vbd *vbd)
2032 +{
2033 + return bdev_hardsect_size(vbd->bdev);
2034 +}
2035 +
2036 +int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
2037 + unsigned minor, int readonly, int cdrom)
2038 +{
2039 + struct vbd *vbd;
2040 + struct block_device *bdev;
2041 +
2042 + vbd = &blkif->vbd;
2043 + vbd->handle = handle;
2044 + vbd->readonly = readonly;
2045 + vbd->type = 0;
2046 +
2047 + vbd->pdevice = MKDEV(major, minor);
2048 +
2049 + bdev = open_by_devnum(vbd->pdevice,
2050 + vbd->readonly ? FMODE_READ : FMODE_WRITE);
2051 +
2052 + if (IS_ERR(bdev)) {
2053 +		DPRINTK("vbd_create: device %08x could not be opened.\n",
2054 + vbd->pdevice);
2055 + return -ENOENT;
2056 + }
2057 +
2058 + vbd->bdev = bdev;
2059 +
2060 + if (vbd->bdev->bd_disk == NULL) {
2061 +		DPRINTK("vbd_create: device %08x doesn't exist.\n",
2062 + vbd->pdevice);
2063 + vbd_free(vbd);
2064 + return -ENOENT;
2065 + }
2066 +
2067 + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
2068 + vbd->type |= VDISK_CDROM;
2069 + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
2070 + vbd->type |= VDISK_REMOVABLE;
2071 +
2072 + DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
2073 + handle, blkif->domid);
2074 + return 0;
2075 +}
2076 +
2077 +void vbd_free(struct vbd *vbd)
2078 +{
2079 + if (vbd->bdev)
2080 + blkdev_put(vbd->bdev);
2081 + vbd->bdev = NULL;
2082 +}
2083 +
2084 +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
2085 +{
2086 + struct vbd *vbd = &blkif->vbd;
2087 + int rc = -EACCES;
2088 +
2089 + if ((operation != READ) && vbd->readonly)
2090 + goto out;
2091 +
2092 + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
2093 + goto out;
2094 +
2095 + req->dev = vbd->pdevice;
2096 + req->bdev = vbd->bdev;
2097 + rc = 0;
2098 +
2099 + out:
2100 + return rc;
2101 +}
2102 Index: head-2008-11-25/drivers/xen/blkback/xenbus.c
2103 ===================================================================
2104 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
2105 +++ head-2008-11-25/drivers/xen/blkback/xenbus.c 2008-05-08 14:02:04.000000000 +0200
2106 @@ -0,0 +1,541 @@
2107 +/* Xenbus code for blkif backend
2108 + Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
2109 + Copyright (C) 2005 XenSource Ltd
2110 +
2111 + This program is free software; you can redistribute it and/or modify
2112 + it under the terms of the GNU General Public License as published by
2113 + the Free Software Foundation; either version 2 of the License, or
2114 + (at your option) any later version.
2115 +
2116 + This program is distributed in the hope that it will be useful,
2117 + but WITHOUT ANY WARRANTY; without even the implied warranty of
2118 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2119 + GNU General Public License for more details.
2120 +
2121 + You should have received a copy of the GNU General Public License
2122 + along with this program; if not, write to the Free Software
2123 + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2124 +*/
2125 +
2126 +#include <stdarg.h>
2127 +#include <linux/module.h>
2128 +#include <linux/kthread.h>
2129 +#include "common.h"
2130 +
2131 +#undef DPRINTK
2132 +#define DPRINTK(fmt, args...) \
2133 + pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \
2134 + __FUNCTION__, __LINE__, ##args)
2135 +
2136 +struct backend_info
2137 +{
2138 + struct xenbus_device *dev;
2139 + blkif_t *blkif;
2140 + struct xenbus_watch backend_watch;
2141 + unsigned major;
2142 + unsigned minor;
2143 + char *mode;
2144 +};
2145 +
2146 +static void connect(struct backend_info *);
2147 +static int connect_ring(struct backend_info *);
2148 +static void backend_changed(struct xenbus_watch *, const char **,
2149 + unsigned int);
2150 +
2151 +static int blkback_name(blkif_t *blkif, char *buf)
2152 +{
2153 + char *devpath, *devname;
2154 + struct xenbus_device *dev = blkif->be->dev;
2155 +
2156 + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
2157 + if (IS_ERR(devpath))
2158 + return PTR_ERR(devpath);
2159 +
2160 + if ((devname = strstr(devpath, "/dev/")) != NULL)
2161 + devname += strlen("/dev/");
2162 + else
2163 + devname = devpath;
2164 +
2165 + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
2166 + kfree(devpath);
2167 +
2168 + return 0;
2169 +}
2170 +
2171 +static void update_blkif_status(blkif_t *blkif)
2172 +{
2173 + int err;
2174 + char name[TASK_COMM_LEN];
2175 +
2176 + /* Not ready to connect? */
2177 + if (!blkif->irq || !blkif->vbd.bdev)
2178 + return;
2179 +
2180 + /* Already connected? */
2181 + if (blkif->be->dev->state == XenbusStateConnected)
2182 + return;
2183 +
2184 + /* Attempt to connect: exit if we fail to. */
2185 + connect(blkif->be);
2186 + if (blkif->be->dev->state != XenbusStateConnected)
2187 + return;
2188 +
2189 + err = blkback_name(blkif, name);
2190 + if (err) {
2191 + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
2192 + return;
2193 + }
2194 +
2195 + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name);
2196 + if (IS_ERR(blkif->xenblkd)) {
2197 + err = PTR_ERR(blkif->xenblkd);
2198 + blkif->xenblkd = NULL;
2199 + xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
2200 + }
2201 +}
2202 +
2203 +
2204 +/****************************************************************
2205 + * sysfs interface for VBD I/O requests
2206 + */
2207 +
2208 +#define VBD_SHOW(name, format, args...) \
2209 + static ssize_t show_##name(struct device *_dev, \
2210 + struct device_attribute *attr, \
2211 + char *buf) \
2212 + { \
2213 + struct xenbus_device *dev = to_xenbus_device(_dev); \
2214 + struct backend_info *be = dev->dev.driver_data; \
2215 + \
2216 + return sprintf(buf, format, ##args); \
2217 + } \
2218 + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
2219 +
2220 +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
2221 +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
2222 +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
2223 +VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
2224 +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
2225 +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
2226 +
2227 +static struct attribute *vbdstat_attrs[] = {
2228 + &dev_attr_oo_req.attr,
2229 + &dev_attr_rd_req.attr,
2230 + &dev_attr_wr_req.attr,
2231 + &dev_attr_br_req.attr,
2232 + &dev_attr_rd_sect.attr,
2233 + &dev_attr_wr_sect.attr,
2234 + NULL
2235 +};
2236 +
2237 +static struct attribute_group vbdstat_group = {
2238 + .name = "statistics",
2239 + .attrs = vbdstat_attrs,
2240 +};
2241 +
2242 +VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
2243 +VBD_SHOW(mode, "%s\n", be->mode);
2244 +
2245 +int xenvbd_sysfs_addif(struct xenbus_device *dev)
2246 +{
2247 + int error;
2248 +
2249 + error = device_create_file(&dev->dev, &dev_attr_physical_device);
2250 + if (error)
2251 + goto fail1;
2252 +
2253 + error = device_create_file(&dev->dev, &dev_attr_mode);
2254 + if (error)
2255 + goto fail2;
2256 +
2257 + error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
2258 + if (error)
2259 + goto fail3;
2260 +
2261 + return 0;
2262 +
2263 +fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
2264 +fail2: device_remove_file(&dev->dev, &dev_attr_mode);
2265 +fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
2266 + return error;
2267 +}
2268 +
2269 +void xenvbd_sysfs_delif(struct xenbus_device *dev)
2270 +{
2271 + sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
2272 + device_remove_file(&dev->dev, &dev_attr_mode);
2273 + device_remove_file(&dev->dev, &dev_attr_physical_device);
2274 +}
2275 +
2276 +static int blkback_remove(struct xenbus_device *dev)
2277 +{
2278 + struct backend_info *be = dev->dev.driver_data;
2279 +
2280 + DPRINTK("");
2281 +
2282 + if (be->major || be->minor)
2283 + xenvbd_sysfs_delif(dev);
2284 +
2285 + if (be->backend_watch.node) {
2286 + unregister_xenbus_watch(&be->backend_watch);
2287 + kfree(be->backend_watch.node);
2288 + be->backend_watch.node = NULL;
2289 + }
2290 +
2291 + if (be->blkif) {
2292 + blkif_disconnect(be->blkif);
2293 + vbd_free(&be->blkif->vbd);
2294 + blkif_free(be->blkif);
2295 + be->blkif = NULL;
2296 + }
2297 +
2298 + kfree(be);
2299 + dev->dev.driver_data = NULL;
2300 + return 0;
2301 +}
2302 +
2303 +int blkback_barrier(struct xenbus_transaction xbt,
2304 + struct backend_info *be, int state)
2305 +{
2306 + struct xenbus_device *dev = be->dev;
2307 + int err;
2308 +
2309 + err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
2310 + "%d", state);
2311 + if (err)
2312 + xenbus_dev_fatal(dev, err, "writing feature-barrier");
2313 +
2314 + return err;
2315 +}
2316 +
2317 +/**
2318 + * Entry point to this code when a new device is created. Allocate the basic
2319 + * structures, and watch the store waiting for the hotplug scripts to tell us
2320 + * the device's physical major and minor numbers. Switch to InitWait.
2321 + */
2322 +static int blkback_probe(struct xenbus_device *dev,
2323 + const struct xenbus_device_id *id)
2324 +{
2325 + int err;
2326 + struct backend_info *be = kzalloc(sizeof(struct backend_info),
2327 + GFP_KERNEL);
2328 + if (!be) {
2329 + xenbus_dev_fatal(dev, -ENOMEM,
2330 + "allocating backend structure");
2331 + return -ENOMEM;
2332 + }
2333 + be->dev = dev;
2334 + dev->dev.driver_data = be;
2335 +
2336 + be->blkif = blkif_alloc(dev->otherend_id);
2337 + if (IS_ERR(be->blkif)) {
2338 + err = PTR_ERR(be->blkif);
2339 + be->blkif = NULL;
2340 + xenbus_dev_fatal(dev, err, "creating block interface");
2341 + goto fail;
2342 + }
2343 +
2344 + /* setup back pointer */
2345 + be->blkif->be = be;
2346 +
2347 + err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
2348 + &be->backend_watch, backend_changed);
2349 + if (err)
2350 + goto fail;
2351 +
2352 + err = xenbus_switch_state(dev, XenbusStateInitWait);
2353 + if (err)
2354 + goto fail;
2355 +
2356 + return 0;
2357 +
2358 +fail:
2359 + DPRINTK("failed");
2360 + blkback_remove(dev);
2361 + return err;
2362 +}
2363 +
2364 +
2365 +/**
2366 + * Callback received when the hotplug scripts have placed the physical-device
2367 + * node. Read it and the mode node, and create a vbd. If the frontend is
2368 + * ready, connect.
2369 + */
2370 +static void backend_changed(struct xenbus_watch *watch,
2371 + const char **vec, unsigned int len)
2372 +{
2373 + int err;
2374 + unsigned major;
2375 + unsigned minor;
2376 + struct backend_info *be
2377 + = container_of(watch, struct backend_info, backend_watch);
2378 + struct xenbus_device *dev = be->dev;
2379 + int cdrom = 0;
2380 + char *device_type;
2381 +
2382 + DPRINTK("");
2383 +
2384 + err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
2385 + &major, &minor);
2386 + if (XENBUS_EXIST_ERR(err)) {
2387 + /* Since this watch will fire once immediately after it is
2388 + registered, we expect this. Ignore it, and wait for the
2389 + hotplug scripts. */
2390 + return;
2391 + }
2392 + if (err != 2) {
2393 + xenbus_dev_fatal(dev, err, "reading physical-device");
2394 + return;
2395 + }
2396 +
2397 + if ((be->major || be->minor) &&
2398 + ((be->major != major) || (be->minor != minor))) {
2399 + printk(KERN_WARNING
2400 + "blkback: changing physical device (from %x:%x to "
2401 + "%x:%x) not supported.\n", be->major, be->minor,
2402 + major, minor);
2403 + return;
2404 + }
2405 +
2406 + be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
2407 + if (IS_ERR(be->mode)) {
2408 + err = PTR_ERR(be->mode);
2409 + be->mode = NULL;
2410 + xenbus_dev_fatal(dev, err, "reading mode");
2411 + return;
2412 + }
2413 +
2414 + device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
2415 + if (!IS_ERR(device_type)) {
2416 + cdrom = strcmp(device_type, "cdrom") == 0;
2417 + kfree(device_type);
2418 + }
2419 +
2420 + if (be->major == 0 && be->minor == 0) {
2421 + /* Front end dir is a number, which is used as the handle. */
2422 +
2423 + char *p = strrchr(dev->otherend, '/') + 1;
2424 + long handle = simple_strtoul(p, NULL, 0);
2425 +
2426 + be->major = major;
2427 + be->minor = minor;
2428 +
2429 + err = vbd_create(be->blkif, handle, major, minor,
2430 + (NULL == strchr(be->mode, 'w')), cdrom);
2431 + if (err) {
2432 + be->major = be->minor = 0;
2433 + xenbus_dev_fatal(dev, err, "creating vbd structure");
2434 + return;
2435 + }
2436 +
2437 + err = xenvbd_sysfs_addif(dev);
2438 + if (err) {
2439 + vbd_free(&be->blkif->vbd);
2440 + be->major = be->minor = 0;
2441 + xenbus_dev_fatal(dev, err, "creating sysfs entries");
2442 + return;
2443 + }
2444 +
2445 + /* We're potentially connected now */
2446 + update_blkif_status(be->blkif);
2447 + }
2448 +}
2449 +
2450 +
2451 +/**
2452 + * Callback received when the frontend's state changes.
2453 + */
2454 +static void frontend_changed(struct xenbus_device *dev,
2455 + enum xenbus_state frontend_state)
2456 +{
2457 + struct backend_info *be = dev->dev.driver_data;
2458 + int err;
2459 +
2460 + DPRINTK("%s", xenbus_strstate(frontend_state));
2461 +
2462 + switch (frontend_state) {
2463 + case XenbusStateInitialising:
2464 + if (dev->state == XenbusStateClosed) {
2465 + printk(KERN_INFO "%s: %s: prepare for reconnect\n",
2466 + __FUNCTION__, dev->nodename);
2467 + xenbus_switch_state(dev, XenbusStateInitWait);
2468 + }
2469 + break;
2470 +
2471 + case XenbusStateInitialised:
2472 + case XenbusStateConnected:
2473 + /* Ensure we connect even when two watches fire in
2474 +	   close succession and we miss the intermediate value
2475 + of frontend_state. */
2476 + if (dev->state == XenbusStateConnected)
2477 + break;
2478 +
2479 + err = connect_ring(be);
2480 + if (err)
2481 + break;
2482 + update_blkif_status(be->blkif);
2483 + break;
2484 +
2485 + case XenbusStateClosing:
2486 + blkif_disconnect(be->blkif);
2487 + xenbus_switch_state(dev, XenbusStateClosing);
2488 + break;
2489 +
2490 + case XenbusStateClosed:
2491 + xenbus_switch_state(dev, XenbusStateClosed);
2492 + if (xenbus_dev_is_online(dev))
2493 + break;
2494 + /* fall through if not online */
2495 + case XenbusStateUnknown:
2496 + device_unregister(&dev->dev);
2497 + break;
2498 +
2499 + default:
2500 + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
2501 + frontend_state);
2502 + break;
2503 + }
2504 +}
2505 +
2506 +
2507 +/* ** Connection ** */
2508 +
2509 +
2510 +/**
2511 + * Write the physical details regarding the block device to the store, and
2512 + * switch to Connected state.
2513 + */
2514 +static void connect(struct backend_info *be)
2515 +{
2516 + struct xenbus_transaction xbt;
2517 + int err;
2518 + struct xenbus_device *dev = be->dev;
2519 +
2520 + DPRINTK("%s", dev->otherend);
2521 +
2522 + /* Supply the information about the device the frontend needs */
2523 +again:
2524 + err = xenbus_transaction_start(&xbt);
2525 + if (err) {
2526 + xenbus_dev_fatal(dev, err, "starting transaction");
2527 + return;
2528 + }
2529 +
2530 + err = blkback_barrier(xbt, be, 1);
2531 + if (err)
2532 + goto abort;
2533 +
2534 + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
2535 + vbd_size(&be->blkif->vbd));
2536 + if (err) {
2537 + xenbus_dev_fatal(dev, err, "writing %s/sectors",
2538 + dev->nodename);
2539 + goto abort;
2540 + }
2541 +
2542 + /* FIXME: use a typename instead */
2543 + err = xenbus_printf(xbt, dev->nodename, "info", "%u",
2544 + vbd_info(&be->blkif->vbd));
2545 + if (err) {
2546 + xenbus_dev_fatal(dev, err, "writing %s/info",
2547 + dev->nodename);
2548 + goto abort;
2549 + }
2550 + err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
2551 + vbd_secsize(&be->blkif->vbd));
2552 + if (err) {
2553 + xenbus_dev_fatal(dev, err, "writing %s/sector-size",
2554 + dev->nodename);
2555 + goto abort;
2556 + }
2557 +
2558 + err = xenbus_transaction_end(xbt, 0);
2559 + if (err == -EAGAIN)
2560 + goto again;
2561 + if (err)
2562 + xenbus_dev_fatal(dev, err, "ending transaction");
2563 +
2564 + err = xenbus_switch_state(dev, XenbusStateConnected);
2565 + if (err)
2566 + xenbus_dev_fatal(dev, err, "switching to Connected state",
2567 + dev->nodename);
2568 +
2569 + return;
2570 + abort:
2571 + xenbus_transaction_end(xbt, 1);
2572 +}
2573 +
2574 +
2575 +static int connect_ring(struct backend_info *be)
2576 +{
2577 + struct xenbus_device *dev = be->dev;
2578 + unsigned long ring_ref;
2579 + unsigned int evtchn;
2580 + char protocol[64] = "";
2581 + int err;
2582 +
2583 + DPRINTK("%s", dev->otherend);
2584 +
2585 + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref,
2586 + "event-channel", "%u", &evtchn, NULL);
2587 + if (err) {
2588 + xenbus_dev_fatal(dev, err,
2589 + "reading %s/ring-ref and event-channel",
2590 + dev->otherend);
2591 + return err;
2592 + }
2593 +
2594 + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
2595 + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
2596 + "%63s", protocol, NULL);
2597 + if (err)
2598 + strcpy(protocol, "unspecified, assuming native");
2599 + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
2600 + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
2601 + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
2602 + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
2603 + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
2604 + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
2605 + else {
2606 + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
2607 + return -1;
2608 + }
2609 + printk(KERN_INFO
2610 + "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
2611 + ring_ref, evtchn, be->blkif->blk_protocol, protocol);
2612 +
2613 + /* Map the shared frame, irq etc. */
2614 + err = blkif_map(be->blkif, ring_ref, evtchn);
2615 + if (err) {
2616 + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
2617 + ring_ref, evtchn);
2618 + return err;
2619 + }
2620 +
2621 + return 0;
2622 +}
2623 +
2624 +
2625 +/* ** Driver Registration ** */
2626 +
2627 +
2628 +static const struct xenbus_device_id blkback_ids[] = {
2629 + { "vbd" },
2630 + { "" }
2631 +};
2632 +
2633 +
2634 +static struct xenbus_driver blkback = {
2635 + .name = "vbd",
2636 + .owner = THIS_MODULE,
2637 + .ids = blkback_ids,
2638 + .probe = blkback_probe,
2639 + .remove = blkback_remove,
2640 + .otherend_changed = frontend_changed
2641 +};
2642 +
2643 +
2644 +void blkif_xenbus_init(void)
2645 +{
2646 + xenbus_register_backend(&blkback);
2647 +}
2648 Index: head-2008-11-25/drivers/xen/blkfront/Makefile
2649 ===================================================================
2650 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
2651 +++ head-2008-11-25/drivers/xen/blkfront/Makefile 2007-06-12 13:13:44.000000000 +0200
2652 @@ -0,0 +1,5 @@
2653 +
2654 +obj-$(CONFIG_XEN_BLKDEV_FRONTEND) := xenblk.o
2655 +
2656 +xenblk-objs := blkfront.o vbd.o
2657 +
2658 Index: head-2008-11-25/drivers/xen/blkfront/blkfront.c
2659 ===================================================================
2660 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
2661 +++ head-2008-11-25/drivers/xen/blkfront/blkfront.c 2008-08-07 12:44:36.000000000 +0200
2662 @@ -0,0 +1,936 @@
2663 +/******************************************************************************
2664 + * blkfront.c
2665 + *
2666 + * XenLinux virtual block-device driver.
2667 + *
2668 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
2669 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
2670 + * Copyright (c) 2004, Christian Limpach
2671 + * Copyright (c) 2004, Andrew Warfield
2672 + * Copyright (c) 2005, Christopher Clark
2673 + * Copyright (c) 2005, XenSource Ltd
2674 + *
2675 + * This program is free software; you can redistribute it and/or
2676 + * modify it under the terms of the GNU General Public License version 2
2677 + * as published by the Free Software Foundation; or, when distributed
2678 + * separately from the Linux kernel or incorporated into other
2679 + * software packages, subject to the following license:
2680 + *
2681 + * Permission is hereby granted, free of charge, to any person obtaining a copy
2682 + * of this source file (the "Software"), to deal in the Software without
2683 + * restriction, including without limitation the rights to use, copy, modify,
2684 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
2685 + * and to permit persons to whom the Software is furnished to do so, subject to
2686 + * the following conditions:
2687 + *
2688 + * The above copyright notice and this permission notice shall be included in
2689 + * all copies or substantial portions of the Software.
2690 + *
2691 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2692 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2693 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2694 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2695 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2696 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2697 + * IN THE SOFTWARE.
2698 + */
2699 +
2700 +#include <linux/version.h>
2701 +#include "block.h"
2702 +#include <linux/cdrom.h>
2703 +#include <linux/sched.h>
2704 +#include <linux/interrupt.h>
2705 +#include <scsi/scsi.h>
2706 +#include <xen/evtchn.h>
2707 +#include <xen/xenbus.h>
2708 +#include <xen/interface/grant_table.h>
2709 +#include <xen/interface/io/protocols.h>
2710 +#include <xen/gnttab.h>
2711 +#include <asm/hypervisor.h>
2712 +#include <asm/maddr.h>
2713 +
2714 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
2715 +#include <xen/platform-compat.h>
2716 +#endif
2717 +
2718 +#define BLKIF_STATE_DISCONNECTED 0
2719 +#define BLKIF_STATE_CONNECTED 1
2720 +#define BLKIF_STATE_SUSPENDED 2
2721 +
2722 +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
2723 + (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
2724 +#define GRANT_INVALID_REF 0
2725 +
2726 +static void connect(struct blkfront_info *);
2727 +static void blkfront_closing(struct xenbus_device *);
2728 +static int blkfront_remove(struct xenbus_device *);
2729 +static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
2730 +static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
2731 +
2732 +static void kick_pending_request_queues(struct blkfront_info *);
2733 +
2734 +static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
2735 +static void blkif_restart_queue(void *arg);
2736 +static void blkif_recover(struct blkfront_info *);
2737 +static void blkif_completion(struct blk_shadow *);
2738 +static void blkif_free(struct blkfront_info *, int);
2739 +
2740 +
2741 +/**
2742 + * Entry point to this code when a new device is created. Allocate the basic
2743 + * structures and the ring buffer for communication with the backend, and
2744 + * inform the backend of the appropriate details for those. Switch to
2745 + * Initialised state.
2746 + */
2747 +static int blkfront_probe(struct xenbus_device *dev,
2748 + const struct xenbus_device_id *id)
2749 +{
2750 + int err, vdevice, i;
2751 + struct blkfront_info *info;
2752 +
2753 + /* FIXME: Use dynamic device id if this is not set. */
2754 + err = xenbus_scanf(XBT_NIL, dev->nodename,
2755 + "virtual-device", "%i", &vdevice);
2756 + if (err != 1) {
2757 + /* go looking in the extended area instead */
2758 + err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
2759 + "%i", &vdevice);
2760 + if (err != 1) {
2761 + xenbus_dev_fatal(dev, err, "reading virtual-device");
2762 + return err;
2763 + }
2764 + }
2765 +
2766 + info = kzalloc(sizeof(*info), GFP_KERNEL);
2767 + if (!info) {
2768 + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
2769 + return -ENOMEM;
2770 + }
2771 +
2772 + info->xbdev = dev;
2773 + info->vdevice = vdevice;
2774 + info->connected = BLKIF_STATE_DISCONNECTED;
2775 + INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
2776 +
2777 + for (i = 0; i < BLK_RING_SIZE; i++)
2778 + info->shadow[i].req.id = i+1;
2779 + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
2780 +
2781 + /* Front end dir is a number, which is used as the id. */
2782 + info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
2783 + dev->dev.driver_data = info;
2784 +
2785 + err = talk_to_backend(dev, info);
2786 + if (err) {
2787 + kfree(info);
2788 + dev->dev.driver_data = NULL;
2789 + return err;
2790 + }
2791 +
2792 + return 0;
2793 +}
2794 +
2795 +
2796 +/**
2797 + * We are reconnecting to the backend, due to a suspend/resume, or a backend
2798 + * driver restart. We tear down our blkif structure and recreate it, but
2799 + * leave the device-layer structures intact so that this is transparent to the
2800 + * rest of the kernel.
2801 + */
2802 +static int blkfront_resume(struct xenbus_device *dev)
2803 +{
2804 + struct blkfront_info *info = dev->dev.driver_data;
2805 + int err;
2806 +
2807 + DPRINTK("blkfront_resume: %s\n", dev->nodename);
2808 +
2809 + blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
2810 +
2811 + err = talk_to_backend(dev, info);
2812 + if (info->connected == BLKIF_STATE_SUSPENDED && !err)
2813 + blkif_recover(info);
2814 +
2815 + return err;
2816 +}
2817 +
2818 +
2819 +/* Common code used when first setting up, and when resuming. */
2820 +static int talk_to_backend(struct xenbus_device *dev,
2821 + struct blkfront_info *info)
2822 +{
2823 + const char *message = NULL;
2824 + struct xenbus_transaction xbt;
2825 + int err;
2826 +
2827 + /* Create shared ring, alloc event channel. */
2828 + err = setup_blkring(dev, info);
2829 + if (err)
2830 + goto out;
2831 +
2832 +again:
2833 + err = xenbus_transaction_start(&xbt);
2834 + if (err) {
2835 + xenbus_dev_fatal(dev, err, "starting transaction");
2836 + goto destroy_blkring;
2837 + }
2838 +
2839 + err = xenbus_printf(xbt, dev->nodename,
2840 + "ring-ref","%u", info->ring_ref);
2841 + if (err) {
2842 + message = "writing ring-ref";
2843 + goto abort_transaction;
2844 + }
2845 + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
2846 + irq_to_evtchn_port(info->irq));
2847 + if (err) {
2848 + message = "writing event-channel";
2849 + goto abort_transaction;
2850 + }
2851 + err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
2852 + XEN_IO_PROTO_ABI_NATIVE);
2853 + if (err) {
2854 + message = "writing protocol";
2855 + goto abort_transaction;
2856 + }
2857 +
2858 + err = xenbus_transaction_end(xbt, 0);
2859 + if (err) {
2860 + if (err == -EAGAIN)
2861 + goto again;
2862 + xenbus_dev_fatal(dev, err, "completing transaction");
2863 + goto destroy_blkring;
2864 + }
2865 +
2866 + xenbus_switch_state(dev, XenbusStateInitialised);
2867 +
2868 + return 0;
2869 +
2870 + abort_transaction:
2871 + xenbus_transaction_end(xbt, 1);
2872 + if (message)
2873 + xenbus_dev_fatal(dev, err, "%s", message);
2874 + destroy_blkring:
2875 + blkif_free(info, 0);
2876 + out:
2877 + return err;
2878 +}
2879 +
2880 +
2881 +static int setup_blkring(struct xenbus_device *dev,
2882 + struct blkfront_info *info)
2883 +{
2884 + blkif_sring_t *sring;
2885 + int err;
2886 +
2887 + info->ring_ref = GRANT_INVALID_REF;
2888 +
2889 + sring = (blkif_sring_t *)__get_free_page(GFP_NOIO | __GFP_HIGH);
2890 + if (!sring) {
2891 + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
2892 + return -ENOMEM;
2893 + }
2894 + SHARED_RING_INIT(sring);
2895 + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
2896 +
2897 + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
2898 + if (err < 0) {
2899 + free_page((unsigned long)sring);
2900 + info->ring.sring = NULL;
2901 + goto fail;
2902 + }
2903 + info->ring_ref = err;
2904 +
2905 + err = bind_listening_port_to_irqhandler(
2906 + dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
2907 + if (err <= 0) {
2908 + xenbus_dev_fatal(dev, err,
2909 + "bind_listening_port_to_irqhandler");
2910 + goto fail;
2911 + }
2912 + info->irq = err;
2913 +
2914 + return 0;
2915 +fail:
2916 + blkif_free(info, 0);
2917 + return err;
2918 +}
2919 +
2920 +
2921 +/**
2922 + * Callback received when the backend's state changes.
2923 + */
2924 +static void backend_changed(struct xenbus_device *dev,
2925 + enum xenbus_state backend_state)
2926 +{
2927 + struct blkfront_info *info = dev->dev.driver_data;
2928 + struct block_device *bd;
2929 +
2930 + DPRINTK("blkfront:backend_changed.\n");
2931 +
2932 + switch (backend_state) {
2933 + case XenbusStateInitialising:
2934 + case XenbusStateInitWait:
2935 + case XenbusStateInitialised:
2936 + case XenbusStateReconfiguring:
2937 + case XenbusStateReconfigured:
2938 + case XenbusStateUnknown:
2939 + case XenbusStateClosed:
2940 + break;
2941 +
2942 + case XenbusStateConnected:
2943 + connect(info);
2944 + break;
2945 +
2946 + case XenbusStateClosing:
2947 + bd = bdget(info->dev);
2948 + if (bd == NULL)
2949 + xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
2950 +
2951 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
2952 + down(&bd->bd_sem);
2953 +#else
2954 + mutex_lock(&bd->bd_mutex);
2955 +#endif
2956 + if (info->users > 0)
2957 + xenbus_dev_error(dev, -EBUSY,
2958 + "Device in use; refusing to close");
2959 + else
2960 + blkfront_closing(dev);
2961 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
2962 + up(&bd->bd_sem);
2963 +#else
2964 + mutex_unlock(&bd->bd_mutex);
2965 +#endif
2966 + bdput(bd);
2967 + break;
2968 + }
2969 +}
2970 +
2971 +
2972 +/* ** Connection ** */
2973 +
2974 +
2975 +/*
2976 + * Invoked when the backend is finally 'ready' (and has produced
2977 + * the details about the physical device - #sectors, size, etc).
2978 + */
2979 +static void connect(struct blkfront_info *info)
2980 +{
2981 + unsigned long long sectors;
2982 + unsigned long sector_size;
2983 + unsigned int binfo;
2984 + int err;
2985 +
2986 + if ((info->connected == BLKIF_STATE_CONNECTED) ||
2987 + (info->connected == BLKIF_STATE_SUSPENDED) )
2988 + return;
2989 +
2990 + DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
2991 +
2992 + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
2993 + "sectors", "%Lu", &sectors,
2994 + "info", "%u", &binfo,
2995 + "sector-size", "%lu", &sector_size,
2996 + NULL);
2997 + if (err) {
2998 + xenbus_dev_fatal(info->xbdev, err,
2999 + "reading backend fields at %s",
3000 + info->xbdev->otherend);
3001 + return;
3002 + }
3003 +
3004 + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
3005 + "feature-barrier", "%lu", &info->feature_barrier,
3006 + NULL);
3007 + if (err)
3008 + info->feature_barrier = 0;
3009 +
3010 + err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
3011 + if (err) {
3012 + xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
3013 + info->xbdev->otherend);
3014 + return;
3015 + }
3016 +
3017 + err = xlvbd_sysfs_addif(info);
3018 + if (err) {
3019 + xenbus_dev_fatal(info->xbdev, err, "xlvbd_sysfs_addif at %s",
3020 + info->xbdev->otherend);
3021 + return;
3022 + }
3023 +
3024 + (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
3025 +
3026 + /* Kick pending requests. */
3027 + spin_lock_irq(&blkif_io_lock);
3028 + info->connected = BLKIF_STATE_CONNECTED;
3029 + kick_pending_request_queues(info);
3030 + spin_unlock_irq(&blkif_io_lock);
3031 +
3032 + add_disk(info->gd);
3033 +
3034 + info->is_ready = 1;
3035 +}
3036 +
3037 +/**
3038 + * Handle the change of state of the backend to Closing. We must delete our
3039 + * device-layer structures now, to ensure that writes are flushed through to
3040 + * the backend. Once this is done, we can switch to Closed in
3041 + * acknowledgement.
3042 + */
3043 +static void blkfront_closing(struct xenbus_device *dev)
3044 +{
3045 + struct blkfront_info *info = dev->dev.driver_data;
3046 + unsigned long flags;
3047 +
3048 + DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
3049 +
3050 + if (info->rq == NULL)
3051 + goto out;
3052 +
3053 + spin_lock_irqsave(&blkif_io_lock, flags);
3054 + /* No more blkif_request(). */
3055 + blk_stop_queue(info->rq);
3056 + /* No more gnttab callback work. */
3057 + gnttab_cancel_free_callback(&info->callback);
3058 + spin_unlock_irqrestore(&blkif_io_lock, flags);
3059 +
3060 + /* Flush gnttab callback work. Must be done with no locks held. */
3061 + flush_scheduled_work();
3062 +
3063 + xlvbd_sysfs_delif(info);
3064 +
3065 + xlvbd_del(info);
3066 +
3067 + out:
3068 + xenbus_frontend_closed(dev);
3069 +}
3070 +
3071 +
3072 +static int blkfront_remove(struct xenbus_device *dev)
3073 +{
3074 + struct blkfront_info *info = dev->dev.driver_data;
3075 +
3076 + DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
3077 +
3078 + blkif_free(info, 0);
3079 +
3080 + kfree(info);
3081 +
3082 + return 0;
3083 +}
3084 +
3085 +
3086 +static inline int GET_ID_FROM_FREELIST(
3087 + struct blkfront_info *info)
3088 +{
3089 + unsigned long free = info->shadow_free;
3090 + BUG_ON(free > BLK_RING_SIZE);
3091 + info->shadow_free = info->shadow[free].req.id;
3092 + info->shadow[free].req.id = 0x0fffffee; /* debug */
3093 + return free;
3094 +}
3095 +
3096 +static inline void ADD_ID_TO_FREELIST(
3097 + struct blkfront_info *info, unsigned long id)
3098 +{
3099 + info->shadow[id].req.id = info->shadow_free;
3100 + info->shadow[id].request = 0;
3101 + info->shadow_free = id;
3102 +}
3103 +
3104 +static inline void flush_requests(struct blkfront_info *info)
3105 +{
3106 + int notify;
3107 +
3108 + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
3109 +
3110 + if (notify)
3111 + notify_remote_via_irq(info->irq);
3112 +}
3113 +
3114 +static void kick_pending_request_queues(struct blkfront_info *info)
3115 +{
3116 + if (!RING_FULL(&info->ring)) {
3117 + /* Re-enable calldowns. */
3118 + blk_start_queue(info->rq);
3119 + /* Kick things off immediately. */
3120 + do_blkif_request(info->rq);
3121 + }
3122 +}
3123 +
3124 +static void blkif_restart_queue(void *arg)
3125 +{
3126 + struct blkfront_info *info = (struct blkfront_info *)arg;
3127 + spin_lock_irq(&blkif_io_lock);
3128 + if (info->connected == BLKIF_STATE_CONNECTED)
3129 + kick_pending_request_queues(info);
3130 + spin_unlock_irq(&blkif_io_lock);
3131 +}
3132 +
3133 +static void blkif_restart_queue_callback(void *arg)
3134 +{
3135 + struct blkfront_info *info = (struct blkfront_info *)arg;
3136 + schedule_work(&info->work);
3137 +}
3138 +
3139 +int blkif_open(struct inode *inode, struct file *filep)
3140 +{
3141 + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
3142 + info->users++;
3143 + return 0;
3144 +}
3145 +
3146 +
3147 +int blkif_release(struct inode *inode, struct file *filep)
3148 +{
3149 + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
3150 + info->users--;
3151 + if (info->users == 0) {
3152 + /* Check whether we have been instructed to close. We will
3153 + have ignored this request initially, as the device was
3154 + still mounted. */
3155 + struct xenbus_device * dev = info->xbdev;
3156 + enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
3157 +
3158 + if (state == XenbusStateClosing && info->is_ready)
3159 + blkfront_closing(dev);
3160 + }
3161 + return 0;
3162 +}
3163 +
3164 +
3165 +int blkif_ioctl(struct inode *inode, struct file *filep,
3166 + unsigned command, unsigned long argument)
3167 +{
3168 + int i;
3169 +
3170 + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
3171 + command, (long)argument, inode->i_rdev);
3172 +
3173 + switch (command) {
3174 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
3175 + case HDIO_GETGEO: {
3176 + struct block_device *bd = inode->i_bdev;
3177 + struct hd_geometry geo;
3178 + int ret;
3179 +
3180 + if (!argument)
3181 + return -EINVAL;
3182 +
3183 + geo.start = get_start_sect(bd);
3184 + ret = blkif_getgeo(bd, &geo);
3185 + if (ret)
3186 + return ret;
3187 +
3188 + if (copy_to_user((struct hd_geometry __user *)argument, &geo,
3189 + sizeof(geo)))
3190 + return -EFAULT;
3191 +
3192 + return 0;
3193 + }
3194 +#endif
3195 + case CDROMMULTISESSION:
3196 + DPRINTK("FIXME: support multisession CDs later\n");
3197 + for (i = 0; i < sizeof(struct cdrom_multisession); i++)
3198 + if (put_user(0, (char __user *)(argument + i)))
3199 + return -EFAULT;
3200 + return 0;
3201 +
3202 + case CDROM_GET_CAPABILITY: {
3203 + struct blkfront_info *info =
3204 + inode->i_bdev->bd_disk->private_data;
3205 + struct gendisk *gd = info->gd;
3206 + if (gd->flags & GENHD_FL_CD)
3207 + return 0;
3208 + return -EINVAL;
3209 + }
3210 + default:
3211 + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
3212 + command);*/
3213 + return -EINVAL; /* same return as native Linux */
3214 + }
3215 +
3216 + return 0;
3217 +}
3218 +
3219 +
3220 +int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
3221 +{
3222 + /* We don't have real geometry info, but let's at least return
3223 + values consistent with the size of the device */
3224 + sector_t nsect = get_capacity(bd->bd_disk);
3225 + sector_t cylinders = nsect;
3226 +
3227 + hg->heads = 0xff;
3228 + hg->sectors = 0x3f;
3229 + sector_div(cylinders, hg->heads * hg->sectors);
3230 + hg->cylinders = cylinders;
3231 + if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
3232 + hg->cylinders = 0xffff;
3233 + return 0;
3234 +}
3235 +
3236 +
3237 +/*
3238 + * blkif_queue_request
3239 + *
3240 + * request block io
3241 + *
3242 + * id: for guest use only.
3243 + * operation: BLKIF_OP_{READ,WRITE,PROBE}
3244 + * buffer: buffer to read/write into. this should be a
3245 + * virtual address in the guest os.
3246 + */
3247 +static int blkif_queue_request(struct request *req)
3248 +{
3249 + struct blkfront_info *info = req->rq_disk->private_data;
3250 + unsigned long buffer_mfn;
3251 + blkif_request_t *ring_req;
3252 + struct bio *bio;
3253 + struct bio_vec *bvec;
3254 + int idx;
3255 + unsigned long id;
3256 + unsigned int fsect, lsect;
3257 + int ref;
3258 + grant_ref_t gref_head;
3259 +
3260 + if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
3261 + return 1;
3262 +
3263 + if (gnttab_alloc_grant_references(
3264 + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
3265 + gnttab_request_free_callback(
3266 + &info->callback,
3267 + blkif_restart_queue_callback,
3268 + info,
3269 + BLKIF_MAX_SEGMENTS_PER_REQUEST);
3270 + return 1;
3271 + }
3272 +
3273 + /* Fill out a communications ring structure. */
3274 + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
3275 + id = GET_ID_FROM_FREELIST(info);
3276 + info->shadow[id].request = (unsigned long)req;
3277 +
3278 + ring_req->id = id;
3279 + ring_req->sector_number = (blkif_sector_t)req->sector;
3280 + ring_req->handle = info->handle;
3281 +
3282 + ring_req->operation = rq_data_dir(req) ?
3283 + BLKIF_OP_WRITE : BLKIF_OP_READ;
3284 + if (blk_barrier_rq(req))
3285 + ring_req->operation = BLKIF_OP_WRITE_BARRIER;
3286 +
3287 + ring_req->nr_segments = 0;
3288 + rq_for_each_bio (bio, req) {
3289 + bio_for_each_segment (bvec, bio, idx) {
3290 + BUG_ON(ring_req->nr_segments
3291 + == BLKIF_MAX_SEGMENTS_PER_REQUEST);
3292 + buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
3293 + fsect = bvec->bv_offset >> 9;
3294 + lsect = fsect + (bvec->bv_len >> 9) - 1;
3295 + /* install a grant reference. */
3296 + ref = gnttab_claim_grant_reference(&gref_head);
3297 + BUG_ON(ref == -ENOSPC);
3298 +
3299 + gnttab_grant_foreign_access_ref(
3300 + ref,
3301 + info->xbdev->otherend_id,
3302 + buffer_mfn,
3303 + rq_data_dir(req) ? GTF_readonly : 0 );
3304 +
3305 + info->shadow[id].frame[ring_req->nr_segments] =
3306 + mfn_to_pfn(buffer_mfn);
3307 +
3308 + ring_req->seg[ring_req->nr_segments] =
3309 + (struct blkif_request_segment) {
3310 + .gref = ref,
3311 + .first_sect = fsect,
3312 + .last_sect = lsect };
3313 +
3314 + ring_req->nr_segments++;
3315 + }
3316 + }
3317 +
3318 + info->ring.req_prod_pvt++;
3319 +
3320 + /* Keep a private copy so we can reissue requests when recovering. */
3321 + info->shadow[id].req = *ring_req;
3322 +
3323 + gnttab_free_grant_references(gref_head);
3324 +
3325 + return 0;
3326 +}
3327 +
3328 +/*
3329 + * do_blkif_request
3330 + * read a block; request is in a request queue
3331 + */
3332 +void do_blkif_request(request_queue_t *rq)
3333 +{
3334 + struct blkfront_info *info = NULL;
3335 + struct request *req;
3336 + int queued;
3337 +
3338 + DPRINTK("Entered do_blkif_request\n");
3339 +
3340 + queued = 0;
3341 +
3342 + while ((req = elv_next_request(rq)) != NULL) {
3343 + info = req->rq_disk->private_data;
3344 + if (!blk_fs_request(req)) {
3345 + end_request(req, 0);
3346 + continue;
3347 + }
3348 +
3349 + if (RING_FULL(&info->ring))
3350 + goto wait;
3351 +
3352 + DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
3353 + "(%u/%li) buffer:%p [%s]\n",
3354 + req, req->cmd, (long long)req->sector,
3355 + req->current_nr_sectors,
3356 + req->nr_sectors, req->buffer,
3357 + rq_data_dir(req) ? "write" : "read");
3358 +
3359 +
3360 + blkdev_dequeue_request(req);
3361 + if (blkif_queue_request(req)) {
3362 + blk_requeue_request(rq, req);
3363 + wait:
3364 + /* Avoid pointless unplugs. */
3365 + blk_stop_queue(rq);
3366 + break;
3367 + }
3368 +
3369 + queued++;
3370 + }
3371 +
3372 + if (queued != 0)
3373 + flush_requests(info);
3374 +}
3375 +
3376 +
3377 +static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
3378 +{
3379 + struct request *req;
3380 + blkif_response_t *bret;
3381 + RING_IDX i, rp;
3382 + unsigned long flags;
3383 + struct blkfront_info *info = (struct blkfront_info *)dev_id;
3384 + int uptodate;
3385 +
3386 + spin_lock_irqsave(&blkif_io_lock, flags);
3387 +
3388 + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
3389 + spin_unlock_irqrestore(&blkif_io_lock, flags);
3390 + return IRQ_HANDLED;
3391 + }
3392 +
3393 + again:
3394 + rp = info->ring.sring->rsp_prod;
3395 + rmb(); /* Ensure we see queued responses up to 'rp'. */
3396 +
3397 + for (i = info->ring.rsp_cons; i != rp; i++) {
3398 + unsigned long id;
3399 + int ret;
3400 +
3401 + bret = RING_GET_RESPONSE(&info->ring, i);
3402 + id = bret->id;
3403 + req = (struct request *)info->shadow[id].request;
3404 +
3405 + blkif_completion(&info->shadow[id]);
3406 +
3407 + ADD_ID_TO_FREELIST(info, id);
3408 +
3409 + uptodate = (bret->status == BLKIF_RSP_OKAY);
3410 + switch (bret->operation) {
3411 + case BLKIF_OP_WRITE_BARRIER:
3412 + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
3413 + printk("blkfront: %s: write barrier op failed\n",
3414 + info->gd->disk_name);
3415 + uptodate = -EOPNOTSUPP;
3416 + info->feature_barrier = 0;
3417 + xlvbd_barrier(info);
3418 + }
3419 + /* fall through */
3420 + case BLKIF_OP_READ:
3421 + case BLKIF_OP_WRITE:
3422 + if (unlikely(bret->status != BLKIF_RSP_OKAY))
3423 + DPRINTK("Bad return from blkdev data "
3424 + "request: %x\n", bret->status);
3425 +
3426 + ret = end_that_request_first(req, uptodate,
3427 + req->hard_nr_sectors);
3428 + BUG_ON(ret);
3429 + end_that_request_last(req, uptodate);
3430 + break;
3431 + default:
3432 + BUG();
3433 + }
3434 + }
3435 +
3436 + info->ring.rsp_cons = i;
3437 +
3438 + if (i != info->ring.req_prod_pvt) {
3439 + int more_to_do;
3440 + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
3441 + if (more_to_do)
3442 + goto again;
3443 + } else
3444 + info->ring.sring->rsp_event = i + 1;
3445 +
3446 + kick_pending_request_queues(info);
3447 +
3448 + spin_unlock_irqrestore(&blkif_io_lock, flags);
3449 +
3450 + return IRQ_HANDLED;
3451 +}
3452 +
3453 +static void blkif_free(struct blkfront_info *info, int suspend)
3454 +{
3455 + /* Prevent new requests being issued until we fix things up. */
3456 + spin_lock_irq(&blkif_io_lock);
3457 + info->connected = suspend ?
3458 + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
3459 + /* No more blkif_request(). */
3460 + if (info->rq)
3461 + blk_stop_queue(info->rq);
3462 + /* No more gnttab callback work. */
3463 + gnttab_cancel_free_callback(&info->callback);
3464 + spin_unlock_irq(&blkif_io_lock);
3465 +
3466 + /* Flush gnttab callback work. Must be done with no locks held. */
3467 + flush_scheduled_work();
3468 +
3469 + /* Free resources associated with old device channel. */
3470 + if (info->ring_ref != GRANT_INVALID_REF) {
3471 + gnttab_end_foreign_access(info->ring_ref,
3472 + (unsigned long)info->ring.sring);
3473 + info->ring_ref = GRANT_INVALID_REF;
3474 + info->ring.sring = NULL;
3475 + }
3476 + if (info->irq)
3477 + unbind_from_irqhandler(info->irq, info);
3478 + info->irq = 0;
3479 +}
3480 +
3481 +static void blkif_completion(struct blk_shadow *s)
3482 +{
3483 + int i;
3484 + for (i = 0; i < s->req.nr_segments; i++)
3485 + gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
3486 +}
3487 +
3488 +static void blkif_recover(struct blkfront_info *info)
3489 +{
3490 + int i;
3491 + blkif_request_t *req;
3492 + struct blk_shadow *copy;
3493 + int j;
3494 +
3495 + /* Stage 1: Make a safe copy of the shadow state. */
3496 + copy = kmalloc(sizeof(info->shadow), GFP_NOIO | __GFP_NOFAIL | __GFP_HIGH);
3497 + memcpy(copy, info->shadow, sizeof(info->shadow));
3498 +
3499 + /* Stage 2: Set up free list. */
3500 + memset(&info->shadow, 0, sizeof(info->shadow));
3501 + for (i = 0; i < BLK_RING_SIZE; i++)
3502 + info->shadow[i].req.id = i+1;
3503 + info->shadow_free = info->ring.req_prod_pvt;
3504 + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
3505 +
3506 + /* Stage 3: Find pending requests and requeue them. */
3507 + for (i = 0; i < BLK_RING_SIZE; i++) {
3508 + /* Not in use? */
3509 + if (copy[i].request == 0)
3510 + continue;
3511 +
3512 + /* Grab a request slot and copy shadow state into it. */
3513 + req = RING_GET_REQUEST(
3514 + &info->ring, info->ring.req_prod_pvt);
3515 + *req = copy[i].req;
3516 +
3517 + /* We get a new request id, and must reset the shadow state. */
3518 + req->id = GET_ID_FROM_FREELIST(info);
3519 + memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
3520 +
3521 + /* Rewrite any grant references invalidated by susp/resume. */
3522 + for (j = 0; j < req->nr_segments; j++)
3523 + gnttab_grant_foreign_access_ref(
3524 + req->seg[j].gref,
3525 + info->xbdev->otherend_id,
3526 + pfn_to_mfn(info->shadow[req->id].frame[j]),
3527 + rq_data_dir((struct request *)
3528 + info->shadow[req->id].request) ?
3529 + GTF_readonly : 0);
3530 + info->shadow[req->id].req = *req;
3531 +
3532 + info->ring.req_prod_pvt++;
3533 + }
3534 +
3535 + kfree(copy);
3536 +
3537 + (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
3538 +
3539 + spin_lock_irq(&blkif_io_lock);
3540 +
3541 + /* Now safe for us to use the shared ring */
3542 + info->connected = BLKIF_STATE_CONNECTED;
3543 +
3544 + /* Send off requeued requests */
3545 + flush_requests(info);
3546 +
3547 + /* Kick any other new requests queued since we resumed */
3548 + kick_pending_request_queues(info);
3549 +
3550 + spin_unlock_irq(&blkif_io_lock);
3551 +}
3552 +
3553 +int blkfront_is_ready(struct xenbus_device *dev)
3554 +{
3555 + struct blkfront_info *info = dev->dev.driver_data;
3556 +
3557 + return info->is_ready;
3558 +}
3559 +
3560 +
3561 +/* ** Driver Registration ** */
3562 +
3563 +
3564 +static const struct xenbus_device_id blkfront_ids[] = {
3565 + { "vbd" },
3566 + { "" }
3567 +};
3568 +MODULE_ALIAS("xen:vbd");
3569 +
3570 +static struct xenbus_driver blkfront = {
3571 + .name = "vbd",
3572 + .owner = THIS_MODULE,
3573 + .ids = blkfront_ids,
3574 + .probe = blkfront_probe,
3575 + .remove = blkfront_remove,
3576 + .resume = blkfront_resume,
3577 + .otherend_changed = backend_changed,
3578 + .is_ready = blkfront_is_ready,
3579 +};
3580 +
3581 +
3582 +static int __init xlblk_init(void)
3583 +{
3584 + if (!is_running_on_xen())
3585 + return -ENODEV;
3586 +
3587 + return xenbus_register_frontend(&blkfront);
3588 +}
3589 +module_init(xlblk_init);
3590 +
3591 +
3592 +static void __exit xlblk_exit(void)
3593 +{
3594 + return xenbus_unregister_driver(&blkfront);
3595 +}
3596 +module_exit(xlblk_exit);
3597 +
3598 +MODULE_LICENSE("Dual BSD/GPL");
3599 Index: head-2008-11-25/drivers/xen/blkfront/block.h
3600 ===================================================================
3601 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
3602 +++ head-2008-11-25/drivers/xen/blkfront/block.h 2008-08-07 12:44:36.000000000 +0200
3603 @@ -0,0 +1,158 @@
3604 +/******************************************************************************
3605 + * block.h
3606 + *
3607 + * Shared definitions between all levels of XenLinux Virtual block devices.
3608 + *
3609 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
3610 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
3611 + * Copyright (c) 2004-2005, Christian Limpach
3612 + *
3613 + * This program is free software; you can redistribute it and/or
3614 + * modify it under the terms of the GNU General Public License version 2
3615 + * as published by the Free Software Foundation; or, when distributed
3616 + * separately from the Linux kernel or incorporated into other
3617 + * software packages, subject to the following license:
3618 + *
3619 + * Permission is hereby granted, free of charge, to any person obtaining a copy
3620 + * of this source file (the "Software"), to deal in the Software without
3621 + * restriction, including without limitation the rights to use, copy, modify,
3622 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
3623 + * and to permit persons to whom the Software is furnished to do so, subject to
3624 + * the following conditions:
3625 + *
3626 + * The above copyright notice and this permission notice shall be included in
3627 + * all copies or substantial portions of the Software.
3628 + *
3629 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3630 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3631 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
3632 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
3633 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
3634 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
3635 + * IN THE SOFTWARE.
3636 + */
3637 +
3638 +#ifndef __XEN_DRIVERS_BLOCK_H__
3639 +#define __XEN_DRIVERS_BLOCK_H__
3640 +
3641 +#include <linux/version.h>
3642 +#include <linux/module.h>
3643 +#include <linux/kernel.h>
3644 +#include <linux/sched.h>
3645 +#include <linux/slab.h>
3646 +#include <linux/string.h>
3647 +#include <linux/errno.h>
3648 +#include <linux/fs.h>
3649 +#include <linux/hdreg.h>
3650 +#include <linux/blkdev.h>
3651 +#include <linux/major.h>
3652 +#include <asm/hypervisor.h>
3653 +#include <xen/xenbus.h>
3654 +#include <xen/gnttab.h>
3655 +#include <xen/interface/xen.h>
3656 +#include <xen/interface/io/blkif.h>
3657 +#include <xen/interface/io/ring.h>
3658 +#include <asm/io.h>
3659 +#include <asm/atomic.h>
3660 +#include <asm/uaccess.h>
3661 +
3662 +#define DPRINTK(_f, _a...) pr_debug(_f, ## _a)
3663 +
3664 +#if 0
3665 +#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
3666 +#else
3667 +#define DPRINTK_IOCTL(_f, _a...) ((void)0)
3668 +#endif
3669 +
3670 +struct xlbd_type_info
3671 +{
3672 + int partn_shift;
3673 + int disks_per_major;
3674 + char *devname;
3675 + char *diskname;
3676 +};
3677 +
3678 +struct xlbd_major_info
3679 +{
3680 + int major;
3681 + int index;
3682 + int usage;
3683 + struct xlbd_type_info *type;
3684 +};
3685 +
3686 +struct blk_shadow {
3687 + blkif_request_t req;
3688 + unsigned long request;
3689 + unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
3690 +};
3691 +
3692 +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
3693 +
3694 +/*
3695 + * We have one of these per vbd, whether ide, scsi or 'other'. They
3696 + * hang in private_data off the gendisk structure. We may end up
3697 + * putting all kinds of interesting stuff here :-)
3698 + */
3699 +struct blkfront_info
3700 +{
3701 + struct xenbus_device *xbdev;
3702 + dev_t dev;
3703 + struct gendisk *gd;
3704 + int vdevice;
3705 + blkif_vdev_t handle;
3706 + int connected;
3707 + int ring_ref;
3708 + blkif_front_ring_t ring;
3709 + unsigned int irq;
3710 + struct xlbd_major_info *mi;
3711 + request_queue_t *rq;
3712 + struct work_struct work;
3713 + struct gnttab_free_callback callback;
3714 + struct blk_shadow shadow[BLK_RING_SIZE];
3715 + unsigned long shadow_free;
3716 + int feature_barrier;
3717 + int is_ready;
3718 +
3719 + /**
3720 + * The number of people holding this device open. We won't allow a
3721 + * hot-unplug unless this is 0.
3722 + */
3723 + int users;
3724 +};
3725 +
3726 +extern spinlock_t blkif_io_lock;
3727 +
3728 +extern int blkif_open(struct inode *inode, struct file *filep);
3729 +extern int blkif_release(struct inode *inode, struct file *filep);
3730 +extern int blkif_ioctl(struct inode *inode, struct file *filep,
3731 + unsigned command, unsigned long argument);
3732 +extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
3733 +extern int blkif_check(dev_t dev);
3734 +extern int blkif_revalidate(dev_t dev);
3735 +extern void do_blkif_request (request_queue_t *rq);
3736 +
3737 +/* Virtual block-device subsystem. */
3738 +/* Note that xlvbd_add doesn't call add_disk for you: you're expected
3739 + to call add_disk on info->gd once the disk is properly connected
3740 + up. */
3741 +int xlvbd_add(blkif_sector_t capacity, int device,
3742 + u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
3743 +void xlvbd_del(struct blkfront_info *info);
3744 +int xlvbd_barrier(struct blkfront_info *info);
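+/* A minimal sketch of the intended call order described in the note above
+ * (illustrative only; the parameter values are placeholders):
+ *
+ *	err = xlvbd_add(sectors, vdevice, vdisk_info, sector_size, info);
+ *	if (!err)
+ *		add_disk(info->gd);
+ */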
3745 +
3746 +#ifdef CONFIG_SYSFS
3747 +int xlvbd_sysfs_addif(struct blkfront_info *info);
3748 +void xlvbd_sysfs_delif(struct blkfront_info *info);
3749 +#else
3750 +static inline int xlvbd_sysfs_addif(struct blkfront_info *info)
3751 +{
3752 + return 0;
3753 +}
3754 +
3755 +static inline void xlvbd_sysfs_delif(struct blkfront_info *info)
3756 +{
3757 + ;
3758 +}
3759 +#endif
3760 +
3761 +#endif /* __XEN_DRIVERS_BLOCK_H__ */
3762 Index: head-2008-11-25/drivers/xen/blkfront/vbd.c
3763 ===================================================================
3764 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
3765 +++ head-2008-11-25/drivers/xen/blkfront/vbd.c 2008-08-07 12:44:36.000000000 +0200
3766 @@ -0,0 +1,460 @@
3767 +/******************************************************************************
3768 + * vbd.c
3769 + *
3770 + * XenLinux virtual block-device driver (xvd).
3771 + *
3772 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
3773 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
3774 + * Copyright (c) 2004-2005, Christian Limpach
3775 + *
3776 + * This program is free software; you can redistribute it and/or
3777 + * modify it under the terms of the GNU General Public License version 2
3778 + * as published by the Free Software Foundation; or, when distributed
3779 + * separately from the Linux kernel or incorporated into other
3780 + * software packages, subject to the following license:
3781 + *
3782 + * Permission is hereby granted, free of charge, to any person obtaining a copy
3783 + * of this source file (the "Software"), to deal in the Software without
3784 + * restriction, including without limitation the rights to use, copy, modify,
3785 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
3786 + * and to permit persons to whom the Software is furnished to do so, subject to
3787 + * the following conditions:
3788 + *
3789 + * The above copyright notice and this permission notice shall be included in
3790 + * all copies or substantial portions of the Software.
3791 + *
3792 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3793 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3794 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
3795 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
3796 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
3797 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
3798 + * IN THE SOFTWARE.
3799 + */
3800 +
3801 +#include "block.h"
3802 +#include <linux/blkdev.h>
3803 +#include <linux/list.h>
3804 +
3805 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
3806 +#include <xen/platform-compat.h>
3807 +#endif
3808 +
3809 +#define BLKIF_MAJOR(dev) ((dev)>>8)
3810 +#define BLKIF_MINOR(dev) ((dev) & 0xff)
3811 +
3812 +#define EXT_SHIFT 28
3813 +#define EXTENDED (1<<EXT_SHIFT)
3814 +#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
3815 +#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
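+/* For a traditional (non-extended) vdevice such as 0x801 this yields
+ * BLKIF_MAJOR(0x801) == 8 and BLKIF_MINOR(0x801) == 1; a vdevice with bit
+ * EXT_SHIFT set is treated as "extended" and only BLKIF_MINOR_EXT() applies. */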
3816 +
3817 +/*
3818 + * For convenience we distinguish between ide, scsi and 'other' (i.e.,
3819 + * potentially combinations of the two) in the naming scheme and in a few other
3820 + * places.
3821 + */
3822 +
3823 +#define NUM_IDE_MAJORS 10
3824 +#define NUM_SCSI_MAJORS 17
3825 +#define NUM_VBD_MAJORS 2
3826 +
3827 +static struct xlbd_type_info xlbd_ide_type = {
3828 + .partn_shift = 6,
3829 + .disks_per_major = 2,
3830 + .devname = "ide",
3831 + .diskname = "hd",
3832 +};
3833 +
3834 +static struct xlbd_type_info xlbd_scsi_type = {
3835 + .partn_shift = 4,
3836 + .disks_per_major = 16,
3837 + .devname = "sd",
3838 + .diskname = "sd",
3839 +};
3840 +
3841 +static struct xlbd_type_info xlbd_vbd_type = {
3842 + .partn_shift = 4,
3843 + .disks_per_major = 16,
3844 + .devname = "xvd",
3845 + .diskname = "xvd",
3846 +};
3847 +
3848 +static struct xlbd_type_info xlbd_vbd_type_ext = {
3849 + .partn_shift = 8,
3850 + .disks_per_major = 256,
3851 + .devname = "xvd",
3852 + .diskname = "xvd",
3853 +};
3854 +
3855 +static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
3856 + NUM_VBD_MAJORS];
3857 +
3858 +#define XLBD_MAJOR_IDE_START 0
3859 +#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS)
3860 +#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
3861 +
3862 +#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
3863 +#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
3864 +#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
3865 +
3866 +static struct block_device_operations xlvbd_block_fops =
3867 +{
3868 + .owner = THIS_MODULE,
3869 + .open = blkif_open,
3870 + .release = blkif_release,
3871 + .ioctl = blkif_ioctl,
3872 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
3873 + .getgeo = blkif_getgeo
3874 +#endif
3875 +};
3876 +
3877 +DEFINE_SPINLOCK(blkif_io_lock);
3878 +
3879 +static struct xlbd_major_info *
3880 +xlbd_alloc_major_info(int major, int minor, int index)
3881 +{
3882 + struct xlbd_major_info *ptr;
3883 + int do_register;
3884 +
3885 + ptr = kzalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
3886 + if (ptr == NULL)
3887 + return NULL;
3888 +
3889 + ptr->major = major;
3890 + do_register = 1;
3891 +
3892 + switch (index) {
3893 + case XLBD_MAJOR_IDE_RANGE:
3894 + ptr->type = &xlbd_ide_type;
3895 + ptr->index = index - XLBD_MAJOR_IDE_START;
3896 + break;
3897 + case XLBD_MAJOR_SCSI_RANGE:
3898 + ptr->type = &xlbd_scsi_type;
3899 + ptr->index = index - XLBD_MAJOR_SCSI_START;
3900 + break;
3901 + case XLBD_MAJOR_VBD_RANGE:
3902 + ptr->index = 0;
3903 + if ((index - XLBD_MAJOR_VBD_START) == 0)
3904 + ptr->type = &xlbd_vbd_type;
3905 + else
3906 + ptr->type = &xlbd_vbd_type_ext;
3907 +
3908 + /*
3909 + * if someone already registered block major 202,
3910 + * don't try to register it again
3911 + */
3912 + if (major_info[XLBD_MAJOR_VBD_START] != NULL)
3913 + do_register = 0;
3914 + break;
3915 + }
3916 +
3917 + if (do_register) {
3918 + if (register_blkdev(ptr->major, ptr->type->devname)) {
3919 + kfree(ptr);
3920 + return NULL;
3921 + }
3922 +
3923 + printk("xen-vbd: registered block device major %i\n", ptr->major);
3924 + }
3925 +
3926 + major_info[index] = ptr;
3927 + return ptr;
3928 +}
3929 +
3930 +static struct xlbd_major_info *
3931 +xlbd_get_major_info(int major, int minor, int vdevice)
3932 +{
3933 + struct xlbd_major_info *mi;
3934 + int index;
3935 +
3936 + switch (major) {
3937 + case IDE0_MAJOR: index = 0; break;
3938 + case IDE1_MAJOR: index = 1; break;
3939 + case IDE2_MAJOR: index = 2; break;
3940 + case IDE3_MAJOR: index = 3; break;
3941 + case IDE4_MAJOR: index = 4; break;
3942 + case IDE5_MAJOR: index = 5; break;
3943 + case IDE6_MAJOR: index = 6; break;
3944 + case IDE7_MAJOR: index = 7; break;
3945 + case IDE8_MAJOR: index = 8; break;
3946 + case IDE9_MAJOR: index = 9; break;
3947 + case SCSI_DISK0_MAJOR: index = 10; break;
3948 + case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
3949 + index = 11 + major - SCSI_DISK1_MAJOR;
3950 + break;
3951 + case SCSI_DISK8_MAJOR ... SCSI_DISK15_MAJOR:
3952 + index = 18 + major - SCSI_DISK8_MAJOR;
3953 + break;
3954 + case SCSI_CDROM_MAJOR: index = 26; break;
3955 + default:
3956 + if (!VDEV_IS_EXTENDED(vdevice))
3957 + index = 27;
3958 + else
3959 + index = 28;
3960 + break;
3961 + }
3962 +
3963 + mi = ((major_info[index] != NULL) ? major_info[index] :
3964 + xlbd_alloc_major_info(major, minor, index));
3965 + if (mi)
3966 + mi->usage++;
3967 + return mi;
3968 +}
3969 +
3970 +static void
3971 +xlbd_put_major_info(struct xlbd_major_info *mi)
3972 +{
3973 + mi->usage--;
3974 + /* XXX: release major if 0 */
3975 +}
3976 +
3977 +static int
3978 +xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
3979 +{
3980 + request_queue_t *rq;
3981 +
3982 + rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
3983 + if (rq == NULL)
3984 + return -1;
3985 +
3986 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
3987 + elevator_init(rq, "noop");
3988 +#else
3989 + elevator_init(rq, &elevator_noop);
3990 +#endif
3991 +
3992 + /* Hard sector size and max sectors impersonate the equiv. hardware. */
3993 + blk_queue_hardsect_size(rq, sector_size);
3994 + blk_queue_max_sectors(rq, 512);
3995 +
3996 + /* Each segment in a request is up to an aligned page in size. */
3997 + blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
3998 + blk_queue_max_segment_size(rq, PAGE_SIZE);
3999 +
4000 + /* Ensure a merged request will fit in a single I/O ring slot. */
4001 + blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
4002 + blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
4003 +
4004 + /* Make sure buffer addresses are sector-aligned. */
4005 + blk_queue_dma_alignment(rq, 511);
4006 +
4007 + /* Make sure we don't use bounce buffers. */
4008 + blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
4009 +
4010 + gd->queue = rq;
4011 +
4012 + return 0;
4013 +}
4014 +
4015 +static int
4016 +xlvbd_alloc_gendisk(int major, int minor, blkif_sector_t capacity, int vdevice,
4017 + u16 vdisk_info, u16 sector_size,
4018 + struct blkfront_info *info)
4019 +{
4020 + struct gendisk *gd;
4021 + struct xlbd_major_info *mi;
4022 + int nr_minors = 1;
4023 + int err = -ENODEV;
4024 + unsigned int offset;
4025 +
4026 + BUG_ON(info->gd != NULL);
4027 + BUG_ON(info->mi != NULL);
4028 + BUG_ON(info->rq != NULL);
4029 +
4030 + mi = xlbd_get_major_info(major, minor, vdevice);
4031 + if (mi == NULL)
4032 + goto out;
4033 + info->mi = mi;
4034 +
4035 + if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
4036 + nr_minors = 1 << mi->type->partn_shift;
4037 +
4038 + gd = alloc_disk(nr_minors);
4039 + if (gd == NULL)
4040 + goto out;
4041 +
4042 + offset = mi->index * mi->type->disks_per_major +
4043 + (minor >> mi->type->partn_shift);
4044 + if (nr_minors > 1) {
4045 + if (offset < 26) {
4046 + sprintf(gd->disk_name, "%s%c",
4047 + mi->type->diskname, 'a' + offset );
4048 + }
4049 + else {
4050 + sprintf(gd->disk_name, "%s%c%c",
4051 + mi->type->diskname,
4052 + 'a' + ((offset/26)-1), 'a' + (offset%26) );
4053 + }
4054 + }
4055 + else {
4056 + if (offset < 26) {
4057 + sprintf(gd->disk_name, "%s%c%d",
4058 + mi->type->diskname,
4059 + 'a' + offset,
4060 + minor & ((1 << mi->type->partn_shift) - 1));
4061 + }
4062 + else {
4063 + sprintf(gd->disk_name, "%s%c%c%d",
4064 + mi->type->diskname,
4065 + 'a' + ((offset/26)-1), 'a' + (offset%26),
4066 + minor & ((1 << mi->type->partn_shift) - 1));
4067 + }
4068 + }
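+	/* With the "xvd" naming above, offset 0 gives "xvda", offset 25
+	 * "xvdz" and offset 26 "xvdaa"; when nr_minors == 1 the partition
+	 * number within the device is appended as well. */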
4069 +
4070 + gd->major = mi->major;
4071 + gd->first_minor = minor;
4072 + gd->fops = &xlvbd_block_fops;
4073 + gd->private_data = info;
4074 + gd->driverfs_dev = &(info->xbdev->dev);
4075 + set_capacity(gd, capacity);
4076 +
4077 + if (xlvbd_init_blk_queue(gd, sector_size)) {
4078 + del_gendisk(gd);
4079 + goto out;
4080 + }
4081 +
4082 + info->rq = gd->queue;
4083 + info->gd = gd;
4084 +
4085 + if (info->feature_barrier)
4086 + xlvbd_barrier(info);
4087 +
4088 + if (vdisk_info & VDISK_READONLY)
4089 + set_disk_ro(gd, 1);
4090 +
4091 + if (vdisk_info & VDISK_REMOVABLE)
4092 + gd->flags |= GENHD_FL_REMOVABLE;
4093 +
4094 + if (vdisk_info & VDISK_CDROM)
4095 + gd->flags |= GENHD_FL_CD;
4096 +
4097 + return 0;
4098 +
4099 + out:
4100 + if (mi)
4101 + xlbd_put_major_info(mi);
4102 + info->mi = NULL;
4103 + return err;
4104 +}
4105 +
4106 +int
4107 +xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
4108 + u16 sector_size, struct blkfront_info *info)
4109 +{
4110 + struct block_device *bd;
4111 + int err = 0;
4112 + int major, minor;
4113 +
4114 + if ((vdevice>>EXT_SHIFT) > 1) {
4115 + /* this is above the extended range; something is wrong */
4116 + printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
4117 + return -ENODEV;
4118 + }
4119 +
4120 + if (!VDEV_IS_EXTENDED(vdevice)) {
4121 + major = BLKIF_MAJOR(vdevice);
4122 + minor = BLKIF_MINOR(vdevice);
4123 + }
4124 + else {
4125 + major = 202;
4126 + minor = BLKIF_MINOR_EXT(vdevice);
4127 + }
4128 +
4129 + info->dev = MKDEV(major, minor);
4130 + bd = bdget(info->dev);
4131 + if (bd == NULL)
4132 + return -ENODEV;
4133 +
4134 + err = xlvbd_alloc_gendisk(major, minor, capacity, vdevice, vdisk_info,
4135 + sector_size, info);
4136 +
4137 + bdput(bd);
4138 + return err;
4139 +}
4140 +
4141 +void
4142 +xlvbd_del(struct blkfront_info *info)
4143 +{
4144 + if (info->mi == NULL)
4145 + return;
4146 +
4147 + BUG_ON(info->gd == NULL);
4148 + del_gendisk(info->gd);
4149 + put_disk(info->gd);
4150 + info->gd = NULL;
4151 +
4152 + xlbd_put_major_info(info->mi);
4153 + info->mi = NULL;
4154 +
4155 + BUG_ON(info->rq == NULL);
4156 + blk_cleanup_queue(info->rq);
4157 + info->rq = NULL;
4158 +}
4159 +
4160 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
4161 +int
4162 +xlvbd_barrier(struct blkfront_info *info)
4163 +{
4164 + int err;
4165 +
4166 + err = blk_queue_ordered(info->rq,
4167 + info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL);
4168 + if (err)
4169 + return err;
4170 + printk(KERN_INFO "blkfront: %s: barriers %s\n",
4171 + info->gd->disk_name, info->feature_barrier ? "enabled" : "disabled");
4172 + return 0;
4173 +}
4174 +#else
4175 +int
4176 +xlvbd_barrier(struct blkfront_info *info)
4177 +{
4178 + printk(KERN_INFO "blkfront: %s: barriers disabled\n", info->gd->disk_name);
4179 + return -ENOSYS;
4180 +}
4181 +#endif
4182 +
4183 +#ifdef CONFIG_SYSFS
4184 +static ssize_t show_media(struct device *dev,
4185 + struct device_attribute *attr, char *buf)
4186 +{
4187 + struct xenbus_device *xendev = to_xenbus_device(dev);
4188 + struct blkfront_info *info = xendev->dev.driver_data;
4189 +
4190 + if (info->gd->flags & GENHD_FL_CD)
4191 + return sprintf(buf, "cdrom\n");
4192 + return sprintf(buf, "disk\n");
4193 +}
4194 +
4195 +static struct device_attribute xlvbd_attrs[] = {
4196 + __ATTR(media, S_IRUGO, show_media, NULL),
4197 +};
4198 +
4199 +int xlvbd_sysfs_addif(struct blkfront_info *info)
4200 +{
4201 + int i;
4202 + int error = 0;
4203 +
4204 + for (i = 0; i < ARRAY_SIZE(xlvbd_attrs); i++) {
4205 + error = device_create_file(info->gd->driverfs_dev,
4206 + &xlvbd_attrs[i]);
4207 + if (error)
4208 + goto fail;
4209 + }
4210 + return 0;
4211 +
4212 +fail:
4213 + while (--i >= 0)
4214 + device_remove_file(info->gd->driverfs_dev, &xlvbd_attrs[i]);
4215 + return error;
4216 +}
4217 +
4218 +void xlvbd_sysfs_delif(struct blkfront_info *info)
4219 +{
4220 + int i;
4221 +
4222 + for (i = 0; i < ARRAY_SIZE(xlvbd_attrs); i++)
4223 + device_remove_file(info->gd->driverfs_dev, &xlvbd_attrs[i]);
4224 +}
4225 +
4226 +#endif /* CONFIG_SYSFS */
4227 Index: head-2008-11-25/drivers/xen/blktap/Makefile
4228 ===================================================================
4229 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
4230 +++ head-2008-11-25/drivers/xen/blktap/Makefile 2007-06-12 13:13:44.000000000 +0200
4231 @@ -0,0 +1,5 @@
4232 +LINUXINCLUDE += -I../xen/include/public/io
4233 +
4234 +obj-$(CONFIG_XEN_BLKDEV_TAP) := xenblktap.o
4235 +
4236 +xenblktap-y := xenbus.o interface.o blktap.o
4237 Index: head-2008-11-25/drivers/xen/blktap/blktap.c
4238 ===================================================================
4239 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
4240 +++ head-2008-11-25/drivers/xen/blktap/blktap.c 2008-11-10 11:44:21.000000000 +0100
4241 @@ -0,0 +1,1704 @@
4242 +/******************************************************************************
4243 + * drivers/xen/blktap/blktap.c
4244 + *
4245 + * Back-end driver for user level virtual block devices. This portion of the
4246 + * driver exports a 'unified' block-device interface that can be accessed
4247 + * by any operating system that implements a compatible front end. Requests
4248 + * are remapped to a user-space memory region.
4249 + *
4250 + * Based on the blkback driver code.
4251 + *
4252 + * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
4253 + *
4254 + * Clean ups and fix ups:
4255 + * Copyright (c) 2006, Steven Rostedt - Red Hat, Inc.
4256 + *
4257 + * This program is free software; you can redistribute it and/or
4258 + * modify it under the terms of the GNU General Public License version 2
4259 + * as published by the Free Software Foundation; or, when distributed
4260 + * separately from the Linux kernel or incorporated into other
4261 + * software packages, subject to the following license:
4262 + *
4263 + * Permission is hereby granted, free of charge, to any person obtaining a copy
4264 + * of this source file (the "Software"), to deal in the Software without
4265 + * restriction, including without limitation the rights to use, copy, modify,
4266 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
4267 + * and to permit persons to whom the Software is furnished to do so, subject to
4268 + * the following conditions:
4269 + *
4270 + * The above copyright notice and this permission notice shall be included in
4271 + * all copies or substantial portions of the Software.
4272 + *
4273 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4274 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4275 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
4276 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
4277 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
4278 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
4279 + * IN THE SOFTWARE.
4280 + */
4281 +
4282 +#include <linux/spinlock.h>
4283 +#include <linux/kthread.h>
4284 +#include <linux/list.h>
4285 +#include <asm/hypervisor.h>
4286 +#include "common.h"
4287 +#include <xen/balloon.h>
4288 +#include <xen/driver_util.h>
4289 +#include <linux/kernel.h>
4290 +#include <linux/fs.h>
4291 +#include <linux/mm.h>
4292 +#include <linux/errno.h>
4293 +#include <linux/major.h>
4294 +#include <linux/gfp.h>
4295 +#include <linux/poll.h>
4296 +#include <linux/delay.h>
4297 +#include <asm/tlbflush.h>
4298 +
4299 +#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */
4300 +#define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */
4301 +
4302 +/*
4303 + * The maximum number of requests that can be outstanding at any time
4304 + * is determined by
4305 + *
4306 + * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST]
4307 + *
4308 + * where mmap_alloc < MAX_DYNAMIC_MEM.
4309 + *
4310 + * TODO:
4311 + * mmap_alloc is initialised to 2 and should be adjustable on the fly via
4312 + * sysfs.
4313 + */
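+/*
+ * As a rough worked example: assuming 4 KiB pages (for which the shared
+ * blkif ring holds 32 entries) and BLKIF_MAX_SEGMENTS_PER_REQUEST == 11,
+ * each mmap_alloc slot covers 32 * 11 == 352 mapped pages, so the initial
+ * mmap_alloc of 2 corresponds to 704 pages of foreign mappings.
+ */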
4314 +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
4315 +#define MAX_DYNAMIC_MEM BLK_RING_SIZE
4316 +#define MAX_PENDING_REQS BLK_RING_SIZE
4317 +#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
4318 +#define MMAP_VADDR(_start, _req,_seg) \
4319 + (_start + \
4320 + ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
4321 + ((_seg) * PAGE_SIZE))
4322 +static int blkif_reqs = MAX_PENDING_REQS;
4323 +static int mmap_pages = MMAP_PAGES;
4324 +
4325 +#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we
4326 + * have a bunch of pages reserved for shared
4327 + * memory rings.
4328 + */
4329 +
4330 +/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
4331 +typedef struct domid_translate {
4332 + unsigned short domid;
4333 + unsigned short busid;
4334 +} domid_translate_t ;
4335 +
4336 +typedef struct domid_translate_ext {
4337 + unsigned short domid;
4338 + u32 busid;
4339 +} domid_translate_ext_t ;
4340 +
4341 +/*Data struct associated with each of the tapdisk devices*/
4342 +typedef struct tap_blkif {
4343 + struct vm_area_struct *vma; /*Shared memory area */
4344 + unsigned long rings_vstart; /*Kernel memory mapping */
4345 + unsigned long user_vstart; /*User memory mapping */
4346 + unsigned long dev_inuse; /*One process opens device at a time. */
4347 + unsigned long dev_pending; /*In process of being opened */
4348 + unsigned long ring_ok; /*make this ring->state */
4349 + blkif_front_ring_t ufe_ring; /*Rings up to user space. */
4350 + wait_queue_head_t wait; /*for poll */
4351 + unsigned long mode; /*current switching mode */
4352 + int minor; /*Minor number for tapdisk device */
4353 + pid_t pid; /*tapdisk process id */
4354 + enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace
4355 + shutdown */
4356 + unsigned long *idx_map; /*Record the user ring id to kern
4357 + [req id, idx] tuple */
4358 + blkif_t *blkif; /*Associate blkif with tapdev */
4359 + struct domid_translate_ext trans; /*Translation from domid to bus. */
4360 +} tap_blkif_t;
4361 +
4362 +static struct tap_blkif *tapfds[MAX_TAP_DEV];
4363 +static int blktap_next_minor;
4364 +
4365 +module_param(blkif_reqs, int, 0);
4366 +/* Run-time switchable: /sys/module/blktap/parameters/ */
4367 +static unsigned int log_stats = 0;
4368 +static unsigned int debug_lvl = 0;
4369 +module_param(log_stats, int, 0644);
4370 +module_param(debug_lvl, int, 0644);
4371 +
4372 +/*
4373 + * Each outstanding request that we've passed to the lower device layers has a
4374 + * 'pending_req' allocated to it. Each buffer_head that completes decrements
4375 + * the pendcnt towards zero. When it hits zero, the specified domain has a
4376 + * response queued for it, with the saved 'id' passed back.
4377 + */
4378 +typedef struct {
4379 + blkif_t *blkif;
4380 + u64 id;
4381 + unsigned short mem_idx;
4382 + int nr_pages;
4383 + atomic_t pendcnt;
4384 + unsigned short operation;
4385 + int status;
4386 + struct list_head free_list;
4387 + int inuse;
4388 +} pending_req_t;
4389 +
4390 +static pending_req_t *pending_reqs[MAX_PENDING_REQS];
4391 +static struct list_head pending_free;
4392 +static DEFINE_SPINLOCK(pending_free_lock);
4393 +static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
4394 +static int alloc_pending_reqs;
4395 +
4396 +typedef unsigned int PEND_RING_IDX;
4397 +
4398 +static inline int MASK_PEND_IDX(int i) {
4399 + return (i & (MAX_PENDING_REQS-1));
4400 +}
4401 +
4402 +static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
4403 + return (req - pending_reqs[idx]);
4404 +}
4405 +
4406 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
4407 +
4408 +#define BLKBACK_INVALID_HANDLE (~0)
4409 +
4410 +static struct page **foreign_pages[MAX_DYNAMIC_MEM];
4411 +static inline unsigned long idx_to_kaddr(
4412 + unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx)
4413 +{
4414 + unsigned int arr_idx = req_idx*BLKIF_MAX_SEGMENTS_PER_REQUEST + sg_idx;
4415 + unsigned long pfn = page_to_pfn(foreign_pages[mmap_idx][arr_idx]);
4416 + return (unsigned long)pfn_to_kaddr(pfn);
4417 +}
4418 +
4419 +static unsigned short mmap_alloc = 0;
4420 +static unsigned short mmap_lock = 0;
4421 +static unsigned short mmap_inuse = 0;
4422 +
4423 +/******************************************************************
4424 + * GRANT HANDLES
4425 + */
4426 +
4427 +/* When using grant tables to map a frame for device access, the
4428 + * handle returned must be used to unmap the frame. This is needed to
4429 + * drop the ref count on the frame.
4430 + */
4431 +struct grant_handle_pair
4432 +{
4433 + grant_handle_t kernel;
4434 + grant_handle_t user;
4435 +};
4436 +#define INVALID_GRANT_HANDLE 0xFFFF
4437 +
4438 +static struct grant_handle_pair
4439 + pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
4440 +#define pending_handle(_id, _idx, _i) \
4441 + (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
4442 + + (_i)])
4443 +
4444 +
4445 +static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/
4446 +
4447 +#define BLKTAP_MINOR 0 /*/dev/xen/blktap has a dynamic major */
4448 +#define BLKTAP_DEV_DIR "/dev/xen"
4449 +
4450 +static int blktap_major;
4451 +
4452 +/* blktap IOCTLs: */
4453 +#define BLKTAP_IOCTL_KICK_FE 1
4454 +#define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */
4455 +#define BLKTAP_IOCTL_SETMODE 3
4456 +#define BLKTAP_IOCTL_SENDPID 4
4457 +#define BLKTAP_IOCTL_NEWINTF 5
4458 +#define BLKTAP_IOCTL_MINOR 6
4459 +#define BLKTAP_IOCTL_MAJOR 7
4460 +#define BLKTAP_QUERY_ALLOC_REQS 8
4461 +#define BLKTAP_IOCTL_FREEINTF 9
4462 +#define BLKTAP_IOCTL_NEWINTF_EXT 50
4463 +#define BLKTAP_IOCTL_PRINT_IDXS 100
4464 +
4465 +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
4466 +#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
4467 +#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
4468 +#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */
4469 +
4470 +#define BLKTAP_MODE_INTERPOSE \
4471 + (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
4472 +
4473 +
4474 +static inline int BLKTAP_MODE_VALID(unsigned long arg)
4475 +{
4476 + return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
4477 + (arg == BLKTAP_MODE_INTERCEPT_FE) ||
4478 + (arg == BLKTAP_MODE_INTERPOSE ));
4479 +}
4480 +
4481 +/* Requests passing through the tap to userspace are re-assigned an ID.
4482 + * We must record a mapping between the BE [IDX,ID] tuple and the userspace
4483 + * ring ID.
4484 + */
4485 +
4486 +static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
4487 +{
4488 + return ((fe_dom << 16) | MASK_PEND_IDX(idx));
4489 +}
4490 +
4491 +extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id)
4492 +{
4493 + return (PEND_RING_IDX)(id & 0x0000ffff);
4494 +}
4495 +
4496 +extern inline int ID_TO_MIDX(unsigned long id)
4497 +{
4498 + return (int)(id >> 16);
4499 +}
4500 +
4501 +#define INVALID_REQ 0xdead0000
4502 +
4503 +/*TODO: Convert to a free list*/
4504 +static inline int GET_NEXT_REQ(unsigned long *idx_map)
4505 +{
4506 + int i;
4507 + for (i = 0; i < MAX_PENDING_REQS; i++)
4508 + if (idx_map[i] == INVALID_REQ)
4509 + return i;
4510 +
4511 + return INVALID_REQ;
4512 +}
4513 +
4514 +static inline int OFFSET_TO_USR_IDX(int offset)
4515 +{
4516 + return offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
4517 +}
4518 +
4519 +static inline int OFFSET_TO_SEG(int offset)
4520 +{
4521 + return offset % BLKIF_MAX_SEGMENTS_PER_REQUEST;
4522 +}
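+/* For instance, with 11 segments per request a page offset of 25 in the
+ * user area decomposes to usr_idx 2 and seg 3 (25 / 11 == 2, 25 % 11 == 3). */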
4523 +
4524 +
4525 +#define BLKTAP_INVALID_HANDLE(_g) \
4526 + (((_g->kernel) == INVALID_GRANT_HANDLE) && \
4527 + ((_g->user) == INVALID_GRANT_HANDLE))
4528 +
4529 +#define BLKTAP_INVALIDATE_HANDLE(_g) do { \
4530 + (_g)->kernel = INVALID_GRANT_HANDLE; (_g)->user = INVALID_GRANT_HANDLE; \
4531 + } while(0)
4532 +
4533 +
4534 +/******************************************************************
4535 + * BLKTAP VM OPS
4536 + */
4537 +
4538 +static struct page *blktap_nopage(struct vm_area_struct *vma,
4539 + unsigned long address,
4540 + int *type)
4541 +{
4542 + /*
4543 + * if the page has not been mapped in by the driver then return
4544 + * NOPAGE_SIGBUS to the domain.
4545 + */
4546 +
4547 + return NOPAGE_SIGBUS;
4548 +}
4549 +
4550 +static pte_t blktap_clear_pte(struct vm_area_struct *vma,
4551 + unsigned long uvaddr,
4552 + pte_t *ptep, int is_fullmm)
4553 +{
4554 + pte_t copy;
4555 + tap_blkif_t *info;
4556 + int offset, seg, usr_idx, pending_idx, mmap_idx;
4557 + unsigned long uvstart = vma->vm_start + (RING_PAGES << PAGE_SHIFT);
4558 + unsigned long kvaddr;
4559 + struct page **map;
4560 + struct page *pg;
4561 + struct grant_handle_pair *khandle;
4562 + struct gnttab_unmap_grant_ref unmap[2];
4563 + int count = 0;
4564 +
4565 + /*
4566 + * If the address is before the start of the grant mapped region or
4567 + * if vm_file is NULL (meaning mmap failed and we have nothing to do)
4568 + */
4569 + if (uvaddr < uvstart || vma->vm_file == NULL)
4570 + return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
4571 + ptep, is_fullmm);
4572 +
4573 + info = vma->vm_file->private_data;
4574 + map = vma->vm_private_data;
4575 +
4576 + /* TODO Should these be changed to if statements? */
4577 + BUG_ON(!info);
4578 + BUG_ON(!info->idx_map);
4579 + BUG_ON(!map);
4580 +
4581 + offset = (int) ((uvaddr - uvstart) >> PAGE_SHIFT);
4582 + usr_idx = OFFSET_TO_USR_IDX(offset);
4583 + seg = OFFSET_TO_SEG(offset);
4584 +
4585 + pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
4586 + mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
4587 +
4588 + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg);
4589 + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
4590 + ClearPageReserved(pg);
4591 + map[offset + RING_PAGES] = NULL;
4592 +
4593 + khandle = &pending_handle(mmap_idx, pending_idx, seg);
4594 +
4595 + if (khandle->kernel != INVALID_GRANT_HANDLE) {
4596 + gnttab_set_unmap_op(&unmap[count], kvaddr,
4597 + GNTMAP_host_map, khandle->kernel);
4598 + count++;
4599 +
4600 + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
4601 + INVALID_P2M_ENTRY);
4602 + }
4603 +
4604 + if (khandle->user != INVALID_GRANT_HANDLE) {
4605 + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
4606 +
4607 + copy = *ptep;
4608 + gnttab_set_unmap_op(&unmap[count], virt_to_machine(ptep),
4609 + GNTMAP_host_map
4610 + | GNTMAP_application_map
4611 + | GNTMAP_contains_pte,
4612 + khandle->user);
4613 + count++;
4614 + } else {
4615 + BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap));
4616 +
4617 + /* USING SHADOW PAGE TABLES. */
4618 + copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
4619 + is_fullmm);
4620 + }
4621 +
4622 + if (count) {
4623 + BLKTAP_INVALIDATE_HANDLE(khandle);
4624 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
4625 + unmap, count))
4626 + BUG();
4627 + }
4628 +
4629 + return copy;
4630 +}
4631 +
4632 +struct vm_operations_struct blktap_vm_ops = {
4633 + nopage: blktap_nopage,
4634 + zap_pte: blktap_clear_pte,
4635 +};
4636 +
4637 +/******************************************************************
4638 + * BLKTAP FILE OPS
4639 + */
4640 +
4641 +/*Function Declarations*/
4642 +static tap_blkif_t *get_next_free_dev(void);
4643 +static int blktap_open(struct inode *inode, struct file *filp);
4644 +static int blktap_release(struct inode *inode, struct file *filp);
4645 +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
4646 +static int blktap_ioctl(struct inode *inode, struct file *filp,
4647 + unsigned int cmd, unsigned long arg);
4648 +static unsigned int blktap_poll(struct file *file, poll_table *wait);
4649 +
4650 +static const struct file_operations blktap_fops = {
4651 + .owner = THIS_MODULE,
4652 + .poll = blktap_poll,
4653 + .ioctl = blktap_ioctl,
4654 + .open = blktap_open,
4655 + .release = blktap_release,
4656 + .mmap = blktap_mmap,
4657 +};
4658 +
4659 +
4660 +static tap_blkif_t *get_next_free_dev(void)
4661 +{
4662 + struct class *class;
4663 + tap_blkif_t *info;
4664 + int minor;
4665 +
4666 + /*
4667 + * This is called only from the ioctl, which
4668 + * means we should always have interrupts enabled.
4669 + */
4670 + BUG_ON(irqs_disabled());
4671 +
4672 + spin_lock_irq(&pending_free_lock);
4673 +
4674 + /* tapfds[0] is always NULL */
4675 +
4676 + for (minor = 1; minor < blktap_next_minor; minor++) {
4677 + info = tapfds[minor];
4678 + /* we could have failed a previous attempt. */
4679 + if (!info ||
4680 + ((info->dev_inuse == 0) &&
4681 + (info->dev_pending == 0)) ) {
4682 + info->dev_pending = 1;
4683 + goto found;
4684 + }
4685 + }
4686 + info = NULL;
4687 + minor = -1;
4688 +
4689 +	 * We didn't find a free device. If we can still allocate
4690 + * We didn't find free device. If we can still allocate
4691 + * more, then we grab the next device minor that is
4692 + * available. This is done while we are still under
4693 + * the protection of the pending_free_lock.
4694 + */
4695 + if (blktap_next_minor < MAX_TAP_DEV)
4696 + minor = blktap_next_minor++;
4697 +found:
4698 + spin_unlock_irq(&pending_free_lock);
4699 +
4700 + if (!info && minor > 0) {
4701 + info = kzalloc(sizeof(*info), GFP_KERNEL);
4702 + if (unlikely(!info)) {
4703 + /*
4704 + * If we failed here, try to put back
4705 + * the next minor number. But if one
4706 + * was just taken, then we just lose this
4707 + * minor. We can try to allocate this
4708 + * minor again later.
4709 + */
4710 + spin_lock_irq(&pending_free_lock);
4711 + if (blktap_next_minor == minor+1)
4712 + blktap_next_minor--;
4713 + spin_unlock_irq(&pending_free_lock);
4714 + goto out;
4715 + }
4716 +
4717 + info->minor = minor;
4718 + /*
4719 + * Make sure that we have a minor before others can
4720 + * see us.
4721 + */
4722 + wmb();
4723 + tapfds[minor] = info;
4724 +
4725 + if ((class = get_xen_class()) != NULL)
4726 + class_device_create(class, NULL,
4727 + MKDEV(blktap_major, minor), NULL,
4728 + "blktap%d", minor);
4729 + }
4730 +
4731 +out:
4732 + return info;
4733 +}
4734 +
4735 +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif)
4736 +{
4737 + tap_blkif_t *info;
4738 + int i;
4739 +
4740 + for (i = 1; i < blktap_next_minor; i++) {
4741 + info = tapfds[i];
4742 + if ( info &&
4743 + (info->trans.domid == domid) &&
4744 + (info->trans.busid == xenbus_id) ) {
4745 + info->blkif = blkif;
4746 + info->status = RUNNING;
4747 + return i;
4748 + }
4749 + }
4750 + return -1;
4751 +}
4752 +
4753 +void signal_tapdisk(int idx)
4754 +{
4755 + tap_blkif_t *info;
4756 + struct task_struct *ptask;
4757 +
4758 + /*
4759 + * if the userland tools set things up wrong, this could be negative;
4760 + * just don't try to signal in this case
4761 + */
4762 + if (idx < 0)
4763 + return;
4764 +
4765 + info = tapfds[idx];
4766 + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
4767 + return;
4768 +
4769 + if (info->pid > 0) {
4770 + ptask = find_task_by_pid(info->pid);
4771 + if (ptask)
4772 + info->status = CLEANSHUTDOWN;
4773 + }
4774 + info->blkif = NULL;
4775 +
4776 + return;
4777 +}
4778 +
4779 +static int blktap_open(struct inode *inode, struct file *filp)
4780 +{
4781 + blkif_sring_t *sring;
4782 + int idx = iminor(inode) - BLKTAP_MINOR;
4783 + tap_blkif_t *info;
4784 + int i;
4785 +
4786 + /* ctrl device, treat differently */
4787 + if (!idx)
4788 + return 0;
4789 +
4790 + info = tapfds[idx];
4791 +
4792 + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) {
4793 + WPRINTK("Unable to open device /dev/xen/blktap%d\n",
4794 + idx);
4795 + return -ENODEV;
4796 + }
4797 +
4798 + DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
4799 +
4800 + /*Only one process can access device at a time*/
4801 + if (test_and_set_bit(0, &info->dev_inuse))
4802 + return -EBUSY;
4803 +
4804 + info->dev_pending = 0;
4805 +
4806 + /* Allocate the fe ring. */
4807 + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
4808 + if (sring == NULL)
4809 + goto fail_nomem;
4810 +
4811 + SetPageReserved(virt_to_page(sring));
4812 +
4813 + SHARED_RING_INIT(sring);
4814 + FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
4815 +
4816 + filp->private_data = info;
4817 + info->vma = NULL;
4818 +
4819 + info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS,
4820 + GFP_KERNEL);
4821 +
4822 + if (info->idx_map == NULL)
4823 + goto fail_nomem;
4824 +
4825 + if (idx > 0) {
4826 + init_waitqueue_head(&info->wait);
4827 + for (i = 0; i < MAX_PENDING_REQS; i++)
4828 + info->idx_map[i] = INVALID_REQ;
4829 + }
4830 +
4831 + DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
4832 + return 0;
4833 +
4834 + fail_nomem:
4835 + return -ENOMEM;
4836 +}
4837 +
4838 +static int blktap_release(struct inode *inode, struct file *filp)
4839 +{
4840 + tap_blkif_t *info = filp->private_data;
4841 +
4842 + /* check for control device */
4843 + if (!info)
4844 + return 0;
4845 +
4846 + info->dev_inuse = 0;
4847 + DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
4848 +
4849 + /* Free the ring page. */
4850 + ClearPageReserved(virt_to_page(info->ufe_ring.sring));
4851 + free_page((unsigned long) info->ufe_ring.sring);
4852 +
4853 + /* Clear any active mappings and free foreign map table */
4854 + if (info->vma) {
4855 + struct mm_struct *mm = info->vma->vm_mm;
4856 +
4857 + down_write(&mm->mmap_sem);
4858 + zap_page_range(
4859 + info->vma, info->vma->vm_start,
4860 + info->vma->vm_end - info->vma->vm_start, NULL);
4861 + up_write(&mm->mmap_sem);
4862 +
4863 + kfree(info->vma->vm_private_data);
4864 +
4865 + info->vma = NULL;
4866 + }
4867 +
4868 + if (info->idx_map) {
4869 + kfree(info->idx_map);
4870 + info->idx_map = NULL;
4871 + }
4872 +
4873 + if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
4874 + if (info->blkif->xenblkd != NULL) {
4875 + kthread_stop(info->blkif->xenblkd);
4876 + info->blkif->xenblkd = NULL;
4877 + }
4878 + info->status = CLEANSHUTDOWN;
4879 + }
4880 +
4881 + return 0;
4882 +}
4883 +
4884 +
4885 +/* Note on mmap:
4886 + * We need to map pages to user space in a way that will allow the block
4887 + * subsystem to set up direct IO to them. This couldn't be done before, because
4888 + * there isn't really a sane way to translate a user virtual address down to a
4889 + * physical address when the page belongs to another domain.
4890 + *
4891 + * My first approach was to map the page in to kernel memory, add an entry
4892 + * for it in the physical frame list (using alloc_lomem_region as in blkback)
4893 + * and then attempt to map that page up to user space. This is disallowed
4894 + * by xen though, which realizes that we don't really own the machine frame
4895 + * underlying the physical page.
4896 + *
4897 + * The new approach is to provide explicit support for this in xen linux.
4898 + * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
4899 + * mapped from other vms. vma->vm_private_data is set up as a mapping
4900 + * from pages to actual page structs. There is a new clause in get_user_pages
4901 + * that does the right thing for this sort of mapping.
4902 + */
4903 +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
4904 +{
4905 + int size;
4906 + struct page **map;
4907 + int i;
4908 + tap_blkif_t *info = filp->private_data;
4909 + int ret;
4910 +
4911 + if (info == NULL) {
4912 + WPRINTK("blktap: mmap, retrieving idx failed\n");
4913 + return -ENOMEM;
4914 + }
4915 +
4916 + vma->vm_flags |= VM_RESERVED;
4917 + vma->vm_ops = &blktap_vm_ops;
4918 +
4919 + size = vma->vm_end - vma->vm_start;
4920 + if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
4921 + WPRINTK("you _must_ map exactly %d pages!\n",
4922 + mmap_pages + RING_PAGES);
4923 + return -EAGAIN;
4924 + }
4925 +
4926 + size >>= PAGE_SHIFT;
4927 + info->rings_vstart = vma->vm_start;
4928 + info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
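+	/* Layout of the mapping: RING_PAGES of shared ring at rings_vstart,
+	 * followed by the data area starting at user_vstart and addressed
+	 * via MMAP_VADDR(). */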
4929 +
4930 + /* Map the ring pages to the start of the region and reserve it. */
4931 + if (xen_feature(XENFEAT_auto_translated_physmap))
4932 + ret = vm_insert_page(vma, vma->vm_start,
4933 + virt_to_page(info->ufe_ring.sring));
4934 + else
4935 + ret = remap_pfn_range(vma, vma->vm_start,
4936 + __pa(info->ufe_ring.sring) >> PAGE_SHIFT,
4937 + PAGE_SIZE, vma->vm_page_prot);
4938 + if (ret) {
4939 + WPRINTK("Mapping user ring failed!\n");
4940 + goto fail;
4941 + }
4942 +
4943 + /* Mark this VM as containing foreign pages, and set up mappings. */
4944 + map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
4945 + * sizeof(struct page *),
4946 + GFP_KERNEL);
4947 + if (map == NULL) {
4948 + WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
4949 + goto fail;
4950 + }
4951 +
4952 + for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
4953 + map[i] = NULL;
4954 +
4955 + vma->vm_private_data = map;
4956 + vma->vm_flags |= VM_FOREIGN;
4957 + vma->vm_flags |= VM_DONTCOPY;
4958 +
4959 +#ifdef CONFIG_X86
4960 + vma->vm_mm->context.has_foreign_mappings = 1;
4961 +#endif
4962 +
4963 + info->vma = vma;
4964 + info->ring_ok = 1;
4965 + return 0;
4966 + fail:
4967 + /* Clear any active mappings. */
4968 + zap_page_range(vma, vma->vm_start,
4969 + vma->vm_end - vma->vm_start, NULL);
4970 +
4971 + return -ENOMEM;
4972 +}
4973 +
4974 +
4975 +static int blktap_ioctl(struct inode *inode, struct file *filp,
4976 + unsigned int cmd, unsigned long arg)
4977 +{
4978 + tap_blkif_t *info = filp->private_data;
4979 +
4980 + switch(cmd) {
4981 + case BLKTAP_IOCTL_KICK_FE:
4982 + {
4983 + /* There are fe messages to process. */
4984 + return blktap_read_ufe_ring(info);
4985 + }
4986 + case BLKTAP_IOCTL_SETMODE:
4987 + {
4988 + if (info) {
4989 + if (BLKTAP_MODE_VALID(arg)) {
4990 + info->mode = arg;
4991 + /* XXX: may need to flush rings here. */
4992 + DPRINTK("blktap: set mode to %lx\n",
4993 + arg);
4994 + return 0;
4995 + }
4996 + }
4997 + return 0;
4998 + }
4999 + case BLKTAP_IOCTL_PRINT_IDXS:
5000 + {
5001 + if (info) {
5002 + printk("User Rings: \n-----------\n");
5003 + printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
5004 + "| req_prod: %2d, rsp_prod: %2d\n",
5005 + info->ufe_ring.rsp_cons,
5006 + info->ufe_ring.req_prod_pvt,
5007 + info->ufe_ring.sring->req_prod,
5008 + info->ufe_ring.sring->rsp_prod);
5009 + }
5010 + return 0;
5011 + }
5012 + case BLKTAP_IOCTL_SENDPID:
5013 + {
5014 + if (info) {
5015 + info->pid = (pid_t)arg;
5016 + DPRINTK("blktap: pid received %d\n",
5017 + info->pid);
5018 + }
5019 + return 0;
5020 + }
5021 + case BLKTAP_IOCTL_NEWINTF:
5022 + {
5023 + uint64_t val = (uint64_t)arg;
5024 + domid_translate_t *tr = (domid_translate_t *)&val;
5025 +
5026 + DPRINTK("NEWINTF Req for domid %d and bus id %d\n",
5027 + tr->domid, tr->busid);
5028 + info = get_next_free_dev();
5029 + if (!info) {
5030 + WPRINTK("Error initialising /dev/xen/blktap - "
5031 + "No more devices\n");
5032 + return -1;
5033 + }
5034 + info->trans.domid = tr->domid;
5035 + info->trans.busid = tr->busid;
5036 + return info->minor;
5037 + }
5038 + case BLKTAP_IOCTL_NEWINTF_EXT:
5039 + {
5040 + void __user *udata = (void __user *) arg;
5041 + domid_translate_ext_t tr;
5042 +
5043 + if (copy_from_user(&tr, udata, sizeof(domid_translate_ext_t)))
5044 + return -EFAULT;
5045 +
5046 + DPRINTK("NEWINTF_EXT Req for domid %d and bus id %d\n",
5047 + tr.domid, tr.busid);
5048 + info = get_next_free_dev();
5049 + if (!info) {
5050 + WPRINTK("Error initialising /dev/xen/blktap - "
5051 + "No more devices\n");
5052 + return -1;
5053 + }
5054 + info->trans.domid = tr.domid;
5055 + info->trans.busid = tr.busid;
5056 + return info->minor;
5057 + }
5058 + case BLKTAP_IOCTL_FREEINTF:
5059 + {
5060 + unsigned long dev = arg;
5061 + unsigned long flags;
5062 +
5063 + info = tapfds[dev];
5064 +
5065 + if ((dev > MAX_TAP_DEV) || !info)
5066 + return 0; /* should this be an error? */
5067 +
5068 + spin_lock_irqsave(&pending_free_lock, flags);
5069 + if (info->dev_pending)
5070 + info->dev_pending = 0;
5071 + spin_unlock_irqrestore(&pending_free_lock, flags);
5072 +
5073 + return 0;
5074 + }
5075 + case BLKTAP_IOCTL_MINOR:
5076 + {
5077 + unsigned long dev = arg;
5078 +
5079 + info = tapfds[dev];
5080 +
5081 + if ((dev > MAX_TAP_DEV) || !info)
5082 + return -EINVAL;
5083 +
5084 + return info->minor;
5085 + }
5086 + case BLKTAP_IOCTL_MAJOR:
5087 + return blktap_major;
5088 +
5089 + case BLKTAP_QUERY_ALLOC_REQS:
5090 + {
5091 + WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n",
5092 + alloc_pending_reqs, blkif_reqs);
5093 +		return alloc_pending_reqs * 100 / blkif_reqs;
5094 + }
5095 + }
5096 + return -ENOIOCTLCMD;
5097 +}
5098 +
5099 +static unsigned int blktap_poll(struct file *filp, poll_table *wait)
5100 +{
5101 + tap_blkif_t *info = filp->private_data;
5102 +
5103 + /* do not work on the control device */
5104 + if (!info)
5105 + return 0;
5106 +
5107 + poll_wait(filp, &info->wait, wait);
5108 + if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) {
5109 + RING_PUSH_REQUESTS(&info->ufe_ring);
5110 + return POLLIN | POLLRDNORM;
5111 + }
5112 + return 0;
5113 +}
5114 +
5115 +void blktap_kick_user(int idx)
5116 +{
5117 + tap_blkif_t *info;
5118 +
5119 + info = tapfds[idx];
5120 +
5121 + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
5122 + return;
5123 +
5124 + wake_up_interruptible(&info->wait);
5125 +
5126 + return;
5127 +}
5128 +
5129 +static int do_block_io_op(blkif_t *blkif);
5130 +static void dispatch_rw_block_io(blkif_t *blkif,
5131 + blkif_request_t *req,
5132 + pending_req_t *pending_req);
5133 +static void make_response(blkif_t *blkif, u64 id,
5134 + unsigned short op, int st);
5135 +
5136 +/******************************************************************
5137 + * misc small helpers
5138 + */
5139 +static int req_increase(void)
5140 +{
5141 + int i, j;
5142 +
5143 + if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock)
5144 + return -EINVAL;
5145 +
5146 + pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t)
5147 + * blkif_reqs, GFP_KERNEL);
5148 + foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages);
5149 +
5150 + if (!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc])
5151 + goto out_of_memory;
5152 +
5153 + DPRINTK("%s: reqs=%d, pages=%d\n",
5154 + __FUNCTION__, blkif_reqs, mmap_pages);
5155 +
5156 + for (i = 0; i < MAX_PENDING_REQS; i++) {
5157 + list_add_tail(&pending_reqs[mmap_alloc][i].free_list,
5158 + &pending_free);
5159 + pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
5160 + for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
5161 + BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc,
5162 + i, j));
5163 + }
5164 +
5165 + mmap_alloc++;
5166 + DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
5167 + return 0;
5168 +
5169 + out_of_memory:
5170 + free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages);
5171 + kfree(pending_reqs[mmap_alloc]);
5172 + WPRINTK("%s: out of memory\n", __FUNCTION__);
5173 + return -ENOMEM;
5174 +}
5175 +
5176 +static void mmap_req_del(int mmap)
5177 +{
5178 + BUG_ON(!spin_is_locked(&pending_free_lock));
5179 +
5180 + kfree(pending_reqs[mmap]);
5181 + pending_reqs[mmap] = NULL;
5182 +
5183 +	free_empty_pages_and_pagevec(foreign_pages[mmap], mmap_pages);
5184 + foreign_pages[mmap] = NULL;
5185 +
5186 + mmap_lock = 0;
5187 + DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
5188 + mmap_alloc--;
5189 +}
5190 +
5191 +static pending_req_t* alloc_req(void)
5192 +{
5193 + pending_req_t *req = NULL;
5194 + unsigned long flags;
5195 +
5196 + spin_lock_irqsave(&pending_free_lock, flags);
5197 +
5198 + if (!list_empty(&pending_free)) {
5199 + req = list_entry(pending_free.next, pending_req_t, free_list);
5200 + list_del(&req->free_list);
5201 + }
5202 +
5203 + if (req) {
5204 + req->inuse = 1;
5205 + alloc_pending_reqs++;
5206 + }
5207 + spin_unlock_irqrestore(&pending_free_lock, flags);
5208 +
5209 + return req;
5210 +}
5211 +
5212 +static void free_req(pending_req_t *req)
5213 +{
5214 + unsigned long flags;
5215 + int was_empty;
5216 +
5217 + spin_lock_irqsave(&pending_free_lock, flags);
5218 +
5219 + alloc_pending_reqs--;
5220 + req->inuse = 0;
5221 + if (mmap_lock && (req->mem_idx == mmap_alloc-1)) {
5222 + mmap_inuse--;
5223 + if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
5224 + spin_unlock_irqrestore(&pending_free_lock, flags);
5225 + return;
5226 + }
5227 + was_empty = list_empty(&pending_free);
5228 + list_add(&req->free_list, &pending_free);
5229 +
5230 + spin_unlock_irqrestore(&pending_free_lock, flags);
5231 +
5232 + if (was_empty)
5233 + wake_up(&pending_free_wq);
5234 +}
5235 +
5236 +static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
5237 + int tapidx)
5238 +{
5239 + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
5240 + unsigned int i, invcount = 0, locked = 0;
5241 + struct grant_handle_pair *khandle;
5242 + uint64_t ptep;
5243 + int ret, mmap_idx;
5244 + unsigned long kvaddr, uvaddr;
5245 + tap_blkif_t *info;
5246 + struct mm_struct *mm;
5247 +
5248 +
5249 + info = tapfds[tapidx];
5250 +
5251 + if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) {
5252 + WPRINTK("fast_flush: Couldn't get info!\n");
5253 + return;
5254 + }
5255 +
5256 + mm = info->vma ? info->vma->vm_mm : NULL;
5257 +
5258 + if (info->vma != NULL &&
5259 + xen_feature(XENFEAT_auto_translated_physmap)) {
5260 + down_write(&mm->mmap_sem);
5261 + zap_page_range(info->vma,
5262 + MMAP_VADDR(info->user_vstart, u_idx, 0),
5263 + req->nr_pages << PAGE_SHIFT, NULL);
5264 + up_write(&mm->mmap_sem);
5265 + return;
5266 + }
5267 +
5268 + mmap_idx = req->mem_idx;
5269 +
5270 + for (i = 0; i < req->nr_pages; i++) {
5271 + kvaddr = idx_to_kaddr(mmap_idx, k_idx, i);
5272 + uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
5273 +
5274 + khandle = &pending_handle(mmap_idx, k_idx, i);
5275 +
5276 + if (khandle->kernel != INVALID_GRANT_HANDLE) {
5277 + gnttab_set_unmap_op(&unmap[invcount],
5278 + idx_to_kaddr(mmap_idx, k_idx, i),
5279 + GNTMAP_host_map, khandle->kernel);
5280 + invcount++;
5281 +
5282 + set_phys_to_machine(
5283 + __pa(idx_to_kaddr(mmap_idx, k_idx, i))
5284 + >> PAGE_SHIFT, INVALID_P2M_ENTRY);
5285 + }
5286 +
5287 + if (khandle->user != INVALID_GRANT_HANDLE) {
5288 + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
5289 + if (!locked++)
5290 + down_write(&mm->mmap_sem);
5291 + if (create_lookup_pte_addr(
5292 + mm,
5293 + MMAP_VADDR(info->user_vstart, u_idx, i),
5294 + &ptep) !=0) {
5295 + up_write(&mm->mmap_sem);
5296 + WPRINTK("Couldn't get a pte addr!\n");
5297 + return;
5298 + }
5299 +
5300 + gnttab_set_unmap_op(&unmap[invcount], ptep,
5301 + GNTMAP_host_map
5302 + | GNTMAP_application_map
5303 + | GNTMAP_contains_pte,
5304 + khandle->user);
5305 + invcount++;
5306 + }
5307 +
5308 + BLKTAP_INVALIDATE_HANDLE(khandle);
5309 + }
5310 + ret = HYPERVISOR_grant_table_op(
5311 + GNTTABOP_unmap_grant_ref, unmap, invcount);
5312 + BUG_ON(ret);
5313 +
5314 + if (info->vma != NULL &&
5315 + !xen_feature(XENFEAT_auto_translated_physmap)) {
5316 + if (!locked++)
5317 + down_write(&mm->mmap_sem);
5318 + zap_page_range(info->vma,
5319 + MMAP_VADDR(info->user_vstart, u_idx, 0),
5320 + req->nr_pages << PAGE_SHIFT, NULL);
5321 + }
5322 +
5323 + if (locked)
5324 + up_write(&mm->mmap_sem);
5325 +}
5326 +
5327 +/******************************************************************
5328 + * SCHEDULER FUNCTIONS
5329 + */
5330 +
5331 +static void print_stats(blkif_t *blkif)
5332 +{
5333 + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
5334 + current->comm, blkif->st_oo_req,
5335 + blkif->st_rd_req, blkif->st_wr_req);
5336 + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
5337 + blkif->st_rd_req = 0;
5338 + blkif->st_wr_req = 0;
5339 + blkif->st_oo_req = 0;
5340 +}
5341 +
5342 +int tap_blkif_schedule(void *arg)
5343 +{
5344 + blkif_t *blkif = arg;
5345 +
5346 + blkif_get(blkif);
5347 +
5348 + if (debug_lvl)
5349 + printk(KERN_DEBUG "%s: started\n", current->comm);
5350 +
5351 + while (!kthread_should_stop()) {
5352 + if (try_to_freeze())
5353 + continue;
5354 +
5355 + wait_event_interruptible(
5356 + blkif->wq,
5357 + blkif->waiting_reqs || kthread_should_stop());
5358 + wait_event_interruptible(
5359 + pending_free_wq,
5360 + !list_empty(&pending_free) || kthread_should_stop());
5361 +
5362 + blkif->waiting_reqs = 0;
5363 + smp_mb(); /* clear flag *before* checking for work */
5364 +
5365 + if (do_block_io_op(blkif))
5366 + blkif->waiting_reqs = 1;
5367 +
5368 + if (log_stats && time_after(jiffies, blkif->st_print))
5369 + print_stats(blkif);
5370 + }
5371 +
5372 + if (log_stats)
5373 + print_stats(blkif);
5374 + if (debug_lvl)
5375 + printk(KERN_DEBUG "%s: exiting\n", current->comm);
5376 +
5377 + blkif->xenblkd = NULL;
5378 + blkif_put(blkif);
5379 +
5380 + return 0;
5381 +}
5382 +
5383 +/******************************************************************
5384 + * COMPLETION CALLBACK -- Called by user level ioctl()
5385 + */
5386 +
5387 +static int blktap_read_ufe_ring(tap_blkif_t *info)
5388 +{
5389 + /* This is called to read responses from the UFE ring. */
5390 + RING_IDX i, j, rp;
5391 + blkif_response_t *resp;
5392 + blkif_t *blkif=NULL;
5393 + int pending_idx, usr_idx, mmap_idx;
5394 + pending_req_t *pending_req;
5395 +
5396 + if (!info)
5397 + return 0;
5398 +
5399 + /* We currently only forward packets in INTERCEPT_FE mode. */
5400 + if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
5401 + return 0;
5402 +
5403 + /* for each outstanding message on the UFE ring */
5404 + rp = info->ufe_ring.sring->rsp_prod;
5405 + rmb();
5406 +
5407 + for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
5408 + blkif_response_t res;
5409 + resp = RING_GET_RESPONSE(&info->ufe_ring, i);
5410 + memcpy(&res, resp, sizeof(res));
5411 + mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
5412 + ++info->ufe_ring.rsp_cons;
5413 +
5414 + /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
5415 + usr_idx = (int)res.id;
5416 + pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
5417 + mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
5418 +
5419 + if ( (mmap_idx >= mmap_alloc) ||
5420 + (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) )
5421 + WPRINTK("Incorrect req map"
5422 + "[%d], internal map [%d,%d (%d)]\n",
5423 + usr_idx, mmap_idx,
5424 + ID_TO_IDX(info->idx_map[usr_idx]),
5425 + MASK_PEND_IDX(
5426 + ID_TO_IDX(info->idx_map[usr_idx])));
5427 +
5428 + pending_req = &pending_reqs[mmap_idx][pending_idx];
5429 + blkif = pending_req->blkif;
5430 +
5431 + for (j = 0; j < pending_req->nr_pages; j++) {
5432 +
5433 + unsigned long kvaddr, uvaddr;
5434 + struct page **map = info->vma->vm_private_data;
5435 + struct page *pg;
5436 + int offset;
5437 +
5438 + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
5439 + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, j);
5440 +
5441 + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
5442 + ClearPageReserved(pg);
5443 + offset = (uvaddr - info->vma->vm_start)
5444 + >> PAGE_SHIFT;
5445 + map[offset] = NULL;
5446 + }
5447 + fast_flush_area(pending_req, pending_idx, usr_idx, info->minor);
5448 + info->idx_map[usr_idx] = INVALID_REQ;
5449 + make_response(blkif, pending_req->id, res.operation,
5450 + res.status);
5451 + blkif_put(pending_req->blkif);
5452 + free_req(pending_req);
5453 + }
5454 +
5455 + return 0;
5456 +}
5457 +
5458 +
5459 +/******************************************************************************
5460 + * NOTIFICATION FROM GUEST OS.
5461 + */
5462 +
5463 +static void blkif_notify_work(blkif_t *blkif)
5464 +{
5465 + blkif->waiting_reqs = 1;
5466 + wake_up(&blkif->wq);
5467 +}
5468 +
5469 +irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
5470 +{
5471 + blkif_notify_work(dev_id);
5472 + return IRQ_HANDLED;
5473 +}
5474 +
5475 +
5476 +
5477 +/******************************************************************
5478 + * DOWNWARD CALLS -- These interface with the block-device layer proper.
5479 + */
5480 +static int print_dbug = 1;
5481 +static int do_block_io_op(blkif_t *blkif)
5482 +{
5483 + blkif_back_rings_t *blk_rings = &blkif->blk_rings;
5484 + blkif_request_t req;
5485 + pending_req_t *pending_req;
5486 + RING_IDX rc, rp;
5487 + int more_to_do = 0;
5488 + tap_blkif_t *info;
5489 +
5490 + rc = blk_rings->common.req_cons;
5491 + rp = blk_rings->common.sring->req_prod;
5492 + rmb(); /* Ensure we see queued requests up to 'rp'. */
5493 +
5494 + /*Check blkif has corresponding UE ring*/
5495 + if (blkif->dev_num < 0) {
5496 + /*oops*/
5497 + if (print_dbug) {
5498 + WPRINTK("Corresponding UE "
5499 + "ring does not exist!\n");
5500 + print_dbug = 0; /*We only print this message once*/
5501 + }
5502 + return 0;
5503 + }
5504 +
5505 + info = tapfds[blkif->dev_num];
5506 +
5507 + if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) {
5508 + if (print_dbug) {
5509 + WPRINTK("Can't get UE info!\n");
5510 + print_dbug = 0;
5511 + }
5512 + return 0;
5513 + }
5514 +
5515 + while (rc != rp) {
5516 +
5517 + if (RING_FULL(&info->ufe_ring)) {
5518 + WPRINTK("RING_FULL! More to do\n");
5519 + more_to_do = 1;
5520 + break;
5521 + }
5522 +
5523 + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) {
5524 + WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
5525 + " More to do\n");
5526 + more_to_do = 1;
5527 + break;
5528 + }
5529 +
5530 + pending_req = alloc_req();
5531 + if (NULL == pending_req) {
5532 + blkif->st_oo_req++;
5533 + more_to_do = 1;
5534 + break;
5535 + }
5536 +
5537 + if (kthread_should_stop()) {
5538 + more_to_do = 1;
5539 + break;
5540 + }
5541 +
5542 + switch (blkif->blk_protocol) {
5543 + case BLKIF_PROTOCOL_NATIVE:
5544 + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc),
5545 + sizeof(req));
5546 + break;
5547 + case BLKIF_PROTOCOL_X86_32:
5548 + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
5549 + break;
5550 + case BLKIF_PROTOCOL_X86_64:
5551 + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
5552 + break;
5553 + default:
5554 + BUG();
5555 + }
5556 + blk_rings->common.req_cons = ++rc; /* before make_response() */
5557 +
5558 + /* Apply all sanity checks to /private copy/ of request. */
5559 + barrier();
5560 +
5561 + switch (req.operation) {
5562 + case BLKIF_OP_READ:
5563 + blkif->st_rd_req++;
5564 + dispatch_rw_block_io(blkif, &req, pending_req);
5565 + break;
5566 +
5567 + case BLKIF_OP_WRITE:
5568 + blkif->st_wr_req++;
5569 + dispatch_rw_block_io(blkif, &req, pending_req);
5570 + break;
5571 +
5572 + default:
5573 + /* A good sign something is wrong: sleep for a while to
5574 + * avoid excessive CPU consumption by a bad guest. */
5575 + msleep(1);
5576 + WPRINTK("unknown operation [%d]\n",
5577 + req.operation);
5578 + make_response(blkif, req.id, req.operation,
5579 + BLKIF_RSP_ERROR);
5580 + free_req(pending_req);
5581 + break;
5582 + }
5583 +
5584 + /* Yield point for this unbounded loop. */
5585 + cond_resched();
5586 + }
5587 +
5588 + blktap_kick_user(blkif->dev_num);
5589 +
5590 + return more_to_do;
5591 +}
5592 +
5593 +static void dispatch_rw_block_io(blkif_t *blkif,
5594 + blkif_request_t *req,
5595 + pending_req_t *pending_req)
5596 +{
5597 + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
5598 + int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
5599 + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
5600 + unsigned int nseg;
5601 + int ret, i, nr_sects = 0;
5602 + tap_blkif_t *info;
5603 + blkif_request_t *target;
5604 + int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
5605 + int usr_idx;
5606 + uint16_t mmap_idx = pending_req->mem_idx;
5607 + struct mm_struct *mm;
5608 +
5609 + if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV)
5610 + goto fail_response;
5611 +
5612 + info = tapfds[blkif->dev_num];
5613 + if (info == NULL)
5614 + goto fail_response;
5615 +
5616 + /* Check we have space on user ring - should never fail. */
5617 + usr_idx = GET_NEXT_REQ(info->idx_map);
5618 + if (usr_idx == INVALID_REQ) {
5619 + BUG();
5620 + goto fail_response;
5621 + }
5622 +
5623 + /* Check that number of segments is sane. */
5624 + nseg = req->nr_segments;
5625 + if ( unlikely(nseg == 0) ||
5626 + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
5627 + WPRINTK("Bad number of segments in request (%d)\n", nseg);
5628 + goto fail_response;
5629 + }
5630 +
5631 + /* Make sure userspace is ready. */
5632 + if (!info->ring_ok) {
5633 + WPRINTK("blktap: ring not ready for requests!\n");
5634 + goto fail_response;
5635 + }
5636 +
5637 + if (RING_FULL(&info->ufe_ring)) {
5638 + WPRINTK("blktap: fe_ring is full, can't add request; "
5639 + "IO will be dropped. %d %d\n",
5640 + RING_SIZE(&info->ufe_ring),
5641 + RING_SIZE(&blkif->blk_rings.common));
5642 + goto fail_response;
5643 + }
5644 +
5645 + pending_req->blkif = blkif;
5646 + pending_req->id = req->id;
5647 + pending_req->operation = operation;
5648 + pending_req->status = BLKIF_RSP_OKAY;
5649 + pending_req->nr_pages = nseg;
5650 + op = 0;
5651 + mm = info->vma->vm_mm;
5652 + if (!xen_feature(XENFEAT_auto_translated_physmap))
5653 + down_write(&mm->mmap_sem);
5654 + for (i = 0; i < nseg; i++) {
5655 + unsigned long uvaddr;
5656 + unsigned long kvaddr;
5657 + uint64_t ptep;
5658 + uint32_t flags;
5659 +
5660 + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
5661 + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
5662 +
5663 + flags = GNTMAP_host_map;
5664 + if (operation == WRITE)
5665 + flags |= GNTMAP_readonly;
5666 + gnttab_set_map_op(&map[op], kvaddr, flags,
5667 + req->seg[i].gref, blkif->domid);
5668 + op++;
5669 +
5670 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
5671 + /* Now map it to user. */
5672 + ret = create_lookup_pte_addr(mm, uvaddr, &ptep);
5673 + if (ret) {
5674 + up_write(&mm->mmap_sem);
5675 + WPRINTK("Couldn't get a pte addr!\n");
5676 + goto fail_flush;
5677 + }
5678 +
5679 + flags = GNTMAP_host_map | GNTMAP_application_map
5680 + | GNTMAP_contains_pte;
5681 + if (operation == WRITE)
5682 + flags |= GNTMAP_readonly;
5683 + gnttab_set_map_op(&map[op], ptep, flags,
5684 + req->seg[i].gref, blkif->domid);
5685 + op++;
5686 + }
5687 +
5688 + nr_sects += (req->seg[i].last_sect -
5689 + req->seg[i].first_sect + 1);
5690 + }
5691 +
5692 + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
5693 + BUG_ON(ret);
5694 +
5695 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
5696 + up_write(&mm->mmap_sem);
5697 +
5698 + for (i = 0; i < (nseg*2); i+=2) {
5699 + unsigned long uvaddr;
5700 + unsigned long kvaddr;
5701 + unsigned long offset;
5702 + struct page *pg;
5703 +
5704 + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
5705 + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i/2);
5706 +
5707 + if (unlikely(map[i].status != 0)) {
5708 + WPRINTK("invalid kernel buffer -- "
5709 + "could not remap it\n");
5710 + ret |= 1;
5711 + map[i].handle = INVALID_GRANT_HANDLE;
5712 + }
5713 +
5714 + if (unlikely(map[i+1].status != 0)) {
5715 + WPRINTK("invalid user buffer -- "
5716 + "could not remap it\n");
5717 + ret |= 1;
5718 + map[i+1].handle = INVALID_GRANT_HANDLE;
5719 + }
5720 +
5721 + pending_handle(mmap_idx, pending_idx, i/2).kernel
5722 + = map[i].handle;
5723 + pending_handle(mmap_idx, pending_idx, i/2).user
5724 + = map[i+1].handle;
5725 +
5726 + if (ret)
5727 + continue;
5728 +
5729 + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
5730 + FOREIGN_FRAME(map[i].dev_bus_addr
5731 + >> PAGE_SHIFT));
5732 + offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
5733 + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
5734 + ((struct page **)info->vma->vm_private_data)[offset] =
5735 + pg;
5736 + }
5737 + } else {
5738 + for (i = 0; i < nseg; i++) {
5739 + unsigned long uvaddr;
5740 + unsigned long kvaddr;
5741 + unsigned long offset;
5742 + struct page *pg;
5743 +
5744 + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
5745 + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
5746 +
5747 + if (unlikely(map[i].status != 0)) {
5748 + WPRINTK("invalid kernel buffer -- "
5749 + "could not remap it\n");
5750 + ret |= 1;
5751 + map[i].handle = INVALID_GRANT_HANDLE;
5752 + }
5753 +
5754 + pending_handle(mmap_idx, pending_idx, i).kernel
5755 + = map[i].handle;
5756 +
5757 + if (ret)
5758 + continue;
5759 +
5760 + offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
5761 + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
5762 + ((struct page **)info->vma->vm_private_data)[offset] =
5763 + pg;
5764 + }
5765 + }
5766 +
5767 + if (ret)
5768 + goto fail_flush;
5769 +
5770 + if (xen_feature(XENFEAT_auto_translated_physmap))
5771 + down_write(&mm->mmap_sem);
5772 + /* Mark mapped pages as reserved: */
5773 + for (i = 0; i < req->nr_segments; i++) {
5774 + unsigned long kvaddr;
5775 + struct page *pg;
5776 +
5777 + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
5778 + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
5779 + SetPageReserved(pg);
5780 + if (xen_feature(XENFEAT_auto_translated_physmap)) {
5781 + ret = vm_insert_page(info->vma,
5782 + MMAP_VADDR(info->user_vstart,
5783 + usr_idx, i), pg);
5784 + if (ret) {
5785 + up_write(&mm->mmap_sem);
5786 + goto fail_flush;
5787 + }
5788 + }
5789 + }
5790 + if (xen_feature(XENFEAT_auto_translated_physmap))
5791 + up_write(&mm->mmap_sem);
5792 +
5793 + /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
5794 + info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);
5795 +
5796 + blkif_get(blkif);
5797 + /* Finally, write the request message to the user ring. */
5798 + target = RING_GET_REQUEST(&info->ufe_ring,
5799 + info->ufe_ring.req_prod_pvt);
5800 + memcpy(target, req, sizeof(*req));
5801 + target->id = usr_idx;
5802 + wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
5803 + info->ufe_ring.req_prod_pvt++;
5804 +
5805 + if (operation == READ)
5806 + blkif->st_rd_sect += nr_sects;
5807 + else if (operation == WRITE)
5808 + blkif->st_wr_sect += nr_sects;
5809 +
5810 + return;
5811 +
5812 + fail_flush:
5813 + WPRINTK("Reached Fail_flush\n");
5814 + fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
5815 + fail_response:
5816 + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
5817 + free_req(pending_req);
5818 + msleep(1); /* back off a bit */
5819 +}
5820 +
5821 +
5822 +
5823 +/******************************************************************
5824 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
5825 + */
5826 +
5827 +
5828 +static void make_response(blkif_t *blkif, u64 id,
5829 + unsigned short op, int st)
5830 +{
5831 + blkif_response_t resp;
5832 + unsigned long flags;
5833 + blkif_back_rings_t *blk_rings = &blkif->blk_rings;
5834 + int more_to_do = 0;
5835 + int notify;
5836 +
5837 + resp.id = id;
5838 + resp.operation = op;
5839 + resp.status = st;
5840 +
5841 + spin_lock_irqsave(&blkif->blk_ring_lock, flags);
5842 + /* Place on the response ring for the relevant domain. */
5843 + switch (blkif->blk_protocol) {
5844 + case BLKIF_PROTOCOL_NATIVE:
5845 + memcpy(RING_GET_RESPONSE(&blk_rings->native,
5846 + blk_rings->native.rsp_prod_pvt),
5847 + &resp, sizeof(resp));
5848 + break;
5849 + case BLKIF_PROTOCOL_X86_32:
5850 + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32,
5851 + blk_rings->x86_32.rsp_prod_pvt),
5852 + &resp, sizeof(resp));
5853 + break;
5854 + case BLKIF_PROTOCOL_X86_64:
5855 + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64,
5856 + blk_rings->x86_64.rsp_prod_pvt),
5857 + &resp, sizeof(resp));
5858 + break;
5859 + default:
5860 + BUG();
5861 + }
5862 + blk_rings->common.rsp_prod_pvt++;
5863 + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
5864 +
5865 + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
5866 + /*
5867 + * Tail check for pending requests. Allows frontend to avoid
5868 + * notifications if requests are already in flight (lower
5869 + * overheads and promotes batching).
5870 + */
5871 + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
5872 + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
5873 + more_to_do = 1;
5874 + }
5875 +
5876 + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
5877 + if (more_to_do)
5878 + blkif_notify_work(blkif);
5879 + if (notify)
5880 + notify_remote_via_irq(blkif->irq);
5881 +}
5882 +
5883 +static int __init blkif_init(void)
5884 +{
5885 + int i, ret;
5886 + struct class *class;
5887 +
5888 + if (!is_running_on_xen())
5889 + return -ENODEV;
5890 +
5891 + INIT_LIST_HEAD(&pending_free);
5892 + for(i = 0; i < 2; i++) {
5893 + ret = req_increase();
5894 + if (ret)
5895 + break;
5896 + }
5897 + if (i == 0)
5898 + return ret;
5899 +
5900 + tap_blkif_interface_init();
5901 +
5902 + alloc_pending_reqs = 0;
5903 +
5904 + tap_blkif_xenbus_init();
5905 +
5906 + /* Dynamically allocate a major for this device */
5907 + ret = register_chrdev(0, "blktap", &blktap_fops);
5908 +
5909 + if (ret < 0) {
5910 + WPRINTK("Couldn't register /dev/xen/blktap\n");
5911 + return -ENOMEM;
5912 + }
5913 +
5914 + blktap_major = ret;
5915 +
5916 + /* tapfds[0] is always NULL */
5917 + blktap_next_minor++;
5918 +
5919 + DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
5920 +
5921 + /* Make sure the xen class exists */
5922 + if ((class = get_xen_class()) != NULL) {
5923 + /*
5924 + * This will allow udev to create the blktap ctrl device.
5925 + * We only want to create blktap0 first. We don't want
5926 + * to flood the sysfs system with needless blktap devices.
5927 + * We only create the device when a request for a new device is
5928 + * made.
5929 + */
5930 + class_device_create(class, NULL,
5931 + MKDEV(blktap_major, 0), NULL,
5932 + "blktap0");
5933 + } else {
5934 + /* this is bad, but not fatal */
5935 + WPRINTK("blktap: sysfs xen_class not created\n");
5936 + }
5937 +
5938 + DPRINTK("Blktap device successfully created\n");
5939 +
5940 + return 0;
5941 +}
5942 +
5943 +module_init(blkif_init);
5944 +
5945 +MODULE_LICENSE("Dual BSD/GPL");
5946 Index: head-2008-11-25/drivers/xen/blktap/common.h
5947 ===================================================================
5948 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
5949 +++ head-2008-11-25/drivers/xen/blktap/common.h 2008-09-15 13:40:15.000000000 +0200
5950 @@ -0,0 +1,122 @@
5951 +/*
5952 + * This program is free software; you can redistribute it and/or
5953 + * modify it under the terms of the GNU General Public License version 2
5954 + * as published by the Free Software Foundation; or, when distributed
5955 + * separately from the Linux kernel or incorporated into other
5956 + * software packages, subject to the following license:
5957 + *
5958 + * Permission is hereby granted, free of charge, to any person obtaining a copy
5959 + * of this source file (the "Software"), to deal in the Software without
5960 + * restriction, including without limitation the rights to use, copy, modify,
5961 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
5962 + * and to permit persons to whom the Software is furnished to do so, subject to
5963 + * the following conditions:
5964 + *
5965 + * The above copyright notice and this permission notice shall be included in
5966 + * all copies or substantial portions of the Software.
5967 + *
5968 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5969 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5970 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5971 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5972 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
5973 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
5974 + * IN THE SOFTWARE.
5975 + */
5976 +
5977 +#ifndef __BLKIF__BACKEND__COMMON_H__
5978 +#define __BLKIF__BACKEND__COMMON_H__
5979 +
5980 +#include <linux/version.h>
5981 +#include <linux/module.h>
5982 +#include <linux/interrupt.h>
5983 +#include <linux/slab.h>
5984 +#include <linux/blkdev.h>
5985 +#include <linux/vmalloc.h>
5986 +#include <asm/io.h>
5987 +#include <asm/setup.h>
5988 +#include <asm/pgalloc.h>
5989 +#include <xen/evtchn.h>
5990 +#include <asm/hypervisor.h>
5991 +#include <xen/blkif.h>
5992 +#include <xen/gnttab.h>
5993 +#include <xen/driver_util.h>
5994 +
5995 +#define DPRINTK(_f, _a...) pr_debug("(file=%s, line=%d) " _f, \
5996 + __FILE__ , __LINE__ , ## _a )
5997 +
5998 +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
5999 +
6000 +struct backend_info;
6001 +
6002 +typedef struct blkif_st {
6003 + /* Unique identifier for this interface. */
6004 + domid_t domid;
6005 + unsigned int handle;
6006 + /* Physical parameters of the comms window. */
6007 + unsigned int irq;
6008 + /* Comms information. */
6009 + enum blkif_protocol blk_protocol;
6010 + blkif_back_rings_t blk_rings;
6011 + struct vm_struct *blk_ring_area;
6012 + /* Back pointer to the backend_info. */
6013 + struct backend_info *be;
6014 + /* Private fields. */
6015 + spinlock_t blk_ring_lock;
6016 + atomic_t refcnt;
6017 +
6018 + wait_queue_head_t wq;
6019 + struct task_struct *xenblkd;
6020 + unsigned int waiting_reqs;
6021 + request_queue_t *plug;
6022 +
6023 + /* statistics */
6024 + unsigned long st_print;
6025 + int st_rd_req;
6026 + int st_wr_req;
6027 + int st_oo_req;
6028 + int st_rd_sect;
6029 + int st_wr_sect;
6030 +
6031 + wait_queue_head_t waiting_to_free;
6032 +
6033 + grant_handle_t shmem_handle;
6034 + grant_ref_t shmem_ref;
6035 +
6036 + int dev_num;
6037 + uint64_t sectors;
6038 +} blkif_t;
6039 +
6040 +blkif_t *tap_alloc_blkif(domid_t domid);
6041 +void tap_blkif_free(blkif_t *blkif);
6042 +void tap_blkif_kmem_cache_free(blkif_t *blkif);
6043 +int tap_blkif_map(blkif_t *blkif, unsigned long shared_page,
6044 + unsigned int evtchn);
6045 +void tap_blkif_unmap(blkif_t *blkif);
6046 +
6047 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
6048 +#define blkif_put(_b) \
6049 + do { \
6050 + if (atomic_dec_and_test(&(_b)->refcnt)) \
6051 + wake_up(&(_b)->waiting_to_free);\
6052 + } while (0)
6053 +
6054 +
6055 +struct phys_req {
6056 + unsigned short dev;
6057 + unsigned short nr_sects;
6058 + struct block_device *bdev;
6059 + blkif_sector_t sector_number;
6060 +};
6061 +
6062 +void tap_blkif_interface_init(void);
6063 +
6064 +void tap_blkif_xenbus_init(void);
6065 +
6066 +irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
6067 +int tap_blkif_schedule(void *arg);
6068 +
6069 +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
6070 +void signal_tapdisk(int idx);
6071 +
6072 +#endif /* __BLKIF__BACKEND__COMMON_H__ */
6073 Index: head-2008-11-25/drivers/xen/blktap/interface.c
6074 ===================================================================
6075 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
6076 +++ head-2008-11-25/drivers/xen/blktap/interface.c 2008-09-15 13:40:15.000000000 +0200
6077 @@ -0,0 +1,181 @@
6078 +/******************************************************************************
6079 + * drivers/xen/blktap/interface.c
6080 + *
6081 + * Block-device interface management.
6082 + *
6083 + * Copyright (c) 2004, Keir Fraser
6084 + *
6085 + * This program is free software; you can redistribute it and/or
6086 + * modify it under the terms of the GNU General Public License version 2
6087 + * as published by the Free Software Foundation; or, when distributed
6088 + * separately from the Linux kernel or incorporated into other
6089 + * software packages, subject to the following license:
6090 + *
6091 + * Permission is hereby granted, free of charge, to any person obtaining a copy
6092 + * of this source file (the "Software"), to deal in the Software without
6093 + * restriction, including without limitation the rights to use, copy, modify,
6094 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
6095 + * and to permit persons to whom the Software is furnished to do so, subject to
6096 + * the following conditions:
6097 + *
6098 + * The above copyright notice and this permission notice shall be included in
6099 + * all copies or substantial portions of the Software.
6100 + *
6101 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
6102 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6103 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
6104 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
6105 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
6106 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
6107 + * IN THE SOFTWARE.
6108 +
6109 + */
6110 +
6111 +#include "common.h"
6112 +#include <xen/evtchn.h>
6113 +
6114 +static kmem_cache_t *blkif_cachep;
6115 +
6116 +blkif_t *tap_alloc_blkif(domid_t domid)
6117 +{
6118 + blkif_t *blkif;
6119 +
6120 + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
6121 + if (!blkif)
6122 + return ERR_PTR(-ENOMEM);
6123 +
6124 + memset(blkif, 0, sizeof(*blkif));
6125 + blkif->domid = domid;
6126 + spin_lock_init(&blkif->blk_ring_lock);
6127 + atomic_set(&blkif->refcnt, 1);
6128 + init_waitqueue_head(&blkif->wq);
6129 + blkif->st_print = jiffies;
6130 + init_waitqueue_head(&blkif->waiting_to_free);
6131 +
6132 + return blkif;
6133 +}
6134 +
6135 +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
6136 +{
6137 + struct gnttab_map_grant_ref op;
6138 +
6139 + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
6140 + GNTMAP_host_map, shared_page, blkif->domid);
6141 +
6142 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
6143 + BUG();
6144 +
6145 + if (op.status) {
6146 + DPRINTK("Grant table operation failure!\n");
6147 + return op.status;
6148 + }
6149 +
6150 + blkif->shmem_ref = shared_page;
6151 + blkif->shmem_handle = op.handle;
6152 +
6153 + return 0;
6154 +}
6155 +
6156 +static void unmap_frontend_page(blkif_t *blkif)
6157 +{
6158 + struct gnttab_unmap_grant_ref op;
6159 +
6160 + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
6161 + GNTMAP_host_map, blkif->shmem_handle);
6162 +
6163 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
6164 + BUG();
6165 +}
6166 +
6167 +int tap_blkif_map(blkif_t *blkif, unsigned long shared_page,
6168 + unsigned int evtchn)
6169 +{
6170 + int err;
6171 +
6172 + /* Already connected through? */
6173 + if (blkif->irq)
6174 + return 0;
6175 +
6176 + if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
6177 + return -ENOMEM;
6178 +
6179 + err = map_frontend_page(blkif, shared_page);
6180 + if (err) {
6181 + free_vm_area(blkif->blk_ring_area);
6182 + return err;
6183 + }
6184 +
6185 + switch (blkif->blk_protocol) {
6186 + case BLKIF_PROTOCOL_NATIVE:
6187 + {
6188 + blkif_sring_t *sring;
6189 + sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
6190 + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
6191 + break;
6192 + }
6193 + case BLKIF_PROTOCOL_X86_32:
6194 + {
6195 + blkif_x86_32_sring_t *sring_x86_32;
6196 + sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr;
6197 + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
6198 + break;
6199 + }
6200 + case BLKIF_PROTOCOL_X86_64:
6201 + {
6202 + blkif_x86_64_sring_t *sring_x86_64;
6203 + sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr;
6204 + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
6205 + break;
6206 + }
6207 + default:
6208 + BUG();
6209 + }
6210 +
6211 + err = bind_interdomain_evtchn_to_irqhandler(
6212 + blkif->domid, evtchn, tap_blkif_be_int,
6213 + 0, "blkif-backend", blkif);
6214 + if (err < 0) {
6215 + unmap_frontend_page(blkif);
6216 + free_vm_area(blkif->blk_ring_area);
6217 + blkif->blk_rings.common.sring = NULL;
6218 + return err;
6219 + }
6220 + blkif->irq = err;
6221 +
6222 + return 0;
6223 +}
6224 +
6225 +void tap_blkif_unmap(blkif_t *blkif)
6226 +{
6227 + if (blkif->irq) {
6228 + unbind_from_irqhandler(blkif->irq, blkif);
6229 + blkif->irq = 0;
6230 + }
6231 + if (blkif->blk_rings.common.sring) {
6232 + unmap_frontend_page(blkif);
6233 + free_vm_area(blkif->blk_ring_area);
6234 + blkif->blk_rings.common.sring = NULL;
6235 + }
6236 +}
6237 +
6238 +void tap_blkif_free(blkif_t *blkif)
6239 +{
6240 + atomic_dec(&blkif->refcnt);
6241 + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
6242 + atomic_inc(&blkif->refcnt);
6243 +
6244 + tap_blkif_unmap(blkif);
6245 +}
6246 +
6247 +void tap_blkif_kmem_cache_free(blkif_t *blkif)
6248 +{
6249 + if (!atomic_dec_and_test(&blkif->refcnt))
6250 + BUG();
6251 + kmem_cache_free(blkif_cachep, blkif);
6252 +}
6253 +
6254 +void __init tap_blkif_interface_init(void)
6255 +{
6256 + blkif_cachep = kmem_cache_create("blktapif_cache", sizeof(blkif_t),
6257 + 0, 0, NULL, NULL);
6258 +}
6259 Index: head-2008-11-25/drivers/xen/blktap/xenbus.c
6260 ===================================================================
6261 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
6262 +++ head-2008-11-25/drivers/xen/blktap/xenbus.c 2008-09-15 13:40:15.000000000 +0200
6263 @@ -0,0 +1,479 @@
6264 +/* drivers/xen/blktap/xenbus.c
6265 + *
6266 + * Xenbus code for blktap
6267 + *
6268 + * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
6269 + *
6270 + * Based on the blkback xenbus code:
6271 + *
6272 + * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
6273 + * Copyright (C) 2005 XenSource Ltd
6274 + *
6275 + * This program is free software; you can redistribute it and/or
6276 + * modify it under the terms of the GNU General Public License version 2
6277 + * as published by the Free Software Foundation; or, when distributed
6278 + * separately from the Linux kernel or incorporated into other
6279 + * software packages, subject to the following license:
6280 + *
6281 + * Permission is hereby granted, free of charge, to any person obtaining a copy
6282 + * of this source file (the "Software"), to deal in the Software without
6283 + * restriction, including without limitation the rights to use, copy, modify,
6284 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
6285 + * and to permit persons to whom the Software is furnished to do so, subject to
6286 + * the following conditions:
6287 + *
6288 + * The above copyright notice and this permission notice shall be included in
6289 + * all copies or substantial portions of the Software.
6290 + *
6291 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
6292 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6293 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
6294 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
6295 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
6296 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
6297 + * IN THE SOFTWARE.
6298 + */
6299 +
6300 +#include <stdarg.h>
6301 +#include <linux/module.h>
6302 +#include <linux/kthread.h>
6303 +#include <xen/xenbus.h>
6304 +#include "common.h"
6305 +
6306 +
6307 +struct backend_info
6308 +{
6309 + struct xenbus_device *dev;
6310 + blkif_t *blkif;
6311 + struct xenbus_watch backend_watch;
6312 + int xenbus_id;
6313 + int group_added;
6314 +};
6315 +
6316 +
6317 +static void connect(struct backend_info *);
6318 +static int connect_ring(struct backend_info *);
6319 +static int blktap_remove(struct xenbus_device *dev);
6320 +static int blktap_probe(struct xenbus_device *dev,
6321 + const struct xenbus_device_id *id);
6322 +static void tap_backend_changed(struct xenbus_watch *, const char **,
6323 + unsigned int);
6324 +static void tap_frontend_changed(struct xenbus_device *dev,
6325 + enum xenbus_state frontend_state);
6326 +
6327 +static int strsep_len(const char *str, char c, unsigned int len)
6328 +{
6329 + unsigned int i;
6330 +
6331 + for (i = 0; str[i]; i++)
6332 + if (str[i] == c) {
6333 + if (len == 0)
6334 + return i;
6335 + len--;
6336 + }
6337 + return (len == 0) ? i : -ERANGE;
6338 +}
6339 +
6340 +static long get_id(const char *str)
6341 +{
6342 + int len,end;
6343 + const char *ptr;
6344 + char *tptr, num[10];
6345 +
6346 + len = strsep_len(str, '/', 2);
6347 + end = strlen(str);
6348 + if ( (len < 0) || (end < 0) ) return -1;
6349 +
6350 + ptr = str + len + 1;
6351 + strncpy(num,ptr,end - len);
6352 + tptr = num + (end - (len + 1));
6353 + *tptr = '\0';
6354 + DPRINTK("Get_id called for %s (%s)\n",str,num);
6355 +
6356 + return simple_strtol(num, NULL, 10);
6357 +}
6358 +
6359 +static int blktap_name(blkif_t *blkif, char *buf)
6360 +{
6361 + char *devpath, *devname;
6362 + struct xenbus_device *dev = blkif->be->dev;
6363 +
6364 + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
6365 + if (IS_ERR(devpath))
6366 + return PTR_ERR(devpath);
6367 +
6368 + if ((devname = strstr(devpath, "/dev/")) != NULL)
6369 + devname += strlen("/dev/");
6370 + else
6371 + devname = devpath;
6372 +
6373 + snprintf(buf, TASK_COMM_LEN, "blktap.%d.%s", blkif->domid, devname);
6374 + kfree(devpath);
6375 +
6376 + return 0;
6377 +}
6378 +
6379 +/****************************************************************
6380 + * sysfs interface for I/O requests of blktap device
6381 + */
6382 +
6383 +#define VBD_SHOW(name, format, args...) \
6384 + static ssize_t show_##name(struct device *_dev, \
6385 + struct device_attribute *attr, \
6386 + char *buf) \
6387 + { \
6388 + struct xenbus_device *dev = to_xenbus_device(_dev); \
6389 + struct backend_info *be = dev->dev.driver_data; \
6390 + \
6391 + return sprintf(buf, format, ##args); \
6392 + } \
6393 + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
6394 +
6395 +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
6396 +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
6397 +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
6398 +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
6399 +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
6400 +
6401 +static struct attribute *tapstat_attrs[] = {
6402 + &dev_attr_oo_req.attr,
6403 + &dev_attr_rd_req.attr,
6404 + &dev_attr_wr_req.attr,
6405 + &dev_attr_rd_sect.attr,
6406 + &dev_attr_wr_sect.attr,
6407 + NULL
6408 +};
6409 +
6410 +static struct attribute_group tapstat_group = {
6411 + .name = "statistics",
6412 + .attrs = tapstat_attrs,
6413 +};
6414 +
6415 +int xentap_sysfs_addif(struct xenbus_device *dev)
6416 +{
6417 + int err;
6418 + struct backend_info *be = dev->dev.driver_data;
6419 + err = sysfs_create_group(&dev->dev.kobj, &tapstat_group);
6420 + if (!err)
6421 + be->group_added = 1;
6422 + return err;
6423 +}
6424 +
6425 +void xentap_sysfs_delif(struct xenbus_device *dev)
6426 +{
6427 + struct backend_info *be = dev->dev.driver_data;
6428 + sysfs_remove_group(&dev->dev.kobj, &tapstat_group);
6429 + be->group_added = 0;
6430 +}
6431 +
6432 +static int blktap_remove(struct xenbus_device *dev)
6433 +{
6434 + struct backend_info *be = dev->dev.driver_data;
6435 +
6436 + if (be->group_added)
6437 + xentap_sysfs_delif(be->dev);
6438 + if (be->backend_watch.node) {
6439 + unregister_xenbus_watch(&be->backend_watch);
6440 + kfree(be->backend_watch.node);
6441 + be->backend_watch.node = NULL;
6442 + }
6443 + if (be->blkif) {
6444 + if (be->blkif->xenblkd)
6445 + kthread_stop(be->blkif->xenblkd);
6446 + signal_tapdisk(be->blkif->dev_num);
6447 + tap_blkif_free(be->blkif);
6448 + tap_blkif_kmem_cache_free(be->blkif);
6449 + be->blkif = NULL;
6450 + }
6451 + kfree(be);
6452 + dev->dev.driver_data = NULL;
6453 + return 0;
6454 +}
6455 +
6456 +static void tap_update_blkif_status(blkif_t *blkif)
6457 +{
6458 + int err;
6459 + char name[TASK_COMM_LEN];
6460 +
6461 + /* Not ready to connect? */
6462 + if(!blkif->irq || !blkif->sectors) {
6463 + return;
6464 + }
6465 +
6466 + /* Already connected? */
6467 + if (blkif->be->dev->state == XenbusStateConnected)
6468 + return;
6469 +
6470 + /* Attempt to connect: exit if we fail to. */
6471 + connect(blkif->be);
6472 + if (blkif->be->dev->state != XenbusStateConnected)
6473 + return;
6474 +
6475 + err = blktap_name(blkif, name);
6476 + if (err) {
6477 + xenbus_dev_error(blkif->be->dev, err, "get blktap dev name");
6478 + return;
6479 + }
6480 +
6481 + if (!blkif->be->group_added) {
6482 + err = xentap_sysfs_addif(blkif->be->dev);
6483 + if (err) {
6484 + xenbus_dev_fatal(blkif->be->dev, err,
6485 + "creating sysfs entries");
6486 + return;
6487 + }
6488 + }
6489 +
6490 + blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, name);
6491 + if (IS_ERR(blkif->xenblkd)) {
6492 + err = PTR_ERR(blkif->xenblkd);
6493 + blkif->xenblkd = NULL;
6494 + xenbus_dev_fatal(blkif->be->dev, err, "start xenblkd");
6495 + WPRINTK("Error starting thread\n");
6496 + }
6497 +}
6498 +
6499 +/**
6500 + * Entry point to this code when a new device is created. Allocate
6501 + * the basic structures, and watch the store waiting for the
6502 + * user-space program to tell us the physical device info. Switch to
6503 + * InitWait.
6504 + */
6505 +static int blktap_probe(struct xenbus_device *dev,
6506 + const struct xenbus_device_id *id)
6507 +{
6508 + int err;
6509 + struct backend_info *be = kzalloc(sizeof(struct backend_info),
6510 + GFP_KERNEL);
6511 + if (!be) {
6512 + xenbus_dev_fatal(dev, -ENOMEM,
6513 + "allocating backend structure");
6514 + return -ENOMEM;
6515 + }
6516 +
6517 + be->dev = dev;
6518 + dev->dev.driver_data = be;
6519 + be->xenbus_id = get_id(dev->nodename);
6520 +
6521 + be->blkif = tap_alloc_blkif(dev->otherend_id);
6522 + if (IS_ERR(be->blkif)) {
6523 + err = PTR_ERR(be->blkif);
6524 + be->blkif = NULL;
6525 + xenbus_dev_fatal(dev, err, "creating block interface");
6526 + goto fail;
6527 + }
6528 +
6529 + /* setup back pointer */
6530 + be->blkif->be = be;
6531 + be->blkif->sectors = 0;
6532 +
6533 + /* set a watch on disk info, waiting for userspace to update details*/
6534 + err = xenbus_watch_path2(dev, dev->nodename, "info",
6535 + &be->backend_watch, tap_backend_changed);
6536 + if (err)
6537 + goto fail;
6538 +
6539 + err = xenbus_switch_state(dev, XenbusStateInitWait);
6540 + if (err)
6541 + goto fail;
6542 + return 0;
6543 +
6544 +fail:
6545 + DPRINTK("blktap probe failed\n");
6546 + blktap_remove(dev);
6547 + return err;
6548 +}
6549 +
6550 +
6551 +/**
6552 + * Callback received when the user space code has placed the device
6553 + * information in xenstore.
6554 + */
6555 +static void tap_backend_changed(struct xenbus_watch *watch,
6556 + const char **vec, unsigned int len)
6557 +{
6558 + int err;
6559 + unsigned long info;
6560 + struct backend_info *be
6561 + = container_of(watch, struct backend_info, backend_watch);
6562 + struct xenbus_device *dev = be->dev;
6563 +
6564 + /**
6565 + * Check to see whether userspace code has opened the image
6566 + * and written sector
6567 + * and disk info to xenstore
6568 + */
6569 + err = xenbus_gather(XBT_NIL, dev->nodename, "info", "%lu", &info,
6570 + NULL);
6571 + if (XENBUS_EXIST_ERR(err))
6572 + return;
6573 + if (err) {
6574 + xenbus_dev_error(dev, err, "getting info");
6575 + return;
6576 + }
6577 +
6578 + DPRINTK("Userspace update on disk info, %lu\n",info);
6579 +
6580 + err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu",
6581 + &be->blkif->sectors, NULL);
6582 +
6583 + /* Associate tap dev with domid*/
6584 + be->blkif->dev_num = dom_to_devid(be->blkif->domid, be->xenbus_id,
6585 + be->blkif);
6586 + DPRINTK("Thread started for domid [%d], connecting disk\n",
6587 + be->blkif->dev_num);
6588 +
6589 + tap_update_blkif_status(be->blkif);
6590 +}
6591 +
6592 +/**
6593 + * Callback received when the frontend's state changes.
6594 + */
6595 +static void tap_frontend_changed(struct xenbus_device *dev,
6596 + enum xenbus_state frontend_state)
6597 +{
6598 + struct backend_info *be = dev->dev.driver_data;
6599 + int err;
6600 +
6601 + DPRINTK("\n");
6602 +
6603 + switch (frontend_state) {
6604 + case XenbusStateInitialising:
6605 + if (dev->state == XenbusStateClosed) {
6606 + printk(KERN_INFO "%s: %s: prepare for reconnect\n",
6607 + __FUNCTION__, dev->nodename);
6608 + xenbus_switch_state(dev, XenbusStateInitWait);
6609 + }
6610 + break;
6611 +
6612 + case XenbusStateInitialised:
6613 + case XenbusStateConnected:
6614 + /* Ensure we connect even when two watches fire in
6615 + close succession and we miss the intermediate value
6616 + of frontend_state. */
6617 + if (dev->state == XenbusStateConnected)
6618 + break;
6619 +
6620 + err = connect_ring(be);
6621 + if (err)
6622 + break;
6623 + tap_update_blkif_status(be->blkif);
6624 + break;
6625 +
6626 + case XenbusStateClosing:
6627 + if (be->blkif->xenblkd) {
6628 + kthread_stop(be->blkif->xenblkd);
6629 + be->blkif->xenblkd = NULL;
6630 + }
6631 + tap_blkif_free(be->blkif);
6632 + xenbus_switch_state(dev, XenbusStateClosing);
6633 + break;
6634 +
6635 + case XenbusStateClosed:
6636 + xenbus_switch_state(dev, XenbusStateClosed);
6637 + if (xenbus_dev_is_online(dev))
6638 + break;
6639 + /* fall through if not online */
6640 + case XenbusStateUnknown:
6641 + device_unregister(&dev->dev);
6642 + break;
6643 +
6644 + default:
6645 + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
6646 + frontend_state);
6647 + break;
6648 + }
6649 +}
6650 +
6651 +
6652 +/**
6653 + * Switch to Connected state.
6654 + */
6655 +static void connect(struct backend_info *be)
6656 +{
6657 + int err;
6658 +
6659 + struct xenbus_device *dev = be->dev;
6660 +
6661 + err = xenbus_switch_state(dev, XenbusStateConnected);
6662 + if (err)
6663 + xenbus_dev_fatal(dev, err, "switching to Connected state",
6664 + dev->nodename);
6665 +
6666 + return;
6667 +}
6668 +
6669 +
6670 +static int connect_ring(struct backend_info *be)
6671 +{
6672 + struct xenbus_device *dev = be->dev;
6673 + unsigned long ring_ref;
6674 + unsigned int evtchn;
6675 + char protocol[64];
6676 + int err;
6677 +
6678 + DPRINTK("%s\n", dev->otherend);
6679 +
6680 + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
6681 + &ring_ref, "event-channel", "%u", &evtchn, NULL);
6682 + if (err) {
6683 + xenbus_dev_fatal(dev, err,
6684 + "reading %s/ring-ref and event-channel",
6685 + dev->otherend);
6686 + return err;
6687 + }
6688 +
6689 + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
6690 + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
6691 + "%63s", protocol, NULL);
6692 + if (err)
6693 + strcpy(protocol, "unspecified, assuming native");
6694 + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
6695 + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
6696 + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
6697 + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
6698 + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
6699 + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
6700 + else {
6701 + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
6702 + return -1;
6703 + }
6704 + printk(KERN_INFO
6705 + "blktap: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
6706 + ring_ref, evtchn, be->blkif->blk_protocol, protocol);
6707 +
6708 + /* Map the shared frame, irq etc. */
6709 + err = tap_blkif_map(be->blkif, ring_ref, evtchn);
6710 + if (err) {
6711 + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
6712 + ring_ref, evtchn);
6713 + return err;
6714 + }
6715 +
6716 + return 0;
6717 +}
6718 +
6719 +
6720 +/* ** Driver Registration ** */
6721 +
6722 +
6723 +static const struct xenbus_device_id blktap_ids[] = {
6724 + { "tap" },
6725 + { "" }
6726 +};
6727 +
6728 +
6729 +static struct xenbus_driver blktap = {
6730 + .name = "tap",
6731 + .owner = THIS_MODULE,
6732 + .ids = blktap_ids,
6733 + .probe = blktap_probe,
6734 + .remove = blktap_remove,
6735 + .otherend_changed = tap_frontend_changed
6736 +};
6737 +
6738 +
6739 +void tap_blkif_xenbus_init(void)
6740 +{
6741 + xenbus_register_backend(&blktap);
6742 +}
6743 Index: head-2008-11-25/drivers/xen/char/Makefile
6744 ===================================================================
6745 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
6746 +++ head-2008-11-25/drivers/xen/char/Makefile 2007-07-10 09:42:30.000000000 +0200
6747 @@ -0,0 +1 @@
6748 +obj-$(CONFIG_XEN_DEVMEM) := mem.o
6749 Index: head-2008-11-25/drivers/xen/char/mem.c
6750 ===================================================================
6751 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
6752 +++ head-2008-11-25/drivers/xen/char/mem.c 2007-08-06 15:10:49.000000000 +0200
6753 @@ -0,0 +1,190 @@
6754 +/*
6755 + * Originally from linux/drivers/char/mem.c
6756 + *
6757 + * Copyright (C) 1991, 1992 Linus Torvalds
6758 + *
6759 + * Added devfs support.
6760 + * Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
6761 + * Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
6762 + */
6763 +
6764 +#include <linux/mm.h>
6765 +#include <linux/miscdevice.h>
6766 +#include <linux/slab.h>
6767 +#include <linux/vmalloc.h>
6768 +#include <linux/mman.h>
6769 +#include <linux/random.h>
6770 +#include <linux/init.h>
6771 +#include <linux/raw.h>
6772 +#include <linux/tty.h>
6773 +#include <linux/capability.h>
6774 +#include <linux/smp_lock.h>
6775 +#include <linux/ptrace.h>
6776 +#include <linux/device.h>
6777 +#include <asm/pgalloc.h>
6778 +#include <asm/uaccess.h>
6779 +#include <asm/io.h>
6780 +#include <asm/hypervisor.h>
6781 +
6782 +static inline int uncached_access(struct file *file)
6783 +{
6784 + if (file->f_flags & O_SYNC)
6785 + return 1;
6786 + /* Xen sets correct MTRR type on non-RAM for us. */
6787 + return 0;
6788 +}
6789 +
6790 +/*
6791 + * This function reads the *physical* memory. The f_pos points directly to the
6792 + * memory location.
6793 + */
6794 +static ssize_t read_mem(struct file * file, char __user * buf,
6795 + size_t count, loff_t *ppos)
6796 +{
6797 + unsigned long p = *ppos, ignored;
6798 + ssize_t read = 0, sz;
6799 + void __iomem *v;
6800 +
6801 + while (count > 0) {
6802 + /*
6803 + * Handle first page in case it's not aligned
6804 + */
6805 + if (-p & (PAGE_SIZE - 1))
6806 + sz = -p & (PAGE_SIZE - 1);
6807 + else
6808 + sz = PAGE_SIZE;
6809 +
6810 + sz = min_t(unsigned long, sz, count);
6811 +
6812 + v = ioremap(p, sz);
6813 + if (IS_ERR(v) || v == NULL) {
6814 + /*
6815 + * Some programs (e.g., dmidecode) groove off into
6816 + * weird RAM areas where no tables can possibly exist
6817 + * (because Xen will have stomped on them!). These
6818 + * programs get rather upset if we let them know that
6819 + * Xen failed their access, so we fake out a read of
6820 + * all zeroes.
6821 + */
6822 + if (clear_user(buf, count))
6823 + return -EFAULT;
6824 + read += count;
6825 + break;
6826 + }
6827 +
6828 + ignored = copy_to_user(buf, v, sz);
6829 + iounmap(v);
6830 + if (ignored)
6831 + return -EFAULT;
6832 + buf += sz;
6833 + p += sz;
6834 + count -= sz;
6835 + read += sz;
6836 + }
6837 +
6838 + *ppos += read;
6839 + return read;
6840 +}
6841 +
6842 +static ssize_t write_mem(struct file * file, const char __user * buf,
6843 + size_t count, loff_t *ppos)
6844 +{
6845 + unsigned long p = *ppos, ignored;
6846 + ssize_t written = 0, sz;
6847 + void __iomem *v;
6848 +
6849 + while (count > 0) {
6850 + /*
6851 + * Handle first page in case it's not aligned
6852 + */
6853 + if (-p & (PAGE_SIZE - 1))
6854 + sz = -p & (PAGE_SIZE - 1);
6855 + else
6856 + sz = PAGE_SIZE;
6857 +
6858 + sz = min_t(unsigned long, sz, count);
6859 +
6860 + v = ioremap(p, sz);
6861 + if (v == NULL)
6862 + break;
6863 + if (IS_ERR(v)) {
6864 + if (written == 0)
6865 + return PTR_ERR(v);
6866 + break;
6867 + }
6868 +
6869 + ignored = copy_from_user(v, buf, sz);
6870 + iounmap(v);
6871 + if (ignored) {
6872 + written += sz - ignored;
6873 + if (written)
6874 + break;
6875 + return -EFAULT;
6876 + }
6877 + buf += sz;
6878 + p += sz;
6879 + count -= sz;
6880 + written += sz;
6881 + }
6882 +
6883 + *ppos += written;
6884 + return written;
6885 +}
6886 +
6887 +#ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
6888 +static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
6889 +{
6890 + size_t size = vma->vm_end - vma->vm_start;
6891 +
6892 + if (uncached_access(file))
6893 + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
6894 +
6895 + /* We want to return the real error code, not EAGAIN. */
6896 + return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
6897 + size, vma->vm_page_prot, DOMID_IO);
6898 +}
6899 +#endif
6900 +
6901 +/*
6902 + * The memory devices use the full 32/64 bits of the offset, and so we cannot
6903 + * check against negative addresses: they are ok. The return value is weird,
6904 + * though, in that case (0).
6905 + *
6906 + * also note that seeking relative to the "end of file" isn't supported:
6907 + * it has no meaning, so it returns -EINVAL.
6908 + */
6909 +static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
6910 +{
6911 + loff_t ret;
6912 +
6913 + mutex_lock(&file->f_dentry->d_inode->i_mutex);
6914 + switch (orig) {
6915 + case 0:
6916 + file->f_pos = offset;
6917 + ret = file->f_pos;
6918 + force_successful_syscall_return();
6919 + break;
6920 + case 1:
6921 + file->f_pos += offset;
6922 + ret = file->f_pos;
6923 + force_successful_syscall_return();
6924 + break;
6925 + default:
6926 + ret = -EINVAL;
6927 + }
6928 + mutex_unlock(&file->f_dentry->d_inode->i_mutex);
6929 + return ret;
6930 +}
6931 +
6932 +static int open_mem(struct inode * inode, struct file * filp)
6933 +{
6934 + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
6935 +}
6936 +
6937 +const struct file_operations mem_fops = {
6938 + .llseek = memory_lseek,
6939 + .read = read_mem,
6940 + .write = write_mem,
6941 + .mmap = xen_mmap_mem,
6942 + .open = open_mem,
6943 +};
6944 Index: head-2008-11-25/drivers/xen/console/Makefile
6945 ===================================================================
6946 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
6947 +++ head-2008-11-25/drivers/xen/console/Makefile 2007-06-12 13:13:44.000000000 +0200
6948 @@ -0,0 +1,2 @@
6949 +
6950 +obj-y := console.o xencons_ring.o
6951 Index: head-2008-11-25/drivers/xen/console/console.c
6952 ===================================================================
6953 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
6954 +++ head-2008-11-25/drivers/xen/console/console.c 2007-10-15 09:39:38.000000000 +0200
6955 @@ -0,0 +1,731 @@
6956 +/******************************************************************************
6957 + * console.c
6958 + *
6959 + * Virtual console driver.
6960 + *
6961 + * Copyright (c) 2002-2004, K A Fraser.
6962 + *
6963 + * This program is free software; you can redistribute it and/or
6964 + * modify it under the terms of the GNU General Public License version 2
6965 + * as published by the Free Software Foundation; or, when distributed
6966 + * separately from the Linux kernel or incorporated into other
6967 + * software packages, subject to the following license:
6968 + *
6969 + * Permission is hereby granted, free of charge, to any person obtaining a copy
6970 + * of this source file (the "Software"), to deal in the Software without
6971 + * restriction, including without limitation the rights to use, copy, modify,
6972 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
6973 + * and to permit persons to whom the Software is furnished to do so, subject to
6974 + * the following conditions:
6975 + *
6976 + * The above copyright notice and this permission notice shall be included in
6977 + * all copies or substantial portions of the Software.
6978 + *
6979 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
6980 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6981 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
6982 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
6983 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
6984 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
6985 + * IN THE SOFTWARE.
6986 + */
6987 +
6988 +#include <linux/version.h>
6989 +#include <linux/module.h>
6990 +#include <linux/errno.h>
6991 +#include <linux/signal.h>
6992 +#include <linux/sched.h>
6993 +#include <linux/interrupt.h>
6994 +#include <linux/tty.h>
6995 +#include <linux/tty_flip.h>
6996 +#include <linux/serial.h>
6997 +#include <linux/major.h>
6998 +#include <linux/ptrace.h>
6999 +#include <linux/ioport.h>
7000 +#include <linux/mm.h>
7001 +#include <linux/slab.h>
7002 +#include <linux/init.h>
7003 +#include <linux/console.h>
7004 +#include <linux/bootmem.h>
7005 +#include <linux/sysrq.h>
7006 +#include <linux/screen_info.h>
7007 +#include <linux/vt.h>
7008 +#include <asm/io.h>
7009 +#include <asm/irq.h>
7010 +#include <asm/uaccess.h>
7011 +#include <xen/interface/xen.h>
7012 +#include <xen/interface/event_channel.h>
7013 +#include <asm/hypervisor.h>
7014 +#include <xen/evtchn.h>
7015 +#include <xen/xenbus.h>
7016 +#include <xen/xencons.h>
7017 +
7018 +/*
7019 + * Modes:
7020 + * 'xencons=off' [XC_OFF]: Console is disabled.
7021 + * 'xencons=tty' [XC_TTY]: Console attached to '/dev/tty[0-9]+'.
7022 + * 'xencons=ttyS' [XC_SERIAL]: Console attached to '/dev/ttyS[0-9]+'.
7023 + * 'xencons=xvc' [XC_XVC]: Console attached to '/dev/xvc0'.
7024 + * default: XC_XVC
7025 + *
7026 + * NB. In mode XC_TTY, we create dummy consoles for tty2-63. This suppresses
7027 + * warnings from standard distro startup scripts.
7028 + */
7029 +static enum {
7030 + XC_OFF, XC_TTY, XC_SERIAL, XC_XVC
7031 +} xc_mode = XC_XVC;
7032 +static int xc_num = -1;
7033 +
7034 +/* /dev/xvc0 device number allocated by lanana.org. */
7035 +#define XEN_XVC_MAJOR 204
7036 +#define XEN_XVC_MINOR 191
7037 +
7038 +#ifdef CONFIG_MAGIC_SYSRQ
7039 +static unsigned long sysrq_requested;
7040 +extern int sysrq_enabled;
7041 +#endif
7042 +
7043 +static int __init xencons_setup(char *str)
7044 +{
7045 + char *q;
7046 + int n;
7047 + extern int console_use_vt;
7048 +
7049 + console_use_vt = 1;
7050 + if (!strncmp(str, "ttyS", 4)) {
7051 + xc_mode = XC_SERIAL;
7052 + str += 4;
7053 + } else if (!strncmp(str, "tty", 3)) {
7054 + xc_mode = XC_TTY;
7055 + str += 3;
7056 + console_use_vt = 0;
7057 + } else if (!strncmp(str, "xvc", 3)) {
7058 + xc_mode = XC_XVC;
7059 + str += 3;
7060 + } else if (!strncmp(str, "off", 3)) {
7061 + xc_mode = XC_OFF;
7062 + str += 3;
7063 + }
7064 +
7065 + n = simple_strtol(str, &q, 10);
7066 + if (q != str)
7067 + xc_num = n;
7068 +
7069 + return 1;
7070 +}
7071 +__setup("xencons=", xencons_setup);
7072 +
7073 +/* The kernel and user-land drivers share a common transmit buffer. */
7074 +static unsigned int wbuf_size = 4096;
7075 +#define WBUF_MASK(_i) ((_i)&(wbuf_size-1))
7076 +static char *wbuf;
7077 +static unsigned int wc, wp; /* write_cons, write_prod */
7078 +
7079 +static int __init xencons_bufsz_setup(char *str)
7080 +{
7081 + unsigned int goal;
7082 + goal = simple_strtoul(str, NULL, 0);
7083 + if (goal) {
7084 + goal = roundup_pow_of_two(goal);
7085 + if (wbuf_size < goal)
7086 + wbuf_size = goal;
7087 + }
7088 + return 1;
7089 +}
7090 +__setup("xencons_bufsz=", xencons_bufsz_setup);
7091 +
7092 +/* This lock protects accesses to the common transmit buffer. */
7093 +static DEFINE_SPINLOCK(xencons_lock);
7094 +
7095 +/* Common transmit-kick routine. */
7096 +static void __xencons_tx_flush(void);
7097 +
7098 +static struct tty_driver *xencons_driver;
7099 +
7100 +/******************** Kernel console driver ********************************/
7101 +
7102 +static void kcons_write(struct console *c, const char *s, unsigned int count)
7103 +{
7104 + int i = 0;
7105 + unsigned long flags;
7106 +
7107 + spin_lock_irqsave(&xencons_lock, flags);
7108 +
7109 + while (i < count) {
7110 + for (; i < count; i++) {
7111 + if ((wp - wc) >= (wbuf_size - 1))
7112 + break;
7113 + if ((wbuf[WBUF_MASK(wp++)] = s[i]) == '\n')
7114 + wbuf[WBUF_MASK(wp++)] = '\r';
7115 + }
7116 +
7117 + __xencons_tx_flush();
7118 + }
7119 +
7120 + spin_unlock_irqrestore(&xencons_lock, flags);
7121 +}
7122 +
7123 +static void kcons_write_dom0(struct console *c, const char *s, unsigned int count)
7124 +{
7125 +
7126 + while (count > 0) {
7127 + int rc;
7128 + rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
7129 + if (rc <= 0)
7130 + break;
7131 + count -= rc;
7132 + s += rc;
7133 + }
7134 +}
7135 +
7136 +static struct tty_driver *kcons_device(struct console *c, int *index)
7137 +{
7138 + *index = 0;
7139 + return xencons_driver;
7140 +}
7141 +
7142 +static struct console kcons_info = {
7143 + .device = kcons_device,
7144 + .flags = CON_PRINTBUFFER | CON_ENABLED,
7145 + .index = -1,
7146 +};
7147 +
7148 +static int __init xen_console_init(void)
7149 +{
7150 + if (!is_running_on_xen())
7151 + goto out;
7152 +
7153 + if (is_initial_xendomain()) {
7154 + kcons_info.write = kcons_write_dom0;
7155 + } else {
7156 + if (!xen_start_info->console.domU.evtchn)
7157 + goto out;
7158 + kcons_info.write = kcons_write;
7159 + }
7160 +
7161 + switch (xc_mode) {
7162 + case XC_XVC:
7163 + strcpy(kcons_info.name, "xvc");
7164 + if (xc_num == -1)
7165 + xc_num = 0;
7166 + break;
7167 +
7168 + case XC_SERIAL:
7169 + strcpy(kcons_info.name, "ttyS");
7170 + if (xc_num == -1)
7171 + xc_num = 0;
7172 + break;
7173 +
7174 + case XC_TTY:
7175 + strcpy(kcons_info.name, "tty");
7176 + if (xc_num == -1)
7177 + xc_num = 1;
7178 + break;
7179 +
7180 + default:
7181 + goto out;
7182 + }
7183 +
7184 + wbuf = alloc_bootmem(wbuf_size);
7185 +
7186 + register_console(&kcons_info);
7187 +
7188 + out:
7189 + return 0;
7190 +}
7191 +console_initcall(xen_console_init);
7192 +
7193 +/*** Useful function for console debugging -- goes straight to Xen. ***/
7194 +asmlinkage int xprintk(const char *fmt, ...)
7195 +{
7196 + va_list args;
7197 + int printk_len;
7198 + static char printk_buf[1024];
7199 +
7200 + /* Emit the output into the temporary buffer */
7201 + va_start(args, fmt);
7202 + printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args);
7203 + va_end(args);
7204 +
7205 + /* Send the processed output directly to Xen. */
7206 + kcons_write_dom0(NULL, printk_buf, printk_len);
7207 +
7208 + return 0;
7209 +}
7210 +
7211 +/*** Forcibly flush console data before dying. ***/
7212 +void xencons_force_flush(void)
7213 +{
7214 + int sz;
7215 +
7216 + /* Emergency console is synchronous, so there's nothing to flush. */
7217 + if (!is_running_on_xen() ||
7218 + is_initial_xendomain() ||
7219 + !xen_start_info->console.domU.evtchn)
7220 + return;
7221 +
7222 + /* Spin until console data is flushed through to the daemon. */
7223 + while (wc != wp) {
7224 + int sent = 0;
7225 + if ((sz = wp - wc) == 0)
7226 + continue;
7227 + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
7228 + if (sent > 0)
7229 + wc += sent;
7230 + }
7231 +}
7232 +
7233 +
7234 +void __init dom0_init_screen_info(const struct dom0_vga_console_info *info, size_t size)
7235 +{
7236 + /* This is drawn from a dump from vgacon:startup in
7237 + * standard Linux. */
7238 + screen_info.orig_video_mode = 3;
7239 + screen_info.orig_video_isVGA = 1;
7240 + screen_info.orig_video_lines = 25;
7241 + screen_info.orig_video_cols = 80;
7242 + screen_info.orig_video_ega_bx = 3;
7243 + screen_info.orig_video_points = 16;
7244 + screen_info.orig_y = screen_info.orig_video_lines - 1;
7245 +
7246 + switch (info->video_type) {
7247 + case XEN_VGATYPE_TEXT_MODE_3:
7248 + if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3)
7249 + + sizeof(info->u.text_mode_3))
7250 + break;
7251 + screen_info.orig_video_lines = info->u.text_mode_3.rows;
7252 + screen_info.orig_video_cols = info->u.text_mode_3.columns;
7253 + screen_info.orig_x = info->u.text_mode_3.cursor_x;
7254 + screen_info.orig_y = info->u.text_mode_3.cursor_y;
7255 + screen_info.orig_video_points =
7256 + info->u.text_mode_3.font_height;
7257 + break;
7258 +
7259 + case XEN_VGATYPE_VESA_LFB:
7260 + if (size < offsetof(struct dom0_vga_console_info,
7261 + u.vesa_lfb.gbl_caps))
7262 + break;
7263 + screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
7264 + screen_info.lfb_width = info->u.vesa_lfb.width;
7265 + screen_info.lfb_height = info->u.vesa_lfb.height;
7266 + screen_info.lfb_depth = info->u.vesa_lfb.bits_per_pixel;
7267 + screen_info.lfb_base = info->u.vesa_lfb.lfb_base;
7268 + screen_info.lfb_size = info->u.vesa_lfb.lfb_size;
7269 + screen_info.lfb_linelength = info->u.vesa_lfb.bytes_per_line;
7270 + screen_info.red_size = info->u.vesa_lfb.red_size;
7271 + screen_info.red_pos = info->u.vesa_lfb.red_pos;
7272 + screen_info.green_size = info->u.vesa_lfb.green_size;
7273 + screen_info.green_pos = info->u.vesa_lfb.green_pos;
7274 + screen_info.blue_size = info->u.vesa_lfb.blue_size;
7275 + screen_info.blue_pos = info->u.vesa_lfb.blue_pos;
7276 + screen_info.rsvd_size = info->u.vesa_lfb.rsvd_size;
7277 + screen_info.rsvd_pos = info->u.vesa_lfb.rsvd_pos;
7278 + if (size >= offsetof(struct dom0_vga_console_info,
7279 + u.vesa_lfb.gbl_caps)
7280 + + sizeof(info->u.vesa_lfb.gbl_caps))
7281 + screen_info.capabilities = info->u.vesa_lfb.gbl_caps;
7282 + if (size >= offsetof(struct dom0_vga_console_info,
7283 + u.vesa_lfb.mode_attrs)
7284 + + sizeof(info->u.vesa_lfb.mode_attrs))
7285 + screen_info.vesa_attributes = info->u.vesa_lfb.mode_attrs;
7286 + break;
7287 + }
7288 +}
7289 +
7290 +
7291 +/******************** User-space console driver (/dev/console) ************/
7292 +
7293 +#define DRV(_d) (_d)
7294 +#define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \
7295 + ((_tty)->index != (xc_num - 1)))
7296 +
7297 +static struct termios *xencons_termios[MAX_NR_CONSOLES];
7298 +static struct termios *xencons_termios_locked[MAX_NR_CONSOLES];
7299 +static struct tty_struct *xencons_tty;
7300 +static int xencons_priv_irq;
7301 +static char x_char;
7302 +
7303 +void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
7304 +{
7305 + int i;
7306 + unsigned long flags;
7307 +
7308 + spin_lock_irqsave(&xencons_lock, flags);
7309 + if (xencons_tty == NULL)
7310 + goto out;
7311 +
7312 + for (i = 0; i < len; i++) {
7313 +#ifdef CONFIG_MAGIC_SYSRQ
7314 + if (sysrq_enabled) {
7315 + if (buf[i] == '\x0f') { /* ^O */
7316 + if (!sysrq_requested) {
7317 + sysrq_requested = jiffies;
7318 + continue; /* don't print sysrq key */
7319 + }
7320 + sysrq_requested = 0;
7321 + } else if (sysrq_requested) {
7322 + unsigned long sysrq_timeout =
7323 + sysrq_requested + HZ*2;
7324 + sysrq_requested = 0;
7325 + if (time_before(jiffies, sysrq_timeout)) {
7326 + spin_unlock_irqrestore(
7327 + &xencons_lock, flags);
7328 + handle_sysrq(
7329 + buf[i], regs, xencons_tty);
7330 + spin_lock_irqsave(
7331 + &xencons_lock, flags);
7332 + continue;
7333 + }
7334 + }
7335 + }
7336 +#endif
7337 + tty_insert_flip_char(xencons_tty, buf[i], 0);
7338 + }
7339 + tty_flip_buffer_push(xencons_tty);
7340 +
7341 + out:
7342 + spin_unlock_irqrestore(&xencons_lock, flags);
7343 +}
7344 +
7345 +static void __xencons_tx_flush(void)
7346 +{
7347 + int sent, sz, work_done = 0;
7348 +
7349 + if (x_char) {
7350 + if (is_initial_xendomain())
7351 + kcons_write_dom0(NULL, &x_char, 1);
7352 + else
7353 + while (x_char)
7354 + if (xencons_ring_send(&x_char, 1) == 1)
7355 + break;
7356 + x_char = 0;
7357 + work_done = 1;
7358 + }
7359 +
7360 + while (wc != wp) {
7361 + sz = wp - wc;
7362 + if (sz > (wbuf_size - WBUF_MASK(wc)))
7363 + sz = wbuf_size - WBUF_MASK(wc);
7364 + if (is_initial_xendomain()) {
7365 + kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
7366 + wc += sz;
7367 + } else {
7368 + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
7369 + if (sent == 0)
7370 + break;
7371 + wc += sent;
7372 + }
7373 + work_done = 1;
7374 + }
7375 +
7376 + if (work_done && (xencons_tty != NULL)) {
7377 + wake_up_interruptible(&xencons_tty->write_wait);
7378 + if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
7379 + (xencons_tty->ldisc.write_wakeup != NULL))
7380 + (xencons_tty->ldisc.write_wakeup)(xencons_tty);
7381 + }
7382 +}
7383 +
7384 +void xencons_tx(void)
7385 +{
7386 + unsigned long flags;
7387 +
7388 + spin_lock_irqsave(&xencons_lock, flags);
7389 + __xencons_tx_flush();
7390 + spin_unlock_irqrestore(&xencons_lock, flags);
7391 +}
7392 +
7393 +/* Privileged receive callback and transmit kicker. */
7394 +static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
7395 + struct pt_regs *regs)
7396 +{
7397 + static char rbuf[16];
7398 + int l;
7399 +
7400 + while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
7401 + xencons_rx(rbuf, l, regs);
7402 +
7403 + xencons_tx();
7404 +
7405 + return IRQ_HANDLED;
7406 +}
7407 +
7408 +static int xencons_write_room(struct tty_struct *tty)
7409 +{
7410 + return wbuf_size - (wp - wc);
7411 +}
7412 +
7413 +static int xencons_chars_in_buffer(struct tty_struct *tty)
7414 +{
7415 + return wp - wc;
7416 +}
7417 +
7418 +static void xencons_send_xchar(struct tty_struct *tty, char ch)
7419 +{
7420 + unsigned long flags;
7421 +
7422 + if (DUMMY_TTY(tty))
7423 + return;
7424 +
7425 + spin_lock_irqsave(&xencons_lock, flags);
7426 + x_char = ch;
7427 + __xencons_tx_flush();
7428 + spin_unlock_irqrestore(&xencons_lock, flags);
7429 +}
7430 +
7431 +static void xencons_throttle(struct tty_struct *tty)
7432 +{
7433 + if (DUMMY_TTY(tty))
7434 + return;
7435 +
7436 + if (I_IXOFF(tty))
7437 + xencons_send_xchar(tty, STOP_CHAR(tty));
7438 +}
7439 +
7440 +static void xencons_unthrottle(struct tty_struct *tty)
7441 +{
7442 + if (DUMMY_TTY(tty))
7443 + return;
7444 +
7445 + if (I_IXOFF(tty)) {
7446 + if (x_char != 0)
7447 + x_char = 0;
7448 + else
7449 + xencons_send_xchar(tty, START_CHAR(tty));
7450 + }
7451 +}
7452 +
7453 +static void xencons_flush_buffer(struct tty_struct *tty)
7454 +{
7455 + unsigned long flags;
7456 +
7457 + if (DUMMY_TTY(tty))
7458 + return;
7459 +
7460 + spin_lock_irqsave(&xencons_lock, flags);
7461 + wc = wp = 0;
7462 + spin_unlock_irqrestore(&xencons_lock, flags);
7463 +}
7464 +
7465 +static inline int __xencons_put_char(int ch)
7466 +{
7467 + char _ch = (char)ch;
7468 + if ((wp - wc) == wbuf_size)
7469 + return 0;
7470 + wbuf[WBUF_MASK(wp++)] = _ch;
7471 + return 1;
7472 +}
7473 +
7474 +static int xencons_write(
7475 + struct tty_struct *tty,
7476 + const unsigned char *buf,
7477 + int count)
7478 +{
7479 + int i;
7480 + unsigned long flags;
7481 +
7482 + if (DUMMY_TTY(tty))
7483 + return count;
7484 +
7485 + spin_lock_irqsave(&xencons_lock, flags);
7486 +
7487 + for (i = 0; i < count; i++)
7488 + if (!__xencons_put_char(buf[i]))
7489 + break;
7490 +
7491 + if (i != 0)
7492 + __xencons_tx_flush();
7493 +
7494 + spin_unlock_irqrestore(&xencons_lock, flags);
7495 +
7496 + return i;
7497 +}
7498 +
7499 +static void xencons_put_char(struct tty_struct *tty, u_char ch)
7500 +{
7501 + unsigned long flags;
7502 +
7503 + if (DUMMY_TTY(tty))
7504 + return;
7505 +
7506 + spin_lock_irqsave(&xencons_lock, flags);
7507 + (void)__xencons_put_char(ch);
7508 + spin_unlock_irqrestore(&xencons_lock, flags);
7509 +}
7510 +
7511 +static void xencons_flush_chars(struct tty_struct *tty)
7512 +{
7513 + unsigned long flags;
7514 +
7515 + if (DUMMY_TTY(tty))
7516 + return;
7517 +
7518 + spin_lock_irqsave(&xencons_lock, flags);
7519 + __xencons_tx_flush();
7520 + spin_unlock_irqrestore(&xencons_lock, flags);
7521 +}
7522 +
7523 +static void xencons_wait_until_sent(struct tty_struct *tty, int timeout)
7524 +{
7525 + unsigned long orig_jiffies = jiffies;
7526 +
7527 + if (DUMMY_TTY(tty))
7528 + return;
7529 +
7530 + while (DRV(tty->driver)->chars_in_buffer(tty)) {
7531 + set_current_state(TASK_INTERRUPTIBLE);
7532 + schedule_timeout(1);
7533 + if (signal_pending(current))
7534 + break;
7535 + if (timeout && time_after(jiffies, orig_jiffies + timeout))
7536 + break;
7537 + }
7538 +
7539 + set_current_state(TASK_RUNNING);
7540 +}
7541 +
7542 +static int xencons_open(struct tty_struct *tty, struct file *filp)
7543 +{
7544 + unsigned long flags;
7545 +
7546 + if (DUMMY_TTY(tty))
7547 + return 0;
7548 +
7549 + spin_lock_irqsave(&xencons_lock, flags);
7550 + tty->driver_data = NULL;
7551 + if (xencons_tty == NULL)
7552 + xencons_tty = tty;
7553 + __xencons_tx_flush();
7554 + spin_unlock_irqrestore(&xencons_lock, flags);
7555 +
7556 + return 0;
7557 +}
7558 +
7559 +static void xencons_close(struct tty_struct *tty, struct file *filp)
7560 +{
7561 + unsigned long flags;
7562 +
7563 + if (DUMMY_TTY(tty))
7564 + return;
7565 +
7566 + mutex_lock(&tty_mutex);
7567 +
7568 + if (tty->count != 1) {
7569 + mutex_unlock(&tty_mutex);
7570 + return;
7571 + }
7572 +
7573 + /* Prevent other threads from re-opening this tty. */
7574 + set_bit(TTY_CLOSING, &tty->flags);
7575 + mutex_unlock(&tty_mutex);
7576 +
7577 + tty->closing = 1;
7578 + tty_wait_until_sent(tty, 0);
7579 + if (DRV(tty->driver)->flush_buffer != NULL)
7580 + DRV(tty->driver)->flush_buffer(tty);
7581 + if (tty->ldisc.flush_buffer != NULL)
7582 + tty->ldisc.flush_buffer(tty);
7583 + tty->closing = 0;
7584 + spin_lock_irqsave(&xencons_lock, flags);
7585 + xencons_tty = NULL;
7586 + spin_unlock_irqrestore(&xencons_lock, flags);
7587 +}
7588 +
7589 +static struct tty_operations xencons_ops = {
7590 + .open = xencons_open,
7591 + .close = xencons_close,
7592 + .write = xencons_write,
7593 + .write_room = xencons_write_room,
7594 + .put_char = xencons_put_char,
7595 + .flush_chars = xencons_flush_chars,
7596 + .chars_in_buffer = xencons_chars_in_buffer,
7597 + .send_xchar = xencons_send_xchar,
7598 + .flush_buffer = xencons_flush_buffer,
7599 + .throttle = xencons_throttle,
7600 + .unthrottle = xencons_unthrottle,
7601 + .wait_until_sent = xencons_wait_until_sent,
7602 +};
7603 +
7604 +static int __init xencons_init(void)
7605 +{
7606 + int rc;
7607 +
7608 + if (!is_running_on_xen())
7609 + return -ENODEV;
7610 +
7611 + if (xc_mode == XC_OFF)
7612 + return 0;
7613 +
7614 + if (!is_initial_xendomain()) {
7615 + rc = xencons_ring_init();
7616 + if (rc)
7617 + return rc;
7618 + }
7619 +
7620 + xencons_driver = alloc_tty_driver((xc_mode == XC_TTY) ?
7621 + MAX_NR_CONSOLES : 1);
7622 + if (xencons_driver == NULL)
7623 + return -ENOMEM;
7624 +
7625 + DRV(xencons_driver)->name = "xencons";
7626 + DRV(xencons_driver)->major = TTY_MAJOR;
7627 + DRV(xencons_driver)->type = TTY_DRIVER_TYPE_SERIAL;
7628 + DRV(xencons_driver)->subtype = SERIAL_TYPE_NORMAL;
7629 + DRV(xencons_driver)->init_termios = tty_std_termios;
7630 + DRV(xencons_driver)->flags =
7631 + TTY_DRIVER_REAL_RAW |
7632 + TTY_DRIVER_RESET_TERMIOS;
7633 + DRV(xencons_driver)->termios = xencons_termios;
7634 + DRV(xencons_driver)->termios_locked = xencons_termios_locked;
7635 +
7636 + switch (xc_mode) {
7637 + case XC_XVC:
7638 + DRV(xencons_driver)->name = "xvc";
7639 + DRV(xencons_driver)->major = XEN_XVC_MAJOR;
7640 + DRV(xencons_driver)->minor_start = XEN_XVC_MINOR;
7641 + DRV(xencons_driver)->name_base = xc_num;
7642 + break;
7643 + case XC_SERIAL:
7644 + DRV(xencons_driver)->name = "ttyS";
7645 + DRV(xencons_driver)->minor_start = 64 + xc_num;
7646 + DRV(xencons_driver)->name_base = xc_num;
7647 + break;
7648 + default:
7649 + DRV(xencons_driver)->name = "tty";
7650 + DRV(xencons_driver)->minor_start = 1;
7651 + DRV(xencons_driver)->name_base = 1;
7652 + break;
7653 + }
7654 +
7655 + tty_set_operations(xencons_driver, &xencons_ops);
7656 +
7657 + if ((rc = tty_register_driver(DRV(xencons_driver))) != 0) {
7658 + printk("WARNING: Failed to register Xen virtual "
7659 + "console driver as '%s%d'\n",
7660 + DRV(xencons_driver)->name,
7661 + DRV(xencons_driver)->name_base);
7662 + put_tty_driver(xencons_driver);
7663 + xencons_driver = NULL;
7664 + return rc;
7665 + }
7666 +
7667 + if (is_initial_xendomain()) {
7668 + xencons_priv_irq = bind_virq_to_irqhandler(
7669 + VIRQ_CONSOLE,
7670 + 0,
7671 + xencons_priv_interrupt,
7672 + 0,
7673 + "console",
7674 + NULL);
7675 + BUG_ON(xencons_priv_irq < 0);
7676 + }
7677 +
7678 + printk("Xen virtual console successfully installed as %s%d\n",
7679 + DRV(xencons_driver)->name, xc_num);
7680 +
7681 + return 0;
7682 +}
7683 +
7684 +module_init(xencons_init);
7685 +
7686 +MODULE_LICENSE("Dual BSD/GPL");
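
As a side note for readers of this hunk: the console driver above queues all pending output in a single power-of-two ring buffer (wbuf), addressed through the free-running counters wc/wp and the WBUF_MASK() macro. Below is a minimal standalone sketch of that indexing scheme — the names and the small ring size are illustrative and not taken from the patch:

#include <stdio.h>
#include <string.h>

/* Illustrative ring: the size must be a power of two so masking works. */
#define RING_SIZE 16u
#define RING_MASK(i) ((i) & (RING_SIZE - 1))

static char ring[RING_SIZE];
static unsigned int cons, prod;   /* free-running; never wrapped explicitly */

/* Free space: the two counters can differ by at most RING_SIZE. */
static unsigned int ring_space(void) { return RING_SIZE - (prod - cons); }

static int ring_put(char c)
{
        if (ring_space() == 0)
                return 0;                 /* full: caller must flush first */
        ring[RING_MASK(prod++)] = c;
        return 1;
}

static unsigned int ring_get(char *dst, unsigned int len)
{
        unsigned int n = 0;
        while (n < len && cons != prod)
                dst[n++] = ring[RING_MASK(cons++)];
        return n;
}

int main(void)
{
        char out[RING_SIZE + 1];
        const char *msg = "hello";
        unsigned int i, n;

        for (i = 0; i < strlen(msg); i++)
                ring_put(msg[i]);
        n = ring_get(out, sizeof(out) - 1);
        out[n] = '\0';
        printf("%s\n", out);              /* prints "hello" */
        return 0;
}

Because the counters are unsigned and free-running, prod - cons stays meaningful across wrap-around, which is why the driver never resets wc/wp except in xencons_flush_buffer().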
7687 Index: head-2008-11-25/drivers/xen/console/xencons_ring.c
7688 ===================================================================
7689 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
7690 +++ head-2008-11-25/drivers/xen/console/xencons_ring.c 2007-06-12 13:13:44.000000000 +0200
7691 @@ -0,0 +1,143 @@
7692 +/*
7693 + * This program is free software; you can redistribute it and/or
7694 + * modify it under the terms of the GNU General Public License version 2
7695 + * as published by the Free Software Foundation; or, when distributed
7696 + * separately from the Linux kernel or incorporated into other
7697 + * software packages, subject to the following license:
7698 + *
7699 + * Permission is hereby granted, free of charge, to any person obtaining a copy
7700 + * of this source file (the "Software"), to deal in the Software without
7701 + * restriction, including without limitation the rights to use, copy, modify,
7702 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
7703 + * and to permit persons to whom the Software is furnished to do so, subject to
7704 + * the following conditions:
7705 + *
7706 + * The above copyright notice and this permission notice shall be included in
7707 + * all copies or substantial portions of the Software.
7708 + *
7709 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7710 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7711 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7712 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7713 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
7714 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
7715 + * IN THE SOFTWARE.
7716 + */
7717 +
7718 +#include <linux/version.h>
7719 +#include <linux/module.h>
7720 +#include <linux/errno.h>
7721 +#include <linux/signal.h>
7722 +#include <linux/sched.h>
7723 +#include <linux/interrupt.h>
7724 +#include <linux/tty.h>
7725 +#include <linux/tty_flip.h>
7726 +#include <linux/serial.h>
7727 +#include <linux/major.h>
7728 +#include <linux/ptrace.h>
7729 +#include <linux/ioport.h>
7730 +#include <linux/mm.h>
7731 +#include <linux/slab.h>
7732 +
7733 +#include <asm/hypervisor.h>
7734 +#include <xen/evtchn.h>
7735 +#include <xen/xencons.h>
7736 +#include <linux/wait.h>
7737 +#include <linux/interrupt.h>
7738 +#include <linux/sched.h>
7739 +#include <linux/err.h>
7740 +#include <xen/interface/io/console.h>
7741 +
7742 +static int xencons_irq;
7743 +
7744 +static inline struct xencons_interface *xencons_interface(void)
7745 +{
7746 + return mfn_to_virt(xen_start_info->console.domU.mfn);
7747 +}
7748 +
7749 +static inline void notify_daemon(void)
7750 +{
7751 + /* Use evtchn: this is called early, before irq is set up. */
7752 + notify_remote_via_evtchn(xen_start_info->console.domU.evtchn);
7753 +}
7754 +
7755 +int xencons_ring_send(const char *data, unsigned len)
7756 +{
7757 + int sent = 0;
7758 + struct xencons_interface *intf = xencons_interface();
7759 + XENCONS_RING_IDX cons, prod;
7760 +
7761 + cons = intf->out_cons;
7762 + prod = intf->out_prod;
7763 + mb();
7764 + BUG_ON((prod - cons) > sizeof(intf->out));
7765 +
7766 + while ((sent < len) && ((prod - cons) < sizeof(intf->out)))
7767 + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
7768 +
7769 + wmb();
7770 + intf->out_prod = prod;
7771 +
7772 + notify_daemon();
7773 +
7774 + return sent;
7775 +}
7776 +
7777 +static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
7778 +{
7779 + struct xencons_interface *intf = xencons_interface();
7780 + XENCONS_RING_IDX cons, prod;
7781 +
7782 + cons = intf->in_cons;
7783 + prod = intf->in_prod;
7784 + mb();
7785 + BUG_ON((prod - cons) > sizeof(intf->in));
7786 +
7787 + while (cons != prod) {
7788 + xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
7789 + cons++;
7790 + }
7791 +
7792 + mb();
7793 + intf->in_cons = cons;
7794 +
7795 + notify_daemon();
7796 +
7797 + xencons_tx();
7798 +
7799 + return IRQ_HANDLED;
7800 +}
7801 +
7802 +int xencons_ring_init(void)
7803 +{
7804 + int irq;
7805 +
7806 + if (xencons_irq)
7807 + unbind_from_irqhandler(xencons_irq, NULL);
7808 + xencons_irq = 0;
7809 +
7810 + if (!is_running_on_xen() ||
7811 + is_initial_xendomain() ||
7812 + !xen_start_info->console.domU.evtchn)
7813 + return -ENODEV;
7814 +
7815 + irq = bind_caller_port_to_irqhandler(
7816 + xen_start_info->console.domU.evtchn,
7817 + handle_input, 0, "xencons", NULL);
7818 + if (irq < 0) {
7819 + printk(KERN_ERR "XEN console request irq failed %i\n", irq);
7820 + return irq;
7821 + }
7822 +
7823 + xencons_irq = irq;
7824 +
7825 + /* In case we have in-flight data after save/restore... */
7826 + notify_daemon();
7827 +
7828 + return 0;
7829 +}
7830 +
7831 +void xencons_resume(void)
7832 +{
7833 + (void)xencons_ring_init();
7834 +}
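
xencons_ring_send() above follows the usual shared-ring discipline: read the consumer index, copy bytes into the ring, order the data writes before publishing the new producer index, then notify the peer. The following is a hedged, self-contained sketch of that ordering, using C11 atomics in place of the kernel's mb()/wmb() and an illustrative struct rather than the real xencons_interface:

#include <stdatomic.h>

#define OUT_SIZE 1024u
#define OUT_MASK(i) ((i) & (OUT_SIZE - 1))

/* Illustrative stand-in for the shared page; not the real xencons_interface. */
struct demo_ring {
        char out[OUT_SIZE];
        _Atomic unsigned int out_cons;    /* advanced by the consumer */
        _Atomic unsigned int out_prod;    /* advanced by the producer */
};

/* Producer: returns how many bytes were actually queued. */
static unsigned int demo_send(struct demo_ring *r, const char *data, unsigned int len)
{
        unsigned int sent = 0;
        unsigned int cons = atomic_load_explicit(&r->out_cons, memory_order_acquire);
        unsigned int prod = atomic_load_explicit(&r->out_prod, memory_order_relaxed);

        while (sent < len && (prod - cons) < OUT_SIZE)
                r->out[OUT_MASK(prod++)] = data[sent++];

        /* Make the copied data visible before the new producer index is
         * published (the release store plays the role of wmb() plus the
         * plain index write in the patch). */
        atomic_store_explicit(&r->out_prod, prod, memory_order_release);

        return sent;
}

int main(void)
{
        static struct demo_ring ring;     /* zero-initialised */
        const char msg[] = "xen console demo";
        unsigned int sent = demo_send(&ring, msg, sizeof(msg) - 1);
        return sent == sizeof(msg) - 1 ? 0 : 1;
}

A real frontend would follow the publishing store by kicking the backend over an event channel, as the patch does through notify_daemon().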
7835 Index: head-2008-11-25/drivers/xen/core/Makefile
7836 ===================================================================
7837 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
7838 +++ head-2008-11-25/drivers/xen/core/Makefile 2008-07-21 11:00:33.000000000 +0200
7839 @@ -0,0 +1,14 @@
7840 +#
7841 +# Makefile for the linux kernel.
7842 +#
7843 +
7844 +obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o firmware.o
7845 +
7846 +obj-$(CONFIG_PCI) += pci.o
7847 +obj-$(CONFIG_PROC_FS) += xen_proc.o
7848 +obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor_sysfs.o
7849 +obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
7850 +obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
7851 +obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o
7852 +obj-$(CONFIG_KEXEC) += machine_kexec.o
7853 +obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
7854 Index: head-2008-11-25/drivers/xen/core/cpu_hotplug.c
7855 ===================================================================
7856 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
7857 +++ head-2008-11-25/drivers/xen/core/cpu_hotplug.c 2008-01-21 11:15:26.000000000 +0100
7858 @@ -0,0 +1,173 @@
7859 +#include <linux/init.h>
7860 +#include <linux/kernel.h>
7861 +#include <linux/sched.h>
7862 +#include <linux/notifier.h>
7863 +#include <linux/cpu.h>
7864 +#include <xen/cpu_hotplug.h>
7865 +#include <xen/xenbus.h>
7866 +
7867 +/*
7868 + * Set of CPUs that remote admin software will allow us to bring online.
7869 + * Notified to us via xenbus.
7870 + */
7871 +static cpumask_t xenbus_allowed_cpumask;
7872 +
7873 +/* Set of CPUs that local admin will allow us to bring online. */
7874 +static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
7875 +
7876 +static int local_cpu_hotplug_request(void)
7877 +{
7878 + /*
7879 + * We assume a CPU hotplug request comes from local admin if it is made
7880 + * via a userspace process (i.e., one with a real mm_struct).
7881 + */
7882 + return (current->mm != NULL);
7883 +}
7884 +
7885 +static void vcpu_hotplug(unsigned int cpu)
7886 +{
7887 + int err;
7888 + char dir[32], state[32];
7889 +
7890 + if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
7891 + return;
7892 +
7893 + sprintf(dir, "cpu/%u", cpu);
7894 + err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
7895 + if (err != 1) {
7896 + printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
7897 + return;
7898 + }
7899 +
7900 + if (strcmp(state, "online") == 0) {
7901 + cpu_set(cpu, xenbus_allowed_cpumask);
7902 + (void)cpu_up(cpu);
7903 + } else if (strcmp(state, "offline") == 0) {
7904 + cpu_clear(cpu, xenbus_allowed_cpumask);
7905 + (void)cpu_down(cpu);
7906 + } else {
7907 + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
7908 + state, cpu);
7909 + }
7910 +}
7911 +
7912 +static void handle_vcpu_hotplug_event(
7913 + struct xenbus_watch *watch, const char **vec, unsigned int len)
7914 +{
7915 + unsigned int cpu;
7916 + char *cpustr;
7917 + const char *node = vec[XS_WATCH_PATH];
7918 +
7919 + if ((cpustr = strstr(node, "cpu/")) != NULL) {
7920 + sscanf(cpustr, "cpu/%u", &cpu);
7921 + vcpu_hotplug(cpu);
7922 + }
7923 +}
7924 +
7925 +static int smpboot_cpu_notify(struct notifier_block *notifier,
7926 + unsigned long action, void *hcpu)
7927 +{
7928 + unsigned int cpu = (long)hcpu;
7929 +
7930 + /*
7931 + * We do this in a callback notifier rather than __cpu_disable()
7932 + * because local_cpu_hotplug_request() does not work in the latter
7933 + * as it's always executed from within a stopmachine kthread.
7934 + */
7935 + if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
7936 + cpu_clear(cpu, local_allowed_cpumask);
7937 +
7938 + return NOTIFY_OK;
7939 +}
7940 +
7941 +static int setup_cpu_watcher(struct notifier_block *notifier,
7942 + unsigned long event, void *data)
7943 +{
7944 + unsigned int i;
7945 +
7946 + static struct xenbus_watch cpu_watch = {
7947 + .node = "cpu",
7948 + .callback = handle_vcpu_hotplug_event,
7949 + .flags = XBWF_new_thread };
7950 + (void)register_xenbus_watch(&cpu_watch);
7951 +
7952 + if (!is_initial_xendomain()) {
7953 + for_each_possible_cpu(i)
7954 + vcpu_hotplug(i);
7955 + printk(KERN_INFO "Brought up %ld CPUs\n",
7956 + (long)num_online_cpus());
7957 + }
7958 +
7959 + return NOTIFY_DONE;
7960 +}
7961 +
7962 +static int __init setup_vcpu_hotplug_event(void)
7963 +{
7964 + static struct notifier_block hotplug_cpu = {
7965 + .notifier_call = smpboot_cpu_notify };
7966 + static struct notifier_block xsn_cpu = {
7967 + .notifier_call = setup_cpu_watcher };
7968 +
7969 + if (!is_running_on_xen())
7970 + return -ENODEV;
7971 +
7972 + register_cpu_notifier(&hotplug_cpu);
7973 + register_xenstore_notifier(&xsn_cpu);
7974 +
7975 + return 0;
7976 +}
7977 +
7978 +arch_initcall(setup_vcpu_hotplug_event);
7979 +
7980 +int smp_suspend(void)
7981 +{
7982 + unsigned int cpu;
7983 + int err;
7984 +
7985 + for_each_online_cpu(cpu) {
7986 + if (cpu == 0)
7987 + continue;
7988 + err = cpu_down(cpu);
7989 + if (err) {
7990 + printk(KERN_CRIT "Failed to take all CPUs "
7991 + "down: %d.\n", err);
7992 + for_each_possible_cpu(cpu)
7993 + vcpu_hotplug(cpu);
7994 + return err;
7995 + }
7996 + }
7997 +
7998 + return 0;
7999 +}
8000 +
8001 +void smp_resume(void)
8002 +{
8003 + unsigned int cpu;
8004 +
8005 + for_each_possible_cpu(cpu)
8006 + vcpu_hotplug(cpu);
8007 +}
8008 +
8009 +int cpu_up_check(unsigned int cpu)
8010 +{
8011 + int rc = 0;
8012 +
8013 + if (local_cpu_hotplug_request()) {
8014 + cpu_set(cpu, local_allowed_cpumask);
8015 + if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
8016 + printk("%s: attempt to bring up CPU %u disallowed by "
8017 + "remote admin.\n", __FUNCTION__, cpu);
8018 + rc = -EBUSY;
8019 + }
8020 + } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
8021 + !cpu_isset(cpu, xenbus_allowed_cpumask)) {
8022 + rc = -EBUSY;
8023 + }
8024 +
8025 + return rc;
8026 +}
8027 +
8028 +void init_xenbus_allowed_cpumask(void)
8029 +{
8030 + xenbus_allowed_cpumask = cpu_present_map;
8031 +}
8032 Index: head-2008-11-25/drivers/xen/core/evtchn.c
8033 ===================================================================
8034 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
8035 +++ head-2008-11-25/drivers/xen/core/evtchn.c 2008-11-10 11:44:21.000000000 +0100
8036 @@ -0,0 +1,1140 @@
8037 +/******************************************************************************
8038 + * evtchn.c
8039 + *
8040 + * Communication via Xen event channels.
8041 + *
8042 + * Copyright (c) 2002-2005, K A Fraser
8043 + *
8044 + * This program is free software; you can redistribute it and/or
8045 + * modify it under the terms of the GNU General Public License version 2
8046 + * as published by the Free Software Foundation; or, when distributed
8047 + * separately from the Linux kernel or incorporated into other
8048 + * software packages, subject to the following license:
8049 + *
8050 + * Permission is hereby granted, free of charge, to any person obtaining a copy
8051 + * of this source file (the "Software"), to deal in the Software without
8052 + * restriction, including without limitation the rights to use, copy, modify,
8053 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
8054 + * and to permit persons to whom the Software is furnished to do so, subject to
8055 + * the following conditions:
8056 + *
8057 + * The above copyright notice and this permission notice shall be included in
8058 + * all copies or substantial portions of the Software.
8059 + *
8060 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
8061 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
8062 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
8063 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
8064 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
8065 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
8066 + * IN THE SOFTWARE.
8067 + */
8068 +
8069 +#include <linux/module.h>
8070 +#include <linux/irq.h>
8071 +#include <linux/interrupt.h>
8072 +#include <linux/sched.h>
8073 +#include <linux/kernel_stat.h>
8074 +#include <linux/version.h>
8075 +#include <asm/atomic.h>
8076 +#include <asm/system.h>
8077 +#include <asm/ptrace.h>
8078 +#include <asm/synch_bitops.h>
8079 +#include <xen/evtchn.h>
8080 +#include <xen/interface/event_channel.h>
8081 +#include <xen/interface/physdev.h>
8082 +#include <asm/hypervisor.h>
8083 +#include <linux/mc146818rtc.h> /* RTC_IRQ */
8084 +
8085 +/*
8086 + * This lock protects updates to the following mapping and reference-count
8087 + * arrays. The lock does not need to be acquired to read the mapping tables.
8088 + */
8089 +static DEFINE_SPINLOCK(irq_mapping_update_lock);
8090 +
8091 +/* IRQ <-> event-channel mappings. */
8092 +static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
8093 + [0 ... NR_EVENT_CHANNELS-1] = -1 };
8094 +
8095 +/* Packed IRQ information: binding type, sub-type index, and event channel. */
8096 +static u32 irq_info[NR_IRQS];
8097 +
8098 +/* Binding types. */
8099 +enum {
8100 + IRQT_UNBOUND,
8101 + IRQT_PIRQ,
8102 + IRQT_VIRQ,
8103 + IRQT_IPI,
8104 + IRQT_LOCAL_PORT,
8105 + IRQT_CALLER_PORT,
8106 + _IRQT_COUNT
8107 +};
8108 +
8109 +#define _IRQT_BITS 4
8110 +#define _EVTCHN_BITS 12
8111 +#define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS)
8112 +
8113 +/* Constructor for packed IRQ information. */
8114 +static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn)
8115 +{
8116 + BUILD_BUG_ON(_IRQT_COUNT > (1U << _IRQT_BITS));
8117 +
8118 + BUILD_BUG_ON(NR_PIRQS > (1U << _INDEX_BITS));
8119 + BUILD_BUG_ON(NR_VIRQS > (1U << _INDEX_BITS));
8120 + BUILD_BUG_ON(NR_IPIS > (1U << _INDEX_BITS));
8121 + BUG_ON(index >> _INDEX_BITS);
8122 +
8123 + BUILD_BUG_ON(NR_EVENT_CHANNELS > (1U << _EVTCHN_BITS));
8124 +
8125 + return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn);
8126 +}
8127 +
8128 +/* Convenient shorthand for packed representation of an unbound IRQ. */
8129 +#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
8130 +
8131 +/*
8132 + * Accessors for packed IRQ information.
8133 + */
8134 +
8135 +static inline unsigned int evtchn_from_irq(int irq)
8136 +{
8137 + return irq_info[irq] & ((1U << _EVTCHN_BITS) - 1);
8138 +}
8139 +
8140 +static inline unsigned int index_from_irq(int irq)
8141 +{
8142 + return (irq_info[irq] >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1);
8143 +}
8144 +
8145 +static inline unsigned int type_from_irq(int irq)
8146 +{
8147 + return irq_info[irq] >> (32 - _IRQT_BITS);
8148 +}
8149 +
8150 +/* IRQ <-> VIRQ mapping. */
8151 +DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
8152 +
8153 +/* IRQ <-> IPI mapping. */
8154 +#ifndef NR_IPIS
8155 +#define NR_IPIS 1
8156 +#endif
8157 +DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
8158 +
8159 +/* Reference counts for bindings to IRQs. */
8160 +static int irq_bindcount[NR_IRQS];
8161 +
8162 +/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
8163 +static DECLARE_BITMAP(pirq_needs_eoi, NR_PIRQS);
8164 +
8165 +#ifdef CONFIG_SMP
8166 +
8167 +static u8 cpu_evtchn[NR_EVENT_CHANNELS];
8168 +static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
8169 +
8170 +static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
8171 + unsigned int idx)
8172 +{
8173 + return (sh->evtchn_pending[idx] &
8174 + cpu_evtchn_mask[cpu][idx] &
8175 + ~sh->evtchn_mask[idx]);
8176 +}
8177 +
8178 +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
8179 +{
8180 + shared_info_t *s = HYPERVISOR_shared_info;
8181 + int irq = evtchn_to_irq[chn];
8182 +
8183 + BUG_ON(!test_bit(chn, s->evtchn_mask));
8184 +
8185 + if (irq != -1)
8186 + set_native_irq_info(irq, cpumask_of_cpu(cpu));
8187 +
8188 + clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]);
8189 + set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
8190 + cpu_evtchn[chn] = cpu;
8191 +}
8192 +
8193 +static void init_evtchn_cpu_bindings(void)
8194 +{
8195 + int i;
8196 +
8197 + /* By default all event channels notify CPU#0. */
8198 + for (i = 0; i < NR_IRQS; i++)
8199 + set_native_irq_info(i, cpumask_of_cpu(0));
8200 +
8201 + memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
8202 + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
8203 +}
8204 +
8205 +static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
8206 +{
8207 + return cpu_evtchn[evtchn];
8208 +}
8209 +
8210 +#else
8211 +
8212 +static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
8213 + unsigned int idx)
8214 +{
8215 + return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]);
8216 +}
8217 +
8218 +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
8219 +{
8220 +}
8221 +
8222 +static void init_evtchn_cpu_bindings(void)
8223 +{
8224 +}
8225 +
8226 +static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
8227 +{
8228 + return 0;
8229 +}
8230 +
8231 +#endif
8232 +
8233 +/* Upcall to generic IRQ layer. */
8234 +#ifdef CONFIG_X86
8235 +extern fastcall unsigned int do_IRQ(struct pt_regs *regs);
8236 +void __init xen_init_IRQ(void);
8237 +void __init init_IRQ(void)
8238 +{
8239 + irq_ctx_init(0);
8240 + xen_init_IRQ();
8241 +}
8242 +#if defined (__i386__)
8243 +static inline void exit_idle(void) {}
8244 +#define IRQ_REG orig_eax
8245 +#elif defined (__x86_64__)
8246 +#include <asm/idle.h>
8247 +#define IRQ_REG orig_rax
8248 +#endif
8249 +#define do_IRQ(irq, regs) do { \
8250 + (regs)->IRQ_REG = ~(irq); \
8251 + do_IRQ((regs)); \
8252 +} while (0)
8253 +#endif
8254 +
8255 +/* Xen will never allocate port zero for any purpose. */
8256 +#define VALID_EVTCHN(chn) ((chn) != 0)
8257 +
8258 +/*
8259 + * Force a proper event-channel callback from Xen after clearing the
8260 + * callback mask. We do this in a very simple manner, by making a call
8261 + * down into Xen. The pending flag will be checked by Xen on return.
8262 + */
8263 +void force_evtchn_callback(void)
8264 +{
8265 + VOID(HYPERVISOR_xen_version(0, NULL));
8266 +}
8267 +/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
8268 +EXPORT_SYMBOL(force_evtchn_callback);
8269 +
8270 +static DEFINE_PER_CPU(unsigned int, upcall_count) = { 0 };
8271 +static DEFINE_PER_CPU(unsigned int, last_processed_l1i) = { BITS_PER_LONG - 1 };
8272 +static DEFINE_PER_CPU(unsigned int, last_processed_l2i) = { BITS_PER_LONG - 1 };
8273 +
8274 +/* NB. Interrupts are disabled on entry. */
8275 +asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
8276 +{
8277 + unsigned long l1, l2;
8278 + unsigned long masked_l1, masked_l2;
8279 + unsigned int l1i, l2i, port, count;
8280 + int irq;
8281 + unsigned int cpu = smp_processor_id();
8282 + shared_info_t *s = HYPERVISOR_shared_info;
8283 + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
8284 +
8285 + exit_idle();
8286 + irq_enter();
8287 +
8288 + do {
8289 + /* Avoid a callback storm when we reenable delivery. */
8290 + vcpu_info->evtchn_upcall_pending = 0;
8291 +
8292 + /* Nested invocations bail immediately. */
8293 + if (unlikely(per_cpu(upcall_count, cpu)++))
8294 + break;
8295 +
8296 +#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
8297 + /* Clear master flag /before/ clearing selector flag. */
8298 + wmb();
8299 +#endif
8300 + l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
8301 +
8302 + l1i = per_cpu(last_processed_l1i, cpu);
8303 + l2i = per_cpu(last_processed_l2i, cpu);
8304 +
8305 + while (l1 != 0) {
8306 +
8307 + l1i = (l1i + 1) % BITS_PER_LONG;
8308 + masked_l1 = l1 & ((~0UL) << l1i);
8309 +
8310 + if (masked_l1 == 0) { /* if we masked out all events, wrap around to the beginning */
8311 + l1i = BITS_PER_LONG - 1;
8312 + l2i = BITS_PER_LONG - 1;
8313 + continue;
8314 + }
8315 + l1i = __ffs(masked_l1);
8316 +
8317 + do {
8318 + l2 = active_evtchns(cpu, s, l1i);
8319 +
8320 + l2i = (l2i + 1) % BITS_PER_LONG;
8321 + masked_l2 = l2 & ((~0UL) << l2i);
8322 +
8323 + if (masked_l2 == 0) { /* if we masked out all events, move on */
8324 + l2i = BITS_PER_LONG - 1;
8325 + break;
8326 + }
8327 +
8328 + l2i = __ffs(masked_l2);
8329 +
8330 + /* process port */
8331 + port = (l1i * BITS_PER_LONG) + l2i;
8332 + if ((irq = evtchn_to_irq[port]) != -1)
8333 + do_IRQ(irq, regs);
8334 + else
8335 + evtchn_device_upcall(port);
8336 +
8337 + /* if this is the final port processed, we'll pick up here+1 next time */
8338 + per_cpu(last_processed_l1i, cpu) = l1i;
8339 + per_cpu(last_processed_l2i, cpu) = l2i;
8340 +
8341 + } while (l2i != BITS_PER_LONG - 1);
8342 +
8343 + l2 = active_evtchns(cpu, s, l1i);
8344 + if (l2 == 0) /* we handled all ports, so we can clear the selector bit */
8345 + l1 &= ~(1UL << l1i);
8346 +
8347 + }
8348 +
8349 + /* If there were nested callbacks then we have more to do. */
8350 + count = per_cpu(upcall_count, cpu);
8351 + per_cpu(upcall_count, cpu) = 0;
8352 + } while (unlikely(count != 1));
8353 +
8354 + irq_exit();
8355 +}
8356 +
8357 +static int find_unbound_irq(void)
8358 +{
8359 + static int warned;
8360 + int irq;
8361 +
8362 + for (irq = DYNIRQ_BASE; irq < (DYNIRQ_BASE + NR_DYNIRQS); irq++)
8363 + if (irq_bindcount[irq] == 0)
8364 + return irq;
8365 +
8366 + if (!warned) {
8367 + warned = 1;
8368 + printk(KERN_WARNING "No available IRQ to bind to: "
8369 + "increase NR_DYNIRQS.\n");
8370 + }
8371 +
8372 + return -ENOSPC;
8373 +}
8374 +
8375 +static int bind_caller_port_to_irq(unsigned int caller_port)
8376 +{
8377 + int irq;
8378 +
8379 + spin_lock(&irq_mapping_update_lock);
8380 +
8381 + if ((irq = evtchn_to_irq[caller_port]) == -1) {
8382 + if ((irq = find_unbound_irq()) < 0)
8383 + goto out;
8384 +
8385 + evtchn_to_irq[caller_port] = irq;
8386 + irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port);
8387 + }
8388 +
8389 + irq_bindcount[irq]++;
8390 +
8391 + out:
8392 + spin_unlock(&irq_mapping_update_lock);
8393 + return irq;
8394 +}
8395 +
8396 +static int bind_local_port_to_irq(unsigned int local_port)
8397 +{
8398 + int irq;
8399 +
8400 + spin_lock(&irq_mapping_update_lock);
8401 +
8402 + BUG_ON(evtchn_to_irq[local_port] != -1);
8403 +
8404 + if ((irq = find_unbound_irq()) < 0) {
8405 + struct evtchn_close close = { .port = local_port };
8406 + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
8407 + BUG();
8408 + goto out;
8409 + }
8410 +
8411 + evtchn_to_irq[local_port] = irq;
8412 + irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
8413 + irq_bindcount[irq]++;
8414 +
8415 + out:
8416 + spin_unlock(&irq_mapping_update_lock);
8417 + return irq;
8418 +}
8419 +
8420 +static int bind_listening_port_to_irq(unsigned int remote_domain)
8421 +{
8422 + struct evtchn_alloc_unbound alloc_unbound;
8423 + int err;
8424 +
8425 + alloc_unbound.dom = DOMID_SELF;
8426 + alloc_unbound.remote_dom = remote_domain;
8427 +
8428 + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
8429 + &alloc_unbound);
8430 +
8431 + return err ? : bind_local_port_to_irq(alloc_unbound.port);
8432 +}
8433 +
8434 +static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
8435 + unsigned int remote_port)
8436 +{
8437 + struct evtchn_bind_interdomain bind_interdomain;
8438 + int err;
8439 +
8440 + bind_interdomain.remote_dom = remote_domain;
8441 + bind_interdomain.remote_port = remote_port;
8442 +
8443 + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
8444 + &bind_interdomain);
8445 +
8446 + return err ? : bind_local_port_to_irq(bind_interdomain.local_port);
8447 +}
8448 +
8449 +static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
8450 +{
8451 + struct evtchn_bind_virq bind_virq;
8452 + int evtchn, irq;
8453 +
8454 + spin_lock(&irq_mapping_update_lock);
8455 +
8456 + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
8457 + if ((irq = find_unbound_irq()) < 0)
8458 + goto out;
8459 +
8460 + bind_virq.virq = virq;
8461 + bind_virq.vcpu = cpu;
8462 + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
8463 + &bind_virq) != 0)
8464 + BUG();
8465 + evtchn = bind_virq.port;
8466 +
8467 + evtchn_to_irq[evtchn] = irq;
8468 + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
8469 +
8470 + per_cpu(virq_to_irq, cpu)[virq] = irq;
8471 +
8472 + bind_evtchn_to_cpu(evtchn, cpu);
8473 + }
8474 +
8475 + irq_bindcount[irq]++;
8476 +
8477 + out:
8478 + spin_unlock(&irq_mapping_update_lock);
8479 + return irq;
8480 +}
8481 +
8482 +static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
8483 +{
8484 + struct evtchn_bind_ipi bind_ipi;
8485 + int evtchn, irq;
8486 +
8487 + spin_lock(&irq_mapping_update_lock);
8488 +
8489 + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {
8490 + if ((irq = find_unbound_irq()) < 0)
8491 + goto out;
8492 +
8493 + bind_ipi.vcpu = cpu;
8494 + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
8495 + &bind_ipi) != 0)
8496 + BUG();
8497 + evtchn = bind_ipi.port;
8498 +
8499 + evtchn_to_irq[evtchn] = irq;
8500 + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
8501 +
8502 + per_cpu(ipi_to_irq, cpu)[ipi] = irq;
8503 +
8504 + bind_evtchn_to_cpu(evtchn, cpu);
8505 + }
8506 +
8507 + irq_bindcount[irq]++;
8508 +
8509 + out:
8510 + spin_unlock(&irq_mapping_update_lock);
8511 + return irq;
8512 +}
8513 +
8514 +static void unbind_from_irq(unsigned int irq)
8515 +{
8516 + struct evtchn_close close;
8517 + unsigned int cpu;
8518 + int evtchn = evtchn_from_irq(irq);
8519 +
8520 + spin_lock(&irq_mapping_update_lock);
8521 +
8522 + if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
8523 + close.port = evtchn;
8524 + if ((type_from_irq(irq) != IRQT_CALLER_PORT) &&
8525 + HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
8526 + BUG();
8527 +
8528 + switch (type_from_irq(irq)) {
8529 + case IRQT_VIRQ:
8530 + per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
8531 + [index_from_irq(irq)] = -1;
8532 + break;
8533 + case IRQT_IPI:
8534 + per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
8535 + [index_from_irq(irq)] = -1;
8536 + break;
8537 + default:
8538 + break;
8539 + }
8540 +
8541 + /* Closed ports are implicitly re-bound to VCPU0. */
8542 + bind_evtchn_to_cpu(evtchn, 0);
8543 +
8544 + evtchn_to_irq[evtchn] = -1;
8545 + irq_info[irq] = IRQ_UNBOUND;
8546 +
8547 + /* Zap stats across IRQ changes of use. */
8548 + for_each_possible_cpu(cpu)
8549 + kstat_cpu(cpu).irqs[irq] = 0;
8550 + }
8551 +
8552 + spin_unlock(&irq_mapping_update_lock);
8553 +}
8554 +
8555 +int bind_caller_port_to_irqhandler(
8556 + unsigned int caller_port,
8557 + irqreturn_t (*handler)(int, void *, struct pt_regs *),
8558 + unsigned long irqflags,
8559 + const char *devname,
8560 + void *dev_id)
8561 +{
8562 + int irq, retval;
8563 +
8564 + irq = bind_caller_port_to_irq(caller_port);
8565 + if (irq < 0)
8566 + return irq;
8567 +
8568 + retval = request_irq(irq, handler, irqflags, devname, dev_id);
8569 + if (retval != 0) {
8570 + unbind_from_irq(irq);
8571 + return retval;
8572 + }
8573 +
8574 + return irq;
8575 +}
8576 +EXPORT_SYMBOL_GPL(bind_caller_port_to_irqhandler);
8577 +
8578 +int bind_listening_port_to_irqhandler(
8579 + unsigned int remote_domain,
8580 + irqreturn_t (*handler)(int, void *, struct pt_regs *),
8581 + unsigned long irqflags,
8582 + const char *devname,
8583 + void *dev_id)
8584 +{
8585 + int irq, retval;
8586 +
8587 + irq = bind_listening_port_to_irq(remote_domain);
8588 + if (irq < 0)
8589 + return irq;
8590 +
8591 + retval = request_irq(irq, handler, irqflags, devname, dev_id);
8592 + if (retval != 0) {
8593 + unbind_from_irq(irq);
8594 + return retval;
8595 + }
8596 +
8597 + return irq;
8598 +}
8599 +EXPORT_SYMBOL_GPL(bind_listening_port_to_irqhandler);
8600 +
8601 +int bind_interdomain_evtchn_to_irqhandler(
8602 + unsigned int remote_domain,
8603 + unsigned int remote_port,
8604 + irqreturn_t (*handler)(int, void *, struct pt_regs *),
8605 + unsigned long irqflags,
8606 + const char *devname,
8607 + void *dev_id)
8608 +{
8609 + int irq, retval;
8610 +
8611 + irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
8612 + if (irq < 0)
8613 + return irq;
8614 +
8615 + retval = request_irq(irq, handler, irqflags, devname, dev_id);
8616 + if (retval != 0) {
8617 + unbind_from_irq(irq);
8618 + return retval;
8619 + }
8620 +
8621 + return irq;
8622 +}
8623 +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
8624 +
8625 +int bind_virq_to_irqhandler(
8626 + unsigned int virq,
8627 + unsigned int cpu,
8628 + irqreturn_t (*handler)(int, void *, struct pt_regs *),
8629 + unsigned long irqflags,
8630 + const char *devname,
8631 + void *dev_id)
8632 +{
8633 + int irq, retval;
8634 +
8635 + irq = bind_virq_to_irq(virq, cpu);
8636 + if (irq < 0)
8637 + return irq;
8638 +
8639 + retval = request_irq(irq, handler, irqflags, devname, dev_id);
8640 + if (retval != 0) {
8641 + unbind_from_irq(irq);
8642 + return retval;
8643 + }
8644 +
8645 + return irq;
8646 +}
8647 +EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
8648 +
8649 +int bind_ipi_to_irqhandler(
8650 + unsigned int ipi,
8651 + unsigned int cpu,
8652 + irqreturn_t (*handler)(int, void *, struct pt_regs *),
8653 + unsigned long irqflags,
8654 + const char *devname,
8655 + void *dev_id)
8656 +{
8657 + int irq, retval;
8658 +
8659 + irq = bind_ipi_to_irq(ipi, cpu);
8660 + if (irq < 0)
8661 + return irq;
8662 +
8663 + retval = request_irq(irq, handler, irqflags, devname, dev_id);
8664 + if (retval != 0) {
8665 + unbind_from_irq(irq);
8666 + return retval;
8667 + }
8668 +
8669 + return irq;
8670 +}
8671 +EXPORT_SYMBOL_GPL(bind_ipi_to_irqhandler);
8672 +
8673 +void unbind_from_irqhandler(unsigned int irq, void *dev_id)
8674 +{
8675 + free_irq(irq, dev_id);
8676 + unbind_from_irq(irq);
8677 +}
8678 +EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
8679 +
8680 +#ifdef CONFIG_SMP
8681 +void rebind_evtchn_to_cpu(int port, unsigned int cpu)
8682 +{
8683 + struct evtchn_bind_vcpu ebv = { .port = port, .vcpu = cpu };
8684 + int masked;
8685 +
8686 + masked = test_and_set_evtchn_mask(port);
8687 + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv) == 0)
8688 + bind_evtchn_to_cpu(port, cpu);
8689 + if (!masked)
8690 + unmask_evtchn(port);
8691 +}
8692 +
8693 +static void rebind_irq_to_cpu(unsigned int irq, unsigned int tcpu)
8694 +{
8695 + int evtchn = evtchn_from_irq(irq);
8696 +
8697 + if (VALID_EVTCHN(evtchn))
8698 + rebind_evtchn_to_cpu(evtchn, tcpu);
8699 +}
8700 +
8701 +static void set_affinity_irq(unsigned int irq, cpumask_t dest)
8702 +{
8703 + unsigned tcpu = first_cpu(dest);
8704 + rebind_irq_to_cpu(irq, tcpu);
8705 +}
8706 +#endif
8707 +
8708 +int resend_irq_on_evtchn(unsigned int irq)
8709 +{
8710 + int masked, evtchn = evtchn_from_irq(irq);
8711 + shared_info_t *s = HYPERVISOR_shared_info;
8712 +
8713 + if (!VALID_EVTCHN(evtchn))
8714 + return 1;
8715 +
8716 + masked = test_and_set_evtchn_mask(evtchn);
8717 + synch_set_bit(evtchn, s->evtchn_pending);
8718 + if (!masked)
8719 + unmask_evtchn(evtchn);
8720 +
8721 + return 1;
8722 +}
8723 +
8724 +/*
8725 + * Interface to generic handling in irq.c
8726 + */
8727 +
8728 +static unsigned int startup_dynirq(unsigned int irq)
8729 +{
8730 + int evtchn = evtchn_from_irq(irq);
8731 +
8732 + if (VALID_EVTCHN(evtchn))
8733 + unmask_evtchn(evtchn);
8734 + return 0;
8735 +}
8736 +
8737 +static void shutdown_dynirq(unsigned int irq)
8738 +{
8739 + int evtchn = evtchn_from_irq(irq);
8740 +
8741 + if (VALID_EVTCHN(evtchn))
8742 + mask_evtchn(evtchn);
8743 +}
8744 +
8745 +static void enable_dynirq(unsigned int irq)
8746 +{
8747 + int evtchn = evtchn_from_irq(irq);
8748 +
8749 + if (VALID_EVTCHN(evtchn))
8750 + unmask_evtchn(evtchn);
8751 +}
8752 +
8753 +static void disable_dynirq(unsigned int irq)
8754 +{
8755 + int evtchn = evtchn_from_irq(irq);
8756 +
8757 + if (VALID_EVTCHN(evtchn))
8758 + mask_evtchn(evtchn);
8759 +}
8760 +
8761 +static void ack_dynirq(unsigned int irq)
8762 +{
8763 + int evtchn = evtchn_from_irq(irq);
8764 +
8765 + move_native_irq(irq);
8766 +
8767 + if (VALID_EVTCHN(evtchn)) {
8768 + mask_evtchn(evtchn);
8769 + clear_evtchn(evtchn);
8770 + }
8771 +}
8772 +
8773 +static void end_dynirq(unsigned int irq)
8774 +{
8775 + int evtchn = evtchn_from_irq(irq);
8776 +
8777 + if (VALID_EVTCHN(evtchn) && !(irq_desc[irq].status & IRQ_DISABLED))
8778 + unmask_evtchn(evtchn);
8779 +}
8780 +
8781 +static struct hw_interrupt_type dynirq_type = {
8782 + .typename = "Dynamic-irq",
8783 + .startup = startup_dynirq,
8784 + .shutdown = shutdown_dynirq,
8785 + .enable = enable_dynirq,
8786 + .disable = disable_dynirq,
8787 + .ack = ack_dynirq,
8788 + .end = end_dynirq,
8789 +#ifdef CONFIG_SMP
8790 + .set_affinity = set_affinity_irq,
8791 +#endif
8792 + .retrigger = resend_irq_on_evtchn,
8793 +};
8794 +
8795 +static inline void pirq_unmask_notify(int irq)
8796 +{
8797 + struct physdev_eoi eoi = { .irq = evtchn_get_xen_pirq(irq) };
8798 + if (unlikely(test_bit(irq - PIRQ_BASE, pirq_needs_eoi)))
8799 + VOID(HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi));
8800 +}
8801 +
8802 +static inline void pirq_query_unmask(int irq)
8803 +{
8804 + struct physdev_irq_status_query irq_status;
8805 + irq_status.irq = evtchn_get_xen_pirq(irq);
8806 + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
8807 + irq_status.flags = 0;
8808 + clear_bit(irq - PIRQ_BASE, pirq_needs_eoi);
8809 + if (irq_status.flags & XENIRQSTAT_needs_eoi)
8810 + set_bit(irq - PIRQ_BASE, pirq_needs_eoi);
8811 +}
8812 +
8813 +/*
8814 + * On startup, if there is no action associated with the IRQ then we are
8815 + * probing. In this case we should not share with others as it will confuse us.
8816 + */
8817 +#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL)
8818 +
8819 +static unsigned int startup_pirq(unsigned int irq)
8820 +{
8821 + struct evtchn_bind_pirq bind_pirq;
8822 + int evtchn = evtchn_from_irq(irq);
8823 +
8824 + if (VALID_EVTCHN(evtchn))
8825 + goto out;
8826 +
8827 + bind_pirq.pirq = evtchn_get_xen_pirq(irq);
8828 + /* NB. We are happy to share unless we are probing. */
8829 + bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE;
8830 + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) {
8831 + if (!probing_irq(irq))
8832 + printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
8833 + irq);
8834 + return 0;
8835 + }
8836 + evtchn = bind_pirq.port;
8837 +
8838 + pirq_query_unmask(irq);
8839 +
8840 + evtchn_to_irq[evtchn] = irq;
8841 + bind_evtchn_to_cpu(evtchn, 0);
8842 + irq_info[irq] = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
8843 +
8844 + out:
8845 + unmask_evtchn(evtchn);
8846 + pirq_unmask_notify(irq);
8847 +
8848 + return 0;
8849 +}
8850 +
8851 +static void shutdown_pirq(unsigned int irq)
8852 +{
8853 + struct evtchn_close close;
8854 + int evtchn = evtchn_from_irq(irq);
8855 +
8856 + if (!VALID_EVTCHN(evtchn))
8857 + return;
8858 +
8859 + mask_evtchn(evtchn);
8860 +
8861 + close.port = evtchn;
8862 + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
8863 + BUG();
8864 +
8865 + bind_evtchn_to_cpu(evtchn, 0);
8866 + evtchn_to_irq[evtchn] = -1;
8867 + irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
8868 +}
8869 +
8870 +static void enable_pirq(unsigned int irq)
8871 +{
8872 + startup_pirq(irq);
8873 +}
8874 +
8875 +static void disable_pirq(unsigned int irq)
8876 +{
8877 +}
8878 +
8879 +static void ack_pirq(unsigned int irq)
8880 +{
8881 + int evtchn = evtchn_from_irq(irq);
8882 +
8883 + move_native_irq(irq);
8884 +
8885 + if (VALID_EVTCHN(evtchn)) {
8886 + mask_evtchn(evtchn);
8887 + clear_evtchn(evtchn);
8888 + }
8889 +}
8890 +
8891 +static void end_pirq(unsigned int irq)
8892 +{
8893 + int evtchn = evtchn_from_irq(irq);
8894 +
8895 + if ((irq_desc[irq].status & (IRQ_DISABLED|IRQ_PENDING)) ==
8896 + (IRQ_DISABLED|IRQ_PENDING)) {
8897 + shutdown_pirq(irq);
8898 + } else if (VALID_EVTCHN(evtchn)) {
8899 + unmask_evtchn(evtchn);
8900 + pirq_unmask_notify(irq);
8901 + }
8902 +}
8903 +
8904 +static struct hw_interrupt_type pirq_type = {
8905 + .typename = "Phys-irq",
8906 + .startup = startup_pirq,
8907 + .shutdown = shutdown_pirq,
8908 + .enable = enable_pirq,
8909 + .disable = disable_pirq,
8910 + .ack = ack_pirq,
8911 + .end = end_pirq,
8912 +#ifdef CONFIG_SMP
8913 + .set_affinity = set_affinity_irq,
8914 +#endif
8915 + .retrigger = resend_irq_on_evtchn,
8916 +};
8917 +
8918 +int irq_ignore_unhandled(unsigned int irq)
8919 +{
8920 + struct physdev_irq_status_query irq_status = { .irq = irq };
8921 +
8922 + if (!is_running_on_xen())
8923 + return 0;
8924 +
8925 + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
8926 + return 0;
8927 + return !!(irq_status.flags & XENIRQSTAT_shared);
8928 +}
8929 +
8930 +void notify_remote_via_irq(int irq)
8931 +{
8932 + int evtchn = evtchn_from_irq(irq);
8933 +
8934 + if (VALID_EVTCHN(evtchn))
8935 + notify_remote_via_evtchn(evtchn);
8936 +}
8937 +EXPORT_SYMBOL_GPL(notify_remote_via_irq);
8938 +
8939 +int irq_to_evtchn_port(int irq)
8940 +{
8941 + return evtchn_from_irq(irq);
8942 +}
8943 +EXPORT_SYMBOL_GPL(irq_to_evtchn_port);
8944 +
8945 +void mask_evtchn(int port)
8946 +{
8947 + shared_info_t *s = HYPERVISOR_shared_info;
8948 + synch_set_bit(port, s->evtchn_mask);
8949 +}
8950 +EXPORT_SYMBOL_GPL(mask_evtchn);
8951 +
8952 +void unmask_evtchn(int port)
8953 +{
8954 + shared_info_t *s = HYPERVISOR_shared_info;
8955 + unsigned int cpu = smp_processor_id();
8956 + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
8957 +
8958 + BUG_ON(!irqs_disabled());
8959 +
8960 + /* Slow path (hypercall) if this is a non-local port. */
8961 + if (unlikely(cpu != cpu_from_evtchn(port))) {
8962 + struct evtchn_unmask unmask = { .port = port };
8963 + VOID(HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask));
8964 + return;
8965 + }
8966 +
8967 + synch_clear_bit(port, s->evtchn_mask);
8968 +
8969 + /* Did we miss an interrupt 'edge'? Re-fire if so. */
8970 + if (synch_test_bit(port, s->evtchn_pending) &&
8971 + !synch_test_and_set_bit(port / BITS_PER_LONG,
8972 + &vcpu_info->evtchn_pending_sel))
8973 + vcpu_info->evtchn_upcall_pending = 1;
8974 +}
8975 +EXPORT_SYMBOL_GPL(unmask_evtchn);
8976 +
8977 +void disable_all_local_evtchn(void)
8978 +{
8979 + unsigned i, cpu = smp_processor_id();
8980 + shared_info_t *s = HYPERVISOR_shared_info;
8981 +
8982 + for (i = 0; i < NR_EVENT_CHANNELS; ++i)
8983 + if (cpu_from_evtchn(i) == cpu)
8984 + synch_set_bit(i, &s->evtchn_mask[0]);
8985 +}
8986 +
8987 +static void restore_cpu_virqs(unsigned int cpu)
8988 +{
8989 + struct evtchn_bind_virq bind_virq;
8990 + int virq, irq, evtchn;
8991 +
8992 + for (virq = 0; virq < NR_VIRQS; virq++) {
8993 + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
8994 + continue;
8995 +
8996 + BUG_ON(irq_info[irq] != mk_irq_info(IRQT_VIRQ, virq, 0));
8997 +
8998 + /* Get a new binding from Xen. */
8999 + bind_virq.virq = virq;
9000 + bind_virq.vcpu = cpu;
9001 + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
9002 + &bind_virq) != 0)
9003 + BUG();
9004 + evtchn = bind_virq.port;
9005 +
9006 + /* Record the new mapping. */
9007 + evtchn_to_irq[evtchn] = irq;
9008 + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
9009 + bind_evtchn_to_cpu(evtchn, cpu);
9010 +
9011 + /* Ready for use. */
9012 + unmask_evtchn(evtchn);
9013 + }
9014 +}
9015 +
9016 +static void restore_cpu_ipis(unsigned int cpu)
9017 +{
9018 + struct evtchn_bind_ipi bind_ipi;
9019 + int ipi, irq, evtchn;
9020 +
9021 + for (ipi = 0; ipi < NR_IPIS; ipi++) {
9022 + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
9023 + continue;
9024 +
9025 + BUG_ON(irq_info[irq] != mk_irq_info(IRQT_IPI, ipi, 0));
9026 +
9027 + /* Get a new binding from Xen. */
9028 + bind_ipi.vcpu = cpu;
9029 + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
9030 + &bind_ipi) != 0)
9031 + BUG();
9032 + evtchn = bind_ipi.port;
9033 +
9034 + /* Record the new mapping. */
9035 + evtchn_to_irq[evtchn] = irq;
9036 + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
9037 + bind_evtchn_to_cpu(evtchn, cpu);
9038 +
9039 + /* Ready for use. */
9040 + unmask_evtchn(evtchn);
9041 +
9042 + }
9043 +}
9044 +
9045 +void irq_resume(void)
9046 +{
9047 + unsigned int cpu, irq, evtchn;
9048 +
9049 + init_evtchn_cpu_bindings();
9050 +
9051 + /* New event-channel space is not 'live' yet. */
9052 + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
9053 + mask_evtchn(evtchn);
9054 +
9055 + /* Check that no PIRQs are still bound. */
9056 + for (irq = PIRQ_BASE; irq < (PIRQ_BASE + NR_PIRQS); irq++)
9057 + BUG_ON(irq_info[irq] != IRQ_UNBOUND);
9058 +
9059 + /* No IRQ <-> event-channel mappings. */
9060 + for (irq = 0; irq < NR_IRQS; irq++)
9061 + irq_info[irq] &= ~((1U << _EVTCHN_BITS) - 1);
9062 + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
9063 + evtchn_to_irq[evtchn] = -1;
9064 +
9065 + for_each_possible_cpu(cpu) {
9066 + restore_cpu_virqs(cpu);
9067 + restore_cpu_ipis(cpu);
9068 + }
9069 +
9070 +}
9071 +
9072 +#if defined(CONFIG_X86_IO_APIC)
9073 +#define identity_mapped_irq(irq) (!IO_APIC_IRQ((irq) - PIRQ_BASE))
9074 +#elif defined(CONFIG_X86)
9075 +#define identity_mapped_irq(irq) (((irq) - PIRQ_BASE) < 16)
9076 +#else
9077 +#define identity_mapped_irq(irq) (1)
9078 +#endif
9079 +
9080 +void evtchn_register_pirq(int irq)
9081 +{
9082 + BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE > NR_PIRQS);
9083 + if (identity_mapped_irq(irq))
9084 + return;
9085 + irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0);
9086 + irq_desc[irq].chip = &pirq_type;
9087 +}
9088 +
9089 +int evtchn_map_pirq(int irq, int xen_pirq)
9090 +{
9091 + if (irq < 0) {
9092 + static DEFINE_SPINLOCK(irq_alloc_lock);
9093 +
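+		/*
+		 * No IRQ was supplied: scan downwards from the top of the PIRQ
+		 * range for an unbound, non-identity-mapped slot to use.
+		 */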
9094 + irq = PIRQ_BASE + NR_PIRQS - 1;
9095 + spin_lock(&irq_alloc_lock);
9096 + do {
9097 + if (identity_mapped_irq(irq))
9098 + continue;
9099 + if (!index_from_irq(irq)) {
9100 + BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
9101 + irq_info[irq] = mk_irq_info(IRQT_PIRQ,
9102 + xen_pirq, 0);
9103 + break;
9104 + }
9105 + } while (--irq >= PIRQ_BASE);
9106 + spin_unlock(&irq_alloc_lock);
9107 + if (irq < PIRQ_BASE)
9108 + return -ENOSPC;
9109 + irq_desc[irq].chip = &pirq_type;
9110 + } else if (!xen_pirq) {
9111 + if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
9112 + return -EINVAL;
9113 + irq_desc[irq].chip = &no_irq_type;
9114 + irq_info[irq] = IRQ_UNBOUND;
9115 + return 0;
9116 + } else if (type_from_irq(irq) != IRQT_PIRQ
9117 + || index_from_irq(irq) != xen_pirq) {
9118 + printk(KERN_ERR "IRQ#%d is already mapped to %d:%u - "
9119 + "cannot map to PIRQ#%u\n",
9120 + irq, type_from_irq(irq), index_from_irq(irq), xen_pirq);
9121 + return -EINVAL;
9122 + }
9123 + return index_from_irq(irq) ? irq : -EINVAL;
9124 +}
9125 +
9126 +int evtchn_get_xen_pirq(int irq)
9127 +{
9128 + if (identity_mapped_irq(irq))
9129 + return irq;
9130 + BUG_ON(type_from_irq(irq) != IRQT_PIRQ);
9131 + return index_from_irq(irq);
9132 +}
9133 +
9134 +void __init xen_init_IRQ(void)
9135 +{
9136 + unsigned int i;
9137 +
9138 + init_evtchn_cpu_bindings();
9139 +
9140 + /* No event channels are 'live' right now. */
9141 + for (i = 0; i < NR_EVENT_CHANNELS; i++)
9142 + mask_evtchn(i);
9143 +
9144 + /* No IRQ -> event-channel mappings. */
9145 + for (i = 0; i < NR_IRQS; i++)
9146 + irq_info[i] = IRQ_UNBOUND;
9147 +
9148 + /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
9149 + for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
9150 + irq_bindcount[i] = 0;
9151 +
9152 + irq_desc[i].status = IRQ_DISABLED|IRQ_NOPROBE;
9153 + irq_desc[i].action = NULL;
9154 + irq_desc[i].depth = 1;
9155 + irq_desc[i].chip = &dynirq_type;
9156 + }
9157 +
9158 + /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
9159 + for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_PIRQS); i++) {
9160 + irq_bindcount[i] = 1;
9161 +
9162 + if (!identity_mapped_irq(i))
9163 + continue;
9164 +
9165 +#ifdef RTC_IRQ
9166 + /* If not domain 0, force our RTC driver to fail its probe. */
9167 + if (i - PIRQ_BASE == RTC_IRQ && !is_initial_xendomain())
9168 + continue;
9169 +#endif
9170 +
9171 + irq_desc[i].status = IRQ_DISABLED;
9172 + irq_desc[i].action = NULL;
9173 + irq_desc[i].depth = 1;
9174 + irq_desc[i].chip = &pirq_type;
9175 + }
9176 +}
9177 Index: head-2008-11-25/drivers/xen/core/features.c
9178 ===================================================================
9179 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
9180 +++ head-2008-11-25/drivers/xen/core/features.c 2007-06-12 13:13:44.000000000 +0200
9181 @@ -0,0 +1,34 @@
9182 +/******************************************************************************
9183 + * features.c
9184 + *
9185 + * Xen feature flags.
9186 + *
9187 + * Copyright (c) 2006, Ian Campbell, XenSource Inc.
9188 + */
9189 +#include <linux/types.h>
9190 +#include <linux/cache.h>
9191 +#include <linux/module.h>
9192 +#include <asm/hypervisor.h>
9193 +#include <xen/features.h>
9194 +
9195 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
9196 +#include <xen/platform-compat.h>
9197 +#endif
9198 +
9199 +u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
9200 +/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
9201 +EXPORT_SYMBOL(xen_features);
9202 +
9203 +void setup_xen_features(void)
9204 +{
9205 + xen_feature_info_t fi;
9206 + int i, j;
9207 +
9208 + for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
9209 + fi.submap_idx = i;
9210 + if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
9211 + break;
9212 +		for (j = 0; j < 32; j++)
9213 +			xen_features[i * 32 + j] = !!(fi.submap & (1 << j));
9214 + }
9215 +}
9216 Index: head-2008-11-25/drivers/xen/core/firmware.c
9217 ===================================================================
9218 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
9219 +++ head-2008-11-25/drivers/xen/core/firmware.c 2007-06-22 09:08:06.000000000 +0200
9220 @@ -0,0 +1,74 @@
9221 +#include <linux/kernel.h>
9222 +#include <linux/errno.h>
9223 +#include <linux/init.h>
9224 +#include <linux/edd.h>
9225 +#include <video/edid.h>
9226 +#include <xen/interface/platform.h>
9227 +#include <asm/hypervisor.h>
9228 +
9229 +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
9230 +void __init copy_edd(void)
9231 +{
9232 + int ret;
9233 + struct xen_platform_op op;
9234 +
9235 + if (!is_initial_xendomain())
9236 + return;
9237 +
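+	/*
+	 * The kernel does not boot via the BIOS under Xen, so pull the EDD
+	 * disk and MBR-signature data from the hypervisor instead.
+	 */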
9238 + op.cmd = XENPF_firmware_info;
9239 +
9240 + op.u.firmware_info.type = XEN_FW_DISK_INFO;
9241 + for (op.u.firmware_info.index = 0;
9242 + edd.edd_info_nr < EDDMAXNR;
9243 + op.u.firmware_info.index++) {
9244 + struct edd_info *info = edd.edd_info + edd.edd_info_nr;
9245 +
9246 + info->params.length = sizeof(info->params);
9247 + set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
9248 + &info->params);
9249 + ret = HYPERVISOR_platform_op(&op);
9250 + if (ret)
9251 + break;
9252 +
9253 +#define C(x) info->x = op.u.firmware_info.u.disk_info.x
9254 + C(device);
9255 + C(version);
9256 + C(interface_support);
9257 + C(legacy_max_cylinder);
9258 + C(legacy_max_head);
9259 + C(legacy_sectors_per_track);
9260 +#undef C
9261 +
9262 + edd.edd_info_nr++;
9263 + }
9264 +
9265 + op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
9266 + for (op.u.firmware_info.index = 0;
9267 + edd.mbr_signature_nr < EDD_MBR_SIG_MAX;
9268 + op.u.firmware_info.index++) {
9269 + ret = HYPERVISOR_platform_op(&op);
9270 + if (ret)
9271 + break;
9272 + edd.mbr_signature[edd.mbr_signature_nr++] =
9273 + op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
9274 + }
9275 +}
9276 +#endif
9277 +
9278 +void __init copy_edid(void)
9279 +{
9280 +#if defined(CONFIG_FIRMWARE_EDID) && defined(CONFIG_X86)
9281 + struct xen_platform_op op;
9282 +
9283 + if (!is_initial_xendomain())
9284 + return;
9285 +
9286 + op.cmd = XENPF_firmware_info;
9287 + op.u.firmware_info.index = 0;
9288 + op.u.firmware_info.type = XEN_FW_VBEDDC_INFO;
9289 + set_xen_guest_handle(op.u.firmware_info.u.vbeddc_info.edid,
9290 + edid_info.dummy);
9291 + if (HYPERVISOR_platform_op(&op) != 0)
9292 + memset(edid_info.dummy, 0x13, sizeof(edid_info.dummy));
9293 +#endif
9294 +}
9295 Index: head-2008-11-25/drivers/xen/core/gnttab.c
9296 ===================================================================
9297 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
9298 +++ head-2008-11-25/drivers/xen/core/gnttab.c 2008-11-04 11:13:10.000000000 +0100
9299 @@ -0,0 +1,772 @@
9300 +/******************************************************************************
9301 + * gnttab.c
9302 + *
9303 + * Granting foreign access to our memory reservation.
9304 + *
9305 + * Copyright (c) 2005-2006, Christopher Clark
9306 + * Copyright (c) 2004-2005, K A Fraser
9307 + *
9308 + * This program is free software; you can redistribute it and/or
9309 + * modify it under the terms of the GNU General Public License version 2
9310 + * as published by the Free Software Foundation; or, when distributed
9311 + * separately from the Linux kernel or incorporated into other
9312 + * software packages, subject to the following license:
9313 + *
9314 + * Permission is hereby granted, free of charge, to any person obtaining a copy
9315 + * of this source file (the "Software"), to deal in the Software without
9316 + * restriction, including without limitation the rights to use, copy, modify,
9317 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
9318 + * and to permit persons to whom the Software is furnished to do so, subject to
9319 + * the following conditions:
9320 + *
9321 + * The above copyright notice and this permission notice shall be included in
9322 + * all copies or substantial portions of the Software.
9323 + *
9324 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
9325 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
9326 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
9327 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
9328 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
9329 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
9330 + * IN THE SOFTWARE.
9331 + */
9332 +
9333 +#include <linux/module.h>
9334 +#include <linux/sched.h>
9335 +#include <linux/mm.h>
9336 +#include <linux/seqlock.h>
9337 +#include <xen/interface/xen.h>
9338 +#include <xen/gnttab.h>
9339 +#include <asm/pgtable.h>
9340 +#include <asm/uaccess.h>
9341 +#include <asm/synch_bitops.h>
9342 +#include <asm/io.h>
9343 +#include <xen/interface/memory.h>
9344 +#include <xen/driver_util.h>
9345 +#include <asm/gnttab_dma.h>
9346 +
9347 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
9348 +#include <xen/platform-compat.h>
9349 +#endif
9350 +
9351 +/* External tools reserve first few grant table entries. */
9352 +#define NR_RESERVED_ENTRIES 8
9353 +#define GNTTAB_LIST_END 0xffffffff
9354 +#define ENTRIES_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
9355 +
9356 +static grant_ref_t **gnttab_list;
9357 +static unsigned int nr_grant_frames;
9358 +static unsigned int boot_max_nr_grant_frames;
9359 +static int gnttab_free_count;
9360 +static grant_ref_t gnttab_free_head;
9361 +static DEFINE_SPINLOCK(gnttab_list_lock);
9362 +
9363 +static struct grant_entry *shared;
9364 +
9365 +static struct gnttab_free_callback *gnttab_free_callback_list;
9366 +
9367 +static int gnttab_expand(unsigned int req_entries);
9368 +
9369 +#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
9370 +#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
9371 +
9372 +#define nr_freelist_frames(grant_frames) \
9373 + (((grant_frames) * ENTRIES_PER_GRANT_FRAME + RPP - 1) / RPP)
9374 +
9375 +static int get_free_entries(int count)
9376 +{
9377 + unsigned long flags;
9378 + int ref, rc;
9379 + grant_ref_t head;
9380 +
9381 + spin_lock_irqsave(&gnttab_list_lock, flags);
9382 +
9383 + if ((gnttab_free_count < count) &&
9384 + ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
9385 + spin_unlock_irqrestore(&gnttab_list_lock, flags);
9386 + return rc;
9387 + }
9388 +
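+	/*
+	 * Carve 'count' entries off the head of the free list: walk to the
+	 * last entry taken, detach the remainder, and terminate the chain.
+	 */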
9389 + ref = head = gnttab_free_head;
9390 + gnttab_free_count -= count;
9391 + while (count-- > 1)
9392 + head = gnttab_entry(head);
9393 + gnttab_free_head = gnttab_entry(head);
9394 + gnttab_entry(head) = GNTTAB_LIST_END;
9395 +
9396 + spin_unlock_irqrestore(&gnttab_list_lock, flags);
9397 +
9398 + return ref;
9399 +}
9400 +
9401 +#define get_free_entry() get_free_entries(1)
9402 +
9403 +static void do_free_callbacks(void)
9404 +{
9405 + struct gnttab_free_callback *callback, *next;
9406 +
9407 + callback = gnttab_free_callback_list;
9408 + gnttab_free_callback_list = NULL;
9409 +
9410 + while (callback != NULL) {
9411 + next = callback->next;
9412 + if (gnttab_free_count >= callback->count) {
9413 + callback->next = NULL;
9414 + callback->queued = 0;
9415 + callback->fn(callback->arg);
9416 + } else {
9417 + callback->next = gnttab_free_callback_list;
9418 + gnttab_free_callback_list = callback;
9419 + }
9420 + callback = next;
9421 + }
9422 +}
9423 +
9424 +static inline void check_free_callbacks(void)
9425 +{
9426 + if (unlikely(gnttab_free_callback_list))
9427 + do_free_callbacks();
9428 +}
9429 +
9430 +static void put_free_entry(grant_ref_t ref)
9431 +{
9432 + unsigned long flags;
9433 + spin_lock_irqsave(&gnttab_list_lock, flags);
9434 + gnttab_entry(ref) = gnttab_free_head;
9435 + gnttab_free_head = ref;
9436 + gnttab_free_count++;
9437 + check_free_callbacks();
9438 + spin_unlock_irqrestore(&gnttab_list_lock, flags);
9439 +}
9440 +
9441 +/*
9442 + * Public grant-issuing interface functions
9443 + */
9444 +
9445 +int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
9446 + int flags)
9447 +{
9448 + int ref;
9449 +
9450 + if (unlikely((ref = get_free_entry()) < 0))
9451 + return -ENOSPC;
9452 +
9453 + shared[ref].frame = frame;
9454 + shared[ref].domid = domid;
9455 + wmb();
9456 + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing));
9457 + shared[ref].flags = GTF_permit_access | flags;
9458 +
9459 + return ref;
9460 +}
9461 +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
9462 +
9463 +void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
9464 + unsigned long frame, int flags)
9465 +{
9466 + shared[ref].frame = frame;
9467 + shared[ref].domid = domid;
9468 + wmb();
9469 + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing));
9470 + shared[ref].flags = GTF_permit_access | flags;
9471 +}
9472 +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
9473 +
9474 +
9475 +int gnttab_query_foreign_access(grant_ref_t ref)
9476 +{
9477 + u16 nflags;
9478 +
9479 + nflags = shared[ref].flags;
9480 +
9481 + return (nflags & (GTF_reading|GTF_writing));
9482 +}
9483 +EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
9484 +
9485 +int gnttab_end_foreign_access_ref(grant_ref_t ref)
9486 +{
9487 + u16 flags, nflags;
9488 +
9489 + nflags = shared[ref].flags;
9490 + do {
9491 + if ((flags = nflags) & (GTF_reading|GTF_writing)) {
9492 + printk(KERN_DEBUG "WARNING: g.e. still in use!\n");
9493 + return 0;
9494 + }
9495 + } while ((nflags = synch_cmpxchg_subword(&shared[ref].flags, flags, 0)) !=
9496 + flags);
9497 +
9498 + return 1;
9499 +}
9500 +EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
9501 +
9502 +void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page)
9503 +{
9504 + if (gnttab_end_foreign_access_ref(ref)) {
9505 + put_free_entry(ref);
9506 + if (page != 0)
9507 + free_page(page);
9508 + } else {
9509 + /* XXX This needs to be fixed so that the ref and page are
9510 + placed on a list to be freed up later. */
9511 + printk(KERN_DEBUG
9512 + "WARNING: leaking g.e. and page still in use!\n");
9513 + }
9514 +}
9515 +EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
9516 +
9517 +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
9518 +{
9519 + int ref;
9520 +
9521 + if (unlikely((ref = get_free_entry()) < 0))
9522 + return -ENOSPC;
9523 + gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
9524 +
9525 + return ref;
9526 +}
9527 +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
9528 +
9529 +void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
9530 + unsigned long pfn)
9531 +{
9532 + shared[ref].frame = pfn;
9533 + shared[ref].domid = domid;
9534 + wmb();
9535 + shared[ref].flags = GTF_accept_transfer;
9536 +}
9537 +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
9538 +
9539 +unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
9540 +{
9541 + unsigned long frame;
9542 + u16 flags;
9543 +
9544 + /*
9545 +	 * If the transfer has not even started yet, try to reclaim the grant
9546 + * reference and return failure (== 0).
9547 + */
9548 + while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
9549 + if (synch_cmpxchg_subword(&shared[ref].flags, flags, 0) == flags)
9550 + return 0;
9551 + cpu_relax();
9552 + }
9553 +
9554 + /* If a transfer is in progress then wait until it is completed. */
9555 + while (!(flags & GTF_transfer_completed)) {
9556 + flags = shared[ref].flags;
9557 + cpu_relax();
9558 + }
9559 +
9560 + /* Read the frame number /after/ reading completion status. */
9561 + rmb();
9562 + frame = shared[ref].frame;
9563 + BUG_ON(frame == 0);
9564 +
9565 + return frame;
9566 +}
9567 +EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
9568 +
9569 +unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
9570 +{
9571 + unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
9572 + put_free_entry(ref);
9573 + return frame;
9574 +}
9575 +EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
9576 +
9577 +void gnttab_free_grant_reference(grant_ref_t ref)
9578 +{
9579 + put_free_entry(ref);
9580 +}
9581 +EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
9582 +
9583 +void gnttab_free_grant_references(grant_ref_t head)
9584 +{
9585 + grant_ref_t ref;
9586 + unsigned long flags;
9587 + int count = 1;
9588 + if (head == GNTTAB_LIST_END)
9589 + return;
9590 + spin_lock_irqsave(&gnttab_list_lock, flags);
9591 + ref = head;
9592 + while (gnttab_entry(ref) != GNTTAB_LIST_END) {
9593 + ref = gnttab_entry(ref);
9594 + count++;
9595 + }
9596 + gnttab_entry(ref) = gnttab_free_head;
9597 + gnttab_free_head = head;
9598 + gnttab_free_count += count;
9599 + check_free_callbacks();
9600 + spin_unlock_irqrestore(&gnttab_list_lock, flags);
9601 +}
9602 +EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
9603 +
9604 +int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
9605 +{
9606 + int h = get_free_entries(count);
9607 +
9608 + if (h < 0)
9609 + return -ENOSPC;
9610 +
9611 + *head = h;
9612 +
9613 + return 0;
9614 +}
9615 +EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
9616 +
9617 +int gnttab_empty_grant_references(const grant_ref_t *private_head)
9618 +{
9619 + return (*private_head == GNTTAB_LIST_END);
9620 +}
9621 +EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
9622 +
9623 +int gnttab_claim_grant_reference(grant_ref_t *private_head)
9624 +{
9625 + grant_ref_t g = *private_head;
9626 + if (unlikely(g == GNTTAB_LIST_END))
9627 + return -ENOSPC;
9628 + *private_head = gnttab_entry(g);
9629 + return g;
9630 +}
9631 +EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
9632 +
9633 +void gnttab_release_grant_reference(grant_ref_t *private_head,
9634 + grant_ref_t release)
9635 +{
9636 + gnttab_entry(release) = *private_head;
9637 + *private_head = release;
9638 +}
9639 +EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
9640 +
9641 +void gnttab_request_free_callback(struct gnttab_free_callback *callback,
9642 + void (*fn)(void *), void *arg, u16 count)
9643 +{
9644 + unsigned long flags;
9645 + spin_lock_irqsave(&gnttab_list_lock, flags);
9646 + if (callback->queued)
9647 + goto out;
9648 + callback->fn = fn;
9649 + callback->arg = arg;
9650 + callback->count = count;
9651 + callback->queued = 1;
9652 + callback->next = gnttab_free_callback_list;
9653 + gnttab_free_callback_list = callback;
9654 + check_free_callbacks();
9655 +out:
9656 + spin_unlock_irqrestore(&gnttab_list_lock, flags);
9657 +}
9658 +EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
9659 +
9660 +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
9661 +{
9662 + struct gnttab_free_callback **pcb;
9663 + unsigned long flags;
9664 +
9665 + spin_lock_irqsave(&gnttab_list_lock, flags);
9666 + for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
9667 + if (*pcb == callback) {
9668 + *pcb = callback->next;
9669 + callback->queued = 0;
9670 + break;
9671 + }
9672 + }
9673 + spin_unlock_irqrestore(&gnttab_list_lock, flags);
9674 +}
9675 +EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
9676 +
9677 +static int grow_gnttab_list(unsigned int more_frames)
9678 +{
9679 + unsigned int new_nr_grant_frames, extra_entries, i;
9680 + unsigned int nr_glist_frames, new_nr_glist_frames;
9681 +
9682 + new_nr_grant_frames = nr_grant_frames + more_frames;
9683 + extra_entries = more_frames * ENTRIES_PER_GRANT_FRAME;
9684 +
9685 + nr_glist_frames = nr_freelist_frames(nr_grant_frames);
9686 + new_nr_glist_frames = nr_freelist_frames(new_nr_grant_frames);
9687 + for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
9688 + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
9689 + if (!gnttab_list[i])
9690 + goto grow_nomem;
9691 + }
9692 +
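+	/*
+	 * Chain the new entries together and splice them onto the front of
+	 * the existing free list.
+	 */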
9693 + for (i = ENTRIES_PER_GRANT_FRAME * nr_grant_frames;
9694 + i < ENTRIES_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
9695 + gnttab_entry(i) = i + 1;
9696 +
9697 + gnttab_entry(i) = gnttab_free_head;
9698 + gnttab_free_head = ENTRIES_PER_GRANT_FRAME * nr_grant_frames;
9699 + gnttab_free_count += extra_entries;
9700 +
9701 + nr_grant_frames = new_nr_grant_frames;
9702 +
9703 + check_free_callbacks();
9704 +
9705 + return 0;
9706 +
9707 +grow_nomem:
9708 + for ( ; i >= nr_glist_frames; i--)
9709 + free_page((unsigned long) gnttab_list[i]);
9710 + return -ENOMEM;
9711 +}
9712 +
9713 +static unsigned int __max_nr_grant_frames(void)
9714 +{
9715 + struct gnttab_query_size query;
9716 + int rc;
9717 +
9718 + query.dom = DOMID_SELF;
9719 +
9720 + rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
9721 + if ((rc < 0) || (query.status != GNTST_okay))
9722 + return 4; /* Legacy max supported number of frames */
9723 +
9724 + return query.max_nr_frames;
9725 +}
9726 +
9727 +static inline unsigned int max_nr_grant_frames(void)
9728 +{
9729 + unsigned int xen_max = __max_nr_grant_frames();
9730 +
9731 + if (xen_max > boot_max_nr_grant_frames)
9732 + return boot_max_nr_grant_frames;
9733 + return xen_max;
9734 +}
9735 +
9736 +#ifdef CONFIG_XEN
9737 +
9738 +static DEFINE_SEQLOCK(gnttab_dma_lock);
9739 +
9740 +#ifdef CONFIG_X86
9741 +static int map_pte_fn(pte_t *pte, struct page *pmd_page,
9742 + unsigned long addr, void *data)
9743 +{
9744 + unsigned long **frames = (unsigned long **)data;
9745 +
9746 + set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
9747 + (*frames)++;
9748 + return 0;
9749 +}
9750 +
9751 +static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
9752 + unsigned long addr, void *data)
9753 +{
9754 +
9755 + set_pte_at(&init_mm, addr, pte, __pte(0));
9756 + return 0;
9757 +}
9758 +
9759 +void *arch_gnttab_alloc_shared(unsigned long *frames)
9760 +{
9761 + struct vm_struct *area;
9762 + area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
9763 + BUG_ON(area == NULL);
9764 + return area->addr;
9765 +}
9766 +#endif /* CONFIG_X86 */
9767 +
9768 +static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
9769 +{
9770 + struct gnttab_setup_table setup;
9771 + unsigned long *frames;
9772 + unsigned int nr_gframes = end_idx + 1;
9773 + int rc;
9774 +
9775 + frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
9776 + if (!frames)
9777 + return -ENOMEM;
9778 +
9779 + setup.dom = DOMID_SELF;
9780 + setup.nr_frames = nr_gframes;
9781 + set_xen_guest_handle(setup.frame_list, frames);
9782 +
9783 + rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
9784 + if (rc == -ENOSYS) {
9785 + kfree(frames);
9786 + return -ENOSYS;
9787 + }
9788 +
9789 + BUG_ON(rc || setup.status);
9790 +
9791 + if (shared == NULL)
9792 + shared = arch_gnttab_alloc_shared(frames);
9793 +
9794 +#ifdef CONFIG_X86
9795 + rc = apply_to_page_range(&init_mm, (unsigned long)shared,
9796 + PAGE_SIZE * nr_gframes,
9797 + map_pte_fn, &frames);
9798 + BUG_ON(rc);
9799 + frames -= nr_gframes; /* adjust after map_pte_fn() */
9800 +#endif /* CONFIG_X86 */
9801 +
9802 + kfree(frames);
9803 +
9804 + return 0;
9805 +}
9806 +
9807 +static void gnttab_page_free(struct page *page)
9808 +{
9809 + ClearPageForeign(page);
9810 + gnttab_reset_grant_page(page);
9811 + put_page(page);
9812 +}
9813 +
9814 +/*
9815 + * Must not be called with IRQs off. This should only be used on the
9816 + * slow path.
9817 + *
9818 + * Copy a foreign granted page to local memory.
9819 + */
9820 +int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
9821 +{
9822 + struct gnttab_unmap_and_replace unmap;
9823 + mmu_update_t mmu;
9824 + struct page *page;
9825 + struct page *new_page;
9826 + void *new_addr;
9827 + void *addr;
9828 + paddr_t pfn;
9829 + maddr_t mfn;
9830 + maddr_t new_mfn;
9831 + int err;
9832 +
9833 + page = *pagep;
9834 + if (!get_page_unless_zero(page))
9835 + return -ENOENT;
9836 +
9837 + err = -ENOMEM;
9838 + new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
9839 + if (!new_page)
9840 + goto out;
9841 +
9842 + new_addr = page_address(new_page);
9843 + addr = page_address(page);
9844 + memcpy(new_addr, addr, PAGE_SIZE);
9845 +
9846 + pfn = page_to_pfn(page);
9847 + mfn = pfn_to_mfn(pfn);
9848 + new_mfn = virt_to_mfn(new_addr);
9849 +
9850 + write_seqlock(&gnttab_dma_lock);
9851 +
9852 + /* Make seq visible before checking page_mapped. */
9853 + smp_mb();
9854 +
9855 + /* Has the page been DMA-mapped? */
9856 + if (unlikely(page_mapped(page))) {
9857 + write_sequnlock(&gnttab_dma_lock);
9858 + put_page(new_page);
9859 + err = -EBUSY;
9860 + goto out;
9861 + }
9862 +
9863 + if (!xen_feature(XENFEAT_auto_translated_physmap))
9864 + set_phys_to_machine(pfn, new_mfn);
9865 +
9866 + gnttab_set_replace_op(&unmap, (unsigned long)addr,
9867 + (unsigned long)new_addr, ref);
9868 +
9869 + err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
9870 + &unmap, 1);
9871 + BUG_ON(err);
9872 + BUG_ON(unmap.status);
9873 +
9874 + write_sequnlock(&gnttab_dma_lock);
9875 +
9876 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
9877 + set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
9878 +
9879 + mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
9880 + mmu.val = pfn;
9881 + err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
9882 + BUG_ON(err);
9883 + }
9884 +
9885 + new_page->mapping = page->mapping;
9886 + new_page->index = page->index;
9887 + set_bit(PG_foreign, &new_page->flags);
9888 + *pagep = new_page;
9889 +
9890 + SetPageForeign(page, gnttab_page_free);
9891 + page->mapping = NULL;
9892 +
9893 +out:
9894 + put_page(page);
9895 + return err;
9896 +}
9897 +EXPORT_SYMBOL_GPL(gnttab_copy_grant_page);
9898 +
9899 +void gnttab_reset_grant_page(struct page *page)
9900 +{
9901 + init_page_count(page);
9902 + reset_page_mapcount(page);
9903 +}
9904 +EXPORT_SYMBOL_GPL(gnttab_reset_grant_page);
9905 +
9906 +/*
9907 + * Keep track of foreign pages marked as PageForeign so that we don't
9908 + * return them to the remote domain prematurely.
9909 + *
9910 + * PageForeign pages are pinned down by increasing their mapcount.
9911 + *
9912 + * All other pages are simply returned as is.
9913 + */
9914 +void __gnttab_dma_map_page(struct page *page)
9915 +{
9916 + unsigned int seq;
9917 +
9918 + if (!is_running_on_xen() || !PageForeign(page))
9919 + return;
9920 +
9921 + do {
9922 + seq = read_seqbegin(&gnttab_dma_lock);
9923 +
9924 + if (gnttab_dma_local_pfn(page))
9925 + break;
9926 +
9927 + atomic_set(&page->_mapcount, 0);
9928 +
9929 + /* Make _mapcount visible before read_seqretry. */
9930 + smp_mb();
9931 + } while (unlikely(read_seqretry(&gnttab_dma_lock, seq)));
9932 +}
9933 +
9934 +int gnttab_resume(void)
9935 +{
9936 + if (max_nr_grant_frames() < nr_grant_frames)
9937 + return -ENOSYS;
9938 + return gnttab_map(0, nr_grant_frames - 1);
9939 +}
9940 +
9941 +int gnttab_suspend(void)
9942 +{
9943 +#ifdef CONFIG_X86
9944 + apply_to_page_range(&init_mm, (unsigned long)shared,
9945 + PAGE_SIZE * nr_grant_frames,
9946 + unmap_pte_fn, NULL);
9947 +#endif
9948 + return 0;
9949 +}
9950 +
9951 +#else /* !CONFIG_XEN */
9952 +
9953 +#include <platform-pci.h>
9954 +
9955 +static unsigned long resume_frames;
9956 +
9957 +static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
9958 +{
9959 + struct xen_add_to_physmap xatp;
9960 + unsigned int i = end_idx;
9961 +
9962 + /* Loop backwards, so that the first hypercall has the largest index,
9963 + * ensuring that the table will grow only once.
9964 + */
9965 + do {
9966 + xatp.domid = DOMID_SELF;
9967 + xatp.idx = i;
9968 + xatp.space = XENMAPSPACE_grant_table;
9969 + xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
9970 + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
9971 + BUG();
9972 + } while (i-- > start_idx);
9973 +
9974 + return 0;
9975 +}
9976 +
9977 +int gnttab_resume(void)
9978 +{
9979 + unsigned int max_nr_gframes, nr_gframes;
9980 +
9981 + nr_gframes = nr_grant_frames;
9982 + max_nr_gframes = max_nr_grant_frames();
9983 + if (max_nr_gframes < nr_gframes)
9984 + return -ENOSYS;
9985 +
9986 + if (!resume_frames) {
9987 + resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
9988 + shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
9989 + if (shared == NULL) {
9990 +			printk(KERN_ERR "failed to ioremap gnttab shared frames\n");
9991 + return -1;
9992 + }
9993 + }
9994 +
9995 + gnttab_map(0, nr_gframes - 1);
9996 +
9997 + return 0;
9998 +}
9999 +
10000 +#endif /* !CONFIG_XEN */
10001 +
10002 +static int gnttab_expand(unsigned int req_entries)
10003 +{
10004 + int rc;
10005 + unsigned int cur, extra;
10006 +
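+	/*
+	 * Round the request up to whole grant frames; refuse if that would
+	 * exceed the current maximum number of grant frames.
+	 */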
10007 + cur = nr_grant_frames;
10008 + extra = ((req_entries + (ENTRIES_PER_GRANT_FRAME-1)) /
10009 + ENTRIES_PER_GRANT_FRAME);
10010 + if (cur + extra > max_nr_grant_frames())
10011 + return -ENOSPC;
10012 +
10013 + if ((rc = gnttab_map(cur, cur + extra - 1)) == 0)
10014 + rc = grow_gnttab_list(extra);
10015 +
10016 + return rc;
10017 +}
10018 +
10019 +int __devinit gnttab_init(void)
10020 +{
10021 + int i;
10022 + unsigned int max_nr_glist_frames, nr_glist_frames;
10023 + unsigned int nr_init_grefs;
10024 +
10025 + if (!is_running_on_xen())
10026 + return -ENODEV;
10027 +
10028 + nr_grant_frames = 1;
10029 + boot_max_nr_grant_frames = __max_nr_grant_frames();
10030 +
10031 + /* Determine the maximum number of frames required for the
10032 + * grant reference free list on the current hypervisor.
10033 + */
10034 + max_nr_glist_frames = nr_freelist_frames(boot_max_nr_grant_frames);
10035 +
10036 + gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
10037 + GFP_KERNEL);
10038 + if (gnttab_list == NULL)
10039 + return -ENOMEM;
10040 +
10041 + nr_glist_frames = nr_freelist_frames(nr_grant_frames);
10042 + for (i = 0; i < nr_glist_frames; i++) {
10043 + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
10044 + if (gnttab_list[i] == NULL)
10045 + goto ini_nomem;
10046 + }
10047 +
10048 + if (gnttab_resume() < 0)
10049 + return -ENODEV;
10050 +
10051 + nr_init_grefs = nr_grant_frames * ENTRIES_PER_GRANT_FRAME;
10052 +
10053 + for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
10054 + gnttab_entry(i) = i + 1;
10055 +
10056 + gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
10057 + gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
10058 + gnttab_free_head = NR_RESERVED_ENTRIES;
10059 +
10060 + return 0;
10061 +
10062 + ini_nomem:
10063 + for (i--; i >= 0; i--)
10064 + free_page((unsigned long)gnttab_list[i]);
10065 + kfree(gnttab_list);
10066 + return -ENOMEM;
10067 +}
10068 +
10069 +#ifdef CONFIG_XEN
10070 +core_initcall(gnttab_init);
10071 +#endif
10072 Index: head-2008-11-25/drivers/xen/core/hypervisor_sysfs.c
10073 ===================================================================
10074 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
10075 +++ head-2008-11-25/drivers/xen/core/hypervisor_sysfs.c 2007-07-10 09:42:30.000000000 +0200
10076 @@ -0,0 +1,57 @@
10077 +/*
10078 + * copyright (c) 2006 IBM Corporation
10079 + * Authored by: Mike D. Day <ncmike@us.ibm.com>
10080 + *
10081 + * This program is free software; you can redistribute it and/or modify
10082 + * it under the terms of the GNU General Public License version 2 as
10083 + * published by the Free Software Foundation.
10084 + */
10085 +
10086 +#include <linux/kernel.h>
10087 +#include <linux/module.h>
10088 +#include <linux/kobject.h>
10089 +#include <xen/hypervisor_sysfs.h>
10090 +#include <asm/hypervisor.h>
10091 +
10092 +static ssize_t hyp_sysfs_show(struct kobject *kobj,
10093 + struct attribute *attr,
10094 + char *buffer)
10095 +{
10096 + struct hyp_sysfs_attr *hyp_attr;
10097 + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
10098 + if (hyp_attr->show)
10099 + return hyp_attr->show(hyp_attr, buffer);
10100 + return 0;
10101 +}
10102 +
10103 +static ssize_t hyp_sysfs_store(struct kobject *kobj,
10104 + struct attribute *attr,
10105 + const char *buffer,
10106 + size_t len)
10107 +{
10108 + struct hyp_sysfs_attr *hyp_attr;
10109 + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
10110 + if (hyp_attr->store)
10111 + return hyp_attr->store(hyp_attr, buffer, len);
10112 + return 0;
10113 +}
10114 +
10115 +static struct sysfs_ops hyp_sysfs_ops = {
10116 + .show = hyp_sysfs_show,
10117 + .store = hyp_sysfs_store,
10118 +};
10119 +
10120 +static struct kobj_type hyp_sysfs_kobj_type = {
10121 + .sysfs_ops = &hyp_sysfs_ops,
10122 +};
10123 +
10124 +static int __init hypervisor_subsys_init(void)
10125 +{
10126 + if (!is_running_on_xen())
10127 + return -ENODEV;
10128 +
10129 + hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type;
10130 + return 0;
10131 +}
10132 +
10133 +device_initcall(hypervisor_subsys_init);
10134 Index: head-2008-11-25/drivers/xen/core/machine_kexec.c
10135 ===================================================================
10136 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
10137 +++ head-2008-11-25/drivers/xen/core/machine_kexec.c 2008-10-13 13:43:45.000000000 +0200
10138 @@ -0,0 +1,222 @@
10139 +/*
10140 + * drivers/xen/core/machine_kexec.c
10141 + * handle transition of Linux booting another kernel
10142 + */
10143 +
10144 +#include <linux/kexec.h>
10145 +#include <xen/interface/kexec.h>
10146 +#include <linux/mm.h>
10147 +#include <linux/bootmem.h>
10148 +
10149 +extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki,
10150 + struct kimage *image);
10151 +extern int machine_kexec_setup_resources(struct resource *hypervisor,
10152 + struct resource *phys_cpus,
10153 + int nr_phys_cpus);
10154 +extern void machine_kexec_register_resources(struct resource *res);
10155 +
10156 +static int __initdata xen_max_nr_phys_cpus;
10157 +static struct resource xen_hypervisor_res;
10158 +static struct resource *xen_phys_cpus;
10159 +
10160 +size_t vmcoreinfo_size_xen;
10161 +unsigned long paddr_vmcoreinfo_xen;
10162 +
10163 +void __init xen_machine_kexec_setup_resources(void)
10164 +{
10165 + xen_kexec_range_t range;
10166 + struct resource *res;
10167 + int k = 0;
10168 + int rc;
10169 +
10170 + if (!is_initial_xendomain())
10171 + return;
10172 +
10173 + /* determine maximum number of physical cpus */
10174 +
10175 + while (1) {
10176 + memset(&range, 0, sizeof(range));
10177 + range.range = KEXEC_RANGE_MA_CPU;
10178 + range.nr = k;
10179 +
10180 +		if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
10181 + break;
10182 +
10183 + k++;
10184 + }
10185 +
10186 + if (k == 0)
10187 + return;
10188 +
10189 + xen_max_nr_phys_cpus = k;
10190 +
10191 + /* allocate xen_phys_cpus */
10192 +
10193 + xen_phys_cpus = alloc_bootmem_low(k * sizeof(struct resource));
10194 + BUG_ON(xen_phys_cpus == NULL);
10195 +
10196 + /* fill in xen_phys_cpus with per-cpu crash note information */
10197 +
10198 + for (k = 0; k < xen_max_nr_phys_cpus; k++) {
10199 + memset(&range, 0, sizeof(range));
10200 + range.range = KEXEC_RANGE_MA_CPU;
10201 + range.nr = k;
10202 +
10203 + if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
10204 + goto err;
10205 +
10206 + res = xen_phys_cpus + k;
10207 +
10208 + memset(res, 0, sizeof(*res));
10209 + res->name = "Crash note";
10210 + res->start = range.start;
10211 + res->end = range.start + range.size - 1;
10212 + res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
10213 + }
10214 +
10215 + /* fill in xen_hypervisor_res with hypervisor machine address range */
10216 +
10217 + memset(&range, 0, sizeof(range));
10218 + range.range = KEXEC_RANGE_MA_XEN;
10219 +
10220 + if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
10221 + goto err;
10222 +
10223 + xen_hypervisor_res.name = "Hypervisor code and data";
10224 + xen_hypervisor_res.start = range.start;
10225 + xen_hypervisor_res.end = range.start + range.size - 1;
10226 + xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
10227 +
10228 + /* fill in crashk_res if range is reserved by hypervisor */
10229 +
10230 + memset(&range, 0, sizeof(range));
10231 + range.range = KEXEC_RANGE_MA_CRASH;
10232 +
10233 + if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
10234 + goto err;
10235 +
10236 + if (range.size) {
10237 + crashk_res.start = range.start;
10238 + crashk_res.end = range.start + range.size - 1;
10239 + }
10240 +
10241 + /* get physical address of vmcoreinfo */
10242 + memset(&range, 0, sizeof(range));
10243 + range.range = KEXEC_RANGE_MA_VMCOREINFO;
10244 +
10245 + rc = HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range);
10246 +
10247 + if (rc == 0) {
10248 + /* Hypercall succeeded */
10249 + vmcoreinfo_size_xen = range.size;
10250 + paddr_vmcoreinfo_xen = range.start;
10251 +
10252 + } else {
10253 +		/* Hypercall failed: reset the globals so that the sysfs
10254 +		 * file is not created.
10255 +		 */
10256 + vmcoreinfo_size_xen = 0;
10257 + paddr_vmcoreinfo_xen = 0;
10258 +
10259 + /* The KEXEC_CMD_kexec_get_range hypercall did not implement
10260 + * KEXEC_RANGE_MA_VMCOREINFO until Xen 3.3.
10261 + * Do not bail out if it fails for this reason.
10262 + */
10263 + if (rc != -EINVAL)
10264 + return;
10265 + }
10266 +
10267 + if (machine_kexec_setup_resources(&xen_hypervisor_res, xen_phys_cpus,
10268 + xen_max_nr_phys_cpus))
10269 + goto err;
10270 +
10271 + return;
10272 +
10273 + err:
10274 + /*
10275 + * It isn't possible to free xen_phys_cpus this early in the
10276 +	 * boot. Failure at this stage is unexpected, and the amount of
10277 +	 * memory involved is small, so we tolerate the potential leak.
10278 + */
10279 + xen_max_nr_phys_cpus = 0;
10280 + return;
10281 +}
10282 +
10283 +void __init xen_machine_kexec_register_resources(struct resource *res)
10284 +{
10285 + request_resource(res, &xen_hypervisor_res);
10286 + machine_kexec_register_resources(res);
10287 +}
10288 +
10289 +static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
10290 +{
10291 + machine_kexec_setup_load_arg(xki, image);
10292 +
10293 + xki->indirection_page = image->head;
10294 + xki->start_address = image->start;
10295 +}
10296 +
10297 +/*
10298 + * Load the image into xen so xen can kdump itself
10299 + * This might have been done in prepare, but prepare
10300 + * is currently called too early. It might make sense
10301 + * to move prepare, but for now, just add an extra hook.
10302 + */
10303 +int xen_machine_kexec_load(struct kimage *image)
10304 +{
10305 + xen_kexec_load_t xkl;
10306 +
10307 + memset(&xkl, 0, sizeof(xkl));
10308 + xkl.type = image->type;
10309 + setup_load_arg(&xkl.image, image);
10310 + return HYPERVISOR_kexec_op(KEXEC_CMD_kexec_load, &xkl);
10311 +}
10312 +
10313 +/*
10314 + * Unload the image that was stored by machine_kexec_load()
10315 + * This might have been done in machine_kexec_cleanup() but it
10316 + * is called too late, and it's possible xen could try to kdump
10317 + * using resources that have been freed.
10318 + */
10319 +void xen_machine_kexec_unload(struct kimage *image)
10320 +{
10321 + xen_kexec_load_t xkl;
10322 +
10323 + memset(&xkl, 0, sizeof(xkl));
10324 + xkl.type = image->type;
10325 + WARN_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_unload, &xkl));
10326 +}
10327 +
10328 +/*
10329 + * Do not allocate memory (or fail in any way) in machine_kexec().
10330 + * We are past the point of no return, committed to rebooting now.
10331 + *
10332 + * This has the hypervisor move to the preferred reboot CPU,
10333 + * stop all CPUs and kexec. That is, it combines machine_shutdown()
10334 + * and machine_kexec() in Linux kexec terms.
10335 + */
10336 +NORET_TYPE void machine_kexec(struct kimage *image)
10337 +{
10338 + xen_kexec_exec_t xke;
10339 +
10340 + memset(&xke, 0, sizeof(xke));
10341 + xke.type = image->type;
10342 + VOID(HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &xke));
10343 + panic("KEXEC_CMD_kexec hypercall should not return\n");
10344 +}
10345 +
10346 +void machine_shutdown(void)
10347 +{
10348 + /* do nothing */
10349 +}
10350 +
10351 +
10352 +/*
10353 + * Local variables:
10354 + * c-file-style: "linux"
10355 + * indent-tabs-mode: t
10356 + * c-indent-level: 8
10357 + * c-basic-offset: 8
10358 + * tab-width: 8
10359 + * End:
10360 + */
10361 Index: head-2008-11-25/drivers/xen/core/machine_reboot.c
10362 ===================================================================
10363 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
10364 +++ head-2008-11-25/drivers/xen/core/machine_reboot.c 2008-09-01 12:07:31.000000000 +0200
10365 @@ -0,0 +1,247 @@
10366 +#include <linux/version.h>
10367 +#include <linux/kernel.h>
10368 +#include <linux/mm.h>
10369 +#include <linux/unistd.h>
10370 +#include <linux/module.h>
10371 +#include <linux/reboot.h>
10372 +#include <linux/sysrq.h>
10373 +#include <linux/stringify.h>
10374 +#include <linux/stop_machine.h>
10375 +#include <asm/irq.h>
10376 +#include <asm/mmu_context.h>
10377 +#include <xen/evtchn.h>
10378 +#include <asm/hypervisor.h>
10379 +#include <xen/xenbus.h>
10380 +#include <linux/cpu.h>
10381 +#include <xen/gnttab.h>
10382 +#include <xen/xencons.h>
10383 +#include <xen/cpu_hotplug.h>
10384 +#include <xen/interface/vcpu.h>
10385 +
10386 +#if defined(__i386__) || defined(__x86_64__)
10387 +
10388 +/*
10389 + * Power off function, if any
10390 + */
10391 +void (*pm_power_off)(void);
10392 +EXPORT_SYMBOL(pm_power_off);
10393 +
10394 +void machine_emergency_restart(void)
10395 +{
10396 + /* We really want to get pending console data out before we die. */
10397 + xencons_force_flush();
10398 + HYPERVISOR_shutdown(SHUTDOWN_reboot);
10399 +}
10400 +
10401 +void machine_restart(char * __unused)
10402 +{
10403 + machine_emergency_restart();
10404 +}
10405 +
10406 +void machine_halt(void)
10407 +{
10408 + machine_power_off();
10409 +}
10410 +
10411 +void machine_power_off(void)
10412 +{
10413 + /* We really want to get pending console data out before we die. */
10414 + xencons_force_flush();
10415 + if (pm_power_off)
10416 + pm_power_off();
10417 + HYPERVISOR_shutdown(SHUTDOWN_poweroff);
10418 +}
10419 +
10420 +int reboot_thru_bios = 0; /* for dmi_scan.c */
10421 +EXPORT_SYMBOL(machine_restart);
10422 +EXPORT_SYMBOL(machine_halt);
10423 +EXPORT_SYMBOL(machine_power_off);
10424 +
10425 +static void pre_suspend(void)
10426 +{
10427 + HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
10428 + WARN_ON(HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
10429 + __pte_ma(0), 0));
10430 +
10431 + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
10432 + xen_start_info->console.domU.mfn =
10433 + mfn_to_pfn(xen_start_info->console.domU.mfn);
10434 +}
10435 +
10436 +static void post_suspend(int suspend_cancelled)
10437 +{
10438 + int i, j, k, fpp;
10439 + unsigned long shinfo_mfn;
10440 + extern unsigned long max_pfn;
10441 + extern unsigned long *pfn_to_mfn_frame_list_list;
10442 + extern unsigned long *pfn_to_mfn_frame_list[];
10443 +
10444 + if (suspend_cancelled) {
10445 + xen_start_info->store_mfn =
10446 + pfn_to_mfn(xen_start_info->store_mfn);
10447 + xen_start_info->console.domU.mfn =
10448 + pfn_to_mfn(xen_start_info->console.domU.mfn);
10449 + } else {
10450 +#ifdef CONFIG_SMP
10451 + cpu_initialized_map = cpu_online_map;
10452 +#endif
10453 + }
10454 +
10455 + shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT;
10456 + if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
10457 + pfn_pte_ma(shinfo_mfn, PAGE_KERNEL),
10458 + 0))
10459 + BUG();
10460 + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
10461 +
10462 + memset(empty_zero_page, 0, PAGE_SIZE);
10463 +
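+	/*
+	 * Rebuild the two-level pfn-to-mfn frame list with the (possibly new)
+	 * machine addresses of the p2m table pages, then re-advertise it to
+	 * the hypervisor via the shared info page.
+	 */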
10464 + fpp = PAGE_SIZE/sizeof(unsigned long);
10465 + for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
10466 + if ((j % fpp) == 0) {
10467 + k++;
10468 + pfn_to_mfn_frame_list_list[k] =
10469 + virt_to_mfn(pfn_to_mfn_frame_list[k]);
10470 + j = 0;
10471 + }
10472 + pfn_to_mfn_frame_list[k][j] =
10473 + virt_to_mfn(&phys_to_machine_mapping[i]);
10474 + }
10475 + HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
10476 + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
10477 + virt_to_mfn(pfn_to_mfn_frame_list_list);
10478 +}
10479 +
10480 +#else /* !(defined(__i386__) || defined(__x86_64__)) */
10481 +
10482 +#ifndef HAVE_XEN_PRE_SUSPEND
10483 +#define xen_pre_suspend() ((void)0)
10484 +#endif
10485 +
10486 +#ifndef HAVE_XEN_POST_SUSPEND
10487 +#define xen_post_suspend(x) ((void)0)
10488 +#endif
10489 +
10490 +#define switch_idle_mm() ((void)0)
10491 +#define mm_pin_all() ((void)0)
10492 +#define pre_suspend() xen_pre_suspend()
10493 +#define post_suspend(x) xen_post_suspend(x)
10494 +
10495 +#endif
10496 +
10497 +struct suspend {
10498 + int fast_suspend;
10499 + void (*resume_notifier)(int);
10500 +};
10501 +
10502 +static int take_machine_down(void *_suspend)
10503 +{
10504 + struct suspend *suspend = _suspend;
10505 + int suspend_cancelled, err;
10506 + extern void time_resume(void);
10507 +
10508 + if (suspend->fast_suspend) {
10509 + BUG_ON(!irqs_disabled());
10510 + } else {
10511 + BUG_ON(irqs_disabled());
10512 +
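+		/*
+		 * Slow path: bring the other CPUs down and suspend xenbus,
+		 * retrying until we really are the only CPU left online.
+		 */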
10513 + for (;;) {
10514 + err = smp_suspend();
10515 + if (err)
10516 + return err;
10517 +
10518 + xenbus_suspend();
10519 + preempt_disable();
10520 +
10521 + if (num_online_cpus() == 1)
10522 + break;
10523 +
10524 + preempt_enable();
10525 + xenbus_suspend_cancel();
10526 + }
10527 +
10528 + local_irq_disable();
10529 + }
10530 +
10531 + mm_pin_all();
10532 + gnttab_suspend();
10533 + pre_suspend();
10534 +
10535 + /*
10536 + * This hypercall returns 1 if suspend was cancelled or the domain was
10537 + * merely checkpointed, and 0 if it is resuming in a new domain.
10538 + */
10539 + suspend_cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
10540 +
10541 + suspend->resume_notifier(suspend_cancelled);
10542 + post_suspend(suspend_cancelled);
10543 + gnttab_resume();
10544 + if (!suspend_cancelled) {
10545 + irq_resume();
10546 +#ifdef __x86_64__
10547 + /*
10548 + * Older versions of Xen do not save/restore the user %cr3.
10549 + * We do it here just in case, but there's no need if we are
10550 + * in fast-suspend mode as that implies a new enough Xen.
10551 + */
10552 + if (!suspend->fast_suspend)
10553 + xen_new_user_pt(__pa(__user_pgd(
10554 + current->active_mm->pgd)));
10555 +#endif
10556 + }
10557 + time_resume();
10558 +
10559 + if (!suspend->fast_suspend)
10560 + local_irq_enable();
10561 +
10562 + return suspend_cancelled;
10563 +}
10564 +
10565 +int __xen_suspend(int fast_suspend, void (*resume_notifier)(int))
10566 +{
10567 + int err, suspend_cancelled;
10568 + struct suspend suspend;
10569 +
10570 + BUG_ON(smp_processor_id() != 0);
10571 + BUG_ON(in_interrupt());
10572 +
10573 +#if defined(__i386__) || defined(__x86_64__)
10574 + if (xen_feature(XENFEAT_auto_translated_physmap)) {
10575 + printk(KERN_WARNING "Cannot suspend in "
10576 + "auto_translated_physmap mode.\n");
10577 + return -EOPNOTSUPP;
10578 + }
10579 +#endif
10580 +
10581 + /* If we are definitely UP then 'slow mode' is actually faster. */
10582 + if (num_possible_cpus() == 1)
10583 + fast_suspend = 0;
10584 +
10585 + suspend.fast_suspend = fast_suspend;
10586 + suspend.resume_notifier = resume_notifier;
10587 +
10588 + if (fast_suspend) {
10589 + xenbus_suspend();
10590 + err = stop_machine_run(take_machine_down, &suspend, 0);
10591 + if (err < 0)
10592 + xenbus_suspend_cancel();
10593 + } else {
10594 + err = take_machine_down(&suspend);
10595 + }
10596 +
10597 + if (err < 0)
10598 + return err;
10599 +
10600 + suspend_cancelled = err;
10601 + if (!suspend_cancelled) {
10602 + xencons_resume();
10603 + xenbus_resume();
10604 + } else {
10605 + xenbus_suspend_cancel();
10606 + }
10607 +
10608 + if (!fast_suspend)
10609 + smp_resume();
10610 +
10611 + return 0;
10612 +}
10613 Index: head-2008-11-25/drivers/xen/core/pci.c
10614 ===================================================================
10615 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
10616 +++ head-2008-11-25/drivers/xen/core/pci.c 2008-11-10 11:44:21.000000000 +0100
10617 @@ -0,0 +1,59 @@
10618 +/*
10619 + * vim:shiftwidth=8:noexpandtab
10620 + */
10621 +
10622 +#include <linux/kernel.h>
10623 +#include <linux/init.h>
10624 +#include <linux/pci.h>
10625 +#include <xen/interface/physdev.h>
10626 +
10627 +static int (*pci_bus_probe)(struct device *dev);
10628 +static int (*pci_bus_remove)(struct device *dev);
10629 +
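+/*
+ * Wrap the PCI bus probe/remove methods so that, in dom0, Xen is told about
+ * each device before a Linux driver binds to it and after it is released.
+ */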
10630 +static int pci_bus_probe_wrapper(struct device *dev)
10631 +{
10632 + int r;
10633 + struct pci_dev *pci_dev = to_pci_dev(dev);
10634 + struct physdev_manage_pci manage_pci;
10635 + manage_pci.bus = pci_dev->bus->number;
10636 + manage_pci.devfn = pci_dev->devfn;
10637 +
10638 + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci);
10639 + if (r && r != -ENOSYS)
10640 + return r;
10641 +
10642 + r = pci_bus_probe(dev);
10643 + return r;
10644 +}
10645 +
10646 +static int pci_bus_remove_wrapper(struct device *dev)
10647 +{
10648 + int r;
10649 + struct pci_dev *pci_dev = to_pci_dev(dev);
10650 + struct physdev_manage_pci manage_pci;
10651 + manage_pci.bus = pci_dev->bus->number;
10652 + manage_pci.devfn = pci_dev->devfn;
10653 +
10654 + r = pci_bus_remove(dev);
10655 + /* dev and pci_dev are no longer valid!! */
10656 +
10657 + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
10658 + &manage_pci));
10659 + return r;
10660 +}
10661 +
10662 +static int __init hook_pci_bus(void)
10663 +{
10664 + if (!is_running_on_xen() || !is_initial_xendomain())
10665 + return 0;
10666 +
10667 + pci_bus_probe = pci_bus_type.probe;
10668 + pci_bus_type.probe = pci_bus_probe_wrapper;
10669 +
10670 + pci_bus_remove = pci_bus_type.remove;
10671 + pci_bus_type.remove = pci_bus_remove_wrapper;
10672 +
10673 + return 0;
10674 +}
10675 +
10676 +core_initcall(hook_pci_bus);
10677 Index: head-2008-11-25/drivers/xen/core/reboot.c
10678 ===================================================================
10679 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
10680 +++ head-2008-11-25/drivers/xen/core/reboot.c 2008-08-07 12:44:36.000000000 +0200
10681 @@ -0,0 +1,335 @@
10682 +#define __KERNEL_SYSCALLS__
10683 +#include <linux/version.h>
10684 +#include <linux/kernel.h>
10685 +#include <linux/unistd.h>
10686 +#include <linux/module.h>
10687 +#include <linux/reboot.h>
10688 +#include <linux/sysrq.h>
10689 +#include <asm/hypervisor.h>
10690 +#include <xen/xenbus.h>
10691 +#include <xen/evtchn.h>
10692 +#include <linux/kmod.h>
10693 +#include <linux/slab.h>
10694 +#include <linux/workqueue.h>
10695 +
10696 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
10697 +#include <xen/platform-compat.h>
10698 +#endif
10699 +
10700 +MODULE_LICENSE("Dual BSD/GPL");
10701 +
10702 +#define SHUTDOWN_INVALID -1
10703 +#define SHUTDOWN_POWEROFF 0
10704 +#define SHUTDOWN_SUSPEND 2
10705 +#define SHUTDOWN_RESUMING 3
10706 +#define SHUTDOWN_HALT 4
10707 +
10708 +/* Ignore multiple shutdown requests. */
10709 +static int shutting_down = SHUTDOWN_INVALID;
10710 +
10711 +/* Was last suspend request cancelled? */
10712 +static int suspend_cancelled;
10713 +
10714 +/* Can we leave APs online when we suspend? */
10715 +static int fast_suspend;
10716 +
10717 +static void __shutdown_handler(void *unused);
10718 +static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
10719 +
10720 +static int setup_suspend_evtchn(void);
10721 +
10722 +int __xen_suspend(int fast_suspend, void (*resume_notifier)(int));
10723 +
10724 +static int shutdown_process(void *__unused)
10725 +{
10726 + static char *envp[] = { "HOME=/", "TERM=linux",
10727 + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
10728 + static char *poweroff_argv[] = { "/sbin/poweroff", NULL };
10729 +
10730 + extern asmlinkage long sys_reboot(int magic1, int magic2,
10731 + unsigned int cmd, void *arg);
10732 +
10733 + if ((shutting_down == SHUTDOWN_POWEROFF) ||
10734 + (shutting_down == SHUTDOWN_HALT)) {
10735 + if (call_usermodehelper("/sbin/poweroff", poweroff_argv,
10736 + envp, 0) < 0) {
10737 +#ifdef CONFIG_XEN
10738 + sys_reboot(LINUX_REBOOT_MAGIC1,
10739 + LINUX_REBOOT_MAGIC2,
10740 + LINUX_REBOOT_CMD_POWER_OFF,
10741 + NULL);
10742 +#endif /* CONFIG_XEN */
10743 + }
10744 + }
10745 +
10746 + shutting_down = SHUTDOWN_INVALID; /* could try again */
10747 +
10748 + return 0;
10749 +}
10750 +
10751 +static void xen_resume_notifier(int _suspend_cancelled)
10752 +{
10753 + int old_state = xchg(&shutting_down, SHUTDOWN_RESUMING);
10754 + BUG_ON(old_state != SHUTDOWN_SUSPEND);
10755 + suspend_cancelled = _suspend_cancelled;
10756 +}
10757 +
10758 +static int xen_suspend(void *__unused)
10759 +{
10760 + int err, old_state;
10761 +
10762 + daemonize("suspend");
10763 + err = set_cpus_allowed(current, cpumask_of_cpu(0));
10764 + if (err) {
10765 +		printk(KERN_ERR "Xen suspend: cannot bind to CPU0 (%d)\n", err);
10766 + goto fail;
10767 + }
10768 +
10769 + do {
10770 + err = __xen_suspend(fast_suspend, xen_resume_notifier);
10771 + if (err) {
10772 + printk(KERN_ERR "Xen suspend failed (%d)\n", err);
10773 + goto fail;
10774 + }
10775 + if (!suspend_cancelled)
10776 + setup_suspend_evtchn();
10777 + old_state = cmpxchg(
10778 + &shutting_down, SHUTDOWN_RESUMING, SHUTDOWN_INVALID);
10779 + } while (old_state == SHUTDOWN_SUSPEND);
10780 +
10781 + switch (old_state) {
10782 + case SHUTDOWN_INVALID:
10783 + case SHUTDOWN_SUSPEND:
10784 + BUG();
10785 + case SHUTDOWN_RESUMING:
10786 + break;
10787 + default:
10788 + schedule_work(&shutdown_work);
10789 + break;
10790 + }
10791 +
10792 + return 0;
10793 +
10794 + fail:
10795 + old_state = xchg(&shutting_down, SHUTDOWN_INVALID);
10796 + BUG_ON(old_state != SHUTDOWN_SUSPEND);
10797 + return 0;
10798 +}
10799 +
10800 +static void switch_shutdown_state(int new_state)
10801 +{
10802 + int prev_state, old_state = SHUTDOWN_INVALID;
10803 +
10804 + /* We only drive shutdown_state into an active state. */
10805 + if (new_state == SHUTDOWN_INVALID)
10806 + return;
10807 +
10808 + do {
10809 + /* We drop this transition if already in an active state. */
10810 + if ((old_state != SHUTDOWN_INVALID) &&
10811 + (old_state != SHUTDOWN_RESUMING))
10812 + return;
10813 + /* Attempt to transition. */
10814 + prev_state = old_state;
10815 + old_state = cmpxchg(&shutting_down, old_state, new_state);
10816 + } while (old_state != prev_state);
10817 +
10818 + /* Either we kick off the work, or we leave it to xen_suspend(). */
10819 + if (old_state == SHUTDOWN_INVALID)
10820 + schedule_work(&shutdown_work);
10821 + else
10822 + BUG_ON(old_state != SHUTDOWN_RESUMING);
10823 +}
10824 +
10825 +static void __shutdown_handler(void *unused)
10826 +{
10827 + int err;
10828 +
10829 + err = kernel_thread((shutting_down == SHUTDOWN_SUSPEND) ?
10830 + xen_suspend : shutdown_process,
10831 + NULL, CLONE_FS | CLONE_FILES);
10832 +
10833 + if (err < 0) {
10834 + printk(KERN_WARNING "Error creating shutdown process (%d): "
10835 + "retrying...\n", -err);
10836 + schedule_delayed_work(&shutdown_work, HZ/2);
10837 + }
10838 +}
10839 +
10840 +static void shutdown_handler(struct xenbus_watch *watch,
10841 + const char **vec, unsigned int len)
10842 +{
10843 + extern void ctrl_alt_del(void);
10844 + char *str;
10845 + struct xenbus_transaction xbt;
10846 + int err, new_state = SHUTDOWN_INVALID;
10847 +
10848 + if ((shutting_down != SHUTDOWN_INVALID) &&
10849 + (shutting_down != SHUTDOWN_RESUMING))
10850 + return;
10851 +
10852 + again:
10853 + err = xenbus_transaction_start(&xbt);
10854 + if (err)
10855 + return;
10856 +
10857 + str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
10858 + /* Ignore read errors and empty reads. */
10859 + if (XENBUS_IS_ERR_READ(str)) {
10860 + xenbus_transaction_end(xbt, 1);
10861 + return;
10862 + }
10863 +
10864 + xenbus_write(xbt, "control", "shutdown", "");
10865 +
10866 + err = xenbus_transaction_end(xbt, 0);
10867 + if (err == -EAGAIN) {
10868 + kfree(str);
10869 + goto again;
10870 + }
10871 +
10872 + if (strcmp(str, "poweroff") == 0)
10873 + new_state = SHUTDOWN_POWEROFF;
10874 + else if (strcmp(str, "reboot") == 0)
10875 + ctrl_alt_del();
10876 + else if (strcmp(str, "suspend") == 0)
10877 + new_state = SHUTDOWN_SUSPEND;
10878 + else if (strcmp(str, "halt") == 0)
10879 + new_state = SHUTDOWN_HALT;
10880 + else
10881 +		printk(KERN_WARNING "Ignoring shutdown request: %s\n", str);
10882 +
10883 + switch_shutdown_state(new_state);
10884 +
10885 + kfree(str);
10886 +}
10887 +
10888 +static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
10889 + unsigned int len)
10890 +{
10891 + char sysrq_key = '\0';
10892 + struct xenbus_transaction xbt;
10893 + int err;
10894 +
10895 + again:
10896 + err = xenbus_transaction_start(&xbt);
10897 + if (err)
10898 + return;
10899 + if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
10900 + printk(KERN_ERR "Unable to read sysrq code in "
10901 + "control/sysrq\n");
10902 + xenbus_transaction_end(xbt, 1);
10903 + return;
10904 + }
10905 +
10906 + if (sysrq_key != '\0')
10907 + xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
10908 +
10909 + err = xenbus_transaction_end(xbt, 0);
10910 + if (err == -EAGAIN)
10911 + goto again;
10912 +
10913 +#ifdef CONFIG_MAGIC_SYSRQ
10914 + if (sysrq_key != '\0')
10915 + handle_sysrq(sysrq_key, NULL, NULL);
10916 +#endif
10917 +}
10918 +
10919 +static struct xenbus_watch shutdown_watch = {
10920 + .node = "control/shutdown",
10921 + .callback = shutdown_handler
10922 +};
10923 +
10924 +static struct xenbus_watch sysrq_watch = {
10925 + .node = "control/sysrq",
10926 + .callback = sysrq_handler
10927 +};
10928 +
10929 +static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs *ptregs)
10930 +{
10931 + switch_shutdown_state(SHUTDOWN_SUSPEND);
10932 + return IRQ_HANDLED;
10933 +}
10934 +
10935 +static int setup_suspend_evtchn(void)
10936 +{
10937 + static int irq;
10938 + int port;
10939 + char portstr[16];
10940 +
10941 + if (irq > 0)
10942 + unbind_from_irqhandler(irq, NULL);
10943 +
10944 + irq = bind_listening_port_to_irqhandler(0, suspend_int, 0, "suspend",
10945 + NULL);
10946 + if (irq <= 0)
10947 + return -1;
10948 +
10949 + port = irq_to_evtchn_port(irq);
10950 + printk(KERN_INFO "suspend: event channel %d\n", port);
10951 + sprintf(portstr, "%d", port);
10952 + xenbus_write(XBT_NIL, "device/suspend", "event-channel", portstr);
10953 +
10954 + return 0;
10955 +}
10956 +
10957 +static int setup_shutdown_watcher(void)
10958 +{
10959 + int err;
10960 +
10961 + xenbus_scanf(XBT_NIL, "control",
10962 + "platform-feature-multiprocessor-suspend",
10963 + "%d", &fast_suspend);
10964 +
10965 + err = register_xenbus_watch(&shutdown_watch);
10966 + if (err) {
10967 + printk(KERN_ERR "Failed to set shutdown watcher\n");
10968 + return err;
10969 + }
10970 +
10971 + err = register_xenbus_watch(&sysrq_watch);
10972 + if (err) {
10973 + printk(KERN_ERR "Failed to set sysrq watcher\n");
10974 + return err;
10975 + }
10976 +
10977 + /* suspend event channel */
10978 + err = setup_suspend_evtchn();
10979 + if (err) {
10980 + printk(KERN_ERR "Failed to register suspend event channel\n");
10981 + return err;
10982 + }
10983 +
10984 + return 0;
10985 +}
10986 +
10987 +#ifdef CONFIG_XEN
10988 +
10989 +static int shutdown_event(struct notifier_block *notifier,
10990 + unsigned long event,
10991 + void *data)
10992 +{
10993 + setup_shutdown_watcher();
10994 + return NOTIFY_DONE;
10995 +}
10996 +
10997 +static int __init setup_shutdown_event(void)
10998 +{
10999 + static struct notifier_block xenstore_notifier = {
11000 + .notifier_call = shutdown_event
11001 + };
11002 + register_xenstore_notifier(&xenstore_notifier);
11003 +
11004 + return 0;
11005 +}
11006 +
11007 +subsys_initcall(setup_shutdown_event);
11008 +
11009 +#else /* !defined(CONFIG_XEN) */
11010 +
11011 +int xen_reboot_init(void)
11012 +{
11013 + return setup_shutdown_watcher();
11014 +}
11015 +
11016 +#endif /* !defined(CONFIG_XEN) */
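
The watch registered above is the guest half of the control/shutdown protocol: the toolstack writes "poweroff", "reboot", "suspend" or "halt" into the domain's control/shutdown key, and the handler reads and clears that key inside a transaction before acting on it. A minimal dom0-side sketch of triggering this path, assuming libxenstore's xs.h API is available; the domain ID and path are illustrative:

#include <stdio.h>
#include <string.h>
#include <xs.h>		/* libxenstore */

int main(void)
{
	struct xs_handle *xsh = xs_daemon_open();
	const char *path = "/local/domain/7/control/shutdown";	/* domid 7 is illustrative */
	const char *req = "suspend";

	if (xsh == NULL)
		return 1;
	/* Writing the key fires the guest's shutdown_watch callback. */
	if (!xs_write(xsh, XBT_NULL, path, req, strlen(req)))
		fprintf(stderr, "xs_write failed\n");
	xs_daemon_close(xsh);
	return 0;
}
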
11017 Index: head-2008-11-25/drivers/xen/core/smpboot.c
11018 ===================================================================
11019 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
11020 +++ head-2008-11-25/drivers/xen/core/smpboot.c 2008-03-06 08:54:32.000000000 +0100
11021 @@ -0,0 +1,464 @@
11022 +/*
11023 + * Xen SMP booting functions
11024 + *
11025 + * See arch/i386/kernel/smpboot.c for copyright and credits for derived
11026 + * portions of this file.
11027 + */
11028 +
11029 +#include <linux/module.h>
11030 +#include <linux/init.h>
11031 +#include <linux/kernel.h>
11032 +#include <linux/mm.h>
11033 +#include <linux/sched.h>
11034 +#include <linux/kernel_stat.h>
11035 +#include <linux/smp_lock.h>
11036 +#include <linux/irq.h>
11037 +#include <linux/bootmem.h>
11038 +#include <linux/notifier.h>
11039 +#include <linux/cpu.h>
11040 +#include <linux/percpu.h>
11041 +#include <asm/desc.h>
11042 +#include <asm/arch_hooks.h>
11043 +#include <asm/pgalloc.h>
11044 +#include <xen/evtchn.h>
11045 +#include <xen/interface/vcpu.h>
11046 +#include <xen/cpu_hotplug.h>
11047 +#include <xen/xenbus.h>
11048 +
11049 +extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
11050 +extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
11051 +
11052 +extern int local_setup_timer(unsigned int cpu);
11053 +extern void local_teardown_timer(unsigned int cpu);
11054 +
11055 +extern void hypervisor_callback(void);
11056 +extern void failsafe_callback(void);
11057 +extern void system_call(void);
11058 +extern void smp_trap_init(trap_info_t *);
11059 +
11060 +/* Number of siblings per CPU package */
11061 +int smp_num_siblings = 1;
11062 +
11063 +cpumask_t cpu_online_map;
11064 +EXPORT_SYMBOL(cpu_online_map);
11065 +cpumask_t cpu_possible_map;
11066 +EXPORT_SYMBOL(cpu_possible_map);
11067 +cpumask_t cpu_initialized_map;
11068 +
11069 +struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
11070 +EXPORT_SYMBOL(cpu_data);
11071 +
11072 +#ifdef CONFIG_HOTPLUG_CPU
11073 +DEFINE_PER_CPU(int, cpu_state) = { 0 };
11074 +#endif
11075 +
11076 +static DEFINE_PER_CPU(int, resched_irq);
11077 +static DEFINE_PER_CPU(int, callfunc_irq);
11078 +static char resched_name[NR_CPUS][15];
11079 +static char callfunc_name[NR_CPUS][15];
11080 +
11081 +u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
11082 +
11083 +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
11084 +cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
11085 +EXPORT_SYMBOL(cpu_core_map);
11086 +
11087 +#if defined(__i386__)
11088 +u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
11089 +EXPORT_SYMBOL(x86_cpu_to_apicid);
11090 +#elif !defined(CONFIG_X86_IO_APIC)
11091 +unsigned int maxcpus = NR_CPUS;
11092 +#endif
11093 +
11094 +void __init prefill_possible_map(void)
11095 +{
11096 + int i, rc;
11097 +
11098 + for_each_possible_cpu(i)
11099 + if (i != smp_processor_id())
11100 + return;
11101 +
11102 + for (i = 0; i < NR_CPUS; i++) {
11103 + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
11104 + if (rc >= 0)
11105 + cpu_set(i, cpu_possible_map);
11106 + }
11107 +}
11108 +
11109 +void __init smp_alloc_memory(void)
11110 +{
11111 +}
11112 +
11113 +static inline void
11114 +set_cpu_sibling_map(unsigned int cpu)
11115 +{
11116 + cpu_data[cpu].phys_proc_id = cpu;
11117 + cpu_data[cpu].cpu_core_id = 0;
11118 +
11119 + cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
11120 + cpu_core_map[cpu] = cpumask_of_cpu(cpu);
11121 +
11122 + cpu_data[cpu].booted_cores = 1;
11123 +}
11124 +
11125 +static void
11126 +remove_siblinginfo(unsigned int cpu)
11127 +{
11128 + cpu_data[cpu].phys_proc_id = BAD_APICID;
11129 + cpu_data[cpu].cpu_core_id = BAD_APICID;
11130 +
11131 + cpus_clear(cpu_sibling_map[cpu]);
11132 + cpus_clear(cpu_core_map[cpu]);
11133 +
11134 + cpu_data[cpu].booted_cores = 0;
11135 +}
11136 +
11137 +static int __cpuinit xen_smp_intr_init(unsigned int cpu)
11138 +{
11139 + int rc;
11140 +
11141 + per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
11142 +
11143 + sprintf(resched_name[cpu], "resched%u", cpu);
11144 + rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
11145 + cpu,
11146 + smp_reschedule_interrupt,
11147 + SA_INTERRUPT,
11148 + resched_name[cpu],
11149 + NULL);
11150 + if (rc < 0)
11151 + goto fail;
11152 + per_cpu(resched_irq, cpu) = rc;
11153 +
11154 + sprintf(callfunc_name[cpu], "callfunc%u", cpu);
11155 + rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
11156 + cpu,
11157 + smp_call_function_interrupt,
11158 + SA_INTERRUPT,
11159 + callfunc_name[cpu],
11160 + NULL);
11161 + if (rc < 0)
11162 + goto fail;
11163 + per_cpu(callfunc_irq, cpu) = rc;
11164 +
11165 + if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0))
11166 + goto fail;
11167 +
11168 + return 0;
11169 +
11170 + fail:
11171 + if (per_cpu(resched_irq, cpu) >= 0)
11172 + unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
11173 + if (per_cpu(callfunc_irq, cpu) >= 0)
11174 + unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
11175 + return rc;
11176 +}
11177 +
11178 +#ifdef CONFIG_HOTPLUG_CPU
11179 +static void xen_smp_intr_exit(unsigned int cpu)
11180 +{
11181 + if (cpu != 0)
11182 + local_teardown_timer(cpu);
11183 +
11184 + unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
11185 + unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
11186 +}
11187 +#endif
11188 +
11189 +void __cpuinit cpu_bringup(void)
11190 +{
11191 + cpu_init();
11192 + identify_cpu(cpu_data + smp_processor_id());
11193 + touch_softlockup_watchdog();
11194 + preempt_disable();
11195 + local_irq_enable();
11196 +}
11197 +
11198 +static void __cpuinit cpu_bringup_and_idle(void)
11199 +{
11200 + cpu_bringup();
11201 + cpu_idle();
11202 +}
11203 +
11204 +static void __cpuinit cpu_initialize_context(unsigned int cpu)
11205 +{
11206 + /* vcpu_guest_context_t is too large to allocate on the stack.
11207 + * Hence we allocate statically and protect it with a lock */
11208 + static vcpu_guest_context_t ctxt;
11209 + static DEFINE_SPINLOCK(ctxt_lock);
11210 +
11211 + struct task_struct *idle = idle_task(cpu);
11212 +#ifdef __x86_64__
11213 + struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
11214 +#else
11215 + struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
11216 +#endif
11217 +
11218 + if (cpu_test_and_set(cpu, cpu_initialized_map))
11219 + return;
11220 +
11221 + spin_lock(&ctxt_lock);
11222 +
11223 + memset(&ctxt, 0, sizeof(ctxt));
11224 +
11225 + ctxt.flags = VGCF_IN_KERNEL;
11226 + ctxt.user_regs.ds = __USER_DS;
11227 + ctxt.user_regs.es = __USER_DS;
11228 + ctxt.user_regs.fs = 0;
11229 + ctxt.user_regs.gs = 0;
11230 + ctxt.user_regs.ss = __KERNEL_DS;
11231 + ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
11232 + ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
11233 +
11234 + memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
11235 +
11236 + smp_trap_init(ctxt.trap_ctxt);
11237 +
11238 + ctxt.ldt_ents = 0;
11239 +
11240 + ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
11241 + ctxt.gdt_ents = gdt_descr->size / 8;
11242 +
11243 +#ifdef __i386__
11244 + ctxt.user_regs.cs = __KERNEL_CS;
11245 + ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
11246 +
11247 + ctxt.kernel_ss = __KERNEL_DS;
11248 + ctxt.kernel_sp = idle->thread.esp0;
11249 +
11250 + ctxt.event_callback_cs = __KERNEL_CS;
11251 + ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
11252 + ctxt.failsafe_callback_cs = __KERNEL_CS;
11253 + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
11254 +
11255 + ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
11256 +#else /* __x86_64__ */
11257 + ctxt.user_regs.cs = __KERNEL_CS;
11258 + ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
11259 +
11260 + ctxt.kernel_ss = __KERNEL_DS;
11261 + ctxt.kernel_sp = idle->thread.rsp0;
11262 +
11263 + ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
11264 + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
11265 + ctxt.syscall_callback_eip = (unsigned long)system_call;
11266 +
11267 + ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
11268 +
11269 + ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
11270 +#endif
11271 +
11272 + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt))
11273 + BUG();
11274 +
11275 + spin_unlock(&ctxt_lock);
11276 +}
11277 +
11278 +void __init smp_prepare_cpus(unsigned int max_cpus)
11279 +{
11280 + unsigned int cpu;
11281 + struct task_struct *idle;
11282 + int apicid, acpiid;
11283 + struct vcpu_get_physid cpu_id;
11284 +#ifdef __x86_64__
11285 + struct desc_ptr *gdt_descr;
11286 +#else
11287 + struct Xgt_desc_struct *gdt_descr;
11288 +#endif
11289 +
11290 + apicid = 0;
11291 + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) {
11292 + apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
11293 + acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
11294 +#ifdef CONFIG_ACPI
11295 + if (acpiid != 0xff)
11296 + x86_acpiid_to_apicid[acpiid] = apicid;
11297 +#endif
11298 + }
11299 + boot_cpu_data.apicid = apicid;
11300 + cpu_data[0] = boot_cpu_data;
11301 +
11302 + cpu_2_logical_apicid[0] = apicid;
11303 + x86_cpu_to_apicid[0] = apicid;
11304 +
11305 + current_thread_info()->cpu = 0;
11306 +
11307 + for (cpu = 0; cpu < NR_CPUS; cpu++) {
11308 + cpus_clear(cpu_sibling_map[cpu]);
11309 + cpus_clear(cpu_core_map[cpu]);
11310 + }
11311 +
11312 + set_cpu_sibling_map(0);
11313 +
11314 + if (xen_smp_intr_init(0))
11315 + BUG();
11316 +
11317 + cpu_initialized_map = cpumask_of_cpu(0);
11318 +
11319 + /* Restrict the possible_map according to max_cpus. */
11320 + while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
11321 + for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
11322 + continue;
11323 + cpu_clear(cpu, cpu_possible_map);
11324 + }
11325 +
11326 + for_each_possible_cpu (cpu) {
11327 + if (cpu == 0)
11328 + continue;
11329 +
11330 +#ifdef __x86_64__
11331 + gdt_descr = &cpu_gdt_descr[cpu];
11332 +#else
11333 + gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
11334 +#endif
11335 + gdt_descr->address = get_zeroed_page(GFP_KERNEL);
11336 + if (unlikely(!gdt_descr->address)) {
11337 + printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
11338 + cpu);
11339 + continue;
11340 + }
11341 + gdt_descr->size = GDT_SIZE;
11342 + memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
11343 + make_page_readonly(
11344 + (void *)gdt_descr->address,
11345 + XENFEAT_writable_descriptor_tables);
11346 +
11347 + apicid = cpu;
11348 + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
11349 + apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
11350 + acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
11351 +#ifdef CONFIG_ACPI
11352 + if (acpiid != 0xff)
11353 + x86_acpiid_to_apicid[acpiid] = apicid;
11354 +#endif
11355 + }
11356 + cpu_data[cpu] = boot_cpu_data;
11357 + cpu_data[cpu].apicid = apicid;
11358 +
11359 + cpu_2_logical_apicid[cpu] = apicid;
11360 + x86_cpu_to_apicid[cpu] = apicid;
11361 +
11362 + idle = fork_idle(cpu);
11363 + if (IS_ERR(idle))
11364 + panic("failed fork for CPU %d", cpu);
11365 +
11366 +#ifdef __x86_64__
11367 + cpu_pda(cpu)->pcurrent = idle;
11368 + cpu_pda(cpu)->cpunumber = cpu;
11369 + clear_ti_thread_flag(idle->thread_info, TIF_FORK);
11370 +#endif
11371 +
11372 + irq_ctx_init(cpu);
11373 +
11374 +#ifdef CONFIG_HOTPLUG_CPU
11375 + if (is_initial_xendomain())
11376 + cpu_set(cpu, cpu_present_map);
11377 +#else
11378 + cpu_set(cpu, cpu_present_map);
11379 +#endif
11380 + }
11381 +
11382 + init_xenbus_allowed_cpumask();
11383 +
11384 +#ifdef CONFIG_X86_IO_APIC
11385 + /*
11386 + * Here we can be sure that there is an IO-APIC in the system. Let's
11387 + * go and set it up:
11388 + */
11389 + if (!skip_ioapic_setup && nr_ioapics)
11390 + setup_IO_APIC();
11391 +#endif
11392 +}
11393 +
11394 +void __devinit smp_prepare_boot_cpu(void)
11395 +{
11396 + prefill_possible_map();
11397 +}
11398 +
11399 +#ifdef CONFIG_HOTPLUG_CPU
11400 +
11401 +/*
11402 + * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
11403 + * But do it early enough to catch critical for_each_present_cpu() loops
11404 + * in i386-specific code.
11405 + */
11406 +static int __init initialize_cpu_present_map(void)
11407 +{
11408 + cpu_present_map = cpu_possible_map;
11409 + return 0;
11410 +}
11411 +core_initcall(initialize_cpu_present_map);
11412 +
11413 +int __cpu_disable(void)
11414 +{
11415 + cpumask_t map = cpu_online_map;
11416 + unsigned int cpu = smp_processor_id();
11417 +
11418 + if (cpu == 0)
11419 + return -EBUSY;
11420 +
11421 + remove_siblinginfo(cpu);
11422 +
11423 + cpu_clear(cpu, map);
11424 + fixup_irqs(map);
11425 + cpu_clear(cpu, cpu_online_map);
11426 +
11427 + return 0;
11428 +}
11429 +
11430 +void __cpu_die(unsigned int cpu)
11431 +{
11432 + while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
11433 + current->state = TASK_UNINTERRUPTIBLE;
11434 + schedule_timeout(HZ/10);
11435 + }
11436 +
11437 + xen_smp_intr_exit(cpu);
11438 +
11439 + if (num_online_cpus() == 1)
11440 + alternatives_smp_switch(0);
11441 +}
11442 +
11443 +#endif /* CONFIG_HOTPLUG_CPU */
11444 +
11445 +int __cpuinit __cpu_up(unsigned int cpu)
11446 +{
11447 + int rc;
11448 +
11449 + rc = cpu_up_check(cpu);
11450 + if (rc)
11451 + return rc;
11452 +
11453 + cpu_initialize_context(cpu);
11454 +
11455 + if (num_online_cpus() == 1)
11456 + alternatives_smp_switch(1);
11457 +
11458 + /* This must be done before setting cpu_online_map */
11459 + set_cpu_sibling_map(cpu);
11460 + wmb();
11461 +
11462 + rc = xen_smp_intr_init(cpu);
11463 + if (rc) {
11464 + remove_siblinginfo(cpu);
11465 + return rc;
11466 + }
11467 +
11468 + cpu_set(cpu, cpu_online_map);
11469 +
11470 + rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
11471 + BUG_ON(rc);
11472 +
11473 + return 0;
11474 +}
11475 +
11476 +void __init smp_cpus_done(unsigned int max_cpus)
11477 +{
11478 +}
11479 +
11480 +#ifndef CONFIG_X86_LOCAL_APIC
11481 +int setup_profiling_timer(unsigned int multiplier)
11482 +{
11483 + return -EINVAL;
11484 +}
11485 +#endif
11486 Index: head-2008-11-25/drivers/xen/core/xen_proc.c
11487 ===================================================================
11488 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
11489 +++ head-2008-11-25/drivers/xen/core/xen_proc.c 2007-06-12 13:13:44.000000000 +0200
11490 @@ -0,0 +1,23 @@
11491 +
11492 +#include <linux/module.h>
11493 +#include <linux/proc_fs.h>
11494 +#include <xen/xen_proc.h>
11495 +
11496 +static struct proc_dir_entry *xen_base;
11497 +
11498 +struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
11499 +{
11500 + if ( xen_base == NULL )
11501 + if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL )
11502 + panic("Couldn't create /proc/xen");
11503 + return create_proc_entry(name, mode, xen_base);
11504 +}
11505 +
11506 +EXPORT_SYMBOL_GPL(create_xen_proc_entry);
11507 +
11508 +void remove_xen_proc_entry(const char *name)
11509 +{
11510 + remove_proc_entry(name, xen_base);
11511 +}
11512 +
11513 +EXPORT_SYMBOL_GPL(remove_xen_proc_entry);
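
create_xen_proc_entry() and remove_xen_proc_entry() are the whole of the /proc/xen plumbing; other Xen drivers attach their read handlers or file operations to the returned entry. A hypothetical consumer, assuming the 2.6.18-era read_proc callback interface; the node name and contents are made up:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <xen/xen_proc.h>

static int example_read(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	*eof = 1;
	return sprintf(page, "hello from /proc/xen/example\n");
}

static int __init example_init(void)
{
	/* Creates /proc/xen/example (and /proc/xen itself on first use). */
	struct proc_dir_entry *pde = create_xen_proc_entry("example", 0400);

	if (pde == NULL)
		return -ENOMEM;
	pde->read_proc = example_read;
	return 0;
}

static void __exit example_exit(void)
{
	remove_xen_proc_entry("example");
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
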
11514 Index: head-2008-11-25/drivers/xen/core/xen_sysfs.c
11515 ===================================================================
11516 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
11517 +++ head-2008-11-25/drivers/xen/core/xen_sysfs.c 2008-10-29 09:55:56.000000000 +0100
11518 @@ -0,0 +1,427 @@
11519 +/*
11520 + * copyright (c) 2006 IBM Corporation
11521 + * Authored by: Mike D. Day <ncmike@us.ibm.com>
11522 + *
11523 + * This program is free software; you can redistribute it and/or modify
11524 + * it under the terms of the GNU General Public License version 2 as
11525 + * published by the Free Software Foundation.
11526 + */
11527 +
11528 +#include <linux/err.h>
11529 +#include <linux/kernel.h>
11530 +#include <linux/module.h>
11531 +#include <linux/init.h>
11532 +#include <asm/hypervisor.h>
11533 +#include <xen/features.h>
11534 +#include <xen/hypervisor_sysfs.h>
11535 +#include <xen/xenbus.h>
11536 +#include <xen/interface/kexec.h>
11537 +
11538 +MODULE_LICENSE("GPL");
11539 +MODULE_AUTHOR("Mike D. Day <ncmike@us.ibm.com>");
11540 +
11541 +static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
11542 +{
11543 + return sprintf(buffer, "xen\n");
11544 +}
11545 +
11546 +HYPERVISOR_ATTR_RO(type);
11547 +
11548 +static int __init xen_sysfs_type_init(void)
11549 +{
11550 + return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
11551 +}
11552 +
11553 +static void xen_sysfs_type_destroy(void)
11554 +{
11555 + sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
11556 +}
11557 +
11558 +/* xen version attributes */
11559 +static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
11560 +{
11561 + int version = HYPERVISOR_xen_version(XENVER_version, NULL);
11562 + if (version)
11563 + return sprintf(buffer, "%d\n", version >> 16);
11564 + return -ENODEV;
11565 +}
11566 +
11567 +HYPERVISOR_ATTR_RO(major);
11568 +
11569 +static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
11570 +{
11571 + int version = HYPERVISOR_xen_version(XENVER_version, NULL);
11572 + if (version)
11573 + return sprintf(buffer, "%d\n", version & 0xff);
11574 + return -ENODEV;
11575 +}
11576 +
11577 +HYPERVISOR_ATTR_RO(minor);
11578 +
11579 +static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
11580 +{
11581 + int ret = -ENOMEM;
11582 + char *extra;
11583 +
11584 + extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
11585 + if (extra) {
11586 + ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
11587 + if (!ret)
11588 + ret = sprintf(buffer, "%s\n", extra);
11589 + kfree(extra);
11590 + }
11591 +
11592 + return ret;
11593 +}
11594 +
11595 +HYPERVISOR_ATTR_RO(extra);
11596 +
11597 +static struct attribute *version_attrs[] = {
11598 + &major_attr.attr,
11599 + &minor_attr.attr,
11600 + &extra_attr.attr,
11601 + NULL
11602 +};
11603 +
11604 +static struct attribute_group version_group = {
11605 + .name = "version",
11606 + .attrs = version_attrs,
11607 +};
11608 +
11609 +static int __init xen_sysfs_version_init(void)
11610 +{
11611 + return sysfs_create_group(&hypervisor_subsys.kset.kobj,
11612 + &version_group);
11613 +}
11614 +
11615 +static void xen_sysfs_version_destroy(void)
11616 +{
11617 + sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group);
11618 +}
11619 +
11620 +/* UUID */
11621 +
11622 +static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
11623 +{
11624 + char *vm, *val;
11625 + int ret;
11626 + extern int xenstored_ready;
11627 +
11628 + if (!xenstored_ready)
11629 + return -EBUSY;
11630 +
11631 + vm = xenbus_read(XBT_NIL, "vm", "", NULL);
11632 + if (IS_ERR(vm))
11633 + return PTR_ERR(vm);
11634 + val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
11635 + kfree(vm);
11636 + if (IS_ERR(val))
11637 + return PTR_ERR(val);
11638 + ret = sprintf(buffer, "%s\n", val);
11639 + kfree(val);
11640 + return ret;
11641 +}
11642 +
11643 +HYPERVISOR_ATTR_RO(uuid);
11644 +
11645 +static int __init xen_sysfs_uuid_init(void)
11646 +{
11647 + return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
11648 +}
11649 +
11650 +static void xen_sysfs_uuid_destroy(void)
11651 +{
11652 + sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
11653 +}
11654 +
11655 +/* xen compilation attributes */
11656 +
11657 +static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
11658 +{
11659 + int ret = -ENOMEM;
11660 + struct xen_compile_info *info;
11661 +
11662 + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
11663 + if (info) {
11664 + ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
11665 + if (!ret)
11666 + ret = sprintf(buffer, "%s\n", info->compiler);
11667 + kfree(info);
11668 + }
11669 +
11670 + return ret;
11671 +}
11672 +
11673 +HYPERVISOR_ATTR_RO(compiler);
11674 +
11675 +static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
11676 +{
11677 + int ret = -ENOMEM;
11678 + struct xen_compile_info *info;
11679 +
11680 + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
11681 + if (info) {
11682 + ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
11683 + if (!ret)
11684 + ret = sprintf(buffer, "%s\n", info->compile_by);
11685 + kfree(info);
11686 + }
11687 +
11688 + return ret;
11689 +}
11690 +
11691 +HYPERVISOR_ATTR_RO(compiled_by);
11692 +
11693 +static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
11694 +{
11695 + int ret = -ENOMEM;
11696 + struct xen_compile_info *info;
11697 +
11698 + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
11699 + if (info) {
11700 + ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
11701 + if (!ret)
11702 + ret = sprintf(buffer, "%s\n", info->compile_date);
11703 + kfree(info);
11704 + }
11705 +
11706 + return ret;
11707 +}
11708 +
11709 +HYPERVISOR_ATTR_RO(compile_date);
11710 +
11711 +static struct attribute *xen_compile_attrs[] = {
11712 + &compiler_attr.attr,
11713 + &compiled_by_attr.attr,
11714 + &compile_date_attr.attr,
11715 + NULL
11716 +};
11717 +
11718 +static struct attribute_group xen_compilation_group = {
11719 + .name = "compilation",
11720 + .attrs = xen_compile_attrs,
11721 +};
11722 +
11723 +static int __init xen_compilation_init(void)
11724 +{
11725 + return sysfs_create_group(&hypervisor_subsys.kset.kobj,
11726 + &xen_compilation_group);
11727 +}
11728 +
11729 +static void xen_compilation_destroy(void)
11730 +{
11731 + sysfs_remove_group(&hypervisor_subsys.kset.kobj,
11732 + &xen_compilation_group);
11733 +}
11734 +
11735 +/* xen properties info */
11736 +
11737 +static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
11738 +{
11739 + int ret = -ENOMEM;
11740 + char *caps;
11741 +
11742 + caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
11743 + if (caps) {
11744 + ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
11745 + if (!ret)
11746 + ret = sprintf(buffer, "%s\n", caps);
11747 + kfree(caps);
11748 + }
11749 +
11750 + return ret;
11751 +}
11752 +
11753 +HYPERVISOR_ATTR_RO(capabilities);
11754 +
11755 +static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
11756 +{
11757 + int ret = -ENOMEM;
11758 + char *cset;
11759 +
11760 + cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
11761 + if (cset) {
11762 + ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
11763 + if (!ret)
11764 + ret = sprintf(buffer, "%s\n", cset);
11765 + kfree(cset);
11766 + }
11767 +
11768 + return ret;
11769 +}
11770 +
11771 +HYPERVISOR_ATTR_RO(changeset);
11772 +
11773 +static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
11774 +{
11775 + int ret = -ENOMEM;
11776 + struct xen_platform_parameters *parms;
11777 +
11778 + parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
11779 + if (parms) {
11780 + ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
11781 + parms);
11782 + if (!ret)
11783 + ret = sprintf(buffer, "%lx\n", parms->virt_start);
11784 + kfree(parms);
11785 + }
11786 +
11787 + return ret;
11788 +}
11789 +
11790 +HYPERVISOR_ATTR_RO(virtual_start);
11791 +
11792 +static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
11793 +{
11794 + int ret;
11795 +
11796 + ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
11797 + if (ret > 0)
11798 + ret = sprintf(buffer, "%x\n", ret);
11799 +
11800 + return ret;
11801 +}
11802 +
11803 +HYPERVISOR_ATTR_RO(pagesize);
11804 +
11805 +/* eventually there will be several more features to export */
11806 +static ssize_t xen_feature_show(int index, char *buffer)
11807 +{
11808 + int ret = -ENOMEM;
11809 + struct xen_feature_info *info;
11810 +
11811 + info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL);
11812 + if (info) {
11813 + info->submap_idx = index;
11814 + ret = HYPERVISOR_xen_version(XENVER_get_features, info);
11815 + if (!ret)
11816 + ret = sprintf(buffer, "%d\n", info->submap);
11817 + kfree(info);
11818 + }
11819 +
11820 + return ret;
11821 +}
11822 +
11823 +static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer)
11824 +{
11825 + return xen_feature_show(XENFEAT_writable_page_tables, buffer);
11826 +}
11827 +
11828 +HYPERVISOR_ATTR_RO(writable_pt);
11829 +
11830 +static struct attribute *xen_properties_attrs[] = {
11831 + &capabilities_attr.attr,
11832 + &changeset_attr.attr,
11833 + &virtual_start_attr.attr,
11834 + &pagesize_attr.attr,
11835 + &writable_pt_attr.attr,
11836 + NULL
11837 +};
11838 +
11839 +static struct attribute_group xen_properties_group = {
11840 + .name = "properties",
11841 + .attrs = xen_properties_attrs,
11842 +};
11843 +
11844 +static int __init xen_properties_init(void)
11845 +{
11846 + return sysfs_create_group(&hypervisor_subsys.kset.kobj,
11847 + &xen_properties_group);
11848 +}
11849 +
11850 +static void xen_properties_destroy(void)
11851 +{
11852 + sysfs_remove_group(&hypervisor_subsys.kset.kobj,
11853 + &xen_properties_group);
11854 +}
11855 +
11856 +#ifdef CONFIG_KEXEC
11857 +
11858 +extern size_t vmcoreinfo_size_xen;
11859 +extern unsigned long paddr_vmcoreinfo_xen;
11860 +
11861 +static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page)
11862 +{
11863 + return sprintf(page, "%lx %zx\n",
11864 + paddr_vmcoreinfo_xen, vmcoreinfo_size_xen);
11865 +}
11866 +
11867 +HYPERVISOR_ATTR_RO(vmcoreinfo);
11868 +
11869 +static int __init xen_sysfs_vmcoreinfo_init(void)
11870 +{
11871 + return sysfs_create_file(&hypervisor_subsys.kset.kobj,
11872 + &vmcoreinfo_attr.attr);
11873 +}
11874 +
11875 +static void xen_sysfs_vmcoreinfo_destroy(void)
11876 +{
11877 + sysfs_remove_file(&hypervisor_subsys.kset.kobj, &vmcoreinfo_attr.attr);
11878 +}
11879 +
11880 +#endif
11881 +
11882 +static int __init hyper_sysfs_init(void)
11883 +{
11884 + int ret;
11885 +
11886 + if (!is_running_on_xen())
11887 + return -ENODEV;
11888 +
11889 + ret = xen_sysfs_type_init();
11890 + if (ret)
11891 + goto out;
11892 + ret = xen_sysfs_version_init();
11893 + if (ret)
11894 + goto version_out;
11895 + ret = xen_compilation_init();
11896 + if (ret)
11897 + goto comp_out;
11898 + ret = xen_sysfs_uuid_init();
11899 + if (ret)
11900 + goto uuid_out;
11901 + ret = xen_properties_init();
11902 + if (ret)
11903 + goto prop_out;
11904 +#ifdef CONFIG_KEXEC
11905 + if (vmcoreinfo_size_xen != 0) {
11906 + ret = xen_sysfs_vmcoreinfo_init();
11907 + if (ret)
11908 + goto vmcoreinfo_out;
11909 + }
11910 +#endif
11911 +
11912 + goto out;
11913 +
11914 +#ifdef CONFIG_KEXEC
11915 +vmcoreinfo_out:
11916 +#endif
11917 + xen_properties_destroy();
11918 +prop_out:
11919 + xen_sysfs_uuid_destroy();
11920 +uuid_out:
11921 + xen_compilation_destroy();
11922 +comp_out:
11923 + xen_sysfs_version_destroy();
11924 +version_out:
11925 + xen_sysfs_type_destroy();
11926 +out:
11927 + return ret;
11928 +}
11929 +
11930 +static void __exit hyper_sysfs_exit(void)
11931 +{
11932 +#ifdef CONFIG_KEXEC
11933 + if (vmcoreinfo_size_xen != 0)
11934 + xen_sysfs_vmcoreinfo_destroy();
11935 +#endif
11936 + xen_properties_destroy();
11937 + xen_compilation_destroy();
11938 + xen_sysfs_uuid_destroy();
11939 + xen_sysfs_version_destroy();
11940 + xen_sysfs_type_destroy();
11941 +
11942 +}
11943 +
11944 +module_init(hyper_sysfs_init);
11945 +module_exit(hyper_sysfs_exit);
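
Once hyper_sysfs_init() has run, the groups registered above appear under /sys/hypervisor: type, uuid, version/{major,minor,extra}, compilation/* and properties/*. A small userspace sketch that dumps a few of those attributes, using the paths as created by this driver:

#include <stdio.h>

static void show(const char *path)
{
	char buf[128];
	FILE *f = fopen(path, "r");

	if (f == NULL)
		return;
	if (fgets(buf, sizeof(buf), f) != NULL)
		printf("%-40s %s", path, buf);
	fclose(f);
}

int main(void)
{
	show("/sys/hypervisor/type");
	show("/sys/hypervisor/version/major");
	show("/sys/hypervisor/version/minor");
	show("/sys/hypervisor/version/extra");
	show("/sys/hypervisor/properties/capabilities");
	return 0;
}
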
11946 Index: head-2008-11-25/drivers/xen/core/xencomm.c
11947 ===================================================================
11948 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
11949 +++ head-2008-11-25/drivers/xen/core/xencomm.c 2007-11-12 08:41:05.000000000 +0100
11950 @@ -0,0 +1,229 @@
11951 +/*
11952 + * This program is free software; you can redistribute it and/or modify
11953 + * it under the terms of the GNU General Public License as published by
11954 + * the Free Software Foundation; either version 2 of the License, or
11955 + * (at your option) any later version.
11956 + *
11957 + * This program is distributed in the hope that it will be useful,
11958 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
11959 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11960 + * GNU General Public License for more details.
11961 + *
11962 + * You should have received a copy of the GNU General Public License
11963 + * along with this program; if not, write to the Free Software
11964 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
11965 + *
11966 + * Copyright (C) IBM Corp. 2006
11967 + *
11968 + * Authors: Hollis Blanchard <hollisb@us.ibm.com>
11969 + */
11970 +
11971 +#include <linux/gfp.h>
11972 +#include <linux/mm.h>
11973 +#include <asm/page.h>
11974 +#include <xen/xencomm.h>
11975 +#include <xen/interface/xen.h>
11976 +#ifdef __ia64__
11977 +#include <asm/xen/xencomm.h> /* for is_kern_addr() */
11978 +#endif
11979 +
11980 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
11981 +#include <xen/platform-compat.h>
11982 +#endif
11983 +
11984 +static int xencomm_init(struct xencomm_desc *desc,
11985 + void *buffer, unsigned long bytes)
11986 +{
11987 + unsigned long recorded = 0;
11988 + int i = 0;
11989 +
11990 + while ((recorded < bytes) && (i < desc->nr_addrs)) {
11991 + unsigned long vaddr = (unsigned long)buffer + recorded;
11992 + unsigned long paddr;
11993 + int offset;
11994 + int chunksz;
11995 +
11996 + offset = vaddr % PAGE_SIZE; /* handle partial pages */
11997 + chunksz = min(PAGE_SIZE - offset, bytes - recorded);
11998 +
11999 + paddr = xencomm_vtop(vaddr);
12000 + if (paddr == ~0UL) {
12001 + printk("%s: couldn't translate vaddr %lx\n",
12002 + __func__, vaddr);
12003 + return -EINVAL;
12004 + }
12005 +
12006 + desc->address[i++] = paddr;
12007 + recorded += chunksz;
12008 + }
12009 +
12010 + if (recorded < bytes) {
12011 + printk("%s: could only translate %ld of %ld bytes\n",
12012 + __func__, recorded, bytes);
12013 + return -ENOSPC;
12014 + }
12015 +
12016 + /* mark remaining addresses invalid (just for safety) */
12017 + while (i < desc->nr_addrs)
12018 + desc->address[i++] = XENCOMM_INVALID;
12019 +
12020 + desc->magic = XENCOMM_MAGIC;
12021 +
12022 + return 0;
12023 +}
12024 +
12025 +static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask,
12026 + void *buffer, unsigned long bytes)
12027 +{
12028 + struct xencomm_desc *desc;
12029 + unsigned long buffer_ulong = (unsigned long)buffer;
12030 + unsigned long start = buffer_ulong & PAGE_MASK;
12031 + unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK;
12032 + unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT;
12033 + unsigned long size = sizeof(*desc) +
12034 + sizeof(desc->address[0]) * nr_addrs;
12035 +
12036 + /*
12037 + * slab allocator returns at least sizeof(void*) aligned pointer.
12038 + * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might
12039 + * cross page boundary.
12040 + */
12041 + if (sizeof(*desc) > sizeof(void*)) {
12042 + unsigned long order = get_order(size);
12043 + desc = (struct xencomm_desc *)__get_free_pages(gfp_mask,
12044 + order);
12045 + if (desc == NULL)
12046 + return NULL;
12047 +
12048 + desc->nr_addrs =
12049 + ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) /
12050 + sizeof(*desc->address);
12051 + } else {
12052 + desc = kmalloc(size, gfp_mask);
12053 + if (desc == NULL)
12054 + return NULL;
12055 +
12056 + desc->nr_addrs = nr_addrs;
12057 + }
12058 + return desc;
12059 +}
12060 +
12061 +void xencomm_free(struct xencomm_handle *desc)
12062 +{
12063 + if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) {
12064 + struct xencomm_desc *desc__ = (struct xencomm_desc*)desc;
12065 + if (sizeof(*desc__) > sizeof(void*)) {
12066 + unsigned long size = sizeof(*desc__) +
12067 + sizeof(desc__->address[0]) * desc__->nr_addrs;
12068 + unsigned long order = get_order(size);
12069 + free_pages((unsigned long)__va(desc), order);
12070 + } else
12071 + kfree(__va(desc));
12072 + }
12073 +}
12074 +
12075 +static int xencomm_create(void *buffer, unsigned long bytes, struct xencomm_desc **ret, gfp_t gfp_mask)
12076 +{
12077 + struct xencomm_desc *desc;
12078 + int rc;
12079 +
12080 + pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes);
12081 +
12082 + if (bytes == 0) {
12083 + /* don't create a descriptor; Xen recognizes NULL. */
12084 + BUG_ON(buffer != NULL);
12085 + *ret = NULL;
12086 + return 0;
12087 + }
12088 +
12089 + BUG_ON(buffer == NULL); /* 'bytes' is non-zero */
12090 +
12091 + desc = xencomm_alloc(gfp_mask, buffer, bytes);
12092 + if (!desc) {
12093 + printk("%s failure\n", "xencomm_alloc");
12094 + return -ENOMEM;
12095 + }
12096 +
12097 + rc = xencomm_init(desc, buffer, bytes);
12098 + if (rc) {
12099 + printk("%s failure: %d\n", "xencomm_init", rc);
12100 + xencomm_free((struct xencomm_handle *)__pa(desc));
12101 + return rc;
12102 + }
12103 +
12104 + *ret = desc;
12105 + return 0;
12106 +}
12107 +
12108 +/* true if the kernel address is physically contiguous, i.e. not in the VMALLOC region */
12109 +static int is_phys_contiguous(unsigned long addr)
12110 +{
12111 + if (!is_kernel_addr(addr))
12112 + return 0;
12113 +
12114 + return (addr < VMALLOC_START) || (addr >= VMALLOC_END);
12115 +}
12116 +
12117 +static struct xencomm_handle *xencomm_create_inline(void *ptr)
12118 +{
12119 + unsigned long paddr;
12120 +
12121 + BUG_ON(!is_phys_contiguous((unsigned long)ptr));
12122 +
12123 + paddr = (unsigned long)xencomm_pa(ptr);
12124 + BUG_ON(paddr & XENCOMM_INLINE_FLAG);
12125 + return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
12126 +}
12127 +
12128 +/* "mini" routine, for stack-based communications: */
12129 +static int xencomm_create_mini(void *buffer,
12130 + unsigned long bytes, struct xencomm_mini *xc_desc,
12131 + struct xencomm_desc **ret)
12132 +{
12133 + int rc = 0;
12134 + struct xencomm_desc *desc;
12135 + BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0);
12136 +
12137 + desc = (void *)xc_desc;
12138 +
12139 + desc->nr_addrs = XENCOMM_MINI_ADDRS;
12140 +
12141 + if (!(rc = xencomm_init(desc, buffer, bytes)))
12142 + *ret = desc;
12143 +
12144 + return rc;
12145 +}
12146 +
12147 +struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes)
12148 +{
12149 + int rc;
12150 + struct xencomm_desc *desc;
12151 +
12152 + if (is_phys_contiguous((unsigned long)ptr))
12153 + return xencomm_create_inline(ptr);
12154 +
12155 + rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL);
12156 +
12157 + if (rc || desc == NULL)
12158 + return NULL;
12159 +
12160 + return xencomm_pa(desc);
12161 +}
12162 +
12163 +struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes,
12164 + struct xencomm_mini *xc_desc)
12165 +{
12166 + int rc;
12167 + struct xencomm_desc *desc = NULL;
12168 +
12169 + if (is_phys_contiguous((unsigned long)ptr))
12170 + return xencomm_create_inline(ptr);
12171 +
12172 + rc = xencomm_create_mini(ptr, bytes, xc_desc,
12173 + &desc);
12174 +
12175 + if (rc)
12176 + return NULL;
12177 +
12178 + return xencomm_pa(desc);
12179 +}
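
To make the sizing logic in xencomm_alloc() concrete: start is the buffer address rounded down to a page boundary, end is the last byte of the page containing the buffer's final byte, and their difference gives the number of page addresses the descriptor must carry. A standalone sketch of that arithmetic, assuming 4 KiB pages and an illustrative buffer address:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	/* Hypothetical buffer: 0x20 bytes starting 16 bytes before a page end. */
	unsigned long buffer = 0x12345ff0UL;
	unsigned long bytes  = 0x20;

	unsigned long start = buffer & PAGE_MASK;
	unsigned long end = (buffer + bytes) | ~PAGE_MASK;
	unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT;

	/* Prints nr_addrs = 2: the buffer crosses one page boundary. */
	printf("start=%#lx end=%#lx nr_addrs=%lu\n", start, end, nr_addrs);
	return 0;
}
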
12180 Index: head-2008-11-25/drivers/xen/evtchn/Makefile
12181 ===================================================================
12182 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
12183 +++ head-2008-11-25/drivers/xen/evtchn/Makefile 2007-06-12 13:13:44.000000000 +0200
12184 @@ -0,0 +1,2 @@
12185 +
12186 +obj-y := evtchn.o
12187 Index: head-2008-11-25/drivers/xen/evtchn/evtchn.c
12188 ===================================================================
12189 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
12190 +++ head-2008-11-25/drivers/xen/evtchn/evtchn.c 2008-08-07 12:44:36.000000000 +0200
12191 @@ -0,0 +1,560 @@
12192 +/******************************************************************************
12193 + * evtchn.c
12194 + *
12195 + * Driver for receiving and demuxing event-channel signals.
12196 + *
12197 + * Copyright (c) 2004-2005, K A Fraser
12198 + * Multi-process extensions Copyright (c) 2004, Steven Smith
12199 + *
12200 + * This program is free software; you can redistribute it and/or
12201 + * modify it under the terms of the GNU General Public License version 2
12202 + * as published by the Free Software Foundation; or, when distributed
12203 + * separately from the Linux kernel or incorporated into other
12204 + * software packages, subject to the following license:
12205 + *
12206 + * Permission is hereby granted, free of charge, to any person obtaining a copy
12207 + * of this source file (the "Software"), to deal in the Software without
12208 + * restriction, including without limitation the rights to use, copy, modify,
12209 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
12210 + * and to permit persons to whom the Software is furnished to do so, subject to
12211 + * the following conditions:
12212 + *
12213 + * The above copyright notice and this permission notice shall be included in
12214 + * all copies or substantial portions of the Software.
12215 + *
12216 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12217 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
12218 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
12219 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
12220 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
12221 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
12222 + * IN THE SOFTWARE.
12223 + */
12224 +
12225 +#include <linux/module.h>
12226 +#include <linux/kernel.h>
12227 +#include <linux/sched.h>
12228 +#include <linux/slab.h>
12229 +#include <linux/string.h>
12230 +#include <linux/errno.h>
12231 +#include <linux/fs.h>
12232 +#include <linux/errno.h>
12233 +#include <linux/miscdevice.h>
12234 +#include <linux/major.h>
12235 +#include <linux/proc_fs.h>
12236 +#include <linux/stat.h>
12237 +#include <linux/poll.h>
12238 +#include <linux/irq.h>
12239 +#include <linux/init.h>
12240 +#include <linux/gfp.h>
12241 +#include <linux/mutex.h>
12242 +#include <linux/cpu.h>
12243 +#include <xen/evtchn.h>
12244 +#include <xen/public/evtchn.h>
12245 +
12246 +struct per_user_data {
12247 + /* Notification ring, accessed via /dev/xen/evtchn. */
12248 +#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
12249 +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
12250 + evtchn_port_t *ring;
12251 + unsigned int ring_cons, ring_prod, ring_overflow;
12252 + struct mutex ring_cons_mutex; /* protect against concurrent readers */
12253 +
12254 + /* Processes wait on this queue when ring is empty. */
12255 + wait_queue_head_t evtchn_wait;
12256 + struct fasync_struct *evtchn_async_queue;
12257 +
12258 + int bind_cpu;
12259 + int nr_event_wrong_delivery;
12260 +};
12261 +
12262 +/* Who's bound to each port? */
12263 +static struct per_user_data *port_user[NR_EVENT_CHANNELS];
12264 +static spinlock_t port_user_lock;
12265 +
12266 +void evtchn_device_upcall(int port)
12267 +{
12268 + struct per_user_data *u;
12269 +
12270 + spin_lock(&port_user_lock);
12271 +
12272 + mask_evtchn(port);
12273 + clear_evtchn(port);
12274 +
12275 + if ((u = port_user[port]) != NULL) {
12276 + if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
12277 + u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
12278 + wmb(); /* Ensure ring contents visible */
12279 + if (u->ring_cons == u->ring_prod++) {
12280 + wake_up_interruptible(&u->evtchn_wait);
12281 + kill_fasync(&u->evtchn_async_queue,
12282 + SIGIO, POLL_IN);
12283 + }
12284 + } else {
12285 + u->ring_overflow = 1;
12286 + }
12287 + }
12288 +
12289 + spin_unlock(&port_user_lock);
12290 +}
12291 +
12292 +static void evtchn_check_wrong_delivery(struct per_user_data *u)
12293 +{
12294 + evtchn_port_t port;
12295 + unsigned int current_cpu = smp_processor_id();
12296 +
12297 + /* Delivered to correct CPU? All is good. */
12298 + if (u->bind_cpu == current_cpu) {
12299 + u->nr_event_wrong_delivery = 0;
12300 + return;
12301 + }
12302 +
12303 + /* Tolerate up to 100 consecutive misdeliveries. */
12304 + if (++u->nr_event_wrong_delivery < 100)
12305 + return;
12306 +
12307 + spin_lock_irq(&port_user_lock);
12308 +
12309 + for (port = 0; port < NR_EVENT_CHANNELS; port++)
12310 + if (port_user[port] == u)
12311 + rebind_evtchn_to_cpu(port, current_cpu);
12312 +
12313 + u->bind_cpu = current_cpu;
12314 + u->nr_event_wrong_delivery = 0;
12315 +
12316 + spin_unlock_irq(&port_user_lock);
12317 +}
12318 +
12319 +static ssize_t evtchn_read(struct file *file, char __user *buf,
12320 + size_t count, loff_t *ppos)
12321 +{
12322 + int rc;
12323 + unsigned int c, p, bytes1 = 0, bytes2 = 0;
12324 + struct per_user_data *u = file->private_data;
12325 +
12326 + /* Whole number of ports. */
12327 + count &= ~(sizeof(evtchn_port_t)-1);
12328 +
12329 + if (count == 0)
12330 + return 0;
12331 +
12332 + if (count > PAGE_SIZE)
12333 + count = PAGE_SIZE;
12334 +
12335 + for (;;) {
12336 + mutex_lock(&u->ring_cons_mutex);
12337 +
12338 + rc = -EFBIG;
12339 + if (u->ring_overflow)
12340 + goto unlock_out;
12341 +
12342 + if ((c = u->ring_cons) != (p = u->ring_prod))
12343 + break;
12344 +
12345 + mutex_unlock(&u->ring_cons_mutex);
12346 +
12347 + if (file->f_flags & O_NONBLOCK)
12348 + return -EAGAIN;
12349 +
12350 + rc = wait_event_interruptible(
12351 + u->evtchn_wait, u->ring_cons != u->ring_prod);
12352 + if (rc)
12353 + return rc;
12354 + }
12355 +
12356 + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
12357 + if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
12358 + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
12359 + sizeof(evtchn_port_t);
12360 + bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
12361 + } else {
12362 + bytes1 = (p - c) * sizeof(evtchn_port_t);
12363 + bytes2 = 0;
12364 + }
12365 +
12366 + /* Truncate chunks according to caller's maximum byte count. */
12367 + if (bytes1 > count) {
12368 + bytes1 = count;
12369 + bytes2 = 0;
12370 + } else if ((bytes1 + bytes2) > count) {
12371 + bytes2 = count - bytes1;
12372 + }
12373 +
12374 + rc = -EFAULT;
12375 + rmb(); /* Ensure that we see the port before we copy it. */
12376 + if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
12377 + ((bytes2 != 0) &&
12378 + copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
12379 + goto unlock_out;
12380 +
12381 + evtchn_check_wrong_delivery(u);
12382 +
12383 + u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
12384 + rc = bytes1 + bytes2;
12385 +
12386 + unlock_out:
12387 + mutex_unlock(&u->ring_cons_mutex);
12388 + return rc;
12389 +}
12390 +
12391 +static ssize_t evtchn_write(struct file *file, const char __user *buf,
12392 + size_t count, loff_t *ppos)
12393 +{
12394 + int rc, i;
12395 + evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
12396 + struct per_user_data *u = file->private_data;
12397 +
12398 + if (kbuf == NULL)
12399 + return -ENOMEM;
12400 +
12401 + /* Whole number of ports. */
12402 + count &= ~(sizeof(evtchn_port_t)-1);
12403 +
12404 + rc = 0;
12405 + if (count == 0)
12406 + goto out;
12407 +
12408 + if (count > PAGE_SIZE)
12409 + count = PAGE_SIZE;
12410 +
12411 + rc = -EFAULT;
12412 + if (copy_from_user(kbuf, buf, count) != 0)
12413 + goto out;
12414 +
12415 + spin_lock_irq(&port_user_lock);
12416 + for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
12417 + if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
12418 + unmask_evtchn(kbuf[i]);
12419 + spin_unlock_irq(&port_user_lock);
12420 +
12421 + rc = count;
12422 +
12423 + out:
12424 + free_page((unsigned long)kbuf);
12425 + return rc;
12426 +}
12427 +
12428 +static unsigned int next_bind_cpu(cpumask_t map)
12429 +{
12430 + static unsigned int bind_cpu;
12431 + bind_cpu = next_cpu(bind_cpu, map);
12432 + if (bind_cpu >= NR_CPUS)
12433 + bind_cpu = first_cpu(map);
12434 + return bind_cpu;
12435 +}
12436 +
12437 +static void evtchn_bind_to_user(struct per_user_data *u, int port)
12438 +{
12439 + spin_lock_irq(&port_user_lock);
12440 +
12441 + BUG_ON(port_user[port] != NULL);
12442 + port_user[port] = u;
12443 +
12444 + if (u->bind_cpu == -1)
12445 + u->bind_cpu = next_bind_cpu(cpu_online_map);
12446 +
12447 + rebind_evtchn_to_cpu(port, u->bind_cpu);
12448 +
12449 + unmask_evtchn(port);
12450 +
12451 + spin_unlock_irq(&port_user_lock);
12452 +}
12453 +
12454 +static long evtchn_ioctl(struct file *file,
12455 + unsigned int cmd, unsigned long arg)
12456 +{
12457 + int rc;
12458 + struct per_user_data *u = file->private_data;
12459 + void __user *uarg = (void __user *) arg;
12460 +
12461 + switch (cmd) {
12462 + case IOCTL_EVTCHN_BIND_VIRQ: {
12463 + struct ioctl_evtchn_bind_virq bind;
12464 + struct evtchn_bind_virq bind_virq;
12465 +
12466 + rc = -EFAULT;
12467 + if (copy_from_user(&bind, uarg, sizeof(bind)))
12468 + break;
12469 +
12470 + bind_virq.virq = bind.virq;
12471 + bind_virq.vcpu = 0;
12472 + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
12473 + &bind_virq);
12474 + if (rc != 0)
12475 + break;
12476 +
12477 + rc = bind_virq.port;
12478 + evtchn_bind_to_user(u, rc);
12479 + break;
12480 + }
12481 +
12482 + case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
12483 + struct ioctl_evtchn_bind_interdomain bind;
12484 + struct evtchn_bind_interdomain bind_interdomain;
12485 +
12486 + rc = -EFAULT;
12487 + if (copy_from_user(&bind, uarg, sizeof(bind)))
12488 + break;
12489 +
12490 + bind_interdomain.remote_dom = bind.remote_domain;
12491 + bind_interdomain.remote_port = bind.remote_port;
12492 + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
12493 + &bind_interdomain);
12494 + if (rc != 0)
12495 + break;
12496 +
12497 + rc = bind_interdomain.local_port;
12498 + evtchn_bind_to_user(u, rc);
12499 + break;
12500 + }
12501 +
12502 + case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
12503 + struct ioctl_evtchn_bind_unbound_port bind;
12504 + struct evtchn_alloc_unbound alloc_unbound;
12505 +
12506 + rc = -EFAULT;
12507 + if (copy_from_user(&bind, uarg, sizeof(bind)))
12508 + break;
12509 +
12510 + alloc_unbound.dom = DOMID_SELF;
12511 + alloc_unbound.remote_dom = bind.remote_domain;
12512 + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
12513 + &alloc_unbound);
12514 + if (rc != 0)
12515 + break;
12516 +
12517 + rc = alloc_unbound.port;
12518 + evtchn_bind_to_user(u, rc);
12519 + break;
12520 + }
12521 +
12522 + case IOCTL_EVTCHN_UNBIND: {
12523 + struct ioctl_evtchn_unbind unbind;
12524 + struct evtchn_close close;
12525 + int ret;
12526 +
12527 + rc = -EFAULT;
12528 + if (copy_from_user(&unbind, uarg, sizeof(unbind)))
12529 + break;
12530 +
12531 + rc = -EINVAL;
12532 + if (unbind.port >= NR_EVENT_CHANNELS)
12533 + break;
12534 +
12535 + spin_lock_irq(&port_user_lock);
12536 +
12537 + rc = -ENOTCONN;
12538 + if (port_user[unbind.port] != u) {
12539 + spin_unlock_irq(&port_user_lock);
12540 + break;
12541 + }
12542 +
12543 + port_user[unbind.port] = NULL;
12544 + mask_evtchn(unbind.port);
12545 + rebind_evtchn_to_cpu(unbind.port, 0);
12546 +
12547 + spin_unlock_irq(&port_user_lock);
12548 +
12549 + close.port = unbind.port;
12550 + ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
12551 + BUG_ON(ret);
12552 +
12553 + rc = 0;
12554 + break;
12555 + }
12556 +
12557 + case IOCTL_EVTCHN_NOTIFY: {
12558 + struct ioctl_evtchn_notify notify;
12559 +
12560 + rc = -EFAULT;
12561 + if (copy_from_user(&notify, uarg, sizeof(notify)))
12562 + break;
12563 +
12564 + if (notify.port >= NR_EVENT_CHANNELS) {
12565 + rc = -EINVAL;
12566 + } else if (port_user[notify.port] != u) {
12567 + rc = -ENOTCONN;
12568 + } else {
12569 + notify_remote_via_evtchn(notify.port);
12570 + rc = 0;
12571 + }
12572 + break;
12573 + }
12574 +
12575 + case IOCTL_EVTCHN_RESET: {
12576 + /* Initialise the ring to empty. Clear errors. */
12577 + mutex_lock(&u->ring_cons_mutex);
12578 + spin_lock_irq(&port_user_lock);
12579 + u->ring_cons = u->ring_prod = u->ring_overflow = 0;
12580 + spin_unlock_irq(&port_user_lock);
12581 + mutex_unlock(&u->ring_cons_mutex);
12582 + rc = 0;
12583 + break;
12584 + }
12585 +
12586 + default:
12587 + rc = -ENOSYS;
12588 + break;
12589 + }
12590 +
12591 + return rc;
12592 +}
12593 +
12594 +static unsigned int evtchn_poll(struct file *file, poll_table *wait)
12595 +{
12596 + unsigned int mask = POLLOUT | POLLWRNORM;
12597 + struct per_user_data *u = file->private_data;
12598 +
12599 + poll_wait(file, &u->evtchn_wait, wait);
12600 + if (u->ring_cons != u->ring_prod)
12601 + mask |= POLLIN | POLLRDNORM;
12602 + if (u->ring_overflow)
12603 + mask = POLLERR;
12604 + return mask;
12605 +}
12606 +
12607 +static int evtchn_fasync(int fd, struct file *filp, int on)
12608 +{
12609 + struct per_user_data *u = filp->private_data;
12610 + return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
12611 +}
12612 +
12613 +static int evtchn_open(struct inode *inode, struct file *filp)
12614 +{
12615 + struct per_user_data *u;
12616 +
12617 + if ((u = kmalloc(sizeof(*u), GFP_KERNEL)) == NULL)
12618 + return -ENOMEM;
12619 +
12620 + memset(u, 0, sizeof(*u));
12621 + init_waitqueue_head(&u->evtchn_wait);
12622 +
12623 + u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
12624 + if (u->ring == NULL) {
12625 + kfree(u);
12626 + return -ENOMEM;
12627 + }
12628 +
12629 + mutex_init(&u->ring_cons_mutex);
12630 +
12631 + filp->private_data = u;
12632 +
12633 + u->bind_cpu = -1;
12634 +
12635 + return 0;
12636 +}
12637 +
12638 +static int evtchn_release(struct inode *inode, struct file *filp)
12639 +{
12640 + int i;
12641 + struct per_user_data *u = filp->private_data;
12642 + struct evtchn_close close;
12643 +
12644 + spin_lock_irq(&port_user_lock);
12645 +
12646 + free_page((unsigned long)u->ring);
12647 +
12648 + for (i = 0; i < NR_EVENT_CHANNELS; i++) {
12649 + int ret;
12650 + if (port_user[i] != u)
12651 + continue;
12652 +
12653 + port_user[i] = NULL;
12654 + mask_evtchn(i);
12655 + rebind_evtchn_to_cpu(i, 0);
12656 +
12657 + close.port = i;
12658 + ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
12659 + BUG_ON(ret);
12660 + }
12661 +
12662 + spin_unlock_irq(&port_user_lock);
12663 +
12664 + kfree(u);
12665 +
12666 + return 0;
12667 +}
12668 +
12669 +static const struct file_operations evtchn_fops = {
12670 + .owner = THIS_MODULE,
12671 + .read = evtchn_read,
12672 + .write = evtchn_write,
12673 + .unlocked_ioctl = evtchn_ioctl,
12674 + .poll = evtchn_poll,
12675 + .fasync = evtchn_fasync,
12676 + .open = evtchn_open,
12677 + .release = evtchn_release,
12678 +};
12679 +
12680 +static struct miscdevice evtchn_miscdev = {
12681 + .minor = MISC_DYNAMIC_MINOR,
12682 + .name = "evtchn",
12683 + .fops = &evtchn_fops,
12684 +};
12685 +
12686 +static int __cpuinit evtchn_cpu_notify(struct notifier_block *nfb,
12687 + unsigned long action, void *hcpu)
12688 +{
12689 + int hotcpu = (unsigned long)hcpu;
12690 + cpumask_t map = cpu_online_map;
12691 + int port, newcpu;
12692 + struct per_user_data *u;
12693 +
12694 + switch (action) {
12695 + case CPU_DOWN_PREPARE:
12696 + cpu_clear(hotcpu, map);
12697 + spin_lock_irq(&port_user_lock);
12698 + for (port = 0; port < NR_EVENT_CHANNELS; port++) {
12699 + if ((u = port_user[port]) != NULL &&
12700 + u->bind_cpu == hotcpu &&
12701 + (newcpu = next_bind_cpu(map)) < NR_CPUS) {
12702 + rebind_evtchn_to_cpu(port, newcpu);
12703 + u->bind_cpu = newcpu;
12704 + }
12705 + }
12706 + spin_unlock_irq(&port_user_lock);
12707 + break;
12708 + default:
12709 + return NOTIFY_DONE;
12710 + }
12711 + return NOTIFY_OK;
12712 +}
12713 +
12714 +static struct notifier_block __cpuinitdata evtchn_cpu_nfb = {
12715 + .notifier_call = evtchn_cpu_notify
12716 +};
12717 +
12718 +static int __init evtchn_init(void)
12719 +{
12720 + int err;
12721 +
12722 + if (!is_running_on_xen())
12723 + return -ENODEV;
12724 +
12725 + spin_lock_init(&port_user_lock);
12726 + memset(port_user, 0, sizeof(port_user));
12727 +
12728 + /* Create '/dev/misc/evtchn'. */
12729 + err = misc_register(&evtchn_miscdev);
12730 + if (err != 0) {
12731 + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
12732 + return err;
12733 + }
12734 +
12735 + register_cpu_notifier(&evtchn_cpu_nfb);
12736 +
12737 + printk("Event-channel device installed.\n");
12738 +
12739 + return 0;
12740 +}
12741 +
12742 +static void __exit evtchn_cleanup(void)
12743 +{
12744 + misc_deregister(&evtchn_miscdev);
12745 + unregister_cpu_notifier(&evtchn_cpu_nfb);
12746 +}
12747 +
12748 +module_init(evtchn_init);
12749 +module_exit(evtchn_cleanup);
12750 +
12751 +MODULE_LICENSE("Dual BSD/GPL");
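
From userspace, the device above is driven through the ioctls declared in xen/public/evtchn.h: bind a port, read() pending port numbers (which arrive masked), and write() them back to unmask. A rough sketch, assuming those definitions are visible to userspace and the node is exposed as /dev/xen/evtchn; the VIRQ number is illustrative:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xen/public/evtchn.h>

int main(void)
{
	struct ioctl_evtchn_bind_virq bind = { .virq = 2 /* illustrative VIRQ */ };
	unsigned int port;	/* evtchn_port_t is a 32-bit value */
	int fd, rc;

	fd = open("/dev/xen/evtchn", O_RDWR);
	if (fd < 0)
		return 1;

	/* The ioctl returns the newly bound local port on success. */
	rc = ioctl(fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);
	if (rc < 0)
		return 1;
	printf("bound VIRQ to local port %d\n", rc);

	/* Block until the event fires, then write the port back to unmask it. */
	if (read(fd, &port, sizeof(port)) == sizeof(port)) {
		printf("event on port %u\n", port);
		write(fd, &port, sizeof(port));
	}

	close(fd);
	return 0;
}
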
12752 Index: head-2008-11-25/drivers/xen/fbfront/Makefile
12753 ===================================================================
12754 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
12755 +++ head-2008-11-25/drivers/xen/fbfront/Makefile 2007-06-12 13:13:45.000000000 +0200
12756 @@ -0,0 +1,2 @@
12757 +obj-$(CONFIG_XEN_FRAMEBUFFER) := xenfb.o
12758 +obj-$(CONFIG_XEN_KEYBOARD) += xenkbd.o
12759 Index: head-2008-11-25/drivers/xen/fbfront/xenfb.c
12760 ===================================================================
12761 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
12762 +++ head-2008-11-25/drivers/xen/fbfront/xenfb.c 2008-11-25 12:22:34.000000000 +0100
12763 @@ -0,0 +1,887 @@
12764 +/*
12765 + * linux/drivers/video/xenfb.c -- Xen para-virtual frame buffer device
12766 + *
12767 + * Copyright (C) 2005-2006 Anthony Liguori <aliguori@us.ibm.com>
12768 + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
12769 + *
12770 + * Based on linux/drivers/video/q40fb.c
12771 + *
12772 + * This file is subject to the terms and conditions of the GNU General Public
12773 + * License. See the file COPYING in the main directory of this archive for
12774 + * more details.
12775 + */
12776 +
12777 +/*
12778 + * TODO:
12779 + *
12780 + * Switch to grant tables when they become capable of dealing with the
12781 + * frame buffer.
12782 + */
12783 +
12784 +#include <linux/kernel.h>
12785 +#include <linux/errno.h>
12786 +#include <linux/fb.h>
12787 +#include <linux/module.h>
12788 +#include <linux/vmalloc.h>
12789 +#include <linux/mm.h>
12790 +#include <linux/mutex.h>
12791 +#include <asm/hypervisor.h>
12792 +#include <xen/evtchn.h>
12793 +#include <xen/interface/io/fbif.h>
12794 +#include <xen/interface/io/protocols.h>
12795 +#include <xen/xenbus.h>
12796 +#include <linux/kthread.h>
12797 +
12798 +struct xenfb_mapping
12799 +{
12800 + struct list_head link;
12801 + struct vm_area_struct *vma;
12802 + atomic_t map_refs;
12803 + int faults;
12804 + struct xenfb_info *info;
12805 +};
12806 +
12807 +struct xenfb_info
12808 +{
12809 + struct task_struct *kthread;
12810 + wait_queue_head_t wq;
12811 +
12812 + unsigned char *fb;
12813 + struct fb_info *fb_info;
12814 + struct timer_list refresh;
12815 + int dirty;
12816 + int x1, y1, x2, y2; /* dirty rectangle,
12817 + protected by dirty_lock */
12818 + spinlock_t dirty_lock;
12819 + struct mutex mm_lock;
12820 + int nr_pages;
12821 + struct page **pages;
12822 + struct list_head mappings; /* protected by mm_lock */
12823 +
12824 + int irq;
12825 + struct xenfb_page *page;
12826 + unsigned long *mfns;
12827 + int update_wanted; /* XENFB_TYPE_UPDATE wanted */
12828 + int feature_resize; /* Backend has resize feature */
12829 + struct xenfb_resize resize;
12830 + int resize_dpy;
12831 + spinlock_t resize_lock;
12832 +
12833 + struct xenbus_device *xbdev;
12834 +};
12835 +
12836 +/*
12837 + * There are three locks:
12838 + * spinlock resize_lock protecting resize_dpy and resize
12839 + * spinlock dirty_lock protecting the dirty rectangle
12840 + * mutex mm_lock protecting mappings.
12841 + *
12842 + * How the dirty and mapping locks work together
12843 + *
12844 + * The problem is that the dirty rectangle and mappings aren't
12845 + * independent: the dirty rectangle must cover all faulted pages in
12846 + * mappings. We need to prove that our locking maintains this
12847 + * invariant.
12848 + *
12849 + * There are several kinds of critical regions:
12850 + *
12851 + * 1. Holding only dirty_lock: xenfb_refresh(). May run in
12852 + * interrupts. Extends the dirty rectangle. Trivially preserves
12853 + * invariant.
12854 + *
12855 + * 2. Holding only mm_lock: xenfb_mmap() and xenfb_vm_close(). Touch
12856 + * only mappings. The former creates unfaulted pages. Preserves
12857 + * invariant. The latter removes pages. Preserves invariant.
12858 + *
12859 + * 3. Holding both locks: xenfb_vm_nopage(). Extends the dirty
12860 + * rectangle and updates mappings consistently. Preserves
12861 + * invariant.
12862 + *
12863 + * 4. The ugliest one: xenfb_update_screen().  Clears the dirty
12864 + *    rectangle and updates mappings consistently.
12865 + *
12866 + * We can't simply hold both locks, because zap_page_range() cannot
12867 + * be called with a spinlock held.
12868 + *
12869 + * Therefore, we first clear the dirty rectangle with both locks
12870 + * held. Then we unlock dirty_lock and update the mappings.
12871 + * Critical regions that hold only dirty_lock may interfere with
12872 + * that. This can only be region 1: xenfb_refresh(). But that
12873 + * just extends the dirty rectangle, which can't harm the
12874 + * invariant.
12875 + *
12876 + * But FIXME: the invariant is too weak. It misses that the fault
12877 + * record in mappings must be consistent with the mapping of pages in
12878 + * the associated address space! do_no_page() updates the PTE after
12879 + * xenfb_vm_nopage() returns, i.e. outside the critical region. This
12880 + * allows the following race:
12881 + *
12882 + * X writes to some address in the Xen frame buffer
12883 + * Fault - call do_no_page()
12884 + * call xenfb_vm_nopage()
12885 + * grab mm_lock
12886 + * map->faults++;
12887 + * release mm_lock
12888 + * return back to do_no_page()
12889 + * (preempted, or SMP)
12890 + * Xen worker thread runs.
12891 + * grab mm_lock
12892 + * look at mappings
12893 + * find this mapping, zaps its pages (but page not in pte yet)
12894 + * clear map->faults
12895 + * releases mm_lock
12896 + * (back to X process)
12897 + * put page in X's pte
12898 + *
12899 + * Oh well, we won't be sending updates for the writes to this page anytime soon.
12900 + */
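A rough sketch of the lock-ordering rule described above, as it is applied by xenfb_update_screen() below; illustrative only, using the field names of struct xenfb_info:

	mutex_lock(&info->mm_lock);                     /* sleepable lock, taken first */
	spin_lock_irqsave(&info->dirty_lock, flags);    /* nested, held only briefly   */
	/* snapshot x1/y1/x2/y2 and reset the dirty rectangle */
	spin_unlock_irqrestore(&info->dirty_lock, flags);
	/* zap_page_range() may sleep, so only mm_lock may be held from here on */
	list_for_each_entry(map, &info->mappings, link) {
		/* zap the faulted pages of this mapping, clear map->faults */
	}
	mutex_unlock(&info->mm_lock);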
12901 +#define MB_ (1024*1024)
12902 +#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8)
12903 +
12904 +enum {KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT};
12905 +static int video[KPARAM_CNT] = {2, XENFB_WIDTH, XENFB_HEIGHT};
12906 +module_param_array(video, int, NULL, 0);
12907 +MODULE_PARM_DESC(video,
12908 + "Size of video memory in MB and width,height in pixels, default = (2,800,600)");
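When built as a module (xenfb.o per the Makefile above), the array parameter is passed as a comma-separated triple, for example:

	modprobe xenfb video=16,1280,1024    # 16 MB of video memory, 1280x1024 pixels

or, when built in, as xenfb.video=16,1280,1024 on the kernel command line, following the usual module_param_array() convention.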
12909 +
12910 +static int xenfb_fps = 20;
12911 +
12912 +static int xenfb_remove(struct xenbus_device *);
12913 +static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *);
12914 +static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
12915 +static void xenfb_disconnect_backend(struct xenfb_info *);
12916 +
12917 +static void xenfb_send_event(struct xenfb_info *info,
12918 + union xenfb_out_event *event)
12919 +{
12920 + __u32 prod;
12921 +
12922 + prod = info->page->out_prod;
12923 + /* caller ensures !xenfb_queue_full() */
12924 + mb(); /* ensure ring space available */
12925 + XENFB_OUT_RING_REF(info->page, prod) = *event;
12926 + wmb(); /* ensure ring contents visible */
12927 + info->page->out_prod = prod + 1;
12928 +
12929 + notify_remote_via_irq(info->irq);
12930 +}
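The mb()/wmb() pair above is the producer half of the usual lock-free ring convention; the matching consumer half (the backend for this out ring, or input_handler() in xenkbd.c later in this patch for an in ring) looks roughly like the sketch below, where consume() merely stands in for whatever the reader does with each slot:

	prod = page->out_prod;
	rmb();                          /* see ring contents up to prod */
	for (cons = page->out_cons; cons != prod; cons++)
		consume(&XENFB_OUT_RING_REF(page, cons));
	mb();                           /* finish reading before releasing the slots */
	page->out_cons = cons;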
12931 +
12932 +static void xenfb_do_update(struct xenfb_info *info,
12933 + int x, int y, int w, int h)
12934 +{
12935 + union xenfb_out_event event;
12936 +
12937 + memset(&event, 0, sizeof(event));
12938 + event.type = XENFB_TYPE_UPDATE;
12939 + event.update.x = x;
12940 + event.update.y = y;
12941 + event.update.width = w;
12942 + event.update.height = h;
12943 +
12944 + /* caller ensures !xenfb_queue_full() */
12945 + xenfb_send_event(info, &event);
12946 +}
12947 +
12948 +static void xenfb_do_resize(struct xenfb_info *info)
12949 +{
12950 + union xenfb_out_event event;
12951 +
12952 + memset(&event, 0, sizeof(event));
12953 + event.resize = info->resize;
12954 +
12955 + /* caller ensures !xenfb_queue_full() */
12956 + xenfb_send_event(info, &event);
12957 +}
12958 +
12959 +static int xenfb_queue_full(struct xenfb_info *info)
12960 +{
12961 + __u32 cons, prod;
12962 +
12963 + prod = info->page->out_prod;
12964 + cons = info->page->out_cons;
12965 + return prod - cons == XENFB_OUT_RING_LEN;
12966 +}
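Note that out_prod and out_cons are free-running __u32 counters, so the unsigned difference prod - cons counts the unconsumed slots even across wrap-around: for example prod = 0x00000002 and cons = 0xfffffffe give prod - cons = 4 in 32-bit arithmetic, and the ring is full exactly when that difference reaches XENFB_OUT_RING_LEN.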
12967 +
12968 +static void xenfb_update_screen(struct xenfb_info *info)
12969 +{
12970 + unsigned long flags;
12971 + int y1, y2, x1, x2;
12972 + struct xenfb_mapping *map;
12973 +
12974 + if (!info->update_wanted)
12975 + return;
12976 + if (xenfb_queue_full(info))
12977 + return;
12978 +
12979 + mutex_lock(&info->mm_lock);
12980 +
12981 + spin_lock_irqsave(&info->dirty_lock, flags);
12982 + y1 = info->y1;
12983 + y2 = info->y2;
12984 + x1 = info->x1;
12985 + x2 = info->x2;
12986 + info->x1 = info->y1 = INT_MAX;
12987 + info->x2 = info->y2 = 0;
12988 + spin_unlock_irqrestore(&info->dirty_lock, flags);
12989 +
12990 + list_for_each_entry(map, &info->mappings, link) {
12991 + if (!map->faults)
12992 + continue;
12993 + zap_page_range(map->vma, map->vma->vm_start,
12994 + map->vma->vm_end - map->vma->vm_start, NULL);
12995 + map->faults = 0;
12996 + }
12997 +
12998 + mutex_unlock(&info->mm_lock);
12999 +
13000 + if (x2 < x1 || y2 < y1) {
13001 + printk("xenfb_update_screen bogus rect %d %d %d %d\n",
13002 + x1, x2, y1, y2);
13003 + WARN_ON(1);
13004 + }
13005 + xenfb_do_update(info, x1, y1, x2 - x1, y2 - y1);
13006 +}
13007 +
13008 +static void xenfb_handle_resize_dpy(struct xenfb_info *info)
13009 +{
13010 + unsigned long flags;
13011 +
13012 + spin_lock_irqsave(&info->resize_lock, flags);
13013 + if (info->resize_dpy) {
13014 + if (!xenfb_queue_full(info)) {
13015 + info->resize_dpy = 0;
13016 + xenfb_do_resize(info);
13017 + }
13018 + }
13019 + spin_unlock_irqrestore(&info->resize_lock, flags);
13020 +}
13021 +
13022 +static int xenfb_thread(void *data)
13023 +{
13024 + struct xenfb_info *info = data;
13025 +
13026 + while (!kthread_should_stop()) {
13027 + xenfb_handle_resize_dpy(info);
13028 + if (info->dirty) {
13029 + info->dirty = 0;
13030 + xenfb_update_screen(info);
13031 + }
13032 + wait_event_interruptible(info->wq,
13033 + kthread_should_stop() || info->dirty);
13034 + try_to_freeze();
13035 + }
13036 + return 0;
13037 +}
13038 +
13039 +static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green,
13040 + unsigned blue, unsigned transp,
13041 + struct fb_info *info)
13042 +{
13043 + u32 v;
13044 +
13045 + if (regno > info->cmap.len)
13046 + return 1;
13047 +
13048 + red >>= (16 - info->var.red.length);
13049 + green >>= (16 - info->var.green.length);
13050 + blue >>= (16 - info->var.blue.length);
13051 +
13052 + v = (red << info->var.red.offset) |
13053 + (green << info->var.green.offset) |
13054 + (blue << info->var.blue.offset);
13055 +
13056 + /* FIXME is this sane? check against xxxfb_setcolreg()! */
13057 + switch (info->var.bits_per_pixel) {
13058 + case 16:
13059 + case 24:
13060 + case 32:
13061 + ((u32 *)info->pseudo_palette)[regno] = v;
13062 + break;
13063 + }
13064 +
13065 + return 0;
13066 +}
13067 +
13068 +static void xenfb_timer(unsigned long data)
13069 +{
13070 + struct xenfb_info *info = (struct xenfb_info *)data;
13071 + wake_up(&info->wq);
13072 +}
13073 +
13074 +static void __xenfb_refresh(struct xenfb_info *info,
13075 + int x1, int y1, int w, int h)
13076 +{
13077 + int y2, x2;
13078 +
13079 + y2 = y1 + h;
13080 + x2 = x1 + w;
13081 +
13082 + if (info->y1 > y1)
13083 + info->y1 = y1;
13084 + if (info->y2 < y2)
13085 + info->y2 = y2;
13086 + if (info->x1 > x1)
13087 + info->x1 = x1;
13088 + if (info->x2 < x2)
13089 + info->x2 = x2;
13090 + info->dirty = 1;
13091 +
13092 + if (timer_pending(&info->refresh))
13093 + return;
13094 +
13095 + mod_timer(&info->refresh, jiffies + HZ/xenfb_fps);
13096 +}
13097 +
13098 +static void xenfb_refresh(struct xenfb_info *info,
13099 + int x1, int y1, int w, int h)
13100 +{
13101 + unsigned long flags;
13102 +
13103 + spin_lock_irqsave(&info->dirty_lock, flags);
13104 + __xenfb_refresh(info, x1, y1, w, h);
13105 + spin_unlock_irqrestore(&info->dirty_lock, flags);
13106 +}
13107 +
13108 +static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
13109 +{
13110 + struct xenfb_info *info = p->par;
13111 +
13112 + cfb_fillrect(p, rect);
13113 + xenfb_refresh(info, rect->dx, rect->dy, rect->width, rect->height);
13114 +}
13115 +
13116 +static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image)
13117 +{
13118 + struct xenfb_info *info = p->par;
13119 +
13120 + cfb_imageblit(p, image);
13121 + xenfb_refresh(info, image->dx, image->dy, image->width, image->height);
13122 +}
13123 +
13124 +static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
13125 +{
13126 + struct xenfb_info *info = p->par;
13127 +
13128 + cfb_copyarea(p, area);
13129 + xenfb_refresh(info, area->dx, area->dy, area->width, area->height);
13130 +}
13131 +
13132 +static void xenfb_vm_open(struct vm_area_struct *vma)
13133 +{
13134 + struct xenfb_mapping *map = vma->vm_private_data;
13135 + atomic_inc(&map->map_refs);
13136 +}
13137 +
13138 +static void xenfb_vm_close(struct vm_area_struct *vma)
13139 +{
13140 + struct xenfb_mapping *map = vma->vm_private_data;
13141 + struct xenfb_info *info = map->info;
13142 +
13143 + mutex_lock(&info->mm_lock);
13144 + if (atomic_dec_and_test(&map->map_refs)) {
13145 + list_del(&map->link);
13146 + kfree(map);
13147 + }
13148 + mutex_unlock(&info->mm_lock);
13149 +}
13150 +
13151 +static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
13152 + unsigned long vaddr, int *type)
13153 +{
13154 + struct xenfb_mapping *map = vma->vm_private_data;
13155 + struct xenfb_info *info = map->info;
13156 + int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;
13157 + unsigned long flags;
13158 + struct page *page;
13159 + int y1, y2;
13160 +
13161 + if (pgnr >= info->nr_pages)
13162 + return NOPAGE_SIGBUS;
13163 +
13164 + mutex_lock(&info->mm_lock);
13165 + spin_lock_irqsave(&info->dirty_lock, flags);
13166 + page = info->pages[pgnr];
13167 + get_page(page);
13168 + map->faults++;
13169 +
13170 + y1 = pgnr * PAGE_SIZE / info->fb_info->fix.line_length;
13171 + y2 = (pgnr * PAGE_SIZE + PAGE_SIZE - 1) / info->fb_info->fix.line_length;
13172 + if (y2 > info->fb_info->var.yres)
13173 + y2 = info->fb_info->var.yres;
13174 + __xenfb_refresh(info, 0, y1, info->fb_info->var.xres, y2 - y1);
13175 + spin_unlock_irqrestore(&info->dirty_lock, flags);
13176 + mutex_unlock(&info->mm_lock);
13177 +
13178 + if (type)
13179 + *type = VM_FAULT_MINOR;
13180 +
13181 + return page;
13182 +}
13183 +
13184 +static struct vm_operations_struct xenfb_vm_ops = {
13185 + .open = xenfb_vm_open,
13186 + .close = xenfb_vm_close,
13187 + .nopage = xenfb_vm_nopage,
13188 +};
13189 +
13190 +static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma)
13191 +{
13192 + struct xenfb_info *info = fb_info->par;
13193 + struct xenfb_mapping *map;
13194 + int map_pages;
13195 +
13196 + if (!(vma->vm_flags & VM_WRITE))
13197 + return -EINVAL;
13198 + if (!(vma->vm_flags & VM_SHARED))
13199 + return -EINVAL;
13200 + if (vma->vm_pgoff != 0)
13201 + return -EINVAL;
13202 +
13203 + map_pages = (vma->vm_end - vma->vm_start + PAGE_SIZE-1) >> PAGE_SHIFT;
13204 + if (map_pages > info->nr_pages)
13205 + return -EINVAL;
13206 +
13207 + map = kzalloc(sizeof(*map), GFP_KERNEL);
13208 + if (map == NULL)
13209 + return -ENOMEM;
13210 +
13211 + map->vma = vma;
13212 + map->faults = 0;
13213 + map->info = info;
13214 + atomic_set(&map->map_refs, 1);
13215 +
13216 + mutex_lock(&info->mm_lock);
13217 + list_add(&map->link, &info->mappings);
13218 + mutex_unlock(&info->mm_lock);
13219 +
13220 + vma->vm_ops = &xenfb_vm_ops;
13221 + vma->vm_flags |= (VM_DONTEXPAND | VM_RESERVED);
13222 + vma->vm_private_data = map;
13223 +
13224 + return 0;
13225 +}
13226 +
13227 +static int
13228 +xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
13229 +{
13230 + struct xenfb_info *xenfb_info;
13231 + int required_mem_len;
13232 +
13233 + xenfb_info = info->par;
13234 +
13235 + if (!xenfb_info->feature_resize) {
13236 + if (var->xres == video[KPARAM_WIDTH] &&
13237 + var->yres == video[KPARAM_HEIGHT] &&
13238 + var->bits_per_pixel == xenfb_info->page->depth) {
13239 + return 0;
13240 + }
13241 + return -EINVAL;
13242 + }
13243 +
13244 + /* Can't resize past initial width and height */
13245 + if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT])
13246 + return -EINVAL;
13247 +
13248 + required_mem_len = var->xres * var->yres * (xenfb_info->page->depth / 8);
13249 + if (var->bits_per_pixel == xenfb_info->page->depth &&
13250 + var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) &&
13251 + required_mem_len <= info->fix.smem_len) {
13252 + var->xres_virtual = var->xres;
13253 + var->yres_virtual = var->yres;
13254 + return 0;
13255 + }
13256 + return -EINVAL;
13257 +}
13258 +
13259 +static int xenfb_set_par(struct fb_info *info)
13260 +{
13261 + struct xenfb_info *xenfb_info;
13262 + unsigned long flags;
13263 +
13264 + xenfb_info = info->par;
13265 +
13266 + spin_lock_irqsave(&xenfb_info->resize_lock, flags);
13267 + xenfb_info->resize.type = XENFB_TYPE_RESIZE;
13268 + xenfb_info->resize.width = info->var.xres;
13269 + xenfb_info->resize.height = info->var.yres;
13270 + xenfb_info->resize.stride = info->fix.line_length;
13271 + xenfb_info->resize.depth = info->var.bits_per_pixel;
13272 + xenfb_info->resize.offset = 0;
13273 + xenfb_info->resize_dpy = 1;
13274 + spin_unlock_irqrestore(&xenfb_info->resize_lock, flags);
13275 + return 0;
13276 +}
13277 +
13278 +static struct fb_ops xenfb_fb_ops = {
13279 + .owner = THIS_MODULE,
13280 + .fb_setcolreg = xenfb_setcolreg,
13281 + .fb_fillrect = xenfb_fillrect,
13282 + .fb_copyarea = xenfb_copyarea,
13283 + .fb_imageblit = xenfb_imageblit,
13284 + .fb_mmap = xenfb_mmap,
13285 + .fb_check_var = xenfb_check_var,
13286 + .fb_set_par = xenfb_set_par,
13287 +};
13288 +
13289 +static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
13290 + struct pt_regs *regs)
13291 +{
13292 + /*
13293 +	 * No incoming events are recognized; simply ignore them all.
13294 +	 * If you need to recognize some, see xenkbd's input_handler()
13295 + * for how to do that.
13296 + */
13297 + struct xenfb_info *info = dev_id;
13298 + struct xenfb_page *page = info->page;
13299 +
13300 + if (page->in_cons != page->in_prod) {
13301 + info->page->in_cons = info->page->in_prod;
13302 + notify_remote_via_irq(info->irq);
13303 + }
13304 + return IRQ_HANDLED;
13305 +}
13306 +
13307 +static unsigned long vmalloc_to_mfn(void *address)
13308 +{
13309 + return pfn_to_mfn(vmalloc_to_pfn(address));
13310 +}
13311 +
13312 +static int __devinit xenfb_probe(struct xenbus_device *dev,
13313 + const struct xenbus_device_id *id)
13314 +{
13315 + struct xenfb_info *info;
13316 + struct fb_info *fb_info;
13317 + int fb_size;
13318 + int val;
13319 + int ret;
13320 +
13321 + info = kzalloc(sizeof(*info), GFP_KERNEL);
13322 + if (info == NULL) {
13323 + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
13324 + return -ENOMEM;
13325 + }
13326 +
13327 + /* Limit kernel param videoram amount to what is in xenstore */
13328 + if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) {
13329 + if (val < video[KPARAM_MEM])
13330 + video[KPARAM_MEM] = val;
13331 + }
13332 +
13333 + /* If requested res does not fit in available memory, use default */
13334 + fb_size = video[KPARAM_MEM] * MB_;
13335 + if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH/8 > fb_size) {
13336 + video[KPARAM_WIDTH] = XENFB_WIDTH;
13337 + video[KPARAM_HEIGHT] = XENFB_HEIGHT;
13338 + fb_size = XENFB_DEFAULT_FB_LEN;
13339 + }
13340 +
13341 + dev->dev.driver_data = info;
13342 + info->xbdev = dev;
13343 + info->irq = -1;
13344 + info->x1 = info->y1 = INT_MAX;
13345 + spin_lock_init(&info->dirty_lock);
13346 + spin_lock_init(&info->resize_lock);
13347 + mutex_init(&info->mm_lock);
13348 + init_waitqueue_head(&info->wq);
13349 + init_timer(&info->refresh);
13350 + info->refresh.function = xenfb_timer;
13351 + info->refresh.data = (unsigned long)info;
13352 + INIT_LIST_HEAD(&info->mappings);
13353 +
13354 + info->fb = vmalloc(fb_size);
13355 + if (info->fb == NULL)
13356 + goto error_nomem;
13357 + memset(info->fb, 0, fb_size);
13358 +
13359 + info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
13360 +
13361 + info->pages = kmalloc(sizeof(struct page *) * info->nr_pages,
13362 + GFP_KERNEL);
13363 + if (info->pages == NULL)
13364 + goto error_nomem;
13365 +
13366 + info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
13367 + if (!info->mfns)
13368 + goto error_nomem;
13369 +
13370 + /* set up shared page */
13371 + info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
13372 + if (!info->page)
13373 + goto error_nomem;
13374 +
13375 + fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL);
13376 + /* see fishy hackery below */
13377 + if (fb_info == NULL)
13378 + goto error_nomem;
13379 +
13380 + /* FIXME fishy hackery */
13381 + fb_info->pseudo_palette = fb_info->par;
13382 + fb_info->par = info;
13383 + /* /FIXME */
13384 + fb_info->screen_base = info->fb;
13385 +
13386 + fb_info->fbops = &xenfb_fb_ops;
13387 + fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH];
13388 + fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT];
13389 + fb_info->var.bits_per_pixel = XENFB_DEPTH;
13390 +
13391 + fb_info->var.red = (struct fb_bitfield){16, 8, 0};
13392 + fb_info->var.green = (struct fb_bitfield){8, 8, 0};
13393 + fb_info->var.blue = (struct fb_bitfield){0, 8, 0};
13394 +
13395 + fb_info->var.activate = FB_ACTIVATE_NOW;
13396 + fb_info->var.height = -1;
13397 + fb_info->var.width = -1;
13398 + fb_info->var.vmode = FB_VMODE_NONINTERLACED;
13399 +
13400 + fb_info->fix.visual = FB_VISUAL_TRUECOLOR;
13401 + fb_info->fix.line_length = fb_info->var.xres * (XENFB_DEPTH / 8);
13402 + fb_info->fix.smem_start = 0;
13403 + fb_info->fix.smem_len = fb_size;
13404 + strcpy(fb_info->fix.id, "xen");
13405 + fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
13406 + fb_info->fix.accel = FB_ACCEL_NONE;
13407 +
13408 + fb_info->flags = FBINFO_FLAG_DEFAULT;
13409 +
13410 + ret = fb_alloc_cmap(&fb_info->cmap, 256, 0);
13411 + if (ret < 0) {
13412 + framebuffer_release(fb_info);
13413 + xenbus_dev_fatal(dev, ret, "fb_alloc_cmap");
13414 + goto error;
13415 + }
13416 +
13417 + xenfb_init_shared_page(info, fb_info);
13418 +
13419 + ret = register_framebuffer(fb_info);
13420 + if (ret) {
13421 + fb_dealloc_cmap(&info->fb_info->cmap);
13422 + framebuffer_release(fb_info);
13423 + xenbus_dev_fatal(dev, ret, "register_framebuffer");
13424 + goto error;
13425 + }
13426 + info->fb_info = fb_info;
13427 +
13428 + ret = xenfb_connect_backend(dev, info);
13429 + if (ret < 0)
13430 + goto error;
13431 +
13432 + /* FIXME should this be delayed until backend XenbusStateConnected? */
13433 + info->kthread = kthread_run(xenfb_thread, info, "xenfb thread");
13434 + if (IS_ERR(info->kthread)) {
13435 + ret = PTR_ERR(info->kthread);
13436 + info->kthread = NULL;
13437 + xenbus_dev_fatal(dev, ret, "starting xenfb thread");
13438 + goto error;
13439 + }
13440 +
13441 + return 0;
13442 +
13443 + error_nomem:
13444 + ret = -ENOMEM;
13445 + xenbus_dev_fatal(dev, ret, "allocating device memory");
13446 + error:
13447 + xenfb_remove(dev);
13448 + return ret;
13449 +}
13450 +
13451 +static int xenfb_resume(struct xenbus_device *dev)
13452 +{
13453 + struct xenfb_info *info = dev->dev.driver_data;
13454 +
13455 + xenfb_disconnect_backend(info);
13456 + xenfb_init_shared_page(info, info->fb_info);
13457 + return xenfb_connect_backend(dev, info);
13458 +}
13459 +
13460 +static int xenfb_remove(struct xenbus_device *dev)
13461 +{
13462 + struct xenfb_info *info = dev->dev.driver_data;
13463 +
13464 + del_timer(&info->refresh);
13465 + if (info->kthread)
13466 + kthread_stop(info->kthread);
13467 + xenfb_disconnect_backend(info);
13468 + if (info->fb_info) {
13469 + unregister_framebuffer(info->fb_info);
13470 + fb_dealloc_cmap(&info->fb_info->cmap);
13471 + framebuffer_release(info->fb_info);
13472 + }
13473 + free_page((unsigned long)info->page);
13474 + vfree(info->mfns);
13475 + kfree(info->pages);
13476 + vfree(info->fb);
13477 + kfree(info);
13478 +
13479 + return 0;
13480 +}
13481 +
13482 +static void xenfb_init_shared_page(struct xenfb_info *info,
13483 + struct fb_info * fb_info)
13484 +{
13485 + int i;
13486 + int epd = PAGE_SIZE / sizeof(info->mfns[0]);
13487 +
13488 + for (i = 0; i < info->nr_pages; i++)
13489 + info->pages[i] = vmalloc_to_page(info->fb + i * PAGE_SIZE);
13490 +
13491 + for (i = 0; i < info->nr_pages; i++)
13492 + info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE);
13493 +
13494 + for (i = 0; i * epd < info->nr_pages; i++)
13495 + info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]);
13496 +
13497 + info->page->width = fb_info->var.xres;
13498 + info->page->height = fb_info->var.yres;
13499 + info->page->depth = fb_info->var.bits_per_pixel;
13500 + info->page->line_length = fb_info->fix.line_length;
13501 + info->page->mem_length = fb_info->fix.smem_len;
13502 + info->page->in_cons = info->page->in_prod = 0;
13503 + info->page->out_cons = info->page->out_prod = 0;
13504 +}
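As a worked example of the two-level layout built above, assuming 4 KiB pages and an 8-byte unsigned long (so epd = 512): the default 2 MB framebuffer is 512 pages, mfns[] therefore occupies 512 * 8 = 4096 bytes = exactly one page, and only pd[0] is used; a 32 MB framebuffer (8192 pages) would fill 16 pages of mfns[] and hence pd[0]..pd[15].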
13505 +
13506 +static int xenfb_connect_backend(struct xenbus_device *dev,
13507 + struct xenfb_info *info)
13508 +{
13509 + int ret;
13510 + struct xenbus_transaction xbt;
13511 +
13512 + ret = bind_listening_port_to_irqhandler(
13513 + dev->otherend_id, xenfb_event_handler, 0, "xenfb", info);
13514 + if (ret < 0) {
13515 + xenbus_dev_fatal(dev, ret,
13516 + "bind_listening_port_to_irqhandler");
13517 + return ret;
13518 + }
13519 + info->irq = ret;
13520 +
13521 + again:
13522 + ret = xenbus_transaction_start(&xbt);
13523 + if (ret) {
13524 + xenbus_dev_fatal(dev, ret, "starting transaction");
13525 + return ret;
13526 + }
13527 + ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
13528 + virt_to_mfn(info->page));
13529 + if (ret)
13530 + goto error_xenbus;
13531 + ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
13532 + irq_to_evtchn_port(info->irq));
13533 + if (ret)
13534 + goto error_xenbus;
13535 + ret = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
13536 + XEN_IO_PROTO_ABI_NATIVE);
13537 + if (ret)
13538 + goto error_xenbus;
13539 + ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1");
13540 + if (ret)
13541 + goto error_xenbus;
13542 + ret = xenbus_transaction_end(xbt, 0);
13543 + if (ret) {
13544 + if (ret == -EAGAIN)
13545 + goto again;
13546 + xenbus_dev_fatal(dev, ret, "completing transaction");
13547 + return ret;
13548 + }
13549 +
13550 + xenbus_switch_state(dev, XenbusStateInitialised);
13551 + return 0;
13552 +
13553 + error_xenbus:
13554 + xenbus_transaction_end(xbt, 1);
13555 + xenbus_dev_fatal(dev, ret, "writing xenstore");
13556 + return ret;
13557 +}
13558 +
13559 +static void xenfb_disconnect_backend(struct xenfb_info *info)
13560 +{
13561 + if (info->irq >= 0)
13562 + unbind_from_irqhandler(info->irq, info);
13563 + info->irq = -1;
13564 +}
13565 +
13566 +static void xenfb_backend_changed(struct xenbus_device *dev,
13567 + enum xenbus_state backend_state)
13568 +{
13569 + struct xenfb_info *info = dev->dev.driver_data;
13570 + int val;
13571 +
13572 + switch (backend_state) {
13573 + case XenbusStateInitialising:
13574 + case XenbusStateInitialised:
13575 + case XenbusStateReconfiguring:
13576 + case XenbusStateReconfigured:
13577 + case XenbusStateUnknown:
13578 + case XenbusStateClosed:
13579 + break;
13580 +
13581 + case XenbusStateInitWait:
13582 + InitWait:
13583 + xenbus_switch_state(dev, XenbusStateConnected);
13584 + break;
13585 +
13586 + case XenbusStateConnected:
13587 + /*
13588 + * Work around xenbus race condition: If backend goes
13589 + * through InitWait to Connected fast enough, we can
13590 + * get Connected twice here.
13591 + */
13592 + if (dev->state != XenbusStateConnected)
13593 + goto InitWait; /* no InitWait seen yet, fudge it */
13594 +
13595 + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
13596 + "request-update", "%d", &val) < 0)
13597 + val = 0;
13598 + if (val)
13599 + info->update_wanted = 1;
13600 +
13601 + if (xenbus_scanf(XBT_NIL, dev->otherend,
13602 + "feature-resize", "%d", &val) < 0)
13603 + val = 0;
13604 + info->feature_resize = val;
13605 + break;
13606 +
13607 + case XenbusStateClosing:
13608 +		/* FIXME is this safe in any dev->state? */
13609 + xenbus_frontend_closed(dev);
13610 + break;
13611 + }
13612 +}
13613 +
13614 +static const struct xenbus_device_id xenfb_ids[] = {
13615 + { "vfb" },
13616 + { "" }
13617 +};
13618 +MODULE_ALIAS("xen:vfb");
13619 +
13620 +static struct xenbus_driver xenfb_driver = {
13621 + .name = "vfb",
13622 + .owner = THIS_MODULE,
13623 + .ids = xenfb_ids,
13624 + .probe = xenfb_probe,
13625 + .remove = xenfb_remove,
13626 + .resume = xenfb_resume,
13627 + .otherend_changed = xenfb_backend_changed,
13628 +};
13629 +
13630 +static int __init xenfb_init(void)
13631 +{
13632 + if (!is_running_on_xen())
13633 + return -ENODEV;
13634 +
13635 + /* Nothing to do if running in dom0. */
13636 + if (is_initial_xendomain())
13637 + return -ENODEV;
13638 +
13639 + return xenbus_register_frontend(&xenfb_driver);
13640 +}
13641 +
13642 +static void __exit xenfb_cleanup(void)
13643 +{
13644 + return xenbus_unregister_driver(&xenfb_driver);
13645 +}
13646 +
13647 +module_init(xenfb_init);
13648 +module_exit(xenfb_cleanup);
13649 +
13650 +MODULE_LICENSE("GPL");
13651 Index: head-2008-11-25/drivers/xen/fbfront/xenkbd.c
13652 ===================================================================
13653 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
13654 +++ head-2008-11-25/drivers/xen/fbfront/xenkbd.c 2008-04-02 12:34:02.000000000 +0200
13655 @@ -0,0 +1,354 @@
13656 +/*
13657 + * linux/drivers/input/keyboard/xenkbd.c -- Xen para-virtual input device
13658 + *
13659 + * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
13660 + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
13661 + *
13662 + * Based on linux/drivers/input/mouse/sermouse.c
13663 + *
13664 + * This file is subject to the terms and conditions of the GNU General Public
13665 + * License. See the file COPYING in the main directory of this archive for
13666 + * more details.
13667 + */
13668 +
13669 +/*
13670 + * TODO:
13671 + *
13672 + * Switch to grant tables together with xenfb.c.
13673 + */
13674 +
13675 +#include <linux/kernel.h>
13676 +#include <linux/errno.h>
13677 +#include <linux/module.h>
13678 +#include <linux/input.h>
13679 +#include <asm/hypervisor.h>
13680 +#include <xen/evtchn.h>
13681 +#include <xen/interface/io/fbif.h>
13682 +#include <xen/interface/io/kbdif.h>
13683 +#include <xen/xenbus.h>
13684 +
13685 +struct xenkbd_info
13686 +{
13687 + struct input_dev *kbd;
13688 + struct input_dev *ptr;
13689 + struct xenkbd_page *page;
13690 + int irq;
13691 + struct xenbus_device *xbdev;
13692 + char phys[32];
13693 +};
13694 +
13695 +static int xenkbd_remove(struct xenbus_device *);
13696 +static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *);
13697 +static void xenkbd_disconnect_backend(struct xenkbd_info *);
13698 +
13699 +/*
13700 + * Note: if you need to send out events, see xenfb_do_update() for how
13701 + * to do that.
13702 + */
13703 +
13704 +static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
13705 +{
13706 + struct xenkbd_info *info = dev_id;
13707 + struct xenkbd_page *page = info->page;
13708 + __u32 cons, prod;
13709 +
13710 + prod = page->in_prod;
13711 + if (prod == page->in_cons)
13712 + return IRQ_HANDLED;
13713 + rmb(); /* ensure we see ring contents up to prod */
13714 + for (cons = page->in_cons; cons != prod; cons++) {
13715 + union xenkbd_in_event *event;
13716 + struct input_dev *dev;
13717 + event = &XENKBD_IN_RING_REF(page, cons);
13718 +
13719 + dev = info->ptr;
13720 + switch (event->type) {
13721 + case XENKBD_TYPE_MOTION:
13722 + if (event->motion.rel_z)
13723 + input_report_rel(dev, REL_WHEEL,
13724 + -event->motion.rel_z);
13725 + input_report_rel(dev, REL_X, event->motion.rel_x);
13726 + input_report_rel(dev, REL_Y, event->motion.rel_y);
13727 + break;
13728 + case XENKBD_TYPE_KEY:
13729 + dev = NULL;
13730 + if (test_bit(event->key.keycode, info->kbd->keybit))
13731 + dev = info->kbd;
13732 + if (test_bit(event->key.keycode, info->ptr->keybit))
13733 + dev = info->ptr;
13734 + if (dev)
13735 + input_report_key(dev, event->key.keycode,
13736 + event->key.pressed);
13737 + else
13738 + printk("xenkbd: unhandled keycode 0x%x\n",
13739 + event->key.keycode);
13740 + break;
13741 + case XENKBD_TYPE_POS:
13742 + if (event->pos.rel_z)
13743 + input_report_rel(dev, REL_WHEEL,
13744 + -event->pos.rel_z);
13745 + input_report_abs(dev, ABS_X, event->pos.abs_x);
13746 + input_report_abs(dev, ABS_Y, event->pos.abs_y);
13747 + break;
13748 + }
13749 + if (dev)
13750 + input_sync(dev);
13751 + }
13752 + mb(); /* ensure we got ring contents */
13753 + page->in_cons = cons;
13754 + notify_remote_via_irq(info->irq);
13755 +
13756 + return IRQ_HANDLED;
13757 +}
13758 +
13759 +int __devinit xenkbd_probe(struct xenbus_device *dev,
13760 + const struct xenbus_device_id *id)
13761 +{
13762 + int ret, i;
13763 + struct xenkbd_info *info;
13764 + struct input_dev *kbd, *ptr;
13765 +
13766 + info = kzalloc(sizeof(*info), GFP_KERNEL);
13767 + if (!info) {
13768 + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
13769 + return -ENOMEM;
13770 + }
13771 + dev->dev.driver_data = info;
13772 + info->xbdev = dev;
13773 + snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename);
13774 +
13775 + info->page = (void *)__get_free_page(GFP_KERNEL);
13776 + if (!info->page)
13777 + goto error_nomem;
13778 + info->page->in_cons = info->page->in_prod = 0;
13779 + info->page->out_cons = info->page->out_prod = 0;
13780 +
13781 + /* keyboard */
13782 + kbd = input_allocate_device();
13783 + if (!kbd)
13784 + goto error_nomem;
13785 + kbd->name = "Xen Virtual Keyboard";
13786 + kbd->phys = info->phys;
13787 + kbd->id.bustype = BUS_PCI;
13788 + kbd->id.vendor = 0x5853;
13789 + kbd->id.product = 0xffff;
13790 + kbd->evbit[0] = BIT(EV_KEY);
13791 + for (i = KEY_ESC; i < KEY_UNKNOWN; i++)
13792 + set_bit(i, kbd->keybit);
13793 + for (i = KEY_OK; i < KEY_MAX; i++)
13794 + set_bit(i, kbd->keybit);
13795 +
13796 + ret = input_register_device(kbd);
13797 + if (ret) {
13798 + input_free_device(kbd);
13799 + xenbus_dev_fatal(dev, ret, "input_register_device(kbd)");
13800 + goto error;
13801 + }
13802 + info->kbd = kbd;
13803 +
13804 + /* pointing device */
13805 + ptr = input_allocate_device();
13806 + if (!ptr)
13807 + goto error_nomem;
13808 + ptr->name = "Xen Virtual Pointer";
13809 + ptr->phys = info->phys;
13810 + ptr->id.bustype = BUS_PCI;
13811 + ptr->id.vendor = 0x5853;
13812 + ptr->id.product = 0xfffe;
13813 + ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
13814 + for (i = BTN_LEFT; i <= BTN_TASK; i++)
13815 + set_bit(i, ptr->keybit);
13816 + ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL);
13817 + input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
13818 + input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
13819 +
13820 + ret = input_register_device(ptr);
13821 + if (ret) {
13822 + input_free_device(ptr);
13823 + xenbus_dev_fatal(dev, ret, "input_register_device(ptr)");
13824 + goto error;
13825 + }
13826 + info->ptr = ptr;
13827 +
13828 + ret = xenkbd_connect_backend(dev, info);
13829 + if (ret < 0)
13830 + goto error;
13831 +
13832 + return 0;
13833 +
13834 + error_nomem:
13835 + ret = -ENOMEM;
13836 + xenbus_dev_fatal(dev, ret, "allocating device memory");
13837 + error:
13838 + xenkbd_remove(dev);
13839 + return ret;
13840 +}
13841 +
13842 +static int xenkbd_resume(struct xenbus_device *dev)
13843 +{
13844 + struct xenkbd_info *info = dev->dev.driver_data;
13845 +
13846 + xenkbd_disconnect_backend(info);
13847 + info->page->in_cons = info->page->in_prod = 0;
13848 + info->page->out_cons = info->page->out_prod = 0;
13849 + return xenkbd_connect_backend(dev, info);
13850 +}
13851 +
13852 +static int xenkbd_remove(struct xenbus_device *dev)
13853 +{
13854 + struct xenkbd_info *info = dev->dev.driver_data;
13855 +
13856 + xenkbd_disconnect_backend(info);
13857 + input_unregister_device(info->kbd);
13858 + input_unregister_device(info->ptr);
13859 + free_page((unsigned long)info->page);
13860 + kfree(info);
13861 + return 0;
13862 +}
13863 +
13864 +static int xenkbd_connect_backend(struct xenbus_device *dev,
13865 + struct xenkbd_info *info)
13866 +{
13867 + int ret;
13868 + struct xenbus_transaction xbt;
13869 +
13870 + ret = bind_listening_port_to_irqhandler(
13871 + dev->otherend_id, input_handler, 0, "xenkbd", info);
13872 + if (ret < 0) {
13873 + xenbus_dev_fatal(dev, ret,
13874 + "bind_listening_port_to_irqhandler");
13875 + return ret;
13876 + }
13877 + info->irq = ret;
13878 +
13879 + again:
13880 + ret = xenbus_transaction_start(&xbt);
13881 + if (ret) {
13882 + xenbus_dev_fatal(dev, ret, "starting transaction");
13883 + return ret;
13884 + }
13885 + ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
13886 + virt_to_mfn(info->page));
13887 + if (ret)
13888 + goto error_xenbus;
13889 + ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
13890 + irq_to_evtchn_port(info->irq));
13891 + if (ret)
13892 + goto error_xenbus;
13893 + ret = xenbus_transaction_end(xbt, 0);
13894 + if (ret) {
13895 + if (ret == -EAGAIN)
13896 + goto again;
13897 + xenbus_dev_fatal(dev, ret, "completing transaction");
13898 + return ret;
13899 + }
13900 +
13901 + xenbus_switch_state(dev, XenbusStateInitialised);
13902 + return 0;
13903 +
13904 + error_xenbus:
13905 + xenbus_transaction_end(xbt, 1);
13906 + xenbus_dev_fatal(dev, ret, "writing xenstore");
13907 + return ret;
13908 +}
13909 +
13910 +static void xenkbd_disconnect_backend(struct xenkbd_info *info)
13911 +{
13912 + if (info->irq >= 0)
13913 + unbind_from_irqhandler(info->irq, info);
13914 + info->irq = -1;
13915 +}
13916 +
13917 +static void xenkbd_backend_changed(struct xenbus_device *dev,
13918 + enum xenbus_state backend_state)
13919 +{
13920 + struct xenkbd_info *info = dev->dev.driver_data;
13921 + int ret, val;
13922 +
13923 + switch (backend_state) {
13924 + case XenbusStateInitialising:
13925 + case XenbusStateInitialised:
13926 + case XenbusStateReconfiguring:
13927 + case XenbusStateReconfigured:
13928 + case XenbusStateUnknown:
13929 + case XenbusStateClosed:
13930 + break;
13931 +
13932 + case XenbusStateInitWait:
13933 + InitWait:
13934 + ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
13935 + "feature-abs-pointer", "%d", &val);
13936 + if (ret < 0)
13937 + val = 0;
13938 + if (val) {
13939 + ret = xenbus_printf(XBT_NIL, info->xbdev->nodename,
13940 + "request-abs-pointer", "1");
13941 + if (ret)
13942 + ; /* FIXME */
13943 + }
13944 + xenbus_switch_state(dev, XenbusStateConnected);
13945 + break;
13946 +
13947 + case XenbusStateConnected:
13948 + /*
13949 + * Work around xenbus race condition: If backend goes
13950 + * through InitWait to Connected fast enough, we can
13951 + * get Connected twice here.
13952 + */
13953 + if (dev->state != XenbusStateConnected)
13954 + goto InitWait; /* no InitWait seen yet, fudge it */
13955 +
13956 + /* Set input abs params to match backend screen res */
13957 + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
13958 + "width", "%d", &val) > 0 )
13959 + input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0);
13960 +
13961 + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
13962 + "height", "%d", &val) > 0 )
13963 + input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0);
13964 +
13965 + break;
13966 +
13967 + case XenbusStateClosing:
13968 + xenbus_frontend_closed(dev);
13969 + break;
13970 + }
13971 +}
13972 +
13973 +static const struct xenbus_device_id xenkbd_ids[] = {
13974 + { "vkbd" },
13975 + { "" }
13976 +};
13977 +MODULE_ALIAS("xen:vkbd");
13978 +
13979 +static struct xenbus_driver xenkbd_driver = {
13980 + .name = "vkbd",
13981 + .owner = THIS_MODULE,
13982 + .ids = xenkbd_ids,
13983 + .probe = xenkbd_probe,
13984 + .remove = xenkbd_remove,
13985 + .resume = xenkbd_resume,
13986 + .otherend_changed = xenkbd_backend_changed,
13987 +};
13988 +
13989 +static int __init xenkbd_init(void)
13990 +{
13991 + if (!is_running_on_xen())
13992 + return -ENODEV;
13993 +
13994 + /* Nothing to do if running in dom0. */
13995 + if (is_initial_xendomain())
13996 + return -ENODEV;
13997 +
13998 + return xenbus_register_frontend(&xenkbd_driver);
13999 +}
14000 +
14001 +static void __exit xenkbd_cleanup(void)
14002 +{
14003 + return xenbus_unregister_driver(&xenkbd_driver);
14004 +}
14005 +
14006 +module_init(xenkbd_init);
14007 +module_exit(xenkbd_cleanup);
14008 +
14009 +MODULE_LICENSE("GPL");
14010 Index: head-2008-11-25/drivers/xen/gntdev/Makefile
14011 ===================================================================
14012 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
14013 +++ head-2008-11-25/drivers/xen/gntdev/Makefile 2008-01-07 13:19:18.000000000 +0100
14014 @@ -0,0 +1 @@
14015 +obj-$(CONFIG_XEN_GRANT_DEV) := gntdev.o
14016 Index: head-2008-11-25/drivers/xen/gntdev/gntdev.c
14017 ===================================================================
14018 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
14019 +++ head-2008-11-25/drivers/xen/gntdev/gntdev.c 2008-07-21 11:00:33.000000000 +0200
14020 @@ -0,0 +1,1074 @@
14021 +/******************************************************************************
14022 + * gntdev.c
14023 + *
14024 + * Device for accessing (in user-space) pages that have been granted by other
14025 + * domains.
14026 + *
14027 + * Copyright (c) 2006-2007, D G Murray.
14028 + *
14029 + * This program is distributed in the hope that it will be useful,
14030 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
14031 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14032 + * GNU General Public License for more details.
14033 + *
14034 + * You should have received a copy of the GNU General Public License
14035 + * along with this program; if not, write to the Free Software
14036 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14037 + */
14038 +
14039 +#include <asm/atomic.h>
14040 +#include <linux/module.h>
14041 +#include <linux/kernel.h>
14042 +#include <linux/init.h>
14043 +#include <linux/fs.h>
14044 +#include <linux/device.h>
14045 +#include <linux/mm.h>
14046 +#include <linux/mman.h>
14047 +#include <asm/uaccess.h>
14048 +#include <asm/io.h>
14049 +#include <xen/gnttab.h>
14050 +#include <asm/hypervisor.h>
14051 +#include <xen/balloon.h>
14052 +#include <xen/evtchn.h>
14053 +#include <xen/driver_util.h>
14054 +
14055 +#include <linux/types.h>
14056 +#include <xen/public/gntdev.h>
14057 +
14058 +
14059 +#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@cl.cam.ac.uk>"
14060 +#define DRIVER_DESC "User-space granted page access driver"
14061 +
14062 +MODULE_LICENSE("GPL");
14063 +MODULE_AUTHOR(DRIVER_AUTHOR);
14064 +MODULE_DESCRIPTION(DRIVER_DESC);
14065 +
14066 +#define MAX_GRANTS_LIMIT 1024
14067 +#define DEFAULT_MAX_GRANTS 128
14068 +
14069 +/* A slot can be in one of three states:
14070 + *
14071 + * 0. GNTDEV_SLOT_INVALID:
14072 + * This slot is not associated with a grant reference, and is therefore free
14073 + * to be overwritten by a new grant reference.
14074 + *
14075 + * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
14076 + * This slot is associated with a grant reference (via the
14077 + * IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
14078 + *
14079 + * 2. GNTDEV_SLOT_MAPPED:
14080 + * This slot is associated with a grant reference, and has been mmap()-ed.
14081 + */
14082 +typedef enum gntdev_slot_state {
14083 + GNTDEV_SLOT_INVALID = 0,
14084 + GNTDEV_SLOT_NOT_YET_MAPPED,
14085 + GNTDEV_SLOT_MAPPED
14086 +} gntdev_slot_state_t;
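A rough life cycle of one slot, as implied by the comment above (the final edge back to the free list is handled by the unmap/teardown paths later in this file):

	GNTDEV_SLOT_INVALID
	    -- IOCTL_GNTDEV_MAP_GRANT_REF -->    GNTDEV_SLOT_NOT_YET_MAPPED
	    -- mmap() of the returned offset --> GNTDEV_SLOT_MAPPED
	    -- unmap / VM area teardown -->      back to GNTDEV_SLOT_INVALID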
14087 +
14088 +#define GNTDEV_INVALID_HANDLE -1
14089 +#define GNTDEV_FREE_LIST_INVALID -1
14090 +/* Each opened instance of gntdev is associated with a list of grants,
14091 + * represented by an array of elements of the following type,
14092 + * gntdev_grant_info_t.
14093 + */
14094 +typedef struct gntdev_grant_info {
14095 + gntdev_slot_state_t state;
14096 + union {
14097 + uint32_t free_list_index;
14098 + struct {
14099 + domid_t domid;
14100 + grant_ref_t ref;
14101 + grant_handle_t kernel_handle;
14102 + grant_handle_t user_handle;
14103 + uint64_t dev_bus_addr;
14104 + } valid;
14105 + } u;
14106 +} gntdev_grant_info_t;
14107 +
14108 +/* Private data structure, which is stored in the file pointer for files
14109 + * associated with this device.
14110 + */
14111 +typedef struct gntdev_file_private_data {
14112 +
14113 + /* Array of grant information. */
14114 + gntdev_grant_info_t *grants;
14115 + uint32_t grants_size;
14116 +
14117 + /* Read/write semaphore used to protect the grants array. */
14118 + struct rw_semaphore grants_sem;
14119 +
14120 + /* An array of indices of free slots in the grants array.
14121 + * N.B. An entry in this list may temporarily have the value
14122 + * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
14123 + * from the list by the contiguous allocator, but the list has not yet
14124 + * been compressed. However, this is not visible across invocations of
14125 + * the device.
14126 + */
14127 + int32_t *free_list;
14128 +
14129 + /* The number of free slots in the grants array. */
14130 + uint32_t free_list_size;
14131 +
14132 + /* Read/write semaphore used to protect the free list. */
14133 + struct rw_semaphore free_list_sem;
14134 +
14135 + /* Index of the next slot after the most recent contiguous allocation,
14136 + * for use in a next-fit allocator.
14137 + */
14138 + uint32_t next_fit_index;
14139 +
14140 + /* Used to map grants into the kernel, before mapping them into user
14141 + * space.
14142 + */
14143 + struct page **foreign_pages;
14144 +
14145 +} gntdev_file_private_data_t;
14146 +
14147 +/* Module lifecycle operations. */
14148 +static int __init gntdev_init(void);
14149 +static void __exit gntdev_exit(void);
14150 +
14151 +module_init(gntdev_init);
14152 +module_exit(gntdev_exit);
14153 +
14154 +/* File operations. */
14155 +static int gntdev_open(struct inode *inode, struct file *flip);
14156 +static int gntdev_release(struct inode *inode, struct file *flip);
14157 +static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
14158 +static long gntdev_ioctl(struct file *flip,
14159 + unsigned int cmd, unsigned long arg);
14160 +
14161 +static const struct file_operations gntdev_fops = {
14162 + .owner = THIS_MODULE,
14163 + .open = gntdev_open,
14164 + .release = gntdev_release,
14165 + .mmap = gntdev_mmap,
14166 + .unlocked_ioctl = gntdev_ioctl
14167 +};
14168 +
14169 +/* VM operations. */
14170 +static void gntdev_vma_close(struct vm_area_struct *vma);
14171 +static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
14172 + pte_t *ptep, int is_fullmm);
14173 +
14174 +static struct vm_operations_struct gntdev_vmops = {
14175 + .close = gntdev_vma_close,
14176 + .zap_pte = gntdev_clear_pte
14177 +};
14178 +
14179 +/* Global variables. */
14180 +
14181 +/* The driver major number, for use when unregistering the driver. */
14182 +static int gntdev_major;
14183 +
14184 +#define GNTDEV_NAME "gntdev"
14185 +
14186 +/* Memory mapping functions
14187 + * ------------------------
14188 + *
14189 + * Every granted page is mapped into both kernel and user space, and the two
14190 + * following functions return the respective virtual addresses of these pages.
14191 + *
14192 + * When shadow paging is disabled, the granted page is mapped directly into
14193 + * user space; when it is enabled, it is mapped into the kernel and remapped
14194 + * into user space using vm_insert_page() (see gntdev_mmap(), below).
14195 + */
14196 +
14197 +/* Returns the virtual address (in user space) of the @page_index'th page
14198 + * in the given VM area.
14199 + */
14200 +static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
14201 + int page_index)
14202 +{
14203 + return (unsigned long) vma->vm_start + (page_index << PAGE_SHIFT);
14204 +}
14205 +
14206 +/* Returns the virtual address (in kernel space) of the @slot_index'th page
14207 + * mapped by the gntdev instance that owns the given private data struct.
14208 + */
14209 +static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
14210 + int slot_index)
14211 +{
14212 + unsigned long pfn;
14213 + void *kaddr;
14214 + pfn = page_to_pfn(priv->foreign_pages[slot_index]);
14215 + kaddr = pfn_to_kaddr(pfn);
14216 + return (unsigned long) kaddr;
14217 +}
14218 +
14219 +/* Helper functions. */
14220 +
14221 +/* Adds information about a grant reference to the list of grants in the file's
14222 + * private data structure. Returns non-zero on failure. On success, sets the
14223 + * value of *offset to the offset that should be mmap()-ed in order to map the
14224 + * grant reference.
14225 + */
14226 +static int add_grant_reference(struct file *flip,
14227 + struct ioctl_gntdev_grant_ref *op,
14228 + uint64_t *offset)
14229 +{
14230 + gntdev_file_private_data_t *private_data
14231 + = (gntdev_file_private_data_t *) flip->private_data;
14232 +
14233 + uint32_t slot_index;
14234 +
14235 + if (unlikely(private_data->free_list_size == 0)) {
14236 + return -ENOMEM;
14237 + }
14238 +
14239 + slot_index = private_data->free_list[--private_data->free_list_size];
14240 + private_data->free_list[private_data->free_list_size]
14241 + = GNTDEV_FREE_LIST_INVALID;
14242 +
14243 + /* Copy the grant information into file's private data. */
14244 + private_data->grants[slot_index].state = GNTDEV_SLOT_NOT_YET_MAPPED;
14245 + private_data->grants[slot_index].u.valid.domid = op->domid;
14246 + private_data->grants[slot_index].u.valid.ref = op->ref;
14247 +
14248 + /* The offset is calculated as the index of the chosen entry in the
14249 + * file's private data's array of grant information. This is then
14250 + * shifted to give an offset into the virtual "file address space".
14251 + */
14252 + *offset = slot_index << PAGE_SHIFT;
14253 +
14254 + return 0;
14255 +}
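Seen from user space, the offset convention above means: request a slot with the map ioctl, then mmap() the index it returns. A minimal sketch, assuming the structure layout of xen/public/gntdev.h and with fd, remote_domid and gref supplied by the caller (error handling omitted):

	struct ioctl_gntdev_map_grant_ref op = { .count = 1 };
	void *p;

	op.refs[0].domid = remote_domid;
	op.refs[0].ref   = gref;
	if (ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &op) == 0)
		p = mmap(NULL, 4096 /* one page */, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, op.index);  /* op.index == slot_index << PAGE_SHIFT */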
14256 +
14257 +/* Adds the @count grant references to the contiguous range in the slot array
14258 + * beginning at @first_slot. It is assumed that @first_slot was returned by a
14259 + * previous invocation of find_contiguous_free_range(), during the same
14260 + * invocation of the driver.
14261 + */
14262 +static int add_grant_references(struct file *flip,
14263 + int count,
14264 + struct ioctl_gntdev_grant_ref *ops,
14265 + uint32_t first_slot)
14266 +{
14267 + gntdev_file_private_data_t *private_data
14268 + = (gntdev_file_private_data_t *) flip->private_data;
14269 + int i;
14270 +
14271 + for (i = 0; i < count; ++i) {
14272 +
14273 + /* First, mark the slot's entry in the free list as invalid. */
14274 + int free_list_index =
14275 + private_data->grants[first_slot+i].u.free_list_index;
14276 + private_data->free_list[free_list_index] =
14277 + GNTDEV_FREE_LIST_INVALID;
14278 +
14279 + /* Now, update the slot. */
14280 + private_data->grants[first_slot+i].state =
14281 + GNTDEV_SLOT_NOT_YET_MAPPED;
14282 + private_data->grants[first_slot+i].u.valid.domid =
14283 + ops[i].domid;
14284 + private_data->grants[first_slot+i].u.valid.ref = ops[i].ref;
14285 + }
14286 +
14287 + return 0;
14288 +}
14289 +
14290 +/* Scans through the free list for @flip, removing entries that are marked as
14291 + * GNTDEV_FREE_LIST_INVALID. This will reduce the recorded size of the free list to
14292 + * the number of valid entries.
14293 + */
14294 +static void compress_free_list(struct file *flip)
14295 +{
14296 + gntdev_file_private_data_t *private_data
14297 + = (gntdev_file_private_data_t *) flip->private_data;
14298 + int i, j = 0, old_size, slot_index;
14299 +
14300 + old_size = private_data->free_list_size;
14301 + for (i = 0; i < old_size; ++i) {
14302 + if (private_data->free_list[i] != GNTDEV_FREE_LIST_INVALID) {
14303 + if (i > j) {
14304 + slot_index = private_data->free_list[i];
14305 + private_data->free_list[j] = slot_index;
14306 + private_data->grants[slot_index].u
14307 + .free_list_index = j;
14308 + private_data->free_list[i]
14309 + = GNTDEV_FREE_LIST_INVALID;
14310 + }
14311 + ++j;
14312 + } else {
14313 + --private_data->free_list_size;
14314 + }
14315 + }
14316 +}
14317 +
14318 +/* Searches the grant array in the private data of @flip for a range of
14319 + * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state.
14320 + *
14321 + * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
14322 + */
14323 +static int find_contiguous_free_range(struct file *flip,
14324 + uint32_t num_slots)
14325 +{
14326 + gntdev_file_private_data_t *private_data
14327 + = (gntdev_file_private_data_t *) flip->private_data;
14328 +
14329 + int i;
14330 + int start_index = private_data->next_fit_index;
14331 + int range_start = 0, range_length;
14332 +
14333 + if (private_data->free_list_size < num_slots) {
14334 + return -ENOMEM;
14335 + }
14336 +
14337 + /* First search from the start_index to the end of the array. */
14338 + range_length = 0;
14339 + for (i = start_index; i < private_data->grants_size; ++i) {
14340 + if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
14341 + if (range_length == 0) {
14342 + range_start = i;
14343 + }
14344 + ++range_length;
14345 + if (range_length == num_slots) {
14346 + return range_start;
14347 + }
14348 + }
14349 + }
14350 +
14351 + /* Now search from the start of the array to the start_index. */
14352 + range_length = 0;
14353 + for (i = 0; i < start_index; ++i) {
14354 + if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
14355 + if (range_length == 0) {
14356 + range_start = i;
14357 + }
14358 + ++range_length;
14359 + if (range_length == num_slots) {
14360 + return range_start;
14361 + }
14362 + }
14363 + }
14364 +
14365 + return -ENOMEM;
14366 +}
14367 +
14368 +static int init_private_data(gntdev_file_private_data_t *priv,
14369 + uint32_t max_grants)
14370 +{
14371 + int i;
14372 +
14373 + /* Allocate space for the kernel-mapping of granted pages. */
14374 + priv->foreign_pages =
14375 + alloc_empty_pages_and_pagevec(max_grants);
14376 + if (!priv->foreign_pages)
14377 + goto nomem_out;
14378 +
14379 + /* Allocate the grant list and free-list. */
14380 + priv->grants = kmalloc(max_grants * sizeof(gntdev_grant_info_t),
14381 + GFP_KERNEL);
14382 + if (!priv->grants)
14383 + goto nomem_out2;
14384 + priv->free_list = kmalloc(max_grants * sizeof(int32_t), GFP_KERNEL);
14385 + if (!priv->free_list)
14386 + goto nomem_out3;
14387 +
14388 + /* Initialise the free-list, which contains all slots at first. */
14389 + for (i = 0; i < max_grants; ++i) {
14390 + priv->free_list[max_grants - i - 1] = i;
14391 + priv->grants[i].state = GNTDEV_SLOT_INVALID;
14392 + priv->grants[i].u.free_list_index = max_grants - i - 1;
14393 + }
14394 + priv->grants_size = max_grants;
14395 + priv->free_list_size = max_grants;
14396 + priv->next_fit_index = 0;
14397 +
14398 + return 0;
14399 +
14400 +nomem_out3:
14401 + kfree(priv->grants);
14402 +nomem_out2:
14403 + free_empty_pages_and_pagevec(priv->foreign_pages, max_grants);
14404 +nomem_out:
14405 + return -ENOMEM;
14406 +
14407 +}
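For instance, with max_grants = 4 the loop above leaves free_list = {3, 2, 1, 0}; add_grant_reference() pops from the tail (free_list[--free_list_size]), so slots are handed out in ascending order 0, 1, 2, 3, and each grants[i].u.free_list_index records where slot i currently sits in free_list (here 3, 2, 1 and 0 respectively).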
14408 +
14409 +/* Interface functions. */
14410 +
14411 +/* Initialises the driver. Called when the module is loaded. */
14412 +static int __init gntdev_init(void)
14413 +{
14414 + struct class *class;
14415 + struct class_device *device;
14416 +
14417 + if (!is_running_on_xen()) {
14418 + printk(KERN_ERR "You must be running Xen to use gntdev\n");
14419 + return -ENODEV;
14420 + }
14421 +
14422 + gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
14423 + if (gntdev_major < 0)
14424 + {
14425 + printk(KERN_ERR "Could not register gntdev device\n");
14426 + return -ENOMEM;
14427 + }
14428 +
14429 + /* Note that if the sysfs code fails, we will still initialise the
14430 + * device, and output the major number so that the device can be
14431 + * created manually using mknod.
14432 + */
14433 + if ((class = get_xen_class()) == NULL) {
14434 + printk(KERN_ERR "Error setting up xen_class\n");
14435 + printk(KERN_ERR "gntdev created with major number = %d\n",
14436 + gntdev_major);
14437 + return 0;
14438 + }
14439 +
14440 + device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
14441 + NULL, GNTDEV_NAME);
14442 + if (IS_ERR(device)) {
14443 + printk(KERN_ERR "Error creating gntdev device in xen_class\n");
14444 + printk(KERN_ERR "gntdev created with major number = %d\n",
14445 + gntdev_major);
14446 + return 0;
14447 + }
14448 +
14449 + return 0;
14450 +}
14451 +
14452 +/* Cleans up and unregisters the driver. Called when the driver is unloaded.
14453 + */
14454 +static void __exit gntdev_exit(void)
14455 +{
14456 + struct class *class;
14457 + if ((class = get_xen_class()) != NULL)
14458 + class_device_destroy(class, MKDEV(gntdev_major, 0));
14459 + unregister_chrdev(gntdev_major, GNTDEV_NAME);
14460 +}
14461 +
14462 +/* Called when the device is opened. */
14463 +static int gntdev_open(struct inode *inode, struct file *flip)
14464 +{
14465 + gntdev_file_private_data_t *private_data;
14466 +
14467 + try_module_get(THIS_MODULE);
14468 +
14469 + /* Allocate space for the per-instance private data. */
14470 + private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
14471 + if (!private_data)
14472 + goto nomem_out;
14473 +
14474 + /* These will be lazily initialised by init_private_data. */
14475 + private_data->grants = NULL;
14476 + private_data->free_list = NULL;
14477 + private_data->foreign_pages = NULL;
14478 +
14479 + init_rwsem(&private_data->grants_sem);
14480 + init_rwsem(&private_data->free_list_sem);
14481 +
14482 + flip->private_data = private_data;
14483 +
14484 + return 0;
14485 +
14486 +nomem_out:
14487 + return -ENOMEM;
14488 +}
14489 +
14490 +/* Called when the device is closed.
14491 + */
14492 +static int gntdev_release(struct inode *inode, struct file *flip)
14493 +{
14494 + if (flip->private_data) {
14495 + gntdev_file_private_data_t *private_data =
14496 + (gntdev_file_private_data_t *) flip->private_data;
14497 + if (private_data->foreign_pages)
14498 + free_empty_pages_and_pagevec
14499 + (private_data->foreign_pages,
14500 + private_data->grants_size);
14501 + if (private_data->grants)
14502 + kfree(private_data->grants);
14503 + if (private_data->free_list)
14504 + kfree(private_data->free_list);
14505 + kfree(private_data);
14506 + }
14507 + module_put(THIS_MODULE);
14508 + return 0;
14509 +}
14510 +
14511 +/* Called when an attempt is made to mmap() the device. The private data from
14512 + * @flip contains the list of grant references that can be mapped. The vm_pgoff
14513 + * field of @vma contains the index into that list that refers to the grant
14514 + * reference that will be mapped. Only mappings that are a multiple of
14515 + * PAGE_SIZE are handled.
14516 + */
14517 +static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma)
14518 +{
14519 + struct gnttab_map_grant_ref op;
14520 + unsigned long slot_index = vma->vm_pgoff;
14521 + unsigned long kernel_vaddr, user_vaddr;
14522 + uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
14523 + uint64_t ptep;
14524 + int ret;
14525 + int flags;
14526 + int i;
14527 + struct page *page;
14528 + gntdev_file_private_data_t *private_data = flip->private_data;
14529 +
14530 + if (unlikely(!private_data)) {
14531 + printk(KERN_ERR "File's private data is NULL.\n");
14532 + return -EINVAL;
14533 + }
14534 +
14535 + /* Test to make sure that the grants array has been initialised. */
14536 + down_read(&private_data->grants_sem);
14537 + if (unlikely(!private_data->grants)) {
14538 + up_read(&private_data->grants_sem);
14539 + printk(KERN_ERR "Attempted to mmap before ioctl.\n");
14540 + return -EINVAL;
14541 + }
14542 + up_read(&private_data->grants_sem);
14543 +
14544 + if (unlikely((size <= 0) ||
14545 + (size + slot_index) > private_data->grants_size)) {
14546 + printk(KERN_ERR "Invalid number of pages or offset"
14547 + " (num_pages = %d, first_slot = %ld).\n",
14548 + size, slot_index);
14549 + return -ENXIO;
14550 + }
14551 +
14552 + if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
14553 + printk(KERN_ERR "Writable mappings must be shared.\n");
14554 + return -EINVAL;
14555 + }
14556 +
14557 + /* Slots must be in the NOT_YET_MAPPED state. */
14558 + down_write(&private_data->grants_sem);
14559 + for (i = 0; i < size; ++i) {
14560 + if (private_data->grants[slot_index + i].state !=
14561 + GNTDEV_SLOT_NOT_YET_MAPPED) {
14562 + printk(KERN_ERR "Slot (index = %ld) is in the wrong "
14563 + "state (%d).\n", slot_index + i,
14564 + private_data->grants[slot_index + i].state);
14565 + up_write(&private_data->grants_sem);
14566 + return -EINVAL;
14567 + }
14568 + }
14569 +
14570 + /* Install the hook for unmapping. */
14571 + vma->vm_ops = &gntdev_vmops;
14572 +
14573 + /* The VM area contains pages from another VM. */
14574 + vma->vm_flags |= VM_FOREIGN;
14575 + vma->vm_private_data = kzalloc(size * sizeof(struct page *),
14576 + GFP_KERNEL);
14577 + if (vma->vm_private_data == NULL) {
14578 + printk(KERN_ERR "Couldn't allocate mapping structure for VM "
14579 + "area.\n");
14580 + return -ENOMEM;
14581 + }
14582 +
14583 + /* This flag prevents Bad PTE errors when the memory is unmapped. */
14584 + vma->vm_flags |= VM_RESERVED;
14585 +
14586 + /* This flag prevents this VM area from being copied on a fork(). A better
14587 + * behaviour might be to explicitly carry out the appropriate mappings
14588 + * on fork(), but I don't know if there's a hook for this.
14589 + */
14590 + vma->vm_flags |= VM_DONTCOPY;
14591 +
14592 +#ifdef CONFIG_X86
14593 + /* This flag ensures that the page tables are not unpinned before the
14594 + * VM area is unmapped. Therefore Xen still recognises the PTE as
14595 + * belonging to an L1 pagetable, and the grant unmap operation will
14596 + * succeed, even if the process does not exit cleanly.
14597 + */
14598 + vma->vm_mm->context.has_foreign_mappings = 1;
14599 +#endif
14600 +
14601 + for (i = 0; i < size; ++i) {
14602 +
14603 + flags = GNTMAP_host_map;
14604 + if (!(vma->vm_flags & VM_WRITE))
14605 + flags |= GNTMAP_readonly;
14606 +
14607 + kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
14608 + user_vaddr = get_user_vaddr(vma, i);
14609 + page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT);
14610 +
14611 + gnttab_set_map_op(&op, kernel_vaddr, flags,
14612 + private_data->grants[slot_index+i]
14613 + .u.valid.ref,
14614 + private_data->grants[slot_index+i]
14615 + .u.valid.domid);
14616 +
14617 + /* Carry out the mapping of the grant reference. */
14618 + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
14619 + &op, 1);
14620 + BUG_ON(ret);
14621 + if (op.status) {
14622 + printk(KERN_ERR "Error mapping the grant reference "
14623 + "into the kernel (%d). domid = %d; ref = %d\n",
14624 + op.status,
14625 + private_data->grants[slot_index+i]
14626 + .u.valid.domid,
14627 + private_data->grants[slot_index+i]
14628 + .u.valid.ref);
14629 + goto undo_map_out;
14630 + }
14631 +
14632 + /* Store a reference to the page that will be mapped into user
14633 + * space.
14634 + */
14635 + ((struct page **) vma->vm_private_data)[i] = page;
14636 +
14637 + /* Mark mapped page as reserved. */
14638 + SetPageReserved(page);
14639 +
14640 + /* Record the grant handle, for use in the unmap operation. */
14641 + private_data->grants[slot_index+i].u.valid.kernel_handle =
14642 + op.handle;
14643 + private_data->grants[slot_index+i].u.valid.dev_bus_addr =
14644 + op.dev_bus_addr;
14645 +
14646 + private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED;
14647 + private_data->grants[slot_index+i].u.valid.user_handle =
14648 + GNTDEV_INVALID_HANDLE;
14649 +
14650 + /* Now perform the mapping to user space. */
14651 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
14652 +
14653 + /* NOT USING SHADOW PAGE TABLES. */
14654 + /* In this case, we map the grant(s) straight into user
14655 + * space.
14656 + */
14657 +
14658 + /* Get the machine address of the PTE for the user
14659 + * page.
14660 + */
14661 + if ((ret = create_lookup_pte_addr(vma->vm_mm,
14662 + vma->vm_start
14663 + + (i << PAGE_SHIFT),
14664 + &ptep)))
14665 + {
14666 + printk(KERN_ERR "Error obtaining PTE pointer "
14667 + "(%d).\n", ret);
14668 + goto undo_map_out;
14669 + }
14670 +
14671 + /* Configure the map operation. */
14672 +
14673 + /* The reference is to be used by host CPUs. */
14674 + flags = GNTMAP_host_map;
14675 +
14676 + /* Specifies a user space mapping. */
14677 + flags |= GNTMAP_application_map;
14678 +
14679 + /* The map request contains the machine address of the
14680 + * PTE to update.
14681 + */
14682 + flags |= GNTMAP_contains_pte;
14683 +
14684 + if (!(vma->vm_flags & VM_WRITE))
14685 + flags |= GNTMAP_readonly;
14686 +
14687 + gnttab_set_map_op(&op, ptep, flags,
14688 + private_data->grants[slot_index+i]
14689 + .u.valid.ref,
14690 + private_data->grants[slot_index+i]
14691 + .u.valid.domid);
14692 +
14693 + /* Carry out the mapping of the grant reference. */
14694 + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
14695 + &op, 1);
14696 + BUG_ON(ret);
14697 + if (op.status) {
14698 + printk(KERN_ERR "Error mapping the grant "
14699 + "reference into user space (%d). domid "
14700 + "= %d; ref = %d\n", op.status,
14701 + private_data->grants[slot_index+i].u
14702 + .valid.domid,
14703 + private_data->grants[slot_index+i].u
14704 + .valid.ref);
14705 + goto undo_map_out;
14706 + }
14707 +
14708 + /* Record the grant handle, for use in the unmap
14709 + * operation.
14710 + */
14711 + private_data->grants[slot_index+i].u.
14712 + valid.user_handle = op.handle;
14713 +
14714 + /* Update p2m structure with the new mapping. */
14715 + set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
14716 + FOREIGN_FRAME(private_data->
14717 + grants[slot_index+i]
14718 + .u.valid.dev_bus_addr
14719 + >> PAGE_SHIFT));
14720 + } else {
14721 + /* USING SHADOW PAGE TABLES. */
14722 + /* In this case, we simply insert the page into the VM
14723 + * area. */
14724 + ret = vm_insert_page(vma, user_vaddr, page);
14725 + }
14726 +
14727 + }
14728 +
14729 + up_write(&private_data->grants_sem);
14730 + return 0;
14731 +
14732 +undo_map_out:
14733 + /* If we have a mapping failure, the unmapping will be taken care of
14734 + * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
14735 + * All we need to do here is free vma->vm_private_data.
14736 + */
14737 + kfree(vma->vm_private_data);
14738 +
14739 + /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
14740 + * to NULL on failure. However, we need this in gntdev_clear_pte() to
14741 + * unmap the grants. Therefore, we smuggle a reference to the file's
14742 + * private data in the VM area's private data pointer.
14743 + */
14744 + vma->vm_private_data = private_data;
14745 +
14746 + up_write(&private_data->grants_sem);
14747 +
14748 + return -ENOMEM;
14749 +}
14750 +
14751 +static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
14752 + pte_t *ptep, int is_fullmm)
14753 +{
14754 + int slot_index, ret;
14755 + pte_t copy;
14756 + struct gnttab_unmap_grant_ref op;
14757 + gntdev_file_private_data_t *private_data;
14758 +
14759 + /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
14760 + * to NULL on failure. However, we need this in gntdev_clear_pte() to
14761 + * unmap the grants. Therefore, we smuggle a reference to the file's
14762 + * private data in the VM area's private data pointer.
14763 + */
14764 + if (vma->vm_file) {
14765 + private_data = (gntdev_file_private_data_t *)
14766 + vma->vm_file->private_data;
14767 + } else if (vma->vm_private_data) {
14768 + private_data = (gntdev_file_private_data_t *)
14769 + vma->vm_private_data;
14770 + } else {
14771 + private_data = NULL; /* gcc warning */
14772 + BUG();
14773 + }
14774 +
14775 + /* Copy the existing value of the PTE for returning. */
14776 + copy = *ptep;
14777 +
14778 + /* Calculate the grant relating to this PTE. */
14779 + slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
14780 +
14781 + /* Only unmap grants if the slot has been mapped. This could be being
14782 + * called from a failing mmap().
14783 + */
14784 + if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) {
14785 +
14786 + /* First, we clear the user space mapping, if it has been made.
14787 + */
14788 + if (private_data->grants[slot_index].u.valid.user_handle !=
14789 + GNTDEV_INVALID_HANDLE &&
14790 + !xen_feature(XENFEAT_auto_translated_physmap)) {
14791 + /* NOT USING SHADOW PAGE TABLES. */
14792 + gnttab_set_unmap_op(&op, virt_to_machine(ptep),
14793 + GNTMAP_contains_pte,
14794 + private_data->grants[slot_index]
14795 + .u.valid.user_handle);
14796 + ret = HYPERVISOR_grant_table_op(
14797 + GNTTABOP_unmap_grant_ref, &op, 1);
14798 + BUG_ON(ret);
14799 + if (op.status)
14800 + printk(KERN_WARNING "User unmap grant status = %d\n",
14801 + op.status);
14802 + } else {
14803 + /* USING SHADOW PAGE TABLES. */
14804 + pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
14805 + }
14806 +
14807 + /* Finally, we unmap the grant from kernel space. */
14808 + gnttab_set_unmap_op(&op,
14809 + get_kernel_vaddr(private_data, slot_index),
14810 + GNTMAP_host_map,
14811 + private_data->grants[slot_index].u.valid
14812 + .kernel_handle);
14813 + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
14814 + &op, 1);
14815 + BUG_ON(ret);
14816 + if (op.status)
14817 + printk(KERN_WARNING "Kernel unmap grant status = %d\n", op.status);
14818 +
14819 +
14820 + /* Return slot to the not-yet-mapped state, so that it may be
14821 + * mapped again, or removed by a subsequent ioctl.
14822 + */
14823 + private_data->grants[slot_index].state =
14824 + GNTDEV_SLOT_NOT_YET_MAPPED;
14825 +
14826 + /* Invalidate the physical to machine mapping for this page. */
14827 + set_phys_to_machine(__pa(get_kernel_vaddr(private_data,
14828 + slot_index))
14829 + >> PAGE_SHIFT, INVALID_P2M_ENTRY);
14830 +
14831 + } else {
14832 + pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
14833 + }
14834 +
14835 + return copy;
14836 +}
14837 +
14838 +/* "Destructor" for a VM area.
14839 + */
14840 +static void gntdev_vma_close(struct vm_area_struct *vma) {
14841 + if (vma->vm_private_data) {
14842 + kfree(vma->vm_private_data);
14843 + }
14844 +}
14845 +
14846 +/* Called when an ioctl is made on the device.
14847 + */
14848 +static long gntdev_ioctl(struct file *flip,
14849 + unsigned int cmd, unsigned long arg)
14850 +{
14851 + int rc = 0;
14852 + gntdev_file_private_data_t *private_data =
14853 + (gntdev_file_private_data_t *) flip->private_data;
14854 +
14855 + /* On the first invocation, we will lazily initialise the grant array
14856 + * and free-list.
14857 + */
14858 + if (unlikely(!private_data->grants)
14859 + && likely(cmd != IOCTL_GNTDEV_SET_MAX_GRANTS)) {
14860 + down_write(&private_data->grants_sem);
14861 +
14862 + if (unlikely(private_data->grants)) {
14863 + up_write(&private_data->grants_sem);
14864 + goto private_data_initialised;
14865 + }
14866 +
14867 + /* Just use the default. Setting to a non-default is handled
14868 + * in the ioctl switch.
14869 + */
14870 + rc = init_private_data(private_data, DEFAULT_MAX_GRANTS);
14871 +
14872 + up_write(&private_data->grants_sem);
14873 +
14874 + if (rc) {
14875 + printk (KERN_ERR "Initialising gntdev private data "
14876 + "failed.\n");
14877 + return rc;
14878 + }
14879 + }
14880 +
14881 +private_data_initialised:
14882 + switch (cmd) {
14883 + case IOCTL_GNTDEV_MAP_GRANT_REF:
14884 + {
14885 + struct ioctl_gntdev_map_grant_ref op;
14886 + down_write(&private_data->grants_sem);
14887 + down_write(&private_data->free_list_sem);
14888 +
14889 + if ((rc = copy_from_user(&op, (void __user *) arg,
14890 + sizeof(op)))) {
14891 + rc = -EFAULT;
14892 + goto map_out;
14893 + }
14894 + if (unlikely(op.count <= 0)) {
14895 + rc = -EINVAL;
14896 + goto map_out;
14897 + }
14898 +
14899 + if (op.count == 1) {
14900 + if ((rc = add_grant_reference(flip, &op.refs[0],
14901 + &op.index)) < 0) {
14902 + printk(KERN_ERR "Adding grant reference "
14903 + "failed (%d).\n", rc);
14904 + goto map_out;
14905 + }
14906 + } else {
14907 + struct ioctl_gntdev_grant_ref *refs, *u;
14908 + refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
14909 + if (!refs) {
14910 + rc = -ENOMEM;
14911 + goto map_out;
14912 + }
14913 + u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
14914 + if ((rc = copy_from_user(refs,
14915 + (void __user *)u,
14916 + sizeof(*refs) * op.count))) {
14917 + printk(KERN_ERR "Copying refs from user failed"
14918 + " (%d).\n", rc);
14919 + rc = -EINVAL;
14920 + goto map_out;
14921 + }
14922 + if ((rc = find_contiguous_free_range(flip, op.count))
14923 + < 0) {
14924 + printk(KERN_ERR "Finding contiguous range "
14925 + "failed (%d).\n", rc);
14926 + kfree(refs);
14927 + goto map_out;
14928 + }
14929 + op.index = rc << PAGE_SHIFT;
14930 + if ((rc = add_grant_references(flip, op.count,
14931 + refs, rc))) {
14932 + printk(KERN_ERR "Adding grant references "
14933 + "failed (%d).\n", rc);
14934 + kfree(refs);
14935 + goto map_out;
14936 + }
14937 + compress_free_list(flip);
14938 + kfree(refs);
14939 + }
14940 + if ((rc = copy_to_user((void __user *) arg,
14941 + &op,
14942 + sizeof(op)))) {
14943 + printk(KERN_ERR "Copying result back to user failed "
14944 + "(%d)\n", rc);
14945 + rc = -EFAULT;
14946 + goto map_out;
14947 + }
14948 + map_out:
14949 + up_write(&private_data->grants_sem);
14950 + up_write(&private_data->free_list_sem);
14951 + return rc;
14952 + }
14953 + case IOCTL_GNTDEV_UNMAP_GRANT_REF:
14954 + {
14955 + struct ioctl_gntdev_unmap_grant_ref op;
14956 + int i, start_index;
14957 +
14958 + down_write(&private_data->grants_sem);
14959 + down_write(&private_data->free_list_sem);
14960 +
14961 + if ((rc = copy_from_user(&op,
14962 + (void __user *) arg,
14963 + sizeof(op)))) {
14964 + rc = -EFAULT;
14965 + goto unmap_out;
14966 + }
14967 +
14968 + start_index = op.index >> PAGE_SHIFT;
14969 +
14970 + /* First, check that all pages are in the NOT_YET_MAPPED
14971 + * state.
14972 + */
14973 + for (i = 0; i < op.count; ++i) {
14974 + if (unlikely
14975 + (private_data->grants[start_index + i].state
14976 + != GNTDEV_SLOT_NOT_YET_MAPPED)) {
14977 + if (private_data->grants[start_index + i].state
14978 + == GNTDEV_SLOT_INVALID) {
14979 + printk(KERN_ERR
14980 + "Tried to remove an invalid "
14981 + "grant at offset 0x%x.\n",
14982 + (start_index + i)
14983 + << PAGE_SHIFT);
14984 + rc = -EINVAL;
14985 + } else {
14986 + printk(KERN_ERR
14987 + "Tried to remove a grant which "
14988 + "is currently mmap()-ed at "
14989 + "offset 0x%x.\n",
14990 + (start_index + i)
14991 + << PAGE_SHIFT);
14992 + rc = -EBUSY;
14993 + }
14994 + goto unmap_out;
14995 + }
14996 + }
14997 +
14998 + /* Unmap pages and add them to the free list.
14999 + */
15000 + for (i = 0; i < op.count; ++i) {
15001 + private_data->grants[start_index+i].state =
15002 + GNTDEV_SLOT_INVALID;
15003 + private_data->grants[start_index+i].u.free_list_index =
15004 + private_data->free_list_size;
15005 + private_data->free_list[private_data->free_list_size] =
15006 + start_index + i;
15007 + ++private_data->free_list_size;
15008 + }
15009 +
15010 + unmap_out:
15011 + up_write(&private_data->grants_sem);
15012 + up_write(&private_data->free_list_sem);
15013 + return rc;
15014 + }
15015 + case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
15016 + {
15017 + struct ioctl_gntdev_get_offset_for_vaddr op;
15018 + struct vm_area_struct *vma;
15019 + unsigned long vaddr;
15020 +
15021 + if ((rc = copy_from_user(&op,
15022 + (void __user *) arg,
15023 + sizeof(op)))) {
15024 + rc = -EFAULT;
15025 + goto get_offset_out;
15026 + }
15027 + vaddr = (unsigned long)op.vaddr;
15028 +
15029 + down_read(&current->mm->mmap_sem);
15030 + vma = find_vma(current->mm, vaddr);
15031 + if (vma == NULL) {
15032 + rc = -EFAULT;
15033 + goto get_offset_unlock_out;
15034 + }
15035 + if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) {
15036 + printk(KERN_ERR "The vaddr specified does not belong "
15037 + "to a gntdev instance: %#lx\n", vaddr);
15038 + rc = -EFAULT;
15039 + goto get_offset_unlock_out;
15040 + }
15041 + if (vma->vm_start != vaddr) {
15042 + printk(KERN_ERR "The vaddr specified in an "
15043 + "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
15044 + "the start of the VM area. vma->vm_start = "
15045 + "%#lx; vaddr = %#lx\n",
15046 + vma->vm_start, vaddr);
15047 + rc = -EFAULT;
15048 + goto get_offset_unlock_out;
15049 + }
15050 + op.offset = vma->vm_pgoff << PAGE_SHIFT;
15051 + op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
15052 + up_read(&current->mm->mmap_sem);
15053 + if ((rc = copy_to_user((void __user *) arg,
15054 + &op,
15055 + sizeof(op)))) {
15056 + rc = -EFAULT;
15057 + goto get_offset_out;
15058 + }
15059 + goto get_offset_out;
15060 + get_offset_unlock_out:
15061 + up_read(&current->mm->mmap_sem);
15062 + get_offset_out:
15063 + return rc;
15064 + }
15065 + case IOCTL_GNTDEV_SET_MAX_GRANTS:
15066 + {
15067 + struct ioctl_gntdev_set_max_grants op;
15068 + if ((rc = copy_from_user(&op,
15069 + (void __user *) arg,
15070 + sizeof(op)))) {
15071 + rc = -EFAULT;
15072 + goto set_max_out;
15073 + }
15074 + down_write(&private_data->grants_sem);
15075 + if (private_data->grants) {
15076 + rc = -EBUSY;
15077 + goto set_max_unlock_out;
15078 + }
15079 + if (op.count > MAX_GRANTS_LIMIT) {
15080 + rc = -EINVAL;
15081 + goto set_max_unlock_out;
15082 + }
15083 + rc = init_private_data(private_data, op.count);
15084 + set_max_unlock_out:
15085 + up_write(&private_data->grants_sem);
15086 + set_max_out:
15087 + return rc;
15088 + }
15089 + default:
15090 + return -ENOIOCTLCMD;
15091 + }
15092 +
15093 + return 0;
15094 +}
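
For orientation, the user-space side of the interface implemented above follows this
pattern: issue IOCTL_GNTDEV_MAP_GRANT_REF, mmap() the device at the returned index,
and munmap() before IOCTL_GNTDEV_UNMAP_GRANT_REF. The sketch below is not part of
the patch; the header install path, the device node name and the per-reference field
names are assumptions, while the ioctl names, the count/refs/index fields and the
ordering constraint come from the handlers above.

#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/public/gntdev.h>          /* assumed install path of gntdev.h */

int map_one_grant(uint32_t domid, uint32_t ref)
{
	struct ioctl_gntdev_map_grant_ref map;
	struct ioctl_gntdev_unmap_grant_ref unmap;
	void *addr;
	int rc = -1;
	int fd = open("/dev/gntdev", O_RDWR);   /* node name is an assumption */

	if (fd < 0)
		return -1;

	memset(&map, 0, sizeof(map));
	map.count = 1;
	map.refs[0].domid = domid;              /* per-ref field names assumed */
	map.refs[0].ref   = ref;
	if (ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) < 0)
		goto out_close;

	/* map.index is the byte offset to hand to mmap(); the grant itself is
	 * mapped by gntdev_mmap() above.  Writable mappings must be MAP_SHARED. */
	addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, map.index);
	if (addr != MAP_FAILED) {
		/* ... use the shared page, then return the slot to the
		 * not-yet-mapped state before the unmap ioctl (the handler
		 * rejects still-mmap()-ed slots with -EBUSY). */
		munmap(addr, 4096);
		rc = 0;
	}

	memset(&unmap, 0, sizeof(unmap));
	unmap.index = map.index;
	unmap.count = 1;
	ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap);
out_close:
	close(fd);
	return rc;
}
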
15095 Index: head-2008-11-25/drivers/xen/netback/Makefile
15096 ===================================================================
15097 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
15098 +++ head-2008-11-25/drivers/xen/netback/Makefile 2007-07-12 08:54:23.000000000 +0200
15099 @@ -0,0 +1,5 @@
15100 +obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
15101 +obj-$(CONFIG_XEN_NETDEV_LOOPBACK) += netloop.o
15102 +
15103 +netbk-y := netback.o xenbus.o interface.o accel.o
15104 +netloop-y := loopback.o
15105 Index: head-2008-11-25/drivers/xen/netback/accel.c
15106 ===================================================================
15107 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
15108 +++ head-2008-11-25/drivers/xen/netback/accel.c 2008-01-07 13:19:18.000000000 +0100
15109 @@ -0,0 +1,269 @@
15110 +/******************************************************************************
15111 + * drivers/xen/netback/accel.c
15112 + *
15113 + * Interface between backend virtual network device and accelerated plugin.
15114 + *
15115 + * Copyright (C) 2007 Solarflare Communications, Inc
15116 + *
15117 + * This program is free software; you can redistribute it and/or
15118 + * modify it under the terms of the GNU General Public License version 2
15119 + * as published by the Free Software Foundation; or, when distributed
15120 + * separately from the Linux kernel or incorporated into other
15121 + * software packages, subject to the following license:
15122 + *
15123 + * Permission is hereby granted, free of charge, to any person obtaining a copy
15124 + * of this source file (the "Software"), to deal in the Software without
15125 + * restriction, including without limitation the rights to use, copy, modify,
15126 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
15127 + * and to permit persons to whom the Software is furnished to do so, subject to
15128 + * the following conditions:
15129 + *
15130 + * The above copyright notice and this permission notice shall be included in
15131 + * all copies or substantial portions of the Software.
15132 + *
15133 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15134 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15135 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15136 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15137 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
15138 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15139 + * IN THE SOFTWARE.
15140 + */
15141 +
15142 +#include <linux/list.h>
15143 +#include <asm/atomic.h>
15144 +#include <xen/xenbus.h>
15145 +#include <linux/mutex.h>
15146 +
15147 +#include "common.h"
15148 +
15149 +#if 0
15150 +#undef DPRINTK
15151 +#define DPRINTK(fmt, args...) \
15152 + printk("netback/accel (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
15153 +#endif
15154 +
15155 +/*
15156 + * A list of available netback accelerator plugin modules (each list
15157 + * entry is of type struct netback_accelerator)
15158 + */
15159 +static struct list_head accelerators_list;
15160 +/* Lock used to protect access to accelerators_list */
15161 +DEFINE_MUTEX(accelerators_mutex);
15162 +
15163 +/*
15164 + * Compare a backend to an accelerator, and decide if they are
15165 + * compatible (i.e. if the accelerator should be used by the
15166 + * backend)
15167 + */
15168 +static int match_accelerator(struct xenbus_device *xendev,
15169 + struct backend_info *be,
15170 + struct netback_accelerator *accelerator)
15171 +{
15172 + int rc = 0;
15173 + char *eth_name = xenbus_read(XBT_NIL, xendev->nodename, "accel", NULL);
15174 +
15175 + if (IS_ERR(eth_name)) {
15176 + /* Probably means not present */
15177 + DPRINTK("%s: no match due to xenbus_read accel error %ld\n",
15178 + __FUNCTION__, PTR_ERR(eth_name));
15179 + return 0;
15180 + } else {
15181 + if (!strcmp(eth_name, accelerator->eth_name))
15182 + rc = 1;
15183 + kfree(eth_name);
15184 + return rc;
15185 + }
15186 +}
15187 +
15188 +
15189 +static void do_probe(struct backend_info *be,
15190 + struct netback_accelerator *accelerator,
15191 + struct xenbus_device *xendev)
15192 +{
15193 + be->accelerator = accelerator;
15194 + atomic_inc(&be->accelerator->use_count);
15195 + if (be->accelerator->hooks->probe(xendev) != 0) {
15196 + atomic_dec(&be->accelerator->use_count);
15197 + module_put(be->accelerator->hooks->owner);
15198 + be->accelerator = NULL;
15199 + }
15200 +}
15201 +
15202 +
15203 +/*
15204 + * Notify suitable backends that a new accelerator is available and
15205 + * connected. This will also notify the accelerator plugin module
15206 + * that it is being used for a device through the probe hook.
15207 + */
15208 +static int netback_accelerator_probe_backend(struct device *dev, void *arg)
15209 +{
15210 + struct netback_accelerator *accelerator =
15211 + (struct netback_accelerator *)arg;
15212 + struct xenbus_device *xendev = to_xenbus_device(dev);
15213 +
15214 + if (!strcmp("vif", xendev->devicetype)) {
15215 + struct backend_info *be = xendev->dev.driver_data;
15216 +
15217 + if (match_accelerator(xendev, be, accelerator) &&
15218 + try_module_get(accelerator->hooks->owner)) {
15219 + do_probe(be, accelerator, xendev);
15220 + }
15221 + }
15222 + return 0;
15223 +}
15224 +
15225 +
15226 +/*
15227 + * Notify suitable backends that an accelerator is unavailable.
15228 + */
15229 +static int netback_accelerator_remove_backend(struct device *dev, void *arg)
15230 +{
15231 + struct xenbus_device *xendev = to_xenbus_device(dev);
15232 + struct netback_accelerator *accelerator =
15233 + (struct netback_accelerator *)arg;
15234 +
15235 + if (!strcmp("vif", xendev->devicetype)) {
15236 + struct backend_info *be = xendev->dev.driver_data;
15237 +
15238 + if (be->accelerator == accelerator) {
15239 + be->accelerator->hooks->remove(xendev);
15240 + atomic_dec(&be->accelerator->use_count);
15241 + module_put(be->accelerator->hooks->owner);
15242 + be->accelerator = NULL;
15243 + }
15244 + }
15245 + return 0;
15246 +}
15247 +
15248 +
15249 +
15250 +/*
15251 + * Entry point for a netback accelerator plugin module. Called to
15252 + * advertise its presence, and connect to any suitable backends.
15253 + */
15254 +int netback_connect_accelerator(unsigned version, int id, const char *eth_name,
15255 + struct netback_accel_hooks *hooks)
15256 +{
15257 + struct netback_accelerator *new_accelerator;
15258 + unsigned eth_name_len;
15259 +
15260 + if (version != NETBACK_ACCEL_VERSION) {
15261 + if (version > NETBACK_ACCEL_VERSION) {
15262 + /* Caller has higher version number, leave it
15263 + up to them to decide whether to continue.
15264 + They can call again with a lower number if
15265 + they're happy to be compatible with us */
15266 + return NETBACK_ACCEL_VERSION;
15267 + } else {
15268 + /* We have a more recent version than the caller.
15269 + Reject for now; we may be able to offer
15270 + backward compatibility in future. */
15271 + return -EPROTO;
15272 + }
15273 + }
15274 +
15275 + new_accelerator =
15276 + kmalloc(sizeof(struct netback_accelerator), GFP_KERNEL);
15277 + if (!new_accelerator) {
15278 + DPRINTK("%s: failed to allocate memory for accelerator\n",
15279 + __FUNCTION__);
15280 + return -ENOMEM;
15281 + }
15282 +
15283 + new_accelerator->id = id;
15284 +
15285 + eth_name_len = strlen(eth_name)+1;
15286 + new_accelerator->eth_name = kmalloc(eth_name_len, GFP_KERNEL);
15287 + if (!new_accelerator->eth_name) {
15288 + DPRINTK("%s: failed to allocate memory for eth_name string\n",
15289 + __FUNCTION__);
15290 + kfree(new_accelerator);
15291 + return -ENOMEM;
15292 + }
15293 + strlcpy(new_accelerator->eth_name, eth_name, eth_name_len);
15294 +
15295 + new_accelerator->hooks = hooks;
15296 +
15297 + atomic_set(&new_accelerator->use_count, 0);
15298 +
15299 + mutex_lock(&accelerators_mutex);
15300 + list_add(&new_accelerator->link, &accelerators_list);
15301 +
15302 + /* tell existing backends about new plugin */
15303 + xenbus_for_each_backend(new_accelerator,
15304 + netback_accelerator_probe_backend);
15305 +
15306 + mutex_unlock(&accelerators_mutex);
15307 +
15308 + return 0;
15309 +
15310 +}
15311 +EXPORT_SYMBOL_GPL(netback_connect_accelerator);
15312 +
15313 +
15314 +/*
15315 + * Disconnect an accelerator plugin module that has previously been
15316 + * connected.
15317 + */
15318 +void netback_disconnect_accelerator(int id, const char *eth_name)
15319 +{
15320 + struct netback_accelerator *accelerator, *next;
15321 +
15322 + mutex_lock(&accelerators_mutex);
15323 + list_for_each_entry_safe(accelerator, next, &accelerators_list, link) {
15324 + if (!strcmp(eth_name, accelerator->eth_name)) {
15325 + xenbus_for_each_backend
15326 + (accelerator, netback_accelerator_remove_backend);
15327 + BUG_ON(atomic_read(&accelerator->use_count) != 0);
15328 + list_del(&accelerator->link);
15329 + kfree(accelerator->eth_name);
15330 + kfree(accelerator);
15331 + break;
15332 + }
15333 + }
15334 + mutex_unlock(&accelerators_mutex);
15335 +}
15336 +EXPORT_SYMBOL_GPL(netback_disconnect_accelerator);
15337 +
15338 +
15339 +void netback_probe_accelerators(struct backend_info *be,
15340 + struct xenbus_device *dev)
15341 +{
15342 + struct netback_accelerator *accelerator;
15343 +
15344 + /*
15345 + * Check list of accelerators to see if any is suitable, and
15346 + * use it if it is.
15347 + */
15348 + mutex_lock(&accelerators_mutex);
15349 + list_for_each_entry(accelerator, &accelerators_list, link) {
15350 + if (match_accelerator(dev, be, accelerator) &&
15351 + try_module_get(accelerator->hooks->owner)) {
15352 + do_probe(be, accelerator, dev);
15353 + break;
15354 + }
15355 + }
15356 + mutex_unlock(&accelerators_mutex);
15357 +}
15358 +
15359 +
15360 +void netback_remove_accelerators(struct backend_info *be,
15361 + struct xenbus_device *dev)
15362 +{
15363 + mutex_lock(&accelerators_mutex);
15364 + /* Notify the accelerator (if any) of this device's removal */
15365 + if (be->accelerator != NULL) {
15366 + be->accelerator->hooks->remove(dev);
15367 + atomic_dec(&be->accelerator->use_count);
15368 + module_put(be->accelerator->hooks->owner);
15369 + be->accelerator = NULL;
15370 + }
15371 + mutex_unlock(&accelerators_mutex);
15372 +}
15373 +
15374 +
15375 +void netif_accel_init(void)
15376 +{
15377 + INIT_LIST_HEAD(&accelerators_list);
15378 +}
15379 Index: head-2008-11-25/drivers/xen/netback/common.h
15380 ===================================================================
15381 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
15382 +++ head-2008-11-25/drivers/xen/netback/common.h 2008-01-07 13:19:18.000000000 +0100
15383 @@ -0,0 +1,217 @@
15384 +/******************************************************************************
15385 + * arch/xen/drivers/netif/backend/common.h
15386 + *
15387 + * This program is free software; you can redistribute it and/or
15388 + * modify it under the terms of the GNU General Public License version 2
15389 + * as published by the Free Software Foundation; or, when distributed
15390 + * separately from the Linux kernel or incorporated into other
15391 + * software packages, subject to the following license:
15392 + *
15393 + * Permission is hereby granted, free of charge, to any person obtaining a copy
15394 + * of this source file (the "Software"), to deal in the Software without
15395 + * restriction, including without limitation the rights to use, copy, modify,
15396 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
15397 + * and to permit persons to whom the Software is furnished to do so, subject to
15398 + * the following conditions:
15399 + *
15400 + * The above copyright notice and this permission notice shall be included in
15401 + * all copies or substantial portions of the Software.
15402 + *
15403 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15404 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15405 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15406 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15407 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
15408 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15409 + * IN THE SOFTWARE.
15410 + */
15411 +
15412 +#ifndef __NETIF__BACKEND__COMMON_H__
15413 +#define __NETIF__BACKEND__COMMON_H__
15414 +
15415 +#include <linux/version.h>
15416 +#include <linux/module.h>
15417 +#include <linux/interrupt.h>
15418 +#include <linux/slab.h>
15419 +#include <linux/ip.h>
15420 +#include <linux/in.h>
15421 +#include <linux/netdevice.h>
15422 +#include <linux/etherdevice.h>
15423 +#include <linux/wait.h>
15424 +#include <xen/evtchn.h>
15425 +#include <xen/interface/io/netif.h>
15426 +#include <asm/io.h>
15427 +#include <asm/pgalloc.h>
15428 +#include <xen/interface/grant_table.h>
15429 +#include <xen/gnttab.h>
15430 +#include <xen/driver_util.h>
15431 +#include <xen/xenbus.h>
15432 +
15433 +#define DPRINTK(_f, _a...) \
15434 + pr_debug("(file=%s, line=%d) " _f, \
15435 + __FILE__ , __LINE__ , ## _a )
15436 +#define IPRINTK(fmt, args...) \
15437 + printk(KERN_INFO "xen_net: " fmt, ##args)
15438 +#define WPRINTK(fmt, args...) \
15439 + printk(KERN_WARNING "xen_net: " fmt, ##args)
15440 +
15441 +typedef struct netif_st {
15442 + /* Unique identifier for this interface. */
15443 + domid_t domid;
15444 + unsigned int handle;
15445 +
15446 + u8 fe_dev_addr[6];
15447 +
15448 + /* Physical parameters of the comms window. */
15449 + grant_handle_t tx_shmem_handle;
15450 + grant_ref_t tx_shmem_ref;
15451 + grant_handle_t rx_shmem_handle;
15452 + grant_ref_t rx_shmem_ref;
15453 + unsigned int irq;
15454 +
15455 + /* The shared rings and indexes. */
15456 + netif_tx_back_ring_t tx;
15457 + netif_rx_back_ring_t rx;
15458 + struct vm_struct *tx_comms_area;
15459 + struct vm_struct *rx_comms_area;
15460 +
15461 + /* Set of features that can be turned on in dev->features. */
15462 + int features;
15463 +
15464 + /* Internal feature information. */
15465 + u8 can_queue:1; /* can queue packets for receiver? */
15466 + u8 copying_receiver:1; /* copy packets to receiver? */
15467 +
15468 + /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
15469 + RING_IDX rx_req_cons_peek;
15470 +
15471 + /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
15472 + unsigned long credit_bytes;
15473 + unsigned long credit_usec;
15474 + unsigned long remaining_credit;
15475 + struct timer_list credit_timeout;
15476 +
15477 + /* Enforce draining of the transmit queue. */
15478 + struct timer_list tx_queue_timeout;
15479 +
15480 + /* Miscellaneous private stuff. */
15481 + struct list_head list; /* scheduling list */
15482 + atomic_t refcnt;
15483 + struct net_device *dev;
15484 + struct net_device_stats stats;
15485 +
15486 + unsigned int carrier;
15487 +
15488 + wait_queue_head_t waiting_to_free;
15489 +} netif_t;
15490 +
15491 +/*
15492 + * Implement our own carrier flag: the network stack's version causes delays
15493 + * when the carrier is re-enabled (in particular, dev_activate() may not
15494 + * immediately be called, which can cause packet loss; also the etherbridge
15495 + * can be rather lazy in activating its port).
15496 + */
15497 +#define netback_carrier_on(netif) ((netif)->carrier = 1)
15498 +#define netback_carrier_off(netif) ((netif)->carrier = 0)
15499 +#define netback_carrier_ok(netif) ((netif)->carrier)
15500 +
15501 +enum {
15502 + NETBK_DONT_COPY_SKB,
15503 + NETBK_DELAYED_COPY_SKB,
15504 + NETBK_ALWAYS_COPY_SKB,
15505 +};
15506 +
15507 +extern int netbk_copy_skb_mode;
15508 +
15509 +/* Function pointers into netback accelerator plugin modules */
15510 +struct netback_accel_hooks {
15511 + struct module *owner;
15512 + int (*probe)(struct xenbus_device *dev);
15513 + int (*remove)(struct xenbus_device *dev);
15514 +};
15515 +
15516 +/* Structure to track the state of a netback accelerator plugin */
15517 +struct netback_accelerator {
15518 + struct list_head link;
15519 + int id;
15520 + char *eth_name;
15521 + atomic_t use_count;
15522 + struct netback_accel_hooks *hooks;
15523 +};
15524 +
15525 +struct backend_info {
15526 + struct xenbus_device *dev;
15527 + netif_t *netif;
15528 + enum xenbus_state frontend_state;
15529 +
15530 + /* State relating to the netback accelerator */
15531 + void *netback_accel_priv;
15532 + /* The accelerator that this backend is currently using */
15533 + struct netback_accelerator *accelerator;
15534 +};
15535 +
15536 +#define NETBACK_ACCEL_VERSION 0x00010001
15537 +
15538 +/*
15539 + * Connect an accelerator plugin module to netback. Returns zero on
15540 + * success, < 0 on error, > 0 (with highest version number supported)
15541 + * if version mismatch.
15542 + */
15543 +extern int netback_connect_accelerator(unsigned version,
15544 + int id, const char *eth_name,
15545 + struct netback_accel_hooks *hooks);
15546 +/* Disconnect a previously connected accelerator plugin module */
15547 +extern void netback_disconnect_accelerator(int id, const char *eth_name);
15548 +
15549 +
15550 +extern
15551 +void netback_probe_accelerators(struct backend_info *be,
15552 + struct xenbus_device *dev);
15553 +extern
15554 +void netback_remove_accelerators(struct backend_info *be,
15555 + struct xenbus_device *dev);
15556 +extern
15557 +void netif_accel_init(void);
15558 +
15559 +
15560 +#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
15561 +#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
15562 +
15563 +void netif_disconnect(netif_t *netif);
15564 +
15565 +netif_t *netif_alloc(domid_t domid, unsigned int handle);
15566 +int netif_map(netif_t *netif, unsigned long tx_ring_ref,
15567 + unsigned long rx_ring_ref, unsigned int evtchn);
15568 +
15569 +#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
15570 +#define netif_put(_b) \
15571 + do { \
15572 + if ( atomic_dec_and_test(&(_b)->refcnt) ) \
15573 + wake_up(&(_b)->waiting_to_free); \
15574 + } while (0)
15575 +
15576 +void netif_xenbus_init(void);
15577 +
15578 +#define netif_schedulable(netif) \
15579 + (netif_running((netif)->dev) && netback_carrier_ok(netif))
15580 +
15581 +void netif_schedule_work(netif_t *netif);
15582 +void netif_deschedule_work(netif_t *netif);
15583 +
15584 +int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
15585 +struct net_device_stats *netif_be_get_stats(struct net_device *dev);
15586 +irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
15587 +
15588 +static inline int netbk_can_queue(struct net_device *dev)
15589 +{
15590 + netif_t *netif = netdev_priv(dev);
15591 + return netif->can_queue;
15592 +}
15593 +
15594 +static inline int netbk_can_sg(struct net_device *dev)
15595 +{
15596 + netif_t *netif = netdev_priv(dev);
15597 + return netif->features & NETIF_F_SG;
15598 +}
15599 +
15600 +#endif /* __NETIF__BACKEND__COMMON_H__ */
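
The hooks structure and the connect/disconnect prototypes above are the whole
plugin-facing API. A minimal sketch of how an accelerator module might register
against it is shown below; it is not part of the patch, the id and the "eth0"
interface name are placeholder values, and a real plugin (such as a NIC-specific
accelerator) would set up its fast path in the probe hook.

/* Minimal, illustrative accelerator plugin skeleton. */
#include <linux/module.h>
#include <linux/errno.h>
#include "common.h"

static int example_accel_probe(struct xenbus_device *dev)
{
	/* Set up acceleration for this vif backend. */
	return 0;
}

static int example_accel_remove(struct xenbus_device *dev)
{
	/* Tear down acceleration for this vif backend. */
	return 0;
}

static struct netback_accel_hooks example_hooks = {
	.owner  = THIS_MODULE,
	.probe  = example_accel_probe,
	.remove = example_accel_remove,
};

static int __init example_accel_init(void)
{
	int rc = netback_connect_accelerator(NETBACK_ACCEL_VERSION, 0, "eth0",
					     &example_hooks);
	/* A positive return carries netback's highest supported version;
	 * treat any mismatch as a protocol error here. */
	return rc > 0 ? -EPROTO : rc;
}
module_init(example_accel_init);

static void __exit example_accel_exit(void)
{
	netback_disconnect_accelerator(0, "eth0");
}
module_exit(example_accel_exit);

MODULE_LICENSE("GPL");
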
15601 Index: head-2008-11-25/drivers/xen/netback/interface.c
15602 ===================================================================
15603 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
15604 +++ head-2008-11-25/drivers/xen/netback/interface.c 2007-06-12 13:13:45.000000000 +0200
15605 @@ -0,0 +1,336 @@
15606 +/******************************************************************************
15607 + * arch/xen/drivers/netif/backend/interface.c
15608 + *
15609 + * Network-device interface management.
15610 + *
15611 + * Copyright (c) 2004-2005, Keir Fraser
15612 + *
15613 + * This program is free software; you can redistribute it and/or
15614 + * modify it under the terms of the GNU General Public License version 2
15615 + * as published by the Free Software Foundation; or, when distributed
15616 + * separately from the Linux kernel or incorporated into other
15617 + * software packages, subject to the following license:
15618 + *
15619 + * Permission is hereby granted, free of charge, to any person obtaining a copy
15620 + * of this source file (the "Software"), to deal in the Software without
15621 + * restriction, including without limitation the rights to use, copy, modify,
15622 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
15623 + * and to permit persons to whom the Software is furnished to do so, subject to
15624 + * the following conditions:
15625 + *
15626 + * The above copyright notice and this permission notice shall be included in
15627 + * all copies or substantial portions of the Software.
15628 + *
15629 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15630 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15631 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15632 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15633 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
15634 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15635 + * IN THE SOFTWARE.
15636 + */
15637 +
15638 +#include "common.h"
15639 +#include <linux/ethtool.h>
15640 +#include <linux/rtnetlink.h>
15641 +
15642 +/*
15643 + * Module parameter 'queue_length':
15644 + *
15645 + * Enables queuing in the network stack when a client has run out of receive
15646 + * descriptors. Although this feature can improve receive bandwidth by avoiding
15647 + * packet loss, it can also result in packets sitting in the 'tx_queue' for
15648 + * unbounded time. This is bad if those packets hold onto foreign resources.
15649 + * For example, consider a packet that holds onto resources belonging to the
15650 + * guest for which it is queued (e.g., packet received on vif1.0, destined for
15651 + * vif1.1 which is not activated in the guest): in this situation the guest
15652 + * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
15653 + * run a timer (tx_queue_timeout) to drain the queue when the interface is
15654 + * blocked.
15655 + */
15656 +static unsigned long netbk_queue_length = 32;
15657 +module_param_named(queue_length, netbk_queue_length, ulong, 0);
15658 +
15659 +static void __netif_up(netif_t *netif)
15660 +{
15661 + enable_irq(netif->irq);
15662 + netif_schedule_work(netif);
15663 +}
15664 +
15665 +static void __netif_down(netif_t *netif)
15666 +{
15667 + disable_irq(netif->irq);
15668 + netif_deschedule_work(netif);
15669 +}
15670 +
15671 +static int net_open(struct net_device *dev)
15672 +{
15673 + netif_t *netif = netdev_priv(dev);
15674 + if (netback_carrier_ok(netif)) {
15675 + __netif_up(netif);
15676 + netif_start_queue(dev);
15677 + }
15678 + return 0;
15679 +}
15680 +
15681 +static int net_close(struct net_device *dev)
15682 +{
15683 + netif_t *netif = netdev_priv(dev);
15684 + if (netback_carrier_ok(netif))
15685 + __netif_down(netif);
15686 + netif_stop_queue(dev);
15687 + return 0;
15688 +}
15689 +
15690 +static int netbk_change_mtu(struct net_device *dev, int mtu)
15691 +{
15692 + int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
15693 +
15694 + if (mtu > max)
15695 + return -EINVAL;
15696 + dev->mtu = mtu;
15697 + return 0;
15698 +}
15699 +
15700 +static int netbk_set_sg(struct net_device *dev, u32 data)
15701 +{
15702 + if (data) {
15703 + netif_t *netif = netdev_priv(dev);
15704 +
15705 + if (!(netif->features & NETIF_F_SG))
15706 + return -ENOSYS;
15707 + }
15708 +
15709 + return ethtool_op_set_sg(dev, data);
15710 +}
15711 +
15712 +static int netbk_set_tso(struct net_device *dev, u32 data)
15713 +{
15714 + if (data) {
15715 + netif_t *netif = netdev_priv(dev);
15716 +
15717 + if (!(netif->features & NETIF_F_TSO))
15718 + return -ENOSYS;
15719 + }
15720 +
15721 + return ethtool_op_set_tso(dev, data);
15722 +}
15723 +
15724 +static struct ethtool_ops network_ethtool_ops =
15725 +{
15726 + .get_tx_csum = ethtool_op_get_tx_csum,
15727 + .set_tx_csum = ethtool_op_set_tx_csum,
15728 + .get_sg = ethtool_op_get_sg,
15729 + .set_sg = netbk_set_sg,
15730 + .get_tso = ethtool_op_get_tso,
15731 + .set_tso = netbk_set_tso,
15732 + .get_link = ethtool_op_get_link,
15733 +};
15734 +
15735 +netif_t *netif_alloc(domid_t domid, unsigned int handle)
15736 +{
15737 + int err = 0;
15738 + struct net_device *dev;
15739 + netif_t *netif;
15740 + char name[IFNAMSIZ] = {};
15741 +
15742 + snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
15743 + dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
15744 + if (dev == NULL) {
15745 + DPRINTK("Could not create netif: out of memory\n");
15746 + return ERR_PTR(-ENOMEM);
15747 + }
15748 +
15749 + netif = netdev_priv(dev);
15750 + memset(netif, 0, sizeof(*netif));
15751 + netif->domid = domid;
15752 + netif->handle = handle;
15753 + atomic_set(&netif->refcnt, 1);
15754 + init_waitqueue_head(&netif->waiting_to_free);
15755 + netif->dev = dev;
15756 +
15757 + netback_carrier_off(netif);
15758 +
15759 + netif->credit_bytes = netif->remaining_credit = ~0UL;
15760 + netif->credit_usec = 0UL;
15761 + init_timer(&netif->credit_timeout);
15762 + /* Initialize 'expires' now: it's used to track the credit window. */
15763 + netif->credit_timeout.expires = jiffies;
15764 +
15765 + init_timer(&netif->tx_queue_timeout);
15766 +
15767 + dev->hard_start_xmit = netif_be_start_xmit;
15768 + dev->get_stats = netif_be_get_stats;
15769 + dev->open = net_open;
15770 + dev->stop = net_close;
15771 + dev->change_mtu = netbk_change_mtu;
15772 + dev->features = NETIF_F_IP_CSUM;
15773 +
15774 + SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
15775 +
15776 + dev->tx_queue_len = netbk_queue_length;
15777 +
15778 + /*
15779 + * Initialise a dummy MAC address. We choose the numerically
15780 + * largest non-broadcast address to prevent the address getting
15781 + * stolen by an Ethernet bridge for STP purposes.
15782 + * (FE:FF:FF:FF:FF:FF)
15783 + */
15784 + memset(dev->dev_addr, 0xFF, ETH_ALEN);
15785 + dev->dev_addr[0] &= ~0x01;
15786 +
15787 + rtnl_lock();
15788 + err = register_netdevice(dev);
15789 + rtnl_unlock();
15790 + if (err) {
15791 + DPRINTK("Could not register new net device %s: err=%d\n",
15792 + dev->name, err);
15793 + free_netdev(dev);
15794 + return ERR_PTR(err);
15795 + }
15796 +
15797 + DPRINTK("Successfully created netif\n");
15798 + return netif;
15799 +}
15800 +
15801 +static int map_frontend_pages(
15802 + netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
15803 +{
15804 + struct gnttab_map_grant_ref op;
15805 +
15806 + gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
15807 + GNTMAP_host_map, tx_ring_ref, netif->domid);
15808 +
15809 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
15810 + BUG();
15811 +
15812 + if (op.status) {
15813 + DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
15814 + return op.status;
15815 + }
15816 +
15817 + netif->tx_shmem_ref = tx_ring_ref;
15818 + netif->tx_shmem_handle = op.handle;
15819 +
15820 + gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
15821 + GNTMAP_host_map, rx_ring_ref, netif->domid);
15822 +
15823 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
15824 + BUG();
15825 +
15826 + if (op.status) {
15827 + DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
15828 + return op.status;
15829 + }
15830 +
15831 + netif->rx_shmem_ref = rx_ring_ref;
15832 + netif->rx_shmem_handle = op.handle;
15833 +
15834 + return 0;
15835 +}
15836 +
15837 +static void unmap_frontend_pages(netif_t *netif)
15838 +{
15839 + struct gnttab_unmap_grant_ref op;
15840 +
15841 + gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
15842 + GNTMAP_host_map, netif->tx_shmem_handle);
15843 +
15844 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
15845 + BUG();
15846 +
15847 + gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
15848 + GNTMAP_host_map, netif->rx_shmem_handle);
15849 +
15850 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
15851 + BUG();
15852 +}
15853 +
15854 +int netif_map(netif_t *netif, unsigned long tx_ring_ref,
15855 + unsigned long rx_ring_ref, unsigned int evtchn)
15856 +{
15857 + int err = -ENOMEM;
15858 + netif_tx_sring_t *txs;
15859 + netif_rx_sring_t *rxs;
15860 +
15861 + /* Already connected through? */
15862 + if (netif->irq)
15863 + return 0;
15864 +
15865 + netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
15866 + if (netif->tx_comms_area == NULL)
15867 + return -ENOMEM;
15868 + netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
15869 + if (netif->rx_comms_area == NULL)
15870 + goto err_rx;
15871 +
15872 + err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
15873 + if (err)
15874 + goto err_map;
15875 +
15876 + err = bind_interdomain_evtchn_to_irqhandler(
15877 + netif->domid, evtchn, netif_be_int, 0,
15878 + netif->dev->name, netif);
15879 + if (err < 0)
15880 + goto err_hypervisor;
15881 + netif->irq = err;
15882 + disable_irq(netif->irq);
15883 +
15884 + txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
15885 + BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
15886 +
15887 + rxs = (netif_rx_sring_t *)
15888 + ((char *)netif->rx_comms_area->addr);
15889 + BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
15890 +
15891 + netif->rx_req_cons_peek = 0;
15892 +
15893 + netif_get(netif);
15894 +
15895 + rtnl_lock();
15896 + netback_carrier_on(netif);
15897 + if (netif_running(netif->dev))
15898 + __netif_up(netif);
15899 + rtnl_unlock();
15900 +
15901 + return 0;
15902 +err_hypervisor:
15903 + unmap_frontend_pages(netif);
15904 +err_map:
15905 + free_vm_area(netif->rx_comms_area);
15906 +err_rx:
15907 + free_vm_area(netif->tx_comms_area);
15908 + return err;
15909 +}
15910 +
15911 +void netif_disconnect(netif_t *netif)
15912 +{
15913 + if (netback_carrier_ok(netif)) {
15914 + rtnl_lock();
15915 + netback_carrier_off(netif);
15916 + netif_carrier_off(netif->dev); /* discard queued packets */
15917 + if (netif_running(netif->dev))
15918 + __netif_down(netif);
15919 + rtnl_unlock();
15920 + netif_put(netif);
15921 + }
15922 +
15923 + atomic_dec(&netif->refcnt);
15924 + wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
15925 +
15926 + del_timer_sync(&netif->credit_timeout);
15927 + del_timer_sync(&netif->tx_queue_timeout);
15928 +
15929 + if (netif->irq)
15930 + unbind_from_irqhandler(netif->irq, netif);
15931 +
15932 + unregister_netdev(netif->dev);
15933 +
15934 + if (netif->tx.sring) {
15935 + unmap_frontend_pages(netif);
15936 + free_vm_area(netif->tx_comms_area);
15937 + free_vm_area(netif->rx_comms_area);
15938 + }
15939 +
15940 + free_netdev(netif->dev);
15941 +}
15942 Index: head-2008-11-25/drivers/xen/netback/loopback.c
15943 ===================================================================
15944 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
15945 +++ head-2008-11-25/drivers/xen/netback/loopback.c 2007-08-06 15:10:49.000000000 +0200
15946 @@ -0,0 +1,324 @@
15947 +/******************************************************************************
15948 + * netback/loopback.c
15949 + *
15950 + * A two-interface loopback device to emulate a local netfront-netback
15951 + * connection. This ensures that local packet delivery looks identical
15952 + * to inter-domain delivery. Most importantly, packets delivered locally
15953 + * originating from other domains will get *copied* when they traverse this
15954 + * driver. This prevents unbounded delays in socket-buffer queues from
15955 + * causing the netback driver to "seize up".
15956 + *
15957 + * This driver creates a symmetric pair of loopback interfaces with names
15958 + * vif0.0 and veth0. The intention is that 'vif0.0' is bound to an Ethernet
15959 + * bridge, just like a proper netback interface, while a local IP interface
15960 + * is configured on 'veth0'.
15961 + *
15962 + * As with a real netback interface, vif0.0 is configured with a suitable
15963 + * dummy MAC address. No default is provided for veth0: a reasonable strategy
15964 + * is to transfer eth0's MAC address to veth0, and give eth0 a dummy address
15965 + * (to avoid confusing the Etherbridge).
15966 + *
15967 + * Copyright (c) 2005 K A Fraser
15968 + *
15969 + * This program is free software; you can redistribute it and/or
15970 + * modify it under the terms of the GNU General Public License version 2
15971 + * as published by the Free Software Foundation; or, when distributed
15972 + * separately from the Linux kernel or incorporated into other
15973 + * software packages, subject to the following license:
15974 + *
15975 + * Permission is hereby granted, free of charge, to any person obtaining a copy
15976 + * of this source file (the "Software"), to deal in the Software without
15977 + * restriction, including without limitation the rights to use, copy, modify,
15978 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
15979 + * and to permit persons to whom the Software is furnished to do so, subject to
15980 + * the following conditions:
15981 + *
15982 + * The above copyright notice and this permission notice shall be included in
15983 + * all copies or substantial portions of the Software.
15984 + *
15985 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15986 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15987 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15988 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15989 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
15990 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
15991 + * IN THE SOFTWARE.
15992 + */
15993 +
15994 +#include <linux/module.h>
15995 +#include <linux/netdevice.h>
15996 +#include <linux/inetdevice.h>
15997 +#include <linux/etherdevice.h>
15998 +#include <linux/skbuff.h>
15999 +#include <linux/ethtool.h>
16000 +#include <net/dst.h>
16001 +#include <net/xfrm.h> /* secpath_reset() */
16002 +#include <asm/hypervisor.h> /* is_initial_xendomain() */
16003 +
16004 +static int nloopbacks = -1;
16005 +module_param(nloopbacks, int, 0);
16006 +MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
16007 +
16008 +struct net_private {
16009 + struct net_device *loopback_dev;
16010 + struct net_device_stats stats;
16011 +};
16012 +
16013 +static int loopback_open(struct net_device *dev)
16014 +{
16015 + struct net_private *np = netdev_priv(dev);
16016 + memset(&np->stats, 0, sizeof(np->stats));
16017 + netif_start_queue(dev);
16018 + return 0;
16019 +}
16020 +
16021 +static int loopback_close(struct net_device *dev)
16022 +{
16023 + netif_stop_queue(dev);
16024 + return 0;
16025 +}
16026 +
16027 +#ifdef CONFIG_X86
16028 +static int is_foreign(unsigned long pfn)
16029 +{
16030 + /* NB. Play it safe for auto-translation mode. */
16031 + return (xen_feature(XENFEAT_auto_translated_physmap) ||
16032 + (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT));
16033 +}
16034 +#else
16035 +/* How to detect a foreign mapping? Play it safe. */
16036 +#define is_foreign(pfn) (1)
16037 +#endif
16038 +
16039 +static int skb_remove_foreign_references(struct sk_buff *skb)
16040 +{
16041 + struct page *page;
16042 + unsigned long pfn;
16043 + int i, off;
16044 + char *vaddr;
16045 +
16046 + BUG_ON(skb_shinfo(skb)->frag_list);
16047 +
16048 + if (skb_cloned(skb) &&
16049 + unlikely(pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
16050 + return 0;
16051 +
16052 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
16053 + pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page);
16054 + if (!is_foreign(pfn))
16055 + continue;
16056 +
16057 + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
16058 + if (unlikely(!page))
16059 + return 0;
16060 +
16061 + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
16062 + off = skb_shinfo(skb)->frags[i].page_offset;
16063 + memcpy(page_address(page) + off,
16064 + vaddr + off,
16065 + skb_shinfo(skb)->frags[i].size);
16066 + kunmap_skb_frag(vaddr);
16067 +
16068 + put_page(skb_shinfo(skb)->frags[i].page);
16069 + skb_shinfo(skb)->frags[i].page = page;
16070 + }
16071 +
16072 + return 1;
16073 +}
16074 +
16075 +static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
16076 +{
16077 + struct net_private *np = netdev_priv(dev);
16078 +
16079 + if (!skb_remove_foreign_references(skb)) {
16080 + np->stats.tx_dropped++;
16081 + dev_kfree_skb(skb);
16082 + return 0;
16083 + }
16084 +
16085 + dst_release(skb->dst);
16086 + skb->dst = NULL;
16087 +
16088 + skb_orphan(skb);
16089 +
16090 + np->stats.tx_bytes += skb->len;
16091 + np->stats.tx_packets++;
16092 +
16093 + /* Switch to loopback context. */
16094 + dev = np->loopback_dev;
16095 + np = netdev_priv(dev);
16096 +
16097 + np->stats.rx_bytes += skb->len;
16098 + np->stats.rx_packets++;
16099 +
16100 + if (skb->ip_summed == CHECKSUM_HW) {
16101 + /* Defer checksum calculation. */
16102 + skb->proto_csum_blank = 1;
16103 + /* Must be a local packet: assert its integrity. */
16104 + skb->proto_data_valid = 1;
16105 + }
16106 +
16107 + skb->ip_summed = skb->proto_data_valid ?
16108 + CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
16109 +
16110 + skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */
16111 + skb->protocol = eth_type_trans(skb, dev);
16112 + skb->dev = dev;
16113 + dev->last_rx = jiffies;
16114 +
16115 + /* Flush netfilter context: rx'ed skbuffs not expected to have any. */
16116 + nf_reset(skb);
16117 + secpath_reset(skb);
16118 +
16119 + netif_rx(skb);
16120 +
16121 + return 0;
16122 +}
16123 +
16124 +static struct net_device_stats *loopback_get_stats(struct net_device *dev)
16125 +{
16126 + struct net_private *np = netdev_priv(dev);
16127 + return &np->stats;
16128 +}
16129 +
16130 +static struct ethtool_ops network_ethtool_ops =
16131 +{
16132 + .get_tx_csum = ethtool_op_get_tx_csum,
16133 + .set_tx_csum = ethtool_op_set_tx_csum,
16134 + .get_sg = ethtool_op_get_sg,
16135 + .set_sg = ethtool_op_set_sg,
16136 + .get_tso = ethtool_op_get_tso,
16137 + .set_tso = ethtool_op_set_tso,
16138 + .get_link = ethtool_op_get_link,
16139 +};
16140 +
16141 +/*
16142 + * Nothing to do here. Virtual interface is point-to-point and the
16143 + * physical interface is probably promiscuous anyway.
16144 + */
16145 +static void loopback_set_multicast_list(struct net_device *dev)
16146 +{
16147 +}
16148 +
16149 +static void loopback_construct(struct net_device *dev, struct net_device *lo)
16150 +{
16151 + struct net_private *np = netdev_priv(dev);
16152 +
16153 + np->loopback_dev = lo;
16154 +
16155 + dev->open = loopback_open;
16156 + dev->stop = loopback_close;
16157 + dev->hard_start_xmit = loopback_start_xmit;
16158 + dev->get_stats = loopback_get_stats;
16159 + dev->set_multicast_list = loopback_set_multicast_list;
16160 + dev->change_mtu = NULL; /* allow arbitrary mtu */
16161 +
16162 + dev->tx_queue_len = 0;
16163 +
16164 + dev->features = (NETIF_F_HIGHDMA |
16165 + NETIF_F_LLTX |
16166 + NETIF_F_TSO |
16167 + NETIF_F_SG |
16168 + NETIF_F_IP_CSUM);
16169 +
16170 + SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
16171 +
16172 + /*
16173 + * We do not set a jumbo MTU on the interface. Otherwise the network
16174 + * stack will try to send large packets that will get dropped by the
16175 + * Ethernet bridge (unless the physical Ethernet interface is
16176 + * configured to transfer jumbo packets). If a larger MTU is desired
16177 + * then the system administrator can specify it using the 'ifconfig'
16178 + * command.
16179 + */
16180 + /*dev->mtu = 16*1024;*/
16181 +}
16182 +
16183 +static int __init make_loopback(int i)
16184 +{
16185 + struct net_device *dev1, *dev2;
16186 + char dev_name[IFNAMSIZ];
16187 + int err = -ENOMEM;
16188 +
16189 + sprintf(dev_name, "vif0.%d", i);
16190 + dev1 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
16191 + if (!dev1)
16192 + return err;
16193 +
16194 + sprintf(dev_name, "veth%d", i);
16195 + dev2 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
16196 + if (!dev2)
16197 + goto fail_netdev2;
16198 +
16199 + loopback_construct(dev1, dev2);
16200 + loopback_construct(dev2, dev1);
16201 +
16202 + /*
16203 + * Initialise a dummy MAC address for the 'dummy backend' interface. We
16204 + * choose the numerically largest non-broadcast address to prevent the
16205 + * address getting stolen by an Ethernet bridge for STP purposes.
16206 + */
16207 + memset(dev1->dev_addr, 0xFF, ETH_ALEN);
16208 + dev1->dev_addr[0] &= ~0x01;
16209 +
16210 + if ((err = register_netdev(dev1)) != 0)
16211 + goto fail;
16212 +
16213 + if ((err = register_netdev(dev2)) != 0) {
16214 + unregister_netdev(dev1);
16215 + goto fail;
16216 + }
16217 +
16218 + return 0;
16219 +
16220 + fail:
16221 + free_netdev(dev2);
16222 + fail_netdev2:
16223 + free_netdev(dev1);
16224 + return err;
16225 +}
16226 +
16227 +static void __exit clean_loopback(int i)
16228 +{
16229 + struct net_device *dev1, *dev2;
16230 + char dev_name[IFNAMSIZ];
16231 +
16232 + sprintf(dev_name, "vif0.%d", i);
16233 + dev1 = dev_get_by_name(dev_name);
16234 + sprintf(dev_name, "veth%d", i);
16235 + dev2 = dev_get_by_name(dev_name);
16236 + if (dev1 && dev2) {
16237 + unregister_netdev(dev2);
16238 + unregister_netdev(dev1);
16239 + free_netdev(dev2);
16240 + free_netdev(dev1);
16241 + }
16242 +}
16243 +
16244 +static int __init loopback_init(void)
16245 +{
16246 + int i, err = 0;
16247 +
16248 + if (nloopbacks == -1)
16249 + nloopbacks = is_initial_xendomain() ? 4 : 0;
16250 +
16251 + for (i = 0; i < nloopbacks; i++)
16252 + if ((err = make_loopback(i)) != 0)
16253 + break;
16254 +
16255 + return err;
16256 +}
16257 +
16258 +module_init(loopback_init);
16259 +
16260 +static void __exit loopback_exit(void)
16261 +{
16262 + int i;
16263 +
16264 + for (i = nloopbacks; i-- > 0; )
16265 + clean_loopback(i);
16266 +}
16267 +
16268 +module_exit(loopback_exit);
16269 +
16270 +MODULE_LICENSE("Dual BSD/GPL");
16271 Index: head-2008-11-25/drivers/xen/netback/netback.c
16272 ===================================================================
16273 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
16274 +++ head-2008-11-25/drivers/xen/netback/netback.c 2008-02-20 09:32:49.000000000 +0100
16275 @@ -0,0 +1,1614 @@
16276 +/******************************************************************************
16277 + * drivers/xen/netback/netback.c
16278 + *
16279 + * Back-end of the driver for virtual network devices. This portion of the
16280 + * driver exports a 'unified' network-device interface that can be accessed
16281 + * by any operating system that implements a compatible front end. A
16282 + * reference front-end implementation can be found in:
16283 + * drivers/xen/netfront/netfront.c
16284 + *
16285 + * Copyright (c) 2002-2005, K A Fraser
16286 + *
16287 + * This program is free software; you can redistribute it and/or
16288 + * modify it under the terms of the GNU General Public License version 2
16289 + * as published by the Free Software Foundation; or, when distributed
16290 + * separately from the Linux kernel or incorporated into other
16291 + * software packages, subject to the following license:
16292 + *
16293 + * Permission is hereby granted, free of charge, to any person obtaining a copy
16294 + * of this source file (the "Software"), to deal in the Software without
16295 + * restriction, including without limitation the rights to use, copy, modify,
16296 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16297 + * and to permit persons to whom the Software is furnished to do so, subject to
16298 + * the following conditions:
16299 + *
16300 + * The above copyright notice and this permission notice shall be included in
16301 + * all copies or substantial portions of the Software.
16302 + *
16303 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16304 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16305 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16306 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16307 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
16308 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
16309 + * IN THE SOFTWARE.
16310 + */
16311 +
16312 +#include "common.h"
16313 +#include <xen/balloon.h>
16314 +#include <xen/interface/memory.h>
16315 +
16316 +/* #define NETBE_DEBUG_INTERRUPT */
16317 +
16318 +/* extra field used in struct page */
16319 +#define netif_page_index(pg) (*(long *)&(pg)->mapping)
16320 +
16321 +struct netbk_rx_meta {
16322 + skb_frag_t frag;
16323 + int id;
16324 + u8 copy:1;
16325 +};
16326 +
16327 +struct netbk_tx_pending_inuse {
16328 + struct list_head list;
16329 + unsigned long alloc_time;
16330 +};
16331 +
16332 +static void netif_idx_release(u16 pending_idx);
16333 +static void netif_page_release(struct page *page);
16334 +static void make_tx_response(netif_t *netif,
16335 + netif_tx_request_t *txp,
16336 + s8 st);
16337 +static netif_rx_response_t *make_rx_response(netif_t *netif,
16338 + u16 id,
16339 + s8 st,
16340 + u16 offset,
16341 + u16 size,
16342 + u16 flags);
16343 +
16344 +static void net_tx_action(unsigned long unused);
16345 +static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
16346 +
16347 +static void net_rx_action(unsigned long unused);
16348 +static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
16349 +
16350 +static struct timer_list net_timer;
16351 +static struct timer_list netbk_tx_pending_timer;
16352 +
16353 +#define MAX_PENDING_REQS 256
16354 +
16355 +static struct sk_buff_head rx_queue;
16356 +
16357 +static struct page **mmap_pages;
16358 +static inline unsigned long idx_to_pfn(unsigned int idx)
16359 +{
16360 + return page_to_pfn(mmap_pages[idx]);
16361 +}
16362 +
16363 +static inline unsigned long idx_to_kaddr(unsigned int idx)
16364 +{
16365 + return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
16366 +}
16367 +
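+/* Bytes copied into the skb's linear area (enough for the protocol headers). */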
16368 +#define PKT_PROT_LEN 64
16369 +
16370 +static struct pending_tx_info {
16371 + netif_tx_request_t req;
16372 + netif_t *netif;
16373 +} pending_tx_info[MAX_PENDING_REQS];
16374 +static u16 pending_ring[MAX_PENDING_REQS];
16375 +typedef unsigned int PEND_RING_IDX;
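+/* Pending-ring indices are free-running; MASK_PEND_IDX relies on MAX_PENDING_REQS being a power of two. */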
16376 +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
16377 +static PEND_RING_IDX pending_prod, pending_cons;
16378 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
16379 +
16380 +/* Freed TX SKBs get batched on this ring before return to pending_ring. */
16381 +static u16 dealloc_ring[MAX_PENDING_REQS];
16382 +static PEND_RING_IDX dealloc_prod, dealloc_cons;
16383 +
16384 +/* Doubly-linked list of in-use pending entries. */
16385 +static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
16386 +static LIST_HEAD(pending_inuse_head);
16387 +
16388 +static struct sk_buff_head tx_queue;
16389 +
16390 +static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
16391 +static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
16392 +static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
16393 +
16394 +static struct list_head net_schedule_list;
16395 +static spinlock_t net_schedule_list_lock;
16396 +
16397 +#define MAX_MFN_ALLOC 64
16398 +static unsigned long mfn_list[MAX_MFN_ALLOC];
16399 +static unsigned int alloc_index = 0;
16400 +
16401 +/* Setting this allows the safe use of this driver without netloop. */
16402 +static int MODPARM_copy_skb = 1;
16403 +module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
16404 +MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
16405 +
16406 +int netbk_copy_skb_mode;
16407 +
16408 +static inline unsigned long alloc_mfn(void)
16409 +{
16410 + BUG_ON(alloc_index == 0);
16411 + return mfn_list[--alloc_index];
16412 +}
16413 +
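+/* Top up mfn_list from Xen so at least nr machine frames are available for page flipping. */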
16414 +static int check_mfn(int nr)
16415 +{
16416 + struct xen_memory_reservation reservation = {
16417 + .extent_order = 0,
16418 + .domid = DOMID_SELF
16419 + };
16420 + int rc;
16421 +
16422 + if (likely(alloc_index >= nr))
16423 + return 0;
16424 +
16425 + set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
16426 + reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
16427 + rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
16428 + if (likely(rc > 0))
16429 + alloc_index += rc;
16430 +
16431 + return alloc_index >= nr ? 0 : -ENOMEM;
16432 +}
16433 +
16434 +static inline void maybe_schedule_tx_action(void)
16435 +{
16436 + smp_mb();
16437 + if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
16438 + !list_empty(&net_schedule_list))
16439 + tasklet_schedule(&net_tx_tasklet);
16440 +}
16441 +
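+/* Make a private copy of an skb, with freshly allocated head and fragment pages. */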
16442 +static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
16443 +{
16444 + struct skb_shared_info *ninfo;
16445 + struct sk_buff *nskb;
16446 + unsigned long offset;
16447 + int ret;
16448 + int len;
16449 + int headlen;
16450 +
16451 + BUG_ON(skb_shinfo(skb)->frag_list != NULL);
16452 +
16453 + nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
16454 + if (unlikely(!nskb))
16455 + goto err;
16456 +
16457 + skb_reserve(nskb, 16 + NET_IP_ALIGN);
16458 + headlen = nskb->end - nskb->data;
16459 + if (headlen > skb_headlen(skb))
16460 + headlen = skb_headlen(skb);
16461 + ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
16462 + BUG_ON(ret);
16463 +
16464 + ninfo = skb_shinfo(nskb);
16465 + ninfo->gso_size = skb_shinfo(skb)->gso_size;
16466 + ninfo->gso_type = skb_shinfo(skb)->gso_type;
16467 +
16468 + offset = headlen;
16469 + len = skb->len - headlen;
16470 +
16471 + nskb->len = skb->len;
16472 + nskb->data_len = len;
16473 + nskb->truesize += len;
16474 +
16475 + while (len) {
16476 + struct page *page;
16477 + int copy;
16478 + int zero;
16479 +
16480 + if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
16481 + dump_stack();
16482 + goto err_free;
16483 + }
16484 +
16485 + copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
16486 + zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
16487 +
16488 + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
16489 + if (unlikely(!page))
16490 + goto err_free;
16491 +
16492 + ret = skb_copy_bits(skb, offset, page_address(page), copy);
16493 + BUG_ON(ret);
16494 +
16495 + ninfo->frags[ninfo->nr_frags].page = page;
16496 + ninfo->frags[ninfo->nr_frags].page_offset = 0;
16497 + ninfo->frags[ninfo->nr_frags].size = copy;
16498 + ninfo->nr_frags++;
16499 +
16500 + offset += copy;
16501 + len -= copy;
16502 + }
16503 +
16504 + offset = nskb->data - skb->data;
16505 +
16506 + nskb->h.raw = skb->h.raw + offset;
16507 + nskb->nh.raw = skb->nh.raw + offset;
16508 + nskb->mac.raw = skb->mac.raw + offset;
16509 +
16510 + return nskb;
16511 +
16512 + err_free:
16513 + kfree_skb(nskb);
16514 + err:
16515 + return NULL;
16516 +}
16517 +
16518 +static inline int netbk_max_required_rx_slots(netif_t *netif)
16519 +{
16520 + if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
16521 + return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
16522 + return 1; /* all in one */
16523 +}
16524 +
16525 +static inline int netbk_queue_full(netif_t *netif)
16526 +{
16527 + RING_IDX peek = netif->rx_req_cons_peek;
16528 + RING_IDX needed = netbk_max_required_rx_slots(netif);
16529 +
16530 + return ((netif->rx.sring->req_prod - peek) < needed) ||
16531 + ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
16532 +}
16533 +
16534 +static void tx_queue_callback(unsigned long data)
16535 +{
16536 + netif_t *netif = (netif_t *)data;
16537 + if (netif_schedulable(netif))
16538 + netif_wake_queue(netif->dev);
16539 +}
16540 +
16541 +int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
16542 +{
16543 + netif_t *netif = netdev_priv(dev);
16544 +
16545 + BUG_ON(skb->dev != dev);
16546 +
16547 + /* Drop the packet if the target domain has no receive buffers. */
16548 + if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
16549 + goto drop;
16550 +
16551 + /*
16552 + * Copy the packet here if it's destined for a flipping interface
16553 + * but isn't flippable (e.g. extra references to data).
16554 + * XXX For now we also copy skbuffs whose head crosses a page
16555 + * boundary, because netbk_gop_skb can't handle them.
16556 + */
16557 + if (!netif->copying_receiver ||
16558 + ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
16559 + struct sk_buff *nskb = netbk_copy_skb(skb);
16560 +		if (unlikely(nskb == NULL))
16561 + goto drop;
16562 + /* Copy only the header fields we use in this driver. */
16563 + nskb->dev = skb->dev;
16564 + nskb->ip_summed = skb->ip_summed;
16565 + nskb->proto_data_valid = skb->proto_data_valid;
16566 + dev_kfree_skb(skb);
16567 + skb = nskb;
16568 + }
16569 +
16570 + netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
16571 + !!skb_shinfo(skb)->gso_size;
16572 + netif_get(netif);
16573 +
16574 + if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
16575 + netif->rx.sring->req_event = netif->rx_req_cons_peek +
16576 + netbk_max_required_rx_slots(netif);
16577 + mb(); /* request notification /then/ check & stop the queue */
16578 + if (netbk_queue_full(netif)) {
16579 + netif_stop_queue(dev);
16580 + /*
16581 + * Schedule 500ms timeout to restart the queue, thus
16582 + * ensuring that an inactive queue will be drained.
16583 +			 * Packets will immediately be dropped until more
16584 + * receive buffers become available (see
16585 + * netbk_queue_full() check above).
16586 + */
16587 + netif->tx_queue_timeout.data = (unsigned long)netif;
16588 + netif->tx_queue_timeout.function = tx_queue_callback;
16589 + __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
16590 + }
16591 + }
16592 +
16593 + skb_queue_tail(&rx_queue, skb);
16594 + tasklet_schedule(&net_rx_tasklet);
16595 +
16596 + return 0;
16597 +
16598 + drop:
16599 + netif->stats.tx_dropped++;
16600 + dev_kfree_skb(skb);
16601 + return 0;
16602 +}
16603 +
16604 +#if 0
16605 +static void xen_network_done_notify(void)
16606 +{
16607 + static struct net_device *eth0_dev = NULL;
16608 + if (unlikely(eth0_dev == NULL))
16609 + eth0_dev = __dev_get_by_name("eth0");
16610 + netif_rx_schedule(eth0_dev);
16611 +}
16612 +/*
16613 + * Add following to poll() function in NAPI driver (Tigon3 is example):
16614 + * if ( xen_network_done() )
16615 + * tg3_enable_ints(tp);
16616 + */
16617 +int xen_network_done(void)
16618 +{
16619 + return skb_queue_empty(&rx_queue);
16620 +}
16621 +#endif
16622 +
16623 +struct netrx_pending_operations {
16624 + unsigned trans_prod, trans_cons;
16625 + unsigned mmu_prod, mmu_mcl;
16626 + unsigned mcl_prod, mcl_cons;
16627 + unsigned copy_prod, copy_cons;
16628 + unsigned meta_prod, meta_cons;
16629 + mmu_update_t *mmu;
16630 + gnttab_transfer_t *trans;
16631 + gnttab_copy_t *copy;
16632 + multicall_entry_t *mcl;
16633 + struct netbk_rx_meta *meta;
16634 +};
16635 +
16636 +/* Set up the grant operations for this fragment. If it's a flipping
16637 + interface, we also set up the unmap request from here. */
16638 +static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
16639 + int i, struct netrx_pending_operations *npo,
16640 + struct page *page, unsigned long size,
16641 + unsigned long offset)
16642 +{
16643 + mmu_update_t *mmu;
16644 + gnttab_transfer_t *gop;
16645 + gnttab_copy_t *copy_gop;
16646 + multicall_entry_t *mcl;
16647 + netif_rx_request_t *req;
16648 + unsigned long old_mfn, new_mfn;
16649 +
16650 + old_mfn = virt_to_mfn(page_address(page));
16651 +
16652 + req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
16653 + if (netif->copying_receiver) {
16654 + /* The fragment needs to be copied rather than
16655 + flipped. */
16656 + meta->copy = 1;
16657 + copy_gop = npo->copy + npo->copy_prod++;
16658 + copy_gop->flags = GNTCOPY_dest_gref;
16659 + if (PageForeign(page)) {
16660 + struct pending_tx_info *src_pend =
16661 + &pending_tx_info[netif_page_index(page)];
16662 + copy_gop->source.domid = src_pend->netif->domid;
16663 + copy_gop->source.u.ref = src_pend->req.gref;
16664 + copy_gop->flags |= GNTCOPY_source_gref;
16665 + } else {
16666 + copy_gop->source.domid = DOMID_SELF;
16667 + copy_gop->source.u.gmfn = old_mfn;
16668 + }
16669 + copy_gop->source.offset = offset;
16670 + copy_gop->dest.domid = netif->domid;
16671 + copy_gop->dest.offset = 0;
16672 + copy_gop->dest.u.ref = req->gref;
16673 + copy_gop->len = size;
16674 + } else {
16675 + meta->copy = 0;
16676 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
16677 + new_mfn = alloc_mfn();
16678 +
16679 + /*
16680 + * Set the new P2M table entry before
16681 + * reassigning the old data page. Heed the
16682 + * comment in pgtable-2level.h:pte_page(). :-)
16683 + */
16684 + set_phys_to_machine(page_to_pfn(page), new_mfn);
16685 +
16686 + mcl = npo->mcl + npo->mcl_prod++;
16687 + MULTI_update_va_mapping(mcl,
16688 + (unsigned long)page_address(page),
16689 + pfn_pte_ma(new_mfn, PAGE_KERNEL),
16690 + 0);
16691 +
16692 + mmu = npo->mmu + npo->mmu_prod++;
16693 + mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
16694 + MMU_MACHPHYS_UPDATE;
16695 + mmu->val = page_to_pfn(page);
16696 + }
16697 +
16698 + gop = npo->trans + npo->trans_prod++;
16699 + gop->mfn = old_mfn;
16700 + gop->domid = netif->domid;
16701 + gop->ref = req->gref;
16702 + }
16703 + return req->id;
16704 +}
16705 +
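+/* Queue grant operations for every part of the skb: the header plus each fragment. */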
16706 +static void netbk_gop_skb(struct sk_buff *skb,
16707 + struct netrx_pending_operations *npo)
16708 +{
16709 + netif_t *netif = netdev_priv(skb->dev);
16710 + int nr_frags = skb_shinfo(skb)->nr_frags;
16711 + int i;
16712 + int extra;
16713 + struct netbk_rx_meta *head_meta, *meta;
16714 +
16715 + head_meta = npo->meta + npo->meta_prod++;
16716 + head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
16717 + head_meta->frag.size = skb_shinfo(skb)->gso_size;
16718 + extra = !!head_meta->frag.size + 1;
16719 +
16720 + for (i = 0; i < nr_frags; i++) {
16721 + meta = npo->meta + npo->meta_prod++;
16722 + meta->frag = skb_shinfo(skb)->frags[i];
16723 + meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
16724 + meta->frag.page,
16725 + meta->frag.size,
16726 + meta->frag.page_offset);
16727 + }
16728 +
16729 + /*
16730 + * This must occur at the end to ensure that we don't trash skb_shinfo
16731 + * until we're done. We know that the head doesn't cross a page
16732 + * boundary because such packets get copied in netif_be_start_xmit.
16733 + */
16734 + head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
16735 + virt_to_page(skb->data),
16736 + skb_headlen(skb),
16737 + offset_in_page(skb->data));
16738 +
16739 + netif->rx.req_cons += nr_frags + extra;
16740 +}
16741 +
16742 +static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
16743 +{
16744 + int i;
16745 +
16746 + for (i = 0; i < nr_frags; i++)
16747 + put_page(meta[i].frag.page);
16748 +}
16749 +
16750 +/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
16751 + used to set up the operations on the top of
16752 + netrx_pending_operations, which have since been done. Check that
16753 + they didn't give any errors and advance over them. */
16754 +static int netbk_check_gop(int nr_frags, domid_t domid,
16755 + struct netrx_pending_operations *npo)
16756 +{
16757 + multicall_entry_t *mcl;
16758 + gnttab_transfer_t *gop;
16759 + gnttab_copy_t *copy_op;
16760 + int status = NETIF_RSP_OKAY;
16761 + int i;
16762 +
16763 + for (i = 0; i <= nr_frags; i++) {
16764 + if (npo->meta[npo->meta_cons + i].copy) {
16765 + copy_op = npo->copy + npo->copy_cons++;
16766 + if (copy_op->status != GNTST_okay) {
16767 + DPRINTK("Bad status %d from copy to DOM%d.\n",
16768 + copy_op->status, domid);
16769 + status = NETIF_RSP_ERROR;
16770 + }
16771 + } else {
16772 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
16773 + mcl = npo->mcl + npo->mcl_cons++;
16774 + /* The update_va_mapping() must not fail. */
16775 + BUG_ON(mcl->result != 0);
16776 + }
16777 +
16778 + gop = npo->trans + npo->trans_cons++;
16779 + /* Check the reassignment error code. */
16780 + if (gop->status != 0) {
16781 + DPRINTK("Bad status %d from grant transfer to DOM%u\n",
16782 + gop->status, domid);
16783 + /*
16784 + * Page no longer belongs to us unless
16785 + * GNTST_bad_page, but that should be
16786 + * a fatal error anyway.
16787 + */
16788 + BUG_ON(gop->status == GNTST_bad_page);
16789 + status = NETIF_RSP_ERROR;
16790 + }
16791 + }
16792 + }
16793 +
16794 + return status;
16795 +}
16796 +
16797 +static void netbk_add_frag_responses(netif_t *netif, int status,
16798 + struct netbk_rx_meta *meta, int nr_frags)
16799 +{
16800 + int i;
16801 + unsigned long offset;
16802 +
16803 + for (i = 0; i < nr_frags; i++) {
16804 + int id = meta[i].id;
16805 + int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
16806 +
16807 + if (meta[i].copy)
16808 + offset = 0;
16809 + else
16810 + offset = meta[i].frag.page_offset;
16811 + make_rx_response(netif, id, status, offset,
16812 + meta[i].frag.size, flags);
16813 + }
16814 +}
16815 +
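+/* Tasklet: transfer or copy queued skbs into frontend receive buffers and send responses. */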
16816 +static void net_rx_action(unsigned long unused)
16817 +{
16818 + netif_t *netif = NULL;
16819 + s8 status;
16820 + u16 id, irq, flags;
16821 + netif_rx_response_t *resp;
16822 + multicall_entry_t *mcl;
16823 + struct sk_buff_head rxq;
16824 + struct sk_buff *skb;
16825 + int notify_nr = 0;
16826 + int ret;
16827 + int nr_frags;
16828 + int count;
16829 + unsigned long offset;
16830 +
16831 + /*
16832 + * Putting hundreds of bytes on the stack is considered rude.
16833 + * Static works because a tasklet can only be on one CPU at any time.
16834 + */
16835 + static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
16836 + static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
16837 + static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
16838 + static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
16839 + static unsigned char rx_notify[NR_IRQS];
16840 + static u16 notify_list[NET_RX_RING_SIZE];
16841 + static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
16842 +
16843 + struct netrx_pending_operations npo = {
16844 +		.mmu   = rx_mmu,
16845 +		.trans = grant_trans_op,
16846 +		.copy  = grant_copy_op,
16847 +		.mcl   = rx_mcl,
16848 +		.meta  = meta};
16849 +
16850 + skb_queue_head_init(&rxq);
16851 +
16852 + count = 0;
16853 +
16854 + while ((skb = skb_dequeue(&rx_queue)) != NULL) {
16855 + nr_frags = skb_shinfo(skb)->nr_frags;
16856 + *(int *)skb->cb = nr_frags;
16857 +
16858 + if (!xen_feature(XENFEAT_auto_translated_physmap) &&
16859 + !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
16860 + check_mfn(nr_frags + 1)) {
16861 + /* Memory squeeze? Back off for an arbitrary while. */
16862 +			if (net_ratelimit())
16863 + WPRINTK("Memory squeeze in netback "
16864 + "driver.\n");
16865 + mod_timer(&net_timer, jiffies + HZ);
16866 + skb_queue_head(&rx_queue, skb);
16867 + break;
16868 + }
16869 +
16870 + netbk_gop_skb(skb, &npo);
16871 +
16872 + count += nr_frags + 1;
16873 +
16874 + __skb_queue_tail(&rxq, skb);
16875 +
16876 + /* Filled the batch queue? */
16877 + if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
16878 + break;
16879 + }
16880 +
16881 + BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
16882 +
16883 + npo.mmu_mcl = npo.mcl_prod;
16884 + if (npo.mcl_prod) {
16885 + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
16886 + BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
16887 + mcl = npo.mcl + npo.mcl_prod++;
16888 +
16889 + BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
16890 + mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
16891 +
16892 + mcl->op = __HYPERVISOR_mmu_update;
16893 + mcl->args[0] = (unsigned long)rx_mmu;
16894 + mcl->args[1] = npo.mmu_prod;
16895 + mcl->args[2] = 0;
16896 + mcl->args[3] = DOMID_SELF;
16897 + }
16898 +
16899 + if (npo.trans_prod) {
16900 + BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
16901 + mcl = npo.mcl + npo.mcl_prod++;
16902 + mcl->op = __HYPERVISOR_grant_table_op;
16903 + mcl->args[0] = GNTTABOP_transfer;
16904 + mcl->args[1] = (unsigned long)grant_trans_op;
16905 + mcl->args[2] = npo.trans_prod;
16906 + }
16907 +
16908 + if (npo.copy_prod) {
16909 + BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
16910 + mcl = npo.mcl + npo.mcl_prod++;
16911 + mcl->op = __HYPERVISOR_grant_table_op;
16912 + mcl->args[0] = GNTTABOP_copy;
16913 + mcl->args[1] = (unsigned long)grant_copy_op;
16914 + mcl->args[2] = npo.copy_prod;
16915 + }
16916 +
16917 + /* Nothing to do? */
16918 + if (!npo.mcl_prod)
16919 + return;
16920 +
16921 + BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
16922 +
16923 + ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
16924 + BUG_ON(ret != 0);
16925 + /* The mmu_machphys_update() must not fail. */
16926 + BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
16927 +
16928 + while ((skb = __skb_dequeue(&rxq)) != NULL) {
16929 + nr_frags = *(int *)skb->cb;
16930 +
16931 + netif = netdev_priv(skb->dev);
16932 + /* We can't rely on skb_release_data to release the
16933 + pages used by fragments for us, since it tries to
16934 + touch the pages in the fraglist. If we're in
16935 + flipping mode, that doesn't work. In copying mode,
16936 + we still have access to all of the pages, and so
16937 + it's safe to let release_data deal with it. */
16938 + /* (Freeing the fragments is safe since we copy
16939 + non-linear skbs destined for flipping interfaces) */
16940 + if (!netif->copying_receiver) {
16941 + atomic_set(&(skb_shinfo(skb)->dataref), 1);
16942 + skb_shinfo(skb)->frag_list = NULL;
16943 + skb_shinfo(skb)->nr_frags = 0;
16944 + netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
16945 + }
16946 +
16947 + netif->stats.tx_bytes += skb->len;
16948 + netif->stats.tx_packets++;
16949 +
16950 + status = netbk_check_gop(nr_frags, netif->domid, &npo);
16951 +
16952 + id = meta[npo.meta_cons].id;
16953 + flags = nr_frags ? NETRXF_more_data : 0;
16954 +
16955 + if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
16956 + flags |= NETRXF_csum_blank | NETRXF_data_validated;
16957 + else if (skb->proto_data_valid) /* remote but checksummed? */
16958 + flags |= NETRXF_data_validated;
16959 +
16960 + if (meta[npo.meta_cons].copy)
16961 + offset = 0;
16962 + else
16963 + offset = offset_in_page(skb->data);
16964 + resp = make_rx_response(netif, id, status, offset,
16965 + skb_headlen(skb), flags);
16966 +
16967 + if (meta[npo.meta_cons].frag.size) {
16968 + struct netif_extra_info *gso =
16969 + (struct netif_extra_info *)
16970 + RING_GET_RESPONSE(&netif->rx,
16971 + netif->rx.rsp_prod_pvt++);
16972 +
16973 + resp->flags |= NETRXF_extra_info;
16974 +
16975 + gso->u.gso.size = meta[npo.meta_cons].frag.size;
16976 + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
16977 + gso->u.gso.pad = 0;
16978 + gso->u.gso.features = 0;
16979 +
16980 + gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
16981 + gso->flags = 0;
16982 + }
16983 +
16984 + netbk_add_frag_responses(netif, status,
16985 + meta + npo.meta_cons + 1,
16986 + nr_frags);
16987 +
16988 + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
16989 + irq = netif->irq;
16990 + if (ret && !rx_notify[irq]) {
16991 + rx_notify[irq] = 1;
16992 + notify_list[notify_nr++] = irq;
16993 + }
16994 +
16995 + if (netif_queue_stopped(netif->dev) &&
16996 + netif_schedulable(netif) &&
16997 + !netbk_queue_full(netif))
16998 + netif_wake_queue(netif->dev);
16999 +
17000 + netif_put(netif);
17001 + dev_kfree_skb(skb);
17002 + npo.meta_cons += nr_frags + 1;
17003 + }
17004 +
17005 + while (notify_nr != 0) {
17006 + irq = notify_list[--notify_nr];
17007 + rx_notify[irq] = 0;
17008 + notify_remote_via_irq(irq);
17009 + }
17010 +
17011 + /* More work to do? */
17012 + if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
17013 + tasklet_schedule(&net_rx_tasklet);
17014 +#if 0
17015 + else
17016 + xen_network_done_notify();
17017 +#endif
17018 +}
17019 +
17020 +static void net_alarm(unsigned long unused)
17021 +{
17022 + tasklet_schedule(&net_rx_tasklet);
17023 +}
17024 +
17025 +static void netbk_tx_pending_timeout(unsigned long unused)
17026 +{
17027 + tasklet_schedule(&net_tx_tasklet);
17028 +}
17029 +
17030 +struct net_device_stats *netif_be_get_stats(struct net_device *dev)
17031 +{
17032 + netif_t *netif = netdev_priv(dev);
17033 + return &netif->stats;
17034 +}
17035 +
17036 +static int __on_net_schedule_list(netif_t *netif)
17037 +{
17038 + return netif->list.next != NULL;
17039 +}
17040 +
17041 +static void remove_from_net_schedule_list(netif_t *netif)
17042 +{
17043 + spin_lock_irq(&net_schedule_list_lock);
17044 + if (likely(__on_net_schedule_list(netif))) {
17045 + list_del(&netif->list);
17046 + netif->list.next = NULL;
17047 + netif_put(netif);
17048 + }
17049 + spin_unlock_irq(&net_schedule_list_lock);
17050 +}
17051 +
17052 +static void add_to_net_schedule_list_tail(netif_t *netif)
17053 +{
17054 + if (__on_net_schedule_list(netif))
17055 + return;
17056 +
17057 + spin_lock_irq(&net_schedule_list_lock);
17058 + if (!__on_net_schedule_list(netif) &&
17059 + likely(netif_schedulable(netif))) {
17060 + list_add_tail(&netif->list, &net_schedule_list);
17061 + netif_get(netif);
17062 + }
17063 + spin_unlock_irq(&net_schedule_list_lock);
17064 +}
17065 +
17066 +/*
17067 + * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
17068 + * If this driver is pipelining transmit requests then we can be very
17069 + * aggressive in avoiding new-packet notifications -- frontend only needs to
17070 + * send a notification if there are no outstanding unreceived responses.
17071 + * If we may be buffering transmit buffers for any reason then we must be rather
17072 + * more conservative and treat this as the final check for pending work.
17073 + */
17074 +void netif_schedule_work(netif_t *netif)
17075 +{
17076 + int more_to_do;
17077 +
17078 +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
17079 + more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
17080 +#else
17081 + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
17082 +#endif
17083 +
17084 + if (more_to_do) {
17085 + add_to_net_schedule_list_tail(netif);
17086 + maybe_schedule_tx_action();
17087 + }
17088 +}
17089 +
17090 +void netif_deschedule_work(netif_t *netif)
17091 +{
17092 + remove_from_net_schedule_list(netif);
17093 +}
17094 +
17095 +
17096 +static void tx_add_credit(netif_t *netif)
17097 +{
17098 + unsigned long max_burst, max_credit;
17099 +
17100 + /*
17101 + * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
17102 + * Otherwise the interface can seize up due to insufficient credit.
17103 + */
17104 + max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
17105 + max_burst = min(max_burst, 131072UL);
17106 + max_burst = max(max_burst, netif->credit_bytes);
17107 +
17108 + /* Take care that adding a new chunk of credit doesn't wrap to zero. */
17109 + max_credit = netif->remaining_credit + netif->credit_bytes;
17110 + if (max_credit < netif->remaining_credit)
17111 + max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
17112 +
17113 + netif->remaining_credit = min(max_credit, max_burst);
17114 +}
17115 +
17116 +static void tx_credit_callback(unsigned long data)
17117 +{
17118 + netif_t *netif = (netif_t *)data;
17119 + tx_add_credit(netif);
17120 + netif_schedule_work(netif);
17121 +}
17122 +
17123 +static inline int copy_pending_req(PEND_RING_IDX pending_idx)
17124 +{
17125 + return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
17126 + &mmap_pages[pending_idx]);
17127 +}
17128 +
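+/* Unmap grants for completed transmits, send their responses, and recycle the pending slots. */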
17129 +static inline void net_tx_action_dealloc(void)
17130 +{
17131 + struct netbk_tx_pending_inuse *inuse, *n;
17132 + gnttab_unmap_grant_ref_t *gop;
17133 + u16 pending_idx;
17134 + PEND_RING_IDX dc, dp;
17135 + netif_t *netif;
17136 + int ret;
17137 + LIST_HEAD(list);
17138 +
17139 + dc = dealloc_cons;
17140 + gop = tx_unmap_ops;
17141 +
17142 + /*
17143 + * Free up any grants we have finished using
17144 + */
17145 + do {
17146 + dp = dealloc_prod;
17147 +
17148 + /* Ensure we see all indices enqueued by netif_idx_release(). */
17149 + smp_rmb();
17150 +
17151 + while (dc != dp) {
17152 + unsigned long pfn;
17153 +
17154 + pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
17155 + list_move_tail(&pending_inuse[pending_idx].list, &list);
17156 +
17157 + pfn = idx_to_pfn(pending_idx);
17158 + /* Already unmapped? */
17159 + if (!phys_to_machine_mapping_valid(pfn))
17160 + continue;
17161 +
17162 + gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
17163 + GNTMAP_host_map,
17164 + grant_tx_handle[pending_idx]);
17165 + gop++;
17166 + }
17167 +
17168 + if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
17169 + list_empty(&pending_inuse_head))
17170 + break;
17171 +
17172 + /* Copy any entries that have been pending for too long. */
17173 + list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
17174 + if (time_after(inuse->alloc_time + HZ / 2, jiffies))
17175 + break;
17176 +
17177 + switch (copy_pending_req(inuse - pending_inuse)) {
17178 + case 0:
17179 + list_move_tail(&inuse->list, &list);
17180 + continue;
17181 + case -EBUSY:
17182 + list_del_init(&inuse->list);
17183 + continue;
17184 + case -ENOENT:
17185 + continue;
17186 + }
17187 +
17188 + break;
17189 + }
17190 + } while (dp != dealloc_prod);
17191 +
17192 + dealloc_cons = dc;
17193 +
17194 + ret = HYPERVISOR_grant_table_op(
17195 + GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
17196 + BUG_ON(ret);
17197 +
17198 + list_for_each_entry_safe(inuse, n, &list, list) {
17199 + pending_idx = inuse - pending_inuse;
17200 +
17201 + netif = pending_tx_info[pending_idx].netif;
17202 +
17203 + make_tx_response(netif, &pending_tx_info[pending_idx].req,
17204 + NETIF_RSP_OKAY);
17205 +
17206 + /* Ready for next use. */
17207 + gnttab_reset_grant_page(mmap_pages[pending_idx]);
17208 +
17209 + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
17210 +
17211 + netif_put(netif);
17212 +
17213 + list_del_init(&inuse->list);
17214 + }
17215 +}
17216 +
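+/* Send an error response for this request and consume any remaining requests up to 'end'. */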
17217 +static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
17218 +{
17219 + RING_IDX cons = netif->tx.req_cons;
17220 +
17221 + do {
17222 + make_tx_response(netif, txp, NETIF_RSP_ERROR);
17223 + if (cons >= end)
17224 + break;
17225 + txp = RING_GET_REQUEST(&netif->tx, cons++);
17226 + } while (1);
17227 + netif->tx.req_cons = cons;
17228 + netif_schedule_work(netif);
17229 + netif_put(netif);
17230 +}
17231 +
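+/* Count the extra requests that make up this packet's fragments, copying them into txp[]. */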
17232 +static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
17233 + netif_tx_request_t *txp, int work_to_do)
17234 +{
17235 + RING_IDX cons = netif->tx.req_cons;
17236 + int frags = 0;
17237 +
17238 + if (!(first->flags & NETTXF_more_data))
17239 + return 0;
17240 +
17241 + do {
17242 + if (frags >= work_to_do) {
17243 + DPRINTK("Need more frags\n");
17244 + return -frags;
17245 + }
17246 +
17247 + if (unlikely(frags >= MAX_SKB_FRAGS)) {
17248 + DPRINTK("Too many frags\n");
17249 + return -frags;
17250 + }
17251 +
17252 + memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
17253 + sizeof(*txp));
17254 + if (txp->size > first->size) {
17255 + DPRINTK("Frags galore\n");
17256 + return -frags;
17257 + }
17258 +
17259 + first->size -= txp->size;
17260 + frags++;
17261 +
17262 + if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
17263 + DPRINTK("txp->offset: %x, size: %u\n",
17264 + txp->offset, txp->size);
17265 + return -frags;
17266 + }
17267 + } while ((txp++)->flags & NETTXF_more_data);
17268 +
17269 + return frags;
17270 +}
17271 +
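+/* Queue a grant-map operation for each fragment and record its pending slot. */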
17272 +static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
17273 + struct sk_buff *skb,
17274 + netif_tx_request_t *txp,
17275 + gnttab_map_grant_ref_t *mop)
17276 +{
17277 + struct skb_shared_info *shinfo = skb_shinfo(skb);
17278 + skb_frag_t *frags = shinfo->frags;
17279 + unsigned long pending_idx = *((u16 *)skb->data);
17280 + int i, start;
17281 +
17282 + /* Skip first skb fragment if it is on same page as header fragment. */
17283 + start = ((unsigned long)shinfo->frags[0].page == pending_idx);
17284 +
17285 + for (i = start; i < shinfo->nr_frags; i++, txp++) {
17286 + pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
17287 +
17288 + gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
17289 + GNTMAP_host_map | GNTMAP_readonly,
17290 + txp->gref, netif->domid);
17291 +
17292 + memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
17293 + netif_get(netif);
17294 + pending_tx_info[pending_idx].netif = netif;
17295 + frags[i].page = (void *)pending_idx;
17296 + }
17297 +
17298 + return mop;
17299 +}
17300 +
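+/* Check the results of the grant-map operations for the header and each fragment. */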
17301 +static int netbk_tx_check_mop(struct sk_buff *skb,
17302 + gnttab_map_grant_ref_t **mopp)
17303 +{
17304 + gnttab_map_grant_ref_t *mop = *mopp;
17305 + int pending_idx = *((u16 *)skb->data);
17306 + netif_t *netif = pending_tx_info[pending_idx].netif;
17307 + netif_tx_request_t *txp;
17308 + struct skb_shared_info *shinfo = skb_shinfo(skb);
17309 + int nr_frags = shinfo->nr_frags;
17310 + int i, err, start;
17311 +
17312 + /* Check status of header. */
17313 + err = mop->status;
17314 + if (unlikely(err)) {
17315 + txp = &pending_tx_info[pending_idx].req;
17316 + make_tx_response(netif, txp, NETIF_RSP_ERROR);
17317 + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
17318 + netif_put(netif);
17319 + } else {
17320 + set_phys_to_machine(
17321 + __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
17322 + FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
17323 + grant_tx_handle[pending_idx] = mop->handle;
17324 + }
17325 +
17326 + /* Skip first skb fragment if it is on same page as header fragment. */
17327 + start = ((unsigned long)shinfo->frags[0].page == pending_idx);
17328 +
17329 + for (i = start; i < nr_frags; i++) {
17330 + int j, newerr;
17331 +
17332 + pending_idx = (unsigned long)shinfo->frags[i].page;
17333 +
17334 + /* Check error status: if okay then remember grant handle. */
17335 + newerr = (++mop)->status;
17336 + if (likely(!newerr)) {
17337 + set_phys_to_machine(
17338 + __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
17339 + FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
17340 + grant_tx_handle[pending_idx] = mop->handle;
17341 + /* Had a previous error? Invalidate this fragment. */
17342 + if (unlikely(err))
17343 + netif_idx_release(pending_idx);
17344 + continue;
17345 + }
17346 +
17347 + /* Error on this fragment: respond to client with an error. */
17348 + txp = &pending_tx_info[pending_idx].req;
17349 + make_tx_response(netif, txp, NETIF_RSP_ERROR);
17350 + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
17351 + netif_put(netif);
17352 +
17353 + /* Not the first error? Preceding frags already invalidated. */
17354 + if (err)
17355 + continue;
17356 +
17357 + /* First error: invalidate header and preceding fragments. */
17358 + pending_idx = *((u16 *)skb->data);
17359 + netif_idx_release(pending_idx);
17360 + for (j = start; j < i; j++) {
17361 +			pending_idx = (unsigned long)shinfo->frags[j].page;
17362 + netif_idx_release(pending_idx);
17363 + }
17364 +
17365 + /* Remember the error: invalidate all subsequent fragments. */
17366 + err = newerr;
17367 + }
17368 +
17369 + *mopp = mop + 1;
17370 + return err;
17371 +}
17372 +
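+/* Point the skb's fragment descriptors at the freshly mapped grant pages. */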
17373 +static void netbk_fill_frags(struct sk_buff *skb)
17374 +{
17375 + struct skb_shared_info *shinfo = skb_shinfo(skb);
17376 + int nr_frags = shinfo->nr_frags;
17377 + int i;
17378 +
17379 + for (i = 0; i < nr_frags; i++) {
17380 + skb_frag_t *frag = shinfo->frags + i;
17381 + netif_tx_request_t *txp;
17382 + unsigned long pending_idx;
17383 +
17384 + pending_idx = (unsigned long)frag->page;
17385 +
17386 + pending_inuse[pending_idx].alloc_time = jiffies;
17387 + list_add_tail(&pending_inuse[pending_idx].list,
17388 + &pending_inuse_head);
17389 +
17390 + txp = &pending_tx_info[pending_idx].req;
17391 + frag->page = virt_to_page(idx_to_kaddr(pending_idx));
17392 + frag->size = txp->size;
17393 + frag->page_offset = txp->offset;
17394 +
17395 + skb->len += txp->size;
17396 + skb->data_len += txp->size;
17397 + skb->truesize += txp->size;
17398 + }
17399 +}
17400 +
17401 +int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
17402 + int work_to_do)
17403 +{
17404 + struct netif_extra_info extra;
17405 + RING_IDX cons = netif->tx.req_cons;
17406 +
17407 + do {
17408 + if (unlikely(work_to_do-- <= 0)) {
17409 + DPRINTK("Missing extra info\n");
17410 + return -EBADR;
17411 + }
17412 +
17413 + memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
17414 + sizeof(extra));
17415 + if (unlikely(!extra.type ||
17416 + extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
17417 + netif->tx.req_cons = ++cons;
17418 + DPRINTK("Invalid extra type: %d\n", extra.type);
17419 + return -EINVAL;
17420 + }
17421 +
17422 + memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
17423 + netif->tx.req_cons = ++cons;
17424 + } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
17425 +
17426 + return work_to_do;
17427 +}
17428 +
17429 +static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
17430 +{
17431 + if (!gso->u.gso.size) {
17432 + DPRINTK("GSO size must not be zero.\n");
17433 + return -EINVAL;
17434 + }
17435 +
17436 + /* Currently only TCPv4 S.O. is supported. */
17437 + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
17438 + DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
17439 + return -EINVAL;
17440 + }
17441 +
17442 + skb_shinfo(skb)->gso_size = gso->u.gso.size;
17443 + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
17444 +
17445 + /* Header must be checked, and gso_segs computed. */
17446 + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
17447 + skb_shinfo(skb)->gso_segs = 0;
17448 +
17449 + return 0;
17450 +}
17451 +
17452 +/* Called after netfront has transmitted */
17453 +static void net_tx_action(unsigned long unused)
17454 +{
17455 + struct list_head *ent;
17456 + struct sk_buff *skb;
17457 + netif_t *netif;
17458 + netif_tx_request_t txreq;
17459 + netif_tx_request_t txfrags[MAX_SKB_FRAGS];
17460 + struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
17461 + u16 pending_idx;
17462 + RING_IDX i;
17463 + gnttab_map_grant_ref_t *mop;
17464 + unsigned int data_len;
17465 + int ret, work_to_do;
17466 +
17467 + if (dealloc_cons != dealloc_prod)
17468 + net_tx_action_dealloc();
17469 +
17470 + mop = tx_map_ops;
17471 + while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
17472 + !list_empty(&net_schedule_list)) {
17473 + /* Get a netif from the list with work to do. */
17474 + ent = net_schedule_list.next;
17475 + netif = list_entry(ent, netif_t, list);
17476 + netif_get(netif);
17477 + remove_from_net_schedule_list(netif);
17478 +
17479 + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
17480 + if (!work_to_do) {
17481 + netif_put(netif);
17482 + continue;
17483 + }
17484 +
17485 + i = netif->tx.req_cons;
17486 + rmb(); /* Ensure that we see the request before we copy it. */
17487 + memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
17488 +
17489 + /* Credit-based scheduling. */
17490 + if (txreq.size > netif->remaining_credit) {
17491 + unsigned long now = jiffies;
17492 + unsigned long next_credit =
17493 + netif->credit_timeout.expires +
17494 + msecs_to_jiffies(netif->credit_usec / 1000);
17495 +
17496 + /* Timer could already be pending in rare cases. */
17497 + if (timer_pending(&netif->credit_timeout)) {
17498 + netif_put(netif);
17499 + continue;
17500 + }
17501 +
17502 + /* Passed the point where we can replenish credit? */
17503 + if (time_after_eq(now, next_credit)) {
17504 + netif->credit_timeout.expires = now;
17505 + tx_add_credit(netif);
17506 + }
17507 +
17508 + /* Still too big to send right now? Set a callback. */
17509 + if (txreq.size > netif->remaining_credit) {
17510 + netif->credit_timeout.data =
17511 + (unsigned long)netif;
17512 + netif->credit_timeout.function =
17513 + tx_credit_callback;
17514 + __mod_timer(&netif->credit_timeout,
17515 + next_credit);
17516 + netif_put(netif);
17517 + continue;
17518 + }
17519 + }
17520 + netif->remaining_credit -= txreq.size;
17521 +
17522 + work_to_do--;
17523 + netif->tx.req_cons = ++i;
17524 +
17525 + memset(extras, 0, sizeof(extras));
17526 + if (txreq.flags & NETTXF_extra_info) {
17527 + work_to_do = netbk_get_extras(netif, extras,
17528 + work_to_do);
17529 + i = netif->tx.req_cons;
17530 + if (unlikely(work_to_do < 0)) {
17531 + netbk_tx_err(netif, &txreq, i);
17532 + continue;
17533 + }
17534 + }
17535 +
17536 + ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
17537 + if (unlikely(ret < 0)) {
17538 + netbk_tx_err(netif, &txreq, i - ret);
17539 + continue;
17540 + }
17541 + i += ret;
17542 +
17543 + if (unlikely(txreq.size < ETH_HLEN)) {
17544 + DPRINTK("Bad packet size: %d\n", txreq.size);
17545 + netbk_tx_err(netif, &txreq, i);
17546 + continue;
17547 + }
17548 +
17549 +		/* The payload must not cross a page boundary, as it cannot be fragmented. */
17550 + if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
17551 + DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
17552 + txreq.offset, txreq.size,
17553 + (txreq.offset &~PAGE_MASK) + txreq.size);
17554 + netbk_tx_err(netif, &txreq, i);
17555 + continue;
17556 + }
17557 +
17558 + pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
17559 +
17560 + data_len = (txreq.size > PKT_PROT_LEN &&
17561 + ret < MAX_SKB_FRAGS) ?
17562 + PKT_PROT_LEN : txreq.size;
17563 +
17564 + skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
17565 + GFP_ATOMIC | __GFP_NOWARN);
17566 + if (unlikely(skb == NULL)) {
17567 + DPRINTK("Can't allocate a skb in start_xmit.\n");
17568 + netbk_tx_err(netif, &txreq, i);
17569 + break;
17570 + }
17571 +
17572 + /* Packets passed to netif_rx() must have some headroom. */
17573 + skb_reserve(skb, 16 + NET_IP_ALIGN);
17574 +
17575 + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
17576 + struct netif_extra_info *gso;
17577 + gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
17578 +
17579 + if (netbk_set_skb_gso(skb, gso)) {
17580 + kfree_skb(skb);
17581 + netbk_tx_err(netif, &txreq, i);
17582 + continue;
17583 + }
17584 + }
17585 +
17586 + gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
17587 + GNTMAP_host_map | GNTMAP_readonly,
17588 + txreq.gref, netif->domid);
17589 + mop++;
17590 +
17591 + memcpy(&pending_tx_info[pending_idx].req,
17592 + &txreq, sizeof(txreq));
17593 + pending_tx_info[pending_idx].netif = netif;
17594 + *((u16 *)skb->data) = pending_idx;
17595 +
17596 + __skb_put(skb, data_len);
17597 +
17598 + skb_shinfo(skb)->nr_frags = ret;
17599 + if (data_len < txreq.size) {
17600 + skb_shinfo(skb)->nr_frags++;
17601 + skb_shinfo(skb)->frags[0].page =
17602 + (void *)(unsigned long)pending_idx;
17603 + } else {
17604 + /* Discriminate from any valid pending_idx value. */
17605 + skb_shinfo(skb)->frags[0].page = (void *)~0UL;
17606 + }
17607 +
17608 + __skb_queue_tail(&tx_queue, skb);
17609 +
17610 + pending_cons++;
17611 +
17612 + mop = netbk_get_requests(netif, skb, txfrags, mop);
17613 +
17614 + netif->tx.req_cons = i;
17615 + netif_schedule_work(netif);
17616 +
17617 + if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
17618 + break;
17619 + }
17620 +
17621 + if (mop == tx_map_ops)
17622 + return;
17623 +
17624 + ret = HYPERVISOR_grant_table_op(
17625 + GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
17626 + BUG_ON(ret);
17627 +
17628 + mop = tx_map_ops;
17629 + while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
17630 + netif_tx_request_t *txp;
17631 +
17632 + pending_idx = *((u16 *)skb->data);
17633 + netif = pending_tx_info[pending_idx].netif;
17634 + txp = &pending_tx_info[pending_idx].req;
17635 +
17636 + /* Check the remap error code. */
17637 + if (unlikely(netbk_tx_check_mop(skb, &mop))) {
17638 + DPRINTK("netback grant failed.\n");
17639 + skb_shinfo(skb)->nr_frags = 0;
17640 + kfree_skb(skb);
17641 + continue;
17642 + }
17643 +
17644 + data_len = skb->len;
17645 + memcpy(skb->data,
17646 + (void *)(idx_to_kaddr(pending_idx)|txp->offset),
17647 + data_len);
17648 + if (data_len < txp->size) {
17649 + /* Append the packet payload as a fragment. */
17650 + txp->offset += data_len;
17651 + txp->size -= data_len;
17652 + } else {
17653 + /* Schedule a response immediately. */
17654 + netif_idx_release(pending_idx);
17655 + }
17656 +
17657 + /*
17658 + * Old frontends do not assert data_validated but we
17659 + * can infer it from csum_blank so test both flags.
17660 + */
17661 + if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
17662 + skb->ip_summed = CHECKSUM_UNNECESSARY;
17663 + skb->proto_data_valid = 1;
17664 + } else {
17665 + skb->ip_summed = CHECKSUM_NONE;
17666 + skb->proto_data_valid = 0;
17667 + }
17668 + skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
17669 +
17670 + netbk_fill_frags(skb);
17671 +
17672 + skb->dev = netif->dev;
17673 + skb->protocol = eth_type_trans(skb, skb->dev);
17674 +
17675 + netif->stats.rx_bytes += skb->len;
17676 + netif->stats.rx_packets++;
17677 +
17678 + if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
17679 + unlikely(skb_linearize(skb))) {
17680 + DPRINTK("Can't linearize skb in net_tx_action.\n");
17681 + kfree_skb(skb);
17682 + continue;
17683 + }
17684 +
17685 + netif_rx(skb);
17686 + netif->dev->last_rx = jiffies;
17687 + }
17688 +
17689 + if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
17690 + !list_empty(&pending_inuse_head)) {
17691 + struct netbk_tx_pending_inuse *oldest;
17692 +
17693 + oldest = list_entry(pending_inuse_head.next,
17694 + struct netbk_tx_pending_inuse, list);
17695 + mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
17696 + }
17697 +}
17698 +
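+/* Queue a pending-transmit slot for deallocation and kick the TX tasklet. */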
17699 +static void netif_idx_release(u16 pending_idx)
17700 +{
17701 + static DEFINE_SPINLOCK(_lock);
17702 + unsigned long flags;
17703 +
17704 + spin_lock_irqsave(&_lock, flags);
17705 + dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
17706 + /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
17707 + smp_wmb();
17708 + dealloc_prod++;
17709 + spin_unlock_irqrestore(&_lock, flags);
17710 +
17711 + tasklet_schedule(&net_tx_tasklet);
17712 +}
17713 +
17714 +static void netif_page_release(struct page *page)
17715 +{
17716 + netif_idx_release(netif_page_index(page));
17717 +}
17718 +
17719 +irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
17720 +{
17721 + netif_t *netif = dev_id;
17722 +
17723 + add_to_net_schedule_list_tail(netif);
17724 + maybe_schedule_tx_action();
17725 +
17726 + if (netif_schedulable(netif) && !netbk_queue_full(netif))
17727 + netif_wake_queue(netif->dev);
17728 +
17729 + return IRQ_HANDLED;
17730 +}
17731 +
17732 +static void make_tx_response(netif_t *netif,
17733 + netif_tx_request_t *txp,
17734 + s8 st)
17735 +{
17736 + RING_IDX i = netif->tx.rsp_prod_pvt;
17737 + netif_tx_response_t *resp;
17738 + int notify;
17739 +
17740 + resp = RING_GET_RESPONSE(&netif->tx, i);
17741 + resp->id = txp->id;
17742 + resp->status = st;
17743 +
17744 + if (txp->flags & NETTXF_extra_info)
17745 + RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
17746 +
17747 + netif->tx.rsp_prod_pvt = ++i;
17748 + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
17749 + if (notify)
17750 + notify_remote_via_irq(netif->irq);
17751 +
17752 +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
17753 + if (i == netif->tx.req_cons) {
17754 + int more_to_do;
17755 + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
17756 + if (more_to_do)
17757 + add_to_net_schedule_list_tail(netif);
17758 + }
17759 +#endif
17760 +}
17761 +
17762 +static netif_rx_response_t *make_rx_response(netif_t *netif,
17763 + u16 id,
17764 + s8 st,
17765 + u16 offset,
17766 + u16 size,
17767 + u16 flags)
17768 +{
17769 + RING_IDX i = netif->rx.rsp_prod_pvt;
17770 + netif_rx_response_t *resp;
17771 +
17772 + resp = RING_GET_RESPONSE(&netif->rx, i);
17773 + resp->offset = offset;
17774 + resp->flags = flags;
17775 + resp->id = id;
17776 + resp->status = (s16)size;
17777 + if (st < 0)
17778 + resp->status = (s16)st;
17779 +
17780 + netif->rx.rsp_prod_pvt = ++i;
17781 +
17782 + return resp;
17783 +}
17784 +
17785 +#ifdef NETBE_DEBUG_INTERRUPT
17786 +static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
17787 +{
17788 + struct list_head *ent;
17789 + netif_t *netif;
17790 + int i = 0;
17791 +
17792 + printk(KERN_ALERT "netif_schedule_list:\n");
17793 + spin_lock_irq(&net_schedule_list_lock);
17794 +
17795 + list_for_each (ent, &net_schedule_list) {
17796 + netif = list_entry(ent, netif_t, list);
17797 + printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
17798 + "rx_resp_prod=%08x\n",
17799 + i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
17800 + printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
17801 + netif->tx.req_cons, netif->tx.rsp_prod_pvt);
17802 + printk(KERN_ALERT " shared(rx_req_prod=%08x "
17803 + "rx_resp_prod=%08x\n",
17804 + netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
17805 + printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
17806 + netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
17807 + printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
17808 + netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
17809 + i++;
17810 + }
17811 +
17812 + spin_unlock_irq(&net_schedule_list_lock);
17813 + printk(KERN_ALERT " ** End of netif_schedule_list **\n");
17814 +
17815 + return IRQ_HANDLED;
17816 +}
17817 +#endif
17818 +
17819 +static int __init netback_init(void)
17820 +{
17821 + int i;
17822 + struct page *page;
17823 +
17824 + if (!is_running_on_xen())
17825 + return -ENODEV;
17826 +
17827 + /* We can increase reservation by this much in net_rx_action(). */
17828 + balloon_update_driver_allowance(NET_RX_RING_SIZE);
17829 +
17830 + skb_queue_head_init(&rx_queue);
17831 + skb_queue_head_init(&tx_queue);
17832 +
17833 + init_timer(&net_timer);
17834 + net_timer.data = 0;
17835 + net_timer.function = net_alarm;
17836 +
17837 + init_timer(&netbk_tx_pending_timer);
17838 + netbk_tx_pending_timer.data = 0;
17839 + netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
17840 +
17841 + mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
17842 + if (mmap_pages == NULL) {
17843 + printk("%s: out of memory\n", __FUNCTION__);
17844 + return -ENOMEM;
17845 + }
17846 +
17847 + for (i = 0; i < MAX_PENDING_REQS; i++) {
17848 + page = mmap_pages[i];
17849 + SetPageForeign(page, netif_page_release);
17850 + netif_page_index(page) = i;
17851 + INIT_LIST_HEAD(&pending_inuse[i].list);
17852 + }
17853 +
17854 + pending_cons = 0;
17855 + pending_prod = MAX_PENDING_REQS;
17856 + for (i = 0; i < MAX_PENDING_REQS; i++)
17857 + pending_ring[i] = i;
17858 +
17859 + spin_lock_init(&net_schedule_list_lock);
17860 + INIT_LIST_HEAD(&net_schedule_list);
17861 +
17862 + netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
17863 + if (MODPARM_copy_skb) {
17864 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
17865 + NULL, 0))
17866 + netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
17867 + else
17868 + netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
17869 + }
17870 +
17871 + netif_accel_init();
17872 +
17873 + netif_xenbus_init();
17874 +
17875 +#ifdef NETBE_DEBUG_INTERRUPT
17876 + (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
17877 + 0,
17878 + netif_be_dbg,
17879 + SA_SHIRQ,
17880 + "net-be-dbg",
17881 + &netif_be_dbg);
17882 +#endif
17883 +
17884 + return 0;
17885 +}
17886 +
17887 +module_init(netback_init);
17888 +
17889 +MODULE_LICENSE("Dual BSD/GPL");
17890 Index: head-2008-11-25/drivers/xen/netback/xenbus.c
17891 ===================================================================
17892 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
17893 +++ head-2008-11-25/drivers/xen/netback/xenbus.c 2008-09-01 12:07:31.000000000 +0200
17894 @@ -0,0 +1,454 @@
17895 +/* Xenbus code for netif backend
17896 + Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
17897 + Copyright (C) 2005 XenSource Ltd
17898 +
17899 + This program is free software; you can redistribute it and/or modify
17900 + it under the terms of the GNU General Public License as published by
17901 + the Free Software Foundation; either version 2 of the License, or
17902 + (at your option) any later version.
17903 +
17904 + This program is distributed in the hope that it will be useful,
17905 + but WITHOUT ANY WARRANTY; without even the implied warranty of
17906 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17907 + GNU General Public License for more details.
17908 +
17909 + You should have received a copy of the GNU General Public License
17910 + along with this program; if not, write to the Free Software
17911 + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17912 +*/
17913 +
17914 +#include <stdarg.h>
17915 +#include <linux/module.h>
17916 +#include <xen/xenbus.h>
17917 +#include "common.h"
17918 +
17919 +#if 0
17920 +#undef DPRINTK
17921 +#define DPRINTK(fmt, args...) \
17922 + printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
17923 +#endif
17924 +
17925 +
17926 +static int connect_rings(struct backend_info *);
17927 +static void connect(struct backend_info *);
17928 +static void backend_create_netif(struct backend_info *be);
17929 +
17930 +static int netback_remove(struct xenbus_device *dev)
17931 +{
17932 + struct backend_info *be = dev->dev.driver_data;
17933 +
17934 + netback_remove_accelerators(be, dev);
17935 +
17936 + if (be->netif) {
17937 + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
17938 + netif_disconnect(be->netif);
17939 + be->netif = NULL;
17940 + }
17941 + kfree(be);
17942 + dev->dev.driver_data = NULL;
17943 + return 0;
17944 +}
17945 +
17946 +
17947 +/**
17948 + * Entry point to this code when a new device is created. Allocate the basic
17949 + * structures and switch to InitWait.
17950 + */
17951 +static int netback_probe(struct xenbus_device *dev,
17952 + const struct xenbus_device_id *id)
17953 +{
17954 + const char *message;
17955 + struct xenbus_transaction xbt;
17956 + int err;
17957 + int sg;
17958 + struct backend_info *be = kzalloc(sizeof(struct backend_info),
17959 + GFP_KERNEL);
17960 + if (!be) {
17961 + xenbus_dev_fatal(dev, -ENOMEM,
17962 + "allocating backend structure");
17963 + return -ENOMEM;
17964 + }
17965 +
17966 + be->dev = dev;
17967 + dev->dev.driver_data = be;
17968 +
17969 + sg = 1;
17970 + if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
17971 + sg = 0;
17972 +
17973 + do {
17974 + err = xenbus_transaction_start(&xbt);
17975 + if (err) {
17976 + xenbus_dev_fatal(dev, err, "starting transaction");
17977 + goto fail;
17978 + }
17979 +
17980 + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
17981 + if (err) {
17982 + message = "writing feature-sg";
17983 + goto abort_transaction;
17984 + }
17985 +
17986 + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
17987 + "%d", sg);
17988 + if (err) {
17989 + message = "writing feature-gso-tcpv4";
17990 + goto abort_transaction;
17991 + }
17992 +
17993 + /* We support rx-copy path. */
17994 + err = xenbus_printf(xbt, dev->nodename,
17995 + "feature-rx-copy", "%d", 1);
17996 + if (err) {
17997 + message = "writing feature-rx-copy";
17998 + goto abort_transaction;
17999 + }
18000 +
18001 + /*
18002 + * We don't support rx-flip path (except old guests who don't
18003 + * grok this feature flag).
18004 + */
18005 + err = xenbus_printf(xbt, dev->nodename,
18006 + "feature-rx-flip", "%d", 0);
18007 + if (err) {
18008 + message = "writing feature-rx-flip";
18009 + goto abort_transaction;
18010 + }
18011 +
18012 + err = xenbus_transaction_end(xbt, 0);
18013 + } while (err == -EAGAIN);
18014 +
18015 + if (err) {
18016 + xenbus_dev_fatal(dev, err, "completing transaction");
18017 + goto fail;
18018 + }
18019 +
18020 + netback_probe_accelerators(be, dev);
18021 +
18022 + err = xenbus_switch_state(dev, XenbusStateInitWait);
18023 + if (err)
18024 + goto fail;
18025 +
18026 + /* This kicks hotplug scripts, so do it immediately. */
18027 + backend_create_netif(be);
18028 +
18029 + return 0;
18030 +
18031 +abort_transaction:
18032 + xenbus_transaction_end(xbt, 1);
18033 + xenbus_dev_fatal(dev, err, "%s", message);
18034 +fail:
18035 + DPRINTK("failed");
18036 + netback_remove(dev);
18037 + return err;
18038 +}
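+
+/*
+ * For illustration only: on a typical toolstack the backend's xenstore
+ * directory (dev->nodename, usually something like
+ * backend/vif/<frontend-domid>/<handle>) ends up holding the feature
+ * flags written by the transaction above, e.g.
+ *
+ *   feature-sg        = "1"  (or "0" with NETBK_ALWAYS_COPY_SKB)
+ *   feature-gso-tcpv4 = "1"
+ *   feature-rx-copy   = "1"
+ *   feature-rx-flip   = "0"
+ *
+ * The frontend typically consults these keys when negotiating its own
+ * features.
+ */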
18039 +
18040 +
18041 +/**
18042 + * Handle the creation of the hotplug script environment. We add the script
18043 + * and vif variables to the environment, for the benefit of the vif-* hotplug
18044 + * scripts.
18045 + */
18046 +static int netback_uevent(struct xenbus_device *xdev, char **envp,
18047 + int num_envp, char *buffer, int buffer_size)
18048 +{
18049 + struct backend_info *be = xdev->dev.driver_data;
18050 + netif_t *netif = be->netif;
18051 + int i = 0, length = 0;
18052 + char *val;
18053 +
18054 + DPRINTK("netback_uevent");
18055 +
18056 + val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
18057 + if (IS_ERR(val)) {
18058 + int err = PTR_ERR(val);
18059 + xenbus_dev_fatal(xdev, err, "reading script");
18060 + return err;
18061 + }
18062 + else {
18063 + add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
18064 + &length, "script=%s", val);
18065 + kfree(val);
18066 + }
18067 +
18068 + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
18069 + "vif=%s", netif->dev->name);
18070 +
18071 + envp[i] = NULL;
18072 +
18073 + return 0;
18074 +}
18075 +
18076 +
18077 +static void backend_create_netif(struct backend_info *be)
18078 +{
18079 + int err;
18080 + long handle;
18081 + struct xenbus_device *dev = be->dev;
18082 +
18083 + if (be->netif != NULL)
18084 + return;
18085 +
18086 + err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
18087 + if (err != 1) {
18088 + xenbus_dev_fatal(dev, err, "reading handle");
18089 + return;
18090 + }
18091 +
18092 + be->netif = netif_alloc(dev->otherend_id, handle);
18093 + if (IS_ERR(be->netif)) {
18094 + err = PTR_ERR(be->netif);
18095 + be->netif = NULL;
18096 + xenbus_dev_fatal(dev, err, "creating interface");
18097 + return;
18098 + }
18099 +
18100 + kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
18101 +}
18102 +
18103 +
18104 +/**
18105 + * Callback received when the frontend's state changes.
18106 + */
18107 +static void frontend_changed(struct xenbus_device *dev,
18108 + enum xenbus_state frontend_state)
18109 +{
18110 + struct backend_info *be = dev->dev.driver_data;
18111 +
18112 + DPRINTK("%s", xenbus_strstate(frontend_state));
18113 +
18114 + be->frontend_state = frontend_state;
18115 +
18116 + switch (frontend_state) {
18117 + case XenbusStateInitialising:
18118 + if (dev->state == XenbusStateClosed) {
18119 + printk(KERN_INFO "%s: %s: prepare for reconnect\n",
18120 + __FUNCTION__, dev->nodename);
18121 + xenbus_switch_state(dev, XenbusStateInitWait);
18122 + }
18123 + break;
18124 +
18125 + case XenbusStateInitialised:
18126 + break;
18127 +
18128 + case XenbusStateConnected:
18129 + if (dev->state == XenbusStateConnected)
18130 + break;
18131 + backend_create_netif(be);
18132 + if (be->netif)
18133 + connect(be);
18134 + break;
18135 +
18136 + case XenbusStateClosing:
18137 + if (be->netif) {
18138 + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
18139 + netif_disconnect(be->netif);
18140 + be->netif = NULL;
18141 + }
18142 + xenbus_switch_state(dev, XenbusStateClosing);
18143 + break;
18144 +
18145 + case XenbusStateClosed:
18146 + xenbus_switch_state(dev, XenbusStateClosed);
18147 + if (xenbus_dev_is_online(dev))
18148 + break;
18149 + /* fall through if not online */
18150 + case XenbusStateUnknown:
18151 + device_unregister(&dev->dev);
18152 + break;
18153 +
18154 + default:
18155 + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
18156 + frontend_state);
18157 + break;
18158 + }
18159 +}
18160 +
18161 +
18162 +static void xen_net_read_rate(struct xenbus_device *dev,
18163 + unsigned long *bytes, unsigned long *usec)
18164 +{
18165 + char *s, *e;
18166 + unsigned long b, u;
18167 + char *ratestr;
18168 +
18169 + /* Default to unlimited bandwidth. */
18170 + *bytes = ~0UL;
18171 + *usec = 0;
18172 +
18173 + ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
18174 + if (IS_ERR(ratestr))
18175 + return;
18176 +
18177 + s = ratestr;
18178 + b = simple_strtoul(s, &e, 10);
18179 + if ((s == e) || (*e != ','))
18180 + goto fail;
18181 +
18182 + s = e + 1;
18183 + u = simple_strtoul(s, &e, 10);
18184 + if ((s == e) || (*e != '\0'))
18185 + goto fail;
18186 +
18187 + *bytes = b;
18188 + *usec = u;
18189 +
18190 + kfree(ratestr);
18191 + return;
18192 +
18193 + fail:
18194 + WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
18195 + kfree(ratestr);
18196 +}
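+
+/*
+ * For example, a vif whose xenstore "rate" node reads "1000000,20000"
+ * would be limited to roughly 1000000 bytes of credit per 20000
+ * microsecond period; a missing or malformed node leaves the interface
+ * unlimited, as handled above.
+ */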
18197 +
18198 +static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
18199 +{
18200 + char *s, *e, *macstr;
18201 + int i;
18202 +
18203 + macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
18204 + if (IS_ERR(macstr))
18205 + return PTR_ERR(macstr);
18206 +
18207 + for (i = 0; i < ETH_ALEN; i++) {
18208 + mac[i] = simple_strtoul(s, &e, 16);
18209 + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
18210 + kfree(macstr);
18211 + return -ENOENT;
18212 + }
18213 + s = e+1;
18214 + }
18215 +
18216 + kfree(macstr);
18217 + return 0;
18218 +}
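+
+/*
+ * The "mac" node is expected to contain a colon-separated Ethernet
+ * address, e.g. mac = "00:16:3e:12:34:56" (00:16:3e being the OUI
+ * conventionally used for Xen guests); anything that does not parse as
+ * six hex octets is rejected with -ENOENT above.
+ */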
18219 +
18220 +static void connect(struct backend_info *be)
18221 +{
18222 + int err;
18223 + struct xenbus_device *dev = be->dev;
18224 +
18225 + err = connect_rings(be);
18226 + if (err)
18227 + return;
18228 +
18229 + err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
18230 + if (err) {
18231 + xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
18232 + return;
18233 + }
18234 +
18235 + xen_net_read_rate(dev, &be->netif->credit_bytes,
18236 + &be->netif->credit_usec);
18237 + be->netif->remaining_credit = be->netif->credit_bytes;
18238 +
18239 + xenbus_switch_state(dev, XenbusStateConnected);
18240 +
18241 + netif_wake_queue(be->netif->dev);
18242 +}
18243 +
18244 +
18245 +static int connect_rings(struct backend_info *be)
18246 +{
18247 + struct xenbus_device *dev = be->dev;
18248 + unsigned long tx_ring_ref, rx_ring_ref;
18249 + unsigned int evtchn, rx_copy;
18250 + int err;
18251 + int val;
18252 +
18253 + DPRINTK("");
18254 +
18255 + err = xenbus_gather(XBT_NIL, dev->otherend,
18256 + "tx-ring-ref", "%lu", &tx_ring_ref,
18257 + "rx-ring-ref", "%lu", &rx_ring_ref,
18258 + "event-channel", "%u", &evtchn, NULL);
18259 + if (err) {
18260 + xenbus_dev_fatal(dev, err,
18261 + "reading %s/ring-ref and event-channel",
18262 + dev->otherend);
18263 + return err;
18264 + }
18265 +
18266 + err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
18267 + &rx_copy);
18268 + if (err == -ENOENT) {
18269 + err = 0;
18270 + rx_copy = 0;
18271 + }
18272 + if (err < 0) {
18273 + xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
18274 + dev->otherend);
18275 + return err;
18276 + }
18277 + be->netif->copying_receiver = !!rx_copy;
18278 +
18279 + if (be->netif->dev->tx_queue_len != 0) {
18280 + if (xenbus_scanf(XBT_NIL, dev->otherend,
18281 + "feature-rx-notify", "%d", &val) < 0)
18282 + val = 0;
18283 + if (val)
18284 + be->netif->can_queue = 1;
18285 + else
18286 + /* Must be non-zero for pfifo_fast to work. */
18287 + be->netif->dev->tx_queue_len = 1;
18288 + }
18289 +
18290 + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
18291 + val = 0;
18292 + if (val) {
18293 + be->netif->features |= NETIF_F_SG;
18294 + be->netif->dev->features |= NETIF_F_SG;
18295 + }
18296 +
18297 + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
18298 + &val) < 0)
18299 + val = 0;
18300 + if (val) {
18301 + be->netif->features |= NETIF_F_TSO;
18302 + be->netif->dev->features |= NETIF_F_TSO;
18303 + }
18304 +
18305 + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
18306 + "%d", &val) < 0)
18307 + val = 0;
18308 + if (val) {
18309 + be->netif->features &= ~NETIF_F_IP_CSUM;
18310 + be->netif->dev->features &= ~NETIF_F_IP_CSUM;
18311 + }
18312 +
18313 + /* Map the shared frame, irq etc. */
18314 + err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
18315 + if (err) {
18316 + xenbus_dev_fatal(dev, err,
18317 + "mapping shared-frames %lu/%lu port %u",
18318 + tx_ring_ref, rx_ring_ref, evtchn);
18319 + return err;
18320 + }
18321 + return 0;
18322 +}
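+
+/*
+ * The frontend is expected to have published its half of the connection
+ * under its own xenstore directory before this point, for example:
+ *
+ *   tx-ring-ref     = "<grant reference of the tx shared ring>"
+ *   rx-ring-ref     = "<grant reference of the rx shared ring>"
+ *   event-channel   = "<event channel port>"
+ *   request-rx-copy = "1"
+ *
+ * which matches what talk_to_backend() in netfront.c writes.
+ */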
18323 +
18324 +
18325 +/* ** Driver Registration ** */
18326 +
18327 +
18328 +static const struct xenbus_device_id netback_ids[] = {
18329 + { "vif" },
18330 + { "" }
18331 +};
18332 +
18333 +
18334 +static struct xenbus_driver netback = {
18335 + .name = "vif",
18336 + .owner = THIS_MODULE,
18337 + .ids = netback_ids,
18338 + .probe = netback_probe,
18339 + .remove = netback_remove,
18340 + .uevent = netback_uevent,
18341 + .otherend_changed = frontend_changed,
18342 +};
18343 +
18344 +
18345 +void netif_xenbus_init(void)
18346 +{
18347 + xenbus_register_backend(&netback);
18348 +}
18349 Index: head-2008-11-25/drivers/xen/netfront/Makefile
18350 ===================================================================
18351 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
18352 +++ head-2008-11-25/drivers/xen/netfront/Makefile 2007-07-12 08:54:23.000000000 +0200
18353 @@ -0,0 +1,4 @@
18354 +
18355 +obj-$(CONFIG_XEN_NETDEV_FRONTEND) := xennet.o
18356 +
18357 +xennet-objs := netfront.o accel.o
18358 Index: head-2008-11-25/drivers/xen/netfront/accel.c
18359 ===================================================================
18360 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
18361 +++ head-2008-11-25/drivers/xen/netfront/accel.c 2008-08-07 12:44:36.000000000 +0200
18362 @@ -0,0 +1,824 @@
18363 +/******************************************************************************
18364 + * Virtual network driver for conversing with remote driver backends.
18365 + *
18366 + * Copyright (C) 2007 Solarflare Communications, Inc.
18367 + *
18368 + * This program is free software; you can redistribute it and/or
18369 + * modify it under the terms of the GNU General Public License version 2
18370 + * as published by the Free Software Foundation; or, when distributed
18371 + * separately from the Linux kernel or incorporated into other
18372 + * software packages, subject to the following license:
18373 + *
18374 + * Permission is hereby granted, free of charge, to any person obtaining a copy
18375 + * of this source file (the "Software"), to deal in the Software without
18376 + * restriction, including without limitation the rights to use, copy, modify,
18377 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18378 + * and to permit persons to whom the Software is furnished to do so, subject to
18379 + * the following conditions:
18380 + *
18381 + * The above copyright notice and this permission notice shall be included in
18382 + * all copies or substantial portions of the Software.
18383 + *
18384 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18385 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18386 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18387 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18388 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18389 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
18390 + * IN THE SOFTWARE.
18391 + */
18392 +
18393 +#include <linux/netdevice.h>
18394 +#include <linux/skbuff.h>
18395 +#include <linux/list.h>
18396 +#include <linux/mutex.h>
18397 +#include <asm/hypervisor.h>
18398 +#include <xen/xenbus.h>
18399 +
18400 +#include "netfront.h"
18401 +
18402 +#define DPRINTK(fmt, args...) \
18403 + pr_debug("netfront/accel (%s:%d) " fmt, \
18404 + __FUNCTION__, __LINE__, ##args)
18405 +#define IPRINTK(fmt, args...) \
18406 + printk(KERN_INFO "netfront/accel: " fmt, ##args)
18407 +#define WPRINTK(fmt, args...) \
18408 + printk(KERN_WARNING "netfront/accel: " fmt, ##args)
18409 +
18410 +static int netfront_remove_accelerator(struct netfront_info *np,
18411 + struct xenbus_device *dev);
18412 +static int netfront_load_accelerator(struct netfront_info *np,
18413 + struct xenbus_device *dev,
18414 + const char *frontend);
18415 +
18416 +/*
18417 + * List of all netfront accelerator plugin modules available. Each
18418 + * list entry is of type struct netfront_accelerator.
18419 + */
18420 +static struct list_head accelerators_list;
18421 +
18422 +/* Lock to protect access to accelerators_list */
18423 +static spinlock_t accelerators_lock;
18424 +
18425 +/* Workqueue to process acceleration configuration changes */
18426 +struct workqueue_struct *accel_watch_workqueue;
18427 +
18428 +/* Mutex to prevent concurrent loads and suspends, etc. */
18429 +DEFINE_MUTEX(accelerator_mutex);
18430 +
18431 +void netif_init_accel(void)
18432 +{
18433 + INIT_LIST_HEAD(&accelerators_list);
18434 + spin_lock_init(&accelerators_lock);
18435 +
18436 + accel_watch_workqueue = create_workqueue("net_accel");
18437 +}
18438 +
18439 +void netif_exit_accel(void)
18440 +{
18441 + struct netfront_accelerator *accelerator, *tmp;
18442 + unsigned long flags;
18443 +
18444 + flush_workqueue(accel_watch_workqueue);
18445 + destroy_workqueue(accel_watch_workqueue);
18446 +
18447 + spin_lock_irqsave(&accelerators_lock, flags);
18448 +
18449 + list_for_each_entry_safe(accelerator, tmp, &accelerators_list, link) {
18450 + BUG_ON(!list_empty(&accelerator->vif_states));
18451 +
18452 + list_del(&accelerator->link);
18453 + kfree(accelerator->frontend);
18454 + kfree(accelerator);
18455 + }
18456 +
18457 + spin_unlock_irqrestore(&accelerators_lock, flags);
18458 +}
18459 +
18460 +
18461 +/*
18462 + * Watch the configured accelerator and change plugin if it's modified
18463 + */
18464 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
18465 +static void accel_watch_work(struct work_struct *context)
18466 +#else
18467 +static void accel_watch_work(void *context)
18468 +#endif
18469 +{
18470 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
18471 + struct netfront_accel_vif_state *vif_state =
18472 + container_of(context, struct netfront_accel_vif_state,
18473 + accel_work);
18474 +#else
18475 + struct netfront_accel_vif_state *vif_state =
18476 + (struct netfront_accel_vif_state *)context;
18477 +#endif
18478 + struct netfront_info *np = vif_state->np;
18479 + char *accel_frontend;
18480 + int accel_len, rc = -1;
18481 +
18482 + mutex_lock(&accelerator_mutex);
18483 +
18484 + accel_frontend = xenbus_read(XBT_NIL, np->xbdev->otherend,
18485 + "accel-frontend", &accel_len);
18486 + if (IS_ERR(accel_frontend)) {
18487 + accel_frontend = NULL;
18488 + netfront_remove_accelerator(np, np->xbdev);
18489 + } else {
18490 + /* If this is the first time, request the accelerator,
18491 + otherwise only request one if it has changed */
18492 + if (vif_state->accel_frontend == NULL) {
18493 + rc = netfront_load_accelerator(np, np->xbdev,
18494 + accel_frontend);
18495 + } else {
18496 + if (strncmp(vif_state->accel_frontend, accel_frontend,
18497 + accel_len)) {
18498 + netfront_remove_accelerator(np, np->xbdev);
18499 + rc = netfront_load_accelerator(np, np->xbdev,
18500 + accel_frontend);
18501 + }
18502 + }
18503 + }
18504 +
18505 + /* Get rid of previous state and replace with the new name */
18506 + if (vif_state->accel_frontend != NULL)
18507 + kfree(vif_state->accel_frontend);
18508 + vif_state->accel_frontend = accel_frontend;
18509 +
18510 + mutex_unlock(&accelerator_mutex);
18511 +
18512 + if (rc == 0) {
18513 + DPRINTK("requesting module %s\n", accel_frontend);
18514 + request_module("%s", accel_frontend);
18515 + /*
18516 + * Module should now call netfront_accelerator_loaded() once
18517 + * it's up and running, and we can continue from there
18518 + */
18519 + }
18520 +}
18521 +
18522 +
18523 +static void accel_watch_changed(struct xenbus_watch *watch,
18524 + const char **vec, unsigned int len)
18525 +{
18526 + struct netfront_accel_vif_state *vif_state =
18527 + container_of(watch, struct netfront_accel_vif_state,
18528 + accel_watch);
18529 + queue_work(accel_watch_workqueue, &vif_state->accel_work);
18530 +}
18531 +
18532 +
18533 +void netfront_accelerator_add_watch(struct netfront_info *np)
18534 +{
18535 + int err;
18536 +
18537 + /* Check we're not trying to overwrite an existing watch */
18538 + BUG_ON(np->accel_vif_state.accel_watch.node != NULL);
18539 +
18540 + /* Get a watch on the accelerator plugin */
18541 + err = xenbus_watch_path2(np->xbdev, np->xbdev->otherend,
18542 + "accel-frontend",
18543 + &np->accel_vif_state.accel_watch,
18544 + accel_watch_changed);
18545 + if (err) {
18546 + DPRINTK("%s: Failed to register accel watch: %d\n",
18547 + __FUNCTION__, err);
18548 + np->accel_vif_state.accel_watch.node = NULL;
18549 + }
18550 +}
18551 +
18552 +
18553 +static
18554 +void netfront_accelerator_remove_watch(struct netfront_info *np)
18555 +{
18556 + struct netfront_accel_vif_state *vif_state = &np->accel_vif_state;
18557 +
18558 + /* Get rid of watch on accelerator plugin */
18559 + if (vif_state->accel_watch.node != NULL) {
18560 + unregister_xenbus_watch(&vif_state->accel_watch);
18561 + kfree(vif_state->accel_watch.node);
18562 + vif_state->accel_watch.node = NULL;
18563 +
18564 + flush_workqueue(accel_watch_workqueue);
18565 +
18566 + /* Clean up any state left from watch */
18567 + if (vif_state->accel_frontend != NULL) {
18568 + kfree(vif_state->accel_frontend);
18569 + vif_state->accel_frontend = NULL;
18570 + }
18571 + }
18572 +}
18573 +
18574 +
18575 +/*
18576 + * Initialise the accel_vif_state field in the netfront state
18577 + */
18578 +void init_accelerator_vif(struct netfront_info *np,
18579 + struct xenbus_device *dev)
18580 +{
18581 + np->accelerator = NULL;
18582 +
18583 + /* It's assumed that these things don't change */
18584 + np->accel_vif_state.np = np;
18585 + np->accel_vif_state.dev = dev;
18586 +
18587 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
18588 + INIT_WORK(&np->accel_vif_state.accel_work, accel_watch_work);
18589 +#else
18590 + INIT_WORK(&np->accel_vif_state.accel_work, accel_watch_work,
18591 + &np->accel_vif_state);
18592 +#endif
18593 +}
18594 +
18595 +
18596 +/*
18597 + * Compare a frontend description string against an accelerator to see
18598 + * if they match. Would ultimately be nice to replace the string with
18599 + * a unique numeric identifier for each accelerator.
18600 + */
18601 +static int match_accelerator(const char *frontend,
18602 + struct netfront_accelerator *accelerator)
18603 +{
18604 + return strcmp(frontend, accelerator->frontend) == 0;
18605 +}
18606 +
18607 +
18608 +/*
18609 + * Add a frontend vif to the list of vifs that is using a netfront
18610 + * accelerator plugin module.
18611 + */
18612 +static void add_accelerator_vif(struct netfront_accelerator *accelerator,
18613 + struct netfront_info *np)
18614 +{
18615 + unsigned long flags;
18616 +
18617 + /* Need lock to write list */
18618 + spin_lock_irqsave(&accelerator->vif_states_lock, flags);
18619 +
18620 + if (np->accelerator == NULL) {
18621 + np->accelerator = accelerator;
18622 +
18623 + list_add(&np->accel_vif_state.link, &accelerator->vif_states);
18624 + } else {
18625 + /*
18626 + * May get here legitimately if suspend_cancel is
18627 + * called, but in that case configuration should not
18628 + * have changed
18629 + */
18630 + BUG_ON(np->accelerator != accelerator);
18631 + }
18632 +
18633 + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
18634 +}
18635 +
18636 +
18637 +/*
18638 + * Initialise the state to track an accelerator plugin module.
18639 + */
18640 +static int init_accelerator(const char *frontend,
18641 + struct netfront_accelerator **result,
18642 + struct netfront_accel_hooks *hooks)
18643 +{
18644 + struct netfront_accelerator *accelerator =
18645 + kmalloc(sizeof(struct netfront_accelerator), GFP_KERNEL);
18646 + unsigned long flags;
18647 + int frontend_len;
18648 +
18649 + if (!accelerator) {
18650 + DPRINTK("no memory for accelerator\n");
18651 + return -ENOMEM;
18652 + }
18653 +
18654 + frontend_len = strlen(frontend) + 1;
18655 + accelerator->frontend = kmalloc(frontend_len, GFP_KERNEL);
18656 + if (!accelerator->frontend) {
18657 + DPRINTK("no memory for accelerator\n");
18658 + kfree(accelerator);
18659 + return -ENOMEM;
18660 + }
18661 + strlcpy(accelerator->frontend, frontend, frontend_len);
18662 +
18663 + INIT_LIST_HEAD(&accelerator->vif_states);
18664 + spin_lock_init(&accelerator->vif_states_lock);
18665 +
18666 + accelerator->hooks = hooks;
18667 +
18668 + spin_lock_irqsave(&accelerators_lock, flags);
18669 + list_add(&accelerator->link, &accelerators_list);
18670 + spin_unlock_irqrestore(&accelerators_lock, flags);
18671 +
18672 + *result = accelerator;
18673 +
18674 + return 0;
18675 +}
18676 +
18677 +
18678 +/*
18679 + * Modify the hooks stored in the per-vif state to match that in the
18680 + * netfront accelerator's state.
18681 + */
18682 +static void
18683 +accelerator_set_vif_state_hooks(struct netfront_accel_vif_state *vif_state)
18684 +{
18685 + /* This function must be called with the vif_states_lock held */
18686 +
18687 + DPRINTK("%p\n", vif_state);
18688 +
18689 + /* Make sure there are no data path operations going on */
18690 + netif_poll_disable(vif_state->np->netdev);
18691 + netif_tx_lock_bh(vif_state->np->netdev);
18692 +
18693 + vif_state->hooks = vif_state->np->accelerator->hooks;
18694 +
18695 + netif_tx_unlock_bh(vif_state->np->netdev);
18696 + netif_poll_enable(vif_state->np->netdev);
18697 +}
18698 +
18699 +
18700 +static void accelerator_probe_new_vif(struct netfront_info *np,
18701 + struct xenbus_device *dev,
18702 + struct netfront_accelerator *accelerator)
18703 +{
18704 + struct netfront_accel_hooks *hooks;
18705 + unsigned long flags;
18706 +
18707 + DPRINTK("\n");
18708 +
18709 + /* Include this frontend device on the accelerator's list */
18710 + add_accelerator_vif(accelerator, np);
18711 +
18712 + hooks = accelerator->hooks;
18713 +
18714 + if (hooks) {
18715 + if (hooks->new_device(np->netdev, dev) == 0) {
18716 + spin_lock_irqsave
18717 + (&accelerator->vif_states_lock, flags);
18718 +
18719 + accelerator_set_vif_state_hooks(&np->accel_vif_state);
18720 +
18721 + spin_unlock_irqrestore
18722 + (&accelerator->vif_states_lock, flags);
18723 + }
18724 + }
18725 +
18726 + return;
18727 +}
18728 +
18729 +
18730 +/*
18731 + * Request that a particular netfront accelerator plugin is loaded.
18732 + * Usually called as a result of the vif configuration specifying
18733 + * which one to use. Must be called with accelerator_mutex held
18734 + */
18735 +static int netfront_load_accelerator(struct netfront_info *np,
18736 + struct xenbus_device *dev,
18737 + const char *frontend)
18738 +{
18739 + struct netfront_accelerator *accelerator;
18740 + int rc = 0;
18741 +
18742 + DPRINTK(" %s\n", frontend);
18743 +
18744 + /*
18745 + * Look at list of loaded accelerators to see if the requested
18746 + * one is already there
18747 + */
18748 + list_for_each_entry(accelerator, &accelerators_list, link) {
18749 + if (match_accelerator(frontend, accelerator)) {
18750 + accelerator_probe_new_vif(np, dev, accelerator);
18751 + return 0;
18752 + }
18753 + }
18754 +
18755 + /* Couldn't find it, so create a new one and load the module */
18756 + if ((rc = init_accelerator(frontend, &accelerator, NULL)) < 0) {
18757 + return rc;
18758 + }
18759 +
18760 + /* Include this frontend device on the accelerator's list */
18761 + add_accelerator_vif(accelerator, np);
18762 +
18763 + return rc;
18764 +}
18765 +
18766 +
18767 +/*
18768 + * Go through all the netfront vifs and see if they have requested
18769 + * this accelerator. Notify the accelerator plugin of the relevant
18770 + * device if so. Called when an accelerator plugin module is first
18771 + * loaded and connects to netfront.
18772 + */
18773 +static void
18774 +accelerator_probe_vifs(struct netfront_accelerator *accelerator,
18775 + struct netfront_accel_hooks *hooks)
18776 +{
18777 + struct netfront_accel_vif_state *vif_state, *tmp;
18778 + unsigned long flags;
18779 +
18780 + DPRINTK("%p\n", accelerator);
18781 +
18782 + /*
18783 + * Store the hooks for future calls to probe a new device, and
18784 + * to wire into the vif_state once the accelerator plugin is
18785 + * ready to accelerate each vif
18786 + */
18787 + BUG_ON(hooks == NULL);
18788 + accelerator->hooks = hooks;
18789 +
18790 + /*
18791 + * currently hold accelerator_mutex, so don't need
18792 + * vif_states_lock to read the list
18793 + */
18794 + list_for_each_entry_safe(vif_state, tmp, &accelerator->vif_states,
18795 + link) {
18796 + struct netfront_info *np = vif_state->np;
18797 +
18798 + if (hooks->new_device(np->netdev, vif_state->dev) == 0) {
18799 + spin_lock_irqsave
18800 + (&accelerator->vif_states_lock, flags);
18801 +
18802 + accelerator_set_vif_state_hooks(vif_state);
18803 +
18804 + spin_unlock_irqrestore
18805 + (&accelerator->vif_states_lock, flags);
18806 + }
18807 + }
18808 +}
18809 +
18810 +
18811 +/*
18812 + * Called by the netfront accelerator plugin module when it has loaded
18813 + */
18814 +int netfront_accelerator_loaded(int version, const char *frontend,
18815 + struct netfront_accel_hooks *hooks)
18816 +{
18817 + struct netfront_accelerator *accelerator;
18818 +
18819 + if (is_initial_xendomain())
18820 + return -EINVAL;
18821 +
18822 + if (version != NETFRONT_ACCEL_VERSION) {
18823 + if (version > NETFRONT_ACCEL_VERSION) {
18824 + /* Caller has higher version number, leave it
18825 + up to them to decide whether to continue.
18826 + They can re-call with a lower number if
18827 + they're happy to be compatible with us */
18828 + return NETFRONT_ACCEL_VERSION;
18829 + } else {
18830 + /* We have a more recent version than caller.
18831 + Currently reject, but may in future be able
18832 + to be backwardly compatible */
18833 + return -EPROTO;
18834 + }
18835 + }
18836 +
18837 + mutex_lock(&accelerator_mutex);
18838 +
18839 + /*
18840 + * Look through list of accelerators to see if it has already
18841 + * been requested
18842 + */
18843 + list_for_each_entry(accelerator, &accelerators_list, link) {
18844 + if (match_accelerator(frontend, accelerator)) {
18845 + accelerator_probe_vifs(accelerator, hooks);
18846 + goto out;
18847 + }
18848 + }
18849 +
18850 + /*
18851 + * If it wasn't in the list, add it now so that when it is
18852 + * requested the caller will find it
18853 + */
18854 + DPRINTK("Couldn't find matching accelerator (%s)\n",
18855 + frontend);
18856 +
18857 + init_accelerator(frontend, &accelerator, hooks);
18858 +
18859 + out:
18860 + mutex_unlock(&accelerator_mutex);
18861 + return 0;
18862 +}
18863 +EXPORT_SYMBOL_GPL(netfront_accelerator_loaded);
18864 +
18865 +
18866 +/*
18867 + * Remove the hooks from a single vif state.
18868 + */
18869 +static void
18870 +accelerator_remove_single_hook(struct netfront_accelerator *accelerator,
18871 + struct netfront_accel_vif_state *vif_state)
18872 +{
18873 + /* Make sure there are no data path operations going on */
18874 + netif_poll_disable(vif_state->np->netdev);
18875 + netif_tx_lock_bh(vif_state->np->netdev);
18876 +
18877 + /*
18878 + * Remove the hooks, but leave the vif_state on the
18879 + * accelerator's list as that signifies this vif is
18880 + * interested in using that accelerator if it becomes
18881 + * available again
18882 + */
18883 + vif_state->hooks = NULL;
18884 +
18885 + netif_tx_unlock_bh(vif_state->np->netdev);
18886 + netif_poll_enable(vif_state->np->netdev);
18887 +}
18888 +
18889 +
18890 +/*
18891 + * Safely remove the accelerator function hooks from a netfront state.
18892 + */
18893 +static void accelerator_remove_hooks(struct netfront_accelerator *accelerator)
18894 +{
18895 + struct netfront_accel_hooks *hooks;
18896 + struct netfront_accel_vif_state *vif_state, *tmp;
18897 + unsigned long flags;
18898 +
18899 + /* Mutex is held so don't need vif_states_lock to iterate list */
18900 + list_for_each_entry_safe(vif_state, tmp,
18901 + &accelerator->vif_states,
18902 + link) {
18903 + spin_lock_irqsave(&accelerator->vif_states_lock, flags);
18904 +
18905 + if (vif_state->hooks) {
18906 + hooks = vif_state->hooks;
18907 +
18908 + /* Last chance to get statistics from the accelerator */
18909 + hooks->get_stats(vif_state->np->netdev,
18910 + &vif_state->np->stats);
18911 +
18912 + spin_unlock_irqrestore(&accelerator->vif_states_lock,
18913 + flags);
18914 +
18915 + accelerator_remove_single_hook(accelerator, vif_state);
18916 +
18917 + accelerator->hooks->remove(vif_state->dev);
18918 + } else {
18919 + spin_unlock_irqrestore(&accelerator->vif_states_lock,
18920 + flags);
18921 + }
18922 + }
18923 +
18924 + accelerator->hooks = NULL;
18925 +}
18926 +
18927 +
18928 +/*
18929 + * Called by a netfront accelerator when it is unloaded. This safely
18930 + * removes the hooks into the plugin and blocks until all devices have
18931 + * finished using it, so on return it is safe to unload.
18932 + */
18933 +void netfront_accelerator_stop(const char *frontend)
18934 +{
18935 + struct netfront_accelerator *accelerator;
18936 + unsigned long flags;
18937 +
18938 + mutex_lock(&accelerator_mutex);
18939 + spin_lock_irqsave(&accelerators_lock, flags);
18940 +
18941 + list_for_each_entry(accelerator, &accelerators_list, link) {
18942 + if (match_accelerator(frontend, accelerator)) {
18943 + spin_unlock_irqrestore(&accelerators_lock, flags);
18944 +
18945 + accelerator_remove_hooks(accelerator);
18946 +
18947 + goto out;
18948 + }
18949 + }
18950 + spin_unlock_irqrestore(&accelerators_lock, flags);
18951 + out:
18952 + mutex_unlock(&accelerator_mutex);
18953 +}
18954 +EXPORT_SYMBOL_GPL(netfront_accelerator_stop);
18955 +
18956 +
18957 +/* Helper for call_remove and do_suspend */
18958 +static int do_remove(struct netfront_info *np, struct xenbus_device *dev,
18959 + unsigned long *lock_flags)
18960 +{
18961 + struct netfront_accelerator *accelerator = np->accelerator;
18962 + struct netfront_accel_hooks *hooks;
18963 + int rc = 0;
18964 +
18965 + if (np->accel_vif_state.hooks) {
18966 + hooks = np->accel_vif_state.hooks;
18967 +
18968 + /* Last chance to get statistics from the accelerator */
18969 + hooks->get_stats(np->netdev, &np->stats);
18970 +
18971 + spin_unlock_irqrestore(&accelerator->vif_states_lock,
18972 + *lock_flags);
18973 +
18974 + /*
18975 + * Try and do the opposite of accelerator_probe_new_vif
18976 + * to ensure there's no state pointing back at the
18977 + * netdev
18978 + */
18979 + accelerator_remove_single_hook(accelerator,
18980 + &np->accel_vif_state);
18981 +
18982 + rc = accelerator->hooks->remove(dev);
18983 +
18984 + spin_lock_irqsave(&accelerator->vif_states_lock, *lock_flags);
18985 + }
18986 +
18987 + return rc;
18988 +}
18989 +
18990 +
18991 +static int netfront_remove_accelerator(struct netfront_info *np,
18992 + struct xenbus_device *dev)
18993 +{
18994 + struct netfront_accelerator *accelerator;
18995 + struct netfront_accel_vif_state *tmp_vif_state;
18996 + unsigned long flags;
18997 + int rc = 0;
18998 +
18999 + /* Check that we've got a device that was accelerated */
19000 + if (np->accelerator == NULL)
19001 + return rc;
19002 +
19003 + accelerator = np->accelerator;
19004 +
19005 + spin_lock_irqsave(&accelerator->vif_states_lock, flags);
19006 +
19007 + list_for_each_entry(tmp_vif_state, &accelerator->vif_states,
19008 + link) {
19009 + if (tmp_vif_state == &np->accel_vif_state) {
19010 + list_del(&np->accel_vif_state.link);
19011 + break;
19012 + }
19013 + }
19014 +
19015 + rc = do_remove(np, dev, &flags);
19016 +
19017 + np->accelerator = NULL;
19018 +
19019 + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
19020 +
19021 + return rc;
19022 +}
19023 +
19024 +
19025 +int netfront_accelerator_call_remove(struct netfront_info *np,
19026 + struct xenbus_device *dev)
19027 +{
19028 + int rc;
19029 + netfront_accelerator_remove_watch(np);
19030 + mutex_lock(&accelerator_mutex);
19031 + rc = netfront_remove_accelerator(np, dev);
19032 + mutex_unlock(&accelerator_mutex);
19033 + return rc;
19034 +}
19035 +
19036 +
19037 +int netfront_accelerator_suspend(struct netfront_info *np,
19038 + struct xenbus_device *dev)
19039 +{
19040 + unsigned long flags;
19041 + int rc = 0;
19042 +
19043 + netfront_accelerator_remove_watch(np);
19044 +
19045 + mutex_lock(&accelerator_mutex);
19046 +
19047 + /* Check that we've got a device that was accelerated */
19048 + if (np->accelerator == NULL)
19049 + goto out;
19050 +
19051 + /*
19052 + * Call the remove accelerator hook, but leave the vif_state
19053 + * on the accelerator's list in case there is a suspend_cancel.
19054 + */
19055 + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags);
19056 +
19057 + rc = do_remove(np, dev, &flags);
19058 +
19059 + spin_unlock_irqrestore(&np->accelerator->vif_states_lock, flags);
19060 + out:
19061 + mutex_unlock(&accelerator_mutex);
19062 + return rc;
19063 +}
19064 +
19065 +
19066 +int netfront_accelerator_suspend_cancel(struct netfront_info *np,
19067 + struct xenbus_device *dev)
19068 +{
19069 + /*
19070 + * Setting the watch will cause it to fire and probe the
19071 + * accelerator, so no need to call accelerator_probe_new_vif()
19072 + * directly here
19073 + */
19074 + if (dev->state == XenbusStateConnected)
19075 + netfront_accelerator_add_watch(np);
19076 + return 0;
19077 +}
19078 +
19079 +
19080 +void netfront_accelerator_resume(struct netfront_info *np,
19081 + struct xenbus_device *dev)
19082 +{
19083 + struct netfront_accel_vif_state *accel_vif_state = NULL;
19084 + spinlock_t *vif_states_lock;
19085 + unsigned long flags;
19086 +
19087 + mutex_lock(&accelerator_mutex);
19088 +
19089 + /* Check that we've got a device that was accelerated */
19090 + if (np->accelerator == NULL)
19091 + goto out;
19092 +
19093 + /* Find the vif_state from the accelerator's list */
19094 + list_for_each_entry(accel_vif_state, &np->accelerator->vif_states,
19095 + link) {
19096 + if (accel_vif_state->dev == dev) {
19097 + BUG_ON(accel_vif_state != &np->accel_vif_state);
19098 +
19099 + vif_states_lock = &np->accelerator->vif_states_lock;
19100 + spin_lock_irqsave(vif_states_lock, flags);
19101 +
19102 + /*
19103 + * Remove it from the accelerator's list so
19104 + * state is consistent for probing new vifs
19105 + * when they get connected
19106 + */
19107 + list_del(&accel_vif_state->link);
19108 + np->accelerator = NULL;
19109 +
19110 + spin_unlock_irqrestore(vif_states_lock, flags);
19111 +
19112 + break;
19113 + }
19114 + }
19115 +
19116 + out:
19117 + mutex_unlock(&accelerator_mutex);
19118 + return;
19119 +}
19120 +
19121 +
19122 +int netfront_check_accelerator_queue_ready(struct net_device *dev,
19123 + struct netfront_info *np)
19124 +{
19125 + struct netfront_accelerator *accelerator;
19126 + struct netfront_accel_hooks *hooks;
19127 + int rc = 1;
19128 + unsigned long flags;
19129 +
19130 + accelerator = np->accelerator;
19131 +
19132 + /* Call the check_ready accelerator hook. */
19133 + if (np->accel_vif_state.hooks && accelerator) {
19134 + spin_lock_irqsave(&accelerator->vif_states_lock, flags);
19135 + hooks = np->accel_vif_state.hooks;
19136 + if (hooks && np->accelerator == accelerator)
19137 + rc = np->accel_vif_state.hooks->check_ready(dev);
19138 + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
19139 + }
19140 +
19141 + return rc;
19142 +}
19143 +
19144 +
19145 +void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np,
19146 + struct net_device *dev)
19147 +{
19148 + struct netfront_accelerator *accelerator;
19149 + struct netfront_accel_hooks *hooks;
19150 + unsigned long flags;
19151 +
19152 + accelerator = np->accelerator;
19153 +
19154 + /* Call the stop_napi_interrupts accelerator hook. */
19155 + if (np->accel_vif_state.hooks && accelerator != NULL) {
19156 + spin_lock_irqsave(&accelerator->vif_states_lock, flags);
19157 + hooks = np->accel_vif_state.hooks;
19158 + if (hooks && np->accelerator == accelerator)
19159 + np->accel_vif_state.hooks->stop_napi_irq(dev);
19160 + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
19161 + }
19162 +}
19163 +
19164 +
19165 +int netfront_accelerator_call_get_stats(struct netfront_info *np,
19166 + struct net_device *dev)
19167 +{
19168 + struct netfront_accelerator *accelerator;
19169 + struct netfront_accel_hooks *hooks;
19170 + unsigned long flags;
19171 + int rc = 0;
19172 +
19173 + accelerator = np->accelerator;
19174 +
19175 + /* Call the get_stats accelerator hook. */
19176 + if (np->accel_vif_state.hooks && accelerator != NULL) {
19177 + spin_lock_irqsave(&accelerator->vif_states_lock, flags);
19178 + hooks = np->accel_vif_state.hooks;
19179 + if (hooks && np->accelerator == accelerator)
19180 + rc = np->accel_vif_state.hooks->get_stats(dev,
19181 + &np->stats);
19182 + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
19183 + }
19184 + return rc;
19185 +}
19186 +
19187 Index: head-2008-11-25/drivers/xen/netfront/netfront.c
19188 ===================================================================
19189 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
19190 +++ head-2008-11-25/drivers/xen/netfront/netfront.c 2008-07-21 11:00:33.000000000 +0200
19191 @@ -0,0 +1,2240 @@
19192 +/******************************************************************************
19193 + * Virtual network driver for conversing with remote driver backends.
19194 + *
19195 + * Copyright (c) 2002-2005, K A Fraser
19196 + * Copyright (c) 2005, XenSource Ltd
19197 + * Copyright (C) 2007 Solarflare Communications, Inc.
19198 + *
19199 + * This program is free software; you can redistribute it and/or
19200 + * modify it under the terms of the GNU General Public License version 2
19201 + * as published by the Free Software Foundation; or, when distributed
19202 + * separately from the Linux kernel or incorporated into other
19203 + * software packages, subject to the following license:
19204 + *
19205 + * Permission is hereby granted, free of charge, to any person obtaining a copy
19206 + * of this source file (the "Software"), to deal in the Software without
19207 + * restriction, including without limitation the rights to use, copy, modify,
19208 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19209 + * and to permit persons to whom the Software is furnished to do so, subject to
19210 + * the following conditions:
19211 + *
19212 + * The above copyright notice and this permission notice shall be included in
19213 + * all copies or substantial portions of the Software.
19214 + *
19215 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19216 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19217 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19218 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19219 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19220 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19221 + * IN THE SOFTWARE.
19222 + */
19223 +
19224 +#include <linux/module.h>
19225 +#include <linux/version.h>
19226 +#include <linux/kernel.h>
19227 +#include <linux/sched.h>
19228 +#include <linux/slab.h>
19229 +#include <linux/string.h>
19230 +#include <linux/errno.h>
19231 +#include <linux/netdevice.h>
19232 +#include <linux/inetdevice.h>
19233 +#include <linux/etherdevice.h>
19234 +#include <linux/skbuff.h>
19235 +#include <linux/init.h>
19236 +#include <linux/bitops.h>
19237 +#include <linux/ethtool.h>
19238 +#include <linux/in.h>
19239 +#include <linux/if_ether.h>
19240 +#include <linux/io.h>
19241 +#include <linux/moduleparam.h>
19242 +#include <net/sock.h>
19243 +#include <net/pkt_sched.h>
19244 +#include <net/arp.h>
19245 +#include <net/route.h>
19246 +#include <asm/uaccess.h>
19247 +#include <xen/evtchn.h>
19248 +#include <xen/xenbus.h>
19249 +#include <xen/interface/io/netif.h>
19250 +#include <xen/interface/memory.h>
19251 +#include <xen/balloon.h>
19252 +#include <asm/page.h>
19253 +#include <asm/maddr.h>
19254 +#include <asm/uaccess.h>
19255 +#include <xen/interface/grant_table.h>
19256 +#include <xen/gnttab.h>
19257 +
19258 +struct netfront_cb {
19259 + struct page *page;
19260 + unsigned offset;
19261 +};
19262 +
19263 +#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
19264 +
19265 +#include "netfront.h"
19266 +
19267 +/*
19268 + * Mutually-exclusive module options to select receive data path:
19269 + * rx_copy : Packets are copied by network backend into local memory
19270 + * rx_flip : Page containing packet data is transferred to our ownership
19271 + * For fully-virtualised guests there is no option - copying must be used.
19272 + * For paravirtualised guests, flipping is the default.
19273 + */
19274 +#ifdef CONFIG_XEN
19275 +static int MODPARM_rx_copy = 0;
19276 +module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
19277 +MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
19278 +static int MODPARM_rx_flip = 0;
19279 +module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
19280 +MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
19281 +#else
19282 +static const int MODPARM_rx_copy = 1;
19283 +static const int MODPARM_rx_flip = 0;
19284 +#endif
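+
+/*
+ * For example, assuming the frontend is built as the "xennet" module (as
+ * in the Makefile above), the copying receive path can be requested at
+ * load time with something like
+ *
+ *   modprobe xennet rx_copy=1
+ *
+ * or, when built in, with xennet.rx_copy=1 on the kernel command line.
+ * As noted above, rx_copy and rx_flip are mutually exclusive.
+ */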
19285 +
19286 +#define RX_COPY_THRESHOLD 256
19287 +
19288 +/* If we don't have GSO, fake things up so that we never try to use it. */
19289 +#if defined(NETIF_F_GSO)
19290 +#define HAVE_GSO 1
19291 +#define HAVE_TSO 1 /* TSO is a subset of GSO */
19292 +#define HAVE_CSUM_OFFLOAD 1
19293 +static inline void dev_disable_gso_features(struct net_device *dev)
19294 +{
19295 + /* Turn off all GSO bits except ROBUST. */
19296 + dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
19297 + dev->features |= NETIF_F_GSO_ROBUST;
19298 +}
19299 +#elif defined(NETIF_F_TSO)
19300 +#define HAVE_GSO 0
19301 +#define HAVE_TSO 1
19302 +
19303 +/* Some older kernels cannot cope with incorrect checksums,
19304 + * particularly in netfilter. I'm not sure there is 100% correlation
19305 + * with the presence of NETIF_F_TSO but it appears to be a good first
19306 + * approximation.
19307 + */
19308 +#define HAVE_CSUM_OFFLOAD 0
19309 +
19310 +#define gso_size tso_size
19311 +#define gso_segs tso_segs
19312 +static inline void dev_disable_gso_features(struct net_device *dev)
19313 +{
19314 + /* Turn off all TSO bits. */
19315 + dev->features &= ~NETIF_F_TSO;
19316 +}
19317 +static inline int skb_is_gso(const struct sk_buff *skb)
19318 +{
19319 + return skb_shinfo(skb)->tso_size;
19320 +}
19321 +static inline int skb_gso_ok(struct sk_buff *skb, int features)
19322 +{
19323 + return (features & NETIF_F_TSO);
19324 +}
19325 +
19326 +static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
19327 +{
19328 + return skb_is_gso(skb) &&
19329 + (!skb_gso_ok(skb, dev->features) ||
19330 + unlikely(skb->ip_summed != CHECKSUM_HW));
19331 +}
19332 +#else
19333 +#define HAVE_GSO 0
19334 +#define HAVE_TSO 0
19335 +#define HAVE_CSUM_OFFLOAD 0
19336 +#define netif_needs_gso(dev, skb) 0
19337 +#define dev_disable_gso_features(dev) ((void)0)
19338 +#define ethtool_op_set_tso(dev, data) (-ENOSYS)
19339 +#endif
19340 +
19341 +#define GRANT_INVALID_REF 0
19342 +
19343 +struct netfront_rx_info {
19344 + struct netif_rx_response rx;
19345 + struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
19346 +};
19347 +
19348 +/*
19349 + * Implement our own carrier flag: the network stack's version causes delays
19350 + * when the carrier is re-enabled (in particular, dev_activate() may not
19351 + * immediately be called, which can cause packet loss).
19352 + */
19353 +#define netfront_carrier_on(netif) ((netif)->carrier = 1)
19354 +#define netfront_carrier_off(netif) ((netif)->carrier = 0)
19355 +#define netfront_carrier_ok(netif) ((netif)->carrier)
19356 +
19357 +/*
19358 + * Access macros for acquiring freeing slots in tx_skbs[].
19359 + */
19360 +
19361 +static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
19362 +{
19363 + list[id] = list[0];
19364 + list[0] = (void *)(unsigned long)id;
19365 +}
19366 +
19367 +static inline unsigned short get_id_from_freelist(struct sk_buff **list)
19368 +{
19369 + unsigned int id = (unsigned int)(unsigned long)list[0];
19370 + list[0] = list[id];
19371 + return id;
19372 +}
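+
+/*
+ * In the two helpers above, entry 0 of the array doubles as the head of a
+ * singly-linked free list: each free slot stores the index of the next
+ * free slot, cast to a pointer, so no separate bookkeeping is needed.
+ */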
19373 +
19374 +static inline int xennet_rxidx(RING_IDX idx)
19375 +{
19376 + return idx & (NET_RX_RING_SIZE - 1);
19377 +}
19378 +
19379 +static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
19380 + RING_IDX ri)
19381 +{
19382 + int i = xennet_rxidx(ri);
19383 + struct sk_buff *skb = np->rx_skbs[i];
19384 + np->rx_skbs[i] = NULL;
19385 + return skb;
19386 +}
19387 +
19388 +static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
19389 + RING_IDX ri)
19390 +{
19391 + int i = xennet_rxidx(ri);
19392 + grant_ref_t ref = np->grant_rx_ref[i];
19393 + np->grant_rx_ref[i] = GRANT_INVALID_REF;
19394 + return ref;
19395 +}
19396 +
19397 +#define DPRINTK(fmt, args...) \
19398 + pr_debug("netfront (%s:%d) " fmt, \
19399 + __FUNCTION__, __LINE__, ##args)
19400 +#define IPRINTK(fmt, args...) \
19401 + printk(KERN_INFO "netfront: " fmt, ##args)
19402 +#define WPRINTK(fmt, args...) \
19403 + printk(KERN_WARNING "netfront: " fmt, ##args)
19404 +
19405 +static int setup_device(struct xenbus_device *, struct netfront_info *);
19406 +static struct net_device *create_netdev(struct xenbus_device *);
19407 +
19408 +static void end_access(int, void *);
19409 +static void netif_disconnect_backend(struct netfront_info *);
19410 +
19411 +static int network_connect(struct net_device *);
19412 +static void network_tx_buf_gc(struct net_device *);
19413 +static void network_alloc_rx_buffers(struct net_device *);
19414 +static void send_fake_arp(struct net_device *);
19415 +
19416 +static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
19417 +
19418 +#ifdef CONFIG_SYSFS
19419 +static int xennet_sysfs_addif(struct net_device *netdev);
19420 +static void xennet_sysfs_delif(struct net_device *netdev);
19421 +#else /* !CONFIG_SYSFS */
19422 +#define xennet_sysfs_addif(dev) (0)
19423 +#define xennet_sysfs_delif(dev) do { } while(0)
19424 +#endif
19425 +
19426 +static inline int xennet_can_sg(struct net_device *dev)
19427 +{
19428 + return dev->features & NETIF_F_SG;
19429 +}
19430 +
19431 +/**
19432 + * Entry point to this code when a new device is created. Allocate the basic
19433 + * structures and the ring buffers for communication with the backend, and
19434 + * inform the backend of the appropriate details for those.
19435 + */
19436 +static int __devinit netfront_probe(struct xenbus_device *dev,
19437 + const struct xenbus_device_id *id)
19438 +{
19439 + int err;
19440 + struct net_device *netdev;
19441 + struct netfront_info *info;
19442 +
19443 + netdev = create_netdev(dev);
19444 + if (IS_ERR(netdev)) {
19445 + err = PTR_ERR(netdev);
19446 + xenbus_dev_fatal(dev, err, "creating netdev");
19447 + return err;
19448 + }
19449 +
19450 + info = netdev_priv(netdev);
19451 + dev->dev.driver_data = info;
19452 +
19453 + err = register_netdev(info->netdev);
19454 + if (err) {
19455 + printk(KERN_WARNING "%s: register_netdev err=%d\n",
19456 + __FUNCTION__, err);
19457 + goto fail;
19458 + }
19459 +
19460 + err = xennet_sysfs_addif(info->netdev);
19461 + if (err) {
19462 + unregister_netdev(info->netdev);
19463 + printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
19464 + __FUNCTION__, err);
19465 + goto fail;
19466 + }
19467 +
19468 + return 0;
19469 +
19470 + fail:
19471 + free_netdev(netdev);
19472 + dev->dev.driver_data = NULL;
19473 + return err;
19474 +}
19475 +
19476 +static int __devexit netfront_remove(struct xenbus_device *dev)
19477 +{
19478 + struct netfront_info *info = dev->dev.driver_data;
19479 +
19480 + DPRINTK("%s\n", dev->nodename);
19481 +
19482 + netfront_accelerator_call_remove(info, dev);
19483 +
19484 + netif_disconnect_backend(info);
19485 +
19486 + del_timer_sync(&info->rx_refill_timer);
19487 +
19488 + xennet_sysfs_delif(info->netdev);
19489 +
19490 + unregister_netdev(info->netdev);
19491 +
19492 + free_netdev(info->netdev);
19493 +
19494 + return 0;
19495 +}
19496 +
19497 +
19498 +static int netfront_suspend(struct xenbus_device *dev)
19499 +{
19500 + struct netfront_info *info = dev->dev.driver_data;
19501 + return netfront_accelerator_suspend(info, dev);
19502 +}
19503 +
19504 +
19505 +static int netfront_suspend_cancel(struct xenbus_device *dev)
19506 +{
19507 + struct netfront_info *info = dev->dev.driver_data;
19508 + return netfront_accelerator_suspend_cancel(info, dev);
19509 +}
19510 +
19511 +
19512 +/**
19513 + * We are reconnecting to the backend, due to a suspend/resume, or a backend
19514 + * driver restart. We tear down our netif structure and recreate it, but
19515 + * leave the device-layer structures intact so that this is transparent to the
19516 + * rest of the kernel.
19517 + */
19518 +static int netfront_resume(struct xenbus_device *dev)
19519 +{
19520 + struct netfront_info *info = dev->dev.driver_data;
19521 +
19522 + DPRINTK("%s\n", dev->nodename);
19523 +
19524 + netfront_accelerator_resume(info, dev);
19525 +
19526 + netif_disconnect_backend(info);
19527 + return 0;
19528 +}
19529 +
19530 +static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
19531 +{
19532 + char *s, *e, *macstr;
19533 + int i;
19534 +
19535 + macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
19536 + if (IS_ERR(macstr))
19537 + return PTR_ERR(macstr);
19538 +
19539 + for (i = 0; i < ETH_ALEN; i++) {
19540 + mac[i] = simple_strtoul(s, &e, 16);
19541 + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
19542 + kfree(macstr);
19543 + return -ENOENT;
19544 + }
19545 + s = e+1;
19546 + }
19547 +
19548 + kfree(macstr);
19549 + return 0;
19550 +}
19551 +
19552 +/* Common code used when first setting up, and when resuming. */
19553 +static int talk_to_backend(struct xenbus_device *dev,
19554 + struct netfront_info *info)
19555 +{
19556 + const char *message;
19557 + struct xenbus_transaction xbt;
19558 + int err;
19559 +
19560 + /* Read mac only in the first setup. */
19561 + if (!is_valid_ether_addr(info->mac)) {
19562 + err = xen_net_read_mac(dev, info->mac);
19563 + if (err) {
19564 + xenbus_dev_fatal(dev, err, "parsing %s/mac",
19565 + dev->nodename);
19566 + goto out;
19567 + }
19568 + }
19569 +
19570 + /* Create shared ring, alloc event channel. */
19571 + err = setup_device(dev, info);
19572 + if (err)
19573 + goto out;
19574 +
19575 + /* This will load an accelerator if one is configured when the
19576 + * watch fires */
19577 + netfront_accelerator_add_watch(info);
19578 +
19579 +again:
19580 + err = xenbus_transaction_start(&xbt);
19581 + if (err) {
19582 + xenbus_dev_fatal(dev, err, "starting transaction");
19583 + goto destroy_ring;
19584 + }
19585 +
19586 + err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
19587 + info->tx_ring_ref);
19588 + if (err) {
19589 + message = "writing tx ring-ref";
19590 + goto abort_transaction;
19591 + }
19592 + err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
19593 + info->rx_ring_ref);
19594 + if (err) {
19595 + message = "writing rx ring-ref";
19596 + goto abort_transaction;
19597 + }
19598 + err = xenbus_printf(xbt, dev->nodename,
19599 + "event-channel", "%u",
19600 + irq_to_evtchn_port(info->irq));
19601 + if (err) {
19602 + message = "writing event-channel";
19603 + goto abort_transaction;
19604 + }
19605 +
19606 + err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
19607 + info->copying_receiver);
19608 + if (err) {
19609 + message = "writing request-rx-copy";
19610 + goto abort_transaction;
19611 + }
19612 +
19613 + err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
19614 + if (err) {
19615 + message = "writing feature-rx-notify";
19616 + goto abort_transaction;
19617 + }
19618 +
19619 + err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload",
19620 + "%d", !HAVE_CSUM_OFFLOAD);
19621 + if (err) {
19622 + message = "writing feature-no-csum-offload";
19623 + goto abort_transaction;
19624 + }
19625 +
19626 + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
19627 + if (err) {
19628 + message = "writing feature-sg";
19629 + goto abort_transaction;
19630 + }
19631 +
19632 + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d",
19633 + HAVE_TSO);
19634 + if (err) {
19635 + message = "writing feature-gso-tcpv4";
19636 + goto abort_transaction;
19637 + }
19638 +
19639 + err = xenbus_transaction_end(xbt, 0);
19640 + if (err) {
19641 + if (err == -EAGAIN)
19642 + goto again;
19643 + xenbus_dev_fatal(dev, err, "completing transaction");
19644 + goto destroy_ring;
19645 + }
19646 +
19647 + return 0;
19648 +
19649 + abort_transaction:
19650 + xenbus_transaction_end(xbt, 1);
19651 + xenbus_dev_fatal(dev, err, "%s", message);
19652 + destroy_ring:
19653 + netfront_accelerator_call_remove(info, dev);
19654 + netif_disconnect_backend(info);
19655 + out:
19656 + return err;
19657 +}
19658 +
19659 +static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
19660 +{
19661 + struct netif_tx_sring *txs;
19662 + struct netif_rx_sring *rxs;
19663 + int err;
19664 + struct net_device *netdev = info->netdev;
19665 +
19666 + info->tx_ring_ref = GRANT_INVALID_REF;
19667 + info->rx_ring_ref = GRANT_INVALID_REF;
19668 + info->rx.sring = NULL;
19669 + info->tx.sring = NULL;
19670 + info->irq = 0;
19671 +
19672 + txs = (struct netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
19673 + if (!txs) {
19674 + err = -ENOMEM;
19675 + xenbus_dev_fatal(dev, err, "allocating tx ring page");
19676 + goto fail;
19677 + }
19678 + SHARED_RING_INIT(txs);
19679 + FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
19680 +
19681 + err = xenbus_grant_ring(dev, virt_to_mfn(txs));
19682 + if (err < 0) {
19683 + free_page((unsigned long)txs);
19684 + goto fail;
19685 + }
19686 + info->tx_ring_ref = err;
19687 +
19688 + rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
19689 + if (!rxs) {
19690 + err = -ENOMEM;
19691 + xenbus_dev_fatal(dev, err, "allocating rx ring page");
19692 + goto fail;
19693 + }
19694 + SHARED_RING_INIT(rxs);
19695 + FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
19696 +
19697 + err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
19698 + if (err < 0) {
19699 + free_page((unsigned long)rxs);
19700 + goto fail;
19701 + }
19702 + info->rx_ring_ref = err;
19703 +
19704 + memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
19705 +
19706 + err = bind_listening_port_to_irqhandler(
19707 + dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name,
19708 + netdev);
19709 + if (err < 0)
19710 + goto fail;
19711 + info->irq = err;
19712 +
19713 + return 0;
19714 +
19715 + fail:
19716 + return err;
19717 +}
19718 +
19719 +/**
19720 + * Callback received when the backend's state changes.
19721 + */
19722 +static void backend_changed(struct xenbus_device *dev,
19723 + enum xenbus_state backend_state)
19724 +{
19725 + struct netfront_info *np = dev->dev.driver_data;
19726 + struct net_device *netdev = np->netdev;
19727 +
19728 + DPRINTK("%s\n", xenbus_strstate(backend_state));
19729 +
19730 + switch (backend_state) {
19731 + case XenbusStateInitialising:
19732 + case XenbusStateInitialised:
19733 + case XenbusStateConnected:
19734 + case XenbusStateReconfiguring:
19735 + case XenbusStateReconfigured:
19736 + case XenbusStateUnknown:
19737 + case XenbusStateClosed:
19738 + break;
19739 +
19740 + case XenbusStateInitWait:
19741 + if (dev->state != XenbusStateInitialising)
19742 + break;
19743 + if (network_connect(netdev) != 0)
19744 + break;
19745 + xenbus_switch_state(dev, XenbusStateConnected);
19746 + send_fake_arp(netdev);
19747 + break;
19748 +
19749 + case XenbusStateClosing:
19750 + xenbus_frontend_closed(dev);
19751 + break;
19752 + }
19753 +}
19754 +
19755 +/** Send a packet on a net device to encourage switches to learn the
19756 + * MAC. We send a fake ARP reply.
19757 + *
19758 + * @param dev device
19759 + * @return void
19760 + */
19761 +static void send_fake_arp(struct net_device *dev)
19762 +{
19763 +#ifdef CONFIG_INET
19764 + struct sk_buff *skb;
19765 + u32 src_ip, dst_ip;
19766 +
19767 + dst_ip = INADDR_BROADCAST;
19768 + src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
19769 +
19770 + /* No IP? Then nothing to do. */
19771 + if (src_ip == 0)
19772 + return;
19773 +
19774 + skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
19775 + dst_ip, dev, src_ip,
19776 + /*dst_hw*/ NULL, /*src_hw*/ NULL,
19777 + /*target_hw*/ dev->dev_addr);
19778 + if (skb == NULL)
19779 + return;
19780 +
19781 + dev_queue_xmit(skb);
19782 +#endif
19783 +}
19784 +
19785 +static inline int netfront_tx_slot_available(struct netfront_info *np)
19786 +{
19787 + return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
19788 + (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
19789 +}
19790 +
19791 +
19792 +static inline void network_maybe_wake_tx(struct net_device *dev)
19793 +{
19794 + struct netfront_info *np = netdev_priv(dev);
19795 +
19796 + if (unlikely(netif_queue_stopped(dev)) &&
19797 + netfront_tx_slot_available(np) &&
19798 + likely(netif_running(dev)) &&
19799 + netfront_check_accelerator_queue_ready(dev, np))
19800 + netif_wake_queue(dev);
19801 +}
19802 +
19803 +
19804 +int netfront_check_queue_ready(struct net_device *dev)
19805 +{
19806 + struct netfront_info *np = netdev_priv(dev);
19807 +
19808 + return unlikely(netif_queue_stopped(dev)) &&
19809 + netfront_tx_slot_available(np) &&
19810 + likely(netif_running(dev));
19811 +}
19812 +EXPORT_SYMBOL(netfront_check_queue_ready);
19813 +
19814 +
19815 +static int network_open(struct net_device *dev)
19816 +{
19817 + struct netfront_info *np = netdev_priv(dev);
19818 +
19819 + memset(&np->stats, 0, sizeof(np->stats));
19820 +
19821 + spin_lock_bh(&np->rx_lock);
19822 + if (netfront_carrier_ok(np)) {
19823 + network_alloc_rx_buffers(dev);
19824 + np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
19825 + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){
19826 + netfront_accelerator_call_stop_napi_irq(np, dev);
19827 +
19828 + netif_rx_schedule(dev);
19829 + }
19830 + }
19831 + spin_unlock_bh(&np->rx_lock);
19832 +
19833 + network_maybe_wake_tx(dev);
19834 +
19835 + return 0;
19836 +}
19837 +
19838 +static void network_tx_buf_gc(struct net_device *dev)
19839 +{
19840 + RING_IDX cons, prod;
19841 + unsigned short id;
19842 + struct netfront_info *np = netdev_priv(dev);
19843 + struct sk_buff *skb;
19844 +
19845 + BUG_ON(!netfront_carrier_ok(np));
19846 +
19847 + do {
19848 + prod = np->tx.sring->rsp_prod;
19849 + rmb(); /* Ensure we see responses up to 'rp'. */
19850 +
19851 + for (cons = np->tx.rsp_cons; cons != prod; cons++) {
19852 + struct netif_tx_response *txrsp;
19853 +
19854 + txrsp = RING_GET_RESPONSE(&np->tx, cons);
19855 + if (txrsp->status == NETIF_RSP_NULL)
19856 + continue;
19857 +
19858 + id = txrsp->id;
19859 + skb = np->tx_skbs[id];
19860 + if (unlikely(gnttab_query_foreign_access(
19861 + np->grant_tx_ref[id]) != 0)) {
19862 + printk(KERN_ALERT "network_tx_buf_gc: warning "
19863 + "-- grant still in use by backend "
19864 + "domain.\n");
19865 + BUG();
19866 + }
19867 + gnttab_end_foreign_access_ref(np->grant_tx_ref[id]);
19868 + gnttab_release_grant_reference(
19869 + &np->gref_tx_head, np->grant_tx_ref[id]);
19870 + np->grant_tx_ref[id] = GRANT_INVALID_REF;
19871 + add_id_to_freelist(np->tx_skbs, id);
19872 + dev_kfree_skb_irq(skb);
19873 + }
19874 +
19875 + np->tx.rsp_cons = prod;
19876 +
19877 + /*
19878 + * Set a new event, then check for race with update of tx_cons.
19879 + * Note that it is essential to schedule a callback, no matter
19880 + * how few buffers are pending. Even if there is space in the
19881 + * transmit ring, higher layers may be blocked because too much
19882 + * data is outstanding: in such cases notification from Xen is
19883 + * likely to be the only kick that we'll get.
19884 + */
19885 + np->tx.sring->rsp_event =
19886 + prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
19887 + mb();
19888 + } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
19889 +
19890 + network_maybe_wake_tx(dev);
19891 +}
19892 +
19893 +static void rx_refill_timeout(unsigned long data)
19894 +{
19895 + struct net_device *dev = (struct net_device *)data;
19896 + struct netfront_info *np = netdev_priv(dev);
19897 +
19898 + netfront_accelerator_call_stop_napi_irq(np, dev);
19899 +
19900 + netif_rx_schedule(dev);
19901 +}
19902 +
19903 +static void network_alloc_rx_buffers(struct net_device *dev)
19904 +{
19905 + unsigned short id;
19906 + struct netfront_info *np = netdev_priv(dev);
19907 + struct sk_buff *skb;
19908 + struct page *page;
19909 + int i, batch_target, notify;
19910 + RING_IDX req_prod = np->rx.req_prod_pvt;
19911 + struct xen_memory_reservation reservation;
19912 + grant_ref_t ref;
19913 + unsigned long pfn;
19914 + void *vaddr;
19915 + int nr_flips;
19916 + netif_rx_request_t *req;
19917 +
19918 + if (unlikely(!netfront_carrier_ok(np)))
19919 + return;
19920 +
19921 + /*
19922 + * Allocate skbuffs greedily, even though we batch updates to the
19923 + * receive ring. This creates a less bursty demand on the memory
19924 + * allocator, so should reduce the chance of failed allocation requests
19925 + * both for ourselves and for other kernel subsystems.
19926 + */
19927 + batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
19928 + for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
19929 + /*
19930 + * Allocate an skb and a page. Do not use __dev_alloc_skb as
19931 + * that will allocate page-sized buffers which is not
19932 + * necessary here.
19933 + * 16 bytes added as necessary headroom for netif_receive_skb.
19934 + */
19935 + skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN,
19936 + GFP_ATOMIC | __GFP_NOWARN);
19937 + if (unlikely(!skb))
19938 + goto no_skb;
19939 +
19940 + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
19941 + if (!page) {
19942 + kfree_skb(skb);
19943 +no_skb:
19944 + /* Any skbuffs queued for refill? Force them out. */
19945 + if (i != 0)
19946 + goto refill;
19947 + /* Could not allocate any skbuffs. Try again later. */
19948 + mod_timer(&np->rx_refill_timer,
19949 + jiffies + (HZ/10));
19950 + break;
19951 + }
19952 +
19953 + skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */
19954 + skb_shinfo(skb)->frags[0].page = page;
19955 + skb_shinfo(skb)->nr_frags = 1;
19956 + __skb_queue_tail(&np->rx_batch, skb);
19957 + }
19958 +
19959 + /* Is the batch large enough to be worthwhile? */
19960 + if (i < (np->rx_target/2)) {
19961 + if (req_prod > np->rx.sring->req_prod)
19962 + goto push;
19963 + return;
19964 + }
19965 +
19966 + /* Adjust our fill target if we risked running out of buffers. */
19967 + if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
19968 + ((np->rx_target *= 2) > np->rx_max_target))
19969 + np->rx_target = np->rx_max_target;
19970 +
19971 + refill:
19972 + for (nr_flips = i = 0; ; i++) {
19973 + if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
19974 + break;
19975 +
19976 + skb->dev = dev;
19977 +
19978 + id = xennet_rxidx(req_prod + i);
19979 +
19980 + BUG_ON(np->rx_skbs[id]);
19981 + np->rx_skbs[id] = skb;
19982 +
19983 + ref = gnttab_claim_grant_reference(&np->gref_rx_head);
19984 + BUG_ON((signed short)ref < 0);
19985 + np->grant_rx_ref[id] = ref;
19986 +
19987 + pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
19988 + vaddr = page_address(skb_shinfo(skb)->frags[0].page);
19989 +
19990 + req = RING_GET_REQUEST(&np->rx, req_prod + i);
19991 + if (!np->copying_receiver) {
19992 + gnttab_grant_foreign_transfer_ref(ref,
19993 + np->xbdev->otherend_id,
19994 + pfn);
19995 + np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
19996 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
19997 + /* Remove this page before passing
19998 + * back to Xen. */
19999 + set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
20000 + MULTI_update_va_mapping(np->rx_mcl+i,
20001 + (unsigned long)vaddr,
20002 + __pte(0), 0);
20003 + }
20004 + nr_flips++;
20005 + } else {
20006 + gnttab_grant_foreign_access_ref(ref,
20007 + np->xbdev->otherend_id,
20008 + pfn_to_mfn(pfn),
20009 + 0);
20010 + }
20011 +
20012 + req->id = id;
20013 + req->gref = ref;
20014 + }
20015 +
20016 + if ( nr_flips != 0 ) {
20017 + /* Tell the balloon driver what is going on. */
20018 + balloon_update_driver_allowance(i);
20019 +
20020 + set_xen_guest_handle(reservation.extent_start,
20021 + np->rx_pfn_array);
20022 + reservation.nr_extents = nr_flips;
20023 + reservation.extent_order = 0;
20024 + reservation.address_bits = 0;
20025 + reservation.domid = DOMID_SELF;
20026 +
20027 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
20028 + /* After all PTEs have been zapped, flush the TLB. */
20029 + np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
20030 + UVMF_TLB_FLUSH|UVMF_ALL;
20031 +
20032 + /* Give away a batch of pages. */
20033 + np->rx_mcl[i].op = __HYPERVISOR_memory_op;
20034 + np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
20035 + np->rx_mcl[i].args[1] = (unsigned long)&reservation;
20036 +
20037 + /* Zap PTEs and give away pages in one big
20038 + * multicall. */
20039 + if (unlikely(HYPERVISOR_multicall(np->rx_mcl, i+1)))
20040 + BUG();
20041 +
20042 + /* Check return status of HYPERVISOR_memory_op(). */
20043 + if (unlikely(np->rx_mcl[i].result != i))
20044 + panic("Unable to reduce memory reservation\n");
20045 + while (nr_flips--)
20046 + BUG_ON(np->rx_mcl[nr_flips].result);
20047 + } else {
20048 + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
20049 + &reservation) != i)
20050 + panic("Unable to reduce memory reservation\n");
20051 + }
20052 + } else {
20053 + wmb();
20054 + }
20055 +
20056 + /* Above is a suitable barrier to ensure backend will see requests. */
20057 + np->rx.req_prod_pvt = req_prod + i;
20058 + push:
20059 + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
20060 + if (notify)
20061 + notify_remote_via_irq(np->irq);
20062 +}
20063 +
20064 +static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
20065 + struct netif_tx_request *tx)
20066 +{
20067 + struct netfront_info *np = netdev_priv(dev);
20068 + char *data = skb->data;
20069 + unsigned long mfn;
20070 + RING_IDX prod = np->tx.req_prod_pvt;
20071 + int frags = skb_shinfo(skb)->nr_frags;
20072 + unsigned int offset = offset_in_page(data);
20073 + unsigned int len = skb_headlen(skb);
20074 + unsigned int id;
20075 + grant_ref_t ref;
20076 + int i;
20077 +
20078 + while (len > PAGE_SIZE - offset) {
20079 + tx->size = PAGE_SIZE - offset;
20080 + tx->flags |= NETTXF_more_data;
20081 + len -= tx->size;
20082 + data += tx->size;
20083 + offset = 0;
20084 +
20085 + id = get_id_from_freelist(np->tx_skbs);
20086 + np->tx_skbs[id] = skb_get(skb);
20087 + tx = RING_GET_REQUEST(&np->tx, prod++);
20088 + tx->id = id;
20089 + ref = gnttab_claim_grant_reference(&np->gref_tx_head);
20090 + BUG_ON((signed short)ref < 0);
20091 +
20092 + mfn = virt_to_mfn(data);
20093 + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
20094 + mfn, GTF_readonly);
20095 +
20096 + tx->gref = np->grant_tx_ref[id] = ref;
20097 + tx->offset = offset;
20098 + tx->size = len;
20099 + tx->flags = 0;
20100 + }
20101 +
20102 + for (i = 0; i < frags; i++) {
20103 + skb_frag_t *frag = skb_shinfo(skb)->frags + i;
20104 +
20105 + tx->flags |= NETTXF_more_data;
20106 +
20107 + id = get_id_from_freelist(np->tx_skbs);
20108 + np->tx_skbs[id] = skb_get(skb);
20109 + tx = RING_GET_REQUEST(&np->tx, prod++);
20110 + tx->id = id;
20111 + ref = gnttab_claim_grant_reference(&np->gref_tx_head);
20112 + BUG_ON((signed short)ref < 0);
20113 +
20114 + mfn = pfn_to_mfn(page_to_pfn(frag->page));
20115 + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
20116 + mfn, GTF_readonly);
20117 +
20118 + tx->gref = np->grant_tx_ref[id] = ref;
20119 + tx->offset = frag->page_offset;
20120 + tx->size = frag->size;
20121 + tx->flags = 0;
20122 + }
20123 +
20124 + np->tx.req_prod_pvt = prod;
20125 +}
20126 +
20127 +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
20128 +{
20129 + unsigned short id;
20130 + struct netfront_info *np = netdev_priv(dev);
20131 + struct netif_tx_request *tx;
20132 + struct netif_extra_info *extra;
20133 + char *data = skb->data;
20134 + RING_IDX i;
20135 + grant_ref_t ref;
20136 + unsigned long mfn;
20137 + int notify;
20138 + int frags = skb_shinfo(skb)->nr_frags;
20139 + unsigned int offset = offset_in_page(data);
20140 + unsigned int len = skb_headlen(skb);
20141 +
20142 + /* Check the fast path, if hooks are available */
20143 + if (np->accel_vif_state.hooks &&
20144 + np->accel_vif_state.hooks->start_xmit(skb, dev)) {
20145 + /* Fast path has sent this packet */
20146 + return 0;
20147 + }
20148 +
20149 + frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
20150 + if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
20151 + printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
20152 + frags);
20153 + dump_stack();
20154 + goto drop;
20155 + }
20156 +
20157 + spin_lock_irq(&np->tx_lock);
20158 +
20159 + if (unlikely(!netfront_carrier_ok(np) ||
20160 + (frags > 1 && !xennet_can_sg(dev)) ||
20161 + netif_needs_gso(dev, skb))) {
20162 + spin_unlock_irq(&np->tx_lock);
20163 + goto drop;
20164 + }
20165 +
20166 + i = np->tx.req_prod_pvt;
20167 +
20168 + id = get_id_from_freelist(np->tx_skbs);
20169 + np->tx_skbs[id] = skb;
20170 +
20171 + tx = RING_GET_REQUEST(&np->tx, i);
20172 +
20173 + tx->id = id;
20174 + ref = gnttab_claim_grant_reference(&np->gref_tx_head);
20175 + BUG_ON((signed short)ref < 0);
20176 + mfn = virt_to_mfn(data);
20177 + gnttab_grant_foreign_access_ref(
20178 + ref, np->xbdev->otherend_id, mfn, GTF_readonly);
20179 + tx->gref = np->grant_tx_ref[id] = ref;
20180 + tx->offset = offset;
20181 + tx->size = len;
20182 +
20183 + tx->flags = 0;
20184 + extra = NULL;
20185 +
20186 + if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
20187 + tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
20188 +#ifdef CONFIG_XEN
20189 + if (skb->proto_data_valid) /* remote but checksummed? */
20190 + tx->flags |= NETTXF_data_validated;
20191 +#endif
20192 +
20193 +#if HAVE_TSO
20194 + if (skb_shinfo(skb)->gso_size) {
20195 + struct netif_extra_info *gso = (struct netif_extra_info *)
20196 + RING_GET_REQUEST(&np->tx, ++i);
20197 +
20198 + if (extra)
20199 + extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
20200 + else
20201 + tx->flags |= NETTXF_extra_info;
20202 +
20203 + gso->u.gso.size = skb_shinfo(skb)->gso_size;
20204 + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
20205 + gso->u.gso.pad = 0;
20206 + gso->u.gso.features = 0;
20207 +
20208 + gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
20209 + gso->flags = 0;
20210 + extra = gso;
20211 + }
20212 +#endif
20213 +
20214 + np->tx.req_prod_pvt = i + 1;
20215 +
20216 + xennet_make_frags(skb, dev, tx);
20217 + tx->size = skb->len;
20218 +
20219 + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
20220 + if (notify)
20221 + notify_remote_via_irq(np->irq);
20222 +
20223 + np->stats.tx_bytes += skb->len;
20224 + np->stats.tx_packets++;
20225 + dev->trans_start = jiffies;
20226 +
20227 + /* Note: It is not safe to access skb after network_tx_buf_gc()! */
20228 + network_tx_buf_gc(dev);
20229 +
20230 + if (!netfront_tx_slot_available(np))
20231 + netif_stop_queue(dev);
20232 +
20233 + spin_unlock_irq(&np->tx_lock);
20234 +
20235 + return 0;
20236 +
20237 + drop:
20238 + np->stats.tx_dropped++;
20239 + dev_kfree_skb(skb);
20240 + return 0;
20241 +}
20242 +
20243 +static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
20244 +{
20245 + struct net_device *dev = dev_id;
20246 + struct netfront_info *np = netdev_priv(dev);
20247 + unsigned long flags;
20248 +
20249 + spin_lock_irqsave(&np->tx_lock, flags);
20250 +
20251 + if (likely(netfront_carrier_ok(np))) {
20252 + network_tx_buf_gc(dev);
20253 + /* Under tx_lock: protects access to rx shared-ring indexes. */
20254 + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
20255 + netfront_accelerator_call_stop_napi_irq(np, dev);
20256 +
20257 + netif_rx_schedule(dev);
20258 + dev->last_rx = jiffies;
20259 + }
20260 + }
20261 +
20262 + spin_unlock_irqrestore(&np->tx_lock, flags);
20263 +
20264 + return IRQ_HANDLED;
20265 +}
20266 +
20267 +static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
20268 + grant_ref_t ref)
20269 +{
20270 + int new = xennet_rxidx(np->rx.req_prod_pvt);
20271 +
20272 + BUG_ON(np->rx_skbs[new]);
20273 + np->rx_skbs[new] = skb;
20274 + np->grant_rx_ref[new] = ref;
20275 + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
20276 + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
20277 + np->rx.req_prod_pvt++;
20278 +}
20279 +
20280 +int xennet_get_extras(struct netfront_info *np,
20281 + struct netif_extra_info *extras, RING_IDX rp)
20282 +
20283 +{
20284 + struct netif_extra_info *extra;
20285 + RING_IDX cons = np->rx.rsp_cons;
20286 + int err = 0;
20287 +
20288 + do {
20289 + struct sk_buff *skb;
20290 + grant_ref_t ref;
20291 +
20292 + if (unlikely(cons + 1 == rp)) {
20293 + if (net_ratelimit())
20294 + WPRINTK("Missing extra info\n");
20295 + err = -EBADR;
20296 + break;
20297 + }
20298 +
20299 + extra = (struct netif_extra_info *)
20300 + RING_GET_RESPONSE(&np->rx, ++cons);
20301 +
20302 + if (unlikely(!extra->type ||
20303 + extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
20304 + if (net_ratelimit())
20305 + WPRINTK("Invalid extra type: %d\n",
20306 + extra->type);
20307 + err = -EINVAL;
20308 + } else {
20309 + memcpy(&extras[extra->type - 1], extra,
20310 + sizeof(*extra));
20311 + }
20312 +
20313 + skb = xennet_get_rx_skb(np, cons);
20314 + ref = xennet_get_rx_ref(np, cons);
20315 + xennet_move_rx_slot(np, skb, ref);
20316 + } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
20317 +
20318 + np->rx.rsp_cons = cons;
20319 + return err;
20320 +}
20321 +
20322 +static int xennet_get_responses(struct netfront_info *np,
20323 + struct netfront_rx_info *rinfo, RING_IDX rp,
20324 + struct sk_buff_head *list,
20325 + int *pages_flipped_p)
20326 +{
20327 + int pages_flipped = *pages_flipped_p;
20328 + struct mmu_update *mmu;
20329 + struct multicall_entry *mcl;
20330 + struct netif_rx_response *rx = &rinfo->rx;
20331 + struct netif_extra_info *extras = rinfo->extras;
20332 + RING_IDX cons = np->rx.rsp_cons;
20333 + struct sk_buff *skb = xennet_get_rx_skb(np, cons);
20334 + grant_ref_t ref = xennet_get_rx_ref(np, cons);
20335 + int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
20336 + int frags = 1;
20337 + int err = 0;
20338 + unsigned long ret;
20339 +
20340 + if (rx->flags & NETRXF_extra_info) {
20341 + err = xennet_get_extras(np, extras, rp);
20342 + cons = np->rx.rsp_cons;
20343 + }
20344 +
20345 + for (;;) {
20346 + unsigned long mfn;
20347 +
20348 + if (unlikely(rx->status < 0 ||
20349 + rx->offset + rx->status > PAGE_SIZE)) {
20350 + if (net_ratelimit())
20351 + WPRINTK("rx->offset: %x, size: %u\n",
20352 + rx->offset, rx->status);
20353 + xennet_move_rx_slot(np, skb, ref);
20354 + err = -EINVAL;
20355 + goto next;
20356 + }
20357 +
20358 + /*
20359 + * This definitely indicates a bug, either in this driver or in
20360 + * the backend driver. In future this should flag the bad
20361 + * situation to the system controller to reboot the backend.
20362 + */
20363 + if (ref == GRANT_INVALID_REF) {
20364 + if (net_ratelimit())
20365 + WPRINTK("Bad rx response id %d.\n", rx->id);
20366 + err = -EINVAL;
20367 + goto next;
20368 + }
20369 +
20370 + if (!np->copying_receiver) {
20371 + /* Memory pressure, insufficient buffer
20372 + * headroom, ... */
20373 + if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
20374 + if (net_ratelimit())
20375 + WPRINTK("Unfulfilled rx req "
20376 + "(id=%d, st=%d).\n",
20377 + rx->id, rx->status);
20378 + xennet_move_rx_slot(np, skb, ref);
20379 + err = -ENOMEM;
20380 + goto next;
20381 + }
20382 +
20383 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
20384 + /* Remap the page. */
20385 + struct page *page =
20386 + skb_shinfo(skb)->frags[0].page;
20387 + unsigned long pfn = page_to_pfn(page);
20388 + void *vaddr = page_address(page);
20389 +
20390 + mcl = np->rx_mcl + pages_flipped;
20391 + mmu = np->rx_mmu + pages_flipped;
20392 +
20393 + MULTI_update_va_mapping(mcl,
20394 + (unsigned long)vaddr,
20395 + pfn_pte_ma(mfn,
20396 + PAGE_KERNEL),
20397 + 0);
20398 + mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
20399 + | MMU_MACHPHYS_UPDATE;
20400 + mmu->val = pfn;
20401 +
20402 + set_phys_to_machine(pfn, mfn);
20403 + }
20404 + pages_flipped++;
20405 + } else {
20406 + ret = gnttab_end_foreign_access_ref(ref);
20407 + BUG_ON(!ret);
20408 + }
20409 +
20410 + gnttab_release_grant_reference(&np->gref_rx_head, ref);
20411 +
20412 + __skb_queue_tail(list, skb);
20413 +
20414 +next:
20415 + if (!(rx->flags & NETRXF_more_data))
20416 + break;
20417 +
20418 + if (cons + frags == rp) {
20419 + if (net_ratelimit())
20420 + WPRINTK("Need more frags\n");
20421 + err = -ENOENT;
20422 + break;
20423 + }
20424 +
20425 + rx = RING_GET_RESPONSE(&np->rx, cons + frags);
20426 + skb = xennet_get_rx_skb(np, cons + frags);
20427 + ref = xennet_get_rx_ref(np, cons + frags);
20428 + frags++;
20429 + }
20430 +
20431 + if (unlikely(frags > max)) {
20432 + if (net_ratelimit())
20433 + WPRINTK("Too many frags\n");
20434 + err = -E2BIG;
20435 + }
20436 +
20437 + if (unlikely(err))
20438 + np->rx.rsp_cons = cons + frags;
20439 +
20440 + *pages_flipped_p = pages_flipped;
20441 +
20442 + return err;
20443 +}
20444 +
20445 +static RING_IDX xennet_fill_frags(struct netfront_info *np,
20446 + struct sk_buff *skb,
20447 + struct sk_buff_head *list)
20448 +{
20449 + struct skb_shared_info *shinfo = skb_shinfo(skb);
20450 + int nr_frags = shinfo->nr_frags;
20451 + RING_IDX cons = np->rx.rsp_cons;
20452 + skb_frag_t *frag = shinfo->frags + nr_frags;
20453 + struct sk_buff *nskb;
20454 +
20455 + while ((nskb = __skb_dequeue(list))) {
20456 + struct netif_rx_response *rx =
20457 + RING_GET_RESPONSE(&np->rx, ++cons);
20458 +
20459 + frag->page = skb_shinfo(nskb)->frags[0].page;
20460 + frag->page_offset = rx->offset;
20461 + frag->size = rx->status;
20462 +
20463 + skb->data_len += rx->status;
20464 +
20465 + skb_shinfo(nskb)->nr_frags = 0;
20466 + kfree_skb(nskb);
20467 +
20468 + frag++;
20469 + nr_frags++;
20470 + }
20471 +
20472 + shinfo->nr_frags = nr_frags;
20473 + return cons;
20474 +}
20475 +
20476 +static int xennet_set_skb_gso(struct sk_buff *skb,
20477 + struct netif_extra_info *gso)
20478 +{
20479 + if (!gso->u.gso.size) {
20480 + if (net_ratelimit())
20481 + WPRINTK("GSO size must not be zero.\n");
20482 + return -EINVAL;
20483 + }
20484 +
20485 + /* Currently only TCPv4 segmentation offload (GSO) is supported. */
20486 + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
20487 + if (net_ratelimit())
20488 + WPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
20489 + return -EINVAL;
20490 + }
20491 +
20492 +#if HAVE_TSO
20493 + skb_shinfo(skb)->gso_size = gso->u.gso.size;
20494 +#if HAVE_GSO
20495 + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
20496 +
20497 + /* Header must be checked, and gso_segs computed. */
20498 + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
20499 +#endif
20500 + skb_shinfo(skb)->gso_segs = 0;
20501 +
20502 + return 0;
20503 +#else
20504 + if (net_ratelimit())
20505 + WPRINTK("GSO unsupported by this kernel.\n");
20506 + return -EINVAL;
20507 +#endif
20508 +}
20509 +
20510 +static int netif_poll(struct net_device *dev, int *pbudget)
20511 +{
20512 + struct netfront_info *np = netdev_priv(dev);
20513 + struct sk_buff *skb;
20514 + struct netfront_rx_info rinfo;
20515 + struct netif_rx_response *rx = &rinfo.rx;
20516 + struct netif_extra_info *extras = rinfo.extras;
20517 + RING_IDX i, rp;
20518 + struct multicall_entry *mcl;
20519 + int work_done, budget, more_to_do = 1, accel_more_to_do = 1;
20520 + struct sk_buff_head rxq;
20521 + struct sk_buff_head errq;
20522 + struct sk_buff_head tmpq;
20523 + unsigned long flags;
20524 + unsigned int len;
20525 + int pages_flipped = 0;
20526 + int err;
20527 +
20528 + spin_lock(&np->rx_lock); /* no need for spin_lock_bh() in ->poll() */
20529 +
20530 + if (unlikely(!netfront_carrier_ok(np))) {
20531 + spin_unlock(&np->rx_lock);
20532 + return 0;
20533 + }
20534 +
20535 + skb_queue_head_init(&rxq);
20536 + skb_queue_head_init(&errq);
20537 + skb_queue_head_init(&tmpq);
20538 +
20539 + if ((budget = *pbudget) > dev->quota)
20540 + budget = dev->quota;
20541 + rp = np->rx.sring->rsp_prod;
20542 + rmb(); /* Ensure we see queued responses up to 'rp'. */
20543 +
20544 + i = np->rx.rsp_cons;
20545 + work_done = 0;
20546 + while ((i != rp) && (work_done < budget)) {
20547 + memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
20548 + memset(extras, 0, sizeof(rinfo.extras));
20549 +
20550 + err = xennet_get_responses(np, &rinfo, rp, &tmpq,
20551 + &pages_flipped);
20552 +
20553 + if (unlikely(err)) {
20554 +err:
20555 + while ((skb = __skb_dequeue(&tmpq)))
20556 + __skb_queue_tail(&errq, skb);
20557 + np->stats.rx_errors++;
20558 + i = np->rx.rsp_cons;
20559 + continue;
20560 + }
20561 +
20562 + skb = __skb_dequeue(&tmpq);
20563 +
20564 + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
20565 + struct netif_extra_info *gso;
20566 + gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
20567 +
20568 + if (unlikely(xennet_set_skb_gso(skb, gso))) {
20569 + __skb_queue_head(&tmpq, skb);
20570 + np->rx.rsp_cons += skb_queue_len(&tmpq);
20571 + goto err;
20572 + }
20573 + }
20574 +
20575 + NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
20576 + NETFRONT_SKB_CB(skb)->offset = rx->offset;
20577 +
20578 + len = rx->status;
20579 + if (len > RX_COPY_THRESHOLD)
20580 + len = RX_COPY_THRESHOLD;
20581 + skb_put(skb, len);
20582 +
20583 + if (rx->status > len) {
20584 + skb_shinfo(skb)->frags[0].page_offset =
20585 + rx->offset + len;
20586 + skb_shinfo(skb)->frags[0].size = rx->status - len;
20587 + skb->data_len = rx->status - len;
20588 + } else {
20589 + skb_shinfo(skb)->frags[0].page = NULL;
20590 + skb_shinfo(skb)->nr_frags = 0;
20591 + }
20592 +
20593 + i = xennet_fill_frags(np, skb, &tmpq);
20594 +
20595 + /*
20596 + * Truesize must approximate the size of true data plus
20597 + * any supervisor overheads. Adding hypervisor overheads
20598 + * has been shown to significantly reduce achievable
20599 + * bandwidth with the default receive buffer size. It is
20600 + * therefore not wise to account for it here.
20601 + *
20602 + * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
20603 + * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
20604 + * add the size of the data pulled in xennet_fill_frags().
20605 + *
20606 + * We also adjust for any unused space in the main data
20607 + * area by subtracting (RX_COPY_THRESHOLD - len). This is
20608 + * especially important with drivers which split incoming
20609 + * packets into header and data, using only 66 bytes of
20610 + * the main data area (see the e1000 driver for example.)
20611 + * On such systems, without this last adjustment, our
20612 + * achievable receive throughput using the standard receive
20613 + * buffer size was cut by 25%(!!!).
20614 + */
20615 + skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
20616 + skb->len += skb->data_len;
20617 +
20618 + /*
20619 + * Old backends do not assert data_validated but we
20620 + * can infer it from csum_blank so test both flags.
20621 + */
20622 + if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank))
20623 + skb->ip_summed = CHECKSUM_UNNECESSARY;
20624 + else
20625 + skb->ip_summed = CHECKSUM_NONE;
20626 +#ifdef CONFIG_XEN
20627 + skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE);
20628 + skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
20629 +#endif
20630 + np->stats.rx_packets++;
20631 + np->stats.rx_bytes += skb->len;
20632 +
20633 + __skb_queue_tail(&rxq, skb);
20634 +
20635 + np->rx.rsp_cons = ++i;
20636 + work_done++;
20637 + }
20638 +
20639 + if (pages_flipped) {
20640 + /* Some pages are no longer absent... */
20641 + balloon_update_driver_allowance(-pages_flipped);
20642 +
20643 + /* Do all the remapping work and M2P updates. */
20644 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
20645 + mcl = np->rx_mcl + pages_flipped;
20646 + mcl->op = __HYPERVISOR_mmu_update;
20647 + mcl->args[0] = (unsigned long)np->rx_mmu;
20648 + mcl->args[1] = pages_flipped;
20649 + mcl->args[2] = 0;
20650 + mcl->args[3] = DOMID_SELF;
20651 + err = HYPERVISOR_multicall_check(np->rx_mcl,
20652 + pages_flipped + 1,
20653 + NULL);
20654 + BUG_ON(err);
20655 + }
20656 + }
20657 +
20658 + while ((skb = __skb_dequeue(&errq)))
20659 + kfree_skb(skb);
20660 +
20661 + while ((skb = __skb_dequeue(&rxq)) != NULL) {
20662 + struct page *page = NETFRONT_SKB_CB(skb)->page;
20663 + void *vaddr = page_address(page);
20664 + unsigned offset = NETFRONT_SKB_CB(skb)->offset;
20665 +
20666 + memcpy(skb->data, vaddr + offset, skb_headlen(skb));
20667 +
20668 + if (page != skb_shinfo(skb)->frags[0].page)
20669 + __free_page(page);
20670 +
20671 + /* Ethernet work: Delayed to here as it peeks the header. */
20672 + skb->protocol = eth_type_trans(skb, dev);
20673 +
20674 + /* Pass it up. */
20675 + netif_receive_skb(skb);
20676 + dev->last_rx = jiffies;
20677 + }
20678 +
20679 + /* If we get a callback with very few responses, reduce fill target. */
20680 + /* NB. Note exponential increase, linear decrease. */
20681 + if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
20682 + ((3*np->rx_target) / 4)) &&
20683 + (--np->rx_target < np->rx_min_target))
20684 + np->rx_target = np->rx_min_target;
20685 +
20686 + network_alloc_rx_buffers(dev);
20687 +
20688 + if (work_done < budget) {
20689 + /* there's some spare capacity, try the accelerated path */
20690 + int accel_budget = budget - work_done;
20691 + int accel_budget_start = accel_budget;
20692 +
20693 + if (np->accel_vif_state.hooks) {
20694 + accel_more_to_do =
20695 + np->accel_vif_state.hooks->netdev_poll
20696 + (dev, &accel_budget);
20697 + work_done += (accel_budget_start - accel_budget);
20698 + } else
20699 + accel_more_to_do = 0;
20700 + }
20701 +
20702 + *pbudget -= work_done;
20703 + dev->quota -= work_done;
20704 +
20705 + if (work_done < budget) {
20706 + local_irq_save(flags);
20707 +
20708 + RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
20709 +
20710 + if (!more_to_do && !accel_more_to_do &&
20711 + np->accel_vif_state.hooks) {
20712 + /*
20713 + * The slow path has nothing more to do; check whether
20714 + * the fast path is finished as well.
20715 + */
20716 + accel_more_to_do =
20717 + np->accel_vif_state.hooks->start_napi_irq(dev);
20718 + }
20719 +
20720 + if (!more_to_do && !accel_more_to_do)
20721 + __netif_rx_complete(dev);
20722 +
20723 + local_irq_restore(flags);
20724 + }
20725 +
20726 + spin_unlock(&np->rx_lock);
20727 +
20728 + return more_to_do | accel_more_to_do;
20729 +}
20730 +
20731 +static void netif_release_tx_bufs(struct netfront_info *np)
20732 +{
20733 + struct sk_buff *skb;
20734 + int i;
20735 +
20736 + for (i = 1; i <= NET_TX_RING_SIZE; i++) {
20737 + if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET) /* free-list index, not an skb */
20738 + continue;
20739 +
20740 + skb = np->tx_skbs[i];
20741 + gnttab_end_foreign_access_ref(np->grant_tx_ref[i]);
20742 + gnttab_release_grant_reference(
20743 + &np->gref_tx_head, np->grant_tx_ref[i]);
20744 + np->grant_tx_ref[i] = GRANT_INVALID_REF;
20745 + add_id_to_freelist(np->tx_skbs, i);
20746 + dev_kfree_skb_irq(skb);
20747 + }
20748 +}
20749 +
20750 +static void netif_release_rx_bufs_flip(struct netfront_info *np)
20751 +{
20752 + struct mmu_update *mmu = np->rx_mmu;
20753 + struct multicall_entry *mcl = np->rx_mcl;
20754 + struct sk_buff_head free_list;
20755 + struct sk_buff *skb;
20756 + unsigned long mfn;
20757 + int xfer = 0, noxfer = 0, unused = 0;
20758 + int id, ref, rc;
20759 +
20760 + skb_queue_head_init(&free_list);
20761 +
20762 + spin_lock_bh(&np->rx_lock);
20763 +
20764 + for (id = 0; id < NET_RX_RING_SIZE; id++) {
20765 + if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
20766 + unused++;
20767 + continue;
20768 + }
20769 +
20770 + skb = np->rx_skbs[id];
20771 + mfn = gnttab_end_foreign_transfer_ref(ref);
20772 + gnttab_release_grant_reference(&np->gref_rx_head, ref);
20773 + np->grant_rx_ref[id] = GRANT_INVALID_REF;
20774 + add_id_to_freelist(np->rx_skbs, id);
20775 +
20776 + if (0 == mfn) {
20777 + struct page *page = skb_shinfo(skb)->frags[0].page;
20778 + balloon_release_driver_page(page);
20779 + skb_shinfo(skb)->nr_frags = 0;
20780 + dev_kfree_skb(skb);
20781 + noxfer++;
20782 + continue;
20783 + }
20784 +
20785 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
20786 + /* Remap the page. */
20787 + struct page *page = skb_shinfo(skb)->frags[0].page;
20788 + unsigned long pfn = page_to_pfn(page);
20789 + void *vaddr = page_address(page);
20790 +
20791 + MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
20792 + pfn_pte_ma(mfn, PAGE_KERNEL),
20793 + 0);
20794 + mcl++;
20795 + mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
20796 + | MMU_MACHPHYS_UPDATE;
20797 + mmu->val = pfn;
20798 + mmu++;
20799 +
20800 + set_phys_to_machine(pfn, mfn);
20801 + }
20802 + __skb_queue_tail(&free_list, skb);
20803 + xfer++;
20804 + }
20805 +
20806 + DPRINTK("%s: %d xfer, %d noxfer, %d unused\n",
20807 + __FUNCTION__, xfer, noxfer, unused);
20808 +
20809 + if (xfer) {
20810 + /* Some pages are no longer absent... */
20811 + balloon_update_driver_allowance(-xfer);
20812 +
20813 + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
20814 + /* Do all the remapping work and M2P updates. */
20815 + mcl->op = __HYPERVISOR_mmu_update;
20816 + mcl->args[0] = (unsigned long)np->rx_mmu;
20817 + mcl->args[1] = mmu - np->rx_mmu;
20818 + mcl->args[2] = 0;
20819 + mcl->args[3] = DOMID_SELF;
20820 + mcl++;
20821 + rc = HYPERVISOR_multicall_check(
20822 + np->rx_mcl, mcl - np->rx_mcl, NULL);
20823 + BUG_ON(rc);
20824 + }
20825 + }
20826 +
20827 + while ((skb = __skb_dequeue(&free_list)) != NULL)
20828 + dev_kfree_skb(skb);
20829 +
20830 + spin_unlock_bh(&np->rx_lock);
20831 +}
20832 +
20833 +static void netif_release_rx_bufs_copy(struct netfront_info *np)
20834 +{
20835 + struct sk_buff *skb;
20836 + int i, ref;
20837 + int busy = 0, inuse = 0;
20838 +
20839 + spin_lock_bh(&np->rx_lock);
20840 +
20841 + for (i = 0; i < NET_RX_RING_SIZE; i++) {
20842 + ref = np->grant_rx_ref[i];
20843 +
20844 + if (ref == GRANT_INVALID_REF)
20845 + continue;
20846 +
20847 + inuse++;
20848 +
20849 + skb = np->rx_skbs[i];
20850 +
20851 + if (!gnttab_end_foreign_access_ref(ref))
20852 + {
20853 + busy++;
20854 + continue;
20855 + }
20856 +
20857 + gnttab_release_grant_reference(&np->gref_rx_head, ref);
20858 + np->grant_rx_ref[i] = GRANT_INVALID_REF;
20859 + add_id_to_freelist(np->rx_skbs, i);
20860 +
20861 + dev_kfree_skb(skb);
20862 + }
20863 +
20864 + if (busy)
20865 + DPRINTK("%s: Unable to release %d of %d inuse grant references out of %ld total.\n",
20866 + __FUNCTION__, busy, inuse, NET_RX_RING_SIZE);
20867 +
20868 + spin_unlock_bh(&np->rx_lock);
20869 +}
20870 +
20871 +static int network_close(struct net_device *dev)
20872 +{
20873 + struct netfront_info *np = netdev_priv(dev);
20874 + netif_stop_queue(np->netdev);
20875 + return 0;
20876 +}
20877 +
20878 +
20879 +static struct net_device_stats *network_get_stats(struct net_device *dev)
20880 +{
20881 + struct netfront_info *np = netdev_priv(dev);
20882 +
20883 + netfront_accelerator_call_get_stats(np, dev);
20884 + return &np->stats;
20885 +}
20886 +
20887 +static int xennet_set_mac_address(struct net_device *dev, void *p)
20888 +{
20889 + struct netfront_info *np = netdev_priv(dev);
20890 + struct sockaddr *addr = p;
20891 +
20892 + if (netif_running(dev))
20893 + return -EBUSY;
20894 +
20895 + if (!is_valid_ether_addr(addr->sa_data))
20896 + return -EADDRNOTAVAIL;
20897 +
20898 + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
20899 + memcpy(np->mac, addr->sa_data, ETH_ALEN);
20900 +
20901 + return 0;
20902 +}
20903 +
20904 +static int xennet_change_mtu(struct net_device *dev, int mtu)
20905 +{
20906 + int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
20907 +
20908 + if (mtu > max)
20909 + return -EINVAL;
20910 + dev->mtu = mtu;
20911 + return 0;
20912 +}
20913 +
20914 +static int xennet_set_sg(struct net_device *dev, u32 data)
20915 +{
20916 + if (data) {
20917 + struct netfront_info *np = netdev_priv(dev);
20918 + int val;
20919 +
20920 + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
20921 + "%d", &val) < 0)
20922 + val = 0;
20923 + if (!val)
20924 + return -ENOSYS;
20925 + } else if (dev->mtu > ETH_DATA_LEN)
20926 + dev->mtu = ETH_DATA_LEN;
20927 +
20928 + return ethtool_op_set_sg(dev, data);
20929 +}
20930 +
20931 +static int xennet_set_tso(struct net_device *dev, u32 data)
20932 +{
20933 + if (data) {
20934 + struct netfront_info *np = netdev_priv(dev);
20935 + int val;
20936 +
20937 + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
20938 + "feature-gso-tcpv4", "%d", &val) < 0)
20939 + val = 0;
20940 + if (!val)
20941 + return -ENOSYS;
20942 + }
20943 +
20944 + return ethtool_op_set_tso(dev, data);
20945 +}
20946 +
20947 +static void xennet_set_features(struct net_device *dev)
20948 +{
20949 + dev_disable_gso_features(dev);
20950 + xennet_set_sg(dev, 0);
20951 +
20952 + /* We need checksum offload to enable scatter/gather and TSO. */
20953 + if (!(dev->features & NETIF_F_IP_CSUM))
20954 + return;
20955 +
20956 + if (xennet_set_sg(dev, 1))
20957 + return;
20958 +
20959 + /* Before 2.6.9 TSO seems to be unreliable so do not enable it
20960 + * on older kernels.
20961 + */
20962 + if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9))
20963 + xennet_set_tso(dev, 1);
20964 +}
20965 +
20966 +static int network_connect(struct net_device *dev)
20967 +{
20968 + struct netfront_info *np = netdev_priv(dev);
20969 + int i, requeue_idx, err;
20970 + struct sk_buff *skb;
20971 + grant_ref_t ref;
20972 + netif_rx_request_t *req;
20973 + unsigned int feature_rx_copy, feature_rx_flip;
20974 +
20975 + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
20976 + "feature-rx-copy", "%u", &feature_rx_copy);
20977 + if (err != 1)
20978 + feature_rx_copy = 0;
20979 + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
20980 + "feature-rx-flip", "%u", &feature_rx_flip);
20981 + if (err != 1)
20982 + feature_rx_flip = 1;
20983 +
20984 + /*
20985 + * Copy packets on the receive path if:
20986 + * (a) This was requested by the user, and the backend supports it; or
20987 + * (b) Flipping was requested, but this is unsupported by the backend.
20988 + */
20989 + np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
20990 + (MODPARM_rx_flip && !feature_rx_flip));
20991 +
20992 + err = talk_to_backend(np->xbdev, np);
20993 + if (err)
20994 + return err;
20995 +
20996 + xennet_set_features(dev);
20997 +
20998 + DPRINTK("device %s has %sing receive path.\n",
20999 + dev->name, np->copying_receiver ? "copy" : "flipp");
21000 +
21001 + spin_lock_bh(&np->rx_lock);
21002 + spin_lock_irq(&np->tx_lock);
21003 +
21004 + /*
21005 + * Recovery procedure:
21006 + * NB. Freelist index entries are always going to be less than
21007 + * PAGE_OFFSET, whereas pointers to skbs will always be equal to or
21008 + * greater than PAGE_OFFSET: we use this property to distinguish
21009 + * them.
21010 + */
21011 +
21012 + /* Step 1: Discard all pending TX packet fragments. */
21013 + netif_release_tx_bufs(np);
21014 +
21015 + /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
21016 + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
21017 + if (!np->rx_skbs[i])
21018 + continue;
21019 +
21020 + skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
21021 + ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
21022 + req = RING_GET_REQUEST(&np->rx, requeue_idx);
21023 +
21024 + if (!np->copying_receiver) {
21025 + gnttab_grant_foreign_transfer_ref(
21026 + ref, np->xbdev->otherend_id,
21027 + page_to_pfn(skb_shinfo(skb)->frags->page));
21028 + } else {
21029 + gnttab_grant_foreign_access_ref(
21030 + ref, np->xbdev->otherend_id,
21031 + pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
21032 + frags->page)),
21033 + 0);
21034 + }
21035 + req->gref = ref;
21036 + req->id = requeue_idx;
21037 +
21038 + requeue_idx++;
21039 + }
21040 +
21041 + np->rx.req_prod_pvt = requeue_idx;
21042 +
21043 + /*
21044 + * Step 3: All public and private state should now be sane. Get
21045 + * ready to start sending and receiving packets and give the driver
21046 + * domain a kick because we've probably just requeued some
21047 + * packets.
21048 + */
21049 + netfront_carrier_on(np);
21050 + notify_remote_via_irq(np->irq);
21051 + network_tx_buf_gc(dev);
21052 + network_alloc_rx_buffers(dev);
21053 +
21054 + spin_unlock_irq(&np->tx_lock);
21055 + spin_unlock_bh(&np->rx_lock);
21056 +
21057 + return 0;
21058 +}
21059 +
21060 +static void netif_uninit(struct net_device *dev)
21061 +{
21062 + struct netfront_info *np = netdev_priv(dev);
21063 + netif_release_tx_bufs(np);
21064 + if (np->copying_receiver)
21065 + netif_release_rx_bufs_copy(np);
21066 + else
21067 + netif_release_rx_bufs_flip(np);
21068 + gnttab_free_grant_references(np->gref_tx_head);
21069 + gnttab_free_grant_references(np->gref_rx_head);
21070 +}
21071 +
21072 +static struct ethtool_ops network_ethtool_ops =
21073 +{
21074 + .get_tx_csum = ethtool_op_get_tx_csum,
21075 + .set_tx_csum = ethtool_op_set_tx_csum,
21076 + .get_sg = ethtool_op_get_sg,
21077 + .set_sg = xennet_set_sg,
21078 +#if HAVE_TSO
21079 + .get_tso = ethtool_op_get_tso,
21080 + .set_tso = xennet_set_tso,
21081 +#endif
21082 + .get_link = ethtool_op_get_link,
21083 +};
21084 +
21085 +#ifdef CONFIG_SYSFS
21086 +static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
21087 +{
21088 + struct net_device *netdev = container_of(cd, struct net_device,
21089 + class_dev);
21090 + struct netfront_info *info = netdev_priv(netdev);
21091 +
21092 + return sprintf(buf, "%u\n", info->rx_min_target);
21093 +}
21094 +
21095 +static ssize_t store_rxbuf_min(struct class_device *cd,
21096 + const char *buf, size_t len)
21097 +{
21098 + struct net_device *netdev = container_of(cd, struct net_device,
21099 + class_dev);
21100 + struct netfront_info *np = netdev_priv(netdev);
21101 + char *endp;
21102 + unsigned long target;
21103 +
21104 + if (!capable(CAP_NET_ADMIN))
21105 + return -EPERM;
21106 +
21107 + target = simple_strtoul(buf, &endp, 0);
21108 + if (endp == buf)
21109 + return -EBADMSG;
21110 +
21111 + if (target < RX_MIN_TARGET)
21112 + target = RX_MIN_TARGET;
21113 + if (target > RX_MAX_TARGET)
21114 + target = RX_MAX_TARGET;
21115 +
21116 + spin_lock_bh(&np->rx_lock);
21117 + if (target > np->rx_max_target)
21118 + np->rx_max_target = target;
21119 + np->rx_min_target = target;
21120 + if (target > np->rx_target)
21121 + np->rx_target = target;
21122 +
21123 + network_alloc_rx_buffers(netdev);
21124 +
21125 + spin_unlock_bh(&np->rx_lock);
21126 + return len;
21127 +}
21128 +
21129 +static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
21130 +{
21131 + struct net_device *netdev = container_of(cd, struct net_device,
21132 + class_dev);
21133 + struct netfront_info *info = netdev_priv(netdev);
21134 +
21135 + return sprintf(buf, "%u\n", info->rx_max_target);
21136 +}
21137 +
21138 +static ssize_t store_rxbuf_max(struct class_device *cd,
21139 + const char *buf, size_t len)
21140 +{
21141 + struct net_device *netdev = container_of(cd, struct net_device,
21142 + class_dev);
21143 + struct netfront_info *np = netdev_priv(netdev);
21144 + char *endp;
21145 + unsigned long target;
21146 +
21147 + if (!capable(CAP_NET_ADMIN))
21148 + return -EPERM;
21149 +
21150 + target = simple_strtoul(buf, &endp, 0);
21151 + if (endp == buf)
21152 + return -EBADMSG;
21153 +
21154 + if (target < RX_MIN_TARGET)
21155 + target = RX_MIN_TARGET;
21156 + if (target > RX_MAX_TARGET)
21157 + target = RX_MAX_TARGET;
21158 +
21159 + spin_lock_bh(&np->rx_lock);
21160 + if (target < np->rx_min_target)
21161 + np->rx_min_target = target;
21162 + np->rx_max_target = target;
21163 + if (target < np->rx_target)
21164 + np->rx_target = target;
21165 +
21166 + network_alloc_rx_buffers(netdev);
21167 +
21168 + spin_unlock_bh(&np->rx_lock);
21169 + return len;
21170 +}
21171 +
21172 +static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
21173 +{
21174 + struct net_device *netdev = container_of(cd, struct net_device,
21175 + class_dev);
21176 + struct netfront_info *info = netdev_priv(netdev);
21177 +
21178 + return sprintf(buf, "%u\n", info->rx_target);
21179 +}
21180 +
21181 +static const struct class_device_attribute xennet_attrs[] = {
21182 + __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
21183 + __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
21184 + __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
21185 +};
21186 +
21187 +static int xennet_sysfs_addif(struct net_device *netdev)
21188 +{
21189 + int i;
21190 + int error = 0;
21191 +
21192 + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
21193 + error = class_device_create_file(&netdev->class_dev,
21194 + &xennet_attrs[i]);
21195 + if (error)
21196 + goto fail;
21197 + }
21198 + return 0;
21199 +
21200 + fail:
21201 + while (--i >= 0)
21202 + class_device_remove_file(&netdev->class_dev,
21203 + &xennet_attrs[i]);
21204 + return error;
21205 +}
21206 +
21207 +static void xennet_sysfs_delif(struct net_device *netdev)
21208 +{
21209 + int i;
21210 +
21211 + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
21212 + class_device_remove_file(&netdev->class_dev,
21213 + &xennet_attrs[i]);
21214 + }
21215 +}
21216 +
21217 +#endif /* CONFIG_SYSFS */
21218 +
21219 +
21220 +/*
21221 + * Nothing to do here. Virtual interface is point-to-point and the
21222 + * physical interface is probably promiscuous anyway.
21223 + */
21224 +static void network_set_multicast_list(struct net_device *dev)
21225 +{
21226 +}
21227 +
21228 +static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
21229 +{
21230 + int i, err = 0;
21231 + struct net_device *netdev = NULL;
21232 + struct netfront_info *np = NULL;
21233 +
21234 + netdev = alloc_etherdev(sizeof(struct netfront_info));
21235 + if (!netdev) {
21236 + printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
21237 + __FUNCTION__);
21238 + return ERR_PTR(-ENOMEM);
21239 + }
21240 +
21241 + np = netdev_priv(netdev);
21242 + np->xbdev = dev;
21243 +
21244 + spin_lock_init(&np->tx_lock);
21245 + spin_lock_init(&np->rx_lock);
21246 +
21247 + init_accelerator_vif(np, dev);
21248 +
21249 + skb_queue_head_init(&np->rx_batch);
21250 + np->rx_target = RX_DFL_MIN_TARGET;
21251 + np->rx_min_target = RX_DFL_MIN_TARGET;
21252 + np->rx_max_target = RX_MAX_TARGET;
21253 +
21254 + init_timer(&np->rx_refill_timer);
21255 + np->rx_refill_timer.data = (unsigned long)netdev;
21256 + np->rx_refill_timer.function = rx_refill_timeout;
21257 +
21258 + /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
21259 + for (i = 0; i <= NET_TX_RING_SIZE; i++) {
21260 + np->tx_skbs[i] = (void *)((unsigned long) i+1);
21261 + np->grant_tx_ref[i] = GRANT_INVALID_REF;
21262 + }
21263 +
21264 + for (i = 0; i < NET_RX_RING_SIZE; i++) {
21265 + np->rx_skbs[i] = NULL;
21266 + np->grant_rx_ref[i] = GRANT_INVALID_REF;
21267 + }
21268 +
21269 + /* A grant for every tx ring slot */
21270 + if (gnttab_alloc_grant_references(TX_MAX_TARGET,
21271 + &np->gref_tx_head) < 0) {
21272 + printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
21273 + err = -ENOMEM;
21274 + goto exit;
21275 + }
21276 + /* A grant for every rx ring slot */
21277 + if (gnttab_alloc_grant_references(RX_MAX_TARGET,
21278 + &np->gref_rx_head) < 0) {
21279 + printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
21280 + err = -ENOMEM;
21281 + goto exit_free_tx;
21282 + }
21283 +
21284 + netdev->open = network_open;
21285 + netdev->hard_start_xmit = network_start_xmit;
21286 + netdev->stop = network_close;
21287 + netdev->get_stats = network_get_stats;
21288 + netdev->poll = netif_poll;
21289 + netdev->set_multicast_list = network_set_multicast_list;
21290 + netdev->uninit = netif_uninit;
21291 + netdev->set_mac_address = xennet_set_mac_address;
21292 + netdev->change_mtu = xennet_change_mtu;
21293 + netdev->weight = 64;
21294 + netdev->features = NETIF_F_IP_CSUM;
21295 +
21296 + SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
21297 + SET_MODULE_OWNER(netdev);
21298 + SET_NETDEV_DEV(netdev, &dev->dev);
21299 +
21300 + np->netdev = netdev;
21301 +
21302 + netfront_carrier_off(np);
21303 +
21304 + return netdev;
21305 +
21306 + exit_free_tx:
21307 + gnttab_free_grant_references(np->gref_tx_head);
21308 + exit:
21309 + free_netdev(netdev);
21310 + return ERR_PTR(err);
21311 +}
21312 +
21313 +#ifdef CONFIG_INET
21314 +/*
21315 + * We use this notifier to send out a fake ARP reply to reset switches and
21316 + * router ARP caches when an IP interface is brought up on a VIF.
21317 + */
21318 +static int
21319 +inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
21320 +{
21321 + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
21322 + struct net_device *dev = ifa->ifa_dev->dev;
21323 +
21324 + /* UP event and is it one of our devices? */
21325 + if (event == NETDEV_UP && dev->open == network_open)
21326 + send_fake_arp(dev);
21327 +
21328 + return NOTIFY_DONE;
21329 +}
21330 +
21331 +static struct notifier_block notifier_inetdev = {
21332 + .notifier_call = inetdev_notify,
21333 + .next = NULL,
21334 + .priority = 0
21335 +};
21336 +#endif
21337 +
21338 +
21339 +static void netif_disconnect_backend(struct netfront_info *info)
21340 +{
21341 + /* Stop old i/f to prevent errors whilst we rebuild the state. */
21342 + spin_lock_bh(&info->rx_lock);
21343 + spin_lock_irq(&info->tx_lock);
21344 + netfront_carrier_off(info);
21345 + spin_unlock_irq(&info->tx_lock);
21346 + spin_unlock_bh(&info->rx_lock);
21347 +
21348 + if (info->irq)
21349 + unbind_from_irqhandler(info->irq, info->netdev);
21350 + info->irq = 0;
21351 +
21352 + end_access(info->tx_ring_ref, info->tx.sring);
21353 + end_access(info->rx_ring_ref, info->rx.sring);
21354 + info->tx_ring_ref = GRANT_INVALID_REF;
21355 + info->rx_ring_ref = GRANT_INVALID_REF;
21356 + info->tx.sring = NULL;
21357 + info->rx.sring = NULL;
21358 +}
21359 +
21360 +
21361 +static void end_access(int ref, void *page)
21362 +{
21363 + if (ref != GRANT_INVALID_REF)
21364 + gnttab_end_foreign_access(ref, (unsigned long)page);
21365 +}
21366 +
21367 +
21368 +/* ** Driver registration ** */
21369 +
21370 +
21371 +static const struct xenbus_device_id netfront_ids[] = {
21372 + { "vif" },
21373 + { "" }
21374 +};
21375 +MODULE_ALIAS("xen:vif");
21376 +
21377 +
21378 +static struct xenbus_driver netfront_driver = {
21379 + .name = "vif",
21380 + .owner = THIS_MODULE,
21381 + .ids = netfront_ids,
21382 + .probe = netfront_probe,
21383 + .remove = __devexit_p(netfront_remove),
21384 + .suspend = netfront_suspend,
21385 + .suspend_cancel = netfront_suspend_cancel,
21386 + .resume = netfront_resume,
21387 + .otherend_changed = backend_changed,
21388 +};
21389 +
21390 +
21391 +static int __init netif_init(void)
21392 +{
21393 + if (!is_running_on_xen())
21394 + return -ENODEV;
21395 +
21396 +#ifdef CONFIG_XEN
21397 + if (MODPARM_rx_flip && MODPARM_rx_copy) {
21398 + WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
21399 + return -EINVAL;
21400 + }
21401 +
21402 + if (!MODPARM_rx_flip && !MODPARM_rx_copy)
21403 + MODPARM_rx_flip = 1; /* Default is to flip. */
21404 +#endif
21405 +
21406 + netif_init_accel();
21407 +
21408 + IPRINTK("Initialising virtual ethernet driver.\n");
21409 +
21410 +#ifdef CONFIG_INET
21411 + (void)register_inetaddr_notifier(&notifier_inetdev);
21412 +#endif
21413 +
21414 + return xenbus_register_frontend(&netfront_driver);
21415 +}
21416 +module_init(netif_init);
21417 +
21418 +
21419 +static void __exit netif_exit(void)
21420 +{
21421 +#ifdef CONFIG_INET
21422 + unregister_inetaddr_notifier(&notifier_inetdev);
21423 +#endif
21424 +
21425 + netif_exit_accel();
21426 +
21427 + return xenbus_unregister_driver(&netfront_driver);
21428 +}
21429 +module_exit(netif_exit);
21430 +
21431 +MODULE_LICENSE("Dual BSD/GPL");
21432 Index: head-2008-11-25/drivers/xen/netfront/netfront.h
21433 ===================================================================
21434 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
21435 +++ head-2008-11-25/drivers/xen/netfront/netfront.h 2008-01-07 13:19:18.000000000 +0100
21436 @@ -0,0 +1,274 @@
21437 +/******************************************************************************
21438 + * Virtual network driver for conversing with remote driver backends.
21439 + *
21440 + * Copyright (c) 2002-2005, K A Fraser
21441 + * Copyright (c) 2005, XenSource Ltd
21442 + * Copyright (C) 2007 Solarflare Communications, Inc.
21443 + *
21444 + * This program is free software; you can redistribute it and/or
21445 + * modify it under the terms of the GNU General Public License version 2
21446 + * as published by the Free Software Foundation; or, when distributed
21447 + * separately from the Linux kernel or incorporated into other
21448 + * software packages, subject to the following license:
21449 + *
21450 + * Permission is hereby granted, free of charge, to any person obtaining a copy
21451 + * of this source file (the "Software"), to deal in the Software without
21452 + * restriction, including without limitation the rights to use, copy, modify,
21453 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
21454 + * and to permit persons to whom the Software is furnished to do so, subject to
21455 + * the following conditions:
21456 + *
21457 + * The above copyright notice and this permission notice shall be included in
21458 + * all copies or substantial portions of the Software.
21459 + *
21460 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21461 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21462 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21463 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21464 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21465 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21466 + * IN THE SOFTWARE.
21467 + */
21468 +
21469 +#ifndef NETFRONT_H
21470 +#define NETFRONT_H
21471 +
21472 +#include <xen/interface/io/netif.h>
21473 +#include <linux/netdevice.h>
21474 +#include <linux/skbuff.h>
21475 +#include <linux/list.h>
21476 +
21477 +#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
21478 +#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
21479 +
21480 +#include <xen/xenbus.h>
21481 +
21482 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
21483 +#include <xen/platform-compat.h>
21484 +#endif
21485 +
21486 +/*
21487 + * Function pointer table for hooks into a network acceleration
21488 + * plugin. These are called at appropriate points from the netfront
21489 + * driver
21490 + */
21491 +struct netfront_accel_hooks {
21492 + /*
21493 + * new_device: Accelerator hook to ask the plugin to support a
21494 + * new network interface
21495 + */
21496 + int (*new_device)(struct net_device *net_dev, struct xenbus_device *dev);
21497 + /*
21498 + * remove: Opposite of new_device
21499 + */
21500 + int (*remove)(struct xenbus_device *dev);
21501 + /*
21502 + * The net_device is being polled, check the accelerated
21503 + * hardware for any pending packets
21504 + */
21505 + int (*netdev_poll)(struct net_device *dev, int *pbudget);
21506 + /*
21507 + * start_xmit: Used to give the accelerated plugin the option
21508 + * of sending a packet. Returns non-zero if it has done so, or
21509 + * zero to decline and force the packet onto normal send
21510 + * path
21511 + */
21512 + int (*start_xmit)(struct sk_buff *skb, struct net_device *dev);
21513 + /*
21514 + * start_napi_irq/stop_napi_irq: Used by netfront to indicate
21515 + * when NAPI interrupts should be enabled or disabled
21516 + */
21517 + int (*start_napi_irq)(struct net_device *dev);
21518 + void (*stop_napi_irq)(struct net_device *dev);
21519 + /*
21520 + * Called before re-enabling the TX queue to check the fast
21521 + * path has slots too
21522 + */
21523 + int (*check_ready)(struct net_device *dev);
21524 + /*
21525 + * Get the fastpath network statistics
21526 + */
21527 + int (*get_stats)(struct net_device *dev,
21528 + struct net_device_stats *stats);
21529 +};
21530 +
21531 +
21532 +/* Version of the API/protocol supported for communication between
21533 +   netfront and the acceleration plugin */
21534 +#define NETFRONT_ACCEL_VERSION 0x00010003
21535 +
21536 +/*
21537 + * Per-netfront device state for the accelerator. This is used to
21538 + * allow efficient per-netfront device access to the accelerator
21539 + * hooks
21540 + */
21541 +struct netfront_accel_vif_state {
21542 + struct list_head link;
21543 +
21544 + struct xenbus_device *dev;
21545 + struct netfront_info *np;
21546 + struct netfront_accel_hooks *hooks;
21547 +
21548 + /* Watch on the accelerator configuration value */
21549 + struct xenbus_watch accel_watch;
21550 + /* Work item to process change in accelerator */
21551 + struct work_struct accel_work;
21552 + /* The string from xenbus last time accel_watch fired */
21553 + char *accel_frontend;
21554 +};
21555 +
21556 +/*
21557 + * Per-accelerator state stored in netfront. These form a list that
21558 + * is used to track which devices are accelerated by which plugins,
21559 + * and what plugins are available/have been requested
21560 + */
21561 +struct netfront_accelerator {
21562 + /* Used to make a list */
21563 + struct list_head link;
21564 + /* ID of the accelerator */
21565 + int id;
21566 + /*
21567 + * String describing the accelerator. Currently this is the
21568 + * name of the accelerator module. This is provided by the
21569 + * backend accelerator through xenstore
21570 + */
21571 + char *frontend;
21572 + /* The hooks into the accelerator plugin module */
21573 + struct netfront_accel_hooks *hooks;
21574 +
21575 + /*
21576 + * List of per-netfront device state (struct
21577 + * netfront_accel_vif_state) for each netfront device that is
21578 + * using this accelerator
21579 + */
21580 + struct list_head vif_states;
21581 + spinlock_t vif_states_lock;
21582 +};
21583 +
21584 +struct netfront_info {
21585 + struct list_head list;
21586 + struct net_device *netdev;
21587 +
21588 + struct net_device_stats stats;
21589 +
21590 + struct netif_tx_front_ring tx;
21591 + struct netif_rx_front_ring rx;
21592 +
21593 + spinlock_t tx_lock;
21594 + spinlock_t rx_lock;
21595 +
21596 + unsigned int irq;
21597 + unsigned int copying_receiver;
21598 + unsigned int carrier;
21599 +
21600 + /* Receive-ring batched refills. */
21601 +#define RX_MIN_TARGET 8
21602 +#define RX_DFL_MIN_TARGET 64
21603 +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
21604 + unsigned rx_min_target, rx_max_target, rx_target;
21605 + struct sk_buff_head rx_batch;
21606 +
21607 + struct timer_list rx_refill_timer;
21608 +
21609 + /*
21610 + * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs
21611 + * is an index into a chain of free entries.
21612 + */
21613 + struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
21614 + struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
21615 +
21616 +#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
21617 + grant_ref_t gref_tx_head;
21618 + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
21619 + grant_ref_t gref_rx_head;
21620 + grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
21621 +
21622 + struct xenbus_device *xbdev;
21623 + int tx_ring_ref;
21624 + int rx_ring_ref;
21625 + u8 mac[ETH_ALEN];
21626 +
21627 + unsigned long rx_pfn_array[NET_RX_RING_SIZE];
21628 + struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
21629 + struct mmu_update rx_mmu[NET_RX_RING_SIZE];
21630 +
21631 + /* Private pointer to state internal to accelerator module */
21632 + void *accel_priv;
21633 + /* The accelerator used by this netfront device */
21634 + struct netfront_accelerator *accelerator;
21635 + /* The accelerator state for this netfront device */
21636 + struct netfront_accel_vif_state accel_vif_state;
21637 +};
21638 +
21639 +
21640 +/* Exported Functions */
21641 +
21642 +/*
21643 + * Called by an accelerator plugin module when it has loaded.
21644 + *
21645 + * frontend: the string describing the accelerator, currently the module name
21646 + * hooks: the hooks for netfront to use to call into the accelerator
21647 + * version: the version of API between frontend and plugin requested
21648 + *
21649 + * return: 0 on success, <0 on error, >0 (with version supported) on
21650 + * version mismatch
21651 + */
21652 +extern int netfront_accelerator_loaded(int version, const char *frontend,
21653 + struct netfront_accel_hooks *hooks);
21654 +
21655 +/*
21656 + * Called by an accelerator plugin module when it is about to unload.
21657 + *
21658 + * frontend: the string describing the accelerator. Must match the
21659 + * one passed to netfront_accelerator_loaded()
21660 + */
21661 +extern void netfront_accelerator_stop(const char *frontend);
21662 +
21663 +/*
21664 + * Called by an accelerator before waking the net device's TX queue to
21665 + * ensure the slow path has available slots. Returns true if OK to
21666 + * wake, false if still busy
21667 + */
21668 +extern int netfront_check_queue_ready(struct net_device *net_dev);
21669 +
21670 +
21671 +/* Internal-to-netfront Functions */
21672 +
21673 +/*
21674 + * Call into accelerator and check to see if it has tx space before we
21675 + * wake the net device's TX queue. Returns true if OK to wake, false
21676 + * if still busy
21677 + */
21678 +extern
21679 +int netfront_check_accelerator_queue_ready(struct net_device *dev,
21680 + struct netfront_info *np);
21681 +extern
21682 +int netfront_accelerator_call_remove(struct netfront_info *np,
21683 + struct xenbus_device *dev);
21684 +extern
21685 +int netfront_accelerator_suspend(struct netfront_info *np,
21686 + struct xenbus_device *dev);
21687 +extern
21688 +int netfront_accelerator_suspend_cancel(struct netfront_info *np,
21689 + struct xenbus_device *dev);
21690 +extern
21691 +void netfront_accelerator_resume(struct netfront_info *np,
21692 + struct xenbus_device *dev);
21693 +extern
21694 +void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np,
21695 + struct net_device *dev);
21696 +extern
21697 +int netfront_accelerator_call_get_stats(struct netfront_info *np,
21698 + struct net_device *dev);
21699 +extern
21700 +void netfront_accelerator_add_watch(struct netfront_info *np);
21701 +
21702 +extern
21703 +void netif_init_accel(void);
21704 +extern
21705 +void netif_exit_accel(void);
21706 +
21707 +extern
21708 +void init_accelerator_vif(struct netfront_info *np,
21709 + struct xenbus_device *dev);
21710 +#endif /* NETFRONT_H */
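
A minimal, hypothetical sketch of an accelerator plugin built against the interface declared in netfront.h above: it hands a netfront_accel_hooks table to netfront_accelerator_loaded() when it loads and detaches with netfront_accelerator_stop() before unloading. The hook bodies and the "demo_accel" frontend string are illustrative assumptions, not part of this patch.

#include <linux/module.h>
#include <linux/netdevice.h>
#include "netfront.h"

/* Hypothetical hooks -- a real plugin would drive its hardware here. */
static int demo_new_device(struct net_device *net_dev,
			   struct xenbus_device *dev)
{
	return 0;		/* accept acceleration for this vif */
}

static int demo_remove(struct xenbus_device *dev)
{
	return 0;
}

static struct netfront_accel_hooks demo_hooks = {
	.new_device = demo_new_device,
	.remove     = demo_remove,
	/* netdev_poll, start_xmit, ... left NULL for brevity */
};

static int __init demo_accel_init(void)
{
	/* 0 on success; a positive return reports the version netfront
	 * supports when the requested version is not accepted. */
	return netfront_accelerator_loaded(NETFRONT_ACCEL_VERSION,
					   "demo_accel", &demo_hooks);
}

static void __exit demo_accel_exit(void)
{
	/* Must pass the same frontend string used at load time. */
	netfront_accelerator_stop("demo_accel");
}

module_init(demo_accel_init);
module_exit(demo_accel_exit);
MODULE_LICENSE("GPL");
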
21711 Index: head-2008-11-25/drivers/xen/pciback/Makefile
21712 ===================================================================
21713 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
21714 +++ head-2008-11-25/drivers/xen/pciback/Makefile 2008-07-21 11:00:33.000000000 +0200
21715 @@ -0,0 +1,17 @@
21716 +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
21717 +
21718 +pciback-y := pci_stub.o pciback_ops.o xenbus.o
21719 +pciback-y += conf_space.o conf_space_header.o \
21720 + conf_space_capability.o \
21721 + conf_space_capability_vpd.o \
21722 + conf_space_capability_pm.o \
21723 + conf_space_quirks.o
21724 +pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
21725 +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
21726 +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
21727 +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
21728 +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
21729 +
21730 +ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
21731 +EXTRA_CFLAGS += -DDEBUG
21732 +endif
21733 Index: head-2008-11-25/drivers/xen/pciback/conf_space.c
21734 ===================================================================
21735 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
21736 +++ head-2008-11-25/drivers/xen/pciback/conf_space.c 2008-10-29 09:55:56.000000000 +0100
21737 @@ -0,0 +1,426 @@
21738 +/*
21739 + * PCI Backend - Functions for creating a virtual configuration space for
21740 + * exported PCI Devices.
21741 + * It's dangerous to allow PCI Driver Domains to change their
21742 + * device's resources (memory, i/o ports, interrupts). We need to
21743 + * restrict changes to certain PCI Configuration registers:
21744 + * BARs, INTERRUPT_PIN, most registers in the header...
21745 + *
21746 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
21747 + */
21748 +
21749 +#include <linux/kernel.h>
21750 +#include <linux/pci.h>
21751 +#include "pciback.h"
21752 +#include "conf_space.h"
21753 +#include "conf_space_quirks.h"
21754 +
21755 +#define DEFINE_PCI_CONFIG(op,size,type) \
21756 +int pciback_##op##_config_##size \
21757 +(struct pci_dev *dev, int offset, type value, void *data) \
21758 +{ \
21759 + return pci_##op##_config_##size (dev, offset, value); \
21760 +}
21761 +
21762 +DEFINE_PCI_CONFIG(read, byte, u8 *)
21763 +DEFINE_PCI_CONFIG(read, word, u16 *)
21764 +DEFINE_PCI_CONFIG(read, dword, u32 *)
21765 +
21766 +DEFINE_PCI_CONFIG(write, byte, u8)
21767 +DEFINE_PCI_CONFIG(write, word, u16)
21768 +DEFINE_PCI_CONFIG(write, dword, u32)
21769 +
21770 +static int conf_space_read(struct pci_dev *dev,
21771 + const struct config_field_entry *entry,
21772 + int offset, u32 *value)
21773 +{
21774 + int ret = 0;
21775 + const struct config_field *field = entry->field;
21776 +
21777 + *value = 0;
21778 +
21779 + switch (field->size) {
21780 + case 1:
21781 + if (field->u.b.read)
21782 + ret = field->u.b.read(dev, offset, (u8 *) value,
21783 + entry->data);
21784 + break;
21785 + case 2:
21786 + if (field->u.w.read)
21787 + ret = field->u.w.read(dev, offset, (u16 *) value,
21788 + entry->data);
21789 + break;
21790 + case 4:
21791 + if (field->u.dw.read)
21792 + ret = field->u.dw.read(dev, offset, value, entry->data);
21793 + break;
21794 + }
21795 + return ret;
21796 +}
21797 +
21798 +static int conf_space_write(struct pci_dev *dev,
21799 + const struct config_field_entry *entry,
21800 + int offset, u32 value)
21801 +{
21802 + int ret = 0;
21803 + const struct config_field *field = entry->field;
21804 +
21805 + switch (field->size) {
21806 + case 1:
21807 + if (field->u.b.write)
21808 + ret = field->u.b.write(dev, offset, (u8) value,
21809 + entry->data);
21810 + break;
21811 + case 2:
21812 + if (field->u.w.write)
21813 + ret = field->u.w.write(dev, offset, (u16) value,
21814 + entry->data);
21815 + break;
21816 + case 4:
21817 + if (field->u.dw.write)
21818 + ret = field->u.dw.write(dev, offset, value,
21819 + entry->data);
21820 + break;
21821 + }
21822 + return ret;
21823 +}
21824 +
21825 +static inline u32 get_mask(int size)
21826 +{
21827 + if (size == 1)
21828 + return 0xff;
21829 + else if (size == 2)
21830 + return 0xffff;
21831 + else
21832 + return 0xffffffff;
21833 +}
21834 +
21835 +static inline int valid_request(int offset, int size)
21836 +{
21837 + /* Validate request (no unaligned requests) */
21838 + if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
21839 + return 1;
21840 + return 0;
21841 +}
21842 +
21843 +static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
21844 + int offset)
21845 +{
21846 + if (offset >= 0) {
21847 + new_val_mask <<= (offset * 8);
21848 + new_val <<= (offset * 8);
21849 + } else {
21850 + new_val_mask >>= (offset * -8);
21851 + new_val >>= (offset * -8);
21852 + }
21853 + val = (val & ~new_val_mask) | (new_val & new_val_mask);
21854 +
21855 + return val;
21856 +}
21857 +
21858 +static int pcibios_err_to_errno(int err)
21859 +{
21860 + switch (err) {
21861 + case PCIBIOS_SUCCESSFUL:
21862 + return XEN_PCI_ERR_success;
21863 + case PCIBIOS_DEVICE_NOT_FOUND:
21864 + return XEN_PCI_ERR_dev_not_found;
21865 + case PCIBIOS_BAD_REGISTER_NUMBER:
21866 + return XEN_PCI_ERR_invalid_offset;
21867 + case PCIBIOS_FUNC_NOT_SUPPORTED:
21868 + return XEN_PCI_ERR_not_implemented;
21869 + case PCIBIOS_SET_FAILED:
21870 + return XEN_PCI_ERR_access_denied;
21871 + }
21872 + return err;
21873 +}
21874 +
21875 +int pciback_config_read(struct pci_dev *dev, int offset, int size,
21876 + u32 * ret_val)
21877 +{
21878 + int err = 0;
21879 + struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
21880 + const struct config_field_entry *cfg_entry;
21881 + const struct config_field *field;
21882 + int req_start, req_end, field_start, field_end;
21883 + /* if read fails for any reason, return 0 (as if device didn't respond) */
21884 + u32 value = 0, tmp_val;
21885 +
21886 + if (unlikely(verbose_request))
21887 + printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
21888 + pci_name(dev), size, offset);
21889 +
21890 + if (!valid_request(offset, size)) {
21891 + err = XEN_PCI_ERR_invalid_offset;
21892 + goto out;
21893 + }
21894 +
21895 + /* Get the real value first, then modify as appropriate */
21896 + switch (size) {
21897 + case 1:
21898 + err = pci_read_config_byte(dev, offset, (u8 *) & value);
21899 + break;
21900 + case 2:
21901 + err = pci_read_config_word(dev, offset, (u16 *) & value);
21902 + break;
21903 + case 4:
21904 + err = pci_read_config_dword(dev, offset, &value);
21905 + break;
21906 + }
21907 +
21908 + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
21909 + field = cfg_entry->field;
21910 +
21911 + req_start = offset;
21912 + req_end = offset + size;
21913 + field_start = OFFSET(cfg_entry);
21914 + field_end = OFFSET(cfg_entry) + field->size;
21915 +
21916 + if ((req_start >= field_start && req_start < field_end)
21917 + || (req_end > field_start && req_end <= field_end)) {
21918 + err = conf_space_read(dev, cfg_entry, field_start,
21919 + &tmp_val);
21920 + if (err)
21921 + goto out;
21922 +
21923 + value = merge_value(value, tmp_val,
21924 + get_mask(field->size),
21925 + field_start - req_start);
21926 + }
21927 + }
21928 +
21929 + out:
21930 + if (unlikely(verbose_request))
21931 + printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
21932 + pci_name(dev), size, offset, value);
21933 +
21934 + *ret_val = value;
21935 + return pcibios_err_to_errno(err);
21936 +}
21937 +
21938 +int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
21939 +{
21940 + int err = 0, handled = 0;
21941 + struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
21942 + const struct config_field_entry *cfg_entry;
21943 + const struct config_field *field;
21944 + u32 tmp_val;
21945 + int req_start, req_end, field_start, field_end;
21946 +
21947 + if (unlikely(verbose_request))
21948 + printk(KERN_DEBUG
21949 + "pciback: %s: write request %d bytes at 0x%x = %x\n",
21950 + pci_name(dev), size, offset, value);
21951 +
21952 + if (!valid_request(offset, size))
21953 + return XEN_PCI_ERR_invalid_offset;
21954 +
21955 + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
21956 + field = cfg_entry->field;
21957 +
21958 + req_start = offset;
21959 + req_end = offset + size;
21960 + field_start = OFFSET(cfg_entry);
21961 + field_end = OFFSET(cfg_entry) + field->size;
21962 +
21963 + if ((req_start >= field_start && req_start < field_end)
21964 + || (req_end > field_start && req_end <= field_end)) {
21965 + tmp_val = 0;
21966 +
21967 + err = pciback_config_read(dev, field_start,
21968 + field->size, &tmp_val);
21969 + if (err)
21970 + break;
21971 +
21972 + tmp_val = merge_value(tmp_val, value, get_mask(size),
21973 + req_start - field_start);
21974 +
21975 + err = conf_space_write(dev, cfg_entry, field_start,
21976 + tmp_val);
21977 +
21978 + /* handled is set true here, but not every byte
21979 + * may have been written! Properly detecting if
21980 + * every byte is handled is unnecessary as the
21981 + * flag is used to detect devices that need
21982 + * special helpers to work correctly.
21983 + */
21984 + handled = 1;
21985 + }
21986 + }
21987 +
21988 + if (!handled && !err) {
21989 + /* By default, anything not specifically handled above is
21990 + * read-only. The permissive flag changes this behavior so
21991 + * that anything not specifically handled above is writable.
21992 + * This means that some fields may still be read-only because
21993 + * they have entries in the config_field list that intercept
21994 + * the write and do nothing. */
21995 + if (dev_data->permissive) {
21996 + switch (size) {
21997 + case 1:
21998 + err = pci_write_config_byte(dev, offset,
21999 + (u8) value);
22000 + break;
22001 + case 2:
22002 + err = pci_write_config_word(dev, offset,
22003 + (u16) value);
22004 + break;
22005 + case 4:
22006 + err = pci_write_config_dword(dev, offset,
22007 + (u32) value);
22008 + break;
22009 + }
22010 + } else if (!dev_data->warned_on_write) {
22011 + dev_data->warned_on_write = 1;
22012 + dev_warn(&dev->dev, "Driver tried to write to a "
22013 + "read-only configuration space field at offset "
22014 + "0x%x, size %d. This may be harmless, but if "
22015 + "you have problems with your device:\n"
22016 + "1) see permissive attribute in sysfs\n"
22017 + "2) report problems to the xen-devel "
22018 + "mailing list along with details of your "
22019 + "device obtained from lspci.\n", offset, size);
22020 + }
22021 + }
22022 +
22023 + return pcibios_err_to_errno(err);
22024 +}
22025 +
22026 +void pciback_config_free_dyn_fields(struct pci_dev *dev)
22027 +{
22028 + struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
22029 + struct config_field_entry *cfg_entry, *t;
22030 + const struct config_field *field;
22031 +
22032 + dev_dbg(&dev->dev,
22033 + "free-ing dynamically allocated virtual configuration space fields\n");
22034 +
22035 + list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
22036 + field = cfg_entry->field;
22037 +
22038 + if (field->clean) {
22039 + field->clean((struct config_field *)field);
22040 +
22041 + if (cfg_entry->data)
22042 + kfree(cfg_entry->data);
22043 +
22044 + list_del(&cfg_entry->list);
22045 + kfree(cfg_entry);
22046 + }
22047 +
22048 + }
22049 +}
22050 +
22051 +void pciback_config_reset_dev(struct pci_dev *dev)
22052 +{
22053 + struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
22054 + const struct config_field_entry *cfg_entry;
22055 + const struct config_field *field;
22056 +
22057 + dev_dbg(&dev->dev, "resetting virtual configuration space\n");
22058 +
22059 + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
22060 + field = cfg_entry->field;
22061 +
22062 + if (field->reset)
22063 + field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
22064 + }
22065 +}
22066 +
22067 +void pciback_config_free_dev(struct pci_dev *dev)
22068 +{
22069 + struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
22070 + struct config_field_entry *cfg_entry, *t;
22071 + const struct config_field *field;
22072 +
22073 + dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
22074 +
22075 + list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
22076 + list_del(&cfg_entry->list);
22077 +
22078 + field = cfg_entry->field;
22079 +
22080 + if (field->release)
22081 + field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
22082 +
22083 + kfree(cfg_entry);
22084 + }
22085 +}
22086 +
22087 +int pciback_config_add_field_offset(struct pci_dev *dev,
22088 + const struct config_field *field,
22089 + unsigned int base_offset)
22090 +{
22091 + int err = 0;
22092 + struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
22093 + struct config_field_entry *cfg_entry;
22094 + void *tmp;
22095 +
22096 + cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
22097 + if (!cfg_entry) {
22098 + err = -ENOMEM;
22099 + goto out;
22100 + }
22101 +
22102 + cfg_entry->data = NULL;
22103 + cfg_entry->field = field;
22104 + cfg_entry->base_offset = base_offset;
22105 +
22106 + /* silently ignore duplicate fields */
22107 + err = pciback_field_is_dup(dev, OFFSET(cfg_entry));
22108 + if (err)
22109 + goto out;
22110 +
22111 + if (field->init) {
22112 + tmp = field->init(dev, OFFSET(cfg_entry));
22113 +
22114 + if (IS_ERR(tmp)) {
22115 + err = PTR_ERR(tmp);
22116 + goto out;
22117 + }
22118 +
22119 + cfg_entry->data = tmp;
22120 + }
22121 +
22122 + dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
22123 + OFFSET(cfg_entry));
22124 + list_add_tail(&cfg_entry->list, &dev_data->config_fields);
22125 +
22126 + out:
22127 + if (err)
22128 + kfree(cfg_entry);
22129 +
22130 + return err;
22131 +}
22132 +
22133 +/* This sets up the device's virtual configuration space to keep track of
22134 + * certain registers (like the base address registers (BARs)) so that we can
22135 + * keep the client from manipulating them directly.
22136 + */
22137 +int pciback_config_init_dev(struct pci_dev *dev)
22138 +{
22139 + int err = 0;
22140 + struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
22141 +
22142 + dev_dbg(&dev->dev, "initializing virtual configuration space\n");
22143 +
22144 + INIT_LIST_HEAD(&dev_data->config_fields);
22145 +
22146 + err = pciback_config_header_add_fields(dev);
22147 + if (err)
22148 + goto out;
22149 +
22150 + err = pciback_config_capability_add_fields(dev);
22151 + if (err)
22152 + goto out;
22153 +
22154 + err = pciback_config_quirks_init(dev);
22155 +
22156 + out:
22157 + return err;
22158 +}
22159 +
22160 +int pciback_config_init(void)
22161 +{
22162 + return pciback_config_capability_init();
22163 +}
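
A worked illustration of the merge logic in conf_space.c above: pciback_config_read() first reads the real register, then overlays each intersecting virtual field with merge_value(), shifting the field's value and mask by (field_start - req_start) bytes. The stand-alone user-space copy below reproduces the arithmetic; the sample values are assumptions for illustration only.

#include <stdint.h>
#include <stdio.h>

/* Stand-alone copy of the kernel helper, for illustration. */
static uint32_t merge_value(uint32_t val, uint32_t new_val,
			    uint32_t new_val_mask, int offset)
{
	if (offset >= 0) {
		new_val_mask <<= (offset * 8);
		new_val <<= (offset * 8);
	} else {
		new_val_mask >>= (offset * -8);
		new_val >>= (offset * -8);
	}
	return (val & ~new_val_mask) | (new_val & new_val_mask);
}

int main(void)
{
	/* The hardware returned 0xdeadbeef for a dword read at 0x10;
	 * a 2-byte virtual field at 0x12 wants to report 0x1234, so
	 * offset = field_start - req_start = 2. */
	printf("0x%08x\n", merge_value(0xdeadbeef, 0x1234, 0xffff, 2));
	/* prints 0x1234beef */
	return 0;
}
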
22164 Index: head-2008-11-25/drivers/xen/pciback/conf_space.h
22165 ===================================================================
22166 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
22167 +++ head-2008-11-25/drivers/xen/pciback/conf_space.h 2008-10-29 09:55:56.000000000 +0100
22168 @@ -0,0 +1,126 @@
22169 +/*
22170 + * PCI Backend - Common data structures for overriding the configuration space
22171 + *
22172 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22173 + */
22174 +
22175 +#ifndef __XEN_PCIBACK_CONF_SPACE_H__
22176 +#define __XEN_PCIBACK_CONF_SPACE_H__
22177 +
22178 +#include <linux/list.h>
22179 +#include <linux/err.h>
22180 +
22181 +/* conf_field_init can return an errno in a ptr with ERR_PTR() */
22182 +typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
22183 +typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
22184 +typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
22185 +
22186 +typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
22187 + void *data);
22188 +typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
22189 + void *data);
22190 +typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
22191 + void *data);
22192 +typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
22193 + void *data);
22194 +typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
22195 + void *data);
22196 +typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
22197 + void *data);
22198 +
22199 +/* These are the fields within the configuration space which we
22200 + * are interested in intercepting reads/writes to and changing their
22201 + * values.
22202 + */
22203 +struct config_field {
22204 + unsigned int offset;
22205 + unsigned int size;
22206 + unsigned int mask;
22207 + conf_field_init init;
22208 + conf_field_reset reset;
22209 + conf_field_free release;
22210 + void (*clean) (struct config_field * field);
22211 + union {
22212 + struct {
22213 + conf_dword_write write;
22214 + conf_dword_read read;
22215 + } dw;
22216 + struct {
22217 + conf_word_write write;
22218 + conf_word_read read;
22219 + } w;
22220 + struct {
22221 + conf_byte_write write;
22222 + conf_byte_read read;
22223 + } b;
22224 + } u;
22225 + struct list_head list;
22226 +};
22227 +
22228 +struct config_field_entry {
22229 + struct list_head list;
22230 + const struct config_field *field;
22231 + unsigned int base_offset;
22232 + void *data;
22233 +};
22234 +
22235 +#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
22236 +
22237 +/* Add fields to a device - the add_fields macro expects to get a pointer to
22238 + * the first entry in an array (of which the ending is marked by size==0)
22239 + */
22240 +int pciback_config_add_field_offset(struct pci_dev *dev,
22241 + const struct config_field *field,
22242 + unsigned int offset);
22243 +
22244 +static inline int pciback_config_add_field(struct pci_dev *dev,
22245 + const struct config_field *field)
22246 +{
22247 + return pciback_config_add_field_offset(dev, field, 0);
22248 +}
22249 +
22250 +static inline int pciback_config_add_fields(struct pci_dev *dev,
22251 + const struct config_field *field)
22252 +{
22253 + int i, err = 0;
22254 + for (i = 0; field[i].size != 0; i++) {
22255 + err = pciback_config_add_field(dev, &field[i]);
22256 + if (err)
22257 + break;
22258 + }
22259 + return err;
22260 +}
22261 +
22262 +static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
22263 + const struct config_field *field,
22264 + unsigned int offset)
22265 +{
22266 + int i, err = 0;
22267 + for (i = 0; field[i].size != 0; i++) {
22268 + err = pciback_config_add_field_offset(dev, &field[i], offset);
22269 + if (err)
22270 + break;
22271 + }
22272 + return err;
22273 +}
22274 +
22275 +/* Read/Write the real configuration space */
22276 +int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
22277 + void *data);
22278 +int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
22279 + void *data);
22280 +int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
22281 + void *data);
22282 +int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
22283 + void *data);
22284 +int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
22285 + void *data);
22286 +int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
22287 + void *data);
22288 +
22289 +int pciback_config_capability_init(void);
22290 +
22291 +int pciback_config_header_add_fields(struct pci_dev *dev);
22292 +int pciback_config_capability_add_fields(struct pci_dev *dev);
22293 +
22294 +#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
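
The header above expects an overlay to be supplied as an array of config_field entries whose final element has size == 0; pciback_config_add_fields() walks the array until it hits that terminator. Below is a hypothetical overlay using only the default pass-through word reader; the chosen registers are illustrative assumptions, and real overlays appear in conf_space_header.c and the capability files later in this patch.

/* Read-only view of the subsystem IDs; writes fall through to the
 * default read-only handling in pciback_config_write(). */
static const struct config_field demo_fields[] = {
	{
		.offset   = PCI_SUBSYSTEM_VENDOR_ID,
		.size     = 2,
		.u.w.read = pciback_read_config_word,
	},
	{
		.offset   = PCI_SUBSYSTEM_ID,
		.size     = 2,
		.u.w.read = pciback_read_config_word,
	},
	{}	/* size == 0 terminates the array */
};

/* ... during device setup: */
	err = pciback_config_add_fields(dev, demo_fields);
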
22295 Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability.c
22296 ===================================================================
22297 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
22298 +++ head-2008-11-25/drivers/xen/pciback/conf_space_capability.c 2008-10-29 09:55:56.000000000 +0100
22299 @@ -0,0 +1,69 @@
22300 +/*
22301 + * PCI Backend - Handles the virtual fields found on the capability lists
22302 + * in the configuration space.
22303 + *
22304 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22305 + */
22306 +
22307 +#include <linux/kernel.h>
22308 +#include <linux/pci.h>
22309 +#include "pciback.h"
22310 +#include "conf_space.h"
22311 +#include "conf_space_capability.h"
22312 +
22313 +static LIST_HEAD(capabilities);
22314 +
22315 +static const struct config_field caplist_header[] = {
22316 + {
22317 + .offset = PCI_CAP_LIST_ID,
22318 + .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
22319 + .u.w.read = pciback_read_config_word,
22320 + .u.w.write = NULL,
22321 + },
22322 + {}
22323 +};
22324 +
22325 +static inline void register_capability(struct pciback_config_capability *cap)
22326 +{
22327 + list_add_tail(&cap->cap_list, &capabilities);
22328 +}
22329 +
22330 +int pciback_config_capability_add_fields(struct pci_dev *dev)
22331 +{
22332 + int err = 0;
22333 + struct pciback_config_capability *cap;
22334 + int cap_offset;
22335 +
22336 + list_for_each_entry(cap, &capabilities, cap_list) {
22337 + cap_offset = pci_find_capability(dev, cap->capability);
22338 + if (cap_offset) {
22339 + dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
22340 + cap->capability, cap_offset);
22341 +
22342 + err = pciback_config_add_fields_offset(dev,
22343 + caplist_header,
22344 + cap_offset);
22345 + if (err)
22346 + goto out;
22347 + err = pciback_config_add_fields_offset(dev,
22348 + cap->fields,
22349 + cap_offset);
22350 + if (err)
22351 + goto out;
22352 + }
22353 + }
22354 +
22355 + out:
22356 + return err;
22357 +}
22358 +
22359 +extern struct pciback_config_capability pciback_config_capability_vpd;
22360 +extern struct pciback_config_capability pciback_config_capability_pm;
22361 +
22362 +int pciback_config_capability_init(void)
22363 +{
22364 + register_capability(&pciback_config_capability_vpd);
22365 + register_capability(&pciback_config_capability_pm);
22366 +
22367 + return 0;
22368 +}
22369 Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability.h
22370 ===================================================================
22371 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
22372 +++ head-2008-11-25/drivers/xen/pciback/conf_space_capability.h 2008-10-29 09:55:56.000000000 +0100
22373 @@ -0,0 +1,23 @@
22374 +/*
22375 + * PCI Backend - Data structures for special overlays for structures on
22376 + * the capability list.
22377 + *
22378 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22379 + */
22380 +
22381 +#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
22382 +#define __PCIBACK_CONFIG_CAPABILITY_H__
22383 +
22384 +#include <linux/pci.h>
22385 +#include <linux/list.h>
22386 +
22387 +struct pciback_config_capability {
22388 + struct list_head cap_list;
22389 +
22390 + int capability;
22391 +
22392 + /* If the device has the capability found above, add these fields */
22393 + const struct config_field *fields;
22394 +};
22395 +
22396 +#endif
22397 Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability_msi.c
22398 ===================================================================
22399 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
22400 +++ head-2008-11-25/drivers/xen/pciback/conf_space_capability_msi.c 2008-09-15 13:40:15.000000000 +0200
22401 @@ -0,0 +1,79 @@
22402 +/*
22403 + * PCI Backend -- Configuration overlay for MSI capability
22404 + */
22405 +#include <linux/pci.h>
22406 +#include <linux/slab.h>
22407 +#include "conf_space.h"
22408 +#include "conf_space_capability.h"
22409 +#include <xen/interface/io/pciif.h>
22410 +#include "pciback.h"
22411 +
22412 +int pciback_enable_msi(struct pciback_device *pdev,
22413 + struct pci_dev *dev, struct xen_pci_op *op)
22414 +{
22415 + int otherend = pdev->xdev->otherend_id;
22416 + int status;
22417 +
22418 + status = pci_enable_msi(dev);
22419 +
22420 + if (status) {
22421 + printk("error enable msi for guest %x status %x\n", otherend, status);
22422 + op->value = 0;
22423 + return XEN_PCI_ERR_op_failed;
22424 + }
22425 +
22426 + op->value = dev->irq;
22427 + return 0;
22428 +}
22429 +
22430 +int pciback_disable_msi(struct pciback_device *pdev,
22431 + struct pci_dev *dev, struct xen_pci_op *op)
22432 +{
22433 + pci_disable_msi(dev);
22434 +
22435 + op->value = dev->irq;
22436 + return 0;
22437 +}
22438 +
22439 +int pciback_enable_msix(struct pciback_device *pdev,
22440 + struct pci_dev *dev, struct xen_pci_op *op)
22441 +{
22442 + int i, result;
22443 + struct msix_entry *entries;
22444 +
22445 + if (op->value > SH_INFO_MAX_VEC)
22446 + return -EINVAL;
22447 +
22448 + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
22449 + if (entries == NULL)
22450 + return -ENOMEM;
22451 +
22452 + for (i = 0; i < op->value; i++) {
22453 + entries[i].entry = op->msix_entries[i].entry;
22454 + entries[i].vector = op->msix_entries[i].vector;
22455 + }
22456 +
22457 + result = pci_enable_msix(dev, entries, op->value);
22458 +
22459 + for (i = 0; i < op->value; i++) {
22460 + op->msix_entries[i].entry = entries[i].entry;
22461 + op->msix_entries[i].vector = entries[i].vector;
22462 + }
22463 +
22464 + kfree(entries);
22465 +
22466 + op->value = result;
22467 +
22468 + return result;
22469 +}
22470 +
22471 +int pciback_disable_msix(struct pciback_device *pdev,
22472 + struct pci_dev *dev, struct xen_pci_op *op)
22473 +{
22474 +
22475 + pci_disable_msix(dev);
22476 +
22477 + op->value = dev->irq;
22478 + return 0;
22479 +}
22480 +
22481 Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability_pm.c
22482 ===================================================================
22483 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
22484 +++ head-2008-11-25/drivers/xen/pciback/conf_space_capability_pm.c 2008-10-29 09:55:56.000000000 +0100
22485 @@ -0,0 +1,126 @@
22486 +/*
22487 + * PCI Backend - Configuration space overlay for power management
22488 + *
22489 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22490 + */
22491 +
22492 +#include <linux/pci.h>
22493 +#include "conf_space.h"
22494 +#include "conf_space_capability.h"
22495 +
22496 +static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
22497 + void *data)
22498 +{
22499 + int err;
22500 + u16 real_value;
22501 +
22502 + err = pci_read_config_word(dev, offset, &real_value);
22503 + if (err)
22504 + goto out;
22505 +
22506 + *value = real_value & ~PCI_PM_CAP_PME_MASK;
22507 +
22508 + out:
22509 + return err;
22510 +}
22511 +
22512 +/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
22513 + * Can't allow driver domain to enable PMEs - they're shared */
22514 +#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
22515 +
22516 +static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
22517 + void *data)
22518 +{
22519 + int err;
22520 + u16 old_value;
22521 + pci_power_t new_state, old_state;
22522 +
22523 + err = pci_read_config_word(dev, offset, &old_value);
22524 + if (err)
22525 + goto out;
22526 +
22527 + old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
22528 + new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
22529 +
22530 + new_value &= PM_OK_BITS;
22531 + if ((old_value & PM_OK_BITS) != new_value) {
22532 + new_value = (old_value & ~PM_OK_BITS) | new_value;
22533 + err = pci_write_config_word(dev, offset, new_value);
22534 + if (err)
22535 + goto out;
22536 + }
22537 +
22538 + /* Let pci core handle the power management change */
22539 + dev_dbg(&dev->dev, "set power state to %x\n", new_state);
22540 + err = pci_set_power_state(dev, new_state);
22541 + if (err) {
22542 + err = PCIBIOS_SET_FAILED;
22543 + goto out;
22544 + }
22545 +
22546 + /*
22547 + * Device may lose PCI config info on D3->D0 transition. This
22548 + * is a problem for some guests which will not reset BARs. Even
22549 + * those that have a go will be foiled by our BAR-write handler
22550 + * which will discard the write! Since Linux won't re-init
22551 + * the config space automatically in all cases, we do it here.
22552 + * Future: Should we re-initialise all first 64 bytes of config space?
22553 + */
22554 + if (new_state == PCI_D0 &&
22555 + (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
22556 + !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
22557 + pci_restore_bars(dev);
22558 +
22559 + out:
22560 + return err;
22561 +}
22562 +
22563 +/* Ensure PMEs are disabled */
22564 +static void *pm_ctrl_init(struct pci_dev *dev, int offset)
22565 +{
22566 + int err;
22567 + u16 value;
22568 +
22569 + err = pci_read_config_word(dev, offset, &value);
22570 + if (err)
22571 + goto out;
22572 +
22573 + if (value & PCI_PM_CTRL_PME_ENABLE) {
22574 + value &= ~PCI_PM_CTRL_PME_ENABLE;
22575 + err = pci_write_config_word(dev, offset, value);
22576 + }
22577 +
22578 + out:
22579 + return ERR_PTR(err);
22580 +}
22581 +
22582 +static const struct config_field caplist_pm[] = {
22583 + {
22584 + .offset = PCI_PM_PMC,
22585 + .size = 2,
22586 + .u.w.read = pm_caps_read,
22587 + },
22588 + {
22589 + .offset = PCI_PM_CTRL,
22590 + .size = 2,
22591 + .init = pm_ctrl_init,
22592 + .u.w.read = pciback_read_config_word,
22593 + .u.w.write = pm_ctrl_write,
22594 + },
22595 + {
22596 + .offset = PCI_PM_PPB_EXTENSIONS,
22597 + .size = 1,
22598 + .u.b.read = pciback_read_config_byte,
22599 + },
22600 + {
22601 + .offset = PCI_PM_DATA_REGISTER,
22602 + .size = 1,
22603 + .u.b.read = pciback_read_config_byte,
22604 + },
22605 + {}
22606 +};
22607 +
22608 +struct pciback_config_capability pciback_config_capability_pm = {
22609 + .capability = PCI_CAP_ID_PM,
22610 + .fields = caplist_pm,
22611 +};
22612 Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability_vpd.c
22613 ===================================================================
22614 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
22615 +++ head-2008-11-25/drivers/xen/pciback/conf_space_capability_vpd.c 2008-10-29 09:55:56.000000000 +0100
22616 @@ -0,0 +1,40 @@
22617 +/*
22618 + * PCI Backend - Configuration space overlay for Vital Product Data
22619 + *
22620 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22621 + */
22622 +
22623 +#include <linux/pci.h>
22624 +#include "conf_space.h"
22625 +#include "conf_space_capability.h"
22626 +
22627 +static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
22628 + void *data)
22629 +{
22630 + /* Disallow writes to the vital product data */
22631 + if (value & PCI_VPD_ADDR_F)
22632 + return PCIBIOS_SET_FAILED;
22633 + else
22634 + return pci_write_config_word(dev, offset, value);
22635 +}
22636 +
22637 +static const struct config_field caplist_vpd[] = {
22638 + {
22639 + .offset = PCI_VPD_ADDR,
22640 + .size = 2,
22641 + .u.w.read = pciback_read_config_word,
22642 + .u.w.write = vpd_address_write,
22643 + },
22644 + {
22645 + .offset = PCI_VPD_DATA,
22646 + .size = 4,
22647 + .u.dw.read = pciback_read_config_dword,
22648 + .u.dw.write = NULL,
22649 + },
22650 + {}
22651 +};
22652 +
22653 +struct pciback_config_capability pciback_config_capability_vpd = {
22654 + .capability = PCI_CAP_ID_VPD,
22655 + .fields = caplist_vpd,
22656 +};
22657 Index: head-2008-11-25/drivers/xen/pciback/conf_space_header.c
22658 ===================================================================
22659 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
22660 +++ head-2008-11-25/drivers/xen/pciback/conf_space_header.c 2008-10-29 09:55:56.000000000 +0100
22661 @@ -0,0 +1,317 @@
22662 +/*
22663 + * PCI Backend - Handles the virtual fields in the configuration space headers.
22664 + *
22665 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22666 + */
22667 +
22668 +#include <linux/kernel.h>
22669 +#include <linux/pci.h>
22670 +#include "pciback.h"
22671 +#include "conf_space.h"
22672 +
22673 +struct pci_bar_info {
22674 + u32 val;
22675 + u32 len_val;
22676 + int which;
22677 +};
22678 +
22679 +#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
22680 +#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
22681 +
22682 +static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
22683 +{
22684 + int err;
22685 +
22686 + if (!dev->is_enabled && is_enable_cmd(value)) {
22687 + if (unlikely(verbose_request))
22688 + printk(KERN_DEBUG "pciback: %s: enable\n",
22689 + pci_name(dev));
22690 + err = pci_enable_device(dev);
22691 + if (err)
22692 + return err;
22693 + } else if (dev->is_enabled && !is_enable_cmd(value)) {
22694 + if (unlikely(verbose_request))
22695 + printk(KERN_DEBUG "pciback: %s: disable\n",
22696 + pci_name(dev));
22697 + pci_disable_device(dev);
22698 + }
22699 +
22700 + if (!dev->is_busmaster && is_master_cmd(value)) {
22701 + if (unlikely(verbose_request))
22702 + printk(KERN_DEBUG "pciback: %s: set bus master\n",
22703 + pci_name(dev));
22704 + pci_set_master(dev);
22705 + }
22706 +
22707 + if (value & PCI_COMMAND_INVALIDATE) {
22708 + if (unlikely(verbose_request))
22709 + printk(KERN_DEBUG
22710 + "pciback: %s: enable memory-write-invalidate\n",
22711 + pci_name(dev));
22712 + err = pci_set_mwi(dev);
22713 + if (err) {
22714 + printk(KERN_WARNING
22715 + "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
22716 + pci_name(dev), err);
22717 + value &= ~PCI_COMMAND_INVALIDATE;
22718 + }
22719 + }
22720 +
22721 + return pci_write_config_word(dev, offset, value);
22722 +}
22723 +
22724 +static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
22725 +{
22726 + struct pci_bar_info *bar = data;
22727 +
22728 + if (unlikely(!bar)) {
22729 + printk(KERN_WARNING "pciback: driver data not found for %s\n",
22730 + pci_name(dev));
22731 + return XEN_PCI_ERR_op_failed;
22732 + }
22733 +
22734 + /* A write to obtain the length must happen as a 32-bit write.
22735 + * This does not (yet) support writing individual bytes
22736 + */
22737 + if (value == ~PCI_ROM_ADDRESS_ENABLE)
22738 + bar->which = 1;
22739 + else {
22740 + u32 tmpval;
22741 + pci_read_config_dword(dev, offset, &tmpval);
22742 + if (tmpval != bar->val && value == bar->val) {
22743 + /* Allow restoration of bar value. */
22744 + pci_write_config_dword(dev, offset, bar->val);
22745 + }
22746 + bar->which = 0;
22747 + }
22748 +
22749 + /* Do we need to support enabling/disabling the rom address here? */
22750 +
22751 + return 0;
22752 +}
22753 +
22754 +/* For the BARs, only allow writes which write ~0 or
22755 + * the correct resource information
22756 + * (Needed for when the driver probes the resource usage)
22757 + */
22758 +static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
22759 +{
22760 + struct pci_bar_info *bar = data;
22761 +
22762 + if (unlikely(!bar)) {
22763 + printk(KERN_WARNING "pciback: driver data not found for %s\n",
22764 + pci_name(dev));
22765 + return XEN_PCI_ERR_op_failed;
22766 + }
22767 +
22768 + /* A write to obtain the length must happen as a 32-bit write.
22769 + * This does not (yet) support writing individual bytes
22770 + */
22771 + if (value == ~0)
22772 + bar->which = 1;
22773 + else {
22774 + u32 tmpval;
22775 + pci_read_config_dword(dev, offset, &tmpval);
22776 + if (tmpval != bar->val && value == bar->val) {
22777 + /* Allow restoration of bar value. */
22778 + pci_write_config_dword(dev, offset, bar->val);
22779 + }
22780 + bar->which = 0;
22781 + }
22782 +
22783 + return 0;
22784 +}
22785 +
22786 +static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
22787 +{
22788 + struct pci_bar_info *bar = data;
22789 +
22790 + if (unlikely(!bar)) {
22791 + printk(KERN_WARNING "pciback: driver data not found for %s\n",
22792 + pci_name(dev));
22793 + return XEN_PCI_ERR_op_failed;
22794 + }
22795 +
22796 + *value = bar->which ? bar->len_val : bar->val;
22797 +
22798 + return 0;
22799 +}
22800 +
22801 +static inline void read_dev_bar(struct pci_dev *dev,
22802 + struct pci_bar_info *bar_info, int offset,
22803 + u32 len_mask)
22804 +{
22805 + pci_read_config_dword(dev, offset, &bar_info->val);
22806 + pci_write_config_dword(dev, offset, len_mask);
22807 + pci_read_config_dword(dev, offset, &bar_info->len_val);
22808 + pci_write_config_dword(dev, offset, bar_info->val);
22809 +}
22810 +
22811 +static void *bar_init(struct pci_dev *dev, int offset)
22812 +{
22813 + struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
22814 +
22815 + if (!bar)
22816 + return ERR_PTR(-ENOMEM);
22817 +
22818 + read_dev_bar(dev, bar, offset, ~0);
22819 + bar->which = 0;
22820 +
22821 + return bar;
22822 +}
22823 +
22824 +static void *rom_init(struct pci_dev *dev, int offset)
22825 +{
22826 + struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
22827 +
22828 + if (!bar)
22829 + return ERR_PTR(-ENOMEM);
22830 +
22831 + read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
22832 + bar->which = 0;
22833 +
22834 + return bar;
22835 +}
22836 +
22837 +static void bar_reset(struct pci_dev *dev, int offset, void *data)
22838 +{
22839 + struct pci_bar_info *bar = data;
22840 +
22841 + bar->which = 0;
22842 +}
22843 +
22844 +static void bar_release(struct pci_dev *dev, int offset, void *data)
22845 +{
22846 + kfree(data);
22847 +}
22848 +
22849 +static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
22850 + void *data)
22851 +{
22852 + *value = (u8) dev->irq;
22853 +
22854 + return 0;
22855 +}
22856 +
22857 +static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
22858 +{
22859 + u8 cur_value;
22860 + int err;
22861 +
22862 + err = pci_read_config_byte(dev, offset, &cur_value);
22863 + if (err)
22864 + goto out;
22865 +
22866 + if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
22867 + || value == PCI_BIST_START)
22868 + err = pci_write_config_byte(dev, offset, value);
22869 +
22870 + out:
22871 + return err;
22872 +}
22873 +
22874 +static const struct config_field header_common[] = {
22875 + {
22876 + .offset = PCI_COMMAND,
22877 + .size = 2,
22878 + .u.w.read = pciback_read_config_word,
22879 + .u.w.write = command_write,
22880 + },
22881 + {
22882 + .offset = PCI_INTERRUPT_LINE,
22883 + .size = 1,
22884 + .u.b.read = interrupt_read,
22885 + },
22886 + {
22887 + .offset = PCI_INTERRUPT_PIN,
22888 + .size = 1,
22889 + .u.b.read = pciback_read_config_byte,
22890 + },
22891 + {
22892 + /* Any side effects of letting driver domain control cache line? */
22893 + .offset = PCI_CACHE_LINE_SIZE,
22894 + .size = 1,
22895 + .u.b.read = pciback_read_config_byte,
22896 + .u.b.write = pciback_write_config_byte,
22897 + },
22898 + {
22899 + .offset = PCI_LATENCY_TIMER,
22900 + .size = 1,
22901 + .u.b.read = pciback_read_config_byte,
22902 + },
22903 + {
22904 + .offset = PCI_BIST,
22905 + .size = 1,
22906 + .u.b.read = pciback_read_config_byte,
22907 + .u.b.write = bist_write,
22908 + },
22909 + {}
22910 +};
22911 +
22912 +#define CFG_FIELD_BAR(reg_offset) \
22913 + { \
22914 + .offset = reg_offset, \
22915 + .size = 4, \
22916 + .init = bar_init, \
22917 + .reset = bar_reset, \
22918 + .release = bar_release, \
22919 + .u.dw.read = bar_read, \
22920 + .u.dw.write = bar_write, \
22921 + }
22922 +
22923 +#define CFG_FIELD_ROM(reg_offset) \
22924 + { \
22925 + .offset = reg_offset, \
22926 + .size = 4, \
22927 + .init = rom_init, \
22928 + .reset = bar_reset, \
22929 + .release = bar_release, \
22930 + .u.dw.read = bar_read, \
22931 + .u.dw.write = rom_write, \
22932 + }
22933 +
22934 +static const struct config_field header_0[] = {
22935 + CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
22936 + CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
22937 + CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
22938 + CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
22939 + CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
22940 + CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
22941 + CFG_FIELD_ROM(PCI_ROM_ADDRESS),
22942 + {}
22943 +};
22944 +
22945 +static const struct config_field header_1[] = {
22946 + CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
22947 + CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
22948 + CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
22949 + {}
22950 +};
22951 +
22952 +int pciback_config_header_add_fields(struct pci_dev *dev)
22953 +{
22954 + int err;
22955 +
22956 + err = pciback_config_add_fields(dev, header_common);
22957 + if (err)
22958 + goto out;
22959 +
22960 + switch (dev->hdr_type) {
22961 + case PCI_HEADER_TYPE_NORMAL:
22962 + err = pciback_config_add_fields(dev, header_0);
22963 + break;
22964 +
22965 + case PCI_HEADER_TYPE_BRIDGE:
22966 + err = pciback_config_add_fields(dev, header_1);
22967 + break;
22968 +
22969 + default:
22970 + err = -EINVAL;
22971 + printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
22972 + pci_name(dev), dev->hdr_type);
22973 + break;
22974 + }
22975 +
22976 + out:
22977 + return err;
22978 +}
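
bar_init() and rom_init() above capture each BAR's address and size mask via the standard PCI sizing handshake performed by read_dev_bar(): save the current value, write all-ones (with the ROM enable bit masked off for the expansion ROM), read back the mask, then restore. A compact sketch of that handshake for a 32-bit memory BAR, assuming hypothetical raw accessors cfg_read32()/cfg_write32():

#include <stdint.h>

/* Hypothetical config-space accessors, declared for the sketch only. */
uint32_t cfg_read32(int offset);
void cfg_write32(int offset, uint32_t value);

static uint32_t bar_size(int offset)
{
	uint32_t saved = cfg_read32(offset);	/* bar->val */
	uint32_t mask;

	cfg_write32(offset, ~0u);
	mask = cfg_read32(offset);		/* bar->len_val */
	cfg_write32(offset, saved);

	/* Drop the low flag bits of a memory BAR; the size is the
	 * two's complement of what remains, e.g. 0xfffff000 -> 4096. */
	mask &= ~0xfu;
	return ~mask + 1;
}
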
22979 Index: head-2008-11-25/drivers/xen/pciback/conf_space_quirks.c
22980 ===================================================================
22981 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
22982 +++ head-2008-11-25/drivers/xen/pciback/conf_space_quirks.c 2007-06-12 13:13:45.000000000 +0200
22983 @@ -0,0 +1,126 @@
22984 +/*
22985 + * PCI Backend - Handle special overlays for broken devices.
22986 + *
22987 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
22988 + * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
22989 + */
22990 +
22991 +#include <linux/kernel.h>
22992 +#include <linux/pci.h>
22993 +#include "pciback.h"
22994 +#include "conf_space.h"
22995 +#include "conf_space_quirks.h"
22996 +
22997 +LIST_HEAD(pciback_quirks);
22998 +
22999 +struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
23000 +{
23001 + struct pciback_config_quirk *tmp_quirk;
23002 +
23003 + list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list)
23004 + if (pci_match_id(&tmp_quirk->devid, dev))
23005 + goto out;
23006 + tmp_quirk = NULL;
23007 + printk(KERN_DEBUG
23008 + "quirk didn't match any device pciback knows about\n");
23009 + out:
23010 + return tmp_quirk;
23011 +}
23012 +
23013 +static inline void register_quirk(struct pciback_config_quirk *quirk)
23014 +{
23015 + list_add_tail(&quirk->quirks_list, &pciback_quirks);
23016 +}
23017 +
23018 +int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
23019 +{
23020 + int ret = 0;
23021 + struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
23022 + struct config_field_entry *cfg_entry;
23023 +
23024 + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
23025 + if (OFFSET(cfg_entry) == reg) {
23026 + ret = 1;
23027 + break;
23028 + }
23029 + }
23030 + return ret;
23031 +}
23032 +
23033 +int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
23034 + *field)
23035 +{
23036 + int err = 0;
23037 +
23038 + switch (field->size) {
23039 + case 1:
23040 + field->u.b.read = pciback_read_config_byte;
23041 + field->u.b.write = pciback_write_config_byte;
23042 + break;
23043 + case 2:
23044 + field->u.w.read = pciback_read_config_word;
23045 + field->u.w.write = pciback_write_config_word;
23046 + break;
23047 + case 4:
23048 + field->u.dw.read = pciback_read_config_dword;
23049 + field->u.dw.write = pciback_write_config_dword;
23050 + break;
23051 + default:
23052 + err = -EINVAL;
23053 + goto out;
23054 + }
23055 +
23056 + pciback_config_add_field(dev, field);
23057 +
23058 + out:
23059 + return err;
23060 +}
23061 +
23062 +int pciback_config_quirks_init(struct pci_dev *dev)
23063 +{
23064 + struct pciback_config_quirk *quirk;
23065 + int ret = 0;
23066 +
23067 + quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
23068 + if (!quirk) {
23069 + ret = -ENOMEM;
23070 + goto out;
23071 + }
23072 +
23073 + quirk->devid.vendor = dev->vendor;
23074 + quirk->devid.device = dev->device;
23075 + quirk->devid.subvendor = dev->subsystem_vendor;
23076 + quirk->devid.subdevice = dev->subsystem_device;
23077 + quirk->devid.class = 0;
23078 + quirk->devid.class_mask = 0;
23079 + quirk->devid.driver_data = 0UL;
23080 +
23081 + quirk->pdev = dev;
23082 +
23083 + register_quirk(quirk);
23084 + out:
23085 + return ret;
23086 +}
23087 +
23088 +void pciback_config_field_free(struct config_field *field)
23089 +{
23090 + kfree(field);
23091 +}
23092 +
23093 +int pciback_config_quirk_release(struct pci_dev *dev)
23094 +{
23095 + struct pciback_config_quirk *quirk;
23096 + int ret = 0;
23097 +
23098 + quirk = pciback_find_quirk(dev);
23099 + if (!quirk) {
23100 + ret = -ENXIO;
23101 + goto out;
23102 + }
23103 +
23104 + list_del(&quirk->quirks_list);
23105 + kfree(quirk);
23106 +
23107 + out:
23108 + return ret;
23109 +}
23110 Index: head-2008-11-25/drivers/xen/pciback/conf_space_quirks.h
23111 ===================================================================
23112 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
23113 +++ head-2008-11-25/drivers/xen/pciback/conf_space_quirks.h 2007-06-12 13:13:45.000000000 +0200
23114 @@ -0,0 +1,35 @@
23115 +/*
23116 + * PCI Backend - Data structures for special overlays for broken devices.
23117 + *
23118 + * Ryan Wilson <hap9@epoch.ncsc.mil>
23119 + * Chris Bookholt <hap10@epoch.ncsc.mil>
23120 + */
23121 +
23122 +#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
23123 +#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
23124 +
23125 +#include <linux/pci.h>
23126 +#include <linux/list.h>
23127 +
23128 +struct pciback_config_quirk {
23129 + struct list_head quirks_list;
23130 + struct pci_device_id devid;
23131 + struct pci_dev *pdev;
23132 +};
23133 +
23134 +struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);
23135 +
23136 +int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
23137 + *field);
23138 +
23139 +int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);
23140 +
23141 +int pciback_config_quirks_init(struct pci_dev *dev);
23142 +
23143 +void pciback_config_field_free(struct config_field *field);
23144 +
23145 +int pciback_config_quirk_release(struct pci_dev *dev);
23146 +
23147 +int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);
23148 +
23149 +#endif
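
A hypothetical use of the quirk interface above: once pciback_config_quirks_init() has recorded the device, an extra device-specific register can be exposed by allocating a config_field and passing it to pciback_config_quirks_add_field(), which installs the default pass-through accessors for the chosen size before adding the field. The 0x40 offset below is an illustrative assumption, not taken from the patch.

static int demo_add_quirk_field(struct pci_dev *dev)
{
	struct config_field *field;

	field = kzalloc(sizeof(*field), GFP_KERNEL);
	if (!field)
		return -ENOMEM;

	field->offset = 0x40;	/* illustrative register offset */
	field->size = 4;	/* dword accessors get filled in */
	field->clean = pciback_config_field_free;

	/* Installs pciback_read/write_config_dword and adds the field. */
	return pciback_config_quirks_add_field(dev, field);
}
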
23150 Index: head-2008-11-25/drivers/xen/pciback/controller.c
23151 ===================================================================
23152 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
23153 +++ head-2008-11-25/drivers/xen/pciback/controller.c 2008-02-26 10:54:11.000000000 +0100
23154 @@ -0,0 +1,408 @@
23155 +/*
23156 + * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
23157 + * Alex Williamson <alex.williamson@hp.com>
23158 + *
23159 + * PCI "Controller" Backend - virtualize PCI bus topology based on PCI
23160 + * controllers. Devices under the same PCI controller are exposed on the
23161 + * same virtual domain:bus. Within a bus, device slots are virtualized
23162 + * to compact the bus.
23163 + *
23164 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23165 + * This program is free software; you can redistribute it and/or modify
23166 + * it under the terms of the GNU General Public License as published by
23167 + * the Free Software Foundation; either version 2 of the License, or
23168 + * (at your option) any later version.
23169 + *
23170 + * This program is distributed in the hope that it will be useful,
23171 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23172 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23173 + * GNU General Public License for more details.
23174 + *
23175 + * You should have received a copy of the GNU General Public License
23176 + * along with this program; if not, write to the Free Software
23177 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23178 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23179 + */
23180 +
23181 +#include <linux/acpi.h>
23182 +#include <linux/list.h>
23183 +#include <linux/pci.h>
23184 +#include <linux/spinlock.h>
23185 +#include "pciback.h"
23186 +
23187 +#define PCI_MAX_BUSSES 255
23188 +#define PCI_MAX_SLOTS 32
23189 +
23190 +struct controller_dev_entry {
23191 + struct list_head list;
23192 + struct pci_dev *dev;
23193 + unsigned int devfn;
23194 +};
23195 +
23196 +struct controller_list_entry {
23197 + struct list_head list;
23198 + struct pci_controller *controller;
23199 + unsigned int domain;
23200 + unsigned int bus;
23201 + unsigned int next_devfn;
23202 + struct list_head dev_list;
23203 +};
23204 +
23205 +struct controller_dev_data {
23206 + struct list_head list;
23207 + unsigned int next_domain;
23208 + unsigned int next_bus;
23209 + spinlock_t lock;
23210 +};
23211 +
23212 +struct walk_info {
23213 + struct pciback_device *pdev;
23214 + int resource_count;
23215 + int root_num;
23216 +};
23217 +
23218 +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
23219 + unsigned int domain, unsigned int bus,
23220 + unsigned int devfn)
23221 +{
23222 + struct controller_dev_data *dev_data = pdev->pci_dev_data;
23223 + struct controller_dev_entry *dev_entry;
23224 + struct controller_list_entry *cntrl_entry;
23225 + struct pci_dev *dev = NULL;
23226 + unsigned long flags;
23227 +
23228 + spin_lock_irqsave(&dev_data->lock, flags);
23229 +
23230 + list_for_each_entry(cntrl_entry, &dev_data->list, list) {
23231 + if (cntrl_entry->domain != domain ||
23232 + cntrl_entry->bus != bus)
23233 + continue;
23234 +
23235 + list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
23236 + if (devfn == dev_entry->devfn) {
23237 + dev = dev_entry->dev;
23238 + goto found;
23239 + }
23240 + }
23241 + }
23242 +found:
23243 + spin_unlock_irqrestore(&dev_data->lock, flags);
23244 +
23245 + return dev;
23246 +}
23247 +
23248 +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
23249 + int devid, publish_pci_dev_cb publish_cb)
23250 +{
23251 + struct controller_dev_data *dev_data = pdev->pci_dev_data;
23252 + struct controller_dev_entry *dev_entry;
23253 + struct controller_list_entry *cntrl_entry;
23254 + struct pci_controller *dev_controller = PCI_CONTROLLER(dev);
23255 + unsigned long flags;
23256 + int ret = 0, found = 0;
23257 +
23258 + spin_lock_irqsave(&dev_data->lock, flags);
23259 +
23260 + /* Look to see if we already have a domain:bus for this controller */
23261 + list_for_each_entry(cntrl_entry, &dev_data->list, list) {
23262 + if (cntrl_entry->controller == dev_controller) {
23263 + found = 1;
23264 + break;
23265 + }
23266 + }
23267 +
23268 + if (!found) {
23269 + cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC);
23270 + if (!cntrl_entry) {
23271 + ret = -ENOMEM;
23272 + goto out;
23273 + }
23274 +
23275 + cntrl_entry->controller = dev_controller;
23276 + cntrl_entry->next_devfn = PCI_DEVFN(0, 0);
23277 +
23278 + cntrl_entry->domain = dev_data->next_domain;
23279 + cntrl_entry->bus = dev_data->next_bus++;
23280 + if (dev_data->next_bus > PCI_MAX_BUSSES) {
23281 + dev_data->next_domain++;
23282 + dev_data->next_bus = 0;
23283 + }
23284 +
23285 + INIT_LIST_HEAD(&cntrl_entry->dev_list);
23286 +
23287 + list_add_tail(&cntrl_entry->list, &dev_data->list);
23288 + }
23289 +
23290 + if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) {
23291 + /*
23292 + * While it seems unlikely, this can actually happen if
23293 + * a controller has P2P bridges under it.
23294 + */
23295 + xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x "
23296 + "is full, no room to export %04x:%02x:%02x.%x",
23297 + cntrl_entry->domain, cntrl_entry->bus,
23298 + pci_domain_nr(dev->bus), dev->bus->number,
23299 + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
23300 + ret = -ENOSPC;
23301 + goto out;
23302 + }
23303 +
23304 + dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC);
23305 + if (!dev_entry) {
23306 + if (list_empty(&cntrl_entry->dev_list)) {
23307 + list_del(&cntrl_entry->list);
23308 + kfree(cntrl_entry);
23309 + }
23310 + ret = -ENOMEM;
23311 + goto out;
23312 + }
23313 +
23314 + dev_entry->dev = dev;
23315 + dev_entry->devfn = cntrl_entry->next_devfn;
23316 +
23317 + list_add_tail(&dev_entry->list, &cntrl_entry->dev_list);
23318 +
23319 + cntrl_entry->next_devfn += PCI_DEVFN(1, 0);
23320 +
23321 +out:
23322 + spin_unlock_irqrestore(&dev_data->lock, flags);
23323 +
23324 + /* TODO: Publish virtual domain:bus:slot.func here. */
23325 +
23326 + return ret;
23327 +}
23328 +
23329 +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
23330 +{
23331 + struct controller_dev_data *dev_data = pdev->pci_dev_data;
23332 + struct controller_list_entry *cntrl_entry;
23333 + struct controller_dev_entry *dev_entry = NULL;
23334 + struct pci_dev *found_dev = NULL;
23335 + unsigned long flags;
23336 +
23337 + spin_lock_irqsave(&dev_data->lock, flags);
23338 +
23339 + list_for_each_entry(cntrl_entry, &dev_data->list, list) {
23340 + if (cntrl_entry->controller != PCI_CONTROLLER(dev))
23341 + continue;
23342 +
23343 + list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
23344 + if (dev_entry->dev == dev) {
23345 + found_dev = dev_entry->dev;
23346 + break;
23347 + }
23348 + }
23349 + }
23350 +
23351 + if (!found_dev) {
23352 + spin_unlock_irqrestore(&dev_data->lock, flags);
23353 + return;
23354 + }
23355 +
23356 + list_del(&dev_entry->list);
23357 + kfree(dev_entry);
23358 +
23359 + if (list_empty(&cntrl_entry->dev_list)) {
23360 + list_del(&cntrl_entry->list);
23361 + kfree(cntrl_entry);
23362 + }
23363 +
23364 + spin_unlock_irqrestore(&dev_data->lock, flags);
23365 + pcistub_put_pci_dev(found_dev);
23366 +}
23367 +
23368 +int pciback_init_devices(struct pciback_device *pdev)
23369 +{
23370 + struct controller_dev_data *dev_data;
23371 +
23372 + dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
23373 + if (!dev_data)
23374 + return -ENOMEM;
23375 +
23376 + spin_lock_init(&dev_data->lock);
23377 +
23378 + INIT_LIST_HEAD(&dev_data->list);
23379 +
23380 + /* Starting domain:bus numbers */
23381 + dev_data->next_domain = 0;
23382 + dev_data->next_bus = 0;
23383 +
23384 + pdev->pci_dev_data = dev_data;
23385 +
23386 + return 0;
23387 +}
23388 +
23389 +static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
23390 +{
23391 + struct walk_info *info = data;
23392 + struct acpi_resource_address64 addr;
23393 + acpi_status status;
23394 + int i, len, err;
23395 + char str[32], tmp[3];
23396 + unsigned char *ptr, *buf;
23397 +
23398 + status = acpi_resource_to_address64(res, &addr);
23399 +
23400 + /* Do we care about this range? Let's check. */
23401 + if (!ACPI_SUCCESS(status) ||
23402 + !(addr.resource_type == ACPI_MEMORY_RANGE ||
23403 + addr.resource_type == ACPI_IO_RANGE) ||
23404 + !addr.address_length || addr.producer_consumer != ACPI_PRODUCER)
23405 + return AE_OK;
23406 +
23407 + /*
23408 + * Furthermore, we really only care to tell the guest about
23409 + * address ranges that require address translation of some sort.
23410 + */
23411 + if (!(addr.resource_type == ACPI_MEMORY_RANGE &&
23412 + addr.info.mem.translation) &&
23413 + !(addr.resource_type == ACPI_IO_RANGE &&
23414 + addr.info.io.translation))
23415 + return AE_OK;
23416 +
23417 + /* Store the resource in xenbus for the guest */
23418 + len = snprintf(str, sizeof(str), "root-%d-resource-%d",
23419 + info->root_num, info->resource_count);
23420 + if (unlikely(len >= (sizeof(str) - 1)))
23421 + return AE_OK;
23422 +
23423 + buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL);
23424 + if (!buf)
23425 + return AE_OK;
23426 +
23427 + /* Clean out resource_source */
23428 + res->data.address64.resource_source.index = 0xFF;
23429 + res->data.address64.resource_source.string_length = 0;
23430 + res->data.address64.resource_source.string_ptr = NULL;
23431 +
23432 + ptr = (unsigned char *)res;
23433 +
23434 + /* Turn the acpi_resource into an ASCII byte stream */
23435 + for (i = 0; i < sizeof(*res); i++) {
23436 + snprintf(tmp, sizeof(tmp), "%02x", ptr[i]);
23437 + strncat(buf, tmp, 2);
23438 + }
23439 +
23440 + err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename,
23441 + str, "%s", buf);
23442 +
23443 + if (!err)
23444 + info->resource_count++;
23445 +
23446 + kfree(buf);
23447 +
23448 + return AE_OK;
23449 +}
23450 +
23451 +int pciback_publish_pci_roots(struct pciback_device *pdev,
23452 + publish_pci_root_cb publish_root_cb)
23453 +{
23454 + struct controller_dev_data *dev_data = pdev->pci_dev_data;
23455 + struct controller_list_entry *cntrl_entry;
23456 + int i, root_num, len, err = 0;
23457 + unsigned int domain, bus;
23458 + char str[64];
23459 + struct walk_info info;
23460 +
23461 + spin_lock(&dev_data->lock);
23462 +
23463 + list_for_each_entry(cntrl_entry, &dev_data->list, list) {
23464 + /* First publish all the domain:bus info */
23465 + err = publish_root_cb(pdev, cntrl_entry->domain,
23466 + cntrl_entry->bus);
23467 + if (err)
23468 + goto out;
23469 +
23470 + /*
23471 + * Now figure out which root-%d this belongs to
23472 + * so we can associate resources with it.
23473 + */
23474 + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
23475 + "root_num", "%d", &root_num);
23476 +
23477 + if (err != 1)
23478 + goto out;
23479 +
23480 + for (i = 0; i < root_num; i++) {
23481 + len = snprintf(str, sizeof(str), "root-%d", i);
23482 + if (unlikely(len >= (sizeof(str) - 1))) {
23483 + err = -ENOMEM;
23484 + goto out;
23485 + }
23486 +
23487 + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
23488 + str, "%x:%x", &domain, &bus);
23489 + if (err != 2)
23490 + goto out;
23491 +
23492 + /* Is this the one we just published? */
23493 + if (domain == cntrl_entry->domain &&
23494 + bus == cntrl_entry->bus)
23495 + break;
23496 + }
23497 +
23498 + if (i == root_num)
23499 + goto out;
23500 +
23501 + info.pdev = pdev;
23502 + info.resource_count = 0;
23503 + info.root_num = i;
23504 +
23505 + /* Let ACPI do the heavy lifting on decoding resources */
23506 + acpi_walk_resources(cntrl_entry->controller->acpi_handle,
23507 + METHOD_NAME__CRS, write_xenbus_resource,
23508 + &info);
23509 +
23510 + /* No resources. OK. On to the next one */
23511 + if (!info.resource_count)
23512 + continue;
23513 +
23514 + /* Store the number of resources we wrote for this root-%d */
23515 + len = snprintf(str, sizeof(str), "root-%d-resources", i);
23516 + if (unlikely(len >= (sizeof(str) - 1))) {
23517 + err = -ENOMEM;
23518 + goto out;
23519 + }
23520 +
23521 + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
23522 + "%d", info.resource_count);
23523 + if (err)
23524 + goto out;
23525 + }
23526 +
23527 + /* Finally, write some magic to synchronize with the guest. */
23528 + len = snprintf(str, sizeof(str), "root-resource-magic");
23529 + if (unlikely(len >= (sizeof(str) - 1))) {
23530 + err = -ENOMEM;
23531 + goto out;
23532 + }
23533 +
23534 + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
23535 + "%lx", (sizeof(struct acpi_resource) * 2) + 1);
23536 +
23537 +out:
23538 + spin_unlock(&dev_data->lock);
23539 +
23540 + return err;
23541 +}
23542 +
23543 +void pciback_release_devices(struct pciback_device *pdev)
23544 +{
23545 + struct controller_dev_data *dev_data = pdev->pci_dev_data;
23546 + struct controller_list_entry *cntrl_entry, *c;
23547 + struct controller_dev_entry *dev_entry, *d;
23548 +
23549 + list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) {
23550 + list_for_each_entry_safe(dev_entry, d,
23551 + &cntrl_entry->dev_list, list) {
23552 + list_del(&dev_entry->list);
23553 + pcistub_put_pci_dev(dev_entry->dev);
23554 + kfree(dev_entry);
23555 + }
23556 + list_del(&cntrl_entry->list);
23557 + kfree(cntrl_entry);
23558 + }
23559 +
23560 + kfree(dev_data);
23561 + pdev->pci_dev_data = NULL;
23562 +}
23563 Index: head-2008-11-25/drivers/xen/pciback/passthrough.c
23564 ===================================================================
23565 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
23566 +++ head-2008-11-25/drivers/xen/pciback/passthrough.c 2008-04-02 12:34:02.000000000 +0200
23567 @@ -0,0 +1,166 @@
23568 +/*
23569 + * PCI Backend - Provides restricted access to the real PCI bus topology
23570 + * to the frontend
23571 + *
23572 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
23573 + */
23574 +
23575 +#include <linux/list.h>
23576 +#include <linux/pci.h>
23577 +#include <linux/spinlock.h>
23578 +#include "pciback.h"
23579 +
23580 +struct passthrough_dev_data {
23581 + /* Access to dev_list must be protected by lock */
23582 + struct list_head dev_list;
23583 + spinlock_t lock;
23584 +};
23585 +
23586 +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
23587 + unsigned int domain, unsigned int bus,
23588 + unsigned int devfn)
23589 +{
23590 + struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
23591 + struct pci_dev_entry *dev_entry;
23592 + struct pci_dev *dev = NULL;
23593 + unsigned long flags;
23594 +
23595 + spin_lock_irqsave(&dev_data->lock, flags);
23596 +
23597 + list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
23598 + if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
23599 + && bus == (unsigned int)dev_entry->dev->bus->number
23600 + && devfn == dev_entry->dev->devfn) {
23601 + dev = dev_entry->dev;
23602 + break;
23603 + }
23604 + }
23605 +
23606 + spin_unlock_irqrestore(&dev_data->lock, flags);
23607 +
23608 + return dev;
23609 +}
23610 +
23611 +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
23612 + int devid, publish_pci_dev_cb publish_cb)
23613 +{
23614 + struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
23615 + struct pci_dev_entry *dev_entry;
23616 + unsigned long flags;
23617 + unsigned int domain, bus, devfn;
23618 + int err;
23619 +
23620 + dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
23621 + if (!dev_entry)
23622 + return -ENOMEM;
23623 + dev_entry->dev = dev;
23624 +
23625 + spin_lock_irqsave(&dev_data->lock, flags);
23626 + list_add_tail(&dev_entry->list, &dev_data->dev_list);
23627 + spin_unlock_irqrestore(&dev_data->lock, flags);
23628 +
23629 + /* Publish this device. */
23630 + domain = (unsigned int)pci_domain_nr(dev->bus);
23631 + bus = (unsigned int)dev->bus->number;
23632 + devfn = dev->devfn;
23633 + err = publish_cb(pdev, domain, bus, devfn, devid);
23634 +
23635 + return err;
23636 +}
23637 +
23638 +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
23639 +{
23640 + struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
23641 + struct pci_dev_entry *dev_entry, *t;
23642 + struct pci_dev *found_dev = NULL;
23643 + unsigned long flags;
23644 +
23645 + spin_lock_irqsave(&dev_data->lock, flags);
23646 +
23647 + list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
23648 + if (dev_entry->dev == dev) {
23649 + list_del(&dev_entry->list);
23650 + found_dev = dev_entry->dev;
23651 + kfree(dev_entry);
23652 + }
23653 + }
23654 +
23655 + spin_unlock_irqrestore(&dev_data->lock, flags);
23656 +
23657 + if (found_dev)
23658 + pcistub_put_pci_dev(found_dev);
23659 +}
23660 +
23661 +int pciback_init_devices(struct pciback_device *pdev)
23662 +{
23663 + struct passthrough_dev_data *dev_data;
23664 +
23665 + dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
23666 + if (!dev_data)
23667 + return -ENOMEM;
23668 +
23669 + spin_lock_init(&dev_data->lock);
23670 +
23671 + INIT_LIST_HEAD(&dev_data->dev_list);
23672 +
23673 + pdev->pci_dev_data = dev_data;
23674 +
23675 + return 0;
23676 +}
23677 +
23678 +int pciback_publish_pci_roots(struct pciback_device *pdev,
23679 + publish_pci_root_cb publish_root_cb)
23680 +{
23681 + int err = 0;
23682 + struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
23683 + struct pci_dev_entry *dev_entry, *e;
23684 + struct pci_dev *dev;
23685 + int found;
23686 + unsigned int domain, bus;
23687 +
23688 + spin_lock(&dev_data->lock);
23689 +
23690 + list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
23691 + /* Only publish this device as a root if none of its
23692 + * parent bridges are exported
23693 + */
23694 + found = 0;
23695 + dev = dev_entry->dev->bus->self;
23696 + for (; !found && dev != NULL; dev = dev->bus->self) {
23697 + list_for_each_entry(e, &dev_data->dev_list, list) {
23698 + if (dev == e->dev) {
23699 + found = 1;
23700 + break;
23701 + }
23702 + }
23703 + }
23704 +
23705 + domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
23706 + bus = (unsigned int)dev_entry->dev->bus->number;
23707 +
23708 + if (!found) {
23709 + err = publish_root_cb(pdev, domain, bus);
23710 + if (err)
23711 + break;
23712 + }
23713 + }
23714 +
23715 + spin_unlock(&dev_data->lock);
23716 +
23717 + return err;
23718 +}
23719 +
23720 +void pciback_release_devices(struct pciback_device *pdev)
23721 +{
23722 + struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
23723 + struct pci_dev_entry *dev_entry, *t;
23724 +
23725 + list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
23726 + list_del(&dev_entry->list);
23727 + pcistub_put_pci_dev(dev_entry->dev);
23728 + kfree(dev_entry);
23729 + }
23730 +
23731 + kfree(dev_data);
23732 + pdev->pci_dev_data = NULL;
23733 +}
23734 Index: head-2008-11-25/drivers/xen/pciback/pci_stub.c
23735 ===================================================================
23736 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
23737 +++ head-2008-11-25/drivers/xen/pciback/pci_stub.c 2008-10-29 09:55:56.000000000 +0100
23738 @@ -0,0 +1,948 @@
23739 +/*
23740 + * PCI Stub Driver - Grabs devices in backend to be exported later
23741 + *
23742 + * Ryan Wilson <hap9@epoch.ncsc.mil>
23743 + * Chris Bookholt <hap10@epoch.ncsc.mil>
23744 + */
23745 +#include <linux/module.h>
23746 +#include <linux/init.h>
23747 +#include <linux/list.h>
23748 +#include <linux/spinlock.h>
23749 +#include <linux/kref.h>
23750 +#include <asm/atomic.h>
23751 +#include "pciback.h"
23752 +#include "conf_space.h"
23753 +#include "conf_space_quirks.h"
23754 +
23755 +static char *pci_devs_to_hide = NULL;
23756 +module_param_named(hide, pci_devs_to_hide, charp, 0444);
23757 +
23758 +struct pcistub_device_id {
23759 + struct list_head slot_list;
23760 + int domain;
23761 + unsigned char bus;
23762 + unsigned int devfn;
23763 +};
23764 +static LIST_HEAD(pcistub_device_ids);
23765 +static DEFINE_SPINLOCK(device_ids_lock);
23766 +
23767 +struct pcistub_device {
23768 + struct kref kref;
23769 + struct list_head dev_list;
23770 + spinlock_t lock;
23771 +
23772 + struct pci_dev *dev;
23773 + struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */
23774 +};
23775 +
23776 +/* Access to pcistub_devices & seized_devices lists and the initialize_devices
23777 + * flag must be locked with pcistub_devices_lock
23778 + */
23779 +static DEFINE_SPINLOCK(pcistub_devices_lock);
23780 +static LIST_HEAD(pcistub_devices);
23781 +
23782 +/* wait for device_initcall before initializing our devices
23783 + * (see pcistub_init_devices_late)
23784 + */
23785 +static int initialize_devices = 0;
23786 +static LIST_HEAD(seized_devices);
23787 +
23788 +static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
23789 +{
23790 + struct pcistub_device *psdev;
23791 +
23792 + dev_dbg(&dev->dev, "pcistub_device_alloc\n");
23793 +
23794 + psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
23795 + if (!psdev)
23796 + return NULL;
23797 +
23798 + psdev->dev = pci_dev_get(dev);
23799 + if (!psdev->dev) {
23800 + kfree(psdev);
23801 + return NULL;
23802 + }
23803 +
23804 + kref_init(&psdev->kref);
23805 + spin_lock_init(&psdev->lock);
23806 +
23807 + return psdev;
23808 +}
23809 +
23810 +/* Don't call this directly as it's called by pcistub_device_put */
23811 +static void pcistub_device_release(struct kref *kref)
23812 +{
23813 + struct pcistub_device *psdev;
23814 +
23815 + psdev = container_of(kref, struct pcistub_device, kref);
23816 +
23817 + dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
23818 +
23819 + /* Clean up the device */
23820 + pciback_reset_device(psdev->dev);
23821 + pciback_config_free_dyn_fields(psdev->dev);
23822 + pciback_config_free_dev(psdev->dev);
23823 + kfree(pci_get_drvdata(psdev->dev));
23824 + pci_set_drvdata(psdev->dev, NULL);
23825 +
23826 + pci_dev_put(psdev->dev);
23827 +
23828 + kfree(psdev);
23829 +}
23830 +
23831 +static inline void pcistub_device_get(struct pcistub_device *psdev)
23832 +{
23833 + kref_get(&psdev->kref);
23834 +}
23835 +
23836 +static inline void pcistub_device_put(struct pcistub_device *psdev)
23837 +{
23838 + kref_put(&psdev->kref, pcistub_device_release);
23839 +}
23840 +
23841 +static struct pcistub_device *pcistub_device_find(int domain, int bus,
23842 + int slot, int func)
23843 +{
23844 + struct pcistub_device *psdev = NULL;
23845 + unsigned long flags;
23846 +
23847 + spin_lock_irqsave(&pcistub_devices_lock, flags);
23848 +
23849 + list_for_each_entry(psdev, &pcistub_devices, dev_list) {
23850 + if (psdev->dev != NULL
23851 + && domain == pci_domain_nr(psdev->dev->bus)
23852 + && bus == psdev->dev->bus->number
23853 + && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
23854 + pcistub_device_get(psdev);
23855 + goto out;
23856 + }
23857 + }
23858 +
23859 + /* didn't find it */
23860 + psdev = NULL;
23861 +
23862 + out:
23863 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
23864 + return psdev;
23865 +}
23866 +
23867 +static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
23868 + struct pcistub_device *psdev)
23869 +{
23870 + struct pci_dev *pci_dev = NULL;
23871 + unsigned long flags;
23872 +
23873 + pcistub_device_get(psdev);
23874 +
23875 + spin_lock_irqsave(&psdev->lock, flags);
23876 + if (!psdev->pdev) {
23877 + psdev->pdev = pdev;
23878 + pci_dev = psdev->dev;
23879 + }
23880 + spin_unlock_irqrestore(&psdev->lock, flags);
23881 +
23882 + if (!pci_dev)
23883 + pcistub_device_put(psdev);
23884 +
23885 + return pci_dev;
23886 +}
23887 +
23888 +struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
23889 + int domain, int bus,
23890 + int slot, int func)
23891 +{
23892 + struct pcistub_device *psdev;
23893 + struct pci_dev *found_dev = NULL;
23894 + unsigned long flags;
23895 +
23896 + spin_lock_irqsave(&pcistub_devices_lock, flags);
23897 +
23898 + list_for_each_entry(psdev, &pcistub_devices, dev_list) {
23899 + if (psdev->dev != NULL
23900 + && domain == pci_domain_nr(psdev->dev->bus)
23901 + && bus == psdev->dev->bus->number
23902 + && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
23903 + found_dev = pcistub_device_get_pci_dev(pdev, psdev);
23904 + break;
23905 + }
23906 + }
23907 +
23908 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
23909 + return found_dev;
23910 +}
23911 +
23912 +struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
23913 + struct pci_dev *dev)
23914 +{
23915 + struct pcistub_device *psdev;
23916 + struct pci_dev *found_dev = NULL;
23917 + unsigned long flags;
23918 +
23919 + spin_lock_irqsave(&pcistub_devices_lock, flags);
23920 +
23921 + list_for_each_entry(psdev, &pcistub_devices, dev_list) {
23922 + if (psdev->dev == dev) {
23923 + found_dev = pcistub_device_get_pci_dev(pdev, psdev);
23924 + break;
23925 + }
23926 + }
23927 +
23928 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
23929 + return found_dev;
23930 +}
23931 +
23932 +void pcistub_put_pci_dev(struct pci_dev *dev)
23933 +{
23934 + struct pcistub_device *psdev, *found_psdev = NULL;
23935 + unsigned long flags;
23936 +
23937 + spin_lock_irqsave(&pcistub_devices_lock, flags);
23938 +
23939 + list_for_each_entry(psdev, &pcistub_devices, dev_list) {
23940 + if (psdev->dev == dev) {
23941 + found_psdev = psdev;
23942 + break;
23943 + }
23944 + }
23945 +
23946 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
23947 +
23948 + /* Clean up our device
23949 + * (so it's ready for the next domain)
23950 + */
23951 + pciback_reset_device(found_psdev->dev);
23952 + pciback_config_free_dyn_fields(found_psdev->dev);
23953 + pciback_config_reset_dev(found_psdev->dev);
23954 +
23955 + spin_lock_irqsave(&found_psdev->lock, flags);
23956 + found_psdev->pdev = NULL;
23957 + spin_unlock_irqrestore(&found_psdev->lock, flags);
23958 +
23959 + pcistub_device_put(found_psdev);
23960 +}
23961 +
23962 +static int __devinit pcistub_match_one(struct pci_dev *dev,
23963 + struct pcistub_device_id *pdev_id)
23964 +{
23965 + /* Match the specified device by domain, bus, slot, func and also if
23966 + * any of the device's parent bridges match.
23967 + */
23968 + for (; dev != NULL; dev = dev->bus->self) {
23969 + if (pci_domain_nr(dev->bus) == pdev_id->domain
23970 + && dev->bus->number == pdev_id->bus
23971 + && dev->devfn == pdev_id->devfn)
23972 + return 1;
23973 +
23974 + /* Sometimes topmost bridge links to itself. */
23975 + if (dev == dev->bus->self)
23976 + break;
23977 + }
23978 +
23979 + return 0;
23980 +}
23981 +
23982 +static int __devinit pcistub_match(struct pci_dev *dev)
23983 +{
23984 + struct pcistub_device_id *pdev_id;
23985 + unsigned long flags;
23986 + int found = 0;
23987 +
23988 + spin_lock_irqsave(&device_ids_lock, flags);
23989 + list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
23990 + if (pcistub_match_one(dev, pdev_id)) {
23991 + found = 1;
23992 + break;
23993 + }
23994 + }
23995 + spin_unlock_irqrestore(&device_ids_lock, flags);
23996 +
23997 + return found;
23998 +}
23999 +
24000 +static int __devinit pcistub_init_device(struct pci_dev *dev)
24001 +{
24002 + struct pciback_dev_data *dev_data;
24003 + int err = 0;
24004 +
24005 + dev_dbg(&dev->dev, "initializing...\n");
24006 +
24007 + /* The PCI backend is not intended to be a module (or to work with
24008 + * removable PCI devices yet). If it were, pciback_config_free()
24009 + * would need to be called somewhere to free the memory allocated
24010 + * here and then to call kfree(pci_get_drvdata(psdev->dev)).
24011 + */
24012 + dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
24013 + if (!dev_data) {
24014 + err = -ENOMEM;
24015 + goto out;
24016 + }
24017 + pci_set_drvdata(dev, dev_data);
24018 +
24019 + dev_dbg(&dev->dev, "initializing config\n");
24020 + err = pciback_config_init_dev(dev);
24021 + if (err)
24022 + goto out;
24023 +
24024 + /* HACK: Force device (& ACPI) to determine what IRQ it's on - we
24025 + * must do this here because pcibios_enable_device may specify
24026 + * the pci device's true irq (and possibly its other resources)
24027 + * if they differ from what's in the configuration space.
24028 + * This makes the assumption that the device's resources won't
24029 + * change after this point (otherwise this code may break!)
24030 + */
24031 + dev_dbg(&dev->dev, "enabling device\n");
24032 + err = pci_enable_device(dev);
24033 + if (err)
24034 + goto config_release;
24035 +
24036 + /* Now disable the device (this also ensures some private device
24037 + * data is set up before we export)
24038 + */
24039 + dev_dbg(&dev->dev, "reset device\n");
24040 + pciback_reset_device(dev);
24041 +
24042 + return 0;
24043 +
24044 + config_release:
24045 + pciback_config_free_dev(dev);
24046 +
24047 + out:
24048 + pci_set_drvdata(dev, NULL);
24049 + kfree(dev_data);
24050 + return err;
24051 +}
24052 +
24053 +/*
24054 + * Because some initialization still happens on
24055 + * devices during fs_initcall, we need to defer
24056 + * full initialization of our devices until
24057 + * device_initcall.
24058 + */
24059 +static int __init pcistub_init_devices_late(void)
24060 +{
24061 + struct pcistub_device *psdev;
24062 + unsigned long flags;
24063 + int err = 0;
24064 +
24065 + pr_debug("pciback: pcistub_init_devices_late\n");
24066 +
24067 + spin_lock_irqsave(&pcistub_devices_lock, flags);
24068 +
24069 + while (!list_empty(&seized_devices)) {
24070 + psdev = container_of(seized_devices.next,
24071 + struct pcistub_device, dev_list);
24072 + list_del(&psdev->dev_list);
24073 +
24074 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24075 +
24076 + err = pcistub_init_device(psdev->dev);
24077 + if (err) {
24078 + dev_err(&psdev->dev->dev,
24079 + "error %d initializing device\n", err);
24080 + kfree(psdev);
24081 + psdev = NULL;
24082 + }
24083 +
24084 + spin_lock_irqsave(&pcistub_devices_lock, flags);
24085 +
24086 + if (psdev)
24087 + list_add_tail(&psdev->dev_list, &pcistub_devices);
24088 + }
24089 +
24090 + initialize_devices = 1;
24091 +
24092 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24093 +
24094 + return 0;
24095 +}
24096 +
24097 +static int __devinit pcistub_seize(struct pci_dev *dev)
24098 +{
24099 + struct pcistub_device *psdev;
24100 + unsigned long flags;
24101 + int err = 0;
24102 +
24103 + psdev = pcistub_device_alloc(dev);
24104 + if (!psdev)
24105 + return -ENOMEM;
24106 +
24107 + spin_lock_irqsave(&pcistub_devices_lock, flags);
24108 +
24109 + if (initialize_devices) {
24110 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24111 +
24112 + /* don't want irqs disabled when calling pcistub_init_device */
24113 + err = pcistub_init_device(psdev->dev);
24114 +
24115 + spin_lock_irqsave(&pcistub_devices_lock, flags);
24116 +
24117 + if (!err)
24118 + list_add(&psdev->dev_list, &pcistub_devices);
24119 + } else {
24120 + dev_dbg(&dev->dev, "deferring initialization\n");
24121 + list_add(&psdev->dev_list, &seized_devices);
24122 + }
24123 +
24124 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24125 +
24126 + if (err)
24127 + pcistub_device_put(psdev);
24128 +
24129 + return err;
24130 +}
24131 +
24132 +static int __devinit pcistub_probe(struct pci_dev *dev,
24133 + const struct pci_device_id *id)
24134 +{
24135 + int err = 0;
24136 +
24137 + dev_dbg(&dev->dev, "probing...\n");
24138 +
24139 + if (pcistub_match(dev)) {
24140 +
24141 + if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
24142 + && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
24143 + dev_err(&dev->dev, "can't export pci devices that "
24144 + "don't have a normal (0) or bridge (1) "
24145 + "header type!\n");
24146 + err = -ENODEV;
24147 + goto out;
24148 + }
24149 +
24150 + dev_info(&dev->dev, "seizing device\n");
24151 + err = pcistub_seize(dev);
24152 + } else
24153 + /* Didn't find the device */
24154 + err = -ENODEV;
24155 +
24156 + out:
24157 + return err;
24158 +}
24159 +
24160 +static void pcistub_remove(struct pci_dev *dev)
24161 +{
24162 + struct pcistub_device *psdev, *found_psdev = NULL;
24163 + unsigned long flags;
24164 +
24165 + dev_dbg(&dev->dev, "removing\n");
24166 +
24167 + spin_lock_irqsave(&pcistub_devices_lock, flags);
24168 +
24169 + pciback_config_quirk_release(dev);
24170 +
24171 + list_for_each_entry(psdev, &pcistub_devices, dev_list) {
24172 + if (psdev->dev == dev) {
24173 + found_psdev = psdev;
24174 + break;
24175 + }
24176 + }
24177 +
24178 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24179 +
24180 + if (found_psdev) {
24181 + dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
24182 + found_psdev->pdev);
24183 +
24184 + if (found_psdev->pdev) {
24185 + printk(KERN_WARNING "pciback: ****** removing device "
24186 + "%s while still in-use! ******\n",
24187 + pci_name(found_psdev->dev));
24188 + printk(KERN_WARNING "pciback: ****** driver domain may "
24189 + "still access this device's i/o resources!\n");
24190 + printk(KERN_WARNING "pciback: ****** shutdown driver "
24191 + "domain before binding device\n");
24192 + printk(KERN_WARNING "pciback: ****** to other drivers "
24193 + "or domains\n");
24194 +
24195 + pciback_release_pci_dev(found_psdev->pdev,
24196 + found_psdev->dev);
24197 + }
24198 +
24199 + spin_lock_irqsave(&pcistub_devices_lock, flags);
24200 + list_del(&found_psdev->dev_list);
24201 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24202 +
24203 + /* the final put for releasing from the list */
24204 + pcistub_device_put(found_psdev);
24205 + }
24206 +}
24207 +
24208 +static const struct pci_device_id pcistub_ids[] = {
24209 + {
24210 + .vendor = PCI_ANY_ID,
24211 + .device = PCI_ANY_ID,
24212 + .subvendor = PCI_ANY_ID,
24213 + .subdevice = PCI_ANY_ID,
24214 + },
24215 + {0,},
24216 +};
24217 +
24218 +/*
24219 + * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
24220 + * for a normal device. I don't want it to be loaded automatically.
24221 + */
24222 +
24223 +static struct pci_driver pciback_pci_driver = {
24224 + .name = "pciback",
24225 + .id_table = pcistub_ids,
24226 + .probe = pcistub_probe,
24227 + .remove = pcistub_remove,
24228 +};
24229 +
24230 +static inline int str_to_slot(const char *buf, int *domain, int *bus,
24231 + int *slot, int *func)
24232 +{
24233 + int err;
24234 +
24235 + err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
24236 + if (err == 4)
24237 + return 0;
24238 + else if (err < 0)
24239 + return -EINVAL;
24240 +
24241 + /* try again without domain */
24242 + *domain = 0;
24243 + err = sscanf(buf, " %x:%x.%x", bus, slot, func);
24244 + if (err == 3)
24245 + return 0;
24246 +
24247 + return -EINVAL;
24248 +}
24249 +
24250 +static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
24251 + *slot, int *func, int *reg, int *size, int *mask)
24252 +{
24253 + int err;
24254 +
24255 + err =
24256 + sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
24257 + func, reg, size, mask);
24258 + if (err == 7)
24259 + return 0;
24260 + return -EINVAL;
24261 +}
24262 +
24263 +static int pcistub_device_id_add(int domain, int bus, int slot, int func)
24264 +{
24265 + struct pcistub_device_id *pci_dev_id;
24266 + unsigned long flags;
24267 +
24268 + pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
24269 + if (!pci_dev_id)
24270 + return -ENOMEM;
24271 +
24272 + pci_dev_id->domain = domain;
24273 + pci_dev_id->bus = bus;
24274 + pci_dev_id->devfn = PCI_DEVFN(slot, func);
24275 +
24276 + pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
24277 + domain, bus, slot, func);
24278 +
24279 + spin_lock_irqsave(&device_ids_lock, flags);
24280 + list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
24281 + spin_unlock_irqrestore(&device_ids_lock, flags);
24282 +
24283 + return 0;
24284 +}
24285 +
24286 +static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
24287 +{
24288 + struct pcistub_device_id *pci_dev_id, *t;
24289 + int devfn = PCI_DEVFN(slot, func);
24290 + int err = -ENOENT;
24291 + unsigned long flags;
24292 +
24293 + spin_lock_irqsave(&device_ids_lock, flags);
24294 + list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
24295 +
24296 + if (pci_dev_id->domain == domain
24297 + && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
24298 + /* Don't break; here because it's possible the same
24299 + * slot could be in the list more than once
24300 + */
24301 + list_del(&pci_dev_id->slot_list);
24302 + kfree(pci_dev_id);
24303 +
24304 + err = 0;
24305 +
24306 + pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
24307 + "seize list\n", domain, bus, slot, func);
24308 + }
24309 + }
24310 + spin_unlock_irqrestore(&device_ids_lock, flags);
24311 +
24312 + return err;
24313 +}
24314 +
24315 +static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
24316 + int size, int mask)
24317 +{
24318 + int err = 0;
24319 + struct pcistub_device *psdev;
24320 + struct pci_dev *dev;
24321 + struct config_field *field;
24322 +
24323 + psdev = pcistub_device_find(domain, bus, slot, func);
24324 + if (!psdev || !psdev->dev) {
24325 + err = -ENODEV;
24326 + goto out;
24327 + }
24328 + dev = psdev->dev;
24329 +
24330 + field = kzalloc(sizeof(*field), GFP_ATOMIC);
24331 + if (!field) {
24332 + err = -ENOMEM;
24333 + goto out;
24334 + }
24335 +
24336 + field->offset = reg;
24337 + field->size = size;
24338 + field->mask = mask;
24339 + field->init = NULL;
24340 + field->reset = NULL;
24341 + field->release = NULL;
24342 + field->clean = pciback_config_field_free;
24343 +
24344 + err = pciback_config_quirks_add_field(dev, field);
24345 + if (err)
24346 + kfree(field);
24347 + out:
24348 + return err;
24349 +}
24350 +
24351 +static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
24352 + size_t count)
24353 +{
24354 + int domain, bus, slot, func;
24355 + int err;
24356 +
24357 + err = str_to_slot(buf, &domain, &bus, &slot, &func);
24358 + if (err)
24359 + goto out;
24360 +
24361 + err = pcistub_device_id_add(domain, bus, slot, func);
24362 +
24363 + out:
24364 + if (!err)
24365 + err = count;
24366 + return err;
24367 +}
24368 +
24369 +DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
24370 +
24371 +static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
24372 + size_t count)
24373 +{
24374 + int domain, bus, slot, func;
24375 + int err;
24376 +
24377 + err = str_to_slot(buf, &domain, &bus, &slot, &func);
24378 + if (err)
24379 + goto out;
24380 +
24381 + err = pcistub_device_id_remove(domain, bus, slot, func);
24382 +
24383 + out:
24384 + if (!err)
24385 + err = count;
24386 + return err;
24387 +}
24388 +
24389 +DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
24390 +
24391 +static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
24392 +{
24393 + struct pcistub_device_id *pci_dev_id;
24394 + size_t count = 0;
24395 + unsigned long flags;
24396 +
24397 + spin_lock_irqsave(&device_ids_lock, flags);
24398 + list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
24399 + if (count >= PAGE_SIZE)
24400 + break;
24401 +
24402 + count += scnprintf(buf + count, PAGE_SIZE - count,
24403 + "%04x:%02x:%02x.%01x\n",
24404 + pci_dev_id->domain, pci_dev_id->bus,
24405 + PCI_SLOT(pci_dev_id->devfn),
24406 + PCI_FUNC(pci_dev_id->devfn));
24407 + }
24408 + spin_unlock_irqrestore(&device_ids_lock, flags);
24409 +
24410 + return count;
24411 +}
24412 +
24413 +DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
24414 +
24415 +static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
24416 + size_t count)
24417 +{
24418 + int domain, bus, slot, func, reg, size, mask;
24419 + int err;
24420 +
24421 + err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
24422 + &mask);
24423 + if (err)
24424 + goto out;
24425 +
24426 + err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
24427 +
24428 + out:
24429 + if (!err)
24430 + err = count;
24431 + return err;
24432 +}
24433 +
24434 +static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
24435 +{
24436 + int count = 0;
24437 + unsigned long flags;
24438 + extern struct list_head pciback_quirks;
24439 + struct pciback_config_quirk *quirk;
24440 + struct pciback_dev_data *dev_data;
24441 + const struct config_field *field;
24442 + const struct config_field_entry *cfg_entry;
24443 +
24444 + spin_lock_irqsave(&device_ids_lock, flags);
24445 + list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
24446 + if (count >= PAGE_SIZE)
24447 + goto out;
24448 +
24449 + count += scnprintf(buf + count, PAGE_SIZE - count,
24450 + "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
24451 + quirk->pdev->bus->number,
24452 + PCI_SLOT(quirk->pdev->devfn),
24453 + PCI_FUNC(quirk->pdev->devfn),
24454 + quirk->devid.vendor, quirk->devid.device,
24455 + quirk->devid.subvendor,
24456 + quirk->devid.subdevice);
24457 +
24458 + dev_data = pci_get_drvdata(quirk->pdev);
24459 +
24460 + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
24461 + field = cfg_entry->field;
24462 + if (count >= PAGE_SIZE)
24463 + goto out;
24464 +
24465 + count += scnprintf(buf + count, PAGE_SIZE - count,
24466 + "\t\t%08x:%01x:%08x\n",
24467 + cfg_entry->base_offset + field->offset,
24468 + field->size, field->mask);
24469 + }
24470 + }
24471 +
24472 + out:
24473 + spin_unlock_irqrestore(&device_ids_lock, flags);
24474 +
24475 + return count;
24476 +}
24477 +
24478 +DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
24479 +
24480 +static ssize_t permissive_add(struct device_driver *drv, const char *buf,
24481 + size_t count)
24482 +{
24483 + int domain, bus, slot, func;
24484 + int err;
24485 + struct pcistub_device *psdev;
24486 + struct pciback_dev_data *dev_data;
24487 + err = str_to_slot(buf, &domain, &bus, &slot, &func);
24488 + if (err)
24489 + goto out;
24490 + psdev = pcistub_device_find(domain, bus, slot, func);
24491 + if (!psdev) {
24492 + err = -ENODEV;
24493 + goto out;
24494 + }
24495 + if (!psdev->dev) {
24496 + err = -ENODEV;
24497 + goto release;
24498 + }
24499 + dev_data = pci_get_drvdata(psdev->dev);
24500 + /* the driver data for a device should never be null at this point */
24501 + if (!dev_data) {
24502 + err = -ENXIO;
24503 + goto release;
24504 + }
24505 + if (!dev_data->permissive) {
24506 + dev_data->permissive = 1;
24507 + /* Let user know that what they're doing could be unsafe */
24508 + dev_warn(&psdev->dev->dev,
24509 + "enabling permissive mode configuration space accesses!\n");
24510 + dev_warn(&psdev->dev->dev,
24511 + "permissive mode is potentially unsafe!\n");
24512 + }
24513 + release:
24514 + pcistub_device_put(psdev);
24515 + out:
24516 + if (!err)
24517 + err = count;
24518 + return err;
24519 +}
24520 +
24521 +static ssize_t permissive_show(struct device_driver *drv, char *buf)
24522 +{
24523 + struct pcistub_device *psdev;
24524 + struct pciback_dev_data *dev_data;
24525 + size_t count = 0;
24526 + unsigned long flags;
24527 + spin_lock_irqsave(&pcistub_devices_lock, flags);
24528 + list_for_each_entry(psdev, &pcistub_devices, dev_list) {
24529 + if (count >= PAGE_SIZE)
24530 + break;
24531 + if (!psdev->dev)
24532 + continue;
24533 + dev_data = pci_get_drvdata(psdev->dev);
24534 + if (!dev_data || !dev_data->permissive)
24535 + continue;
24536 + count +=
24537 + scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
24538 + pci_name(psdev->dev));
24539 + }
24540 + spin_unlock_irqrestore(&pcistub_devices_lock, flags);
24541 + return count;
24542 +}
24543 +
24544 +DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
24545 +
24546 +#ifdef CONFIG_PCI_MSI
24547 +
24548 +int pciback_get_owner(struct pci_dev *dev)
24549 +{
24550 + struct pcistub_device *psdev;
24551 +
24552 + psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
24553 + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
24554 +
24555 + if (!psdev || !psdev->pdev)
24556 + return -1;
24557 +
24558 + return psdev->pdev->xdev->otherend_id;
24559 +}
24560 +#endif
24561 +
24562 +static void pcistub_exit(void)
24563 +{
24564 + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
24565 + driver_remove_file(&pciback_pci_driver.driver,
24566 + &driver_attr_remove_slot);
24567 + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
24568 + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
24569 + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
24570 +
24571 + pci_unregister_driver(&pciback_pci_driver);
24572 + WARN_ON(unregister_msi_get_owner(pciback_get_owner));
24573 +}
24574 +
24575 +static int __init pcistub_init(void)
24576 +{
24577 + int pos = 0;
24578 + int err = 0;
24579 + int domain, bus, slot, func;
24580 + int parsed;
24581 +
24582 + if (pci_devs_to_hide && *pci_devs_to_hide) {
24583 + do {
24584 + parsed = 0;
24585 +
24586 + err = sscanf(pci_devs_to_hide + pos,
24587 + " (%x:%x:%x.%x) %n",
24588 + &domain, &bus, &slot, &func, &parsed);
24589 + if (err != 4) {
24590 + domain = 0;
24591 + err = sscanf(pci_devs_to_hide + pos,
24592 + " (%x:%x.%x) %n",
24593 + &bus, &slot, &func, &parsed);
24594 + if (err != 3)
24595 + goto parse_error;
24596 + }
24597 +
24598 + err = pcistub_device_id_add(domain, bus, slot, func);
24599 + if (err)
24600 + goto out;
24601 +
24602 + /* if parsed<=0, we've reached the end of the string */
24603 + pos += parsed;
24604 + } while (parsed > 0 && pci_devs_to_hide[pos]);
24605 + }
24606 +
24607 + /* If we're the first PCI Device Driver to register, we're the
24608 + * first one to get offered PCI devices as they become
24609 + * available (and thus we can be the first to grab them)
24610 + */
24611 + err = pci_register_driver(&pciback_pci_driver);
24612 + if (err < 0)
24613 + goto out;
24614 +
24615 + err = driver_create_file(&pciback_pci_driver.driver,
24616 + &driver_attr_new_slot);
24617 + if (!err)
24618 + err = driver_create_file(&pciback_pci_driver.driver,
24619 + &driver_attr_remove_slot);
24620 + if (!err)
24621 + err = driver_create_file(&pciback_pci_driver.driver,
24622 + &driver_attr_slots);
24623 + if (!err)
24624 + err = driver_create_file(&pciback_pci_driver.driver,
24625 + &driver_attr_quirks);
24626 + if (!err)
24627 + err = driver_create_file(&pciback_pci_driver.driver,
24628 + &driver_attr_permissive);
24629 +
24630 + if (!err)
24631 + err = register_msi_get_owner(pciback_get_owner);
24632 + if (err)
24633 + pcistub_exit();
24634 +
24635 + out:
24636 + return err;
24637 +
24638 + parse_error:
24639 + printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
24640 + pci_devs_to_hide + pos);
24641 + return -EINVAL;
24642 +}
24643 +
24644 +#ifndef MODULE
24645 +/*
24646 + * fs_initcall happens before device_initcall
24647 + * so pciback *should* get called first (b/c we
24648 + * want to suck up any device before other drivers
24649 + * get a chance by being the first pci device
24650 + * driver to register)
24651 + */
24652 +fs_initcall(pcistub_init);
24653 +#endif
24654 +
24655 +static int __init pciback_init(void)
24656 +{
24657 + int err;
24658 +
24659 + err = pciback_config_init();
24660 + if (err)
24661 + return err;
24662 +
24663 +#ifdef MODULE
24664 + err = pcistub_init();
24665 + if (err < 0)
24666 + return err;
24667 +#endif
24668 +
24669 + pcistub_init_devices_late();
24670 + err = pciback_xenbus_register();
24671 + if (err)
24672 + pcistub_exit();
24673 +
24674 + return err;
24675 +}
24676 +
24677 +static void __exit pciback_cleanup(void)
24678 +{
24679 + pciback_xenbus_unregister();
24680 + pcistub_exit();
24681 +}
24682 +
24683 +module_init(pciback_init);
24684 +module_exit(pciback_cleanup);
24685 +
24686 +MODULE_LICENSE("Dual BSD/GPL");
24687 Index: head-2008-11-25/drivers/xen/pciback/pciback.h
24688 ===================================================================
24689 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
24690 +++ head-2008-11-25/drivers/xen/pciback/pciback.h 2008-07-21 11:00:33.000000000 +0200
24691 @@ -0,0 +1,111 @@
24692 +/*
24693 + * PCI Backend Common Data Structures & Function Declarations
24694 + *
24695 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
24696 + */
24697 +#ifndef __XEN_PCIBACK_H__
24698 +#define __XEN_PCIBACK_H__
24699 +
24700 +#include <linux/pci.h>
24701 +#include <linux/interrupt.h>
24702 +#include <xen/xenbus.h>
24703 +#include <linux/list.h>
24704 +#include <linux/spinlock.h>
24705 +#include <linux/workqueue.h>
24706 +#include <asm/atomic.h>
24707 +#include <xen/interface/io/pciif.h>
24708 +
24709 +struct pci_dev_entry {
24710 + struct list_head list;
24711 + struct pci_dev *dev;
24712 +};
24713 +
24714 +#define _PDEVF_op_active (0)
24715 +#define PDEVF_op_active (1<<(_PDEVF_op_active))
24716 +
24717 +struct pciback_device {
24718 + void *pci_dev_data;
24719 + spinlock_t dev_lock;
24720 +
24721 + struct xenbus_device *xdev;
24722 +
24723 + struct xenbus_watch be_watch;
24724 + u8 be_watching;
24725 +
24726 + int evtchn_irq;
24727 +
24728 + struct vm_struct *sh_area;
24729 + struct xen_pci_sharedinfo *sh_info;
24730 +
24731 + unsigned long flags;
24732 +
24733 + struct work_struct op_work;
24734 +};
24735 +
24736 +struct pciback_dev_data {
24737 + struct list_head config_fields;
24738 + int permissive;
24739 + int warned_on_write;
24740 +};
24741 +
24742 +/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
24743 +struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
24744 + int domain, int bus,
24745 + int slot, int func);
24746 +struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
24747 + struct pci_dev *dev);
24748 +void pcistub_put_pci_dev(struct pci_dev *dev);
24749 +
24750 +/* Ensure a device is turned off or reset */
24751 +void pciback_reset_device(struct pci_dev *pdev);
24752 +
24753 +/* Access a virtual configuration space for a PCI device */
24754 +int pciback_config_init(void);
24755 +int pciback_config_init_dev(struct pci_dev *dev);
24756 +void pciback_config_free_dyn_fields(struct pci_dev *dev);
24757 +void pciback_config_reset_dev(struct pci_dev *dev);
24758 +void pciback_config_free_dev(struct pci_dev *dev);
24759 +int pciback_config_read(struct pci_dev *dev, int offset, int size,
24760 + u32 * ret_val);
24761 +int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
24762 +
24763 +/* Handle requests for specific devices from the frontend */
24764 +typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
24765 + unsigned int domain, unsigned int bus,
24766 + unsigned int devfn, unsigned int devid);
24767 +typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
24768 + unsigned int domain, unsigned int bus);
24769 +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
24770 + int devid, publish_pci_dev_cb publish_cb);
24771 +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
24772 +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
24773 + unsigned int domain, unsigned int bus,
24774 + unsigned int devfn);
24775 +int pciback_init_devices(struct pciback_device *pdev);
24776 +int pciback_publish_pci_roots(struct pciback_device *pdev,
24777 + publish_pci_root_cb cb);
24778 +void pciback_release_devices(struct pciback_device *pdev);
24779 +
24780 +/* Handles events from front-end */
24781 +irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
24782 +void pciback_do_op(void *data);
24783 +
24784 +int pciback_xenbus_register(void);
24785 +void pciback_xenbus_unregister(void);
24786 +
24787 +#ifdef CONFIG_PCI_MSI
24788 +int pciback_enable_msi(struct pciback_device *pdev,
24789 + struct pci_dev *dev, struct xen_pci_op *op);
24790 +
24791 +int pciback_disable_msi(struct pciback_device *pdev,
24792 + struct pci_dev *dev, struct xen_pci_op *op);
24793 +
24794 +
24795 +int pciback_enable_msix(struct pciback_device *pdev,
24796 + struct pci_dev *dev, struct xen_pci_op *op);
24797 +
24798 +int pciback_disable_msix(struct pciback_device *pdev,
24799 + struct pci_dev *dev, struct xen_pci_op *op);
24800 +#endif
24801 +extern int verbose_request;
24802 +#endif
24803 Index: head-2008-11-25/drivers/xen/pciback/pciback_ops.c
24804 ===================================================================
24805 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
24806 +++ head-2008-11-25/drivers/xen/pciback/pciback_ops.c 2008-07-21 11:00:33.000000000 +0200
24807 @@ -0,0 +1,117 @@
24808 +/*
24809 + * PCI Backend Operations - respond to PCI requests from Frontend
24810 + *
24811 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
24812 + */
24813 +#include <linux/module.h>
24814 +#include <asm/bitops.h>
24815 +#include <xen/evtchn.h>
24816 +#include "pciback.h"
24817 +
24818 +int verbose_request = 0;
24819 +module_param(verbose_request, int, 0644);
24820 +
24821 +/* Ensure a device is "turned off" and ready to be exported.
24822 + * (Also see pciback_config_reset to ensure virtual configuration space is
24823 + * ready to be re-exported)
24824 + */
24825 +void pciback_reset_device(struct pci_dev *dev)
24826 +{
24827 + u16 cmd;
24828 +
24829 + /* Disable devices (but not bridges) */
24830 + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
24831 + pci_disable_device(dev);
24832 +
24833 + pci_write_config_word(dev, PCI_COMMAND, 0);
24834 +
24835 + dev->is_enabled = 0;
24836 + dev->is_busmaster = 0;
24837 + } else {
24838 + pci_read_config_word(dev, PCI_COMMAND, &cmd);
24839 + if (cmd & (PCI_COMMAND_INVALIDATE)) {
24840 + cmd &= ~(PCI_COMMAND_INVALIDATE);
24841 + pci_write_config_word(dev, PCI_COMMAND, cmd);
24842 +
24843 + dev->is_busmaster = 0;
24844 + }
24845 + }
24846 +}
24847 +
24848 +static inline void test_and_schedule_op(struct pciback_device *pdev)
24849 +{
24850 + /* Check that frontend is requesting an operation and that we are not
24851 + * already processing a request */
24852 + if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
24853 + && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
24854 + schedule_work(&pdev->op_work);
24855 +}
24856 +
24857 +/* Performing the configuration space reads/writes must not be done in atomic
24858 + * context because some of the pci_* functions can sleep (mostly due to ACPI
24859 + * use of semaphores). This function is intended to be called from a work
24860 + * queue in process context taking a struct pciback_device as a parameter */
24861 +void pciback_do_op(void *data)
24862 +{
24863 + struct pciback_device *pdev = data;
24864 + struct pci_dev *dev;
24865 + struct xen_pci_op *op = &pdev->sh_info->op;
24866 +
24867 + dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
24868 +
24869 + if (dev == NULL)
24870 + op->err = XEN_PCI_ERR_dev_not_found;
24871 + else
24872 + {
24873 + switch (op->cmd)
24874 + {
24875 + case XEN_PCI_OP_conf_read:
24876 + op->err = pciback_config_read(dev,
24877 + op->offset, op->size, &op->value);
24878 + break;
24879 + case XEN_PCI_OP_conf_write:
24880 + op->err = pciback_config_write(dev,
24881 + op->offset, op->size, op->value);
24882 + break;
24883 +#ifdef CONFIG_PCI_MSI
24884 + case XEN_PCI_OP_enable_msi:
24885 + op->err = pciback_enable_msi(pdev, dev, op);
24886 + break;
24887 + case XEN_PCI_OP_disable_msi:
24888 + op->err = pciback_disable_msi(pdev, dev, op);
24889 + break;
24890 + case XEN_PCI_OP_enable_msix:
24891 + op->err = pciback_enable_msix(pdev, dev, op);
24892 + break;
24893 + case XEN_PCI_OP_disable_msix:
24894 + op->err = pciback_disable_msix(pdev, dev, op);
24895 + break;
24896 +#endif
24897 + default:
24898 + op->err = XEN_PCI_ERR_not_implemented;
24899 + break;
24900 + }
24901 + }
24902 + /* Tell the driver domain that we're done. */
24903 + wmb();
24904 + clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
24905 + notify_remote_via_irq(pdev->evtchn_irq);
24906 +
24907 + /* Mark that we're done. */
24908 + smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
24909 + clear_bit(_PDEVF_op_active, &pdev->flags);
24910 + smp_mb__after_clear_bit(); /* /before/ final check for work */
24911 +
24912 + /* Check to see if the driver domain tried to start another request in
24913 + * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. */
24914 + test_and_schedule_op(pdev);
24915 +}
24916 +
24917 +irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
24918 +{
24919 + struct pciback_device *pdev = dev_id;
24920 +
24921 + test_and_schedule_op(pdev);
24922 +
24923 + return IRQ_HANDLED;
24924 +}
24925 Index: head-2008-11-25/drivers/xen/pciback/slot.c
24926 ===================================================================
24927 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
24928 +++ head-2008-11-25/drivers/xen/pciback/slot.c 2008-02-26 10:54:11.000000000 +0100
24929 @@ -0,0 +1,157 @@
24930 +/*
24931 + * PCI Backend - Provides a Virtual PCI bus (with real devices)
24932 + * to the frontend
24933 + *
24934 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> (vpci.c)
24935 + * Author: Tristan Gingold <tristan.gingold@bull.net>, from vpci.c
24936 + */
24937 +
24938 +#include <linux/list.h>
24939 +#include <linux/slab.h>
24940 +#include <linux/pci.h>
24941 +#include <linux/spinlock.h>
24942 +#include "pciback.h"
24943 +
24944 +/* There are at most 32 slots in a pci bus. */
24945 +#define PCI_SLOT_MAX 32
24946 +
24947 +#define PCI_BUS_NBR 2
24948 +
24949 +struct slot_dev_data {
24950 + /* Access to dev_list must be protected by lock */
24951 + struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
24952 + spinlock_t lock;
24953 +};
24954 +
24955 +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
24956 + unsigned int domain, unsigned int bus,
24957 + unsigned int devfn)
24958 +{
24959 + struct pci_dev *dev = NULL;
24960 + struct slot_dev_data *slot_dev = pdev->pci_dev_data;
24961 + unsigned long flags;
24962 +
24963 + if (domain != 0 || PCI_FUNC(devfn) != 0)
24964 + return NULL;
24965 +
24966 + if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
24967 + return NULL;
24968 +
24969 + spin_lock_irqsave(&slot_dev->lock, flags);
24970 + dev = slot_dev->slots[bus][PCI_SLOT(devfn)];
24971 + spin_unlock_irqrestore(&slot_dev->lock, flags);
24972 +
24973 + return dev;
24974 +}
24975 +
24976 +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
24977 + int devid, publish_pci_dev_cb publish_cb)
24978 +{
24979 + int err = 0, slot, bus;
24980 + struct slot_dev_data *slot_dev = pdev->pci_dev_data;
24981 + unsigned long flags;
24982 +
24983 + if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
24984 + err = -EFAULT;
24985 + xenbus_dev_fatal(pdev->xdev, err,
24986 + "Can't export bridges on the virtual PCI bus");
24987 + goto out;
24988 + }
24989 +
24990 + spin_lock_irqsave(&slot_dev->lock, flags);
24991 +
24992 + /* Assign to a new slot on the virtual PCI bus */
24993 + for (bus = 0; bus < PCI_BUS_NBR; bus++)
24994 + for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
24995 + if (slot_dev->slots[bus][slot] == NULL) {
24996 + printk(KERN_INFO
24997 + "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
24998 + pci_name(dev), slot, bus);
24999 + slot_dev->slots[bus][slot] = dev;
25000 + goto unlock;
25001 + }
25002 + }
25003 +
25004 + err = -ENOMEM;
25005 + xenbus_dev_fatal(pdev->xdev, err,
25006 + "No more space on root virtual PCI bus");
25007 +
25008 + unlock:
25009 + spin_unlock_irqrestore(&slot_dev->lock, flags);
25010 +
25011 + /* Publish this device. */
25012 +       if (!err)
25013 + err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
25014 +
25015 + out:
25016 + return err;
25017 +}
25018 +
25019 +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
25020 +{
25021 + int slot, bus;
25022 + struct slot_dev_data *slot_dev = pdev->pci_dev_data;
25023 + struct pci_dev *found_dev = NULL;
25024 + unsigned long flags;
25025 +
25026 + spin_lock_irqsave(&slot_dev->lock, flags);
25027 +
25028 + for (bus = 0; bus < PCI_BUS_NBR; bus++)
25029 + for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25030 + if (slot_dev->slots[bus][slot] == dev) {
25031 + slot_dev->slots[bus][slot] = NULL;
25032 + found_dev = dev;
25033 + goto out;
25034 + }
25035 + }
25036 +
25037 + out:
25038 + spin_unlock_irqrestore(&slot_dev->lock, flags);
25039 +
25040 + if (found_dev)
25041 + pcistub_put_pci_dev(found_dev);
25042 +}
25043 +
25044 +int pciback_init_devices(struct pciback_device *pdev)
25045 +{
25046 + int slot, bus;
25047 + struct slot_dev_data *slot_dev;
25048 +
25049 + slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL);
25050 + if (!slot_dev)
25051 + return -ENOMEM;
25052 +
25053 + spin_lock_init(&slot_dev->lock);
25054 +
25055 + for (bus = 0; bus < PCI_BUS_NBR; bus++)
25056 + for (slot = 0; slot < PCI_SLOT_MAX; slot++)
25057 + slot_dev->slots[bus][slot] = NULL;
25058 +
25059 + pdev->pci_dev_data = slot_dev;
25060 +
25061 + return 0;
25062 +}
25063 +
25064 +int pciback_publish_pci_roots(struct pciback_device *pdev,
25065 + publish_pci_root_cb publish_cb)
25066 +{
25067 + /* The Virtual PCI bus has only one root */
25068 + return publish_cb(pdev, 0, 0);
25069 +}
25070 +
25071 +void pciback_release_devices(struct pciback_device *pdev)
25072 +{
25073 + int slot, bus;
25074 + struct slot_dev_data *slot_dev = pdev->pci_dev_data;
25075 + struct pci_dev *dev;
25076 +
25077 + for (bus = 0; bus < PCI_BUS_NBR; bus++)
25078 + for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25079 + dev = slot_dev->slots[bus][slot];
25080 + if (dev != NULL)
25081 + pcistub_put_pci_dev(dev);
25082 + }
25083 +
25084 + kfree(slot_dev);
25085 + pdev->pci_dev_data = NULL;
25086 +}
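
slot.c above serves only function 0 of each exported device and looks it up by (bus, PCI_SLOT(devfn)) in a fixed PCI_BUS_NBR x PCI_SLOT_MAX table. For reference, the standard devfn encoding the lookup relies on (a small illustrative example; the values are chosen arbitrarily):

/* devfn packs slot and function: devfn = (slot << 3) | func */
unsigned int devfn = PCI_DEVFN(5, 0);	/* slot 5, function 0 -> 0x28 */
unsigned int slot  = PCI_SLOT(devfn);	/* 5: index into slots[bus][] */
unsigned int func  = PCI_FUNC(devfn);	/* 0: any other function is rejected */
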
25087 Index: head-2008-11-25/drivers/xen/pciback/vpci.c
25088 ===================================================================
25089 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
25090 +++ head-2008-11-25/drivers/xen/pciback/vpci.c 2008-02-26 10:54:11.000000000 +0100
25091 @@ -0,0 +1,212 @@
25092 +/*
25093 + * PCI Backend - Provides a Virtual PCI bus (with real devices)
25094 + * to the frontend
25095 + *
25096 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
25097 + */
25098 +
25099 +#include <linux/list.h>
25100 +#include <linux/slab.h>
25101 +#include <linux/pci.h>
25102 +#include <linux/spinlock.h>
25103 +#include "pciback.h"
25104 +
25105 +#define PCI_SLOT_MAX 32
25106 +
25107 +struct vpci_dev_data {
25108 + /* Access to dev_list must be protected by lock */
25109 + struct list_head dev_list[PCI_SLOT_MAX];
25110 + spinlock_t lock;
25111 +};
25112 +
25113 +static inline struct list_head *list_first(struct list_head *head)
25114 +{
25115 + return head->next;
25116 +}
25117 +
25118 +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
25119 + unsigned int domain, unsigned int bus,
25120 + unsigned int devfn)
25121 +{
25122 + struct pci_dev_entry *entry;
25123 + struct pci_dev *dev = NULL;
25124 + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
25125 + unsigned long flags;
25126 +
25127 + if (domain != 0 || bus != 0)
25128 + return NULL;
25129 +
25130 + if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
25131 + spin_lock_irqsave(&vpci_dev->lock, flags);
25132 +
25133 + list_for_each_entry(entry,
25134 + &vpci_dev->dev_list[PCI_SLOT(devfn)],
25135 + list) {
25136 + if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
25137 + dev = entry->dev;
25138 + break;
25139 + }
25140 + }
25141 +
25142 + spin_unlock_irqrestore(&vpci_dev->lock, flags);
25143 + }
25144 + return dev;
25145 +}
25146 +
25147 +static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
25148 +{
25149 + if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
25150 + && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
25151 + return 1;
25152 +
25153 + return 0;
25154 +}
25155 +
25156 +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
25157 + int devid, publish_pci_dev_cb publish_cb)
25158 +{
25159 + int err = 0, slot, func;
25160 + struct pci_dev_entry *t, *dev_entry;
25161 + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
25162 + unsigned long flags;
25163 +
25164 + if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
25165 + err = -EFAULT;
25166 + xenbus_dev_fatal(pdev->xdev, err,
25167 + "Can't export bridges on the virtual PCI bus");
25168 + goto out;
25169 + }
25170 +
25171 + dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
25172 + if (!dev_entry) {
25173 + err = -ENOMEM;
25174 + xenbus_dev_fatal(pdev->xdev, err,
25175 + "Error adding entry to virtual PCI bus");
25176 + goto out;
25177 + }
25178 +
25179 + dev_entry->dev = dev;
25180 +
25181 + spin_lock_irqsave(&vpci_dev->lock, flags);
25182 +
25183 + /* Keep multi-function devices together on the virtual PCI bus */
25184 + for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25185 + if (!list_empty(&vpci_dev->dev_list[slot])) {
25186 + t = list_entry(list_first(&vpci_dev->dev_list[slot]),
25187 + struct pci_dev_entry, list);
25188 +
25189 + if (match_slot(dev, t->dev)) {
25190 + pr_info("pciback: vpci: %s: "
25191 + "assign to virtual slot %d func %d\n",
25192 + pci_name(dev), slot,
25193 + PCI_FUNC(dev->devfn));
25194 + list_add_tail(&dev_entry->list,
25195 + &vpci_dev->dev_list[slot]);
25196 + func = PCI_FUNC(dev->devfn);
25197 + goto unlock;
25198 + }
25199 + }
25200 + }
25201 +
25202 + /* Assign to a new slot on the virtual PCI bus */
25203 + for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25204 + if (list_empty(&vpci_dev->dev_list[slot])) {
25205 + printk(KERN_INFO
25206 + "pciback: vpci: %s: assign to virtual slot %d\n",
25207 + pci_name(dev), slot);
25208 + list_add_tail(&dev_entry->list,
25209 + &vpci_dev->dev_list[slot]);
25210 + func = PCI_FUNC(dev->devfn);
25211 + goto unlock;
25212 + }
25213 + }
25214 +
25215 + err = -ENOMEM;
25216 + xenbus_dev_fatal(pdev->xdev, err,
25217 + "No more space on root virtual PCI bus");
25218 +
25219 + unlock:
25220 + spin_unlock_irqrestore(&vpci_dev->lock, flags);
25221 +
25222 + /* Publish this device. */
25223 +       if (!err)
25224 + err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
25225 +
25226 + out:
25227 + return err;
25228 +}
25229 +
25230 +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
25231 +{
25232 + int slot;
25233 + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
25234 + struct pci_dev *found_dev = NULL;
25235 + unsigned long flags;
25236 +
25237 + spin_lock_irqsave(&vpci_dev->lock, flags);
25238 +
25239 + for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25240 + struct pci_dev_entry *e, *tmp;
25241 + list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
25242 + list) {
25243 + if (e->dev == dev) {
25244 + list_del(&e->list);
25245 + found_dev = e->dev;
25246 + kfree(e);
25247 + goto out;
25248 + }
25249 + }
25250 + }
25251 +
25252 + out:
25253 + spin_unlock_irqrestore(&vpci_dev->lock, flags);
25254 +
25255 + if (found_dev)
25256 + pcistub_put_pci_dev(found_dev);
25257 +}
25258 +
25259 +int pciback_init_devices(struct pciback_device *pdev)
25260 +{
25261 + int slot;
25262 + struct vpci_dev_data *vpci_dev;
25263 +
25264 + vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
25265 + if (!vpci_dev)
25266 + return -ENOMEM;
25267 +
25268 + spin_lock_init(&vpci_dev->lock);
25269 +
25270 + for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25271 + INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
25272 + }
25273 +
25274 + pdev->pci_dev_data = vpci_dev;
25275 +
25276 + return 0;
25277 +}
25278 +
25279 +int pciback_publish_pci_roots(struct pciback_device *pdev,
25280 + publish_pci_root_cb publish_cb)
25281 +{
25282 + /* The Virtual PCI bus has only one root */
25283 + return publish_cb(pdev, 0, 0);
25284 +}
25285 +
25286 +void pciback_release_devices(struct pciback_device *pdev)
25287 +{
25288 + int slot;
25289 + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
25290 +
25291 + for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
25292 + struct pci_dev_entry *e, *tmp;
25293 + list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
25294 + list) {
25295 + list_del(&e->list);
25296 + pcistub_put_pci_dev(e->dev);
25297 + kfree(e);
25298 + }
25299 + }
25300 +
25301 + kfree(vpci_dev);
25302 + pdev->pci_dev_data = NULL;
25303 +}
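
Unlike slot.c, vpci.c above keeps the functions of one physical device together: match_slot() compares domain, bus and slot while ignoring the function, so additional functions join the virtual slot that already holds function 0. A hedged illustration with hypothetical devices:

/* Suppose 0000:03:00.0 was already assigned to virtual slot 2. When
 * 0000:03:00.1 is added, match_slot() returns 1 (same domain/bus/slot),
 * so it is appended to dev_list[2] and published as PCI_DEVFN(2, 1). */
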
25304 Index: head-2008-11-25/drivers/xen/pciback/xenbus.c
25305 ===================================================================
25306 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
25307 +++ head-2008-11-25/drivers/xen/pciback/xenbus.c 2008-07-21 11:00:33.000000000 +0200
25308 @@ -0,0 +1,704 @@
25309 +/*
25310 + * PCI Backend Xenbus Setup - handles setup with frontend and xend
25311 + *
25312 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
25313 + */
25314 +#include <linux/module.h>
25315 +#include <linux/init.h>
25316 +#include <linux/list.h>
25317 +#include <linux/vmalloc.h>
25318 +#include <xen/xenbus.h>
25319 +#include <xen/evtchn.h>
25320 +#include "pciback.h"
25321 +
25322 +#define INVALID_EVTCHN_IRQ (-1)
25323 +
25324 +static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
25325 +{
25326 + struct pciback_device *pdev;
25327 +
25328 + pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
25329 + if (pdev == NULL)
25330 + goto out;
25331 + dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
25332 +
25333 + pdev->xdev = xdev;
25334 + xdev->dev.driver_data = pdev;
25335 +
25336 + spin_lock_init(&pdev->dev_lock);
25337 +
25338 + pdev->sh_area = NULL;
25339 + pdev->sh_info = NULL;
25340 + pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
25341 + pdev->be_watching = 0;
25342 +
25343 + INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
25344 +
25345 + if (pciback_init_devices(pdev)) {
25346 + kfree(pdev);
25347 + pdev = NULL;
25348 + }
25349 + out:
25350 + return pdev;
25351 +}
25352 +
25353 +static void pciback_disconnect(struct pciback_device *pdev)
25354 +{
25355 + spin_lock(&pdev->dev_lock);
25356 +
25357 + /* Ensure the guest can't trigger our handler before removing devices */
25358 + if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
25359 + unbind_from_irqhandler(pdev->evtchn_irq, pdev);
25360 + pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
25361 + }
25362 +
25363 + /* If the driver domain started an op, make sure we complete it or
25364 + * delete it before releasing the shared memory */
25365 + cancel_delayed_work(&pdev->op_work);
25366 + flush_scheduled_work();
25367 +
25368 + if (pdev->sh_info != NULL) {
25369 + xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
25370 + pdev->sh_info = NULL;
25371 + }
25372 +
25373 + spin_unlock(&pdev->dev_lock);
25374 +}
25375 +
25376 +static void free_pdev(struct pciback_device *pdev)
25377 +{
25378 + if (pdev->be_watching)
25379 + unregister_xenbus_watch(&pdev->be_watch);
25380 +
25381 + pciback_disconnect(pdev);
25382 +
25383 + pciback_release_devices(pdev);
25384 +
25385 + pdev->xdev->dev.driver_data = NULL;
25386 + pdev->xdev = NULL;
25387 +
25388 + kfree(pdev);
25389 +}
25390 +
25391 +static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
25392 + int remote_evtchn)
25393 +{
25394 + int err = 0;
25395 + struct vm_struct *area;
25396 +
25397 + dev_dbg(&pdev->xdev->dev,
25398 + "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
25399 + gnt_ref, remote_evtchn);
25400 +
25401 + area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
25402 + if (IS_ERR(area)) {
25403 + err = PTR_ERR(area);
25404 + goto out;
25405 + }
25406 + pdev->sh_area = area;
25407 + pdev->sh_info = area->addr;
25408 +
25409 + err = bind_interdomain_evtchn_to_irqhandler(
25410 + pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
25411 + SA_SAMPLE_RANDOM, "pciback", pdev);
25412 + if (err < 0) {
25413 + xenbus_dev_fatal(pdev->xdev, err,
25414 + "Error binding event channel to IRQ");
25415 + goto out;
25416 + }
25417 + pdev->evtchn_irq = err;
25418 + err = 0;
25419 +
25420 + dev_dbg(&pdev->xdev->dev, "Attached!\n");
25421 + out:
25422 + return err;
25423 +}
25424 +
25425 +static int pciback_attach(struct pciback_device *pdev)
25426 +{
25427 + int err = 0;
25428 + int gnt_ref, remote_evtchn;
25429 + char *magic = NULL;
25430 +
25431 + spin_lock(&pdev->dev_lock);
25432 +
25433 + /* Make sure we only do this setup once */
25434 + if (xenbus_read_driver_state(pdev->xdev->nodename) !=
25435 + XenbusStateInitialised)
25436 + goto out;
25437 +
25438 + /* Wait for frontend to state that it has published the configuration */
25439 + if (xenbus_read_driver_state(pdev->xdev->otherend) !=
25440 + XenbusStateInitialised)
25441 + goto out;
25442 +
25443 + dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
25444 +
25445 + err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
25446 + "pci-op-ref", "%u", &gnt_ref,
25447 + "event-channel", "%u", &remote_evtchn,
25448 + "magic", NULL, &magic, NULL);
25449 + if (err) {
25450 + /* If configuration didn't get read correctly, wait longer */
25451 + xenbus_dev_fatal(pdev->xdev, err,
25452 + "Error reading configuration from frontend");
25453 + goto out;
25454 + }
25455 +
25456 + if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
25457 + xenbus_dev_fatal(pdev->xdev, -EFAULT,
25458 + "version mismatch (%s/%s) with pcifront - "
25459 + "halting pciback",
25460 + magic, XEN_PCI_MAGIC);
25461 + goto out;
25462 + }
25463 +
25464 + err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
25465 + if (err)
25466 + goto out;
25467 +
25468 + dev_dbg(&pdev->xdev->dev, "Connecting...\n");
25469 +
25470 + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
25471 + if (err)
25472 + xenbus_dev_fatal(pdev->xdev, err,
25473 + "Error switching to connected state!");
25474 +
25475 + dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
25476 + out:
25477 + spin_unlock(&pdev->dev_lock);
25478 +
25479 + if (magic)
25480 + kfree(magic);
25481 +
25482 + return err;
25483 +}
25484 +
25485 +static int pciback_publish_pci_dev(struct pciback_device *pdev,
25486 + unsigned int domain, unsigned int bus,
25487 + unsigned int devfn, unsigned int devid)
25488 +{
25489 + int err;
25490 + int len;
25491 + char str[64];
25492 +
25493 + len = snprintf(str, sizeof(str), "vdev-%d", devid);
25494 + if (unlikely(len >= (sizeof(str) - 1))) {
25495 + err = -ENOMEM;
25496 + goto out;
25497 + }
25498 +
25499 + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
25500 + "%04x:%02x:%02x.%02x", domain, bus,
25501 + PCI_SLOT(devfn), PCI_FUNC(devfn));
25502 +
25503 + out:
25504 + return err;
25505 +}
25506 +
25507 +static int pciback_export_device(struct pciback_device *pdev,
25508 + int domain, int bus, int slot, int func,
25509 + int devid)
25510 +{
25511 + struct pci_dev *dev;
25512 + int err = 0;
25513 +
25514 + dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
25515 + domain, bus, slot, func);
25516 +
25517 + dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
25518 + if (!dev) {
25519 + err = -EINVAL;
25520 + xenbus_dev_fatal(pdev->xdev, err,
25521 + "Couldn't locate PCI device "
25522 + "(%04x:%02x:%02x.%01x)! "
25523 + "perhaps already in-use?",
25524 + domain, bus, slot, func);
25525 + goto out;
25526 + }
25527 +
25528 + err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev);
25529 + if (err)
25530 + goto out;
25531 +
25532 + /* TODO: It'd be nice to export a bridge and have all of its children
25533 + * get exported with it. This may be best done in xend (which will
25534 + * have to calculate resource usage anyway) but we probably want to
25535 + * put something in here to ensure that if a bridge gets given to a
25536 + * driver domain, that all devices under that bridge are not given
25537 + * to other driver domains (as whoever controls the bridge can disable
25538 + * it and stop the other devices from working).
25539 + */
25540 + out:
25541 + return err;
25542 +}
25543 +
25544 +static int pciback_remove_device(struct pciback_device *pdev,
25545 + int domain, int bus, int slot, int func)
25546 +{
25547 + int err = 0;
25548 + struct pci_dev *dev;
25549 +
25550 + dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
25551 + domain, bus, slot, func);
25552 +
25553 + dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
25554 + if (!dev) {
25555 + err = -EINVAL;
25556 + dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
25557 + "(%04x:%02x:%02x.%01x)! not owned by this domain\n",
25558 + domain, bus, slot, func);
25559 + goto out;
25560 + }
25561 +
25562 + pciback_release_pci_dev(pdev, dev);
25563 +
25564 + out:
25565 + return err;
25566 +}
25567 +
25568 +static int pciback_publish_pci_root(struct pciback_device *pdev,
25569 + unsigned int domain, unsigned int bus)
25570 +{
25571 + unsigned int d, b;
25572 + int i, root_num, len, err;
25573 + char str[64];
25574 +
25575 + dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
25576 +
25577 + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
25578 + "root_num", "%d", &root_num);
25579 + if (err == 0 || err == -ENOENT)
25580 + root_num = 0;
25581 + else if (err < 0)
25582 + goto out;
25583 +
25584 + /* Verify that we haven't already published this pci root */
25585 + for (i = 0; i < root_num; i++) {
25586 + len = snprintf(str, sizeof(str), "root-%d", i);
25587 + if (unlikely(len >= (sizeof(str) - 1))) {
25588 + err = -ENOMEM;
25589 + goto out;
25590 + }
25591 +
25592 + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
25593 + str, "%x:%x", &d, &b);
25594 + if (err < 0)
25595 + goto out;
25596 + if (err != 2) {
25597 + err = -EINVAL;
25598 + goto out;
25599 + }
25600 +
25601 + if (d == domain && b == bus) {
25602 + err = 0;
25603 + goto out;
25604 + }
25605 + }
25606 +
25607 + len = snprintf(str, sizeof(str), "root-%d", root_num);
25608 + if (unlikely(len >= (sizeof(str) - 1))) {
25609 + err = -ENOMEM;
25610 + goto out;
25611 + }
25612 +
25613 + dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
25614 + root_num, domain, bus);
25615 +
25616 + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
25617 + "%04x:%02x", domain, bus);
25618 + if (err)
25619 + goto out;
25620 +
25621 + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
25622 + "root_num", "%d", (root_num + 1));
25623 +
25624 + out:
25625 + return err;
25626 +}
25627 +
25628 +static int pciback_reconfigure(struct pciback_device *pdev)
25629 +{
25630 + int err = 0;
25631 + int num_devs;
25632 + int domain, bus, slot, func;
25633 + int substate;
25634 + int i, len;
25635 + char state_str[64];
25636 + char dev_str[64];
25637 +
25638 + spin_lock(&pdev->dev_lock);
25639 +
25640 + dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
25641 +
25642 + /* Make sure we only reconfigure once */
25643 + if (xenbus_read_driver_state(pdev->xdev->nodename) !=
25644 + XenbusStateReconfiguring)
25645 + goto out;
25646 +
25647 + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
25648 + &num_devs);
25649 + if (err != 1) {
25650 + if (err >= 0)
25651 + err = -EINVAL;
25652 + xenbus_dev_fatal(pdev->xdev, err,
25653 + "Error reading number of devices");
25654 + goto out;
25655 + }
25656 +
25657 + for (i = 0; i < num_devs; i++) {
25658 + len = snprintf(state_str, sizeof(state_str), "state-%d", i);
25659 + if (unlikely(len >= (sizeof(state_str) - 1))) {
25660 + err = -ENOMEM;
25661 + xenbus_dev_fatal(pdev->xdev, err,
25662 + "String overflow while reading "
25663 + "configuration");
25664 + goto out;
25665 + }
25666 + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
25667 + "%d", &substate);
25668 + if (err != 1)
25669 + substate = XenbusStateUnknown;
25670 +
25671 + switch (substate) {
25672 + case XenbusStateInitialising:
25673 + dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
25674 +
25675 + len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
25676 + if (unlikely(len >= (sizeof(dev_str) - 1))) {
25677 + err = -ENOMEM;
25678 + xenbus_dev_fatal(pdev->xdev, err,
25679 + "String overflow while "
25680 + "reading configuration");
25681 + goto out;
25682 + }
25683 + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
25684 + dev_str, "%x:%x:%x.%x",
25685 + &domain, &bus, &slot, &func);
25686 + if (err < 0) {
25687 + xenbus_dev_fatal(pdev->xdev, err,
25688 + "Error reading device "
25689 + "configuration");
25690 + goto out;
25691 + }
25692 + if (err != 4) {
25693 + err = -EINVAL;
25694 + xenbus_dev_fatal(pdev->xdev, err,
25695 + "Error parsing pci device "
25696 + "configuration");
25697 + goto out;
25698 + }
25699 +
25700 + err = pciback_export_device(pdev, domain, bus, slot,
25701 + func, i);
25702 + if (err)
25703 + goto out;
25704 +
25705 + /* Publish pci roots. */
25706 + err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
25707 + if (err) {
25708 + xenbus_dev_fatal(pdev->xdev, err,
25709 +                                                "Error while publishing PCI root "
25710 +                                                "buses for frontend");
25711 + goto out;
25712 + }
25713 +
25714 + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
25715 + state_str, "%d",
25716 + XenbusStateInitialised);
25717 + if (err) {
25718 + xenbus_dev_fatal(pdev->xdev, err,
25719 + "Error switching substate of "
25720 + "dev-%d\n", i);
25721 + goto out;
25722 + }
25723 + break;
25724 +
25725 + case XenbusStateClosing:
25726 + dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
25727 +
25728 + len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
25729 + if (unlikely(len >= (sizeof(dev_str) - 1))) {
25730 + err = -ENOMEM;
25731 + xenbus_dev_fatal(pdev->xdev, err,
25732 + "String overflow while "
25733 + "reading configuration");
25734 + goto out;
25735 + }
25736 + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
25737 + dev_str, "%x:%x:%x.%x",
25738 + &domain, &bus, &slot, &func);
25739 + if (err < 0) {
25740 + xenbus_dev_fatal(pdev->xdev, err,
25741 + "Error reading device "
25742 + "configuration");
25743 + goto out;
25744 + }
25745 + if (err != 4) {
25746 + err = -EINVAL;
25747 + xenbus_dev_fatal(pdev->xdev, err,
25748 + "Error parsing pci device "
25749 + "configuration");
25750 + goto out;
25751 + }
25752 +
25753 + err = pciback_remove_device(pdev, domain, bus, slot,
25754 + func);
25755 +                       if (err)
25756 + goto out;
25757 +
25758 + /* TODO: If at some point we implement support for pci
25759 + * root hot-remove on pcifront side, we'll need to
25760 + * remove unnecessary xenstore nodes of pci roots here.
25761 + */
25762 +
25763 + break;
25764 +
25765 + default:
25766 + break;
25767 + }
25768 + }
25769 +
25770 + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
25771 + if (err) {
25772 + xenbus_dev_fatal(pdev->xdev, err,
25773 + "Error switching to reconfigured state!");
25774 + goto out;
25775 + }
25776 +
25777 + out:
25778 + spin_unlock(&pdev->dev_lock);
25779 +
25780 + return 0;
25781 +}
25782 +
25783 +static void pciback_frontend_changed(struct xenbus_device *xdev,
25784 + enum xenbus_state fe_state)
25785 +{
25786 + struct pciback_device *pdev = xdev->dev.driver_data;
25787 +
25788 + dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
25789 +
25790 + switch (fe_state) {
25791 + case XenbusStateInitialised:
25792 + pciback_attach(pdev);
25793 + break;
25794 +
25795 + case XenbusStateReconfiguring:
25796 + pciback_reconfigure(pdev);
25797 + break;
25798 +
25799 + case XenbusStateConnected:
25800 +               /* pcifront has finished reconfiguring and switched back to
25801 +                * Connected, so mirror that state on the backend side.
25802 +                */
25803 + xenbus_switch_state(xdev, XenbusStateConnected);
25804 + break;
25805 +
25806 + case XenbusStateClosing:
25807 + pciback_disconnect(pdev);
25808 + xenbus_switch_state(xdev, XenbusStateClosing);
25809 + break;
25810 +
25811 + case XenbusStateClosed:
25812 + pciback_disconnect(pdev);
25813 + xenbus_switch_state(xdev, XenbusStateClosed);
25814 + if (xenbus_dev_is_online(xdev))
25815 + break;
25816 + /* fall through if not online */
25817 + case XenbusStateUnknown:
25818 + dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
25819 + device_unregister(&xdev->dev);
25820 + break;
25821 +
25822 + default:
25823 + break;
25824 + }
25825 +}
25826 +
25827 +static int pciback_setup_backend(struct pciback_device *pdev)
25828 +{
25829 + /* Get configuration from xend (if available now) */
25830 + int domain, bus, slot, func;
25831 + int err = 0;
25832 + int i, num_devs;
25833 + char dev_str[64];
25834 + char state_str[64];
25835 +
25836 + spin_lock(&pdev->dev_lock);
25837 +
25838 + /* It's possible we could get the call to setup twice, so make sure
25839 + * we're not already connected.
25840 + */
25841 + if (xenbus_read_driver_state(pdev->xdev->nodename) !=
25842 + XenbusStateInitWait)
25843 + goto out;
25844 +
25845 + dev_dbg(&pdev->xdev->dev, "getting be setup\n");
25846 +
25847 + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
25848 + &num_devs);
25849 + if (err != 1) {
25850 + if (err >= 0)
25851 + err = -EINVAL;
25852 + xenbus_dev_fatal(pdev->xdev, err,
25853 + "Error reading number of devices");
25854 + goto out;
25855 + }
25856 +
25857 + for (i = 0; i < num_devs; i++) {
25858 + int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
25859 + if (unlikely(l >= (sizeof(dev_str) - 1))) {
25860 + err = -ENOMEM;
25861 + xenbus_dev_fatal(pdev->xdev, err,
25862 + "String overflow while reading "
25863 + "configuration");
25864 + goto out;
25865 + }
25866 +
25867 + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
25868 + "%x:%x:%x.%x", &domain, &bus, &slot, &func);
25869 + if (err < 0) {
25870 + xenbus_dev_fatal(pdev->xdev, err,
25871 + "Error reading device configuration");
25872 + goto out;
25873 + }
25874 + if (err != 4) {
25875 + err = -EINVAL;
25876 + xenbus_dev_fatal(pdev->xdev, err,
25877 + "Error parsing pci device "
25878 + "configuration");
25879 + goto out;
25880 + }
25881 +
25882 + err = pciback_export_device(pdev, domain, bus, slot, func, i);
25883 + if (err)
25884 + goto out;
25885 +
25886 + /* Switch substate of this device. */
25887 + l = snprintf(state_str, sizeof(state_str), "state-%d", i);
25888 + if (unlikely(l >= (sizeof(state_str) - 1))) {
25889 + err = -ENOMEM;
25890 + xenbus_dev_fatal(pdev->xdev, err,
25891 + "String overflow while reading "
25892 + "configuration");
25893 + goto out;
25894 + }
25895 + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
25896 + "%d", XenbusStateInitialised);
25897 + if (err) {
25898 + xenbus_dev_fatal(pdev->xdev, err, "Error switching "
25899 + "substate of dev-%d\n", i);
25900 + goto out;
25901 + }
25902 + }
25903 +
25904 + err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
25905 + if (err) {
25906 + xenbus_dev_fatal(pdev->xdev, err,
25907 +                                "Error while publishing PCI root buses "
25908 + "for frontend");
25909 + goto out;
25910 + }
25911 +
25912 + err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
25913 + if (err)
25914 + xenbus_dev_fatal(pdev->xdev, err,
25915 + "Error switching to initialised state!");
25916 +
25917 + out:
25918 + spin_unlock(&pdev->dev_lock);
25919 +
25920 + if (!err)
25921 + /* see if pcifront is already configured (if not, we'll wait) */
25922 + pciback_attach(pdev);
25923 +
25924 + return err;
25925 +}
25926 +
25927 +static void pciback_be_watch(struct xenbus_watch *watch,
25928 + const char **vec, unsigned int len)
25929 +{
25930 + struct pciback_device *pdev =
25931 + container_of(watch, struct pciback_device, be_watch);
25932 +
25933 + switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
25934 + case XenbusStateInitWait:
25935 + pciback_setup_backend(pdev);
25936 + break;
25937 +
25938 + default:
25939 + break;
25940 + }
25941 +}
25942 +
25943 +static int pciback_xenbus_probe(struct xenbus_device *dev,
25944 + const struct xenbus_device_id *id)
25945 +{
25946 + int err = 0;
25947 + struct pciback_device *pdev = alloc_pdev(dev);
25948 +
25949 + if (pdev == NULL) {
25950 + err = -ENOMEM;
25951 + xenbus_dev_fatal(dev, err,
25952 + "Error allocating pciback_device struct");
25953 + goto out;
25954 + }
25955 +
25956 + /* wait for xend to configure us */
25957 + err = xenbus_switch_state(dev, XenbusStateInitWait);
25958 + if (err)
25959 + goto out;
25960 +
25961 + /* watch the backend node for backend configuration information */
25962 + err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
25963 + pciback_be_watch);
25964 + if (err)
25965 + goto out;
25966 + pdev->be_watching = 1;
25967 +
25968 + /* We need to force a call to our callback here in case
25969 + * xend already configured us!
25970 + */
25971 + pciback_be_watch(&pdev->be_watch, NULL, 0);
25972 +
25973 + out:
25974 + return err;
25975 +}
25976 +
25977 +static int pciback_xenbus_remove(struct xenbus_device *dev)
25978 +{
25979 + struct pciback_device *pdev = dev->dev.driver_data;
25980 +
25981 + if (pdev != NULL)
25982 + free_pdev(pdev);
25983 +
25984 + return 0;
25985 +}
25986 +
25987 +static const struct xenbus_device_id xenpci_ids[] = {
25988 + {"pci"},
25989 + {{0}},
25990 +};
25991 +
25992 +static struct xenbus_driver xenbus_pciback_driver = {
25993 + .name = "pciback",
25994 + .owner = THIS_MODULE,
25995 + .ids = xenpci_ids,
25996 + .probe = pciback_xenbus_probe,
25997 + .remove = pciback_xenbus_remove,
25998 + .otherend_changed = pciback_frontend_changed,
25999 +};
26000 +
26001 +int __init pciback_xenbus_register(void)
26002 +{
26003 + if (!is_running_on_xen())
26004 + return -ENODEV;
26005 +
26006 + return xenbus_register_backend(&xenbus_pciback_driver);
26007 +}
26008 +
26009 +void __exit pciback_xenbus_unregister(void)
26010 +{
26011 + xenbus_unregister_driver(&xenbus_pciback_driver);
26012 +}
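
For orientation, these are the xenstore paths the backend handshake above reads and writes; the key names and formats come straight from the code, while the values shown are made-up examples:

/* Under the backend's own node (pdev->xdev->nodename):
 *   num_devs = "1"
 *   dev-0    = "0:3:0.0"           scanned with "%x:%x:%x.%x"
 *   state-0  = "3"                 per-device substate (XenbusStateInitialised)
 *   vdev-0   = "0000:00:00.00"     virtual BDF published for pcifront
 *   root_num = "1"
 *   root-0   = "0000:00"
 * Under the frontend's node (pdev->xdev->otherend), read by pciback_attach():
 *   pci-op-ref    = "8"            grant reference of the shared op page
 *   event-channel = "11"
 *   magic         = XEN_PCI_MAGIC
 */
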
26013 Index: head-2008-11-25/drivers/xen/pcifront/Makefile
26014 ===================================================================
26015 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
26016 +++ head-2008-11-25/drivers/xen/pcifront/Makefile 2007-06-12 13:13:45.000000000 +0200
26017 @@ -0,0 +1,7 @@
26018 +obj-y += pcifront.o
26019 +
26020 +pcifront-y := pci_op.o xenbus.o pci.o
26021 +
26022 +ifeq ($(CONFIG_XEN_PCIDEV_FE_DEBUG),y)
26023 +EXTRA_CFLAGS += -DDEBUG
26024 +endif
26025 Index: head-2008-11-25/drivers/xen/pcifront/pci.c
26026 ===================================================================
26027 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
26028 +++ head-2008-11-25/drivers/xen/pcifront/pci.c 2007-06-12 13:13:45.000000000 +0200
26029 @@ -0,0 +1,46 @@
26030 +/*
26031 + * PCI Frontend Operations - ensure only one PCI frontend runs at a time
26032 + *
26033 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
26034 + */
26035 +#include <linux/module.h>
26036 +#include <linux/init.h>
26037 +#include <linux/pci.h>
26038 +#include <linux/spinlock.h>
26039 +#include "pcifront.h"
26040 +
26041 +DEFINE_SPINLOCK(pcifront_dev_lock);
26042 +static struct pcifront_device *pcifront_dev = NULL;
26043 +
26044 +int pcifront_connect(struct pcifront_device *pdev)
26045 +{
26046 + int err = 0;
26047 +
26048 + spin_lock(&pcifront_dev_lock);
26049 +
26050 + if (!pcifront_dev) {
26051 + dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
26052 + pcifront_dev = pdev;
26053 + }
26054 + else {
26055 + dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
26056 + err = -EEXIST;
26057 + }
26058 +
26059 + spin_unlock(&pcifront_dev_lock);
26060 +
26061 + return err;
26062 +}
26063 +
26064 +void pcifront_disconnect(struct pcifront_device *pdev)
26065 +{
26066 + spin_lock(&pcifront_dev_lock);
26067 +
26068 + if (pdev == pcifront_dev) {
26069 + dev_info(&pdev->xdev->dev,
26070 + "Disconnecting PCI Frontend Buses\n");
26071 + pcifront_dev = NULL;
26072 + }
26073 +
26074 + spin_unlock(&pcifront_dev_lock);
26075 +}
26076 Index: head-2008-11-25/drivers/xen/pcifront/pci_op.c
26077 ===================================================================
26078 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
26079 +++ head-2008-11-25/drivers/xen/pcifront/pci_op.c 2008-07-21 11:00:33.000000000 +0200
26080 @@ -0,0 +1,551 @@
26081 +/*
26082 + * PCI Frontend Operations - Communicates with the backend (pciback)
26083 + *
26084 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
26085 + */
26086 +#include <linux/module.h>
26087 +#include <linux/version.h>
26088 +#include <linux/init.h>
26089 +#include <linux/pci.h>
26090 +#include <linux/spinlock.h>
26091 +#include <linux/time.h>
26092 +#include <xen/evtchn.h>
26093 +#include "pcifront.h"
26094 +
26095 +static int verbose_request = 0;
26096 +module_param(verbose_request, int, 0644);
26097 +
26098 +#ifdef __ia64__
26099 +static void pcifront_init_sd(struct pcifront_sd *sd,
26100 + unsigned int domain, unsigned int bus,
26101 + struct pcifront_device *pdev)
26102 +{
26103 + int err, i, j, k, len, root_num, res_count;
26104 + struct acpi_resource res;
26105 + unsigned int d, b, byte;
26106 + unsigned long magic;
26107 + char str[64], tmp[3];
26108 + unsigned char *buf, *bufp;
26109 + u8 *ptr;
26110 +
26111 + memset(sd, 0, sizeof(*sd));
26112 +
26113 + sd->segment = domain;
26114 + sd->node = -1; /* Revisit for NUMA */
26115 + sd->platform_data = pdev;
26116 +
26117 + /* Look for resources for this controller in xenbus. */
26118 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "root_num",
26119 + "%d", &root_num);
26120 + if (err != 1)
26121 + return;
26122 +
26123 + for (i = 0; i < root_num; i++) {
26124 + len = snprintf(str, sizeof(str), "root-%d", i);
26125 + if (unlikely(len >= (sizeof(str) - 1)))
26126 + return;
26127 +
26128 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
26129 + str, "%x:%x", &d, &b);
26130 + if (err != 2)
26131 + return;
26132 +
26133 + if (d == domain && b == bus)
26134 + break;
26135 + }
26136 +
26137 + if (i == root_num)
26138 + return;
26139 +
26140 + len = snprintf(str, sizeof(str), "root-resource-magic");
26141 +
26142 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
26143 + str, "%lx", &magic);
26144 +
26145 + if (err != 1)
26146 + return; /* No resources, nothing to do */
26147 +
26148 + if (magic != (sizeof(res) * 2) + 1) {
26149 + printk(KERN_WARNING "pcifront: resource magic mismatch\n");
26150 + return;
26151 + }
26152 +
26153 + len = snprintf(str, sizeof(str), "root-%d-resources", i);
26154 + if (unlikely(len >= (sizeof(str) - 1)))
26155 + return;
26156 +
26157 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
26158 + str, "%d", &res_count);
26159 +
26160 + if (err != 1)
26161 + return; /* No resources, nothing to do */
26162 +
26163 + sd->window = kzalloc(sizeof(*sd->window) * res_count, GFP_KERNEL);
26164 + if (!sd->window)
26165 + return;
26166 +
26167 + /* magic is also the size of the byte stream in xenbus */
26168 + buf = kmalloc(magic, GFP_KERNEL);
26169 + if (!buf) {
26170 + kfree(sd->window);
26171 + sd->window = NULL;
26172 + return;
26173 + }
26174 +
26175 + /* Read the resources out of xenbus */
26176 + for (j = 0; j < res_count; j++) {
26177 + memset(&res, 0, sizeof(res));
26178 + memset(buf, 0, magic);
26179 +
26180 + len = snprintf(str, sizeof(str), "root-%d-resource-%d", i, j);
26181 + if (unlikely(len >= (sizeof(str) - 1)))
26182 + return;
26183 +
26184 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
26185 + "%s", buf);
26186 + if (err != 1) {
26187 + printk(KERN_WARNING "pcifront: error reading "
26188 + "resource %d on bus %04x:%02x\n",
26189 + j, domain, bus);
26190 + continue;
26191 + }
26192 +
26193 + bufp = buf;
26194 + ptr = (u8 *)&res;
26195 + memset(tmp, 0, sizeof(tmp));
26196 +
26197 + /* Copy ASCII byte stream into structure */
26198 + for (k = 0; k < magic - 1; k += 2) {
26199 + memcpy(tmp, bufp, 2);
26200 + bufp += 2;
26201 +
26202 + sscanf(tmp, "%02x", &byte);
26203 + *ptr = byte;
26204 + ptr++;
26205 + }
26206 +
26207 + xen_add_resource(sd, domain, bus, &res);
26208 + sd->windows++;
26209 + }
26210 + kfree(buf);
26211 +}
26212 +#endif
26213 +
26214 +static int errno_to_pcibios_err(int errno)
26215 +{
26216 + switch (errno) {
26217 + case XEN_PCI_ERR_success:
26218 + return PCIBIOS_SUCCESSFUL;
26219 +
26220 + case XEN_PCI_ERR_dev_not_found:
26221 + return PCIBIOS_DEVICE_NOT_FOUND;
26222 +
26223 + case XEN_PCI_ERR_invalid_offset:
26224 + case XEN_PCI_ERR_op_failed:
26225 + return PCIBIOS_BAD_REGISTER_NUMBER;
26226 +
26227 + case XEN_PCI_ERR_not_implemented:
26228 + return PCIBIOS_FUNC_NOT_SUPPORTED;
26229 +
26230 + case XEN_PCI_ERR_access_denied:
26231 + return PCIBIOS_SET_FAILED;
26232 + }
26233 + return errno;
26234 +}
26235 +
26236 +static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
26237 +{
26238 + int err = 0;
26239 + struct xen_pci_op *active_op = &pdev->sh_info->op;
26240 + unsigned long irq_flags;
26241 + evtchn_port_t port = pdev->evtchn;
26242 + s64 ns, ns_timeout;
26243 + struct timeval tv;
26244 +
26245 + spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
26246 +
26247 + memcpy(active_op, op, sizeof(struct xen_pci_op));
26248 +
26249 + /* Go */
26250 + wmb();
26251 + set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
26252 + notify_remote_via_evtchn(port);
26253 +
26254 + /*
26255 + * We set a poll timeout of 3 seconds but give up on return after
26256 + * 2 seconds. It is better to time out too late rather than too early
26257 + * (in the latter case we end up continually re-executing poll() with a
26258 + * timeout in the past). 1s difference gives plenty of slack for error.
26259 + */
26260 + do_gettimeofday(&tv);
26261 + ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
26262 +
26263 + clear_evtchn(port);
26264 +
26265 + while (test_bit(_XEN_PCIF_active,
26266 + (unsigned long *)&pdev->sh_info->flags)) {
26267 + if (HYPERVISOR_poll(&port, 1, jiffies + 3*HZ))
26268 + BUG();
26269 + clear_evtchn(port);
26270 + do_gettimeofday(&tv);
26271 + ns = timeval_to_ns(&tv);
26272 + if (ns > ns_timeout) {
26273 + dev_err(&pdev->xdev->dev,
26274 + "pciback not responding!!!\n");
26275 + clear_bit(_XEN_PCIF_active,
26276 + (unsigned long *)&pdev->sh_info->flags);
26277 + err = XEN_PCI_ERR_dev_not_found;
26278 + goto out;
26279 + }
26280 + }
26281 +
26282 + memcpy(op, active_op, sizeof(struct xen_pci_op));
26283 +
26284 + err = op->err;
26285 + out:
26286 + spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
26287 + return err;
26288 +}
26289 +
26290 +/* Access to this function is spinlocked in drivers/pci/access.c */
26291 +static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
26292 + int where, int size, u32 * val)
26293 +{
26294 + int err = 0;
26295 + struct xen_pci_op op = {
26296 + .cmd = XEN_PCI_OP_conf_read,
26297 + .domain = pci_domain_nr(bus),
26298 + .bus = bus->number,
26299 + .devfn = devfn,
26300 + .offset = where,
26301 + .size = size,
26302 + };
26303 + struct pcifront_sd *sd = bus->sysdata;
26304 + struct pcifront_device *pdev = pcifront_get_pdev(sd);
26305 +
26306 + if (verbose_request)
26307 + dev_info(&pdev->xdev->dev,
26308 + "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n",
26309 + pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
26310 + PCI_FUNC(devfn), where, size);
26311 +
26312 + err = do_pci_op(pdev, &op);
26313 +
26314 + if (likely(!err)) {
26315 + if (verbose_request)
26316 + dev_info(&pdev->xdev->dev, "read got back value %x\n",
26317 + op.value);
26318 +
26319 + *val = op.value;
26320 + } else if (err == -ENODEV) {
26321 + /* No device here, pretend that it just returned 0 */
26322 + err = 0;
26323 + *val = 0;
26324 + }
26325 +
26326 + return errno_to_pcibios_err(err);
26327 +}
26328 +
26329 +/* Access to this function is spinlocked in drivers/pci/access.c */
26330 +static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
26331 + int where, int size, u32 val)
26332 +{
26333 + struct xen_pci_op op = {
26334 + .cmd = XEN_PCI_OP_conf_write,
26335 + .domain = pci_domain_nr(bus),
26336 + .bus = bus->number,
26337 + .devfn = devfn,
26338 + .offset = where,
26339 + .size = size,
26340 + .value = val,
26341 + };
26342 + struct pcifront_sd *sd = bus->sysdata;
26343 + struct pcifront_device *pdev = pcifront_get_pdev(sd);
26344 +
26345 + if (verbose_request)
26346 + dev_info(&pdev->xdev->dev,
26347 + "write dev=%04x:%02x:%02x.%01x - "
26348 + "offset %x size %d val %x\n",
26349 + pci_domain_nr(bus), bus->number,
26350 + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
26351 +
26352 + return errno_to_pcibios_err(do_pci_op(pdev, &op));
26353 +}
26354 +
26355 +struct pci_ops pcifront_bus_ops = {
26356 + .read = pcifront_bus_read,
26357 + .write = pcifront_bus_write,
26358 +};
26359 +
26360 +#ifdef CONFIG_PCI_MSI
26361 +int pci_frontend_enable_msix(struct pci_dev *dev,
26362 + struct msix_entry *entries,
26363 + int nvec)
26364 +{
26365 + int err;
26366 + int i;
26367 + struct xen_pci_op op = {
26368 + .cmd = XEN_PCI_OP_enable_msix,
26369 + .domain = pci_domain_nr(dev->bus),
26370 + .bus = dev->bus->number,
26371 + .devfn = dev->devfn,
26372 + .value = nvec,
26373 + };
26374 + struct pcifront_sd *sd = dev->bus->sysdata;
26375 + struct pcifront_device *pdev = pcifront_get_pdev(sd);
26376 +
26377 + if (nvec > SH_INFO_MAX_VEC) {
26378 +               printk("too many vectors for pci frontend: %x\n", nvec);
26379 + return -EINVAL;
26380 + }
26381 +
26382 + for (i = 0; i < nvec; i++) {
26383 + op.msix_entries[i].entry = entries[i].entry;
26384 + op.msix_entries[i].vector = entries[i].vector;
26385 + }
26386 +
26387 + err = do_pci_op(pdev, &op);
26388 +
26389 + if (!err) {
26390 + if (!op.value) {
26391 +                       /* we got the result */
26392 + for ( i = 0; i < nvec; i++)
26393 + entries[i].vector = op.msix_entries[i].vector;
26394 + return 0;
26395 + }
26396 + else {
26397 +                       printk("enable msix: backend returned %x\n", op.value);
26398 + return op.value;
26399 + }
26400 + }
26401 + else {
26402 +               printk("enable msix failed with error %x\n", err);
26403 + return err;
26404 + }
26405 +}
26406 +
26407 +void pci_frontend_disable_msix(struct pci_dev* dev)
26408 +{
26409 + int err;
26410 + struct xen_pci_op op = {
26411 + .cmd = XEN_PCI_OP_disable_msix,
26412 + .domain = pci_domain_nr(dev->bus),
26413 + .bus = dev->bus->number,
26414 + .devfn = dev->devfn,
26415 + };
26416 + struct pcifront_sd *sd = dev->bus->sysdata;
26417 + struct pcifront_device *pdev = pcifront_get_pdev(sd);
26418 +
26419 + err = do_pci_op(pdev, &op);
26420 +
26421 +       /* What should we do on error? */
26422 + if (err)
26423 +               printk("pci_disable_msix failed with error %x\n", err);
26424 +}
26425 +
26426 +int pci_frontend_enable_msi(struct pci_dev *dev)
26427 +{
26428 + int err;
26429 + struct xen_pci_op op = {
26430 + .cmd = XEN_PCI_OP_enable_msi,
26431 + .domain = pci_domain_nr(dev->bus),
26432 + .bus = dev->bus->number,
26433 + .devfn = dev->devfn,
26434 + };
26435 + struct pcifront_sd *sd = dev->bus->sysdata;
26436 + struct pcifront_device *pdev = pcifront_get_pdev(sd);
26437 +
26438 + err = do_pci_op(pdev, &op);
26439 + if (likely(!err)) {
26440 + dev->irq = op.value;
26441 + }
26442 + else {
26443 +               printk("pci frontend enable msi failed for dev %x:%x\n",
26444 + op.bus, op.devfn);
26445 + err = -EINVAL;
26446 + }
26447 + return err;
26448 +}
26449 +
26450 +void pci_frontend_disable_msi(struct pci_dev* dev)
26451 +{
26452 + int err;
26453 + struct xen_pci_op op = {
26454 + .cmd = XEN_PCI_OP_disable_msi,
26455 + .domain = pci_domain_nr(dev->bus),
26456 + .bus = dev->bus->number,
26457 + .devfn = dev->devfn,
26458 + };
26459 + struct pcifront_sd *sd = dev->bus->sysdata;
26460 + struct pcifront_device *pdev = pcifront_get_pdev(sd);
26461 +
26462 + err = do_pci_op(pdev, &op);
26463 + if (err == XEN_PCI_ERR_dev_not_found) {
26464 + /* XXX No response from backend, what shall we do? */
26465 +               printk("no response from backend for disable MSI\n");
26466 + return;
26467 + }
26468 + if (likely(!err))
26469 + dev->irq = op.value;
26470 + else
26471 +               /* how can pciback notify us of a failure? */
26472 +               printk("got unexpected response from backend\n");
26473 +}
26474 +#endif /* CONFIG_PCI_MSI */
26475 +
26476 +/* Claim resources for the PCI frontend as-is, backend won't allow changes */
26477 +static void pcifront_claim_resource(struct pci_dev *dev, void *data)
26478 +{
26479 + struct pcifront_device *pdev = data;
26480 + int i;
26481 + struct resource *r;
26482 +
26483 + for (i = 0; i < PCI_NUM_RESOURCES; i++) {
26484 + r = &dev->resource[i];
26485 +
26486 + if (!r->parent && r->start && r->flags) {
26487 + dev_dbg(&pdev->xdev->dev, "claiming resource %s/%d\n",
26488 + pci_name(dev), i);
26489 + pci_claim_resource(dev, i);
26490 + }
26491 + }
26492 +}
26493 +
26494 +int __devinit pcifront_scan_root(struct pcifront_device *pdev,
26495 + unsigned int domain, unsigned int bus)
26496 +{
26497 + struct pci_bus *b;
26498 + struct pcifront_sd *sd = NULL;
26499 + struct pci_bus_entry *bus_entry = NULL;
26500 + int err = 0;
26501 +
26502 +#ifndef CONFIG_PCI_DOMAINS
26503 + if (domain != 0) {
26504 + dev_err(&pdev->xdev->dev,
26505 + "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
26506 + dev_err(&pdev->xdev->dev,
26507 + "Please compile with CONFIG_PCI_DOMAINS\n");
26508 + err = -EINVAL;
26509 + goto err_out;
26510 + }
26511 +#endif
26512 +
26513 + dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
26514 + domain, bus);
26515 +
26516 + bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
26517 + sd = kmalloc(sizeof(*sd), GFP_KERNEL);
26518 + if (!bus_entry || !sd) {
26519 + err = -ENOMEM;
26520 + goto err_out;
26521 + }
26522 + pcifront_init_sd(sd, domain, bus, pdev);
26523 +
26524 + b = pci_scan_bus_parented(&pdev->xdev->dev, bus,
26525 + &pcifront_bus_ops, sd);
26526 + if (!b) {
26527 + dev_err(&pdev->xdev->dev,
26528 + "Error creating PCI Frontend Bus!\n");
26529 + err = -ENOMEM;
26530 + goto err_out;
26531 + }
26532 +
26533 + pcifront_setup_root_resources(b, sd);
26534 + bus_entry->bus = b;
26535 +
26536 + list_add(&bus_entry->list, &pdev->root_buses);
26537 +
26538 + /* Claim resources before going "live" with our devices */
26539 + pci_walk_bus(b, pcifront_claim_resource, pdev);
26540 +
26541 + pci_bus_add_devices(b);
26542 +
26543 + return 0;
26544 +
26545 + err_out:
26546 + kfree(bus_entry);
26547 + kfree(sd);
26548 +
26549 + return err;
26550 +}
26551 +
26552 +int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
26553 + unsigned int domain, unsigned int bus)
26554 +{
26555 + struct pci_bus *b;
26556 + struct pci_dev *d;
26557 + unsigned int devfn;
26558 +
26559 +#ifndef CONFIG_PCI_DOMAINS
26560 + if (domain != 0) {
26561 + dev_err(&pdev->xdev->dev,
26562 + "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
26563 + dev_err(&pdev->xdev->dev,
26564 + "Please compile with CONFIG_PCI_DOMAINS\n");
26565 + return -EINVAL;
26566 + }
26567 +#endif
26568 +
26569 + dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
26570 + domain, bus);
26571 +
26572 + b = pci_find_bus(domain, bus);
26573 +       if (!b)
26574 + /* If the bus is unknown, create it. */
26575 + return pcifront_scan_root(pdev, domain, bus);
26576 +
26577 + /* Rescan the bus for newly attached functions and add.
26578 + * We omit handling of PCI bridge attachment because pciback prevents
26579 + * bridges from being exported.
26580 + */
26581 + for (devfn = 0; devfn < 0x100; devfn++) {
26582 + d = pci_get_slot(b, devfn);
26583 +               if (d) {
26584 + /* Device is already known. */
26585 + pci_dev_put(d);
26586 + continue;
26587 + }
26588 +
26589 + d = pci_scan_single_device(b, devfn);
26590 + if (d) {
26591 + dev_info(&pdev->xdev->dev, "New device on "
26592 + "%04x:%02x:%02x.%02x found.\n", domain, bus,
26593 + PCI_SLOT(devfn), PCI_FUNC(devfn));
26594 + pci_bus_add_device(d);
26595 + }
26596 + }
26597 +
26598 + return 0;
26599 +}
26600 +
26601 +static void free_root_bus_devs(struct pci_bus *bus)
26602 +{
26603 + struct pci_dev *dev;
26604 +
26605 + while (!list_empty(&bus->devices)) {
26606 + dev = container_of(bus->devices.next, struct pci_dev,
26607 + bus_list);
26608 + dev_dbg(&dev->dev, "removing device\n");
26609 + pci_remove_bus_device(dev);
26610 + }
26611 +}
26612 +
26613 +void pcifront_free_roots(struct pcifront_device *pdev)
26614 +{
26615 + struct pci_bus_entry *bus_entry, *t;
26616 +
26617 + dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
26618 +
26619 + list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
26620 + list_del(&bus_entry->list);
26621 +
26622 + free_root_bus_devs(bus_entry->bus);
26623 +
26624 + kfree(bus_entry->bus->sysdata);
26625 +
26626 + device_unregister(bus_entry->bus->bridge);
26627 + pci_remove_bus(bus_entry->bus);
26628 +
26629 + kfree(bus_entry);
26630 + }
26631 +}
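
The MSI-X path in pci_frontend_enable_msix() above forwards the requested entries through do_pci_op() and reports success only when the backend writes op.value == 0, in which case the per-entry vectors are copied back. A hedged sketch of a caller (the device pointer and entry count are caller-supplied; error handling abbreviated):

struct pci_dev *dev = /* caller's device, obtained elsewhere */ NULL;
struct msix_entry entries[2] = { { .entry = 0 }, { .entry = 1 } };
int rc = pci_frontend_enable_msix(dev, entries, 2);
if (rc == 0) {
	/* Backend accepted: entries[i].vector now holds the vectors to use. */
} else if (rc > 0) {
	/* Backend reported a problem via op.value (passed through as-is). */
} else {
	/* Negative error from the transport (e.g. timeout in do_pci_op()). */
}
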
26632 Index: head-2008-11-25/drivers/xen/pcifront/pcifront.h
26633 ===================================================================
26634 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
26635 +++ head-2008-11-25/drivers/xen/pcifront/pcifront.h 2008-02-26 10:54:11.000000000 +0100
26636 @@ -0,0 +1,42 @@
26637 +/*
26638 + * PCI Frontend - Common data structures & function declarations
26639 + *
26640 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
26641 + */
26642 +#ifndef __XEN_PCIFRONT_H__
26643 +#define __XEN_PCIFRONT_H__
26644 +
26645 +#include <linux/spinlock.h>
26646 +#include <linux/pci.h>
26647 +#include <xen/xenbus.h>
26648 +#include <xen/interface/io/pciif.h>
26649 +#include <xen/pcifront.h>
26650 +
26651 +struct pci_bus_entry {
26652 + struct list_head list;
26653 + struct pci_bus *bus;
26654 +};
26655 +
26656 +struct pcifront_device {
26657 + struct xenbus_device *xdev;
26658 + struct list_head root_buses;
26659 + spinlock_t dev_lock;
26660 +
26661 + int evtchn;
26662 + int gnt_ref;
26663 +
26664 + /* Lock this when doing any operations in sh_info */
26665 + spinlock_t sh_info_lock;
26666 + struct xen_pci_sharedinfo *sh_info;
26667 +};
26668 +
26669 +int pcifront_connect(struct pcifront_device *pdev);
26670 +void pcifront_disconnect(struct pcifront_device *pdev);
26671 +
26672 +int pcifront_scan_root(struct pcifront_device *pdev,
26673 + unsigned int domain, unsigned int bus);
26674 +int pcifront_rescan_root(struct pcifront_device *pdev,
26675 + unsigned int domain, unsigned int bus);
26676 +void pcifront_free_roots(struct pcifront_device *pdev);
26677 +
26678 +#endif /* __XEN_PCIFRONT_H__ */
26679 Index: head-2008-11-25/drivers/xen/pcifront/xenbus.c
26680 ===================================================================
26681 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
26682 +++ head-2008-11-25/drivers/xen/pcifront/xenbus.c 2008-07-21 11:00:33.000000000 +0200
26683 @@ -0,0 +1,455 @@
26684 +/*
26685 + * PCI Frontend Xenbus Setup - handles setup with backend (imports page/evtchn)
26686 + *
26687 + * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
26688 + */
26689 +#include <linux/module.h>
26690 +#include <linux/init.h>
26691 +#include <linux/mm.h>
26692 +#include <xen/xenbus.h>
26693 +#include <xen/gnttab.h>
26694 +#include "pcifront.h"
26695 +
26696 +#ifndef __init_refok
26697 +#define __init_refok
26698 +#endif
26699 +
26700 +#define INVALID_GRANT_REF (0)
26701 +#define INVALID_EVTCHN (-1)
26702 +
26703 +static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
26704 +{
26705 + struct pcifront_device *pdev;
26706 +
26707 + pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
26708 + if (pdev == NULL)
26709 + goto out;
26710 +
26711 + pdev->sh_info =
26712 + (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL);
26713 + if (pdev->sh_info == NULL) {
26714 + kfree(pdev);
26715 + pdev = NULL;
26716 + goto out;
26717 + }
26718 + pdev->sh_info->flags = 0;
26719 +
26720 + xdev->dev.driver_data = pdev;
26721 + pdev->xdev = xdev;
26722 +
26723 + INIT_LIST_HEAD(&pdev->root_buses);
26724 +
26725 + spin_lock_init(&pdev->dev_lock);
26726 + spin_lock_init(&pdev->sh_info_lock);
26727 +
26728 + pdev->evtchn = INVALID_EVTCHN;
26729 + pdev->gnt_ref = INVALID_GRANT_REF;
26730 +
26731 + dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
26732 + pdev, pdev->sh_info);
26733 + out:
26734 + return pdev;
26735 +}
26736 +
26737 +static void free_pdev(struct pcifront_device *pdev)
26738 +{
26739 + dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
26740 +
26741 + pcifront_free_roots(pdev);
26742 +
26743 + if (pdev->evtchn != INVALID_EVTCHN)
26744 + xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
26745 +
26746 + if (pdev->gnt_ref != INVALID_GRANT_REF)
26747 + gnttab_end_foreign_access(pdev->gnt_ref,
26748 + (unsigned long)pdev->sh_info);
26749 +
26750 + pdev->xdev->dev.driver_data = NULL;
26751 +
26752 + kfree(pdev);
26753 +}
26754 +
26755 +static int pcifront_publish_info(struct pcifront_device *pdev)
26756 +{
26757 + int err = 0;
26758 + struct xenbus_transaction trans;
26759 +
26760 + err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
26761 + if (err < 0)
26762 + goto out;
26763 +
26764 + pdev->gnt_ref = err;
26765 +
26766 + err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
26767 + if (err)
26768 + goto out;
26769 +
26770 + do_publish:
26771 + err = xenbus_transaction_start(&trans);
26772 + if (err) {
26773 + xenbus_dev_fatal(pdev->xdev, err,
26774 + "Error writing configuration for backend "
26775 + "(start transaction)");
26776 + goto out;
26777 + }
26778 +
26779 + err = xenbus_printf(trans, pdev->xdev->nodename,
26780 + "pci-op-ref", "%u", pdev->gnt_ref);
26781 + if (!err)
26782 + err = xenbus_printf(trans, pdev->xdev->nodename,
26783 + "event-channel", "%u", pdev->evtchn);
26784 + if (!err)
26785 + err = xenbus_printf(trans, pdev->xdev->nodename,
26786 + "magic", XEN_PCI_MAGIC);
26787 +
26788 + if (err) {
26789 + xenbus_transaction_end(trans, 1);
26790 + xenbus_dev_fatal(pdev->xdev, err,
26791 + "Error writing configuration for backend");
26792 + goto out;
26793 + } else {
26794 + err = xenbus_transaction_end(trans, 0);
26795 + if (err == -EAGAIN)
26796 + goto do_publish;
26797 + else if (err) {
26798 + xenbus_dev_fatal(pdev->xdev, err,
26799 + "Error completing transaction "
26800 + "for backend");
26801 + goto out;
26802 + }
26803 + }
26804 +
26805 + xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
26806 +
26807 + dev_dbg(&pdev->xdev->dev, "publishing successful!\n");
26808 +
26809 + out:
26810 + return err;
26811 +}
26812 +
26813 +static int __devinit pcifront_try_connect(struct pcifront_device *pdev)
26814 +{
26815 + int err = -EFAULT;
26816 + int i, num_roots, len;
26817 + char str[64];
26818 + unsigned int domain, bus;
26819 +
26820 + spin_lock(&pdev->dev_lock);
26821 +
26822 + /* Only connect once */
26823 + if (xenbus_read_driver_state(pdev->xdev->nodename) !=
26824 + XenbusStateInitialised)
26825 + goto out;
26826 +
26827 + err = pcifront_connect(pdev);
26828 + if (err) {
26829 + xenbus_dev_fatal(pdev->xdev, err,
26830 + "Error connecting PCI Frontend");
26831 + goto out;
26832 + }
26833 +
26834 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
26835 + "root_num", "%d", &num_roots);
26836 + if (err == -ENOENT) {
26837 + xenbus_dev_error(pdev->xdev, err,
26838 + "No PCI Roots found, trying 0000:00");
26839 + err = pcifront_scan_root(pdev, 0, 0);
26840 + num_roots = 0;
26841 + } else if (err != 1) {
26842 + if (err == 0)
26843 + err = -EINVAL;
26844 + xenbus_dev_fatal(pdev->xdev, err,
26845 + "Error reading number of PCI roots");
26846 + goto out;
26847 + }
26848 +
26849 + for (i = 0; i < num_roots; i++) {
26850 + len = snprintf(str, sizeof(str), "root-%d", i);
26851 + if (unlikely(len >= (sizeof(str) - 1))) {
26852 + err = -ENOMEM;
26853 + goto out;
26854 + }
26855 +
26856 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
26857 + "%x:%x", &domain, &bus);
26858 + if (err != 2) {
26859 + if (err >= 0)
26860 + err = -EINVAL;
26861 + xenbus_dev_fatal(pdev->xdev, err,
26862 + "Error reading PCI root %d", i);
26863 + goto out;
26864 + }
26865 +
26866 + err = pcifront_scan_root(pdev, domain, bus);
26867 + if (err) {
26868 + xenbus_dev_fatal(pdev->xdev, err,
26869 + "Error scanning PCI root %04x:%02x",
26870 + domain, bus);
26871 + goto out;
26872 + }
26873 + }
26874 +
26875 + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
26876 + if (err)
26877 + goto out;
26878 +
26879 + out:
26880 + spin_unlock(&pdev->dev_lock);
26881 + return err;
26882 +}
26883 +
26884 +static int pcifront_try_disconnect(struct pcifront_device *pdev)
26885 +{
26886 + int err = 0;
26887 + enum xenbus_state prev_state;
26888 +
26889 + spin_lock(&pdev->dev_lock);
26890 +
26891 + prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
26892 +
26893 + if (prev_state >= XenbusStateClosing)
26894 + goto out;
26895 +
26896 +	if (prev_state == XenbusStateConnected) {
26897 + pcifront_free_roots(pdev);
26898 + pcifront_disconnect(pdev);
26899 + }
26900 +
26901 + err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
26902 +
26903 + out:
26904 + spin_unlock(&pdev->dev_lock);
26905 +
26906 + return err;
26907 +}
26908 +
26909 +static int __devinit pcifront_attach_devices(struct pcifront_device *pdev)
26910 +{
26911 + int err = -EFAULT;
26912 + int i, num_roots, len;
26913 + unsigned int domain, bus;
26914 + char str[64];
26915 +
26916 + spin_lock(&pdev->dev_lock);
26917 +
26918 + if (xenbus_read_driver_state(pdev->xdev->nodename) !=
26919 + XenbusStateReconfiguring)
26920 + goto out;
26921 +
26922 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
26923 + "root_num", "%d", &num_roots);
26924 + if (err == -ENOENT) {
26925 + xenbus_dev_error(pdev->xdev, err,
26926 + "No PCI Roots found, trying 0000:00");
26927 + err = pcifront_rescan_root(pdev, 0, 0);
26928 + num_roots = 0;
26929 + } else if (err != 1) {
26930 + if (err == 0)
26931 + err = -EINVAL;
26932 + xenbus_dev_fatal(pdev->xdev, err,
26933 + "Error reading number of PCI roots");
26934 + goto out;
26935 + }
26936 +
26937 + for (i = 0; i < num_roots; i++) {
26938 + len = snprintf(str, sizeof(str), "root-%d", i);
26939 + if (unlikely(len >= (sizeof(str) - 1))) {
26940 + err = -ENOMEM;
26941 + goto out;
26942 + }
26943 +
26944 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
26945 + "%x:%x", &domain, &bus);
26946 + if (err != 2) {
26947 + if (err >= 0)
26948 + err = -EINVAL;
26949 + xenbus_dev_fatal(pdev->xdev, err,
26950 + "Error reading PCI root %d", i);
26951 + goto out;
26952 + }
26953 +
26954 + err = pcifront_rescan_root(pdev, domain, bus);
26955 + if (err) {
26956 + xenbus_dev_fatal(pdev->xdev, err,
26957 + "Error scanning PCI root %04x:%02x",
26958 + domain, bus);
26959 + goto out;
26960 + }
26961 + }
26962 +
26963 + xenbus_switch_state(pdev->xdev, XenbusStateConnected);
26964 +
26965 + out:
26966 + spin_unlock(&pdev->dev_lock);
26967 + return err;
26968 +}
26969 +
26970 +static int pcifront_detach_devices(struct pcifront_device *pdev)
26971 +{
26972 + int err = 0;
26973 + int i, num_devs;
26974 + unsigned int domain, bus, slot, func;
26975 + struct pci_bus *pci_bus;
26976 + struct pci_dev *pci_dev;
26977 + char str[64];
26978 +
26979 + spin_lock(&pdev->dev_lock);
26980 +
26981 + if (xenbus_read_driver_state(pdev->xdev->nodename) !=
26982 + XenbusStateConnected)
26983 + goto out;
26984 +
26985 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
26986 + &num_devs);
26987 + if (err != 1) {
26988 + if (err >= 0)
26989 + err = -EINVAL;
26990 + xenbus_dev_fatal(pdev->xdev, err,
26991 + "Error reading number of PCI devices");
26992 + goto out;
26993 + }
26994 +
26995 + /* Find devices being detached and remove them. */
26996 + for (i = 0; i < num_devs; i++) {
26997 + int l, state;
26998 + l = snprintf(str, sizeof(str), "state-%d", i);
26999 + if (unlikely(l >= (sizeof(str) - 1))) {
27000 + err = -ENOMEM;
27001 + goto out;
27002 + }
27003 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d",
27004 + &state);
27005 + if (err != 1)
27006 + state = XenbusStateUnknown;
27007 +
27008 + if (state != XenbusStateClosing)
27009 + continue;
27010 +
27011 + /* Remove device. */
27012 + l = snprintf(str, sizeof(str), "vdev-%d", i);
27013 + if (unlikely(l >= (sizeof(str) - 1))) {
27014 + err = -ENOMEM;
27015 + goto out;
27016 + }
27017 + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
27018 + "%x:%x:%x.%x", &domain, &bus, &slot, &func);
27019 + if (err != 4) {
27020 + if (err >= 0)
27021 + err = -EINVAL;
27022 + xenbus_dev_fatal(pdev->xdev, err,
27023 + "Error reading PCI device %d", i);
27024 + goto out;
27025 + }
27026 +
27027 + pci_bus = pci_find_bus(domain, bus);
27028 +		if (!pci_bus) {
27029 + dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n",
27030 + domain, bus);
27031 + continue;
27032 + }
27033 + pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func));
27034 +		if (!pci_dev) {
27035 + dev_dbg(&pdev->xdev->dev,
27036 + "Cannot get PCI device %04x:%02x:%02x.%02x\n",
27037 + domain, bus, slot, func);
27038 + continue;
27039 + }
27040 + pci_remove_bus_device(pci_dev);
27041 + pci_dev_put(pci_dev);
27042 +
27043 + dev_dbg(&pdev->xdev->dev,
27044 + "PCI device %04x:%02x:%02x.%02x removed.\n",
27045 + domain, bus, slot, func);
27046 + }
27047 +
27048 + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
27049 +
27050 + out:
27051 + spin_unlock(&pdev->dev_lock);
27052 + return err;
27053 +}
27054 +
27055 +static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
27056 + enum xenbus_state be_state)
27057 +{
27058 + struct pcifront_device *pdev = xdev->dev.driver_data;
27059 +
27060 + switch (be_state) {
27061 + case XenbusStateUnknown:
27062 + case XenbusStateInitialising:
27063 + case XenbusStateInitWait:
27064 + case XenbusStateInitialised:
27065 + case XenbusStateClosed:
27066 + break;
27067 +
27068 + case XenbusStateConnected:
27069 + pcifront_try_connect(pdev);
27070 + break;
27071 +
27072 + case XenbusStateClosing:
27073 + dev_warn(&xdev->dev, "backend going away!\n");
27074 + pcifront_try_disconnect(pdev);
27075 + break;
27076 +
27077 + case XenbusStateReconfiguring:
27078 + pcifront_detach_devices(pdev);
27079 + break;
27080 +
27081 + case XenbusStateReconfigured:
27082 + pcifront_attach_devices(pdev);
27083 + break;
27084 + }
27085 +}
27086 +
27087 +static int pcifront_xenbus_probe(struct xenbus_device *xdev,
27088 + const struct xenbus_device_id *id)
27089 +{
27090 + int err = 0;
27091 + struct pcifront_device *pdev = alloc_pdev(xdev);
27092 +
27093 + if (pdev == NULL) {
27094 + err = -ENOMEM;
27095 + xenbus_dev_fatal(xdev, err,
27096 + "Error allocating pcifront_device struct");
27097 + goto out;
27098 + }
27099 +
27100 + err = pcifront_publish_info(pdev);
27101 +
27102 + out:
27103 + return err;
27104 +}
27105 +
27106 +static int pcifront_xenbus_remove(struct xenbus_device *xdev)
27107 +{
27108 + if (xdev->dev.driver_data)
27109 + free_pdev(xdev->dev.driver_data);
27110 +
27111 + return 0;
27112 +}
27113 +
27114 +static const struct xenbus_device_id xenpci_ids[] = {
27115 + {"pci"},
27116 + {{0}},
27117 +};
27118 +MODULE_ALIAS("xen:pci");
27119 +
27120 +static struct xenbus_driver xenbus_pcifront_driver = {
27121 + .name = "pcifront",
27122 + .owner = THIS_MODULE,
27123 + .ids = xenpci_ids,
27124 + .probe = pcifront_xenbus_probe,
27125 + .remove = pcifront_xenbus_remove,
27126 + .otherend_changed = pcifront_backend_changed,
27127 +};
27128 +
27129 +static int __init pcifront_init(void)
27130 +{
27131 + if (!is_running_on_xen())
27132 + return -ENODEV;
27133 +
27134 + return xenbus_register_frontend(&xenbus_pcifront_driver);
27135 +}
27136 +
27137 +/* Initialize after the Xen PCI Frontend Stub is initialized */
27138 +subsys_initcall(pcifront_init);
27139 Index: head-2008-11-25/drivers/xen/privcmd/Makefile
27140 ===================================================================
27141 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
27142 +++ head-2008-11-25/drivers/xen/privcmd/Makefile 2007-07-10 09:42:30.000000000 +0200
27143 @@ -0,0 +1,3 @@
27144 +
27145 +obj-y += privcmd.o
27146 +obj-$(CONFIG_COMPAT) += compat_privcmd.o
27147 Index: head-2008-11-25/drivers/xen/privcmd/compat_privcmd.c
27148 ===================================================================
27149 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
27150 +++ head-2008-11-25/drivers/xen/privcmd/compat_privcmd.c 2007-07-10 09:42:30.000000000 +0200
27151 @@ -0,0 +1,73 @@
27152 +/*
27153 + * This program is free software; you can redistribute it and/or modify
27154 + * it under the terms of the GNU General Public License as published by
27155 + * the Free Software Foundation; either version 2 of the License, or
27156 + * (at your option) any later version.
27157 + *
27158 + * This program is distributed in the hope that it will be useful,
27159 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
27160 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27161 + * GNU General Public License for more details.
27162 + *
27163 + * You should have received a copy of the GNU General Public License
27164 + * along with this program; if not, write to the Free Software
27165 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27166 + *
27167 + * Copyright (C) IBM Corp. 2006
27168 + *
27169 + * Authors: Jimi Xenidis <jimix@watson.ibm.com>
27170 + */
27171 +
27172 +#include <linux/config.h>
27173 +#include <linux/compat.h>
27174 +#include <linux/ioctl.h>
27175 +#include <linux/syscalls.h>
27176 +#include <asm/hypervisor.h>
27177 +#include <asm/uaccess.h>
27178 +#include <xen/public/privcmd.h>
27179 +#include <xen/compat_ioctl.h>
27180 +
27181 +int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg)
27182 +{
27183 + int ret;
27184 +
27185 + switch (cmd) {
27186 + case IOCTL_PRIVCMD_MMAP_32: {
27187 + struct privcmd_mmap *p;
27188 + struct privcmd_mmap_32 *p32;
27189 + struct privcmd_mmap_32 n32;
27190 +
27191 + p32 = compat_ptr(arg);
27192 + p = compat_alloc_user_space(sizeof(*p));
27193 + if (copy_from_user(&n32, p32, sizeof(n32)) ||
27194 + put_user(n32.num, &p->num) ||
27195 + put_user(n32.dom, &p->dom) ||
27196 + put_user(compat_ptr(n32.entry), &p->entry))
27197 + return -EFAULT;
27198 +
27199 + ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAP, (unsigned long)p);
27200 + }
27201 + break;
27202 + case IOCTL_PRIVCMD_MMAPBATCH_32: {
27203 + struct privcmd_mmapbatch *p;
27204 + struct privcmd_mmapbatch_32 *p32;
27205 + struct privcmd_mmapbatch_32 n32;
27206 +
27207 + p32 = compat_ptr(arg);
27208 + p = compat_alloc_user_space(sizeof(*p));
27209 + if (copy_from_user(&n32, p32, sizeof(n32)) ||
27210 + put_user(n32.num, &p->num) ||
27211 + put_user(n32.dom, &p->dom) ||
27212 + put_user(n32.addr, &p->addr) ||
27213 + put_user(compat_ptr(n32.arr), &p->arr))
27214 + return -EFAULT;
27215 +
27216 + ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, (unsigned long)p);
27217 + }
27218 + break;
27219 + default:
27220 + ret = -EINVAL;
27221 + break;
27222 + }
27223 + return ret;
27224 +}
27225 Index: head-2008-11-25/drivers/xen/privcmd/privcmd.c
27226 ===================================================================
27227 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
27228 +++ head-2008-11-25/drivers/xen/privcmd/privcmd.c 2008-07-21 11:00:33.000000000 +0200
27229 @@ -0,0 +1,356 @@
27230 +/******************************************************************************
27231 + * privcmd.c
27232 + *
27233 + * Interface to privileged domain-0 commands.
27234 + *
27235 + * Copyright (c) 2002-2004, K A Fraser, B Dragovic
27236 + */
27237 +
27238 +#include <linux/kernel.h>
27239 +#include <linux/sched.h>
27240 +#include <linux/slab.h>
27241 +#include <linux/string.h>
27242 +#include <linux/errno.h>
27243 +#include <linux/mm.h>
27244 +#include <linux/mman.h>
27245 +#include <linux/swap.h>
27246 +#include <linux/smp_lock.h>
27247 +#include <linux/highmem.h>
27248 +#include <linux/pagemap.h>
27249 +#include <linux/seq_file.h>
27250 +#include <asm/hypervisor.h>
27251 +
27252 +#include <asm/pgalloc.h>
27253 +#include <asm/pgtable.h>
27254 +#include <asm/uaccess.h>
27255 +#include <asm/tlb.h>
27256 +#include <asm/hypervisor.h>
27257 +#include <xen/public/privcmd.h>
27258 +#include <xen/interface/xen.h>
27259 +#include <xen/xen_proc.h>
27260 +#include <xen/features.h>
27261 +
27262 +static struct proc_dir_entry *privcmd_intf;
27263 +static struct proc_dir_entry *capabilities_intf;
27264 +
27265 +#ifndef HAVE_ARCH_PRIVCMD_MMAP
27266 +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
27267 +#endif
27268 +
27269 +static long privcmd_ioctl(struct file *file,
27270 + unsigned int cmd, unsigned long data)
27271 +{
27272 + int ret = -ENOSYS;
27273 + void __user *udata = (void __user *) data;
27274 +
27275 + switch (cmd) {
27276 + case IOCTL_PRIVCMD_HYPERCALL: {
27277 + privcmd_hypercall_t hypercall;
27278 +
27279 + if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
27280 + return -EFAULT;
27281 +
27282 +#if defined(__i386__)
27283 + if (hypercall.op >= (PAGE_SIZE >> 5))
27284 + break;
27285 + __asm__ __volatile__ (
27286 + "pushl %%ebx; pushl %%ecx; pushl %%edx; "
27287 + "pushl %%esi; pushl %%edi; "
27288 + "movl 8(%%eax),%%ebx ;"
27289 + "movl 16(%%eax),%%ecx ;"
27290 + "movl 24(%%eax),%%edx ;"
27291 + "movl 32(%%eax),%%esi ;"
27292 + "movl 40(%%eax),%%edi ;"
27293 + "movl (%%eax),%%eax ;"
27294 + "shll $5,%%eax ;"
27295 + "addl $hypercall_page,%%eax ;"
27296 + "call *%%eax ;"
27297 + "popl %%edi; popl %%esi; popl %%edx; "
27298 + "popl %%ecx; popl %%ebx"
27299 + : "=a" (ret) : "0" (&hypercall) : "memory" );
27300 +#elif defined (__x86_64__)
27301 + if (hypercall.op < (PAGE_SIZE >> 5)) {
27302 + long ign1, ign2, ign3;
27303 + __asm__ __volatile__ (
27304 + "movq %8,%%r10; movq %9,%%r8;"
27305 + "shll $5,%%eax ;"
27306 + "addq $hypercall_page,%%rax ;"
27307 + "call *%%rax"
27308 + : "=a" (ret), "=D" (ign1),
27309 + "=S" (ign2), "=d" (ign3)
27310 + : "0" ((unsigned int)hypercall.op),
27311 + "1" (hypercall.arg[0]),
27312 + "2" (hypercall.arg[1]),
27313 + "3" (hypercall.arg[2]),
27314 + "g" (hypercall.arg[3]),
27315 + "g" (hypercall.arg[4])
27316 + : "r8", "r10", "memory" );
27317 + }
27318 +#else
27319 + ret = privcmd_hypercall(&hypercall);
27320 +#endif
27321 + }
27322 + break;
27323 +
27324 + case IOCTL_PRIVCMD_MMAP: {
27325 +#define MMAP_NR_PER_PAGE (int)((PAGE_SIZE-sizeof(struct list_head))/sizeof(privcmd_mmap_entry_t))
27326 + privcmd_mmap_t mmapcmd;
27327 + privcmd_mmap_entry_t *msg;
27328 + privcmd_mmap_entry_t __user *p;
27329 + struct mm_struct *mm = current->mm;
27330 + struct vm_area_struct *vma;
27331 + unsigned long va;
27332 + int i, rc;
27333 + LIST_HEAD(pagelist);
27334 + struct list_head *l,*l2;
27335 +
27336 + if (!is_initial_xendomain())
27337 + return -EPERM;
27338 +
27339 + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
27340 + return -EFAULT;
27341 +
27342 + p = mmapcmd.entry;
27343 + for (i = 0; i < mmapcmd.num;) {
27344 + int nr = min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
27345 +
27346 + rc = -ENOMEM;
27347 + l = (struct list_head *) __get_free_page(GFP_KERNEL);
27348 + if (l == NULL)
27349 + goto mmap_out;
27350 +
27351 + INIT_LIST_HEAD(l);
27352 + list_add_tail(l, &pagelist);
27353 + msg = (privcmd_mmap_entry_t*)(l + 1);
27354 +
27355 + rc = -EFAULT;
27356 + if (copy_from_user(msg, p, nr*sizeof(*msg)))
27357 + goto mmap_out;
27358 + i += nr;
27359 + p += nr;
27360 + }
27361 +
27362 + l = pagelist.next;
27363 + msg = (privcmd_mmap_entry_t*)(l + 1);
27364 +
27365 + down_write(&mm->mmap_sem);
27366 +
27367 + vma = find_vma(mm, msg->va);
27368 + rc = -EINVAL;
27369 + if (!vma || (msg->va != vma->vm_start) ||
27370 + !privcmd_enforce_singleshot_mapping(vma))
27371 + goto mmap_out;
27372 +
27373 + va = vma->vm_start;
27374 +
27375 + i = 0;
27376 + list_for_each(l, &pagelist) {
27377 + int nr = i + min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
27378 +
27379 + msg = (privcmd_mmap_entry_t*)(l + 1);
27380 + while (i<nr) {
27381 +
27382 + /* Do not allow range to wrap the address space. */
27383 + rc = -EINVAL;
27384 + if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
27385 + ((unsigned long)(msg->npages << PAGE_SHIFT) >= -va))
27386 + goto mmap_out;
27387 +
27388 + /* Range chunks must be contiguous in va space. */
27389 + if ((msg->va != va) ||
27390 + ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
27391 + goto mmap_out;
27392 +
27393 + if ((rc = direct_remap_pfn_range(
27394 + vma,
27395 + msg->va & PAGE_MASK,
27396 + msg->mfn,
27397 + msg->npages << PAGE_SHIFT,
27398 + vma->vm_page_prot,
27399 + mmapcmd.dom)) < 0)
27400 + goto mmap_out;
27401 +
27402 + va += msg->npages << PAGE_SHIFT;
27403 + msg++;
27404 + i++;
27405 + }
27406 + }
27407 +
27408 + rc = 0;
27409 +
27410 + mmap_out:
27411 + up_write(&mm->mmap_sem);
27412 + list_for_each_safe(l,l2,&pagelist)
27413 + free_page((unsigned long)l);
27414 + ret = rc;
27415 + }
27416 +#undef MMAP_NR_PER_PAGE
27417 + break;
27418 +
27419 + case IOCTL_PRIVCMD_MMAPBATCH: {
27420 +#define MMAPBATCH_NR_PER_PAGE (unsigned long)((PAGE_SIZE-sizeof(struct list_head))/sizeof(unsigned long))
27421 + privcmd_mmapbatch_t m;
27422 + struct mm_struct *mm = current->mm;
27423 + struct vm_area_struct *vma;
27424 + xen_pfn_t __user *p;
27425 + unsigned long addr, *mfn, nr_pages;
27426 + int i;
27427 + LIST_HEAD(pagelist);
27428 + struct list_head *l, *l2;
27429 +
27430 + if (!is_initial_xendomain())
27431 + return -EPERM;
27432 +
27433 + if (copy_from_user(&m, udata, sizeof(m)))
27434 + return -EFAULT;
27435 +
27436 + nr_pages = m.num;
27437 + if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
27438 + return -EINVAL;
27439 +
27440 + p = m.arr;
27441 + for (i=0; i<nr_pages; ) {
27442 + int nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
27443 +
27444 + ret = -ENOMEM;
27445 + l = (struct list_head *)__get_free_page(GFP_KERNEL);
27446 + if (l == NULL)
27447 + goto mmapbatch_out;
27448 +
27449 + INIT_LIST_HEAD(l);
27450 + list_add_tail(l, &pagelist);
27451 +
27452 + mfn = (unsigned long*)(l + 1);
27453 + ret = -EFAULT;
27454 + if (copy_from_user(mfn, p, nr*sizeof(*mfn)))
27455 + goto mmapbatch_out;
27456 +
27457 + i += nr; p+= nr;
27458 + }
27459 +
27460 + down_write(&mm->mmap_sem);
27461 +
27462 + vma = find_vma(mm, m.addr);
27463 + ret = -EINVAL;
27464 + if (!vma ||
27465 + (m.addr != vma->vm_start) ||
27466 + ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
27467 + !privcmd_enforce_singleshot_mapping(vma)) {
27468 + up_write(&mm->mmap_sem);
27469 + goto mmapbatch_out;
27470 + }
27471 +
27472 + p = m.arr;
27473 + addr = m.addr;
27474 + i = 0;
27475 + ret = 0;
27476 + list_for_each(l, &pagelist) {
27477 + int nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
27478 + mfn = (unsigned long *)(l + 1);
27479 +
27480 + while (i<nr) {
27481 +				if (direct_remap_pfn_range(vma, addr & PAGE_MASK,
27482 + *mfn, PAGE_SIZE,
27483 + vma->vm_page_prot, m.dom) < 0) {
27484 + *mfn |= 0xf0000000U;
27485 + ret++;
27486 + }
27487 + mfn++; i++; addr += PAGE_SIZE;
27488 + }
27489 + }
27490 +
27491 + up_write(&mm->mmap_sem);
27492 + if (ret > 0) {
27493 + p = m.arr;
27494 + i = 0;
27495 + ret = 0;
27496 + list_for_each(l, &pagelist) {
27497 + int nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
27498 + mfn = (unsigned long *)(l + 1);
27499 + if (copy_to_user(p, mfn, nr*sizeof(*mfn)))
27500 + ret = -EFAULT;
27501 + i += nr; p += nr;
27502 + }
27503 + }
27504 + mmapbatch_out:
27505 + list_for_each_safe(l,l2,&pagelist)
27506 + free_page((unsigned long)l);
27507 +#undef MMAPBATCH_NR_PER_PAGE
27508 + }
27509 + break;
27510 +
27511 + default:
27512 + ret = -EINVAL;
27513 + break;
27514 + }
27515 +
27516 + return ret;
27517 +}
27518 +
27519 +#ifndef HAVE_ARCH_PRIVCMD_MMAP
27520 +static struct page *privcmd_nopage(struct vm_area_struct *vma,
27521 + unsigned long address,
27522 + int *type)
27523 +{
27524 + return NOPAGE_SIGBUS;
27525 +}
27526 +
27527 +static struct vm_operations_struct privcmd_vm_ops = {
27528 + .nopage = privcmd_nopage
27529 +};
27530 +
27531 +static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
27532 +{
27533 + /* Unsupported for auto-translate guests. */
27534 + if (xen_feature(XENFEAT_auto_translated_physmap))
27535 + return -ENOSYS;
27536 +
27537 + /* DONTCOPY is essential for Xen as copy_page_range is broken. */
27538 + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
27539 + vma->vm_ops = &privcmd_vm_ops;
27540 + vma->vm_private_data = NULL;
27541 +
27542 + return 0;
27543 +}
27544 +
27545 +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
27546 +{
27547 + return (xchg(&vma->vm_private_data, (void *)1) == NULL);
27548 +}
27549 +#endif
27550 +
27551 +static const struct file_operations privcmd_file_ops = {
27552 + .unlocked_ioctl = privcmd_ioctl,
27553 + .mmap = privcmd_mmap,
27554 +};
27555 +
27556 +static int capabilities_read(char *page, char **start, off_t off,
27557 + int count, int *eof, void *data)
27558 +{
27559 + int len = 0;
27560 + *page = 0;
27561 +
27562 + if (is_initial_xendomain())
27563 + len = sprintf( page, "control_d\n" );
27564 +
27565 + *eof = 1;
27566 + return len;
27567 +}
27568 +
27569 +static int __init privcmd_init(void)
27570 +{
27571 + if (!is_running_on_xen())
27572 + return -ENODEV;
27573 +
27574 + privcmd_intf = create_xen_proc_entry("privcmd", 0400);
27575 + if (privcmd_intf != NULL)
27576 + privcmd_intf->proc_fops = &privcmd_file_ops;
27577 +
27578 + capabilities_intf = create_xen_proc_entry("capabilities", 0400 );
27579 + if (capabilities_intf != NULL)
27580 + capabilities_intf->read_proc = capabilities_read;
27581 +
27582 + return 0;
27583 +}
27584 +
27585 +__initcall(privcmd_init);
27586 Index: head-2008-11-25/drivers/xen/scsiback/Makefile
27587 ===================================================================
27588 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
27589 +++ head-2008-11-25/drivers/xen/scsiback/Makefile 2008-07-21 11:00:33.000000000 +0200
27590 @@ -0,0 +1,4 @@
27591 +obj-$(CONFIG_XEN_SCSI_BACKEND) := xen-scsibk.o
27592 +
27593 +xen-scsibk-y := interface.o scsiback.o xenbus.o translate.o emulate.o
27594 +
27595 Index: head-2008-11-25/drivers/xen/scsiback/common.h
27596 ===================================================================
27597 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
27598 +++ head-2008-11-25/drivers/xen/scsiback/common.h 2008-07-21 11:00:33.000000000 +0200
27599 @@ -0,0 +1,181 @@
27600 +/*
27601 + * Copyright (c) 2008, FUJITSU Limited
27602 + *
27603 + * Based on the blkback driver code.
27604 + *
27605 + * This program is free software; you can redistribute it and/or
27606 + * modify it under the terms of the GNU General Public License version 2
27607 + * as published by the Free Software Foundation; or, when distributed
27608 + * separately from the Linux kernel or incorporated into other
27609 + * software packages, subject to the following license:
27610 + *
27611 + * Permission is hereby granted, free of charge, to any person obtaining a copy
27612 + * of this source file (the "Software"), to deal in the Software without
27613 + * restriction, including without limitation the rights to use, copy, modify,
27614 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
27615 + * and to permit persons to whom the Software is furnished to do so, subject to
27616 + * the following conditions:
27617 + *
27618 + * The above copyright notice and this permission notice shall be included in
27619 + * all copies or substantial portions of the Software.
27620 + *
27621 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27622 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27623 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27624 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27625 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27626 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27627 + * IN THE SOFTWARE.
27628 + */
27629 +
27630 +#ifndef __SCSIIF__BACKEND__COMMON_H__
27631 +#define __SCSIIF__BACKEND__COMMON_H__
27632 +
27633 +#include <linux/version.h>
27634 +#include <linux/module.h>
27635 +#include <linux/interrupt.h>
27636 +#include <linux/slab.h>
27637 +#include <linux/vmalloc.h>
27638 +#include <linux/wait.h>
27639 +#include <linux/sched.h>
27640 +#include <linux/kthread.h>
27641 +#include <linux/blkdev.h>
27642 +#include <linux/list.h>
27643 +#include <linux/kthread.h>
27644 +#include <scsi/scsi.h>
27645 +#include <scsi/scsi_cmnd.h>
27646 +#include <scsi/scsi_host.h>
27647 +#include <scsi/scsi_device.h>
27648 +#include <scsi/scsi_dbg.h>
27649 +#include <scsi/scsi_eh.h>
27650 +#include <asm/io.h>
27651 +#include <asm/setup.h>
27652 +#include <asm/pgalloc.h>
27653 +#include <asm/delay.h>
27654 +#include <xen/evtchn.h>
27655 +#include <asm/hypervisor.h>
27656 +#include <xen/gnttab.h>
27657 +#include <xen/driver_util.h>
27658 +#include <xen/xenbus.h>
27659 +#include <xen/interface/io/ring.h>
27660 +#include <xen/interface/grant_table.h>
27661 +#include <xen/interface/io/vscsiif.h>
27662 +
27663 +
27664 +#define DPRINTK(_f, _a...) \
27665 + pr_debug("(file=%s, line=%d) " _f, \
27666 + __FILE__ , __LINE__ , ## _a )
27667 +
27668 +struct ids_tuple {
27669 + unsigned int hst; /* host */
27670 + unsigned int chn; /* channel */
27671 + unsigned int tgt; /* target */
27672 + unsigned int lun; /* LUN */
27673 +};
27674 +
27675 +struct v2p_entry {
27676 + struct ids_tuple v; /* translate from */
27677 + struct scsi_device *sdev; /* translate to */
27678 + struct list_head l;
27679 +};
27680 +
27681 +struct vscsibk_info {
27682 + struct xenbus_device *dev;
27683 +
27684 + domid_t domid;
27685 + unsigned int evtchn;
27686 + unsigned int irq;
27687 +
27688 + struct vscsiif_back_ring ring;
27689 + struct vm_struct *ring_area;
27690 + grant_handle_t shmem_handle;
27691 + grant_ref_t shmem_ref;
27692 +
27693 + spinlock_t ring_lock;
27694 + atomic_t nr_unreplied_reqs;
27695 +
27696 + spinlock_t v2p_lock;
27697 + struct list_head v2p_entry_lists;
27698 +
27699 + struct task_struct *kthread;
27700 + wait_queue_head_t waiting_to_free;
27701 + wait_queue_head_t wq;
27702 + unsigned int waiting_reqs;
27703 + struct page **mmap_pages;
27704 +
27705 +};
27706 +
27707 +typedef struct {
27708 + unsigned char act;
27709 + struct vscsibk_info *info;
27710 + struct scsi_device *sdev;
27711 +
27712 + uint16_t rqid;
27713 +
27714 + uint8_t nr_segments;
27715 + uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE];
27716 + uint8_t cmd_len;
27717 +
27718 + uint8_t sc_data_direction;
27719 + uint16_t timeout_per_command;
27720 +
27721 + uint32_t request_bufflen;
27722 + struct scatterlist *sgl;
27723 + grant_ref_t gref[VSCSIIF_SG_TABLESIZE];
27724 +
27725 + int32_t rslt;
27726 + uint32_t resid;
27727 + uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
27728 +
27729 + struct list_head free_list;
27730 +} pending_req_t;
27731 +
27732 +
27733 +
27734 +#define scsiback_get(_b) (atomic_inc(&(_b)->nr_unreplied_reqs))
27735 +#define scsiback_put(_b) \
27736 + do { \
27737 + if (atomic_dec_and_test(&(_b)->nr_unreplied_reqs)) \
27738 + wake_up(&(_b)->waiting_to_free);\
27739 + } while (0)
27740 +
27741 +#define VSCSIIF_TIMEOUT (900*HZ)
27742 +
27743 +
27744 +irqreturn_t scsiback_intr(int, void *, struct pt_regs *);
27745 +int scsiback_init_sring(struct vscsibk_info *info,
27746 + unsigned long ring_ref, unsigned int evtchn);
27747 +int scsiback_schedule(void *data);
27748 +
27749 +
27750 +struct vscsibk_info *vscsibk_info_alloc(domid_t domid);
27751 +void scsiback_free(struct vscsibk_info *info);
27752 +void scsiback_disconnect(struct vscsibk_info *info);
27753 +int __init scsiback_interface_init(void);
27754 +void scsiback_interface_exit(void);
27755 +int scsiback_xenbus_init(void);
27756 +void scsiback_xenbus_unregister(void);
27757 +
27758 +void scsiback_init_translation_table(struct vscsibk_info *info);
27759 +
27760 +int scsiback_add_translation_entry(struct vscsibk_info *info,
27761 + struct scsi_device *sdev, struct ids_tuple *v);
27762 +
27763 +int scsiback_del_translation_entry(struct vscsibk_info *info,
27764 + struct ids_tuple *v);
27765 +struct scsi_device *scsiback_do_translation(struct vscsibk_info *info,
27766 + struct ids_tuple *v);
27767 +void scsiback_release_translation_entry(struct vscsibk_info *info);
27768 +
27769 +
27770 +void scsiback_cmd_exec(pending_req_t *pending_req);
27771 +void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
27772 + uint32_t resid, pending_req_t *pending_req);
27773 +void scsiback_fast_flush_area(pending_req_t *req);
27774 +
27775 +void scsiback_rsp_emulation(pending_req_t *pending_req);
27776 +void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req);
27777 +void scsiback_emulation_init(void);
27778 +
27779 +
27780 +#endif /* __SCSIIF__BACKEND__COMMON_H__ */
27781 Index: head-2008-11-25/drivers/xen/scsiback/emulate.c
27782 ===================================================================
27783 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
27784 +++ head-2008-11-25/drivers/xen/scsiback/emulate.c 2008-08-07 12:44:36.000000000 +0200
27785 @@ -0,0 +1,454 @@
27786 +/*
27787 + * Xen SCSI backend driver
27788 + *
27789 + * Copyright (c) 2008, FUJITSU Limited
27790 + *
27791 + * This program is free software; you can redistribute it and/or
27792 + * modify it under the terms of the GNU General Public License version 2
27793 + * as published by the Free Software Foundation; or, when distributed
27794 + * separately from the Linux kernel or incorporated into other
27795 + * software packages, subject to the following license:
27796 + *
27797 + * Permission is hereby granted, free of charge, to any person obtaining a copy
27798 + * of this source file (the "Software"), to deal in the Software without
27799 + * restriction, including without limitation the rights to use, copy, modify,
27800 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
27801 + * and to permit persons to whom the Software is furnished to do so, subject to
27802 + * the following conditions:
27803 + *
27804 + * The above copyright notice and this permission notice shall be included in
27805 + * all copies or substantial portions of the Software.
27806 + *
27807 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27808 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27809 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27810 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27811 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27812 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27813 + * IN THE SOFTWARE.
27814 + */
27815 +
27816 +#include <scsi/scsi.h>
27817 +#include <scsi/scsi_cmnd.h>
27818 +#include <scsi/scsi_device.h>
27819 +#include "common.h"
27820 +
27821 +/* The following SCSI commands are not defined in scsi/scsi.h */
27822 +#define EXTENDED_COPY 0x83 /* EXTENDED COPY command */
27823 +#define REPORT_ALIASES 0xa3 /* REPORT ALIASES command */
27824 +#define CHANGE_ALIASES 0xa4 /* CHANGE ALIASES command */
27825 +#define SET_PRIORITY 0xa4 /* SET PRIORITY command */
27826 +
27827 +
27828 +/*
27829 +  The bitmap used to control emulation.
27830 +  (Bits 3 to 7 are reserved for future use.)
27831 +*/
27832 +#define VSCSIIF_NEED_CMD_EXEC 0x01 /* If this bit is set, cmd exec */
27833 + /* is required. */
27834 +#define VSCSIIF_NEED_EMULATE_REQBUF 0x02 /* If this bit is set, need */
27835 +						/* emulation request buff before */
27836 + /* cmd exec. */
27837 +#define VSCSIIF_NEED_EMULATE_RSPBUF 0x04 /* If this bit is set, need */
27838 + /* emulation resp buff after */
27839 + /* cmd exec. */
27840 +
27841 +/* Additional Sense Code (ASC) used */
27842 +#define NO_ADDITIONAL_SENSE 0x0
27843 +#define LOGICAL_UNIT_NOT_READY 0x4
27844 +#define UNRECOVERED_READ_ERR 0x11
27845 +#define PARAMETER_LIST_LENGTH_ERR 0x1a
27846 +#define INVALID_OPCODE 0x20
27847 +#define ADDR_OUT_OF_RANGE 0x21
27848 +#define INVALID_FIELD_IN_CDB 0x24
27849 +#define INVALID_FIELD_IN_PARAM_LIST 0x26
27850 +#define POWERON_RESET 0x29
27851 +#define SAVING_PARAMS_UNSUP 0x39
27852 +#define THRESHOLD_EXCEEDED 0x5d
27853 +#define LOW_POWER_COND_ON 0x5e
27854 +
27855 +
27856 +
27857 +/* Number of SCSI op_codes */
27858 +#define VSCSI_MAX_SCSI_OP_CODE 256
27859 +static unsigned char bitmap[VSCSI_MAX_SCSI_OP_CODE];
27860 +
27861 +
27862 +
27863 +/*
27864 + Emulation routines for each SCSI op_code.
27865 +*/
27866 +static void (*pre_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *);
27867 +static void (*post_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *);
27868 +
27869 +
27870 +static const int check_condition_result =
27871 + (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
27872 +
27873 +static void scsiback_mk_sense_buffer(uint8_t *data, uint8_t key,
27874 + uint8_t asc, uint8_t asq)
27875 +{
27876 + data[0] = 0x70; /* fixed, current */
27877 + data[2] = key;
27878 + data[7] = 0xa; /* implies 18 byte sense buffer */
27879 + data[12] = asc;
27880 + data[13] = asq;
27881 +}
27882 +
27883 +static void resp_not_supported_cmd(pending_req_t *pending_req, void *data)
27884 +{
27885 + scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST,
27886 + INVALID_OPCODE, 0);
27887 + pending_req->resid = 0;
27888 + pending_req->rslt = check_condition_result;
27889 +}
27890 +
27891 +
27892 +static int __copy_to_sg(struct scatterlist *sg, unsigned int nr_sg,
27893 + void *buf, unsigned int buflen)
27894 +{
27895 + void *from = buf;
27896 + void *to;
27897 + unsigned int from_rest = buflen;
27898 + unsigned int to_capa;
27899 + unsigned int copy_size = 0;
27900 + unsigned int i;
27901 + unsigned long pfn;
27902 +
27903 + for (i = 0; i < nr_sg; i++) {
27904 + if (sg->page == NULL) {
27905 + printk(KERN_WARNING "%s: inconsistent length field in "
27906 + "scatterlist\n", __FUNCTION__);
27907 + return -ENOMEM;
27908 + }
27909 +
27910 + to_capa = sg->length;
27911 + copy_size = min_t(unsigned int, to_capa, from_rest);
27912 +
27913 + pfn = page_to_pfn(sg->page);
27914 + to = pfn_to_kaddr(pfn) + (sg->offset);
27915 + memcpy(to, from, copy_size);
27916 +
27917 + from_rest -= copy_size;
27918 + if (from_rest == 0) {
27919 + return 0;
27920 + }
27921 +
27922 + sg++;
27923 + from += copy_size;
27924 + }
27925 +
27926 + printk(KERN_WARNING "%s: no space in scatterlist\n",
27927 + __FUNCTION__);
27928 + return -ENOMEM;
27929 +}
27930 +
27931 +static int __copy_from_sg(struct scatterlist *sg, unsigned int nr_sg,
27932 + void *buf, unsigned int buflen)
27933 +{
27934 + void *from;
27935 + void *to = buf;
27936 + unsigned int from_rest;
27937 + unsigned int to_capa = buflen;
27938 + unsigned int copy_size;
27939 + unsigned int i;
27940 + unsigned long pfn;
27941 +
27942 + for (i = 0; i < nr_sg; i++) {
27943 + if (sg->page == NULL) {
27944 + printk(KERN_WARNING "%s: inconsistent length field in "
27945 + "scatterlist\n", __FUNCTION__);
27946 + return -ENOMEM;
27947 + }
27948 +
27949 + from_rest = sg->length;
27950 + if ((from_rest > 0) && (to_capa < from_rest)) {
27951 + printk(KERN_WARNING
27952 + "%s: no space in destination buffer\n",
27953 + __FUNCTION__);
27954 + return -ENOMEM;
27955 + }
27956 + copy_size = from_rest;
27957 +
27958 + pfn = page_to_pfn(sg->page);
27959 + from = pfn_to_kaddr(pfn) + (sg->offset);
27960 + memcpy(to, from, copy_size);
27961 +
27962 + to_capa -= copy_size;
27963 +
27964 + sg++;
27965 + to += copy_size;
27966 + }
27967 +
27968 + return 0;
27969 +}
27970 +
27971 +static int __nr_luns_under_host(struct vscsibk_info *info)
27972 +{
27973 + struct v2p_entry *entry;
27974 + struct list_head *head = &(info->v2p_entry_lists);
27975 + unsigned long flags;
27976 + int lun_cnt = 0;
27977 +
27978 + spin_lock_irqsave(&info->v2p_lock, flags);
27979 + list_for_each_entry(entry, head, l) {
27980 + lun_cnt++;
27981 + }
27982 + spin_unlock_irqrestore(&info->v2p_lock, flags);
27983 +
27984 + return (lun_cnt);
27985 +}
27986 +
27987 +
27988 +/* REPORT LUNS Define*/
27989 +#define VSCSI_REPORT_LUNS_HEADER 8
27990 +#define VSCSI_REPORT_LUNS_RETRY 3
27991 +
27992 +/* adapted from scsi_debug.c/resp_report_luns() */
27993 +static void __report_luns(pending_req_t *pending_req, void *data)
27994 +{
27995 + struct vscsibk_info *info = pending_req->info;
27996 + unsigned int channel = pending_req->sdev->channel;
27997 + unsigned int target = pending_req->sdev->id;
27998 + unsigned int nr_seg = pending_req->nr_segments;
27999 + unsigned char *cmd = (unsigned char *)pending_req->cmnd;
28000 +
28001 + unsigned char *buff = NULL;
28002 + unsigned char alloc_len;
28003 + unsigned int alloc_luns = 0;
28004 + unsigned int req_bufflen = 0;
28005 + unsigned int actual_len = 0;
28006 + unsigned int retry_cnt = 0;
28007 + int select_report = (int)cmd[2];
28008 + int i, lun_cnt = 0, lun, upper, err = 0;
28009 +
28010 + struct v2p_entry *entry;
28011 + struct list_head *head = &(info->v2p_entry_lists);
28012 + unsigned long flags;
28013 +
28014 + struct scsi_lun *one_lun;
28015 +
28016 + req_bufflen = cmd[9] + (cmd[8] << 8) + (cmd[7] << 16) + (cmd[6] << 24);
28017 + if ((req_bufflen < 4) || (select_report != 0))
28018 + goto fail;
28019 +
28020 + alloc_luns = __nr_luns_under_host(info);
28021 + alloc_len = sizeof(struct scsi_lun) * alloc_luns
28022 + + VSCSI_REPORT_LUNS_HEADER;
28023 +retry:
28024 + if ((buff = kmalloc(alloc_len, GFP_KERNEL)) == NULL) {
28025 + printk(KERN_ERR "scsiback:%s kmalloc err\n", __FUNCTION__);
28026 + goto fail;
28027 + }
28028 +
28029 + memset(buff, 0, alloc_len);
28030 +
28031 + one_lun = (struct scsi_lun *) &buff[8];
28032 + spin_lock_irqsave(&info->v2p_lock, flags);
28033 + list_for_each_entry(entry, head, l) {
28034 + if ((entry->v.chn == channel) &&
28035 + (entry->v.tgt == target)) {
28036 +
28037 + /* check overflow */
28038 + if (lun_cnt >= alloc_luns) {
28039 + spin_unlock_irqrestore(&info->v2p_lock,
28040 + flags);
28041 +
28042 + if (retry_cnt < VSCSI_REPORT_LUNS_RETRY) {
28043 + retry_cnt++;
28044 + if (buff)
28045 + kfree(buff);
28046 + goto retry;
28047 + }
28048 +
28049 + goto fail;
28050 + }
28051 +
28052 + lun = entry->v.lun;
28053 + upper = (lun >> 8) & 0x3f;
28054 + if (upper)
28055 + one_lun[lun_cnt].scsi_lun[0] = upper;
28056 + one_lun[lun_cnt].scsi_lun[1] = lun & 0xff;
28057 + lun_cnt++;
28058 + }
28059 + }
28060 +
28061 + spin_unlock_irqrestore(&info->v2p_lock, flags);
28062 +
28063 + buff[2] = ((sizeof(struct scsi_lun) * lun_cnt) >> 8) & 0xff;
28064 + buff[3] = (sizeof(struct scsi_lun) * lun_cnt) & 0xff;
28065 +
28066 + actual_len = lun_cnt * sizeof(struct scsi_lun)
28067 + + VSCSI_REPORT_LUNS_HEADER;
28068 + req_bufflen = 0;
28069 + for (i = 0; i < nr_seg; i++)
28070 + req_bufflen += pending_req->sgl[i].length;
28071 +
28072 + err = __copy_to_sg(pending_req->sgl, nr_seg, buff,
28073 + min(req_bufflen, actual_len));
28074 + if (err)
28075 + goto fail;
28076 +
28077 + memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
28078 + pending_req->rslt = 0x00;
28079 + pending_req->resid = req_bufflen - min(req_bufflen, actual_len);
28080 +
28081 + kfree(buff);
28082 + return;
28083 +
28084 +fail:
28085 + scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST,
28086 + INVALID_FIELD_IN_CDB, 0);
28087 + pending_req->rslt = check_condition_result;
28088 + pending_req->resid = 0;
28089 + if (buff)
28090 + kfree(buff);
28091 + return;
28092 +}
28093 +
28094 +
28095 +
28096 +int __pre_do_emulation(pending_req_t *pending_req, void *data)
28097 +{
28098 + uint8_t op_code = pending_req->cmnd[0];
28099 +
28100 + if ((bitmap[op_code] & VSCSIIF_NEED_EMULATE_REQBUF) &&
28101 + pre_function[op_code] != NULL) {
28102 + pre_function[op_code](pending_req, data);
28103 + }
28104 +
28105 + /*
28106 +	  0: no need to call the native driver, so return immediately.
28107 +	  1: no emulation needed, or the native driver should be called
28108 +	     after modifying the request buffer.
28109 + */
28110 + return !!(bitmap[op_code] & VSCSIIF_NEED_CMD_EXEC);
28111 +}
28112 +
28113 +void scsiback_rsp_emulation(pending_req_t *pending_req)
28114 +{
28115 + uint8_t op_code = pending_req->cmnd[0];
28116 +
28117 + if ((bitmap[op_code] & VSCSIIF_NEED_EMULATE_RSPBUF) &&
28118 + post_function[op_code] != NULL) {
28119 + post_function[op_code](pending_req, NULL);
28120 + }
28121 +
28122 + return;
28123 +}
28124 +
28125 +
28126 +void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req)
28127 +{
28128 + if (__pre_do_emulation(pending_req, NULL)) {
28129 + scsiback_cmd_exec(pending_req);
28130 + }
28131 + else {
28132 + scsiback_fast_flush_area(pending_req);
28133 + scsiback_do_resp_with_sense(pending_req->sense_buffer,
28134 + pending_req->rslt, pending_req->resid, pending_req);
28135 + }
28136 +}
28137 +
28138 +
28139 +/*
28140 +  The following functions are not customizable.
28141 +*/
28142 +void scsiback_emulation_init(void)
28143 +{
28144 + int i;
28145 +
28146 + /* Initialize to default state */
28147 + for (i = 0; i < VSCSI_MAX_SCSI_OP_CODE; i++) {
28148 + bitmap[i] = (VSCSIIF_NEED_EMULATE_REQBUF |
28149 + VSCSIIF_NEED_EMULATE_RSPBUF);
28150 + pre_function[i] = resp_not_supported_cmd;
28151 + post_function[i] = NULL;
28152 + /* means,
28153 + - no need for pre-emulation
28154 + - no need for post-emulation
28155 + - call native driver
28156 + */
28157 + }
28158 +
28159 + /*
28160 +	  Register the appropriate functions below as needed.
28161 +	  (See scsi/scsi.h for the definitions of the SCSI op_codes.)
28162 + */
28163 +
28164 + /*
28165 +	  These commands are not emulated; they are passed directly to the native driver.
28166 + */
28167 + bitmap[TEST_UNIT_READY] = VSCSIIF_NEED_CMD_EXEC;
28168 + pre_function[TEST_UNIT_READY] = NULL;
28169 + post_function[TEST_UNIT_READY] = NULL;
28170 +
28171 + bitmap[REZERO_UNIT] = VSCSIIF_NEED_CMD_EXEC;
28172 + pre_function[REZERO_UNIT] = NULL;
28173 + post_function[REZERO_UNIT] = NULL;
28174 +
28175 + bitmap[REQUEST_SENSE] = VSCSIIF_NEED_CMD_EXEC;
28176 + pre_function[REQUEST_SENSE] = NULL;
28177 + post_function[REQUEST_SENSE] = NULL;
28178 +
28179 + bitmap[FORMAT_UNIT] = VSCSIIF_NEED_CMD_EXEC;
28180 + pre_function[FORMAT_UNIT] = NULL;
28181 + post_function[FORMAT_UNIT] = NULL;
28182 +
28183 + bitmap[READ_BLOCK_LIMITS] = VSCSIIF_NEED_CMD_EXEC;
28184 + pre_function[READ_BLOCK_LIMITS] = NULL;
28185 + post_function[READ_BLOCK_LIMITS] = NULL;
28186 +
28187 + bitmap[READ_6] = VSCSIIF_NEED_CMD_EXEC;
28188 + pre_function[READ_6] = NULL;
28189 + post_function[READ_6] = NULL;
28190 +
28191 + bitmap[WRITE_6] = VSCSIIF_NEED_CMD_EXEC;
28192 + pre_function[WRITE_6] = NULL;
28193 + post_function[WRITE_6] = NULL;
28194 +
28195 + bitmap[WRITE_FILEMARKS] = VSCSIIF_NEED_CMD_EXEC;
28196 + pre_function[WRITE_FILEMARKS] = NULL;
28197 + post_function[WRITE_FILEMARKS] = NULL;
28198 +
28199 + bitmap[SPACE] = VSCSIIF_NEED_CMD_EXEC;
28200 + pre_function[SPACE] = NULL;
28201 + post_function[SPACE] = NULL;
28202 +
28203 + bitmap[INQUIRY] = VSCSIIF_NEED_CMD_EXEC;
28204 + pre_function[INQUIRY] = NULL;
28205 + post_function[INQUIRY] = NULL;
28206 +
28207 + bitmap[ERASE] = VSCSIIF_NEED_CMD_EXEC;
28208 + pre_function[ERASE] = NULL;
28209 + post_function[ERASE] = NULL;
28210 +
28211 + bitmap[MODE_SENSE] = VSCSIIF_NEED_CMD_EXEC;
28212 + pre_function[MODE_SENSE] = NULL;
28213 + post_function[MODE_SENSE] = NULL;
28214 +
28215 + bitmap[SEND_DIAGNOSTIC] = VSCSIIF_NEED_CMD_EXEC;
28216 + pre_function[SEND_DIAGNOSTIC] = NULL;
28217 + post_function[SEND_DIAGNOSTIC] = NULL;
28218 +
28219 + bitmap[READ_CAPACITY] = VSCSIIF_NEED_CMD_EXEC;
28220 + pre_function[READ_CAPACITY] = NULL;
28221 + post_function[READ_CAPACITY] = NULL;
28222 +
28223 + bitmap[READ_10] = VSCSIIF_NEED_CMD_EXEC;
28224 + pre_function[READ_10] = NULL;
28225 + post_function[READ_10] = NULL;
28226 +
28227 + bitmap[WRITE_10] = VSCSIIF_NEED_CMD_EXEC;
28228 + pre_function[WRITE_10] = NULL;
28229 + post_function[WRITE_10] = NULL;
28230 +
28231 + /*
28232 +	  This command is fully emulated.
28233 + */
28234 + pre_function[REPORT_LUNS] = __report_luns;
28235 + bitmap[REPORT_LUNS] = (VSCSIIF_NEED_EMULATE_REQBUF |
28236 + VSCSIIF_NEED_EMULATE_RSPBUF);
28237 +
28238 + return;
28239 +}
28240 Index: head-2008-11-25/drivers/xen/scsiback/interface.c
28241 ===================================================================
28242 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
28243 +++ head-2008-11-25/drivers/xen/scsiback/interface.c 2008-07-21 11:00:33.000000000 +0200
28244 @@ -0,0 +1,182 @@
28245 +/*
28246 + * interface management.
28247 + *
28248 + * Copyright (c) 2008, FUJITSU Limited
28249 + *
28250 + * Based on the blkback driver code.
28251 + *
28252 + * This program is free software; you can redistribute it and/or
28253 + * modify it under the terms of the GNU General Public License version 2
28254 + * as published by the Free Software Foundation; or, when distributed
28255 + * separately from the Linux kernel or incorporated into other
28256 + * software packages, subject to the following license:
28257 + *
28258 + * Permission is hereby granted, free of charge, to any person obtaining a copy
28259 + * of this source file (the "Software"), to deal in the Software without
28260 + * restriction, including without limitation the rights to use, copy, modify,
28261 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
28262 + * and to permit persons to whom the Software is furnished to do so, subject to
28263 + * the following conditions:
28264 + *
28265 + * The above copyright notice and this permission notice shall be included in
28266 + * all copies or substantial portions of the Software.
28267 + *
28268 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28269 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28270 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28271 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28272 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28273 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28274 + * IN THE SOFTWARE.
28275 + */
28276 +
28277 +#include <scsi/scsi.h>
28278 +#include <scsi/scsi_host.h>
28279 +#include <scsi/scsi_device.h>
28280 +#include "common.h"
28281 +
28282 +#include <xen/evtchn.h>
28283 +#include <linux/kthread.h>
28284 +
28285 +
28286 +static kmem_cache_t *scsiback_cachep;
28287 +
28288 +struct vscsibk_info *vscsibk_info_alloc(domid_t domid)
28289 +{
28290 + struct vscsibk_info *info;
28291 +
28292 + info = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL);
28293 + if (!info)
28294 + return ERR_PTR(-ENOMEM);
28295 +
28296 + memset(info, 0, sizeof(*info));
28297 + info->domid = domid;
28298 + spin_lock_init(&info->ring_lock);
28299 + atomic_set(&info->nr_unreplied_reqs, 0);
28300 + init_waitqueue_head(&info->wq);
28301 + init_waitqueue_head(&info->waiting_to_free);
28302 +
28303 + return info;
28304 +}
28305 +
28306 +static int map_frontend_page( struct vscsibk_info *info,
28307 + unsigned long ring_ref)
28308 +{
28309 + struct gnttab_map_grant_ref op;
28310 + int err;
28311 +
28312 + gnttab_set_map_op(&op, (unsigned long)info->ring_area->addr,
28313 + GNTMAP_host_map, ring_ref,
28314 + info->domid);
28315 +
28316 + err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
28317 + BUG_ON(err);
28318 +
28319 + if (op.status) {
28320 + printk(KERN_ERR "scsiback: Grant table operation failure !\n");
28321 + return op.status;
28322 + }
28323 +
28324 + info->shmem_ref = ring_ref;
28325 + info->shmem_handle = op.handle;
28326 +
28327 + return (GNTST_okay);
28328 +}
28329 +
28330 +static void unmap_frontend_page(struct vscsibk_info *info)
28331 +{
28332 + struct gnttab_unmap_grant_ref op;
28333 + int err;
28334 +
28335 + gnttab_set_unmap_op(&op, (unsigned long)info->ring_area->addr,
28336 + GNTMAP_host_map, info->shmem_handle);
28337 +
28338 + err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
28339 + BUG_ON(err);
28340 +
28341 +}
28342 +
28343 +int scsiback_init_sring(struct vscsibk_info *info,
28344 + unsigned long ring_ref, unsigned int evtchn)
28345 +{
28346 + struct vscsiif_sring *sring;
28347 + int err;
28348 +
28349 + if (info->irq) {
28350 + printk(KERN_ERR "scsiback: Already connected through?\n");
28351 + return -1;
28352 + }
28353 +
28354 + info->ring_area = alloc_vm_area(PAGE_SIZE);
28355 +	if (!info->ring_area)
28356 + return -ENOMEM;
28357 +
28358 + err = map_frontend_page(info, ring_ref);
28359 + if (err)
28360 + goto free_vm;
28361 +
28362 + sring = (struct vscsiif_sring *) info->ring_area->addr;
28363 + BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
28364 +
28365 + err = bind_interdomain_evtchn_to_irqhandler(
28366 + info->domid, evtchn,
28367 + scsiback_intr, 0, "vscsiif-backend", info);
28368 +
28369 + if (err < 0)
28370 + goto unmap_page;
28371 +
28372 + info->irq = err;
28373 +
28374 + return 0;
28375 +
28376 +unmap_page:
28377 + unmap_frontend_page(info);
28378 +free_vm:
28379 + free_vm_area(info->ring_area);
28380 +
28381 + return err;
28382 +}
28383 +
28384 +void scsiback_disconnect(struct vscsibk_info *info)
28385 +{
28386 + if (info->kthread) {
28387 + kthread_stop(info->kthread);
28388 + info->kthread = NULL;
28389 + }
28390 +
28391 + wait_event(info->waiting_to_free,
28392 + atomic_read(&info->nr_unreplied_reqs) == 0);
28393 +
28394 + if (info->irq) {
28395 + unbind_from_irqhandler(info->irq, info);
28396 + info->irq = 0;
28397 + }
28398 +
28399 + if (info->ring.sring) {
28400 + unmap_frontend_page(info);
28401 + free_vm_area(info->ring_area);
28402 + info->ring.sring = NULL;
28403 + }
28404 +}
28405 +
28406 +void scsiback_free(struct vscsibk_info *info)
28407 +{
28408 + kmem_cache_free(scsiback_cachep, info);
28409 +}
28410 +
28411 +int __init scsiback_interface_init(void)
28412 +{
28413 + scsiback_cachep = kmem_cache_create("vscsiif_cache",
28414 + sizeof(struct vscsibk_info), 0, 0, NULL, NULL);
28415 + if (!scsiback_cachep) {
28416 + printk(KERN_ERR "scsiback: can't init scsi cache\n");
28417 + return -ENOMEM;
28418 + }
28419 +
28420 + return 0;
28421 +}
28422 +
28423 +void scsiback_interface_exit(void)
28424 +{
28425 + kmem_cache_destroy(scsiback_cachep);
28426 +}
28427 Index: head-2008-11-25/drivers/xen/scsiback/scsiback.c
28428 ===================================================================
28429 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
28430 +++ head-2008-11-25/drivers/xen/scsiback/scsiback.c 2008-07-21 11:00:33.000000000 +0200
28431 @@ -0,0 +1,717 @@
28432 +/*
28433 + * Xen SCSI backend driver
28434 + *
28435 + * Copyright (c) 2008, FUJITSU Limited
28436 + *
28437 + * Based on the blkback driver code.
28438 + *
28439 + * This program is free software; you can redistribute it and/or
28440 + * modify it under the terms of the GNU General Public License version 2
28441 + * as published by the Free Software Foundation; or, when distributed
28442 + * separately from the Linux kernel or incorporated into other
28443 + * software packages, subject to the following license:
28444 + *
28445 + * Permission is hereby granted, free of charge, to any person obtaining a copy
28446 + * of this source file (the "Software"), to deal in the Software without
28447 + * restriction, including without limitation the rights to use, copy, modify,
28448 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
28449 + * and to permit persons to whom the Software is furnished to do so, subject to
28450 + * the following conditions:
28451 + *
28452 + * The above copyright notice and this permission notice shall be included in
28453 + * all copies or substantial portions of the Software.
28454 + *
28455 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28456 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28457 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28458 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28459 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28460 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28461 + * IN THE SOFTWARE.
28462 + */
28463 +
28464 +#include <linux/spinlock.h>
28465 +#include <linux/kthread.h>
28466 +#include <linux/list.h>
28467 +#include <linux/delay.h>
28468 +#include <xen/balloon.h>
28469 +#include <asm/hypervisor.h>
28470 +#include <scsi/scsi.h>
28471 +#include <scsi/scsi_cmnd.h>
28472 +#include <scsi/scsi_host.h>
28473 +#include <scsi/scsi_device.h>
28474 +#include <scsi/scsi_dbg.h>
28475 +#include <scsi/scsi_eh.h>
28476 +
28477 +#include "common.h"
28478 +
28479 +
28480 +struct list_head pending_free;
28481 +DEFINE_SPINLOCK(pending_free_lock);
28482 +DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
28483 +
28484 +int vscsiif_reqs = VSCSIIF_BACK_MAX_PENDING_REQS;
28485 +module_param_named(reqs, vscsiif_reqs, int, 0);
28486 +MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate");
28487 +
28488 +static unsigned int log_print_stat = 0;
28489 +module_param(log_print_stat, int, 0644);
28490 +
28491 +#define SCSIBACK_INVALID_HANDLE (~0)
28492 +
28493 +static pending_req_t *pending_reqs;
28494 +static struct page **pending_pages;
28495 +static grant_handle_t *pending_grant_handles;
28496 +
28497 +static int vaddr_pagenr(pending_req_t *req, int seg)
28498 +{
28499 + return (req - pending_reqs) * VSCSIIF_SG_TABLESIZE + seg;
28500 +}
28501 +
28502 +static unsigned long vaddr(pending_req_t *req, int seg)
28503 +{
28504 + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
28505 + return (unsigned long)pfn_to_kaddr(pfn);
28506 +}
28507 +
28508 +#define pending_handle(_req, _seg) \
28509 + (pending_grant_handles[vaddr_pagenr(_req, _seg)])
28510 +
28511 +
28512 +void scsiback_fast_flush_area(pending_req_t *req)
28513 +{
28514 + struct gnttab_unmap_grant_ref unmap[VSCSIIF_SG_TABLESIZE];
28515 + unsigned int i, invcount = 0;
28516 + grant_handle_t handle;
28517 + int err;
28518 +
28519 + if (req->nr_segments) {
28520 + for (i = 0; i < req->nr_segments; i++) {
28521 + handle = pending_handle(req, i);
28522 + if (handle == SCSIBACK_INVALID_HANDLE)
28523 + continue;
28524 + gnttab_set_unmap_op(&unmap[i], vaddr(req, i),
28525 + GNTMAP_host_map, handle);
28526 + pending_handle(req, i) = SCSIBACK_INVALID_HANDLE;
28527 + invcount++;
28528 + }
28529 +
28530 + err = HYPERVISOR_grant_table_op(
28531 + GNTTABOP_unmap_grant_ref, unmap, invcount);
28532 + BUG_ON(err);
28533 + kfree(req->sgl);
28534 + }
28535 +
28536 + return;
28537 +}
28538 +
28539 +
28540 +static pending_req_t * alloc_req(struct vscsibk_info *info)
28541 +{
28542 + pending_req_t *req = NULL;
28543 + unsigned long flags;
28544 +
28545 + spin_lock_irqsave(&pending_free_lock, flags);
28546 + if (!list_empty(&pending_free)) {
28547 + req = list_entry(pending_free.next, pending_req_t, free_list);
28548 + list_del(&req->free_list);
28549 + }
28550 + spin_unlock_irqrestore(&pending_free_lock, flags);
28551 + return req;
28552 +}
28553 +
28554 +
28555 +static void free_req(pending_req_t *req)
28556 +{
28557 + unsigned long flags;
28558 + int was_empty;
28559 +
28560 + spin_lock_irqsave(&pending_free_lock, flags);
28561 + was_empty = list_empty(&pending_free);
28562 + list_add(&req->free_list, &pending_free);
28563 + spin_unlock_irqrestore(&pending_free_lock, flags);
28564 + if (was_empty)
28565 + wake_up(&pending_free_wq);
28566 +}
28567 +
28568 +
28569 +static void scsiback_notify_work(struct vscsibk_info *info)
28570 +{
28571 + info->waiting_reqs = 1;
28572 + wake_up(&info->wq);
28573 +}
28574 +
28575 +void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
28576 + uint32_t resid, pending_req_t *pending_req)
28577 +{
28578 + vscsiif_response_t *ring_res;
28579 + struct vscsibk_info *info = pending_req->info;
28580 + int notify;
28581 + int more_to_do = 1;
28582 + unsigned long flags;
28583 +
28584 + DPRINTK("%s\n",__FUNCTION__);
28585 +
28586 + spin_lock_irqsave(&info->ring_lock, flags);
28587 +
28588 + ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
28589 + info->ring.rsp_prod_pvt++;
28590 +
28591 + ring_res->rslt = result;
28592 + ring_res->rqid = pending_req->rqid;
28593 +
28594 + if (sense_buffer != NULL) {
28595 + memcpy(ring_res->sense_buffer, sense_buffer,
28596 + VSCSIIF_SENSE_BUFFERSIZE);
28597 + ring_res->sense_len = VSCSIIF_SENSE_BUFFERSIZE;
28598 + } else {
28599 + ring_res->sense_len = 0;
28600 + }
28601 +
28602 + ring_res->residual_len = resid;
28603 +
28604 + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info->ring, notify);
28605 + if (info->ring.rsp_prod_pvt == info->ring.req_cons) {
28606 + RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do);
28607 + } else if (RING_HAS_UNCONSUMED_REQUESTS(&info->ring)) {
28608 + more_to_do = 1;
28609 + }
28610 +
28611 + spin_unlock_irqrestore(&info->ring_lock, flags);
28612 +
28613 + if (more_to_do)
28614 + scsiback_notify_work(info);
28615 +
28616 + if (notify)
28617 + notify_remote_via_irq(info->irq);
28618 +
28619 + free_req(pending_req);
28620 +}
28621 +
28622 +static void scsiback_print_status(char *sense_buffer, int errors,
28623 + pending_req_t *pending_req)
28624 +{
28625 + struct scsi_device *sdev = pending_req->sdev;
28626 +
28627 + printk(KERN_ERR "scsiback: %d:%d:%d:%d ",sdev->host->host_no,
28628 + sdev->channel, sdev->id, sdev->lun);
28629 + printk(KERN_ERR "status = 0x%02x, message = 0x%02x, host = 0x%02x, driver = 0x%02x\n",
28630 + status_byte(errors), msg_byte(errors),
28631 + host_byte(errors), driver_byte(errors));
28632 +
28633 + printk(KERN_ERR "scsiback: cmnd[0]=0x%02X\n",
28634 + pending_req->cmnd[0]);
28635 +
28636 + if (CHECK_CONDITION & status_byte(errors))
28637 + __scsi_print_sense("scsiback", sense_buffer, SCSI_SENSE_BUFFERSIZE);
28638 +}
28639 +
28640 +
28641 +static void scsiback_cmd_done(struct request *req, int errors)
28642 +{
28643 + pending_req_t *pending_req = req->end_io_data;
28644 + unsigned char *sense_buffer;
28645 + unsigned int resid;
28646 +
28647 + sense_buffer = req->sense;
28648 + resid = req->data_len;
28649 +
28650 + if (errors != 0) {
28651 + if (log_print_stat)
28652 + scsiback_print_status(sense_buffer, errors, pending_req);
28653 + }
28654 +
28655 + scsiback_rsp_emulation(pending_req);
28656 +
28657 + scsiback_fast_flush_area(pending_req);
28658 + scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
28659 + scsiback_put(pending_req->info);
28660 +
28661 + __blk_put_request(req->q, req);
28662 +}
28663 +
28664 +
28665 +static int scsiback_gnttab_data_map(vscsiif_request_t *ring_req,
28666 + pending_req_t *pending_req)
28667 +{
28668 + u32 flags;
28669 + int write;
28670 + int i, err = 0;
28671 + unsigned int data_len = 0;
28672 + struct gnttab_map_grant_ref map[VSCSIIF_SG_TABLESIZE];
28673 + struct vscsibk_info *info = pending_req->info;
28674 +
28675 + int data_dir = (int)pending_req->sc_data_direction;
28676 + unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
28677 +
28678 + write = (data_dir == DMA_TO_DEVICE);
28679 +
28680 + if (nr_segments) {
28681 +		/* sgl is freed in scsiback_fast_flush_area() */
28682 + pending_req->sgl = kmalloc(sizeof(struct scatterlist) * nr_segments,
28683 + GFP_KERNEL);
28684 + if (!pending_req->sgl) {
28685 + printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__);
28686 + return -ENOMEM;
28687 + }
28688 +
28689 + for (i = 0; i < nr_segments; i++) {
28690 + flags = GNTMAP_host_map;
28691 + if (write)
28692 + flags |= GNTMAP_readonly;
28693 + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
28694 + ring_req->seg[i].gref,
28695 + info->domid);
28696 + }
28697 +
28698 + err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nr_segments);
28699 + BUG_ON(err);
28700 +
28701 + for (i = 0; i < nr_segments; i++) {
28702 + if (unlikely(map[i].status != 0)) {
28703 + printk(KERN_ERR "scsiback: invalid buffer -- could not remap it\n");
28704 + map[i].handle = SCSIBACK_INVALID_HANDLE;
28705 + err |= 1;
28706 + }
28707 +
28708 + pending_handle(pending_req, i) = map[i].handle;
28709 +
28710 + if (err)
28711 + continue;
28712 +
28713 + set_phys_to_machine(__pa(vaddr(
28714 + pending_req, i)) >> PAGE_SHIFT,
28715 + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
28716 +
28717 + pending_req->sgl[i].page = virt_to_page(vaddr(pending_req, i));
28718 + pending_req->sgl[i].offset = ring_req->seg[i].offset;
28719 + pending_req->sgl[i].length = ring_req->seg[i].length;
28720 + data_len += pending_req->sgl[i].length;
28721 +
28722 + barrier();
28723 + if (pending_req->sgl[i].offset >= PAGE_SIZE ||
28724 + pending_req->sgl[i].length > PAGE_SIZE ||
28725 + pending_req->sgl[i].offset + pending_req->sgl[i].length > PAGE_SIZE)
28726 + err |= 1;
28727 +
28728 + }
28729 +
28730 + if (err)
28731 + goto fail_flush;
28732 + }
28733 +
28734 + pending_req->request_bufflen = data_len;
28735 +
28736 + return 0;
28737 +
28738 +fail_flush:
28739 + scsiback_fast_flush_area(pending_req);
28740 + return -ENOMEM;
28741 +}
28742 +
28743 +/* adapted from scsi_lib.c:scsi_merge_bio() */
28744 +static int scsiback_merge_bio(struct request *rq, struct bio *bio)
28745 +{
28746 + struct request_queue *q = rq->q;
28747 +
28748 + bio->bi_flags &= ~(1 << BIO_SEG_VALID);
28749 + if (rq_data_dir(rq) == WRITE)
28750 + bio->bi_rw |= (1 << BIO_RW);
28751 +
28752 + blk_queue_bounce(q, &bio);
28753 +
28754 + if (!rq->bio)
28755 + blk_rq_bio_prep(q, rq, bio);
28756 + else if (!q->back_merge_fn(q, rq, bio))
28757 + return -EINVAL;
28758 + else {
28759 + rq->biotail->bi_next = bio;
28760 + rq->biotail = bio;
28761 + rq->hard_nr_sectors += bio_sectors(bio);
28762 + rq->nr_sectors = rq->hard_nr_sectors;
28763 + }
28764 +
28765 + return 0;
28766 +}
28767 +
28768 +
28769 +/* adapted from scsi_lib.c:scsi_bi_endio() */
28770 +static int scsiback_bi_endio(struct bio *bio, unsigned int bytes_done, int error)
28771 +{
28772 + if (bio->bi_size)
28773 + return 1;
28774 +
28775 + bio_put(bio);
28776 + return 0;
28777 +}
28778 +
28779 +
28780 +
28781 +/* adapted from scsi_lib.c:scsi_req_map_sg() */
28782 +static int request_map_sg(struct request *rq, pending_req_t *pending_req, unsigned int count)
28783 +{
28784 + struct request_queue *q = rq->q;
28785 + int nr_pages;
28786 + unsigned int nsegs = count;
28787 +
28788 + unsigned int data_len = 0, len, bytes, off;
28789 + struct page *page;
28790 + struct bio *bio = NULL;
28791 + int i, err, nr_vecs = 0;
28792 +
28793 + for (i = 0; i < nsegs; i++) {
28794 + page = pending_req->sgl[i].page;
28795 + off = (unsigned int)pending_req->sgl[i].offset;
28796 + len = (unsigned int)pending_req->sgl[i].length;
28797 + data_len += len;
28798 +
28799 + nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT;
28800 + while (len > 0) {
28801 + bytes = min_t(unsigned int, len, PAGE_SIZE - off);
28802 +
28803 + if (!bio) {
28804 + nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
28805 + nr_pages -= nr_vecs;
28806 + bio = bio_alloc(GFP_KERNEL, nr_vecs);
28807 + if (!bio) {
28808 + err = -ENOMEM;
28809 + goto free_bios;
28810 + }
28811 + bio->bi_end_io = scsiback_bi_endio;
28812 + }
28813 +
28814 + if (bio_add_pc_page(q, bio, page, bytes, off) !=
28815 + bytes) {
28816 + bio_put(bio);
28817 + err = -EINVAL;
28818 + goto free_bios;
28819 + }
28820 +
28821 + if (bio->bi_vcnt >= nr_vecs) {
28822 + err = scsiback_merge_bio(rq, bio);
28823 + if (err) {
28824 + bio_endio(bio, bio->bi_size, 0);
28825 + goto free_bios;
28826 + }
28827 + bio = NULL;
28828 + }
28829 +
28830 + page++;
28831 + len -= bytes;
28832 + off = 0;
28833 + }
28834 + }
28835 +
28836 + rq->buffer = rq->data = NULL;
28837 + rq->data_len = data_len;
28838 +
28839 + return 0;
28840 +
28841 +free_bios:
28842 + while ((bio = rq->bio) != NULL) {
28843 + rq->bio = bio->bi_next;
28844 + /*
28845 +		 * call endio instead of bio_put in case it was bounced
28846 + */
28847 + bio_endio(bio, bio->bi_size, 0);
28848 + }
28849 +
28850 + return err;
28851 +}
28852 +
28853 +
28854 +void scsiback_cmd_exec(pending_req_t *pending_req)
28855 +{
28856 + int cmd_len = (int)pending_req->cmd_len;
28857 + int data_dir = (int)pending_req->sc_data_direction;
28858 + unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
28859 + unsigned int timeout;
28860 + struct request *rq;
28861 + int write;
28862 +
28863 + DPRINTK("%s\n",__FUNCTION__);
28864 +
28865 +	/* use the frontend's timeout so the backend does not time out first */
28866 + if (pending_req->timeout_per_command)
28867 + timeout = pending_req->timeout_per_command * HZ;
28868 + else
28869 + timeout = VSCSIIF_TIMEOUT;
28870 +
28871 + write = (data_dir == DMA_TO_DEVICE);
28872 + rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
28873 +
28874 + rq->flags |= REQ_BLOCK_PC;
28875 + rq->cmd_len = cmd_len;
28876 + memcpy(rq->cmd, pending_req->cmnd, cmd_len);
28877 +
28878 + memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
28879 + rq->sense = pending_req->sense_buffer;
28880 + rq->sense_len = 0;
28881 +
28882 + /* not allowed to retry in backend. */
28883 + rq->retries = 0;
28884 + rq->timeout = timeout;
28885 + rq->end_io_data = pending_req;
28886 +
28887 + if (nr_segments) {
28888 +
28889 + if (request_map_sg(rq, pending_req, nr_segments)) {
28890 + printk(KERN_ERR "scsiback: SG Request Map Error\n");
28891 + return;
28892 + }
28893 + }
28894 +
28895 + scsiback_get(pending_req->info);
28896 + blk_execute_rq_nowait(rq->q, NULL, rq, 1, scsiback_cmd_done);
28897 +
28898 + return ;
28899 +}
28900 +
28901 +
28902 +static void scsiback_device_reset_exec(pending_req_t *pending_req)
28903 +{
28904 + struct vscsibk_info *info = pending_req->info;
28905 + int err;
28906 + struct scsi_device *sdev = pending_req->sdev;
28907 +
28908 + scsiback_get(info);
28909 + err = scsi_reset_provider(sdev, SCSI_TRY_RESET_DEVICE);
28910 +
28911 + scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
28912 + scsiback_put(info);
28913 +
28914 + return;
28915 +}
28916 +
28917 +
28918 +irqreturn_t scsiback_intr(int irq, void *dev_id, struct pt_regs *regs)
28919 +{
28920 + scsiback_notify_work((struct vscsibk_info *)dev_id);
28921 + return IRQ_HANDLED;
28922 +}
28923 +
28924 +static int prepare_pending_reqs(struct vscsibk_info *info,
28925 + vscsiif_request_t *ring_req, pending_req_t *pending_req)
28926 +{
28927 + struct scsi_device *sdev;
28928 + struct ids_tuple vir;
28929 + int err = -EINVAL;
28930 +
28931 + DPRINTK("%s\n",__FUNCTION__);
28932 +
28933 + pending_req->rqid = ring_req->rqid;
28934 + pending_req->act = ring_req->act;
28935 +
28936 + pending_req->info = info;
28937 +
28938 + vir.chn = ring_req->channel;
28939 + vir.tgt = ring_req->id;
28940 + vir.lun = ring_req->lun;
28941 +
28942 + rmb();
28943 + sdev = scsiback_do_translation(info, &vir);
28944 + if (!sdev) {
28945 + pending_req->sdev = NULL;
28946 + DPRINTK("scsiback: doesn't exist.\n");
28947 + err = -ENODEV;
28948 + goto invalid_value;
28949 + }
28950 + pending_req->sdev = sdev;
28951 +
28952 +	/* validate the request parameters passed from the frontend */
28953 + pending_req->sc_data_direction = ring_req->sc_data_direction;
28954 + barrier();
28955 + if ((pending_req->sc_data_direction != DMA_BIDIRECTIONAL) &&
28956 + (pending_req->sc_data_direction != DMA_TO_DEVICE) &&
28957 + (pending_req->sc_data_direction != DMA_FROM_DEVICE) &&
28958 + (pending_req->sc_data_direction != DMA_NONE)) {
28959 + DPRINTK("scsiback: invalid parameter data_dir = %d\n",
28960 + pending_req->sc_data_direction);
28961 + err = -EINVAL;
28962 + goto invalid_value;
28963 + }
28964 +
28965 + pending_req->nr_segments = ring_req->nr_segments;
28966 + barrier();
28967 + if (pending_req->nr_segments > VSCSIIF_SG_TABLESIZE) {
28968 + DPRINTK("scsiback: invalid parameter nr_seg = %d\n",
28969 + pending_req->nr_segments);
28970 + err = -EINVAL;
28971 + goto invalid_value;
28972 + }
28973 +
28974 + pending_req->cmd_len = ring_req->cmd_len;
28975 + barrier();
28976 + if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) {
28977 + DPRINTK("scsiback: invalid parameter cmd_len = %d\n",
28978 + pending_req->cmd_len);
28979 + err = -EINVAL;
28980 + goto invalid_value;
28981 + }
28982 + memcpy(pending_req->cmnd, ring_req->cmnd, pending_req->cmd_len);
28983 +
28984 + pending_req->timeout_per_command = ring_req->timeout_per_command;
28985 +
28986 + if(scsiback_gnttab_data_map(ring_req, pending_req)) {
28987 + DPRINTK("scsiback: invalid buffer\n");
28988 + err = -EINVAL;
28989 + goto invalid_value;
28990 + }
28991 +
28992 + return 0;
28993 +
28994 +invalid_value:
28995 + return err;
28996 +}
28997 +
28998 +
28999 +static int scsiback_do_cmd_fn(struct vscsibk_info *info)
29000 +{
29001 + struct vscsiif_back_ring *ring = &info->ring;
29002 + vscsiif_request_t *ring_req;
29003 +
29004 + pending_req_t *pending_req;
29005 + RING_IDX rc, rp;
29006 + int err, more_to_do = 0;
29007 +
29008 + DPRINTK("%s\n",__FUNCTION__);
29009 +
29010 + rc = ring->req_cons;
29011 + rp = ring->sring->req_prod;
29012 + rmb();
29013 +
29014 + while ((rc != rp)) {
29015 + if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
29016 + break;
29017 + pending_req = alloc_req(info);
29018 + if (NULL == pending_req) {
29019 + more_to_do = 1;
29020 + break;
29021 + }
29022 +
29023 + ring_req = RING_GET_REQUEST(ring, rc);
29024 + ring->req_cons = ++rc;
29025 +
29026 + err = prepare_pending_reqs(info, ring_req,
29027 + pending_req);
29028 + if (err == -EINVAL) {
29029 + scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
29030 + 0, pending_req);
29031 + continue;
29032 + } else if (err == -ENODEV) {
29033 + scsiback_do_resp_with_sense(NULL, (DID_NO_CONNECT << 16),
29034 + 0, pending_req);
29035 + continue;
29036 + }
29037 +
29038 + if (pending_req->act == VSCSIIF_ACT_SCSI_CDB) {
29039 + scsiback_req_emulation_or_cmdexec(pending_req);
29040 + } else if (pending_req->act == VSCSIIF_ACT_SCSI_RESET) {
29041 + scsiback_device_reset_exec(pending_req);
29042 + } else {
29043 + printk(KERN_ERR "scsiback: invalid parameter for request\n");
29044 + scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
29045 + 0, pending_req);
29046 + continue;
29047 + }
29048 + }
29049 +
29050 + if (RING_HAS_UNCONSUMED_REQUESTS(ring))
29051 + more_to_do = 1;
29052 +
29053 + /* Yield point for this unbounded loop. */
29054 + cond_resched();
29055 +
29056 + return more_to_do;
29057 +}
29058 +
29059 +
29060 +int scsiback_schedule(void *data)
29061 +{
29062 + struct vscsibk_info *info = (struct vscsibk_info *)data;
29063 +
29064 + DPRINTK("%s\n",__FUNCTION__);
29065 +
29066 + while (!kthread_should_stop()) {
29067 + wait_event_interruptible(
29068 + info->wq,
29069 + info->waiting_reqs || kthread_should_stop());
29070 + wait_event_interruptible(
29071 + pending_free_wq,
29072 + !list_empty(&pending_free) || kthread_should_stop());
29073 +
29074 + info->waiting_reqs = 0;
29075 + smp_mb();
29076 +
29077 + if (scsiback_do_cmd_fn(info))
29078 + info->waiting_reqs = 1;
29079 + }
29080 +
29081 + return 0;
29082 +}
29083 +
29084 +
29085 +static int __init scsiback_init(void)
29086 +{
29087 + int i, mmap_pages;
29088 +
29089 + if (!is_running_on_xen())
29090 + return -ENODEV;
29091 +
29092 + mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE;
29093 +
29094 + pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
29095 + vscsiif_reqs, GFP_KERNEL);
29096 + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
29097 + mmap_pages, GFP_KERNEL);
29098 + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
29099 +
29100 + if (!pending_reqs || !pending_grant_handles || !pending_pages)
29101 + goto out_of_memory;
29102 +
29103 + for (i = 0; i < mmap_pages; i++)
29104 + pending_grant_handles[i] = SCSIBACK_INVALID_HANDLE;
29105 +
29106 + if (scsiback_interface_init() < 0)
29107 + goto out_of_kmem;
29108 +
29109 +	memset(pending_reqs, 0, sizeof(pending_reqs[0]) * vscsiif_reqs);
29110 + INIT_LIST_HEAD(&pending_free);
29111 +
29112 + for (i = 0; i < vscsiif_reqs; i++)
29113 + list_add_tail(&pending_reqs[i].free_list, &pending_free);
29114 +
29115 + if (scsiback_xenbus_init())
29116 + goto out_of_xenbus;
29117 +
29118 + scsiback_emulation_init();
29119 +
29120 + return 0;
29121 +
29122 +out_of_xenbus:
29123 + scsiback_xenbus_unregister();
29124 +out_of_kmem:
29125 + scsiback_interface_exit();
29126 +out_of_memory:
29127 + kfree(pending_reqs);
29128 + kfree(pending_grant_handles);
29129 + free_empty_pages_and_pagevec(pending_pages, mmap_pages);
29130 + printk(KERN_ERR "scsiback: %s: out of memory\n", __FUNCTION__);
29131 + return -ENOMEM;
29132 +}
29133 +
29134 +static void __exit scsiback_exit(void)
29135 +{
29136 + scsiback_xenbus_unregister();
29137 + scsiback_interface_exit();
29138 + kfree(pending_reqs);
29139 + kfree(pending_grant_handles);
29140 + free_empty_pages_and_pagevec(pending_pages, (vscsiif_reqs * VSCSIIF_SG_TABLESIZE));
29141 +
29142 +}
29143 +
29144 +module_init(scsiback_init);
29145 +module_exit(scsiback_exit);
29146 +
29147 +MODULE_DESCRIPTION("Xen SCSI backend driver");
29148 +MODULE_LICENSE("Dual BSD/GPL");
29149 Index: head-2008-11-25/drivers/xen/scsiback/translate.c
29150 ===================================================================
29151 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
29152 +++ head-2008-11-25/drivers/xen/scsiback/translate.c 2008-07-21 11:00:33.000000000 +0200
29153 @@ -0,0 +1,168 @@
29154 +/*
29155 + * Xen SCSI backend driver
29156 + *
29157 + * Copyright (c) 2008, FUJITSU Limited
29158 + *
29159 + * This program is free software; you can redistribute it and/or
29160 + * modify it under the terms of the GNU General Public License version 2
29161 + * as published by the Free Software Foundation; or, when distributed
29162 + * separately from the Linux kernel or incorporated into other
29163 + * software packages, subject to the following license:
29164 + *
29165 + * Permission is hereby granted, free of charge, to any person obtaining a copy
29166 + * of this source file (the "Software"), to deal in the Software without
29167 + * restriction, including without limitation the rights to use, copy, modify,
29168 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
29169 + * and to permit persons to whom the Software is furnished to do so, subject to
29170 + * the following conditions:
29171 + *
29172 + * The above copyright notice and this permission notice shall be included in
29173 + * all copies or substantial portions of the Software.
29174 + *
29175 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29176 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29177 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29178 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29179 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29180 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29181 + * IN THE SOFTWARE.
29182 + */
29183 +
29184 +#include <linux/list.h>
29185 +#include <linux/gfp.h>
29186 +
29187 +#include "common.h"
29188 +
29189 +/*
29190 + Initialize the translation entry list
29191 +*/
29192 +void scsiback_init_translation_table(struct vscsibk_info *info)
29193 +{
29194 + INIT_LIST_HEAD(&info->v2p_entry_lists);
29195 + spin_lock_init(&info->v2p_lock);
29196 +}
29197 +
29198 +
29199 +/*
29200 + Add a new translation entry
29201 +*/
29202 +int scsiback_add_translation_entry(struct vscsibk_info *info,
29203 + struct scsi_device *sdev, struct ids_tuple *v)
29204 +{
29205 + int err = 0;
29206 + struct v2p_entry *entry;
29207 + struct v2p_entry *new;
29208 + struct list_head *head = &(info->v2p_entry_lists);
29209 + unsigned long flags;
29210 +
29211 + spin_lock_irqsave(&info->v2p_lock, flags);
29212 +
29213 + /* Check double assignment to identical virtual ID */
29214 + list_for_each_entry(entry, head, l) {
29215 + if ((entry->v.chn == v->chn) &&
29216 + (entry->v.tgt == v->tgt) &&
29217 + (entry->v.lun == v->lun)) {
29218 + printk(KERN_WARNING "scsiback: Virtual ID is already used. "
29219 + "Assignment was not performed.\n");
29220 + err = -EEXIST;
29221 + goto out;
29222 + }
29223 +
29224 + }
29225 +
29226 + /* Create a new translation entry and add to the list */
29227 + if ((new = kmalloc(sizeof(struct v2p_entry), GFP_ATOMIC)) == NULL) {
29228 + printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__);
29229 + err = -ENOMEM;
29230 + goto out;
29231 + }
29232 + new->v = *v;
29233 + new->sdev = sdev;
29234 + list_add_tail(&new->l, head);
29235 +
29236 +out:
29237 + spin_unlock_irqrestore(&info->v2p_lock, flags);
29238 + return err;
29239 +}
29240 +
29241 +
29242 +/*
29243 +  Delete the translation entry specified
29244 +*/
29245 +int scsiback_del_translation_entry(struct vscsibk_info *info,
29246 + struct ids_tuple *v)
29247 +{
29248 + struct v2p_entry *entry;
29249 + struct list_head *head = &(info->v2p_entry_lists);
29250 + unsigned long flags;
29251 +
29252 + spin_lock_irqsave(&info->v2p_lock, flags);
29253 + /* Find out the translation entry specified */
29254 + list_for_each_entry(entry, head, l) {
29255 + if ((entry->v.chn == v->chn) &&
29256 + (entry->v.tgt == v->tgt) &&
29257 + (entry->v.lun == v->lun)) {
29258 + goto found;
29259 + }
29260 + }
29261 +
29262 + spin_unlock_irqrestore(&info->v2p_lock, flags);
29263 + return 1;
29264 +
29265 +found:
29266 +	/* Delete the translation entry specified */
29267 + scsi_device_put(entry->sdev);
29268 + list_del(&entry->l);
29269 + kfree(entry);
29270 +
29271 + spin_unlock_irqrestore(&info->v2p_lock, flags);
29272 + return 0;
29273 +}
29274 +
29275 +
29276 +/*
29277 + Perform virtual to physical translation
29278 +*/
29279 +struct scsi_device *scsiback_do_translation(struct vscsibk_info *info,
29280 + struct ids_tuple *v)
29281 +{
29282 + struct v2p_entry *entry;
29283 + struct list_head *head = &(info->v2p_entry_lists);
29284 + struct scsi_device *sdev = NULL;
29285 + unsigned long flags;
29286 +
29287 + spin_lock_irqsave(&info->v2p_lock, flags);
29288 + list_for_each_entry(entry, head, l) {
29289 + if ((entry->v.chn == v->chn) &&
29290 + (entry->v.tgt == v->tgt) &&
29291 + (entry->v.lun == v->lun)) {
29292 + sdev = entry->sdev;
29293 + goto out;
29294 + }
29295 + }
29296 +out:
29297 + spin_unlock_irqrestore(&info->v2p_lock, flags);
29298 + return sdev;
29299 +}
29300 +
29301 +
29302 +/*
29303 +  Release all translation entries
29304 +*/
29305 +void scsiback_release_translation_entry(struct vscsibk_info *info)
29306 +{
29307 + struct v2p_entry *entry, *tmp;
29308 + struct list_head *head = &(info->v2p_entry_lists);
29309 + unsigned long flags;
29310 +
29311 + spin_lock_irqsave(&info->v2p_lock, flags);
29312 + list_for_each_entry_safe(entry, tmp, head, l) {
29313 + scsi_device_put(entry->sdev);
29314 + list_del(&entry->l);
29315 + kfree(entry);
29316 + }
29317 +
29318 + spin_unlock_irqrestore(&info->v2p_lock, flags);
29319 + return;
29320 +
29321 +}
29322 Index: head-2008-11-25/drivers/xen/scsiback/xenbus.c
29323 ===================================================================
29324 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
29325 +++ head-2008-11-25/drivers/xen/scsiback/xenbus.c 2008-07-21 11:00:33.000000000 +0200
29326 @@ -0,0 +1,368 @@
29327 +/*
29328 + * Xen SCSI backend driver
29329 + *
29330 + * Copyright (c) 2008, FUJITSU Limited
29331 + *
29332 + * Based on the blkback driver code.
29333 + *
29334 + * This program is free software; you can redistribute it and/or
29335 + * modify it under the terms of the GNU General Public License version 2
29336 + * as published by the Free Software Foundation; or, when distributed
29337 + * separately from the Linux kernel or incorporated into other
29338 + * software packages, subject to the following license:
29339 + *
29340 + * Permission is hereby granted, free of charge, to any person obtaining a copy
29341 + * of this source file (the "Software"), to deal in the Software without
29342 + * restriction, including without limitation the rights to use, copy, modify,
29343 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
29344 + * and to permit persons to whom the Software is furnished to do so, subject to
29345 + * the following conditions:
29346 + *
29347 + * The above copyright notice and this permission notice shall be included in
29348 + * all copies or substantial portions of the Software.
29349 + *
29350 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29351 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29352 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29353 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29354 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29355 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29356 + * IN THE SOFTWARE.
29357 + */
29358 +
29359 +#include <stdarg.h>
29360 +#include <linux/module.h>
29361 +#include <linux/kthread.h>
29362 +#include <scsi/scsi.h>
29363 +#include <scsi/scsi_host.h>
29364 +#include <scsi/scsi_device.h>
29365 +
29366 +#include "common.h"
29367 +
29368 +struct backend_info
29369 +{
29370 + struct xenbus_device *dev;
29371 + struct vscsibk_info *info;
29372 +};
29373 +
29374 +
29375 +static int __vscsiif_name(struct backend_info *be, char *buf)
29376 +{
29377 + struct xenbus_device *dev = be->dev;
29378 + unsigned int domid, id;
29379 +
29380 + sscanf(dev->nodename, "backend/vscsi/%u/%u", &domid, &id);
29381 + snprintf(buf, TASK_COMM_LEN, "vscsi.%u.%u", be->info->domid, id);
29382 +
29383 + return 0;
29384 +}
29385 +
29386 +static int scsiback_map(struct backend_info *be)
29387 +{
29388 + struct xenbus_device *dev = be->dev;
29389 + unsigned long ring_ref;
29390 + unsigned int evtchn;
29391 + int err;
29392 + char name[TASK_COMM_LEN];
29393 +
29394 + err = xenbus_gather(XBT_NIL, dev->otherend,
29395 + "ring-ref", "%lu", &ring_ref,
29396 + "event-channel", "%u", &evtchn, NULL);
29397 + if (err) {
29398 + xenbus_dev_fatal(dev, err, "reading %s ring", dev->otherend);
29399 + return err;
29400 + }
29401 +
29402 + err = scsiback_init_sring(be->info, ring_ref, evtchn);
29403 + if (err)
29404 + return err;
29405 +
29406 + err = __vscsiif_name(be, name);
29407 + if (err) {
29408 + xenbus_dev_error(dev, err, "get scsiback dev name");
29409 + return err;
29410 + }
29411 +
29412 + be->info->kthread = kthread_run(scsiback_schedule, be->info, name);
29413 + if (IS_ERR(be->info->kthread)) {
29414 + err = PTR_ERR(be->info->kthread);
29415 + be->info->kthread = NULL;
29416 + xenbus_dev_error(be->dev, err, "start vscsiif");
29417 + return err;
29418 + }
29419 +
29420 + return 0;
29421 +}
29422 +
29423 +
29424 +struct scsi_device *scsiback_get_scsi_device(struct ids_tuple *phy)
29425 +{
29426 + struct Scsi_Host *shost;
29427 + struct scsi_device *sdev = NULL;
29428 +
29429 + shost = scsi_host_lookup(phy->hst);
29430 + if (IS_ERR(shost)) {
29431 + printk(KERN_ERR "scsiback: host%d doesn't exist.\n",
29432 + phy->hst);
29433 + return NULL;
29434 + }
29435 + sdev = scsi_device_lookup(shost, phy->chn, phy->tgt, phy->lun);
29436 + if (!sdev) {
29437 + printk(KERN_ERR "scsiback: %d:%d:%d:%d doesn't exist.\n",
29438 + phy->hst, phy->chn, phy->tgt, phy->lun);
29439 + scsi_host_put(shost);
29440 + return NULL;
29441 + }
29442 +
29443 + scsi_host_put(shost);
29444 + return (sdev);
29445 +}
29446 +
29447 +#define VSCSIBACK_OP_ADD_OR_DEL_LUN 1
29448 +#define VSCSIBACK_OP_UPDATEDEV_STATE 2
29449 +
29450 +
29451 +static void scsiback_do_lun_hotplug(struct backend_info *be, int op)
29452 +{
29453 + int i, err = 0;
29454 + struct ids_tuple phy, vir;
29455 + int device_state;
29456 + char str[64], state_str[64];
29457 + char **dir;
29458 + unsigned int dir_n = 0;
29459 + struct xenbus_device *dev = be->dev;
29460 + struct scsi_device *sdev;
29461 +
29462 + dir = xenbus_directory(XBT_NIL, dev->nodename, "vscsi-devs", &dir_n);
29463 + if (IS_ERR(dir))
29464 + return;
29465 +
29466 + for (i = 0; i < dir_n; i++) {
29467 +
29468 + /* read status */
29469 + snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]);
29470 + err = xenbus_scanf(XBT_NIL, dev->nodename, state_str, "%u",
29471 + &device_state);
29472 + if (XENBUS_EXIST_ERR(err))
29473 + continue;
29474 +
29475 + /* physical SCSI device */
29476 + snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", dir[i]);
29477 + err = xenbus_scanf(XBT_NIL, dev->nodename, str,
29478 + "%u:%u:%u:%u", &phy.hst, &phy.chn, &phy.tgt, &phy.lun);
29479 + if (XENBUS_EXIST_ERR(err)) {
29480 + xenbus_printf(XBT_NIL, dev->nodename, state_str,
29481 + "%d", XenbusStateClosed);
29482 + continue;
29483 + }
29484 +
29485 + /* virtual SCSI device */
29486 + snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
29487 + err = xenbus_scanf(XBT_NIL, dev->nodename, str,
29488 + "%u:%u:%u:%u", &vir.hst, &vir.chn, &vir.tgt, &vir.lun);
29489 + if (XENBUS_EXIST_ERR(err)) {
29490 + xenbus_printf(XBT_NIL, dev->nodename, state_str,
29491 + "%d", XenbusStateClosed);
29492 + continue;
29493 + }
29494 +
29495 + switch (op) {
29496 + case VSCSIBACK_OP_ADD_OR_DEL_LUN:
29497 + if (device_state == XenbusStateInitialising) {
29498 + sdev = scsiback_get_scsi_device(&phy);
29499 + if (!sdev)
29500 + xenbus_printf(XBT_NIL, dev->nodename, state_str,
29501 + "%d", XenbusStateClosed);
29502 + else {
29503 + err = scsiback_add_translation_entry(be->info, sdev, &vir);
29504 + if (!err) {
29505 + if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
29506 + "%d", XenbusStateInitialised)) {
29507 + printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
29508 + scsiback_del_translation_entry(be->info, &vir);
29509 + }
29510 + } else {
29511 + scsi_device_put(sdev);
29512 + xenbus_printf(XBT_NIL, dev->nodename, state_str,
29513 + "%d", XenbusStateClosed);
29514 + }
29515 + }
29516 + }
29517 +
29518 + if (device_state == XenbusStateClosing) {
29519 + if (!scsiback_del_translation_entry(be->info, &vir)) {
29520 + if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
29521 + "%d", XenbusStateClosed))
29522 + printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
29523 + }
29524 + }
29525 + break;
29526 +
29527 + case VSCSIBACK_OP_UPDATEDEV_STATE:
29528 + if (device_state == XenbusStateInitialised) {
29529 + /* modify vscsi-devs/dev-x/state */
29530 + if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
29531 + "%d", XenbusStateConnected)) {
29532 + printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
29533 + scsiback_del_translation_entry(be->info, &vir);
29534 + xenbus_printf(XBT_NIL, dev->nodename, state_str,
29535 + "%d", XenbusStateClosed);
29536 + }
29537 + }
29538 + break;
29539 +		/* additional processing can be added here when necessary */
29540 + default:
29541 + break;
29542 + }
29543 + }
29544 +
29545 + kfree(dir);
29546 + return ;
29547 +}
29548 +
29549 +
29550 +static void scsiback_frontend_changed(struct xenbus_device *dev,
29551 + enum xenbus_state frontend_state)
29552 +{
29553 + struct backend_info *be = dev->dev.driver_data;
29554 + int err;
29555 +
29556 + switch (frontend_state) {
29557 + case XenbusStateInitialising:
29558 + break;
29559 + case XenbusStateInitialised:
29560 + err = scsiback_map(be);
29561 + if (err)
29562 + break;
29563 +
29564 + scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN);
29565 + xenbus_switch_state(dev, XenbusStateConnected);
29566 +
29567 + break;
29568 + case XenbusStateConnected:
29569 +
29570 + scsiback_do_lun_hotplug(be, VSCSIBACK_OP_UPDATEDEV_STATE);
29571 +
29572 + if (dev->state == XenbusStateConnected)
29573 + break;
29574 +
29575 + xenbus_switch_state(dev, XenbusStateConnected);
29576 +
29577 + break;
29578 +
29579 + case XenbusStateClosing:
29580 + scsiback_disconnect(be->info);
29581 + xenbus_switch_state(dev, XenbusStateClosing);
29582 + break;
29583 +
29584 + case XenbusStateClosed:
29585 + xenbus_switch_state(dev, XenbusStateClosed);
29586 + if (xenbus_dev_is_online(dev))
29587 + break;
29588 + /* fall through if not online */
29589 + case XenbusStateUnknown:
29590 + device_unregister(&dev->dev);
29591 + break;
29592 +
29593 + case XenbusStateReconfiguring:
29594 + scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN);
29595 +
29596 + xenbus_switch_state(dev, XenbusStateReconfigured);
29597 +
29598 + break;
29599 +
29600 + default:
29601 + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
29602 + frontend_state);
29603 + break;
29604 + }
29605 +}
29606 +
29607 +
29608 +static int scsiback_remove(struct xenbus_device *dev)
29609 +{
29610 + struct backend_info *be = dev->dev.driver_data;
29611 +
29612 + if (be->info) {
29613 + scsiback_disconnect(be->info);
29614 + scsiback_release_translation_entry(be->info);
29615 + scsiback_free(be->info);
29616 + be->info = NULL;
29617 + }
29618 +
29619 + kfree(be);
29620 + dev->dev.driver_data = NULL;
29621 +
29622 + return 0;
29623 +}
29624 +
29625 +
29626 +static int scsiback_probe(struct xenbus_device *dev,
29627 + const struct xenbus_device_id *id)
29628 +{
29629 + int err;
29630 +
29631 + struct backend_info *be = kzalloc(sizeof(struct backend_info),
29632 + GFP_KERNEL);
29633 +
29634 + DPRINTK("%p %d\n", dev, dev->otherend_id);
29635 +
29636 + if (!be) {
29637 + xenbus_dev_fatal(dev, -ENOMEM,
29638 + "allocating backend structure");
29639 + return -ENOMEM;
29640 + }
29641 + be->dev = dev;
29642 + dev->dev.driver_data = be;
29643 +
29644 + be->info = vscsibk_info_alloc(dev->otherend_id);
29645 + if (IS_ERR(be->info)) {
29646 + err = PTR_ERR(be->info);
29647 + be->info = NULL;
29648 + xenbus_dev_fatal(dev, err, "creating scsihost interface");
29649 + goto fail;
29650 + }
29651 +
29652 + be->info->dev = dev;
29653 + be->info->irq = 0;
29654 +
29655 + scsiback_init_translation_table(be->info);
29656 +
29657 + err = xenbus_switch_state(dev, XenbusStateInitWait);
29658 + if (err)
29659 + goto fail;
29660 +
29661 + return 0;
29662 +
29663 +
29664 +fail:
29665 + printk(KERN_WARNING "scsiback: %s failed\n",__FUNCTION__);
29666 + scsiback_remove(dev);
29667 +
29668 + return err;
29669 +}
29670 +
29671 +
29672 +static struct xenbus_device_id scsiback_ids[] = {
29673 + { "vscsi" },
29674 + { "" }
29675 +};
29676 +
29677 +static struct xenbus_driver scsiback = {
29678 + .name = "vscsi",
29679 + .owner = THIS_MODULE,
29680 + .ids = scsiback_ids,
29681 + .probe = scsiback_probe,
29682 + .remove = scsiback_remove,
29683 + .otherend_changed = scsiback_frontend_changed
29684 +};
29685 +
29686 +int scsiback_xenbus_init(void)
29687 +{
29688 + return xenbus_register_backend(&scsiback);
29689 +}
29690 +
29691 +void scsiback_xenbus_unregister(void)
29692 +{
29693 + xenbus_unregister_driver(&scsiback);
29694 +}
29695 Index: head-2008-11-25/drivers/xen/scsifront/Makefile
29696 ===================================================================
29697 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
29698 +++ head-2008-11-25/drivers/xen/scsifront/Makefile 2008-07-21 11:00:33.000000000 +0200
29699 @@ -0,0 +1,3 @@
29700 +
29701 +obj-$(CONFIG_XEN_SCSI_FRONTEND) := xenscsi.o
29702 +xenscsi-objs := scsifront.o xenbus.o
29703 Index: head-2008-11-25/drivers/xen/scsifront/common.h
29704 ===================================================================
29705 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
29706 +++ head-2008-11-25/drivers/xen/scsifront/common.h 2008-07-21 11:00:33.000000000 +0200
29707 @@ -0,0 +1,129 @@
29708 +/*
29709 + * Xen SCSI frontend driver
29710 + *
29711 + * Copyright (c) 2008, FUJITSU Limited
29712 + *
29713 + * This program is free software; you can redistribute it and/or
29714 + * modify it under the terms of the GNU General Public License version 2
29715 + * as published by the Free Software Foundation; or, when distributed
29716 + * separately from the Linux kernel or incorporated into other
29717 + * software packages, subject to the following license:
29718 + *
29719 + * Permission is hereby granted, free of charge, to any person obtaining a copy
29720 + * of this source file (the "Software"), to deal in the Software without
29721 + * restriction, including without limitation the rights to use, copy, modify,
29722 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
29723 + * and to permit persons to whom the Software is furnished to do so, subject to
29724 + * the following conditions:
29725 + *
29726 + * The above copyright notice and this permission notice shall be included in
29727 + * all copies or substantial portions of the Software.
29728 + *
29729 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29730 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29731 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29732 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29733 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29734 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29735 + * IN THE SOFTWARE.
29736 + */
29737 +
29738 +#ifndef __XEN_DRIVERS_SCSIFRONT_H__
29739 +#define __XEN_DRIVERS_SCSIFRONT_H__
29740 +
29741 +#include <linux/version.h>
29742 +#include <linux/module.h>
29743 +#include <linux/kernel.h>
29744 +#include <linux/device.h>
29745 +#include <linux/kthread.h>
29746 +#include <linux/wait.h>
29747 +#include <linux/interrupt.h>
29748 +#include <linux/spinlock.h>
29749 +#include <linux/sched.h>
29750 +#include <linux/blkdev.h>
29751 +#include <scsi/scsi_cmnd.h>
29752 +#include <scsi/scsi_device.h>
29753 +#include <scsi/scsi.h>
29754 +#include <scsi/scsi_host.h>
29755 +#include <xen/xenbus.h>
29756 +#include <xen/gnttab.h>
29757 +#include <xen/evtchn.h>
29758 +#include <xen/interface/xen.h>
29759 +#include <xen/interface/io/ring.h>
29760 +#include <xen/interface/io/vscsiif.h>
29761 +#include <asm/delay.h>
29762 +
29763 +
29764 +#define GRANT_INVALID_REF 0
29765 +#define VSCSI_IN_ABORT 1
29766 +#define VSCSI_IN_RESET 2
29767 +
29768 +/* tuning points */
29769 +#define VSCSIIF_DEFAULT_CMD_PER_LUN 10
29770 +#define VSCSIIF_MAX_TARGET 64
29771 +#define VSCSIIF_MAX_LUN 255
29772 +
29773 +#define VSCSIIF_RING_SIZE \
29774 + __RING_SIZE((struct vscsiif_sring *)0, PAGE_SIZE)
29775 +#define VSCSIIF_MAX_REQS VSCSIIF_RING_SIZE
29776 +
29777 +struct vscsifrnt_shadow {
29778 + uint16_t next_free;
29779 +
29780 + /* command between backend and frontend
29781 + * VSCSIIF_ACT_SCSI_CDB or VSCSIIF_ACT_SCSI_RESET */
29782 + unsigned char act;
29783 +
29784 +	/* device reset handling */
29785 + wait_queue_head_t wq_reset; /* reset work queue */
29786 + int wait_reset; /* reset work queue condition */
29787 + int32_t rslt_reset; /* reset response status */
29788 +					/* (SUCCESS or FAILED)        */
29789 +
29790 + /* for DMA_TO_DEVICE(1), DMA_FROM_DEVICE(2), DMA_NONE(3)
29791 + requests */
29792 + unsigned int sc_data_direction;
29793 +
29794 + /* Number of pieces of scatter-gather */
29795 + unsigned int nr_segments;
29796 +
29797 +	/* the struct scsi_cmnd submitted by the SCSI midlayer is stored here */
29798 + unsigned long req_scsi_cmnd;
29799 + int gref[VSCSIIF_SG_TABLESIZE];
29800 +};
29801 +
29802 +struct vscsifrnt_info {
29803 + struct xenbus_device *dev;
29804 +
29805 + struct Scsi_Host *host;
29806 +
29807 + spinlock_t io_lock;
29808 + spinlock_t shadow_lock;
29809 + unsigned int evtchn;
29810 + unsigned int irq;
29811 +
29812 + grant_ref_t ring_ref;
29813 + struct vscsiif_front_ring ring;
29814 + struct vscsiif_response ring_res;
29815 +
29816 + struct vscsifrnt_shadow shadow[VSCSIIF_MAX_REQS];
29817 + uint32_t shadow_free;
29818 +
29819 + struct task_struct *kthread;
29820 + wait_queue_head_t wq;
29821 + unsigned int waiting_resp;
29822 +
29823 +};
29824 +
29825 +#define DPRINTK(_f, _a...) \
29826 + pr_debug("(file=%s, line=%d) " _f, \
29827 + __FILE__ , __LINE__ , ## _a )
29828 +
29829 +int scsifront_xenbus_init(void);
29830 +void scsifront_xenbus_unregister(void);
29831 +int scsifront_schedule(void *data);
29832 +irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs);
29833 +int scsifront_cmd_done(struct vscsifrnt_info *info);
29834 +
29835 +
29836 +#endif /* __XEN_DRIVERS_SCSIFRONT_H__ */
29837 Index: head-2008-11-25/drivers/xen/scsifront/scsifront.c
29838 ===================================================================
29839 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
29840 +++ head-2008-11-25/drivers/xen/scsifront/scsifront.c 2008-07-21 11:00:33.000000000 +0200
29841 @@ -0,0 +1,511 @@
29842 +/*
29843 + * Xen SCSI frontend driver
29844 + *
29845 + * Copyright (c) 2008, FUJITSU Limited
29846 + *
29847 + * This program is free software; you can redistribute it and/or
29848 + * modify it under the terms of the GNU General Public License version 2
29849 + * as published by the Free Software Foundation; or, when distributed
29850 + * separately from the Linux kernel or incorporated into other
29851 + * software packages, subject to the following license:
29852 + *
29853 + * Permission is hereby granted, free of charge, to any person obtaining a copy
29854 + * of this source file (the "Software"), to deal in the Software without
29855 + * restriction, including without limitation the rights to use, copy, modify,
29856 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
29857 + * and to permit persons to whom the Software is furnished to do so, subject to
29858 + * the following conditions:
29859 + *
29860 + * The above copyright notice and this permission notice shall be included in
29861 + * all copies or substantial portions of the Software.
29862 + *
29863 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29864 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29865 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29866 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29867 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29868 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29869 + * IN THE SOFTWARE.
29870 + */
29871 +
29872 +
29873 +#include <linux/version.h>
29874 +#include "common.h"
29875 +
29876 +static int get_id_from_freelist(struct vscsifrnt_info *info)
29877 +{
29878 + unsigned long flags;
29879 + uint32_t free;
29880 +
29881 + spin_lock_irqsave(&info->shadow_lock, flags);
29882 +
29883 + free = info->shadow_free;
29884 + BUG_ON(free > VSCSIIF_MAX_REQS);
29885 + info->shadow_free = info->shadow[free].next_free;
29886 + info->shadow[free].next_free = 0x0fff;
29887 +
29888 + info->shadow[free].wait_reset = 0;
29889 +
29890 + spin_unlock_irqrestore(&info->shadow_lock, flags);
29891 +
29892 + return free;
29893 +}
29894 +
29895 +static void add_id_to_freelist(struct vscsifrnt_info *info, uint32_t id)
29896 +{
29897 + unsigned long flags;
29898 +
29899 + spin_lock_irqsave(&info->shadow_lock, flags);
29900 +
29901 + info->shadow[id].next_free = info->shadow_free;
29902 + info->shadow[id].req_scsi_cmnd = 0;
29903 + info->shadow_free = id;
29904 +
29905 + spin_unlock_irqrestore(&info->shadow_lock, flags);
29906 +}
29907 +
29908 +
29909 +struct vscsiif_request * scsifront_pre_request(struct vscsifrnt_info *info)
29910 +{
29911 + struct vscsiif_front_ring *ring = &(info->ring);
29912 + vscsiif_request_t *ring_req;
29913 + uint32_t id;
29914 +
29915 + ring_req = RING_GET_REQUEST(&(info->ring), ring->req_prod_pvt);
29916 +
29917 + ring->req_prod_pvt++;
29918 +
29919 +	id = get_id_from_freelist(info);	/* the id is echoed back in the response */
29920 + ring_req->rqid = (uint16_t)id;
29921 +
29922 + return ring_req;
29923 +}
29924 +
29925 +
29926 +static void scsifront_notify_work(struct vscsifrnt_info *info)
29927 +{
29928 + info->waiting_resp = 1;
29929 + wake_up(&info->wq);
29930 +}
29931 +
29932 +
29933 +static void scsifront_do_request(struct vscsifrnt_info *info)
29934 +{
29935 + struct vscsiif_front_ring *ring = &(info->ring);
29936 + unsigned int irq = info->irq;
29937 + int notify;
29938 +
29939 + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
29940 + if (notify)
29941 + notify_remote_via_irq(irq);
29942 +}
29943 +
29944 +irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs)
29945 +{
29946 + scsifront_notify_work((struct vscsifrnt_info *)dev_id);
29947 + return IRQ_HANDLED;
29948 +}
29949 +
29950 +
29951 +static void scsifront_gnttab_done(struct vscsifrnt_shadow *s, uint32_t id)
29952 +{
29953 + int i;
29954 +
29955 + if (s->sc_data_direction == DMA_NONE)
29956 + return;
29957 +
29958 + if (s->nr_segments) {
29959 + for (i = 0; i < s->nr_segments; i++) {
29960 + if (unlikely(gnttab_query_foreign_access(
29961 + s->gref[i]) != 0)) {
29962 + printk(KERN_ALERT "scsifront: "
29963 + "grant still in use by backend.\n");
29964 + BUG();
29965 + }
29966 + gnttab_end_foreign_access(s->gref[i], 0UL);
29967 + }
29968 + }
29969 +
29970 + return;
29971 +}
29972 +
29973 +
29974 +static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
29975 + vscsiif_response_t *ring_res)
29976 +{
29977 + struct scsi_cmnd *sc;
29978 + uint32_t id;
29979 + uint8_t sense_len;
29980 +
29981 + id = ring_res->rqid;
29982 + sc = (struct scsi_cmnd *)info->shadow[id].req_scsi_cmnd;
29983 +
29984 + if (sc == NULL)
29985 + BUG();
29986 +
29987 + scsifront_gnttab_done(&info->shadow[id], id);
29988 + add_id_to_freelist(info, id);
29989 +
29990 + sc->result = ring_res->rslt;
29991 + sc->resid = ring_res->residual_len;
29992 +
29993 + if (ring_res->sense_len > VSCSIIF_SENSE_BUFFERSIZE)
29994 + sense_len = VSCSIIF_SENSE_BUFFERSIZE;
29995 + else
29996 + sense_len = ring_res->sense_len;
29997 +
29998 + if (sense_len)
29999 + memcpy(sc->sense_buffer, ring_res->sense_buffer, sense_len);
30000 +
30001 + sc->scsi_done(sc);
30002 +
30003 + return;
30004 +}
30005 +
30006 +
30007 +static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
30008 + vscsiif_response_t *ring_res)
30009 +{
30010 + uint16_t id = ring_res->rqid;
30011 + unsigned long flags;
30012 +
30013 + spin_lock_irqsave(&info->shadow_lock, flags);
30014 + info->shadow[id].wait_reset = 1;
30015 + info->shadow[id].rslt_reset = ring_res->rslt;
30016 + spin_unlock_irqrestore(&info->shadow_lock, flags);
30017 +
30018 + wake_up(&(info->shadow[id].wq_reset));
30019 +}
30020 +
30021 +
30022 +int scsifront_cmd_done(struct vscsifrnt_info *info)
30023 +{
30024 + vscsiif_response_t *ring_res;
30025 +
30026 + RING_IDX i, rp;
30027 + int more_to_do = 0;
30028 + unsigned long flags;
30029 +
30030 + spin_lock_irqsave(&info->io_lock, flags);
30031 +
30032 + rp = info->ring.sring->rsp_prod;
30033 + rmb();
30034 + for (i = info->ring.rsp_cons; i != rp; i++) {
30035 +
30036 + ring_res = RING_GET_RESPONSE(&info->ring, i);
30037 +
30038 + if (info->shadow[ring_res->rqid].act == VSCSIIF_ACT_SCSI_CDB)
30039 + scsifront_cdb_cmd_done(info, ring_res);
30040 + else
30041 + scsifront_sync_cmd_done(info, ring_res);
30042 + }
30043 +
30044 + info->ring.rsp_cons = i;
30045 +
30046 + if (i != info->ring.req_prod_pvt) {
30047 + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
30048 + } else {
30049 + info->ring.sring->rsp_event = i + 1;
30050 + }
30051 +
30052 + spin_unlock_irqrestore(&info->io_lock, flags);
30053 +
30054 +
30055 + /* Yield point for this unbounded loop. */
30056 + cond_resched();
30057 +
30058 + return more_to_do;
30059 +}
30060 +
30061 +
30062 +
30063 +
30064 +int scsifront_schedule(void *data)
30065 +{
30066 + struct vscsifrnt_info *info = (struct vscsifrnt_info *)data;
30067 +
30068 + while (!kthread_should_stop()) {
30069 + wait_event_interruptible(
30070 + info->wq,
30071 + info->waiting_resp || kthread_should_stop());
30072 +
30073 + info->waiting_resp = 0;
30074 + smp_mb();
30075 +
30076 + if (scsifront_cmd_done(info))
30077 + info->waiting_resp = 1;
30078 + }
30079 +
30080 + return 0;
30081 +}
30082 +
30083 +
30084 +
30085 +static int map_data_for_request(struct vscsifrnt_info *info,
30086 + struct scsi_cmnd *sc, vscsiif_request_t *ring_req, uint32_t id)
30087 +{
30088 + grant_ref_t gref_head;
30089 + struct page *page;
30090 + int err, i, ref, ref_cnt = 0;
30091 + int write = (sc->sc_data_direction == DMA_TO_DEVICE);
30092 + int nr_pages, off, len, bytes;
30093 + unsigned long buffer_pfn;
30094 + unsigned int data_len = 0;
30095 +
30096 + if (sc->sc_data_direction == DMA_NONE)
30097 + return 0;
30098 +
30099 + err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head);
30100 + if (err) {
30101 + printk(KERN_ERR "scsifront: gnttab_alloc_grant_references() error\n");
30102 + return -ENOMEM;
30103 + }
30104 +
30105 + if (sc->use_sg) {
30106 +		/* adapted from scsi_lib.c:scsi_req_map_sg() */
30107 + struct scatterlist *sg = (struct scatterlist *)sc->request_buffer;
30108 + nr_pages = (sc->request_bufflen + sg[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
30109 +
30110 + if (nr_pages > VSCSIIF_SG_TABLESIZE) {
30111 + printk(KERN_ERR "scsifront: Unable to map request_buffer for command!\n");
30112 + ref_cnt = (-E2BIG);
30113 + goto big_to_sg;
30114 + }
30115 +
30116 + for (i = 0; i < sc->use_sg; i++) {
30117 + page = sg[i].page;
30118 + off = sg[i].offset;
30119 + len = sg[i].length;
30120 + data_len += len;
30121 +
30122 + buffer_pfn = page_to_phys(page) >> PAGE_SHIFT;
30123 +
30124 + while (len > 0) {
30125 + bytes = min_t(unsigned int, len, PAGE_SIZE - off);
30126 +
30127 + ref = gnttab_claim_grant_reference(&gref_head);
30128 + BUG_ON(ref == -ENOSPC);
30129 +
30130 + gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id,
30131 + buffer_pfn, write);
30132 +
30133 + info->shadow[id].gref[ref_cnt] = ref;
30134 + ring_req->seg[ref_cnt].gref = ref;
30135 + ring_req->seg[ref_cnt].offset = (uint16_t)off;
30136 + ring_req->seg[ref_cnt].length = (uint16_t)bytes;
30137 +
30138 + buffer_pfn++;
30139 + len -= bytes;
30140 + off = 0;
30141 + ref_cnt++;
30142 + }
30143 + }
30144 + } else if (sc->request_bufflen) {
30145 + unsigned long end = ((unsigned long)sc->request_buffer
30146 + + sc->request_bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
30147 + unsigned long start = (unsigned long)sc->request_buffer >> PAGE_SHIFT;
30148 +
30149 + page = virt_to_page(sc->request_buffer);
30150 + nr_pages = end - start;
30151 + len = sc->request_bufflen;
30152 +
30153 + if (nr_pages > VSCSIIF_SG_TABLESIZE) {
30154 + ref_cnt = (-E2BIG);
30155 + goto big_to_sg;
30156 + }
30157 +
30158 + buffer_pfn = page_to_phys(page) >> PAGE_SHIFT;
30159 +
30160 + off = offset_in_page((unsigned long)sc->request_buffer);
30161 + for (i = 0; i < nr_pages; i++) {
30162 + bytes = PAGE_SIZE - off;
30163 +
30164 + if (bytes > len)
30165 + bytes = len;
30166 +
30167 + ref = gnttab_claim_grant_reference(&gref_head);
30168 + BUG_ON(ref == -ENOSPC);
30169 +
30170 + gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id,
30171 + buffer_pfn, write);
30172 +
30173 + info->shadow[id].gref[i] = ref;
30174 + ring_req->seg[i].gref = ref;
30175 + ring_req->seg[i].offset = (uint16_t)off;
30176 + ring_req->seg[i].length = (uint16_t)bytes;
30177 +
30178 + buffer_pfn++;
30179 + len -= bytes;
30180 + off = 0;
30181 + ref_cnt++;
30182 + }
30183 + }
30184 +
30185 +big_to_sg:
30186 +
30187 + gnttab_free_grant_references(gref_head);
30188 +
30189 + return ref_cnt;
30190 +}
30191 +
30192 +static int scsifront_queuecommand(struct scsi_cmnd *sc,
30193 + void (*done)(struct scsi_cmnd *))
30194 +{
30195 + struct vscsifrnt_info *info =
30196 + (struct vscsifrnt_info *) sc->device->host->hostdata;
30197 + vscsiif_request_t *ring_req;
30198 + int ref_cnt;
30199 + uint16_t rqid;
30200 +
30201 + if (RING_FULL(&info->ring)) {
30202 + goto out_host_busy;
30203 + }
30204 +
30205 + sc->scsi_done = done;
30206 + sc->result = 0;
30207 +
30208 + ring_req = scsifront_pre_request(info);
30209 + rqid = ring_req->rqid;
30210 + ring_req->act = VSCSIIF_ACT_SCSI_CDB;
30211 +
30212 + ring_req->id = sc->device->id;
30213 + ring_req->lun = sc->device->lun;
30214 + ring_req->channel = sc->device->channel;
30215 + ring_req->cmd_len = sc->cmd_len;
30216 +
30217 + BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
30218 +
30219 + if ( sc->cmd_len )
30220 + memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
30221 + else
30222 + memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
30223 +
30224 + ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
30225 + ring_req->timeout_per_command = (sc->timeout_per_command / HZ);
30226 +
30227 + info->shadow[rqid].req_scsi_cmnd = (unsigned long)sc;
30228 + info->shadow[rqid].sc_data_direction = sc->sc_data_direction;
30229 + info->shadow[rqid].act = ring_req->act;
30230 +
30231 + ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
30232 + if (ref_cnt < 0) {
30233 + add_id_to_freelist(info, rqid);
30234 + if (ref_cnt == (-ENOMEM))
30235 + goto out_host_busy;
30236 + else {
30237 + sc->result = (DID_ERROR << 16);
30238 + goto out_fail_command;
30239 + }
30240 + }
30241 +
30242 + ring_req->nr_segments = (uint8_t)ref_cnt;
30243 + info->shadow[rqid].nr_segments = ref_cnt;
30244 +
30245 + scsifront_do_request(info);
30246 +
30247 + return 0;
30248 +
30249 +out_host_busy:
30250 + return SCSI_MLQUEUE_HOST_BUSY;
30251 +
30252 +out_fail_command:
30253 + done(sc);
30254 + return 0;
30255 +}
30256 +
30257 +
30258 +static int scsifront_eh_abort_handler(struct scsi_cmnd *sc)
30259 +{
30260 + return (FAILED);
30261 +}
30262 +
30263 +/* vscsi supports only device reset, since each LUN is exposed as its own device */
30264 +static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
30265 +{
30266 + struct Scsi_Host *host = sc->device->host;
30267 + struct vscsifrnt_info *info =
30268 + (struct vscsifrnt_info *) sc->device->host->hostdata;
30269 +
30270 + vscsiif_request_t *ring_req;
30271 + uint16_t rqid;
30272 + int err;
30273 +
30274 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
30275 + spin_lock_irq(host->host_lock);
30276 +#endif
30277 +
30278 + ring_req = scsifront_pre_request(info);
30279 + ring_req->act = VSCSIIF_ACT_SCSI_RESET;
30280 +
30281 + rqid = ring_req->rqid;
30282 + info->shadow[rqid].act = VSCSIIF_ACT_SCSI_RESET;
30283 +
30284 + ring_req->channel = sc->device->channel;
30285 + ring_req->id = sc->device->id;
30286 + ring_req->lun = sc->device->lun;
30287 + ring_req->cmd_len = sc->cmd_len;
30288 +
30289 + if ( sc->cmd_len )
30290 + memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
30291 + else
30292 + memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
30293 +
30294 + ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
30295 + ring_req->timeout_per_command = (sc->timeout_per_command / HZ);
30296 + ring_req->nr_segments = 0;
30297 +
30298 + scsifront_do_request(info);
30299 +
30300 + spin_unlock_irq(host->host_lock);
30301 + wait_event_interruptible(info->shadow[rqid].wq_reset,
30302 + info->shadow[rqid].wait_reset);
30303 + spin_lock_irq(host->host_lock);
30304 +
30305 + err = info->shadow[rqid].rslt_reset;
30306 +
30307 + add_id_to_freelist(info, rqid);
30308 +
30309 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
30310 + spin_unlock_irq(host->host_lock);
30311 +#endif
30312 + return (err);
30313 +}
30314 +
30315 +
30316 +struct scsi_host_template scsifront_sht = {
30317 + .module = THIS_MODULE,
30318 + .name = "Xen SCSI frontend driver",
30319 + .queuecommand = scsifront_queuecommand,
30320 + .eh_abort_handler = scsifront_eh_abort_handler,
30321 + .eh_device_reset_handler= scsifront_dev_reset_handler,
30322 + .cmd_per_lun = VSCSIIF_DEFAULT_CMD_PER_LUN,
30323 + .can_queue = VSCSIIF_MAX_REQS,
30324 + .this_id = -1,
30325 + .sg_tablesize = VSCSIIF_SG_TABLESIZE,
30326 + .use_clustering = DISABLE_CLUSTERING,
30327 + .proc_name = "scsifront",
30328 +};
30329 +
30330 +
30331 +static int __init scsifront_init(void)
30332 +{
30333 + int err;
30334 +
30335 + if (!is_running_on_xen())
30336 + return -ENODEV;
30337 +
30338 + err = scsifront_xenbus_init();
30339 +
30340 + return err;
30341 +}
30342 +
30343 +static void __exit scsifront_exit(void)
30344 +{
30345 + scsifront_xenbus_unregister();
30346 +}
30347 +
30348 +module_init(scsifront_init);
30349 +module_exit(scsifront_exit);
30350 +
30351 +MODULE_DESCRIPTION("Xen SCSI frontend driver");
30352 +MODULE_LICENSE("GPL");
30353 Index: head-2008-11-25/drivers/xen/scsifront/xenbus.c
30354 ===================================================================
30355 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
30356 +++ head-2008-11-25/drivers/xen/scsifront/xenbus.c 2008-07-21 11:00:33.000000000 +0200
30357 @@ -0,0 +1,421 @@
30358 +/*
30359 + * Xen SCSI frontend driver
30360 + *
30361 + * Copyright (c) 2008, FUJITSU Limited
30362 + *
30363 + * This program is free software; you can redistribute it and/or
30364 + * modify it under the terms of the GNU General Public License version 2
30365 + * as published by the Free Software Foundation; or, when distributed
30366 + * separately from the Linux kernel or incorporated into other
30367 + * software packages, subject to the following license:
30368 + *
30369 + * Permission is hereby granted, free of charge, to any person obtaining a copy
30370 + * of this source file (the "Software"), to deal in the Software without
30371 + * restriction, including without limitation the rights to use, copy, modify,
30372 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
30373 + * and to permit persons to whom the Software is furnished to do so, subject to
30374 + * the following conditions:
30375 + *
30376 + * The above copyright notice and this permission notice shall be included in
30377 + * all copies or substantial portions of the Software.
30378 + *
30379 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30380 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30381 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30382 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30383 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30384 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30385 + * IN THE SOFTWARE.
30386 + */
30387 +
30388 +
30389 +#include <linux/version.h>
30390 +#include "common.h"
30391 +
30392 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
30393 + #define DEFAULT_TASK_COMM_LEN 16
30394 +#else
30395 + #define DEFAULT_TASK_COMM_LEN TASK_COMM_LEN
30396 +#endif
30397 +
30398 +extern struct scsi_host_template scsifront_sht;
30399 +
30400 +static void scsifront_free(struct vscsifrnt_info *info)
30401 +{
30402 + struct Scsi_Host *host = info->host;
30403 +
30404 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
30405 + if (host->shost_state != SHOST_DEL) {
30406 +#else
30407 + if (!test_bit(SHOST_DEL, &host->shost_state)) {
30408 +#endif
30409 + scsi_remove_host(info->host);
30410 + }
30411 +
30412 + if (info->ring_ref != GRANT_INVALID_REF) {
30413 + gnttab_end_foreign_access(info->ring_ref,
30414 + (unsigned long)info->ring.sring);
30415 + info->ring_ref = GRANT_INVALID_REF;
30416 + info->ring.sring = NULL;
30417 + }
30418 +
30419 + if (info->irq)
30420 + unbind_from_irqhandler(info->irq, info);
30421 + info->irq = 0;
30422 +
30423 + scsi_host_put(info->host);
30424 +}
30425 +
30426 +
30427 +static int scsifront_alloc_ring(struct vscsifrnt_info *info)
30428 +{
30429 + struct xenbus_device *dev = info->dev;
30430 + struct vscsiif_sring *sring;
30431 + int err = -ENOMEM;
30432 +
30433 +
30434 + info->ring_ref = GRANT_INVALID_REF;
30435 +
30436 + /***** Frontend to Backend ring start *****/
30437 + sring = (struct vscsiif_sring *) __get_free_page(GFP_KERNEL);
30438 + if (!sring) {
30439 + xenbus_dev_fatal(dev, err, "fail to allocate shared ring (Front to Back)");
30440 + return err;
30441 + }
30442 + SHARED_RING_INIT(sring);
30443 + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
30444 +
30445 + err = xenbus_grant_ring(dev, virt_to_mfn(sring));
30446 + if (err < 0) {
30447 + free_page((unsigned long) sring);
30448 + info->ring.sring = NULL;
30449 + xenbus_dev_fatal(dev, err, "fail to grant shared ring (Front to Back)");
30450 + goto free_sring;
30451 + }
30452 + info->ring_ref = err;
30453 +
30454 + err = bind_listening_port_to_irqhandler(
30455 + dev->otherend_id, scsifront_intr,
30456 + SA_SAMPLE_RANDOM, "scsifront", info);
30457 +
30458 + if (err <= 0) {
30459 + xenbus_dev_fatal(dev, err, "bind_listening_port_to_irqhandler");
30460 + goto free_sring;
30461 + }
30462 + info->irq = err;
30463 +
30464 + return 0;
30465 +
30466 +/* free resource */
30467 +free_sring:
30468 + scsifront_free(info);
30469 +
30470 + return err;
30471 +}
30472 +
30473 +
30474 +static int scsifront_init_ring(struct vscsifrnt_info *info)
30475 +{
30476 + struct xenbus_device *dev = info->dev;
30477 + struct xenbus_transaction xbt;
30478 + int err;
30479 +
30480 + DPRINTK("%s\n",__FUNCTION__);
30481 +
30482 + err = scsifront_alloc_ring(info);
30483 + if (err)
30484 + return err;
30485 + DPRINTK("%u %u\n", info->ring_ref, info->evtchn);
30486 +
30487 +again:
30488 + err = xenbus_transaction_start(&xbt);
30489 + if (err) {
30490 + xenbus_dev_fatal(dev, err, "starting transaction");
30491 + }
30492 +
30493 + err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u",
30494 + info->ring_ref);
30495 + if (err) {
30496 + xenbus_dev_fatal(dev, err, "%s", "writing ring-ref");
30497 + goto fail;
30498 + }
30499 +
30500 + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
30501 + irq_to_evtchn_port(info->irq));
30502 +
30503 + if (err) {
30504 + xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
30505 + goto fail;
30506 + }
30507 +
30508 + err = xenbus_transaction_end(xbt, 0);
30509 + if (err) {
30510 + if (err == -EAGAIN)
30511 + goto again;
30512 + xenbus_dev_fatal(dev, err, "completing transaction");
30513 + goto free_sring;
30514 + }
30515 +
30516 + return 0;
30517 +
30518 +fail:
30519 + xenbus_transaction_end(xbt, 1);
30520 +free_sring:
30521 + /* free resource */
30522 + scsifront_free(info);
30523 +
30524 + return err;
30525 +}
30526 +
30527 +
30528 +static int scsifront_probe(struct xenbus_device *dev,
30529 + const struct xenbus_device_id *id)
30530 +{
30531 + struct vscsifrnt_info *info;
30532 + struct Scsi_Host *host;
30533 + int i, err = -ENOMEM;
30534 + char name[DEFAULT_TASK_COMM_LEN];
30535 +
30536 + host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
30537 + if (!host) {
30538 + xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
30539 + return err;
30540 + }
30541 + info = (struct vscsifrnt_info *) host->hostdata;
30542 + info->host = host;
30543 +
30544 +
30545 + dev->dev.driver_data = info;
30546 + info->dev = dev;
30547 +
30548 + for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
30549 + info->shadow[i].next_free = i + 1;
30550 + init_waitqueue_head(&(info->shadow[i].wq_reset));
30551 + info->shadow[i].wait_reset = 0;
30552 + }
30553 + info->shadow[VSCSIIF_MAX_REQS - 1].next_free = 0x0fff;
30554 +
30555 + err = scsifront_init_ring(info);
30556 + if (err) {
30557 + scsi_host_put(host);
30558 + return err;
30559 + }
30560 +
30561 + init_waitqueue_head(&info->wq);
30562 + spin_lock_init(&info->io_lock);
30563 + spin_lock_init(&info->shadow_lock);
30564 +
30565 + snprintf(name, DEFAULT_TASK_COMM_LEN, "vscsiif.%d", info->host->host_no);
30566 +
30567 + info->kthread = kthread_run(scsifront_schedule, info, name);
30568 + if (IS_ERR(info->kthread)) {
30569 + err = PTR_ERR(info->kthread);
30570 + info->kthread = NULL;
30571 + printk(KERN_ERR "scsifront: kthread start err %d\n", err);
30572 + goto free_sring;
30573 + }
30574 +
30575 + host->max_id = VSCSIIF_MAX_TARGET;
30576 + host->max_channel = 0;
30577 + host->max_lun = VSCSIIF_MAX_LUN;
30578 + host->max_sectors = (VSCSIIF_SG_TABLESIZE - 1) * PAGE_SIZE / 512;
30579 +
30580 + err = scsi_add_host(host, &dev->dev);
30581 + if (err) {
30582 + printk(KERN_ERR "scsifront: fail to add scsi host %d\n", err);
30583 + goto free_sring;
30584 + }
30585 +
30586 + xenbus_switch_state(dev, XenbusStateInitialised);
30587 +
30588 + return 0;
30589 +
30590 +free_sring:
30591 + /* free resource */
30592 + scsifront_free(info);
30593 + return err;
30594 +}
30595 +
30596 +static int scsifront_remove(struct xenbus_device *dev)
30597 +{
30598 + struct vscsifrnt_info *info = dev->dev.driver_data;
30599 +
30600 + DPRINTK("%s: %s removed\n",__FUNCTION__ ,dev->nodename);
30601 +
30602 + if (info->kthread) {
30603 + kthread_stop(info->kthread);
30604 + info->kthread = NULL;
30605 + }
30606 +
30607 + scsifront_free(info);
30608 +
30609 + return 0;
30610 +}
30611 +
30612 +
30613 +static int scsifront_disconnect(struct vscsifrnt_info *info)
30614 +{
30615 + struct xenbus_device *dev = info->dev;
30616 + struct Scsi_Host *host = info->host;
30617 +
30618 + DPRINTK("%s: %s disconnect\n",__FUNCTION__ ,dev->nodename);
30619 +
30620 + /*
30621 + When this function is executed, all of the frontend's
30622 + devices have already been deleted.
30623 + Therefore, there is no need to block I/O before remove_host.
30624 + */
30625 +
30626 + scsi_remove_host(host);
30627 + xenbus_frontend_closed(dev);
30628 +
30629 + return 0;
30630 +}
30631 +
30632 +#define VSCSIFRONT_OP_ADD_LUN 1
30633 +#define VSCSIFRONT_OP_DEL_LUN 2
30634 +
30635 +static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
30636 +{
30637 + struct xenbus_device *dev = info->dev;
30638 + int i, err = 0;
30639 + char str[64], state_str[64];
30640 + char **dir;
30641 + unsigned int dir_n = 0;
30642 + unsigned int device_state;
30643 + unsigned int hst, chn, tgt, lun;
30644 + struct scsi_device *sdev;
30645 +
30646 + dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n);
30647 + if (IS_ERR(dir))
30648 + return;
30649 +
30650 + for (i = 0; i < dir_n; i++) {
30651 + /* read status */
30652 + snprintf(str, sizeof(str), "vscsi-devs/%s/state", dir[i]);
30653 + err = xenbus_scanf(XBT_NIL, dev->otherend, str, "%u",
30654 + &device_state);
30655 + if (XENBUS_EXIST_ERR(err))
30656 + continue;
30657 +
30658 + /* virtual SCSI device */
30659 + snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
30660 + err = xenbus_scanf(XBT_NIL, dev->otherend, str,
30661 + "%u:%u:%u:%u", &hst, &chn, &tgt, &lun);
30662 + if (XENBUS_EXIST_ERR(err))
30663 + continue;
30664 +
30665 + /* front device state path */
30666 + snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]);
30667 +
30668 + switch (op) {
30669 + case VSCSIFRONT_OP_ADD_LUN:
30670 + if (device_state == XenbusStateInitialised) {
30671 + sdev = scsi_device_lookup(info->host, chn, tgt, lun);
30672 + if (sdev) {
30673 + printk(KERN_ERR "scsifront: Device already in use.\n");
30674 + scsi_device_put(sdev);
30675 + xenbus_printf(XBT_NIL, dev->nodename,
30676 + state_str, "%d", XenbusStateClosed);
30677 + } else {
30678 + scsi_add_device(info->host, chn, tgt, lun);
30679 + xenbus_printf(XBT_NIL, dev->nodename,
30680 + state_str, "%d", XenbusStateConnected);
30681 + }
30682 + }
30683 + break;
30684 + case VSCSIFRONT_OP_DEL_LUN:
30685 + if (device_state == XenbusStateClosing) {
30686 + sdev = scsi_device_lookup(info->host, chn, tgt, lun);
30687 + if (sdev) {
30688 + scsi_remove_device(sdev);
30689 + scsi_device_put(sdev);
30690 + xenbus_printf(XBT_NIL, dev->nodename,
30691 + state_str, "%d", XenbusStateClosed);
30692 + }
30693 + }
30694 + break;
30695 + default:
30696 + break;
30697 + }
30698 + }
30699 +
30700 + kfree(dir);
30701 + return;
30702 +}
30703 +
30704 +
30705 +
30706 +
30707 +static void scsifront_backend_changed(struct xenbus_device *dev,
30708 + enum xenbus_state backend_state)
30709 +{
30710 + struct vscsifrnt_info *info = dev->dev.driver_data;
30711 +
30712 + DPRINTK("%p %u %u\n", dev, dev->state, backend_state);
30713 +
30714 + switch (backend_state) {
30715 + case XenbusStateUnknown:
30716 + case XenbusStateInitialising:
30717 + case XenbusStateInitWait:
30718 + case XenbusStateClosed:
30719 + break;
30720 +
30721 + case XenbusStateInitialised:
30722 + break;
30723 +
30724 + case XenbusStateConnected:
30725 + if (xenbus_read_driver_state(dev->nodename) ==
30726 + XenbusStateInitialised) {
30727 + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
30728 + }
30729 +
30730 + if (dev->state == XenbusStateConnected)
30731 + break;
30732 +
30733 + xenbus_switch_state(dev, XenbusStateConnected);
30734 + break;
30735 +
30736 + case XenbusStateClosing:
30737 + scsifront_disconnect(info);
30738 + break;
30739 +
30740 + case XenbusStateReconfiguring:
30741 + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN);
30742 + xenbus_switch_state(dev, XenbusStateReconfiguring);
30743 + break;
30744 +
30745 + case XenbusStateReconfigured:
30746 + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
30747 + xenbus_switch_state(dev, XenbusStateConnected);
30748 + break;
30749 + }
30750 +}
30751 +
30752 +
30753 +static struct xenbus_device_id scsifront_ids[] = {
30754 + { "vscsi" },
30755 + { "" }
30756 +};
30757 +
30758 +
30759 +static struct xenbus_driver scsifront_driver = {
30760 + .name = "vscsi",
30761 + .owner = THIS_MODULE,
30762 + .ids = scsifront_ids,
30763 + .probe = scsifront_probe,
30764 + .remove = scsifront_remove,
30765 +/* .resume = scsifront_resume, */
30766 + .otherend_changed = scsifront_backend_changed,
30767 +};
30768 +
30769 +int scsifront_xenbus_init(void)
30770 +{
30771 + return xenbus_register_frontend(&scsifront_driver);
30772 +}
30773 +
30774 +void scsifront_xenbus_unregister(void)
30775 +{
30776 + xenbus_unregister_driver(&scsifront_driver);
30777 +}
30778 +
30779 Index: head-2008-11-25/drivers/xen/sfc_netback/Makefile
30780 ===================================================================
30781 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
30782 +++ head-2008-11-25/drivers/xen/sfc_netback/Makefile 2008-02-26 10:54:11.000000000 +0100
30783 @@ -0,0 +1,12 @@
30784 +EXTRA_CFLAGS += -Idrivers/xen/sfc_netback -Idrivers/xen/sfc_netutil -Idrivers/xen/netback -Idrivers/net/sfc
30785 +EXTRA_CFLAGS += -D__ci_driver__
30786 +EXTRA_CFLAGS += -DEFX_USE_KCOMPAT
30787 +EXTRA_CFLAGS += -Werror
30788 +
30789 +ifdef GCOV
30790 +EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
30791 +endif
30792 +
30793 +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_BACKEND) := sfc_netback.o
30794 +
30795 +sfc_netback-objs := accel.o accel_fwd.o accel_msg.o accel_solarflare.o accel_xenbus.o accel_debugfs.o
30796 Index: head-2008-11-25/drivers/xen/sfc_netback/accel.c
30797 ===================================================================
30798 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
30799 +++ head-2008-11-25/drivers/xen/sfc_netback/accel.c 2008-02-26 10:54:11.000000000 +0100
30800 @@ -0,0 +1,129 @@
30801 +/****************************************************************************
30802 + * Solarflare driver for Xen network acceleration
30803 + *
30804 + * Copyright 2006-2008: Solarflare Communications Inc,
30805 + * 9501 Jeronimo Road, Suite 250,
30806 + * Irvine, CA 92618, USA
30807 + *
30808 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
30809 + *
30810 + * This program is free software; you can redistribute it and/or modify it
30811 + * under the terms of the GNU General Public License version 2 as published
30812 + * by the Free Software Foundation, incorporated herein by reference.
30813 + *
30814 + * This program is distributed in the hope that it will be useful,
30815 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
30816 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30817 + * GNU General Public License for more details.
30818 + *
30819 + * You should have received a copy of the GNU General Public License
30820 + * along with this program; if not, write to the Free Software
30821 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30822 + ****************************************************************************
30823 + */
30824 +
30825 +#include "accel.h"
30826 +#include "accel_msg_iface.h"
30827 +#include "accel_solarflare.h"
30828 +
30829 +#include <linux/notifier.h>
30830 +
30831 +#ifdef EFX_GCOV
30832 +#include "gcov.h"
30833 +#endif
30834 +
30835 +static int netback_accel_netdev_event(struct notifier_block *nb,
30836 + unsigned long event, void *ptr)
30837 +{
30838 + struct net_device *net_dev = (struct net_device *)ptr;
30839 + struct netback_accel *bend;
30840 +
30841 + if ((event == NETDEV_UP) || (event == NETDEV_DOWN)) {
30842 + mutex_lock(&bend_list_mutex);
30843 + bend = bend_list;
30844 + while (bend != NULL) {
30845 + mutex_lock(&bend->bend_mutex);
30846 + /*
30847 + * This happens when the shared pages have
30848 + * been unmapped, but the bend has not yet been
30849 + * removed from the list
30850 + */
30851 + if (bend->shared_page == NULL)
30852 + goto next;
30853 +
30854 + if (bend->net_dev->ifindex == net_dev->ifindex)
30855 + netback_accel_set_interface_state
30856 + (bend, event == NETDEV_UP);
30857 +
30858 + next:
30859 + mutex_unlock(&bend->bend_mutex);
30860 + bend = bend->next_bend;
30861 + }
30862 + mutex_unlock(&bend_list_mutex);
30863 + }
30864 +
30865 + return NOTIFY_DONE;
30866 +}
30867 +
30868 +
30869 +static struct notifier_block netback_accel_netdev_notifier = {
30870 + .notifier_call = netback_accel_netdev_event,
30871 +};
30872 +
30873 +
30874 +unsigned sfc_netback_max_pages = NETBACK_ACCEL_DEFAULT_MAX_BUF_PAGES;
30875 +module_param_named(max_pages, sfc_netback_max_pages, uint, 0644);
30876 +MODULE_PARM_DESC(max_pages,
30877 + "The number of buffer pages to enforce on each guest");
30878 +
30879 +/* Initialise subsystems needed for the accelerated fast path */
30880 +static int __init netback_accel_init(void)
30881 +{
30882 + int rc = 0;
30883 +
30884 +#ifdef EFX_GCOV
30885 + gcov_provider_init(THIS_MODULE);
30886 +#endif
30887 +
30888 + rc = netback_accel_init_fwd();
30889 +
30890 + if (rc == 0)
30891 + netback_accel_debugfs_init();
30892 +
30893 + if (rc == 0)
30894 + rc = netback_accel_sf_init();
30895 +
30896 + if (rc == 0)
30897 + rc = register_netdevice_notifier
30898 + (&netback_accel_netdev_notifier);
30899 +
30900 + /*
30901 + * If no device was found, shouldn't we clean up the resources
30902 + * we've allocated for the acceleration subsystem?
30903 + */
30904 +
30905 + return rc;
30906 +}
30907 +
30908 +module_init(netback_accel_init);
30909 +
30910 +static void __exit netback_accel_exit(void)
30911 +{
30912 + unregister_netdevice_notifier(&netback_accel_netdev_notifier);
30913 +
30914 + netback_accel_sf_shutdown();
30915 +
30916 + netback_accel_shutdown_bends();
30917 +
30918 + netback_accel_debugfs_fini();
30919 +
30920 + netback_accel_shutdown_fwd();
30921 +
30922 +#ifdef EFX_GCOV
30923 + gcov_provider_fini(THIS_MODULE);
30924 +#endif
30925 +}
30926 +
30927 +module_exit(netback_accel_exit);
30928 +
30929 +MODULE_LICENSE("GPL");
30930 Index: head-2008-11-25/drivers/xen/sfc_netback/accel.h
30931 ===================================================================
30932 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
30933 +++ head-2008-11-25/drivers/xen/sfc_netback/accel.h 2008-02-26 10:54:11.000000000 +0100
30934 @@ -0,0 +1,393 @@
30935 +/****************************************************************************
30936 + * Solarflare driver for Xen network acceleration
30937 + *
30938 + * Copyright 2006-2008: Solarflare Communications Inc,
30939 + * 9501 Jeronimo Road, Suite 250,
30940 + * Irvine, CA 92618, USA
30941 + *
30942 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
30943 + *
30944 + * This program is free software; you can redistribute it and/or modify it
30945 + * under the terms of the GNU General Public License version 2 as published
30946 + * by the Free Software Foundation, incorporated herein by reference.
30947 + *
30948 + * This program is distributed in the hope that it will be useful,
30949 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
30950 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30951 + * GNU General Public License for more details.
30952 + *
30953 + * You should have received a copy of the GNU General Public License
30954 + * along with this program; if not, write to the Free Software
30955 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30956 + ****************************************************************************
30957 + */
30958 +
30959 +#ifndef NETBACK_ACCEL_H
30960 +#define NETBACK_ACCEL_H
30961 +
30962 +#include <linux/slab.h>
30963 +#include <linux/ip.h>
30964 +#include <linux/tcp.h>
30965 +#include <linux/udp.h>
30966 +#include <linux/in.h>
30967 +#include <linux/netdevice.h>
30968 +#include <linux/etherdevice.h>
30969 +#include <linux/mutex.h>
30970 +#include <linux/wait.h>
30971 +
30972 +#include <xen/xenbus.h>
30973 +
30974 +#include "accel_shared_fifo.h"
30975 +#include "accel_msg_iface.h"
30976 +#include "accel_util.h"
30977 +
30978 +/**************************************************************************
30979 + * Datatypes
30980 + **************************************************************************/
30981 +
30982 +#define NETBACK_ACCEL_DEFAULT_MAX_FILTERS (8)
30983 +#define NETBACK_ACCEL_DEFAULT_MAX_MCASTS (8)
30984 +#define NETBACK_ACCEL_DEFAULT_MAX_BUF_PAGES (384)
30985 +/* Variable to store module parameter for max_buf_pages */
30986 +extern unsigned sfc_netback_max_pages;
30987 +
30988 +#define NETBACK_ACCEL_STATS 1
30989 +
30990 +#if NETBACK_ACCEL_STATS
30991 +#define NETBACK_ACCEL_STATS_OP(x) x
30992 +#else
30993 +#define NETBACK_ACCEL_STATS_OP(x)
30994 +#endif
30995 +
30996 +/*! Statistics for a given backend */
30997 +struct netback_accel_stats {
30998 + /*! Number of eventq wakeup events */
30999 + u64 evq_wakeups;
31000 + /*! Number of eventq timeout events */
31001 + u64 evq_timeouts;
31002 + /*! Number of filters used */
31003 + u32 num_filters;
31004 + /*! Number of buffer pages registered */
31005 + u32 num_buffer_pages;
31006 +};
31007 +
31008 +
31009 +/* Debug fs nodes for each of the above stats */
31010 +struct netback_accel_dbfs {
31011 + struct dentry *evq_wakeups;
31012 + struct dentry *evq_timeouts;
31013 + struct dentry *num_filters;
31014 + struct dentry *num_buffer_pages;
31015 +};
31016 +
31017 +
31018 +/*! Resource limits for a given NIC */
31019 +struct netback_accel_limits {
31020 + int max_filters; /*!< Max. number of filters to use. */
31021 + int max_mcasts; /*!< Max. number of mcast subscriptions */
31022 + int max_buf_pages; /*!< Max. number of pages of NIC buffers */
31023 +};
31024 +
31025 +
31026 +/*! The state for an instance of the back end driver. */
31027 +struct netback_accel {
31028 + /*! mutex to protect this state */
31029 + struct mutex bend_mutex;
31030 +
31031 + /*! Watches on xenstore */
31032 + struct xenbus_watch domu_accel_watch;
31033 + struct xenbus_watch config_accel_watch;
31034 +
31035 + /*! Pointer to whatever device cookie ties us in to the hypervisor */
31036 + void *hdev_data;
31037 +
31038 + /*! FIFO indices. Next page is msg FIFOs */
31039 + struct net_accel_shared_page *shared_page;
31040 +
31041 + /*! Defer control message processing */
31042 + struct work_struct handle_msg;
31043 +
31044 + /*! Identifies other end VM and interface.*/
31045 + int far_end;
31046 + int vif_num;
31047 +
31048 + /*!< To unmap the shared pages */
31049 + void *sh_pages_unmap;
31050 +
31051 + /* Resource tracking */
31052 + /*! Limits on H/W & Dom0 resources */
31053 + struct netback_accel_limits quotas;
31054 +
31055 + /* Hardware resources */
31056 + /*! The H/W type of associated NIC */
31057 + enum net_accel_hw_type hw_type;
31058 + /*! State of allocation */
31059 + int hw_state;
31060 + /*! Index into ci_driver.nics[] for this interface */
31061 + int nic_index;
31062 + /*! How to set up the acceleration for this hardware */
31063 + int (*accel_setup)(struct netback_accel *);
31064 + /*! And how to stop it. */
31065 + void (*accel_shutdown)(struct netback_accel *);
31066 +
31067 + /*! The physical/real net_dev for this interface */
31068 + struct net_device *net_dev;
31069 +
31070 + /*! Magic pointer to locate state in forwarding table */
31071 + void *fwd_priv;
31072 +
31073 + /*! Message FIFO */
31074 + sh_msg_fifo2 to_domU;
31075 + /*! Message FIFO */
31076 + sh_msg_fifo2 from_domU;
31077 +
31078 + /*! General notification channel id */
31079 + int msg_channel;
31080 + /*! General notification channel irq */
31081 + int msg_channel_irq;
31082 +
31083 + /*! Event channel id dedicated to network packet interrupts. */
31084 + int net_channel;
31085 + /*! Event channel irq dedicated to network packets interrupts */
31086 + int net_channel_irq;
31087 +
31088 + /*! The MAC address the frontend goes by. */
31089 + u8 mac[ETH_ALEN];
31090 + /*! Driver name of associated NIC */
31091 + char *nicname;
31092 +
31093 + /*! Array of pointers to buffer pages mapped */
31094 + grant_handle_t *buffer_maps;
31095 + u64 *buffer_addrs;
31096 + /*! Index into buffer_maps */
31097 + int buffer_maps_index;
31098 + /*! Max number of pages that domU is allowed/will request to map */
31099 + int max_pages;
31100 +
31101 + /*! Pointer to hardware specific private area */
31102 + void *accel_hw_priv;
31103 +
31104 + /*! Wait queue for changes in accelstate. */
31105 + wait_queue_head_t state_wait_queue;
31106 +
31107 + /*! Current state of the frontend according to the xenbus
31108 + * watch. */
31109 + XenbusState frontend_state;
31110 +
31111 + /*! Current state of this backend. */
31112 + XenbusState backend_state;
31113 +
31114 + /*! Non-zero if the backend is being removed. */
31115 + int removing;
31116 +
31117 + /*! Non-zero if the setup_vnic has been called. */
31118 + int vnic_is_setup;
31119 +
31120 +#if NETBACK_ACCEL_STATS
31121 + struct netback_accel_stats stats;
31122 +#endif
31123 +#if defined(CONFIG_DEBUG_FS)
31124 + char *dbfs_dir_name;
31125 + struct dentry *dbfs_dir;
31126 + struct netback_accel_dbfs dbfs;
31127 +#endif
31128 +
31129 + /*! List */
31130 + struct netback_accel *next_bend;
31131 +};
31132 +
31133 +
31134 +/*
31135 + * Values for netback_accel.hw_state. States of resource allocation
31136 + * we can go through
31137 + */
31138 +/*! No hardware has yet been allocated. */
31139 +#define NETBACK_ACCEL_RES_NONE (0)
31140 +/*! Hardware has been allocated. */
31141 +#define NETBACK_ACCEL_RES_ALLOC (1)
31142 +#define NETBACK_ACCEL_RES_FILTER (2)
31143 +#define NETBACK_ACCEL_RES_HWINFO (3)
31144 +
31145 +/*! Filtering specification. This assumes that for VNIC support we
31146 + * will always want wildcard entries, so only specifies the
31147 + * destination IP/port
31148 + */
31149 +struct netback_accel_filter_spec {
31150 + /*! Internal, used to access efx_vi API */
31151 + void *filter_handle;
31152 +
31153 + /*! Destination IP in network order */
31154 + u32 destip_be;
31155 + /*! Destination port in network order */
31156 + u16 destport_be;
31157 + /*! Mac address */
31158 + u8 mac[ETH_ALEN];
31159 + /*! TCP or UDP */
31160 + u8 proto;
31161 +};
31162 +
31163 +
31164 +/**************************************************************************
31165 + * From accel.c
31166 + **************************************************************************/
31167 +
31168 +/*! \brief Start up all the acceleration plugins
31169 + *
31170 + * \return 0 on success, an errno on failure
31171 + */
31172 +extern int netback_accel_init_accel(void);
31173 +
31174 +/*! \brief Shut down all the acceleration plugins
31175 + */
31176 +extern void netback_accel_shutdown_accel(void);
31177 +
31178 +
31179 +/**************************************************************************
31180 + * From accel_fwd.c
31181 + **************************************************************************/
31182 +
31183 +/*! \brief Init the forwarding infrastructure
31184 + * \return 0 on success, or -ENOMEM if it couldn't get memory for the
31185 + * forward table
31186 + */
31187 +extern int netback_accel_init_fwd(void);
31188 +
31189 +/*! \brief Shut down the forwarding and free memory. */
31190 +extern void netback_accel_shutdown_fwd(void);
31191 +
31192 +/*! Initialise each nic port's forwarding table */
31193 +extern void *netback_accel_init_fwd_port(void);
31194 +extern void netback_accel_shutdown_fwd_port(void *fwd_priv);
31195 +
31196 +/*! \brief Add an entry to the forwarding table.
31197 + * \param mac : MAC address, used as hash key
31198 + * \param context : value to associate with key (can be NULL, see
31199 + * netback_accel_fwd_set_context)
31200 + * \return 0 on success, -ENOMEM if the table was full and could not grow
31201 + */
31202 +extern int netback_accel_fwd_add(const __u8 *mac, void *context,
31203 + void *fwd_priv);
31204 +
31205 +/*! \brief Remove an entry from the forwarding table.
31206 + * \param mac : the MAC address to remove
31207 + * \return nothing: it is not an error if the mac was not in the table
31208 + */
31209 +extern void netback_accel_fwd_remove(const __u8 *mac, void *fwd_priv);
31210 +
31211 +/*! \brief Set the context pointer for an existing fwd table entry.
31212 + * \param mac : key that is already present in the table
31213 + * \param context : new value to associate with key
31214 + * \return 0 on success, -ENOENT if mac not present in table.
31215 + */
31216 +extern int netback_accel_fwd_set_context(const __u8 *mac, void *context,
31217 + void *fwd_priv);
31218 +
31219 +/**************************************************************************
31220 + * From accel_msg.c
31221 + **************************************************************************/
31222 +
31223 +
31224 +/*! \brief Send the start-of-day message that handshakes with the VNIC
31225 + * and tells it its MAC address.
31226 + *
31227 + * \param bend The back end driver data structure
31228 + * \param version The version of communication to use, e.g. NET_ACCEL_MSG_VERSION
31229 + */
31230 +extern void netback_accel_msg_tx_hello(struct netback_accel *bend,
31231 + unsigned version);
31232 +
31233 +/*! \brief Send a "there's a new local mac address" message
31234 + *
31235 + * \param bend The back end driver data structure for the vnic to send
31236 + * the message to
31237 + * \param mac Pointer to the new mac address
31238 + */
31239 +extern void netback_accel_msg_tx_new_localmac(struct netback_accel *bend,
31240 + const void *mac);
31241 +
31242 +/*! \brief Send a "a mac address that was local has gone away" message
31243 + *
31244 + * \param bend The back end driver data structure for the vnic to send
31245 + * the message to
31246 + * \param mac Pointer to the old mac address
31247 + */
31248 +extern void netback_accel_msg_tx_old_localmac(struct netback_accel *bend,
31249 + const void *mac);
31250 +
31251 +extern void netback_accel_set_interface_state(struct netback_accel *bend,
31252 + int up);
31253 +
31254 +/*! \brief Process the message queue for a bend that has just
31255 + * interrupted.
31256 + *
31257 + * Demultiplexes an interrupt from the front end driver, taking
31258 + * messages from the fifo and taking appropriate action.
31259 + *
31260 + * \param bend The back end driver data structure
31261 + */
31262 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
31263 +extern void netback_accel_msg_rx_handler(struct work_struct *arg);
31264 +#else
31265 +extern void netback_accel_msg_rx_handler(void *bend_void);
31266 +#endif
31267 +
31268 +/**************************************************************************
31269 + * From accel_xenbus.c
31270 + **************************************************************************/
31271 +/*! List of all the bends currently in existence. */
31272 +extern struct netback_accel *bend_list;
31273 +extern struct mutex bend_list_mutex;
31274 +
31275 +/*! \brief Probe a new network interface. */
31276 +extern int netback_accel_probe(struct xenbus_device *dev);
31277 +
31278 +/*! \brief Remove a network interface. */
31279 +extern int netback_accel_remove(struct xenbus_device *dev);
31280 +
31281 +/*! \brief Shutdown all accelerator backends */
31282 +extern void netback_accel_shutdown_bends(void);
31283 +
31284 +/*! \brief Initiate the xenbus state teardown handshake */
31285 +extern void netback_accel_set_closing(struct netback_accel *bend);
31286 +
31287 +/**************************************************************************
31288 + * From accel_debugfs.c
31289 + **************************************************************************/
31290 +/*! Global statistics */
31291 +struct netback_accel_global_stats {
31292 + /*! Number of TX packets seen through driverlink */
31293 + u64 dl_tx_packets;
31294 + /*! Number of TX packets seen through driverlink we didn't like */
31295 + u64 dl_tx_bad_packets;
31296 + /*! Number of RX packets seen through driverlink */
31297 + u64 dl_rx_packets;
31298 + /*! Number of mac addresses we are forwarding to */
31299 + u32 num_fwds;
31300 +};
31301 +
31302 +/*! Debug fs entries for each of the above stats */
31303 +struct netback_accel_global_dbfs {
31304 + struct dentry *dl_tx_packets;
31305 + struct dentry *dl_tx_bad_packets;
31306 + struct dentry *dl_rx_packets;
31307 + struct dentry *num_fwds;
31308 +};
31309 +
31310 +#if NETBACK_ACCEL_STATS
31311 +extern struct netback_accel_global_stats global_stats;
31312 +#endif
31313 +
31314 +/*! \brief Initialise the debugfs root and populate with global stats */
31315 +extern void netback_accel_debugfs_init(void);
31316 +
31317 +/*! \brief Remove our debugfs root directory */
31318 +extern void netback_accel_debugfs_fini(void);
31319 +
31320 +/*! \brief Add per-bend statistics to debug fs */
31321 +extern int netback_accel_debugfs_create(struct netback_accel *bend);
31322 +/*! \brief Remove per-bend statistics from debug fs */
31323 +extern int netback_accel_debugfs_remove(struct netback_accel *bend);
31324 +
31325 +#endif /* NETBACK_ACCEL_H */
31326 +
31327 +
31328 Index: head-2008-11-25/drivers/xen/sfc_netback/accel_debugfs.c
31329 ===================================================================
31330 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
31331 +++ head-2008-11-25/drivers/xen/sfc_netback/accel_debugfs.c 2008-02-26 10:54:11.000000000 +0100
31332 @@ -0,0 +1,148 @@
31333 +/****************************************************************************
31334 + * Solarflare driver for Xen network acceleration
31335 + *
31336 + * Copyright 2006-2008: Solarflare Communications Inc,
31337 + * 9501 Jeronimo Road, Suite 250,
31338 + * Irvine, CA 92618, USA
31339 + *
31340 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
31341 + *
31342 + * This program is free software; you can redistribute it and/or modify it
31343 + * under the terms of the GNU General Public License version 2 as published
31344 + * by the Free Software Foundation, incorporated herein by reference.
31345 + *
31346 + * This program is distributed in the hope that it will be useful,
31347 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
31348 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31349 + * GNU General Public License for more details.
31350 + *
31351 + * You should have received a copy of the GNU General Public License
31352 + * along with this program; if not, write to the Free Software
31353 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31354 + ****************************************************************************
31355 + */
31356 +
31357 +#include <linux/fs.h>
31358 +#include <linux/debugfs.h>
31359 +
31360 +#include "accel.h"
31361 +
31362 +#if defined(CONFIG_DEBUG_FS)
31363 +static struct dentry *sfc_debugfs_root = NULL;
31364 +#endif
31365 +
31366 +#if NETBACK_ACCEL_STATS
31367 +struct netback_accel_global_stats global_stats;
31368 +#if defined(CONFIG_DEBUG_FS)
31369 +static struct netback_accel_global_dbfs global_dbfs;
31370 +#endif
31371 +#endif
31372 +
31373 +void netback_accel_debugfs_init(void)
31374 +{
31375 +#if defined(CONFIG_DEBUG_FS)
31376 + sfc_debugfs_root = debugfs_create_dir("sfc_netback", NULL);
31377 + if (sfc_debugfs_root == NULL)
31378 + return;
31379 +
31380 + global_dbfs.num_fwds = debugfs_create_u32
31381 + ("num_fwds", S_IRUSR | S_IRGRP | S_IROTH,
31382 + sfc_debugfs_root, &global_stats.num_fwds);
31383 + global_dbfs.dl_tx_packets = debugfs_create_u64
31384 + ("dl_tx_packets", S_IRUSR | S_IRGRP | S_IROTH,
31385 + sfc_debugfs_root, &global_stats.dl_tx_packets);
31386 + global_dbfs.dl_rx_packets = debugfs_create_u64
31387 + ("dl_rx_packets", S_IRUSR | S_IRGRP | S_IROTH,
31388 + sfc_debugfs_root, &global_stats.dl_rx_packets);
31389 + global_dbfs.dl_tx_bad_packets = debugfs_create_u64
31390 + ("dl_tx_bad_packets", S_IRUSR | S_IRGRP | S_IROTH,
31391 + sfc_debugfs_root, &global_stats.dl_tx_bad_packets);
31392 +#endif
31393 +}
31394 +
31395 +
31396 +void netback_accel_debugfs_fini(void)
31397 +{
31398 +#if defined(CONFIG_DEBUG_FS)
31399 + debugfs_remove(global_dbfs.num_fwds);
31400 + debugfs_remove(global_dbfs.dl_tx_packets);
31401 + debugfs_remove(global_dbfs.dl_rx_packets);
31402 + debugfs_remove(global_dbfs.dl_tx_bad_packets);
31403 +
31404 + debugfs_remove(sfc_debugfs_root);
31405 +#endif
31406 +}
31407 +
31408 +
31409 +int netback_accel_debugfs_create(struct netback_accel *bend)
31410 +{
31411 +#if defined(CONFIG_DEBUG_FS)
31412 + /* Smallest length is 7 ("vif0.0" plus '\0') */
31413 + int length = 7, temp;
31414 +
31415 + if (sfc_debugfs_root == NULL)
31416 + return -ENOENT;
31417 +
31418 + /* Work out length of string representation of far_end and vif_num */
31419 + temp = bend->far_end;
31420 + while (temp > 9) {
31421 + length++;
31422 + temp = temp / 10;
31423 + }
31424 + temp = bend->vif_num;
31425 + while (temp > 9) {
31426 + length++;
31427 + temp = temp / 10;
31428 + }
31429 +
31430 + bend->dbfs_dir_name = kmalloc(length, GFP_KERNEL);
31431 + if (bend->dbfs_dir_name == NULL)
31432 + return -ENOMEM;
31433 + sprintf(bend->dbfs_dir_name, "vif%d.%d", bend->far_end, bend->vif_num);
31434 +
31435 + bend->dbfs_dir = debugfs_create_dir(bend->dbfs_dir_name,
31436 + sfc_debugfs_root);
31437 + if (bend->dbfs_dir == NULL) {
31438 + kfree(bend->dbfs_dir_name);
31439 + return -ENOMEM;
31440 + }
31441 +
31442 +#if NETBACK_ACCEL_STATS
31443 + bend->dbfs.evq_wakeups = debugfs_create_u64
31444 + ("evq_wakeups", S_IRUSR | S_IRGRP | S_IROTH,
31445 + bend->dbfs_dir, &bend->stats.evq_wakeups);
31446 + bend->dbfs.evq_timeouts = debugfs_create_u64
31447 + ("evq_timeouts", S_IRUSR | S_IRGRP | S_IROTH,
31448 + bend->dbfs_dir, &bend->stats.evq_timeouts);
31449 + bend->dbfs.num_filters = debugfs_create_u32
31450 + ("num_filters", S_IRUSR | S_IRGRP | S_IROTH,
31451 + bend->dbfs_dir, &bend->stats.num_filters);
31452 + bend->dbfs.num_buffer_pages = debugfs_create_u32
31453 + ("num_buffer_pages", S_IRUSR | S_IRGRP | S_IROTH,
31454 + bend->dbfs_dir, &bend->stats.num_buffer_pages);
31455 +#endif
31456 +#endif
31457 + return 0;
31458 +}
31459 +
31460 +
31461 +int netback_accel_debugfs_remove(struct netback_accel *bend)
31462 +{
31463 +#if defined(CONFIG_DEBUG_FS)
31464 + if (bend->dbfs_dir != NULL) {
31465 +#if NETBACK_ACCEL_STATS
31466 + debugfs_remove(bend->dbfs.evq_wakeups);
31467 + debugfs_remove(bend->dbfs.evq_timeouts);
31468 + debugfs_remove(bend->dbfs.num_filters);
31469 + debugfs_remove(bend->dbfs.num_buffer_pages);
31470 +#endif
31471 + debugfs_remove(bend->dbfs_dir);
31472 + }
31473 +
31474 + if (bend->dbfs_dir_name)
31475 + kfree(bend->dbfs_dir_name);
31476 +#endif
31477 + return 0;
31478 +}
31479 +
31480 +
31481 Index: head-2008-11-25/drivers/xen/sfc_netback/accel_fwd.c
31482 ===================================================================
31483 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
31484 +++ head-2008-11-25/drivers/xen/sfc_netback/accel_fwd.c 2008-04-02 12:34:02.000000000 +0200
31485 @@ -0,0 +1,420 @@
31486 +/****************************************************************************
31487 + * Solarflare driver for Xen network acceleration
31488 + *
31489 + * Copyright 2006-2008: Solarflare Communications Inc,
31490 + * 9501 Jeronimo Road, Suite 250,
31491 + * Irvine, CA 92618, USA
31492 + *
31493 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
31494 + *
31495 + * This program is free software; you can redistribute it and/or modify it
31496 + * under the terms of the GNU General Public License version 2 as published
31497 + * by the Free Software Foundation, incorporated herein by reference.
31498 + *
31499 + * This program is distributed in the hope that it will be useful,
31500 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
31501 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31502 + * GNU General Public License for more details.
31503 + *
31504 + * You should have received a copy of the GNU General Public License
31505 + * along with this program; if not, write to the Free Software
31506 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31507 + ****************************************************************************
31508 + */
31509 +
31510 +#include "accel.h"
31511 +#include "accel_cuckoo_hash.h"
31512 +#include "accel_util.h"
31513 +#include "accel_solarflare.h"
31514 +
31515 +#include "driverlink_api.h"
31516 +
31517 +#include <linux/if_arp.h>
31518 +#include <linux/skbuff.h>
31519 +#include <linux/list.h>
31520 +
31521 +/* State stored in the forward table */
31522 +struct fwd_struct {
31523 + struct list_head link; /* Forms list */
31524 + void * context;
31525 + __u8 valid;
31526 + __u8 mac[ETH_ALEN];
31527 +};
31528 +
31529 +/* Max value we support */
31530 +#define NUM_FWDS_BITS 8
31531 +#define NUM_FWDS (1 << NUM_FWDS_BITS)
31532 +#define FWD_MASK (NUM_FWDS - 1)
31533 +
31534 +struct port_fwd {
31535 + /* Make a list */
31536 + struct list_head link;
31537 + /* Hash table to store the fwd_structs */
31538 + cuckoo_hash_table fwd_hash_table;
31539 + /* The array of fwd_structs */
31540 + struct fwd_struct *fwd_array;
31541 + /* Linked list of entries in use. */
31542 + struct list_head fwd_list;
31543 + /* Could do something clever with a reader/writer lock. */
31544 + spinlock_t fwd_lock;
31545 + /* Make fwd_find_free_entry() a bit faster by caching this */
31546 + int last_free_index;
31547 +};
31548 +
31549 +/*
31550 + * This is unlocked as it's only called from dl probe and remove,
31551 + * which are themselves synchronised. Could get rid of it entirely as
31552 + * it's never iterated, but useful for debug
31553 + */
31554 +static struct list_head port_fwds;
31555 +
31556 +
31557 +/* Search the fwd_array for an unused entry */
31558 +static int fwd_find_free_entry(struct port_fwd *fwd_set)
31559 +{
31560 + int index = fwd_set->last_free_index;
31561 +
31562 + do {
31563 + if (!fwd_set->fwd_array[index].valid) {
31564 + fwd_set->last_free_index = index;
31565 + return index;
31566 + }
31567 + index++;
31568 + if (index >= NUM_FWDS)
31569 + index = 0;
31570 + } while (index != fwd_set->last_free_index);
31571 +
31572 + return -ENOMEM;
31573 +}
31574 +
31575 +
31576 +/* Look up a MAC in the hash table. Caller should hold table lock. */
31577 +static inline struct fwd_struct *fwd_find_entry(const __u8 *mac,
31578 + struct port_fwd *fwd_set)
31579 +{
31580 + cuckoo_hash_value value;
31581 + cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
31582 +
31583 + if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table,
31584 + (cuckoo_hash_key *)(&key),
31585 + &value)) {
31586 + struct fwd_struct *fwd = &fwd_set->fwd_array[value];
31587 + DPRINTK_ON(memcmp(fwd->mac, mac, ETH_ALEN) != 0);
31588 + return fwd;
31589 + }
31590 +
31591 + return NULL;
31592 +}
31593 +
31594 +
31595 +/* Initialise each nic port's forwarding table */
31596 +void *netback_accel_init_fwd_port(void)
31597 +{
31598 + struct port_fwd *fwd_set;
31599 +
31600 + fwd_set = kzalloc(sizeof(struct port_fwd), GFP_KERNEL);
31601 + if (fwd_set == NULL) {
31602 + return NULL;
31603 + }
31604 +
31605 + spin_lock_init(&fwd_set->fwd_lock);
31606 +
31607 + fwd_set->fwd_array = kzalloc(sizeof (struct fwd_struct) * NUM_FWDS,
31608 + GFP_KERNEL);
31609 + if (fwd_set->fwd_array == NULL) {
31610 + kfree(fwd_set);
31611 + return NULL;
31612 + }
31613 +
31614 + if (cuckoo_hash_init(&fwd_set->fwd_hash_table, NUM_FWDS_BITS, 8) != 0) {
31615 + kfree(fwd_set->fwd_array);
31616 + kfree(fwd_set);
31617 + return NULL;
31618 + }
31619 +
31620 + INIT_LIST_HEAD(&fwd_set->fwd_list);
31621 +
31622 + list_add(&fwd_set->link, &port_fwds);
31623 +
31624 + return fwd_set;
31625 +}
31626 +
31627 +
31628 +void netback_accel_shutdown_fwd_port(void *fwd_priv)
31629 +{
31630 + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31631 +
31632 + BUG_ON(fwd_priv == NULL);
31633 +
31634 + BUG_ON(list_empty(&port_fwds));
31635 + list_del(&fwd_set->link);
31636 +
31637 + BUG_ON(!list_empty(&fwd_set->fwd_list));
31638 +
31639 + cuckoo_hash_destroy(&fwd_set->fwd_hash_table);
31640 + kfree(fwd_set->fwd_array);
31641 + kfree(fwd_set);
31642 +}
31643 +
31644 +
31645 +int netback_accel_init_fwd()
31646 +{
31647 + INIT_LIST_HEAD(&port_fwds);
31648 + return 0;
31649 +}
31650 +
31651 +
31652 +void netback_accel_shutdown_fwd()
31653 +{
31654 + BUG_ON(!list_empty(&port_fwds));
31655 +}
31656 +
31657 +
31658 +/*
31659 + * Add an entry to the forwarding table. Returns -ENOMEM if no
31660 + * space.
31661 + */
31662 +int netback_accel_fwd_add(const __u8 *mac, void *context, void *fwd_priv)
31663 +{
31664 + struct fwd_struct *fwd;
31665 + int rc = 0, index;
31666 + unsigned long flags;
31667 + cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
31668 + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31669 +
31670 + BUG_ON(fwd_priv == NULL);
31671 +
31672 + DPRINTK("Adding mac " MAC_FMT "\n", MAC_ARG(mac));
31673 +
31674 + spin_lock_irqsave(&fwd_set->fwd_lock, flags);
31675 +
31676 + if ((rc = fwd_find_free_entry(fwd_set)) < 0 ) {
31677 + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31678 + return rc;
31679 + }
31680 +
31681 + index = rc;
31682 +
31683 + /* Shouldn't already be in the table */
31684 + if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table,
31685 + (cuckoo_hash_key *)(&key), &rc) != 0) {
31686 + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31687 + EPRINTK("MAC address " MAC_FMT " already accelerated.\n",
31688 + MAC_ARG(mac));
31689 + return -EEXIST;
31690 + }
31691 +
31692 + if ((rc = cuckoo_hash_add(&fwd_set->fwd_hash_table,
31693 + (cuckoo_hash_key *)(&key), index, 1)) == 0) {
31694 + fwd = &fwd_set->fwd_array[index];
31695 + fwd->valid = 1;
31696 + fwd->context = context;
31697 + memcpy(fwd->mac, mac, ETH_ALEN);
31698 + list_add(&fwd->link, &fwd_set->fwd_list);
31699 + NETBACK_ACCEL_STATS_OP(global_stats.num_fwds++);
31700 + }
31701 +
31702 + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31703 +
31704 + /*
31705 + * No need to tell frontend that this mac address is local -
31706 + * it should auto-discover through packets on fastpath what is
31707 + * local and what is not, and just being on same server
31708 + * doesn't make it local (it could be on a different
31709 + * bridge)
31710 + */
31711 +
31712 + return rc;
31713 +}
31714 +
31715 +
31716 +/* remove an entry from the forwarding tables. */
31717 +void netback_accel_fwd_remove(const __u8 *mac, void *fwd_priv)
31718 +{
31719 + struct fwd_struct *fwd;
31720 + unsigned long flags;
31721 + cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
31722 + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31723 +
31724 + DPRINTK("Removing mac " MAC_FMT "\n", MAC_ARG(mac));
31725 +
31726 + BUG_ON(fwd_priv == NULL);
31727 +
31728 + spin_lock_irqsave(&fwd_set->fwd_lock, flags);
31729 +
31730 + fwd = fwd_find_entry(mac, fwd_set);
31731 + if (fwd != NULL) {
31732 + BUG_ON(list_empty(&fwd_set->fwd_list));
31733 + list_del(&fwd->link);
31734 +
31735 + fwd->valid = 0;
31736 + cuckoo_hash_remove(&fwd_set->fwd_hash_table,
31737 + (cuckoo_hash_key *)(&key));
31738 + NETBACK_ACCEL_STATS_OP(global_stats.num_fwds--);
31739 + }
31740 + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31741 +
31742 + /*
31743 + * No need to tell frontend that this is no longer present -
31744 + * the frontend is currently only interested in remote
31745 + * addresses and it works these out (mostly) by itself
31746 + */
31747 +}
31748 +
31749 +
31750 +/* Set the context pointer for a hash table entry. */
31751 +int netback_accel_fwd_set_context(const __u8 *mac, void *context,
31752 + void *fwd_priv)
31753 +{
31754 + struct fwd_struct *fwd;
31755 + unsigned long flags;
31756 + int rc = -ENOENT;
31757 + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31758 +
31759 + BUG_ON(fwd_priv == NULL);
31760 +
31761 + spin_lock_irqsave(&fwd_set->fwd_lock, flags);
31762 + fwd = fwd_find_entry(mac, fwd_set);
31763 + if (fwd != NULL) {
31764 + fwd->context = context;
31765 + rc = 0;
31766 + }
31767 + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31768 + return rc;
31769 +}
31770 +
31771 +
31772 +/**************************************************************************
31773 + * Process a received packet
31774 + **************************************************************************/
31775 +
31776 +/*
31777 + * Returns whether or not we have a match in our forward table for
31778 + * this skb. Must be called with the appropriate fwd_lock already held.
31779 + */
31780 +static struct netback_accel *for_a_vnic(struct netback_pkt_buf *skb,
31781 + struct port_fwd *fwd_set)
31782 +{
31783 + struct fwd_struct *fwd;
31784 + struct netback_accel *retval = NULL;
31785 +
31786 + fwd = fwd_find_entry(skb->mac.raw, fwd_set);
31787 + if (fwd != NULL)
31788 + retval = fwd->context;
31789 + return retval;
31790 +}
31791 +
31792 +
31793 +static inline int packet_is_arp_reply(struct sk_buff *skb)
31794 +{
31795 + return skb->protocol == ntohs(ETH_P_ARP)
31796 + && skb->nh.arph->ar_op == ntohs(ARPOP_REPLY);
31797 +}
31798 +
31799 +
31800 +static inline void hdr_to_filt(struct ethhdr *ethhdr, struct iphdr *ip,
31801 + struct netback_accel_filter_spec *spec)
31802 +{
31803 + spec->proto = ip->protocol;
31804 + spec->destip_be = ip->daddr;
31805 + memcpy(spec->mac, ethhdr->h_source, ETH_ALEN);
31806 +
31807 + if (ip->protocol == IPPROTO_TCP) {
31808 + struct tcphdr *tcp = (struct tcphdr *)((char *)ip + 4 * ip->ihl);
31809 + spec->destport_be = tcp->dest;
31810 + } else {
31811 + struct udphdr *udp = (struct udphdr *)((char *)ip + 4 * ip->ihl);
31812 + EPRINTK_ON(ip->protocol != IPPROTO_UDP);
31813 + spec->destport_be = udp->dest;
31814 + }
31815 +}
31816 +
31817 +
31818 +static inline int netback_accel_can_filter(struct netback_pkt_buf *skb)
31819 +{
31820 + return (skb->protocol == htons(ETH_P_IP) &&
31821 + ((skb->nh.iph->protocol == IPPROTO_TCP) ||
31822 + (skb->nh.iph->protocol == IPPROTO_UDP)));
31823 +}
31824 +
31825 +
31826 +static inline void netback_accel_filter_packet(struct netback_accel *bend,
31827 + struct netback_pkt_buf *skb)
31828 +{
31829 + struct netback_accel_filter_spec fs;
31830 + struct ethhdr *eh = (struct ethhdr *)(skb->mac.raw);
31831 +
31832 + hdr_to_filt(eh, skb->nh.iph, &fs);
31833 +
31834 + netback_accel_filter_check_add(bend, &fs);
31835 +}
31836 +
31837 +
31838 +/*
31839 + * Receive a packet and do something appropriate with it. Return true
31840 + * to take exclusive ownership of the packet. This is verging on
31841 + * solarflare specific
31842 + */
31843 +void netback_accel_rx_packet(struct netback_pkt_buf *skb, void *fwd_priv)
31844 +{
31845 + struct netback_accel *bend;
31846 + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31847 + unsigned long flags;
31848 +
31849 + BUG_ON(fwd_priv == NULL);
31850 +
31851 + /* Checking for bcast is cheaper so do that first */
31852 + if (is_broadcast_ether_addr(skb->mac.raw)) {
31853 + /* pass through the slow path by not claiming ownership */
31854 + return;
31855 + } else if (is_multicast_ether_addr(skb->mac.raw)) {
31856 + /* pass through the slow path by not claiming ownership */
31857 + return;
31858 + } else {
31859 + /* It is unicast */
31860 + spin_lock_irqsave(&fwd_set->fwd_lock, flags);
31861 + /* We insert filter to pass it off to a VNIC */
31862 + if ((bend = for_a_vnic(skb, fwd_set)) != NULL)
31863 + if (netback_accel_can_filter(skb))
31864 + netback_accel_filter_packet(bend, skb);
31865 + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31866 + }
31867 + return;
31868 +}
31869 +
31870 +
31871 +void netback_accel_tx_packet(struct sk_buff *skb, void *fwd_priv)
31872 +{
31873 + __u8 *mac;
31874 + unsigned long flags;
31875 + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
31876 + struct fwd_struct *fwd;
31877 +
31878 + BUG_ON(fwd_priv == NULL);
31879 +
31880 + if (is_broadcast_ether_addr(skb->mac.raw) && packet_is_arp_reply(skb)) {
31881 + /*
31882 + * update our fast path forwarding to reflect this
31883 + * gratuitous ARP
31884 + */
31885 + mac = skb->mac.raw+ETH_ALEN;
31886 +
31887 + DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n",
31888 + __FUNCTION__, MAC_ARG(mac));
31889 +
31890 + spin_lock_irqsave(&fwd_set->fwd_lock, flags);
31891 + /*
31892 + * Might not be local, but let's tell them all it is,
31893 + * and they can restore the fastpath if they continue
31894 + * to get packets that way
31895 + */
31896 + list_for_each_entry(fwd, &fwd_set->fwd_list, link) {
31897 + struct netback_accel *bend = fwd->context;
31898 + if (bend != NULL)
31899 + netback_accel_msg_tx_new_localmac(bend, mac);
31900 + }
31901 +
31902 + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
31903 + }
31904 + return;
31905 +}
31906 Index: head-2008-11-25/drivers/xen/sfc_netback/accel_msg.c
31907 ===================================================================
31908 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
31909 +++ head-2008-11-25/drivers/xen/sfc_netback/accel_msg.c 2008-02-20 09:32:49.000000000 +0100
31910 @@ -0,0 +1,392 @@
31911 +/****************************************************************************
31912 + * Solarflare driver for Xen network acceleration
31913 + *
31914 + * Copyright 2006-2008: Solarflare Communications Inc,
31915 + * 9501 Jeronimo Road, Suite 250,
31916 + * Irvine, CA 92618, USA
31917 + *
31918 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
31919 + *
31920 + * This program is free software; you can redistribute it and/or modify it
31921 + * under the terms of the GNU General Public License version 2 as published
31922 + * by the Free Software Foundation, incorporated herein by reference.
31923 + *
31924 + * This program is distributed in the hope that it will be useful,
31925 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
31926 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31927 + * GNU General Public License for more details.
31928 + *
31929 + * You should have received a copy of the GNU General Public License
31930 + * along with this program; if not, write to the Free Software
31931 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31932 + ****************************************************************************
31933 + */
31934 +
31935 +#include <xen/evtchn.h>
31936 +
31937 +#include "accel.h"
31938 +#include "accel_msg_iface.h"
31939 +#include "accel_util.h"
31940 +#include "accel_solarflare.h"
31941 +
31942 +/* Send a HELLO to front end to start things off */
31943 +void netback_accel_msg_tx_hello(struct netback_accel *bend, unsigned version)
31944 +{
31945 + unsigned long lock_state;
31946 + struct net_accel_msg *msg =
31947 + net_accel_msg_start_send(bend->shared_page,
31948 + &bend->to_domU, &lock_state);
31949 + /* The queue _cannot_ be full, we're the first users. */
31950 + EPRINTK_ON(msg == NULL);
31951 +
31952 + if (msg != NULL) {
31953 + net_accel_msg_init(msg, NET_ACCEL_MSG_HELLO);
31954 + msg->u.hello.version = version;
31955 + msg->u.hello.max_pages = bend->quotas.max_buf_pages;
31956 + VPRINTK("Sending hello to channel %d\n", bend->msg_channel);
31957 + net_accel_msg_complete_send_notify(bend->shared_page,
31958 + &bend->to_domU,
31959 + &lock_state,
31960 + bend->msg_channel_irq);
31961 + }
31962 +}
31963 +
31964 +/* Send a local mac message to vnic */
31965 +static void netback_accel_msg_tx_localmac(struct netback_accel *bend,
31966 + int type, const void *mac)
31967 +{
31968 + unsigned long lock_state;
31969 + struct net_accel_msg *msg;
31970 +
31971 + BUG_ON(bend == NULL || mac == NULL);
31972 +
31973 + VPRINTK("Sending local mac message: " MAC_FMT "\n",
31974 + MAC_ARG((const char *)mac));
31975 +
31976 + msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU,
31977 + &lock_state);
31978 +
31979 + if (msg != NULL) {
31980 + net_accel_msg_init(msg, NET_ACCEL_MSG_LOCALMAC);
31981 + msg->u.localmac.flags = type;
31982 + memcpy(msg->u.localmac.mac, mac, ETH_ALEN);
31983 + net_accel_msg_complete_send_notify(bend->shared_page,
31984 + &bend->to_domU,
31985 + &lock_state,
31986 + bend->msg_channel_irq);
31987 + } else {
31988 + /*
31989 + * TODO if this happens we may leave a domU
31990 + * fastpathing packets when they should be delivered
31991 + * locally. The solution is to get domU to time out entries
31992 + * in its fastpath lookup table when it receives no RX
31993 + * traffic
31994 + */
31995 + EPRINTK("%s: saw full queue, may need ARP timer to recover\n",
31996 + __FUNCTION__);
31997 + }
31998 +}
31999 +
32000 +/* Send an add local mac message to vnic */
32001 +void netback_accel_msg_tx_new_localmac(struct netback_accel *bend,
32002 + const void *mac)
32003 +{
32004 + netback_accel_msg_tx_localmac(bend, NET_ACCEL_MSG_ADD, mac);
32005 +}
32006 +
32007 +
32008 +static int netback_accel_msg_rx_buffer_map(struct netback_accel *bend,
32009 + struct net_accel_msg *msg)
32010 +{
32011 + int log2_pages, rc;
32012 +
32013 + /* Can only allocate in power of two */
32014 + log2_pages = log2_ge(msg->u.mapbufs.pages, 0);
32015 + if (msg->u.mapbufs.pages != pow2(log2_pages)) {
32016 + EPRINTK("%s: Can only alloc bufs in power of 2 sizes (%d)\n",
32017 + __FUNCTION__, msg->u.mapbufs.pages);
32018 + rc = -EINVAL;
32019 + goto err_out;
32020 + }
32021 +
32022 + /*
32023 + * Sanity. Assumes NET_ACCEL_MSG_MAX_PAGE_REQ is same for
32024 + * both directions/domains
32025 + */
32026 + if (msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ) {
32027 + EPRINTK("%s: too many pages in a single message: %d %d\n",
32028 + __FUNCTION__, msg->u.mapbufs.pages,
32029 + NET_ACCEL_MSG_MAX_PAGE_REQ);
32030 + rc = -EINVAL;
32031 + goto err_out;
32032 + }
32033 +
32034 + if ((rc = netback_accel_add_buffers(bend, msg->u.mapbufs.pages,
32035 + log2_pages, msg->u.mapbufs.grants,
32036 + &msg->u.mapbufs.buf)) < 0) {
32037 + goto err_out;
32038 + }
32039 +
32040 + msg->id |= NET_ACCEL_MSG_REPLY;
32041 +
32042 + return 0;
32043 +
32044 + err_out:
32045 + EPRINTK("%s: err_out\n", __FUNCTION__);
32046 + msg->id |= NET_ACCEL_MSG_ERROR | NET_ACCEL_MSG_REPLY;
32047 + return rc;
32048 +}
32049 +
32050 +
32051 +/* Hint from frontend that one of our filters is out of date */
32052 +static int netback_accel_process_fastpath(struct netback_accel *bend,
32053 + struct net_accel_msg *msg)
32054 +{
32055 + struct netback_accel_filter_spec spec;
32056 +
32057 + if (msg->u.fastpath.flags & NET_ACCEL_MSG_REMOVE) {
32058 + /*
32059 +		 * Would be nice to BUG() this but that would leave us
32060 +		 * vulnerable to a naughty frontend
32061 + */
32062 + EPRINTK_ON(msg->u.fastpath.flags & NET_ACCEL_MSG_ADD);
32063 +
32064 + memcpy(spec.mac, msg->u.fastpath.mac, ETH_ALEN);
32065 + spec.destport_be = msg->u.fastpath.port;
32066 + spec.destip_be = msg->u.fastpath.ip;
32067 + spec.proto = msg->u.fastpath.proto;
32068 +
32069 + netback_accel_filter_remove_spec(bend, &spec);
32070 + }
32071 +
32072 + return 0;
32073 +}
32074 +
32075 +
32076 +/* Flow control for message queues */
32077 +inline void set_queue_not_full(struct netback_accel *bend)
32078 +{
32079 + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B,
32080 + (unsigned long *)&bend->shared_page->aflags))
32081 + notify_remote_via_irq(bend->msg_channel_irq);
32082 + else
32083 + VPRINTK("queue not full bit already set, not signalling\n");
32084 +}
32085 +
32086 +
32087 +/* Flow control for message queues */
32088 +inline void set_queue_full(struct netback_accel *bend)
32089 +{
32090 + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B,
32091 + (unsigned long *)&bend->shared_page->aflags))
32092 + notify_remote_via_irq(bend->msg_channel_irq);
32093 + else
32094 + VPRINTK("queue full bit already set, not signalling\n");
32095 +}
32096 +
32097 +
32098 +void netback_accel_set_interface_state(struct netback_accel *bend, int up)
32099 +{
32100 + bend->shared_page->net_dev_up = up;
32101 + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B,
32102 + (unsigned long *)&bend->shared_page->aflags))
32103 + notify_remote_via_irq(bend->msg_channel_irq);
32104 + else
32105 + VPRINTK("interface up/down bit already set, not signalling\n");
32106 +}
32107 +
32108 +
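+/* Check a frontend HELLO version that did not match ours; currently any
+ * mismatch is rejected */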
32109 +static int check_rx_hello_version(unsigned version)
32110 +{
32111 + /* Should only happen if there's been a version mismatch */
32112 + BUG_ON(version == NET_ACCEL_MSG_VERSION);
32113 +
32114 + if (version > NET_ACCEL_MSG_VERSION) {
32115 + /* Newer protocol, we must refuse */
32116 + return -EPROTO;
32117 + }
32118 +
32119 + if (version < NET_ACCEL_MSG_VERSION) {
32120 + /*
32121 + * We are newer, so have discretion to accept if we
32122 + * wish. For now however, just reject
32123 + */
32124 + return -EPROTO;
32125 + }
32126 +
32127 + return -EINVAL;
32128 +}
32129 +
32130 +
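+/* Handle a single message received from the frontend */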
32131 +static int process_rx_msg(struct netback_accel *bend,
32132 + struct net_accel_msg *msg)
32133 +{
32134 + int err = 0;
32135 +
32136 + switch (msg->id) {
32137 + case NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_HELLO:
32138 + /* Reply to a HELLO; mark ourselves as connected */
32139 + DPRINTK("got Hello reply, version %.8x\n",
32140 + msg->u.hello.version);
32141 +
32142 + /*
32143 + * Check that we've not successfully done this
32144 + * already. NB no check at the moment that this reply
32145 + * comes after we've actually sent a HELLO as that's
32146 + * not possible with the current code structure
32147 + */
32148 + if (bend->hw_state != NETBACK_ACCEL_RES_NONE)
32149 + return -EPROTO;
32150 +
32151 + /* Store max_pages for accel_setup */
32152 + if (msg->u.hello.max_pages > bend->quotas.max_buf_pages) {
32153 + EPRINTK("More pages than quota allows (%d > %d)\n",
32154 + msg->u.hello.max_pages,
32155 + bend->quotas.max_buf_pages);
32156 + /* Force it down to the quota */
32157 + msg->u.hello.max_pages = bend->quotas.max_buf_pages;
32158 + }
32159 + bend->max_pages = msg->u.hello.max_pages;
32160 +
32161 + /* Set up the hardware visible to the other end */
32162 + err = bend->accel_setup(bend);
32163 + if (err) {
32164 + /* This is fatal */
32165 + DPRINTK("Hello gave accel_setup error %d\n", err);
32166 + netback_accel_set_closing(bend);
32167 + } else {
32168 + /*
32169 + * Now add the context so that packet
32170 + * forwarding will commence
32171 + */
32172 + netback_accel_fwd_set_context(bend->mac, bend,
32173 + bend->fwd_priv);
32174 + }
32175 + break;
32176 + case NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_ERROR:
32177 + EPRINTK("got Hello error, versions us:%.8x them:%.8x\n",
32178 + NET_ACCEL_MSG_VERSION, msg->u.hello.version);
32179 +
32180 + if (bend->hw_state != NETBACK_ACCEL_RES_NONE)
32181 + return -EPROTO;
32182 +
32183 + if (msg->u.hello.version != NET_ACCEL_MSG_VERSION) {
32184 + /* Error is due to version mismatch */
32185 + err = check_rx_hello_version(msg->u.hello.version);
32186 + if (err == 0) {
32187 + /*
32188 + * It's OK to be compatible, send
32189 + * another hello with compatible version
32190 + */
32191 + netback_accel_msg_tx_hello
32192 + (bend, msg->u.hello.version);
32193 + } else {
32194 + /*
32195 + * Tell frontend that we're not going to
32196 + * send another HELLO by going to Closing.
32197 + */
32198 + netback_accel_set_closing(bend);
32199 + }
32200 + }
32201 + break;
32202 + case NET_ACCEL_MSG_MAPBUF:
32203 + VPRINTK("Got mapped buffers request %d\n",
32204 + msg->u.mapbufs.reqid);
32205 +
32206 + if (bend->hw_state == NETBACK_ACCEL_RES_NONE)
32207 + return -EPROTO;
32208 +
32209 + /*
32210 + * Frontend wants a buffer table entry for the
32211 + * supplied pages
32212 + */
32213 + err = netback_accel_msg_rx_buffer_map(bend, msg);
32214 + if (net_accel_msg_reply_notify(bend->shared_page,
32215 + bend->msg_channel_irq,
32216 + &bend->to_domU, msg)) {
32217 + /*
32218 + * This is fatal as we can't tell the frontend
32219 + * about the problem through the message
32220 + * queue, and so would otherwise stalemate
32221 + */
32222 + netback_accel_set_closing(bend);
32223 + }
32224 + break;
32225 + case NET_ACCEL_MSG_FASTPATH:
32226 + DPRINTK("Got fastpath request\n");
32227 +
32228 + if (bend->hw_state == NETBACK_ACCEL_RES_NONE)
32229 + return -EPROTO;
32230 +
32231 + err = netback_accel_process_fastpath(bend, msg);
32232 + break;
32233 + default:
32234 + EPRINTK("Huh? Message code is %x\n", msg->id);
32235 + err = -EPROTO;
32236 + break;
32237 + }
32238 + return err;
32239 +}
32240 +
32241 +
32242 +/* Demultiplex an IRQ from the frontend driver. */
32243 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
32244 +void netback_accel_msg_rx_handler(struct work_struct *arg)
32245 +#else
32246 +void netback_accel_msg_rx_handler(void *bend_void)
32247 +#endif
32248 +{
32249 + struct net_accel_msg msg;
32250 + int err, queue_was_full = 0;
32251 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
32252 + struct netback_accel *bend =
32253 + container_of(arg, struct netback_accel, handle_msg);
32254 +#else
32255 + struct netback_accel *bend = (struct netback_accel *)bend_void;
32256 +#endif
32257 +
32258 + mutex_lock(&bend->bend_mutex);
32259 +
32260 + /*
32261 +	 * This happens when the shared pages have been unmapped, but
32262 +	 * the workqueue has not been flushed yet
32263 + */
32264 + if (bend->shared_page == NULL)
32265 + goto done;
32266 +
32267 + if ((bend->shared_page->aflags &
32268 + NET_ACCEL_MSG_AFLAGS_TO_DOM0_MASK) != 0) {
32269 + if (bend->shared_page->aflags &
32270 + NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL) {
32271 + /* We've been told there may now be space. */
32272 + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B,
32273 + (unsigned long *)&bend->shared_page->aflags);
32274 + }
32275 +
32276 + if (bend->shared_page->aflags &
32277 + NET_ACCEL_MSG_AFLAGS_QUEUEUFULL) {
32278 + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B,
32279 + (unsigned long *)&bend->shared_page->aflags);
32280 + queue_was_full = 1;
32281 + }
32282 + }
32283 +
32284 + while ((err = net_accel_msg_recv(bend->shared_page, &bend->from_domU,
32285 + &msg)) == 0) {
32286 + err = process_rx_msg(bend, &msg);
32287 +
32288 + if (err != 0) {
32289 + EPRINTK("%s: Error %d\n", __FUNCTION__, err);
32290 + goto err;
32291 + }
32292 + }
32293 +
32294 + err:
32295 + /* There will be space now if we can make any. */
32296 + if (queue_was_full)
32297 + set_queue_not_full(bend);
32298 + done:
32299 + mutex_unlock(&bend->bend_mutex);
32300 +
32301 + return;
32302 +}
32303 Index: head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.c
32304 ===================================================================
32305 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
32306 +++ head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.c 2008-02-20 09:32:49.000000000 +0100
32307 @@ -0,0 +1,1253 @@
32308 +/****************************************************************************
32309 + * Solarflare driver for Xen network acceleration
32310 + *
32311 + * Copyright 2006-2008: Solarflare Communications Inc,
32312 + * 9501 Jeronimo Road, Suite 250,
32313 + * Irvine, CA 92618, USA
32314 + *
32315 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
32316 + *
32317 + * This program is free software; you can redistribute it and/or modify it
32318 + * under the terms of the GNU General Public License version 2 as published
32319 + * by the Free Software Foundation, incorporated herein by reference.
32320 + *
32321 + * This program is distributed in the hope that it will be useful,
32322 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
32323 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32324 + * GNU General Public License for more details.
32325 + *
32326 + * You should have received a copy of the GNU General Public License
32327 + * along with this program; if not, write to the Free Software
32328 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
32329 + ****************************************************************************
32330 + */
32331 +
32332 +#include "common.h"
32333 +
32334 +#include "accel.h"
32335 +#include "accel_solarflare.h"
32336 +#include "accel_msg_iface.h"
32337 +#include "accel_util.h"
32338 +
32339 +#include "accel_cuckoo_hash.h"
32340 +
32341 +#include "ci/driver/resource/efx_vi.h"
32342 +
32343 +#include "ci/efrm/nic_table.h"
32344 +#include "ci/efhw/public.h"
32345 +
32346 +#include <xen/evtchn.h>
32347 +#include <xen/driver_util.h>
32348 +#include <linux/list.h>
32349 +#include <linux/mutex.h>
32350 +
32351 +#include "driverlink_api.h"
32352 +
32353 +#define SF_XEN_RX_USR_BUF_SIZE 2048
32354 +
32355 +struct falcon_bend_accel_priv {
32356 + struct efx_vi_state *efx_vih;
32357 +
32358 + /*! Array of pointers to dma_map state, used so VNIC can
32359 + * request their removal in a single message
32360 + */
32361 + struct efx_vi_dma_map_state **dma_maps;
32362 + /*! Index into dma_maps */
32363 + int dma_maps_index;
32364 +
32365 + /*! Serialises access to filters */
32366 + spinlock_t filter_lock;
32367 + /*! Bitmap of which filters are free */
32368 + unsigned long free_filters;
32369 + /*! Used for index normalisation */
32370 + u32 filter_idx_mask;
32371 + struct netback_accel_filter_spec *fspecs;
32372 + cuckoo_hash_table filter_hash_table;
32373 +
32374 + u32 txdmaq_gnt;
32375 + u32 rxdmaq_gnt;
32376 + u32 doorbell_gnt;
32377 + u32 evq_rptr_gnt;
32378 + u32 evq_mem_gnts[EF_HW_FALCON_EVQ_PAGES];
32379 + u32 evq_npages;
32380 +};
32381 +
32382 +/* Forward declaration */
32383 +static int netback_accel_filter_init(struct netback_accel *);
32384 +static void netback_accel_filter_shutdown(struct netback_accel *);
32385 +
32386 +/**************************************************************************
32387 + *
32388 + * Driverlink stuff
32389 + *
32390 + **************************************************************************/
32391 +
32392 +struct driverlink_port {
32393 + struct list_head link;
32394 + enum net_accel_hw_type type;
32395 + struct net_device *net_dev;
32396 + struct efx_dl_device *efx_dl_dev;
32397 + int nic_index;
32398 + void *fwd_priv;
32399 +};
32400 +
32401 +static struct list_head dl_ports;
32402 +
32403 +/* This mutex protects global state, such as the dl_ports list */
32404 +DEFINE_MUTEX(accel_mutex);
32405 +
32406 +static int init_done = 0;
32407 +
32408 +/* The DL callbacks */
32409 +
32410 +
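+/* Driverlink transmit hook: pass outgoing packets to the forwarding code so
+ * the fast path filters can be updated */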
32411 +#if defined(EFX_USE_FASTCALL)
32412 +static enum efx_veto fastcall
32413 +#else
32414 +static enum efx_veto
32415 +#endif
32416 +bend_dl_tx_packet(struct efx_dl_device *efx_dl_dev,
32417 + struct sk_buff *skb)
32418 +{
32419 + struct driverlink_port *port = efx_dl_dev->priv;
32420 +
32421 + BUG_ON(port == NULL);
32422 +
32423 + NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++);
32424 + if (skb->mac.raw != NULL)
32425 + netback_accel_tx_packet(skb, port->fwd_priv);
32426 + else {
32427 + DPRINTK("Ignoring packet with missing mac address\n");
32428 + NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_bad_packets++);
32429 + }
32430 + return EFX_ALLOW_PACKET;
32431 +}
32432 +
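+/* Driverlink receive hook: pass incoming packets to the forwarding code so
+ * fast path filters can be inserted */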
32433 +/* EFX_USE_FASTCALL */
32434 +#if defined(EFX_USE_FASTCALL)
32435 +static enum efx_veto fastcall
32436 +#else
32437 +static enum efx_veto
32438 +#endif
32439 +bend_dl_rx_packet(struct efx_dl_device *efx_dl_dev,
32440 + const char *pkt_buf, int pkt_len)
32441 +{
32442 + struct driverlink_port *port = efx_dl_dev->priv;
32443 + struct netback_pkt_buf pkt;
32444 + struct ethhdr *eh;
32445 +
32446 + BUG_ON(port == NULL);
32447 +
32448 + pkt.mac.raw = (char *)pkt_buf;
32449 + pkt.nh.raw = (char *)pkt_buf + ETH_HLEN;
32450 + eh = (struct ethhdr *)pkt_buf;
32451 + pkt.protocol = eh->h_proto;
32452 +
32453 + NETBACK_ACCEL_STATS_OP(global_stats.dl_rx_packets++);
32454 + netback_accel_rx_packet(&pkt, port->fwd_priv);
32455 + return EFX_ALLOW_PACKET;
32456 +}
32457 +
32458 +
32459 +/* Callbacks we'd like to get from the netdriver through driverlink */
32460 +struct efx_dl_callbacks bend_dl_callbacks =
32461 + {
32462 + .tx_packet = bend_dl_tx_packet,
32463 + .rx_packet = bend_dl_rx_packet,
32464 + };
32465 +
32466 +
32467 +static struct netback_accel_hooks accel_hooks = {
32468 + THIS_MODULE,
32469 + &netback_accel_probe,
32470 + &netback_accel_remove
32471 +};
32472 +
32473 +
32474 +/*
32475 + * Handy helper which, given an efx_dl_device, works out which
32476 + * efab_nic_t index into efrm_nic_table.nic[] it corresponds to
32477 + */
32478 +static int efx_device_to_efab_nic_index(struct efx_dl_device *efx_dl_dev)
32479 +{
32480 + int i;
32481 +
32482 + for (i = 0; i < EFHW_MAX_NR_DEVS; i++) {
32483 + struct efhw_nic *nic = efrm_nic_table.nic[i];
32484 +
32485 + /*
32486 + * It's possible for the nic structure to have not
32487 + * been initialised if the resource driver failed its
32488 + * driverlink probe
32489 + */
32490 + if (nic == NULL || nic->net_driver_dev == NULL)
32491 + continue;
32492 +
32493 + /* Work out if these are talking about the same NIC */
32494 + if (nic->net_driver_dev->pci_dev == efx_dl_dev->pci_dev)
32495 + return i;
32496 + }
32497 +
32498 + return -1;
32499 +}
32500 +
32501 +
32502 +/* Driver link probe - register our callbacks */
32503 +static int bend_dl_probe(struct efx_dl_device *efx_dl_dev,
32504 + const struct net_device *net_dev,
32505 + const struct efx_dl_device_info *dev_info,
32506 + const char* silicon_rev)
32507 +{
32508 + int rc;
32509 + enum net_accel_hw_type type;
32510 + struct driverlink_port *port;
32511 +
32512 + DPRINTK("%s: %s\n", __FUNCTION__, silicon_rev);
32513 +
32514 + if (strcmp(silicon_rev, "falcon/a1") == 0)
32515 + type = NET_ACCEL_MSG_HWTYPE_FALCON_A;
32516 + else if (strcmp(silicon_rev, "falcon/b0") == 0)
32517 + type = NET_ACCEL_MSG_HWTYPE_FALCON_B;
32518 + else {
32519 + EPRINTK("%s: unsupported silicon %s\n", __FUNCTION__,
32520 + silicon_rev);
32521 + rc = -EINVAL;
32522 + goto fail1;
32523 + }
32524 +
32525 + port = kmalloc(sizeof(struct driverlink_port), GFP_KERNEL);
32526 + if (port == NULL) {
32527 + EPRINTK("%s: no memory for dl probe\n", __FUNCTION__);
32528 + rc = -ENOMEM;
32529 + goto fail1;
32530 + }
32531 +
32532 + port->efx_dl_dev = efx_dl_dev;
32533 + efx_dl_dev->priv = port;
32534 +
32535 + port->nic_index = efx_device_to_efab_nic_index(efx_dl_dev);
32536 + if (port->nic_index < 0) {
32537 + /*
32538 + * This can happen in theory if the resource driver
32539 + * failed to initialise properly
32540 + */
32541 + EPRINTK("%s: nic structure not found\n", __FUNCTION__);
32542 + rc = -EINVAL;
32543 + goto fail2;
32544 + }
32545 +
32546 + port->fwd_priv = netback_accel_init_fwd_port();
32547 + if (port->fwd_priv == NULL) {
32548 + EPRINTK("%s: failed to set up forwarding for port\n",
32549 + __FUNCTION__);
32550 + rc = -ENOMEM;
32551 + goto fail2;
32552 + }
32553 +
32554 + rc = efx_dl_register_callbacks(efx_dl_dev, &bend_dl_callbacks);
32555 + if (rc != 0) {
32556 + EPRINTK("%s: register_callbacks failed\n", __FUNCTION__);
32557 + goto fail3;
32558 + }
32559 +
32560 + port->type = type;
32561 + port->net_dev = (struct net_device *)net_dev;
32562 +
32563 + mutex_lock(&accel_mutex);
32564 + list_add(&port->link, &dl_ports);
32565 + mutex_unlock(&accel_mutex);
32566 +
32567 + rc = netback_connect_accelerator(NETBACK_ACCEL_VERSION, 0,
32568 + port->net_dev->name, &accel_hooks);
32569 +
32570 + if (rc < 0) {
32571 + EPRINTK("Xen netback accelerator version mismatch\n");
32572 + goto fail4;
32573 + } else if (rc > 0) {
32574 + /*
32575 +		 * In future we may want to add backwards compatibility
32576 + * and accept certain subsets of previous versions
32577 + */
32578 + EPRINTK("Xen netback accelerator version mismatch\n");
32579 + goto fail4;
32580 + }
32581 +
32582 + return 0;
32583 +
32584 + fail4:
32585 + mutex_lock(&accel_mutex);
32586 + list_del(&port->link);
32587 + mutex_unlock(&accel_mutex);
32588 +
32589 + efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks);
32590 + fail3:
32591 + netback_accel_shutdown_fwd_port(port->fwd_priv);
32592 + fail2:
32593 + efx_dl_dev->priv = NULL;
32594 + kfree(port);
32595 + fail1:
32596 + return rc;
32597 +}
32598 +
32599 +
32600 +static void bend_dl_remove(struct efx_dl_device *efx_dl_dev)
32601 +{
32602 + struct driverlink_port *port;
32603 +
32604 + DPRINTK("Unregistering driverlink callbacks.\n");
32605 +
32606 + mutex_lock(&accel_mutex);
32607 +
32608 + port = (struct driverlink_port *)efx_dl_dev->priv;
32609 +
32610 + BUG_ON(list_empty(&dl_ports));
32611 + BUG_ON(port == NULL);
32612 + BUG_ON(port->efx_dl_dev != efx_dl_dev);
32613 +
32614 + netback_disconnect_accelerator(0, port->net_dev->name);
32615 +
32616 + list_del(&port->link);
32617 +
32618 + mutex_unlock(&accel_mutex);
32619 +
32620 + efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks);
32621 + netback_accel_shutdown_fwd_port(port->fwd_priv);
32622 +
32623 + efx_dl_dev->priv = NULL;
32624 + kfree(port);
32625 +
32626 + return;
32627 +}
32628 +
32629 +
32630 +static struct efx_dl_driver bend_dl_driver =
32631 + {
32632 + .name = "SFC Xen backend",
32633 + .probe = bend_dl_probe,
32634 + .remove = bend_dl_remove,
32635 + };
32636 +
32637 +
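+/* Register with the net driver via driverlink and set the RX user buffer
+ * size on each NIC */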
32638 +int netback_accel_sf_init(void)
32639 +{
32640 + int rc, nic_i;
32641 + struct efhw_nic *nic;
32642 +
32643 + INIT_LIST_HEAD(&dl_ports);
32644 +
32645 + rc = efx_dl_register_driver(&bend_dl_driver);
32646 + /* If we couldn't find the NET driver, give up */
32647 + if (rc == -ENOENT)
32648 + return rc;
32649 +
32650 + if (rc == 0) {
32651 + EFRM_FOR_EACH_NIC(nic_i, nic)
32652 + falcon_nic_set_rx_usr_buf_size(nic,
32653 + SF_XEN_RX_USR_BUF_SIZE);
32654 + }
32655 +
32656 + init_done = (rc == 0);
32657 + return rc;
32658 +}
32659 +
32660 +
32661 +void netback_accel_sf_shutdown(void)
32662 +{
32663 + if (!init_done)
32664 + return;
32665 + DPRINTK("Unregistering driverlink driver\n");
32666 +
32667 + /*
32668 + * This will trigger removal callbacks for all the devices, which
32669 + * will unregister their callbacks, disconnect from netfront, etc.
32670 + */
32671 + efx_dl_unregister_driver(&bend_dl_driver);
32672 +}
32673 +
32674 +
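+/* Match the backend to its driverlink port by interface name and record the
+ * hardware type and setup/shutdown hooks */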
32675 +int netback_accel_sf_hwtype(struct netback_accel *bend)
32676 +{
32677 + struct driverlink_port *port;
32678 +
32679 + mutex_lock(&accel_mutex);
32680 +
32681 + list_for_each_entry(port, &dl_ports, link) {
32682 + if (strcmp(bend->nicname, port->net_dev->name) == 0) {
32683 + bend->hw_type = port->type;
32684 + bend->accel_setup = netback_accel_setup_vnic_hw;
32685 + bend->accel_shutdown = netback_accel_shutdown_vnic_hw;
32686 + bend->fwd_priv = port->fwd_priv;
32687 + /* This is just needed to pass to efx_vi_alloc */
32688 + bend->nic_index = port->nic_index;
32689 + bend->net_dev = port->net_dev;
32690 + mutex_unlock(&accel_mutex);
32691 + return 0;
32692 + }
32693 + }
32694 +
32695 + mutex_unlock(&accel_mutex);
32696 +
32697 + EPRINTK("Failed to identify backend device '%s' with a NIC\n",
32698 + bend->nicname);
32699 +
32700 + return -ENOENT;
32701 +}
32702 +
32703 +
32704 +/****************************************************************************
32705 + * Resource management code
32706 + ***************************************************************************/
32707 +
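+/* Allocate the Falcon-specific private state and the arrays used to track
+ * mapped buffer pages */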
32708 +static int alloc_page_state(struct netback_accel *bend, int max_pages)
32709 +{
32710 + struct falcon_bend_accel_priv *accel_hw_priv;
32711 +
32712 + if (max_pages < 0 || max_pages > bend->quotas.max_buf_pages) {
32713 + EPRINTK("%s: invalid max_pages: %d\n", __FUNCTION__, max_pages);
32714 + return -EINVAL;
32715 + }
32716 +
32717 + accel_hw_priv = kzalloc(sizeof(struct falcon_bend_accel_priv),
32718 + GFP_KERNEL);
32719 + if (accel_hw_priv == NULL) {
32720 + EPRINTK("%s: no memory for accel_hw_priv\n", __FUNCTION__);
32721 + return -ENOMEM;
32722 + }
32723 +
32724 + accel_hw_priv->dma_maps = kzalloc
32725 + (sizeof(struct efx_vi_dma_map_state **) *
32726 + (max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ), GFP_KERNEL);
32727 + if (accel_hw_priv->dma_maps == NULL) {
32728 + EPRINTK("%s: no memory for dma_maps\n", __FUNCTION__);
32729 + kfree(accel_hw_priv);
32730 + return -ENOMEM;
32731 + }
32732 +
32733 + bend->buffer_maps = kzalloc(sizeof(struct vm_struct *) * max_pages,
32734 + GFP_KERNEL);
32735 + if (bend->buffer_maps == NULL) {
32736 + EPRINTK("%s: no memory for buffer_maps\n", __FUNCTION__);
32737 + kfree(accel_hw_priv->dma_maps);
32738 + kfree(accel_hw_priv);
32739 + return -ENOMEM;
32740 + }
32741 +
32742 + bend->buffer_addrs = kzalloc(sizeof(u64) * max_pages, GFP_KERNEL);
32743 + if (bend->buffer_addrs == NULL) {
32744 + kfree(bend->buffer_maps);
32745 + kfree(accel_hw_priv->dma_maps);
32746 + kfree(accel_hw_priv);
32747 + return -ENOMEM;
32748 + }
32749 +
32750 + bend->accel_hw_priv = accel_hw_priv;
32751 +
32752 + return 0;
32753 +}
32754 +
32755 +
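+/* Free the state allocated by alloc_page_state() */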
32756 +static int free_page_state(struct netback_accel *bend)
32757 +{
32758 + struct falcon_bend_accel_priv *accel_hw_priv;
32759 +
32760 + DPRINTK("%s: %p\n", __FUNCTION__, bend);
32761 +
32762 + accel_hw_priv = bend->accel_hw_priv;
32763 +
32764 + if (accel_hw_priv) {
32765 + kfree(accel_hw_priv->dma_maps);
32766 + kfree(bend->buffer_maps);
32767 + kfree(bend->buffer_addrs);
32768 + kfree(accel_hw_priv);
32769 + bend->accel_hw_priv = NULL;
32770 + bend->max_pages = 0;
32771 + }
32772 +
32773 + return 0;
32774 +}
32775 +
32776 +
32777 +/* The timeout event callback for the event q */
32778 +static void bend_evq_timeout(void *context, int is_timeout)
32779 +{
32780 + struct netback_accel *bend = (struct netback_accel *)context;
32781 + if (is_timeout) {
32782 + /* Pass event to vnic front end driver */
32783 + VPRINTK("timeout event to %d\n", bend->net_channel);
32784 + NETBACK_ACCEL_STATS_OP(bend->stats.evq_timeouts++);
32785 + notify_remote_via_irq(bend->net_channel_irq);
32786 + } else {
32787 + /* It's a wakeup event, used by Falcon */
32788 + VPRINTK("wakeup to %d\n", bend->net_channel);
32789 + NETBACK_ACCEL_STATS_OP(bend->stats.evq_wakeups++);
32790 + notify_remote_via_irq(bend->net_channel_irq);
32791 + }
32792 +}
32793 +
32794 +
32795 +/*
32796 + * Create the eventq and associated gubbins for communication with the
32797 + * front end vnic driver
32798 + */
32799 +static int ef_get_vnic(struct netback_accel *bend)
32800 +{
32801 + struct falcon_bend_accel_priv *accel_hw_priv;
32802 + int rc = 0;
32803 +
32804 + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_NONE);
32805 +
32806 + /* Allocate page related state and accel_hw_priv */
32807 + rc = alloc_page_state(bend, bend->max_pages);
32808 + if (rc != 0) {
32809 + EPRINTK("Failed to allocate page state: %d\n", rc);
32810 + return rc;
32811 + }
32812 +
32813 + accel_hw_priv = bend->accel_hw_priv;
32814 +
32815 + rc = efx_vi_alloc(&accel_hw_priv->efx_vih, bend->nic_index);
32816 + if (rc != 0) {
32817 + EPRINTK("%s: efx_vi_alloc failed %d\n", __FUNCTION__, rc);
32818 + free_page_state(bend);
32819 + return rc;
32820 + }
32821 +
32822 + rc = efx_vi_eventq_register_callback(accel_hw_priv->efx_vih,
32823 + bend_evq_timeout,
32824 + bend);
32825 + if (rc != 0) {
32826 + EPRINTK("%s: register_callback failed %d\n", __FUNCTION__, rc);
32827 + efx_vi_free(accel_hw_priv->efx_vih);
32828 + free_page_state(bend);
32829 + return rc;
32830 + }
32831 +
32832 + bend->hw_state = NETBACK_ACCEL_RES_ALLOC;
32833 +
32834 + return 0;
32835 +}
32836 +
32837 +
32838 +static void ef_free_vnic(struct netback_accel *bend)
32839 +{
32840 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
32841 +
32842 + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_ALLOC);
32843 +
32844 + efx_vi_eventq_kill_callback(accel_hw_priv->efx_vih);
32845 +
32846 + DPRINTK("Hardware is freeable. Will proceed.\n");
32847 +
32848 + efx_vi_free(accel_hw_priv->efx_vih);
32849 + accel_hw_priv->efx_vih = NULL;
32850 +
32851 + VPRINTK("Free page state...\n");
32852 + free_page_state(bend);
32853 +
32854 + bend->hw_state = NETBACK_ACCEL_RES_NONE;
32855 +}
32856 +
32857 +
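+/* Revoke a grant; if the far end still has it mapped, shut the remote
+ * domain down */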
32858 +static inline void ungrant_or_crash(grant_ref_t gntref, int domain) {
32859 + if (net_accel_ungrant_page(gntref) == -EBUSY)
32860 + net_accel_shutdown_remote(domain);
32861 +}
32862 +
32863 +
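+/* Revoke the grants given to the frontend for the DMA queues, doorbell and
+ * event queue pages */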
32864 +static void netback_accel_release_hwinfo(struct netback_accel *bend)
32865 +{
32866 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
32867 + int i;
32868 +
32869 + DPRINTK("Remove dma q grants %d %d\n", accel_hw_priv->txdmaq_gnt,
32870 + accel_hw_priv->rxdmaq_gnt);
32871 + ungrant_or_crash(accel_hw_priv->txdmaq_gnt, bend->far_end);
32872 + ungrant_or_crash(accel_hw_priv->rxdmaq_gnt, bend->far_end);
32873 +
32874 + DPRINTK("Remove doorbell grant %d\n", accel_hw_priv->doorbell_gnt);
32875 + ungrant_or_crash(accel_hw_priv->doorbell_gnt, bend->far_end);
32876 +
32877 + if (bend->hw_type == NET_ACCEL_MSG_HWTYPE_FALCON_A) {
32878 + DPRINTK("Remove rptr grant %d\n", accel_hw_priv->evq_rptr_gnt);
32879 + ungrant_or_crash(accel_hw_priv->evq_rptr_gnt, bend->far_end);
32880 + }
32881 +
32882 + for (i = 0; i < accel_hw_priv->evq_npages; i++) {
32883 + DPRINTK("Remove evq grant %d\n", accel_hw_priv->evq_mem_gnts[i]);
32884 + ungrant_or_crash(accel_hw_priv->evq_mem_gnts[i], bend->far_end);
32885 + }
32886 +
32887 + bend->hw_state = NETBACK_ACCEL_RES_FILTER;
32888 +
32889 + return;
32890 +}
32891 +
32892 +
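+/* Query the VI's hardware resources, grant the relevant pages to the
+ * frontend and fill in the hardware info message */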
32893 +static int ef_bend_hwinfo_falcon_common(struct netback_accel *bend,
32894 + struct net_accel_hw_falcon_b *hwinfo)
32895 +{
32896 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
32897 + struct efx_vi_hw_resource_metadata res_mdata;
32898 + struct efx_vi_hw_resource res_array[EFX_VI_HW_RESOURCE_MAXSIZE];
32899 + int rc, len = EFX_VI_HW_RESOURCE_MAXSIZE, i, pfn = 0;
32900 + unsigned long txdmaq_pfn = 0, rxdmaq_pfn = 0;
32901 +
32902 + rc = efx_vi_hw_resource_get_phys(accel_hw_priv->efx_vih, &res_mdata,
32903 + res_array, &len);
32904 + if (rc != 0) {
32905 + DPRINTK("%s: resource_get_phys returned %d\n",
32906 + __FUNCTION__, rc);
32907 + return rc;
32908 + }
32909 +
32910 + if (res_mdata.version != 0)
32911 + return -EPROTO;
32912 +
32913 + hwinfo->nic_arch = res_mdata.nic_arch;
32914 + hwinfo->nic_variant = res_mdata.nic_variant;
32915 + hwinfo->nic_revision = res_mdata.nic_revision;
32916 +
32917 + hwinfo->evq_order = res_mdata.evq_order;
32918 + hwinfo->evq_offs = res_mdata.evq_offs;
32919 + hwinfo->evq_capacity = res_mdata.evq_capacity;
32920 + hwinfo->instance = res_mdata.instance;
32921 + hwinfo->rx_capacity = res_mdata.rx_capacity;
32922 + hwinfo->tx_capacity = res_mdata.tx_capacity;
32923 +
32924 + VPRINTK("evq_order %d evq_offs %d evq_cap %d inst %d rx_cap %d tx_cap %d\n",
32925 + hwinfo->evq_order, hwinfo->evq_offs, hwinfo->evq_capacity,
32926 + hwinfo->instance, hwinfo->rx_capacity, hwinfo->tx_capacity);
32927 +
32928 + for (i = 0; i < len; i++) {
32929 + struct efx_vi_hw_resource *res = &(res_array[i]);
32930 + switch (res->type) {
32931 + case EFX_VI_HW_RESOURCE_TXDMAQ:
32932 + txdmaq_pfn = page_to_pfn(virt_to_page(res->address));
32933 + break;
32934 + case EFX_VI_HW_RESOURCE_RXDMAQ:
32935 + rxdmaq_pfn = page_to_pfn(virt_to_page(res->address));
32936 + break;
32937 + case EFX_VI_HW_RESOURCE_EVQTIMER:
32938 + break;
32939 + case EFX_VI_HW_RESOURCE_EVQRPTR:
32940 + case EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET:
32941 + hwinfo->evq_rptr = res->address;
32942 + break;
32943 + case EFX_VI_HW_RESOURCE_EVQMEMKVA:
32944 + accel_hw_priv->evq_npages = 1 << res_mdata.evq_order;
32945 + pfn = page_to_pfn(virt_to_page(res->address));
32946 + break;
32947 + case EFX_VI_HW_RESOURCE_BELLPAGE:
32948 + hwinfo->doorbell_mfn = res->address;
32949 + break;
32950 + default:
32951 + EPRINTK("%s: Unknown hardware resource type %d\n",
32952 + __FUNCTION__, res->type);
32953 + break;
32954 + }
32955 + }
32956 +
32957 + VPRINTK("Passing txdmaq page pfn %lx\n", txdmaq_pfn);
32958 + accel_hw_priv->txdmaq_gnt = hwinfo->txdmaq_gnt =
32959 + net_accel_grant_page(bend->hdev_data, pfn_to_mfn(txdmaq_pfn),
32960 + 0);
32961 +
32962 + VPRINTK("Passing rxdmaq page pfn %lx\n", rxdmaq_pfn);
32963 + accel_hw_priv->rxdmaq_gnt = hwinfo->rxdmaq_gnt =
32964 + net_accel_grant_page(bend->hdev_data, pfn_to_mfn(rxdmaq_pfn),
32965 + 0);
32966 +
32967 + VPRINTK("Passing doorbell page mfn %x\n", hwinfo->doorbell_mfn);
32968 + /* Make the relevant H/W pages mappable by the far end */
32969 + accel_hw_priv->doorbell_gnt = hwinfo->doorbell_gnt =
32970 + net_accel_grant_page(bend->hdev_data, hwinfo->doorbell_mfn, 1);
32971 +
32972 + /* Now do the same for the memory pages */
32973 + /* Convert the page + length we got back for the evq to grants. */
32974 + for (i = 0; i < accel_hw_priv->evq_npages; i++) {
32975 + accel_hw_priv->evq_mem_gnts[i] = hwinfo->evq_mem_gnts[i] =
32976 + net_accel_grant_page(bend->hdev_data, pfn_to_mfn(pfn), 0);
32977 + VPRINTK("Got grant %u for evq pfn %x\n", hwinfo->evq_mem_gnts[i],
32978 + pfn);
32979 + pfn++;
32980 + }
32981 +
32982 + return 0;
32983 +}
32984 +
32985 +
32986 +static int ef_bend_hwinfo_falcon_a(struct netback_accel *bend,
32987 + struct net_accel_hw_falcon_a *hwinfo)
32988 +{
32989 + int rc;
32990 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
32991 +
32992 + if ((rc = ef_bend_hwinfo_falcon_common(bend, &hwinfo->common)) != 0)
32993 + return rc;
32994 +
32995 + /*
32996 + * Note that unlike the above, where the message field is the
32997 + * page number, here evq_rptr is the entire address because
32998 + * it is currently a pointer into the densely mapped timer page.
32999 + */
33000 + VPRINTK("Passing evq_rptr pfn %x for rptr %x\n",
33001 + hwinfo->common.evq_rptr >> PAGE_SHIFT,
33002 + hwinfo->common.evq_rptr);
33003 + rc = net_accel_grant_page(bend->hdev_data,
33004 + hwinfo->common.evq_rptr >> PAGE_SHIFT, 0);
33005 + if (rc < 0)
33006 + return rc;
33007 +
33008 + accel_hw_priv->evq_rptr_gnt = hwinfo->evq_rptr_gnt = rc;
33009 + VPRINTK("evq_rptr_gnt got %d\n", hwinfo->evq_rptr_gnt);
33010 +
33011 + return 0;
33012 +}
33013 +
33014 +
33015 +static int ef_bend_hwinfo_falcon_b(struct netback_accel *bend,
33016 + struct net_accel_hw_falcon_b *hwinfo)
33017 +{
33018 + return ef_bend_hwinfo_falcon_common(bend, hwinfo);
33019 +}
33020 +
33021 +
33022 +/*
33023 + * Fill in the message with a description of the hardware resources, based on
33024 + * the H/W type
33025 + */
33026 +static int netback_accel_hwinfo(struct netback_accel *bend,
33027 + struct net_accel_msg_hw *msgvi)
33028 +{
33029 + int rc = 0;
33030 +
33031 + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_FILTER);
33032 +
33033 + msgvi->type = bend->hw_type;
33034 + switch (bend->hw_type) {
33035 + case NET_ACCEL_MSG_HWTYPE_FALCON_A:
33036 + rc = ef_bend_hwinfo_falcon_a(bend, &msgvi->resources.falcon_a);
33037 + break;
33038 + case NET_ACCEL_MSG_HWTYPE_FALCON_B:
33039 + rc = ef_bend_hwinfo_falcon_b(bend, &msgvi->resources.falcon_b);
33040 + break;
33041 + case NET_ACCEL_MSG_HWTYPE_NONE:
33042 + /* Nothing to do. The slow path should just work. */
33043 + break;
33044 + }
33045 +
33046 + if (rc == 0)
33047 + bend->hw_state = NETBACK_ACCEL_RES_HWINFO;
33048 +
33049 + return rc;
33050 +}
33051 +
33052 +
33053 +/* Allocate hardware resources and make them available to the client domain */
33054 +int netback_accel_setup_vnic_hw(struct netback_accel *bend)
33055 +{
33056 + struct net_accel_msg msg;
33057 + int err;
33058 +
33059 + /* Allocate the event queue, VI and so on. */
33060 + err = ef_get_vnic(bend);
33061 + if (err) {
33062 + EPRINTK("Failed to allocate hardware resource for bend:"
33063 + "error %d\n", err);
33064 + return err;
33065 + }
33066 +
33067 + /* Set up the filter management */
33068 + err = netback_accel_filter_init(bend);
33069 + if (err) {
33070 + EPRINTK("Filter setup failed, error %d", err);
33071 + ef_free_vnic(bend);
33072 + return err;
33073 + }
33074 +
33075 + net_accel_msg_init(&msg, NET_ACCEL_MSG_SETHW);
33076 +
33077 + /*
33078 + * Extract the low-level hardware info we will actually pass to the
33079 + * other end, and set up the grants/ioremap permissions needed
33080 + */
33081 + err = netback_accel_hwinfo(bend, &msg.u.hw);
33082 +
33083 + if (err != 0) {
33084 + netback_accel_filter_shutdown(bend);
33085 + ef_free_vnic(bend);
33086 + return err;
33087 + }
33088 +
33089 + /* Send the message, this is a reply to a hello-reply */
33090 + err = net_accel_msg_reply_notify(bend->shared_page,
33091 + bend->msg_channel_irq,
33092 + &bend->to_domU, &msg);
33093 +
33094 + /*
33095 + * The message should succeed as it's logically a reply and we
33096 +	 * guarantee space for replies, but a misbehaving frontend
33097 +	 * could still cause it to fail, so be tolerant
33098 + */
33099 + if (err != 0) {
33100 + netback_accel_release_hwinfo(bend);
33101 + netback_accel_filter_shutdown(bend);
33102 + ef_free_vnic(bend);
33103 + }
33104 +
33105 + return err;
33106 +}
33107 +
33108 +
33109 +/* Free hardware resources */
33110 +void netback_accel_shutdown_vnic_hw(struct netback_accel *bend)
33111 +{
33112 + /*
33113 +	 * Only try to release resources if accel_hw_priv was set up;
33114 +	 * otherwise there is nothing to do as we're on "null-op"
33115 +	 * acceleration
33116 + */
33117 + switch (bend->hw_state) {
33118 + case NETBACK_ACCEL_RES_HWINFO:
33119 + VPRINTK("Release hardware resources\n");
33120 + netback_accel_release_hwinfo(bend);
33121 + /* deliberate drop through */
33122 + case NETBACK_ACCEL_RES_FILTER:
33123 + VPRINTK("Free filters...\n");
33124 + netback_accel_filter_shutdown(bend);
33125 + /* deliberate drop through */
33126 + case NETBACK_ACCEL_RES_ALLOC:
33127 + VPRINTK("Free vnic...\n");
33128 + ef_free_vnic(bend);
33129 + /* deliberate drop through */
33130 + case NETBACK_ACCEL_RES_NONE:
33131 + break;
33132 + default:
33133 + BUG();
33134 + }
33135 +}
33136 +
33137 +/**************************************************************************
33138 + *
33139 + * Buffer table stuff
33140 + *
33141 + **************************************************************************/
33142 +
33143 +/*
33144 + * Undo any allocation that netback_accel_msg_rx_buffer_map() has made
33145 + * if it fails half way through
33146 + */
33147 +static inline void buffer_map_cleanup(struct netback_accel *bend, int i)
33148 +{
33149 + while (i > 0) {
33150 + i--;
33151 + bend->buffer_maps_index--;
33152 + net_accel_unmap_device_page(bend->hdev_data,
33153 + bend->buffer_maps[bend->buffer_maps_index],
33154 + bend->buffer_addrs[bend->buffer_maps_index]);
33155 + }
33156 +}
33157 +
33158 +
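+/* Map the pages granted by the frontend and program them into the NIC's
+ * buffer table */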
33159 +int netback_accel_add_buffers(struct netback_accel *bend, int pages, int log2_pages,
33160 + u32 *grants, u32 *buf_addr_out)
33161 +{
33162 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33163 + unsigned long long addr_array[NET_ACCEL_MSG_MAX_PAGE_REQ];
33164 + int rc, i, index;
33165 + u64 dev_bus_addr;
33166 +
33167 + /* Make sure we can't overflow the dma_maps array */
33168 + if (accel_hw_priv->dma_maps_index >=
33169 + bend->max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ) {
33170 + EPRINTK("%s: too many buffer table allocations: %d %d\n",
33171 + __FUNCTION__, accel_hw_priv->dma_maps_index,
33172 + bend->max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ);
33173 + return -EINVAL;
33174 + }
33175 +
33176 + /* Make sure we can't overflow the buffer_maps array */
33177 + if (bend->buffer_maps_index + pages > bend->max_pages) {
33178 + EPRINTK("%s: too many pages mapped: %d + %d > %d\n",
33179 + __FUNCTION__, bend->buffer_maps_index,
33180 + pages, bend->max_pages);
33181 + return -EINVAL;
33182 + }
33183 +
33184 + for (i = 0; i < pages; i++) {
33185 + VPRINTK("%s: mapping page %d\n", __FUNCTION__, i);
33186 + rc = net_accel_map_device_page
33187 + (bend->hdev_data, grants[i],
33188 + &bend->buffer_maps[bend->buffer_maps_index],
33189 + &dev_bus_addr);
33190 +
33191 + if (rc != 0) {
33192 + EPRINTK("error in net_accel_map_device_page\n");
33193 + buffer_map_cleanup(bend, i);
33194 + return rc;
33195 + }
33196 +
33197 + bend->buffer_addrs[bend->buffer_maps_index] = dev_bus_addr;
33198 +
33199 + bend->buffer_maps_index++;
33200 +
33201 + addr_array[i] = dev_bus_addr;
33202 + }
33203 +
33204 + VPRINTK("%s: mapping dma addresses to vih %p\n", __FUNCTION__,
33205 + accel_hw_priv->efx_vih);
33206 +
33207 + index = accel_hw_priv->dma_maps_index;
33208 + if ((rc = efx_vi_dma_map_addrs(accel_hw_priv->efx_vih, addr_array, pages,
33209 + &(accel_hw_priv->dma_maps[index]))) < 0) {
33210 + EPRINTK("error in dma_map_pages\n");
33211 + buffer_map_cleanup(bend, i);
33212 + return rc;
33213 + }
33214 +
33215 + accel_hw_priv->dma_maps_index++;
33216 + NETBACK_ACCEL_STATS_OP(bend->stats.num_buffer_pages += pages);
33217 +
33218 + //DPRINTK("%s: getting map address\n", __FUNCTION__);
33219 +
33220 + *buf_addr_out = efx_vi_dma_get_map_addr(accel_hw_priv->efx_vih,
33221 + accel_hw_priv->dma_maps[index]);
33222 +
33223 + //DPRINTK("%s: done\n", __FUNCTION__);
33224 +
33225 + return 0;
33226 +}
33227 +
33228 +
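+/* Unmap all buffer table entries and granted pages held for this backend */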
33229 +int netback_accel_remove_buffers(struct netback_accel *bend)
33230 +{
33231 + /* Only try to free buffers if accel_hw_priv was setup */
33232 + if (bend->hw_state != NETBACK_ACCEL_RES_NONE) {
33233 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33234 + int i;
33235 +
33236 + efx_vi_reset(accel_hw_priv->efx_vih);
33237 +
33238 + while (accel_hw_priv->dma_maps_index > 0) {
33239 + accel_hw_priv->dma_maps_index--;
33240 + i = accel_hw_priv->dma_maps_index;
33241 + efx_vi_dma_unmap_addrs(accel_hw_priv->efx_vih,
33242 + accel_hw_priv->dma_maps[i]);
33243 + }
33244 +
33245 + while (bend->buffer_maps_index > 0) {
33246 + VPRINTK("Unmapping granted buffer %d\n",
33247 + bend->buffer_maps_index);
33248 + bend->buffer_maps_index--;
33249 + i = bend->buffer_maps_index;
33250 + net_accel_unmap_device_page(bend->hdev_data,
33251 + bend->buffer_maps[i],
33252 + bend->buffer_addrs[i]);
33253 + }
33254 +
33255 + NETBACK_ACCEL_STATS_OP(bend->stats.num_buffer_pages = 0);
33256 + }
33257 +
33258 + return 0;
33259 +}
33260 +
33261 +/**************************************************************************
33262 + *
33263 + * Filter stuff
33264 + *
33265 + **************************************************************************/
33266 +
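+/* Allocate the filter spec array and the cuckoo hash table used to track
+ * active filters */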
33267 +static int netback_accel_filter_init(struct netback_accel *bend)
33268 +{
33269 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33270 + int i, rc;
33271 +
33272 + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_ALLOC);
33273 +
33274 + spin_lock_init(&accel_hw_priv->filter_lock);
33275 +
33276 + if ((rc = cuckoo_hash_init(&accel_hw_priv->filter_hash_table,
33277 + 5 /* space for 32 filters */, 8)) != 0) {
33278 + EPRINTK("Failed to initialise filter hash table\n");
33279 + return rc;
33280 + }
33281 +
33282 + accel_hw_priv->fspecs = kzalloc(sizeof(struct netback_accel_filter_spec) *
33283 + bend->quotas.max_filters,
33284 + GFP_KERNEL);
33285 +
33286 + if (accel_hw_priv->fspecs == NULL) {
33287 + EPRINTK("No memory for filter specs.\n");
33288 + cuckoo_hash_destroy(&accel_hw_priv->filter_hash_table);
33289 + return -ENOMEM;
33290 + }
33291 +
33292 + for (i = 0; i < bend->quotas.max_filters; i++) {
33293 + accel_hw_priv->free_filters |= (1 << i);
33294 + }
33295 +
33296 + /* Base mask on highest set bit in max_filters */
33297 + accel_hw_priv->filter_idx_mask = (1 << fls(bend->quotas.max_filters)) - 1;
33298 + VPRINTK("filter setup: max is %x mask is %x\n",
33299 + bend->quotas.max_filters, accel_hw_priv->filter_idx_mask);
33300 +
33301 + bend->hw_state = NETBACK_ACCEL_RES_FILTER;
33302 +
33303 + return 0;
33304 +}
33305 +
33306 +
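+/* Build a cuckoo hash key from a filter spec's destination IP, port and
+ * protocol */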
33307 +static inline void make_filter_key(cuckoo_hash_ip_key *key,
33308 + struct netback_accel_filter_spec *filt)
33309 +
33310 +{
33311 + key->local_ip = filt->destip_be;
33312 + key->local_port = filt->destport_be;
33313 + key->proto = filt->proto;
33314 +}
33315 +
33316 +
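+/* If the filter slot is in use, stop the hardware filter and remove it from
+ * the hash table */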
33317 +static inline
33318 +void netback_accel_free_filter(struct falcon_bend_accel_priv *accel_hw_priv,
33319 + int filter)
33320 +{
33321 + cuckoo_hash_ip_key filter_key;
33322 +
33323 + if (!(accel_hw_priv->free_filters & (1 << filter))) {
33324 + efx_vi_filter_stop(accel_hw_priv->efx_vih,
33325 + accel_hw_priv->fspecs[filter].filter_handle);
33326 + make_filter_key(&filter_key, &(accel_hw_priv->fspecs[filter]));
33327 + if (cuckoo_hash_remove(&accel_hw_priv->filter_hash_table,
33328 + (cuckoo_hash_key *)&filter_key)) {
33329 + EPRINTK("%s: Couldn't find filter to remove from table\n",
33330 + __FUNCTION__);
33331 + BUG();
33332 + }
33333 + }
33334 +}
33335 +
33336 +
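+/* Remove every filter and free the filter tracking state */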
33337 +static void netback_accel_filter_shutdown(struct netback_accel *bend)
33338 +{
33339 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33340 + int i;
33341 + unsigned long flags;
33342 +
33343 + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_FILTER);
33344 +
33345 + spin_lock_irqsave(&accel_hw_priv->filter_lock, flags);
33346 +
33347 + BUG_ON(accel_hw_priv->fspecs == NULL);
33348 +
33349 + for (i = 0; i < bend->quotas.max_filters; i++) {
33350 + netback_accel_free_filter(accel_hw_priv, i);
33351 + }
33352 +
33353 + kfree(accel_hw_priv->fspecs);
33354 + accel_hw_priv->fspecs = NULL;
33355 + accel_hw_priv->free_filters = 0;
33356 +
33357 + cuckoo_hash_destroy(&accel_hw_priv->filter_hash_table);
33358 +
33359 + spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags);
33360 +
33361 + bend->hw_state = NETBACK_ACCEL_RES_ALLOC;
33362 +}
33363 +
33364 +
33365 +/*! Suggest a filter to replace when we want to insert a new one and have
33366 + * none free.
33367 + */
33368 +static unsigned get_victim_filter(struct netback_accel *bend)
33369 +{
33370 + /*
33371 + * We could attempt to get really clever, and may do at some
33372 +	 * point, but random replacement is very cheap and has few
33373 +	 * pathological worst cases.
33374 + */
33375 + unsigned index, cycles;
33376 +
33377 + rdtscl(cycles);
33378 +
33379 + /*
33380 + * Some doubt about the quality of the bottom few bits, so
33381 +	 * throw 'em away
33382 + */
33383 + index = (cycles >> 4) & ((struct falcon_bend_accel_priv *)
33384 + bend->accel_hw_priv)->filter_idx_mask;
33385 + /*
33386 + * We don't enforce that the number of filters is a power of
33387 + * two, but the masking gets us to within one subtraction of a
33388 + * valid index
33389 + */
33390 + if (index >= bend->quotas.max_filters)
33391 + index -= bend->quotas.max_filters;
33392 + DPRINTK("backend %s->%d has no free filters. Filter %d will be evicted\n",
33393 + bend->nicname, bend->far_end, index);
33394 + return index;
33395 +}
33396 +
33397 +
33398 +/* Add a filter for the specified IP/port to the backend */
33399 +int
33400 +netback_accel_filter_check_add(struct netback_accel *bend,
33401 + struct netback_accel_filter_spec *filt)
33402 +{
33403 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33404 + struct netback_accel_filter_spec *fs;
33405 + unsigned filter_index;
33406 + unsigned long flags;
33407 + int rc, recycling = 0;
33408 + cuckoo_hash_ip_key filter_key, evict_key;
33409 +
33410 + BUG_ON(filt->proto != IPPROTO_TCP && filt->proto != IPPROTO_UDP);
33411 +
33412 + DPRINTK("Will add %s filter for dst ip %08x and dst port %d\n",
33413 + (filt->proto == IPPROTO_TCP) ? "TCP" : "UDP",
33414 + be32_to_cpu(filt->destip_be), be16_to_cpu(filt->destport_be));
33415 +
33416 + spin_lock_irqsave(&accel_hw_priv->filter_lock, flags);
33417 + /*
33418 + * Check to see if we're already filtering this IP address and
33419 + * port. Happens if you insert a filter mid-stream as there
33420 + * are many packets backed up to be delivered to dom0 already
33421 + */
33422 + make_filter_key(&filter_key, filt);
33423 + if (cuckoo_hash_lookup(&accel_hw_priv->filter_hash_table,
33424 + (cuckoo_hash_key *)(&filter_key),
33425 + &filter_index)) {
33426 + DPRINTK("Found matching filter %d already in table\n",
33427 + filter_index);
33428 + rc = -1;
33429 + goto out;
33430 + }
33431 +
33432 + if (accel_hw_priv->free_filters == 0) {
33433 + filter_index = get_victim_filter(bend);
33434 + recycling = 1;
33435 + } else {
33436 + filter_index = __ffs(accel_hw_priv->free_filters);
33437 + clear_bit(filter_index, &accel_hw_priv->free_filters);
33438 + }
33439 +
33440 + fs = &accel_hw_priv->fspecs[filter_index];
33441 +
33442 + if (recycling) {
33443 + DPRINTK("Removing filter index %d handle %p\n", filter_index,
33444 + fs->filter_handle);
33445 +
33446 + if ((rc = efx_vi_filter_stop(accel_hw_priv->efx_vih,
33447 + fs->filter_handle)) != 0) {
33448 + EPRINTK("Couldn't clear NIC filter table entry %d\n", rc);
33449 + }
33450 +
33451 + make_filter_key(&evict_key, fs);
33452 + if (cuckoo_hash_remove(&accel_hw_priv->filter_hash_table,
33453 + (cuckoo_hash_key *)&evict_key)) {
33454 + EPRINTK("Couldn't find filter to remove from table\n");
33455 + BUG();
33456 + }
33457 + NETBACK_ACCEL_STATS_OP(bend->stats.num_filters--);
33458 + }
33459 +
33460 + /* Update the filter spec with new details */
33461 + *fs = *filt;
33462 +
33463 + if ((rc = cuckoo_hash_add(&accel_hw_priv->filter_hash_table,
33464 + (cuckoo_hash_key *)&filter_key, filter_index,
33465 + 1)) != 0) {
33466 + EPRINTK("Error (%d) adding filter to table\n", rc);
33467 + accel_hw_priv->free_filters |= (1 << filter_index);
33468 + goto out;
33469 + }
33470 +
33471 + rc = efx_vi_filter(accel_hw_priv->efx_vih, filt->proto, filt->destip_be,
33472 + filt->destport_be,
33473 + (struct filter_resource_t **)&fs->filter_handle);
33474 +
33475 + if (rc != 0) {
33476 + EPRINTK("Hardware filter insertion failed. Error %d\n", rc);
33477 + accel_hw_priv->free_filters |= (1 << filter_index);
33478 + cuckoo_hash_remove(&accel_hw_priv->filter_hash_table,
33479 + (cuckoo_hash_key *)&filter_key);
33480 + rc = -1;
33481 + goto out;
33482 + }
33483 +
33484 + NETBACK_ACCEL_STATS_OP(bend->stats.num_filters++);
33485 +
33486 + VPRINTK("%s: success index %d handle %p\n", __FUNCTION__, filter_index,
33487 + fs->filter_handle);
33488 +
33489 + rc = filter_index;
33490 + out:
33491 + spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags);
33492 + return rc;
33493 +}
33494 +
33495 +
33496 +/* Remove a filter entry for the specific device and IP/port */
33497 +static void netback_accel_filter_remove(struct netback_accel *bend,
33498 + int filter_index)
33499 +{
33500 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33501 +
33502 + BUG_ON(accel_hw_priv->free_filters & (1 << filter_index));
33503 + netback_accel_free_filter(accel_hw_priv, filter_index);
33504 + accel_hw_priv->free_filters |= (1 << filter_index);
33505 +}
33506 +
33507 +
33508 +/* Remove a filter entry for the specific device and IP/port */
33509 +void netback_accel_filter_remove_spec(struct netback_accel *bend,
33510 + struct netback_accel_filter_spec *filt)
33511 +{
33512 + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
33513 + unsigned filter_found;
33514 + unsigned long flags;
33515 + cuckoo_hash_ip_key filter_key;
33516 + struct netback_accel_filter_spec *fs;
33517 +
33518 + if (filt->proto == IPPROTO_TCP) {
33519 + DPRINTK("Remove TCP filter for dst ip %08x and dst port %d\n",
33520 + be32_to_cpu(filt->destip_be),
33521 + be16_to_cpu(filt->destport_be));
33522 + } else if (filt->proto == IPPROTO_UDP) {
33523 + DPRINTK("Remove UDP filter for dst ip %08x and dst port %d\n",
33524 + be32_to_cpu(filt->destip_be),
33525 + be16_to_cpu(filt->destport_be));
33526 + } else {
33527 + /*
33528 +		 * This could be provoked by an evil frontend, so we can't
33529 +		 * BUG(), but it's harmless as it should fail the tests below
33530 + */
33531 + DPRINTK("Non-TCP/UDP filter dst ip %08x and dst port %d\n",
33532 + be32_to_cpu(filt->destip_be),
33533 + be16_to_cpu(filt->destport_be));
33534 + }
33535 +
33536 + spin_lock_irqsave(&accel_hw_priv->filter_lock, flags);
33537 +
33538 + make_filter_key(&filter_key, filt);
33539 + if (!cuckoo_hash_lookup(&accel_hw_priv->filter_hash_table,
33540 + (cuckoo_hash_key *)(&filter_key),
33541 + &filter_found)) {
33542 + EPRINTK("Couldn't find matching filter already in table\n");
33543 + goto out;
33544 + }
33545 +
33546 + /* Do a full check to make sure we've not had a hash collision */
33547 + fs = &accel_hw_priv->fspecs[filter_found];
33548 + if (fs->destip_be == filt->destip_be &&
33549 + fs->destport_be == filt->destport_be &&
33550 + fs->proto == filt->proto &&
33551 + !memcmp(fs->mac, filt->mac, ETH_ALEN)) {
33552 + netback_accel_filter_remove(bend, filter_found);
33553 + } else {
33554 + EPRINTK("Entry in hash table does not match filter spec\n");
33555 + goto out;
33556 + }
33557 +
33558 + out:
33559 + spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags);
33560 +}
33561 Index: head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.h
33562 ===================================================================
33563 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
33564 +++ head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.h 2008-02-20 09:32:49.000000000 +0100
33565 @@ -0,0 +1,88 @@
33566 +/****************************************************************************
33567 + * Solarflare driver for Xen network acceleration
33568 + *
33569 + * Copyright 2006-2008: Solarflare Communications Inc,
33570 + * 9501 Jeronimo Road, Suite 250,
33571 + * Irvine, CA 92618, USA
33572 + *
33573 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
33574 + *
33575 + * This program is free software; you can redistribute it and/or modify it
33576 + * under the terms of the GNU General Public License version 2 as published
33577 + * by the Free Software Foundation, incorporated herein by reference.
33578 + *
33579 + * This program is distributed in the hope that it will be useful,
33580 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
33581 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33582 + * GNU General Public License for more details.
33583 + *
33584 + * You should have received a copy of the GNU General Public License
33585 + * along with this program; if not, write to the Free Software
33586 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33587 + ****************************************************************************
33588 + */
33589 +
33590 +#ifndef NETBACK_ACCEL_SOLARFLARE_H
33591 +#define NETBACK_ACCEL_SOLARFLARE_H
33592 +
33593 +#include "accel.h"
33594 +#include "accel_msg_iface.h"
33595 +
33596 +#include "driverlink_api.h"
33597 +
33598 +#define MAX_NICS 5
33599 +#define MAX_PORTS 2
33600 +
33601 +
33602 +extern int netback_accel_sf_init(void);
33603 +extern void netback_accel_sf_shutdown(void);
33604 +extern int netback_accel_sf_hwtype(struct netback_accel *bend);
33605 +
33606 +extern int netback_accel_sf_char_init(void);
33607 +extern void netback_accel_sf_char_shutdown(void);
33608 +
33609 +extern int netback_accel_setup_vnic_hw(struct netback_accel *bend);
33610 +extern void netback_accel_shutdown_vnic_hw(struct netback_accel *bend);
33611 +
33612 +extern int netback_accel_add_buffers(struct netback_accel *bend, int pages,
33613 + int log2_pages, u32 *grants,
33614 + u32 *buf_addr_out);
33615 +extern int netback_accel_remove_buffers(struct netback_accel *bend);
33616 +
33617 +
33618 +/* Add a filter for the specified IP/port to the backend */
33619 +extern int
33620 +netback_accel_filter_check_add(struct netback_accel *bend,
33621 + struct netback_accel_filter_spec *filt);
33622 +/* Remove a filter entry for the specific device and IP/port */
33623 +extern
33624 +void netback_accel_filter_remove_index(struct netback_accel *bend,
33625 + int filter_index);
33626 +extern
33627 +void netback_accel_filter_remove_spec(struct netback_accel *bend,
33628 + struct netback_accel_filter_spec *filt);
33629 +
33630 +/* This is designed to look a bit like a skb */
33631 +struct netback_pkt_buf {
33632 + union {
33633 + unsigned char *raw;
33634 + } mac;
33635 + union {
33636 + struct iphdr *iph;
33637 + struct arphdr *arph;
33638 + unsigned char *raw;
33639 + } nh;
33640 + int protocol;
33641 +};
33642 +
33643 +/*! \brief Handle a received packet: insert fast path filters as necessary
33644 + * \param skb The packet buffer
33645 + */
33646 +extern void netback_accel_rx_packet(struct netback_pkt_buf *skb, void *fwd_priv);
33647 +
33648 +/*! \brief Handle a transmitted packet: update fast path filters as necessary
33649 + * \param skb The packet buffer
33650 + */
33651 +extern void netback_accel_tx_packet(struct sk_buff *skb, void *fwd_priv);
33652 +
33653 +#endif /* NETBACK_ACCEL_SOLARFLARE_H */
33654 Index: head-2008-11-25/drivers/xen/sfc_netback/accel_xenbus.c
33655 ===================================================================
33656 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
33657 +++ head-2008-11-25/drivers/xen/sfc_netback/accel_xenbus.c 2008-02-26 10:54:11.000000000 +0100
33658 @@ -0,0 +1,831 @@
33659 +/****************************************************************************
33660 + * Solarflare driver for Xen network acceleration
33661 + *
33662 + * Copyright 2006-2008: Solarflare Communications Inc,
33663 + * 9501 Jeronimo Road, Suite 250,
33664 + * Irvine, CA 92618, USA
33665 + *
33666 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
33667 + *
33668 + * This program is free software; you can redistribute it and/or modify it
33669 + * under the terms of the GNU General Public License version 2 as published
33670 + * by the Free Software Foundation, incorporated herein by reference.
33671 + *
33672 + * This program is distributed in the hope that it will be useful,
33673 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
33674 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33675 + * GNU General Public License for more details.
33676 + *
33677 + * You should have received a copy of the GNU General Public License
33678 + * along with this program; if not, write to the Free Software
33679 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33680 + ****************************************************************************
33681 + */
33682 +
33683 +#include <xen/evtchn.h>
33684 +#include <linux/mutex.h>
33685 +
33686 +/* drivers/xen/netback/common.h */
33687 +#include "common.h"
33688 +
33689 +#include "accel.h"
33690 +#include "accel_solarflare.h"
33691 +#include "accel_util.h"
33692 +
33693 +#define NODENAME_PATH_FMT "backend/vif/%d/%d"
33694 +
33695 +#define NETBACK_ACCEL_FROM_XENBUS_DEVICE(_dev) (struct netback_accel *) \
33696 + ((struct backend_info *)(_dev)->dev.driver_data)->netback_accel_priv
33697 +
33698 +/* List of all the bends currently in existence. */
33699 +struct netback_accel *bend_list = NULL;
33700 +DEFINE_MUTEX(bend_list_mutex);
33701 +
33702 +/* Put in bend_list. Must hold bend_list_mutex */
33703 +static void link_bend(struct netback_accel *bend)
33704 +{
33705 + bend->next_bend = bend_list;
33706 + bend_list = bend;
33707 +}
33708 +
33709 +/* Remove from bend_list. Must hold bend_list_mutex */
33710 +static void unlink_bend(struct netback_accel *bend)
33711 +{
33712 + struct netback_accel *tmp = bend_list;
33713 + struct netback_accel *prev = NULL;
33714 + while (tmp != NULL) {
33715 + if (tmp == bend) {
33716 + if (prev != NULL)
33717 + prev->next_bend = bend->next_bend;
33718 + else
33719 + bend_list = bend->next_bend;
33720 + return;
33721 + }
33722 + prev = tmp;
33723 + tmp = tmp->next_bend;
33724 + }
33725 +}
33726 +
33727 +
33728 +/* Demultiplex a message IRQ from the frontend driver. */
33729 +static irqreturn_t msgirq_from_frontend(int irq, void *context,
33730 + struct pt_regs *unused)
33731 +{
33732 + struct xenbus_device *dev = context;
33733 + struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
33734 + VPRINTK("irq %d from device %s\n", irq, dev->nodename);
33735 + schedule_work(&bend->handle_msg);
33736 + return IRQ_HANDLED;
33737 +}
33738 +
33739 +
33740 +/*
33741 + * Demultiplex an IRQ from the frontend driver. This is never used
33742 + * functionally, but we need it to pass to the bind function, and may
33743 + * get called spuriously
33744 + */
33745 +static irqreturn_t netirq_from_frontend(int irq, void *context,
33746 + struct pt_regs *unused)
33747 +{
33748 + VPRINTK("netirq %d from device %s\n", irq,
33749 + ((struct xenbus_device *)context)->nodename);
33750 +
33751 + return IRQ_HANDLED;
33752 +}
33753 +
33754 +
33755 +/* Read the limits values from the device's xenbus node. */
33756 +static
33757 +void cfg_hw_quotas(struct xenbus_device *dev, struct netback_accel *bend)
33758 +{
33759 + int err = xenbus_gather
33760 + (XBT_NIL, dev->nodename,
33761 + "limits/max-filters", "%d", &bend->quotas.max_filters,
33762 + "limits/max-buf-pages", "%d", &bend->quotas.max_buf_pages,
33763 + "limits/max-mcasts", "%d", &bend->quotas.max_mcasts,
33764 + NULL);
33765 + if (err) {
33766 + /*
33767 + * TODO what if they have previously been set by the
33768 + * user? This will overwrite with defaults. Maybe
33769 + * not what we want to do, but useful in startup
33770 + * case
33771 + */
33772 + DPRINTK("Failed to read quotas from xenbus, using defaults\n");
33773 + bend->quotas.max_filters = NETBACK_ACCEL_DEFAULT_MAX_FILTERS;
33774 + bend->quotas.max_buf_pages = sfc_netback_max_pages;
33775 + bend->quotas.max_mcasts = NETBACK_ACCEL_DEFAULT_MAX_MCASTS;
33776 + }
33777 +
33778 + return;
33779 +}
33780 +
33781 +
33782 +static void bend_config_accel_change(struct xenbus_watch *watch,
33783 + const char **vec, unsigned int len)
33784 +{
33785 + struct netback_accel *bend;
33786 +
33787 + bend = container_of(watch, struct netback_accel, config_accel_watch);
33788 +
33789 + mutex_lock(&bend->bend_mutex);
33790 + if (bend->config_accel_watch.node != NULL) {
33791 + struct xenbus_device *dev =
33792 + (struct xenbus_device *)bend->hdev_data;
33793 + DPRINTK("Watch matched, got dev %p otherend %p\n",
33794 + dev, dev->otherend);
33795 + if(!xenbus_exists(XBT_NIL, watch->node, "")) {
33796 + DPRINTK("Ignoring watch as otherend seems invalid\n");
33797 + goto out;
33798 + }
33799 +
33800 + cfg_hw_quotas(dev, bend);
33801 + }
33802 + out:
33803 + mutex_unlock(&bend->bend_mutex);
33804 + return;
33805 +}
33806 +
33807 +
33808 +/*
33809 + * Setup watch on "limits" in the backend vif info to know when
33810 + * configuration has been set
33811 + */
33812 +static int setup_config_accel_watch(struct xenbus_device *dev,
33813 + struct netback_accel *bend)
33814 +{
33815 + int err;
33816 +
33817 + VPRINTK("Setting watch on %s/%s\n", dev->nodename, "limits");
33818 +
33819 + err = xenbus_watch_path2(dev, dev->nodename, "limits",
33820 + &bend->config_accel_watch,
33821 + bend_config_accel_change);
33822 +
33823 + if (err) {
33824 + EPRINTK("%s: Failed to register xenbus watch: %d\n",
33825 + __FUNCTION__, err);
33826 + bend->config_accel_watch.node = NULL;
33827 + return err;
33828 + }
33829 + return 0;
33830 +}
33831 +
33832 +
33833 +static int
33834 +cfg_frontend_info(struct xenbus_device *dev, struct netback_accel *bend,
33835 + int *grants)
33836 +{
33837 + /* Get some info from xenbus on the event channel and shmem grant */
33838 + int err = xenbus_gather(XBT_NIL, dev->otherend,
33839 + "accel-msg-channel", "%u", &bend->msg_channel,
33840 + "accel-ctrl-page", "%d", &(grants[0]),
33841 + "accel-msg-page", "%d", &(grants[1]),
33842 + "accel-net-channel", "%u", &bend->net_channel,
33843 + NULL);
33844 + if (err)
33845 + EPRINTK("failed to read event channels or shmem grant: %d\n",
33846 + err);
33847 + else
33848 + DPRINTK("got event chan %d and net chan %d from frontend\n",
33849 + bend->msg_channel, bend->net_channel);
33850 + return err;
33851 +}
33852 +
33853 +
33854 +/* Setup all the comms needed to chat with the front end driver */
33855 +static int setup_vnic(struct xenbus_device *dev)
33856 +{
33857 + struct netback_accel *bend;
33858 + int grants[2], err, msgs_per_queue;
33859 +
33860 + bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
33861 +
33862 + err = cfg_frontend_info(dev, bend, grants);
33863 + if (err)
33864 + goto fail1;
33865 +
33866 + /*
33867 + * If we get here, the frontend is Connected and the configuration
33868 + * options are available. All is well.
33869 + */
33870 +
33871 + /* Get the hardware quotas for the VNIC in question. */
33872 + cfg_hw_quotas(dev, bend);
33873 +
33874 + /* Set up the deferred work handlers */
33875 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
33876 + INIT_WORK(&bend->handle_msg,
33877 + netback_accel_msg_rx_handler);
33878 +#else
33879 + INIT_WORK(&bend->handle_msg,
33880 + netback_accel_msg_rx_handler,
33881 + (void*)bend);
33882 +#endif
33883 +
33884 + /* Request the frontend mac */
33885 + err = net_accel_xen_net_read_mac(dev, bend->mac);
33886 + if (err)
33887 + goto fail2;
33888 +
33889 + /* Set up the shared page. */
33890 + bend->shared_page = net_accel_map_grants_contig(dev, grants, 2,
33891 + &bend->sh_pages_unmap);
33892 +
33893 + if (bend->shared_page == NULL) {
33894 + EPRINTK("failed to map shared page for %s\n", dev->otherend);
33895 + err = -ENOMEM;
33896 + goto fail2;
33897 + }
33898 +
33899 + /* Initialise the shared page(s) used for comms */
33900 + net_accel_msg_init_page(bend->shared_page, PAGE_SIZE,
33901 + bend->net_dev->flags & IFF_UP);
33902 +
33903 + msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg);
33904 +
33905 + net_accel_msg_init_queue
33906 + (&bend->to_domU, &bend->shared_page->queue0,
33907 + (struct net_accel_msg *)((__u8*)bend->shared_page + PAGE_SIZE),
33908 + msgs_per_queue);
33909 +
33910 + net_accel_msg_init_queue
33911 + (&bend->from_domU, &bend->shared_page->queue1,
33912 + (struct net_accel_msg *)((__u8*)bend->shared_page +
33913 + (3 * PAGE_SIZE / 2)),
33914 + msgs_per_queue);
33915 +
33916 + /* Bind the message event channel to a handler
33917 + *
33918 + * Note that we will probably get a spurious interrupt when we
33919 + * do this, so it must not be done until we have set up
33920 + * everything we need to handle it.
33921 + */
33922 + err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id,
33923 + bend->msg_channel,
33924 + msgirq_from_frontend,
33925 + 0,
33926 + "netback_accel",
33927 + dev);
33928 + if (err < 0) {
33929 + EPRINTK("failed to bind event channel: %d\n", err);
33930 + goto fail3;
33931 + }
33932 + else
33933 + bend->msg_channel_irq = err;
33934 +
33935 + /* TODO: No need to bind this evtchn to an irq. */
33936 + err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id,
33937 + bend->net_channel,
33938 + netirq_from_frontend,
33939 + 0,
33940 + "netback_accel",
33941 + dev);
33942 + if (err < 0) {
33943 + EPRINTK("failed to bind net channel: %d\n", err);
33944 + goto fail4;
33945 + }
33946 + else
33947 + bend->net_channel_irq = err;
33948 +
33949 + /*
33950 + * Grab ourselves an entry in the forwarding hash table. We do
33951 + * this now so we don't have the embarassmesnt of sorting out
33952 + * an allocation failure while at IRQ. Because we pass NULL as
33953 + * the context, the actual hash lookup will succeed for this
33954 + * NIC, but the check for somewhere to forward to will
33955 + * fail. This is necessary to prevent forwarding before
33956 + * hardware resources are set up
33957 + */
33958 + err = netback_accel_fwd_add(bend->mac, NULL, bend->fwd_priv);
33959 + if (err) {
33960 + EPRINTK("failed to add to fwd hash table\n");
33961 + goto fail5;
33962 + }
33963 +
33964 + /*
33965 + * Say hello to frontend. Important to do this straight after
33966 + * obtaining the message queue as otherwise we are vulnerable
33967 + * to an evil frontend sending a HELLO-REPLY before we've sent
33968 + * the HELLO and confusing us
33969 + */
33970 + netback_accel_msg_tx_hello(bend, NET_ACCEL_MSG_VERSION);
33971 + return 0;
33972 +
33973 + fail5:
33974 + unbind_from_irqhandler(bend->net_channel_irq, dev);
33975 + fail4:
33976 + unbind_from_irqhandler(bend->msg_channel_irq, dev);
33977 + fail3:
33978 + net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap);
33979 + bend->shared_page = NULL;
33980 + bend->sh_pages_unmap = NULL;
33981 + fail2:
33982 + fail1:
33983 + return err;
33984 +}
33985 +
33986 +
33987 +static int read_nicname(struct xenbus_device *dev, struct netback_accel *bend)
33988 +{
33989 + int len;
33990 +
33991 + /* NIC name used to select the interface for acceleration */
33992 + bend->nicname = xenbus_read(XBT_NIL, dev->nodename, "accel", &len);
33993 + if (IS_ERR(bend->nicname))
33994 + return PTR_ERR(bend->nicname);
33995 +
33996 + return 0;
33997 +}
33998 +
33999 +static const char *frontend_name = "sfc_netfront";
34000 +
34001 +static int publish_frontend_name(struct xenbus_device *dev)
34002 +{
34003 + struct xenbus_transaction tr;
34004 + int err;
34005 +
34006 + /* Publish the name of the frontend driver */
34007 + do {
34008 + err = xenbus_transaction_start(&tr);
34009 + if (err != 0) {
34010 + EPRINTK("%s: transaction start failed\n", __FUNCTION__);
34011 + return err;
34012 + }
34013 + err = xenbus_printf(tr, dev->nodename, "accel-frontend",
34014 + "%s", frontend_name);
34015 + if (err != 0) {
34016 + EPRINTK("%s: xenbus_printf failed\n", __FUNCTION__);
34017 + xenbus_transaction_end(tr, 1);
34018 + return err;
34019 + }
34020 + err = xenbus_transaction_end(tr, 0);
34021 + } while (err == -EAGAIN);
34022 +
34023 + if (err != 0) {
34024 + EPRINTK("failed to end frontend name transaction\n");
34025 + return err;
34026 + }
34027 + return 0;
34028 +}
34029 +
34030 +
34031 +static int unpublish_frontend_name(struct xenbus_device *dev)
34032 +{
34033 + struct xenbus_transaction tr;
34034 + int err;
34035 +
34036 + do {
34037 + err = xenbus_transaction_start(&tr);
34038 + if (err != 0)
34039 + break;
34040 + err = xenbus_rm(tr, dev->nodename, "accel-frontend");
34041 + if (err != 0) {
34042 + xenbus_transaction_end(tr, 1);
34043 + break;
34044 + }
34045 + err = xenbus_transaction_end(tr, 0);
34046 + } while (err == -EAGAIN);
34047 +
34048 + return err;
34049 +}
34050 +
34051 +
34052 +static void cleanup_vnic(struct netback_accel *bend)
34053 +{
34054 + struct xenbus_device *dev;
34055 +
34056 + dev = (struct xenbus_device *)bend->hdev_data;
34057 +
34058 + DPRINTK("%s: bend %p dev %p\n", __FUNCTION__, bend, dev);
34059 +
34060 + DPRINTK("%s: Remove %p's mac from fwd table...\n",
34061 + __FUNCTION__, bend);
34062 + netback_accel_fwd_remove(bend->mac, bend->fwd_priv);
34063 +
34064 + /* Free buffer table allocations */
34065 + netback_accel_remove_buffers(bend);
34066 +
34067 + DPRINTK("%s: Release hardware resources...\n", __FUNCTION__);
34068 + if (bend->accel_shutdown)
34069 + bend->accel_shutdown(bend);
34070 +
34071 + if (bend->net_channel_irq) {
34072 + unbind_from_irqhandler(bend->net_channel_irq, dev);
34073 + bend->net_channel_irq = 0;
34074 + }
34075 +
34076 + if (bend->msg_channel_irq) {
34077 + unbind_from_irqhandler(bend->msg_channel_irq, dev);
34078 + bend->msg_channel_irq = 0;
34079 + }
34080 +
34081 + if (bend->sh_pages_unmap) {
34082 + DPRINTK("%s: Unmap grants %p\n", __FUNCTION__,
34083 + bend->sh_pages_unmap);
34084 + net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap);
34085 + bend->sh_pages_unmap = NULL;
34086 + bend->shared_page = NULL;
34087 + }
34088 +}
34089 +
34090 +
34091 +/*************************************************************************/
34092 +
34093 +/*
34094 + * The following code handles accelstate changes between the frontend
34095 + * and the backend. It calls setup_vnic and cleanup_vnic in matching
34096 + * pairs in response to transitions.
34097 + *
34098 + * Valid state transitions for Dom0 are as follows:
34099 + *
34100 + * Closed->Init on probe or in response to Init from domU
34101 + * Closed->Closing on error/remove
34102 + *
34103 + * Init->Connected in response to Connected from domU
34104 + * Init->Closing on error/remove or in response to Closing from domU
34105 + *
34106 + * Connected->Closing on error/remove or in response to Closing from domU
34107 + *
34108 + * Closing->Closed in response to Closed from domU
34109 + *
34110 + */
34111 +
34112 +
34113 +static void netback_accel_frontend_changed(struct xenbus_device *dev,
34114 + XenbusState frontend_state)
34115 +{
34116 + struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
34117 + XenbusState backend_state;
34118 +
34119 + DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n",
34120 + __FUNCTION__, xenbus_strstate(bend->frontend_state),
34121 + xenbus_strstate(frontend_state),dev->nodename, dev->otherend);
34122 +
34123 + /*
34124 + * Ignore duplicate state changes. This can happen if the
34125 + * frontend changes state twice in quick succession and the
34126 + * first watch fires in the backend after the second
34127 + * transition has completed.
34128 + */
34129 + if (bend->frontend_state == frontend_state)
34130 + return;
34131 +
34132 + bend->frontend_state = frontend_state;
34133 + backend_state = bend->backend_state;
34134 +
34135 + switch (frontend_state) {
34136 + case XenbusStateInitialising:
34137 + if (backend_state == XenbusStateClosed &&
34138 + !bend->removing)
34139 + backend_state = XenbusStateInitialising;
34140 + break;
34141 +
34142 + case XenbusStateConnected:
34143 + if (backend_state == XenbusStateInitialising) {
34144 + if (!bend->vnic_is_setup &&
34145 + setup_vnic(dev) == 0) {
34146 + bend->vnic_is_setup = 1;
34147 + backend_state = XenbusStateConnected;
34148 + } else {
34149 + backend_state = XenbusStateClosing;
34150 + }
34151 + }
34152 + break;
34153 +
34154 + case XenbusStateInitWait:
34155 + case XenbusStateInitialised:
34156 + default:
34157 + DPRINTK("Unknown state %s (%d) from frontend.\n",
34158 + xenbus_strstate(frontend_state), frontend_state);
34159 + /* Unknown state. Fall through. */
34160 + case XenbusStateClosing:
34161 + if (backend_state != XenbusStateClosed)
34162 + backend_state = XenbusStateClosing;
34163 +
34164 + /*
34165 + * The bend will now persist (with watches active) in
34166 + * case the frontend comes back again, eg. after
34167 + * frontend module reload or suspend/resume
34168 + */
34169 +
34170 + break;
34171 +
34172 + case XenbusStateUnknown:
34173 + case XenbusStateClosed:
34174 + if (bend->vnic_is_setup) {
34175 + bend->vnic_is_setup = 0;
34176 + cleanup_vnic(bend);
34177 + }
34178 +
34179 + if (backend_state == XenbusStateClosing)
34180 + backend_state = XenbusStateClosed;
34181 + break;
34182 + }
34183 +
34184 + if (backend_state != bend->backend_state) {
34185 + DPRINTK("Switching from state %s (%d) to %s (%d)\n",
34186 + xenbus_strstate(bend->backend_state),
34187 + bend->backend_state,
34188 + xenbus_strstate(backend_state), backend_state);
34189 + bend->backend_state = backend_state;
34190 + net_accel_update_state(dev, backend_state);
34191 + }
34192 +
34193 + wake_up(&bend->state_wait_queue);
34194 +}
34195 +
34196 +
34197 +/* accelstate on the frontend's xenbus node has changed */
34198 +static void bend_domu_accel_change(struct xenbus_watch *watch,
34199 + const char **vec, unsigned int len)
34200 +{
34201 + int state;
34202 + struct netback_accel *bend;
34203 +
34204 + bend = container_of(watch, struct netback_accel, domu_accel_watch);
34205 + if (bend->domu_accel_watch.node != NULL) {
34206 + struct xenbus_device *dev =
34207 + (struct xenbus_device *)bend->hdev_data;
34208 + VPRINTK("Watch matched, got dev %p otherend %p\n",
34209 + dev, dev->otherend);
34210 + /*
34211 + * dev->otherend != NULL check to protect against
34212 + * watch firing when domain goes away and we haven't
34213 + * yet cleaned up
34214 + */
34215 + if (!dev->otherend ||
34216 + !xenbus_exists(XBT_NIL, watch->node, "") ||
34217 + strncmp(dev->otherend, vec[XS_WATCH_PATH],
34218 + strlen(dev->otherend))) {
34219 + DPRINTK("Ignoring watch as otherend seems invalid\n");
34220 + return;
34221 + }
34222 +
34223 + mutex_lock(&bend->bend_mutex);
34224 +
34225 + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d",
34226 + &state);
34227 + netback_accel_frontend_changed(dev, state);
34228 +
34229 + mutex_unlock(&bend->bend_mutex);
34230 + }
34231 +}
34232 +
34233 +/* Setup watch on frontend's accelstate */
34234 +static int setup_domu_accel_watch(struct xenbus_device *dev,
34235 + struct netback_accel *bend)
34236 +{
34237 + int err;
34238 +
34239 + VPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate");
34240 +
34241 + err = xenbus_watch_path2(dev, dev->otherend, "accelstate",
34242 + &bend->domu_accel_watch,
34243 + bend_domu_accel_change);
34244 + if (err) {
34245 + EPRINTK("%s: Failed to register xenbus watch: %d\n",
34246 + __FUNCTION__, err);
34247 + goto fail;
34248 + }
34249 + return 0;
34250 + fail:
34251 + bend->domu_accel_watch.node = NULL;
34252 + return err;
34253 +}
34254 +
34255 +
34256 +int netback_accel_probe(struct xenbus_device *dev)
34257 +{
34258 + struct netback_accel *bend;
34259 + struct backend_info *binfo;
34260 + int err;
34261 +
34262 + DPRINTK("%s: passed device %s\n", __FUNCTION__, dev->nodename);
34263 +
34264 + /* Allocate structure to store all our state... */
34265 + bend = kzalloc(sizeof(struct netback_accel), GFP_KERNEL);
34266 + if (bend == NULL) {
34267 + DPRINTK("%s: no memory for bend\n", __FUNCTION__);
34268 + return -ENOMEM;
34269 + }
34270 +
34271 + mutex_init(&bend->bend_mutex);
34272 +
34273 + mutex_lock(&bend->bend_mutex);
34274 +
34275 + /* ...and store it where we can get at it */
34276 + binfo = (struct backend_info *) dev->dev.driver_data;
34277 + binfo->netback_accel_priv = bend;
34278 + /* And vice-versa */
34279 + bend->hdev_data = dev;
34280 +
34281 + DPRINTK("%s: Adding bend %p to list\n", __FUNCTION__, bend);
34282 +
34283 + init_waitqueue_head(&bend->state_wait_queue);
34284 + bend->vnic_is_setup = 0;
34285 + bend->frontend_state = XenbusStateUnknown;
34286 + bend->backend_state = XenbusStateClosed;
34287 + bend->removing = 0;
34288 +
34289 + sscanf(dev->nodename, NODENAME_PATH_FMT, &bend->far_end,
34290 + &bend->vif_num);
34291 +
34292 + err = read_nicname(dev, bend);
34293 + if (err) {
34294 + /*
34295 + * Technically not an error, just means we're not
34296 + * supposed to accelerate this
34297 + */
34298 + DPRINTK("failed to get device name\n");
34299 + goto fail_nicname;
34300 + }
34301 +
34302 + /*
34303 + * Look up the device name in the list of NICs provided by
34304 + * driverlink to get the hardware type.
34305 + */
34306 + err = netback_accel_sf_hwtype(bend);
34307 + if (err) {
34308 + /*
34309 + * Technically not an error, just means we're not
34310 + * supposed to accelerate this, probably belongs to
34311 + * some other backend
34312 + */
34313 + DPRINTK("failed to match device name\n");
34314 + goto fail_init_type;
34315 + }
34316 +
34317 + err = publish_frontend_name(dev);
34318 + if (err)
34319 + goto fail_publish;
34320 +
34321 + err = netback_accel_debugfs_create(bend);
34322 + if (err)
34323 + goto fail_debugfs;
34324 +
34325 + mutex_unlock(&bend->bend_mutex);
34326 +
34327 + err = setup_config_accel_watch(dev, bend);
34328 + if (err)
34329 + goto fail_config_watch;
34330 +
34331 + err = setup_domu_accel_watch(dev, bend);
34332 + if (err)
34333 + goto fail_domu_watch;
34334 +
34335 + /*
34336 + * Indicate to the other end that we're ready to start unless
34337 + * the watch has already fired.
34338 + */
34339 + mutex_lock(&bend->bend_mutex);
34340 + if (bend->backend_state == XenbusStateClosed) {
34341 + bend->backend_state = XenbusStateInitialising;
34342 + net_accel_update_state(dev, XenbusStateInitialising);
34343 + }
34344 + mutex_unlock(&bend->bend_mutex);
34345 +
34346 + mutex_lock(&bend_list_mutex);
34347 + link_bend(bend);
34348 + mutex_unlock(&bend_list_mutex);
34349 +
34350 + return 0;
34351 +
34352 +fail_domu_watch:
34353 +
34354 + unregister_xenbus_watch(&bend->config_accel_watch);
34355 + kfree(bend->config_accel_watch.node);
34356 +fail_config_watch:
34357 +
34358 + /*
34359 + * Flush the scheduled work queue before freeing bend to get
34360 + * rid of any pending netback_accel_msg_rx_handler()
34361 + */
34362 + flush_scheduled_work();
34363 +
34364 + mutex_lock(&bend->bend_mutex);
34365 + net_accel_update_state(dev, XenbusStateUnknown);
34366 + netback_accel_debugfs_remove(bend);
34367 +fail_debugfs:
34368 +
34369 + unpublish_frontend_name(dev);
34370 +fail_publish:
34371 +
34372 + /* No need to reverse netback_accel_sf_hwtype. */
34373 +fail_init_type:
34374 +
34375 + kfree(bend->nicname);
34376 +fail_nicname:
34377 + binfo->netback_accel_priv = NULL;
34378 + mutex_unlock(&bend->bend_mutex);
34379 + kfree(bend);
34380 + return err;
34381 +}
34382 +
34383 +
34384 +int netback_accel_remove(struct xenbus_device *dev)
34385 +{
34386 + struct backend_info *binfo;
34387 + struct netback_accel *bend;
34388 + int frontend_state;
34389 +
34390 + binfo = (struct backend_info *) dev->dev.driver_data;
34391 + bend = (struct netback_accel *) binfo->netback_accel_priv;
34392 +
34393 + DPRINTK("%s: dev %p bend %p\n", __FUNCTION__, dev, bend);
34394 +
34395 + BUG_ON(bend == NULL);
34396 +
34397 + mutex_lock(&bend_list_mutex);
34398 + unlink_bend(bend);
34399 + mutex_unlock(&bend_list_mutex);
34400 +
34401 + mutex_lock(&bend->bend_mutex);
34402 +
34403 + /* Reject any requests to connect. */
34404 + bend->removing = 1;
34405 +
34406 + /*
34407 + * Switch to closing to tell the other end that we're going
34408 + * away.
34409 + */
34410 + if (bend->backend_state != XenbusStateClosing) {
34411 + bend->backend_state = XenbusStateClosing;
34412 + net_accel_update_state(dev, XenbusStateClosing);
34413 + }
34414 +
34415 + frontend_state = (int)XenbusStateUnknown;
34416 + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d",
34417 + &frontend_state);
34418 +
34419 + mutex_unlock(&bend->bend_mutex);
34420 +
34421 + /*
34422 + * Wait until this end goes to the closed state. This happens
34423 + * in response to the other end going to the closed state.
34424 + * Don't bother doing this if the other end is already closed
34425 + * because if it is then there is nothing to do.
34426 + */
34427 + if (frontend_state != (int)XenbusStateClosed &&
34428 + frontend_state != (int)XenbusStateUnknown)
34429 + wait_event(bend->state_wait_queue,
34430 + bend->backend_state == XenbusStateClosed);
34431 +
34432 + unregister_xenbus_watch(&bend->domu_accel_watch);
34433 + kfree(bend->domu_accel_watch.node);
34434 +
34435 + unregister_xenbus_watch(&bend->config_accel_watch);
34436 + kfree(bend->config_accel_watch.node);
34437 +
34438 + /*
34439 + * Flush the scheduled work queue before freeing bend to get
34440 + * rid of any pending netback_accel_msg_rx_handler()
34441 + */
34442 + flush_scheduled_work();
34443 +
34444 + mutex_lock(&bend->bend_mutex);
34445 +
34446 + /* Tear down the vnic if it was set up. */
34447 + if (bend->vnic_is_setup) {
34448 + bend->vnic_is_setup = 0;
34449 + cleanup_vnic(bend);
34450 + }
34451 +
34452 + bend->backend_state = XenbusStateUnknown;
34453 + net_accel_update_state(dev, XenbusStateUnknown);
34454 +
34455 + netback_accel_debugfs_remove(bend);
34456 +
34457 + unpublish_frontend_name(dev);
34458 +
34459 + kfree(bend->nicname);
34460 +
34461 + binfo->netback_accel_priv = NULL;
34462 +
34463 + mutex_unlock(&bend->bend_mutex);
34464 +
34465 + kfree(bend);
34466 +
34467 + return 0;
34468 +}
34469 +
34470 +
34471 +void netback_accel_shutdown_bends(void)
34472 +{
34473 + mutex_lock(&bend_list_mutex);
34474 + /*
34475 + * I think we should have had a remove callback for all
34476 + * interfaces before being allowed to unload the module
34477 + */
34478 + BUG_ON(bend_list != NULL);
34479 + mutex_unlock(&bend_list_mutex);
34480 +}
34481 +
34482 +
34483 +void netback_accel_set_closing(struct netback_accel *bend)
34484 +{
34485 +
34486 + bend->backend_state = XenbusStateClosing;
34487 + net_accel_update_state((struct xenbus_device *)bend->hdev_data,
34488 + XenbusStateClosing);
34489 +}
34490 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat.h
34491 ===================================================================
34492 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
34493 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat.h 2008-02-20 09:32:49.000000000 +0100
34494 @@ -0,0 +1,53 @@
34495 +/****************************************************************************
34496 + * Copyright 2002-2005: Level 5 Networks Inc.
34497 + * Copyright 2005-2008: Solarflare Communications Inc,
34498 + * 9501 Jeronimo Road, Suite 250,
34499 + * Irvine, CA 92618, USA
34500 + *
34501 + * Maintained by Solarflare Communications
34502 + * <linux-xen-drivers@solarflare.com>
34503 + * <onload-dev@solarflare.com>
34504 + *
34505 + * This program is free software; you can redistribute it and/or modify it
34506 + * under the terms of the GNU General Public License version 2 as published
34507 + * by the Free Software Foundation, incorporated herein by reference.
34508 + *
34509 + * This program is distributed in the hope that it will be useful,
34510 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
34511 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34512 + * GNU General Public License for more details.
34513 + *
34514 + * You should have received a copy of the GNU General Public License
34515 + * along with this program; if not, write to the Free Software
34516 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34517 + ****************************************************************************
34518 + */
34519 +
34520 +/*
34521 + * \author djr
34522 + * \brief Compatibility layer. Provides definitions of fundamental
34523 + * types and definitions that are used throughout CI source
34524 + * code. It does not introduce any link time dependencies,
34525 + * or include any unnecessary system headers.
34526 + */
34527 +/*! \cidoxg_include_ci */
34528 +
34529 +#ifndef __CI_COMPAT_H__
34530 +#define __CI_COMPAT_H__
34531 +
34532 +#ifdef __cplusplus
34533 +extern "C" {
34534 +#endif
34535 +
34536 +#include <ci/compat/primitive.h>
34537 +#include <ci/compat/sysdep.h>
34538 +#include <ci/compat/utils.h>
34539 +
34540 +
34541 +#ifdef __cplusplus
34542 +}
34543 +#endif
34544 +
34545 +#endif /* __CI_COMPAT_H__ */
34546 +
34547 +/*! \cidoxg_end */
34548 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc.h
34549 ===================================================================
34550 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
34551 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc.h 2008-02-20 09:32:49.000000000 +0100
34552 @@ -0,0 +1,158 @@
34553 +/****************************************************************************
34554 + * Copyright 2002-2005: Level 5 Networks Inc.
34555 + * Copyright 2005-2008: Solarflare Communications Inc,
34556 + * 9501 Jeronimo Road, Suite 250,
34557 + * Irvine, CA 92618, USA
34558 + *
34559 + * Maintained by Solarflare Communications
34560 + * <linux-xen-drivers@solarflare.com>
34561 + * <onload-dev@solarflare.com>
34562 + *
34563 + * This program is free software; you can redistribute it and/or modify it
34564 + * under the terms of the GNU General Public License version 2 as published
34565 + * by the Free Software Foundation, incorporated herein by reference.
34566 + *
34567 + * This program is distributed in the hope that it will be useful,
34568 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
34569 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34570 + * GNU General Public License for more details.
34571 + *
34572 + * You should have received a copy of the GNU General Public License
34573 + * along with this program; if not, write to the Free Software
34574 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34575 + ****************************************************************************
34576 + */
34577 +
34578 +/*! \cidoxg_include_ci_compat */
34579 +
34580 +#ifndef __CI_COMPAT_GCC_H__
34581 +#define __CI_COMPAT_GCC_H__
34582 +
34583 +
34584 +#define CI_HAVE_INT64
34585 +
34586 +
34587 +#if defined(__linux__) && defined(__KERNEL__)
34588 +
34589 +# include <linux/types.h>
34590 +
34591 +typedef __u64 ci_uint64;
34592 +typedef __s64 ci_int64;
34593 +# if BITS_PER_LONG == 32
34594 +typedef __s32 ci_ptr_arith_t;
34595 +typedef __u32 ci_uintptr_t;
34596 +# else
34597 +typedef __s64 ci_ptr_arith_t;
34598 +typedef __u64 ci_uintptr_t;
34599 +# endif
34600 +
34601 +
34602 +/* it's not obvious to me why the below is wrong for x86_64, but
34603 + * gcc seems to complain on this platform
34604 + */
34605 +# if defined(__ia64__)
34606 +# define CI_PRId64 "ld"
34607 +# define CI_PRIi64 "li"
34608 +# define CI_PRIo64 "lo"
34609 +# define CI_PRIu64 "lu"
34610 +# define CI_PRIx64 "lx"
34611 +# define CI_PRIX64 "lX"
34612 +# else
34613 +# define CI_PRId64 "lld"
34614 +# define CI_PRIi64 "lli"
34615 +# define CI_PRIo64 "llo"
34616 +# define CI_PRIu64 "llu"
34617 +# define CI_PRIx64 "llx"
34618 +# define CI_PRIX64 "llX"
34619 +# endif
34620 +
34621 +# define CI_PRId32 "d"
34622 +# define CI_PRIi32 "i"
34623 +# define CI_PRIo32 "o"
34624 +# define CI_PRIu32 "u"
34625 +# define CI_PRIx32 "x"
34626 +# define CI_PRIX32 "X"
34627 +
34628 +#else
34629 +
34630 +# include <stdint.h>
34631 +# include <inttypes.h>
34632 +
34633 +typedef uint64_t ci_uint64;
34634 +typedef int64_t ci_int64;
34635 +typedef intptr_t ci_ptr_arith_t;
34636 +typedef uintptr_t ci_uintptr_t;
34637 +
34638 +# define CI_PRId64 PRId64
34639 +# define CI_PRIi64 PRIi64
34640 +# define CI_PRIo64 PRIo64
34641 +# define CI_PRIu64 PRIu64
34642 +# define CI_PRIx64 PRIx64
34643 +# define CI_PRIX64 PRIX64
34644 +
34645 +# define CI_PRId32 PRId32
34646 +# define CI_PRIi32 PRIi32
34647 +# define CI_PRIo32 PRIo32
34648 +# define CI_PRIu32 PRIu32
34649 +# define CI_PRIx32 PRIx32
34650 +# define CI_PRIX32 PRIX32
34651 +
34652 +#endif
34653 +
34654 +
34655 +typedef ci_uint64 ci_fixed_descriptor_t;
34656 +
34657 +#define from_fixed_descriptor(desc) ((ci_uintptr_t)(desc))
34658 +#define to_fixed_descriptor(desc) ((ci_fixed_descriptor_t)(ci_uintptr_t)(desc))
34659 +
34660 +
34661 +#if __GNUC__ >= 3 && !defined(__cplusplus)
34662 +/*
34663 +** Checks that [p_mbr] has the same type as [&c_type::mbr_name].
34664 +*/
34665 +# define CI_CONTAINER(c_type, mbr_name, p_mbr) \
34666 + __builtin_choose_expr( \
34667 + __builtin_types_compatible_p(__typeof__(&((c_type*)0)->mbr_name), \
34668 + __typeof__(p_mbr)), \
34669 + __CI_CONTAINER(c_type, mbr_name, p_mbr), (void)0)
34670 +
34671 +# define ci_restrict __restrict__
34672 +#endif
34673 +
34674 +
34675 +#if !defined(__KERNEL__) || defined(__unix__)
34676 +#define CI_HAVE_NPRINTF 1
34677 +#endif
34678 +
34679 +
34680 +/* At what version was this introduced? */
34681 +#if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91)
34682 +# define CI_LIKELY(t) __builtin_expect((t), 1)
34683 +# define CI_UNLIKELY(t) __builtin_expect((t), 0)
34684 +#endif
34685 +
34686 +/**********************************************************************
34687 + * Attributes
34688 + */
34689 +#if __GNUC__ >= 3 && defined(NDEBUG)
34690 +# define CI_HF __attribute__((visibility("hidden")))
34691 +# define CI_HV __attribute__((visibility("hidden")))
34692 +#else
34693 +# define CI_HF
34694 +# define CI_HV
34695 +#endif
34696 +
34697 +#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
34698 +# define ci_noinline static __attribute__((__noinline__))
34699 +/* (Linux 2.6 defines its own "noinline", so we use the "__noinline__" form) */
34700 +#else
34701 +# define ci_noinline static
34702 +#endif
34703 +
34704 +#define CI_ALIGN(x) __attribute__ ((aligned (x)))
34705 +
34706 +#define CI_PRINTF_LIKE(a,b) __attribute__((format(printf,a,b)))
34707 +
34708 +#endif /* __CI_COMPAT_GCC_H__ */
34709 +
34710 +/*! \cidoxg_end */
34711 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc_x86.h
34712 ===================================================================
34713 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
34714 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc_x86.h 2008-02-20 09:32:49.000000000 +0100
34715 @@ -0,0 +1,115 @@
34716 +/****************************************************************************
34717 + * Copyright 2002-2005: Level 5 Networks Inc.
34718 + * Copyright 2005-2008: Solarflare Communications Inc,
34719 + * 9501 Jeronimo Road, Suite 250,
34720 + * Irvine, CA 92618, USA
34721 + *
34722 + * Maintained by Solarflare Communications
34723 + * <linux-xen-drivers@solarflare.com>
34724 + * <onload-dev@solarflare.com>
34725 + *
34726 + * This program is free software; you can redistribute it and/or modify it
34727 + * under the terms of the GNU General Public License version 2 as published
34728 + * by the Free Software Foundation, incorporated herein by reference.
34729 + *
34730 + * This program is distributed in the hope that it will be useful,
34731 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
34732 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34733 + * GNU General Public License for more details.
34734 + *
34735 + * You should have received a copy of the GNU General Public License
34736 + * along with this program; if not, write to the Free Software
34737 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34738 + ****************************************************************************
34739 + */
34740 +
34741 +/*! \cidoxg_include_ci_compat */
34742 +
34743 +#ifndef __CI_COMPAT_GCC_X86_H__
34744 +#define __CI_COMPAT_GCC_X86_H__
34745 +
34746 +/*
34747 +** The facts:
34748 +**
34749 +** SSE sfence
34750 +** SSE2 lfence, mfence, pause
34751 +*/
34752 +
34753 +/*
34754 + Barriers to enforce ordering with respect to:
34755 +
34756 + normal memory use: ci_wmb, ci_rmb, ci_mb
34757 + IO bus access use: ci_wiob, ci_riob, ci_iob
34758 +*/
34759 +#if defined(__x86_64__)
34760 +# define ci_x86_mb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
34761 +#else
34762 +# define ci_x86_mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
34763 +#endif
34764 +
34765 +/* ?? measure the impact of latency of sfence on a modern processor before we
34766 + take a decision on how to integrate with respect to writecombining */
34767 +
34768 +/* DJR: I don't think we need to add "memory" here. It means the asm does
34769 +** something to memory that GCC doesn't understand. But all this does is
34770 +** commit changes that GCC thinks have already happened. NB. GCC will not
34771 +** reorder across a __volatile__ __asm__ anyway.
34772 +*/
34773 +#define ci_gcc_fence() __asm__ __volatile__ ("")
34774 +
34775 +#if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
34776 +# define ci_x86_sfence() __asm__ __volatile__ ("sfence")
34777 +# define ci_x86_lfence() __asm__ __volatile__ ("lfence")
34778 +# define ci_x86_mfence() __asm__ __volatile__ ("mfence")
34779 +#else
34780 +# define ci_x86_sfence() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8")
34781 +# define ci_x86_lfence() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xE8")
34782 +# define ci_x86_mfence() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF0")
34783 +#endif
34784 +
34785 +
34786 +/* x86 processors up to the P4 Xeon store in-order unless executing streaming
34787 + extensions or when using writecombining
34788 +
34789 + Hence we do not define ci_wmb to use sfence by default. Requirement is that
34790 + we do not use writecombining to memory and any code which uses SSE
34791 + extensions must call sfence directly
34792 +
34793 + We need to track non-Intel clones which may support out-of-order stores.
34794 +
34795 +*/
34796 +
34797 +#if CI_CPU_OOS
34798 +# if CI_CPU_HAS_SSE
34799 +# define ci_wmb() ci_x86_sfence()
34800 +# else
34801 +# define ci_wmb() ci_x86_mb()
34802 +# endif
34803 +#else
34804 +# define ci_wmb() ci_gcc_fence()
34805 +#endif
34806 +
34807 +#if CI_CPU_HAS_SSE2
34808 +# define ci_rmb() ci_x86_lfence()
34809 +# define ci_mb() ci_x86_mfence()
34810 +# define ci_riob() ci_x86_lfence()
34811 +# define ci_wiob() ci_x86_sfence()
34812 +# define ci_iob() ci_x86_mfence()
34813 +#else
34814 +# if CI_CPU_HAS_SSE
34815 +# define ci_wiob() ci_x86_sfence()
34816 +# else
34817 +# define ci_wiob() ci_x86_mb()
34818 +# endif
34819 +# define ci_rmb() ci_x86_mb()
34820 +# define ci_mb() ci_x86_mb()
34821 +# define ci_riob() ci_x86_mb()
34822 +# define ci_iob() ci_x86_mb()
34823 +#endif
34824 +
34825 +typedef unsigned long ci_phys_addr_t;
34826 +#define ci_phys_addr_fmt "%lx"
34827 +
34828 +#endif /* __CI_COMPAT_GCC_X86_H__ */
34829 +
34830 +/*! \cidoxg_end */
34831 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/primitive.h
34832 ===================================================================
34833 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
34834 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/primitive.h 2008-02-20 09:32:49.000000000 +0100
34835 @@ -0,0 +1,77 @@
34836 +/****************************************************************************
34837 + * Copyright 2002-2005: Level 5 Networks Inc.
34838 + * Copyright 2005-2008: Solarflare Communications Inc,
34839 + * 9501 Jeronimo Road, Suite 250,
34840 + * Irvine, CA 92618, USA
34841 + *
34842 + * Maintained by Solarflare Communications
34843 + * <linux-xen-drivers@solarflare.com>
34844 + * <onload-dev@solarflare.com>
34845 + *
34846 + * This program is free software; you can redistribute it and/or modify it
34847 + * under the terms of the GNU General Public License version 2 as published
34848 + * by the Free Software Foundation, incorporated herein by reference.
34849 + *
34850 + * This program is distributed in the hope that it will be useful,
34851 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
34852 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34853 + * GNU General Public License for more details.
34854 + *
34855 + * You should have received a copy of the GNU General Public License
34856 + * along with this program; if not, write to the Free Software
34857 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34858 + ****************************************************************************
34859 + */
34860 +/*! \cidoxg_include_ci_compat */
34861 +
34862 +#ifndef __CI_COMPAT_PRIMITIVE_H__
34863 +#define __CI_COMPAT_PRIMITIVE_H__
34864 +
34865 +
34866 +/**********************************************************************
34867 + * Primitive types.
34868 + */
34869 +
34870 +typedef unsigned char ci_uint8;
34871 +typedef char ci_int8;
34872 +
34873 +typedef unsigned short ci_uint16;
34874 +typedef short ci_int16;
34875 +
34876 +typedef unsigned int ci_uint32;
34877 +typedef int ci_int32;
34878 +
34879 +/* 64-bit support is platform dependent. */
34880 +
34881 +
34882 +/**********************************************************************
34883 + * Other fancy types.
34884 + */
34885 +
34886 +typedef ci_uint8 ci_octet;
34887 +
34888 +typedef enum {
34889 + CI_FALSE = 0,
34890 + CI_TRUE
34891 +} ci_boolean_t;
34892 +
34893 +
34894 +/**********************************************************************
34895 + * Some nice types you'd always assumed were standards.
34896 + * (Really, they are SYSV "standards".)
34897 + */
34898 +
34899 +#ifdef _WIN32
34900 +typedef unsigned long ulong;
34901 +typedef unsigned int uint;
34902 +typedef char* caddr_t;
34903 +#elif defined(__linux__) && defined(__KERNEL__)
34904 +#include <linux/types.h>
34905 +#elif defined(__linux__)
34906 +#include <sys/types.h>
34907 +#endif
34908 +
34909 +
34910 +#endif /* __CI_COMPAT_PRIMITIVE_H__ */
34911 +
34912 +/*! \cidoxg_end */
34913 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/sysdep.h
34914 ===================================================================
34915 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
34916 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/sysdep.h 2008-02-20 09:32:49.000000000 +0100
34917 @@ -0,0 +1,166 @@
34918 +/****************************************************************************
34919 + * Copyright 2002-2005: Level 5 Networks Inc.
34920 + * Copyright 2005-2008: Solarflare Communications Inc,
34921 + * 9501 Jeronimo Road, Suite 250,
34922 + * Irvine, CA 92618, USA
34923 + *
34924 + * Maintained by Solarflare Communications
34925 + * <linux-xen-drivers@solarflare.com>
34926 + * <onload-dev@solarflare.com>
34927 + *
34928 + * This program is free software; you can redistribute it and/or modify it
34929 + * under the terms of the GNU General Public License version 2 as published
34930 + * by the Free Software Foundation, incorporated herein by reference.
34931 + *
34932 + * This program is distributed in the hope that it will be useful,
34933 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
34934 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34935 + * GNU General Public License for more details.
34936 + *
34937 + * You should have received a copy of the GNU General Public License
34938 + * along with this program; if not, write to the Free Software
34939 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34940 + ****************************************************************************
34941 + */
34942 +
34943 +/*! \cidoxg_include_ci_compat */
34944 +
34945 +#ifndef __CI_COMPAT_SYSDEP_H__
34946 +#define __CI_COMPAT_SYSDEP_H__
34947 +
34948 +
34949 +/**********************************************************************
34950 + * Platform definition fixups.
34951 + */
34952 +
34953 +#if defined(__ci_ul_driver__) && !defined(__ci_driver__)
34954 +# define __ci_driver__
34955 +#endif
34956 +
34957 +#if defined(__ci_driver__) && !defined(__ci_ul_driver__) && \
34958 + !defined(__KERNEL__)
34959 +# define __KERNEL__
34960 +#endif
34961 +
34962 +
34963 +/**********************************************************************
34964 + * Sanity checks (no cheating!)
34965 + */
34966 +
34967 +#if defined(__KERNEL__) && !defined(__ci_driver__)
34968 +# error Insane.
34969 +#endif
34970 +
34971 +#if defined(__KERNEL__) && defined(__ci_ul_driver__)
34972 +# error Madness.
34973 +#endif
34974 +
34975 +#if defined(__unix__) && defined(_WIN32)
34976 +# error Strange.
34977 +#endif
34978 +
34979 +#if defined(__GNUC__) && defined(_MSC_VER)
34980 +# error Crazy.
34981 +#endif
34982 +
34983 +
34984 +/**********************************************************************
34985 + * Compiler and processor dependencies.
34986 + */
34987 +
34988 +#if defined(__GNUC__)
34989 +
34990 +# include <ci/compat/gcc.h>
34991 +
34992 +# if defined(__i386__)
34993 +# include <ci/compat/x86.h>
34994 +# include <ci/compat/gcc_x86.h>
34995 +# elif defined(__x86_64__)
34996 +# include <ci/compat/x86_64.h>
34997 +# include <ci/compat/gcc_x86.h>
34998 +# elif defined(__PPC__)
34999 +# include <ci/compat/ppc.h>
35000 +# include <ci/compat/gcc_ppc.h>
35001 +# elif defined(__ia64__)
35002 +# include <ci/compat/ia64.h>
35003 +# include <ci/compat/gcc_ia64.h>
35004 +# else
35005 +# error Unknown processor - GNU C
35006 +# endif
35007 +
35008 +#elif defined(_MSC_VER)
35009 +
35010 +# include <ci/compat/msvc.h>
35011 +
35012 +# if defined(__i386__)
35013 +# include <ci/compat/x86.h>
35014 +# include <ci/compat/msvc_x86.h>
35015 +# elif defined(__x86_64__)
35016 +# include <ci/compat/x86_64.h>
35017 +# include <ci/compat/msvc_x86_64.h>
35018 +# else
35019 +# error Unknown processor MSC
35020 +# endif
35021 +
35022 +#elif defined(__PGI)
35023 +
35024 +# include <ci/compat/x86.h>
35025 +# include <ci/compat/pg_x86.h>
35026 +
35027 +#elif defined(__INTEL_COMPILER)
35028 +
35029 +/* Intel compilers v7 claim to be very gcc compatible. */
35030 +# if __INTEL_COMPILER >= 700
35031 +# include <ci/compat/gcc.h>
35032 +# include <ci/compat/x86.h>
35033 +# include <ci/compat/gcc_x86.h>
35034 +# else
35035 +# error Old Intel compiler not supported. Yet.
35036 +# endif
35037 +
35038 +#else
35039 +# error Unknown compiler.
35040 +#endif
35041 +
35042 +
35043 +/**********************************************************************
35044 + * Misc stuff (that probably shouldn't be here).
35045 + */
35046 +
35047 +#ifdef __sun
35048 +# ifdef __KERNEL__
35049 +# define _KERNEL
35050 +# define _SYSCALL32
35051 +# ifdef _LP64
35052 +# define _SYSCALL32_IMPL
35053 +# endif
35054 +# else
35055 +# define _REENTRANT
35056 +# endif
35057 +#endif
35058 +
35059 +
35060 +/**********************************************************************
35061 + * Defaults for anything left undefined.
35062 + */
35063 +
35064 +#ifndef CI_LIKELY
35065 +# define CI_LIKELY(t) (t)
35066 +# define CI_UNLIKELY(t) (t)
35067 +#endif
35068 +
35069 +#ifndef ci_restrict
35070 +# define ci_restrict
35071 +#endif
35072 +
35073 +#ifndef ci_inline
35074 +# define ci_inline static inline
35075 +#endif
35076 +
35077 +#ifndef ci_noinline
35078 +# define ci_noinline static
35079 +#endif
35080 +
35081 +#endif /* __CI_COMPAT_SYSDEP_H__ */
35082 +
35083 +/*! \cidoxg_end */
35084 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/utils.h
35085 ===================================================================
35086 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
35087 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/utils.h 2008-02-20 09:32:49.000000000 +0100
35088 @@ -0,0 +1,269 @@
35089 +/****************************************************************************
35090 + * Copyright 2002-2005: Level 5 Networks Inc.
35091 + * Copyright 2005-2008: Solarflare Communications Inc,
35092 + * 9501 Jeronimo Road, Suite 250,
35093 + * Irvine, CA 92618, USA
35094 + *
35095 + * Maintained by Solarflare Communications
35096 + * <linux-xen-drivers@solarflare.com>
35097 + * <onload-dev@solarflare.com>
35098 + *
35099 + * This program is free software; you can redistribute it and/or modify it
35100 + * under the terms of the GNU General Public License version 2 as published
35101 + * by the Free Software Foundation, incorporated herein by reference.
35102 + *
35103 + * This program is distributed in the hope that it will be useful,
35104 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
35105 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35106 + * GNU General Public License for more details.
35107 + *
35108 + * You should have received a copy of the GNU General Public License
35109 + * along with this program; if not, write to the Free Software
35110 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35111 + ****************************************************************************
35112 + */
35113 +
35114 +/*
35115 + * \author djr
35116 + * \brief Handy utility macros.
35117 + * \date 2003/01/17
35118 + */
35119 +
35120 +/*! \cidoxg_include_ci_compat */
35121 +
35122 +#ifndef __CI_COMPAT_UTILS_H__
35123 +#define __CI_COMPAT_UTILS_H__
35124 +
35125 +
35126 +/**********************************************************************
35127 + * Alignment -- [align] must be a power of 2.
35128 + **********************************************************************/
35129 +
35130 + /*! Align forward onto next boundary. */
35131 +
35132 +#define CI_ALIGN_FWD(p, align) (((p)+(align)-1u) & ~((align)-1u))
35133 +
35134 +
35135 + /*! Align back onto prev boundary. */
35136 +
35137 +#define CI_ALIGN_BACK(p, align) ((p) & ~((align)-1u))
35138 +
35139 +
35140 + /*! How far to next boundary? */
35141 +
35142 +#define CI_ALIGN_NEEDED(p, align, signed_t) (-(signed_t)(p) & ((align)-1u))
35143 +
35144 +
35145 + /*! How far beyond prev boundary? */
35146 +
35147 +#define CI_OFFSET(p, align) ((p) & ((align)-1u))
35148 +
35149 +
35150 + /*! Does object fit in gap before next boundary? */
35151 +
35152 +#define CI_FITS(p, size, align, signed_t) \
35153 + (CI_ALIGN_NEEDED((p) + 1, (align), signed_t) + 1 >= (size))
35154 +
35155 +
35156 + /*! Align forward onto next boundary. */
35157 +
35158 +#define CI_PTR_ALIGN_FWD(p, align) \
35159 + ((char*) CI_ALIGN_FWD(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))))
35160 +
35161 + /*! Align back onto prev boundary. */
35162 +
35163 +#define CI_PTR_ALIGN_BACK(p, align) \
35164 + ((char*) CI_ALIGN_BACK(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))))
35165 +
35166 + /*! How far to next boundary? */
35167 +
35168 +#define CI_PTR_ALIGN_NEEDED(p, align) \
35169 + CI_ALIGN_NEEDED(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)), \
35170 + ci_ptr_arith_t)
35171 +
35172 + /*! How far to next boundary? NZ = not zero i.e. give align if on boundary */
35173 +
35174 +#define CI_PTR_ALIGN_NEEDED_NZ(p, align) \
35175 + ((align) - (((char*)p) - \
35176 + ((char*) CI_ALIGN_BACK(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))))))
35177 +
35178 + /*! How far beyond prev boundary? */
35179 +
35180 +#define CI_PTR_OFFSET(p, align) \
35181 + CI_OFFSET(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)))
35182 +
35183 +
35184 + /* Same as CI_ALIGN_FWD and CI_ALIGN_BACK. */
35185 +
35186 +#define CI_ROUND_UP(i, align) (((i)+(align)-1u) & ~((align)-1u))
35187 +
35188 +#define CI_ROUND_DOWN(i, align) ((i) & ~((align)-1u))
35189 +
35190 +
35191 +/**********************************************************************
35192 + * Byte-order
35193 + **********************************************************************/
35194 +
35195 +/* These are not flags. They are enumeration values for use with
35196 + * CI_MY_BYTE_ORDER. */
35197 +#define CI_BIG_ENDIAN 1
35198 +#define CI_LITTLE_ENDIAN 0
35199 +
35200 +/*
35201 +** Note that these byte-swapping primitives may leave junk in bits above
35202 +** the range they operate on.
35203 +**
35204 +** The CI_BSWAP_nn() routines require that bits above [nn] are zero. Use
35205 +** CI_BSWAPM_nn(x) if this cannot be guaranteed.
35206 +*/
35207 +
35208 +/* ?? May be able to improve on some of these with inline assembler on some
35209 +** platforms.
35210 +*/
35211 +
35212 +#define CI_BSWAP_16(v) ((((v) & 0xff) << 8) | ((v) >> 8))
35213 +#define CI_BSWAPM_16(v) ((((v) & 0xff) << 8) | (((v) & 0xff00) >> 8))
35214 +
35215 +#define CI_BSWAP_32(v) (((v) >> 24) | \
35216 + (((v) & 0x00ff0000) >> 8) | \
35217 + (((v) & 0x0000ff00) << 8) | \
35218 + ((v) << 24))
35219 +#define CI_BSWAPM_32(v) ((((v) & 0xff000000) >> 24) | \
35220 + (((v) & 0x00ff0000) >> 8) | \
35221 + (((v) & 0x0000ff00) << 8) | \
35222 + ((v) << 24))
35223 +
35224 +#define CI_BSWAP_64(v) (((v) >> 56) | \
35225 + (((v) & 0x00ff000000000000) >> 40) | \
35226 + (((v) & 0x0000ff0000000000) >> 24) | \
35227 + (((v) & 0x000000ff00000000) >> 8) | \
35228 + (((v) & 0x00000000ff000000) << 8) | \
35229 + (((v) & 0x0000000000ff0000) << 24) | \
35230 + (((v) & 0x000000000000ff00) << 40) | \
35231 + ((v) << 56))
35232 +
35233 +# define CI_BSWAPPED_16_IF(c,v) ((c) ? CI_BSWAP_16(v) : (v))
35234 +# define CI_BSWAPPED_32_IF(c,v) ((c) ? CI_BSWAP_32(v) : (v))
35235 +# define CI_BSWAPPED_64_IF(c,v) ((c) ? CI_BSWAP_64(v) : (v))
35236 +# define CI_BSWAP_16_IF(c,v) do{ if((c)) (v) = CI_BSWAP_16(v); }while(0)
35237 +# define CI_BSWAP_32_IF(c,v) do{ if((c)) (v) = CI_BSWAP_32(v); }while(0)
35238 +# define CI_BSWAP_64_IF(c,v) do{ if((c)) (v) = CI_BSWAP_64(v); }while(0)
35239 +
35240 +#if (CI_MY_BYTE_ORDER == CI_LITTLE_ENDIAN)
35241 +# define CI_BSWAP_LE16(v) (v)
35242 +# define CI_BSWAP_LE32(v) (v)
35243 +# define CI_BSWAP_LE64(v) (v)
35244 +# define CI_BSWAP_BE16(v) CI_BSWAP_16(v)
35245 +# define CI_BSWAP_BE32(v) CI_BSWAP_32(v)
35246 +# define CI_BSWAP_BE64(v) CI_BSWAP_64(v)
35247 +# define CI_BSWAPM_LE16(v) (v)
35248 +# define CI_BSWAPM_LE32(v) (v)
35249 +# define CI_BSWAPM_LE64(v) (v)
35250 +# define CI_BSWAPM_BE16(v) CI_BSWAPM_16(v)
35251 +# define CI_BSWAPM_BE32(v) CI_BSWAPM_32(v)
35252 +#elif (CI_MY_BYTE_ORDER == CI_BIG_ENDIAN)
35253 +# define CI_BSWAP_BE16(v) (v)
35254 +# define CI_BSWAP_BE32(v) (v)
35255 +# define CI_BSWAP_BE64(v) (v)
35256 +# define CI_BSWAP_LE16(v) CI_BSWAP_16(v)
35257 +# define CI_BSWAP_LE32(v) CI_BSWAP_32(v)
35258 +# define CI_BSWAP_LE64(v) CI_BSWAP_64(v)
35259 +# define CI_BSWAPM_BE16(v) (v)
35260 +# define CI_BSWAPM_BE32(v) (v)
35261 +# define CI_BSWAPM_BE64(v) (v)
35262 +# define CI_BSWAPM_LE16(v) CI_BSWAPM_16(v)
35263 +# define CI_BSWAPM_LE32(v) CI_BSWAPM_32(v)
35264 +#else
35265 +# error Bad endian.
35266 +#endif
35267 +
35268 +
35269 +/**********************************************************************
35270 + * Get pointer to struct from pointer to member
35271 + **********************************************************************/
35272 +
35273 +#define CI_MEMBER_OFFSET(c_type, mbr_name) \
35274 + ((ci_uint32) (ci_uintptr_t)(&((c_type*)0)->mbr_name))
35275 +
35276 +#define CI_MEMBER_SIZE(c_type, mbr_name) \
35277 + sizeof(((c_type*)0)->mbr_name)
35278 +
35279 +#define __CI_CONTAINER(c_type, mbr_name, p_mbr) \
35280 + ( (c_type*) ((char*)(p_mbr) - CI_MEMBER_OFFSET(c_type, mbr_name)) )
35281 +
35282 +#ifndef CI_CONTAINER
35283 +# define CI_CONTAINER(t,m,p) __CI_CONTAINER(t,m,p)
35284 +#endif
35285 +
35286 +
35287 +/**********************************************************************
35288 + * Structure member initialiser.
35289 + **********************************************************************/
35290 +
35291 +#ifndef CI_STRUCT_MBR
35292 +# define CI_STRUCT_MBR(name, val) .name = val
35293 +#endif
35294 +
35295 +
35296 +/**********************************************************************
35297 + * min / max
35298 + **********************************************************************/
35299 +
35300 +#define CI_MIN(x,y) (((x) < (y)) ? (x) : (y))
35301 +#define CI_MAX(x,y) (((x) > (y)) ? (x) : (y))
35302 +
35303 +/**********************************************************************
35304 + * abs
35305 + **********************************************************************/
35306 +
35307 +#define CI_ABS(x) (((x) < 0) ? -(x) : (x))
35308 +
35309 +/**********************************************************************
35310 + * Conditional debugging
35311 + **********************************************************************/
35312 +
35313 +#ifdef NDEBUG
35314 +# define CI_DEBUG(x)
35315 +# define CI_NDEBUG(x) x
35316 +# define CI_IF_DEBUG(y,n) (n)
35317 +# define CI_DEBUG_ARG(x)
35318 +#else
35319 +# define CI_DEBUG(x) x
35320 +# define CI_NDEBUG(x)
35321 +# define CI_IF_DEBUG(y,n) (y)
35322 +# define CI_DEBUG_ARG(x) ,x
35323 +#endif
35324 +
35325 +#ifdef __KERNEL__
35326 +#define CI_KERNEL_ARG(x) ,x
35327 +#else
35328 +#define CI_KERNEL_ARG(x)
35329 +#endif
35330 +
35331 +#ifdef _WIN32
35332 +# define CI_KERNEL_ARG_WIN(x) CI_KERNEL_ARG(x)
35333 +# define CI_ARG_WIN(x) ,x
35334 +#else
35335 +# define CI_KERNEL_ARG_WIN(x)
35336 +# define CI_ARG_WIN(x)
35337 +#endif
35338 +
35339 +#ifdef __unix__
35340 +# define CI_KERNEL_ARG_UNIX(x) CI_KERNEL_ARG(x)
35341 +# define CI_ARG_UNIX(x) ,x
35342 +#else
35343 +# define CI_KERNEL_ARG_UNIX(x)
35344 +# define CI_ARG_UNIX(x)
35345 +#endif
35346 +
35347 +#ifdef __linux__
35348 +# define CI_KERNEL_ARG_LINUX(x) CI_KERNEL_ARG(x)
35349 +# define CI_ARG_LINUX(x) ,x
35350 +#else
35351 +# define CI_KERNEL_ARG_LINUX(x)
35352 +# define CI_ARG_LINUX(x)
35353 +#endif
35354 +
35355 +
35356 +#endif /* __CI_COMPAT_UTILS_H__ */
35357 +/*! \cidoxg_end */
35358 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86.h
35359 ===================================================================
35360 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
35361 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86.h 2008-02-20 09:32:49.000000000 +0100
35362 @@ -0,0 +1,48 @@
35363 +/****************************************************************************
35364 + * Copyright 2002-2005: Level 5 Networks Inc.
35365 + * Copyright 2005-2008: Solarflare Communications Inc,
35366 + * 9501 Jeronimo Road, Suite 250,
35367 + * Irvine, CA 92618, USA
35368 + *
35369 + * Maintained by Solarflare Communications
35370 + * <linux-xen-drivers@solarflare.com>
35371 + * <onload-dev@solarflare.com>
35372 + *
35373 + * This program is free software; you can redistribute it and/or modify it
35374 + * under the terms of the GNU General Public License version 2 as published
35375 + * by the Free Software Foundation, incorporated herein by reference.
35376 + *
35377 + * This program is distributed in the hope that it will be useful,
35378 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
35379 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35380 + * GNU General Public License for more details.
35381 + *
35382 + * You should have received a copy of the GNU General Public License
35383 + * along with this program; if not, write to the Free Software
35384 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35385 + ****************************************************************************
35386 + */
35387 +
35388 +/*! \cidoxg_include_ci_compat */
35389 +
35390 +#ifndef __CI_COMPAT_X86_H__
35391 +#define __CI_COMPAT_X86_H__
35392 +
35393 +
35394 +#define CI_MY_BYTE_ORDER CI_LITTLE_ENDIAN
35395 +
35396 +#define CI_WORD_SIZE 4
35397 +#define CI_PTR_SIZE 4
35398 +
35399 +#define CI_PAGE_SIZE 4096
35400 +#define CI_PAGE_SHIFT 12
35401 +#define CI_PAGE_MASK (~(CI_PAGE_SIZE - 1))
35402 +
35403 +#define CI_CPU_HAS_SSE 1 /* SSE extensions supported */
35404 +#define CI_CPU_HAS_SSE2 0 /* SSE2 extensions supported */
35405 +#define CI_CPU_OOS 0 /* CPU does out of order stores */
35406 +
35407 +
35408 +#endif /* __CI_COMPAT_X86_H__ */
35409 +
35410 +/*! \cidoxg_end */
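
Editorial note: a short sketch of how the page constants above compose on 32-bit x86 (CI_PAGE_SIZE == 4096, CI_PAGE_SHIFT == 12). The helper names are invented for illustration.

	static unsigned long ci_page_base(unsigned long addr)
	{
		return addr & CI_PAGE_MASK;          /* 0x12345678 -> 0x12345000 */
	}

	static unsigned long ci_page_offset(unsigned long addr)
	{
		return addr & (CI_PAGE_SIZE - 1);    /* 0x12345678 -> 0x678 */
	}
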
35411 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86_64.h
35412 ===================================================================
35413 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
35414 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86_64.h 2008-02-20 09:32:49.000000000 +0100
35415 @@ -0,0 +1,54 @@
35416 +/****************************************************************************
35417 + * Copyright 2002-2005: Level 5 Networks Inc.
35418 + * Copyright 2005-2008: Solarflare Communications Inc,
35419 + * 9501 Jeronimo Road, Suite 250,
35420 + * Irvine, CA 92618, USA
35421 + *
35422 + * Maintained by Solarflare Communications
35423 + * <linux-xen-drivers@solarflare.com>
35424 + * <onload-dev@solarflare.com>
35425 + *
35426 + * This program is free software; you can redistribute it and/or modify it
35427 + * under the terms of the GNU General Public License version 2 as published
35428 + * by the Free Software Foundation, incorporated herein by reference.
35429 + *
35430 + * This program is distributed in the hope that it will be useful,
35431 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
35432 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35433 + * GNU General Public License for more details.
35434 + *
35435 + * You should have received a copy of the GNU General Public License
35436 + * along with this program; if not, write to the Free Software
35437 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35438 + ****************************************************************************
35439 + */
35440 +
35441 +/*
35442 + * \author djr
35443 + * \brief Arch stuff for AMD x86_64.
35444 + * \date 2004/08/17
35445 + */
35446 +
35447 +/*! \cidoxg_include_ci_compat */
35448 +#ifndef __CI_COMPAT_X86_64_H__
35449 +#define __CI_COMPAT_X86_64_H__
35450 +
35451 +
35452 +#define CI_MY_BYTE_ORDER CI_LITTLE_ENDIAN
35453 +
35454 +#define CI_WORD_SIZE 8
35455 +#define CI_PTR_SIZE 8
35456 +
35457 +#define CI_PAGE_SIZE 4096
35458 +#define CI_PAGE_SHIFT 12
35459 +#define CI_PAGE_MASK (~(CI_PAGE_SIZE - 1))
35460 +
35461 +#define CI_CPU_HAS_SSE 1 /* SSE extensions supported */
35462 +
35463 +/* SSE2 disabled while investigating BUG1060 */
35464 +#define CI_CPU_HAS_SSE2 0 /* SSE2 extensions supported */
35465 +#define CI_CPU_OOS 0 /* CPU does out of order stores */
35466 +
35467 +
35468 +#endif /* __CI_COMPAT_X86_64_H__ */
35469 +/*! \cidoxg_end */
35470 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/driver/resource/efx_vi.h
35471 ===================================================================
35472 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
35473 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/driver/resource/efx_vi.h 2008-02-20 09:32:49.000000000 +0100
35474 @@ -0,0 +1,276 @@
35475 +/****************************************************************************
35476 + * Driver for Solarflare network controllers -
35477 + * resource management for Xen backend, OpenOnload, etc
35478 + * (including support for SFE4001 10GBT NIC)
35479 + *
35480 + * This file contains public EFX VI API to Solarflare resource manager.
35481 + *
35482 + * Copyright 2005-2007: Solarflare Communications Inc,
35483 + * 9501 Jeronimo Road, Suite 250,
35484 + * Irvine, CA 92618, USA
35485 + *
35486 + * Developed and maintained by Solarflare Communications:
35487 + * <linux-xen-drivers@solarflare.com>
35488 + * <onload-dev@solarflare.com>
35489 + *
35490 + *
35491 + * This program is free software; you can redistribute it and/or modify it
35492 + * under the terms of the GNU General Public License version 2 as published
35493 + * by the Free Software Foundation, incorporated herein by reference.
35494 + *
35495 + * This program is distributed in the hope that it will be useful,
35496 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
35497 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35498 + * GNU General Public License for more details.
35499 + *
35500 + * You should have received a copy of the GNU General Public License
35501 + * along with this program; if not, write to the Free Software
35502 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35503 + ****************************************************************************
35504 + */
35505 +
35506 +#ifndef __CI_DRIVER_RESOURCE_EFX_VI_H__
35507 +#define __CI_DRIVER_RESOURCE_EFX_VI_H__
35508 +
35509 +/* Default size of event queue in the efx_vi resource. Copied from
35510 + * CI_CFG_NETIF_EVENTQ_SIZE */
35511 +#define EFX_VI_EVENTQ_SIZE_DEFAULT 1024
35512 +
35513 +extern int efx_vi_eventq_size;
35514 +
35515 +/**************************************************************************
35516 + * efx_vi_state types, allocation and free
35517 + **************************************************************************/
35518 +
35519 +/*! Handle for referring to an efx_vi */
35520 +struct efx_vi_state;
35521 +
35522 +/*!
35523 + * Allocate an efx_vi, including event queue and pt_endpoint
35524 + *
35525 + * \param vih_out Pointer to a handle that is set on success
35526 + * \param nic_index Index of NIC to apply this resource to
35527 + * \return Zero on success (and vih_out set), non-zero on failure.
35528 + */
35529 +extern int
35530 +efx_vi_alloc(struct efx_vi_state **vih_out, int nic_index);
35531 +
35532 +/*!
35533 + * Free a previously allocated efx_vi
35534 + *
35535 + * \param vih The handle of the efx_vi to free
35536 + */
35537 +extern void
35538 +efx_vi_free(struct efx_vi_state *vih);
35539 +
35540 +/*!
35541 + * Reset a previously allocated efx_vi
35542 + *
35543 + * \param vih The handle of the efx_vi to reset
35544 + */
35545 +extern void
35546 +efx_vi_reset(struct efx_vi_state *vih);
35547 +
35548 +/**************************************************************************
35549 + * efx_vi_eventq types and functions
35550 + **************************************************************************/
35551 +
35552 +/*!
35553 + * Register a function to receive callbacks when event queue timeouts
35554 + * or wakeups occur. Only one function per efx_vi can be registered
35555 + * at once.
35556 + *
35557 + * \param vih The handle to identify the efx_vi
35558 + * \param callback The function to callback
35559 + * \param context An argument to pass to the callback function
35560 + * \return Zero on success, non-zero on failure.
35561 + */
35562 +extern int
35563 +efx_vi_eventq_register_callback(struct efx_vi_state *vih,
35564 + void (*callback)(void *context, int is_timeout),
35565 + void *context);
35566 +
35567 +/*!
35568 + * Remove the current eventq timeout or wakeup callback function
35569 + *
35570 + * \param vih The handle to identify the efx_vi
35571 + * \return Zero on success, non-zero on failure
35572 + */
35573 +extern int
35574 +efx_vi_eventq_kill_callback(struct efx_vi_state *vih);
35575 +
35576 +/**************************************************************************
35577 + * efx_vi_dma_map types and functions
35578 + **************************************************************************/
35579 +
35580 +/*!
35581 + * Handle for referring to an efx_vi DMA mapping
35582 + */
35583 +struct efx_vi_dma_map_state;
35584 +
35585 +/*!
35586 + * Map a list of buffer pages so they are registered with the hardware
35587 + *
35588 + * \param vih The handle to identify the efx_vi
35589 + * \param pages An array of page pointers to map
35590 + * \param n_pages Length of the page pointer array. Must be a power of two.
35591 + * \param dmh_out Set on success to a handle used to refer to this mapping
35592 + * \return Zero on success, non-zero on failure.
35593 + */
35594 +extern int
35595 +efx_vi_dma_map_pages(struct efx_vi_state *vih, struct page **pages,
35596 + int n_pages, struct efx_vi_dma_map_state **dmh_out);
35597 +extern int
35598 +efx_vi_dma_map_addrs(struct efx_vi_state *vih,
35599 + unsigned long long *dev_bus_addrs, int n_pages,
35600 + struct efx_vi_dma_map_state **dmh_out);
35601 +
35602 +/*!
35603 + * Unmap a previously mapped set of pages so they are no longer registered
35604 + * with the hardware.
35605 + *
35606 + * \param vih The handle to identify the efx_vi
35607 + * \param dmh The handle to identify the dma mapping
35608 + */
35609 +extern void
35610 +efx_vi_dma_unmap_pages(struct efx_vi_state *vih,
35611 + struct efx_vi_dma_map_state *dmh);
35612 +extern void
35613 +efx_vi_dma_unmap_addrs(struct efx_vi_state *vih,
35614 + struct efx_vi_dma_map_state *dmh);
35615 +
35616 +/*!
35617 + * Retrieve the buffer address of the mapping
35618 + *
35619 + * \param vih The handle to identify the efx_vi
35620 + * \param dmh The handle to identify the buffer mapping
35621 + * \return The buffer address on success, or zero on failure
35622 + */
35623 +extern unsigned
35624 +efx_vi_dma_get_map_addr(struct efx_vi_state *vih,
35625 + struct efx_vi_dma_map_state *dmh);
35626 +
35627 +/**************************************************************************
35628 + * efx_vi filter functions
35629 + **************************************************************************/
35630 +
35631 +#define EFX_VI_STATIC_FILTERS 32
35632 +
35633 +/*! Handle to refer to a filter instance */
35634 +struct filter_resource_t;
35635 +
35636 +/*!
35637 + * Allocate and add a filter
35638 + *
35639 + * \param vih The handle to identify the efx_vi
35640 + * \param protocol The protocol of the new filter: UDP or TCP
35641 + * \param ip_addr_be32 The local ip address of the filter
35642 + * \param port_le16 The local port of the filter
35643 + * \param fh_out Set on success to be a handle to refer to this filter
35644 + * \return Zero on success, non-zero on failure.
35645 + */
35646 +extern int
35647 +efx_vi_filter(struct efx_vi_state *vih, int protocol, unsigned ip_addr_be32,
35648 + int port_le16, struct filter_resource_t **fh_out);
35649 +
35650 +/*!
35651 + * Remove a filter and free resources associated with it
35652 + *
35653 + * \param vih The handle to identify the efx_vi
35654 + * \param fh The handle to identify the filter
35655 + * \return Zero on success, non-zero on failure
35656 + */
35657 +extern int
35658 +efx_vi_filter_stop(struct efx_vi_state *vih, struct filter_resource_t *fh);
35659 +
35660 +/**************************************************************************
35661 + * efx_vi hw resources types and functions
35662 + **************************************************************************/
35663 +
35664 +/*! Constants for the type field in efx_vi_hw_resource */
35665 +#define EFX_VI_HW_RESOURCE_TXDMAQ 0x0 /* PFN of TX DMA Q */
35666 +#define EFX_VI_HW_RESOURCE_RXDMAQ 0x1 /* PFN of RX DMA Q */
35667 +#define EFX_VI_HW_RESOURCE_TXBELL 0x2 /* PFN of TX Doorbell (EF1) */
35668 +#define EFX_VI_HW_RESOURCE_RXBELL 0x3 /* PFN of RX Doorbell (EF1) */
35669 +#define EFX_VI_HW_RESOURCE_EVQTIMER 0x4 /* Address of event q timer */
35670 +
35671 +/* Address of event q pointer (EF1) */
35672 +#define EFX_VI_HW_RESOURCE_EVQPTR 0x5
35673 +/* Address of register pointer (Falcon A) */
35674 +#define EFX_VI_HW_RESOURCE_EVQRPTR 0x6
35675 +/* Offset of register pointer (Falcon B) */
35676 +#define EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET 0x7
35677 +/* Address of mem KVA */
35678 +#define EFX_VI_HW_RESOURCE_EVQMEMKVA 0x8
35679 +/* PFN of doorbell page (Falcon) */
35680 +#define EFX_VI_HW_RESOURCE_BELLPAGE 0x9
35681 +
35682 +/*! How large an array to allocate for the get_() functions - smaller
35683 + than the total number of constants as some are mutually exclusive */
35684 +#define EFX_VI_HW_RESOURCE_MAXSIZE 0x7
35685 +
35686 +/*! Constants for the mem_type field in efx_vi_hw_resource */
35687 +#define EFX_VI_HW_RESOURCE_IOBUFFER 0 /* Host memory */
35688 +#define EFX_VI_HW_RESOURCE_PERIPHERAL 1 /* Card memory/registers */
35689 +
35690 +/*!
35691 + * Data structure providing information on a hardware resource mapping
35692 + */
35693 +struct efx_vi_hw_resource {
35694 + u8 type; /*!< What this resource represents */
35695 + u8 mem_type; /*!< What type of memory is it in, eg,
35696 + * host or iomem */
35697 + u8 more_to_follow; /*!< Is this part of a multi-region resource */
35698 + u32 length; /*!< Length of the resource in bytes */
35699 + unsigned long address; /*!< Address of this resource */
35700 +};
35701 +
35702 +/*!
35703 + * Metadata concerning the list of hardware resource mappings
35704 + */
35705 +struct efx_vi_hw_resource_metadata {
35706 + int version;
35707 + int evq_order;
35708 + int evq_offs;
35709 + int evq_capacity;
35710 + int instance;
35711 + unsigned rx_capacity;
35712 + unsigned tx_capacity;
35713 + int nic_arch;
35714 + int nic_revision;
35715 + char nic_variant;
35716 +};
35717 +
35718 +/*!
35719 + * Obtain a list of hardware resource mappings, using virtual addresses
35720 + *
35721 + * \param vih The handle to identify the efx_vi
35722 + * \param mdata Pointer to a structure to receive the metadata
35723 + * \param hw_res_array An array to receive the list of hardware resources
35724 + * \param length The length of hw_res_array. Updated on success to contain
35725 + * the number of entries in the supplied array that were used.
35726 + * \return Zero on success, non-zero on failure
35727 + */
35728 +extern int
35729 +efx_vi_hw_resource_get_virt(struct efx_vi_state *vih,
35730 + struct efx_vi_hw_resource_metadata *mdata,
35731 + struct efx_vi_hw_resource *hw_res_array,
35732 + int *length);
35733 +
35734 +/*!
35735 + * Obtain a list of hardware resource mappings, using physical addresses
35736 + *
35737 + * \param vih The handle to identify the efx_vi
35738 + * \param mdata Pointer to a structure to receive the metadata
35739 + * \param hw_res_array An array to receive the list of hardware resources
35740 + * \param length The length of hw_res_array. Updated on success to contain
35741 + * the number of entries in the supplied array that were used.
35742 + * \return Zero on success, non-zero on failure
35743 + */
35744 +extern int
35745 +efx_vi_hw_resource_get_phys(struct efx_vi_state *vih,
35746 + struct efx_vi_hw_resource_metadata *mdata,
35747 + struct efx_vi_hw_resource *hw_res_array,
35748 + int *length);
35749 +
35750 +#endif /* __CI_DRIVER_RESOURCE_EFX_VI_H__ */
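
Editorial note: to make the call sequence documented above concrete, here is a hedged sketch of how a backend might drive this API — allocate a VI on NIC 0, register an eventq callback, map a single buffer page, then tear everything down. The function and callback names are invented, error handling is abbreviated, and this code is not part of the patch; filters would follow the same pattern via efx_vi_filter()/efx_vi_filter_stop().

	static void example_evq_cb(void *context, int is_timeout)
	{
		/* Called on eventq wakeup or timeout for this efx_vi. */
	}

	static int example_efx_vi_setup(struct page *pg)
	{
		struct efx_vi_state *vih;
		struct efx_vi_dma_map_state *dmh;
		int rc;

		rc = efx_vi_alloc(&vih, 0);                   /* NIC index 0 */
		if (rc != 0)
			return rc;

		rc = efx_vi_eventq_register_callback(vih, example_evq_cb, NULL);
		if (rc != 0)
			goto out_free;

		/* n_pages must be a power of two; a single page qualifies. */
		rc = efx_vi_dma_map_pages(vih, &pg, 1, &dmh);
		if (rc == 0)
			efx_vi_dma_unmap_pages(vih, dmh);

		efx_vi_eventq_kill_callback(vih);
	out_free:
		efx_vi_free(vih);
		return rc;
	}
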
35751 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common.h
35752 ===================================================================
35753 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
35754 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common.h 2008-02-20 09:32:49.000000000 +0100
35755 @@ -0,0 +1,102 @@
35756 +/****************************************************************************
35757 + * Driver for Solarflare network controllers -
35758 + * resource management for Xen backend, OpenOnload, etc
35759 + * (including support for SFE4001 10GBT NIC)
35760 + *
35761 + * This file provides API of the efhw library which may be used both from
35762 + * the kernel and from the user-space code.
35763 + *
35764 + * Copyright 2005-2007: Solarflare Communications Inc,
35765 + * 9501 Jeronimo Road, Suite 250,
35766 + * Irvine, CA 92618, USA
35767 + *
35768 + * Developed and maintained by Solarflare Communications:
35769 + * <linux-xen-drivers@solarflare.com>
35770 + * <onload-dev@solarflare.com>
35771 + *
35772 + * Certain parts of the driver were implemented by
35773 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
35774 + * OKTET Labs Ltd, Russia,
35775 + * http://oktetlabs.ru, <info@oktetlabs.ru>
35776 + * by request of Solarflare Communications
35777 + *
35778 + *
35779 + * This program is free software; you can redistribute it and/or modify it
35780 + * under the terms of the GNU General Public License version 2 as published
35781 + * by the Free Software Foundation, incorporated herein by reference.
35782 + *
35783 + * This program is distributed in the hope that it will be useful,
35784 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
35785 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35786 + * GNU General Public License for more details.
35787 + *
35788 + * You should have received a copy of the GNU General Public License
35789 + * along with this program; if not, write to the Free Software
35790 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35791 + ****************************************************************************
35792 + */
35793 +
35794 +#ifndef __CI_EFHW_COMMON_H__
35795 +#define __CI_EFHW_COMMON_H__
35796 +
35797 +#include <ci/efhw/common_sysdep.h>
35798 +
35799 +enum efhw_arch {
35800 + EFHW_ARCH_FALCON,
35801 + EFHW_ARCH_SIENA,
35802 +};
35803 +
35804 +typedef uint32_t efhw_buffer_addr_t;
35805 +#define EFHW_BUFFER_ADDR_FMT "[ba:%"PRIx32"]"
35806 +
35807 +/*! Hardware event: a 64-bit value, also viewable as two opaque 32-bit words or an ev1002 code/status pair */
35808 +typedef union {
35809 + uint64_t u64;
35810 + struct {
35811 + uint32_t a;
35812 + uint32_t b;
35813 + } opaque;
35814 + struct {
35815 + uint32_t code;
35816 + uint32_t status;
35817 + } ev1002;
35818 +} efhw_event_t;
35819 +
35820 +/* Flags for TX/RX queues */
35821 +#define EFHW_VI_JUMBO_EN 0x01 /*! scatter RX over multiple desc */
35822 +#define EFHW_VI_ISCSI_RX_HDIG_EN 0x02 /*! iscsi rx header digest */
35823 +#define EFHW_VI_ISCSI_TX_HDIG_EN 0x04 /*! iscsi tx header digest */
35824 +#define EFHW_VI_ISCSI_RX_DDIG_EN 0x08 /*! iscsi rx data digest */
35825 +#define EFHW_VI_ISCSI_TX_DDIG_EN 0x10 /*! iscsi tx data digest */
35826 +#define EFHW_VI_TX_PHYS_ADDR_EN 0x20 /*! TX physical address mode */
35827 +#define EFHW_VI_RX_PHYS_ADDR_EN 0x40 /*! RX physical address mode */
35828 +#define EFHW_VI_RM_WITH_INTERRUPT 0x80 /*! VI with an interrupt */
35829 +#define EFHW_VI_TX_IP_CSUM_DIS 0x100 /*! disable ip checksum generation */
35830 +#define EFHW_VI_TX_TCPUDP_CSUM_DIS 0x200	/*! disable tcp/udp checksum
35831 +						   generation */
35832 +#define EFHW_VI_TX_TCPUDP_ONLY 0x400 /*! drop non-tcp/udp packets */
35833 +
35834 +/* Types of hardware filter */
35835 +/* Each of these values implicitly selects scatter filters on B0; OR in
35836 +   EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK if a non-scatter filter is required */
35837 +#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD (0) /* dest host only */
35838 +#define EFHW_IP_FILTER_TYPE_UDP_FULL (1) /* dest host and port */
35839 +#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD (2) /* dest based filter */
35840 +#define EFHW_IP_FILTER_TYPE_TCP_FULL (3) /* src filter */
35841 +/* Same again, but with RSS (for B0 only) */
35842 +#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD_RSS_B0 (4)
35843 +#define EFHW_IP_FILTER_TYPE_UDP_FULL_RSS_B0 (5)
35844 +#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD_RSS_B0 (6)
35845 +#define EFHW_IP_FILTER_TYPE_TCP_FULL_RSS_B0 (7)
35846 +
35847 +#define EFHW_IP_FILTER_TYPE_FULL_MASK (0x1) /* Mask for full / wildcard */
35848 +#define EFHW_IP_FILTER_TYPE_TCP_MASK (0x2) /* Mask for TCP type */
35849 +#define EFHW_IP_FILTER_TYPE_RSS_B0_MASK (0x4) /* Mask for B0 RSS enable */
35850 +#define EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK (0x8) /* Mask for B0 SCATTER dsbl */
35851 +
35852 +#define EFHW_IP_FILTER_TYPE_MASK (0xffff) /* Mask of types above */
35853 +
35854 +#define EFHW_IP_FILTER_BROADCAST (0x10000) /* driverlink filter
35855 + support */
35856 +
35857 +#endif /* __CI_EFHW_COMMON_H__ */
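
Editorial note: a brief, hedged illustration of how the filter-type constants and masks above compose. The helper names are invented and this code is not part of the patch.

	/* EFHW_IP_FILTER_TYPE_TCP_FULL_RSS_B0 (7) is simply
	 * EFHW_IP_FILTER_TYPE_TCP_FULL (3) with the RSS bit (0x4) ORed in. */
	static int example_filter_is_full_match(int type)
	{
		return (type & EFHW_IP_FILTER_TYPE_FULL_MASK) != 0;
	}

	static int example_filter_is_tcp(int type)
	{
		return (type & EFHW_IP_FILTER_TYPE_TCP_MASK) != 0;
	}
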
35858 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common_sysdep.h
35859 ===================================================================
35860 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
35861 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common_sysdep.h 2008-02-20 09:32:49.000000000 +0100
35862 @@ -0,0 +1,67 @@
35863 +/****************************************************************************
35864 + * Driver for Solarflare network controllers -
35865 + * resource management for Xen backend, OpenOnload, etc
35866 + * (including support for SFE4001 10GBT NIC)
35867 + *
35868 + * This file provides version-independent Linux kernel API for
35869 + * userland-to-kernel interfaces.
35870 + * Only kernels >=2.6.9 are supported.
35871 + *
35872 + * Copyright 2005-2007: Solarflare Communications Inc,
35873 + * 9501 Jeronimo Road, Suite 250,
35874 + * Irvine, CA 92618, USA
35875 + *
35876 + * Developed and maintained by Solarflare Communications:
35877 + * <linux-xen-drivers@solarflare.com>
35878 + * <onload-dev@solarflare.com>
35879 + *
35880 + * Certain parts of the driver were implemented by
35881 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
35882 + * OKTET Labs Ltd, Russia,
35883 + * http://oktetlabs.ru, <info@oktetlabs.ru>
35884 + * by request of Solarflare Communications
35885 + *
35886 + *
35887 + * This program is free software; you can redistribute it and/or modify it
35888 + * under the terms of the GNU General Public License version 2 as published
35889 + * by the Free Software Foundation, incorporated herein by reference.
35890 + *
35891 + * This program is distributed in the hope that it will be useful,
35892 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
35893 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35894 + * GNU General Public License for more details.
35895 + *
35896 + * You should have received a copy of the GNU General Public License
35897 + * along with this program; if not, write to the Free Software
35898 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35899 + ****************************************************************************
35900 + */
35901 +
35902 +#ifndef __CI_EFHW_COMMON_LINUX_H__
35903 +#define __CI_EFHW_COMMON_LINUX_H__
35904 +
35905 +#include <linux/types.h>
35906 +#include <linux/version.h>
35907 +
35908 +/* Dirty hack, but Linux kernel does not provide DMA_ADDR_T_FMT */
35909 +#if BITS_PER_LONG == 64 || defined(CONFIG_HIGHMEM64G)
35910 +#define DMA_ADDR_T_FMT "%llx"
35911 +#else
35912 +#define DMA_ADDR_T_FMT "%x"
35913 +#endif
35914 +
35915 +/* Linux kernel also does not provide PRIx32... Sigh. */
35916 +#define PRIx32 "x"
35917 +#define PRIx64 "llx"
35918 +
35919 +
35920 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
35921 +enum {
35922 + false = 0,
35923 + true = 1
35924 +};
35925 +
35926 +typedef _Bool bool;
35927 +#endif /* LINUX_VERSION_CODE < 2.6.19 */
35928 +
35929 +#endif /* __CI_EFHW_COMMON_LINUX_H__ */
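
Editorial note: a minimal sketch of what the format-string helpers above are for, assuming a kernel context where printk is available; the function name and message text are invented.

	static void example_log_mapping(uint32_t buf_id, dma_addr_t dma)
	{
		/* DMA_ADDR_T_FMT expands to "%llx" or "%x" to match the width of
		 * dma_addr_t for this config; PRIx32 stands in for the userland
		 * <inttypes.h> macro the kernel lacks. */
		printk(KERN_DEBUG "buffer %" PRIx32 " mapped at " DMA_ADDR_T_FMT "\n",
		       buf_id, dma);
	}
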
35930 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/debug.h
35931 ===================================================================
35932 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
35933 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/debug.h 2008-02-20 09:32:49.000000000 +0100
35934 @@ -0,0 +1,84 @@
35935 +/****************************************************************************
35936 + * Driver for Solarflare network controllers -
35937 + * resource management for Xen backend, OpenOnload, etc
35938 + * (including support for SFE4001 10GBT NIC)
35939 + *
35940 + * This file provides debug-related API for efhw library using Linux kernel
35941 + * primitives.
35942 + *
35943 + * Copyright 2005-2007: Solarflare Communications Inc,
35944 + * 9501 Jeronimo Road, Suite 250,
35945 + * Irvine, CA 92618, USA
35946 + *
35947 + * Developed and maintained by Solarflare Communications:
35948 + * <linux-xen-drivers@solarflare.com>
35949 + * <onload-dev@solarflare.com>
35950 + *
35951 + * Certain parts of the driver were implemented by
35952 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
35953 + * OKTET Labs Ltd, Russia,
35954 + * http://oktetlabs.ru, <info@oktetlabs.ru>
35955 + * by request of Solarflare Communications
35956 + *
35957 + *
35958 + * This program is free software; you can redistribute it and/or modify it
35959 + * under the terms of the GNU General Public License version 2 as published
35960 + * by the Free Software Foundation, incorporated herein by reference.
35961 + *
35962 + * This program is distributed in the hope that it will be useful,
35963 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
35964 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35965 + * GNU General Public License for more details.
35966 + *
35967 + * You should have received a copy of the GNU General Public License
35968 + * along with this program; if not, write to the Free Software
35969 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35970 + ****************************************************************************
35971 + */
35972 +
35973 +#ifndef __CI_EFHW_DEBUG_LINUX_H__
35974 +#define __CI_EFHW_DEBUG_LINUX_H__
35975 +
35976 +#define EFHW_PRINTK_PREFIX "[sfc efhw] "
35977 +
35978 +#define EFHW_PRINTK(level, fmt, ...) \
35979 + printk(level EFHW_PRINTK_PREFIX fmt "\n", __VA_ARGS__)
35980 +
35981 +/* The following macros should be used with non-zero format parameters
35982 + * due to __VA_ARGS__ limitations. Use "%s" with __FUNCTION__ if you can't
35983 + * find better parameters. */
35984 +#define EFHW_ERR(fmt, ...) EFHW_PRINTK(KERN_ERR, fmt, __VA_ARGS__)
35985 +#define EFHW_WARN(fmt, ...) EFHW_PRINTK(KERN_WARNING, fmt, __VA_ARGS__)
35986 +#define EFHW_NOTICE(fmt, ...) EFHW_PRINTK(KERN_NOTICE, fmt, __VA_ARGS__)
35987 +#if 0 && !defined(NDEBUG)
35988 +#define EFHW_TRACE(fmt, ...) EFHW_PRINTK(KERN_DEBUG, fmt, __VA_ARGS__)
35989 +#else
35990 +#define EFHW_TRACE(fmt, ...)
35991 +#endif
35992 +
35993 +#ifndef NDEBUG
35994 +#define EFHW_ASSERT(cond) BUG_ON((cond) == 0)
35995 +#define EFHW_DO_DEBUG(expr) expr
35996 +#else
35997 +#define EFHW_ASSERT(cond)
35998 +#define EFHW_DO_DEBUG(expr)
35999 +#endif
36000 +
36001 +#define EFHW_TEST(expr) \
36002 + do { \
36003 + if (unlikely(!(expr))) \
36004 + BUG(); \
36005 + } while (0)
36006 +
36007 +/* Build time asserts. We paste the line number into the type name
36008 + * so that the macro can be used more than once per file even if the
36009 + * compiler objects to multiple identical typedefs. Collisions
36010 + * between uses in different header files are still possible. */
36011 +#ifndef EFHW_BUILD_ASSERT
36012 +#define __EFHW_BUILD_ASSERT_NAME(_x) __EFHW_BUILD_ASSERT_ILOATHECPP(_x)
36013 +#define __EFHW_BUILD_ASSERT_ILOATHECPP(_x) __EFHW_BUILD_ASSERT__ ##_x
36014 +#define EFHW_BUILD_ASSERT(e) \
36015 + typedef char __EFHW_BUILD_ASSERT_NAME(__LINE__)[(e) ? 1 : -1]
36016 +#endif
36017 +
36018 +#endif /* __CI_EFHW_DEBUG_LINUX_H__ */
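
Editorial note: a short sketch of how the assertion helpers above are intended to be used. The names are illustrative and this code is not part of the patch.

	/* Fails to compile if uint64_t is not 8 bytes wide. */
	EFHW_BUILD_ASSERT(sizeof(uint64_t) == 8);

	static void example_consume_event(void *evp)
	{
		/* Compiled out entirely when NDEBUG is defined ... */
		EFHW_ASSERT(evp != NULL);
		/* ... whereas EFHW_TEST() always BUG()s on failure. */
		EFHW_TEST(evp != NULL);
	}
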
36019 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_config.h
36020 ===================================================================
36021 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
36022 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_config.h 2008-02-20 09:32:49.000000000 +0100
36023 @@ -0,0 +1,43 @@
36024 +/****************************************************************************
36025 + * Driver for Solarflare network controllers -
36026 + * resource management for Xen backend, OpenOnload, etc
36027 + * (including support for SFE4001 10GBT NIC)
36028 + *
36029 + * This file provides some limits used in both kernel and userland code.
36030 + *
36031 + * Copyright 2005-2007: Solarflare Communications Inc,
36032 + * 9501 Jeronimo Road, Suite 250,
36033 + * Irvine, CA 92618, USA
36034 + *
36035 + * Developed and maintained by Solarflare Communications:
36036 + * <linux-xen-drivers@solarflare.com>
36037 + * <onload-dev@solarflare.com>
36038 + *
36039 + * Certain parts of the driver were implemented by
36040 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36041 + * OKTET Labs Ltd, Russia,
36042 + * http://oktetlabs.ru, <info@oktetlabs.ru>
36043 + * by request of Solarflare Communications
36044 + *
36045 + *
36046 + * This program is free software; you can redistribute it and/or modify it
36047 + * under the terms of the GNU General Public License version 2 as published
36048 + * by the Free Software Foundation, incorporated herein by reference.
36049 + *
36050 + * This program is distributed in the hope that it will be useful,
36051 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
36052 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36053 + * GNU General Public License for more details.
36054 + *
36055 + * You should have received a copy of the GNU General Public License
36056 + * along with this program; if not, write to the Free Software
36057 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36058 + ****************************************************************************
36059 + */
36060 +
36061 +#ifndef __CI_EFHW_EFAB_CONFIG_H__
36062 +#define __CI_EFHW_EFAB_CONFIG_H__
36063 +
36064 +#define EFHW_MAX_NR_DEVS 5 /* max number of efhw devices supported */
36065 +
36066 +#endif /* __CI_EFHW_EFAB_CONFIG_H__ */
36067 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_types.h
36068 ===================================================================
36069 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
36070 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_types.h 2008-02-20 09:32:49.000000000 +0100
36071 @@ -0,0 +1,342 @@
36072 +/****************************************************************************
36073 + * Driver for Solarflare network controllers -
36074 + * resource management for Xen backend, OpenOnload, etc
36075 + * (including support for SFE4001 10GBT NIC)
36076 + *
36077 + * This file provides struct efhw_nic and some related types.
36078 + *
36079 + * Copyright 2005-2007: Solarflare Communications Inc,
36080 + * 9501 Jeronimo Road, Suite 250,
36081 + * Irvine, CA 92618, USA
36082 + *
36083 + * Developed and maintained by Solarflare Communications:
36084 + * <linux-xen-drivers@solarflare.com>
36085 + * <onload-dev@solarflare.com>
36086 + *
36087 + * Certain parts of the driver were implemented by
36088 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36089 + * OKTET Labs Ltd, Russia,
36090 + * http://oktetlabs.ru, <info@oktetlabs.ru>
36091 + * by request of Solarflare Communications
36092 + *
36093 + *
36094 + * This program is free software; you can redistribute it and/or modify it
36095 + * under the terms of the GNU General Public License version 2 as published
36096 + * by the Free Software Foundation, incorporated herein by reference.
36097 + *
36098 + * This program is distributed in the hope that it will be useful,
36099 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
36100 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36101 + * GNU General Public License for more details.
36102 + *
36103 + * You should have received a copy of the GNU General Public License
36104 + * along with this program; if not, write to the Free Software
36105 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36106 + ****************************************************************************
36107 + */
36108 +
36109 +#ifndef __CI_EFHW_EFAB_TYPES_H__
36110 +#define __CI_EFHW_EFAB_TYPES_H__
36111 +
36112 +#include <ci/efhw/efhw_config.h>
36113 +#include <ci/efhw/hardware_sysdep.h>
36114 +#include <ci/efhw/iopage_types.h>
36115 +#include <ci/efhw/sysdep.h>
36116 +
36117 +/*--------------------------------------------------------------------
36118 + *
36119 + * hardware limits used in the types
36120 + *
36121 + *--------------------------------------------------------------------*/
36122 +
36123 +#define EFHW_KEVENTQ_MAX 8
36124 +
36125 +/*--------------------------------------------------------------------
36126 + *
36127 + * forward type declarations
36128 + *
36129 + *--------------------------------------------------------------------*/
36130 +
36131 +struct efhw_nic;
36132 +
36133 +/*--------------------------------------------------------------------
36134 + *
36135 + * Managed interface
36136 + *
36137 + *--------------------------------------------------------------------*/
36138 +
36139 +struct efhw_buffer_table_allocation {
36140 + unsigned base;
36141 + unsigned order;
36142 +};
36143 +
36144 +struct eventq_resource_hardware {
36145 + /*!iobuffer allocated for eventq - can be larger than eventq */
36146 + efhw_iopages_t iobuff;
36147 + unsigned iobuff_off;
36148 + struct efhw_buffer_table_allocation buf_tbl_alloc;
36149 + int capacity; /*!< capacity of event queue */
36150 +};
36151 +
36152 +/*--------------------------------------------------------------------
36153 + *
36154 + * event queues and event driven callbacks
36155 + *
36156 + *--------------------------------------------------------------------*/
36157 +
36158 +struct efhw_keventq {
36159 + volatile int lock;
36160 + caddr_t evq_base;
36161 + int32_t evq_ptr;
36162 + uint32_t evq_mask;
36163 + unsigned instance;
36164 + struct eventq_resource_hardware hw;
36165 + struct efhw_ev_handler *ev_handlers;
36166 +};
36167 +
36168 +/**********************************************************************
36169 + * Portable HW interface. ***************************************
36170 + **********************************************************************/
36171 +
36172 +/*--------------------------------------------------------------------
36173 + *
36174 + * EtherFabric Functional units - configuration and control
36175 + *
36176 + *--------------------------------------------------------------------*/
36177 +
36178 +struct efhw_func_ops {
36179 +
36180 + /*-------------- Initialisation ------------ */
36181 +
36182 + /*! close down all hardware functional units - leaves NIC in a safe
36183 + state for driver unload */
36184 + void (*close_hardware) (struct efhw_nic *nic);
36185 +
36186 + /*! initialise all hardware functional units */
36187 + int (*init_hardware) (struct efhw_nic *nic,
36188 + struct efhw_ev_handler *,
36189 + const uint8_t *mac_addr);
36190 +
36191 + /*-------------- Interrupt support ------------ */
36192 +
36193 + /*! Main interrupt routine
36194 + ** This function returns,
36195 + ** - zero, if the IRQ was not generated by EF1
36196 + ** - non-zero, if EF1 was the source of the IRQ
36197 + **
36198 + **
36199 +	** opaque is an OS-provided pointer for use by the OS callbacks,
36200 +	** e.g. on Windows it is used to indicate that a DPC has been scheduled
36201 + */
36202 + int (*interrupt) (struct efhw_nic *nic);
36203 +
36204 + /*! Enable given interrupt mask for the given IRQ unit */
36205 + void (*interrupt_enable) (struct efhw_nic *nic, uint idx);
36206 +
36207 + /*! Disable given interrupt mask for the given IRQ unit */
36208 + void (*interrupt_disable) (struct efhw_nic *nic, uint idx);
36209 +
36210 + /*! Set interrupt moderation strategy for the given IRQ unit
36211 + ** val is in usec
36212 + */
36213 + void (*set_interrupt_moderation)(struct efhw_nic *nic,
36214 + uint idx, uint val);
36215 +
36216 + /*-------------- Event support ------------ */
36217 +
36218 + /*! Enable the given event queue
36219 +	   depending on the underlying implementation (EF1 or Falcon),
36220 +	   either a q_base_addr in host memory or a buffer base id should
36221 +	   be provided
36222 + */
36223 + void (*event_queue_enable) (struct efhw_nic *nic,
36224 +				    uint evq, /* event queue index */
36225 + uint evq_size, /* units of #entries */
36226 + dma_addr_t q_base_addr, uint buf_base_id);
36227 +
36228 + /*! Disable the given event queue (and any associated timer) */
36229 + void (*event_queue_disable) (struct efhw_nic *nic, uint evq,
36230 + int timer_only);
36231 +
36232 + /*! request wakeup from the NIC on a given event Q */
36233 + void (*wakeup_request) (struct efhw_nic *nic, dma_addr_t q_base_addr,
36234 + int next_i, int evq);
36235 +
36236 + /*! Push a SW event on a given eventQ */
36237 + void (*sw_event) (struct efhw_nic *nic, int data, int evq);
36238 +
36239 + /*-------------- Filter support ------------ */
36240 +
36241 +	/*! Set up a given filter - the software can request a filter_i,
36242 + * but some EtherFabric implementations will override with
36243 + * a more suitable index
36244 + */
36245 + int (*ipfilter_set) (struct efhw_nic *nic, int type,
36246 + int *filter_i, int dmaq,
36247 + unsigned saddr_be32, unsigned sport_be16,
36248 + unsigned daddr_be32, unsigned dport_be16);
36249 +
36250 + /*! Attach a given filter to a DMAQ */
36251 + void (*ipfilter_attach) (struct efhw_nic *nic, int filter_idx,
36252 + int dmaq_idx);
36253 +
36254 + /*! Detach a filter from its DMAQ */
36255 + void (*ipfilter_detach) (struct efhw_nic *nic, int filter_idx);
36256 +
36257 + /*! Clear down a given filter */
36258 + void (*ipfilter_clear) (struct efhw_nic *nic, int filter_idx);
36259 +
36260 + /*-------------- DMA support ------------ */
36261 +
36262 + /*! Initialise NIC state for a given TX DMAQ */
36263 + void (*dmaq_tx_q_init) (struct efhw_nic *nic,
36264 + uint dmaq, uint evq, uint owner, uint tag,
36265 + uint dmaq_size, uint buf_idx, uint flags);
36266 +
36267 + /*! Initialise NIC state for a given RX DMAQ */
36268 + void (*dmaq_rx_q_init) (struct efhw_nic *nic,
36269 + uint dmaq, uint evq, uint owner, uint tag,
36270 + uint dmaq_size, uint buf_idx, uint flags);
36271 +
36272 + /*! Disable a given TX DMAQ */
36273 + void (*dmaq_tx_q_disable) (struct efhw_nic *nic, uint dmaq);
36274 +
36275 + /*! Disable a given RX DMAQ */
36276 + void (*dmaq_rx_q_disable) (struct efhw_nic *nic, uint dmaq);
36277 +
36278 + /*! Flush a given TX DMA channel */
36279 + int (*flush_tx_dma_channel) (struct efhw_nic *nic, uint dmaq);
36280 +
36281 + /*! Flush a given RX DMA channel */
36282 + int (*flush_rx_dma_channel) (struct efhw_nic *nic, uint dmaq);
36283 +
36284 + /*-------------- Buffer table Support ------------ */
36285 +
36286 + /*! Initialise a buffer table page */
36287 + void (*buffer_table_set) (struct efhw_nic *nic,
36288 + dma_addr_t dma_addr,
36289 + uint bufsz, uint region,
36290 + int own_id, int buffer_id);
36291 +
36292 + /*! Initialise a block of buffer table pages */
36293 + void (*buffer_table_set_n) (struct efhw_nic *nic, int buffer_id,
36294 + dma_addr_t dma_addr,
36295 + uint bufsz, uint region,
36296 + int n_pages, int own_id);
36297 +
36298 + /*! Clear a block of buffer table pages */
36299 + void (*buffer_table_clear) (struct efhw_nic *nic, int buffer_id,
36300 + int num);
36301 +
36302 + /*! Commit a buffer table update */
36303 + void (*buffer_table_commit) (struct efhw_nic *nic);
36304 +
36305 +};
36306 +
36307 +
36308 +/*----------------------------------------------------------------------------
36309 + *
36310 + * NIC type
36311 + *
36312 + *---------------------------------------------------------------------------*/
36313 +
36314 +struct efhw_device_type {
36315 + int arch; /* enum efhw_arch */
36316 + char variant; /* 'A', 'B', ... */
36317 + int revision; /* 0, 1, ... */
36318 +};
36319 +
36320 +
36321 +/*----------------------------------------------------------------------------
36322 + *
36323 + * EtherFabric NIC instance - nic.c for HW independent functions
36324 + *
36325 + *---------------------------------------------------------------------------*/
36326 +
36327 +/*! */
36328 +struct efhw_nic {
36329 +	/*! zero-based index in efrm_nic_table.nic array */
36330 + volatile int index;
36331 + int ifindex; /*!< OS level nic index */
36332 +#ifdef HAS_NET_NAMESPACE
36333 + struct net *nd_net;
36334 +#endif
36335 +
36336 + struct efhw_device_type devtype;
36337 +
36338 + /*! Options that can be set by user. */
36339 + unsigned options;
36340 +# define NIC_OPT_EFTEST 0x1 /* owner is an eftest app */
36341 +
36342 +# define NIC_OPT_DEFAULT 0
36343 +
36344 + /*! Internal flags that indicate hardware properties at runtime. */
36345 + unsigned flags;
36346 +# define NIC_FLAG_NO_INTERRUPT 0x01 /* to be set at init time only */
36347 +# define NIC_FLAG_TRY_MSI 0x02
36348 +# define NIC_FLAG_MSI 0x04
36349 +# define NIC_FLAG_OS_IRQ_EN 0x08
36350 +# define NIC_FLAG_10G 0x10
36351 +
36352 + unsigned mtu; /*!< MAC MTU (includes MAC hdr) */
36353 +
36354 + /* hardware resources */
36355 +
36356 + /*! I/O address of the start of the bar */
36357 + efhw_ioaddr_t bar_ioaddr;
36358 +
36359 + /*! Bar number of control aperture. */
36360 + unsigned ctr_ap_bar;
36361 + /*! Length of control aperture in bytes. */
36362 + unsigned ctr_ap_bytes;
36363 +
36364 + uint8_t mac_addr[ETH_ALEN]; /*!< mac address */
36365 +
36366 + /*! EtherFabric Functional Units -- functions */
36367 + const struct efhw_func_ops *efhw_func;
36368 +
36369 + /* Value read from FPGA version register. Zero for asic. */
36370 + unsigned fpga_version;
36371 +
36372 + /*! This lock protects a number of misc NIC resources. It should
36373 + * only be used for things that can be at the bottom of the lock
36374 + * order. ie. You mustn't attempt to grab any other lock while
36375 + * holding this one.
36376 + */
36377 + spinlock_t *reg_lock;
36378 + spinlock_t the_reg_lock;
36379 +
36380 + int buf_commit_outstanding; /*!< outstanding buffer commits */
36381 +
36382 + /*! interrupt callbacks (hard-irq) */
36383 + void (*irq_handler) (struct efhw_nic *, int unit);
36384 +
36385 + /*! event queues per driver */
36386 + struct efhw_keventq evq[EFHW_KEVENTQ_MAX];
36387 +
36388 +/* for marking when we are not using an IRQ unit
36389 + - 0 is a valid offset to an IRQ unit on EF1! */
36390 +#define EFHW_IRQ_UNIT_UNUSED 0xffff
36391 + /*! interrupt unit in use */
36392 + unsigned int irq_unit[EFHW_KEVENTQ_MAX];
36393 + efhw_iopage_t irq_iobuff; /*!< Falcon SYSERR interrupt */
36394 +
36395 + /* The new driverlink infrastructure. */
36396 + struct efx_dl_device *net_driver_dev;
36397 + struct efx_dlfilt_cb_s *dlfilter_cb;
36398 +
36399 + /*! Bit masks of the sizes of event queues and dma queues supported
36400 + * by the nic. */
36401 + unsigned evq_sizes;
36402 + unsigned rxq_sizes;
36403 + unsigned txq_sizes;
36404 +
36405 + /* Size of filter table (including odd and even banks). */
36406 + unsigned filter_tbl_size;
36407 +};
36408 +
36409 +
36410 +#define EFHW_KVA(nic) ((nic)->bar_ioaddr)
36411 +
36412 +
36413 +#endif /* __CI_EFHW_EFHW_TYPES_H__ */
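
Editorial note: a hedged illustration of consulting the flag bits in struct efhw_nic above; the helper name is invented and this code is not part of the patch.

	static int example_nic_is_polled_only(const struct efhw_nic *nic)
	{
		/* NIC_FLAG_NO_INTERRUPT is set at init time only, per the comment
		 * above; EFHW_KVA(nic) gives the mapped control BAR when present. */
		return (nic->flags & NIC_FLAG_NO_INTERRUPT) != 0;
	}
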
36414 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/hardware_sysdep.h
36415 ===================================================================
36416 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
36417 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/hardware_sysdep.h 2008-02-20 09:32:49.000000000 +0100
36418 @@ -0,0 +1,84 @@
36419 +/****************************************************************************
36420 + * Driver for Solarflare network controllers -
36421 + * resource management for Xen backend, OpenOnload, etc
36422 + * (including support for SFE4001 10GBT NIC)
36423 + *
36424 + * This file provides version-independent Linux kernel API for header files
36425 + * with hardware-related definitions (in ci/driver/efab/hardware*).
36426 + * Only kernels >=2.6.9 are supported.
36427 + *
36428 + * Copyright 2005-2007: Solarflare Communications Inc,
36429 + * 9501 Jeronimo Road, Suite 250,
36430 + * Irvine, CA 92618, USA
36431 + *
36432 + * Developed and maintained by Solarflare Communications:
36433 + * <linux-xen-drivers@solarflare.com>
36434 + * <onload-dev@solarflare.com>
36435 + *
36436 + * Certain parts of the driver were implemented by
36437 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36438 + * OKTET Labs Ltd, Russia,
36439 + * http://oktetlabs.ru, <info@oktetlabs.ru>
36440 + * by request of Solarflare Communications
36441 + *
36442 + *
36443 + * This program is free software; you can redistribute it and/or modify it
36444 + * under the terms of the GNU General Public License version 2 as published
36445 + * by the Free Software Foundation, incorporated herein by reference.
36446 + *
36447 + * This program is distributed in the hope that it will be useful,
36448 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
36449 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36450 + * GNU General Public License for more details.
36451 + *
36452 + * You should have received a copy of the GNU General Public License
36453 + * along with this program; if not, write to the Free Software
36454 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36455 + ****************************************************************************
36456 + */
36457 +
36458 +#ifndef __CI_EFHW_HARDWARE_LINUX_H__
36459 +#define __CI_EFHW_HARDWARE_LINUX_H__
36460 +
36461 +#include <asm/io.h>
36462 +
36463 +#ifdef __LITTLE_ENDIAN
36464 +#define EFHW_IS_LITTLE_ENDIAN
36465 +#elif __BIG_ENDIAN
36466 +#define EFHW_IS_BIG_ENDIAN
36467 +#else
36468 +#error Unknown endianness
36469 +#endif
36470 +
36471 +#ifndef mmiowb
36472 + #if defined(__i386__) || defined(__x86_64__)
36473 + #define mmiowb()
36474 + #elif defined(__ia64__)
36475 + #ifndef ia64_mfa
36476 + #define ia64_mfa() asm volatile ("mf.a" ::: "memory")
36477 + #endif
36478 + #define mmiowb ia64_mfa
36479 + #else
36480 + #error "Need definition for mmiowb()"
36481 + #endif
36482 +#endif
36483 +
36484 +typedef char *efhw_ioaddr_t;
36485 +
36486 +#ifndef readq
36487 +static inline uint64_t __readq(void __iomem *addr)
36488 +{
36489 + return *(volatile uint64_t *)addr;
36490 +}
36491 +#define readq(x) __readq(x)
36492 +#endif
36493 +
36494 +#ifndef writeq
36495 +static inline void __writeq(uint64_t v, void __iomem *addr)
36496 +{
36497 + *(volatile uint64_t *)addr = v;
36498 +}
36499 +#define writeq(val, addr) __writeq((val), (addr))
36500 +#endif
36501 +
36502 +#endif /* __CI_EFHW_HARDWARE_LINUX_H__ */
36503 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/iopage_types.h
36504 ===================================================================
36505 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
36506 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/iopage_types.h 2008-02-20 09:32:49.000000000 +0100
36507 @@ -0,0 +1,188 @@
36508 +/****************************************************************************
36509 + * Driver for Solarflare network controllers -
36510 + * resource management for Xen backend, OpenOnload, etc
36511 + * (including support for SFE4001 10GBT NIC)
36512 + *
36513 + * This file provides efhw_page_t and efhw_iopage_t for Linux kernel.
36514 + *
36515 + * Copyright 2005-2007: Solarflare Communications Inc,
36516 + * 9501 Jeronimo Road, Suite 250,
36517 + * Irvine, CA 92618, USA
36518 + *
36519 + * Developed and maintained by Solarflare Communications:
36520 + * <linux-xen-drivers@solarflare.com>
36521 + * <onload-dev@solarflare.com>
36522 + *
36523 + * Certain parts of the driver were implemented by
36524 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36525 + * OKTET Labs Ltd, Russia,
36526 + * http://oktetlabs.ru, <info@oktetlabs.ru>
36527 + * by request of Solarflare Communications
36528 + *
36529 + *
36530 + * This program is free software; you can redistribute it and/or modify it
36531 + * under the terms of the GNU General Public License version 2 as published
36532 + * by the Free Software Foundation, incorporated herein by reference.
36533 + *
36534 + * This program is distributed in the hope that it will be useful,
36535 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
36536 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36537 + * GNU General Public License for more details.
36538 + *
36539 + * You should have received a copy of the GNU General Public License
36540 + * along with this program; if not, write to the Free Software
36541 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36542 + ****************************************************************************
36543 + */
36544 +
36545 +#ifndef __CI_EFHW_IOPAGE_LINUX_H__
36546 +#define __CI_EFHW_IOPAGE_LINUX_H__
36547 +
36548 +#include <linux/gfp.h>
36549 +#include <linux/hardirq.h>
36550 +#include <ci/efhw/debug.h>
36551 +
36552 +/*--------------------------------------------------------------------
36553 + *
36554 + * efhw_page_t: A single page of memory. Directly mapped in the driver,
36555 + * and can be mapped to userlevel.
36556 + *
36557 + *--------------------------------------------------------------------*/
36558 +
36559 +typedef struct {
36560 + unsigned long kva;
36561 +} efhw_page_t;
36562 +
36563 +static inline int efhw_page_alloc(efhw_page_t *p)
36564 +{
36565 + p->kva = __get_free_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL);
36566 + return p->kva ? 0 : -ENOMEM;
36567 +}
36568 +
36569 +static inline int efhw_page_alloc_zeroed(efhw_page_t *p)
36570 +{
36571 + p->kva = get_zeroed_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL);
36572 + return p->kva ? 0 : -ENOMEM;
36573 +}
36574 +
36575 +static inline void efhw_page_free(efhw_page_t *p)
36576 +{
36577 + free_page(p->kva);
36578 + EFHW_DO_DEBUG(memset(p, 0, sizeof(*p)));
36579 +}
36580 +
36581 +static inline char *efhw_page_ptr(efhw_page_t *p)
36582 +{
36583 + return (char *)p->kva;
36584 +}
36585 +
36586 +static inline unsigned efhw_page_pfn(efhw_page_t *p)
36587 +{
36588 + return (unsigned)(__pa(p->kva) >> PAGE_SHIFT);
36589 +}
36590 +
36591 +static inline void efhw_page_mark_invalid(efhw_page_t *p)
36592 +{
36593 + p->kva = 0;
36594 +}
36595 +
36596 +static inline int efhw_page_is_valid(efhw_page_t *p)
36597 +{
36598 + return p->kva != 0;
36599 +}
36600 +
36601 +static inline void efhw_page_init_from_va(efhw_page_t *p, void *va)
36602 +{
36603 + p->kva = (unsigned long)va;
36604 +}
36605 +
36606 +/*--------------------------------------------------------------------
36607 + *
36608 + * efhw_iopage_t: A single page of memory. Directly mapped in the driver,
36609 + * and can be mapped to userlevel. Can also be accessed by the NIC.
36610 + *
36611 + *--------------------------------------------------------------------*/
36612 +
36613 +typedef struct {
36614 + efhw_page_t p;
36615 + dma_addr_t dma_addr;
36616 +} efhw_iopage_t;
36617 +
36618 +static inline dma_addr_t efhw_iopage_dma_addr(efhw_iopage_t *p)
36619 +{
36620 + return p->dma_addr;
36621 +}
36622 +
36623 +#define efhw_iopage_ptr(iop) efhw_page_ptr(&(iop)->p)
36624 +#define efhw_iopage_pfn(iop) efhw_page_pfn(&(iop)->p)
36625 +#define efhw_iopage_mark_invalid(iop) efhw_page_mark_invalid(&(iop)->p)
36626 +#define efhw_iopage_is_valid(iop) efhw_page_is_valid(&(iop)->p)
36627 +
36628 +/*--------------------------------------------------------------------
36629 + *
36630 + * efhw_iopages_t: A set of pages that are contiguous in physical memory.
36631 + * Directly mapped in the driver, and can be mapped to userlevel. Can also
36632 + * be accessed by the NIC.
36633 + *
36634 + * NB. The O/S may be unwilling to allocate many, or even any of these. So
36635 + * only use this type where the NIC really needs a physically contiguous
36636 + * buffer.
36637 + *
36638 + *--------------------------------------------------------------------*/
36639 +
36640 +typedef struct {
36641 + caddr_t kva;
36642 + unsigned order;
36643 + dma_addr_t dma_addr;
36644 +} efhw_iopages_t;
36645 +
36646 +static inline caddr_t efhw_iopages_ptr(efhw_iopages_t *p)
36647 +{
36648 + return p->kva;
36649 +}
36650 +
36651 +static inline unsigned efhw_iopages_pfn(efhw_iopages_t *p)
36652 +{
36653 + return (unsigned)(__pa(p->kva) >> PAGE_SHIFT);
36654 +}
36655 +
36656 +static inline dma_addr_t efhw_iopages_dma_addr(efhw_iopages_t *p)
36657 +{
36658 + return p->dma_addr;
36659 +}
36660 +
36661 +static inline unsigned efhw_iopages_size(efhw_iopages_t *p)
36662 +{
36663 + return 1u << (p->order + PAGE_SHIFT);
36664 +}
36665 +
36666 +/* efhw_iopage_t <-> efhw_iopages_t conversions for handling physically
36667 + * contiguous allocations in iobufsets for iSCSI. This allows the
36668 + * essential information about contiguous allocations from
36669 + * efhw_iopages_alloc() to be saved away in the efhw_iopage_t array in an
36670 + * iobufset. (Changing the iobufset resource to use a union type would
36671 + * involve a lot of code changes, and make the iobufset's metadata larger
36672 + * which could be bad as it's supposed to fit into a single page on some
36673 + * platforms.)
36674 + */
36675 +static inline void
36676 +efhw_iopage_init_from_iopages(efhw_iopage_t *iopage,
36677 + efhw_iopages_t *iopages, unsigned pageno)
36678 +{
36679 + iopage->p.kva = ((unsigned long)efhw_iopages_ptr(iopages))
36680 + + (pageno * PAGE_SIZE);
36681 + iopage->dma_addr = efhw_iopages_dma_addr(iopages) +
36682 + (pageno * PAGE_SIZE);
36683 +}
36684 +
36685 +static inline void
36686 +efhw_iopages_init_from_iopage(efhw_iopages_t *iopages,
36687 + efhw_iopage_t *iopage, unsigned order)
36688 +{
36689 + iopages->kva = (caddr_t) efhw_iopage_ptr(iopage);
36690 + EFHW_ASSERT(iopages->kva);
36691 + iopages->order = order;
36692 + iopages->dma_addr = efhw_iopage_dma_addr(iopage);
36693 +}
36694 +
36695 +#endif /* __CI_EFHW_IOPAGE_LINUX_H__ */
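
Editorial note: a minimal sketch of the efhw_page_t life cycle defined above. Error handling is abbreviated, the function name is invented, and this code is not part of the patch.

	static int example_page_roundtrip(void)
	{
		efhw_page_t pg;
		int rc = efhw_page_alloc_zeroed(&pg);
		if (rc < 0)
			return rc;                  /* -ENOMEM */
		efhw_page_ptr(&pg)[0] = 1;          /* touch the kernel mapping */
		efhw_page_free(&pg);                /* poisons pg in debug builds */
		return 0;
	}
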
36696 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/public.h
36697 ===================================================================
36698 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
36699 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/public.h 2008-02-20 09:32:49.000000000 +0100
36700 @@ -0,0 +1,83 @@
36701 +/****************************************************************************
36702 + * Driver for Solarflare network controllers -
36703 + * resource management for Xen backend, OpenOnload, etc
36704 + * (including support for SFE4001 10GBT NIC)
36705 + *
36706 + * This file provides public API of efhw library exported from the SFC
36707 + * resource driver.
36708 + *
36709 + * Copyright 2005-2007: Solarflare Communications Inc,
36710 + * 9501 Jeronimo Road, Suite 250,
36711 + * Irvine, CA 92618, USA
36712 + *
36713 + * Developed and maintained by Solarflare Communications:
36714 + * <linux-xen-drivers@solarflare.com>
36715 + * <onload-dev@solarflare.com>
36716 + *
36717 + * Certain parts of the driver were implemented by
36718 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36719 + * OKTET Labs Ltd, Russia,
36720 + * http://oktetlabs.ru, <info@oktetlabs.ru>
36721 + * by request of Solarflare Communications
36722 + *
36723 + *
36724 + * This program is free software; you can redistribute it and/or modify it
36725 + * under the terms of the GNU General Public License version 2 as published
36726 + * by the Free Software Foundation, incorporated herein by reference.
36727 + *
36728 + * This program is distributed in the hope that it will be useful,
36729 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
36730 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36731 + * GNU General Public License for more details.
36732 + *
36733 + * You should have received a copy of the GNU General Public License
36734 + * along with this program; if not, write to the Free Software
36735 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36736 + ****************************************************************************
36737 + */
36738 +
36739 +#ifndef __CI_EFHW_PUBLIC_H__
36740 +#define __CI_EFHW_PUBLIC_H__
36741 +
36742 +#include <ci/efhw/common.h>
36743 +#include <ci/efhw/efhw_types.h>
36744 +
36745 +/*! Returns true if we have some EtherFabric functional units -
36746 + whether configured or not */
36747 +static inline int efhw_nic_have_functional_units(struct efhw_nic *nic)
36748 +{
36749 + return nic->efhw_func != 0;
36750 +}
36751 +
36752 +/*! Returns true if the EtherFabric functional units have been configured */
36753 +static inline int efhw_nic_have_hw(struct efhw_nic *nic)
36754 +{
36755 + return efhw_nic_have_functional_units(nic) && (EFHW_KVA(nic) != 0);
36756 +}
36757 +
36758 +/*! Helper function to allocate the iobuffer needed by an eventq
36759 + * - it ensures the eventq has the correct alignment for the NIC
36760 + *
36761 + * \param nic Owning NIC; \param h Event-queue hardware resource
36762 + * \param evq_instance Event-queue instance (index)
36763 + * \param buf_bytes Requested size of eventq
36764 + * \return < 0 if iobuffer allocation fails
36765 + */
36766 +int efhw_nic_event_queue_alloc_iobuffer(struct efhw_nic *nic,
36767 + struct eventq_resource_hardware *h,
36768 + int evq_instance, unsigned buf_bytes);
36769 +
36770 +extern void falcon_nic_set_rx_usr_buf_size(struct efhw_nic *,
36771 + int rx_usr_buf_size);
36772 +
36773 +extern void
36774 +falcon_nic_rx_filter_ctl_set(struct efhw_nic *nic, uint32_t tcp_full,
36775 + uint32_t tcp_wild,
36776 + uint32_t udp_full, uint32_t udp_wild);
36777 +
36778 +extern void
36779 +falcon_nic_rx_filter_ctl_get(struct efhw_nic *nic, uint32_t *tcp_full,
36780 + uint32_t *tcp_wild,
36781 + uint32_t *udp_full, uint32_t *udp_wild);
36782 +
36783 +#endif /* __CI_EFHW_PUBLIC_H__ */
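
Usage sketch (not part of the patch): how a caller might combine the predicates and setters declared in public.h above. The function do_hw_setup() and the 4096-byte value are illustrative assumptions; only efhw_nic_have_hw(), EFHW_KVA() and falcon_nic_set_rx_usr_buf_size() come from the header.

#include <linux/errno.h>
#include <ci/efhw/public.h>

static int do_hw_setup(struct efhw_nic *nic)
{
	/* Proceed only once the functional units exist and their registers
	 * are mapped, i.e. EFHW_KVA(nic) != 0. */
	if (!efhw_nic_have_hw(nic))
		return -ENODEV;

	/* Illustrative size; a real caller derives this from its config. */
	falcon_nic_set_rx_usr_buf_size(nic, 4096);
	return 0;
}
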
36784 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/sysdep.h
36785 ===================================================================
36786 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
36787 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/sysdep.h 2008-02-20 09:32:49.000000000 +0100
36788 @@ -0,0 +1,72 @@
36789 +/****************************************************************************
36790 + * Driver for Solarflare network controllers -
36791 + * resource management for Xen backend, OpenOnload, etc
36792 + * (including support for SFE4001 10GBT NIC)
36793 + *
36794 + * This file provides version-independent Linux kernel API for efhw library.
36795 + * Only kernels >=2.6.9 are supported.
36796 + *
36797 + * Copyright 2005-2007: Solarflare Communications Inc,
36798 + * 9501 Jeronimo Road, Suite 250,
36799 + * Irvine, CA 92618, USA
36800 + *
36801 + * Developed and maintained by Solarflare Communications:
36802 + * <linux-xen-drivers@solarflare.com>
36803 + * <onload-dev@solarflare.com>
36804 + *
36805 + * Certain parts of the driver were implemented by
36806 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36807 + * OKTET Labs Ltd, Russia,
36808 + * http://oktetlabs.ru, <info@oktetlabs.ru>
36809 + * by request of Solarflare Communications
36810 + *
36811 + *
36812 + * This program is free software; you can redistribute it and/or modify it
36813 + * under the terms of the GNU General Public License version 2 as published
36814 + * by the Free Software Foundation, incorporated herein by reference.
36815 + *
36816 + * This program is distributed in the hope that it will be useful,
36817 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
36818 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36819 + * GNU General Public License for more details.
36820 + *
36821 + * You should have received a copy of the GNU General Public License
36822 + * along with this program; if not, write to the Free Software
36823 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36824 + ****************************************************************************
36825 + */
36826 +
36827 +#ifndef __CI_EFHW_SYSDEP_LINUX_H__
36828 +#define __CI_EFHW_SYSDEP_LINUX_H__
36829 +
36830 +#include <linux/version.h>
36831 +#include <linux/module.h>
36832 +#include <linux/spinlock.h>
36833 +#include <linux/delay.h>
36834 +#include <linux/if_ether.h>
36835 +
36836 +#include <linux/netdevice.h> /* necessary for etherdevice.h on some kernels */
36837 +#include <linux/etherdevice.h>
36838 +
36839 +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21)
36840 +static inline int is_local_ether_addr(const u8 *addr)
36841 +{
36842 + return (0x02 & addr[0]);
36843 +}
36844 +#endif
36845 +
36846 +typedef unsigned long irq_flags_t;
36847 +
36848 +#define spin_lock_destroy(l_) do {} while (0)
36849 +
36850 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
36851 +#define HAS_NET_NAMESPACE
36852 +#endif
36853 +
36854 +/* Oddly, Linux defines round_up() for x86 only, in an x86-specific
36855 + * header, so provide a fallback here. */
36856 +#ifndef round_up
36857 +#define round_up(x, y) (((x) + (y) - 1) & ~((y)-1))
36858 +#endif
36859 +
36860 +#endif /* __CI_EFHW_SYSDEP_LINUX_H__ */
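
Usage sketch (not part of the patch): the round_up() fallback above only works for power-of-two alignments, since it masks with ~((y)-1). dma_region_bytes() is a hypothetical caller.

#include <ci/efhw/sysdep.h>

static unsigned long dma_region_bytes(unsigned long payload_bytes)
{
	/* 64 is a power of two, as this round_up() definition requires. */
	return round_up(payload_bytes, 64);
}
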
36861 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/nic_table.h
36862 ===================================================================
36863 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
36864 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/nic_table.h 2008-02-20 09:32:49.000000000 +0100
36865 @@ -0,0 +1,98 @@
36866 +/****************************************************************************
36867 + * Driver for Solarflare network controllers -
36868 + * resource management for Xen backend, OpenOnload, etc
36869 + * (including support for SFE4001 10GBT NIC)
36870 + *
36871 + * This file provides public API for NIC table.
36872 + *
36873 + * Copyright 2005-2007: Solarflare Communications Inc,
36874 + * 9501 Jeronimo Road, Suite 250,
36875 + * Irvine, CA 92618, USA
36876 + *
36877 + * Developed and maintained by Solarflare Communications:
36878 + * <linux-xen-drivers@solarflare.com>
36879 + * <onload-dev@solarflare.com>
36880 + *
36881 + * Certain parts of the driver were implemented by
36882 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36883 + * OKTET Labs Ltd, Russia,
36884 + * http://oktetlabs.ru, <info@oktetlabs.ru>
36885 + * by request of Solarflare Communications
36886 + *
36887 + *
36888 + * This program is free software; you can redistribute it and/or modify it
36889 + * under the terms of the GNU General Public License version 2 as published
36890 + * by the Free Software Foundation, incorporated herein by reference.
36891 + *
36892 + * This program is distributed in the hope that it will be useful,
36893 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
36894 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36895 + * GNU General Public License for more details.
36896 + *
36897 + * You should have received a copy of the GNU General Public License
36898 + * along with this program; if not, write to the Free Software
36899 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36900 + ****************************************************************************
36901 + */
36902 +
36903 +#ifndef __CI_EFRM_NIC_TABLE_H__
36904 +#define __CI_EFRM_NIC_TABLE_H__
36905 +
36906 +#include <ci/efhw/efhw_types.h>
36907 +#include <ci/efrm/sysdep.h>
36908 +
36909 +/*--------------------------------------------------------------------
36910 + *
36911 + * struct efrm_nic_table - top level driver object keeping all NICs -
36912 + * implemented in driver_object.c
36913 + *
36914 + *--------------------------------------------------------------------*/
36915 +
36916 +/*! Table of all NICs registered with the resource driver */
36917 +struct efrm_nic_table {
36918 + /*! nics attached to this driver */
36919 + struct efhw_nic *nic[EFHW_MAX_NR_DEVS];
36920 + /*! pointer to an arbitrary struct efhw_nic if one exists;
36921 + * for code which does not care which NIC it wants but
36922 + * still needs one. Note you cannot assume nic[0] exists. */
36923 + struct efhw_nic *a_nic;
36924 + uint32_t nic_count; /*!< number of nics attached to this driver */
36925 + spinlock_t lock; /*!< lock for table modifications */
36926 + atomic_t ref_count; /*!< refcount for users of nic table */
36927 +};
36928 +
36929 +/* Resource driver structures used by other drivers as well */
36930 +extern struct efrm_nic_table efrm_nic_table;
36931 +
36932 +static inline void efrm_nic_table_hold(void)
36933 +{
36934 + atomic_inc(&efrm_nic_table.ref_count);
36935 +}
36936 +
36937 +static inline void efrm_nic_table_rele(void)
36938 +{
36939 + atomic_dec(&efrm_nic_table.ref_count);
36940 +}
36941 +
36942 +static inline int efrm_nic_table_held(void)
36943 +{
36944 + return (atomic_read(&efrm_nic_table.ref_count) != 0);
36945 +}
36946 +
36947 +/* Run code block _x multiple times with variable nic set to each
36948 + * registered NIC in turn.
36949 + * DO NOT "break" out of this loop early. */
36950 +#define EFRM_FOR_EACH_NIC(_nic_i, _nic) \
36951 + for ((_nic_i) = (efrm_nic_table_hold(), 0); \
36952 + (_nic_i) < EFHW_MAX_NR_DEVS || (efrm_nic_table_rele(), 0); \
36953 + (_nic_i)++) \
36954 + if (((_nic) = efrm_nic_table.nic[_nic_i]))
36955 +
36956 +#define EFRM_FOR_EACH_NIC_IN_SET(_set, _i, _nic) \
36957 + for ((_i) = (efrm_nic_table_hold(), 0); \
36958 + (_i) < EFHW_MAX_NR_DEVS || (efrm_nic_table_rele(), 0); \
36959 + ++(_i)) \
36960 + if (((_nic) = efrm_nic_table.nic[_i]) && \
36961 + efrm_nic_set_read((_set), (_i)))
36962 +
36963 +#endif /* __CI_EFRM_NIC_TABLE_H__ */
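
Usage sketch (not part of the patch): walking the NIC table with the EFRM_FOR_EACH_NIC() macro defined above. count_registered_nics() is hypothetical; the macro takes the table reference itself via efrm_nic_table_hold() and drops it with efrm_nic_table_rele() once the index runs past EFHW_MAX_NR_DEVS, which is why breaking out of the loop early would leak the reference.

#include <ci/efrm/nic_table.h>

static int count_registered_nics(void)
{
	struct efhw_nic *nic;
	int nic_i, count = 0;

	/* Unset slots are skipped by the macro's if(); do not "break". */
	EFRM_FOR_EACH_NIC(nic_i, nic)
		++count;

	return count;
}
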
36964 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep.h
36965 ===================================================================
36966 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
36967 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep.h 2008-02-20 09:32:49.000000000 +0100
36968 @@ -0,0 +1,54 @@
36969 +/****************************************************************************
36970 + * Driver for Solarflare network controllers -
36971 + * resource management for Xen backend, OpenOnload, etc
36972 + * (including support for SFE4001 10GBT NIC)
36973 + *
36974 + * This file provides Linux-like system-independent API for efrm library.
36975 + *
36976 + * Copyright 2005-2007: Solarflare Communications Inc,
36977 + * 9501 Jeronimo Road, Suite 250,
36978 + * Irvine, CA 92618, USA
36979 + *
36980 + * Developed and maintained by Solarflare Communications:
36981 + * <linux-xen-drivers@solarflare.com>
36982 + * <onload-dev@solarflare.com>
36983 + *
36984 + * Certain parts of the driver were implemented by
36985 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
36986 + * OKTET Labs Ltd, Russia,
36987 + * http://oktetlabs.ru, <info@oktetlabs.ru>
36988 + * by request of Solarflare Communications
36989 + *
36990 + *
36991 + * This program is free software; you can redistribute it and/or modify it
36992 + * under the terms of the GNU General Public License version 2 as published
36993 + * by the Free Software Foundation, incorporated herein by reference.
36994 + *
36995 + * This program is distributed in the hope that it will be useful,
36996 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
36997 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36998 + * GNU General Public License for more details.
36999 + *
37000 + * You should have received a copy of the GNU General Public License
37001 + * along with this program; if not, write to the Free Software
37002 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37003 + ****************************************************************************
37004 + */
37005 +
37006 +#ifndef __CI_EFRM_SYSDEP_H__
37007 +#define __CI_EFRM_SYSDEP_H__
37008 +
37009 +/* Spinlocks are defined in efhw/sysdep.h */
37010 +#include <ci/efhw/sysdep.h>
37011 +
37012 +#if defined(__linux__) && defined(__KERNEL__)
37013 +
37014 +# include <ci/efrm/sysdep_linux.h>
37015 +
37016 +#else
37017 +
37018 +# include <ci/efrm/sysdep_ci2linux.h>
37019 +
37020 +#endif
37021 +
37022 +#endif /* __CI_EFRM_SYSDEP_H__ */
37023 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep_linux.h
37024 ===================================================================
37025 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
37026 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep_linux.h 2008-02-20 09:32:49.000000000 +0100
37027 @@ -0,0 +1,248 @@
37028 +/****************************************************************************
37029 + * Driver for Solarflare network controllers -
37030 + * resource management for Xen backend, OpenOnload, etc
37031 + * (including support for SFE4001 10GBT NIC)
37032 + *
37033 + * This file provides version-independent Linux kernel API for efrm library.
37034 + * Only kernels >=2.6.9 are supported.
37035 + *
37036 + * Copyright 2005-2007: Solarflare Communications Inc,
37037 + * 9501 Jeronimo Road, Suite 250,
37038 + * Irvine, CA 92618, USA
37039 + *
37040 + * Kfifo API is partially stolen from linux-2.6.22/include/linux/list.h
37041 + * Copyright (C) 2004 Stelian Pop <stelian@popies.net>
37042 + *
37043 + * Developed and maintained by Solarflare Communications:
37044 + * <linux-xen-drivers@solarflare.com>
37045 + * <onload-dev@solarflare.com>
37046 + *
37047 + * Certain parts of the driver were implemented by
37048 + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
37049 + * OKTET Labs Ltd, Russia,
37050 + * http://oktetlabs.ru, <info@oktetlabs.ru>
37051 + * by request of Solarflare Communications
37052 + *
37053 + *
37054 + * This program is free software; you can redistribute it and/or modify it
37055 + * under the terms of the GNU General Public License version 2 as published
37056 + * by the Free Software Foundation, incorporated herein by reference.
37057 + *
37058 + * This program is distributed in the hope that it will be useful,
37059 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
37060 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37061 + * GNU General Public License for more details.
37062 + *
37063 + * You should have received a copy of the GNU General Public License
37064 + * along with this program; if not, write to the Free Software
37065 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37066 + ****************************************************************************
37067 + */
37068 +
37069 +#ifndef __CI_EFRM_SYSDEP_LINUX_H__
37070 +#define __CI_EFRM_SYSDEP_LINUX_H__
37071 +
37072 +#include <linux/version.h>
37073 +#include <linux/list.h>
37074 +#include <linux/vmalloc.h>
37075 +#include <linux/errno.h>
37076 +#include <linux/string.h>
37077 +#include <linux/workqueue.h>
37078 +#include <linux/gfp.h>
37079 +#include <linux/slab.h>
37080 +#include <linux/hardirq.h>
37081 +#include <linux/kernel.h>
37082 +#include <linux/if_ether.h>
37083 +#include <linux/completion.h>
37084 +#include <linux/in.h>
37085 +
37086 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
37087 +/* get roundup_pow_of_two(), which was in kernel.h in early kernel versions */
37088 +#include <linux/log2.h>
37089 +#endif
37090 +
37091 +/********************************************************************
37092 + *
37093 + * List API
37094 + *
37095 + ********************************************************************/
37096 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)
37097 +static inline void
37098 +list_replace_init(struct list_head *old, struct list_head *new)
37099 +{
37100 + new->next = old->next;
37101 + new->next->prev = new;
37102 + new->prev = old->prev;
37103 + new->prev->next = new;
37104 + INIT_LIST_HEAD(old);
37105 +}
37106 +#endif
37107 +
37108 +static inline struct list_head *list_pop(struct list_head *list)
37109 +{
37110 + struct list_head *link = list->next;
37111 + list_del(link);
37112 + return link;
37113 +}
37114 +
37115 +static inline struct list_head *list_pop_tail(struct list_head *list)
37116 +{
37117 + struct list_head *link = list->prev;
37118 + list_del(link);
37119 + return link;
37120 +}
37121 +
37122 +/********************************************************************
37123 + *
37124 + * Workqueue API
37125 + *
37126 + ********************************************************************/
37127 +
37128 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
37129 +#define NEED_OLD_WORK_API
37130 +
37131 +/**
37132 + * The old and new work function prototypes just change
37133 + * the type of the pointer in the only argument, so it's
37134 + * safe to cast one function type to the other
37135 + */
37136 +typedef void (*efrm_old_work_func_t) (void *p);
37137 +
37138 +#undef INIT_WORK
37139 +#define INIT_WORK(_work, _func) \
37140 + do { \
37141 + INIT_LIST_HEAD(&(_work)->entry); \
37142 + (_work)->pending = 0; \
37143 + PREPARE_WORK((_work), \
37144 + (efrm_old_work_func_t) (_func), \
37145 + (_work)); \
37146 + } while (0)
37147 +
37148 +#endif
37149 +
37150 +/********************************************************************
37151 + *
37152 + * Kfifo API
37153 + *
37154 + ********************************************************************/
37155 +
37156 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
37157 +
37158 +#if !defined(RHEL_RELEASE_CODE) || (RHEL_RELEASE_CODE < 1029)
37159 +typedef unsigned gfp_t;
37160 +#endif
37161 +
37162 +#define HAS_NO_KFIFO
37163 +
37164 +struct kfifo {
37165 + unsigned char *buffer; /* the buffer holding the data */
37166 + unsigned int size; /* the size of the allocated buffer */
37167 + unsigned int in; /* data is added at offset (in % size) */
37168 + unsigned int out; /* data is extracted from off. (out % size) */
37169 + spinlock_t *lock; /* protects concurrent modifications */
37170 +};
37171 +
37172 +extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
37173 + gfp_t gfp_mask, spinlock_t *lock);
37174 +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask,
37175 + spinlock_t *lock);
37176 +extern void kfifo_free(struct kfifo *fifo);
37177 +extern unsigned int __kfifo_put(struct kfifo *fifo,
37178 + unsigned char *buffer, unsigned int len);
37179 +extern unsigned int __kfifo_get(struct kfifo *fifo,
37180 + unsigned char *buffer, unsigned int len);
37181 +
37182 +/**
37183 + * kfifo_put - puts some data into the FIFO
37184 + * @fifo: the fifo to be used.
37185 + * @buffer: the data to be added.
37186 + * @len: the length of the data to be added.
37187 + *
37188 + * This function copies at most @len bytes from the @buffer into
37189 + * the FIFO depending on the free space, and returns the number of
37190 + * bytes copied.
37191 + */
37192 +static inline unsigned int
37193 +kfifo_put(struct kfifo *fifo, unsigned char *buffer, unsigned int len)
37194 +{
37195 + unsigned long flags;
37196 + unsigned int ret;
37197 +
37198 + spin_lock_irqsave(fifo->lock, flags);
37199 +
37200 + ret = __kfifo_put(fifo, buffer, len);
37201 +
37202 + spin_unlock_irqrestore(fifo->lock, flags);
37203 +
37204 + return ret;
37205 +}
37206 +
37207 +/**
37208 + * kfifo_get - gets some data from the FIFO
37209 + * @fifo: the fifo to be used.
37210 + * @buffer: where the data must be copied.
37211 + * @len: the size of the destination buffer.
37212 + *
37213 + * This function copies at most @len bytes from the FIFO into the
37214 + * @buffer and returns the number of copied bytes.
37215 + */
37216 +static inline unsigned int
37217 +kfifo_get(struct kfifo *fifo, unsigned char *buffer, unsigned int len)
37218 +{
37219 + unsigned long flags;
37220 + unsigned int ret;
37221 +
37222 + spin_lock_irqsave(fifo->lock, flags);
37223 +
37224 + ret = __kfifo_get(fifo, buffer, len);
37225 +
37226 + /*
37227 + * optimization: if the FIFO is empty, set the indices to 0
37228 + * so we don't wrap the next time
37229 + */
37230 + if (fifo->in == fifo->out)
37231 + fifo->in = fifo->out = 0;
37232 +
37233 + spin_unlock_irqrestore(fifo->lock, flags);
37234 +
37235 + return ret;
37236 +}
37237 +
37238 +/**
37239 + * __kfifo_len - returns the number of bytes available in the FIFO, no locking version
37240 + * @fifo: the fifo to be used.
37241 + */
37242 +static inline unsigned int __kfifo_len(struct kfifo *fifo)
37243 +{
37244 + return fifo->in - fifo->out;
37245 +}
37246 +
37247 +/**
37248 + * kfifo_len - returns the number of bytes available in the FIFO
37249 + * @fifo: the fifo to be used.
37250 + */
37251 +static inline unsigned int kfifo_len(struct kfifo *fifo)
37252 +{
37253 + unsigned long flags;
37254 + unsigned int ret;
37255 +
37256 + spin_lock_irqsave(fifo->lock, flags);
37257 +
37258 + ret = __kfifo_len(fifo);
37259 +
37260 + spin_unlock_irqrestore(fifo->lock, flags);
37261 +
37262 + return ret;
37263 +}
37264 +
37265 +#else
37266 +#include <linux/kfifo.h>
37267 +#endif
37268 +
37269 +static inline void kfifo_vfree(struct kfifo *fifo)
37270 +{
37271 + vfree(fifo->buffer);
37272 + kfree(fifo);
37273 +}
37274 +
37275 +#endif /* __CI_EFRM_SYSDEP_LINUX_H__ */
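
Usage sketch (not part of the patch): exercising the kfifo compatibility declarations above, as they would be used on a pre-2.6.10 kernel. kfifo_demo() is hypothetical, and the ERR_PTR-style failure return of kfifo_alloc() is an assumption carried over from the mainline API this shim mirrors; the return values of kfifo_put()/kfifo_get() are checked because both may copy fewer bytes than requested.

#include <linux/err.h>
#include <linux/spinlock.h>
#include <ci/efrm/sysdep_linux.h>

static int kfifo_demo(void)
{
	spinlock_t lock;
	struct kfifo *fifo;
	unsigned char msg[4] = { 0xde, 0xad, 0xbe, 0xef }, out[4];
	unsigned int n;

	spin_lock_init(&lock);
	fifo = kfifo_alloc(64, GFP_KERNEL, &lock);	/* 64-byte ring buffer */
	if (IS_ERR(fifo))
		return PTR_ERR(fifo);

	n = kfifo_put(fifo, msg, sizeof(msg));	/* bytes actually queued */
	n = kfifo_get(fifo, out, sizeof(out));	/* bytes actually dequeued */

	kfifo_free(fifo);
	return n == sizeof(out) ? 0 : -EIO;
}
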
37276 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/config.h
37277 ===================================================================
37278 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
37279 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/config.h 2008-02-20 09:32:49.000000000 +0100
37280 @@ -0,0 +1,49 @@
37281 +/****************************************************************************
37282 + * Copyright 2002-2005: Level 5 Networks Inc.
37283 + * Copyright 2005-2008: Solarflare Communications Inc,
37284 + * 9501 Jeronimo Road, Suite 250,
37285 + * Irvine, CA 92618, USA
37286 + *
37287 + * Maintained by Solarflare Communications
37288 + * <linux-xen-drivers@solarflare.com>
37289 + * <onload-dev@solarflare.com>
37290 + *
37291 + * This program is free software; you can redistribute it and/or modify it
37292 + * under the terms of the GNU General Public License version 2 as published
37293 + * by the Free Software Foundation, incorporated herein by reference.
37294 + *
37295 + * This program is distributed in the hope that it will be useful,
37296 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
37297 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37298 + * GNU General Public License for more details.
37299 + *
37300 + * You should have received a copy of the GNU General Public License
37301 + * along with this program; if not, write to the Free Software
37302 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37303 + ****************************************************************************
37304 + */
37305 +
37306 +/*! \cidoxg_include_ci_tools */
37307 +
37308 +#ifndef __CI_TOOLS_CONFIG_H__
37309 +#define __CI_TOOLS_CONFIG_H__
37310 +
37311 +
37312 +/**********************************************************************
37313 + * Debugging.
37314 + */
37315 +
37316 +#define CI_INCLUDE_ASSERT_VALID 0
37317 +
37318 +/* Set non-zero to allow info about who has allocated what to appear in
37319 + * /proc/drivers/level5/mem.
37320 + * However - Note that doing so can lead to segfault when you unload the
37321 + * driver, and other weirdness; i.e. I don't think the code for it is quite
37322 + * right (written by Oktet, hacked by gel), but it does work well enough to be
37323 + * useful.
37324 + */
37325 +#define CI_MEMLEAK_DEBUG_ALLOC_TABLE 0
37326 +
37327 +
37328 +#endif /* __CI_TOOLS_CONFIG_H__ */
37329 +/*! \cidoxg_end */
37330 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/debug.h
37331 ===================================================================
37332 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
37333 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/debug.h 2008-02-20 09:32:49.000000000 +0100
37334 @@ -0,0 +1,336 @@
37335 +/****************************************************************************
37336 + * Copyright 2002-2005: Level 5 Networks Inc.
37337 + * Copyright 2005-2008: Solarflare Communications Inc,
37338 + * 9501 Jeronimo Road, Suite 250,
37339 + * Irvine, CA 92618, USA
37340 + *
37341 + * Maintained by Solarflare Communications
37342 + * <linux-xen-drivers@solarflare.com>
37343 + * <onload-dev@solarflare.com>
37344 + *
37345 + * This program is free software; you can redistribute it and/or modify it
37346 + * under the terms of the GNU General Public License version 2 as published
37347 + * by the Free Software Foundation, incorporated herein by reference.
37348 + *
37349 + * This program is distributed in the hope that it will be useful,
37350 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
37351 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37352 + * GNU General Public License for more details.
37353 + *
37354 + * You should have received a copy of the GNU General Public License
37355 + * along with this program; if not, write to the Free Software
37356 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37357 + ****************************************************************************
37358 + */
37359 +
37360 +/*! \cidoxg_include_ci_tools */
37361 +
37362 +#ifndef __CI_TOOLS_DEBUG_H__
37363 +#define __CI_TOOLS_DEBUG_H__
37364 +
37365 +#define CI_LOG_E(x) x /* errors */
37366 +#define CI_LOG_W(x) x /* warnings */
37367 +#define CI_LOG_I(x) x /* information */
37368 +#define CI_LOG_V(x) x /* verbose */
37369 +
37370 +/* Build time asserts. We paste the line number into the type name
37371 + * so that the macro can be used more than once per file even if the
37372 + * compiler objects to multiple identical typedefs. Collisions
37373 + * between use in different header files is still possible. */
37374 +#ifndef CI_BUILD_ASSERT
37375 +#define __CI_BUILD_ASSERT_NAME(_x) __CI_BUILD_ASSERT_ILOATHECPP(_x)
37376 +#define __CI_BUILD_ASSERT_ILOATHECPP(_x) __CI_BUILD_ASSERT__ ##_x
37377 +#define CI_BUILD_ASSERT(e)\
37378 + typedef char __CI_BUILD_ASSERT_NAME(__LINE__)[(e)?1:-1]
37379 +#endif
37380 +
37381 +
37382 +#ifdef NDEBUG
37383 +
37384 +# define _ci_check(exp, file, line)
37385 +# define _ci_assert2(e, x, y, file, line)
37386 +# define _ci_assert(exp, file, line)
37387 +# define _ci_assert_equal(exp1, exp2, file, line)
37388 +# define _ci_assert_equiv(exp1, exp2, file, line)
37389 +# define _ci_assert_nequal(exp1, exp2, file, line)
37390 +# define _ci_assert_le(exp1, exp2, file, line)
37391 +# define _ci_assert_lt(exp1, exp2, file, line)
37392 +# define _ci_assert_ge(exp1, exp2, file, line)
37393 +# define _ci_assert_gt(exp1, exp2, file, line)
37394 +# define _ci_assert_impl(exp1, exp2, file, line)
37395 +
37396 +# define _ci_verify(exp, file, line) \
37397 + do { \
37398 + (void)(exp); \
37399 + } while (0)
37400 +
37401 +# define CI_DEBUG_TRY(exp) \
37402 + do { \
37403 + (void)(exp); \
37404 + } while (0)
37405 +
37406 +#define CI_TRACE(exp,fmt)
37407 +#define CI_TRACE_INT(integer)
37408 +#define CI_TRACE_INT32(integer)
37409 +#define CI_TRACE_INT64(integer)
37410 +#define CI_TRACE_UINT(integer)
37411 +#define CI_TRACE_UINT32(integer)
37412 +#define CI_TRACE_UINT64(integer)
37413 +#define CI_TRACE_HEX(integer)
37414 +#define CI_TRACE_HEX32(integer)
37415 +#define CI_TRACE_HEX64(integer)
37416 +#define CI_TRACE_PTR(pointer)
37417 +#define CI_TRACE_STRING(string)
37418 +#define CI_TRACE_MAC(mac)
37419 +#define CI_TRACE_IP(ip_be32)
37420 +#define CI_TRACE_ARP(arp_pkt)
37421 +
37422 +#else
37423 +
37424 +# define _CI_ASSERT_FMT "\nfrom %s:%d"
37425 +
37426 +# define _ci_check(exp, file, line) \
37427 + do { \
37428 + if (CI_UNLIKELY(!(exp))) \
37429 + ci_warn(("ci_check(%s)"_CI_ASSERT_FMT, #exp, \
37430 + (file), (line))); \
37431 + } while (0)
37432 +
37433 +/*
37434 + * NOTE: ci_fail() emits the file and line where the assert is actually
37435 + * coded.
37436 + */
37437 +
37438 +# define _ci_assert(exp, file, line) \
37439 + do { \
37440 + if (CI_UNLIKELY(!(exp))) \
37441 + ci_fail(("ci_assert(%s)"_CI_ASSERT_FMT, #exp, \
37442 + (file), (line))); \
37443 + } while (0)
37444 +
37445 +# define _ci_assert2(e, x, y, file, line) do { \
37446 + if(CI_UNLIKELY( ! (e) )) \
37447 + ci_fail(("ci_assert(%s)\nwhere [%s=%"CI_PRIx64"] " \
37448 + "[%s=%"CI_PRIx64"]\nat %s:%d\nfrom %s:%d", #e \
37449 + , #x, (ci_uint64)(ci_uintptr_t)(x) \
37450 + , #y, (ci_uint64)(ci_uintptr_t)(y), \
37451 + __FILE__, __LINE__, (file), (line))); \
37452 + } while (0)
37453 +
37454 +# define _ci_verify(exp, file, line) \
37455 + do { \
37456 + if (CI_UNLIKELY(!(exp))) \
37457 + ci_fail(("ci_verify(%s)"_CI_ASSERT_FMT, #exp, \
37458 + (file), (line))); \
37459 + } while (0)
37460 +
37461 +# define _ci_assert_equal(x, y, f, l) _ci_assert2((x)==(y), x, y, (f), (l))
37462 +# define _ci_assert_nequal(x, y, f, l) _ci_assert2((x)!=(y), x, y, (f), (l))
37463 +# define _ci_assert_le(x, y, f, l) _ci_assert2((x)<=(y), x, y, (f), (l))
37464 +# define _ci_assert_lt(x, y, f, l) _ci_assert2((x)< (y), x, y, (f), (l))
37465 +# define _ci_assert_ge(x, y, f, l) _ci_assert2((x)>=(y), x, y, (f), (l))
37466 +# define _ci_assert_gt(x, y, f, l) _ci_assert2((x)> (y), x, y, (f), (l))
37467 +# define _ci_assert_or(x, y, f, l) _ci_assert2((x)||(y), x, y, (f), (l))
37468 +# define _ci_assert_impl(x, y, f, l) _ci_assert2(!(x) || (y), x, y, (f), (l))
37469 +# define _ci_assert_equiv(x, y, f, l) _ci_assert2(!(x)== !(y), x, y, (f), (l))
37470 +
37471 +#define _ci_assert_equal_msg(exp1, exp2, msg, file, line) \
37472 + do { \
37473 + if (CI_UNLIKELY((exp1)!=(exp2))) \
37474 + ci_fail(("ci_assert_equal_msg(%s == %s) were " \
37475 + "(%"CI_PRIx64":%"CI_PRIx64") with msg[%c%c%c%c]" \
37476 + _CI_ASSERT_FMT, #exp1, #exp2, \
37477 + (ci_uint64)(ci_uintptr_t)(exp1), \
37478 + (ci_uint64)(ci_uintptr_t)(exp2), \
37479 + (((ci_uint32)msg) >> 24) & 0xff, \
37480 + (((ci_uint32)msg) >> 16) & 0xff, \
37481 + (((ci_uint32)msg) >> 8 ) & 0xff, \
37482 + (((ci_uint32)msg) ) & 0xff, \
37483 + (file), (line))); \
37484 + } while (0)
37485 +
37486 +# define CI_DEBUG_TRY(exp) CI_TRY(exp)
37487 +
37488 +#define CI_TRACE(exp,fmt) \
37489 + ci_log("%s:%d:%s] " #exp "=" fmt, \
37490 + __FILE__, __LINE__, __FUNCTION__, (exp))
37491 +
37492 +
37493 +#define CI_TRACE_INT(integer) \
37494 + ci_log("%s:%d:%s] " #integer "=%d", \
37495 + __FILE__, __LINE__, __FUNCTION__, (integer))
37496 +
37497 +
37498 +#define CI_TRACE_INT32(integer) \
37499 + ci_log("%s:%d:%s] " #integer "=%d", \
37500 + __FILE__, __LINE__, __FUNCTION__, ((ci_int32)integer))
37501 +
37502 +
37503 +#define CI_TRACE_INT64(integer) \
37504 + ci_log("%s:%d:%s] " #integer "=%lld", \
37505 + __FILE__, __LINE__, __FUNCTION__, ((ci_int64)integer))
37506 +
37507 +
37508 +#define CI_TRACE_UINT(integer) \
37509 + ci_log("%s:%d:%s] " #integer "=%u", \
37510 + __FILE__, __LINE__, __FUNCTION__, (integer))
37511 +
37512 +
37513 +#define CI_TRACE_UINT32(integer) \
37514 + ci_log("%s:%d:%s] " #integer "=%u", \
37515 + __FILE__, __LINE__, __FUNCTION__, ((ci_uint32)integer))
37516 +
37517 +
37518 +#define CI_TRACE_UINT64(integer) \
37519 + ci_log("%s:%d:%s] " #integer "=%llu", \
37520 + __FILE__, __LINE__, __FUNCTION__, ((ci_uint64)integer))
37521 +
37522 +
37523 +#define CI_TRACE_HEX(integer) \
37524 + ci_log("%s:%d:%s] " #integer "=0x%x", \
37525 + __FILE__, __LINE__, __FUNCTION__, (integer))
37526 +
37527 +
37528 +#define CI_TRACE_HEX32(integer) \
37529 + ci_log("%s:%d:%s] " #integer "=0x%x", \
37530 + __FILE__, __LINE__, __FUNCTION__, ((ci_uint32)integer))
37531 +
37532 +
37533 +#define CI_TRACE_HEX64(integer) \
37534 + ci_log("%s:%d:%s] " #integer "=0x%llx", \
37535 + __FILE__, __LINE__, __FUNCTION__, ((ci_uint64)integer))
37536 +
37537 +
37538 +#define CI_TRACE_PTR(pointer) \
37539 + ci_log("%s:%d:%s] " #pointer "=0x%p", \
37540 + __FILE__, __LINE__, __FUNCTION__, (pointer))
37541 +
37542 +
37543 +#define CI_TRACE_STRING(string) \
37544 + ci_log("%s:%d:%s] " #string "=%s", \
37545 + __FILE__, __LINE__, __FUNCTION__, (string))
37546 +
37547 +
37548 +#define CI_TRACE_MAC(mac) \
37549 + ci_log("%s:%d:%s] " #mac "=" CI_MAC_PRINTF_FORMAT, \
37550 + __FILE__, __LINE__, __FUNCTION__, CI_MAC_PRINTF_ARGS(mac))
37551 +
37552 +
37553 +#define CI_TRACE_IP(ip_be32) \
37554 + ci_log("%s:%d:%s] " #ip_be32 "=" CI_IP_PRINTF_FORMAT, __FILE__, \
37555 + __LINE__, __FUNCTION__, CI_IP_PRINTF_ARGS(&(ip_be32)))
37556 +
37557 +
37558 +#define CI_TRACE_ARP(arp_pkt) \
37559 + ci_log("%s:%d:%s]\n"CI_ARP_PRINTF_FORMAT, \
37560 + __FILE__, __LINE__, __FUNCTION__, CI_ARP_PRINTF_ARGS(arp_pkt))
37561 +
37562 +#endif /* NDEBUG */
37563 +
37564 +#define ci_check(exp) \
37565 + _ci_check(exp, __FILE__, __LINE__)
37566 +
37567 +#define ci_assert(exp) \
37568 + _ci_assert(exp, __FILE__, __LINE__)
37569 +
37570 +#define ci_verify(exp) \
37571 + _ci_verify(exp, __FILE__, __LINE__)
37572 +
37573 +#define ci_assert_equal(exp1, exp2) \
37574 + _ci_assert_equal(exp1, exp2, __FILE__, __LINE__)
37575 +
37576 +#define ci_assert_equal_msg(exp1, exp2, msg) \
37577 + _ci_assert_equal_msg(exp1, exp2, msg, __FILE__, __LINE__)
37578 +
37579 +#define ci_assert_nequal(exp1, exp2) \
37580 + _ci_assert_nequal(exp1, exp2, __FILE__, __LINE__)
37581 +
37582 +#define ci_assert_le(exp1, exp2) \
37583 + _ci_assert_le(exp1, exp2, __FILE__, __LINE__)
37584 +
37585 +#define ci_assert_lt(exp1, exp2) \
37586 + _ci_assert_lt(exp1, exp2, __FILE__, __LINE__)
37587 +
37588 +#define ci_assert_ge(exp1, exp2) \
37589 + _ci_assert_ge(exp1, exp2, __FILE__, __LINE__)
37590 +
37591 +#define ci_assert_gt(exp1, exp2) \
37592 + _ci_assert_gt(exp1, exp2, __FILE__, __LINE__)
37593 +
37594 +#define ci_assert_impl(exp1, exp2) \
37595 + _ci_assert_impl(exp1, exp2, __FILE__, __LINE__)
37596 +
37597 +#define ci_assert_equiv(exp1, exp2) \
37598 + _ci_assert_equiv(exp1, exp2, __FILE__, __LINE__)
37599 +
37600 +
37601 +#define CI_TEST(exp) \
37602 + do{ \
37603 + if( CI_UNLIKELY(!(exp)) ) \
37604 + ci_fail(("CI_TEST(%s)", #exp)); \
37605 + }while(0)
37606 +
37607 +
37608 +#define CI_TRY(exp) \
37609 + do{ \
37610 + int _trc; \
37611 + _trc=(exp); \
37612 + if( CI_UNLIKELY(_trc < 0) ) \
37613 + ci_sys_fail(#exp, _trc); \
37614 + }while(0)
37615 +
37616 +
37617 +#define CI_TRY_RET(exp) \
37618 + do{ \
37619 + int _trc; \
37620 + _trc=(exp); \
37621 + if( CI_UNLIKELY(_trc < 0) ) { \
37622 + ci_log("%s returned %d at %s:%d", #exp, _trc, __FILE__, __LINE__); \
37623 + return _trc; \
37624 + } \
37625 + }while(0)
37626 +
37627 +#define CI_LOGLEVEL_TRY_RET(logfn, exp) \
37628 + do{ \
37629 + int _trc; \
37630 + _trc=(exp); \
37631 + if( CI_UNLIKELY(_trc < 0) ) { \
37632 + logfn (ci_log("%s returned %d at %s:%d", #exp, _trc, __FILE__, __LINE__)); \
37633 + return _trc; \
37634 + } \
37635 + }while(0)
37636 +
37637 +
37638 +#define CI_SOCK_TRY(exp) \
37639 + do{ \
37640 + ci_sock_err_t _trc; \
37641 + _trc=(exp); \
37642 + if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) \
37643 + ci_sys_fail(#exp, _trc.val); \
37644 + }while(0)
37645 +
37646 +
37647 +#define CI_SOCK_TRY_RET(exp) \
37648 + do{ \
37649 + ci_sock_err_t _trc; \
37650 + _trc=(exp); \
37651 + if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) { \
37652 + ci_log("%s returned %d at %s:%d", #exp, _trc.val, __FILE__, __LINE__); \
37653 + return ci_sock_errcode(_trc); \
37654 + } \
37655 + }while(0)
37656 +
37657 +
37658 +#define CI_SOCK_TRY_SOCK_RET(exp) \
37659 + do{ \
37660 + ci_sock_err_t _trc; \
37661 + _trc=(exp); \
37662 + if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) { \
37663 + ci_log("%s returned %d at %s:%d", #exp, _trc.val, __FILE__, __LINE__); \
37664 + return _trc; \
37665 + } \
37666 + }while(0)
37667 +
37668 +#endif /* __CI_TOOLS_DEBUG_H__ */
37669 +
37670 +/*! \cidoxg_end */
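
Usage sketch (not part of the patch): typical use of the assertion macros above, assuming the surrounding ci/tools headers (which supply ci_log(), ci_fail() and the ci_* integer types) are included. queue_push() is hypothetical; CI_BUILD_ASSERT() fails at compile time, the ci_assert*() checks compile away under NDEBUG, and CI_TRY_RET() logs and propagates a negative return code.

/* Compile-time check: the assert macros print operands as 64-bit hex. */
CI_BUILD_ASSERT(sizeof(ci_uint64) == 8);

static int queue_push(int *queue, int depth, int index, int value)
{
	ci_assert(queue != NULL);
	ci_assert_lt(index, depth);	/* removed entirely in NDEBUG builds */

	queue[index] = value;

	/* Logs file/line and returns the negative code if the queue is now full. */
	CI_TRY_RET(index + 1 < depth ? 0 : -1);
	return index + 1;
}
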
37671 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/log.h
37672 ===================================================================
37673 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
37674 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/log.h 2008-02-20 09:32:49.000000000 +0100
37675 @@ -0,0 +1,262 @@
37676 +/****************************************************************************
37677 + * Copyright 2002-2005: Level 5 Networks Inc.
37678 + * Copyright 2005-2008: Solarflare Communications Inc,
37679 + * 9501 Jeronimo Road, Suite 250,
37680 + * Irvine, CA 92618, USA
37681 + *
37682 + * Maintained by Solarflare Communications
37683 + * <linux-xen-drivers@solarflare.com>
37684 + * <onload-dev@solarflare.com>
37685 + *
37686 + * This program is free software; you can redistribute it and/or modify it
37687 + * under the terms of the GNU General Public License version 2 as published
37688 + * by the Free Software Foundation, incorporated herein by reference.
37689 + *
37690 + * This program is distributed in the hope that it will be useful,
37691 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
37692 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37693 + * GNU General Public License for more details.
37694 + *
37695 + * You should have received a copy of the GNU General Public License
37696 + * along with this program; if not, write to the Free Software
37697 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37698 + ****************************************************************************
37699 + */
37700 +
37701 +/*
37702 + * \author djr
37703 + * \brief Functions for logging and pretty-printing.
37704 + * \date 2002/08/07
37705 + */
37706 +
37707 +/*! \cidoxg_include_ci_tools */
37708 +
37709 +#ifndef __CI_TOOLS_LOG_H__
37710 +#define __CI_TOOLS_LOG_H__
37711 +
37712 +#include <stdarg.h>
37713 +
37714 +
37715 +/**********************************************************************
37716 + * Logging.
37717 + */
37718 +
37719 +/* size of internal log buffer */
37720 +#define CI_LOG_MAX_LINE 512
37721 +/* uses of ci_log must ensure that all trace messages are shorter than this */
37722 +#define CI_LOG_MAX_MSG_LENGTH (CI_LOG_MAX_LINE-50)
37723 +
37724 +extern void ci_vlog(const char* fmt, va_list args) CI_HF;
37725 +extern void ci_log(const char* fmt, ...) CI_PRINTF_LIKE(1,2) CI_HF;
37726 +
37727 + /*! Set the prefix for log messages.
37728 + **
37729 + ** Uses the storage pointed to by \em prefix. Therefore \em prefix must
37730 + ** be allocated on the heap, or statically.
37731 + */
37732 +extern void ci_set_log_prefix(const char* prefix) CI_HF;
37733 +
37734 +typedef void (*ci_log_fn_t)(const char* msg);
37735 +extern ci_log_fn_t ci_log_fn CI_HV;
37736 +
37737 +/* Log functions. */
37738 +extern void ci_log_null(const char* msg) CI_HF;
37739 +extern void ci_log_stderr(const char* msg) CI_HF;
37740 +extern void ci_log_stdout(const char* msg) CI_HF;
37741 +extern void ci_log_syslog(const char* msg) CI_HF;
37742 +
37743 +/*! Call the following to install special logging behaviours. */
37744 +extern void ci_log_buffer_till_fail(void) CI_HF;
37745 +extern void ci_log_buffer_till_exit(void) CI_HF;
37746 +
37747 +extern void __ci_log_unique(const char* msg) CI_HF;
37748 +extern ci_log_fn_t __ci_log_unique_fn CI_HV;
37749 +ci_inline void ci_log_uniquify(void) {
37750 + if( ci_log_fn != __ci_log_unique ) {
37751 + __ci_log_unique_fn = ci_log_fn;
37752 + ci_log_fn = __ci_log_unique;
37753 + }
37754 +}
37755 +
37756 +extern void ci_log_file(const char* msg) CI_HF;
37757 +extern int ci_log_file_fd CI_HV;
37758 +
37759 +extern void __ci_log_nth(const char* msg) CI_HF;
37760 +extern ci_log_fn_t __ci_log_nth_fn CI_HV;
37761 +extern int ci_log_nth_n CI_HV; /* default 100 */
37762 +ci_inline void ci_log_nth(void) {
37763 + if( ci_log_fn != __ci_log_nth ) {
37764 + __ci_log_nth_fn = ci_log_fn;
37765 + ci_log_fn = __ci_log_nth;
37766 + }
37767 +}
37768 +
37769 +extern int ci_log_level CI_HV;
37770 +
37771 +extern int ci_log_options CI_HV;
37772 +#define CI_LOG_PID 0x1
37773 +#define CI_LOG_TID 0x2
37774 +#define CI_LOG_TIME 0x4
37775 +#define CI_LOG_DELTA 0x8
37776 +
37777 +/**********************************************************************
37778 + * Used to define which mode we are in
37779 + */
37780 +#if (defined(_WIN32) && !defined(__KERNEL__))
37781 +typedef enum {
37782 + ci_log_md_NULL=0,
37783 + ci_log_md_ioctl,
37784 + ci_log_md_stderr,
37785 + ci_log_md_stdout,
37786 + ci_log_md_file,
37787 + ci_log_md_serial,
37788 + ci_log_md_syslog,
37789 + ci_log_md_pidfile
37790 +} ci_log_mode_t;
37791 +extern ci_log_mode_t ci_log_mode;
37792 +#endif
37793 +
37794 +/**********************************************************************
37795 + * Pretty-printing.
37796 + */
37797 +
37798 +extern char ci_printable_char(char c) CI_HF;
37799 +
37800 +extern void (*ci_hex_dump_formatter)(char* buf, const ci_octet* s,
37801 + int i, int off, int len) CI_HV;
37802 +extern void ci_hex_dump_format_octets(char*,const ci_octet*,int,int,int) CI_HF;
37803 +extern void ci_hex_dump_format_dwords(char*,const ci_octet*,int,int,int) CI_HF;
37804 +
37805 +extern void ci_hex_dump_row(char* buf, volatile const void* s, int len,
37806 + ci_ptr_arith_t address) CI_HF;
37807 + /*!< A row contains up to 16 bytes. Row starts at [address & 15u], so
37808 + ** therefore [len + (address & 15u)] must be <= 16.
37809 + */
37810 +
37811 +extern void ci_hex_dump(ci_log_fn_t, volatile const void*,
37812 + int len, ci_ptr_arith_t address) CI_HF;
37813 +
37814 +extern int ci_hex_dump_to_raw(const char* src_hex, void* buf,
37815 + unsigned* addr_out_opt, int* skip) CI_HF;
37816 + /*!< Recovers raw data from a single line of a hex dump. [buf] must be at
37817 + ** least 16 bytes long. Returns the number of bytes written to [buf] (in
37818 + ** range 1 -> 16), or -1 if [src_hex] doesn't contain hex data. Does not
37819 + ** cope with missing bytes at the start of a line.
37820 + */
37821 +
37822 +extern int ci_format_eth_addr(char* buf, const void* eth_mac_addr,
37823 + char sep) CI_HF;
37824 + /*!< This will write 18 characters to <buf> including terminating null.
37825 + ** Returns number of bytes written excluding null. If [sep] is zero, ':'
37826 + ** is used.
37827 + */
37828 +
37829 +extern int ci_parse_eth_addr(void* eth_mac_addr,
37830 + const char* str, char sep) CI_HF;
37831 + /*!< If [sep] is zero, absolutely any separator is accepted (even
37832 + ** inconsistent separators). Returns 0 on success, -1 on error.
37833 + */
37834 +
37835 +extern int ci_format_ip4_addr(char* buf, unsigned addr_be32) CI_HF;
37836 + /*!< Formats the IP address (in network endian) in dotted-quad. Returns
37837 + ** the number of bytes written (up to 15), excluding the null. [buf]
37838 + ** must be at least 16 bytes long.
37839 + */
37840 +
37841 +
37842 +/**********************************************************************
37843 + * Error checking.
37844 + */
37845 +
37846 +extern void (*ci_fail_stop_fn)(void) CI_HV;
37847 +
37848 +extern void ci_fail_stop(void) CI_HF;
37849 +extern void ci_fail_hang(void) CI_HF;
37850 +extern void ci_fail_bomb(void) CI_HF;
37851 +extern void ci_backtrace(void) CI_HF;
37852 +
37853 +#if defined __linux__ && !defined __KERNEL__
37854 +extern void ci_fail_abort (void) CI_HF;
37855 +#endif
37856 +
37857 +#ifdef __GNUC__
37858 +extern void
37859 +__ci_fail(const char*, ...) CI_PRINTF_LIKE(1,2) CI_HF;
37860 +#else
37861 +# if _PREFAST_
37862 + extern void _declspec(noreturn) __ci_fail(const char* fmt, ...);
37863 +# else
37864 + extern void __ci_fail(const char* fmt, ...);
37865 +# endif
37866 +
37867 +#endif
37868 +
37869 +#define ci_warn(x) \
37870 + do{ ci_log("WARN at %s:%d", __FILE__, __LINE__); }while(0)
37871 +
37872 +#define ci_fail(x) \
37873 + do{ ci_log("FAIL at %s:%d", __FILE__, __LINE__); __ci_fail x; }while(0)
37874 +
37875 +extern void __ci_sys_fail(const char* fn, int rc,
37876 + const char* file, int line) CI_HF;
37877 +#define ci_sys_fail(fn, rc) __ci_sys_fail(fn, rc, __FILE__, __LINE__)
37878 +
37879 +/**********************************************************************
37880 + * Logging to buffer (src/citools/log_buffer.c)
37881 + */
37882 +
37883 +/*! Divert ci_log() messages to the log buffer
37884 + * normally they go to the system console */
37885 +extern void ci_log_buffer_till_fail(void) CI_HF;
37886 +
37887 +/*! Dump the contents of the log buffer to the system console */
37888 +extern void ci_log_buffer_dump(void) CI_HF;
37889 +
37890 +
37891 +/**********************************************************************
37892 + * Some useful pretty-printing.
37893 + */
37894 +
37895 +#ifdef __linux__
37896 +# define CI_SOCKCALL_FLAGS_FMT "%s%s%s%s%s%s%s%s%s%s%s"
37897 +
37898 +# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \
37899 + (((x) & MSG_OOB ) ? "OOB " :""), \
37900 + (((x) & MSG_PEEK ) ? "PEEK " :""), \
37901 + (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :""), \
37902 + (((x) & MSG_EOR ) ? "EOR " :""), \
37903 + (((x) & MSG_CTRUNC ) ? "CTRUNC " :""), \
37904 + (((x) & MSG_TRUNC ) ? "TRUNC " :""), \
37905 + (((x) & MSG_WAITALL ) ? "WAITALL " :""), \
37906 + (((x) & MSG_DONTWAIT ) ? "DONTWAIT " :""), \
37907 + (((x) & MSG_NOSIGNAL ) ? "NOSIGNAL " :""), \
37908 + (((x) & MSG_ERRQUEUE ) ? "ERRQUEUE " :""), \
37909 + (((x) & MSG_CONFIRM ) ? "CONFIRM " :"")
37910 +#endif
37911 +
37912 +#ifdef _WIN32
37913 +# define CI_SOCKCALL_FLAGS_FMT "%s%s%s"
37914 +
37915 +# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \
37916 + (((x) & MSG_OOB ) ? "OOB " :""), \
37917 + (((x) & MSG_PEEK ) ? "PEEK " :""), \
37918 + (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :"")
37919 +#endif
37920 +
37921 +#ifdef __sun__
37922 +# define CI_SOCKCALL_FLAGS_FMT "%s%s%s%s%s%s%s%s%s"
37923 +
37924 +# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \
37925 + (((x) & MSG_OOB ) ? "OOB " :""), \
37926 + (((x) & MSG_PEEK ) ? "PEEK " :""), \
37927 + (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :""), \
37928 + (((x) & MSG_EOR ) ? "EOR " :""), \
37929 + (((x) & MSG_CTRUNC ) ? "CTRUNC " :""), \
37930 + (((x) & MSG_TRUNC ) ? "TRUNC " :""), \
37931 + (((x) & MSG_WAITALL ) ? "WAITALL " :""), \
37932 + (((x) & MSG_DONTWAIT ) ? "DONTWAIT " :""), \
37933 + (((x) & MSG_NOTIFICATION) ? "NOTIFICATION" :"")
37934 +#endif
37935 +
37936 +#endif /* __CI_TOOLS_LOG_H__ */
37937 +/*! \cidoxg_end */
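
Usage sketch (not part of the patch): redirecting ci_log() output and hex-dumping a buffer with the helpers declared above. setup_logging() is hypothetical; the prefix lives in static storage because ci_set_log_prefix() keeps the pointer rather than copying the string.

static void setup_logging(const void *buf, int len)
{
	static const char prefix[] = "[sfc_netback] ";

	ci_set_log_prefix(prefix);	/* storage must outlive the logger */
	ci_log_fn = ci_log_syslog;	/* route ci_log() output to syslog */
	ci_log_options |= CI_LOG_PID | CI_LOG_TIME;

	ci_log("log level %d", ci_log_level);
	ci_hex_dump(ci_log_fn, buf, len, (ci_ptr_arith_t)buf);
}
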
37938 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/gcc_x86.h
37939 ===================================================================
37940 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
37941 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/gcc_x86.h 2008-02-20 09:32:49.000000000 +0100
37942 @@ -0,0 +1,361 @@
37943 +/****************************************************************************
37944 + * Copyright 2002-2005: Level 5 Networks Inc.
37945 + * Copyright 2005-2008: Solarflare Communications Inc,
37946 + * 9501 Jeronimo Road, Suite 250,
37947 + * Irvine, CA 92618, USA
37948 + *
37949 + * Maintained by Solarflare Communications
37950 + * <linux-xen-drivers@solarflare.com>
37951 + * <onload-dev@solarflare.com>
37952 + *
37953 + * This program is free software; you can redistribute it and/or modify it
37954 + * under the terms of the GNU General Public License version 2 as published
37955 + * by the Free Software Foundation, incorporated herein by reference.
37956 + *
37957 + * This program is distributed in the hope that it will be useful,
37958 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
37959 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37960 + * GNU General Public License for more details.
37961 + *
37962 + * You should have received a copy of the GNU General Public License
37963 + * along with this program; if not, write to the Free Software
37964 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
37965 + ****************************************************************************
37966 + */
37967 +
37968 +/*! \cidoxg_include_ci_tools_platform */
37969 +
37970 +#ifndef __CI_TOOLS_GCC_X86_H__
37971 +#define __CI_TOOLS_GCC_X86_H__
37972 +
37973 +
37974 +/**********************************************************************
37975 + * Free-running cycle counters.
37976 + */
37977 +
37978 +#define CI_HAVE_FRC64
37979 +#define CI_HAVE_FRC32
37980 +
37981 +#define ci_frc32(pval) __asm__ __volatile__("rdtsc" : "=a" (*pval) : : "edx")
37982 +
37983 +#if defined(__x86_64__)
37984 +ci_inline void ci_frc64(ci_uint64* pval) {
37985 + /* temp fix until we figure how to get this out in one bite */
37986 + /* temp fix until we figure out how to get this out in one bite */
37987 + __asm__ __volatile__("rdtsc" : "=a" (low) , "=d" (high));
37988 + *pval = (high << 32) | low;
37989 +}
37990 +
37991 +#else
37992 +#define ci_frc64(pval) __asm__ __volatile__("rdtsc" : "=A" (*pval))
37993 +#endif
37994 +
37995 +#define ci_frc_flush() /* ?? Need a pipeline barrier. */
37996 +
37997 +
37998 +/**********************************************************************
37999 + * Atomic integer.
38000 + */
38001 +
38002 +/*
38003 +** int ci_atomic_read(a) { return a->n; }
38004 +** void ci_atomic_set(a, v) { a->n = v; }
38005 +** void ci_atomic_inc(a) { ++a->n; }
38006 +** void ci_atomic_dec(a) { --a->n; }
38007 +** int ci_atomic_inc_and_test(a) { return ++a->n == 0; }
38008 +** int ci_atomic_dec_and_test(a) { return --a->n == 0; }
38009 +** void ci_atomic_and(a, v) { a->n &= v; }
38010 +** void ci_atomic_or(a, v) { a->n |= v; }
38011 +*/
38012 +
38013 +typedef struct { volatile ci_int32 n; } ci_atomic_t;
38014 +
38015 +#define CI_ATOMIC_INITIALISER(i) {(i)}
38016 +
38017 +static inline ci_int32 ci_atomic_read(const ci_atomic_t* a) { return a->n; }
38018 +static inline void ci_atomic_set(ci_atomic_t* a, int v) { a->n = v; ci_wmb(); }
38019 +
38020 +static inline void ci_atomic_inc(ci_atomic_t* a)
38021 +{ __asm__ __volatile__("lock; incl %0" : "+m" (a->n)); }
38022 +
38023 +
38024 +static inline void ci_atomic_dec(ci_atomic_t* a)
38025 +{ __asm__ __volatile__("lock; decl %0" : "+m" (a->n)); }
38026 +
38027 +static inline int ci_atomic_inc_and_test(ci_atomic_t* a) {
38028 + char r;
38029 + __asm__ __volatile__("lock; incl %0; sete %1"
38030 + : "+m" (a->n), "=qm" (r));
38031 + return r;
38032 +}
38033 +
38034 +static inline int ci_atomic_dec_and_test(ci_atomic_t* a) {
38035 + char r;
38036 + __asm__ __volatile__("lock; decl %0; sete %1"
38037 + : "+m" (a->n), "=qm" (r));
38038 + return r;
38039 +}
38040 +
38041 +ci_inline int
38042 +ci_atomic_xadd (ci_atomic_t *a, int v) {
38043 + __asm__ ("lock xadd %0, %1" : "=r" (v), "+m" (a->n) : "0" (v));
38044 + return v;
38045 +}
38046 +ci_inline int
38047 +ci_atomic_xchg (ci_atomic_t *a, int v) {
38048 + __asm__ ("lock xchg %0, %1" : "=r" (v), "+m" (a->n) : "0" (v));
38049 + return v;
38050 +}
38051 +
38052 +ci_inline void ci_atomic32_or(volatile ci_uint32* p, ci_uint32 mask)
38053 +{ __asm__ __volatile__("lock; orl %1, %0" : "+m" (*p) : "ir" (mask)); }
38054 +
38055 +ci_inline void ci_atomic32_and(volatile ci_uint32* p, ci_uint32 mask)
38056 +{ __asm__ __volatile__("lock; andl %1, %0" : "+m" (*p) : "ir" (mask)); }
38057 +
38058 +ci_inline void ci_atomic32_add(volatile ci_uint32* p, ci_uint32 v)
38059 +{ __asm__ __volatile__("lock; addl %1, %0" : "+m" (*p) : "ir" (v)); }
38060 +
38061 +#define ci_atomic_or(a, v) ci_atomic32_or ((ci_uint32*) &(a)->n, (v))
38062 +#define ci_atomic_and(a, v) ci_atomic32_and((ci_uint32*) &(a)->n, (v))
38063 +#define ci_atomic_add(a, v) ci_atomic32_add((ci_uint32*) &(a)->n, (v))
38064 +
38065 +extern int ci_glibc_uses_nptl (void) CI_HF;
38066 +extern int ci_glibc_nptl_broken(void) CI_HF;
38067 +extern int ci_glibc_gs_get_is_multihreaded_offset (void) CI_HF;
38068 +extern int ci_glibc_gs_is_multihreaded_offset CI_HV;
38069 +
38070 +#if !defined(__x86_64__)
38071 +#ifdef __GLIBC__
38072 +/* Returns non-zero if the calling process might be multithreaded, returns 0 if
38073 + * it definitely isn't (i.e. if reimplementing this function for other
38074 + * architectures and platforms, you can safely just return 1).
38075 + */
38076 +static inline int ci_is_multithreaded (void) {
38077 +
38078 + while (1) {
38079 + if (ci_glibc_gs_is_multihreaded_offset >= 0) {
38080 + /* NPTL keeps a variable that tells us this hanging off gs (i.e. in thread-
38081 + * local storage); just return this
38082 + */
38083 + int r;
38084 + __asm__ __volatile__ ("movl %%gs:(%1), %0"
38085 + : "=r" (r)
38086 + : "r" (ci_glibc_gs_is_multihreaded_offset));
38087 + return r;
38088 + }
38089 +
38090 + if (ci_glibc_gs_is_multihreaded_offset == -2) {
38091 + /* This means we've already determined that the libc version is NOT good
38092 + * for our funky "is multithreaded" hack
38093 + */
38094 + return 1;
38095 + }
38096 +
38097 + /* If we get here, it means this is the first time the function has been
38098 + * called -- detect the libc version and go around again.
38099 + */
38100 + ci_glibc_gs_is_multihreaded_offset = ci_glibc_gs_get_is_multihreaded_offset ();
38101 +
38102 + /* Go around again. We do the test here rather than at the top so that we go
38103 + * quicker in the common case
38104 + */
38105 + }
38106 +}
38107 +
38108 +#else /* def __GLIBC__ */
38109 +
38110 +#define ci_is_multithreaded() 1 /* ?? Is this the POSIX way of finding out */
38111 + /* whether the application is single */
38112 + /* threaded? */
38113 +
38114 +#endif /* def __GLIBC__ */
38115 +
38116 +#else /* defined __x86_64__ */
38117 +
38118 +static inline int ci_is_multithreaded (void) {
38119 + /* No easy way to tell on x86_64; so assume we're multithreaded */
38120 + return 1;
38121 +}
38122 +
38123 +#endif /* defined __x86_64__ */
38124 +
38125 +
38126 +/**********************************************************************
38127 + * Compare and swap.
38128 + */
38129 +
38130 +#define CI_HAVE_COMPARE_AND_SWAP
38131 +
38132 +ci_inline int ci_cas32_succeed(volatile ci_int32* p, ci_int32 oldval,
38133 + ci_int32 newval) {
38134 + char ret;
38135 + ci_int32 prevval;
38136 + __asm__ __volatile__("lock; cmpxchgl %3, %1; sete %0"
38137 + : "=q"(ret), "+m"(*p), "=a"(prevval)
38138 + : "r"(newval), "a"(oldval));
38139 + return ret;
38140 +}
38141 +
38142 +ci_inline int ci_cas32_fail(volatile ci_int32* p, ci_int32 oldval,
38143 + ci_int32 newval) {
38144 + char ret;
38145 + ci_int32 prevval;
38146 + __asm__ __volatile__("lock; cmpxchgl %3, %1; setne %0"
38147 + : "=q"(ret), "+m"(*p), "=a"(prevval)
38148 + : "r"(newval), "a"(oldval));
38149 + return ret;
38150 +}
38151 +
38152 +#ifdef __x86_64__
38153 +ci_inline int ci_cas64_succeed(volatile ci_int64* p, ci_int64 oldval,
38154 + ci_int64 newval) {
38155 + char ret;
38156 + ci_int64 prevval;
38157 + __asm__ __volatile__("lock; cmpxchgq %3, %1; sete %0"
38158 + : "=q"(ret), "+m"(*p), "=a"(prevval)
38159 + : "r"(newval), "a"(oldval));
38160 + return ret;
38161 +}
38162 +
38163 +ci_inline int ci_cas64_fail(volatile ci_int64* p, ci_int64 oldval,
38164 + ci_int64 newval) {
38165 + char ret;
38166 + ci_int64 prevval;
38167 + __asm__ __volatile__("lock; cmpxchgq %3, %1; setne %0"
38168 + : "=q"(ret), "+m"(*p), "=a"(prevval)
38169 + : "r"(newval), "a"(oldval));
38170 + return ret;
38171 +}
38172 +#endif
38173 +
38174 +ci_inline int ci_cas32u_succeed(volatile ci_uint32* p, ci_uint32 oldval, ci_uint32 newval) {
38175 + char ret;
38176 + ci_uint32 prevval;
38177 + __asm__ __volatile__("lock; cmpxchgl %3, %1; sete %0"
38178 + : "=q"(ret), "+m"(*p), "=a"(prevval)
38179 + : "r"(newval), "a"(oldval));
38180 + return ret;
38181 +}
38182 +
38183 +ci_inline int ci_cas32u_fail(volatile ci_uint32* p, ci_uint32 oldval, ci_uint32 newval) {
38184 + char ret;
38185 + ci_uint32 prevval;
38186 + __asm__ __volatile__("lock; cmpxchgl %3, %1; setne %0"
38187 + : "=q"(ret), "+m"(*p), "=a"(prevval)
38188 + : "r"(newval), "a"(oldval));
38189 + return ret;
38190 +}
38191 +
38192 +ci_inline int ci_cas64u_succeed(volatile ci_uint64* p, ci_uint64 oldval,
38193 + ci_uint64 newval) {
38194 + char ret;
38195 + ci_uint64 prevval;
38196 + __asm__ __volatile__("lock; cmpxchgq %3, %1; sete %0"
38197 + : "=q"(ret), "+m"(*p), "=a"(prevval)
38198 + : "r"(newval), "a"(oldval));
38199 + return ret;
38200 +}
38201 +
38202 +ci_inline int ci_cas64u_fail(volatile ci_uint64* p, ci_uint64 oldval,
38203 + ci_uint64 newval) {
38204 + char ret;
38205 + ci_uint64 prevval;
38206 + __asm__ __volatile__("lock; cmpxchgq %3, %1; setne %0"
38207 + : "=q"(ret), "+m"(*p), "=a"(prevval)
38208 + : "r"(newval), "a"(oldval));
38209 + return ret;
38210 +}
38211 +
38212 +#ifdef __x86_64__
38213 +
38214 +# define ci_cas_uintptr_succeed(p,o,n) \
38215 + ci_cas64u_succeed((volatile ci_uint64*) (p), (o), (n))
38216 +# define ci_cas_uintptr_fail(p,o,n) \
38217 + ci_cas64u_fail((volatile ci_uint64*) (p), (o), (n))
38218 +
38219 +#else
38220 +
38221 +# define ci_cas_uintptr_succeed(p,o,n) \
38222 + ci_cas32u_succeed((volatile ci_uint32*) (p), (o), (n))
38223 +# define ci_cas_uintptr_fail(p,o,n) \
38224 + ci_cas32u_fail((volatile ci_uint32*) (p), (o), (n))
38225 +
38226 +#endif
38227 +
38228 +
38229 +/**********************************************************************
38230 + * Atomic bit field.
38231 + */
38232 +
38233 +typedef ci_uint32 ci_bits;
38234 +#define CI_BITS_N 32u
38235 +
38236 +#define CI_BITS_DECLARE(name, n) \
38237 + ci_bits name[((n) + CI_BITS_N - 1u) / CI_BITS_N]
38238 +
38239 +ci_inline void ci_bits_clear_all(volatile ci_bits* b, int n_bits)
38240 +{ memset((void*) b, 0, (n_bits+CI_BITS_N-1u) / CI_BITS_N * sizeof(ci_bits)); }
38241 +
38242 +ci_inline void ci_bit_set(volatile ci_bits* b, int i) {
38243 + __asm__ __volatile__("lock; btsl %1, %0"
38244 + : "=m" (*b)
38245 + : "Ir" (i));
38246 +}
38247 +
38248 +ci_inline void ci_bit_clear(volatile ci_bits* b, int i) {
38249 + __asm__ __volatile__("lock; btrl %1, %0"
38250 + : "=m" (*b)
38251 + : "Ir" (i));
38252 +}
38253 +
38254 +ci_inline int ci_bit_test(volatile ci_bits* b, int i) {
38255 + char rc;
38256 + __asm__("btl %2, %1; setc %0"
38257 + : "=r" (rc)
38258 + : "m" (*b), "Ir" (i));
38259 + return rc;
38260 +}
38261 +
38262 +ci_inline int ci_bit_test_and_set(volatile ci_bits* b, int i) {
38263 + char rc;
38264 + __asm__ __volatile__("lock; btsl %2, %1; setc %0"
38265 + : "=r" (rc), "+m" (*b)
38266 + : "Ir" (i));
38267 + return rc;
38268 +}
38269 +
38270 +ci_inline int ci_bit_test_and_clear(volatile ci_bits* b, int i) {
38271 + char rc;
38272 + __asm__ __volatile__("lock; btrl %2, %1; setc %0"
38273 + : "=r" (rc), "+m" (*b)
38274 + : "Ir" (i));
38275 + return rc;
38276 +}
38277 +
38278 +/* These mask ops only work within a single ci_bits word. */
38279 +#define ci_bit_mask_set(b,m) ci_atomic32_or((b), (m))
38280 +#define ci_bit_mask_clear(b,m) ci_atomic32_and((b), ~(m))
38281 +
38282 +
38283 +/**********************************************************************
38284 + * Misc.
38285 + */
38286 +
38287 +#if __GNUC__ >= 3
38288 +# define ci_spinloop_pause() __asm__("pause")
38289 +#else
38290 +# define ci_spinloop_pause() __asm__(".byte 0xf3, 0x90")
38291 +#endif
38292 +
38293 +
38294 +#define CI_HAVE_ADDC32
38295 +#define ci_add_carry32(sum, v) __asm__("addl %1, %0 ;" \
38296 + "adcl $0, %0 ;" \
38297 + : "=r" (sum) \
38298 + : "g" ((ci_uint32) v), "0" (sum))
38299 +
38300 +
38301 +#endif /* __CI_TOOLS_GCC_X86_H__ */
38302 +
38303 +/*! \cidoxg_end */
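For reference, a minimal sketch of how the compare-and-swap and atomic bit helpers defined in the header above might be used. Only the ci_* names come from the code above; the my_* functions, and reaching the header through <ci/tools/sysdep.h>, are illustrative assumptions rather than part of the patch.

#include <ci/tools/sysdep.h>   /* pulls in gcc_x86.h on x86 builds */

/* Atomically add 'n' to *p: retry the CAS until no other CPU has
 * modified *p between the read and the exchange. */
ci_inline void my_counter_add(volatile ci_uint32* p, ci_uint32 n)
{
  ci_uint32 old;
  do {
    old = *p;
  } while( ! ci_cas32u_succeed(p, old, old + n) );
}

/* A 64-entry bitmap of slots; CI_BITS_N is 32, so this is two words
 * (zero-initialised because it is static). */
static CI_BITS_DECLARE(my_slots, 64);

/* Claim slot 'i', spinning until the current owner releases it. */
ci_inline void my_claim_slot(int i)
{
  while( ci_bit_test_and_set(my_slots, i) )  /* returns the previous bit */
    ci_spinloop_pause();
}

ci_inline void my_release_slot(int i)
{
  ci_bit_clear(my_slots, i);
}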
38304 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h
38305 ===================================================================
38306 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
38307 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h 2008-02-20 09:32:49.000000000 +0100
38308 @@ -0,0 +1,362 @@
38309 +/****************************************************************************
38310 + * Copyright 2002-2005: Level 5 Networks Inc.
38311 + * Copyright 2005-2008: Solarflare Communications Inc,
38312 + * 9501 Jeronimo Road, Suite 250,
38313 + * Irvine, CA 92618, USA
38314 + *
38315 + * Maintained by Solarflare Communications
38316 + * <linux-xen-drivers@solarflare.com>
38317 + * <onload-dev@solarflare.com>
38318 + *
38319 + * This program is free software; you can redistribute it and/or modify it
38320 + * under the terms of the GNU General Public License version 2 as published
38321 + * by the Free Software Foundation, incorporated herein by reference.
38322 + *
38323 + * This program is distributed in the hope that it will be useful,
38324 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
38325 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
38326 + * GNU General Public License for more details.
38327 + *
38328 + * You should have received a copy of the GNU General Public License
38329 + * along with this program; if not, write to the Free Software
38330 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
38331 + ****************************************************************************
38332 + */
38333 +
38334 +
38335 +/*! \cidoxg_include_ci_tools_platform */
38336 +
38337 +#ifndef __CI_TOOLS_LINUX_KERNEL_H__
38338 +#define __CI_TOOLS_LINUX_KERNEL_H__
38339 +
38340 +/**********************************************************************
38341 + * Need to know the kernel version.
38342 + */
38343 +
38344 +#ifndef LINUX_VERSION_CODE
38345 +# include <linux/version.h>
38346 +# ifndef UTS_RELEASE
38347 + /* 2.6.18 onwards defines UTS_RELEASE in a separate header */
38348 +# include <linux/utsrelease.h>
38349 +# endif
38350 +#endif
38351 +
38352 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) || \
38353 + LINUX_VERSION_CODE >= KERNEL_VERSION(2,7,0)
38354 +# error "Linux 2.6 required"
38355 +#endif
38356 +
38357 +
38358 +#include <linux/slab.h> /* kmalloc / kfree */
38359 +#include <linux/vmalloc.h> /* vmalloc / vfree */
38360 +#include <linux/interrupt.h>/* in_interrupt() */
38361 +#include <linux/in.h>
38362 +#include <linux/in6.h>
38363 +#include <linux/spinlock.h>
38364 +#include <linux/highmem.h>
38365 +#include <linux/smp_lock.h>
38366 +#include <linux/ctype.h>
38367 +#include <linux/uio.h>
38368 +#include <asm/current.h>
38369 +#include <asm/errno.h>
38370 +#include <asm/kmap_types.h>
38371 +#include <asm/semaphore.h>
38372 +
38373 +#include <ci/tools/config.h>
38374 +
38375 +#define ci_in_irq in_irq
38376 +#define ci_in_interrupt in_interrupt
38377 +#define ci_in_atomic in_atomic
38378 +
38379 +
38380 +/**********************************************************************
38381 + * Misc stuff.
38382 + */
38383 +
38384 +#ifdef BUG
38385 +# define CI_BOMB BUG
38386 +#endif
38387 +
38388 +ci_inline void* __ci_alloc(size_t n)
38389 +{ return kmalloc(n, (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)); }
38390 +
38391 +ci_inline void* __ci_atomic_alloc(size_t n)
38392 +{ return kmalloc(n, GFP_ATOMIC ); }
38393 +
38394 +ci_inline void __ci_free(void* p) { return kfree(p); }
38395 +ci_inline void* __ci_vmalloc(size_t n) { return vmalloc(n); }
38396 +ci_inline void __ci_vfree(void* p) { return vfree(p); }
38397 +
38398 +
38399 +#if CI_MEMLEAK_DEBUG_ALLOC_TABLE
38400 + #define ci_alloc(s) ci_alloc_memleak_debug (s, __FILE__, __LINE__)
38401 + #define ci_atomic_alloc(s) ci_atomic_alloc_memleak_debug(s, __FILE__, __LINE__)
38402 + #define ci_free ci_free_memleak_debug
38403 + #define ci_vmalloc(s) ci_vmalloc_memleak_debug (s, __FILE__,__LINE__)
38404 + #define ci_vfree ci_vfree_memleak_debug
38405 + #define ci_alloc_fn ci_alloc_fn_memleak_debug
38406 + #define ci_vmalloc_fn ci_vmalloc_fn_memleak_debug
38407 +#else /* !CI_MEMLEAK_DEBUG_ALLOC_TABLE */
38408 + #define ci_alloc_fn __ci_alloc
38409 + #define ci_vmalloc_fn __ci_vmalloc
38410 +#endif
38411 +
38412 +#ifndef ci_alloc
38413 + #define ci_atomic_alloc __ci_atomic_alloc
38414 + #define ci_alloc __ci_alloc
38415 + #define ci_free __ci_free
38416 + #define ci_vmalloc __ci_vmalloc
38417 + #define ci_vmalloc_fn __ci_vmalloc
38418 + #define ci_vfree __ci_vfree
38419 +#endif
38420 +
38421 +#define ci_sprintf sprintf
38422 +#define ci_vsprintf vsprintf
38423 +#define ci_snprintf snprintf
38424 +#define ci_vsnprintf vsnprintf
38425 +#define ci_sscanf sscanf
38426 +
38427 +
38428 +#define CI_LOG_FN_DEFAULT ci_log_syslog
38429 +
38430 +
38431 +/*--------------------------------------------------------------------
38432 + *
38433 + * irqs_disabled - needed for kmap helpers on some kernels
38434 + *
38435 + *--------------------------------------------------------------------*/
38436 +#ifdef irqs_disabled
38437 +# define ci_irqs_disabled irqs_disabled
38438 +#else
38439 +# if defined(__i386__) | defined(__x86_64__)
38440 +# define ci_irqs_disabled(x) \
38441 + ({ \
38442 + unsigned long flags; \
38443 + local_save_flags(flags); \
38444 + !(flags & (1<<9)); \
38445 + })
38446 +# else
38447 +# error "Need to implement irqs_disabled() for your architecture"
38448 +# endif
38449 +#endif
38450 +
38451 +
38452 +/**********************************************************************
38453 + * kmap helpers.
38454 + *
38455 + * Use ci_k(un)map for code paths which are not in an atomic context.
38456 + * For atomic code you need to use ci_k(un)map_in_atomic. This will grab
38457 + * one of the per-CPU kmap slots.
38458 + *
38459 + * NB in_interrupt != in_irq. If you don't know the difference then
38460 + * don't use kmap_in_atomic
38461 + *
38462 + * 2.4 allocates kmap slots by function. We are going to re-use the
38463 + * skb module's slot - we also use the same interlock
38464 + *
38465 + * 2.6 allocates kmap slots by type as well as by function. We are
38466 + * going to use the currently (2.6.10) unused SOFTIRQ slot.
38467 + *
38468 + */
38469 +
38470 +ci_inline void* ci_kmap(struct page *page) {
38471 + CI_DEBUG(if( ci_in_atomic() | ci_in_interrupt() | ci_in_irq() ) BUG());
38472 + return kmap(page);
38473 +}
38474 +
38475 +ci_inline void ci_kunmap(struct page *page) {
38476 + kunmap(page);
38477 +}
38478 +
38479 +#define CI_KM_SLOT KM_SOFTIRQ0
38480 +
38481 +
38482 +typedef struct semaphore ci_semaphore_t;
38483 +
38484 +ci_inline void
38485 +ci_sem_init (ci_semaphore_t *sem, int val) {
38486 + sema_init (sem, val);
38487 +}
38488 +
38489 +ci_inline void
38490 +ci_sem_down (ci_semaphore_t *sem) {
38491 + down (sem);
38492 +}
38493 +
38494 +ci_inline int
38495 +ci_sem_trydown (ci_semaphore_t *sem) {
38496 + return down_trylock (sem);
38497 +}
38498 +
38499 +ci_inline void
38500 +ci_sem_up (ci_semaphore_t *sem) {
38501 + up (sem);
38502 +}
38503 +
38504 +ci_inline int
38505 +ci_sem_get_count(ci_semaphore_t *sem) {
38506 + return sem->count.counter;
38507 +}
38508 +
38509 +ci_inline void* ci_kmap_in_atomic(struct page *page)
38510 +{
38511 + CI_DEBUG(if( ci_in_irq() ) BUG());
38512 +
38513 + /* iSCSI can call without in_interrupt() but with irqs_disabled()
38514 + and in a context that can't sleep, so we need to check that
38515 + too */
38516 + if(ci_in_interrupt() || ci_irqs_disabled())
38517 + return kmap_atomic(page, CI_KM_SLOT);
38518 + else
38519 + return kmap(page);
38520 +}
38521 +
38522 +ci_inline void ci_kunmap_in_atomic(struct page *page, void* kaddr)
38523 +{
38524 + CI_DEBUG(if( ci_in_irq() ) BUG());
38525 +
38526 + /* iSCSI can call without in_interrupt() but with irqs_disabled()
38527 + and in a context that can't sleep, so we need to check that
38528 + too */
38529 + if(ci_in_interrupt() || ci_irqs_disabled())
38530 + kunmap_atomic(kaddr, CI_KM_SLOT);
38531 + else
38532 + kunmap(page);
38533 +}
38534 +
38535 +/**********************************************************************
38536 + * spinlock implementation: used by <ci/tools/spinlock.h>
38537 + */
38538 +
38539 +#define CI_HAVE_SPINLOCKS
38540 +
38541 +typedef ci_uintptr_t ci_lock_holder_t;
38542 +#define ci_lock_thisthread (ci_lock_holder_t)current
38543 +#define ci_lock_no_holder (ci_lock_holder_t)NULL
38544 +
38545 +typedef spinlock_t ci_lock_i;
38546 +typedef spinlock_t ci_irqlock_i;
38547 +typedef unsigned long ci_irqlock_state_t;
38548 +
38549 +#define IRQLOCK_CYCLES 500000
38550 +
38551 +#define ci_lock_ctor_i(l) spin_lock_init(l)
38552 +#define ci_lock_dtor_i(l) do{}while(0)
38553 +#define ci_lock_lock_i(l) spin_lock(l)
38554 +#define ci_lock_trylock_i(l) spin_trylock(l)
38555 +#define ci_lock_unlock_i(l) spin_unlock(l)
38556 +
38557 +#define ci_irqlock_ctor_i(l) spin_lock_init(l)
38558 +#define ci_irqlock_dtor_i(l) do{}while(0)
38559 +#define ci_irqlock_lock_i(l,s) spin_lock_irqsave(l,*(s))
38560 +#define ci_irqlock_unlock_i(l,s) spin_unlock_irqrestore(l, *(s))
38561 +
38562 +
38563 +/**********************************************************************
38564 + * register access
38565 + */
38566 +
38567 +#include <asm/io.h>
38568 +
38569 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
38570 +typedef volatile void __iomem* ioaddr_t;
38571 +#else
38572 +typedef unsigned long ioaddr_t;
38573 +#endif
38574 +
38575 +
38576 +
38577 +/**********************************************************************
38578 + * thread implementation -- kernel dependencies probably should be
38579 + * moved to driver/linux_kernel.h
38580 + */
38581 +
38582 +#define ci_linux_daemonize(name) daemonize(name)
38583 +
38584 +#include <linux/workqueue.h>
38585 +
38586 +
38587 +typedef struct {
38588 + void* (*fn)(void* arg);
38589 + void* arg;
38590 + const char* name;
38591 + int thrd_id;
38592 + struct completion exit_event;
38593 + struct work_struct keventd_witem;
38594 +} ci_kernel_thread_t;
38595 +
38596 +
38597 +typedef ci_kernel_thread_t* cithread_t;
38598 +
38599 +
38600 +extern int cithread_create(cithread_t* tid, void* (*fn)(void*), void* arg,
38601 + const char* name);
38602 +extern int cithread_detach(cithread_t kt);
38603 +extern int cithread_join(cithread_t kt);
38604 +
38605 +
38606 +/* Kernel sysctl variables. */
38607 +extern int sysctl_tcp_wmem[3];
38608 +extern int sysctl_tcp_rmem[3];
38609 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
38610 +#define LINUX_HAS_SYSCTL_MEM_MAX
38611 +extern ci_uint32 sysctl_wmem_max;
38612 +extern ci_uint32 sysctl_rmem_max;
38613 +#endif
38614 +
38615 +
38616 +/*--------------------------------------------------------------------
38617 + *
38618 + * ci_bigbuf_t: An abstraction of a large buffer. Needed because in the
38619 + * Linux kernel, large buffers need to be allocated with vmalloc(), whereas
38620 + * smaller buffers should use kmalloc(). This abstraction chooses the
38621 + * appropriate mechanism.
38622 + *
38623 + *--------------------------------------------------------------------*/
38624 +
38625 +typedef struct {
38626 + char* p;
38627 + int is_vmalloc;
38628 +} ci_bigbuf_t;
38629 +
38630 +
38631 +ci_inline int ci_bigbuf_alloc(ci_bigbuf_t* bb, size_t bytes) {
38632 + if( bytes >= CI_PAGE_SIZE && ! ci_in_atomic() ) {
38633 + bb->is_vmalloc = 1;
38634 + if( (bb->p = vmalloc(bytes)) ) return 0;
38635 + }
38636 + bb->is_vmalloc = 0;
38637 + bb->p = kmalloc(bytes, ci_in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
38638 + return bb->p ? 0 : -ENOMEM;
38639 +}
38640 +
38641 +ci_inline void ci_bigbuf_free(ci_bigbuf_t* bb) {
38642 + if( bb->is_vmalloc ) vfree(bb->p);
38643 + else kfree(bb->p);
38644 +}
38645 +
38646 +ci_inline char* ci_bigbuf_ptr(ci_bigbuf_t* bb)
38647 +{ return bb->p; }
38648 +
38649 +/**********************************************************************
38650 + * struct iovec abstraction (for Windows port)
38651 + */
38652 +
38653 +typedef struct iovec ci_iovec;
38654 +
38655 +/* Accessors for buffer/length */
38656 +#define CI_IOVEC_BASE(i) ((i)->iov_base)
38657 +#define CI_IOVEC_LEN(i) ((i)->iov_len)
38658 +
38659 +/**********************************************************************
38660 + * Signals
38661 + */
38662 +
38663 +ci_inline void
38664 +ci_send_sig(int signum)
38665 +{
38666 + send_sig(signum, current, 0);
38667 +}
38668 +
38669 +#endif /* __CI_TOOLS_LINUX_KERNEL_H__ */
38670 +/*! \cidoxg_end */
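For reference, a minimal sketch of the ci_bigbuf_t and atomic-kmap helpers defined in the header above; my_copy_table and my_copy_from_page are hypothetical callers, and including the header via <ci/tools/sysdep.h> is an assumption about how these tools are normally reached.

#include <linux/string.h>      /* memcpy */
#include <ci/tools/sysdep.h>   /* selects linux_kernel.h in kernel builds */

/* Copy 'bytes' of data into a temporary buffer: ci_bigbuf_alloc() uses
 * vmalloc() for CI_PAGE_SIZE or more (outside atomic context) and
 * kmalloc() otherwise. */
static int my_copy_table(const void* src, size_t bytes)
{
  ci_bigbuf_t bb;
  int rc = ci_bigbuf_alloc(&bb, bytes);
  if( rc < 0 )
    return rc;                         /* -ENOMEM */
  memcpy(ci_bigbuf_ptr(&bb), src, bytes);
  /* ... consume the buffer ... */
  ci_bigbuf_free(&bb);                 /* vfree() or kfree() as appropriate */
  return 0;
}

/* Copy out of a page from a context that may be atomic: the helper picks
 * kmap_atomic() or kmap() as described in the comments above. */
static void my_copy_from_page(struct page* pg, void* dst, size_t n)
{
  void* src = ci_kmap_in_atomic(pg);
  memcpy(dst, src, n);
  ci_kunmap_in_atomic(pg, src);
}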
38671 Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/sysdep.h
38672 ===================================================================
38673 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
38674 +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/sysdep.h 2008-02-20 09:32:49.000000000 +0100
38675 @@ -0,0 +1,132 @@
38676 +/****************************************************************************
38677 + * Copyright 2002-2005: Level 5 Networks Inc.
38678 + * Copyright 2005-2008: Solarflare Communications Inc,
38679 + * 9501 Jeronimo Road, Suite 250,
38680 + * Irvine, CA 92618, USA
38681 + *
38682 + * Maintained by Solarflare Communications
38683 + * <linux-xen-drivers@solarflare.com>
38684 + * <onload-dev@solarflare.com>
38685 + *
38686 + * This program is free software; you can redistribute it and/or modify it
38687 + * under the terms of the GNU General Public License version 2 as published
38688 + * by the Free Software Foundation, incorporated herein by reference.
38689 + *
38690 + * This program is distributed in the hope that it will be useful,
38691 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
38692 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
38693 + * GNU General Public License for more details.
38694 + *
38695 + * You should have received a copy of the GNU General Public License
38696 + * along with this program; if not, write to the Free Software
38697 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
38698 + ****************************************************************************
38699 + */
38700 +
38701 +/*! \cidoxg_include_ci_tools */
38702 +
38703 +#ifndef __CI_TOOLS_SYSDEP_H__
38704 +#define __CI_TOOLS_SYSDEP_H__
38705 +
38706 +/* Make this header self-sufficient */
38707 +#include <ci/compat.h>
38708 +#include <ci/tools/log.h>
38709 +#include <ci/tools/debug.h>
38710 +
38711 +
38712 +/**********************************************************************
38713 + * Platform dependencies.
38714 + */
38715 +
38716 +#if defined(__KERNEL__)
38717 +
38718 +# if defined(__linux__)
38719 +# include <ci/tools/platform/linux_kernel.h>
38720 +# elif defined(_WIN32)
38721 +# include <ci/tools/platform/win32_kernel.h>
38722 +# elif defined(__sun__)
38723 +# include <ci/tools/platform/sunos_kernel.h>
38724 +# else
38725 +# error Unknown platform.
38726 +# endif
38727 +
38728 +#elif defined(_WIN32)
38729 +
38730 +# include <ci/tools/platform/win32.h>
38731 +
38732 +#elif defined(__unix__)
38733 +
38734 +# include <ci/tools/platform/unix.h>
38735 +
38736 +#else
38737 +
38738 +# error Unknown platform.
38739 +
38740 +#endif
38741 +
38742 +#if defined(__linux__)
38743 +/*! Linux sendfile() support enable/disable. */
38744 +# define CI_HAVE_SENDFILE /* provide sendfile i/f */
38745 +
38746 +# define CI_HAVE_OS_NOPAGE
38747 +#endif
38748 +
38749 +#if defined(__sun__)
38750 +# define CI_HAVE_SENDFILE /* provide sendfile i/f */
38751 +# define CI_HAVE_SENDFILEV /* provide sendfilev i/f */
38752 +
38753 +# define CI_IOCTL_SENDFILE /* use efrm CI_SENDFILEV ioctl */
38754 +#endif
38755 +
38756 +#if defined(_WIN32)
38757 +typedef ci_uint32 ci_uerr_t; /* range of OS user-mode return codes */
38758 +typedef ci_uint32 ci_kerr_t; /* range of OS kernel-mode return codes */
38759 +#elif defined(__unix__)
38760 +typedef ci_int32 ci_uerr_t; /* range of OS user-mode return codes */
38761 +typedef ci_int32 ci_kerr_t; /* range of OS kernel-mode return codes */
38762 +#endif
38763 +
38764 +
38765 +/**********************************************************************
38766 + * Compiler and processor dependencies.
38767 + */
38768 +
38769 +#if defined(__GNUC__)
38770 +
38771 +#if defined(__i386__) || defined(__x86_64__)
38772 +# include <ci/tools/platform/gcc_x86.h>
38773 +#elif defined(__PPC__)
38774 +# include <ci/tools/platform/gcc_ppc.h>
38775 +#elif defined(__ia64__)
38776 +# include <ci/tools/platform/gcc_ia64.h>
38777 +#else
38778 +# error Unknown processor.
38779 +#endif
38780 +
38781 +#elif defined(_MSC_VER)
38782 +
38783 +#if defined(__i386__)
38784 +# include <ci/tools/platform/msvc_x86.h>
38785 +# elif defined(__x86_64__)
38786 +# include <ci/tools/platform/msvc_x86_64.h>
38787 +#else
38788 +# error Unknown processor.
38789 +#endif
38790 +
38791 +#elif defined(__PGI)
38792 +
38793 +# include <ci/tools/platform/pg_x86.h>
38794 +
38795 +#elif defined(__INTEL_COMPILER)
38796 +
38797 +/* Intel compilers v7 claim to be very gcc compatible. */
38798 +# include <ci/tools/platform/gcc_x86.h>
38799 +
38800 +#else
38801 +# error Unknown compiler.
38802 +#endif
38803 +
38804 +
38805 +#endif /* __CI_TOOLS_SYSDEP_H__ */
38806 +
38807 +/*! \cidoxg_end */
38808 Index: head-2008-11-25/drivers/xen/sfc_netfront/Makefile
38809 ===================================================================
38810 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
38811 +++ head-2008-11-25/drivers/xen/sfc_netfront/Makefile 2008-02-26 10:54:11.000000000 +0100
38812 @@ -0,0 +1,11 @@
38813 +EXTRA_CFLAGS += -Idrivers/xen/sfc_netfront -Idrivers/xen/sfc_netutil -Idrivers/xen/netfront
38814 +EXTRA_CFLAGS += -D__ci_driver__
38815 +EXTRA_CFLAGS += -Werror
38816 +
38817 +ifdef GCOV
38818 +EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
38819 +endif
38820 +
38821 +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND) := sfc_netfront.o
38822 +
38823 +sfc_netfront-objs := accel_msg.o accel_bufs.o accel_netfront.o accel_vi.o accel_xenbus.o accel_tso.o accel_ssr.o accel_debugfs.o falcon_event.o falcon_vi.o pt_tx.o vi_init.o
38824 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel.h
38825 ===================================================================
38826 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
38827 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel.h 2008-02-26 10:54:11.000000000 +0100
38828 @@ -0,0 +1,477 @@
38829 +/****************************************************************************
38830 + * Solarflare driver for Xen network acceleration
38831 + *
38832 + * Copyright 2006-2008: Solarflare Communications Inc,
38833 + * 9501 Jeronimo Road, Suite 250,
38834 + * Irvine, CA 92618, USA
38835 + *
38836 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
38837 + *
38838 + * This program is free software; you can redistribute it and/or modify it
38839 + * under the terms of the GNU General Public License version 2 as published
38840 + * by the Free Software Foundation, incorporated herein by reference.
38841 + *
38842 + * This program is distributed in the hope that it will be useful,
38843 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
38844 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
38845 + * GNU General Public License for more details.
38846 + *
38847 + * You should have received a copy of the GNU General Public License
38848 + * along with this program; if not, write to the Free Software
38849 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
38850 + ****************************************************************************
38851 + */
38852 +
38853 +#ifndef NETFRONT_ACCEL_H
38854 +#define NETFRONT_ACCEL_H
38855 +
38856 +#include "accel_msg_iface.h"
38857 +#include "accel_cuckoo_hash.h"
38858 +#include "accel_bufs.h"
38859 +
38860 +#include "etherfabric/ef_vi.h"
38861 +
38862 +#include <xen/xenbus.h>
38863 +#include <xen/evtchn.h>
38864 +
38865 +#include <linux/kernel.h>
38866 +#include <linux/list.h>
38867 +
38868 +enum netfront_accel_post_status {
38869 + NETFRONT_ACCEL_STATUS_GOOD,
38870 + NETFRONT_ACCEL_STATUS_BUSY,
38871 + NETFRONT_ACCEL_STATUS_CANT
38872 +};
38873 +
38874 +#define NETFRONT_ACCEL_STATS 1
38875 +#if NETFRONT_ACCEL_STATS
38876 +#define NETFRONT_ACCEL_STATS_OP(x) x
38877 +#else
38878 +#define NETFRONT_ACCEL_STATS_OP(x)
38879 +#endif
38880 +
38881 +
38882 +enum netfront_accel_msg_state {
38883 + NETFRONT_ACCEL_MSG_NONE = 0,
38884 + NETFRONT_ACCEL_MSG_HELLO = 1,
38885 + NETFRONT_ACCEL_MSG_HW = 2
38886 +};
38887 +
38888 +
38889 +typedef struct {
38890 + u32 in_progress;
38891 + u32 total_len;
38892 + struct sk_buff *skb;
38893 +} netfront_accel_jumbo_state;
38894 +
38895 +
38896 +struct netfront_accel_ssr_state {
38897 + /** List of tracked connections. */
38898 + struct list_head conns;
38899 +
38900 + /** Free efx_ssr_conn instances. */
38901 + struct list_head free_conns;
38902 +};
38903 +
38904 +
38905 +struct netfront_accel_netdev_stats {
38906 + /* Fastpath stats. */
38907 + u32 fastpath_rx_pkts;
38908 + u32 fastpath_rx_bytes;
38909 + u32 fastpath_rx_errors;
38910 + u32 fastpath_tx_pkts;
38911 + u32 fastpath_tx_bytes;
38912 + u32 fastpath_tx_errors;
38913 +};
38914 +
38915 +
38916 +struct netfront_accel_netdev_dbfs {
38917 + struct dentry *fastpath_rx_pkts;
38918 + struct dentry *fastpath_rx_bytes;
38919 + struct dentry *fastpath_rx_errors;
38920 + struct dentry *fastpath_tx_pkts;
38921 + struct dentry *fastpath_tx_bytes;
38922 + struct dentry *fastpath_tx_errors;
38923 +};
38924 +
38925 +
38926 +struct netfront_accel_stats {
38927 + /** Fast path events */
38928 + u64 fastpath_tx_busy;
38929 +
38930 + /** TX DMA queue status */
38931 + u64 fastpath_tx_completions;
38932 +
38933 + /** The number of events processed. */
38934 + u64 event_count;
38935 +
38936 + /** Number of frame trunc events seen on fastpath */
38937 + u64 fastpath_frm_trunc;
38938 +
38939 + /** Number of no rx descriptor trunc events seen on fastpath */
38940 + u64 rx_no_desc_trunc;
38941 +
38942 + /** The number of misc bad events (e.g. RX_DISCARD) processed. */
38943 + u64 bad_event_count;
38944 +
38945 + /** Number of events dealt with in poll loop */
38946 + u32 events_per_poll_max;
38947 + u32 events_per_poll_tx_max;
38948 + u32 events_per_poll_rx_max;
38949 +
38950 + /** Largest number of concurrently outstanding tx descriptors */
38951 + u32 fastpath_tx_pending_max;
38952 +
38953 +	/** The number of events since the last interrupt. */
38954 + u32 event_count_since_irq;
38955 +
38956 + /** The max number of events between interrupts. */
38957 + u32 events_per_irq_max;
38958 +
38959 + /** The number of interrupts. */
38960 + u64 irq_count;
38961 +
38962 + /** The number of useless interrupts. */
38963 + u64 useless_irq_count;
38964 +
38965 + /** The number of polls scheduled. */
38966 + u64 poll_schedule_count;
38967 +
38968 + /** The number of polls called. */
38969 + u64 poll_call_count;
38970 +
38971 + /** The number of rechecks. */
38972 + u64 poll_reschedule_count;
38973 +
38974 + /** Number of times we've called netif_stop_queue/netif_wake_queue */
38975 + u64 queue_stops;
38976 + u64 queue_wakes;
38977 +
38978 + /** SSR stats */
38979 + u64 ssr_bursts;
38980 + u64 ssr_drop_stream;
38981 + u64 ssr_misorder;
38982 + u64 ssr_slow_start;
38983 + u64 ssr_merges;
38984 + u64 ssr_too_many;
38985 + u64 ssr_new_stream;
38986 +};
38987 +
38988 +
38989 +struct netfront_accel_dbfs {
38990 + struct dentry *fastpath_tx_busy;
38991 + struct dentry *fastpath_tx_completions;
38992 + struct dentry *fastpath_tx_pending_max;
38993 + struct dentry *fastpath_frm_trunc;
38994 + struct dentry *rx_no_desc_trunc;
38995 + struct dentry *event_count;
38996 + struct dentry *bad_event_count;
38997 + struct dentry *events_per_poll_max;
38998 + struct dentry *events_per_poll_rx_max;
38999 + struct dentry *events_per_poll_tx_max;
39000 + struct dentry *event_count_since_irq;
39001 + struct dentry *events_per_irq_max;
39002 + struct dentry *irq_count;
39003 + struct dentry *useless_irq_count;
39004 + struct dentry *poll_schedule_count;
39005 + struct dentry *poll_call_count;
39006 + struct dentry *poll_reschedule_count;
39007 + struct dentry *queue_stops;
39008 + struct dentry *queue_wakes;
39009 + struct dentry *ssr_bursts;
39010 + struct dentry *ssr_drop_stream;
39011 + struct dentry *ssr_misorder;
39012 + struct dentry *ssr_slow_start;
39013 + struct dentry *ssr_merges;
39014 + struct dentry *ssr_too_many;
39015 + struct dentry *ssr_new_stream;
39016 +};
39017 +
39018 +
39019 +typedef struct netfront_accel_vnic {
39020 + struct netfront_accel_vnic *next;
39021 +
39022 + struct mutex vnic_mutex;
39023 +
39024 + spinlock_t tx_lock;
39025 +
39026 + struct netfront_accel_bufpages bufpages;
39027 + struct netfront_accel_bufinfo *rx_bufs;
39028 + struct netfront_accel_bufinfo *tx_bufs;
39029 +
39030 + /** Hardware & VI state */
39031 + ef_vi vi;
39032 +
39033 + ef_vi_state *vi_state;
39034 +
39035 + ef_eventq_state evq_state;
39036 +
39037 + void *evq_mapping;
39038 +
39039 +	/** Hardware-dependent state */
39040 + union {
39041 + struct {
39042 + /** Falcon A or B */
39043 + enum net_accel_hw_type type;
39044 + u32 *evq_rptr;
39045 + u32 *doorbell;
39046 + void *evq_rptr_mapping;
39047 + void *doorbell_mapping;
39048 + void *txdmaq_mapping;
39049 + void *rxdmaq_mapping;
39050 + } falcon;
39051 + } hw;
39052 +
39053 + /** RX DMA queue status */
39054 + u32 rx_dma_level;
39055 +
39056 + /** Number of RX descriptors waiting to be pushed to the card. */
39057 + u32 rx_dma_batched;
39058 +#define NETFRONT_ACCEL_RX_DESC_BATCH 16
39059 +
39060 + /**
39061 + * Hash table of remote mac addresses to decide whether to try
39062 + * fast path
39063 + */
39064 + cuckoo_hash_table fastpath_table;
39065 + spinlock_t table_lock;
39066 +
39067 +	/** The local MAC address of the virtual interface we're accelerating */
39068 + u8 mac[ETH_ALEN];
39069 +
39070 + int rx_pkt_stride;
39071 + int rx_skb_stride;
39072 +
39073 + /**
39074 + * Keep track of fragments of jumbo packets as events are
39075 +	 * delivered by the NIC
39076 + */
39077 + netfront_accel_jumbo_state jumbo_state;
39078 +
39079 + struct net_device *net_dev;
39080 +
39081 + /** These two gate the enabling of fast path operations */
39082 + int frontend_ready;
39083 + int backend_netdev_up;
39084 +
39085 + int irq_enabled;
39086 + spinlock_t irq_enabled_lock;
39087 +
39088 + int tx_enabled;
39089 +
39090 + int poll_enabled;
39091 +
39092 + /** A spare slot for a TX packet. This is treated as an extension
39093 + * of the DMA queue. */
39094 + struct sk_buff *tx_skb;
39095 +
39096 + /** Keep track of fragments of SSR packets */
39097 + struct netfront_accel_ssr_state ssr_state;
39098 +
39099 + struct xenbus_device *dev;
39100 +
39101 + /** Event channel for messages */
39102 + int msg_channel;
39103 + int msg_channel_irq;
39104 +
39105 + /** Event channel for network interrupts. */
39106 + int net_channel;
39107 + int net_channel_irq;
39108 +
39109 + struct net_accel_shared_page *shared_page;
39110 +
39111 + grant_ref_t ctrl_page_gnt;
39112 + grant_ref_t msg_page_gnt;
39113 +
39114 + /** Message Qs, 1 each way. */
39115 + sh_msg_fifo2 to_dom0;
39116 + sh_msg_fifo2 from_dom0;
39117 +
39118 + enum netfront_accel_msg_state msg_state;
39119 +
39120 + /** Watch on accelstate */
39121 + struct xenbus_watch backend_accel_watch;
39122 + /** Watch on frontend's MAC address */
39123 + struct xenbus_watch mac_address_watch;
39124 +
39125 + /** Work to process received irq/msg */
39126 + struct work_struct msg_from_bend;
39127 +
39128 + /** Wait queue for changes in accelstate. */
39129 + wait_queue_head_t state_wait_queue;
39130 +
39131 + /** The current accelstate of this driver. */
39132 + XenbusState frontend_state;
39133 +
39134 + /** The most recent accelstate seen by the xenbus watch. */
39135 + XenbusState backend_state;
39136 +
39137 + /** Non-zero if we should reject requests to connect. */
39138 + int removing;
39139 +
39140 + /** Non-zero if the domU shared state has been initialised. */
39141 + int domU_state_is_setup;
39142 +
39143 + /** Non-zero if the dom0 shared state has been initialised. */
39144 + int dom0_state_is_setup;
39145 +
39146 + /* Those statistics that are added to the netdev stats */
39147 + struct netfront_accel_netdev_stats netdev_stats;
39148 + struct netfront_accel_netdev_stats stats_last_read;
39149 +#ifdef CONFIG_DEBUG_FS
39150 + struct netfront_accel_netdev_dbfs netdev_dbfs;
39151 +#endif
39152 +
39153 + /* These statistics are internal and optional */
39154 +#if NETFRONT_ACCEL_STATS
39155 + struct netfront_accel_stats stats;
39156 +#ifdef CONFIG_DEBUG_FS
39157 + struct netfront_accel_dbfs dbfs;
39158 +#endif
39159 +#endif
39160 +
39161 +	/** Debugfs directory for this interface */
39162 + struct dentry *dbfs_dir;
39163 +} netfront_accel_vnic;
39164 +
39165 +
39166 +/* Module parameters */
39167 +extern unsigned sfc_netfront_max_pages;
39168 +extern unsigned sfc_netfront_buffer_split;
39169 +
39170 +extern const char *frontend_name;
39171 +extern struct netfront_accel_hooks accel_hooks;
39172 +extern struct workqueue_struct *netfront_accel_workqueue;
39173 +
39174 +
39175 +extern
39176 +void netfront_accel_vi_ctor(netfront_accel_vnic *vnic);
39177 +
39178 +extern
39179 +int netfront_accel_vi_init(netfront_accel_vnic *vnic,
39180 + struct net_accel_msg_hw *hw_msg);
39181 +
39182 +extern
39183 +void netfront_accel_vi_dtor(netfront_accel_vnic *vnic);
39184 +
39185 +
39186 +/**
39187 + * Add new buffers which have been registered with the NIC.
39188 + *
39189 + * @v vnic The vnic instance to process the response.
39190 + *
39191 + * The buffers contained in the message are added to the buffer pool.
39192 + */
39193 +extern
39194 +void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx);
39195 +
39196 +/**
39197 + * Put a packet on the tx DMA queue.
39198 + *
39199 + * @v vnic The vnic instance to accept the packet.
39200 + * @v skb A sk_buff to send.
39201 + *
39202 + * Attempt to send a packet. On success, the skb is owned by the DMA
39203 + * queue and will be released when the completion event arrives.
39204 + */
39205 +extern enum netfront_accel_post_status
39206 +netfront_accel_vi_tx_post(netfront_accel_vnic *vnic,
39207 + struct sk_buff *skb);
39208 +
39209 +
39210 +/**
39211 + * Process events in response to an interrupt.
39212 + *
39213 + * @v vnic The vnic instance to poll.
39214 + * @v rx_packets The maximum number of rx packets to process.
39215 + * @ret rx_done The number of rx packets processed.
39216 + *
39217 + * The vnic will process events until there are no more events
39218 + * remaining or the specified number of rx packets has been processed.
39219 + * The split from the interrupt call is to allow Linux NAPI
39220 + * polling.
39221 + */
39222 +extern
39223 +int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets);
39224 +
39225 +
39226 +/**
39227 + * Iterate over the fragments of a packet buffer.
39228 + *
39229 + * @v skb The packet buffer to examine.
39230 + * @v idx A variable name for the fragment index.
39231 + * @v data A variable name for the address of the fragment data.
39232 + * @v length A variable name for the fragment length.
39233 + * @v code A section of code to execute for each fragment.
39234 + *
39235 + * This macro iterates over the fragments in a packet buffer and
39236 + * executes the code for each of them.
39237 + */
39238 +#define NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT(skb, frag_idx, \
39239 + frag_data, frag_len, \
39240 + code) \
39241 + do { \
39242 + int frag_idx; \
39243 + void *frag_data; \
39244 + unsigned int frag_len; \
39245 + \
39246 + frag_data = skb->data; \
39247 + frag_len = skb_headlen(skb); \
39248 + frag_idx = 0; \
39249 + while (1) { /* For each fragment */ \
39250 + code; \
39251 + if (frag_idx >= skb_shinfo(skb)->nr_frags) { \
39252 + break; \
39253 + } else { \
39254 + skb_frag_t *fragment; \
39255 + fragment = &skb_shinfo(skb)->frags[frag_idx]; \
39256 + frag_len = fragment->size; \
39257 + frag_data = ((void*)page_address(fragment->page) \
39258 + + fragment->page_offset); \
39259 + }; \
39260 + frag_idx++; \
39261 + } \
39262 + } while(0)
39263 +
39264 +static inline
39265 +void netfront_accel_disable_net_interrupts(netfront_accel_vnic *vnic)
39266 +{
39267 + mask_evtchn(vnic->net_channel);
39268 +}
39269 +
39270 +static inline
39271 +void netfront_accel_enable_net_interrupts(netfront_accel_vnic *vnic)
39272 +{
39273 + unmask_evtchn(vnic->net_channel);
39274 +}
39275 +
39276 +void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac,
39277 + u32 ip, u16 port, u8 protocol);
39278 +
39279 +/* Process an IRQ received from back end driver */
39280 +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
39281 + struct pt_regs *unused);
39282 +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
39283 + struct pt_regs *unused);
39284 +
39285 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
39286 +extern void netfront_accel_msg_from_bend(struct work_struct *context);
39287 +#else
39288 +extern void netfront_accel_msg_from_bend(void *context);
39289 +#endif
39290 +
39291 +extern void vnic_stop_fastpath(netfront_accel_vnic *vnic);
39292 +
39293 +extern int netfront_accel_probe(struct net_device *net_dev,
39294 + struct xenbus_device *dev);
39295 +extern int netfront_accel_remove(struct xenbus_device *dev);
39296 +extern void netfront_accel_set_closing(netfront_accel_vnic *vnic);
39297 +
39298 +extern int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic);
39299 +
39300 +extern void netfront_accel_debugfs_init(void);
39301 +extern void netfront_accel_debugfs_fini(void);
39302 +extern int netfront_accel_debugfs_create(netfront_accel_vnic *vnic);
39303 +extern int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic);
39304 +
39305 +#endif /* NETFRONT_ACCEL_H */
39306 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.c
39307 ===================================================================
39308 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
39309 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.c 2008-02-26 10:54:12.000000000 +0100
39310 @@ -0,0 +1,393 @@
39311 +/****************************************************************************
39312 + * Solarflare driver for Xen network acceleration
39313 + *
39314 + * Copyright 2006-2008: Solarflare Communications Inc,
39315 + * 9501 Jeronimo Road, Suite 250,
39316 + * Irvine, CA 92618, USA
39317 + *
39318 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
39319 + *
39320 + * This program is free software; you can redistribute it and/or modify it
39321 + * under the terms of the GNU General Public License version 2 as published
39322 + * by the Free Software Foundation, incorporated herein by reference.
39323 + *
39324 + * This program is distributed in the hope that it will be useful,
39325 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
39326 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39327 + * GNU General Public License for more details.
39328 + *
39329 + * You should have received a copy of the GNU General Public License
39330 + * along with this program; if not, write to the Free Software
39331 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
39332 + ****************************************************************************
39333 + */
39334 +
39335 +#include <xen/gnttab.h>
39336 +
39337 +#include "accel_bufs.h"
39338 +#include "accel_util.h"
39339 +
39340 +#include "accel.h"
39341 +
39342 +
39343 +static int
39344 +netfront_accel_alloc_buf_desc_blocks(struct netfront_accel_bufinfo *manager,
39345 + int pages)
39346 +{
39347 + manager->desc_blocks =
39348 + kzalloc(sizeof(struct netfront_accel_pkt_desc *) *
39349 + NETFRONT_ACCEL_BUF_NUM_BLOCKS(pages), GFP_KERNEL);
39350 + if (manager->desc_blocks == NULL) {
39351 + return -ENOMEM;
39352 + }
39353 +
39354 + return 0;
39355 +}
39356 +
39357 +static int
39358 +netfront_accel_alloc_buf_lists(struct netfront_accel_bufpages *bufpages,
39359 + int pages)
39360 +{
39361 + bufpages->page_list = kmalloc(pages * sizeof(void *), GFP_KERNEL);
39362 + if (bufpages->page_list == NULL) {
39363 + return -ENOMEM;
39364 + }
39365 +
39366 + bufpages->grant_list = kzalloc(pages * sizeof(grant_ref_t), GFP_KERNEL);
39367 + if (bufpages->grant_list == NULL) {
39368 + kfree(bufpages->page_list);
39369 + bufpages->page_list = NULL;
39370 + return -ENOMEM;
39371 + }
39372 +
39373 + return 0;
39374 +}
39375 +
39376 +
39377 +int netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages,
39378 + struct netfront_accel_bufinfo *rx_manager,
39379 + struct netfront_accel_bufinfo *tx_manager,
39380 + int pages)
39381 +{
39382 + int n, rc;
39383 +
39384 + if ((rc = netfront_accel_alloc_buf_desc_blocks
39385 + (rx_manager, pages - (pages / sfc_netfront_buffer_split))) < 0) {
39386 + goto rx_fail;
39387 + }
39388 +
39389 + if ((rc = netfront_accel_alloc_buf_desc_blocks
39390 + (tx_manager, pages / sfc_netfront_buffer_split)) < 0) {
39391 + goto tx_fail;
39392 + }
39393 +
39394 + if ((rc = netfront_accel_alloc_buf_lists(bufpages, pages)) < 0) {
39395 + goto lists_fail;
39396 + }
39397 +
39398 + for (n = 0; n < pages; n++) {
39399 + void *tmp = (void*)__get_free_page(GFP_KERNEL);
39400 + if (tmp == NULL)
39401 + break;
39402 +
39403 + bufpages->page_list[n] = tmp;
39404 + }
39405 +
39406 + if (n != pages) {
39407 + EPRINTK("%s: not enough pages: %d != %d\n", __FUNCTION__, n,
39408 + pages);
39409 +		for (n--; n >= 0; n--) /* page_list[n] was never allocated */
39410 + free_page((unsigned long)(bufpages->page_list[n]));
39411 + rc = -ENOMEM;
39412 + goto pages_fail;
39413 + }
39414 +
39415 + bufpages->max_pages = pages;
39416 + bufpages->page_reqs = 0;
39417 +
39418 + return 0;
39419 +
39420 + pages_fail:
39421 + kfree(bufpages->page_list);
39422 + kfree(bufpages->grant_list);
39423 +
39424 + bufpages->page_list = NULL;
39425 + bufpages->grant_list = NULL;
39426 + lists_fail:
39427 + kfree(tx_manager->desc_blocks);
39428 + tx_manager->desc_blocks = NULL;
39429 +
39430 + tx_fail:
39431 + kfree(rx_manager->desc_blocks);
39432 + rx_manager->desc_blocks = NULL;
39433 + rx_fail:
39434 + return rc;
39435 +}
39436 +
39437 +
39438 +void netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages,
39439 + struct netfront_accel_bufinfo *rx_manager,
39440 + struct netfront_accel_bufinfo *tx_manager)
39441 +{
39442 + int i;
39443 +
39444 + for (i = 0; i < bufpages->max_pages; i++) {
39445 + if (bufpages->grant_list[i] != 0)
39446 + net_accel_ungrant_page(bufpages->grant_list[i]);
39447 + free_page((unsigned long)(bufpages->page_list[i]));
39448 + }
39449 +
39450 + if (bufpages->max_pages) {
39451 + kfree(bufpages->page_list);
39452 + kfree(bufpages->grant_list);
39453 + kfree(rx_manager->desc_blocks);
39454 + kfree(tx_manager->desc_blocks);
39455 + }
39456 +}
39457 +
39458 +
39459 +/*
39460 + * Allocate memory for the buffer manager and create a lock.  If no
39461 + * lock is supplied, one is allocated internally.
39462 + */
39463 +struct netfront_accel_bufinfo *netfront_accel_init_bufs(spinlock_t *lock)
39464 +{
39465 + struct netfront_accel_bufinfo *res = kmalloc(sizeof(*res), GFP_KERNEL);
39466 + if (res != NULL) {
39467 + res->npages = res->nused = 0;
39468 + res->first_free = -1;
39469 +
39470 + if (lock == NULL) {
39471 + res->lock = kmalloc(sizeof(*res->lock), GFP_KERNEL);
39472 + if (res->lock == NULL) {
39473 + kfree(res);
39474 + return NULL;
39475 + }
39476 + spin_lock_init(res->lock);
39477 + res->internally_locked = 1;
39478 + } else {
39479 + res->lock = lock;
39480 + res->internally_locked = 0;
39481 + }
39482 +
39483 + res->desc_blocks = NULL;
39484 + }
39485 +
39486 + return res;
39487 +}
39488 +
39489 +
39490 +void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *bufs)
39491 +{
39492 + if (bufs->internally_locked)
39493 + kfree(bufs->lock);
39494 + kfree(bufs);
39495 +}
39496 +
39497 +
39498 +int netfront_accel_buf_map_request(struct xenbus_device *dev,
39499 + struct netfront_accel_bufpages *bufpages,
39500 + struct net_accel_msg *msg,
39501 + int pages, int offset)
39502 +{
39503 + int i, mfn;
39504 + int err;
39505 +
39506 + net_accel_msg_init(msg, NET_ACCEL_MSG_MAPBUF);
39507 +
39508 + BUG_ON(pages > NET_ACCEL_MSG_MAX_PAGE_REQ);
39509 +
39510 + msg->u.mapbufs.pages = pages;
39511 +
39512 + for (i = 0; i < msg->u.mapbufs.pages; i++) {
39513 + /*
39514 + * This can happen if we tried to send this message
39515 + * earlier but the queue was full.
39516 + */
39517 + if (bufpages->grant_list[offset+i] != 0) {
39518 + msg->u.mapbufs.grants[i] =
39519 + bufpages->grant_list[offset+i];
39520 + continue;
39521 + }
39522 +
39523 + mfn = virt_to_mfn(bufpages->page_list[offset+i]);
39524 + VPRINTK("%s: Granting page %d, mfn %08x\n",
39525 + __FUNCTION__, i, mfn);
39526 +
39527 + bufpages->grant_list[offset+i] =
39528 + net_accel_grant_page(dev, mfn, 0);
39529 + msg->u.mapbufs.grants[i] = bufpages->grant_list[offset+i];
39530 +
39531 + if (msg->u.mapbufs.grants[i] < 0) {
39532 + EPRINTK("%s: Failed to grant buffer: %d\n",
39533 + __FUNCTION__, msg->u.mapbufs.grants[i]);
39534 + err = -EIO;
39535 + goto error;
39536 + }
39537 + }
39538 +
39539 +	/* This is interpreted on return as the offset into the page_list */
39540 + msg->u.mapbufs.reqid = offset;
39541 +
39542 + return 0;
39543 +
39544 +error:
39545 + /* Ungrant all the pages we've successfully granted. */
39546 + for (i--; i >= 0; i--) {
39547 + net_accel_ungrant_page(bufpages->grant_list[offset+i]);
39548 + bufpages->grant_list[offset+i] = 0;
39549 + }
39550 + return err;
39551 +}
39552 +
39553 +
39554 +/* Process a response to a buffer request. */
39555 +int netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages,
39556 + struct netfront_accel_bufinfo *manager,
39557 + struct net_accel_msg *msg)
39558 +{
39559 + int msg_pages, page_offset, i, newtot;
39560 + int old_block_count, new_block_count;
39561 + u32 msg_buf;
39562 + unsigned long flags;
39563 +
39564 + VPRINTK("%s: manager %p msg %p\n", __FUNCTION__, manager, msg);
39565 +
39566 + BUG_ON(msg->id != (NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY));
39567 +
39568 + msg_pages = msg->u.mapbufs.pages;
39569 + msg_buf = msg->u.mapbufs.buf;
39570 + page_offset = msg->u.mapbufs.reqid;
39571 +
39572 + spin_lock_irqsave(manager->lock, flags);
39573 + newtot = manager->npages + msg_pages;
39574 + old_block_count =
39575 + (manager->npages + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >>
39576 + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
39577 + new_block_count =
39578 + (newtot + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >>
39579 + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
39580 +
39581 + for (i = old_block_count; i < new_block_count; i++) {
39582 + struct netfront_accel_pkt_desc *block;
39583 + if (manager->desc_blocks[i] != NULL) {
39584 + VPRINTK("Not needed\n");
39585 + continue;
39586 + }
39587 + block = kzalloc(NETFRONT_ACCEL_BUFS_PER_BLOCK *
39588 + sizeof(netfront_accel_pkt_desc), GFP_ATOMIC);
39589 + if (block == NULL) {
39590 + spin_unlock_irqrestore(manager->lock, flags);
39591 + return -ENOMEM;
39592 + }
39593 + manager->desc_blocks[i] = block;
39594 + }
39595 + for (i = manager->npages; i < newtot; i++) {
39596 + int k, j = i - manager->npages;
39597 + int block_num;
39598 + int block_idx;
39599 + struct netfront_accel_pkt_desc *pkt;
39600 +
39601 + block_num = i >> NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
39602 + block_idx = (NETFRONT_ACCEL_BUFS_PER_PAGE*i)
39603 + & (NETFRONT_ACCEL_BUFS_PER_BLOCK-1);
39604 +
39605 + pkt = manager->desc_blocks[block_num] + block_idx;
39606 +
39607 + for (k = 0; k < NETFRONT_ACCEL_BUFS_PER_PAGE; k++) {
39608 + BUG_ON(page_offset + j >= bufpages->max_pages);
39609 +
39610 + pkt[k].buf_id = NETFRONT_ACCEL_BUFS_PER_PAGE * i + k;
39611 + pkt[k].pkt_kva = bufpages->page_list[page_offset + j] +
39612 + (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) * k;
39613 + pkt[k].pkt_buff_addr = msg_buf +
39614 + (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) *
39615 + (NETFRONT_ACCEL_BUFS_PER_PAGE * j + k);
39616 + pkt[k].next_free = manager->first_free;
39617 + manager->first_free = pkt[k].buf_id;
39618 + *(int*)(pkt[k].pkt_kva) = pkt[k].buf_id;
39619 +
39620 + VPRINTK("buf %d desc %p kva %p buffaddr %x\n",
39621 + pkt[k].buf_id, &(pkt[k]), pkt[k].pkt_kva,
39622 + pkt[k].pkt_buff_addr);
39623 + }
39624 + }
39625 + manager->npages = newtot;
39626 + spin_unlock_irqrestore(manager->lock, flags);
39627 + VPRINTK("Added %d pages. Total is now %d\n", msg_pages,
39628 + manager->npages);
39629 + return 0;
39630 +}
39631 +
39632 +
39633 +netfront_accel_pkt_desc *
39634 +netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id)
39635 +{
39636 + netfront_accel_pkt_desc *pkt;
39637 + int block_num = id >> NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT;
39638 + int block_idx = id & (NETFRONT_ACCEL_BUFS_PER_BLOCK - 1);
39639 + BUG_ON(id >= manager->npages * NETFRONT_ACCEL_BUFS_PER_PAGE);
39640 + BUG_ON(block_idx >= NETFRONT_ACCEL_BUFS_PER_BLOCK);
39641 + pkt = manager->desc_blocks[block_num] + block_idx;
39642 + return pkt;
39643 +}
39644 +
39645 +
39646 +/* Allocate a buffer from the buffer manager */
39647 +netfront_accel_pkt_desc *
39648 +netfront_accel_buf_get(struct netfront_accel_bufinfo *manager)
39649 +{
39650 + int bufno = -1;
39651 + netfront_accel_pkt_desc *buf = NULL;
39652 + unsigned long flags = 0;
39653 +
39654 + /* Any spare? */
39655 + if (manager->first_free == -1)
39656 + return NULL;
39657 + /* Take lock */
39658 + if (manager->internally_locked)
39659 + spin_lock_irqsave(manager->lock, flags);
39660 + bufno = manager->first_free;
39661 + if (bufno != -1) {
39662 + buf = netfront_accel_buf_find(manager, bufno);
39663 + manager->first_free = buf->next_free;
39664 + manager->nused++;
39665 + }
39666 + /* Release lock */
39667 + if (manager->internally_locked)
39668 + spin_unlock_irqrestore(manager->lock, flags);
39669 +
39670 + /* Tell the world */
39671 + VPRINTK("Allocated buffer %i, buffaddr %x\n", bufno,
39672 + buf->pkt_buff_addr);
39673 +
39674 + return buf;
39675 +}
39676 +
39677 +
39678 +/* Release a buffer back to the buffer manager pool */
39679 +int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager, u16 id)
39680 +{
39681 + netfront_accel_pkt_desc *buf = netfront_accel_buf_find(manager, id);
39682 + unsigned long flags = 0;
39683 + unsigned was_empty = 0;
39684 + int bufno = id;
39685 +
39686 + VPRINTK("Freeing buffer %i\n", id);
39687 + BUG_ON(id == (u16)-1);
39688 +
39689 + if (manager->internally_locked)
39690 + spin_lock_irqsave(manager->lock, flags);
39691 +
39692 + if (manager->first_free == -1)
39693 + was_empty = 1;
39694 +
39695 + buf->next_free = manager->first_free;
39696 + manager->first_free = bufno;
39697 + manager->nused--;
39698 +
39699 + if (manager->internally_locked)
39700 + spin_unlock_irqrestore(manager->lock, flags);
39701 +
39702 + return was_empty;
39703 +}
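For reference, a minimal sketch of the free-list API implemented above; my_buf_cycle is a hypothetical caller added only for illustration.

#include <linux/string.h>
#include "accel_bufs.h"

/* Take a buffer, scribble on its CPU mapping and hand it straight back.
 * Real callers keep the descriptor until the NIC completion arrives. */
static void my_buf_cycle(struct netfront_accel_bufinfo* mgr)
{
  netfront_accel_pkt_desc* buf = netfront_accel_buf_get(mgr);
  if( buf == NULL )
    return;                     /* first_free == -1: pool exhausted */

  /* pkt_kva is the kernel mapping; pkt_buff_addr is what the NIC uses. */
  memset(buf->pkt_kva, 0, PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE);

  /* Putting the id back pushes the buffer onto the head of the free list;
   * a non-zero return means the pool was empty before this put. */
  netfront_accel_buf_put(mgr, buf->buf_id);
}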
39704 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.h
39705 ===================================================================
39706 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
39707 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.h 2008-02-20 09:32:49.000000000 +0100
39708 @@ -0,0 +1,181 @@
39709 +/****************************************************************************
39710 + * Solarflare driver for Xen network acceleration
39711 + *
39712 + * Copyright 2006-2008: Solarflare Communications Inc,
39713 + * 9501 Jeronimo Road, Suite 250,
39714 + * Irvine, CA 92618, USA
39715 + *
39716 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
39717 + *
39718 + * This program is free software; you can redistribute it and/or modify it
39719 + * under the terms of the GNU General Public License version 2 as published
39720 + * by the Free Software Foundation, incorporated herein by reference.
39721 + *
39722 + * This program is distributed in the hope that it will be useful,
39723 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
39724 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39725 + * GNU General Public License for more details.
39726 + *
39727 + * You should have received a copy of the GNU General Public License
39728 + * along with this program; if not, write to the Free Software
39729 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
39730 + ****************************************************************************
39731 + */
39732 +
39733 +#ifndef NETFRONT_ACCEL_BUFS_H
39734 +#define NETFRONT_ACCEL_BUFS_H
39735 +
39736 +#include <linux/skbuff.h>
39737 +#include <linux/spinlock.h>
39738 +#include <xen/xenbus.h>
39739 +
39740 +#include "accel_msg_iface.h"
39741 +
39742 +
39743 +/*! Buffer descriptor structure */
39744 +typedef struct netfront_accel_pkt_desc {
39745 + int buf_id;
39746 + u32 pkt_buff_addr;
39747 + void *pkt_kva;
39748 + /* This is the socket buffer currently married to this buffer */
39749 + struct sk_buff *skb;
39750 + int next_free;
39751 +} netfront_accel_pkt_desc;
39752 +
39753 +
39754 +#define NETFRONT_ACCEL_DEFAULT_BUF_PAGES (384)
39755 +#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT (4)
39756 +#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK \
39757 + (1 << (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT))
39758 +#define NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT (1)
39759 +#define NETFRONT_ACCEL_BUFS_PER_PAGE \
39760 + (1 << (NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT))
39761 +#define NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT \
39762 + (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT + \
39763 + NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT)
39764 +#define NETFRONT_ACCEL_BUFS_PER_BLOCK \
39765 + (1 << NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT)
39766 +#define NETFRONT_ACCEL_BUF_NUM_BLOCKS(max_pages) \
39767 + (((max_pages)+NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK-1) / \
39768 + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK)
39769 +
39770 +/*! Buffer management structure. */
39771 +struct netfront_accel_bufinfo {
39772 + /* number added to this manager */
39773 + unsigned npages;
39774 + /* number currently used from this manager */
39775 + unsigned nused;
39776 +
39777 + int first_free;
39778 +
39779 + int internally_locked;
39780 + spinlock_t *lock;
39781 +
39782 + /*
39783 + * array of pointers (length NETFRONT_ACCEL_BUF_NUM_BLOCKS) to
39784 + * pkt descs
39785 + */
39786 + struct netfront_accel_pkt_desc **desc_blocks;
39787 +};
39788 +
39789 +
39790 +struct netfront_accel_bufpages {
39791 + /* length of lists of pages/grants */
39792 + int max_pages;
39793 + /* list of pages allocated for network buffers */
39794 + void **page_list;
39795 + /* list of grants for the above pages */
39796 + grant_ref_t *grant_list;
39797 +
39798 + /* number of page requests that have been made */
39799 + unsigned page_reqs;
39800 +};
39801 +
39802 +
39803 +/*! Allocate memory for the buffer manager, set up locks etc.
39804 + * Optionally takes a lock to use; if none is supplied it allocates its own.
39805 + *
39806 + * \return pointer to netfront_accel_bufinfo structure that represents the
39807 + * buffer manager
39808 + */
39809 +extern struct netfront_accel_bufinfo *
39810 +netfront_accel_init_bufs(spinlock_t *lock);
39811 +
39812 +/*! Allocate memory for the buffers
39813 + */
39814 +extern int
39815 +netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages,
39816 + struct netfront_accel_bufinfo *rx_res,
39817 + struct netfront_accel_bufinfo *tx_res,
39818 + int pages);
39819 +extern void
39820 +netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages,
39821 + struct netfront_accel_bufinfo *rx_res,
39822 + struct netfront_accel_bufinfo *tx_res);
39823 +
39824 +/*! Release memory for the buffer manager, buffers, etc.
39825 + *
39826 + * \param manager pointer to netfront_accel_bufinfo structure that
39827 + * represents the buffer manager
39828 + */
39829 +extern void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *manager);
39830 +
39831 +/*! Release a buffer.
39832 + *
39833 + * \param manager The buffer manager which owns the buffer.
39834 + * \param id The buffer identifier.
39835 + */
39836 +extern int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager,
39837 + u16 id);
39838 +
39839 +/*! Get the packet descriptor associated with a buffer id.
39840 + *
39841 + * \param manager The buffer manager which owns the buffer.
39842 + * \param id The buffer identifier.
39843 + *
39844 + * The returned value is the packet descriptor for this buffer.
39845 + */
39846 +extern netfront_accel_pkt_desc *
39847 +netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id);
39848 +
39849 +
39850 +/*! Fill out a message request for some buffers to be mapped by the
39851 + * back end driver
39852 + *
39853 + * \param manager The buffer manager
39854 + * \param msg Pointer to an ef_msg to complete.
39855 + * \return 0 on success
39856 + */
39857 +extern int
39858 +netfront_accel_buf_map_request(struct xenbus_device *dev,
39859 + struct netfront_accel_bufpages *bufpages,
39860 + struct net_accel_msg *msg,
39861 + int pages, int offset);
39862 +
39863 +/*! Process a response to a buffer request.
39864 + *
39865 + * Deal with a received message from the back end in response to our
39866 + * request for buffers
39867 + *
39868 + * \param manager The buffer manager
39869 + * \param msg The received message from the back end describing new
39870 + * buffers
39871 + * \return 0 on success
39872 + */
39873 +extern int
39874 +netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages,
39875 + struct netfront_accel_bufinfo *manager,
39876 + struct net_accel_msg *msg);
39877 +
39878 +
39879 +/*! Allocate a buffer from the buffer manager
39880 + *
39881 + * \param manager The buffer manager data structure
39883 + * \return Pointer to the allocated buffer descriptor, or NULL if none is free.
39884 + */
39885 +struct netfront_accel_pkt_desc *
39886 +netfront_accel_buf_get(struct netfront_accel_bufinfo *manager);
39887 +
39888 +#endif /* NETFRONT_ACCEL_BUFS_H */
39889 +
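For reference, the buffer geometry these macros imply, assuming 4 KiB pages (the x86 default):

  NETFRONT_ACCEL_BUFS_PER_PAGE       = 1 << 1 = 2   (each buffer is PAGE_SIZE/2 = 2 KiB)
  NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK = 1 << 4 = 16
  NETFRONT_ACCEL_BUFS_PER_BLOCK      = 1 << 5 = 32
  NETFRONT_ACCEL_DEFAULT_BUF_PAGES   = 384 pages, i.e. 768 buffers spread over
  NETFRONT_ACCEL_BUF_NUM_BLOCKS(384) = 24 descriptor blocks.

A buffer id decomposes as in netfront_accel_buf_find(): for example, id 70 gives block_num = 70 >> 5 = 2 and block_idx = 70 & 31 = 6.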
39890 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_debugfs.c
39891 ===================================================================
39892 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
39893 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_debugfs.c 2008-02-26 10:54:12.000000000 +0100
39894 @@ -0,0 +1,211 @@
39895 +/****************************************************************************
39896 + * Solarflare driver for Xen network acceleration
39897 + *
39898 + * Copyright 2006-2008: Solarflare Communications Inc,
39899 + * 9501 Jeronimo Road, Suite 250,
39900 + * Irvine, CA 92618, USA
39901 + *
39902 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
39903 + *
39904 + * This program is free software; you can redistribute it and/or modify it
39905 + * under the terms of the GNU General Public License version 2 as published
39906 + * by the Free Software Foundation, incorporated herein by reference.
39907 + *
39908 + * This program is distributed in the hope that it will be useful,
39909 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
39910 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39911 + * GNU General Public License for more details.
39912 + *
39913 + * You should have received a copy of the GNU General Public License
39914 + * along with this program; if not, write to the Free Software
39915 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
39916 + ****************************************************************************
39917 + */
39918 +
39919 +#include <linux/fs.h>
39920 +#include <linux/debugfs.h>
39921 +
39922 +#include "accel.h"
39923 +
39924 +#if defined(CONFIG_DEBUG_FS)
39925 +static struct dentry *sfc_debugfs_root = NULL;
39926 +#endif
39927 +
39928 +void netfront_accel_debugfs_init(void)
39929 +{
39930 +#if defined(CONFIG_DEBUG_FS)
39931 + sfc_debugfs_root = debugfs_create_dir(frontend_name, NULL);
39932 +#endif
39933 +}
39934 +
39935 +
39936 +void netfront_accel_debugfs_fini(void)
39937 +{
39938 +#if defined(CONFIG_DEBUG_FS)
39939 + if (sfc_debugfs_root)
39940 + debugfs_remove(sfc_debugfs_root);
39941 +#endif
39942 +}
39943 +
39944 +
39945 +int netfront_accel_debugfs_create(netfront_accel_vnic *vnic)
39946 +{
39947 +#if defined(CONFIG_DEBUG_FS)
39948 + if (sfc_debugfs_root == NULL)
39949 + return -ENOENT;
39950 +
39951 + vnic->dbfs_dir = debugfs_create_dir(vnic->net_dev->name,
39952 + sfc_debugfs_root);
39953 + if (vnic->dbfs_dir == NULL)
39954 + return -ENOMEM;
39955 +
39956 + vnic->netdev_dbfs.fastpath_rx_pkts = debugfs_create_u32
39957 + ("fastpath_rx_pkts", S_IRUSR | S_IRGRP | S_IROTH,
39958 + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_pkts);
39959 + vnic->netdev_dbfs.fastpath_rx_bytes = debugfs_create_u32
39960 + ("fastpath_rx_bytes", S_IRUSR | S_IRGRP | S_IROTH,
39961 + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_bytes);
39962 + vnic->netdev_dbfs.fastpath_rx_errors = debugfs_create_u32
39963 + ("fastpath_rx_errors", S_IRUSR | S_IRGRP | S_IROTH,
39964 + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_errors);
39965 + vnic->netdev_dbfs.fastpath_tx_pkts = debugfs_create_u32
39966 + ("fastpath_tx_pkts", S_IRUSR | S_IRGRP | S_IROTH,
39967 + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_pkts);
39968 + vnic->netdev_dbfs.fastpath_tx_bytes = debugfs_create_u32
39969 + ("fastpath_tx_bytes", S_IRUSR | S_IRGRP | S_IROTH,
39970 + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_bytes);
39971 + vnic->netdev_dbfs.fastpath_tx_errors = debugfs_create_u32
39972 + ("fastpath_tx_errors", S_IRUSR | S_IRGRP | S_IROTH,
39973 + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_errors);
39974 +
39975 +#if NETFRONT_ACCEL_STATS
39976 + vnic->dbfs.irq_count = debugfs_create_u64
39977 + ("irq_count", S_IRUSR | S_IRGRP | S_IROTH,
39978 + vnic->dbfs_dir, &vnic->stats.irq_count);
39979 + vnic->dbfs.useless_irq_count = debugfs_create_u64
39980 + ("useless_irq_count", S_IRUSR | S_IRGRP | S_IROTH,
39981 + vnic->dbfs_dir, &vnic->stats.useless_irq_count);
39982 + vnic->dbfs.poll_schedule_count = debugfs_create_u64
39983 + ("poll_schedule_count", S_IRUSR | S_IRGRP | S_IROTH,
39984 + vnic->dbfs_dir, &vnic->stats.poll_schedule_count);
39985 + vnic->dbfs.poll_call_count = debugfs_create_u64
39986 + ("poll_call_count", S_IRUSR | S_IRGRP | S_IROTH,
39987 + vnic->dbfs_dir, &vnic->stats.poll_call_count);
39988 + vnic->dbfs.poll_reschedule_count = debugfs_create_u64
39989 + ("poll_reschedule_count", S_IRUSR | S_IRGRP | S_IROTH,
39990 + vnic->dbfs_dir, &vnic->stats.poll_reschedule_count);
39991 + vnic->dbfs.queue_stops = debugfs_create_u64
39992 + ("queue_stops", S_IRUSR | S_IRGRP | S_IROTH,
39993 + vnic->dbfs_dir, &vnic->stats.queue_stops);
39994 + vnic->dbfs.queue_wakes = debugfs_create_u64
39995 + ("queue_wakes", S_IRUSR | S_IRGRP | S_IROTH,
39996 + vnic->dbfs_dir, &vnic->stats.queue_wakes);
39997 + vnic->dbfs.ssr_bursts = debugfs_create_u64
39998 + ("ssr_bursts", S_IRUSR | S_IRGRP | S_IROTH,
39999 + vnic->dbfs_dir, &vnic->stats.ssr_bursts);
40000 + vnic->dbfs.ssr_drop_stream = debugfs_create_u64
40001 + ("ssr_drop_stream", S_IRUSR | S_IRGRP | S_IROTH,
40002 + vnic->dbfs_dir, &vnic->stats.ssr_drop_stream);
40003 + vnic->dbfs.ssr_misorder = debugfs_create_u64
40004 + ("ssr_misorder", S_IRUSR | S_IRGRP | S_IROTH,
40005 + vnic->dbfs_dir, &vnic->stats.ssr_misorder);
40006 + vnic->dbfs.ssr_slow_start = debugfs_create_u64
40007 + ("ssr_slow_start", S_IRUSR | S_IRGRP | S_IROTH,
40008 + vnic->dbfs_dir, &vnic->stats.ssr_slow_start);
40009 + vnic->dbfs.ssr_merges = debugfs_create_u64
40010 + ("ssr_merges", S_IRUSR | S_IRGRP | S_IROTH,
40011 + vnic->dbfs_dir, &vnic->stats.ssr_merges);
40012 + vnic->dbfs.ssr_too_many = debugfs_create_u64
40013 + ("ssr_too_many", S_IRUSR | S_IRGRP | S_IROTH,
40014 + vnic->dbfs_dir, &vnic->stats.ssr_too_many);
40015 + vnic->dbfs.ssr_new_stream = debugfs_create_u64
40016 + ("ssr_new_stream", S_IRUSR | S_IRGRP | S_IROTH,
40017 + vnic->dbfs_dir, &vnic->stats.ssr_new_stream);
40018 +
40019 + vnic->dbfs.fastpath_tx_busy = debugfs_create_u64
40020 + ("fastpath_tx_busy", S_IRUSR | S_IRGRP | S_IROTH,
40021 + vnic->dbfs_dir, &vnic->stats.fastpath_tx_busy);
40022 + vnic->dbfs.fastpath_tx_completions = debugfs_create_u64
40023 + ("fastpath_tx_completions", S_IRUSR | S_IRGRP | S_IROTH,
40024 + vnic->dbfs_dir, &vnic->stats.fastpath_tx_completions);
40025 + vnic->dbfs.fastpath_tx_pending_max = debugfs_create_u32
40026 + ("fastpath_tx_pending_max", S_IRUSR | S_IRGRP | S_IROTH,
40027 + vnic->dbfs_dir, &vnic->stats.fastpath_tx_pending_max);
40028 + vnic->dbfs.event_count = debugfs_create_u64
40029 + ("event_count", S_IRUSR | S_IRGRP | S_IROTH,
40030 + vnic->dbfs_dir, &vnic->stats.event_count);
40031 + vnic->dbfs.bad_event_count = debugfs_create_u64
40032 + ("bad_event_count", S_IRUSR | S_IRGRP | S_IROTH,
40033 + vnic->dbfs_dir, &vnic->stats.bad_event_count);
40034 + vnic->dbfs.event_count_since_irq = debugfs_create_u32
40035 + ("event_count_since_irq", S_IRUSR | S_IRGRP | S_IROTH,
40036 + vnic->dbfs_dir, &vnic->stats.event_count_since_irq);
40037 + vnic->dbfs.events_per_irq_max = debugfs_create_u32
40038 + ("events_per_irq_max", S_IRUSR | S_IRGRP | S_IROTH,
40039 + vnic->dbfs_dir, &vnic->stats.events_per_irq_max);
40040 + vnic->dbfs.fastpath_frm_trunc = debugfs_create_u64
40041 + ("fastpath_frm_trunc", S_IRUSR | S_IRGRP | S_IROTH,
40042 + vnic->dbfs_dir, &vnic->stats.fastpath_frm_trunc);
40043 + vnic->dbfs.rx_no_desc_trunc = debugfs_create_u64
40044 + ("rx_no_desc_trunc", S_IRUSR | S_IRGRP | S_IROTH,
40045 + vnic->dbfs_dir, &vnic->stats.rx_no_desc_trunc);
40046 + vnic->dbfs.events_per_poll_max = debugfs_create_u32
40047 + ("events_per_poll_max", S_IRUSR | S_IRGRP | S_IROTH,
40048 + vnic->dbfs_dir, &vnic->stats.events_per_poll_max);
40049 + vnic->dbfs.events_per_poll_rx_max = debugfs_create_u32
40050 + ("events_per_poll_rx_max", S_IRUSR | S_IRGRP | S_IROTH,
40051 + vnic->dbfs_dir, &vnic->stats.events_per_poll_rx_max);
40052 + vnic->dbfs.events_per_poll_tx_max = debugfs_create_u32
40053 + ("events_per_poll_tx_max", S_IRUSR | S_IRGRP | S_IROTH,
40054 + vnic->dbfs_dir, &vnic->stats.events_per_poll_tx_max);
40055 +#endif
40056 +#endif
40057 + return 0;
40058 +}
40059 +
40060 +
40061 +int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic)
40062 +{
40063 +#if defined(CONFIG_DEBUG_FS)
40064 + if (vnic->dbfs_dir != NULL) {
40065 + debugfs_remove(vnic->netdev_dbfs.fastpath_rx_pkts);
40066 + debugfs_remove(vnic->netdev_dbfs.fastpath_rx_bytes);
40067 + debugfs_remove(vnic->netdev_dbfs.fastpath_rx_errors);
40068 + debugfs_remove(vnic->netdev_dbfs.fastpath_tx_pkts);
40069 + debugfs_remove(vnic->netdev_dbfs.fastpath_tx_bytes);
40070 + debugfs_remove(vnic->netdev_dbfs.fastpath_tx_errors);
40071 +
40072 +#if NETFRONT_ACCEL_STATS
40073 + debugfs_remove(vnic->dbfs.irq_count);
40074 + debugfs_remove(vnic->dbfs.useless_irq_count);
40075 + debugfs_remove(vnic->dbfs.poll_schedule_count);
40076 + debugfs_remove(vnic->dbfs.poll_call_count);
40077 + debugfs_remove(vnic->dbfs.poll_reschedule_count);
40078 + debugfs_remove(vnic->dbfs.queue_stops);
40079 + debugfs_remove(vnic->dbfs.queue_wakes);
40080 + debugfs_remove(vnic->dbfs.ssr_bursts);
40081 + debugfs_remove(vnic->dbfs.ssr_drop_stream);
40082 + debugfs_remove(vnic->dbfs.ssr_misorder);
40083 + debugfs_remove(vnic->dbfs.ssr_slow_start);
40084 + debugfs_remove(vnic->dbfs.ssr_merges);
40085 + debugfs_remove(vnic->dbfs.ssr_too_many);
40086 + debugfs_remove(vnic->dbfs.ssr_new_stream);
40087 +
40088 + debugfs_remove(vnic->dbfs.fastpath_tx_busy);
40089 + debugfs_remove(vnic->dbfs.fastpath_tx_completions);
40090 + debugfs_remove(vnic->dbfs.fastpath_tx_pending_max);
40091 + debugfs_remove(vnic->dbfs.event_count);
40092 + debugfs_remove(vnic->dbfs.bad_event_count);
40093 + debugfs_remove(vnic->dbfs.event_count_since_irq);
40094 + debugfs_remove(vnic->dbfs.events_per_irq_max);
40095 + debugfs_remove(vnic->dbfs.fastpath_frm_trunc);
40096 + debugfs_remove(vnic->dbfs.rx_no_desc_trunc);
40097 + debugfs_remove(vnic->dbfs.events_per_poll_max);
40098 + debugfs_remove(vnic->dbfs.events_per_poll_rx_max);
40099 + debugfs_remove(vnic->dbfs.events_per_poll_tx_max);
40100 +#endif
40101 + debugfs_remove(vnic->dbfs_dir);
40102 + }
40103 +#endif
40104 + return 0;
40105 +}
40106 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_msg.c
40107 ===================================================================
40108 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
40109 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_msg.c 2008-02-26 10:54:12.000000000 +0100
40110 @@ -0,0 +1,566 @@
40111 +/****************************************************************************
40112 + * Solarflare driver for Xen network acceleration
40113 + *
40114 + * Copyright 2006-2008: Solarflare Communications Inc,
40115 + * 9501 Jeronimo Road, Suite 250,
40116 + * Irvine, CA 92618, USA
40117 + *
40118 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
40119 + *
40120 + * This program is free software; you can redistribute it and/or modify it
40121 + * under the terms of the GNU General Public License version 2 as published
40122 + * by the Free Software Foundation, incorporated herein by reference.
40123 + *
40124 + * This program is distributed in the hope that it will be useful,
40125 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
40126 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
40127 + * GNU General Public License for more details.
40128 + *
40129 + * You should have received a copy of the GNU General Public License
40130 + * along with this program; if not, write to the Free Software
40131 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
40132 + ****************************************************************************
40133 + */
40134 +
40135 +#include <linux/stddef.h>
40136 +#include <linux/errno.h>
40137 +
40138 +#include <xen/xenbus.h>
40139 +
40140 +#include "accel.h"
40141 +#include "accel_msg_iface.h"
40142 +#include "accel_util.h"
40143 +#include "accel_bufs.h"
40144 +
40145 +#include "netfront.h" /* drivers/xen/netfront/netfront.h */
40146 +
40147 +static void vnic_start_interrupts(netfront_accel_vnic *vnic)
40148 +{
40149 + unsigned long flags;
40150 +
40151 + /* Prime our interrupt */
40152 + spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
40153 + if (!netfront_accel_vi_enable_interrupts(vnic)) {
40154 + /* Cripes, that was quick, better pass it up */
40155 + netfront_accel_disable_net_interrupts(vnic);
40156 + vnic->irq_enabled = 0;
40157 + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++);
40158 + netif_rx_schedule(vnic->net_dev);
40159 + } else {
40160 + /*
40161 + * Nothing yet, make sure we get interrupts through
40162 + * back end
40163 + */
40164 + vnic->irq_enabled = 1;
40165 + netfront_accel_enable_net_interrupts(vnic);
40166 + }
40167 + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40168 +}
40169 +
40170 +
40171 +static void vnic_stop_interrupts(netfront_accel_vnic *vnic)
40172 +{
40173 + unsigned long flags;
40174 +
40175 + spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
40176 + netfront_accel_disable_net_interrupts(vnic);
40177 + vnic->irq_enabled = 0;
40178 + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40179 +}
40180 +
40181 +
40182 +static void vnic_start_fastpath(netfront_accel_vnic *vnic)
40183 +{
40184 + struct net_device *net_dev = vnic->net_dev;
40185 + unsigned long flags;
40186 +
40187 + DPRINTK("%s\n", __FUNCTION__);
40188 +
40189 + spin_lock_irqsave(&vnic->tx_lock, flags);
40190 + vnic->tx_enabled = 1;
40191 + spin_unlock_irqrestore(&vnic->tx_lock, flags);
40192 +
40193 + netif_poll_disable(net_dev);
40194 + vnic->poll_enabled = 1;
40195 + netif_poll_enable(net_dev);
40196 +
40197 + vnic_start_interrupts(vnic);
40198 +}
40199 +
40200 +
40201 +void vnic_stop_fastpath(netfront_accel_vnic *vnic)
40202 +{
40203 + struct net_device *net_dev = vnic->net_dev;
40204 + struct netfront_info *np = (struct netfront_info *)netdev_priv(net_dev);
40205 + unsigned long flags1, flags2;
40206 +
40207 + DPRINTK("%s\n", __FUNCTION__);
40208 +
40209 + vnic_stop_interrupts(vnic);
40210 +
40211 + spin_lock_irqsave(&vnic->tx_lock, flags1);
40212 + vnic->tx_enabled = 0;
40213 + spin_lock_irqsave(&np->tx_lock, flags2);
40214 + if (vnic->tx_skb != NULL) {
40215 + dev_kfree_skb_any(vnic->tx_skb);
40216 + vnic->tx_skb = NULL;
40217 + if (netfront_check_queue_ready(net_dev)) {
40218 + netif_wake_queue(net_dev);
40219 + NETFRONT_ACCEL_STATS_OP
40220 + (vnic->stats.queue_wakes++);
40221 + }
40222 + }
40223 + spin_unlock_irqrestore(&np->tx_lock, flags2);
40224 + spin_unlock_irqrestore(&vnic->tx_lock, flags1);
40225 +
40226 + /* Must prevent polls and hold lock to modify poll_enabled */
40227 + netif_poll_disable(net_dev);
40228 + spin_lock_irqsave(&vnic->irq_enabled_lock, flags1);
40229 + vnic->poll_enabled = 0;
40230 + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags1);
40231 + netif_poll_enable(net_dev);
40232 +}
40233 +
40234 +
40235 +static void netfront_accel_interface_up(netfront_accel_vnic *vnic)
40236 +{
40237 +
40238 + if (!vnic->backend_netdev_up) {
40239 + vnic->backend_netdev_up = 1;
40240 +
40241 + if (vnic->frontend_ready)
40242 + vnic_start_fastpath(vnic);
40243 + }
40244 +}
40245 +
40246 +
40247 +static void netfront_accel_interface_down(netfront_accel_vnic *vnic)
40248 +{
40249 +
40250 + if (vnic->backend_netdev_up) {
40251 + vnic->backend_netdev_up = 0;
40252 +
40253 + if (vnic->frontend_ready)
40254 + vnic_stop_fastpath(vnic);
40255 + }
40256 +}
40257 +
40258 +
40259 +static int vnic_add_bufs(netfront_accel_vnic *vnic,
40260 + struct net_accel_msg *msg)
40261 +{
40262 + int rc, offset;
40263 + struct netfront_accel_bufinfo *bufinfo;
40264 +
40265 + BUG_ON(msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ);
40266 +
40267 + offset = msg->u.mapbufs.reqid;
40268 +
40269 + if (offset < vnic->bufpages.max_pages -
40270 + (vnic->bufpages.max_pages / sfc_netfront_buffer_split)) {
40271 + bufinfo = vnic->rx_bufs;
40272 + } else
40273 + bufinfo = vnic->tx_bufs;
40274 +
40275 + /* Queue up some Rx buffers to start things off. */
40276 + if ((rc = netfront_accel_add_bufs(&vnic->bufpages, bufinfo, msg)) == 0) {
40277 + netfront_accel_vi_add_bufs(vnic, bufinfo == vnic->rx_bufs);
40278 +
40279 + if (offset + msg->u.mapbufs.pages == vnic->bufpages.max_pages) {
40280 + VPRINTK("%s: got all buffers back\n", __FUNCTION__);
40281 + vnic->frontend_ready = 1;
40282 + if (vnic->backend_netdev_up)
40283 + vnic_start_fastpath(vnic);
40284 + } else {
40285 + VPRINTK("%s: got buffers back %d %d\n", __FUNCTION__,
40286 + offset, msg->u.mapbufs.pages);
40287 + }
40288 + }
40289 +
40290 + return rc;
40291 +}
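To put the RX/TX split above into concrete (hypothetical) numbers: with max_pages of 80 and the default sfc_netfront_buffer_split of 2, the threshold is 80 - 80/2 = 40, so map-buffer replies whose reqid falls in 0-39 are added to the RX buffer manager and those in 40-79 to the TX buffer manager.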
40292 +
40293 +
40294 +/* The largest order such that (1ul << order) <= n. Requires n > 0. */
40295 +
40296 +inline unsigned log2_le(unsigned long n) {
40297 + unsigned order = 1;
40298 + while ((1ul << order) <= n) ++order;
40299 + return (order - 1);
40300 +}
40301 +
40302 +static int vnic_send_buffer_requests(netfront_accel_vnic *vnic,
40303 + struct netfront_accel_bufpages *bufpages)
40304 +{
40305 + int pages, offset, rc = 0, sent = 0;
40306 + struct net_accel_msg msg;
40307 +
40308 + while (bufpages->page_reqs < bufpages->max_pages) {
40309 + offset = bufpages->page_reqs;
40310 +
40311 + pages = pow2(log2_le(bufpages->max_pages -
40312 + bufpages->page_reqs));
40313 + pages = pages < NET_ACCEL_MSG_MAX_PAGE_REQ ?
40314 + pages : NET_ACCEL_MSG_MAX_PAGE_REQ;
40315 +
40316 + BUG_ON(offset < 0);
40317 + BUG_ON(pages <= 0);
40318 +
40319 + rc = netfront_accel_buf_map_request(vnic->dev, bufpages,
40320 + &msg, pages, offset);
40321 + if (rc == 0) {
40322 + rc = net_accel_msg_send(vnic->shared_page,
40323 + &vnic->to_dom0, &msg);
40324 + if (rc < 0) {
40325 + VPRINTK("%s: queue full, stopping for now\n",
40326 + __FUNCTION__);
40327 + break;
40328 + }
40329 + sent++;
40330 + } else {
40331 + EPRINTK("%s: problem with grant, stopping for now\n",
40332 + __FUNCTION__);
40333 + break;
40334 + }
40335 +
40336 + bufpages->page_reqs += pages;
40337 + }
40338 +
40339 + if (sent)
40340 + net_accel_msg_notify(vnic->msg_channel_irq);
40341 +
40342 + return rc;
40343 +}
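A worked example of the request-sizing loop above, assuming NET_ACCEL_MSG_MAX_PAGE_REQ is at least 32 (its real value is defined elsewhere in the patch): with 48 pages still to request, log2_le(48) is 5, so the first message asks for pow2(5) = 32 pages starting at the current page_reqs offset; the next iteration sees 16 pages remaining and asks for exactly 16, after which page_reqs equals max_pages and the loop ends.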
40344 +
40345 +
40346 +/*
40347 + * In response to dom0 saying "my queue is full", we reply with this
40348 + * when it is no longer full
40349 + */
40350 +inline void vnic_set_queue_not_full(netfront_accel_vnic *vnic)
40351 +{
40352 +
40353 + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B,
40354 + (unsigned long *)&vnic->shared_page->aflags))
40355 + notify_remote_via_irq(vnic->msg_channel_irq);
40356 + else
40357 + VPRINTK("queue not full bit already set, not signalling\n");
40358 +}
40359 +
40360 +/*
40361 + * Notify dom0 that the queue we want to use is full; it should
40362 + * respond by setting MSG_AFLAGS_QUEUEUNOTFULL in due course
40363 + */
40364 +inline void vnic_set_queue_full(netfront_accel_vnic *vnic)
40365 +{
40366 +
40367 + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B,
40368 + (unsigned long *)&vnic->shared_page->aflags))
40369 + notify_remote_via_irq(vnic->msg_channel_irq);
40370 + else
40371 + VPRINTK("queue full bit already set, not signalling\n");
40372 +}
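Both helpers rely on test_and_set_bit returning the previous value of the flag, so the event channel is only kicked on a 0-to-1 transition: if, say, two consecutive sends both find the domU-to-dom0 queue full, only the first raises msg_channel_irq and the second merely logs that the bit was already set; the peer is then expected to clear the flag once it has acted on it.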
40373 +
40374 +
40375 +static int vnic_check_hello_version(unsigned version)
40376 +{
40377 + if (version > NET_ACCEL_MSG_VERSION) {
40378 + /* Newer protocol, we must refuse */
40379 + return -EPROTO;
40380 + }
40381 +
40382 + if (version < NET_ACCEL_MSG_VERSION) {
40383 + /*
40384 + * We are newer, so have discretion to accept if we
40385 + * wish. For now however, just reject
40386 + */
40387 + return -EPROTO;
40388 + }
40389 +
40390 + BUG_ON(version != NET_ACCEL_MSG_VERSION);
40391 + return 0;
40392 +}
40393 +
40394 +
40395 +static int vnic_process_hello_msg(netfront_accel_vnic *vnic,
40396 + struct net_accel_msg *msg)
40397 +{
40398 + int err = 0;
40399 + unsigned pages = sfc_netfront_max_pages;
40400 +
40401 + if (vnic_check_hello_version(msg->u.hello.version) < 0) {
40402 + msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY
40403 + | NET_ACCEL_MSG_ERROR;
40404 + msg->u.hello.version = NET_ACCEL_MSG_VERSION;
40405 + } else {
40406 + vnic->backend_netdev_up
40407 + = vnic->shared_page->net_dev_up;
40408 +
40409 + msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY;
40410 + msg->u.hello.version = NET_ACCEL_MSG_VERSION;
40411 + if (msg->u.hello.max_pages &&
40412 + msg->u.hello.max_pages < pages)
40413 + pages = msg->u.hello.max_pages;
40414 + msg->u.hello.max_pages = pages;
40415 +
40416 + /* Half of pages for rx, half for tx */
40417 + err = netfront_accel_alloc_buffer_mem(&vnic->bufpages,
40418 + vnic->rx_bufs,
40419 + vnic->tx_bufs,
40420 + pages);
40421 + if (err)
40422 + msg->id |= NET_ACCEL_MSG_ERROR;
40423 + }
40424 +
40425 + /* Send reply */
40426 + net_accel_msg_reply_notify(vnic->shared_page, vnic->msg_channel_irq,
40427 + &vnic->to_dom0, msg);
40428 + return err;
40429 +}
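For example (hypothetical figures), if the backend's hello carries a compatible version and advertises max_pages = 64 while sfc_netfront_max_pages is larger, the frontend clamps pages to 64, echoes 64 back in the reply, and passes it to netfront_accel_alloc_buffer_mem to be divided between the RX and TX buffer managers; had the advertised version been newer than NET_ACCEL_MSG_VERSION, the reply would instead have carried NET_ACCEL_MSG_ERROR and no buffers would be allocated.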
40430 +
40431 +
40432 +static int vnic_process_localmac_msg(netfront_accel_vnic *vnic,
40433 + struct net_accel_msg *msg)
40434 +{
40435 + unsigned long flags;
40436 + cuckoo_hash_mac_key key;
40437 +
40438 + if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) {
40439 + DPRINTK("MAC has moved, could be local: " MAC_FMT "\n",
40440 + MAC_ARG(msg->u.localmac.mac));
40441 + key = cuckoo_mac_to_key(msg->u.localmac.mac);
40442 + spin_lock_irqsave(&vnic->table_lock, flags);
40443 + /* Try to remove it, not a big deal if not there */
40444 + cuckoo_hash_remove(&vnic->fastpath_table,
40445 + (cuckoo_hash_key *)&key);
40446 + spin_unlock_irqrestore(&vnic->table_lock, flags);
40447 + }
40448 +
40449 + return 0;
40450 +}
40451 +
40452 +
40453 +static
40454 +int vnic_process_rx_msg(netfront_accel_vnic *vnic,
40455 + struct net_accel_msg *msg)
40456 +{
40457 + int err;
40458 +
40459 + switch (msg->id) {
40460 + case NET_ACCEL_MSG_HELLO:
40461 + /* Hello, reply with Reply */
40462 + DPRINTK("got Hello, with version %.8x\n",
40463 + msg->u.hello.version);
40464 + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_NONE);
40465 + err = vnic_process_hello_msg(vnic, msg);
40466 + if (err == 0)
40467 + vnic->msg_state = NETFRONT_ACCEL_MSG_HELLO;
40468 + break;
40469 + case NET_ACCEL_MSG_SETHW:
40470 + /* Hardware info message */
40471 + DPRINTK("got H/W info\n");
40472 + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HELLO);
40473 + err = netfront_accel_vi_init(vnic, &msg->u.hw);
40474 + if (err == 0)
40475 + vnic->msg_state = NETFRONT_ACCEL_MSG_HW;
40476 + break;
40477 + case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY:
40478 + VPRINTK("Got mapped buffers back\n");
40479 + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
40480 + err = vnic_add_bufs(vnic, msg);
40481 + break;
40482 + case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_ERROR:
40483 + /* No buffers. Can't use the fast path. */
40484 + EPRINTK("Got mapped buffers error. Cannot accelerate.\n");
40485 + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
40486 + err = -EIO;
40487 + break;
40488 + case NET_ACCEL_MSG_LOCALMAC:
40489 + /* Should be add, remove not currently used */
40490 + EPRINTK_ON(!(msg->u.localmac.flags & NET_ACCEL_MSG_ADD));
40491 + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
40492 + err = vnic_process_localmac_msg(vnic, msg);
40493 + break;
40494 + default:
40495 + EPRINTK("Huh? Message code is 0x%x\n", msg->id);
40496 + err = -EPROTO;
40497 + break;
40498 + }
40499 +
40500 + return err;
40501 +}
40502 +
40503 +
40504 +/* Process an IRQ received from back end driver */
40505 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
40506 +void netfront_accel_msg_from_bend(struct work_struct *context)
40507 +#else
40508 +void netfront_accel_msg_from_bend(void *context)
40509 +#endif
40510 +{
40511 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
40512 + netfront_accel_vnic *vnic =
40513 + container_of(context, netfront_accel_vnic, msg_from_bend);
40514 +#else
40515 + netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
40516 +#endif
40517 + struct net_accel_msg msg;
40518 + int err, queue_was_full = 0;
40519 +
40520 + mutex_lock(&vnic->vnic_mutex);
40521 +
40522 + /*
40523 + * This happens when the shared pages have been unmapped but
40524 + * the workqueue has yet to be flushed
40525 + */
40526 + if (!vnic->dom0_state_is_setup)
40527 + goto unlock_out;
40528 +
40529 + while ((vnic->shared_page->aflags & NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK)
40530 + != 0) {
40531 + if (vnic->shared_page->aflags &
40532 + NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL) {
40533 + /* We've been told there may now be space. */
40534 + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B,
40535 + (unsigned long *)&vnic->shared_page->aflags);
40536 + }
40537 +
40538 + if (vnic->shared_page->aflags &
40539 + NET_ACCEL_MSG_AFLAGS_QUEUE0FULL) {
40540 + /*
40541 + * There will be space at the end of this
40542 + * function if we can make any.
40543 + */
40544 + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B,
40545 + (unsigned long *)&vnic->shared_page->aflags);
40546 + queue_was_full = 1;
40547 + }
40548 +
40549 + if (vnic->shared_page->aflags &
40550 + NET_ACCEL_MSG_AFLAGS_NETUPDOWN) {
40551 + DPRINTK("%s: net interface change\n", __FUNCTION__);
40552 + clear_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B,
40553 + (unsigned long *)&vnic->shared_page->aflags);
40554 + if (vnic->shared_page->net_dev_up)
40555 + netfront_accel_interface_up(vnic);
40556 + else
40557 + netfront_accel_interface_down(vnic);
40558 + }
40559 + }
40560 +
40561 + /* Pull msg out of shared memory */
40562 + while ((err = net_accel_msg_recv(vnic->shared_page, &vnic->from_dom0,
40563 + &msg)) == 0) {
40564 + err = vnic_process_rx_msg(vnic, &msg);
40565 +
40566 + if (err != 0)
40567 + goto done;
40568 + }
40569 +
40570 + /*
40571 + * Send any pending buffer map request messages that we can,
40572 + * and mark domU->dom0 as full if necessary.
40573 + */
40574 + if (vnic->msg_state == NETFRONT_ACCEL_MSG_HW &&
40575 + vnic->bufpages.page_reqs < vnic->bufpages.max_pages) {
40576 + if (vnic_send_buffer_requests(vnic, &vnic->bufpages) == -ENOSPC)
40577 + vnic_set_queue_full(vnic);
40578 + }
40579 +
40580 + /*
40581 + * If there are no messages then this is not an error. It
40582 + * just means that we've finished processing the queue.
40583 + */
40584 + if (err == -ENOENT)
40585 + err = 0;
40586 + done:
40587 + /* We will now have made space in the dom0->domU queue if we can */
40588 + if (queue_was_full)
40589 + vnic_set_queue_not_full(vnic);
40590 +
40591 + if (err != 0) {
40592 + EPRINTK("%s returned %d\n", __FUNCTION__, err);
40593 + netfront_accel_set_closing(vnic);
40594 + }
40595 +
40596 + unlock_out:
40597 + mutex_unlock(&vnic->vnic_mutex);
40598 +
40599 + return;
40600 +}
40601 +
40602 +
40603 +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
40604 + struct pt_regs *unused)
40605 +{
40606 + netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
40607 + VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
40608 +
40609 + queue_work(netfront_accel_workqueue, &vnic->msg_from_bend);
40610 +
40611 + return IRQ_HANDLED;
40612 +}
40613 +
40614 +/* Process an interrupt received from the NIC via backend */
40615 +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
40616 + struct pt_regs *unused)
40617 +{
40618 + netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
40619 + struct net_device *net_dev = vnic->net_dev;
40620 + unsigned long flags;
40621 +
40622 + VPRINTK("net irq %d from device %s\n", irq, vnic->dev->nodename);
40623 +
40624 + NETFRONT_ACCEL_STATS_OP(vnic->stats.irq_count++);
40625 +
40626 + BUG_ON(net_dev==NULL);
40627 +
40628 + spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
40629 + if (vnic->irq_enabled) {
40630 + netfront_accel_disable_net_interrupts(vnic);
40631 + vnic->irq_enabled = 0;
40632 + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40633 +
40634 +#if NETFRONT_ACCEL_STATS
40635 + vnic->stats.poll_schedule_count++;
40636 + if (vnic->stats.event_count_since_irq >
40637 + vnic->stats.events_per_irq_max)
40638 + vnic->stats.events_per_irq_max =
40639 + vnic->stats.event_count_since_irq;
40640 + vnic->stats.event_count_since_irq = 0;
40641 +#endif
40642 + netif_rx_schedule(net_dev);
40643 + }
40644 + else {
40645 + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40646 + NETFRONT_ACCEL_STATS_OP(vnic->stats.useless_irq_count++);
40647 + DPRINTK("%s: irq when disabled\n", __FUNCTION__);
40648 + }
40649 +
40650 + return IRQ_HANDLED;
40651 +}
40652 +
40653 +
40654 +void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac,
40655 + u32 ip, u16 port, u8 protocol)
40656 +{
40657 + unsigned long lock_state;
40658 + struct net_accel_msg *msg;
40659 +
40660 + msg = net_accel_msg_start_send(vnic->shared_page, &vnic->to_dom0,
40661 + &lock_state);
40662 +
40663 + if (msg == NULL)
40664 + return;
40665 +
40666 + net_accel_msg_init(msg, NET_ACCEL_MSG_FASTPATH);
40667 + msg->u.fastpath.flags = NET_ACCEL_MSG_REMOVE;
40668 + memcpy(msg->u.fastpath.mac, mac, ETH_ALEN);
40669 +
40670 + msg->u.fastpath.port = port;
40671 + msg->u.fastpath.ip = ip;
40672 + msg->u.fastpath.proto = protocol;
40673 +
40674 + net_accel_msg_complete_send_notify(vnic->shared_page, &vnic->to_dom0,
40675 + &lock_state, vnic->msg_channel_irq);
40676 +}
40677 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_netfront.c
40678 ===================================================================
40679 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
40680 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_netfront.c 2008-02-26 10:54:12.000000000 +0100
40681 @@ -0,0 +1,319 @@
40682 +/****************************************************************************
40683 + * Solarflare driver for Xen network acceleration
40684 + *
40685 + * Copyright 2006-2008: Solarflare Communications Inc,
40686 + * 9501 Jeronimo Road, Suite 250,
40687 + * Irvine, CA 92618, USA
40688 + *
40689 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
40690 + *
40691 + * This program is free software; you can redistribute it and/or modify it
40692 + * under the terms of the GNU General Public License version 2 as published
40693 + * by the Free Software Foundation, incorporated herein by reference.
40694 + *
40695 + * This program is distributed in the hope that it will be useful,
40696 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
40697 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
40698 + * GNU General Public License for more details.
40699 + *
40700 + * You should have received a copy of the GNU General Public License
40701 + * along with this program; if not, write to the Free Software
40702 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
40703 + ****************************************************************************
40704 + */
40705 +
40706 +#include <linux/skbuff.h>
40707 +#include <linux/netdevice.h>
40708 +
40709 +/* drivers/xen/netfront/netfront.h */
40710 +#include "netfront.h"
40711 +
40712 +#include "accel.h"
40713 +#include "accel_bufs.h"
40714 +#include "accel_util.h"
40715 +#include "accel_msg_iface.h"
40716 +#include "accel_ssr.h"
40717 +
40718 +#ifdef EFX_GCOV
40719 +#include "gcov.h"
40720 +#endif
40721 +
40722 +#define NETFRONT_ACCEL_VNIC_FROM_NETDEV(_nd) \
40723 + ((netfront_accel_vnic *)((struct netfront_info *)netdev_priv(_nd))->accel_priv)
40724 +
40725 +static int netfront_accel_netdev_start_xmit(struct sk_buff *skb,
40726 + struct net_device *net_dev)
40727 +{
40728 + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40729 + struct netfront_info *np =
40730 + (struct netfront_info *)netdev_priv(net_dev);
40731 + int handled, rc;
40732 + unsigned long flags1, flags2;
40733 +
40734 + BUG_ON(vnic == NULL);
40735 +
40736 + /* Take our tx lock and hold for the duration */
40737 + spin_lock_irqsave(&vnic->tx_lock, flags1);
40738 +
40739 + if (!vnic->tx_enabled) {
40740 + rc = 0;
40741 + goto unlock_out;
40742 + }
40743 +
40744 + handled = netfront_accel_vi_tx_post(vnic, skb);
40745 + if (handled == NETFRONT_ACCEL_STATUS_BUSY) {
40746 + BUG_ON(vnic->net_dev != net_dev);
40747 + DPRINTK("%s stopping queue\n", __FUNCTION__);
40748 +
40749 + /* Netfront's lock protects tx_skb */
40750 + spin_lock_irqsave(&np->tx_lock, flags2);
40751 + BUG_ON(vnic->tx_skb != NULL);
40752 + vnic->tx_skb = skb;
40753 + netif_stop_queue(net_dev);
40754 + spin_unlock_irqrestore(&np->tx_lock, flags2);
40755 +
40756 + NETFRONT_ACCEL_STATS_OP(vnic->stats.queue_stops++);
40757 + }
40758 +
40759 + if (handled == NETFRONT_ACCEL_STATUS_CANT)
40760 + rc = 0;
40761 + else
40762 + rc = 1;
40763 +
40764 +unlock_out:
40765 + spin_unlock_irqrestore(&vnic->tx_lock, flags1);
40766 +
40767 + return rc;
40768 +}
40769 +
40770 +
40771 +static int netfront_accel_netdev_poll(struct net_device *net_dev, int *budget)
40772 +{
40773 + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40774 + int rx_allowed = *budget, rx_done;
40775 +
40776 + BUG_ON(vnic == NULL);
40777 +
40778 + /* Can check this without lock as modifier excludes polls */
40779 + if (!vnic->poll_enabled)
40780 + return 0;
40781 +
40782 + rx_done = netfront_accel_vi_poll(vnic, rx_allowed);
40783 + *budget -= rx_done;
40784 +
40785 + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_call_count++);
40786 +
40787 + VPRINTK("%s: done %d allowed %d\n",
40788 + __FUNCTION__, rx_done, rx_allowed);
40789 +
40790 + netfront_accel_ssr_end_of_burst(vnic, &vnic->ssr_state);
40791 +
40792 + if (rx_done < rx_allowed) {
40793 + return 0; /* Done */
40794 + }
40795 +
40796 + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_reschedule_count++);
40797 +
40798 + return 1; /* More to do. */
40799 +}
40800 +
40801 +
40802 +/*
40803 + * Process request from netfront to start napi interrupt
40804 + * mode. (i.e. enable interrupts as it's finished polling)
40805 + */
40806 +static int netfront_accel_start_napi_interrupts(struct net_device *net_dev)
40807 +{
40808 + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40809 + unsigned long flags;
40810 +
40811 + BUG_ON(vnic == NULL);
40812 +
40813 + /*
40814 + * Can check this without lock as writer excludes poll before
40815 + * modifying
40816 + */
40817 + if (!vnic->poll_enabled)
40818 + return 0;
40819 +
40820 + if (!netfront_accel_vi_enable_interrupts(vnic)) {
40821 + /*
40822 + * There was something there, tell caller we had
40823 + * something to do.
40824 + */
40825 + return 1;
40826 + }
40827 +
40828 + spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
40829 + vnic->irq_enabled = 1;
40830 + netfront_accel_enable_net_interrupts(vnic);
40831 + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40832 +
40833 + return 0;
40834 +}
40835 +
40836 +
40837 +/*
40838 + * Process request from netfront to stop napi interrupt
40839 + * mode. (i.e. disable interrupts as it's starting to poll)
40840 + */
40841 +static void netfront_accel_stop_napi_interrupts(struct net_device *net_dev)
40842 +{
40843 + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40844 + unsigned long flags;
40845 +
40846 + BUG_ON(vnic == NULL);
40847 +
40848 + spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
40849 +
40850 + if (!vnic->poll_enabled) {
40851 + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40852 + return;
40853 + }
40854 +
40855 + netfront_accel_disable_net_interrupts(vnic);
40856 + vnic->irq_enabled = 0;
40857 + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
40858 +}
40859 +
40860 +
40861 +static int netfront_accel_check_ready(struct net_device *net_dev)
40862 +{
40863 + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40864 +
40865 + BUG_ON(vnic == NULL);
40866 +
40867 + /* This is protected by netfront's lock */
40868 + return vnic->tx_skb == NULL;
40869 +}
40870 +
40871 +
40872 +static int netfront_accel_get_stats(struct net_device *net_dev,
40873 + struct net_device_stats *stats)
40874 +{
40875 + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
40876 + struct netfront_accel_netdev_stats now;
40877 +
40878 + BUG_ON(vnic == NULL);
40879 +
40880 + now.fastpath_rx_pkts = vnic->netdev_stats.fastpath_rx_pkts;
40881 + now.fastpath_rx_bytes = vnic->netdev_stats.fastpath_rx_bytes;
40882 + now.fastpath_rx_errors = vnic->netdev_stats.fastpath_rx_errors;
40883 + now.fastpath_tx_pkts = vnic->netdev_stats.fastpath_tx_pkts;
40884 + now.fastpath_tx_bytes = vnic->netdev_stats.fastpath_tx_bytes;
40885 + now.fastpath_tx_errors = vnic->netdev_stats.fastpath_tx_errors;
40886 +
40887 + stats->rx_packets += (now.fastpath_rx_pkts -
40888 + vnic->stats_last_read.fastpath_rx_pkts);
40889 + stats->rx_bytes += (now.fastpath_rx_bytes -
40890 + vnic->stats_last_read.fastpath_rx_bytes);
40891 + stats->rx_errors += (now.fastpath_rx_errors -
40892 + vnic->stats_last_read.fastpath_rx_errors);
40893 + stats->tx_packets += (now.fastpath_tx_pkts -
40894 + vnic->stats_last_read.fastpath_tx_pkts);
40895 + stats->tx_bytes += (now.fastpath_tx_bytes -
40896 + vnic->stats_last_read.fastpath_tx_bytes);
40897 + stats->tx_errors += (now.fastpath_tx_errors -
40898 + vnic->stats_last_read.fastpath_tx_errors);
40899 +
40900 + vnic->stats_last_read = now;
40901 +
40902 + return 0;
40903 +}
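As an arithmetic illustration of the accounting above: if fastpath_rx_pkts now reads 1500 and stats_last_read recorded 1400 at the previous call, 100 is added to stats->rx_packets and 1500 becomes the stored snapshot, so repeated calls fold the fast-path counters into netfront's totals without double counting.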
40904 +
40905 +
40906 +struct netfront_accel_hooks accel_hooks = {
40907 + .new_device = &netfront_accel_probe,
40908 + .remove = &netfront_accel_remove,
40909 + .netdev_poll = &netfront_accel_netdev_poll,
40910 + .start_xmit = &netfront_accel_netdev_start_xmit,
40911 + .start_napi_irq = &netfront_accel_start_napi_interrupts,
40912 + .stop_napi_irq = &netfront_accel_stop_napi_interrupts,
40913 + .check_ready = &netfront_accel_check_ready,
40914 + .get_stats = &netfront_accel_get_stats
40915 +};
40916 +
40917 +
40918 +unsigned sfc_netfront_max_pages = NETFRONT_ACCEL_DEFAULT_BUF_PAGES;
40919 +module_param_named (max_pages, sfc_netfront_max_pages, uint, 0644);
40920 +MODULE_PARM_DESC(max_pages, "Number of buffer pages to request");
40921 +
40922 +unsigned sfc_netfront_buffer_split = 2;
40923 +module_param_named (buffer_split, sfc_netfront_buffer_split, uint, 0644);
40924 +MODULE_PARM_DESC(buffer_split,
40925 + "Fraction of buffers to use for TX, rest for RX");
40926 +
40927 +
40928 +const char *frontend_name = "sfc_netfront";
40929 +
40930 +struct workqueue_struct *netfront_accel_workqueue;
40931 +
40932 +static int __init netfront_accel_init(void)
40933 +{
40934 + int rc;
40935 +#ifdef EFX_GCOV
40936 + gcov_provider_init(THIS_MODULE);
40937 +#endif
40938 +
40939 + /*
40940 + * If we're running on dom0, netfront hasn't initialised
40941 + * itself, so we need to keep away
40942 + */
40943 + if (is_initial_xendomain())
40944 + return 0;
40945 +
40946 + if (!is_pow2(sizeof(struct net_accel_msg)))
40947 + EPRINTK("%s: bad structure size\n", __FUNCTION__);
40948 +
40949 + netfront_accel_workqueue = create_workqueue(frontend_name);
40950 +
40951 + netfront_accel_debugfs_init();
40952 +
40953 + rc = netfront_accelerator_loaded(NETFRONT_ACCEL_VERSION,
40954 + frontend_name, &accel_hooks);
40955 +
40956 + if (rc < 0) {
40957 + EPRINTK("Xen netfront accelerator version mismatch\n");
40958 + return -EINVAL;
40959 + }
40960 +
40961 + if (rc > 0) {
40962 + /*
40963 + * In future may want to add backwards compatibility
40964 + * and accept certain subsets of previous versions
40965 + */
40966 + EPRINTK("Xen netfront accelerator version mismatch\n");
40967 + return -EINVAL;
40968 + }
40969 +
40970 + return 0;
40971 +}
40972 +module_init(netfront_accel_init);
40973 +
40974 +static void __exit netfront_accel_exit(void)
40975 +{
40976 + if (is_initial_xendomain())
40977 + return;
40978 +
40979 + DPRINTK("%s: unhooking\n", __FUNCTION__);
40980 +
40981 + /* Unhook from normal netfront */
40982 + netfront_accelerator_stop(frontend_name);
40983 +
40984 + DPRINTK("%s: done\n", __FUNCTION__);
40985 +
40986 + netfront_accel_debugfs_fini();
40987 +
40988 + flush_workqueue(netfront_accel_workqueue);
40989 +
40990 + destroy_workqueue(netfront_accel_workqueue);
40991 +
40992 +#ifdef EFX_GCOV
40993 + gcov_provider_fini(THIS_MODULE);
40994 +#endif
40995 + return;
40996 +}
40997 +module_exit(netfront_accel_exit);
40998 +
40999 +MODULE_LICENSE("GPL");
41000 +
41001 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.c
41002 ===================================================================
41003 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
41004 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.c 2008-02-20 09:32:49.000000000 +0100
41005 @@ -0,0 +1,308 @@
41006 +/****************************************************************************
41007 + * Solarflare driver for Xen network acceleration
41008 + *
41009 + * Copyright 2006-2008: Solarflare Communications Inc,
41010 + * 9501 Jeronimo Road, Suite 250,
41011 + * Irvine, CA 92618, USA
41012 + *
41013 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
41014 + *
41015 + * This program is free software; you can redistribute it and/or modify it
41016 + * under the terms of the GNU General Public License version 2 as published
41017 + * by the Free Software Foundation, incorporated herein by reference.
41018 + *
41019 + * This program is distributed in the hope that it will be useful,
41020 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
41021 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41022 + * GNU General Public License for more details.
41023 + *
41024 + * You should have received a copy of the GNU General Public License
41025 + * along with this program; if not, write to the Free Software
41026 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
41027 + ****************************************************************************
41028 + */
41029 +
41030 +#include <linux/socket.h>
41031 +#include <linux/in.h>
41032 +#include <linux/ip.h>
41033 +#include <linux/tcp.h>
41034 +#include <linux/list.h>
41035 +#include <net/ip.h>
41036 +#include <net/checksum.h>
41037 +
41038 +#include "accel.h"
41039 +#include "accel_util.h"
41040 +#include "accel_bufs.h"
41041 +
41042 +#include "accel_ssr.h"
41043 +
41044 +static inline int list_valid(struct list_head *lh) {
41045 + return(lh->next != NULL);
41046 +}
41047 +
41048 +static void netfront_accel_ssr_deliver (struct netfront_accel_vnic *vnic,
41049 + struct netfront_accel_ssr_state *st,
41050 + struct netfront_accel_ssr_conn *c);
41051 +
41052 +/** Construct a netfront_accel_ssr_state, pre-allocating a small
41053 + * pool of connection tracking entries.
41054 + *
41055 + * @v st The SSR state (per channel per port)
41056 + */
41057 +void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st) {
41058 + unsigned i;
41059 +
41060 + INIT_LIST_HEAD(&st->conns);
41061 + INIT_LIST_HEAD(&st->free_conns);
41062 + for (i = 0; i < 8; ++i) {
41063 + struct netfront_accel_ssr_conn *c =
41064 + kmalloc(sizeof(*c), GFP_KERNEL);
41065 + if (c == NULL) break;
41066 + c->n_in_order_pkts = 0;
41067 + c->skb = NULL;
41068 + list_add(&c->link, &st->free_conns);
41069 + }
41070 +
41071 +}
41072 +
41073 +
41074 +/** Destructor for a netfront_accel_ssr_state.
41075 + *
41076 + * @v st The SSR state (per channel per port)
41077 + */
41078 +void netfront_accel_ssr_fini(netfront_accel_vnic *vnic,
41079 + struct netfront_accel_ssr_state *st) {
41080 + struct netfront_accel_ssr_conn *c;
41081 +
41082 + /* Return cleanly if netfront_accel_ssr_init() was not previously called */
41083 + BUG_ON(list_valid(&st->conns) != list_valid(&st->free_conns));
41084 + if (! list_valid(&st->conns))
41085 + return;
41086 +
41087 + while ( ! list_empty(&st->free_conns)) {
41088 + c = list_entry(st->free_conns.prev,
41089 + struct netfront_accel_ssr_conn, link);
41090 + list_del(&c->link);
41091 + BUG_ON(c->skb != NULL);
41092 + kfree(c);
41093 + }
41094 + while ( ! list_empty(&st->conns)) {
41095 + c = list_entry(st->conns.prev,
41096 + struct netfront_accel_ssr_conn, link);
41097 + list_del(&c->link);
41098 + if (c->skb)
41099 + netfront_accel_ssr_deliver(vnic, st, c);
41100 + kfree(c);
41101 + }
41102 +}
41103 +
41104 +
41105 +/** Calc IP checksum and deliver to the OS
41106 + *
41107 + * @v st The SSR state (per channel per port)
41108 + * @v c The SSR connection state
41109 + */
41110 +static void netfront_accel_ssr_deliver(netfront_accel_vnic *vnic,
41111 + struct netfront_accel_ssr_state *st,
41112 + struct netfront_accel_ssr_conn *c) {
41113 + BUG_ON(c->skb == NULL);
41114 +
41115 + /*
41116 + * If we've chained packets together, recalculate the IP
41117 + * checksum.
41118 + */
41119 + if (skb_shinfo(c->skb)->frag_list) {
41120 + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_bursts);
41121 + c->iph->check = 0;
41122 + c->iph->check = ip_fast_csum((unsigned char *) c->iph,
41123 + c->iph->ihl);
41124 + }
41125 +
41126 + VPRINTK("%s: %d\n", __FUNCTION__, c->skb->len);
41127 +
41128 + netif_receive_skb(c->skb);
41129 + c->skb = NULL;
41130 +}
41131 +
41132 +
41133 +/** Push held skbs down into network stack.
41134 + *
41135 + * @v st SSR state
41136 + *
41137 + * Only called if we are tracking one or more connections.
41138 + */
41139 +void __netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic,
41140 + struct netfront_accel_ssr_state *st) {
41141 + struct netfront_accel_ssr_conn *c;
41142 +
41143 + BUG_ON(list_empty(&st->conns));
41144 +
41145 + list_for_each_entry(c, &st->conns, link)
41146 + if (c->skb)
41147 + netfront_accel_ssr_deliver(vnic, st, c);
41148 +
41149 + /* Time-out connections that have received no traffic for 20ms. */
41150 + c = list_entry(st->conns.prev, struct netfront_accel_ssr_conn,
41151 + link);
41152 + if (jiffies - c->last_pkt_jiffies > (HZ / 50 + 1)) {
41153 + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_drop_stream);
41154 + list_del(&c->link);
41155 + list_add(&c->link, &st->free_conns);
41156 + }
41157 +}
41158 +
41159 +
41160 +/** Process SKB and decide whether to dispatch it to the stack now or
41161 + * later.
41162 + *
41163 + * @v st SSR state
41164 + * @v skb SKB to examine
41165 + * @ret rc 0 => deliver SKB to kernel now, otherwise the SKB belongs
41166 + * to us.
41167 + */
41168 +int netfront_accel_ssr_skb(struct netfront_accel_vnic *vnic,
41169 + struct netfront_accel_ssr_state *st,
41170 + struct sk_buff *skb) {
41171 + int data_length, dont_merge;
41172 + struct netfront_accel_ssr_conn *c;
41173 + struct iphdr *iph;
41174 + struct tcphdr *th;
41175 + unsigned th_seq;
41176 +
41177 + BUG_ON(skb_shinfo(skb)->frag_list != NULL);
41178 + BUG_ON(skb->next != NULL);
41179 +
41180 + /* We're not interested if it isn't TCP over IPv4. */
41181 + iph = (struct iphdr *) skb->data;
41182 + if (skb->protocol != htons(ETH_P_IP) ||
41183 + iph->protocol != IPPROTO_TCP) {
41184 + return 0;
41185 + }
41186 +
41187 + /* Ignore segments that fail csum or are fragmented. */
41188 + if (unlikely((skb->ip_summed - CHECKSUM_UNNECESSARY) |
41189 + (iph->frag_off & htons(IP_MF | IP_OFFSET)))) {
41190 + return 0;
41191 + }
41192 +
41193 + th = (struct tcphdr*)(skb->data + iph->ihl * 4);
41194 + data_length = ntohs(iph->tot_len) - iph->ihl * 4 - th->doff * 4;
41195 + th_seq = ntohl(th->seq);
41196 + dont_merge = (data_length == 0) | th->urg | th->syn | th->rst;
41197 +
41198 + list_for_each_entry(c, &st->conns, link) {
41199 + if ((c->saddr - iph->saddr) |
41200 + (c->daddr - iph->daddr) |
41201 + (c->source - th->source) |
41202 + (c->dest - th->dest ))
41203 + continue;
41204 +
41205 + /* Re-insert at head of list to reduce lookup time. */
41206 + list_del(&c->link);
41207 + list_add(&c->link, &st->conns);
41208 + c->last_pkt_jiffies = jiffies;
41209 +
41210 + if (unlikely(th_seq - c->next_seq)) {
41211 + /* Out-of-order, so start counting again. */
41212 + if (c->skb)
41213 + netfront_accel_ssr_deliver(vnic, st, c);
41214 + c->n_in_order_pkts = 0;
41215 + c->next_seq = th_seq + data_length;
41216 + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_misorder);
41217 + return 0;
41218 + }
41219 + c->next_seq = th_seq + data_length;
41220 +
41221 + if (++c->n_in_order_pkts < 300) {
41222 + /* May be in slow-start, so don't merge. */
41223 + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_slow_start);
41224 + return 0;
41225 + }
41226 +
41227 + if (unlikely(dont_merge)) {
41228 + if (c->skb)
41229 + netfront_accel_ssr_deliver(vnic, st, c);
41230 + return 0;
41231 + }
41232 +
41233 + if (c->skb) {
41234 + c->iph->tot_len = ntohs(c->iph->tot_len);
41235 + c->iph->tot_len += data_length;
41236 + c->iph->tot_len = htons(c->iph->tot_len);
41237 + c->th->ack_seq = th->ack_seq;
41238 + c->th->fin |= th->fin;
41239 + c->th->psh |= th->psh;
41240 + c->th->window = th->window;
41241 +
41242 + /* Remove the headers from this skb. */
41243 + skb_pull(skb, skb->len - data_length);
41244 +
41245 + /*
41246 + * Tack the new skb onto the head skb's frag_list.
41247 + * This is exactly the format that fragmented IP
41248 + * datagrams are reassembled into.
41249 + */
41250 + BUG_ON(skb->next != 0);
41251 + if ( ! skb_shinfo(c->skb)->frag_list)
41252 + skb_shinfo(c->skb)->frag_list = skb;
41253 + else
41254 + c->skb_tail->next = skb;
41255 + c->skb_tail = skb;
41256 + c->skb->len += skb->len;
41257 + c->skb->data_len += skb->len;
41258 + c->skb->truesize += skb->truesize;
41259 +
41260 + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_merges);
41261 +
41262 + /*
41263 + * If the next packet might push this super-packet
41264 + * over the limit for an IP packet, deliver it now.
41265 + * This is slightly conservative, but close enough.
41266 + */
41267 + if (c->skb->len +
41268 + (PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)
41269 + > 16384)
41270 + netfront_accel_ssr_deliver(vnic, st, c);
41271 +
41272 + return 1;
41273 + }
41274 + else {
41275 + c->iph = iph;
41276 + c->th = th;
41277 + c->skb = skb;
41278 + return 1;
41279 + }
41280 + }
41281 +
41282 + /* We're not yet tracking this connection. */
41283 +
41284 + if (dont_merge) {
41285 + return 0;
41286 + }
41287 +
41288 + if (list_empty(&st->free_conns)) {
41289 + c = list_entry(st->conns.prev,
41290 + struct netfront_accel_ssr_conn,
41291 + link);
41292 + if (c->skb) {
41293 + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_too_many);
41294 + return 0;
41295 + }
41296 + }
41297 + else {
41298 + c = list_entry(st->free_conns.next,
41299 + struct netfront_accel_ssr_conn,
41300 + link);
41301 + }
41302 + list_del(&c->link);
41303 + list_add(&c->link, &st->conns);
41304 + c->saddr = iph->saddr;
41305 + c->daddr = iph->daddr;
41306 + c->source = th->source;
41307 + c->dest = th->dest;
41308 + c->next_seq = th_seq + data_length;
41309 + c->n_in_order_pkts = 0;
41310 + BUG_ON(c->skb != NULL);
41311 + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_new_stream);
41312 + return 0;
41313 +}
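A concrete (hypothetical) walk through the logic above: a 1448-byte, in-order segment on a connection that has already supplied 300 or more in-order segments is appended to the held skb's frag_list, with the held IP total length, TCP ack/window and FIN/PSH flags patched to match; the merged super-packet is only handed to the stack when its length approaches 16384 bytes, when the burst ends, or when a segment arrives that must not be merged (zero payload, SYN, RST, URG, or an unexpected sequence number).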
41314 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.h
41315 ===================================================================
41316 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
41317 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.h 2008-02-20 09:32:49.000000000 +0100
41318 @@ -0,0 +1,88 @@
41319 +/****************************************************************************
41320 + * Solarflare driver for Xen network acceleration
41321 + *
41322 + * Copyright 2006-2008: Solarflare Communications Inc,
41323 + * 9501 Jeronimo Road, Suite 250,
41324 + * Irvine, CA 92618, USA
41325 + *
41326 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
41327 + *
41328 + * This program is free software; you can redistribute it and/or modify it
41329 + * under the terms of the GNU General Public License version 2 as published
41330 + * by the Free Software Foundation, incorporated herein by reference.
41331 + *
41332 + * This program is distributed in the hope that it will be useful,
41333 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
41334 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41335 + * GNU General Public License for more details.
41336 + *
41337 + * You should have received a copy of the GNU General Public License
41338 + * along with this program; if not, write to the Free Software
41339 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
41340 + ****************************************************************************
41341 + */
41342 +
41343 +#ifndef NETFRONT_ACCEL_SSR_H
41344 +#define NETFRONT_ACCEL_SSR_H
41345 +
41346 +#include <linux/skbuff.h>
41347 +#include <linux/ip.h>
41348 +#include <linux/tcp.h>
41349 +#include <linux/list.h>
41350 +
41351 +#include "accel.h"
41352 +
41353 +/** State for Soft Segment Reassembly (SSR). */
41354 +
41355 +struct netfront_accel_ssr_conn {
41356 + struct list_head link;
41357 +
41358 + unsigned saddr, daddr;
41359 + unsigned short source, dest;
41360 +
41361 + /** Number of in-order packets we've seen with payload. */
41362 + unsigned n_in_order_pkts;
41363 +
41364 + /** Next in-order sequence number. */
41365 + unsigned next_seq;
41366 +
41367 + /** Time we last saw a packet on this connection. */
41368 + unsigned long last_pkt_jiffies;
41369 +
41370 + /** The SKB we are currently holding. If NULL, then all following
41371 + * fields are undefined.
41372 + */
41373 + struct sk_buff *skb;
41374 +
41375 + /** The tail of the frag_list of SKBs we're holding. Only valid
41376 + * after at least one merge.
41377 + */
41378 + struct sk_buff *skb_tail;
41379 +
41380 + /** The IP header of the skb we are holding. */
41381 + struct iphdr *iph;
41382 +
41383 + /** The TCP header of the skb we are holding. */
41384 + struct tcphdr *th;
41385 +};
41386 +
41387 +extern void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st);
41388 +extern void netfront_accel_ssr_fini(netfront_accel_vnic *vnic,
41389 + struct netfront_accel_ssr_state *st);
41390 +
41391 +extern void
41392 +__netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic,
41393 + struct netfront_accel_ssr_state *st);
41394 +
41395 +extern int netfront_accel_ssr_skb(netfront_accel_vnic *vnic,
41396 + struct netfront_accel_ssr_state *st,
41397 + struct sk_buff *skb);
41398 +
41399 +static inline void
41400 +netfront_accel_ssr_end_of_burst (netfront_accel_vnic *vnic,
41401 + struct netfront_accel_ssr_state *st) {
41402 + if ( ! list_empty(&st->conns) )
41403 + __netfront_accel_ssr_end_of_burst(vnic, st);
41404 +}
41405 +
41406 +#endif /* NETFRONT_ACCEL_SSR_H */
41407 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.c
41408 ===================================================================
41409 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
41410 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.c 2008-02-26 10:54:12.000000000 +0100
41411 @@ -0,0 +1,511 @@
41412 +/****************************************************************************
41413 + * Solarflare driver for Xen network acceleration
41414 + *
41415 + * Copyright 2006-2008: Solarflare Communications Inc,
41416 + * 9501 Jeronimo Road, Suite 250,
41417 + * Irvine, CA 92618, USA
41418 + *
41419 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
41420 + *
41421 + * This program is free software; you can redistribute it and/or modify it
41422 + * under the terms of the GNU General Public License version 2 as published
41423 + * by the Free Software Foundation, incorporated herein by reference.
41424 + *
41425 + * This program is distributed in the hope that it will be useful,
41426 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
41427 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41428 + * GNU General Public License for more details.
41429 + *
41430 + * You should have received a copy of the GNU General Public License
41431 + * along with this program; if not, write to the Free Software
41432 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
41433 + ****************************************************************************
41434 + */
41435 +
41436 +#include <linux/pci.h>
41437 +#include <linux/tcp.h>
41438 +#include <linux/ip.h>
41439 +#include <linux/in.h>
41440 +#include <linux/if_ether.h>
41441 +
41442 +#include "accel.h"
41443 +#include "accel_util.h"
41444 +
41445 +#include "accel_tso.h"
41446 +
41447 +#define PTR_DIFF(p1, p2) ((u8*)(p1) - (u8*)(p2))
41448 +#define ETH_HDR_LEN(skb) ((skb)->nh.raw - (skb)->data)
41449 +#define SKB_TCP_OFF(skb) PTR_DIFF ((skb)->h.th, (skb)->data)
41450 +#define SKB_IP_OFF(skb) PTR_DIFF ((skb)->nh.iph, (skb)->data)
41451 +
41452 +/*
41453 + * Set a maximum number of buffers in each output packet to make life
41454 + * a little simpler - if this is reached it will just move on to
41455 + * another packet
41456 + */
41457 +#define ACCEL_TSO_MAX_BUFFERS (6)
41458 +
41459 +/** TSO State.
41460 + *
41461 + * The state used during segmentation. It is put into this data structure
41462 + * just to make it easy to pass into inline functions.
41463 + */
41464 +struct netfront_accel_tso_state {
41465 + /** bytes of data we've yet to segment */
41466 + unsigned remaining_len;
41467 +
41468 + /** current sequence number */
41469 + unsigned seqnum;
41470 +
41471 + /** remaining space in current packet */
41472 + unsigned packet_space;
41473 +
41474 + /** List of packets to be output, containing the buffers and
41475 + * iovecs to describe each packet
41476 + */
41477 + struct netfront_accel_tso_output_packet *output_packets;
41478 +
41479 + /** Total number of buffers in output_packets */
41480 + unsigned buffers;
41481 +
41482 + /** Total number of packets in output_packets */
41483 + unsigned packets;
41484 +
41485 + /** Input Fragment Cursor.
41486 + *
41487 + * Where we are in the current fragment of the incoming SKB. These
41488 + * values get updated in place when we split a fragment over
41489 + * multiple packets.
41490 + */
41491 + struct {
41492 + /** address of current position */
41493 + void *addr;
41494 + /** remaining length */
41495 + unsigned int len;
41496 + } ifc; /* == ifc Input Fragment Cursor */
41497 +
41498 + /** Parameters.
41499 + *
41500 + * These values are set once at the start of the TSO send and do
41501 + * not get changed as the routine progresses.
41502 + */
41503 + struct {
41504 + /* the number of bytes of header */
41505 + unsigned int header_length;
41506 +
41507 + /* The number of bytes to put in each outgoing segment. */
41508 + int full_packet_size;
41509 +
41510 + /* Current IP ID, host endian. */
41511 + unsigned ip_id;
41512 +
41513 + /* Max size of each output packet payload */
41514 + int gso_size;
41515 + } p;
41516 +};
41517 +
41518 +
41519 +/**
41520 + * Verify that our various assumptions about sk_buffs and the conditions
41521 + * under which TSO will be attempted hold true.
41522 + *
41523 + * @v skb The sk_buff to check.
41524 + */
41525 +static inline void tso_check_safe(struct sk_buff *skb) {
41526 + EPRINTK_ON(skb->protocol != htons (ETH_P_IP));
41527 + EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP));
41528 + EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP);
41529 + EPRINTK_ON((SKB_TCP_OFF(skb)
41530 + + (skb->h.th->doff << 2u)) > skb_headlen(skb));
41531 +}
41532 +
41533 +
41534 +
41535 +/** Parse the SKB header and initialise state. */
41536 +static inline void tso_start(struct netfront_accel_tso_state *st,
41537 + struct sk_buff *skb) {
41538 +
41539 + /*
41540 + * All ethernet/IP/TCP headers combined size is TCP header size
41541 + * plus offset of TCP header relative to start of packet.
41542 + */
41543 + st->p.header_length = (skb->h.th->doff << 2u) + SKB_TCP_OFF(skb);
41544 + st->p.full_packet_size = (st->p.header_length
41545 + + skb_shinfo(skb)->gso_size);
41546 + st->p.gso_size = skb_shinfo(skb)->gso_size;
41547 +
41548 + st->p.ip_id = htons(skb->nh.iph->id);
41549 + st->seqnum = ntohl(skb->h.th->seq);
41550 +
41551 + EPRINTK_ON(skb->h.th->urg);
41552 + EPRINTK_ON(skb->h.th->syn);
41553 + EPRINTK_ON(skb->h.th->rst);
41554 +
41555 + st->remaining_len = skb->len - st->p.header_length;
41556 +
41557 + st->output_packets = NULL;
41558 + st->buffers = 0;
41559 + st->packets = 0;
41560 +
41561 + VPRINTK("Starting new TSO: hl %d ps %d gso %d seq %x len %d\n",
41562 + st->p.header_length, st->p.full_packet_size, st->p.gso_size,
41563 + st->seqnum, skb->len);
41564 +}
41565 +
41566 +/**
41567 + * Add another NIC mapped buffer onto an output packet
41568 + */
41569 +static inline int tso_start_new_buffer(netfront_accel_vnic *vnic,
41570 + struct netfront_accel_tso_state *st,
41571 + int first)
41572 +{
41573 + struct netfront_accel_tso_buffer *tso_buf;
41574 + struct netfront_accel_pkt_desc *buf;
41575 +
41576 + /* Get a mapped packet buffer */
41577 + buf = netfront_accel_buf_get(vnic->tx_bufs);
41578 + if (buf == NULL) {
41579 + DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
41580 + return -1;
41581 + }
41582 +
41583 + /* Store a bit of meta-data at the end */
41584 + tso_buf = (struct netfront_accel_tso_buffer *)
41585 + (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH
41586 + + sizeof(struct netfront_accel_tso_output_packet));
41587 +
41588 + tso_buf->buf = buf;
41589 +
41590 + tso_buf->length = 0;
41591 +
41592 + if (first) {
41593 + struct netfront_accel_tso_output_packet *output_packet
41594 + = (struct netfront_accel_tso_output_packet *)
41595 + (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH);
41596 + output_packet->next = st->output_packets;
41597 + st->output_packets = output_packet;
41598 + tso_buf->next = NULL;
41599 + st->output_packets->tso_bufs = tso_buf;
41600 + st->output_packets->tso_bufs_len = 1;
41601 + } else {
41602 + tso_buf->next = st->output_packets->tso_bufs;
41603 + st->output_packets->tso_bufs = tso_buf;
41604 + st->output_packets->tso_bufs_len ++;
41605 + }
41606 +
41607 + BUG_ON(st->output_packets->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
41608 +
41609 + st->buffers ++;
41610 +
41611 + /*
41612 + * Store the skb context; set to NULL for now, the last packet
41613 + * buffer will get a non-NULL skb later
41614 + */
41615 + tso_buf->buf->skb = NULL;
41616 +
41617 + return 0;
41618 +}
41619 +
41620 +
41621 +/* Generate a new header, and prepare for the new packet.
41622 + *
41623 + * @v vnic VNIC
41624 + * @v skb Socket buffer
41625 + * @v st TSO state
41626 + * @ret rc 0 on success, or -1 if failed to alloc header
41627 + */
41628 +
41629 +static inline
41630 +int tso_start_new_packet(netfront_accel_vnic *vnic,
41631 + struct sk_buff *skb,
41632 + struct netfront_accel_tso_state *st)
41633 +{
41634 + struct netfront_accel_tso_buffer *tso_buf;
41635 + struct iphdr *tsoh_iph;
41636 + struct tcphdr *tsoh_th;
41637 + unsigned ip_length;
41638 +
41639 + if (tso_start_new_buffer(vnic, st, 1) < 0) {
41640 + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
41641 + return -1;
41642 + }
41643 +
41644 + /* This has been set up by tso_start_new_buffer() */
41645 + tso_buf = st->output_packets->tso_bufs;
41646 +
41647 + /* Copy in the header */
41648 + memcpy(tso_buf->buf->pkt_kva, skb->data, st->p.header_length);
41649 + tso_buf->length = st->p.header_length;
41650 +
41651 + tsoh_th = (struct tcphdr*)
41652 + (tso_buf->buf->pkt_kva + SKB_TCP_OFF(skb));
41653 + tsoh_iph = (struct iphdr*)
41654 + (tso_buf->buf->pkt_kva + SKB_IP_OFF(skb));
41655 +
41656 + /* Set to zero to encourage falcon to fill these in */
41657 + tsoh_th->check = 0;
41658 + tsoh_iph->check = 0;
41659 +
41660 + tsoh_th->seq = htonl(st->seqnum);
41661 + st->seqnum += st->p.gso_size;
41662 +
41663 + if (st->remaining_len > st->p.gso_size) {
41664 + /* This packet will not finish the TSO burst. */
41665 + ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
41666 + tsoh_th->fin = 0;
41667 + tsoh_th->psh = 0;
41668 + } else {
41669 + /* This packet will be the last in the TSO burst. */
41670 + ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
41671 + + st->remaining_len);
41672 + tsoh_th->fin = skb->h.th->fin;
41673 + tsoh_th->psh = skb->h.th->psh;
41674 + }
41675 +
41676 + tsoh_iph->tot_len = htons(ip_length);
41677 +
41678 + /* Linux leaves suitable gaps in the IP ID space for us to fill. */
41679 + tsoh_iph->id = st->p.ip_id++;
41680 + tsoh_iph->id = htons(tsoh_iph->id);
41681 +
41682 + st->packet_space = st->p.gso_size;
41683 +
41684 + st->packets++;
41685 +
41686 + return 0;
41687 +}
41688 +
41689 +
41690 +
41691 +static inline void tso_get_fragment(struct netfront_accel_tso_state *st,
41692 + int len, void *addr)
41693 +{
41694 + st->ifc.len = len;
41695 + st->ifc.addr = addr;
41696 + return;
41697 +}
41698 +
41699 +
41700 +static inline void tso_unwind(netfront_accel_vnic *vnic,
41701 + struct netfront_accel_tso_state *st)
41702 +{
41703 + struct netfront_accel_tso_buffer *tso_buf;
41704 + struct netfront_accel_tso_output_packet *output_packet;
41705 +
41706 + DPRINTK("%s\n", __FUNCTION__);
41707 +
41708 + while (st->output_packets != NULL) {
41709 + output_packet = st->output_packets;
41710 + st->output_packets = output_packet->next;
41711 + while (output_packet->tso_bufs != NULL) {
41712 + tso_buf = output_packet->tso_bufs;
41713 + output_packet->tso_bufs = tso_buf->next;
41714 +
41715 + st->buffers --;
41716 + output_packet->tso_bufs_len --;
41717 +
41718 + netfront_accel_buf_put(vnic->tx_bufs,
41719 + tso_buf->buf->buf_id);
41720 + }
41721 + }
41722 + BUG_ON(st->buffers != 0);
41723 +}
41724 +
41725 +
41726 +
41727 +static inline
41728 +void tso_fill_packet_with_fragment(netfront_accel_vnic *vnic,
41729 + struct netfront_accel_tso_state *st)
41730 +{
41731 + struct netfront_accel_tso_buffer *tso_buf;
41732 + int n, space;
41733 +
41734 + BUG_ON(st->output_packets == NULL);
41735 + BUG_ON(st->output_packets->tso_bufs == NULL);
41736 +
41737 + tso_buf = st->output_packets->tso_bufs;
41738 +
41739 + if (st->ifc.len == 0) return;
41740 + if (st->packet_space == 0) return;
41741 + if (tso_buf->length == NETFRONT_ACCEL_TSO_BUF_LENGTH) return;
41742 +
41743 + n = min(st->ifc.len, st->packet_space);
41744 +
41745 + space = NETFRONT_ACCEL_TSO_BUF_LENGTH - tso_buf->length;
41746 + n = min(n, space);
41747 +
41748 + st->packet_space -= n;
41749 + st->remaining_len -= n;
41750 + st->ifc.len -= n;
41751 +
41752 + memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
41753 +
41754 + tso_buf->length += n;
41755 +
41756 + BUG_ON(tso_buf->length > NETFRONT_ACCEL_TSO_BUF_LENGTH);
41757 +
41758 + st->ifc.addr += n;
41759 +
41760 + return;
41761 +}
41762 +
41763 +
41764 +int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic,
41765 + struct sk_buff *skb)
41766 +{
41767 + struct netfront_accel_tso_state state;
41768 + struct netfront_accel_tso_buffer *tso_buf = NULL;
41769 + struct netfront_accel_tso_output_packet *reversed_list = NULL;
41770 + struct netfront_accel_tso_output_packet *tmp_pkt;
41771 + ef_iovec iovecs[ACCEL_TSO_MAX_BUFFERS];
41772 + int frag_i, rc, dma_id;
41773 + skb_frag_t *f;
41774 +
41775 + tso_check_safe(skb);
41776 +
41777 + if (skb->ip_summed != CHECKSUM_HW)
41778 + EPRINTK("Trying to TSO send a packet without HW checksum\n");
41779 +
41780 + tso_start(&state, skb);
41781 +
41782 + /*
41783 + * Set up the first payload fragment. If the skb header area
41784 + * contains exactly the headers and all payload is in the frag
41785 + * list, things are a little simpler
41786 + */
41787 + if (skb_headlen(skb) == state.p.header_length) {
41788 + /* Grab the first payload fragment. */
41789 + BUG_ON(skb_shinfo(skb)->nr_frags < 1);
41790 + frag_i = 0;
41791 + f = &skb_shinfo(skb)->frags[frag_i];
41792 + tso_get_fragment(&state, f->size,
41793 + page_address(f->page) + f->page_offset);
41794 + } else {
41795 + int hl = state.p.header_length;
41796 + tso_get_fragment(&state, skb_headlen(skb) - hl,
41797 + skb->data + hl);
41798 + frag_i = -1;
41799 + }
41800 +
41801 + if (tso_start_new_packet(vnic, skb, &state) < 0) {
41802 + DPRINTK("%s: out of first start-packet memory\n",
41803 + __FUNCTION__);
41804 + goto unwind;
41805 + }
41806 +
41807 + while (1) {
41808 + tso_fill_packet_with_fragment(vnic, &state);
41809 +
41810 + /* Move onto the next fragment? */
41811 + if (state.ifc.len == 0) {
41812 + if (++frag_i >= skb_shinfo(skb)->nr_frags)
41813 + /* End of payload reached. */
41814 + break;
41815 + f = &skb_shinfo(skb)->frags[frag_i];
41816 + tso_get_fragment(&state, f->size,
41817 + page_address(f->page) +
41818 + f->page_offset);
41819 + }
41820 +
41821 + /* Start a new buffer? */
41822 + if ((state.output_packets->tso_bufs->length ==
41823 + NETFRONT_ACCEL_TSO_BUF_LENGTH) &&
41824 + tso_start_new_buffer(vnic, &state, 0)) {
41825 + DPRINTK("%s: out of start-buffer memory\n",
41826 + __FUNCTION__);
41827 + goto unwind;
41828 + }
41829 +
41830 + /* Start at new packet? */
41831 + if ((state.packet_space == 0 ||
41832 + ((state.output_packets->tso_bufs_len >=
41833 + ACCEL_TSO_MAX_BUFFERS) &&
41834 + (state.output_packets->tso_bufs->length >=
41835 + NETFRONT_ACCEL_TSO_BUF_LENGTH))) &&
41836 + tso_start_new_packet(vnic, skb, &state) < 0) {
41837 + DPRINTK("%s: out of start-packet memory\n",
41838 + __FUNCTION__);
41839 + goto unwind;
41840 + }
41841 +
41842 + }
41843 +
41844 + /* Check for space */
41845 + if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
41846 + DPRINTK("%s: Not enough TX space (%d)\n",
41847 + __FUNCTION__, state.buffers);
41848 + goto unwind;
41849 + }
41850 +
41851 + /*
41852 + * Store the skb context in the most recent buffer (i.e. the
41853 + * last buffer that will be sent)
41854 + */
41855 + state.output_packets->tso_bufs->buf->skb = skb;
41856 +
41857 + /* Reverse the list of packets as we construct it on a stack */
41858 + while (state.output_packets != NULL) {
41859 + tmp_pkt = state.output_packets;
41860 + state.output_packets = tmp_pkt->next;
41861 + tmp_pkt->next = reversed_list;
41862 + reversed_list = tmp_pkt;
41863 + }
41864 +
41865 + /* Pass off to hardware */
41866 + while (reversed_list != NULL) {
41867 + tmp_pkt = reversed_list;
41868 + reversed_list = tmp_pkt->next;
41869 +
41870 + BUG_ON(tmp_pkt->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
41871 + BUG_ON(tmp_pkt->tso_bufs_len == 0);
41872 +
41873 + dma_id = tmp_pkt->tso_bufs->buf->buf_id;
41874 +
41875 + /*
41876 + * Make an iovec of the buffers in the list, reversing
41877 + * the buffers as we go, since they were constructed on a
41878 + * stack
41879 + */
41880 + tso_buf = tmp_pkt->tso_bufs;
41881 + for (frag_i = tmp_pkt->tso_bufs_len - 1;
41882 + frag_i >= 0;
41883 + frag_i--) {
41884 + iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
41885 + iovecs[frag_i].iov_len = tso_buf->length;
41886 + tso_buf = tso_buf->next;
41887 + }
41888 +
41889 + rc = ef_vi_transmitv(&vnic->vi, iovecs, tmp_pkt->tso_bufs_len,
41890 + dma_id);
41891 + /*
41892 + * We checked for space already, so it really should
41893 + * succeed
41894 + */
41895 + BUG_ON(rc != 0);
41896 + }
41897 +
41898 + /* Track number of tx fastpath stats */
41899 + vnic->netdev_stats.fastpath_tx_bytes += skb->len;
41900 + vnic->netdev_stats.fastpath_tx_pkts += state.packets;
41901 +#if NETFRONT_ACCEL_STATS
41902 + {
41903 + unsigned n;
41904 + n = vnic->netdev_stats.fastpath_tx_pkts -
41905 + vnic->stats.fastpath_tx_completions;
41906 + if (n > vnic->stats.fastpath_tx_pending_max)
41907 + vnic->stats.fastpath_tx_pending_max = n;
41908 + }
41909 +#endif
41910 +
41911 + return NETFRONT_ACCEL_STATUS_GOOD;
41912 +
41913 + unwind:
41914 + tso_unwind(vnic, &state);
41915 +
41916 + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
41917 +
41918 + return NETFRONT_ACCEL_STATUS_BUSY;
41919 +}
41920 +
41921 +
41922 +
41923 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.h
41924 ===================================================================
41925 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
41926 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.h 2008-02-20 09:32:49.000000000 +0100
41927 @@ -0,0 +1,57 @@
41928 +/****************************************************************************
41929 + * Solarflare driver for Xen network acceleration
41930 + *
41931 + * Copyright 2006-2008: Solarflare Communications Inc,
41932 + * 9501 Jeronimo Road, Suite 250,
41933 + * Irvine, CA 92618, USA
41934 + *
41935 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
41936 + *
41937 + * This program is free software; you can redistribute it and/or modify it
41938 + * under the terms of the GNU General Public License version 2 as published
41939 + * by the Free Software Foundation, incorporated herein by reference.
41940 + *
41941 + * This program is distributed in the hope that it will be useful,
41942 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
41943 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41944 + * GNU General Public License for more details.
41945 + *
41946 + * You should have received a copy of the GNU General Public License
41947 + * along with this program; if not, write to the Free Software
41948 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
41949 + ****************************************************************************
41950 + */
41951 +
41952 +#ifndef NETFRONT_ACCEL_TSO_H
41953 +#define NETFRONT_ACCEL_TSO_H
41954 +
41955 +#include "accel_bufs.h"
41956 +
41957 +/* Track the buffers used in each output packet */
41958 +struct netfront_accel_tso_buffer {
41959 + struct netfront_accel_tso_buffer *next;
41960 + struct netfront_accel_pkt_desc *buf;
41961 + unsigned length;
41962 +};
41963 +
41964 +/* Track the output packets formed from each input packet */
41965 +struct netfront_accel_tso_output_packet {
41966 + struct netfront_accel_tso_output_packet *next;
41967 + struct netfront_accel_tso_buffer *tso_bufs;
41968 + unsigned tso_bufs_len;
41969 +};
41970 +
41971 +
41972 +/*
41973 + * Max available space in a buffer for data once meta-data has taken
41974 + * its place
41975 + */
41976 +#define NETFRONT_ACCEL_TSO_BUF_LENGTH \
41977 + ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \
41978 + - sizeof(struct netfront_accel_tso_buffer) \
41979 + - sizeof(struct netfront_accel_tso_output_packet))
41980 +
41981 +int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic,
41982 + struct sk_buff *skb);
41983 +
41984 +#endif /* NETFRONT_ACCEL_TSO_H */
41985 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_vi.c
41986 ===================================================================
41987 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
41988 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_vi.c 2008-02-20 09:32:49.000000000 +0100
41989 @@ -0,0 +1,1194 @@
41990 +/****************************************************************************
41991 + * Solarflare driver for Xen network acceleration
41992 + *
41993 + * Copyright 2006-2008: Solarflare Communications Inc,
41994 + * 9501 Jeronimo Road, Suite 250,
41995 + * Irvine, CA 92618, USA
41996 + *
41997 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
41998 + *
41999 + * This program is free software; you can redistribute it and/or modify it
42000 + * under the terms of the GNU General Public License version 2 as published
42001 + * by the Free Software Foundation, incorporated herein by reference.
42002 + *
42003 + * This program is distributed in the hope that it will be useful,
42004 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
42005 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
42006 + * GNU General Public License for more details.
42007 + *
42008 + * You should have received a copy of the GNU General Public License
42009 + * along with this program; if not, write to the Free Software
42010 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
42011 + ****************************************************************************
42012 + */
42013 +
42014 +#include <linux/if_ether.h>
42015 +#include <linux/ip.h>
42016 +#include <net/checksum.h>
42017 +#include <asm/io.h>
42018 +
42019 +#include "accel.h"
42020 +#include "accel_util.h"
42021 +#include "accel_bufs.h"
42022 +#include "accel_tso.h"
42023 +#include "accel_ssr.h"
42024 +#include "netfront.h"
42025 +
42026 +#include "etherfabric/ef_vi.h"
42027 +
42028 +/*
42029 + * Max available space in a buffer for data once meta-data has taken
42030 + * its place
42031 + */
42032 +#define NETFRONT_ACCEL_TX_BUF_LENGTH \
42033 + ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \
42034 + - sizeof(struct netfront_accel_tso_buffer))
42035 +
42036 +#define ACCEL_TX_MAX_BUFFERS (6)
42037 +#define ACCEL_VI_POLL_EVENTS (8)
42038 +
42039 +static
42040 +int netfront_accel_vi_init_fini(netfront_accel_vnic *vnic,
42041 + struct net_accel_msg_hw *hw_msg)
42042 +{
42043 + struct ef_vi_nic_type nic_type;
42044 + struct net_accel_hw_falcon_b *hw_info;
42045 + void *io_kva, *evq_base, *rx_dma_kva, *tx_dma_kva, *doorbell_kva;
42046 + u32 *evq_gnts;
42047 + u32 evq_order;
42048 + int vi_state_size;
42049 + u8 vi_data[VI_MAPPINGS_SIZE];
42050 +
42051 + if (hw_msg == NULL)
42052 + goto fini;
42053 +
42054 + /* And create the local macs table lock */
42055 + spin_lock_init(&vnic->table_lock);
42056 +
42057 + /* Create fastpath table, initial size 8, key length 8 */
42058 + if (cuckoo_hash_init(&vnic->fastpath_table, 3, 8)) {
42059 + EPRINTK("failed to allocate fastpath table\n");
42060 + goto fail_cuckoo;
42061 + }
42062 +
42063 + vnic->hw.falcon.type = hw_msg->type;
42064 +
42065 + switch (hw_msg->type) {
42066 + case NET_ACCEL_MSG_HWTYPE_FALCON_A:
42067 + hw_info = &hw_msg->resources.falcon_a.common;
42068 + /* Need the extra rptr register page on A1 */
42069 + io_kva = net_accel_map_iomem_page
42070 + (vnic->dev, hw_msg->resources.falcon_a.evq_rptr_gnt,
42071 + &vnic->hw.falcon.evq_rptr_mapping);
42072 + if (io_kva == NULL) {
42073 + EPRINTK("%s: evq_rptr permission failed\n", __FUNCTION__);
42074 + goto evq_rptr_fail;
42075 + }
42076 +
42077 + vnic->hw.falcon.evq_rptr = io_kva +
42078 + (hw_info->evq_rptr & (PAGE_SIZE - 1));
42079 + break;
42080 + case NET_ACCEL_MSG_HWTYPE_FALCON_B:
42081 + hw_info = &hw_msg->resources.falcon_b;
42082 + break;
42083 + default:
42084 + goto bad_type;
42085 + }
42086 +
42087 + /**** Event Queue ****/
42088 +
42089 + /* Map the event queue pages */
42090 + evq_gnts = hw_info->evq_mem_gnts;
42091 + evq_order = hw_info->evq_order;
42092 +
42093 + EPRINTK_ON(hw_info->evq_offs != 0);
42094 +
42095 + DPRINTK("Will map evq %d pages\n", 1 << evq_order);
42096 +
42097 + evq_base =
42098 + net_accel_map_grants_contig(vnic->dev, evq_gnts, 1 << evq_order,
42099 + &vnic->evq_mapping);
42100 + if (evq_base == NULL) {
42101 + EPRINTK("%s: evq_base failed\n", __FUNCTION__);
42102 + goto evq_fail;
42103 + }
42104 +
42105 + /**** Doorbells ****/
42106 + /* Set up the doorbell mappings. */
42107 + doorbell_kva =
42108 + net_accel_map_iomem_page(vnic->dev, hw_info->doorbell_gnt,
42109 + &vnic->hw.falcon.doorbell_mapping);
42110 + if (doorbell_kva == NULL) {
42111 + EPRINTK("%s: doorbell permission failed\n", __FUNCTION__);
42112 + goto doorbell_fail;
42113 + }
42114 + vnic->hw.falcon.doorbell = doorbell_kva;
42115 +
42116 + /* On Falcon_B we get the rptr from the doorbell page */
42117 + if (hw_msg->type == NET_ACCEL_MSG_HWTYPE_FALCON_B) {
42118 + vnic->hw.falcon.evq_rptr =
42119 + (u32 *)((char *)vnic->hw.falcon.doorbell
42120 + + hw_info->evq_rptr);
42121 + }
42122 +
42123 + /**** DMA Queue ****/
42124 +
42125 + /* Set up the DMA Queues from the message. */
42126 + tx_dma_kva = net_accel_map_grants_contig
42127 + (vnic->dev, &(hw_info->txdmaq_gnt), 1,
42128 + &vnic->hw.falcon.txdmaq_mapping);
42129 + if (tx_dma_kva == NULL) {
42130 + EPRINTK("%s: TX dma failed\n", __FUNCTION__);
42131 + goto tx_dma_fail;
42132 + }
42133 +
42134 + rx_dma_kva = net_accel_map_grants_contig
42135 + (vnic->dev, &(hw_info->rxdmaq_gnt), 1,
42136 + &vnic->hw.falcon.rxdmaq_mapping);
42137 + if (rx_dma_kva == NULL) {
42138 + EPRINTK("%s: RX dma failed\n", __FUNCTION__);
42139 + goto rx_dma_fail;
42140 + }
42141 +
42142 + /* Full confession */
42143 + DPRINTK("Mapped H/W"
42144 + " Tx DMAQ grant %x -> %p\n"
42145 + " Rx DMAQ grant %x -> %p\n"
42146 + " EVQ grant %x -> %p\n",
42147 + hw_info->txdmaq_gnt, tx_dma_kva,
42148 + hw_info->rxdmaq_gnt, rx_dma_kva,
42149 + evq_gnts[0], evq_base
42150 + );
42151 +
42152 + memset(vi_data, 0, sizeof(vi_data));
42153 +
42154 + /* TODO BUG11305: convert efhw_arch to ef_vi_arch
42155 + * e.g.
42156 + * arch = ef_vi_arch_from_efhw_arch(hw_info->nic_arch);
42157 + * assert(arch >= 0);
42158 + * nic_type.arch = arch;
42159 + */
42160 + nic_type.arch = (unsigned char)hw_info->nic_arch;
42161 + nic_type.variant = (char)hw_info->nic_variant;
42162 + nic_type.revision = (unsigned char)hw_info->nic_revision;
42163 +
42164 + ef_vi_init_mapping_evq(vi_data, nic_type, hw_info->instance,
42165 + 1 << (evq_order + PAGE_SHIFT), evq_base,
42166 + (void *)0xdeadbeef);
42167 +
42168 + ef_vi_init_mapping_vi(vi_data, nic_type, hw_info->rx_capacity,
42169 + hw_info->tx_capacity, hw_info->instance,
42170 + doorbell_kva, rx_dma_kva, tx_dma_kva, 0);
42171 +
42172 + vi_state_size = ef_vi_calc_state_bytes(hw_info->rx_capacity,
42173 + hw_info->tx_capacity);
42174 + vnic->vi_state = (ef_vi_state *)kmalloc(vi_state_size, GFP_KERNEL);
42175 + if (vnic->vi_state == NULL) {
42176 + EPRINTK("%s: kmalloc for VI state failed\n", __FUNCTION__);
42177 + goto vi_state_fail;
42178 + }
42179 + ef_vi_init(&vnic->vi, vi_data, vnic->vi_state, &vnic->evq_state, 0);
42180 +
42181 + ef_eventq_state_init(&vnic->vi);
42182 +
42183 + ef_vi_state_init(&vnic->vi);
42184 +
42185 + return 0;
42186 +
42187 +fini:
42188 + kfree(vnic->vi_state);
42189 + vnic->vi_state = NULL;
42190 +vi_state_fail:
42191 + net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.rxdmaq_mapping);
42192 +rx_dma_fail:
42193 + net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.txdmaq_mapping);
42194 +tx_dma_fail:
42195 + net_accel_unmap_iomem_page(vnic->dev, vnic->hw.falcon.doorbell_mapping);
42196 + vnic->hw.falcon.doorbell = NULL;
42197 +doorbell_fail:
42198 + net_accel_unmap_grants_contig(vnic->dev, vnic->evq_mapping);
42199 +evq_fail:
42200 + if (vnic->hw.falcon.type == NET_ACCEL_MSG_HWTYPE_FALCON_A)
42201 + net_accel_unmap_iomem_page(vnic->dev,
42202 + vnic->hw.falcon.evq_rptr_mapping);
42203 + vnic->hw.falcon.evq_rptr = NULL;
42204 +evq_rptr_fail:
42205 +bad_type:
42206 + cuckoo_hash_destroy(&vnic->fastpath_table);
42207 +fail_cuckoo:
42208 + return -EIO;
42209 +}
42210 +
42211 +
42212 +void netfront_accel_vi_ctor(netfront_accel_vnic *vnic)
42213 +{
42214 + /* Just mark the VI as uninitialised. */
42215 + vnic->vi_state = NULL;
42216 +}
42217 +
42218 +
42219 +int netfront_accel_vi_init(netfront_accel_vnic *vnic, struct net_accel_msg_hw *hw_msg)
42220 +{
42221 + BUG_ON(hw_msg == NULL);
42222 + return netfront_accel_vi_init_fini(vnic, hw_msg);
42223 +}
42224 +
42225 +
42226 +void netfront_accel_vi_dtor(netfront_accel_vnic *vnic)
42227 +{
42228 + if (vnic->vi_state != NULL)
42229 + netfront_accel_vi_init_fini(vnic, NULL);
42230 +}
42231 +
42232 +
42233 +static
42234 +void netfront_accel_vi_post_rx(netfront_accel_vnic *vnic, u16 id,
42235 + netfront_accel_pkt_desc *buf)
42236 +{
42237 +
42238 + int idx = vnic->rx_dma_batched;
42239 +
42240 +#if 0
42241 + VPRINTK("Posting buffer %d (0x%08x) for rx at index %d, space is %d\n",
42242 + id, buf->pkt_buff_addr, idx, ef_vi_receive_space(&vnic->vi));
42243 +#endif
42244 + /* Set up a virtual buffer descriptor */
42245 + ef_vi_receive_init(&vnic->vi, buf->pkt_buff_addr, id,
42246 + /*rx_bytes=max*/0);
42247 +
42248 + idx++;
42249 +
42250 + vnic->rx_dma_level++;
42251 +
42252 + /*
42253 + * Only push the descriptor to the card if we've reached the
42254 + * batch size. Otherwise, the descriptors can sit around for
42255 + * a while. There will be plenty available.
42256 + */
42257 + if (idx >= NETFRONT_ACCEL_RX_DESC_BATCH ||
42258 + vnic->rx_dma_level < NETFRONT_ACCEL_RX_DESC_BATCH) {
42259 +#if 0
42260 + VPRINTK("Flushing %d rx descriptors.\n", idx);
42261 +#endif
42262 +
42263 + /* Push buffer to hardware */
42264 + ef_vi_receive_push(&vnic->vi);
42265 +
42266 + idx = 0;
42267 + }
42268 +
42269 + vnic->rx_dma_batched = idx;
42270 +}
42271 +
42272 +
42273 +inline
42274 +void netfront_accel_vi_post_rx_or_free(netfront_accel_vnic *vnic, u16 id,
42275 + netfront_accel_pkt_desc *buf)
42276 +{
42277 +
42278 + VPRINTK("%s: %d\n", __FUNCTION__, id);
42279 +
42280 + if (ef_vi_receive_space(&vnic->vi) <= vnic->rx_dma_batched) {
42281 + VPRINTK("RX space is full\n");
42282 + netfront_accel_buf_put(vnic->rx_bufs, id);
42283 + return;
42284 + }
42285 +
42286 + VPRINTK("Completed buffer %d is reposted\n", id);
42287 + netfront_accel_vi_post_rx(vnic, id, buf);
42288 +
42289 + /*
42290 + * Let's see if there's any more to be pushed out to the NIC
42291 + * while we're here
42292 + */
42293 + while (ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
42294 + /* Try to allocate a buffer. */
42295 + buf = netfront_accel_buf_get(vnic->rx_bufs);
42296 + if (buf == NULL)
42297 + break;
42298 +
42299 + /* Add it to the rx dma queue. */
42300 + netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
42301 + }
42302 +}
42303 +
42304 +
42305 +void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx)
42306 +{
42307 +
42308 + while (is_rx &&
42309 + ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
42310 + netfront_accel_pkt_desc *buf;
42311 +
42312 + VPRINTK("%s: %d\n", __FUNCTION__, vnic->rx_dma_level);
42313 +
42314 + /* Try to allocate a buffer. */
42315 + buf = netfront_accel_buf_get(vnic->rx_bufs);
42316 +
42317 + if (buf == NULL)
42318 + break;
42319 +
42320 + /* Add it to the rx dma queue. */
42321 + netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
42322 + }
42323 +
42324 + VPRINTK("%s: done\n", __FUNCTION__);
42325 +}
42326 +
42327 +
42328 +struct netfront_accel_multi_state {
42329 + unsigned remaining_len;
42330 +
42331 + unsigned buffers;
42332 +
42333 + struct netfront_accel_tso_buffer *output_buffers;
42334 +
42335 + /* Where we are in the current fragment of the SKB. */
42336 + struct {
42337 + /* address of current position */
42338 + void *addr;
42339 + /* remaining length */
42340 + unsigned int len;
42341 + } ifc; /* == Input Fragment Cursor */
42342 +};
42343 +
42344 +
42345 +static inline void multi_post_start(struct netfront_accel_multi_state *st,
42346 + struct sk_buff *skb)
42347 +{
42348 + st->remaining_len = skb->len;
42349 + st->output_buffers = NULL;
42350 + st->buffers = 0;
42351 + st->ifc.len = skb_headlen(skb);
42352 + st->ifc.addr = skb->data;
42353 +}
42354 +
42355 +static int multi_post_start_new_buffer(netfront_accel_vnic *vnic,
42356 + struct netfront_accel_multi_state *st)
42357 +{
42358 + struct netfront_accel_tso_buffer *tso_buf;
42359 + struct netfront_accel_pkt_desc *buf;
42360 +
42361 + /* Get a mapped packet buffer */
42362 + buf = netfront_accel_buf_get(vnic->tx_bufs);
42363 + if (buf == NULL) {
42364 + DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
42365 + return -1;
42366 + }
42367 +
42368 + /* Store a bit of meta-data at the end */
42369 + tso_buf = (struct netfront_accel_tso_buffer *)
42370 + (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
42371 +
42372 + tso_buf->buf = buf;
42373 +
42374 + tso_buf->length = 0;
42375 +
42376 + tso_buf->next = st->output_buffers;
42377 + st->output_buffers = tso_buf;
42378 + st->buffers++;
42379 +
42380 + BUG_ON(st->buffers >= ACCEL_TX_MAX_BUFFERS);
42381 +
42382 + /*
42383 + * Store the skb context; set to NULL for now, the last packet
42384 + * buffer will get a non-NULL skb later
42385 + */
42386 + tso_buf->buf->skb = NULL;
42387 +
42388 + return 0;
42389 +}
42390 +
42391 +
42392 +static void
42393 +multi_post_fill_buffer_with_fragment(netfront_accel_vnic *vnic,
42394 + struct netfront_accel_multi_state *st)
42395 +{
42396 + struct netfront_accel_tso_buffer *tso_buf;
42397 + unsigned n, space;
42398 +
42399 + BUG_ON(st->output_buffers == NULL);
42400 + tso_buf = st->output_buffers;
42401 +
42402 + if (st->ifc.len == 0) return;
42403 + if (tso_buf->length == NETFRONT_ACCEL_TX_BUF_LENGTH) return;
42404 +
42405 + BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
42406 +
42407 + space = NETFRONT_ACCEL_TX_BUF_LENGTH - tso_buf->length;
42408 + n = min(st->ifc.len, space);
42409 +
42410 + memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
42411 +
42412 + st->remaining_len -= n;
42413 + st->ifc.len -= n;
42414 + tso_buf->length += n;
42415 + st->ifc.addr += n;
42416 +
42417 + BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
42418 +
42419 + return;
42420 +}
42421 +
42422 +
42423 +static inline void multi_post_unwind(netfront_accel_vnic *vnic,
42424 + struct netfront_accel_multi_state *st)
42425 +{
42426 + struct netfront_accel_tso_buffer *tso_buf;
42427 +
42428 + DPRINTK("%s\n", __FUNCTION__);
42429 +
42430 + while (st->output_buffers != NULL) {
42431 + tso_buf = st->output_buffers;
42432 + st->output_buffers = tso_buf->next;
42433 + st->buffers--;
42434 + netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
42435 + }
42436 + BUG_ON(st->buffers != 0);
42437 +}
42438 +
42439 +
42440 +static enum netfront_accel_post_status
42441 +netfront_accel_enqueue_skb_multi(netfront_accel_vnic *vnic, struct sk_buff *skb)
42442 +{
42443 + struct netfront_accel_tso_buffer *tso_buf;
42444 + struct netfront_accel_multi_state state;
42445 + ef_iovec iovecs[ACCEL_TX_MAX_BUFFERS];
42446 + skb_frag_t *f;
42447 + int frag_i, rc, dma_id;
42448 +
42449 + multi_post_start(&state, skb);
42450 +
42451 + frag_i = -1;
42452 +
42453 + if (skb->ip_summed == CHECKSUM_HW) {
42454 + /* Set to zero to encourage falcon to work it out for us */
42455 + *(u16*)(skb->h.raw + skb->csum) = 0;
42456 + }
42457 +
42458 + if (multi_post_start_new_buffer(vnic, &state)) {
42459 + DPRINTK("%s: out of buffers\n", __FUNCTION__);
42460 + goto unwind;
42461 + }
42462 +
42463 + while (1) {
42464 + multi_post_fill_buffer_with_fragment(vnic, &state);
42465 +
42466 + /* Move onto the next fragment? */
42467 + if (state.ifc.len == 0) {
42468 + if (++frag_i >= skb_shinfo(skb)->nr_frags)
42469 + /* End of payload reached. */
42470 + break;
42471 + f = &skb_shinfo(skb)->frags[frag_i];
42472 + state.ifc.len = f->size;
42473 + state.ifc.addr = page_address(f->page) + f->page_offset;
42474 + }
42475 +
42476 + /* Start a new buffer? */
42477 + if ((state.output_buffers->length ==
42478 + NETFRONT_ACCEL_TX_BUF_LENGTH) &&
42479 + multi_post_start_new_buffer(vnic, &state)) {
42480 + DPRINTK("%s: out of buffers\n", __FUNCTION__);
42481 + goto unwind;
42482 + }
42483 + }
42484 +
42485 + /* Check for space */
42486 + if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
42487 + DPRINTK("%s: Not enough TX space (%d)\n", __FUNCTION__, state.buffers);
42488 + goto unwind;
42489 + }
42490 +
42491 + /* Store the skb in what will be the last buffer's context */
42492 + state.output_buffers->buf->skb = skb;
42493 + /* Remember dma_id of what will be the last buffer */
42494 + dma_id = state.output_buffers->buf->buf_id;
42495 +
42496 + /*
42497 + * Make an iovec of the buffers in the list, reversing the
42498 + * buffers as we go, since they were constructed on a stack
42499 + */
42500 + tso_buf = state.output_buffers;
42501 + for (frag_i = state.buffers-1; frag_i >= 0; frag_i--) {
42502 + iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
42503 + iovecs[frag_i].iov_len = tso_buf->length;
42504 + tso_buf = tso_buf->next;
42505 + }
42506 +
42507 + rc = ef_vi_transmitv(&vnic->vi, iovecs, state.buffers, dma_id);
42508 +
42509 + /* Track number of tx fastpath stats */
42510 + vnic->netdev_stats.fastpath_tx_bytes += skb->len;
42511 + vnic->netdev_stats.fastpath_tx_pkts ++;
42512 +#if NETFRONT_ACCEL_STATS
42513 + {
42514 + u32 n;
42515 + n = vnic->netdev_stats.fastpath_tx_pkts -
42516 + (u32)vnic->stats.fastpath_tx_completions;
42517 + if (n > vnic->stats.fastpath_tx_pending_max)
42518 + vnic->stats.fastpath_tx_pending_max = n;
42519 + }
42520 +#endif
42521 + return NETFRONT_ACCEL_STATUS_GOOD;
42522 +
42523 +unwind:
42524 + multi_post_unwind(vnic, &state);
42525 +
42526 + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
42527 +
42528 + return NETFRONT_ACCEL_STATUS_BUSY;
42529 +}
42530 +
42531 +
42532 +static enum netfront_accel_post_status
42533 +netfront_accel_enqueue_skb_single(netfront_accel_vnic *vnic, struct sk_buff *skb)
42534 +{
42535 + struct netfront_accel_tso_buffer *tso_buf;
42536 + struct netfront_accel_pkt_desc *buf;
42537 + u8 *kva;
42538 + int rc;
42539 +
42540 + if (ef_vi_transmit_space(&vnic->vi) < 1) {
42541 + DPRINTK("%s: No TX space\n", __FUNCTION__);
42542 + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
42543 + return NETFRONT_ACCEL_STATUS_BUSY;
42544 + }
42545 +
42546 + buf = netfront_accel_buf_get(vnic->tx_bufs);
42547 + if (buf == NULL) {
42548 + DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
42549 + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
42550 + return NETFRONT_ACCEL_STATUS_BUSY;
42551 + }
42552 +
42553 + /* Track number of tx fastpath stats */
42554 + vnic->netdev_stats.fastpath_tx_pkts++;
42555 + vnic->netdev_stats.fastpath_tx_bytes += skb->len;
42556 +
42557 +#if NETFRONT_ACCEL_STATS
42558 + {
42559 + u32 n;
42560 + n = vnic->netdev_stats.fastpath_tx_pkts -
42561 + (u32)vnic->stats.fastpath_tx_completions;
42562 + if (n > vnic->stats.fastpath_tx_pending_max)
42563 + vnic->stats.fastpath_tx_pending_max = n;
42564 + }
42565 +#endif
42566 +
42567 + /* Store the context */
42568 + buf->skb = skb;
42569 +
42570 + kva = buf->pkt_kva;
42571 +
42572 + if (skb->ip_summed == CHECKSUM_HW) {
42573 + /* Set to zero to encourage falcon to work it out for us */
42574 + *(u16*)(skb->h.raw + skb->csum) = 0;
42575 + }
42576 + NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
42577 + (skb, idx, frag_data, frag_len, {
42578 + /* Copy in payload */
42579 + VPRINTK("*** Copying %d bytes to %p\n", frag_len, kva);
42580 + memcpy(kva, frag_data, frag_len);
42581 + kva += frag_len;
42582 + });
42583 +
42584 + VPRINTK("%s: id %d pkt %p kva %p buff_addr 0x%08x\n", __FUNCTION__,
42585 + buf->buf_id, buf, buf->pkt_kva, buf->pkt_buff_addr);
42586 +
42587 +
42588 + /* Set up the TSO meta-data for a single buffer/packet */
42589 + tso_buf = (struct netfront_accel_tso_buffer *)
42590 + (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
42591 + tso_buf->next = NULL;
42592 + tso_buf->buf = buf;
42593 + tso_buf->length = skb->len;
42594 +
42595 + rc = ef_vi_transmit(&vnic->vi, buf->pkt_buff_addr, skb->len,
42596 + buf->buf_id);
42597 + /* We checked for space already, so it really should succeed */
42598 + BUG_ON(rc != 0);
42599 +
42600 + return NETFRONT_ACCEL_STATUS_GOOD;
42601 +}
42602 +
42603 +
42604 +enum netfront_accel_post_status
42605 +netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, struct sk_buff *skb)
42606 +{
42607 + struct ethhdr *pkt_eth_hdr;
42608 + struct iphdr *pkt_ipv4_hdr;
42609 + int value, try_fastpath;
42610 +
42611 + /*
42612 + * This assumes that the data field points to the dest mac
42613 + * address.
42614 + */
42615 + cuckoo_hash_mac_key key = cuckoo_mac_to_key(skb->data);
42616 +
42617 + /*
42618 + * NB very important that all things that could return "CANT"
42619 + * are tested before things that return "BUSY", as if it
42620 + * returns "BUSY" it is assumed that it won't return "CANT"
42621 + * next time it is tried
42622 + */
42623 +
42624 + /*
42625 + * Do a fastpath send if fast path table lookup returns true.
42626 + * We do this without the table lock and so may get the wrong
42627 + * answer, but current opinion is that's not a big problem
42628 + */
42629 + try_fastpath = cuckoo_hash_lookup(&vnic->fastpath_table,
42630 + (cuckoo_hash_key *)(&key), &value);
42631 +
42632 + if (!try_fastpath) {
42633 + VPRINTK("try fast path false for mac: " MAC_FMT "\n",
42634 + MAC_ARG(skb->data));
42635 +
42636 + return NETFRONT_ACCEL_STATUS_CANT;
42637 + }
42638 +
42639 + /* Check to see if the packet can be sent. */
42640 + if (skb_headlen(skb) < sizeof(*pkt_eth_hdr) + sizeof(*pkt_ipv4_hdr)) {
42641 + EPRINTK("%s: Packet header is too small\n", __FUNCTION__);
42642 + return NETFRONT_ACCEL_STATUS_CANT;
42643 + }
42644 +
42645 + pkt_eth_hdr = (void*)skb->data;
42646 + pkt_ipv4_hdr = (void*)(pkt_eth_hdr+1);
42647 +
42648 + if (be16_to_cpu(pkt_eth_hdr->h_proto) != ETH_P_IP) {
42649 + DPRINTK("%s: Packet is not IPV4 (ether_type=0x%04x)\n", __FUNCTION__,
42650 + be16_to_cpu(pkt_eth_hdr->h_proto));
42651 + return NETFRONT_ACCEL_STATUS_CANT;
42652 + }
42653 +
42654 + if (pkt_ipv4_hdr->protocol != IPPROTO_TCP &&
42655 + pkt_ipv4_hdr->protocol != IPPROTO_UDP) {
42656 + DPRINTK("%s: Packet is not TCP/UDP (ip_protocol=0x%02x)\n",
42657 + __FUNCTION__, pkt_ipv4_hdr->protocol);
42658 + return NETFRONT_ACCEL_STATUS_CANT;
42659 + }
42660 +
42661 + VPRINTK("%s: %d bytes, gso %d\n", __FUNCTION__, skb->len,
42662 + skb_shinfo(skb)->gso_size);
42663 +
42664 + if (skb_shinfo(skb)->gso_size) {
42665 + return netfront_accel_enqueue_skb_tso(vnic, skb);
42666 + }
42667 +
42668 + if (skb->len <= NETFRONT_ACCEL_TX_BUF_LENGTH) {
42669 + return netfront_accel_enqueue_skb_single(vnic, skb);
42670 + }
42671 +
42672 + return netfront_accel_enqueue_skb_multi(vnic, skb);
42673 +}
42674 +
42675 +
42676 +/*
42677 + * Copy the data to required end destination. NB. len is the total new
42678 + * length of the socket buffer, not the amount of data to copy
42679 + */
42680 +inline
42681 +int ef_vnic_copy_to_skb(netfront_accel_vnic *vnic, struct sk_buff *skb,
42682 + struct netfront_accel_pkt_desc *buf, int len)
42683 +{
42684 + int i, extra = len - skb->len;
42685 + char c = 0;
42686 + int pkt_stride = vnic->rx_pkt_stride;
42687 + int skb_stride = vnic->rx_skb_stride;
42688 + char *skb_start;
42689 +
42690 + /*
42691 + * This pulls stuff into the cache - have seen performance
42692 + * benefit in this, but disabled by default
42693 + */
42694 + skb_start = skb->data;
42695 + if (pkt_stride) {
42696 + for (i = 0; i < len; i += pkt_stride) {
42697 + c += ((volatile char*)(buf->pkt_kva))[i];
42698 + }
42699 + }
42700 + if (skb_stride) {
42701 + for (i = skb->len; i < len ; i += skb_stride) {
42702 + c += ((volatile char*)(skb_start))[i];
42703 + }
42704 + }
42705 +
42706 + if (skb_tailroom(skb) >= extra) {
42707 + memcpy(skb_put(skb, extra), buf->pkt_kva, extra);
42708 + return 0;
42709 + }
42710 +
42711 + return -ENOSPC;
42712 +}
42713 +
42714 +
42715 +static void discard_jumbo_state(netfront_accel_vnic *vnic)
42716 +{
42717 +
42718 + if (vnic->jumbo_state.skb != NULL) {
42719 + dev_kfree_skb_any(vnic->jumbo_state.skb);
42720 +
42721 + vnic->jumbo_state.skb = NULL;
42722 + }
42723 + vnic->jumbo_state.in_progress = 0;
42724 +}
42725 +
42726 +
42727 +static void netfront_accel_vi_rx_complete(netfront_accel_vnic *vnic,
42728 + struct sk_buff *skb)
42729 +{
42730 + cuckoo_hash_mac_key key;
42731 + unsigned long flags;
42732 + int value;
42733 + struct net_device *net_dev;
42734 +
42735 +
42736 + key = cuckoo_mac_to_key(skb->data + ETH_ALEN);
42737 +
42738 + /*
42739 + * If this is a MAC address that we want to do fast path TX
42740 + * to, and we don't already, add it to the fastpath table.
42741 + * The initial lookup is done without the table lock and so
42742 + * may get the wrong answer, but current opinion is that's not
42743 + * a big problem
42744 + */
42745 + if (is_valid_ether_addr(skb->data + ETH_ALEN) &&
42746 + !cuckoo_hash_lookup(&vnic->fastpath_table, (cuckoo_hash_key *)&key,
42747 + &value)) {
42748 + spin_lock_irqsave(&vnic->table_lock, flags);
42749 +
42750 + cuckoo_hash_add_check(&vnic->fastpath_table,
42751 + (cuckoo_hash_key *)&key,
42752 + 1, 1);
42753 +
42754 + spin_unlock_irqrestore(&vnic->table_lock, flags);
42755 + }
42756 +
42757 + if (compare_ether_addr(skb->data, vnic->mac)) {
42758 + struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
42759 + u16 port;
42760 +
42761 + DPRINTK("%s: saw wrong MAC address " MAC_FMT "\n",
42762 + __FUNCTION__, MAC_ARG(skb->data));
42763 +
42764 + if (ip->protocol == IPPROTO_TCP) {
42765 + struct tcphdr *tcp = (struct tcphdr *)
42766 + ((char *)ip + 4 * ip->ihl);
42767 + port = tcp->dest;
42768 + } else {
42769 + struct udphdr *udp = (struct udphdr *)
42770 + ((char *)ip + 4 * ip->ihl);
42771 + EPRINTK_ON(ip->protocol != IPPROTO_UDP);
42772 + port = udp->dest;
42773 + }
42774 +
42775 + netfront_accel_msg_tx_fastpath(vnic, skb->data,
42776 + ip->daddr, port,
42777 + ip->protocol);
42778 + }
42779 +
42780 + net_dev = vnic->net_dev;
42781 + skb->dev = net_dev;
42782 + skb->protocol = eth_type_trans(skb, net_dev);
42783 + /* CHECKSUM_UNNECESSARY as hardware has done it already */
42784 + skb->ip_summed = CHECKSUM_UNNECESSARY;
42785 +
42786 + if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb))
42787 + netif_receive_skb(skb);
42788 +}
42789 +
42790 +
42791 +static int netfront_accel_vi_poll_process_rx(netfront_accel_vnic *vnic,
42792 + ef_event *ev)
42793 +{
42794 + struct netfront_accel_bufinfo *bufinfo = vnic->rx_bufs;
42795 + struct netfront_accel_pkt_desc *buf = NULL;
42796 + struct sk_buff *skb;
42797 + int id, len, sop = 0, cont = 0;
42798 +
42799 + VPRINTK("Rx event.\n");
42800 + /*
42801 + * Complete the receive operation, and get the request id of
42802 + * the buffer
42803 + */
42804 + id = ef_vi_receive_done(&vnic->vi, ev);
42805 +
42806 + if (id < 0 || id >= bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) {
42807 + EPRINTK("Rx packet %d is invalid\n", id);
42808 + /* Carry on round the loop if more events */
42809 + goto bad_packet;
42810 + }
42811 + /* Get our buffer descriptor */
42812 + buf = netfront_accel_buf_find(bufinfo, id);
42813 +
42814 + len = EF_EVENT_RX_BYTES(*ev);
42815 +
42816 + /* An RX buffer has been removed from the DMA ring. */
42817 + vnic->rx_dma_level--;
42818 +
42819 + if (EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX) {
42820 + sop = EF_EVENT_RX_SOP(*ev);
42821 + cont = EF_EVENT_RX_CONT(*ev);
42822 +
42823 + skb = vnic->jumbo_state.skb;
42824 +
42825 + VPRINTK("Rx packet %d: %d bytes so far; sop %d; cont %d\n",
42826 + id, len, sop, cont);
42827 +
42828 + if (sop) {
42829 + if (!vnic->jumbo_state.in_progress) {
42830 + vnic->jumbo_state.in_progress = 1;
42831 + BUG_ON(vnic->jumbo_state.skb != NULL);
42832 + } else {
42833 + /*
42834 + * This fragment shows a missing tail in
42835 + * previous one, but is itself possibly OK
42836 + */
42837 + DPRINTK("sop and in_progress => no tail\n");
42838 +
42839 + /* Release the socket buffer we already had */
42840 + discard_jumbo_state(vnic);
42841 +
42842 + /* Now start processing this fragment */
42843 + vnic->jumbo_state.in_progress = 1;
42844 + skb = NULL;
42845 + }
42846 + } else if (!vnic->jumbo_state.in_progress) {
42847 + DPRINTK("!sop and !in_progress => missing head\n");
42848 + goto missing_head;
42849 + }
42850 +
42851 + if (!cont) {
42852 + /* Update state for next time */
42853 + vnic->jumbo_state.in_progress = 0;
42854 + vnic->jumbo_state.skb = NULL;
42855 + } else if (!vnic->jumbo_state.in_progress) {
42856 + DPRINTK("cont and !in_progress => missing head\n");
42857 + goto missing_head;
42858 + }
42859 +
42860 + if (skb == NULL) {
42861 + BUG_ON(!sop);
42862 +
42863 + if (!cont)
42864 + skb = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC);
42865 + else
42866 + skb = alloc_skb(vnic->net_dev->mtu+NET_IP_ALIGN,
42867 + GFP_ATOMIC);
42868 +
42869 + if (skb == NULL) {
42870 + DPRINTK("%s: Couldn't get an rx skb.\n",
42871 + __FUNCTION__);
42872 + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
42873 + /*
42874 + * Dropping this fragment means we
42875 + * should discard the rest too
42876 + */
42877 + discard_jumbo_state(vnic);
42878 +
42879 + /* Carry on round the loop if more events */
42880 + return 0;
42881 + }
42882 +
42883 + }
42884 +
42885 + /* Copy the data to required end destination */
42886 + if (ef_vnic_copy_to_skb(vnic, skb, buf, len) != 0) {
42887 + /*
42888 + * No space in the skb - suggests > MTU packet
42889 + * received
42890 + */
42891 + EPRINTK("%s: Rx packet too large (%d)\n",
42892 + __FUNCTION__, len);
42893 + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
42894 + discard_jumbo_state(vnic);
42895 + return 0;
42896 + }
42897 +
42898 + /* Put the buffer back in the DMA queue. */
42899 + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
42900 +
42901 + if (cont) {
42902 + vnic->jumbo_state.skb = skb;
42903 +
42904 + return 0;
42905 + } else {
42906 + /* Track number of rx fastpath packets */
42907 + vnic->netdev_stats.fastpath_rx_pkts++;
42908 + vnic->netdev_stats.fastpath_rx_bytes += len;
42909 +
42910 + netfront_accel_vi_rx_complete(vnic, skb);
42911 +
42912 + return 1;
42913 + }
42914 + } else {
42915 + BUG_ON(EF_EVENT_TYPE(*ev) != EF_EVENT_TYPE_RX_DISCARD);
42916 +
42917 + if (EF_EVENT_RX_DISCARD_TYPE(*ev)
42918 + == EF_EVENT_RX_DISCARD_TRUNC) {
42919 + DPRINTK("%s: " EF_EVENT_FMT
42920 + " buffer %d FRM_TRUNC q_id %d\n",
42921 + __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
42922 + EF_EVENT_RX_DISCARD_Q_ID(*ev) );
42923 + NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_frm_trunc);
42924 + } else if (EF_EVENT_RX_DISCARD_TYPE(*ev)
42925 + == EF_EVENT_RX_DISCARD_OTHER) {
42926 + DPRINTK("%s: " EF_EVENT_FMT
42927 + " buffer %d RX_DISCARD_OTHER q_id %d\n",
42928 + __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
42929 + EF_EVENT_RX_DISCARD_Q_ID(*ev) );
42930 + /*
42931 + * Probably tail of packet for which error has
42932 + * already been logged, so don't count in
42933 + * stats
42934 + */
42935 + } else {
42936 + EPRINTK("%s: " EF_EVENT_FMT
42937 + " buffer %d rx discard type %d q_id %d\n",
42938 + __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
42939 + EF_EVENT_RX_DISCARD_TYPE(*ev),
42940 + EF_EVENT_RX_DISCARD_Q_ID(*ev) );
42941 + NETFRONT_ACCEL_STATS_OP(++vnic->stats.bad_event_count);
42942 + }
42943 + }
42944 +
42945 + /* discard type drops through here */
42946 +
42947 +bad_packet:
42948 + /* Release the socket buffer we already had */
42949 + discard_jumbo_state(vnic);
42950 +
42951 +missing_head:
42952 + BUG_ON(vnic->jumbo_state.in_progress != 0);
42953 + BUG_ON(vnic->jumbo_state.skb != NULL);
42954 +
42955 + if (id >= 0 && id < bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE)
42956 + /* Put the buffer back in the DMA queue. */
42957 + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
42958 +
42959 + vnic->netdev_stats.fastpath_rx_errors++;
42960 +
42961 + DPRINTK("%s experienced bad packet/missing fragment error: %d \n",
42962 + __FUNCTION__, ev->rx.flags);
42963 +
42964 + return 0;
42965 +}
42966 +
42967 +
42968 +static void netfront_accel_vi_not_busy(netfront_accel_vnic *vnic)
42969 +{
42970 + struct netfront_info *np = ((struct netfront_info *)
42971 + netdev_priv(vnic->net_dev));
42972 + struct sk_buff *skb;
42973 + int handled;
42974 + unsigned long flags;
42975 +
42976 + /*
42977 + * TODO if we could safely check tx_skb == NULL and return
42978 + * early without taking the lock, that would obviously help
42979 + * performance
42980 + */
42981 +
42982 + /* Take the netfront lock which protects tx_skb. */
42983 + spin_lock_irqsave(&np->tx_lock, flags);
42984 + if (vnic->tx_skb != NULL) {
42985 + DPRINTK("%s trying to send spare buffer\n", __FUNCTION__);
42986 +
42987 + skb = vnic->tx_skb;
42988 + vnic->tx_skb = NULL;
42989 +
42990 + spin_unlock_irqrestore(&np->tx_lock, flags);
42991 +
42992 + handled = netfront_accel_vi_tx_post(vnic, skb);
42993 +
42994 + spin_lock_irqsave(&np->tx_lock, flags);
42995 +
42996 + if (handled != NETFRONT_ACCEL_STATUS_BUSY) {
42997 + DPRINTK("%s restarting tx\n", __FUNCTION__);
42998 + if (netfront_check_queue_ready(vnic->net_dev)) {
42999 + netif_wake_queue(vnic->net_dev);
43000 + NETFRONT_ACCEL_STATS_OP
43001 + (vnic->stats.queue_wakes++);
43002 + }
43003 + } else {
43004 + vnic->tx_skb = skb;
43005 + }
43006 +
43007 + /*
43008 + * Should never get a CANT, as it checks that before
43009 + * deciding it was BUSY first time round
43010 + */
43011 + BUG_ON(handled == NETFRONT_ACCEL_STATUS_CANT);
43012 + }
43013 + spin_unlock_irqrestore(&np->tx_lock, flags);
43014 +}
43015 +
43016 +
43017 +static void netfront_accel_vi_tx_complete(netfront_accel_vnic *vnic,
43018 + struct netfront_accel_tso_buffer *tso_buf,
43019 + int is_last)
43020 +{
43021 + struct netfront_accel_tso_buffer *next;
43022 +
43023 + /*
43024 + * We get a single completion for every call to
43025 + * ef_vi_transmitv so handle any other buffers which are part
43026 + * of the same packet
43027 + */
43028 + while (tso_buf != NULL) {
43029 + if (tso_buf->buf->skb != NULL) {
43030 + dev_kfree_skb_any(tso_buf->buf->skb);
43031 + tso_buf->buf->skb = NULL;
43032 + }
43033 +
43034 + next = tso_buf->next;
43035 +
43036 + netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
43037 +
43038 + tso_buf = next;
43039 + }
43040 +
43041 + /*
43042 + * If this was the last one in the batch, we try and send any
43043 + * pending tx_skb. There should now be buffers and
43044 + * descriptors
43045 + */
43046 + if (is_last)
43047 + netfront_accel_vi_not_busy(vnic);
43048 +}
43049 +
43050 +
43051 +static void netfront_accel_vi_poll_process_tx(netfront_accel_vnic *vnic,
43052 + ef_event *ev)
43053 +{
43054 + struct netfront_accel_pkt_desc *buf;
43055 + struct netfront_accel_tso_buffer *tso_buf;
43056 + ef_request_id ids[EF_VI_TRANSMIT_BATCH];
43057 + int i, n_ids;
43058 + unsigned long flags;
43059 +
43060 + /* Get the request ids for this tx completion event. */
43061 + n_ids = ef_vi_transmit_unbundle(&vnic->vi, ev, ids);
43062 +
43063 + /* Take the tx buffer spin lock and hold for the duration */
43064 + spin_lock_irqsave(&vnic->tx_lock, flags);
43065 +
43066 + for (i = 0; i < n_ids; ++i) {
43067 + VPRINTK("Tx packet %d complete\n", ids[i]);
43068 + buf = netfront_accel_buf_find(vnic->tx_bufs, ids[i]);
43069 + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_completions++);
43070 +
43071 + tso_buf = (struct netfront_accel_tso_buffer *)
43072 + (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
43073 + BUG_ON(tso_buf->buf != buf);
43074 +
43075 + netfront_accel_vi_tx_complete(vnic, tso_buf, i == (n_ids-1));
43076 + }
43077 +
43078 + spin_unlock_irqrestore(&vnic->tx_lock, flags);
43079 +}
43080 +
43081 +
43082 +int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets)
43083 +{
43084 + ef_event ev[ACCEL_VI_POLL_EVENTS];
43085 + int rx_remain = rx_packets, rc, events, i;
43086 +#if NETFRONT_ACCEL_STATS
43087 + int n_evs_polled = 0, rx_evs_polled = 0, tx_evs_polled = 0;
43088 +#endif
43089 + BUG_ON(rx_packets <= 0);
43090 +
43091 + events = ef_eventq_poll(&vnic->vi, ev,
43092 + min(rx_remain, ACCEL_VI_POLL_EVENTS));
43093 + i = 0;
43094 + NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
43095 +
43096 + VPRINTK("%s: %d events\n", __FUNCTION__, events);
43097 +
43098 + /* Loop over each event */
43099 + while (events) {
43100 + VPRINTK("%s: Event "EF_EVENT_FMT", index %lu\n", __FUNCTION__,
43101 + EF_EVENT_PRI_ARG(ev[i]),
43102 + (unsigned long)(vnic->vi.evq_state->evq_ptr));
43103 +
43104 + if ((EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX) ||
43105 + (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_DISCARD)) {
43106 + rc = netfront_accel_vi_poll_process_rx(vnic, &ev[i]);
43107 + rx_remain -= rc;
43108 + BUG_ON(rx_remain < 0);
43109 + NETFRONT_ACCEL_STATS_OP(rx_evs_polled++);
43110 + } else if (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX) {
43111 + netfront_accel_vi_poll_process_tx(vnic, &ev[i]);
43112 + NETFRONT_ACCEL_STATS_OP(tx_evs_polled++);
43113 + } else if (EF_EVENT_TYPE(ev[i]) ==
43114 + EF_EVENT_TYPE_RX_NO_DESC_TRUNC) {
43115 + DPRINTK("%s: RX_NO_DESC_TRUNC " EF_EVENT_FMT "\n",
43116 + __FUNCTION__, EF_EVENT_PRI_ARG(ev[i]));
43117 + discard_jumbo_state(vnic);
43118 + NETFRONT_ACCEL_STATS_OP(vnic->stats.rx_no_desc_trunc++);
43119 + } else {
43120 + EPRINTK("Unexpected event " EF_EVENT_FMT "\n",
43121 + EF_EVENT_PRI_ARG(ev[i]));
43122 + NETFRONT_ACCEL_STATS_OP(vnic->stats.bad_event_count++);
43123 + }
43124 +
43125 + i++;
43126 +
43127 + /* Carry on round the loop if more events and more space */
43128 + if (i == events) {
43129 + if (rx_remain == 0)
43130 + break;
43131 +
43132 + events = ef_eventq_poll(&vnic->vi, ev,
43133 + min(rx_remain,
43134 + ACCEL_VI_POLL_EVENTS));
43135 + i = 0;
43136 + NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
43137 + }
43138 + }
43139 +
43140 +#if NETFRONT_ACCEL_STATS
43141 + vnic->stats.event_count += n_evs_polled;
43142 + vnic->stats.event_count_since_irq += n_evs_polled;
43143 + if (n_evs_polled > vnic->stats.events_per_poll_max)
43144 + vnic->stats.events_per_poll_max = n_evs_polled;
43145 + if (rx_evs_polled > vnic->stats.events_per_poll_rx_max)
43146 + vnic->stats.events_per_poll_rx_max = rx_evs_polled;
43147 + if (tx_evs_polled > vnic->stats.events_per_poll_tx_max)
43148 + vnic->stats.events_per_poll_tx_max = tx_evs_polled;
43149 +#endif
43150 +
43151 + return rx_packets - rx_remain;
43152 +}
43153 +
43154 +
43155 +int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic)
43156 +{
43157 + u32 sw_evq_ptr;
43158 +
43159 + VPRINTK("%s: checking for event on %p\n", __FUNCTION__, &vnic->vi.evq_state);
43160 +
43161 + BUG_ON(vnic == NULL);
43162 + BUG_ON(vnic->vi.evq_state == NULL);
43163 +
43164 + /* Do a quick check for an event. */
43165 + if (ef_eventq_has_event(&vnic->vi)) {
43166 + VPRINTK("%s: found event\n", __FUNCTION__);
43167 + return 0;
43168 + }
43169 +
43170 + VPRINTK("evq_ptr=0x%08x evq_mask=0x%08x\n",
43171 + vnic->evq_state.evq_ptr, vnic->vi.evq_mask);
43172 +
43173 + /* Request a wakeup from the hardware. */
43174 + sw_evq_ptr = vnic->evq_state.evq_ptr & vnic->vi.evq_mask;
43175 +
43176 + BUG_ON(vnic->hw.falcon.evq_rptr == NULL);
43177 +
43178 + VPRINTK("Requesting wakeup at 0x%08x, rptr %p\n", sw_evq_ptr,
43179 + vnic->hw.falcon.evq_rptr);
43180 + *(volatile u32 *)(vnic->hw.falcon.evq_rptr) = (sw_evq_ptr >> 3);
43181 +
43182 + return 1;
43183 +}
43184 Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_xenbus.c
43185 ===================================================================
43186 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
43187 +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_xenbus.c 2008-02-20 09:32:49.000000000 +0100
43188 @@ -0,0 +1,776 @@
43189 +/****************************************************************************
43190 + * Solarflare driver for Xen network acceleration
43191 + *
43192 + * Copyright 2006-2008: Solarflare Communications Inc,
43193 + * 9501 Jeronimo Road, Suite 250,
43194 + * Irvine, CA 92618, USA
43195 + *
43196 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
43197 + *
43198 + * This program is free software; you can redistribute it and/or modify it
43199 + * under the terms of the GNU General Public License version 2 as published
43200 + * by the Free Software Foundation, incorporated herein by reference.
43201 + *
43202 + * This program is distributed in the hope that it will be useful,
43203 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
43204 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43205 + * GNU General Public License for more details.
43206 + *
43207 + * You should have received a copy of the GNU General Public License
43208 + * along with this program; if not, write to the Free Software
43209 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
43210 + ****************************************************************************
43211 + */
43212 +
43213 +#include <linux/stddef.h>
43214 +#include <linux/errno.h>
43215 +
43216 +#include <xen/xenbus.h>
43217 +#include <xen/evtchn.h>
43218 +#include <xen/gnttab.h>
43219 +
43220 +#include "accel.h"
43221 +#include "accel_util.h"
43222 +#include "accel_msg_iface.h"
43223 +#include "accel_bufs.h"
43224 +#include "accel_ssr.h"
43225 +/* drivers/xen/netfront/netfront.h */
43226 +#include "netfront.h"
43227 +
43228 +void netfront_accel_set_closing(netfront_accel_vnic *vnic)
43229 +{
43230 +
43231 + vnic->frontend_state = XenbusStateClosing;
43232 + net_accel_update_state(vnic->dev, XenbusStateClosing);
43233 +}
43234 +
43235 +
43236 +static void mac_address_change(struct xenbus_watch *watch,
43237 + const char **vec, unsigned int len)
43238 +{
43239 + netfront_accel_vnic *vnic;
43240 + struct xenbus_device *dev;
43241 + int rc;
43242 +
43243 + DPRINTK("%s\n", __FUNCTION__);
43244 +
43245 + vnic = container_of(watch, netfront_accel_vnic,
43246 + mac_address_watch);
43247 + dev = vnic->dev;
43248 +
43249 + rc = net_accel_xen_net_read_mac(dev, vnic->mac);
43250 +
43251 + if (rc != 0)
43252 + EPRINTK("%s: failed to read mac (%d)\n", __FUNCTION__, rc);
43253 +}
43254 +
43255 +
43256 +static int setup_mac_address_watch(struct xenbus_device *dev,
43257 + netfront_accel_vnic *vnic)
43258 +{
43259 + int err;
43260 +
43261 + DPRINTK("Setting watch on %s/%s\n", dev->nodename, "mac");
43262 +
43263 + err = xenbus_watch_path2(dev, dev->nodename, "mac",
43264 + &vnic->mac_address_watch,
43265 + mac_address_change);
43266 + if (err) {
43267 + EPRINTK("%s: Failed to register xenbus watch: %d\n",
43268 + __FUNCTION__, err);
43269 + goto fail;
43270 + }
43271 +
43272 + return 0;
43273 + fail:
43274 + vnic->mac_address_watch.node = NULL;
43275 + return err;
43276 +}
43277 +
43278 +
43279 +/* Grant access to some pages and publish through xenbus */
43280 +static int make_named_grant(struct xenbus_device *dev, void *page,
43281 + const char *name, grant_ref_t *gnt_ref)
43282 +{
43283 + struct xenbus_transaction tr;
43284 + int err;
43285 + grant_ref_t gnt;
43286 +
43287 + gnt = net_accel_grant_page(dev, virt_to_mfn(page), 0);
43288 + if (gnt < 0)
43289 + return gnt;
43290 +
43291 + do {
43292 + err = xenbus_transaction_start(&tr);
43293 + if (err != 0) {
43294 + EPRINTK("%s: transaction start failed %d\n",
43295 + __FUNCTION__, err);
43296 + return err;
43297 + }
43298 + err = xenbus_printf(tr, dev->nodename, name, "%d", gnt);
43299 + if (err != 0) {
43300 + EPRINTK("%s: xenbus_printf failed %d\n", __FUNCTION__,
43301 + err);
43302 + xenbus_transaction_end(tr, 1);
43303 + return err;
43304 + }
43305 + err = xenbus_transaction_end(tr, 0);
43306 + } while (err == -EAGAIN);
43307 +
43308 + if (err != 0) {
43309 + EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err);
43310 + return err;
43311 + }
43312 +
43313 + *gnt_ref = gnt;
43314 +
43315 + return 0;
43316 +}
43317 +
43318 +
43319 +static int remove_named_grant(struct xenbus_device *dev,
43320 + const char *name, grant_ref_t gnt_ref)
43321 +{
43322 + struct xenbus_transaction tr;
43323 + int err;
43324 +
43325 + net_accel_ungrant_page(gnt_ref);
43326 +
43327 + do {
43328 + err = xenbus_transaction_start(&tr);
43329 + if (err != 0) {
43330 + EPRINTK("%s: transaction start failed %d\n",
43331 + __FUNCTION__, err);
43332 + return err;
43333 + }
43334 + err = xenbus_rm(tr, dev->nodename, name);
43335 + if (err != 0) {
43336 + EPRINTK("%s: xenbus_rm failed %d\n", __FUNCTION__,
43337 + err);
43338 + xenbus_transaction_end(tr, 1);
43339 + return err;
43340 + }
43341 + err = xenbus_transaction_end(tr, 0);
43342 + } while (err == -EAGAIN);
43343 +
43344 + if (err != 0) {
43345 + EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err);
43346 + return err;
43347 + }
43348 +
43349 + return 0;
43350 +}
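As an illustration (not part of the patch), the two helpers above are used as a matched pair; vnic_setup_domU_shared_state() below does essentially this for the control page:

	grant_ref_t gnt;

	if (make_named_grant(dev, page, "accel-ctrl-page", &gnt) == 0) {
		/* ... page is granted and the reference published ... */
		remove_named_grant(dev, "accel-ctrl-page", gnt);
	}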
43351 +
43352 +
43353 +static
43354 +netfront_accel_vnic *netfront_accel_vnic_ctor(struct net_device *net_dev,
43355 + struct xenbus_device *dev)
43356 +{
43357 + struct netfront_info *np =
43358 + (struct netfront_info *)netdev_priv(net_dev);
43359 + netfront_accel_vnic *vnic;
43360 + int err;
43361 +
43362 + /*
43363 +	 * A bug in earlier versions of the Xen accel plugin system meant
43364 + * you could be probed twice for the same device on suspend
43365 + * cancel. Be tolerant of that.
43366 + */
43367 + if (np->accel_priv != NULL)
43368 + return ERR_PTR(-EALREADY);
43369 +
43370 + /* Alloc mem for state */
43371 + vnic = kzalloc(sizeof(netfront_accel_vnic), GFP_KERNEL);
43372 + if (vnic == NULL) {
43373 + EPRINTK("%s: no memory for vnic state\n", __FUNCTION__);
43374 + return ERR_PTR(-ENOMEM);
43375 + }
43376 +
43377 + spin_lock_init(&vnic->tx_lock);
43378 +
43379 + mutex_init(&vnic->vnic_mutex);
43380 + mutex_lock(&vnic->vnic_mutex);
43381 +
43382 + /* Store so state can be retrieved from device */
43383 + BUG_ON(np->accel_priv != NULL);
43384 + np->accel_priv = vnic;
43385 + vnic->dev = dev;
43386 + vnic->net_dev = net_dev;
43387 + spin_lock_init(&vnic->irq_enabled_lock);
43388 + netfront_accel_ssr_init(&vnic->ssr_state);
43389 +
43390 + init_waitqueue_head(&vnic->state_wait_queue);
43391 + vnic->backend_state = XenbusStateUnknown;
43392 + vnic->frontend_state = XenbusStateClosed;
43393 + vnic->removing = 0;
43394 + vnic->domU_state_is_setup = 0;
43395 + vnic->dom0_state_is_setup = 0;
43396 + vnic->poll_enabled = 0;
43397 + vnic->tx_enabled = 0;
43398 + vnic->tx_skb = NULL;
43399 +
43400 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
43401 + INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend);
43402 +#else
43403 + INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend, vnic);
43404 +#endif
43405 +
43406 + netfront_accel_debugfs_create(vnic);
43407 +
43408 + mutex_unlock(&vnic->vnic_mutex);
43409 +
43410 + err = net_accel_xen_net_read_mac(dev, vnic->mac);
43411 + if (err)
43412 + goto fail_mac;
43413 +
43414 +	/* Set up a watch on the frontend's MAC address */
43415 + err = setup_mac_address_watch(dev, vnic);
43416 + if (err)
43417 + goto fail_mac;
43418 +
43419 + return vnic;
43420 +
43421 +fail_mac:
43422 +
43423 + mutex_lock(&vnic->vnic_mutex);
43424 +
43425 + netfront_accel_debugfs_remove(vnic);
43426 +
43427 + netfront_accel_ssr_fini(vnic, &vnic->ssr_state);
43428 +
43429 + EPRINTK_ON(vnic->tx_skb != NULL);
43430 +
43431 + vnic->frontend_state = XenbusStateUnknown;
43432 + net_accel_update_state(dev, XenbusStateUnknown);
43433 +
43434 + mutex_unlock(&vnic->vnic_mutex);
43435 +
43436 + np->accel_priv = NULL;
43437 + kfree(vnic);
43438 +
43439 + return ERR_PTR(err);
43440 +}
43441 +
43442 +
43443 +static void netfront_accel_vnic_dtor(netfront_accel_vnic *vnic)
43444 +{
43445 + struct net_device *net_dev = vnic->net_dev;
43446 + struct netfront_info *np =
43447 + (struct netfront_info *)netdev_priv(net_dev);
43448 +
43449 + /*
43450 +	 * Now that we no longer hold the lock it is safe to remove
43451 +	 * this watch and synchronise with the completion of
43452 + * watches
43453 + */
43454 + DPRINTK("%s: unregistering xenbus mac watch\n", __FUNCTION__);
43455 + unregister_xenbus_watch(&vnic->mac_address_watch);
43456 + kfree(vnic->mac_address_watch.node);
43457 +
43458 + flush_workqueue(netfront_accel_workqueue);
43459 +
43460 + mutex_lock(&vnic->vnic_mutex);
43461 +
43462 + netfront_accel_debugfs_remove(vnic);
43463 +
43464 + netfront_accel_ssr_fini(vnic, &vnic->ssr_state);
43465 +
43466 + EPRINTK_ON(vnic->tx_skb != NULL);
43467 +
43468 + vnic->frontend_state = XenbusStateUnknown;
43469 + net_accel_update_state(vnic->dev, XenbusStateUnknown);
43470 +
43471 + mutex_unlock(&vnic->vnic_mutex);
43472 +
43473 + np->accel_priv = NULL;
43474 + kfree(vnic);
43475 +}
43476 +
43477 +
43478 +static int vnic_setup_domU_shared_state(struct xenbus_device *dev,
43479 + netfront_accel_vnic *vnic)
43480 +{
43481 + struct xenbus_transaction tr;
43482 + int err;
43483 + int msgs_per_queue;
43484 +
43485 +
43486 + DPRINTK("Setting up domU shared state.\n");
43487 +
43488 + msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg);
43489 +
43490 + /* Allocate buffer state */
43491 + vnic->tx_bufs = netfront_accel_init_bufs(&vnic->tx_lock);
43492 + if (vnic->tx_bufs == NULL) {
43493 + err = -ENOMEM;
43494 + EPRINTK("%s: Failed to allocate tx buffers\n", __FUNCTION__);
43495 + goto fail_tx_bufs;
43496 + }
43497 +
43498 + vnic->rx_bufs = netfront_accel_init_bufs(NULL);
43499 + if (vnic->rx_bufs == NULL) {
43500 + err = -ENOMEM;
43501 + EPRINTK("%s: Failed to allocate rx buffers\n", __FUNCTION__);
43502 + goto fail_rx_bufs;
43503 + }
43504 +
43505 + /*
43506 + * This allocates two pages, one for the shared page and one
43507 + * for the message queue.
43508 + */
43509 + vnic->shared_page = (struct net_accel_shared_page *)
43510 + __get_free_pages(GFP_KERNEL, 1);
43511 + if (vnic->shared_page == NULL) {
43512 + EPRINTK("%s: no memory for shared pages\n", __FUNCTION__);
43513 + err = -ENOMEM;
43514 + goto fail_shared_page;
43515 + }
43516 +
43517 + net_accel_msg_init_queue
43518 + (&vnic->from_dom0, &vnic->shared_page->queue0,
43519 + (struct net_accel_msg *)((u8*)vnic->shared_page + PAGE_SIZE),
43520 + msgs_per_queue);
43521 +
43522 + net_accel_msg_init_queue
43523 + (&vnic->to_dom0, &vnic->shared_page->queue1,
43524 + (struct net_accel_msg *)((u8*)vnic->shared_page +
43525 + (3 * PAGE_SIZE / 2)),
43526 + msgs_per_queue);
43527 +
43528 + vnic->msg_state = NETFRONT_ACCEL_MSG_NONE;
43529 +
43530 + err = make_named_grant(dev, vnic->shared_page, "accel-ctrl-page",
43531 + &vnic->ctrl_page_gnt);
43532 + if (err) {
43533 + EPRINTK("couldn't make ctrl-page named grant\n");
43534 + goto fail_ctrl_page_grant;
43535 + }
43536 +
43537 + err = make_named_grant(dev, (u8*)vnic->shared_page + PAGE_SIZE,
43538 + "accel-msg-page", &vnic->msg_page_gnt);
43539 + if (err) {
43540 + EPRINTK("couldn't make msg-page named grant\n");
43541 + goto fail_msg_page_grant;
43542 + }
43543 +
43544 + /* Create xenbus msg event channel */
43545 + err = bind_listening_port_to_irqhandler
43546 + (dev->otherend_id, netfront_accel_msg_channel_irq_from_bend,
43547 + SA_SAMPLE_RANDOM, "vnicctrl", vnic);
43548 + if (err < 0) {
43549 + EPRINTK("Couldn't bind msg event channel\n");
43550 + goto fail_msg_irq;
43551 + }
43552 + vnic->msg_channel_irq = err;
43553 + vnic->msg_channel = irq_to_evtchn_port(vnic->msg_channel_irq);
43554 +
43555 + /* Create xenbus net event channel */
43556 + err = bind_listening_port_to_irqhandler
43557 + (dev->otherend_id, netfront_accel_net_channel_irq_from_bend,
43558 + SA_SAMPLE_RANDOM, "vnicfront", vnic);
43559 + if (err < 0) {
43560 + EPRINTK("Couldn't bind net event channel\n");
43561 + goto fail_net_irq;
43562 + }
43563 + vnic->net_channel_irq = err;
43564 + vnic->net_channel = irq_to_evtchn_port(vnic->net_channel_irq);
43565 + /* Want to ensure we don't get interrupts before we're ready */
43566 + netfront_accel_disable_net_interrupts(vnic);
43567 +
43568 + DPRINTK("otherend %d has msg ch %u (%u) and net ch %u (%u)\n",
43569 + dev->otherend_id, vnic->msg_channel, vnic->msg_channel_irq,
43570 + vnic->net_channel, vnic->net_channel_irq);
43571 +
43572 + do {
43573 + err = xenbus_transaction_start(&tr);
43574 + if (err != 0) {
43575 + EPRINTK("%s: Transaction start failed %d\n",
43576 + __FUNCTION__, err);
43577 + goto fail_transaction;
43578 + }
43579 +
43580 + err = xenbus_printf(tr, dev->nodename, "accel-msg-channel",
43581 + "%u", vnic->msg_channel);
43582 + if (err != 0) {
43583 + EPRINTK("%s: event channel xenbus write failed %d\n",
43584 + __FUNCTION__, err);
43585 + xenbus_transaction_end(tr, 1);
43586 + goto fail_transaction;
43587 + }
43588 +
43589 + err = xenbus_printf(tr, dev->nodename, "accel-net-channel",
43590 + "%u", vnic->net_channel);
43591 + if (err != 0) {
43592 + EPRINTK("%s: net channel xenbus write failed %d\n",
43593 + __FUNCTION__, err);
43594 + xenbus_transaction_end(tr, 1);
43595 + goto fail_transaction;
43596 + }
43597 +
43598 + err = xenbus_transaction_end(tr, 0);
43599 + } while (err == -EAGAIN);
43600 +
43601 + if (err != 0) {
43602 + EPRINTK("%s: Transaction end failed %d\n", __FUNCTION__, err);
43603 + goto fail_transaction;
43604 + }
43605 +
43606 + DPRINTK("Completed setting up domU shared state\n");
43607 +
43608 + return 0;
43609 +
43610 +fail_transaction:
43611 +
43612 + unbind_from_irqhandler(vnic->net_channel_irq, vnic);
43613 +fail_net_irq:
43614 +
43615 + unbind_from_irqhandler(vnic->msg_channel_irq, vnic);
43616 +fail_msg_irq:
43617 +
43618 + remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt);
43619 +fail_msg_page_grant:
43620 +
43621 + remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt);
43622 +fail_ctrl_page_grant:
43623 +
43624 + free_pages((unsigned long)vnic->shared_page, 1);
43625 + vnic->shared_page = NULL;
43626 +fail_shared_page:
43627 +
43628 + netfront_accel_fini_bufs(vnic->rx_bufs);
43629 +fail_rx_bufs:
43630 +
43631 + netfront_accel_fini_bufs(vnic->tx_bufs);
43632 +fail_tx_bufs:
43633 +
43634 + /* Undo the memory allocation created when we got the HELLO */
43635 + netfront_accel_free_buffer_mem(&vnic->bufpages,
43636 + vnic->rx_bufs,
43637 + vnic->tx_bufs);
43638 +
43639 + DPRINTK("Failed to setup domU shared state with code %d\n", err);
43640 +
43641 + return err;
43642 +}
43643 +
43644 +
43645 +static void vnic_remove_domU_shared_state(struct xenbus_device *dev,
43646 + netfront_accel_vnic *vnic)
43647 +{
43648 + struct xenbus_transaction tr;
43649 +
43650 + /*
43651 + * Don't remove any watches because we currently hold the
43652 + * mutex and the watches take the mutex.
43653 + */
43654 +
43655 + DPRINTK("%s: removing event channel irq handlers %d %d\n",
43656 + __FUNCTION__, vnic->net_channel_irq, vnic->msg_channel_irq);
43657 + do {
43658 + if (xenbus_transaction_start(&tr) != 0)
43659 + break;
43660 + xenbus_rm(tr, dev->nodename, "accel-msg-channel");
43661 + xenbus_rm(tr, dev->nodename, "accel-net-channel");
43662 + } while (xenbus_transaction_end(tr, 0) == -EAGAIN);
43663 +
43664 + unbind_from_irqhandler(vnic->net_channel_irq, vnic);
43665 + unbind_from_irqhandler(vnic->msg_channel_irq, vnic);
43666 +
43667 + /* ungrant pages for msg channel */
43668 + remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt);
43669 + remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt);
43670 + free_pages((unsigned long)vnic->shared_page, 1);
43671 + vnic->shared_page = NULL;
43672 +
43673 + /* ungrant pages for buffers, and free buffer memory */
43674 + netfront_accel_free_buffer_mem(&vnic->bufpages,
43675 + vnic->rx_bufs,
43676 + vnic->tx_bufs);
43677 + netfront_accel_fini_bufs(vnic->rx_bufs);
43678 + netfront_accel_fini_bufs(vnic->tx_bufs);
43679 +}
43680 +
43681 +
43682 +static void vnic_setup_dom0_shared_state(struct xenbus_device *dev,
43683 + netfront_accel_vnic *vnic)
43684 +{
43685 + DPRINTK("Setting up dom0 shared state\n");
43686 +
43687 + netfront_accel_vi_ctor(vnic);
43688 +
43689 + /*
43690 + * Message processing will be enabled when this function
43691 + * returns, but we might have missed an interrupt. Schedule a
43692 + * check just in case.
43693 + */
43694 + queue_work(netfront_accel_workqueue, &vnic->msg_from_bend);
43695 +}
43696 +
43697 +
43698 +static void vnic_remove_dom0_shared_state(struct xenbus_device *dev,
43699 + netfront_accel_vnic *vnic)
43700 +{
43701 + DPRINTK("Removing dom0 shared state\n");
43702 +
43703 + vnic_stop_fastpath(vnic);
43704 +
43705 + netfront_accel_vi_dtor(vnic);
43706 +}
43707 +
43708 +
43709 +/*************************************************************************/
43710 +
43711 +/*
43712 + * The following code handles accelstate changes between the frontend
43713 + * and the backend. In response to transitions, calls the following
43714 + * functions in matching pairs:
43715 + *
43716 + * vnic_setup_domU_shared_state
43717 + * vnic_remove_domU_shared_state
43718 + *
43719 + * vnic_setup_dom0_shared_state
43720 + * vnic_remove_dom0_shared_state
43721 + *
43722 + * Valid state transitions for DomU are as follows:
43723 + *
43724 + * Closed->Init on probe or in response to Init from dom0
43725 + *
43726 + * Init->Connected in response to Init from dom0
43727 + * Init->Closing on error providing dom0 is in Init
43728 + * Init->Closed on remove or in response to Closing from dom0
43729 + *
43730 + * Connected->Closing on error/remove
43731 + * Connected->Closed in response to Closing from dom0
43732 + *
43733 + * Closing->Closed in response to Closing from dom0
43734 + *
43735 + */
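For orientation, a typical error-free bring-up and tear-down under the rules above looks like this (a condensed, illustrative trace of netfront_accel_backend_accel_changed() below):

/*
 *   backend -> Initialising : vnic_setup_domU_shared_state(),
 *                             frontend -> Connected
 *   backend -> Connected    : vnic_setup_dom0_shared_state()
 *   backend -> Closing      : vnic_remove_dom0_shared_state(),
 *                             frontend -> Closed
 *   backend -> Closed       : vnic_remove_domU_shared_state()
 */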
43736 +
43737 +
43738 +/* Function to deal with Xenbus accel state change in backend */
43739 +static void netfront_accel_backend_accel_changed(netfront_accel_vnic *vnic,
43740 + XenbusState backend_state)
43741 +{
43742 + struct xenbus_device *dev = vnic->dev;
43743 + XenbusState frontend_state;
43744 + int state;
43745 +
43746 + DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n",
43747 + __FUNCTION__, xenbus_strstate(vnic->backend_state),
43748 + xenbus_strstate(backend_state), dev->nodename, dev->otherend);
43749 +
43750 + /*
43751 + * Ignore duplicate state changes. This can happen if the
43752 + * backend changes state twice in quick succession and the
43753 + * first watch fires in the frontend after the second
43754 + * transition has completed.
43755 + */
43756 + if (vnic->backend_state == backend_state)
43757 + return;
43758 +
43759 + vnic->backend_state = backend_state;
43760 + frontend_state = vnic->frontend_state;
43761 +
43762 + switch (backend_state) {
43763 + case XenbusStateInitialising:
43764 + /*
43765 + * It's possible for us to miss the closed state from
43766 + * dom0, so do the work here.
43767 + */
43768 + if (vnic->domU_state_is_setup) {
43769 + vnic_remove_domU_shared_state(dev, vnic);
43770 + vnic->domU_state_is_setup = 0;
43771 + }
43772 +
43773 + if (frontend_state != XenbusStateInitialising) {
43774 + /* Make sure the backend doesn't go away. */
43775 + frontend_state = XenbusStateInitialising;
43776 + net_accel_update_state(dev, frontend_state);
43777 + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state);
43778 + backend_state = (XenbusState)state;
43779 + if (backend_state != XenbusStateInitialising)
43780 + break;
43781 + }
43782 +
43783 + /* Start the new connection. */
43784 + if (!vnic->removing) {
43785 + BUG_ON(vnic->domU_state_is_setup);
43786 + if (vnic_setup_domU_shared_state(dev, vnic) == 0) {
43787 + vnic->domU_state_is_setup = 1;
43788 + frontend_state = XenbusStateConnected;
43789 + } else
43790 + frontend_state = XenbusStateClosing;
43791 + }
43792 + break;
43793 + case XenbusStateConnected:
43794 + if (vnic->domU_state_is_setup &&
43795 + !vnic->dom0_state_is_setup) {
43796 + vnic_setup_dom0_shared_state(dev, vnic);
43797 + vnic->dom0_state_is_setup = 1;
43798 + }
43799 + break;
43800 + default:
43801 + case XenbusStateClosing:
43802 + if (vnic->dom0_state_is_setup) {
43803 + vnic_remove_dom0_shared_state(dev, vnic);
43804 + vnic->dom0_state_is_setup = 0;
43805 + }
43806 + frontend_state = XenbusStateClosed;
43807 + break;
43808 + case XenbusStateUnknown:
43809 + case XenbusStateClosed:
43810 + if (vnic->domU_state_is_setup) {
43811 + vnic_remove_domU_shared_state(dev, vnic);
43812 + vnic->domU_state_is_setup = 0;
43813 + }
43814 + break;
43815 + }
43816 +
43817 + if (frontend_state != vnic->frontend_state) {
43818 + DPRINTK("Switching from state %s (%d) to %s (%d)\n",
43819 + xenbus_strstate(vnic->frontend_state),
43820 + vnic->frontend_state,
43821 + xenbus_strstate(frontend_state), frontend_state);
43822 + vnic->frontend_state = frontend_state;
43823 + net_accel_update_state(dev, frontend_state);
43824 + }
43825 +
43826 + wake_up(&vnic->state_wait_queue);
43827 +}
43828 +
43829 +
43830 +static void backend_accel_state_change(struct xenbus_watch *watch,
43831 + const char **vec, unsigned int len)
43832 +{
43833 + int state;
43834 + netfront_accel_vnic *vnic;
43835 + struct xenbus_device *dev;
43836 +
43837 + DPRINTK("%s\n", __FUNCTION__);
43838 +
43839 + vnic = container_of(watch, struct netfront_accel_vnic,
43840 + backend_accel_watch);
43841 +
43842 + mutex_lock(&vnic->vnic_mutex);
43843 +
43844 + dev = vnic->dev;
43845 +
43846 + state = (int)XenbusStateUnknown;
43847 + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state);
43848 + netfront_accel_backend_accel_changed(vnic, state);
43849 +
43850 + mutex_unlock(&vnic->vnic_mutex);
43851 +}
43852 +
43853 +
43854 +static int setup_dom0_accel_watch(struct xenbus_device *dev,
43855 + netfront_accel_vnic *vnic)
43856 +{
43857 + int err;
43858 +
43859 + DPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate");
43860 +
43861 + err = xenbus_watch_path2(dev, dev->otherend, "accelstate",
43862 + &vnic->backend_accel_watch,
43863 + backend_accel_state_change);
43864 + if (err) {
43865 + EPRINTK("%s: Failed to register xenbus watch: %d\n",
43866 + __FUNCTION__, err);
43867 + goto fail;
43868 + }
43869 + return 0;
43870 + fail:
43871 + vnic->backend_accel_watch.node = NULL;
43872 + return err;
43873 +}
43874 +
43875 +
43876 +int netfront_accel_probe(struct net_device *net_dev, struct xenbus_device *dev)
43877 +{
43878 + netfront_accel_vnic *vnic;
43879 + int err;
43880 +
43881 + DPRINTK("Probe passed device %s\n", dev->nodename);
43882 +
43883 + vnic = netfront_accel_vnic_ctor(net_dev, dev);
43884 + if (IS_ERR(vnic))
43885 + return PTR_ERR(vnic);
43886 +
43887 + /*
43888 +	 * Set up a watch on the backend accel state. This sets things
43889 + * going.
43890 + */
43891 + err = setup_dom0_accel_watch(dev, vnic);
43892 + if (err) {
43893 + netfront_accel_vnic_dtor(vnic);
43894 + EPRINTK("%s: probe failed with code %d\n", __FUNCTION__, err);
43895 + return err;
43896 + }
43897 +
43898 + /*
43899 + * Indicate to the other end that we're ready to start unless
43900 + * the watch has already fired.
43901 + */
43902 + mutex_lock(&vnic->vnic_mutex);
43903 + VPRINTK("setup success, updating accelstate\n");
43904 + if (vnic->frontend_state == XenbusStateClosed) {
43905 + vnic->frontend_state = XenbusStateInitialising;
43906 + net_accel_update_state(dev, XenbusStateInitialising);
43907 + }
43908 + mutex_unlock(&vnic->vnic_mutex);
43909 +
43910 + DPRINTK("Probe done device %s\n", dev->nodename);
43911 +
43912 + return 0;
43913 +}
43914 +
43915 +
43916 +int netfront_accel_remove(struct xenbus_device *dev)
43917 +{
43918 + struct netfront_info *np =
43919 + (struct netfront_info *)dev->dev.driver_data;
43920 + netfront_accel_vnic *vnic = (netfront_accel_vnic *)np->accel_priv;
43921 +
43922 + DPRINTK("%s %s\n", __FUNCTION__, dev->nodename);
43923 +
43924 + BUG_ON(vnic == NULL);
43925 +
43926 + mutex_lock(&vnic->vnic_mutex);
43927 +
43928 + /* Reject any attempts to connect. */
43929 + vnic->removing = 1;
43930 +
43931 + /* Close any existing connection. */
43932 + if (vnic->frontend_state == XenbusStateConnected) {
43933 + vnic->frontend_state = XenbusStateClosing;
43934 + net_accel_update_state(dev, XenbusStateClosing);
43935 + }
43936 +
43937 + mutex_unlock(&vnic->vnic_mutex);
43938 +
43939 + DPRINTK("%s waiting for release of %s\n", __FUNCTION__, dev->nodename);
43940 +
43941 + /*
43942 + * Wait for the xenbus watch to release the shared resources.
43943 + * This indicates that dom0 has made the transition
43944 + * Closing->Closed or that dom0 was in Closed or Init and no
43945 + * resources were mapped.
43946 + */
43947 + wait_event(vnic->state_wait_queue,
43948 + !vnic->domU_state_is_setup);
43949 +
43950 + /*
43951 +	 * Now that we no longer need this watch it is safe to remove
43952 +	 * it (and so synchronise with its completion if outstanding)
43953 + */
43954 + DPRINTK("%s: unregistering xenbus accel watch\n",
43955 + __FUNCTION__);
43956 + unregister_xenbus_watch(&vnic->backend_accel_watch);
43957 + kfree(vnic->backend_accel_watch.node);
43958 +
43959 + netfront_accel_vnic_dtor(vnic);
43960 +
43961 + DPRINTK("%s done %s\n", __FUNCTION__, dev->nodename);
43962 +
43963 + return 0;
43964 +}
43965 Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon.h
43966 ===================================================================
43967 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
43968 +++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon.h 2008-02-20 09:32:49.000000000 +0100
43969 @@ -0,0 +1,172 @@
43970 +/****************************************************************************
43971 + * Copyright 2002-2005: Level 5 Networks Inc.
43972 + * Copyright 2005-2008: Solarflare Communications Inc,
43973 + * 9501 Jeronimo Road, Suite 250,
43974 + * Irvine, CA 92618, USA
43975 + *
43976 + * Maintained by Solarflare Communications
43977 + * <linux-xen-drivers@solarflare.com>
43978 + * <onload-dev@solarflare.com>
43979 + *
43980 + * This program is free software; you can redistribute it and/or modify it
43981 + * under the terms of the GNU General Public License version 2 as published
43982 + * by the Free Software Foundation, incorporated herein by reference.
43983 + *
43984 + * This program is distributed in the hope that it will be useful,
43985 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
43986 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43987 + * GNU General Public License for more details.
43988 + *
43989 + * You should have received a copy of the GNU General Public License
43990 + * along with this program; if not, write to the Free Software
43991 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
43992 + ****************************************************************************
43993 + */
43994 +
43995 +/*
43996 + * \author slp
43997 + * \brief Falcon specific definitions
43998 + * \date 2004/08
43999 + */
44000 +
44001 +#ifndef __EF_VI_FALCON_H__
44002 +#define __EF_VI_FALCON_H__
44003 +
44004 +#define EFHW_4K 0x00001000u
44005 +#define EFHW_8K 0x00002000u
44006 +
44007 +/* include the autogenerated register definitions */
44008 +
44009 +#include "ef_vi_falcon_core.h"
44010 +#include "ef_vi_falcon_desc.h"
44011 +#include "ef_vi_falcon_event.h"
44012 +
44013 +
44014 +/*----------------------------------------------------------------------------
44015 + *
44016 + * Helpers to turn bit shifts into dword shifts and check that the bit fields
44017 + * haven't overflowed the dword etc. The aim is to preserve consistency with the
44018 + * autogenerated headers - once stable we could hard code.
44019 + *
44020 + *---------------------------------------------------------------------------*/
44021 +
44022 +/* mask constructors */
44023 +#define __FALCON_MASK(WIDTH,T) ((((T)1) << (WIDTH)) - 1)
44024 +#define __EFVI_MASK32(WIDTH) __FALCON_MASK((WIDTH),uint32_t)
44025 +#define __EFVI_MASK64(WIDTH) __FALCON_MASK((WIDTH),uint64_t)
44026 +
44027 +#define __EFVI_FALCON_MASKFIELD32(LBN, WIDTH) ((uint32_t) \
44028 + (__EFVI_MASK32(WIDTH) << (LBN)))
44029 +
44030 +/* constructors for fields which span the first and second dwords */
44031 +#define __LW(LBN) (32 - LBN)
44032 +#define LOW(v, LBN, WIDTH) ((uint32_t) \
44033 + (((v) & __EFVI_MASK64(__LW((LBN)))) << (LBN)))
44034 +#define HIGH(v, LBN, WIDTH) ((uint32_t)(((v) >> __LW((LBN))) & \
44035 + __EFVI_MASK64((WIDTH - __LW((LBN))))))
44036 +/* constructors for fields within the second dword */
44037 +#define __DW2(LBN) ((LBN) - 32)
44038 +
44039 +/* constructors for fields which span the second and third dwords */
44040 +#define __LW2(LBN) (64 - LBN)
44041 +#define LOW2(v, LBN, WIDTH) ((uint32_t) \
44042 + (((v) & __EFVI_MASK64(__LW2((LBN)))) << ((LBN) - 32)))
44043 +#define HIGH2(v, LBN, WIDTH) ((uint32_t) \
44044 + (((v) >> __LW2((LBN))) & __EFVI_MASK64((WIDTH - __LW2((LBN))))))
44045 +
44046 +/* constructors for fields within the third dword */
44047 +#define __DW3(LBN) ((LBN) - 64)
44048 +
44049 +
44050 +/* constructors for fields which span the third and fourth dwords */
44051 +#define __LW3(LBN) (96 - LBN)
44052 +#define LOW3(v, LBN, WIDTH) ((uint32_t) \
44053 + (((v) & __EFVI_MASK64(__LW3((LBN)))) << ((LBN) - 64)))
44054 +#define HIGH3(v, LBN, WIDTH) ((uint32_t) \
44055 + (((v) >> __LW3((LBN))) & __EFVI_MASK64((WIDTH - __LW3((LBN))))))
44056 +
44057 +/* constructors for fields within the fourth dword */
44058 +#define __DW4(LBN) ((LBN) - 96)
44059 +
44060 +/* checks that the autogenerated headers are consistent with our model */
44061 +#define WIDTHCHCK(a, b) ef_assert((a) == (b))
44062 +#define RANGECHCK(v, WIDTH) \
44063 + ef_assert(((uint64_t)(v) & ~(__EFVI_MASK64((WIDTH)))) == 0)
44064 +
44065 +/* fields within the first dword */
44066 +#define DWCHCK(LBN, WIDTH) ef_assert(((LBN) >= 0) && (((LBN)+(WIDTH)) <= 32))
44067 +
44068 +/* fields which span the first and second dwords */
44069 +#define LWCHK(LBN, WIDTH) ef_assert(WIDTH >= __LW(LBN))
44070 +
44071 +/*----------------------------------------------------------------------------
44072 + *
44073 + * Buffer virtual addresses (4K buffers)
44074 + *
44075 + *---------------------------------------------------------------------------*/
44076 +
44077 +/* Form a buffer virtual address from buffer ID and offset. If the offset
44078 +** is larger than the buffer size, then the buffer indexed will be
44079 +** calculated appropriately. It is the responsibility of the caller to
44080 +** ensure that they have valid buffers programmed at that address.
44081 +*/
44082 +#define EFVI_FALCON_VADDR_4K_S (12)
44083 +#define EFVI_FALCON_VADDR_M 0xfffff /* post shift mask */
44084 +
44085 +
44086 +#define EFVI_FALCON_BUFFER_4K_ADDR(id,off) \
44087 + (((id) << EFVI_FALCON_VADDR_4K_S) + (off))
44088 +
44089 +#define EFVI_FALCON_BUFFER_4K_PAGE(vaddr) \
44090 + (((vaddr) >> EFVI_FALCON_VADDR_4K_S) & EFVI_FALCON_VADDR_M)
44091 +
44092 +#define EFVI_FALCON_BUFFER_4K_OFF(vaddr) \
44093 + ((vaddr) & __EFVI_MASK32(EFVI_FALCON_VADDR_4K_S))
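Worked example for the 4K buffer virtual address macros above (illustrative only):

/*
 *   EFVI_FALCON_BUFFER_4K_ADDR(5, 0x123) == (5 << 12) + 0x123 == 0x5123
 *   EFVI_FALCON_BUFFER_4K_PAGE(0x5123)   == 5
 *   EFVI_FALCON_BUFFER_4K_OFF(0x5123)    == 0x123
 */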
44094 +
44095 +
44096 +/*----------------------------------------------------------------------------
44097 + *
44098 + * Masks
44099 + *
44100 + *---------------------------------------------------------------------------*/
44101 +
44102 +#define EFVI_FALCON_CLOCK_ASIC_HZ (125000)
44103 +#define EFVI_FALCON_CLOCK_FPGA_HZ (62500)
44104 +#define EFVI_FALCON_CLOCK_HZ EFVI_FALCON_CLOCK_ASIC_HZ
44105 +
44106 +
44107 +/*----------------------------------------------------------------------------
44108 + *
44109 + * Timers
44110 + *
44111 + *---------------------------------------------------------------------------*/
44112 +
44113 +/* Event-Queue Timer granularity - measured in us
44114 +   Given by: 4096 * 3 cycles * clock period */
44115 +
44116 +#define EFVI_FALCON_EVQTIMER_PERIOD_US ((4096 * 3 * 1000) / EFVI_FALCON_CLOCK_HZ)
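For reference, a worked value (illustrative, with the caveat that EFVI_FALCON_CLOCK_ASIC_HZ above appears to hold the clock in kHz despite its _HZ suffix):

/*
 *   (4096 * 3 * 1000) / 125000 == 12288000 / 125000 == 98 us  (ASIC)
 *   (4096 * 3 * 1000) / 62500  == 196 us                      (FPGA)
 */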
44117 +
44118 +/* mode bits */
44119 +#define EFVI_FALCON_TIMER_MODE_DIS 0 /* disabled */
44120 +#define EFVI_FALCON_TIMER_MODE_RUN 1 /* started counting right away */
44121 +#define EFVI_FALCON_TIMER_MODE_HOLD 2 /* trigger mode (user queues) */
44122 +
44123 +#define EFVI_FALCON_EVQTIMER_HOLD (EFVI_FALCON_TIMER_MODE_HOLD << TIMER_MODE_LBN)
44124 +#define EFVI_FALCON_EVQTIMER_RUN (EFVI_FALCON_TIMER_MODE_RUN << TIMER_MODE_LBN)
44125 +#define EFVI_FALCON_EVQTIMER_DISABLE (EFVI_FALCON_TIMER_MODE_DIS << TIMER_MODE_LBN)
44126 +
44127 +
44128 +/* ---- efhw_event_t helpers --- */
44129 +
44130 +#define EFVI_FALCON_EVENT_CODE(evp) \
44131 + ((evp)->u64 & EFVI_FALCON_EVENT_CODE_MASK)
44132 +
44133 +#define EFVI_FALCON_EVENT_SW_DATA_MASK 0x0000ffff
44134 +
44135 +#define __EFVI_FALCON_OPEN_MASK(WIDTH) ((((uint64_t)1) << (WIDTH)) - 1)
44136 +
44137 +#define EFVI_FALCON_EVENT_CODE_MASK \
44138 + (__EFVI_FALCON_OPEN_MASK(EV_CODE_WIDTH) << EV_CODE_LBN)
44139 +
44140 +
44141 +#endif /* __EF_VI_FALCON_H__ */
44142 Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_core.h
44143 ===================================================================
44144 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
44145 +++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_core.h 2008-02-20 09:32:49.000000000 +0100
44146 @@ -0,0 +1,1075 @@
44147 +
44148 +#define EFVI_FALCON_EXTENDED_P_BAR 1
44149 +
44150 +//////////////---- Bus Interface Unit Registers C Header ----//////////////
44151 +#define IOM_IND_ADR_REG_OFST 0x0 // IO-mapped indirect access address register
44152 + #define IOM_AUTO_ADR_INC_EN_LBN 16
44153 + #define IOM_AUTO_ADR_INC_EN_WIDTH 1
44154 + #define IOM_IND_ADR_LBN 0
44155 + #define IOM_IND_ADR_WIDTH 16
44156 +#define IOM_IND_DAT_REG_OFST 0x4 // IO-mapped indirect access data register
44157 + #define IOM_IND_DAT_LBN 0
44158 + #define IOM_IND_DAT_WIDTH 32
44159 +#define ADR_REGION_REG_KER_OFST 0x0 // Address region register
44160 +#define ADR_REGION_REG_OFST 0x0 // Address region register
44161 + #define ADR_REGION3_LBN 96
44162 + #define ADR_REGION3_WIDTH 18
44163 + #define ADR_REGION2_LBN 64
44164 + #define ADR_REGION2_WIDTH 18
44165 + #define ADR_REGION1_LBN 32
44166 + #define ADR_REGION1_WIDTH 18
44167 + #define ADR_REGION0_LBN 0
44168 + #define ADR_REGION0_WIDTH 18
44169 +#define INT_EN_REG_KER_OFST 0x10 // Kernel driver Interrupt enable register
44170 + #define KER_INT_CHAR_LBN 4
44171 + #define KER_INT_CHAR_WIDTH 1
44172 + #define KER_INT_KER_LBN 3
44173 + #define KER_INT_KER_WIDTH 1
44174 + #define ILL_ADR_ERR_INT_EN_KER_LBN 2
44175 + #define ILL_ADR_ERR_INT_EN_KER_WIDTH 1
44176 + #define SRM_PERR_INT_EN_KER_LBN 1
44177 + #define SRM_PERR_INT_EN_KER_WIDTH 1
44178 + #define DRV_INT_EN_KER_LBN 0
44179 + #define DRV_INT_EN_KER_WIDTH 1
44180 +#define INT_EN_REG_CHAR_OFST 0x20 // Char Driver interrupt enable register
44181 + #define CHAR_INT_CHAR_LBN 4
44182 + #define CHAR_INT_CHAR_WIDTH 1
44183 + #define CHAR_INT_KER_LBN 3
44184 + #define CHAR_INT_KER_WIDTH 1
44185 + #define ILL_ADR_ERR_INT_EN_CHAR_LBN 2
44186 + #define ILL_ADR_ERR_INT_EN_CHAR_WIDTH 1
44187 + #define SRM_PERR_INT_EN_CHAR_LBN 1
44188 + #define SRM_PERR_INT_EN_CHAR_WIDTH 1
44189 + #define DRV_INT_EN_CHAR_LBN 0
44190 + #define DRV_INT_EN_CHAR_WIDTH 1
44191 +#define INT_ADR_REG_KER_OFST 0x30 // Interrupt host address for Kernel driver
44192 + #define INT_ADR_KER_LBN 0
44193 + #define INT_ADR_KER_WIDTH 64
44194 + #define DRV_INT_KER_LBN 32
44195 + #define DRV_INT_KER_WIDTH 1
44196 + #define EV_FF_HALF_INT_KER_LBN 3
44197 + #define EV_FF_HALF_INT_KER_WIDTH 1
44198 + #define EV_FF_FULL_INT_KER_LBN 2
44199 + #define EV_FF_FULL_INT_KER_WIDTH 1
44200 + #define ILL_ADR_ERR_INT_KER_LBN 1
44201 + #define ILL_ADR_ERR_INT_KER_WIDTH 1
44202 + #define SRAM_PERR_INT_KER_LBN 0
44203 + #define SRAM_PERR_INT_KER_WIDTH 1
44204 +#define INT_ADR_REG_CHAR_OFST 0x40 // Interrupt host address for Char driver
44205 + #define INT_ADR_CHAR_LBN 0
44206 + #define INT_ADR_CHAR_WIDTH 64
44207 + #define DRV_INT_CHAR_LBN 32
44208 + #define DRV_INT_CHAR_WIDTH 1
44209 + #define EV_FF_HALF_INT_CHAR_LBN 3
44210 + #define EV_FF_HALF_INT_CHAR_WIDTH 1
44211 + #define EV_FF_FULL_INT_CHAR_LBN 2
44212 + #define EV_FF_FULL_INT_CHAR_WIDTH 1
44213 + #define ILL_ADR_ERR_INT_CHAR_LBN 1
44214 + #define ILL_ADR_ERR_INT_CHAR_WIDTH 1
44215 + #define SRAM_PERR_INT_CHAR_LBN 0
44216 + #define SRAM_PERR_INT_CHAR_WIDTH 1
44217 +#define INT_ISR0_B0_OFST 0x90 // B0 only
44218 +#define INT_ISR1_B0_OFST 0xA0
44219 +#define INT_ACK_REG_KER_A1_OFST 0x50 // Kernel interrupt acknowledge register
44220 + #define RESERVED_LBN 0
44221 + #define RESERVED_WIDTH 32
44222 +#define INT_ACK_REG_CHAR_A1_OFST 0x60 // CHAR interrupt acknowledge register
44223 + #define RESERVED_LBN 0
44224 + #define RESERVED_WIDTH 32
44225 +//////////////---- Global CSR Registers C Header ----//////////////
44226 +#define STRAP_REG_KER_OFST 0x200 // ASIC strap status register
44227 +#define STRAP_REG_OFST 0x200 // ASIC strap status register
44228 + #define ONCHIP_SRAM_LBN 16
44229 + #define ONCHIP_SRAM_WIDTH 0
44230 + #define STRAP_ISCSI_EN_LBN 3
44231 + #define STRAP_ISCSI_EN_WIDTH 1
44232 + #define STRAP_PINS_LBN 0
44233 + #define STRAP_PINS_WIDTH 3
44234 +#define GPIO_CTL_REG_KER_OFST 0x210 // GPIO control register
44235 +#define GPIO_CTL_REG_OFST 0x210 // GPIO control register
44236 + #define GPIO_OEN_LBN 24
44237 + #define GPIO_OEN_WIDTH 4
44238 + #define GPIO_OUT_LBN 16
44239 + #define GPIO_OUT_WIDTH 4
44240 + #define GPIO_IN_LBN 8
44241 + #define GPIO_IN_WIDTH 4
44242 + #define GPIO_PWRUP_VALUE_LBN 0
44243 + #define GPIO_PWRUP_VALUE_WIDTH 4
44244 +#define GLB_CTL_REG_KER_OFST 0x220 // Global control register
44245 +#define GLB_CTL_REG_OFST 0x220 // Global control register
44246 + #define SWRST_LBN 0
44247 + #define SWRST_WIDTH 1
44248 +#define FATAL_INTR_REG_KER_OFST 0x230 // Fatal interrupt register for Kernel
44249 + #define PCI_BUSERR_INT_KER_EN_LBN 43
44250 + #define PCI_BUSERR_INT_KER_EN_WIDTH 1
44251 + #define SRAM_OOB_INT_KER_EN_LBN 42
44252 + #define SRAM_OOB_INT_KER_EN_WIDTH 1
44253 + #define BUFID_OOB_INT_KER_EN_LBN 41
44254 + #define BUFID_OOB_INT_KER_EN_WIDTH 1
44255 + #define MEM_PERR_INT_KER_EN_LBN 40
44256 + #define MEM_PERR_INT_KER_EN_WIDTH 1
44257 + #define RBUF_OWN_INT_KER_EN_LBN 39
44258 + #define RBUF_OWN_INT_KER_EN_WIDTH 1
44259 + #define TBUF_OWN_INT_KER_EN_LBN 38
44260 + #define TBUF_OWN_INT_KER_EN_WIDTH 1
44261 + #define RDESCQ_OWN_INT_KER_EN_LBN 37
44262 + #define RDESCQ_OWN_INT_KER_EN_WIDTH 1
44263 + #define TDESCQ_OWN_INT_KER_EN_LBN 36
44264 + #define TDESCQ_OWN_INT_KER_EN_WIDTH 1
44265 + #define EVQ_OWN_INT_KER_EN_LBN 35
44266 + #define EVQ_OWN_INT_KER_EN_WIDTH 1
44267 + #define EVFF_OFLO_INT_KER_EN_LBN 34
44268 + #define EVFF_OFLO_INT_KER_EN_WIDTH 1
44269 + #define ILL_ADR_INT_KER_EN_LBN 33
44270 + #define ILL_ADR_INT_KER_EN_WIDTH 1
44271 + #define SRM_PERR_INT_KER_EN_LBN 32
44272 + #define SRM_PERR_INT_KER_EN_WIDTH 1
44273 + #define PCI_BUSERR_INT_KER_LBN 11
44274 + #define PCI_BUSERR_INT_KER_WIDTH 1
44275 + #define SRAM_OOB_INT_KER_LBN 10
44276 + #define SRAM_OOB_INT_KER_WIDTH 1
44277 + #define BUFID_OOB_INT_KER_LBN 9
44278 + #define BUFID_OOB_INT_KER_WIDTH 1
44279 + #define MEM_PERR_INT_KER_LBN 8
44280 + #define MEM_PERR_INT_KER_WIDTH 1
44281 + #define RBUF_OWN_INT_KER_LBN 7
44282 + #define RBUF_OWN_INT_KER_WIDTH 1
44283 + #define TBUF_OWN_INT_KER_LBN 6
44284 + #define TBUF_OWN_INT_KER_WIDTH 1
44285 + #define RDESCQ_OWN_INT_KER_LBN 5
44286 + #define RDESCQ_OWN_INT_KER_WIDTH 1
44287 + #define TDESCQ_OWN_INT_KER_LBN 4
44288 + #define TDESCQ_OWN_INT_KER_WIDTH 1
44289 + #define EVQ_OWN_INT_KER_LBN 3
44290 + #define EVQ_OWN_INT_KER_WIDTH 1
44291 + #define EVFF_OFLO_INT_KER_LBN 2
44292 + #define EVFF_OFLO_INT_KER_WIDTH 1
44293 + #define ILL_ADR_INT_KER_LBN 1
44294 + #define ILL_ADR_INT_KER_WIDTH 1
44295 + #define SRM_PERR_INT_KER_LBN 0
44296 + #define SRM_PERR_INT_KER_WIDTH 1
44297 +#define FATAL_INTR_REG_OFST 0x240 // Fatal interrupt register for Char
44298 + #define PCI_BUSERR_INT_CHAR_EN_LBN 43
44299 + #define PCI_BUSERR_INT_CHAR_EN_WIDTH 1
44300 + #define SRAM_OOB_INT_CHAR_EN_LBN 42
44301 + #define SRAM_OOB_INT_CHAR_EN_WIDTH 1
44302 + #define BUFID_OOB_INT_CHAR_EN_LBN 41
44303 + #define BUFID_OOB_INT_CHAR_EN_WIDTH 1
44304 + #define MEM_PERR_INT_CHAR_EN_LBN 40
44305 + #define MEM_PERR_INT_CHAR_EN_WIDTH 1
44306 + #define RBUF_OWN_INT_CHAR_EN_LBN 39
44307 + #define RBUF_OWN_INT_CHAR_EN_WIDTH 1
44308 + #define TBUF_OWN_INT_CHAR_EN_LBN 38
44309 + #define TBUF_OWN_INT_CHAR_EN_WIDTH 1
44310 + #define RDESCQ_OWN_INT_CHAR_EN_LBN 37
44311 + #define RDESCQ_OWN_INT_CHAR_EN_WIDTH 1
44312 + #define TDESCQ_OWN_INT_CHAR_EN_LBN 36
44313 + #define TDESCQ_OWN_INT_CHAR_EN_WIDTH 1
44314 + #define EVQ_OWN_INT_CHAR_EN_LBN 35
44315 + #define EVQ_OWN_INT_CHAR_EN_WIDTH 1
44316 + #define EVFF_OFLO_INT_CHAR_EN_LBN 34
44317 + #define EVFF_OFLO_INT_CHAR_EN_WIDTH 1
44318 + #define ILL_ADR_INT_CHAR_EN_LBN 33
44319 + #define ILL_ADR_INT_CHAR_EN_WIDTH 1
44320 + #define SRM_PERR_INT_CHAR_EN_LBN 32
44321 + #define SRM_PERR_INT_CHAR_EN_WIDTH 1
44322 + #define FATAL_INTR_REG_EN_BITS 0xffffffffffffffffULL
44323 + #define PCI_BUSERR_INT_CHAR_LBN 11
44324 + #define PCI_BUSERR_INT_CHAR_WIDTH 1
44325 + #define SRAM_OOB_INT_CHAR_LBN 10
44326 + #define SRAM_OOB_INT_CHAR_WIDTH 1
44327 + #define BUFID_OOB_INT_CHAR_LBN 9
44328 + #define BUFID_OOB_INT_CHAR_WIDTH 1
44329 + #define MEM_PERR_INT_CHAR_LBN 8
44330 + #define MEM_PERR_INT_CHAR_WIDTH 1
44331 + #define RBUF_OWN_INT_CHAR_LBN 7
44332 + #define RBUF_OWN_INT_CHAR_WIDTH 1
44333 + #define TBUF_OWN_INT_CHAR_LBN 6
44334 + #define TBUF_OWN_INT_CHAR_WIDTH 1
44335 + #define RDESCQ_OWN_INT_CHAR_LBN 5
44336 + #define RDESCQ_OWN_INT_CHAR_WIDTH 1
44337 + #define TDESCQ_OWN_INT_CHAR_LBN 4
44338 + #define TDESCQ_OWN_INT_CHAR_WIDTH 1
44339 + #define EVQ_OWN_INT_CHAR_LBN 3
44340 + #define EVQ_OWN_INT_CHAR_WIDTH 1
44341 + #define EVFF_OFLO_INT_CHAR_LBN 2
44342 + #define EVFF_OFLO_INT_CHAR_WIDTH 1
44343 + #define ILL_ADR_INT_CHAR_LBN 1
44344 + #define ILL_ADR_INT_CHAR_WIDTH 1
44345 + #define SRM_PERR_INT_CHAR_LBN 0
44346 + #define SRM_PERR_INT_CHAR_WIDTH 1
44347 +#define DP_CTRL_REG_OFST 0x250 // Datapath control register
44348 + #define FLS_EVQ_ID_LBN 0
44349 + #define FLS_EVQ_ID_WIDTH 12
44350 +#define MEM_STAT_REG_KER_OFST 0x260 // Memory status register
44351 +#define MEM_STAT_REG_OFST 0x260 // Memory status register
44352 + #define MEM_PERR_VEC_LBN 53
44353 + #define MEM_PERR_VEC_WIDTH 38
44354 + #define MBIST_CORR_LBN 38
44355 + #define MBIST_CORR_WIDTH 15
44356 + #define MBIST_ERR_LBN 0
44357 + #define MBIST_ERR_WIDTH 38
44358 +#define DEBUG_REG_KER_OFST 0x270 // Debug register
44359 +#define DEBUG_REG_OFST 0x270 // Debug register
44360 + #define DEBUG_BLK_SEL2_LBN 47
44361 + #define DEBUG_BLK_SEL2_WIDTH 3
44362 + #define DEBUG_BLK_SEL1_LBN 44
44363 + #define DEBUG_BLK_SEL1_WIDTH 3
44364 + #define DEBUG_BLK_SEL0_LBN 41
44365 + #define DEBUG_BLK_SEL0_WIDTH 3
44366 + #define MISC_DEBUG_ADDR_LBN 36
44367 + #define MISC_DEBUG_ADDR_WIDTH 5
44368 + #define SERDES_DEBUG_ADDR_LBN 31
44369 + #define SERDES_DEBUG_ADDR_WIDTH 5
44370 + #define EM_DEBUG_ADDR_LBN 26
44371 + #define EM_DEBUG_ADDR_WIDTH 5
44372 + #define SR_DEBUG_ADDR_LBN 21
44373 + #define SR_DEBUG_ADDR_WIDTH 5
44374 + #define EV_DEBUG_ADDR_LBN 16
44375 + #define EV_DEBUG_ADDR_WIDTH 5
44376 + #define RX_DEBUG_ADDR_LBN 11
44377 + #define RX_DEBUG_ADDR_WIDTH 5
44378 + #define TX_DEBUG_ADDR_LBN 6
44379 + #define TX_DEBUG_ADDR_WIDTH 5
44380 + #define BIU_DEBUG_ADDR_LBN 1
44381 + #define BIU_DEBUG_ADDR_WIDTH 5
44382 + #define DEBUG_EN_LBN 0
44383 + #define DEBUG_EN_WIDTH 1
44384 +#define DRIVER_REG0_KER_OFST 0x280 // Driver scratch register 0
44385 +#define DRIVER_REG0_OFST 0x280 // Driver scratch register 0
44386 + #define DRIVER_DW0_LBN 0
44387 + #define DRIVER_DW0_WIDTH 32
44388 +#define DRIVER_REG1_KER_OFST 0x290 // Driver scratch register 1
44389 +#define DRIVER_REG1_OFST 0x290 // Driver scratch register 1
44390 + #define DRIVER_DW1_LBN 0
44391 + #define DRIVER_DW1_WIDTH 32
44392 +#define DRIVER_REG2_KER_OFST 0x2A0 // Driver scratch register 2
44393 +#define DRIVER_REG2_OFST 0x2A0 // Driver scratch register 2
44394 + #define DRIVER_DW2_LBN 0
44395 + #define DRIVER_DW2_WIDTH 32
44396 +#define DRIVER_REG3_KER_OFST 0x2B0 // Driver scratch register 3
44397 +#define DRIVER_REG3_OFST 0x2B0 // Driver scratch register 3
44398 + #define DRIVER_DW3_LBN 0
44399 + #define DRIVER_DW3_WIDTH 32
44400 +#define DRIVER_REG4_KER_OFST 0x2C0 // Driver scratch register 4
44401 +#define DRIVER_REG4_OFST 0x2C0 // Driver scratch register 4
44402 + #define DRIVER_DW4_LBN 0
44403 + #define DRIVER_DW4_WIDTH 32
44404 +#define DRIVER_REG5_KER_OFST 0x2D0 // Driver scratch register 5
44405 +#define DRIVER_REG5_OFST 0x2D0 // Driver scratch register 5
44406 + #define DRIVER_DW5_LBN 0
44407 + #define DRIVER_DW5_WIDTH 32
44408 +#define DRIVER_REG6_KER_OFST 0x2E0 // Driver scratch register 6
44409 +#define DRIVER_REG6_OFST 0x2E0 // Driver scratch register 6
44410 + #define DRIVER_DW6_LBN 0
44411 + #define DRIVER_DW6_WIDTH 32
44412 +#define DRIVER_REG7_KER_OFST 0x2F0 // Driver scratch register 7
44413 +#define DRIVER_REG7_OFST 0x2F0 // Driver scratch register 7
44414 + #define DRIVER_DW7_LBN 0
44415 + #define DRIVER_DW7_WIDTH 32
44416 +#define ALTERA_BUILD_REG_KER_OFST 0x300 // Altera build register
44417 +#define ALTERA_BUILD_REG_OFST 0x300 // Altera build register
44418 + #define ALTERA_BUILD_VER_LBN 0
44419 + #define ALTERA_BUILD_VER_WIDTH 32
44420 +
44421 +/* so called CSR spare register
44422 + - contains separate parity enable bits for the various internal memory blocks */
44423 +#define MEM_PARITY_ERR_EN_REG_KER 0x310
44424 +#define MEM_PARITY_ALL_BLOCKS_EN_LBN 64
44425 +#define MEM_PARITY_ALL_BLOCKS_EN_WIDTH 38
44426 +#define MEM_PARITY_TX_DATA_EN_LBN 72
44427 +#define MEM_PARITY_TX_DATA_EN_WIDTH 2
44428 +
44429 +//////////////---- Event & Timer Module Registers C Header ----//////////////
44430 +
44431 +#if EFVI_FALCON_EXTENDED_P_BAR
44432 +#define EVQ_RPTR_REG_KER_OFST 0x11B00 // Event queue read pointer register
44433 +#else
44434 +#define EVQ_RPTR_REG_KER_OFST 0x1B00 // Event queue read pointer register
44435 +#endif
44436 +
44437 +#define EVQ_RPTR_REG_OFST 0xFA0000 // Event queue read pointer register array.
44438 + #define EVQ_RPTR_LBN 0
44439 + #define EVQ_RPTR_WIDTH 15
44440 +
44441 +#if EFVI_FALCON_EXTENDED_P_BAR
44442 +#define EVQ_PTR_TBL_KER_OFST 0x11A00 // Event queue pointer table for kernel access
44443 +#else
44444 +#define EVQ_PTR_TBL_KER_OFST 0x1A00 // Event queue pointer table for kernel access
44445 +#endif
44446 +
44447 +#define EVQ_PTR_TBL_CHAR_OFST 0xF60000 // Event queue pointer table for char direct access
44448 + #define EVQ_WKUP_OR_INT_EN_LBN 39
44449 + #define EVQ_WKUP_OR_INT_EN_WIDTH 1
44450 + #define EVQ_NXT_WPTR_LBN 24
44451 + #define EVQ_NXT_WPTR_WIDTH 15
44452 + #define EVQ_EN_LBN 23
44453 + #define EVQ_EN_WIDTH 1
44454 + #define EVQ_SIZE_LBN 20
44455 + #define EVQ_SIZE_WIDTH 3
44456 + #define EVQ_BUF_BASE_ID_LBN 0
44457 + #define EVQ_BUF_BASE_ID_WIDTH 20
44458 +#define TIMER_CMD_REG_KER_OFST 0x420 // Timer table for kernel access. Page-mapped
44459 +#define TIMER_CMD_REG_PAGE4_OFST 0x8420 // Timer table for user-level access. Page-mapped. For lowest 1K queues.
44460 +#define TIMER_CMD_REG_PAGE123K_OFST 0x1000420 // Timer table for user-level access. Page-mapped. For upper 3K queues.
44461 +#define TIMER_TBL_OFST 0xF70000 // Timer table for char driver direct access
44462 + #define TIMER_MODE_LBN 12
44463 + #define TIMER_MODE_WIDTH 2
44464 + #define TIMER_VAL_LBN 0
44465 + #define TIMER_VAL_WIDTH 12
44466 + #define TIMER_MODE_INT_HLDOFF 2
44467 + #define EVQ_BUF_SIZE_LBN 0
44468 + #define EVQ_BUF_SIZE_WIDTH 1
44469 +#define DRV_EV_REG_KER_OFST 0x440 // Driver generated event register
44470 +#define DRV_EV_REG_OFST 0x440 // Driver generated event register
44471 + #define DRV_EV_QID_LBN 64
44472 + #define DRV_EV_QID_WIDTH 12
44473 + #define DRV_EV_DATA_LBN 0
44474 + #define DRV_EV_DATA_WIDTH 64
44475 +#define EVQ_CTL_REG_KER_OFST 0x450 // Event queue control register
44476 +#define EVQ_CTL_REG_OFST 0x450 // Event queue control register
44477 + #define RX_EVQ_WAKEUP_MASK_B0_LBN 15
44478 + #define RX_EVQ_WAKEUP_MASK_B0_WIDTH 6
44479 + #define EVQ_OWNERR_CTL_LBN 14
44480 + #define EVQ_OWNERR_CTL_WIDTH 1
44481 + #define EVQ_FIFO_AF_TH_LBN 8
44482 + #define EVQ_FIFO_AF_TH_WIDTH 6
44483 + #define EVQ_FIFO_NOTAF_TH_LBN 0
44484 + #define EVQ_FIFO_NOTAF_TH_WIDTH 6
44485 +//////////////---- SRAM Module Registers C Header ----//////////////
44486 +#define BUF_TBL_CFG_REG_KER_OFST 0x600 // Buffer table configuration register
44487 +#define BUF_TBL_CFG_REG_OFST 0x600 // Buffer table configuration register
44488 + #define BUF_TBL_MODE_LBN 3
44489 + #define BUF_TBL_MODE_WIDTH 1
44490 +#define SRM_RX_DC_CFG_REG_KER_OFST 0x610 // SRAM receive descriptor cache configuration register
44491 +#define SRM_RX_DC_CFG_REG_OFST 0x610 // SRAM receive descriptor cache configuration register
44492 + #define SRM_RX_DC_BASE_ADR_LBN 0
44493 + #define SRM_RX_DC_BASE_ADR_WIDTH 21
44494 +#define SRM_TX_DC_CFG_REG_KER_OFST 0x620 // SRAM transmit descriptor cache configuration register
44495 +#define SRM_TX_DC_CFG_REG_OFST 0x620 // SRAM transmit descriptor cache configuration register
44496 + #define SRM_TX_DC_BASE_ADR_LBN 0
44497 + #define SRM_TX_DC_BASE_ADR_WIDTH 21
44498 +#define SRM_CFG_REG_KER_OFST 0x630 // SRAM configuration register
44499 +#define SRM_CFG_REG_OFST 0x630 // SRAM configuration register
44500 + #define SRAM_OOB_ADR_INTEN_LBN 5
44501 + #define SRAM_OOB_ADR_INTEN_WIDTH 1
44502 + #define SRAM_OOB_BUF_INTEN_LBN 4
44503 + #define SRAM_OOB_BUF_INTEN_WIDTH 1
44504 + #define SRAM_BT_INIT_EN_LBN 3
44505 + #define SRAM_BT_INIT_EN_WIDTH 1
44506 + #define SRM_NUM_BANK_LBN 2
44507 + #define SRM_NUM_BANK_WIDTH 1
44508 + #define SRM_BANK_SIZE_LBN 0
44509 + #define SRM_BANK_SIZE_WIDTH 2
44510 +#define BUF_TBL_UPD_REG_KER_OFST 0x650 // Buffer table update register
44511 +#define BUF_TBL_UPD_REG_OFST 0x650 // Buffer table update register
44512 + #define BUF_UPD_CMD_LBN 63
44513 + #define BUF_UPD_CMD_WIDTH 1
44514 + #define BUF_CLR_CMD_LBN 62
44515 + #define BUF_CLR_CMD_WIDTH 1
44516 + #define BUF_CLR_END_ID_LBN 32
44517 + #define BUF_CLR_END_ID_WIDTH 20
44518 + #define BUF_CLR_START_ID_LBN 0
44519 + #define BUF_CLR_START_ID_WIDTH 20
44520 +#define SRM_UPD_EVQ_REG_KER_OFST 0x660 // Buffer table update register
44521 +#define SRM_UPD_EVQ_REG_OFST 0x660 // Buffer table update register
44522 + #define SRM_UPD_EVQ_ID_LBN 0
44523 + #define SRM_UPD_EVQ_ID_WIDTH 12
44524 +#define SRAM_PARITY_REG_KER_OFST 0x670 // SRAM parity register.
44525 +#define SRAM_PARITY_REG_OFST 0x670 // SRAM parity register.
44526 + #define FORCE_SRAM_PERR_LBN 0
44527 + #define FORCE_SRAM_PERR_WIDTH 1
44528 +
44529 +#if EFVI_FALCON_EXTENDED_P_BAR
44530 +#define BUF_HALF_TBL_KER_OFST 0x18000 // Buffer table in half buffer table mode direct access by kernel driver
44531 +#else
44532 +#define BUF_HALF_TBL_KER_OFST 0x8000 // Buffer table in half buffer table mode direct access by kernel driver
44533 +#endif
44534 +
44535 +
44536 +#define BUF_HALF_TBL_OFST 0x800000 // Buffer table in half buffer table mode direct access by char driver
44537 + #define BUF_ADR_HBUF_ODD_LBN 44
44538 + #define BUF_ADR_HBUF_ODD_WIDTH 20
44539 + #define BUF_OWNER_ID_HBUF_ODD_LBN 32
44540 + #define BUF_OWNER_ID_HBUF_ODD_WIDTH 12
44541 + #define BUF_ADR_HBUF_EVEN_LBN 12
44542 + #define BUF_ADR_HBUF_EVEN_WIDTH 20
44543 + #define BUF_OWNER_ID_HBUF_EVEN_LBN 0
44544 + #define BUF_OWNER_ID_HBUF_EVEN_WIDTH 12
44545 +
44546 +
44547 +#if EFVI_FALCON_EXTENDED_P_BAR
44548 +#define BUF_FULL_TBL_KER_OFST 0x18000 // Buffer table in full buffer table mode direct access by kernel driver
44549 +#else
44550 +#define BUF_FULL_TBL_KER_OFST 0x8000 // Buffer table in full buffer table mode direct access by kernel driver
44551 +#endif
44552 +
44553 +
44554 +
44555 +
44556 +#define BUF_FULL_TBL_OFST 0x800000 // Buffer table in full buffer table mode direct access by char driver
44557 + #define IP_DAT_BUF_SIZE_LBN 50
44558 + #define IP_DAT_BUF_SIZE_WIDTH 1
44559 + #define BUF_ADR_REGION_LBN 48
44560 + #define BUF_ADR_REGION_WIDTH 2
44561 + #define BUF_ADR_FBUF_LBN 14
44562 + #define BUF_ADR_FBUF_WIDTH 34
44563 + #define BUF_OWNER_ID_FBUF_LBN 0
44564 + #define BUF_OWNER_ID_FBUF_WIDTH 14
44565 +#define SRM_DBG_REG_OFST 0x3000000 // SRAM debug access
44566 + #define SRM_DBG_LBN 0
44567 + #define SRM_DBG_WIDTH 64
44568 +//////////////---- RX Datapath Registers C Header ----//////////////
44569 +
44570 +#define RX_CFG_REG_KER_OFST 0x800 // Receive configuration register
44571 +#define RX_CFG_REG_OFST 0x800 // Receive configuration register
44572 +
44573 +#if !defined(FALCON_64K_RXFIFO) && !defined(FALCON_PRE_02020029)
44574 +# if !defined(FALCON_128K_RXFIFO)
44575 +# define FALCON_128K_RXFIFO
44576 +# endif
44577 +#endif
44578 +
44579 +#if defined(FALCON_128K_RXFIFO)
44580 +
44581 +/* new for B0 */
44582 + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 48
44583 + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
44584 + #define RX_INGR_EN_B0_LBN 47
44585 + #define RX_INGR_EN_B0_WIDTH 1
44586 + #define RX_TOEP_IPV4_B0_LBN 46
44587 + #define RX_TOEP_IPV4_B0_WIDTH 1
44588 + #define RX_HASH_ALG_B0_LBN 45
44589 + #define RX_HASH_ALG_B0_WIDTH 1
44590 + #define RX_HASH_INSERT_HDR_B0_LBN 44
44591 + #define RX_HASH_INSERT_HDR_B0_WIDTH 1
44592 +/* moved for B0 */
44593 + #define RX_DESC_PUSH_EN_B0_LBN 43
44594 + #define RX_DESC_PUSH_EN_B0_WIDTH 1
44595 + #define RX_RDW_PATCH_EN_LBN 42 /* Non head of line blocking */
44596 + #define RX_RDW_PATCH_EN_WIDTH 1
44597 + #define RX_PCI_BURST_SIZE_B0_LBN 39
44598 + #define RX_PCI_BURST_SIZE_B0_WIDTH 3
44599 + #define RX_OWNERR_CTL_B0_LBN 38
44600 + #define RX_OWNERR_CTL_B0_WIDTH 1
44601 + #define RX_XON_TX_TH_B0_LBN 33
44602 + #define RX_XON_TX_TH_B0_WIDTH 5
44603 + #define RX_XOFF_TX_TH_B0_LBN 28
44604 + #define RX_XOFF_TX_TH_B0_WIDTH 5
44605 + #define RX_USR_BUF_SIZE_B0_LBN 19
44606 + #define RX_USR_BUF_SIZE_B0_WIDTH 9
44607 + #define RX_XON_MAC_TH_B0_LBN 10
44608 + #define RX_XON_MAC_TH_B0_WIDTH 9
44609 + #define RX_XOFF_MAC_TH_B0_LBN 1
44610 + #define RX_XOFF_MAC_TH_B0_WIDTH 9
44611 + #define RX_XOFF_MAC_EN_B0_LBN 0
44612 + #define RX_XOFF_MAC_EN_B0_WIDTH 1
44613 +
44614 +#elif !defined(FALCON_PRE_02020029)
44615 +/* new for B0 */
44616 + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 46
44617 + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
44618 + #define RX_INGR_EN_B0_LBN 45
44619 + #define RX_INGR_EN_B0_WIDTH 1
44620 + #define RX_TOEP_IPV4_B0_LBN 44
44621 + #define RX_TOEP_IPV4_B0_WIDTH 1
44622 + #define RX_HASH_ALG_B0_LBN 43
44623 +   #define RX_HASH_ALG_B0_WIDTH 1
44624 + #define RX_HASH_INSERT_HDR_B0_LBN 42
44625 + #define RX_HASH_INSERT_HDR_B0_WIDTH 1
44626 +/* moved for B0 */
44627 + #define RX_DESC_PUSH_EN_B0_LBN 41
44628 + #define RX_DESC_PUSH_EN_B0_WIDTH 1
44629 + #define RX_PCI_BURST_SIZE_B0_LBN 37
44630 + #define RX_PCI_BURST_SIZE_B0_WIDTH 3
44631 + #define RX_OWNERR_CTL_B0_LBN 36
44632 + #define RX_OWNERR_CTL_B0_WIDTH 1
44633 + #define RX_XON_TX_TH_B0_LBN 31
44634 + #define RX_XON_TX_TH_B0_WIDTH 5
44635 + #define RX_XOFF_TX_TH_B0_LBN 26
44636 + #define RX_XOFF_TX_TH_B0_WIDTH 5
44637 + #define RX_USR_BUF_SIZE_B0_LBN 17
44638 + #define RX_USR_BUF_SIZE_B0_WIDTH 9
44639 + #define RX_XON_MAC_TH_B0_LBN 9
44640 + #define RX_XON_MAC_TH_B0_WIDTH 8
44641 + #define RX_XOFF_MAC_TH_B0_LBN 1
44642 + #define RX_XOFF_MAC_TH_B0_WIDTH 8
44643 + #define RX_XOFF_MAC_EN_B0_LBN 0
44644 + #define RX_XOFF_MAC_EN_B0_WIDTH 1
44645 +
44646 +#else
44647 +/* new for B0 */
44648 + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 44
44649 + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
44650 + #define RX_INGR_EN_B0_LBN 43
44651 + #define RX_INGR_EN_B0_WIDTH 1
44652 + #define RX_TOEP_IPV4_B0_LBN 42
44653 + #define RX_TOEP_IPV4_B0_WIDTH 1
44654 + #define RX_HASH_ALG_B0_LBN 41
44655 +   #define RX_HASH_ALG_B0_WIDTH 1
44656 + #define RX_HASH_INSERT_HDR_B0_LBN 40
44657 + #define RX_HASH_INSERT_HDR_B0_WIDTH 1
44658 +/* moved for B0 */
44659 + #define RX_DESC_PUSH_EN_B0_LBN 35
44660 + #define RX_DESC_PUSH_EN_B0_WIDTH 1
44661 + #define RX_PCI_BURST_SIZE_B0_LBN 35
44662 + #define RX_PCI_BURST_SIZE_B0_WIDTH 2
44663 + #define RX_OWNERR_CTL_B0_LBN 34
44664 + #define RX_OWNERR_CTL_B0_WIDTH 1
44665 + #define RX_XON_TX_TH_B0_LBN 29
44666 + #define RX_XON_TX_TH_B0_WIDTH 5
44667 + #define RX_XOFF_TX_TH_B0_LBN 24
44668 + #define RX_XOFF_TX_TH_B0_WIDTH 5
44669 + #define RX_USR_BUF_SIZE_B0_LBN 15
44670 + #define RX_USR_BUF_SIZE_B0_WIDTH 9
44671 + #define RX_XON_MAC_TH_B0_LBN 8
44672 + #define RX_XON_MAC_TH_B0_WIDTH 7
44673 + #define RX_XOFF_MAC_TH_B0_LBN 1
44674 + #define RX_XOFF_MAC_TH_B0_WIDTH 7
44675 + #define RX_XOFF_MAC_EN_B0_LBN 0
44676 + #define RX_XOFF_MAC_EN_B0_WIDTH 1
44677 +
44678 +#endif
44679 +
44680 +/* A0/A1 */
44681 + #define RX_PUSH_EN_A1_LBN 35
44682 + #define RX_PUSH_EN_A1_WIDTH 1
44683 + #define RX_PCI_BURST_SIZE_A1_LBN 31
44684 + #define RX_PCI_BURST_SIZE_A1_WIDTH 3
44685 + #define RX_OWNERR_CTL_A1_LBN 30
44686 + #define RX_OWNERR_CTL_A1_WIDTH 1
44687 + #define RX_XON_TX_TH_A1_LBN 25
44688 + #define RX_XON_TX_TH_A1_WIDTH 5
44689 + #define RX_XOFF_TX_TH_A1_LBN 20
44690 + #define RX_XOFF_TX_TH_A1_WIDTH 5
44691 + #define RX_USR_BUF_SIZE_A1_LBN 11
44692 + #define RX_USR_BUF_SIZE_A1_WIDTH 9
44693 + #define RX_XON_MAC_TH_A1_LBN 6
44694 + #define RX_XON_MAC_TH_A1_WIDTH 5
44695 + #define RX_XOFF_MAC_TH_A1_LBN 1
44696 + #define RX_XOFF_MAC_TH_A1_WIDTH 5
44697 + #define RX_XOFF_MAC_EN_A1_LBN 0
44698 + #define RX_XOFF_MAC_EN_A1_WIDTH 1
44699 +
44700 +#define RX_FILTER_CTL_REG_OFST 0x810 // Receive filter control registers
44701 + #define SCATTER_ENBL_NO_MATCH_Q_B0_LBN 40
44702 + #define SCATTER_ENBL_NO_MATCH_Q_B0_WIDTH 1
44703 + #define UDP_FULL_SRCH_LIMIT_LBN 32
44704 + #define UDP_FULL_SRCH_LIMIT_WIDTH 8
44705 + #define NUM_KER_LBN 24
44706 + #define NUM_KER_WIDTH 2
44707 + #define UDP_WILD_SRCH_LIMIT_LBN 16
44708 + #define UDP_WILD_SRCH_LIMIT_WIDTH 8
44709 + #define TCP_WILD_SRCH_LIMIT_LBN 8
44710 + #define TCP_WILD_SRCH_LIMIT_WIDTH 8
44711 + #define TCP_FULL_SRCH_LIMIT_LBN 0
44712 + #define TCP_FULL_SRCH_LIMIT_WIDTH 8
44713 +#define RX_FLUSH_DESCQ_REG_KER_OFST 0x820 // Receive flush descriptor queue register
44714 +#define RX_FLUSH_DESCQ_REG_OFST 0x820 // Receive flush descriptor queue register
44715 + #define RX_FLUSH_DESCQ_CMD_LBN 24
44716 + #define RX_FLUSH_DESCQ_CMD_WIDTH 1
44717 + #define RX_FLUSH_EVQ_ID_LBN 12
44718 + #define RX_FLUSH_EVQ_ID_WIDTH 12
44719 + #define RX_FLUSH_DESCQ_LBN 0
44720 + #define RX_FLUSH_DESCQ_WIDTH 12
44721 +#define RX_DESC_UPD_REG_KER_OFST 0x830 // Kernel receive descriptor update register. Page-mapped
44722 +#define RX_DESC_UPD_REG_PAGE4_OFST 0x8830 // Char & user receive descriptor update register. Page-mapped. For lowest 1K queues.
44723 +#define RX_DESC_UPD_REG_PAGE123K_OFST 0x1000830 // Char & user receive descriptor update register. Page-mapped. For upper 3K queues.
44724 + #define RX_DESC_WPTR_LBN 96
44725 + #define RX_DESC_WPTR_WIDTH 12
44726 + #define RX_DESC_PUSH_CMD_LBN 95
44727 + #define RX_DESC_PUSH_CMD_WIDTH 1
44728 + #define RX_DESC_LBN 0
44729 + #define RX_DESC_WIDTH 64
44730 + #define RX_KER_DESC_LBN 0
44731 + #define RX_KER_DESC_WIDTH 64
44732 + #define RX_USR_DESC_LBN 0
44733 + #define RX_USR_DESC_WIDTH 32
44734 +#define RX_DC_CFG_REG_KER_OFST 0x840 // Receive descriptor cache configuration register
44735 +#define RX_DC_CFG_REG_OFST 0x840 // Receive descriptor cache configuration register
44736 + #define RX_DC_SIZE_LBN 0
44737 + #define RX_DC_SIZE_WIDTH 2
44738 +#define RX_DC_PF_WM_REG_KER_OFST 0x850 // Receive descriptor cache pre-fetch watermark register
44739 +#define RX_DC_PF_WM_REG_OFST 0x850 // Receive descriptor cache pre-fetch watermark register
44740 + #define RX_DC_PF_LWM_LO_LBN 0
44741 + #define RX_DC_PF_LWM_LO_WIDTH 6
44742 +
44743 +#define RX_RSS_TKEY_B0_OFST 0x860 // RSS Toeplitz hash key (B0 only)
44744 +
44745 +#define RX_NODESC_DROP_REG 0x880
44746 + #define RX_NODESC_DROP_CNT_LBN 0
44747 + #define RX_NODESC_DROP_CNT_WIDTH 16
44748 +
44749 +#define XM_TX_CFG_REG_OFST 0x1230
44750 + #define XM_AUTO_PAD_LBN 5
44751 + #define XM_AUTO_PAD_WIDTH 1
44752 +
44753 +#define RX_FILTER_TBL0_OFST 0xF00000 // Receive filter table - even entries
44754 + #define RSS_EN_0_B0_LBN 110
44755 + #define RSS_EN_0_B0_WIDTH 1
44756 + #define SCATTER_EN_0_B0_LBN 109
44757 + #define SCATTER_EN_0_B0_WIDTH 1
44758 + #define TCP_UDP_0_LBN 108
44759 + #define TCP_UDP_0_WIDTH 1
44760 + #define RXQ_ID_0_LBN 96
44761 + #define RXQ_ID_0_WIDTH 12
44762 + #define DEST_IP_0_LBN 64
44763 + #define DEST_IP_0_WIDTH 32
44764 + #define DEST_PORT_TCP_0_LBN 48
44765 + #define DEST_PORT_TCP_0_WIDTH 16
44766 + #define SRC_IP_0_LBN 16
44767 + #define SRC_IP_0_WIDTH 32
44768 + #define SRC_TCP_DEST_UDP_0_LBN 0
44769 + #define SRC_TCP_DEST_UDP_0_WIDTH 16
44770 +#define RX_FILTER_TBL1_OFST 0xF00010 // Receive filter table - odd entries
44771 + #define RSS_EN_1_B0_LBN 110
44772 + #define RSS_EN_1_B0_WIDTH 1
44773 + #define SCATTER_EN_1_B0_LBN 109
44774 + #define SCATTER_EN_1_B0_WIDTH 1
44775 + #define TCP_UDP_1_LBN 108
44776 + #define TCP_UDP_1_WIDTH 1
44777 + #define RXQ_ID_1_LBN 96
44778 + #define RXQ_ID_1_WIDTH 12
44779 + #define DEST_IP_1_LBN 64
44780 + #define DEST_IP_1_WIDTH 32
44781 + #define DEST_PORT_TCP_1_LBN 48
44782 + #define DEST_PORT_TCP_1_WIDTH 16
44783 + #define SRC_IP_1_LBN 16
44784 + #define SRC_IP_1_WIDTH 32
44785 + #define SRC_TCP_DEST_UDP_1_LBN 0
44786 + #define SRC_TCP_DEST_UDP_1_WIDTH 16
44787 +
44788 +#if EFVI_FALCON_EXTENDED_P_BAR
44789 +#define RX_DESC_PTR_TBL_KER_OFST 0x11800 // Receive descriptor pointer kernel access
44790 +#else
44791 +#define RX_DESC_PTR_TBL_KER_OFST 0x1800 // Receive descriptor pointer kernel access
44792 +#endif
44793 +
44794 +
44795 +#define RX_DESC_PTR_TBL_OFST 0xF40000 // Receive descriptor pointer table
44796 + #define RX_ISCSI_DDIG_EN_LBN 88
44797 + #define RX_ISCSI_DDIG_EN_WIDTH 1
44798 + #define RX_ISCSI_HDIG_EN_LBN 87
44799 + #define RX_ISCSI_HDIG_EN_WIDTH 1
44800 + #define RX_DESC_PREF_ACT_LBN 86
44801 + #define RX_DESC_PREF_ACT_WIDTH 1
44802 + #define RX_DC_HW_RPTR_LBN 80
44803 + #define RX_DC_HW_RPTR_WIDTH 6
44804 + #define RX_DESCQ_HW_RPTR_LBN 68
44805 + #define RX_DESCQ_HW_RPTR_WIDTH 12
44806 + #define RX_DESCQ_SW_WPTR_LBN 56
44807 + #define RX_DESCQ_SW_WPTR_WIDTH 12
44808 + #define RX_DESCQ_BUF_BASE_ID_LBN 36
44809 + #define RX_DESCQ_BUF_BASE_ID_WIDTH 20
44810 + #define RX_DESCQ_EVQ_ID_LBN 24
44811 + #define RX_DESCQ_EVQ_ID_WIDTH 12
44812 + #define RX_DESCQ_OWNER_ID_LBN 10
44813 + #define RX_DESCQ_OWNER_ID_WIDTH 14
44814 + #define RX_DESCQ_LABEL_LBN 5
44815 + #define RX_DESCQ_LABEL_WIDTH 5
44816 + #define RX_DESCQ_SIZE_LBN 3
44817 + #define RX_DESCQ_SIZE_WIDTH 2
44818 + #define RX_DESCQ_TYPE_LBN 2
44819 + #define RX_DESCQ_TYPE_WIDTH 1
44820 + #define RX_DESCQ_JUMBO_LBN 1
44821 + #define RX_DESCQ_JUMBO_WIDTH 1
44822 + #define RX_DESCQ_EN_LBN 0
44823 + #define RX_DESCQ_EN_WIDTH 1
44824 +
44825 +
44826 +#define RX_RSS_INDIR_TBL_B0_OFST 0xFB0000 // RSS indirection table (B0 only)
44827 + #define RX_RSS_INDIR_ENT_B0_LBN 0
44828 + #define RX_RSS_INDIR_ENT_B0_WIDTH 6
44829 +
44830 +//////////////---- TX Datapath Registers C Header ----//////////////
44831 +#define TX_FLUSH_DESCQ_REG_KER_OFST 0xA00 // Transmit flush descriptor queue register
44832 +#define TX_FLUSH_DESCQ_REG_OFST 0xA00 // Transmit flush descriptor queue register
44833 + #define TX_FLUSH_DESCQ_CMD_LBN 12
44834 + #define TX_FLUSH_DESCQ_CMD_WIDTH 1
44835 + #define TX_FLUSH_DESCQ_LBN 0
44836 + #define TX_FLUSH_DESCQ_WIDTH 12
44837 +#define TX_DESC_UPD_REG_KER_OFST 0xA10 // Kernel transmit descriptor update register. Page-mapped
44838 +#define TX_DESC_UPD_REG_PAGE4_OFST 0x8A10 // Char & user transmit descriptor update register. Page-mapped
44839 +#define TX_DESC_UPD_REG_PAGE123K_OFST 0x1000A10 // Char & user transmit descriptor update register. Page-mapped
44840 + #define TX_DESC_WPTR_LBN 96
44841 + #define TX_DESC_WPTR_WIDTH 12
44842 + #define TX_DESC_PUSH_CMD_LBN 95
44843 + #define TX_DESC_PUSH_CMD_WIDTH 1
44844 + #define TX_DESC_LBN 0
44845 + #define TX_DESC_WIDTH 95
44846 + #define TX_KER_DESC_LBN 0
44847 + #define TX_KER_DESC_WIDTH 64
44848 + #define TX_USR_DESC_LBN 0
44849 + #define TX_USR_DESC_WIDTH 64
44850 +#define TX_DC_CFG_REG_KER_OFST 0xA20 // Transmit descriptor cache configuration register
44851 +#define TX_DC_CFG_REG_OFST 0xA20 // Transmit descriptor cache configuration register
44852 + #define TX_DC_SIZE_LBN 0
44853 + #define TX_DC_SIZE_WIDTH 2
44854 +
44855 +#if EFVI_FALCON_EXTENDED_P_BAR
44856 +#define TX_DESC_PTR_TBL_KER_OFST 0x11900 // Transmit descriptor pointer.
44857 +#else
44858 +#define TX_DESC_PTR_TBL_KER_OFST 0x1900 // Transmit descriptor pointer.
44859 +#endif
44860 +
44861 +
44862 +#define TX_DESC_PTR_TBL_OFST 0xF50000 // Transmit descriptor pointer
44863 + #define TX_NON_IP_DROP_DIS_B0_LBN 91
44864 + #define TX_NON_IP_DROP_DIS_B0_WIDTH 1
44865 + #define TX_IP_CHKSM_DIS_B0_LBN 90
44866 + #define TX_IP_CHKSM_DIS_B0_WIDTH 1
44867 + #define TX_TCP_CHKSM_DIS_B0_LBN 89
44868 + #define TX_TCP_CHKSM_DIS_B0_WIDTH 1
44869 + #define TX_DESCQ_EN_LBN 88
44870 + #define TX_DESCQ_EN_WIDTH 1
44871 + #define TX_ISCSI_DDIG_EN_LBN 87
44872 + #define TX_ISCSI_DDIG_EN_WIDTH 1
44873 + #define TX_ISCSI_HDIG_EN_LBN 86
44874 + #define TX_ISCSI_HDIG_EN_WIDTH 1
44875 + #define TX_DC_HW_RPTR_LBN 80
44876 + #define TX_DC_HW_RPTR_WIDTH 6
44877 + #define TX_DESCQ_HW_RPTR_LBN 68
44878 + #define TX_DESCQ_HW_RPTR_WIDTH 12
44879 + #define TX_DESCQ_SW_WPTR_LBN 56
44880 + #define TX_DESCQ_SW_WPTR_WIDTH 12
44881 + #define TX_DESCQ_BUF_BASE_ID_LBN 36
44882 + #define TX_DESCQ_BUF_BASE_ID_WIDTH 20
44883 + #define TX_DESCQ_EVQ_ID_LBN 24
44884 + #define TX_DESCQ_EVQ_ID_WIDTH 12
44885 + #define TX_DESCQ_OWNER_ID_LBN 10
44886 + #define TX_DESCQ_OWNER_ID_WIDTH 14
44887 + #define TX_DESCQ_LABEL_LBN 5
44888 + #define TX_DESCQ_LABEL_WIDTH 5
44889 + #define TX_DESCQ_SIZE_LBN 3
44890 + #define TX_DESCQ_SIZE_WIDTH 2
44891 + #define TX_DESCQ_TYPE_LBN 1
44892 + #define TX_DESCQ_TYPE_WIDTH 2
44893 + #define TX_DESCQ_FLUSH_LBN 0
44894 + #define TX_DESCQ_FLUSH_WIDTH 1
44895 +#define TX_CFG_REG_KER_OFST 0xA50 // Transmit configuration register
44896 +#define TX_CFG_REG_OFST 0xA50 // Transmit configuration register
44897 + #define TX_IP_ID_P1_OFS_LBN 32
44898 + #define TX_IP_ID_P1_OFS_WIDTH 15
44899 + #define TX_IP_ID_P0_OFS_LBN 16
44900 + #define TX_IP_ID_P0_OFS_WIDTH 15
44901 + #define TX_TURBO_EN_LBN 3
44902 + #define TX_TURBO_EN_WIDTH 1
44903 + #define TX_OWNERR_CTL_LBN 2
44904 +	#define TX_OWNERR_CTL_WIDTH 1
44905 + #define TX_NON_IP_DROP_DIS_LBN 1
44906 + #define TX_NON_IP_DROP_DIS_WIDTH 1
44907 + #define TX_IP_ID_REP_EN_LBN 0
44908 + #define TX_IP_ID_REP_EN_WIDTH 1
44909 +#define TX_RESERVED_REG_KER_OFST 0xA80 // Transmit reserved register
44910 +#define TX_RESERVED_REG_OFST 0xA80 // Transmit reserved register
44911 + #define TX_CSR_PUSH_EN_LBN 89
44912 + #define TX_CSR_PUSH_EN_WIDTH 1
44913 + #define TX_RX_SPACER_LBN 64
44914 + #define TX_RX_SPACER_WIDTH 8
44915 + #define TX_SW_EV_EN_LBN 59
44916 + #define TX_SW_EV_EN_WIDTH 1
44917 + #define TX_RX_SPACER_EN_LBN 57
44918 + #define TX_RX_SPACER_EN_WIDTH 1
44919 + #define TX_CSR_PREF_WD_TMR_LBN 24
44920 + #define TX_CSR_PREF_WD_TMR_WIDTH 16
44921 + #define TX_CSR_ONLY1TAG_LBN 21
44922 + #define TX_CSR_ONLY1TAG_WIDTH 1
44923 + #define TX_PREF_THRESHOLD_LBN 19
44924 + #define TX_PREF_THRESHOLD_WIDTH 2
44925 + #define TX_ONE_PKT_PER_Q_LBN 18
44926 + #define TX_ONE_PKT_PER_Q_WIDTH 1
44927 + #define TX_DIS_NON_IP_EV_LBN 17
44928 + #define TX_DIS_NON_IP_EV_WIDTH 1
44929 + #define TX_DMA_SPACER_LBN 8
44930 + #define TX_DMA_SPACER_WIDTH 8
44931 + #define TX_FLUSH_MIN_LEN_EN_B0_LBN 7
44932 + #define TX_FLUSH_MIN_LEN_EN_B0_WIDTH 1
44933 + #define TX_TCP_DIS_A1_LBN 7
44934 + #define TX_TCP_DIS_A1_WIDTH 1
44935 + #define TX_IP_DIS_A1_LBN 6
44936 + #define TX_IP_DIS_A1_WIDTH 1
44937 + #define TX_MAX_CPL_LBN 2
44938 + #define TX_MAX_CPL_WIDTH 2
44939 + #define TX_MAX_PREF_LBN 0
44940 + #define TX_MAX_PREF_WIDTH 2
44941 +#define TX_VLAN_REG_OFST 0xAE0 // Transmit VLAN tag register
44942 + #define TX_VLAN_EN_LBN 127
44943 + #define TX_VLAN_EN_WIDTH 1
44944 + #define TX_VLAN7_PORT1_EN_LBN 125
44945 + #define TX_VLAN7_PORT1_EN_WIDTH 1
44946 + #define TX_VLAN7_PORT0_EN_LBN 124
44947 + #define TX_VLAN7_PORT0_EN_WIDTH 1
44948 + #define TX_VLAN7_LBN 112
44949 + #define TX_VLAN7_WIDTH 12
44950 + #define TX_VLAN6_PORT1_EN_LBN 109
44951 + #define TX_VLAN6_PORT1_EN_WIDTH 1
44952 + #define TX_VLAN6_PORT0_EN_LBN 108
44953 + #define TX_VLAN6_PORT0_EN_WIDTH 1
44954 + #define TX_VLAN6_LBN 96
44955 + #define TX_VLAN6_WIDTH 12
44956 + #define TX_VLAN5_PORT1_EN_LBN 93
44957 + #define TX_VLAN5_PORT1_EN_WIDTH 1
44958 + #define TX_VLAN5_PORT0_EN_LBN 92
44959 + #define TX_VLAN5_PORT0_EN_WIDTH 1
44960 + #define TX_VLAN5_LBN 80
44961 + #define TX_VLAN5_WIDTH 12
44962 + #define TX_VLAN4_PORT1_EN_LBN 77
44963 + #define TX_VLAN4_PORT1_EN_WIDTH 1
44964 + #define TX_VLAN4_PORT0_EN_LBN 76
44965 + #define TX_VLAN4_PORT0_EN_WIDTH 1
44966 + #define TX_VLAN4_LBN 64
44967 + #define TX_VLAN4_WIDTH 12
44968 + #define TX_VLAN3_PORT1_EN_LBN 61
44969 + #define TX_VLAN3_PORT1_EN_WIDTH 1
44970 + #define TX_VLAN3_PORT0_EN_LBN 60
44971 + #define TX_VLAN3_PORT0_EN_WIDTH 1
44972 + #define TX_VLAN3_LBN 48
44973 + #define TX_VLAN3_WIDTH 12
44974 + #define TX_VLAN2_PORT1_EN_LBN 45
44975 + #define TX_VLAN2_PORT1_EN_WIDTH 1
44976 + #define TX_VLAN2_PORT0_EN_LBN 44
44977 + #define TX_VLAN2_PORT0_EN_WIDTH 1
44978 + #define TX_VLAN2_LBN 32
44979 + #define TX_VLAN2_WIDTH 12
44980 + #define TX_VLAN1_PORT1_EN_LBN 29
44981 + #define TX_VLAN1_PORT1_EN_WIDTH 1
44982 + #define TX_VLAN1_PORT0_EN_LBN 28
44983 + #define TX_VLAN1_PORT0_EN_WIDTH 1
44984 + #define TX_VLAN1_LBN 16
44985 + #define TX_VLAN1_WIDTH 12
44986 + #define TX_VLAN0_PORT1_EN_LBN 13
44987 + #define TX_VLAN0_PORT1_EN_WIDTH 1
44988 + #define TX_VLAN0_PORT0_EN_LBN 12
44989 + #define TX_VLAN0_PORT0_EN_WIDTH 1
44990 + #define TX_VLAN0_LBN 0
44991 + #define TX_VLAN0_WIDTH 12
44992 +#define TX_FIL_CTL_REG_OFST 0xAF0 // Transmit filter control register
44993 + #define TX_MADR1_FIL_EN_LBN 65
44994 + #define TX_MADR1_FIL_EN_WIDTH 1
44995 + #define TX_MADR0_FIL_EN_LBN 64
44996 + #define TX_MADR0_FIL_EN_WIDTH 1
44997 + #define TX_IPFIL31_PORT1_EN_LBN 63
44998 + #define TX_IPFIL31_PORT1_EN_WIDTH 1
44999 + #define TX_IPFIL31_PORT0_EN_LBN 62
45000 + #define TX_IPFIL31_PORT0_EN_WIDTH 1
45001 + #define TX_IPFIL30_PORT1_EN_LBN 61
45002 + #define TX_IPFIL30_PORT1_EN_WIDTH 1
45003 + #define TX_IPFIL30_PORT0_EN_LBN 60
45004 + #define TX_IPFIL30_PORT0_EN_WIDTH 1
45005 + #define TX_IPFIL29_PORT1_EN_LBN 59
45006 + #define TX_IPFIL29_PORT1_EN_WIDTH 1
45007 + #define TX_IPFIL29_PORT0_EN_LBN 58
45008 + #define TX_IPFIL29_PORT0_EN_WIDTH 1
45009 + #define TX_IPFIL28_PORT1_EN_LBN 57
45010 + #define TX_IPFIL28_PORT1_EN_WIDTH 1
45011 + #define TX_IPFIL28_PORT0_EN_LBN 56
45012 + #define TX_IPFIL28_PORT0_EN_WIDTH 1
45013 + #define TX_IPFIL27_PORT1_EN_LBN 55
45014 + #define TX_IPFIL27_PORT1_EN_WIDTH 1
45015 + #define TX_IPFIL27_PORT0_EN_LBN 54
45016 + #define TX_IPFIL27_PORT0_EN_WIDTH 1
45017 + #define TX_IPFIL26_PORT1_EN_LBN 53
45018 + #define TX_IPFIL26_PORT1_EN_WIDTH 1
45019 + #define TX_IPFIL26_PORT0_EN_LBN 52
45020 + #define TX_IPFIL26_PORT0_EN_WIDTH 1
45021 + #define TX_IPFIL25_PORT1_EN_LBN 51
45022 + #define TX_IPFIL25_PORT1_EN_WIDTH 1
45023 + #define TX_IPFIL25_PORT0_EN_LBN 50
45024 + #define TX_IPFIL25_PORT0_EN_WIDTH 1
45025 + #define TX_IPFIL24_PORT1_EN_LBN 49
45026 + #define TX_IPFIL24_PORT1_EN_WIDTH 1
45027 + #define TX_IPFIL24_PORT0_EN_LBN 48
45028 + #define TX_IPFIL24_PORT0_EN_WIDTH 1
45029 + #define TX_IPFIL23_PORT1_EN_LBN 47
45030 + #define TX_IPFIL23_PORT1_EN_WIDTH 1
45031 + #define TX_IPFIL23_PORT0_EN_LBN 46
45032 + #define TX_IPFIL23_PORT0_EN_WIDTH 1
45033 + #define TX_IPFIL22_PORT1_EN_LBN 45
45034 + #define TX_IPFIL22_PORT1_EN_WIDTH 1
45035 + #define TX_IPFIL22_PORT0_EN_LBN 44
45036 + #define TX_IPFIL22_PORT0_EN_WIDTH 1
45037 + #define TX_IPFIL21_PORT1_EN_LBN 43
45038 + #define TX_IPFIL21_PORT1_EN_WIDTH 1
45039 + #define TX_IPFIL21_PORT0_EN_LBN 42
45040 + #define TX_IPFIL21_PORT0_EN_WIDTH 1
45041 + #define TX_IPFIL20_PORT1_EN_LBN 41
45042 + #define TX_IPFIL20_PORT1_EN_WIDTH 1
45043 + #define TX_IPFIL20_PORT0_EN_LBN 40
45044 + #define TX_IPFIL20_PORT0_EN_WIDTH 1
45045 + #define TX_IPFIL19_PORT1_EN_LBN 39
45046 + #define TX_IPFIL19_PORT1_EN_WIDTH 1
45047 + #define TX_IPFIL19_PORT0_EN_LBN 38
45048 + #define TX_IPFIL19_PORT0_EN_WIDTH 1
45049 + #define TX_IPFIL18_PORT1_EN_LBN 37
45050 + #define TX_IPFIL18_PORT1_EN_WIDTH 1
45051 + #define TX_IPFIL18_PORT0_EN_LBN 36
45052 + #define TX_IPFIL18_PORT0_EN_WIDTH 1
45053 + #define TX_IPFIL17_PORT1_EN_LBN 35
45054 + #define TX_IPFIL17_PORT1_EN_WIDTH 1
45055 + #define TX_IPFIL17_PORT0_EN_LBN 34
45056 + #define TX_IPFIL17_PORT0_EN_WIDTH 1
45057 + #define TX_IPFIL16_PORT1_EN_LBN 33
45058 + #define TX_IPFIL16_PORT1_EN_WIDTH 1
45059 + #define TX_IPFIL16_PORT0_EN_LBN 32
45060 + #define TX_IPFIL16_PORT0_EN_WIDTH 1
45061 + #define TX_IPFIL15_PORT1_EN_LBN 31
45062 + #define TX_IPFIL15_PORT1_EN_WIDTH 1
45063 + #define TX_IPFIL15_PORT0_EN_LBN 30
45064 + #define TX_IPFIL15_PORT0_EN_WIDTH 1
45065 + #define TX_IPFIL14_PORT1_EN_LBN 29
45066 + #define TX_IPFIL14_PORT1_EN_WIDTH 1
45067 + #define TX_IPFIL14_PORT0_EN_LBN 28
45068 + #define TX_IPFIL14_PORT0_EN_WIDTH 1
45069 + #define TX_IPFIL13_PORT1_EN_LBN 27
45070 + #define TX_IPFIL13_PORT1_EN_WIDTH 1
45071 + #define TX_IPFIL13_PORT0_EN_LBN 26
45072 + #define TX_IPFIL13_PORT0_EN_WIDTH 1
45073 + #define TX_IPFIL12_PORT1_EN_LBN 25
45074 + #define TX_IPFIL12_PORT1_EN_WIDTH 1
45075 + #define TX_IPFIL12_PORT0_EN_LBN 24
45076 + #define TX_IPFIL12_PORT0_EN_WIDTH 1
45077 + #define TX_IPFIL11_PORT1_EN_LBN 23
45078 + #define TX_IPFIL11_PORT1_EN_WIDTH 1
45079 + #define TX_IPFIL11_PORT0_EN_LBN 22
45080 + #define TX_IPFIL11_PORT0_EN_WIDTH 1
45081 + #define TX_IPFIL10_PORT1_EN_LBN 21
45082 + #define TX_IPFIL10_PORT1_EN_WIDTH 1
45083 + #define TX_IPFIL10_PORT0_EN_LBN 20
45084 + #define TX_IPFIL10_PORT0_EN_WIDTH 1
45085 + #define TX_IPFIL9_PORT1_EN_LBN 19
45086 + #define TX_IPFIL9_PORT1_EN_WIDTH 1
45087 + #define TX_IPFIL9_PORT0_EN_LBN 18
45088 + #define TX_IPFIL9_PORT0_EN_WIDTH 1
45089 + #define TX_IPFIL8_PORT1_EN_LBN 17
45090 + #define TX_IPFIL8_PORT1_EN_WIDTH 1
45091 + #define TX_IPFIL8_PORT0_EN_LBN 16
45092 + #define TX_IPFIL8_PORT0_EN_WIDTH 1
45093 + #define TX_IPFIL7_PORT1_EN_LBN 15
45094 + #define TX_IPFIL7_PORT1_EN_WIDTH 1
45095 + #define TX_IPFIL7_PORT0_EN_LBN 14
45096 + #define TX_IPFIL7_PORT0_EN_WIDTH 1
45097 + #define TX_IPFIL6_PORT1_EN_LBN 13
45098 + #define TX_IPFIL6_PORT1_EN_WIDTH 1
45099 + #define TX_IPFIL6_PORT0_EN_LBN 12
45100 + #define TX_IPFIL6_PORT0_EN_WIDTH 1
45101 + #define TX_IPFIL5_PORT1_EN_LBN 11
45102 + #define TX_IPFIL5_PORT1_EN_WIDTH 1
45103 + #define TX_IPFIL5_PORT0_EN_LBN 10
45104 + #define TX_IPFIL5_PORT0_EN_WIDTH 1
45105 + #define TX_IPFIL4_PORT1_EN_LBN 9
45106 + #define TX_IPFIL4_PORT1_EN_WIDTH 1
45107 + #define TX_IPFIL4_PORT0_EN_LBN 8
45108 + #define TX_IPFIL4_PORT0_EN_WIDTH 1
45109 + #define TX_IPFIL3_PORT1_EN_LBN 7
45110 + #define TX_IPFIL3_PORT1_EN_WIDTH 1
45111 + #define TX_IPFIL3_PORT0_EN_LBN 6
45112 + #define TX_IPFIL3_PORT0_EN_WIDTH 1
45113 + #define TX_IPFIL2_PORT1_EN_LBN 5
45114 + #define TX_IPFIL2_PORT1_EN_WIDTH 1
45115 + #define TX_IPFIL2_PORT0_EN_LBN 4
45116 + #define TX_IPFIL2_PORT0_EN_WIDTH 1
45117 + #define TX_IPFIL1_PORT1_EN_LBN 3
45118 + #define TX_IPFIL1_PORT1_EN_WIDTH 1
45119 + #define TX_IPFIL1_PORT0_EN_LBN 2
45120 + #define TX_IPFIL1_PORT0_EN_WIDTH 1
45121 + #define TX_IPFIL0_PORT1_EN_LBN 1
45122 + #define TX_IPFIL0_PORT1_EN_WIDTH 1
45123 + #define TX_IPFIL0_PORT0_EN_LBN 0
45124 + #define TX_IPFIL0_PORT0_EN_WIDTH 1
45125 +#define TX_IPFIL_TBL_OFST 0xB00 // Transmit IP source address filter table
45126 + #define TX_IPFIL_MASK_LBN 32
45127 + #define TX_IPFIL_MASK_WIDTH 32
45128 + #define TX_IP_SRC_ADR_LBN 0
45129 + #define TX_IP_SRC_ADR_WIDTH 32
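A minimal sketch (illustrative, not part of the patch) of packing one entry of the transmit IP source-address filter table from the two fields defined above; any byte-ordering requirement of the hardware is not addressed here and tx_ipfil_entry() is a hypothetical name.

/* Illustrative only: one TX_IPFIL_TBL entry = 32-bit mask | 32-bit source IP. */
static inline uint64_t tx_ipfil_entry(uint32_t mask, uint32_t src_ip)
{
	return ((uint64_t)mask   << TX_IPFIL_MASK_LBN) |
	       ((uint64_t)src_ip << TX_IP_SRC_ADR_LBN);
}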
45130 +#define TX_PACE_REG_A1_OFST 0xF80000 // Transmit pace control register
45131 +#define TX_PACE_REG_B0_OFST 0xA90 // Transmit pace control register
45132 + #define TX_PACE_SB_AF_LBN 19
45133 + #define TX_PACE_SB_AF_WIDTH 10
45134 + #define TX_PACE_SB_NOTAF_LBN 9
45135 + #define TX_PACE_SB_NOTAF_WIDTH 10
45136 + #define TX_PACE_FB_BASE_LBN 5
45137 + #define TX_PACE_FB_BASE_WIDTH 4
45138 + #define TX_PACE_BIN_TH_LBN 0
45139 + #define TX_PACE_BIN_TH_WIDTH 5
45140 +#define TX_PACE_TBL_A1_OFST 0xF80040 // Transmit pacing table
45141 +#define TX_PACE_TBL_FIRST_QUEUE_A1 4
45142 +#define TX_PACE_TBL_B0_OFST 0xF80000 // Transmit pacing table
45143 +#define TX_PACE_TBL_FIRST_QUEUE_B0 0
45144 + #define TX_PACE_LBN 0
45145 + #define TX_PACE_WIDTH 5
45146 +
45147 +//////////////---- EE/Flash Registers C Header ----//////////////
45148 +#define EE_SPI_HCMD_REG_KER_OFST 0x100 // SPI host command register
45149 +#define EE_SPI_HCMD_REG_OFST 0x100 // SPI host command register
45150 + #define EE_SPI_HCMD_CMD_EN_LBN 31
45151 + #define EE_SPI_HCMD_CMD_EN_WIDTH 1
45152 + #define EE_WR_TIMER_ACTIVE_LBN 28
45153 + #define EE_WR_TIMER_ACTIVE_WIDTH 1
45154 + #define EE_SPI_HCMD_SF_SEL_LBN 24
45155 + #define EE_SPI_HCMD_SF_SEL_WIDTH 1
45156 + #define EE_SPI_HCMD_DABCNT_LBN 16
45157 + #define EE_SPI_HCMD_DABCNT_WIDTH 5
45158 + #define EE_SPI_HCMD_READ_LBN 15
45159 + #define EE_SPI_HCMD_READ_WIDTH 1
45160 + #define EE_SPI_HCMD_DUBCNT_LBN 12
45161 + #define EE_SPI_HCMD_DUBCNT_WIDTH 2
45162 + #define EE_SPI_HCMD_ADBCNT_LBN 8
45163 + #define EE_SPI_HCMD_ADBCNT_WIDTH 2
45164 + #define EE_SPI_HCMD_ENC_LBN 0
45165 + #define EE_SPI_HCMD_ENC_WIDTH 8
45166 +#define EE_SPI_HADR_REG_KER_OFST 0X110 // SPI host address register
45167 +#define EE_SPI_HADR_REG_OFST 0X110 // SPI host address register
45168 + #define EE_SPI_HADR_DUBYTE_LBN 24
45169 + #define EE_SPI_HADR_DUBYTE_WIDTH 8
45170 + #define EE_SPI_HADR_ADR_LBN 0
45171 + #define EE_SPI_HADR_ADR_WIDTH 24
45172 +#define EE_SPI_HDATA_REG_KER_OFST 0x120 // SPI host data register
45173 +#define EE_SPI_HDATA_REG_OFST 0x120 // SPI host data register
45174 + #define EE_SPI_HDATA3_LBN 96
45175 + #define EE_SPI_HDATA3_WIDTH 32
45176 + #define EE_SPI_HDATA2_LBN 64
45177 + #define EE_SPI_HDATA2_WIDTH 32
45178 + #define EE_SPI_HDATA1_LBN 32
45179 + #define EE_SPI_HDATA1_WIDTH 32
45180 + #define EE_SPI_HDATA0_LBN 0
45181 + #define EE_SPI_HDATA0_WIDTH 32
45182 +#define EE_BASE_PAGE_REG_KER_OFST 0x130 // Expansion ROM base mirror register
45183 +#define EE_BASE_PAGE_REG_OFST 0x130 // Expansion ROM base mirror register
45184 + #define EE_EXP_ROM_WINDOW_BASE_LBN 16
45185 + #define EE_EXP_ROM_WINDOW_BASE_WIDTH 13
45186 + #define EE_EXPROM_MASK_LBN 0
45187 + #define EE_EXPROM_MASK_WIDTH 13
45188 +#define EE_VPD_CFG0_REG_KER_OFST 0X140 // SPI/VPD configuration register
45189 +#define EE_VPD_CFG0_REG_OFST 0X140 // SPI/VPD configuration register
45190 + #define EE_SF_FASTRD_EN_LBN 127
45191 + #define EE_SF_FASTRD_EN_WIDTH 1
45192 + #define EE_SF_CLOCK_DIV_LBN 120
45193 + #define EE_SF_CLOCK_DIV_WIDTH 7
45194 + #define EE_VPD_WIP_POLL_LBN 119
45195 + #define EE_VPD_WIP_POLL_WIDTH 1
45196 + #define EE_VPDW_LENGTH_LBN 80
45197 + #define EE_VPDW_LENGTH_WIDTH 15
45198 + #define EE_VPDW_BASE_LBN 64
45199 + #define EE_VPDW_BASE_WIDTH 15
45200 + #define EE_VPD_WR_CMD_EN_LBN 56
45201 + #define EE_VPD_WR_CMD_EN_WIDTH 8
45202 + #define EE_VPD_BASE_LBN 32
45203 + #define EE_VPD_BASE_WIDTH 24
45204 + #define EE_VPD_LENGTH_LBN 16
45205 + #define EE_VPD_LENGTH_WIDTH 13
45206 + #define EE_VPD_AD_SIZE_LBN 8
45207 + #define EE_VPD_AD_SIZE_WIDTH 5
45208 + #define EE_VPD_ACCESS_ON_LBN 5
45209 + #define EE_VPD_ACCESS_ON_WIDTH 1
45210 +#define EE_VPD_SW_CNTL_REG_KER_OFST 0X150 // VPD access SW control register
45211 +#define EE_VPD_SW_CNTL_REG_OFST 0X150 // VPD access SW control register
45212 + #define EE_VPD_CYCLE_PENDING_LBN 31
45213 + #define EE_VPD_CYCLE_PENDING_WIDTH 1
45214 + #define EE_VPD_CYC_WRITE_LBN 28
45215 + #define EE_VPD_CYC_WRITE_WIDTH 1
45216 + #define EE_VPD_CYC_ADR_LBN 0
45217 + #define EE_VPD_CYC_ADR_WIDTH 15
45218 +#define EE_VPD_SW_DATA_REG_KER_OFST 0x160 // VPD access SW data register
45219 +#define EE_VPD_SW_DATA_REG_OFST 0x160 // VPD access SW data register
45220 + #define EE_VPD_CYC_DAT_LBN 0
45221 + #define EE_VPD_CYC_DAT_WIDTH 32
45222 Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_desc.h
45223 ===================================================================
45224 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
45225 +++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_desc.h 2008-02-20 09:32:49.000000000 +0100
45226 @@ -0,0 +1,43 @@
45227 +//////////////---- Descriptors C Headers ----//////////////
45228 +// Receive Kernel IP Descriptor
45229 + #define RX_KER_BUF_SIZE_LBN 48
45230 + #define RX_KER_BUF_SIZE_WIDTH 14
45231 + #define RX_KER_BUF_REGION_LBN 46
45232 + #define RX_KER_BUF_REGION_WIDTH 2
45233 + #define RX_KER_BUF_REGION0_DECODE 0
45234 + #define RX_KER_BUF_REGION1_DECODE 1
45235 + #define RX_KER_BUF_REGION2_DECODE 2
45236 + #define RX_KER_BUF_REGION3_DECODE 3
45237 + #define RX_KER_BUF_ADR_LBN 0
45238 + #define RX_KER_BUF_ADR_WIDTH 46
45239 +// Receive User IP Descriptor
45240 + #define RX_USR_2BYTE_OFS_LBN 20
45241 + #define RX_USR_2BYTE_OFS_WIDTH 12
45242 + #define RX_USR_BUF_ID_LBN 0
45243 + #define RX_USR_BUF_ID_WIDTH 20
45244 +// Transmit Kernel IP Descriptor
45245 + #define TX_KER_PORT_LBN 63
45246 + #define TX_KER_PORT_WIDTH 1
45247 + #define TX_KER_CONT_LBN 62
45248 + #define TX_KER_CONT_WIDTH 1
45249 + #define TX_KER_BYTE_CNT_LBN 48
45250 + #define TX_KER_BYTE_CNT_WIDTH 14
45251 + #define TX_KER_BUF_REGION_LBN 46
45252 + #define TX_KER_BUF_REGION_WIDTH 2
45253 + #define TX_KER_BUF_REGION0_DECODE 0
45254 + #define TX_KER_BUF_REGION1_DECODE 1
45255 + #define TX_KER_BUF_REGION2_DECODE 2
45256 + #define TX_KER_BUF_REGION3_DECODE 3
45257 + #define TX_KER_BUF_ADR_LBN 0
45258 + #define TX_KER_BUF_ADR_WIDTH 46
45259 +// Transmit User IP Descriptor
45260 + #define TX_USR_PORT_LBN 47
45261 + #define TX_USR_PORT_WIDTH 1
45262 + #define TX_USR_CONT_LBN 46
45263 + #define TX_USR_CONT_WIDTH 1
45264 + #define TX_USR_BYTE_CNT_LBN 33
45265 + #define TX_USR_BYTE_CNT_WIDTH 13
45266 + #define TX_USR_BUF_ID_LBN 13
45267 + #define TX_USR_BUF_ID_WIDTH 20
45268 + #define TX_USR_BYTE_OFS_LBN 0
45269 + #define TX_USR_BYTE_OFS_WIDTH 13
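To make the descriptor layouts above concrete, here is a hedged sketch (not part of the patch) packing a transmit kernel IP descriptor into a single 64-bit word; tx_ker_desc() is a hypothetical name and the choice of buffer region 0 is arbitrary, purely for illustration.

/* Illustrative only: compose a TX kernel descriptor from the TX_KER_* fields. */
static inline uint64_t tx_ker_desc(uint64_t buf_addr, unsigned byte_cnt,
                                   unsigned port, unsigned cont)
{
	return ((uint64_t)(port & 1) << TX_KER_PORT_LBN) |
	       ((uint64_t)(cont & 1) << TX_KER_CONT_LBN) |
	       ((uint64_t)(byte_cnt & ((1u << TX_KER_BYTE_CNT_WIDTH) - 1))
	                             << TX_KER_BYTE_CNT_LBN) |
	       ((uint64_t)TX_KER_BUF_REGION0_DECODE << TX_KER_BUF_REGION_LBN) |
	       (buf_addr & (((uint64_t)1 << TX_KER_BUF_ADR_WIDTH) - 1));
}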
45270 Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_event.h
45271 ===================================================================
45272 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
45273 +++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_event.h 2008-02-20 09:32:49.000000000 +0100
45274 @@ -0,0 +1,123 @@
45275 +//////////////---- Events Format C Header ----//////////////
45276 +//////////////---- Event entry ----//////////////
45277 + #define EV_CODE_LBN 60
45278 + #define EV_CODE_WIDTH 4
45279 + #define RX_IP_EV_DECODE 0
45280 + #define TX_IP_EV_DECODE 2
45281 + #define DRIVER_EV_DECODE 5
45282 + #define GLOBAL_EV_DECODE 6
45283 + #define DRV_GEN_EV_DECODE 7
45284 + #define EV_DATA_LBN 0
45285 + #define EV_DATA_WIDTH 60
45286 +//////////////---- Receive IP events for both Kernel & User event queues ----//////////////
45287 + #define RX_EV_PKT_OK_LBN 56
45288 + #define RX_EV_PKT_OK_WIDTH 1
45289 + #define RX_EV_BUF_OWNER_ID_ERR_LBN 54
45290 + #define RX_EV_BUF_OWNER_ID_ERR_WIDTH 1
45291 + #define RX_EV_IP_HDR_CHKSUM_ERR_LBN 52
45292 + #define RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1
45293 + #define RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51
45294 + #define RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1
45295 + #define RX_EV_ETH_CRC_ERR_LBN 50
45296 + #define RX_EV_ETH_CRC_ERR_WIDTH 1
45297 + #define RX_EV_FRM_TRUNC_LBN 49
45298 + #define RX_EV_FRM_TRUNC_WIDTH 1
45299 + #define RX_EV_DRIB_NIB_LBN 48
45300 + #define RX_EV_DRIB_NIB_WIDTH 1
45301 + #define RX_EV_TOBE_DISC_LBN 47
45302 + #define RX_EV_TOBE_DISC_WIDTH 1
45303 + #define RX_EV_PKT_TYPE_LBN 44
45304 + #define RX_EV_PKT_TYPE_WIDTH 3
45305 + #define RX_EV_PKT_TYPE_ETH_DECODE 0
45306 + #define RX_EV_PKT_TYPE_LLC_DECODE 1
45307 + #define RX_EV_PKT_TYPE_JUMBO_DECODE 2
45308 + #define RX_EV_PKT_TYPE_VLAN_DECODE 3
45309 + #define RX_EV_PKT_TYPE_VLAN_LLC_DECODE 4
45310 + #define RX_EV_PKT_TYPE_VLAN_JUMBO_DECODE 5
45311 + #define RX_EV_HDR_TYPE_LBN 42
45312 + #define RX_EV_HDR_TYPE_WIDTH 2
45313 + #define RX_EV_HDR_TYPE_TCP_IPV4_DECODE 0
45314 + #define RX_EV_HDR_TYPE_UDP_IPV4_DECODE 1
45315 + #define RX_EV_HDR_TYPE_OTHER_IP_DECODE 2
45316 + #define RX_EV_HDR_TYPE_NON_IP_DECODE 3
45317 + #define RX_EV_DESC_Q_EMPTY_LBN 41
45318 + #define RX_EV_DESC_Q_EMPTY_WIDTH 1
45319 + #define RX_EV_MCAST_HASH_MATCH_LBN 40
45320 + #define RX_EV_MCAST_HASH_MATCH_WIDTH 1
45321 + #define RX_EV_MCAST_PKT_LBN 39
45322 + #define RX_EV_MCAST_PKT_WIDTH 1
45323 + #define RX_EV_Q_LABEL_LBN 32
45324 + #define RX_EV_Q_LABEL_WIDTH 5
45325 + #define RX_JUMBO_CONT_LBN 31
45326 + #define RX_JUMBO_CONT_WIDTH 1
45327 + #define RX_SOP_LBN 15
45328 + #define RX_SOP_WIDTH 1
45329 + #define RX_PORT_LBN 30
45330 + #define RX_PORT_WIDTH 1
45331 + #define RX_EV_BYTE_CNT_LBN 16
45332 + #define RX_EV_BYTE_CNT_WIDTH 14
45333 + #define RX_iSCSI_PKT_OK_LBN 14
45334 + #define RX_iSCSI_PKT_OK_WIDTH 1
45335 + #define RX_ISCSI_DDIG_ERR_LBN 13
45336 + #define RX_ISCSI_DDIG_ERR_WIDTH 1
45337 + #define RX_ISCSI_HDIG_ERR_LBN 12
45338 + #define RX_ISCSI_HDIG_ERR_WIDTH 1
45339 + #define RX_EV_DESC_PTR_LBN 0
45340 + #define RX_EV_DESC_PTR_WIDTH 12
45341 +//////////////---- Transmit IP events for both Kernel & User event queues ----//////////////
45342 + #define TX_EV_PKT_ERR_LBN 38
45343 + #define TX_EV_PKT_ERR_WIDTH 1
45344 + #define TX_EV_PKT_TOO_BIG_LBN 37
45345 + #define TX_EV_PKT_TOO_BIG_WIDTH 1
45346 + #define TX_EV_Q_LABEL_LBN 32
45347 + #define TX_EV_Q_LABEL_WIDTH 5
45348 + #define TX_EV_PORT_LBN 16
45349 + #define TX_EV_PORT_WIDTH 1
45350 + #define TX_EV_WQ_FF_FULL_LBN 15
45351 + #define TX_EV_WQ_FF_FULL_WIDTH 1
45352 + #define TX_EV_BUF_OWNER_ID_ERR_LBN 14
45353 + #define TX_EV_BUF_OWNER_ID_ERR_WIDTH 1
45354 + #define TX_EV_COMP_LBN 12
45355 + #define TX_EV_COMP_WIDTH 1
45356 + #define TX_EV_DESC_PTR_LBN 0
45357 + #define TX_EV_DESC_PTR_WIDTH 12
45358 +//////////////---- Char or Kernel driver events ----//////////////
45359 + #define DRIVER_EV_SUB_CODE_LBN 56
45360 + #define DRIVER_EV_SUB_CODE_WIDTH 4
45361 + #define TX_DESCQ_FLS_DONE_EV_DECODE 0x0
45362 + #define RX_DESCQ_FLS_DONE_EV_DECODE 0x1
45363 + #define EVQ_INIT_DONE_EV_DECODE 0x2
45364 + #define EVQ_NOT_EN_EV_DECODE 0x3
45365 + #define RX_DESCQ_FLSFF_OVFL_EV_DECODE 0x4
45366 + #define SRM_UPD_DONE_EV_DECODE 0x5
45367 + #define WAKE_UP_EV_DECODE 0x6
45368 + #define TX_PKT_NON_TCP_UDP_DECODE 0x9
45369 + #define TIMER_EV_DECODE 0xA
45370 + #define RX_DSC_ERROR_EV_DECODE 0xE
45371 + #define DRIVER_EV_TX_DESCQ_ID_LBN 0
45372 + #define DRIVER_EV_TX_DESCQ_ID_WIDTH 12
45373 + #define DRIVER_EV_RX_DESCQ_ID_LBN 0
45374 + #define DRIVER_EV_RX_DESCQ_ID_WIDTH 12
45375 + #define DRIVER_EV_EVQ_ID_LBN 0
45376 + #define DRIVER_EV_EVQ_ID_WIDTH 12
45377 + #define DRIVER_TMR_ID_LBN 0
45378 + #define DRIVER_TMR_ID_WIDTH 12
45379 + #define DRIVER_EV_SRM_UPD_LBN 0
45380 + #define DRIVER_EV_SRM_UPD_WIDTH 2
45381 + #define SRM_CLR_EV_DECODE 0
45382 + #define SRM_UPD_EV_DECODE 1
45383 + #define SRM_ILLCLR_EV_DECODE 2
45384 +//////////////---- Global events. Sent to both event queue 0 and 4. ----//////////////
45385 + #define XFP_PHY_INTR_LBN 10
45386 + #define XFP_PHY_INTR_WIDTH 1
45387 + #define XG_PHY_INTR_LBN 9
45388 + #define XG_PHY_INTR_WIDTH 1
45389 + #define G_PHY1_INTR_LBN 8
45390 + #define G_PHY1_INTR_WIDTH 1
45391 + #define G_PHY0_INTR_LBN 7
45392 + #define G_PHY0_INTR_WIDTH 1
45393 +//////////////---- Driver generated events ----//////////////
45394 + #define DRV_GEN_EV_CODE_LBN 60
45395 + #define DRV_GEN_EV_CODE_WIDTH 4
45396 + #define DRV_GEN_EV_DATA_LBN 0
45397 + #define DRV_GEN_EV_DATA_WIDTH 60
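The event words above are decoded by first reading EV_CODE and then the code-specific fields. A minimal sketch follows, not part of the patch, assuming each event is handled as one raw 64-bit word; EV_FIELD() and classify_event() are hypothetical helpers.

/* Illustrative only: generic LBN/WIDTH field extraction for event words. */
#define EV_FIELD(ev, name) \
	(((ev) >> name##_LBN) & (((uint64_t)1 << name##_WIDTH) - 1))

/* Illustrative only: return the event code; for RX events also report the
 * byte count and whether the packet completed without error. */
static inline unsigned classify_event(uint64_t ev,
                                      unsigned* rx_bytes, unsigned* rx_ok)
{
	unsigned code = (unsigned)EV_FIELD(ev, EV_CODE);

	if (code == RX_IP_EV_DECODE) {
		*rx_bytes = (unsigned)EV_FIELD(ev, RX_EV_BYTE_CNT);
		*rx_ok    = (unsigned)EV_FIELD(ev, RX_EV_PKT_OK);
	}
	return code;  /* RX_IP_EV_DECODE, TX_IP_EV_DECODE, DRIVER_EV_DECODE, ... */
}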
45398 Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_internal.h
45399 ===================================================================
45400 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
45401 +++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_internal.h 2008-02-20 09:32:49.000000000 +0100
45402 @@ -0,0 +1,256 @@
45403 +/****************************************************************************
45404 + * Copyright 2002-2005: Level 5 Networks Inc.
45405 + * Copyright 2005-2008: Solarflare Communications Inc,
45406 + * 9501 Jeronimo Road, Suite 250,
45407 + * Irvine, CA 92618, USA
45408 + *
45409 + * Maintained by Solarflare Communications
45410 + * <linux-xen-drivers@solarflare.com>
45411 + * <onload-dev@solarflare.com>
45412 + *
45413 + * This program is free software; you can redistribute it and/or modify it
45414 + * under the terms of the GNU General Public License version 2 as published
45415 + * by the Free Software Foundation, incorporated herein by reference.
45416 + *
45417 + * This program is distributed in the hope that it will be useful,
45418 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
45419 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45420 + * GNU General Public License for more details.
45421 + *
45422 + * You should have received a copy of the GNU General Public License
45423 + * along with this program; if not, write to the Free Software
45424 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
45425 + ****************************************************************************
45426 + */
45427 +
45428 +/*
45429 + * \author djr
45430 + * \brief Really-and-truly-honestly internal stuff for libef.
45431 + * \date 2004/06/13
45432 + */
45433 +
45434 +/*! \cidoxg_include_ci_ul */
45435 +#ifndef __CI_EF_VI_INTERNAL_H__
45436 +#define __CI_EF_VI_INTERNAL_H__
45437 +
45438 +
45439 +/* These flags share space with enum ef_vi_flags. */
45440 +#define EF_VI_BUG5692_WORKAROUND 0x10000
45441 +
45442 +
45443 +/* ***********************************************************************
45444 + * COMPILATION CONTROL FLAGS (see ef_vi.h for "workaround" controls)
45445 + */
45446 +
45447 +#define EF_VI_DO_MAGIC_CHECKS 1
45448 +
45449 +
45450 +/**********************************************************************
45451 + * Headers
45452 + */
45453 +
45454 +#include <etherfabric/ef_vi.h>
45455 +#include "sysdep.h"
45456 +#include "ef_vi_falcon.h"
45457 +
45458 +
45459 +/**********************************************************************
45460 + * Debugging.
45461 + */
45462 +
45463 +#ifndef NDEBUG
45464 +
45465 +# define _ef_assert(exp, file, line) BUG_ON(!(exp));
45466 +
45467 +# define _ef_assert2(exp, x, y, file, line) do { \
45468 + if (unlikely(!(exp))) \
45469 + BUG(); \
45470 + } while (0)
45471 +
45472 +#else
45473 +
45474 +# define _ef_assert(exp, file, line)
45475 +# define _ef_assert2(e, x, y, file, line)
45476 +
45477 +#endif
45478 +
45479 +#define ef_assert(a) do{ _ef_assert((a),__FILE__,__LINE__); } while(0)
45480 +#define ef_assert_equal(a,b) _ef_assert2((a)==(b),(a),(b),__FILE__,__LINE__)
45481 +#define ef_assert_eq ef_assert_equal
45482 +#define ef_assert_lt(a,b) _ef_assert2((a)<(b),(a),(b),__FILE__,__LINE__)
45483 +#define ef_assert_le(a,b) _ef_assert2((a)<=(b),(a),(b),__FILE__,__LINE__)
45484 +#define ef_assert_nequal(a,b) _ef_assert2((a)!=(b),(a),(b),__FILE__,__LINE__)
45485 +#define ef_assert_ne ef_assert_nequal
45486 +#define ef_assert_ge(a,b) _ef_assert2((a)>=(b),(a),(b),__FILE__,__LINE__)
45487 +#define ef_assert_gt(a,b) _ef_assert2((a)>(b),(a),(b),__FILE__,__LINE__)
45488 +
45489 +/**********************************************************************
45490 + * Debug checks. ******************************************************
45491 + **********************************************************************/
45492 +
45493 +#ifdef NDEBUG
45494 +# define EF_VI_MAGIC_SET(p, type)
45495 +# define EF_VI_CHECK_VI(p)
45496 +# define EF_VI_CHECK_EVENT_Q(p)
45497 +# define EF_VI_CHECK_IOBUFSET(p)
45498 +# define EF_VI_CHECK_FILTER(p)
45499 +# define EF_VI_CHECK_SHMBUF(p)
45500 +# define EF_VI_CHECK_PT_EP(p)
45501 +#else
45502 +# define EF_VI 0x3
45503 +# define EF_EPLOCK 0x6
45504 +# define EF_IOBUFSET 0x9
45505 +# define EF_FILTER 0xa
45506 +# define EF_SHMBUF 0x11
45507 +
45508 +# define EF_VI_MAGIC(p, type) \
45509 + (((unsigned)(type) << 28) | \
45510 + (((unsigned)(intptr_t)(p)) & 0x0fffffffu))
45511 +
45512 +# if !EF_VI_DO_MAGIC_CHECKS
45513 +# define EF_VI_MAGIC_SET(p, type)
45514 +# define EF_VI_MAGIC_CHECK(p, type)
45515 +# else
45516 +# define EF_VI_MAGIC_SET(p, type) \
45517 + do { \
45518 + (p)->magic = EF_VI_MAGIC((p), (type)); \
45519 + } while (0)
45520 +
45521 +# define EF_VI_MAGIC_OKAY(p, type) \
45522 + ((p)->magic == EF_VI_MAGIC((p), (type)))
45523 +
45524 +# define EF_VI_MAGIC_CHECK(p, type) \
45525 + ef_assert(EF_VI_MAGIC_OKAY((p), (type)))
45526 +
45527 +#endif /* EF_VI_DO_MAGIC_CHECKS */
45528 +
45529 +# define EF_VI_CHECK_VI(p) \
45530 + ef_assert(p); \
45531 + EF_VI_MAGIC_CHECK((p), EF_VI);
45532 +
45533 +# define EF_VI_CHECK_EVENT_Q(p) \
45534 + ef_assert(p); \
45535 + EF_VI_MAGIC_CHECK((p), EF_VI); \
45536 + ef_assert((p)->evq_base); \
45537 + ef_assert((p)->evq_mask);
45538 +
45539 +# define EF_VI_CHECK_PT_EP(p) \
45540 + ef_assert(p); \
45541 + EF_VI_MAGIC_CHECK((p), EF_VI); \
45542 + ef_assert((p)->ep_state);
45543 +
45544 +# define EF_VI_CHECK_IOBUFSET(p) \
45545 + ef_assert(p); \
45546 + EF_VI_MAGIC_CHECK((p), EF_IOBUFSET)
45547 +
45548 +# define EF_VI_CHECK_FILTER(p) \
45549 + ef_assert(p); \
45550 + EF_VI_MAGIC_CHECK((p), EF_FILTER);
45551 +
45552 +# define EF_VI_CHECK_SHMBUF(p) \
45553 + ef_assert(p); \
45554 + EF_VI_MAGIC_CHECK((p), EF_SHMBUF);
45555 +
45556 +#endif
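A hedged usage sketch for the debug-check machinery above (illustrative, not part of the patch): the magic value is stamped when an object is initialised and re-checked on entry to functions that take the object. The function names here are hypothetical.

/* Illustrative only: stamp the magic at init time... */
static inline void example_vi_mark_valid(ef_vi* vi)
{
	EF_VI_MAGIC_SET(vi, EF_VI);     /* compiles away in NDEBUG builds */
}

/* ...and validate it cheaply on API entry. */
static inline unsigned example_vi_resource(ef_vi* vi)
{
	EF_VI_CHECK_VI(vi);             /* asserts the pointer and its magic */
	return vi->vi_resource_id;
}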
45557 +
45558 +#ifndef NDEBUG
45559 +# define EF_DRIVER_MAGIC 0x00f00ba4
45560 +# define EF_ASSERT_THIS_DRIVER_VALID(driver) \
45561 + do{ ef_assert(driver); \
45562 + EF_VI_MAGIC_CHECK((driver), EF_DRIVER_MAGIC); \
45563 + ef_assert((driver)->init); }while(0)
45564 +
45565 +# define EF_ASSERT_DRIVER_VALID() EF_ASSERT_THIS_DRIVER_VALID(&ci_driver)
45566 +#else
45567 +# define EF_ASSERT_THIS_DRIVER_VALID(driver)
45568 +# define EF_ASSERT_DRIVER_VALID()
45569 +#endif
45570 +
45571 +
45572 +/* *************************************
45573 + * Power of 2 FIFO
45574 + */
45575 +
45576 +#define EF_VI_FIFO2_M(f, x) ((x) & ((f)->fifo_mask))
45577 +#define ef_vi_fifo2_valid(f) ((f) && (f)->fifo && (f)->fifo_mask > 0 && \
45578 + (f)->fifo_rd_i <= (f)->fifo_mask && \
45579 + (f)->fifo_wr_i <= (f)->fifo_mask && \
45580 + EF_VI_IS_POW2((f)->fifo_mask+1u))
45581 +
45582 +#define ef_vi_fifo2_init(f, cap) \
45583 + do{ ef_assert(EF_VI_IS_POW2((cap) + 1)); \
45584 + (f)->fifo_rd_i = (f)->fifo_wr_i = 0u; \
45585 + (f)->fifo_mask = (cap); \
45586 + }while(0)
45587 +
45588 +#define ef_vi_fifo2_is_empty(f) ((f)->fifo_rd_i == (f)->fifo_wr_i)
45589 +#define ef_vi_fifo2_capacity(f) ((f)->fifo_mask)
45590 +#define ef_vi_fifo2_buf_size(f) ((f)->fifo_mask + 1u)
45591 +#define ef_vi_fifo2_end(f) ((f)->fifo + ef_vi_fifo2_buf_size(f))
45592 +#define ef_vi_fifo2_peek(f) ((f)->fifo[(f)->fifo_rd_i])
45593 +#define ef_vi_fifo2_poke(f) ((f)->fifo[(f)->fifo_wr_i])
45594 +#define ef_vi_fifo2_num(f) EF_VI_FIFO2_M((f),(f)->fifo_wr_i-(f)->fifo_rd_i)
45595 +
45596 +#define ef_vi_fifo2_wr_prev(f) \
45597 + do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i - 1u); }while(0)
45598 +#define ef_vi_fifo2_wr_next(f) \
45599 + do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i + 1u); }while(0)
45600 +#define ef_vi_fifo2_rd_adv(f, n) \
45601 + do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + (n)); }while(0)
45602 +#define ef_vi_fifo2_rd_prev(f) \
45603 + do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i - 1u); }while(0)
45604 +#define ef_vi_fifo2_rd_next(f) \
45605 + do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + 1u); }while(0)
45606 +
45607 +#define ef_vi_fifo2_put(f, v) \
45608 + do{ ef_vi_fifo2_poke(f) = (v); ef_vi_fifo2_wr_next(f); }while(0)
45609 +#define ef_vi_fifo2_get(f, pv) \
45610 + do{ *(pv) = ef_vi_fifo2_peek(f); ef_vi_fifo2_rd_next(f); }while(0)
45611 +
45612 +
45613 +/* *********************************************************************
45614 + * Eventq handling
45615 + */
45616 +
45617 +typedef union {
45618 + uint64_t u64;
45619 + struct {
45620 + uint32_t a;
45621 + uint32_t b;
45622 + } opaque;
45623 +} ef_vi_event;
45624 +
45625 +
45626 +#define EF_VI_EVENT_OFFSET(q, i) \
45627 + (((q)->evq_state->evq_ptr - (i) * sizeof(ef_vi_event)) & (q)->evq_mask)
45628 +
45629 +#define EF_VI_EVENT_PTR(q, i) \
45630 + ((ef_vi_event*) ((q)->evq_base + EF_VI_EVENT_OFFSET((q), (i))))
45631 +
45632 +/* *********************************************************************
45633 + * Miscellaneous goodies
45634 + */
45635 +#ifdef NDEBUG
45636 +# define EF_VI_DEBUG(x)
45637 +#else
45638 +# define EF_VI_DEBUG(x) x
45639 +#endif
45640 +
45641 +#define EF_VI_ROUND_UP(i, align) (((i)+(align)-1u) & ~((align)-1u))
45642 +#define EF_VI_ALIGN_FWD(p, align) (((p)+(align)-1u) & ~((align)-1u))
45643 +#define EF_VI_ALIGN_BACK(p, align) ((p) & ~((align)-1u))
45644 +#define EF_VI_PTR_ALIGN_BACK(p, align) \
45645 + ((char*)EF_VI_ALIGN_BACK(((intptr_t)(p)), ((intptr_t)(align))))
45646 +#define EF_VI_IS_POW2(x) ((x) && ! ((x) & ((x) - 1)))
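The ef_vi_fifo2_* macros defined earlier operate on any structure with fifo, fifo_mask, fifo_rd_i and fifo_wr_i members. The following is a minimal sketch (not part of the patch) with a hypothetical element type; note that the capacity passed to ef_vi_fifo2_init() must be one less than a power of two, which EF_VI_IS_POW2() above enforces via ef_assert().

/* Illustrative only: a hypothetical FIFO of request ids with the members
 * the ef_vi_fifo2_* macros dereference. */
struct example_id_fifo {
	unsigned* fifo;        /* backing array of fifo_mask + 1 entries */
	unsigned  fifo_mask;   /* capacity; must be 2^n - 1              */
	unsigned  fifo_rd_i;   /* read index                             */
	unsigned  fifo_wr_i;   /* write index                            */
};

static inline void example_id_fifo_demo(void)
{
	static unsigned storage[64];
	struct example_id_fifo f = { storage, 0, 0, 0 };
	unsigned id = 0;

	ef_vi_fifo2_init(&f, 63);          /* capacity = 2^6 - 1 */
	ef_vi_fifo2_put(&f, 42u);
	if (!ef_vi_fifo2_is_empty(&f))
		ef_vi_fifo2_get(&f, &id);  /* id is now 42 */
	(void)id;
}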
45647 +
45648 +
45649 +/* ********************************************************************
45650 + */
45651 +
45652 +extern void falcon_vi_init(ef_vi*, void* vvis ) EF_VI_HF;
45653 +extern void ef_eventq_state_init(ef_vi* evq) EF_VI_HF;
45654 +extern void __ef_init(void) EF_VI_HF;
45655 +
45656 +
45657 +#endif /* __CI_EF_VI_INTERNAL_H__ */
45658 +
45659 Index: head-2008-11-25/drivers/xen/sfc_netfront/etherfabric/ef_vi.h
45660 ===================================================================
45661 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
45662 +++ head-2008-11-25/drivers/xen/sfc_netfront/etherfabric/ef_vi.h 2008-02-20 09:32:49.000000000 +0100
45663 @@ -0,0 +1,665 @@
45664 +/****************************************************************************
45665 + * Copyright 2002-2005: Level 5 Networks Inc.
45666 + * Copyright 2005-2008: Solarflare Communications Inc,
45667 + * 9501 Jeronimo Road, Suite 250,
45668 + * Irvine, CA 92618, USA
45669 + *
45670 + * Maintained by Solarflare Communications
45671 + * <linux-xen-drivers@solarflare.com>
45672 + * <onload-dev@solarflare.com>
45673 + *
45674 + * This program is free software; you can redistribute it and/or modify it
45675 + * under the terms of the GNU General Public License version 2 as published
45676 + * by the Free Software Foundation, incorporated herein by reference.
45677 + *
45678 + * This program is distributed in the hope that it will be useful,
45679 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
45680 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45681 + * GNU General Public License for more details.
45682 + *
45683 + * You should have received a copy of the GNU General Public License
45684 + * along with this program; if not, write to the Free Software
45685 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
45686 + ****************************************************************************
45687 + */
45688 +
45689 +/*
45690 + * \brief Virtual Interface
45691 + * \date 2007/05/16
45692 + */
45693 +
45694 +#ifndef __EFAB_EF_VI_H__
45695 +#define __EFAB_EF_VI_H__
45696 +
45697 +
45698 +/**********************************************************************
45699 + * Primitive types ****************************************************
45700 + **********************************************************************/
45701 +
45702 +/* We standardise on the types from stdint.h and synthesise these types
45703 + * for compilers/platforms that don't provide them */
45704 +
45705 +# include <linux/types.h>
45706 +# define EF_VI_ALIGN(x) __attribute__ ((aligned (x)))
45707 +# define ef_vi_inline static inline
45708 +
45709 +
45710 +
45711 +/**********************************************************************
45712 + * Types **************************************************************
45713 + **********************************************************************/
45714 +
45715 +typedef uint32_t ef_eventq_ptr;
45716 +
45717 +typedef uint64_t ef_addr;
45718 +typedef char* ef_vi_ioaddr_t;
45719 +
45720 +/**********************************************************************
45721 + * ef_event ***********************************************************
45722 + **********************************************************************/
45723 +
45724 +/*! \i_ef_vi A DMA request identifier.
45725 +**
45726 +** This is an integer token specified by the transport and associated
45727 +** with a DMA request. It is returned to the VI user with DMA completion
45728 +** events. It is typically used to identify the buffer associated with
45729 +** the transfer.
45730 +*/
45731 +typedef int ef_request_id;
45732 +
45733 +typedef union {
45734 + uint64_t u64[1];
45735 + uint32_t u32[2];
45736 +} ef_vi_qword;
45737 +
45738 +typedef ef_vi_qword ef_hw_event;
45739 +
45740 +#define EF_REQUEST_ID_BITS 16u
45741 +#define EF_REQUEST_ID_MASK ((1u << EF_REQUEST_ID_BITS) - 1u)
45742 +
45743 +/*! \i_ef_event An [ef_event] is a token that identifies something that
45744 +** has happened. Examples include packets received, packets transmitted
45745 +** and errors.
45746 +*/
45747 +typedef union {
45748 + struct {
45749 + ef_hw_event ev;
45750 + unsigned type :16;
45751 + } generic;
45752 + struct {
45753 + ef_hw_event ev;
45754 + unsigned type :16;
45755 + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
45756 + unsigned q_id :16;
45757 + unsigned len :16;
45758 + unsigned flags :16;
45759 + } rx;
45760 + struct { /* This *must* have same layout as [rx]. */
45761 + ef_hw_event ev;
45762 + unsigned type :16;
45763 + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
45764 + unsigned q_id :16;
45765 + unsigned len :16;
45766 + unsigned flags :16;
45767 + unsigned subtype :16;
45768 + } rx_discard;
45769 + struct {
45770 + ef_hw_event ev;
45771 + unsigned type :16;
45772 + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
45773 + unsigned q_id :16;
45774 + } tx;
45775 + struct {
45776 + ef_hw_event ev;
45777 + unsigned type :16;
45778 + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
45779 + unsigned q_id :16;
45780 + unsigned subtype :16;
45781 + } tx_error;
45782 + struct {
45783 + ef_hw_event ev;
45784 + unsigned type :16;
45785 + unsigned q_id :16;
45786 + } rx_no_desc_trunc;
45787 + struct {
45788 + ef_hw_event ev;
45789 + unsigned type :16;
45790 + unsigned data;
45791 + } sw;
45792 +} ef_event;
45793 +
45794 +
45795 +#define EF_EVENT_TYPE(e) ((e).generic.type)
45796 +enum {
45797 + /** Good data was received. */
45798 + EF_EVENT_TYPE_RX,
45799 + /** Packets have been sent. */
45800 + EF_EVENT_TYPE_TX,
45801 + /** Data received and buffer consumed, but something is wrong. */
45802 + EF_EVENT_TYPE_RX_DISCARD,
45803 + /** Transmit of packet failed. */
45804 + EF_EVENT_TYPE_TX_ERROR,
45805 + /** Received packet was truncated due to lack of descriptors. */
45806 + EF_EVENT_TYPE_RX_NO_DESC_TRUNC,
45807 + /** Software generated event. */
45808 + EF_EVENT_TYPE_SW,
45809 + /** Event queue overflow. */
45810 + EF_EVENT_TYPE_OFLOW,
45811 +};
45812 +
45813 +#define EF_EVENT_RX_BYTES(e) ((e).rx.len)
45814 +#define EF_EVENT_RX_Q_ID(e) ((e).rx.q_id)
45815 +#define EF_EVENT_RX_CONT(e) ((e).rx.flags & EF_EVENT_FLAG_CONT)
45816 +#define EF_EVENT_RX_SOP(e) ((e).rx.flags & EF_EVENT_FLAG_SOP)
45817 +#define EF_EVENT_RX_ISCSI_OKAY(e) ((e).rx.flags & EF_EVENT_FLAG_ISCSI_OK)
45818 +#define EF_EVENT_FLAG_SOP 0x1
45819 +#define EF_EVENT_FLAG_CONT 0x2
45820 +#define EF_EVENT_FLAG_ISCSI_OK 0x4
45821 +
45822 +#define EF_EVENT_TX_Q_ID(e) ((e).tx.q_id)
45823 +
45824 +#define EF_EVENT_RX_DISCARD_Q_ID(e) ((e).rx_discard.q_id)
45825 +#define EF_EVENT_RX_DISCARD_LEN(e) ((e).rx_discard.len)
45826 +#define EF_EVENT_RX_DISCARD_TYPE(e) ((e).rx_discard.subtype)
45827 +enum {
45828 + EF_EVENT_RX_DISCARD_CSUM_BAD,
45829 + EF_EVENT_RX_DISCARD_CRC_BAD,
45830 + EF_EVENT_RX_DISCARD_TRUNC,
45831 + EF_EVENT_RX_DISCARD_RIGHTS,
45832 + EF_EVENT_RX_DISCARD_OTHER,
45833 +};
45834 +
45835 +#define EF_EVENT_TX_ERROR_Q_ID(e) ((e).tx_error.q_id)
45836 +#define EF_EVENT_TX_ERROR_TYPE(e) ((e).tx_error.subtype)
45837 +enum {
45838 + EF_EVENT_TX_ERROR_RIGHTS,
45839 + EF_EVENT_TX_ERROR_OFLOW,
45840 + EF_EVENT_TX_ERROR_2BIG,
45841 + EF_EVENT_TX_ERROR_BUS,
45842 +};
45843 +
45844 +#define EF_EVENT_RX_NO_DESC_TRUNC_Q_ID(e) ((e).rx_no_desc_trunc.q_id)
45845 +
45846 +#define EF_EVENT_SW_DATA_MASK 0xffff
45847 +#define EF_EVENT_SW_DATA(e) ((e).sw.data)
45848 +
45849 +#define EF_EVENT_FMT "[ev:%x:%08x:%08x]"
45850 +#define EF_EVENT_PRI_ARG(e) (unsigned) (e).generic.type, \
45851 + (unsigned) (e).generic.ev.u32[1], \
45852 + (unsigned) (e).generic.ev.u32[0]
45853 +
45854 +#define EF_GET_HW_EV(e) ((e).generic.ev)
45855 +#define EF_GET_HW_EV_PTR(e) (&(e).generic.ev)
45856 +#define EF_GET_HW_EV_U64(e) ((e).generic.ev.u64[0])
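A hedged sketch (not part of the patch) of how the accessor macros above are used once a batch of events has been obtained from the event queue; retrieving the events themselves is outside the scope of this snippet, and example_summarise_events() is a hypothetical name.

/* Illustrative only: tally a batch of events by type. */
static inline void example_summarise_events(const ef_event* evs, int n_ev,
                                            unsigned* rx_bytes,
                                            unsigned* n_discards)
{
	int i;

	for (i = 0; i < n_ev; ++i) {
		switch (EF_EVENT_TYPE(evs[i])) {
		case EF_EVENT_TYPE_RX:
			/* Length and queue come from EF_EVENT_RX_BYTES()/_Q_ID();
			 * EF_EVENT_RX_SOP/CONT flag scatter fragments. */
			*rx_bytes += EF_EVENT_RX_BYTES(evs[i]);
			break;
		case EF_EVENT_TYPE_RX_DISCARD:
			/* EF_EVENT_RX_DISCARD_TYPE() gives one of the reasons above. */
			++*n_discards;
			break;
		case EF_EVENT_TYPE_TX:
			/* TX completions are normally unbundled into request ids
			 * with ef_vi_transmit_unbundle(), declared later in this file. */
			break;
		default:
			break;
		}
	}
}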
45857 +
45858 +
45859 +/* ***************** */
45860 +
45861 +/*! Used by netif shared state. Must use types of explicit size. */
45862 +typedef struct {
45863 + uint16_t rx_last_desc_ptr; /* for RX duplicates */
45864 + uint8_t bad_sop; /* bad SOP detected */
45865 + uint8_t frag_num; /* next fragment #, 0=>SOP */
45866 +} ef_rx_dup_state_t;
45867 +
45868 +
45869 +/* Max number of ports on any SF NIC. */
45870 +#define EFAB_DMAQS_PER_EVQ_MAX 32
45871 +
45872 +typedef struct {
45873 + ef_eventq_ptr evq_ptr;
45874 + int32_t trashed;
45875 + ef_rx_dup_state_t rx_dup_state[EFAB_DMAQS_PER_EVQ_MAX];
45876 +} ef_eventq_state;
45877 +
45878 +
45879 +/*! \i_ef_base [ef_iovec] is similar to the standard [struct iovec]. An
45880 +** array of these is used to designate a scatter/gather list of I/O
45881 +** buffers.
45882 +*/
45883 +typedef struct {
45884 + ef_addr iov_base EF_VI_ALIGN(8);
45885 + unsigned iov_len;
45886 +} ef_iovec;
45887 +
45888 +/* Falcon constants */
45889 +#define TX_EV_DESC_PTR_LBN 0
45890 +
45891 +/**********************************************************************
45892 + * ef_iobufset ********************************************************
45893 + **********************************************************************/
45894 +
45895 +/*! \i_ef_bufs An [ef_iobufset] is a collection of buffers to be used
45896 +** with the NIC.
45897 +*/
45898 +typedef struct ef_iobufset {
45899 + unsigned magic;
45900 + unsigned bufs_mmap_bytes;
45901 + unsigned bufs_handle;
45902 + int bufs_ptr_off;
45903 + ef_addr bufs_addr;
45904 + unsigned bufs_size; /* size rounded to pow2 */
45905 + int bufs_num;
45906 + int faultonaccess;
45907 +} ef_iobufset;
45908 +
45909 +
45910 +/**********************************************************************
45911 + * ef_vi **************************************************************
45912 + **********************************************************************/
45913 +
45914 +enum ef_vi_flags {
45915 + EF_VI_RX_SCATTER = 0x1,
45916 + EF_VI_ISCSI_RX_HDIG = 0x2,
45917 + EF_VI_ISCSI_TX_HDIG = 0x4,
45918 + EF_VI_ISCSI_RX_DDIG = 0x8,
45919 + EF_VI_ISCSI_TX_DDIG = 0x10,
45920 + EF_VI_TX_PHYS_ADDR = 0x20,
45921 + EF_VI_RX_PHYS_ADDR = 0x40,
45922 + EF_VI_TX_IP_CSUM_DIS = 0x80,
45923 + EF_VI_TX_TCPUDP_CSUM_DIS= 0x100,
45924 + EF_VI_TX_TCPUDP_ONLY = 0x200,
45925 + /* Flags in range 0xXXXX0000 are for internal use. */
45926 +};
45927 +
45928 +typedef struct {
45929 + uint32_t added;
45930 + uint32_t removed;
45931 +} ef_vi_txq_state;
45932 +
45933 +typedef struct {
45934 + uint32_t added;
45935 + uint32_t removed;
45936 +} ef_vi_rxq_state;
45937 +
45938 +typedef struct {
45939 + uint32_t mask;
45940 + void* doorbell;
45941 + void* descriptors;
45942 + uint16_t* ids;
45943 + unsigned misalign_mask;
45944 +} ef_vi_txq;
45945 +
45946 +typedef struct {
45947 + uint32_t mask;
45948 + void* doorbell;
45949 + void* descriptors;
45950 + uint16_t* ids;
45951 +} ef_vi_rxq;
45952 +
45953 +typedef struct {
45954 + ef_eventq_state evq;
45955 + ef_vi_txq_state txq;
45956 + ef_vi_rxq_state rxq;
45957 + /* Followed by request id fifos. */
45958 +} ef_vi_state;
45959 +
45960 +/*! \i_ef_vi A virtual interface.
45961 +**
45962 +** An [ef_vi] represents a virtual interface on a specific NIC. A
45963 +** virtual interface is a collection of an event queue and two DMA queues
45964 +** used to pass Ethernet frames between the transport implementation and
45965 +** the network.
45966 +*/
45967 +typedef struct ef_vi {
45968 + unsigned magic;
45969 +
45970 + unsigned vi_resource_id;
45971 + unsigned vi_resource_handle_hack;
45972 + unsigned vi_i;
45973 +
45974 + char* vi_mem_mmap_ptr;
45975 + int vi_mem_mmap_bytes;
45976 + char* vi_io_mmap_ptr;
45977 + int vi_io_mmap_bytes;
45978 +
45979 + ef_eventq_state* evq_state;
45980 + char* evq_base;
45981 + unsigned evq_mask;
45982 + ef_vi_ioaddr_t evq_timer_reg;
45983 +
45984 + ef_vi_txq vi_txq;
45985 + ef_vi_rxq vi_rxq;
45986 + ef_vi_state* ep_state;
45987 + enum ef_vi_flags vi_flags;
45988 +} ef_vi;
45989 +
45990 +
45991 +enum ef_vi_arch {
45992 + EF_VI_ARCH_FALCON,
45993 +};
45994 +
45995 +
45996 +struct ef_vi_nic_type {
45997 + unsigned char arch;
45998 + char variant;
45999 + unsigned char revision;
46000 +};
46001 +
46002 +
46003 +/* This structure is opaque to the client & used to pass mapping data
46004 + * from the resource manager to the ef_vi lib. for ef_vi_init().
46005 + */
46006 +struct vi_mappings {
46007 + uint32_t signature;
46008 +# define VI_MAPPING_VERSION 0x02 /*Byte: Increment me if struct altered*/
46009 +# define VI_MAPPING_SIGNATURE (0xBA1150 + VI_MAPPING_VERSION)
46010 +
46011 + struct ef_vi_nic_type nic_type;
46012 +
46013 + int vi_instance;
46014 +
46015 + unsigned evq_bytes;
46016 + char* evq_base;
46017 + ef_vi_ioaddr_t evq_timer_reg;
46018 +
46019 + unsigned rx_queue_capacity;
46020 + ef_vi_ioaddr_t rx_dma_ef1;
46021 + char* rx_dma_falcon;
46022 + ef_vi_ioaddr_t rx_bell;
46023 +
46024 + unsigned tx_queue_capacity;
46025 + ef_vi_ioaddr_t tx_dma_ef1;
46026 + char* tx_dma_falcon;
46027 + ef_vi_ioaddr_t tx_bell;
46028 +};
46029 +/* This is used by clients to allocate a suitably sized buffer for the
46030 + * resource manager to fill & ef_vi_init() to use. */
46031 +#define VI_MAPPINGS_SIZE (sizeof(struct vi_mappings))
46032 +
46033 +
46034 +/**********************************************************************
46035 + * ef_config **********************************************************
46036 + **********************************************************************/
46037 +
46038 +struct ef_config_t {
46039 + int log; /* debug logging level */
46040 +};
46041 +
46042 +extern struct ef_config_t ef_config;
46043 +
46044 +
46045 +/**********************************************************************
46046 + * ef_vi **************************************************************
46047 + **********************************************************************/
46048 +
46049 +/* Initialise [data_area] with information required to initialise an ef_vi.
46050 + * In the following, an unused param should be set to NULL. Note the case
46051 + * marked (*) of [iobuf_mmap] for falcon/driver; for normal driver this
46052 + * must be NULL.
46053 + *
46054 + * \param data_area [in,out] required, must ref at least VI_MAPPINGS_SIZE
46055 + * bytes
46056 + * \param evq_capacity [in] number of events in event queue. Specify 0 for
46057 + * no event queue.
46058 + * \param rxq_capacity [in] number of descriptors in RX DMA queue. Specify
46059 + * 0 for no RX queue.
46060 + * \param txq_capacity [in] number of descriptors in TX DMA queue. Specify
46061 + * 0 for no TX queue.
46062 + * \param mmap_info [in] mem-map info for resource
46063 + * \param io_mmap [in] ef1, required
46064 + * falcon, required
46065 + * \param iobuf_mmap [in] ef1, UL: unused
46066 + * falcon, UL: required
46067 + */
46068 +extern void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type,
46069 + unsigned rxq_capacity,
46070 + unsigned txq_capacity, int instance,
46071 + void* io_mmap, void* iobuf_mmap_rx,
46072 + void* iobuf_mmap_tx, enum ef_vi_flags);
46073 +
46074 +
46075 +extern void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type,
46076 + int instance, unsigned evq_bytes,
46077 + void* base, void* timer_reg);
46078 +
46079 +ef_vi_inline unsigned ef_vi_resource_id(ef_vi* vi)
46080 +{
46081 + return vi->vi_resource_id;
46082 +}
46083 +
46084 +ef_vi_inline enum ef_vi_flags ef_vi_flags(ef_vi* vi)
46085 +{
46086 + return vi->vi_flags;
46087 +}
46088 +
46089 +
46090 +/**********************************************************************
46091 + * Receive interface **************************************************
46092 + **********************************************************************/
46093 +
46094 +/*! \i_ef_vi Returns the amount of space in the RX descriptor ring.
46095 +**
46096 +** \return the amount of space in the queue.
46097 +*/
46098 +ef_vi_inline int ef_vi_receive_space(ef_vi* vi)
46099 +{
46100 + ef_vi_rxq_state* qs = &vi->ep_state->rxq;
46101 + return vi->vi_rxq.mask - (qs->added - qs->removed);
46102 +}
46103 +
46104 +
46105 +/*! \i_ef_vi Returns the fill level of the RX descriptor ring.
46106 +**
46107 +** \return the fill level of the queue.
46108 +*/
46109 +ef_vi_inline int ef_vi_receive_fill_level(ef_vi* vi)
46110 +{
46111 + ef_vi_rxq_state* qs = &vi->ep_state->rxq;
46112 + return qs->added - qs->removed;
46113 +}
46114 +
46115 +
46116 +ef_vi_inline int ef_vi_receive_capacity(ef_vi* vi)
46117 +{
46118 + return vi->vi_rxq.mask;
46119 +}
46120 +
46121 +/*! \i_ef_vi Complete a receive operation.
46122 +**
46123 +** When a receive completion event is received, it should be passed to
46124 +** this function. The request-id for the buffer that the packet was
46125 +** delivered to is returned.
46126 +**
46127 +** After this function returns, more space may be available in the
46128 +** receive queue.
46129 +*/
46130 +extern ef_request_id ef_vi_receive_done(const ef_vi*, const ef_event*);
46131 +
46132 +/*! \i_ef_vi Return request ID indicated by a receive event
46133 + */
46134 +ef_vi_inline ef_request_id ef_vi_receive_request_id(const ef_vi* vi,
46135 + const ef_event* ef_ev)
46136 +{
46137 + const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev);
46138 + return ev->u32[0] & vi->vi_rxq.mask;
46139 +}
46140 +
46141 +
46142 +/*! \i_ef_vi Form a receive descriptor.
46143 +**
46144 +** If \c initial_rx_bytes is zero use a reception size at least as large
46145 +** as an MTU.
46146 +*/
46147 +extern int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id,
46148 + int initial_rx_bytes);
46149 +
46150 +/*! \i_ef_vi Submit initialised receive descriptors to the NIC. */
46151 +extern void ef_vi_receive_push(ef_vi* vi);
46152 +
46153 +/*! \i_ef_vi Post a buffer on the receive queue.
46154 +**
46155 +** \return 0 on success, or -EAGAIN if the receive queue is full
46156 +*/
46157 +extern int ef_vi_receive_post(ef_vi*, ef_addr addr,
46158 + ef_request_id dma_id);
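A sketch of the receive-side flow implied by the declarations above (illustrative, not part of the patch): form descriptors while there is ring space, then push them to the NIC in one go. The caller-provided address table is an assumption of this example, passing 0 for initial_rx_bytes follows the comment above ("at least as large as an MTU"), and the error check assumes a negative return from ef_vi_receive_init() signals failure.

/* Illustrative only: refill the RX ring from a caller-provided table of
 * DMA addresses indexed by request id. */
static inline void example_refill_rx_ring(ef_vi* vi, const ef_addr* buf_addrs,
                                          int n_bufs)
{
	int id = 0, formed = 0;

	while (ef_vi_receive_space(vi) > 0 && id < n_bufs) {
		if (ef_vi_receive_init(vi, buf_addrs[id],
		                       (ef_request_id)id, 0) < 0)
			break;                /* queue unexpectedly full */
		++id;
		++formed;
	}
	if (formed)
		ef_vi_receive_push(vi);       /* hand the new descriptors to the NIC */
}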
46159 +
46160 +/**********************************************************************
46161 + * Transmit interface *************************************************
46162 + **********************************************************************/
46163 +
46164 +/*! \i_ef_vi Return the amount of space (in descriptors) in the transmit
46165 +** queue.
46166 +**
46167 +** \return the amount of space in the queue (in descriptors)
46168 +*/
46169 +ef_vi_inline int ef_vi_transmit_space(ef_vi* vi)
46170 +{
46171 + ef_vi_txq_state* qs = &vi->ep_state->txq;
46172 + return vi->vi_txq.mask - (qs->added - qs->removed);
46173 +}
46174 +
46175 +
46176 +/*! \i_ef_vi Returns the fill level of the TX descriptor ring.
46177 +**
46178 +** \return the fill level of the queue.
46179 +*/
46180 +ef_vi_inline int ef_vi_transmit_fill_level(ef_vi* vi)
46181 +{
46182 + ef_vi_txq_state* qs = &vi->ep_state->txq;
46183 + return qs->added - qs->removed;
46184 +}
46185 +
46186 +
46187 +/*! \i_ef_vi Returns the total capacity of the TX descriptor ring.
46188 +**
46189 +** \return the capacity of the queue.
46190 +*/
46191 +ef_vi_inline int ef_vi_transmit_capacity(ef_vi* vi)
46192 +{
46193 + return vi->vi_txq.mask;
46194 +}
46195 +
46196 +
46197 +/*! \i_ef_vi Transmit a packet.
46198 +**
46199 +** \param bytes must be greater than ETH_ZLEN.
46200 +** \return -EAGAIN if the transmit queue is full, or 0 on success
46201 +*/
46202 +extern int ef_vi_transmit(ef_vi*, ef_addr, int bytes, ef_request_id dma_id);
46203 +
46204 +/*! \i_ef_vi Transmit a packet using a gather list.
46205 +**
46206 +** \param iov_len must be greater than zero
46207 +** \param iov the first element must have non-zero length (others need not)
46208 +**
46209 +** \return -EAGAIN if the queue is full, or 0 on success
46210 +*/
46211 +extern int ef_vi_transmitv(ef_vi*, const ef_iovec* iov, int iov_len,
46212 + ef_request_id dma_id);
46213 +
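/* Illustrative sketch: transmit one packet split into a header fragment and
 * a payload fragment using the gather interface.  Both ef_addr values are
 * assumed to refer to registered buffers. */
static inline int example_send_hdr_payload(ef_vi* vi,
					   ef_addr hdr, int hdr_len,
					   ef_addr payload, int payload_len,
					   ef_request_id id)
{
	ef_iovec iov[2] = { { hdr, hdr_len }, { payload, payload_len } };
	return ef_vi_transmitv(vi, iov, 2, id);	/* 0 or -EAGAIN */
}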
46214 +/*! \i_ef_vi Initialise a DMA request.
46215 +**
46216 +** \return -EAGAIN if the queue is full, or 0 on success
46217 +*/
46218 +extern int ef_vi_transmit_init(ef_vi*, ef_addr, int bytes,
46219 + ef_request_id dma_id);
46220 +
46221 +/*! \i_ef_vi Initialise a DMA request.
46222 +**
46223 +** \return -EAGAIN if the queue is full, or 0 on success
46224 +*/
46225 +extern int ef_vi_transmitv_init(ef_vi*, const ef_iovec*, int iov_len,
46226 + ef_request_id dma_id);
46227 +
46228 +/*! \i_ef_vi Submit DMA requests to the NIC.
46229 +**
46230 +** The DMA requests must have been initialised using
46231 +** ef_vi_transmit_init() or ef_vi_transmitv_init().
46232 +*/
46233 +extern void ef_vi_transmit_push(ef_vi*);
46234 +
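/* Illustrative sketch: initialise a batch of DMA requests and ring the TX
 * doorbell once.  pkt_addr[]/pkt_len[] are assumed to describe registered
 * packet buffers; the request id is simply the array index here. */
static inline int example_send_batch(ef_vi* vi, const ef_addr* pkt_addr,
				     const int* pkt_len, int n_pkts)
{
	int i, n_queued = 0;

	for( i = 0; i < n_pkts; ++i ) {
		if( ef_vi_transmit_init(vi, pkt_addr[i], pkt_len[i],
					(ef_request_id) i) < 0 )
			break;			/* TX ring full */
		++n_queued;
	}
	if( n_queued )
		ef_vi_transmit_push(vi);	/* one doorbell for the batch */
	return n_queued;
}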
46235 +
46236 +/*! \i_ef_vi Maximum number of transmit completions per transmit event. */
46237 +#define EF_VI_TRANSMIT_BATCH 64
46238 +
46239 +/*! \i_ef_vi Determine the set of [ef_request_id]s for each DMA request
46240 +** which has been completed by a given transmit completion
46241 +** event.
46242 +**
46243 +** \param ids must point to an array of length EF_VI_TRANSMIT_BATCH
46244 +** \return the number of valid [ef_request_id]s (can be zero)
46245 +*/
46246 +extern int ef_vi_transmit_unbundle(ef_vi* ep, const ef_event*,
46247 + ef_request_id* ids);
46248 +
46249 +
46250 +/*! \i_ef_event Returns true if ef_eventq_poll() will return event(s). */
46251 +extern int ef_eventq_has_event(ef_vi* vi);
46252 +
46253 +/*! \i_ef_event Returns true if the event [look_ahead] entries beyond the
46254 +** head of the event queue is already present.
46255 +**
46256 +** This looks ahead in the event queue, so has the property that it will
46257 +** not ping-pong a cache-line when it is called concurrently with events
46258 +** being delivered.
46259 +*/
46260 +extern int ef_eventq_has_many_events(ef_vi* evq, int look_ahead);
46261 +
46262 +/*! Type of function used to handle unknown events arriving on the event
46263 +** queue.  Returns CI_TRUE iff the event has been handled.
46264 +*/
46265 +typedef int/*bool*/ ef_event_handler_fn(void* priv, ef_vi* evq, ef_event* ev);
46266 +
46267 +/*! Standard poll exception routine */
46268 +extern int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq,
46269 + ef_event* ev);
46270 +
46271 +/*! \i_ef_event Retrieve events from the event queue, handle RX/TX events
46272 +** and pass any others to an exception handler function
46273 +**
46274 +** \return The number of events retrieved.
46275 +*/
46276 +extern int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len,
46277 + ef_event_handler_fn *exception, void *expt_priv);
46278 +
46279 +/*! \i_ef_event Retrieve events from the event queue.
46280 +**
46281 +** \return The number of events retrieved.
46282 +*/
46283 +ef_vi_inline int ef_eventq_poll(ef_vi* evq, ef_event* evs, int evs_len)
46284 +{
46285 + return ef_eventq_poll_evs(evq, evs, evs_len,
46286 + &ef_eventq_poll_exception, (void*)0);
46287 +}
46288 +
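/* Illustrative sketch of a polling loop.  example_rx_done() and
 * example_tx_done() are hypothetical application callbacks; EF_EVENT_TYPE()
 * and the EF_EVENT_TYPE_* values are assumed to be defined earlier in this
 * header. */
extern void example_rx_done(ef_request_id id, unsigned len);  /* hypothetical */
extern void example_tx_done(ef_request_id id);                /* hypothetical */

static inline void example_poll_once(ef_vi* vi)
{
	ef_event evs[16];
	ef_request_id ids[EF_VI_TRANSMIT_BATCH];
	int i, n, n_tx;

	n = ef_eventq_poll(vi, evs, 16);
	for( i = 0; i < n; ++i )
		switch( EF_EVENT_TYPE(evs[i]) ) {
		case EF_EVENT_TYPE_RX:
			example_rx_done(ef_vi_receive_done(vi, &evs[i]),
					evs[i].rx.len);
			break;
		case EF_EVENT_TYPE_TX:
			n_tx = ef_vi_transmit_unbundle(vi, &evs[i], ids);
			while( n_tx-- )
				example_tx_done(ids[n_tx]);
			break;
		default:
			break;		/* discards, errors, etc. */
		}
}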
46289 +/*! \i_ef_event Returns the capacity of an event queue. */
46290 +ef_vi_inline int ef_eventq_capacity(ef_vi* vi)
46291 +{
46292 + return (vi->evq_mask + 1u) / sizeof(ef_hw_event);
46293 +}
46294 +
46295 +/* Returns the instance ID of [vi] */
46296 +ef_vi_inline unsigned ef_vi_instance(ef_vi* vi)
46297 +{ return vi->vi_i; }
46298 +
46299 +
46300 +/**********************************************************************
46301 + * Initialisation *****************************************************
46302 + **********************************************************************/
46303 +
46304 +/*! Return size of state buffer of an initialised VI. */
46305 +extern int ef_vi_state_bytes(ef_vi*);
46306 +
46307 +/*! Return size of buffer needed for VI state given sizes of RX and TX
46308 +** DMA queues. Queue sizes must be legal sizes (power of 2), or 0 (no
46309 +** queue).
46310 +*/
46311 +extern int ef_vi_calc_state_bytes(int rxq_size, int txq_size);
46312 +
46313 +/*! Initialise [ef_vi] from the provided resources. [vvis] must have been
46314 +** created by ef_make_vi_data() & remains owned by the caller.
46315 +*/
46316 +extern void ef_vi_init(ef_vi*, void* vi_info, ef_vi_state* state,
46317 + ef_eventq_state* evq_state, enum ef_vi_flags);
46318 +
46319 +extern void ef_vi_state_init(ef_vi*);
46320 +extern void ef_eventq_state_init(ef_vi*);
46321 +
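/* Illustrative sketch of the intended bring-up order, after the vi_info area
 * has been prepared with ef_vi_init_mapping_vi()/ef_vi_init_mapping_evq()
 * and state memory of at least ef_vi_calc_state_bytes() bytes has been
 * allocated.  The flag value of 0 is an assumption of this example. */
static inline void example_vi_bringup(ef_vi* vi, void* vi_info,
				      ef_vi_state* state,
				      ef_eventq_state* evq_state)
{
	ef_vi_init(vi, vi_info, state, evq_state, (enum ef_vi_flags) 0);
	ef_vi_state_init(vi);
	ef_eventq_state_init(vi);	/* assumes an event queue was mapped */
}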
46322 +/*! Converts an efhw device arch to an ef_vi_arch, or returns -1 if not
46323 +** recognised.
46324 +*/
46325 +extern int ef_vi_arch_from_efhw_arch(int efhw_arch);
46326 +
46327 +
46328 +#endif /* __EFAB_EF_VI_H__ */
46329 Index: head-2008-11-25/drivers/xen/sfc_netfront/falcon_event.c
46330 ===================================================================
46331 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
46332 +++ head-2008-11-25/drivers/xen/sfc_netfront/falcon_event.c 2008-02-20 09:32:49.000000000 +0100
46333 @@ -0,0 +1,346 @@
46334 +/****************************************************************************
46335 + * Copyright 2002-2005: Level 5 Networks Inc.
46336 + * Copyright 2005-2008: Solarflare Communications Inc,
46337 + * 9501 Jeronimo Road, Suite 250,
46338 + * Irvine, CA 92618, USA
46339 + *
46340 + * Maintained by Solarflare Communications
46341 + * <linux-xen-drivers@solarflare.com>
46342 + * <onload-dev@solarflare.com>
46343 + *
46344 + * This program is free software; you can redistribute it and/or modify it
46345 + * under the terms of the GNU General Public License version 2 as published
46346 + * by the Free Software Foundation, incorporated herein by reference.
46347 + *
46348 + * This program is distributed in the hope that it will be useful,
46349 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
46350 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46351 + * GNU General Public License for more details.
46352 + *
46353 + * You should have received a copy of the GNU General Public License
46354 + * along with this program; if not, write to the Free Software
46355 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
46356 + ****************************************************************************
46357 + */
46358 +
46359 +/*
46360 + * \author djr
46361 + * \brief Routine to poll event queues.
46362 + * \date 2003/03/04
46363 + */
46364 +
46365 +/*! \cidoxg_lib_ef */
46366 +#include "ef_vi_internal.h"
46367 +
46368 +/* Be worried about this on byteswapped machines */
46369 +/* Due to crazy chipsets, we see the event words being written in
46370 +** arbitrary order (bug4539). So the test for presence of an event must
46371 +** ensure that both halves have changed from the null value.
46372 +*/
46373 +# define EF_VI_IS_EVENT(evp) \
46374 + ( (((evp)->opaque.a != (uint32_t)-1) && \
46375 + ((evp)->opaque.b != (uint32_t)-1)) )
46376 +
46377 +
46378 +#ifdef NDEBUG
46379 +# define IS_DEBUG 0
46380 +#else
46381 +# define IS_DEBUG 1
46382 +#endif
46383 +
46384 +
46385 +/*! Check for RX events with inconsistent SOP/CONT
46386 +**
46387 +** Returns true if this event should be discarded
46388 +*/
46389 +ef_vi_inline int ef_eventq_is_rx_sop_cont_bad_efab(ef_vi* vi,
46390 + const ef_vi_qword* ev)
46391 +{
46392 + ef_rx_dup_state_t* rx_dup_state;
46393 + uint8_t* bad_sop;
46394 +
46395 + unsigned label = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
46396 + unsigned sop = QWORD_TEST_BIT(RX_SOP, *ev);
46397 +
46398 + ef_assert(vi);
46399 + ef_assert_lt(label, EFAB_DMAQS_PER_EVQ_MAX);
46400 +
46401 + rx_dup_state = &vi->evq_state->rx_dup_state[label];
46402 + bad_sop = &rx_dup_state->bad_sop;
46403 +
46404 + if( ! ((vi->vi_flags & EF_VI_BUG5692_WORKAROUND) || IS_DEBUG) ) {
46405 + *bad_sop = (*bad_sop && !sop);
46406 + }
46407 + else {
46408 + unsigned cont = QWORD_TEST_BIT(RX_JUMBO_CONT, *ev);
46409 + uint8_t *frag_num = &rx_dup_state->frag_num;
46410 +
46411 + /* bad_sop should latch till the next sop */
46412 + *bad_sop = (*bad_sop && !sop) || ( !!sop != (*frag_num==0) );
46413 +
46414 +		/* We do not check the number of bytes relative to the
46415 +		 * fragment number and the size of the user RX buffer here,
46416 +		 * because we do not know the size of the user RX buffer.
46417 +		 * We should probably perform this check in the nearest
46418 +		 * calling code instead.
46419 + */
46420 + *frag_num = cont ? (*frag_num + 1) : 0;
46421 + }
46422 +
46423 + return *bad_sop;
46424 +}
46425 +
46426 +
46427 +ef_vi_inline int falcon_rx_check_dup(ef_vi* evq, ef_event* ev_out,
46428 + const ef_vi_qword* ev)
46429 +{
46430 + unsigned q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
46431 + unsigned desc_ptr = QWORD_GET_U(RX_EV_DESC_PTR, *ev);
46432 + ef_rx_dup_state_t* rx_dup_state = &evq->evq_state->rx_dup_state[q_id];
46433 +
46434 + if(likely( desc_ptr != rx_dup_state->rx_last_desc_ptr )) {
46435 + rx_dup_state->rx_last_desc_ptr = desc_ptr;
46436 + return 0;
46437 + }
46438 +
46439 + rx_dup_state->rx_last_desc_ptr = desc_ptr;
46440 + rx_dup_state->bad_sop = 1;
46441 +#ifndef NDEBUG
46442 + rx_dup_state->frag_num = 0;
46443 +#endif
46444 + BUG_ON(!QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev));
46445 + BUG_ON( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev));
46446 +	BUG_ON(QWORD_GET_U(RX_EV_BYTE_CNT, *ev) != 0);
46447 + ev_out->rx_no_desc_trunc.type = EF_EVENT_TYPE_RX_NO_DESC_TRUNC;
46448 + ev_out->rx_no_desc_trunc.q_id = q_id;
46449 + return 1;
46450 +}
46451 +
46452 +
46453 +ef_vi_inline void falcon_rx_event(ef_event* ev_out, const ef_vi_qword* ev)
46454 +{
46455 + if(likely( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev) )) {
46456 + ev_out->rx.type = EF_EVENT_TYPE_RX;
46457 + ev_out->rx.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
46458 + ev_out->rx.len = QWORD_GET_U(RX_EV_BYTE_CNT, *ev);
46459 + if( QWORD_TEST_BIT(RX_SOP, *ev) )
46460 + ev_out->rx.flags = EF_EVENT_FLAG_SOP;
46461 + else
46462 + ev_out->rx.flags = 0;
46463 + if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) )
46464 + ev_out->rx.flags |= EF_EVENT_FLAG_CONT;
46465 + if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) )
46466 + ev_out->rx.flags |= EF_EVENT_FLAG_ISCSI_OK;
46467 + }
46468 + else {
46469 + ev_out->rx_discard.type = EF_EVENT_TYPE_RX_DISCARD;
46470 + ev_out->rx_discard.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
46471 + ev_out->rx_discard.len = QWORD_GET_U(RX_EV_BYTE_CNT, *ev);
46472 +#if 1  /* hack for ptloop compatibility: ?? TODO purge */
46473 + if( QWORD_TEST_BIT(RX_SOP, *ev) )
46474 + ev_out->rx_discard.flags = EF_EVENT_FLAG_SOP;
46475 + else
46476 + ev_out->rx_discard.flags = 0;
46477 + if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) )
46478 + ev_out->rx_discard.flags |= EF_EVENT_FLAG_CONT;
46479 + if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) )
46480 + ev_out->rx_discard.flags |= EF_EVENT_FLAG_ISCSI_OK;
46481 +#endif
46482 + /* Order matters here: more fundamental errors first. */
46483 + if( QWORD_TEST_BIT(RX_EV_BUF_OWNER_ID_ERR, *ev) )
46484 + ev_out->rx_discard.subtype =
46485 + EF_EVENT_RX_DISCARD_RIGHTS;
46486 + else if( QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev) )
46487 + ev_out->rx_discard.subtype =
46488 + EF_EVENT_RX_DISCARD_TRUNC;
46489 + else if( QWORD_TEST_BIT(RX_EV_ETH_CRC_ERR, *ev) )
46490 + ev_out->rx_discard.subtype =
46491 + EF_EVENT_RX_DISCARD_CRC_BAD;
46492 + else if( QWORD_TEST_BIT(RX_EV_IP_HDR_CHKSUM_ERR, *ev) )
46493 + ev_out->rx_discard.subtype =
46494 + EF_EVENT_RX_DISCARD_CSUM_BAD;
46495 + else if( QWORD_TEST_BIT(RX_EV_TCP_UDP_CHKSUM_ERR, *ev) )
46496 + ev_out->rx_discard.subtype =
46497 + EF_EVENT_RX_DISCARD_CSUM_BAD;
46498 + else
46499 + ev_out->rx_discard.subtype =
46500 + EF_EVENT_RX_DISCARD_OTHER;
46501 + }
46502 +}
46503 +
46504 +
46505 +ef_vi_inline void falcon_tx_event(ef_event* ev_out, const ef_vi_qword* ev)
46506 +{
46507 + /* Danger danger! No matter what we ask for wrt batching, we
46508 + ** will get a batched event every 16 descriptors, and we also
46509 + ** get dma-queue-empty events. i.e. Duplicates are expected.
46510 + **
46511 + ** In addition, if it's been requested in the descriptor, we
46512 + ** get an event per descriptor. (We don't currently request
46513 + ** this).
46514 + */
46515 + if(likely( QWORD_TEST_BIT(TX_EV_COMP, *ev) )) {
46516 + ev_out->tx.type = EF_EVENT_TYPE_TX;
46517 + ev_out->tx.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev);
46518 + }
46519 + else {
46520 + ev_out->tx_error.type = EF_EVENT_TYPE_TX_ERROR;
46521 + ev_out->tx_error.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev);
46522 + if(likely( QWORD_TEST_BIT(TX_EV_BUF_OWNER_ID_ERR, *ev) ))
46523 + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_RIGHTS;
46524 + else if(likely( QWORD_TEST_BIT(TX_EV_WQ_FF_FULL, *ev) ))
46525 + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_OFLOW;
46526 + else if(likely( QWORD_TEST_BIT(TX_EV_PKT_TOO_BIG, *ev) ))
46527 + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_2BIG;
46528 + else if(likely( QWORD_TEST_BIT(TX_EV_PKT_ERR, *ev) ))
46529 + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_BUS;
46530 + }
46531 +}
46532 +
46533 +
46534 +static void mark_bad(ef_event* ev)
46535 +{
46536 + ev->generic.ev.u64[0] &=~ ((uint64_t) 1u << RX_EV_PKT_OK_LBN);
46537 +}
46538 +
46539 +
46540 +int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len,
46541 + ef_event_handler_fn *exception, void *expt_priv)
46542 +{
46543 + int evs_len_orig = evs_len;
46544 +
46545 + EF_VI_CHECK_EVENT_Q(evq);
46546 + ef_assert(evs);
46547 + ef_assert_gt(evs_len, 0);
46548 +
46549 + if(unlikely( EF_VI_IS_EVENT(EF_VI_EVENT_PTR(evq, 1)) ))
46550 + goto overflow;
46551 +
46552 + do {
46553 + { /* Read the event out of the ring, then fiddle with
46554 + * copied version. Reason is that the ring is
46555 + * likely to get pushed out of cache by another
46556 + * event being delivered by hardware. */
46557 + ef_vi_event* ev = EF_VI_EVENT_PTR(evq, 0);
46558 + if( ! EF_VI_IS_EVENT(ev) )
46559 + break;
46560 + evs->generic.ev.u64[0] = cpu_to_le64 (ev->u64);
46561 + evq->evq_state->evq_ptr += sizeof(ef_vi_event);
46562 + ev->u64 = (uint64_t)(int64_t) -1;
46563 + }
46564 +
46565 + /* Ugly: Exploit the fact that event code lies in top
46566 + * bits of event. */
46567 + ef_assert_ge(EV_CODE_LBN, 32u);
46568 + switch( evs->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) {
46569 + case RX_IP_EV_DECODE:
46570 + /* Look for duplicate desc_ptr: it signals
46571 + * that a jumbo frame was truncated because we
46572 + * ran out of descriptors. */
46573 + if(unlikely( falcon_rx_check_dup
46574 + (evq, evs, &evs->generic.ev) )) {
46575 + --evs_len;
46576 + ++evs;
46577 + break;
46578 + }
46579 + else {
46580 + /* Cope with FalconA1 bugs where RX
46581 +				 * gives inconsistent RX events. Mark
46582 +				 * events as bad until SOP becomes
46583 +				 * consistent again.
46584 +				 * ef_eventq_is_rx_sop_cont_bad_efab() has
46585 +				 * side effects - order is important.
46586 + */
46587 + if(unlikely
46588 + (ef_eventq_is_rx_sop_cont_bad_efab
46589 + (evq, &evs->generic.ev) )) {
46590 + mark_bad(evs);
46591 + }
46592 + }
46593 + falcon_rx_event(evs, &evs->generic.ev);
46594 + --evs_len;
46595 + ++evs;
46596 + break;
46597 +
46598 + case TX_IP_EV_DECODE:
46599 + falcon_tx_event(evs, &evs->generic.ev);
46600 + --evs_len;
46601 + ++evs;
46602 + break;
46603 +
46604 + default:
46605 + break;
46606 + }
46607 + } while( evs_len );
46608 +
46609 + return evs_len_orig - evs_len;
46610 +
46611 +
46612 + overflow:
46613 + evs->generic.type = EF_EVENT_TYPE_OFLOW;
46614 + evs->generic.ev.u64[0] = (uint64_t)((int64_t)-1);
46615 + return 1;
46616 +}
46617 +
46618 +
46619 +int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq, ef_event* ev)
46620 +{
46621 + int /*bool*/ handled = 0;
46622 +
46623 + switch( ev->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) {
46624 + case DRIVER_EV_DECODE:
46625 + if( QWORD_GET_U(DRIVER_EV_SUB_CODE, ev->generic.ev) ==
46626 + EVQ_INIT_DONE_EV_DECODE )
46627 + /* EVQ initialised event: ignore. */
46628 + handled = 1;
46629 + break;
46630 + }
46631 + return handled;
46632 +}
46633 +
46634 +
46635 +void ef_eventq_iterate(ef_vi* vi,
46636 + void (*fn)(void* arg, ef_vi*, int rel_pos,
46637 + int abs_pos, void* event),
46638 + void* arg, int stop_at_end)
46639 +{
46640 + int i, size_evs = (vi->evq_mask + 1) / sizeof(ef_vi_event);
46641 +
46642 + for( i = 0; i < size_evs; ++i ) {
46643 + ef_vi_event* e = EF_VI_EVENT_PTR(vi, -i);
46644 + if( EF_VI_IS_EVENT(e) )
46645 + fn(arg, vi, i,
46646 + EF_VI_EVENT_OFFSET(vi, -i) / sizeof(ef_vi_event),
46647 + e);
46648 + else if( stop_at_end )
46649 + break;
46650 + }
46651 +}
46652 +
46653 +
46654 +int ef_eventq_has_event(ef_vi* vi)
46655 +{
46656 + return EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, 0));
46657 +}
46658 +
46659 +
46660 +int ef_eventq_has_many_events(ef_vi* vi, int look_ahead)
46661 +{
46662 + ef_assert_ge(look_ahead, 0);
46663 + return EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, -look_ahead));
46664 +}
46665 +
46666 +
46667 +int ef_eventq_has_rx_event(ef_vi* vi)
46668 +{
46669 + ef_vi_event* ev;
46670 + int i, n_evs = 0;
46671 +
46672 + for( i = 0; EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, i)); --i ) {
46673 + ev = EF_VI_EVENT_PTR(vi, i);
46674 + if( EFVI_FALCON_EVENT_CODE(ev) == EF_EVENT_TYPE_RX ) n_evs++;
46675 + }
46676 + return n_evs;
46677 +}
46678 +
46679 +/*! \cidoxg_end */
46680 Index: head-2008-11-25/drivers/xen/sfc_netfront/falcon_vi.c
46681 ===================================================================
46682 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
46683 +++ head-2008-11-25/drivers/xen/sfc_netfront/falcon_vi.c 2008-02-20 09:32:49.000000000 +0100
46684 @@ -0,0 +1,465 @@
46685 +/****************************************************************************
46686 + * Copyright 2002-2005: Level 5 Networks Inc.
46687 + * Copyright 2005-2008: Solarflare Communications Inc,
46688 + * 9501 Jeronimo Road, Suite 250,
46689 + * Irvine, CA 92618, USA
46690 + *
46691 + * Maintained by Solarflare Communications
46692 + * <linux-xen-drivers@solarflare.com>
46693 + * <onload-dev@solarflare.com>
46694 + *
46695 + * This program is free software; you can redistribute it and/or modify it
46696 + * under the terms of the GNU General Public License version 2 as published
46697 + * by the Free Software Foundation, incorporated herein by reference.
46698 + *
46699 + * This program is distributed in the hope that it will be useful,
46700 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
46701 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46702 + * GNU General Public License for more details.
46703 + *
46704 + * You should have received a copy of the GNU General Public License
46705 + * along with this program; if not, write to the Free Software
46706 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
46707 + ****************************************************************************
46708 + */
46709 +
46710 +/*
46711 + * \author djr, stg
46712 + * \brief Falcon-specific VI
46713 + * \date 2006/11/30
46714 + */
46715 +
46716 +#include "ef_vi_internal.h"
46717 +
46718 +
46719 +#define EFVI_FALCON_DMA_TX_FRAG 1
46720 +
46721 +
46722 +/* TX descriptor for both physical and virtual packet transfers */
46723 +typedef union {
46724 + uint32_t dword[2];
46725 +} ef_vi_falcon_dma_tx_buf_desc;
46726 +typedef ef_vi_falcon_dma_tx_buf_desc ef_vi_falcon_dma_tx_phys_desc;
46727 +
46728 +
46729 +/* RX descriptor for physical addressed transfers */
46730 +typedef union {
46731 + uint32_t dword[2];
46732 +} ef_vi_falcon_dma_rx_phys_desc;
46733 +
46734 +
46735 +/* RX descriptor for virtual packet transfers */
46736 +typedef struct {
46737 + uint32_t dword[1];
46738 +} ef_vi_falcon_dma_rx_buf_desc;
46739 +
46740 +/* Buffer table index */
46741 +typedef uint32_t ef_vi_buffer_addr_t;
46742 +
46743 +ef_vi_inline int64_t dma_addr_to_u46(int64_t src_dma_addr)
46744 +{
46745 + return (src_dma_addr & __FALCON_MASK(46, int64_t));
46746 +}
46747 +
46748 +/*! Setup a physical address based descriptor with a specified length */
46749 +ef_vi_inline void
46750 +__falcon_dma_rx_calc_ip_phys(ef_vi_dma_addr_t dest_pa,
46751 + ef_vi_falcon_dma_rx_phys_desc *desc,
46752 + int bytes)
46753 +{
46754 + int region = 0; /* TODO fixme */
46755 + int64_t dest = dma_addr_to_u46(dest_pa); /* lower 46 bits */
46756 +
46757 + DWCHCK(__DW2(RX_KER_BUF_SIZE_LBN), RX_KER_BUF_SIZE_WIDTH);
46758 + DWCHCK(__DW2(RX_KER_BUF_REGION_LBN),RX_KER_BUF_REGION_WIDTH);
46759 +
46760 + LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH);
46761 +
46762 + RANGECHCK(bytes, RX_KER_BUF_SIZE_WIDTH);
46763 + RANGECHCK(region, RX_KER_BUF_REGION_WIDTH);
46764 +
46765 + ef_assert(desc);
46766 +
46767 + desc->dword[1] = ((bytes << __DW2(RX_KER_BUF_SIZE_LBN)) |
46768 + (region << __DW2(RX_KER_BUF_REGION_LBN)) |
46769 + (HIGH(dest,
46770 + RX_KER_BUF_ADR_LBN,
46771 + RX_KER_BUF_ADR_WIDTH)));
46772 +
46773 + desc->dword[0] = LOW(dest,
46774 + RX_KER_BUF_ADR_LBN,
46775 + RX_KER_BUF_ADR_WIDTH);
46776 +}
46777 +
46778 +/*! Setup a virtual buffer descriptor for an IPMODE transfer */
46779 +ef_vi_inline void
46780 +__falcon_dma_tx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs, unsigned bytes,
46781 + int port, int frag,
46782 + ef_vi_falcon_dma_tx_buf_desc *desc)
46783 +{
46784 + DWCHCK(__DW2(TX_USR_PORT_LBN), TX_USR_PORT_WIDTH);
46785 + DWCHCK(__DW2(TX_USR_CONT_LBN), TX_USR_CONT_WIDTH);
46786 + DWCHCK(__DW2(TX_USR_BYTE_CNT_LBN), TX_USR_BYTE_CNT_WIDTH);
46787 + LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH);
46788 + DWCHCK(TX_USR_BYTE_OFS_LBN, TX_USR_BYTE_OFS_WIDTH);
46789 +
46790 + RANGECHCK(bytes, TX_USR_BYTE_CNT_WIDTH);
46791 + RANGECHCK(port, TX_USR_PORT_WIDTH);
46792 + RANGECHCK(frag, TX_USR_CONT_WIDTH);
46793 + RANGECHCK(buf_id, TX_USR_BUF_ID_WIDTH);
46794 + RANGECHCK(buf_ofs, TX_USR_BYTE_OFS_WIDTH);
46795 +
46796 + ef_assert(desc);
46797 +
46798 + desc->dword[1] = ((port << __DW2(TX_USR_PORT_LBN)) |
46799 + (frag << __DW2(TX_USR_CONT_LBN)) |
46800 + (bytes << __DW2(TX_USR_BYTE_CNT_LBN)) |
46801 + (HIGH(buf_id,
46802 + TX_USR_BUF_ID_LBN,
46803 + TX_USR_BUF_ID_WIDTH)));
46804 +
46805 + desc->dword[0] = ((LOW(buf_id,
46806 + TX_USR_BUF_ID_LBN,
46807 + (TX_USR_BUF_ID_WIDTH))) |
46808 + (buf_ofs << TX_USR_BYTE_OFS_LBN));
46809 +}
46810 +
46811 +ef_vi_inline void
46812 +falcon_dma_tx_calc_ip_buf_4k(unsigned buf_vaddr, unsigned bytes,
46813 + int port, int frag,
46814 + ef_vi_falcon_dma_tx_buf_desc *desc)
46815 +{
46816 + /* TODO FIXME [buf_vaddr] consists of the buffer index in the
46817 + ** high bits, and an offset in the low bits. Assumptions
46818 +	** permeate the code that these can be rolled into one 32bit
46819 + ** value, so this is currently preserved for Falcon. But we
46820 + ** should change to support 8K pages
46821 + */
46822 + unsigned buf_id = EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr);
46823 + unsigned buf_ofs = EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr);
46824 +
46825 + __falcon_dma_tx_calc_ip_buf( buf_id, buf_ofs, bytes, port, frag, desc);
46826 +}
46827 +
46828 +ef_vi_inline void
46829 +falcon_dma_tx_calc_ip_buf(unsigned buf_vaddr, unsigned bytes, int port,
46830 + int frag, ef_vi_falcon_dma_tx_buf_desc *desc)
46831 +{
46832 + falcon_dma_tx_calc_ip_buf_4k(buf_vaddr, bytes, port, frag, desc);
46833 +}
46834 +
46835 +/*! Setup a virtual buffer based descriptor */
46836 +ef_vi_inline void
46837 +__falcon_dma_rx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs,
46838 + ef_vi_falcon_dma_rx_buf_desc *desc)
46839 +{
46840 + /* check alignment of buffer offset and pack */
46841 + ef_assert((buf_ofs & 0x1) == 0);
46842 +
46843 + buf_ofs >>= 1;
46844 +
46845 + DWCHCK(RX_USR_2BYTE_OFS_LBN, RX_USR_2BYTE_OFS_WIDTH);
46846 + DWCHCK(RX_USR_BUF_ID_LBN, RX_USR_BUF_ID_WIDTH);
46847 +
46848 + RANGECHCK(buf_ofs, RX_USR_2BYTE_OFS_WIDTH);
46849 + RANGECHCK(buf_id, RX_USR_BUF_ID_WIDTH);
46850 +
46851 + ef_assert(desc);
46852 +
46853 + desc->dword[0] = ((buf_ofs << RX_USR_2BYTE_OFS_LBN) |
46854 + (buf_id << RX_USR_BUF_ID_LBN));
46855 +}
46856 +
46857 +ef_vi_inline void
46858 +falcon_dma_rx_calc_ip_buf_4k(unsigned buf_vaddr,
46859 + ef_vi_falcon_dma_rx_buf_desc *desc)
46860 +{
46861 + /* TODO FIXME [buf_vaddr] consists of the buffer index in the
46862 + ** high bits, and an offset in the low bits. Assumptions
46863 + ** permeate the code that these can be rolled into one 32bit
46864 + ** value, so this is currently preserved for Falcon. But we
46865 + ** should change to support 8K pages
46866 + */
46867 + unsigned buf_id = EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr);
46868 + unsigned buf_ofs = EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr);
46869 +
46870 + __falcon_dma_rx_calc_ip_buf(buf_id, buf_ofs, desc);
46871 +}
46872 +
46873 +ef_vi_inline void
46874 +falcon_dma_rx_calc_ip_buf(unsigned buf_vaddr,
46875 + ef_vi_falcon_dma_rx_buf_desc *desc)
46876 +{
46877 + falcon_dma_rx_calc_ip_buf_4k(buf_vaddr, desc);
46878 +}
46879 +
46880 +
46881 +ef_vi_inline ef_vi_dma_addr_t ef_physaddr(ef_addr efaddr)
46882 +{
46883 + return (ef_vi_dma_addr_t) efaddr;
46884 +}
46885 +
46886 +
46887 +/*! Convert between an ef_addr and a buffer table index
46888 +** Assert that this was not a physical address
46889 +*/
46890 +ef_vi_inline ef_vi_buffer_addr_t ef_bufaddr(ef_addr efaddr)
46891 +{
46892 + ef_assert(efaddr < ((uint64_t)1 << 32) );
46893 +
46894 + return (ef_vi_buffer_addr_t) efaddr;
46895 +}
46896 +
46897 +
46898 +/*! Setup a physical address based descriptor for an IPMODE transfer */
46899 +ef_vi_inline void
46900 +falcon_dma_tx_calc_ip_phys(ef_vi_dma_addr_t src_dma_addr, unsigned bytes,
46901 + int port, int frag,
46902 + ef_vi_falcon_dma_tx_phys_desc *desc)
46903 +{
46904 +
46905 + int region = 0; /* FIXME */
46906 + int64_t src = dma_addr_to_u46(src_dma_addr); /* lower 46 bits */
46907 +
46908 + DWCHCK(__DW2(TX_KER_PORT_LBN), TX_KER_PORT_WIDTH);
46909 + DWCHCK(__DW2(TX_KER_CONT_LBN), TX_KER_CONT_WIDTH);
46910 + DWCHCK(__DW2(TX_KER_BYTE_CNT_LBN), TX_KER_BYTE_CNT_WIDTH);
46911 + DWCHCK(__DW2(TX_KER_BUF_REGION_LBN),TX_KER_BUF_REGION_WIDTH);
46912 +
46913 + LWCHK(TX_KER_BUF_ADR_LBN, TX_KER_BUF_ADR_WIDTH);
46914 +
46915 + RANGECHCK(port, TX_KER_PORT_WIDTH);
46916 + RANGECHCK(frag, TX_KER_CONT_WIDTH);
46917 + RANGECHCK(bytes, TX_KER_BYTE_CNT_WIDTH);
46918 + RANGECHCK(region, TX_KER_BUF_REGION_WIDTH);
46919 +
46920 + desc->dword[1] = ((port << __DW2(TX_KER_PORT_LBN)) |
46921 + (frag << __DW2(TX_KER_CONT_LBN)) |
46922 + (bytes << __DW2(TX_KER_BYTE_CNT_LBN)) |
46923 + (region << __DW2(TX_KER_BUF_REGION_LBN)) |
46924 + (HIGH(src,
46925 + TX_KER_BUF_ADR_LBN,
46926 + TX_KER_BUF_ADR_WIDTH)));
46927 +
46928 + ef_assert_equal(TX_KER_BUF_ADR_LBN, 0);
46929 + desc->dword[0] = (uint32_t) src_dma_addr;
46930 +}
46931 +
46932 +
46933 +void falcon_vi_init(ef_vi* vi, void* vvis)
46934 +{
46935 + struct vi_mappings *vm = (struct vi_mappings*)vvis;
46936 + uint16_t* ids;
46937 +
46938 + ef_assert(vi);
46939 + ef_assert(vvis);
46940 + ef_assert_equal(vm->signature, VI_MAPPING_SIGNATURE);
46941 + ef_assert_equal(vm->nic_type.arch, EF_VI_ARCH_FALCON);
46942 +
46943 + /* Initialise masks to zero, so that ef_vi_state_init() will
46944 + ** not do any harm when we don't have DMA queues. */
46945 + vi->vi_rxq.mask = vi->vi_txq.mask = 0;
46946 +
46947 + /* Used for BUG5391_WORKAROUND. */
46948 + vi->vi_txq.misalign_mask = 0;
46949 +
46950 + /* Initialise doorbell addresses to a distinctive small value
46951 + ** which will cause a segfault, to trap doorbell pushes to VIs
46952 + ** without DMA queues. */
46953 + vi->vi_rxq.doorbell = vi->vi_txq.doorbell = (ef_vi_ioaddr_t)0xdb;
46954 +
46955 + ids = (uint16_t*) (vi->ep_state + 1);
46956 +
46957 + if( vm->tx_queue_capacity ) {
46958 + vi->vi_txq.mask = vm->tx_queue_capacity - 1;
46959 + vi->vi_txq.doorbell = vm->tx_bell + 12;
46960 + vi->vi_txq.descriptors = vm->tx_dma_falcon;
46961 + vi->vi_txq.ids = ids;
46962 + ids += vi->vi_txq.mask + 1;
46963 + /* Check that the id fifo fits in the space allocated. */
46964 + ef_assert_le((char*) (vi->vi_txq.ids + vm->tx_queue_capacity),
46965 + (char*) vi->ep_state
46966 + + ef_vi_calc_state_bytes(vm->rx_queue_capacity,
46967 + vm->tx_queue_capacity));
46968 + }
46969 + if( vm->rx_queue_capacity ) {
46970 + vi->vi_rxq.mask = vm->rx_queue_capacity - 1;
46971 + vi->vi_rxq.doorbell = vm->rx_bell + 12;
46972 + vi->vi_rxq.descriptors = vm->rx_dma_falcon;
46973 + vi->vi_rxq.ids = ids;
46974 + /* Check that the id fifo fits in the space allocated. */
46975 + ef_assert_le((char*) (vi->vi_rxq.ids + vm->rx_queue_capacity),
46976 + (char*) vi->ep_state
46977 + + ef_vi_calc_state_bytes(vm->rx_queue_capacity,
46978 + vm->tx_queue_capacity));
46979 + }
46980 +
46981 + if( vm->nic_type.variant == 'A' ) {
46982 + vi->vi_txq.misalign_mask = 15; /* BUG5391_WORKAROUND */
46983 + vi->vi_flags |= EF_VI_BUG5692_WORKAROUND;
46984 + }
46985 +}
46986 +
46987 +
46988 +int ef_vi_transmitv_init(ef_vi* vi, const ef_iovec* iov, int iov_len,
46989 + ef_request_id dma_id)
46990 +{
46991 + ef_vi_txq* q = &vi->vi_txq;
46992 + ef_vi_txq_state* qs = &vi->ep_state->txq;
46993 + ef_vi_falcon_dma_tx_buf_desc* dp;
46994 + unsigned len, dma_len, di;
46995 + unsigned added_save = qs->added;
46996 + ef_addr dma_addr;
46997 + unsigned last_len = 0;
46998 +
46999 + ef_assert(iov_len > 0);
47000 + ef_assert(iov);
47001 + ef_assert_equal((dma_id & EF_REQUEST_ID_MASK), dma_id);
47002 + ef_assert_nequal(dma_id, 0xffff);
47003 +
47004 + dma_addr = iov->iov_base;
47005 + len = iov->iov_len;
47006 +
47007 + if( vi->vi_flags & EF_VI_ISCSI_TX_DDIG ) {
47008 + /* Last 4 bytes of placeholder for digest must be
47009 + * removed for h/w */
47010 + ef_assert(len > 4);
47011 + last_len = iov[iov_len - 1].iov_len;
47012 + if( last_len <= 4 ) {
47013 + ef_assert(iov_len > 1);
47014 + --iov_len;
47015 + last_len = iov[iov_len - 1].iov_len - (4 - last_len);
47016 + }
47017 + else {
47018 + last_len = iov[iov_len - 1].iov_len - 4;
47019 + }
47020 + if( iov_len == 1 )
47021 + len = last_len;
47022 + }
47023 +
47024 + while( 1 ) {
47025 + if( qs->added - qs->removed >= q->mask ) {
47026 + qs->added = added_save;
47027 + return -EAGAIN;
47028 + }
47029 +
47030 + dma_len = (~((unsigned) dma_addr) & 0xfff) + 1;
47031 + if( dma_len > len ) dma_len = len;
47032 + { /* BUG5391_WORKAROUND */
47033 + unsigned misalign =
47034 + (unsigned) dma_addr & q->misalign_mask;
47035 + if( misalign && dma_len + misalign > 512 )
47036 + dma_len = 512 - misalign;
47037 + }
47038 +
47039 + di = qs->added++ & q->mask;
47040 + dp = (ef_vi_falcon_dma_tx_buf_desc*) q->descriptors + di;
47041 + if( vi->vi_flags & EF_VI_TX_PHYS_ADDR )
47042 + falcon_dma_tx_calc_ip_phys
47043 + (ef_physaddr(dma_addr), dma_len, /*port*/ 0,
47044 + (iov_len == 1 && dma_len == len) ? 0 :
47045 + EFVI_FALCON_DMA_TX_FRAG, dp);
47046 + else
47047 + falcon_dma_tx_calc_ip_buf
47048 + (ef_bufaddr(dma_addr), dma_len, /*port*/ 0,
47049 + (iov_len == 1 && dma_len == len) ? 0 :
47050 + EFVI_FALCON_DMA_TX_FRAG, dp);
47051 +
47052 + dma_addr += dma_len;
47053 + len -= dma_len;
47054 +
47055 + if( len == 0 ) {
47056 + if( --iov_len == 0 ) break;
47057 + ++iov;
47058 + dma_addr = iov->iov_base;
47059 + len = iov->iov_len;
47060 + if( (vi->vi_flags & EF_VI_ISCSI_TX_DDIG) &&
47061 + (iov_len == 1) )
47062 + len = last_len;
47063 + }
47064 + }
47065 +
47066 + q->ids[di] = (uint16_t) dma_id;
47067 + return 0;
47068 +}
47069 +
47070 +
47071 +void ef_vi_transmit_push(ef_vi* vi)
47072 +{
47073 + ef_vi_wiob();
47074 + writel((vi->ep_state->txq.added & vi->vi_txq.mask) <<
47075 + __DW4(TX_DESC_WPTR_LBN),
47076 + vi->vi_txq.doorbell);
47077 +}
47078 +
47079 +
47080 +/*! The value of initial_rx_bytes is used to set RX_KER_BUF_SIZE in an initial
47081 +** receive descriptor here if physical addressing is being used. A value of
47082 +** zero represents 16384 bytes. This is okay, because the caller must provide
47083 +** a buffer that is > MTU, and the MAC should filter anything bigger than that.
47084 +*/
47085 +int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id,
47086 + int initial_rx_bytes)
47087 +{
47088 + ef_vi_rxq* q = &vi->vi_rxq;
47089 + ef_vi_rxq_state* qs = &vi->ep_state->rxq;
47090 + unsigned di;
47091 +
47092 + if( ef_vi_receive_space(vi) ) {
47093 + di = qs->added++ & q->mask;
47094 + ef_assert_equal(q->ids[di], 0xffff);
47095 + q->ids[di] = (uint16_t) dma_id;
47096 +
47097 + if( ! (vi->vi_flags & EF_VI_RX_PHYS_ADDR) ) {
47098 + ef_vi_falcon_dma_rx_buf_desc* dp;
47099 + dp = (ef_vi_falcon_dma_rx_buf_desc*)
47100 + q->descriptors + di;
47101 + falcon_dma_rx_calc_ip_buf(ef_bufaddr(addr), dp);
47102 + }
47103 + else {
47104 + ef_vi_falcon_dma_rx_phys_desc* dp;
47105 + dp = (ef_vi_falcon_dma_rx_phys_desc*)
47106 + q->descriptors + di;
47107 + __falcon_dma_rx_calc_ip_phys(addr, dp,
47108 + initial_rx_bytes);
47109 + }
47110 +
47111 + return 0;
47112 + }
47113 +
47114 + return -EAGAIN;
47115 +}
47116 +
47117 +
47118 +void ef_vi_receive_push(ef_vi* vi)
47119 +{
47120 + ef_vi_wiob();
47121 + writel ((vi->ep_state->rxq.added & vi->vi_rxq.mask) <<
47122 + __DW4(RX_DESC_WPTR_LBN),
47123 + vi->vi_rxq.doorbell);
47124 +}
47125 +
47126 +
47127 +ef_request_id ef_vi_receive_done(const ef_vi* vi, const ef_event* ef_ev)
47128 +{
47129 + const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev);
47130 + unsigned di = ev->u32[0] & vi->vi_rxq.mask;
47131 + ef_request_id rq_id;
47132 +
47133 + ef_assert(EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX ||
47134 + EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX_DISCARD);
47135 +
47136 + /* Detect spurious / duplicate RX events. We may need to modify this
47137 + ** code so that we are robust if they happen. */
47138 + ef_assert_equal(di, vi->ep_state->rxq.removed & vi->vi_rxq.mask);
47139 +
47140 + /* We only support 1 port: so events should be in order. */
47141 + ef_assert(vi->vi_rxq.ids[di] != 0xffff);
47142 +
47143 + rq_id = vi->vi_rxq.ids[di];
47144 + vi->vi_rxq.ids[di] = 0xffff;
47145 + ++vi->ep_state->rxq.removed;
47146 + return rq_id;
47147 +}
47148 +
47149 +/*! \cidoxg_end */
47150 Index: head-2008-11-25/drivers/xen/sfc_netfront/pt_tx.c
47151 ===================================================================
47152 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
47153 +++ head-2008-11-25/drivers/xen/sfc_netfront/pt_tx.c 2008-02-20 09:32:49.000000000 +0100
47154 @@ -0,0 +1,91 @@
47155 +/****************************************************************************
47156 + * Copyright 2002-2005: Level 5 Networks Inc.
47157 + * Copyright 2005-2008: Solarflare Communications Inc,
47158 + * 9501 Jeronimo Road, Suite 250,
47159 + * Irvine, CA 92618, USA
47160 + *
47161 + * Maintained by Solarflare Communications
47162 + * <linux-xen-drivers@solarflare.com>
47163 + * <onload-dev@solarflare.com>
47164 + *
47165 + * This program is free software; you can redistribute it and/or modify it
47166 + * under the terms of the GNU General Public License version 2 as published
47167 + * by the Free Software Foundation, incorporated herein by reference.
47168 + *
47169 + * This program is distributed in the hope that it will be useful,
47170 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
47171 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47172 + * GNU General Public License for more details.
47173 + *
47174 + * You should have received a copy of the GNU General Public License
47175 + * along with this program; if not, write to the Free Software
47176 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
47177 + ****************************************************************************
47178 + */
47179 +
47180 +/*
47181 + * \author djr
47182 + * \brief Packet-mode transmit interface.
47183 + * \date 2003/04/02
47184 + */
47185 +
47186 +/*! \cidoxg_lib_ef */
47187 +#include "ef_vi_internal.h"
47188 +
47189 +
47190 +int ef_vi_transmit_init(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id)
47191 +{
47192 + ef_iovec iov = { base, len };
47193 + return ef_vi_transmitv_init(vi, &iov, 1, dma_id);
47194 +}
47195 +
47196 +
47197 +int ef_vi_transmit(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id)
47198 +{
47199 + ef_iovec iov = { base, len };
47200 + int rc = ef_vi_transmitv_init(vi, &iov, 1, dma_id);
47201 + if( rc == 0 ) ef_vi_transmit_push(vi);
47202 + return rc;
47203 +}
47204 +
47205 +
47206 +int ef_vi_transmitv(ef_vi* vi, const ef_iovec* iov, int iov_len,
47207 + ef_request_id dma_id)
47208 +{
47209 + int rc = ef_vi_transmitv_init(vi, iov, iov_len, dma_id);
47210 + if( rc == 0 ) ef_vi_transmit_push(vi);
47211 + return rc;
47212 +}
47213 +
47214 +
47215 +int ef_vi_transmit_unbundle(ef_vi* vi, const ef_event* __ev,
47216 + ef_request_id* ids)
47217 +{
47218 + ef_request_id* ids_in = ids;
47219 + ef_vi_txq* q = &vi->vi_txq;
47220 + ef_vi_txq_state* qs = &vi->ep_state->txq;
47221 + const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*__ev);
47222 + unsigned i, stop = (ev->u32[0] + 1) & q->mask;
47223 +
47224 + ef_assert(EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX ||
47225 + EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX_ERROR);
47226 +
47227 + /* Shouldn't be batching more than 64 descriptors, and should not go
47228 + ** backwards. */
47229 + ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask), 64);
47230 + /* Should not complete more than we've posted. */
47231 + ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask),
47232 + qs->added - qs->removed);
47233 +
47234 + for( i = qs->removed & q->mask; i != stop; i = ++qs->removed & q->mask )
47235 + if( q->ids[i] != 0xffff ) {
47236 + *ids++ = q->ids[i];
47237 + q->ids[i] = 0xffff;
47238 + }
47239 +
47240 + ef_assert_le(ids - ids_in, EF_VI_TRANSMIT_BATCH);
47241 +
47242 + return (int) (ids - ids_in);
47243 +}
47244 +
47245 +/*! \cidoxg_end */
47246 Index: head-2008-11-25/drivers/xen/sfc_netfront/sysdep.h
47247 ===================================================================
47248 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
47249 +++ head-2008-11-25/drivers/xen/sfc_netfront/sysdep.h 2008-02-20 09:32:49.000000000 +0100
47250 @@ -0,0 +1,184 @@
47251 +/****************************************************************************
47252 + * Copyright 2002-2005: Level 5 Networks Inc.
47253 + * Copyright 2005-2008: Solarflare Communications Inc,
47254 + * 9501 Jeronimo Road, Suite 250,
47255 + * Irvine, CA 92618, USA
47256 + *
47257 + * Maintained by Solarflare Communications
47258 + * <linux-xen-drivers@solarflare.com>
47259 + * <onload-dev@solarflare.com>
47260 + *
47261 + * This program is free software; you can redistribute it and/or modify it
47262 + * under the terms of the GNU General Public License version 2 as published
47263 + * by the Free Software Foundation, incorporated herein by reference.
47264 + *
47265 + * This program is distributed in the hope that it will be useful,
47266 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
47267 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47268 + * GNU General Public License for more details.
47269 + *
47270 + * You should have received a copy of the GNU General Public License
47271 + * along with this program; if not, write to the Free Software
47272 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
47273 + ****************************************************************************
47274 + */
47275 +
47276 +/*
47277 + * \author stg
47278 + * \brief System dependent support for ef vi lib
47279 + * \date 2007/05/10
47280 + */
47281 +
47282 +/*! \cidoxg_include_ci_ul */
47283 +#ifndef __CI_CIUL_SYSDEP_LINUX_H__
47284 +#define __CI_CIUL_SYSDEP_LINUX_H__
47285 +
47286 +/**********************************************************************
47287 + * Kernel version compatibility
47288 + */
47289 +
47290 +#if defined(__GNUC__)
47291 +
47292 +/* Linux kernel doesn't have stdint.h or [u]intptr_t. */
47293 +# if !defined(LINUX_VERSION_CODE)
47294 +# include <linux/version.h>
47295 +# endif
47296 +# include <asm/io.h>
47297 +
47298 +/* In Linux 2.6.24, linux/types.h has uintptr_t */
47299 +# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
47300 +# if BITS_PER_LONG == 32
47301 + typedef __u32 uintptr_t;
47302 +# else
47303 + typedef __u64 uintptr_t;
47304 +# endif
47305 +# endif
47306 +
47307 +/* But even 2.6.24 doesn't define intptr_t */
47308 +# if BITS_PER_LONG == 32
47309 + typedef __s32 intptr_t;
47310 +# else
47311 + typedef __s64 intptr_t;
47312 +# endif
47313 +
47314 +# if defined(__ia64__)
47315 +# define EF_VI_PRIx64 "lx"
47316 +# else
47317 +# define EF_VI_PRIx64 "llx"
47318 +# endif
47319 +
47320 +# define EF_VI_HF __attribute__((visibility("hidden")))
47321 +# define EF_VI_HV __attribute__((visibility("hidden")))
47322 +
47323 +# if defined(__i386__) || defined(__x86_64__) /* GCC x86/x64 */
47324 + typedef unsigned long long ef_vi_dma_addr_t;
47325 +# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
47326 +# define ef_vi_wiob() __asm__ __volatile__ ("sfence")
47327 +# else
47328 +# define ef_vi_wiob() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8")
47329 +# endif
47330 +
47331 +# endif
47332 +#endif
47333 +
47334 +#ifdef EFX_NOT_UPSTREAM
47335 +
47336 +/* Stuff for architectures/compilers not officially supported */
47337 +
47338 +#if !defined(__GNUC__)
47339 +# if defined(__PPC__) /* GCC, PPC */
47340 + typedef unsigned long ef_vi_dma_addr_t;
47341 +# define ef_vi_wiob() wmb()
47342 +
47343 +# ifdef __powerpc64__
47344 +# ifdef CONFIG_SMP
47345 +# define CI_SMP_SYNC "\n eieio \n" /* memory cache sync */
47346 +# define CI_SMP_ISYNC "\n isync \n" /* instr cache sync */
47347 +# else
47348 +# define CI_SMP_SYNC
47349 +# define CI_SMP_ISYNC
47350 +# endif
47351 +# else /* for ppc32 systems */
47352 +# ifdef CONFIG_SMP
47353 +# define CI_SMP_SYNC "\n eieio \n"
47354 +# define CI_SMP_ISYNC "\n sync \n"
47355 +# else
47356 +# define CI_SMP_SYNC
47357 +# define CI_SMP_ISYNC
47358 +# endif
47359 +# endif
47360 +
47361 +# elif defined(__ia64__) /* GCC, IA64 */
47362 + typedef unsigned long ef_vi_dma_addr_t;
47363 +# define ef_vi_wiob() __asm__ __volatile__("mf.a": : :"memory")
47364 +
47365 +# else
47366 +# error Unknown processor - GNU C
47367 +# endif
47368 +
47369 +#elif defined(__PGI)
47370 +# error PGI not supported
47371 +
47372 +#elif defined(__INTEL_COMPILER)
47373 +
47374 +/* Intel compilers v7 claim to be very gcc compatible. */
47375 +# if __INTEL_COMPILER >= 700
47376 +# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91)
47377 +# define EF_VI_LIKELY(t) __builtin_expect((t), 1)
47378 +# define EF_VI_UNLIKELY(t) __builtin_expect((t), 0)
47379 +# endif
47380 +
47381 +# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
47382 +# define ef_vi_wiob() __asm__ __volatile__ ("sfence")
47383 +# else
47384 +# define ef_vi_wiob() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8")
47385 +# endif
47386 +
47387 +# else
47388 +# error Old Intel compiler not supported.
47389 +# endif
47390 +
47391 +#else
47392 +# error Unknown compiler.
47393 +#endif
47394 +
47395 +#endif
47396 +
47397 +
47398 +# include <linux/errno.h>
47399 +
47400 +
47401 +/**********************************************************************
47402 + * Extracting bit fields.
47403 + */
47404 +
47405 +#define _QWORD_GET_LOW(f, v) \
47406 + (((v).u32[0] >> (f##_LBN)) & ((1u << f##_WIDTH) - 1u))
47407 +#define _QWORD_GET_HIGH(f, v) \
47408 + (((v).u32[1] >> (f##_LBN - 32u)) & ((1u << f##_WIDTH) - 1u))
47409 +#define _QWORD_GET_ANY(f, v) \
47410 + (((v).u64[0] >> f##_LBN) & (((uint64_t) 1u << f##_WIDTH) - 1u))
47411 +
47412 +#define QWORD_GET(f, v) \
47413 + ((f##_LBN + f##_WIDTH) <= 32u \
47414 + ? _QWORD_GET_LOW(f, (v)) \
47415 + : ((f##_LBN >= 32u) ? _QWORD_GET_HIGH(f, (v)) : _QWORD_GET_ANY(f, (v))))
47416 +
47417 +#define QWORD_GET_U(f, v) ((unsigned) QWORD_GET(f, (v)))
47418 +
47419 +#define _QWORD_TEST_BIT_LOW(f, v) ((v).u32[0] & (1u << (f##_LBN)))
47420 +#define _QWORD_TEST_BIT_HIGH(f, v) ((v).u32[1] & (1u << (f##_LBN - 32u)))
47421 +
47422 +#define QWORD_TEST_BIT(f, v) \
47423 + (f##_LBN < 32 ? _QWORD_TEST_BIT_LOW(f, (v)) : _QWORD_TEST_BIT_HIGH(f, (v)))
47424 +
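/* Worked example (hypothetical field): an 8-bit field starting at bit 40
 * has LBN + WIDTH = 48 > 32 and LBN >= 32, so QWORD_GET() selects
 * _QWORD_GET_HIGH(), i.e. (v.u32[1] >> 8) & 0xffu. */
#define EXAMPLE_FIELD_LBN    40
#define EXAMPLE_FIELD_WIDTH  8
/* usage:  unsigned x = QWORD_GET_U(EXAMPLE_FIELD, qword); */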
47425 +
47426 +
47427 +
47428 +#ifndef DECLSPEC_NORETURN
47429 +/* normally defined on Windows to expand to a declaration that the
47430 + function will not return */
47431 +# define DECLSPEC_NORETURN
47432 +#endif
47433 +
47434 +#endif /* __CI_CIUL_SYSDEP_LINUX_H__ */
47435 Index: head-2008-11-25/drivers/xen/sfc_netfront/vi_init.c
47436 ===================================================================
47437 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
47438 +++ head-2008-11-25/drivers/xen/sfc_netfront/vi_init.c 2008-02-20 09:32:49.000000000 +0100
47439 @@ -0,0 +1,183 @@
47440 +/****************************************************************************
47441 + * Copyright 2002-2005: Level 5 Networks Inc.
47442 + * Copyright 2005-2008: Solarflare Communications Inc,
47443 + * 9501 Jeronimo Road, Suite 250,
47444 + * Irvine, CA 92618, USA
47445 + *
47446 + * Maintained by Solarflare Communications
47447 + * <linux-xen-drivers@solarflare.com>
47448 + * <onload-dev@solarflare.com>
47449 + *
47450 + * This program is free software; you can redistribute it and/or modify it
47451 + * under the terms of the GNU General Public License version 2 as published
47452 + * by the Free Software Foundation, incorporated herein by reference.
47453 + *
47454 + * This program is distributed in the hope that it will be useful,
47455 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
47456 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47457 + * GNU General Public License for more details.
47458 + *
47459 + * You should have received a copy of the GNU General Public License
47460 + * along with this program; if not, write to the Free Software
47461 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
47462 + ****************************************************************************
47463 + */
47464 +
47465 +/*
47466 + * \author djr
47467 + * \brief Initialisation of VIs.
47468 + * \date 2007/06/08
47469 + */
47470 +
47471 +#include "ef_vi_internal.h"
47472 +
47473 +#define EF_VI_STATE_BYTES(rxq_sz, txq_sz) \
47474 + (sizeof(ef_vi_state) + (rxq_sz) * sizeof(uint16_t) \
47475 + + (txq_sz) * sizeof(uint16_t))
47476 +
47477 +int ef_vi_calc_state_bytes(int rxq_sz, int txq_sz)
47478 +{
47479 + ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz));
47480 + ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz));
47481 +
47482 + return EF_VI_STATE_BYTES(rxq_sz, txq_sz);
47483 +}
47484 +
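/* Worked example (illustrative): ef_vi_calc_state_bytes(512, 512) is
 * sizeof(ef_vi_state) + 512 * sizeof(uint16_t) + 512 * sizeof(uint16_t),
 * i.e. the fixed state plus one 16-bit request-id slot per descriptor. */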
47485 +
47486 +int ef_vi_state_bytes(ef_vi* vi)
47487 +{
47488 + int rxq_sz = 0, txq_sz = 0;
47489 + if( ef_vi_receive_capacity(vi) )
47490 + rxq_sz = ef_vi_receive_capacity(vi) + 1;
47491 + if( ef_vi_transmit_capacity(vi) )
47492 + txq_sz = ef_vi_transmit_capacity(vi) + 1;
47493 +
47494 + ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz));
47495 + ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz));
47496 +
47497 + return EF_VI_STATE_BYTES(rxq_sz, txq_sz);
47498 +}
47499 +
47500 +
47501 +void ef_eventq_state_init(ef_vi* evq)
47502 +{
47503 + int j;
47504 +
47505 + for (j = 0; j<EFAB_DMAQS_PER_EVQ_MAX; j++) {
47506 + ef_rx_dup_state_t *rx_dup_state =
47507 + &evq->evq_state->rx_dup_state[j];
47508 + rx_dup_state->bad_sop = 0;
47509 + rx_dup_state->rx_last_desc_ptr = -1;
47510 + rx_dup_state->frag_num = 0;
47511 + }
47512 +
47513 + evq->evq_state->evq_ptr = 0;
47514 +}
47515 +
47516 +
47517 +void ef_vi_state_init(ef_vi* vi)
47518 +{
47519 + ef_vi_state* state = vi->ep_state;
47520 + unsigned i;
47521 +
47522 + state->txq.added = state->txq.removed = 0;
47523 + state->rxq.added = state->rxq.removed = 0;
47524 +
47525 + if( vi->vi_rxq.mask )
47526 + for( i = 0; i <= vi->vi_rxq.mask; ++i )
47527 + vi->vi_rxq.ids[i] = (uint16_t) -1;
47528 + if( vi->vi_txq.mask )
47529 + for( i = 0; i <= vi->vi_txq.mask; ++i )
47530 + vi->vi_txq.ids[i] = (uint16_t) -1;
47531 +}
47532 +
47533 +
47534 +void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type nic_type,
47535 + int instance, unsigned evq_bytes, void* base,
47536 + void* timer_reg)
47537 +{
47538 + struct vi_mappings* vm = (struct vi_mappings*) data_area;
47539 +
47540 + vm->signature = VI_MAPPING_SIGNATURE;
47541 + vm->vi_instance = instance;
47542 + vm->nic_type = nic_type;
47543 + vm->evq_bytes = evq_bytes;
47544 + vm->evq_base = base;
47545 + vm->evq_timer_reg = timer_reg;
47546 +}
47547 +
47548 +
47549 +void ef_vi_init(ef_vi* vi, void* vvis, ef_vi_state* state,
47550 + ef_eventq_state* evq_state, enum ef_vi_flags vi_flags)
47551 +{
47552 + struct vi_mappings* vm = (struct vi_mappings*) vvis;
47553 +
47554 + vi->vi_i = vm->vi_instance;
47555 + vi->ep_state = state;
47556 + vi->vi_flags = vi_flags;
47557 +
47558 + switch( vm->nic_type.arch ) {
47559 + case EF_VI_ARCH_FALCON:
47560 + falcon_vi_init(vi, vvis);
47561 + break;
47562 + default:
47563 + /* ?? TODO: We should return an error code. */
47564 + ef_assert(0);
47565 + break;
47566 + }
47567 +
47568 + if( vm->evq_bytes ) {
47569 + vi->evq_state = evq_state;
47570 + vi->evq_mask = vm->evq_bytes - 1u;
47571 + vi->evq_base = vm->evq_base;
47572 + vi->evq_timer_reg = vm->evq_timer_reg;
47573 + }
47574 +
47575 + EF_VI_MAGIC_SET(vi, EF_VI);
47576 +}
47577 +
47578 +
47579 +/* Initialise [data_area] with information required to initialise an ef_vi.
47580 + * In the following, an unused param should be set to NULL. Note the case
47581 + * marked (*) of [iobuf_mmap] for falcon/driver; for the normal driver this
47582 + * must be NULL.
47583 + *
47584 + * \param data_area [in,out] required, must ref at least VI_MAPPING_SIZE
47585 + * bytes
47586 + * \param io_mmap [in] ef1, required
47587 + * falcon, required
47588 + * \param iobuf_mmap [in] ef1, unused
47589 + * falcon, required
47590 + */
47591 +void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type nic_type,
47592 + unsigned rxq_capacity, unsigned txq_capacity,
47593 + int instance, void* io_mmap,
47594 + void* iobuf_mmap_rx, void* iobuf_mmap_tx,
47595 + enum ef_vi_flags vi_flags)
47596 +{
47597 + struct vi_mappings* vm = (struct vi_mappings*) data_area;
47598 + int rx_desc_bytes, rxq_bytes;
47599 +
47600 + ef_assert(rxq_capacity > 0 || txq_capacity > 0);
47601 + ef_assert(vm);
47602 + ef_assert(io_mmap);
47603 + ef_assert(iobuf_mmap_rx || iobuf_mmap_tx);
47604 +
47605 + vm->signature = VI_MAPPING_SIGNATURE;
47606 + vm->vi_instance = instance;
47607 + vm->nic_type = nic_type;
47608 +
47609 + rx_desc_bytes = (vi_flags & EF_VI_RX_PHYS_ADDR) ? 8 : 4;
47610 + rxq_bytes = rxq_capacity * rx_desc_bytes;
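	/* Round the RX descriptor area up to a whole page so that, when RX
	 * and TX share one iobuf mapping, the TX descriptors start on a
	 * page boundary. */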
47611 + rxq_bytes = (rxq_bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
47612 +
47613 + if( iobuf_mmap_rx == iobuf_mmap_tx )
47614 + iobuf_mmap_tx = (char*) iobuf_mmap_rx + rxq_bytes;
47615 +
47616 + vm->rx_queue_capacity = rxq_capacity;
47617 + vm->rx_dma_falcon = iobuf_mmap_rx;
47618 + vm->rx_bell = (char*) io_mmap + (RX_DESC_UPD_REG_KER_OFST & 4095);
47619 + vm->tx_queue_capacity = txq_capacity;
47620 + vm->tx_dma_falcon = iobuf_mmap_tx;
47621 + vm->tx_bell = (char*) io_mmap + (TX_DESC_UPD_REG_KER_OFST & 4095);
47622 +}
47623 Index: head-2008-11-25/drivers/xen/sfc_netutil/Makefile
47624 ===================================================================
47625 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
47626 +++ head-2008-11-25/drivers/xen/sfc_netutil/Makefile 2008-02-26 10:54:12.000000000 +0100
47627 @@ -0,0 +1,11 @@
47628 +EXTRA_CFLAGS += -Idrivers/xen/sfc_netutil
47629 +EXTRA_CFLAGS += -Werror
47630 +
47631 +ifdef GGOV
47632 +EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
47633 +endif
47634 +
47635 +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) := sfc_netutil.o
47636 +
47637 +sfc_netutil-objs := accel_cuckoo_hash.o accel_msg_iface.o accel_util.o
47638 +
47639 Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.c
47640 ===================================================================
47641 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
47642 +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.c 2008-02-20 09:32:49.000000000 +0100
47643 @@ -0,0 +1,651 @@
47644 +/****************************************************************************
47645 + * Solarflare driver for Xen network acceleration
47646 + *
47647 + * Copyright 2006-2008: Solarflare Communications Inc,
47648 + * 9501 Jeronimo Road, Suite 250,
47649 + * Irvine, CA 92618, USA
47650 + *
47651 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
47652 + *
47653 + * This program is free software; you can redistribute it and/or modify it
47654 + * under the terms of the GNU General Public License version 2 as published
47655 + * by the Free Software Foundation, incorporated herein by reference.
47656 + *
47657 + * This program is distributed in the hope that it will be useful,
47658 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
47659 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47660 + * GNU General Public License for more details.
47661 + *
47662 + * You should have received a copy of the GNU General Public License
47663 + * along with this program; if not, write to the Free Software
47664 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
47665 + ****************************************************************************
47666 + */
47667 +
47668 +#include <linux/types.h> /* needed for linux/random.h */
47669 +#include <linux/random.h>
47670 +
47671 +#include "accel_cuckoo_hash.h"
47672 +#include "accel_util.h"
47673 +
47674 +static inline int cuckoo_hash_key_compare(cuckoo_hash_table *hashtab,
47675 + cuckoo_hash_key *key1,
47676 + cuckoo_hash_key *key2)
47677 +{
47678 + return !memcmp(key1, key2, hashtab->key_length);
47679 +}
47680 +
47681 +
47682 +static inline void cuckoo_hash_key_set(cuckoo_hash_key *key1,
47683 + cuckoo_hash_key *key2)
47684 +{
47685 + *key1 = *key2;
47686 +}
47687 +
47688 +
47689 +/*
47690 + * Sets hash function parameters. Chooses "a" to be odd, 0 < a < 2^w
47691 + * where w is the key length in bits.
47692 + */
47693 +static void set_hash_parameters(cuckoo_hash_table *hashtab)
47694 +{
47695 + again:
47696 + hashtab->a0 = hashtab->a1 = 0;
47697 +
47698 + /* Make sure random */
47699 + get_random_bytes(&hashtab->a0, hashtab->key_length);
47700 + get_random_bytes(&hashtab->a1, hashtab->key_length);
47701 +
47702 + /* Make sure odd */
47703 + hashtab->a0 |= 1;
47704 + hashtab->a1 |= 1;
47705 +
47706 + /* Being different is good */
47707 + if (hashtab->a0 != hashtab->a1)
47708 + return;
47709 +
47710 + goto again;
47711 +}
47712 +
47713 +int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits,
47714 + unsigned key_length)
47715 +{
47716 + char *table_mem;
47717 + unsigned length = 1 << length_bits;
47718 +
47719 + BUG_ON(length_bits >= sizeof(unsigned) * 8);
47720 + BUG_ON(key_length > sizeof(cuckoo_hash_key));
47721 +
47722 + table_mem = kmalloc(sizeof(cuckoo_hash_entry) * 2 * length, GFP_KERNEL);
47723 +
47724 + if (table_mem == NULL)
47725 + return -ENOMEM;
47726 +
47727 + hashtab->length = length;
47728 + hashtab->length_bits = length_bits;
47729 + hashtab->key_length = key_length;
47730 + hashtab->entries = 0;
47731 +
47732 + hashtab->table0 = (cuckoo_hash_entry *)table_mem;
47733 + hashtab->table1 = (cuckoo_hash_entry *)
47734 + (table_mem + length * sizeof(cuckoo_hash_entry));
47735 +
47736 + set_hash_parameters(hashtab);
47737 +
47738 + /* Zero the table */
47739 + memset(hashtab->table0, 0, length * 2 * sizeof(cuckoo_hash_entry));
47740 +
47741 + return 0;
47742 +}
47743 +EXPORT_SYMBOL_GPL(cuckoo_hash_init);
47744 +
47745 +void cuckoo_hash_destroy(cuckoo_hash_table *hashtab)
47746 +{
47747 + if (hashtab->table0 != NULL)
47748 + kfree(hashtab->table0);
47749 +}
47750 +
47751 +EXPORT_SYMBOL_GPL(cuckoo_hash_destroy);
47752 +
47753 +/*
47754 + * This computes 8 * sizeof(cuckoo_hash) bits of hash, not all of which
47755 + * will necessarily be used; the hash function throws away any that
47756 + * aren't
47757 + */
47758 +static inline void cuckoo_compute_hash_helper(cuckoo_hash_table *hashtab,
47759 + cuckoo_hash_key *a,
47760 + cuckoo_hash_key *x,
47761 + cuckoo_hash *result)
47762 +{
47763 + u64 multiply_result = 0, a_temp, x_temp;
47764 + u32 carry = 0;
47765 + u32 *a_words;
47766 + u32 *x_words;
47767 + int i;
47768 +
47769 + /*
47770 + * As the mod and div operations in the function effectively
47771 + * reduce and shift the bits of the product down to just the
47772 + * third word, we need only compute that and return it as a
47773 + * result.
47774 + *
47775 + * Do enough long multiplication to get the word we need
47776 + */
47777 +
47778 + /* This assumes things about the sizes of the key and hash */
47779 + BUG_ON(hashtab->key_length % sizeof(u32) != 0);
47780 + BUG_ON(sizeof(cuckoo_hash) != sizeof(u32));
47781 +
47782 + a_words = (u32 *)a;
47783 + x_words = (u32 *)x;
47784 +
47785 + for (i = 0; i < hashtab->key_length / sizeof(u32); i++) {
47786 + a_temp = a_words[i];
47787 + x_temp = x_words[i];
47788 +
47789 + multiply_result = (a_temp * x_temp) + carry;
47790 + carry = (multiply_result >> 32) & 0xffffffff;
47791 + }
47792 +
47793 + *result = multiply_result & 0xffffffff;
47794 +}
47795 +
47796 +
47797 +/*
47798 + * Want to implement (ax mod 2^w) div 2^(w-q) for odd a, 0 < a < 2^w;
47799 + * w is the length of the key, q is the length of the hash, I think.
47800 + * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf
47801 + */
47802 +static cuckoo_hash cuckoo_compute_hash(cuckoo_hash_table *hashtab,
47803 + cuckoo_hash_key *key,
47804 + cuckoo_hash_key *a)
47805 +{
47806 + unsigned q = hashtab->length_bits;
47807 + unsigned shift = 32 - q;
47808 + unsigned mask = ((1 << q) - 1) << shift;
47809 + cuckoo_hash hash;
47810 +
47811 + cuckoo_compute_hash_helper(hashtab, a, key, &hash);
47812 +
47813 + /*
47814 + * Take the top few bits to get the right length for this
47815 + * hash table
47816 + */
47817 + hash = (hash & mask) >> shift;
47818 +
47819 + BUG_ON(hash >= hashtab->length);
47820 +
47821 + return hash;
47822 +}
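+
+/*
+ * Illustrative sketch: for a key that is a single 32-bit word the two
+ * routines above reduce to the classic multiply-shift hash, i.e. the top
+ * q bits of (a * x) mod 2^32 (assumes 0 < q < 32).
+ */
+static inline u32 cuckoo_hash_example_u32(u32 a, u32 x, unsigned q)
+{
+	/* (a * x) mod 2^32, then keep the top q bits */
+	return ((u32)((u64)a * x)) >> (32 - q);
+}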
47823 +
47824 +
47825 +static int cuckoo_hash_lookup0(cuckoo_hash_table *hashtab,
47826 + cuckoo_hash_key *key,
47827 + cuckoo_hash_value *value)
47828 +{
47829 + cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
47830 +
47831 + if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED)
47832 + && cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
47833 + key)) {
47834 + *value = hashtab->table0[hash].value;
47835 + return 1;
47836 + }
47837 +
47838 + return 0;
47839 +}
47840 +
47841 +static int cuckoo_hash_lookup1(cuckoo_hash_table *hashtab,
47842 + cuckoo_hash_key *key,
47843 + cuckoo_hash_value *value)
47844 +{
47845 + cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
47846 +
47847 + if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED)
47848 + && cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
47849 + key)) {
47850 + *value = hashtab->table1[hash].value;
47851 + return 1;
47852 + }
47853 +
47854 + return 0;
47855 +}
47856 +
47857 +
47858 +int cuckoo_hash_lookup(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
47859 + cuckoo_hash_value *value)
47860 +{
47861 + return cuckoo_hash_lookup0(hashtab, key, value)
47862 + || cuckoo_hash_lookup1(hashtab, key, value);
47863 +}
47864 +EXPORT_SYMBOL_GPL(cuckoo_hash_lookup);
47865 +
47866 +
47867 +/* Transfer any active entries from "old_table" into hashtab */
47868 +static int cuckoo_hash_transfer_entries(cuckoo_hash_table *hashtab,
47869 + cuckoo_hash_entry *old_table,
47870 + unsigned capacity)
47871 +{
47872 + int i, rc;
47873 + cuckoo_hash_entry *entry;
47874 +
47875 + hashtab->entries = 0;
47876 +
47877 + for (i = 0; i < capacity; i++) {
47878 + entry = &old_table[i];
47879 + if (entry->state == CUCKOO_HASH_STATE_OCCUPIED) {
47880 + rc = cuckoo_hash_add(hashtab, &(entry->key),
47881 + entry->value, 0);
47882 + if (rc != 0) {
47883 + return rc;
47884 + }
47885 + }
47886 + }
47887 +
47888 + return 0;
47889 +}
47890 +
47891 +
47892 +int cuckoo_hash_rehash(cuckoo_hash_table *hashtab)
47893 +{
47894 + cuckoo_hash_entry *new_table;
47895 + cuckoo_hash_table old_hashtab;
47896 + int resize = 0, rc, rehash_count;
47897 +
47898 + /*
47899 + * Store old tables so we can access the existing values and
47900 + * copy across
47901 + */
47902 + memcpy(&old_hashtab, hashtab, sizeof(cuckoo_hash_table));
47903 +
47904 + /* resize if hashtable is more than half full */
47905 + if (old_hashtab.entries > old_hashtab.length &&
47906 + old_hashtab.length_bits < 32)
47907 + resize = 1;
47908 +
47909 + resize:
47910 + if (resize) {
47911 + new_table = kmalloc(sizeof(cuckoo_hash_entry) * 4 * hashtab->length,
47912 + GFP_ATOMIC);
47913 + if (new_table == NULL) {
47914 + rc = -ENOMEM;
47915 + goto err;
47916 + }
47917 +
47918 + hashtab->length = 2 * hashtab->length;
47919 + hashtab->length_bits++;
47920 + } else {
47921 + new_table = kmalloc(sizeof(cuckoo_hash_entry) * 2 * hashtab->length,
47922 + GFP_ATOMIC);
47923 + if (new_table == NULL) {
47924 + rc = -ENOMEM;
47925 + goto err;
47926 + }
47927 + }
47928 +
47929 + /*
47930 + * Point hashtab to new memory region so we can try to
47931 + * construct new table
47932 + */
47933 + hashtab->table0 = new_table;
47934 + hashtab->table1 = (cuckoo_hash_entry *)
47935 + ((char *)new_table + hashtab->length * sizeof(cuckoo_hash_entry));
47936 +
47937 + rehash_count = 0;
47938 +
47939 + again:
47940 + /* Zero the new tables */
47941 + memset(new_table, 0, hashtab->length * 2 * sizeof(cuckoo_hash_entry));
47942 +
47943 + /* Choose new parameters for the hash functions */
47944 + set_hash_parameters(hashtab);
47945 +
47946 + /*
47947 + * Multiply old_table_length by 2 as the length refers to each
47948 + * table, and there are two of them. This assumes that they
47949 + * are arranged sequentially in memory, so assert it
47950 + */
47951 + BUG_ON(((char *)old_hashtab.table1) !=
47952 + ((char *)old_hashtab.table0 + old_hashtab.length
47953 + * sizeof(cuckoo_hash_entry)));
47954 + rc = cuckoo_hash_transfer_entries(hashtab, old_hashtab.table0,
47955 + old_hashtab.length * 2);
47956 + if (rc < 0) {
47957 + /* Problem */
47958 + if (rc == -ENOSPC) {
47959 + ++rehash_count;
47960 + if (rehash_count < CUCKOO_HASH_MAX_LOOP) {
47961 + /*
47962 + * Wanted to rehash, but rather than
47963 + * recurse we can just do it here
47964 + */
47965 + goto again;
47966 + } else {
47967 + /*
47968 + * Didn't manage to rehash, so let's
47969 + * go up a size (if we haven't already
47970 + * and there's space)
47971 + */
47972 + if (!resize && hashtab->length_bits < 32) {
47973 + resize = 1;
47974 + kfree(new_table);
47975 + goto resize;
47976 + }
47977 + else
47978 + goto err;
47979 + }
47980 + }
47981 + else
47982 + goto err;
47983 + }
47984 +
47985 + /* Success, I think. Free up the old table */
47986 + kfree(old_hashtab.table0);
47987 +
47988 + /* We should have put all the entries from old table in the new one */
47989 + BUG_ON(hashtab->entries != old_hashtab.entries);
47990 +
47991 + return 0;
47992 + err:
47993 + EPRINTK("%s: Rehash failed, giving up\n", __FUNCTION__);
47994 + /* Some other error, give up, at least restore table to how it was */
47995 + memcpy(hashtab, &old_hashtab, sizeof(cuckoo_hash_table));
47996 + if (new_table)
47997 + kfree(new_table);
47998 + return rc;
47999 +}
48000 +EXPORT_SYMBOL_GPL(cuckoo_hash_rehash);
48001 +
48002 +
48003 +static int
48004 +cuckoo_hash_insert_or_displace(cuckoo_hash_entry *table, unsigned hash,
48005 + cuckoo_hash_key *key,
48006 + cuckoo_hash_value value,
48007 + cuckoo_hash_key *displaced_key,
48008 + cuckoo_hash_value *displaced_value)
48009 +{
48010 + if (table[hash].state == CUCKOO_HASH_STATE_VACANT) {
48011 + cuckoo_hash_key_set(&(table[hash].key), key);
48012 + table[hash].value = value;
48013 + table[hash].state = CUCKOO_HASH_STATE_OCCUPIED;
48014 +
48015 + return 1;
48016 + } else {
48017 + cuckoo_hash_key_set(displaced_key, &(table[hash].key));
48018 + *displaced_value = table[hash].value;
48019 + cuckoo_hash_key_set(&(table[hash].key), key);
48020 + table[hash].value = value;
48021 +
48022 + return 0;
48023 + }
48024 +}
48025 +
48026 +
48027 +int cuckoo_hash_add(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
48028 + cuckoo_hash_value value, int can_rehash)
48029 +{
48030 + cuckoo_hash hash0, hash1;
48031 + int i, rc;
48032 + cuckoo_hash_key key1, key2;
48033 +
48034 + cuckoo_hash_key_set(&key1, key);
48035 +
48036 + again:
48037 + i = 0;
48038 + do {
48039 + hash0 = cuckoo_compute_hash(hashtab, &key1, &hashtab->a0);
48040 + if (cuckoo_hash_insert_or_displace(hashtab->table0, hash0,
48041 + &key1, value, &key2,
48042 + &value)) {
48043 + /* Success */
48044 + hashtab->entries++;
48045 + return 0;
48046 + }
48047 +
48048 + hash1 = cuckoo_compute_hash(hashtab, &key2, &hashtab->a1);
48049 + if (cuckoo_hash_insert_or_displace(hashtab->table1, hash1,
48050 + &key2, value, &key1,
48051 + &value)) {
48052 + /* Success */
48053 + hashtab->entries++;
48054 + return 0;
48055 + }
48056 + } while (++i < CUCKOO_HASH_MAX_LOOP);
48057 +
48058 + if (can_rehash) {
48059 + if ((rc = cuckoo_hash_rehash(hashtab)) < 0) {
48060 + /*
48061 + * Give up - this will drop whichever
48062 + * key/value pair we have currently displaced
48063 + * on the floor
48064 + */
48065 + return rc;
48066 + }
48067 + goto again;
48068 + }
48069 +
48070 + EPRINTK("%s: failed hash add\n", __FUNCTION__);
48071 + /*
48072 + * Couldn't do it - bad as we've now removed some random thing
48073 + * from the table, and will just drop it on the floor. Better
48074 + * would be to somehow revert the table to the state it was in
48075 + * at the start
48076 + */
48077 + return -ENOSPC;
48078 +}
48079 +EXPORT_SYMBOL_GPL(cuckoo_hash_add);
48080 +
48081 +
48082 +int cuckoo_hash_add_check(cuckoo_hash_table *hashtab,
48083 + cuckoo_hash_key *key, cuckoo_hash_value value,
48084 + int can_rehash)
48085 +{
48086 +	cuckoo_hash_value stored_value;
48087 +
48088 + if (cuckoo_hash_lookup(hashtab, key, &stored_value))
48089 + return -EBUSY;
48090 +
48091 + return cuckoo_hash_add(hashtab, key, value, can_rehash);
48092 +}
48093 +EXPORT_SYMBOL_GPL(cuckoo_hash_add_check);
48094 +
48095 +
48096 +int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key)
48097 +{
48098 + cuckoo_hash hash;
48099 +
48100 + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
48101 + if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
48102 + cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
48103 + key)) {
48104 + hashtab->table0[hash].state = CUCKOO_HASH_STATE_VACANT;
48105 + hashtab->entries--;
48106 + return 0;
48107 + }
48108 +
48109 + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
48110 + if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
48111 + cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
48112 + key)) {
48113 + hashtab->table1[hash].state = CUCKOO_HASH_STATE_VACANT;
48114 + hashtab->entries--;
48115 + return 0;
48116 + }
48117 +
48118 + return -EINVAL;
48119 +}
48120 +EXPORT_SYMBOL_GPL(cuckoo_hash_remove);
48121 +
48122 +
48123 +int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
48124 + cuckoo_hash_value value)
48125 +{
48126 + cuckoo_hash hash;
48127 +
48128 + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
48129 + if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
48130 + cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
48131 + key)) {
48132 + hashtab->table0[hash].value = value;
48133 + return 0;
48134 + }
48135 +
48136 + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
48137 + if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
48138 + cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
48139 + key)) {
48140 + hashtab->table1[hash].value = value;
48141 + return 0;
48142 + }
48143 +
48144 + return -EINVAL;
48145 +}
48146 +EXPORT_SYMBOL_GPL(cuckoo_hash_update);
48147 +
48148 +
48149 +void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab)
48150 +{
48151 + hashtab->iterate_index = 0;
48152 +}
48153 +EXPORT_SYMBOL_GPL(cuckoo_hash_iterate_reset);
48154 +
48155 +
48156 +int cuckoo_hash_iterate(cuckoo_hash_table *hashtab,
48157 + cuckoo_hash_key *key, cuckoo_hash_value *value)
48158 +{
48159 + unsigned index;
48160 +
48161 + while (hashtab->iterate_index < hashtab->length) {
48162 + index = hashtab->iterate_index;
48163 + ++hashtab->iterate_index;
48164 + if (hashtab->table0[index].state == CUCKOO_HASH_STATE_OCCUPIED) {
48165 + *key = hashtab->table0[index].key;
48166 + *value = hashtab->table0[index].value;
48167 + return 0;
48168 + }
48169 + }
48170 +
48171 + while (hashtab->iterate_index >= hashtab->length &&
48172 + hashtab->iterate_index < hashtab->length * 2) {
48173 + index = hashtab->iterate_index - hashtab->length;
48174 + ++hashtab->iterate_index;
48175 + if (hashtab->table1[index].state == CUCKOO_HASH_STATE_OCCUPIED) {
48176 + *key = hashtab->table1[index].key;
48177 + *value = hashtab->table1[index].value;
48178 + return 0;
48179 + }
48180 + }
48181 +
48182 + return -ENOSPC;
48183 +}
48184 +EXPORT_SYMBOL_GPL(cuckoo_hash_iterate);
48185 +
48186 +
48187 +#if 0
48188 +void cuckoo_hash_valid(cuckoo_hash_table *hashtab)
48189 +{
48190 + int i, entry_count = 0;
48191 +
48192 + for (i=0; i < hashtab->length; i++) {
48193 + EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT &&
48194 + hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED);
48195 + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48196 + entry_count++;
48197 + EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT &&
48198 + hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED);
48199 + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48200 + entry_count++;
48201 + }
48202 +
48203 + if (entry_count != hashtab->entries) {
48204 + EPRINTK("%s: bad count\n", __FUNCTION__);
48205 + cuckoo_hash_dump(hashtab);
48206 + return;
48207 + }
48208 +
48209 + for (i=0; i< hashtab->length; i++) {
48210 + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48211 + if (i != cuckoo_compute_hash(hashtab,
48212 + &hashtab->table0[i].key,
48213 + &hashtab->a0)) {
48214 + EPRINTK("%s: Bad key table 0 index %d\n",
48215 + __FUNCTION__, i);
48216 + cuckoo_hash_dump(hashtab);
48217 + return;
48218 + }
48219 + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48220 + if (i != cuckoo_compute_hash(hashtab,
48221 + &hashtab->table1[i].key,
48222 + &hashtab->a1)) {
48223 + EPRINTK("%s: Bad key table 1 index %d\n",
48224 + __FUNCTION__, i);
48225 + cuckoo_hash_dump(hashtab);
48226 + return;
48227 + }
48228 + }
48229 +
48230 +}
48231 +EXPORT_SYMBOL_GPL(cuckoo_hash_valid);
48232 +
48233 +
48234 +void cuckoo_hash_dump(cuckoo_hash_table *hashtab)
48235 +{
48236 + int i, entry_count;
48237 +
48238 + entry_count = 0;
48239 + for (i=0; i < hashtab->length; i++) {
48240 + EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT &&
48241 + hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED);
48242 + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48243 + entry_count++;
48244 + EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT &&
48245 + hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED);
48246 + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48247 + entry_count++;
48248 + }
48249 +
48250 + EPRINTK("======================\n");
48251 + EPRINTK("Cuckoo hash table dump\n");
48252 + EPRINTK("======================\n");
48253 + EPRINTK("length: %d; length_bits: %d; key_length: %d\n", hashtab->length,
48254 + hashtab->length_bits, hashtab->key_length);
48255 + EPRINTK("Recorded entries: %d\n", hashtab->entries);
48256 + EPRINTK("Counted entries: %d\n", entry_count);
48257 + EPRINTK("a0: %llx; a1: %llx\n", hashtab->a0, hashtab->a1);
48258 + EPRINTK("-----------------------------------------\n");
48259 + EPRINTK("Index Occupied Key Value Index0 Index1\n");
48260 + EPRINTK("-----------------------------------------\n");
48261 + for (i=0; i< hashtab->length; i++) {
48262 + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48263 + EPRINTK("%d %d %llx %d %d %d\n", i,
48264 + hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED,
48265 + hashtab->table0[i].key, hashtab->table0[i].value,
48266 + cuckoo_compute_hash(hashtab, &hashtab->table0[i].key,
48267 + &hashtab->a0),
48268 + cuckoo_compute_hash(hashtab, &hashtab->table0[i].key,
48269 + &hashtab->a1));
48270 + else
48271 + EPRINTK("%d %d - - - -\n", i,
48272 + hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED);
48273 +
48274 + }
48275 + EPRINTK("-----------------------------------------\n");
48276 + EPRINTK("Index Occupied Key Value Index0 Index1\n");
48277 + EPRINTK("-----------------------------------------\n");
48278 + for (i=0; i< hashtab->length; i++) {
48279 + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
48280 + EPRINTK("%d %d %llx %d %d %d\n", i,
48281 + hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED,
48282 + hashtab->table1[i].key, hashtab->table1[i].value,
48283 + cuckoo_compute_hash(hashtab, &hashtab->table1[i].key,
48284 + &hashtab->a0),
48285 + cuckoo_compute_hash(hashtab, &hashtab->table1[i].key,
48286 + &hashtab->a1));
48287 + else
48288 + EPRINTK("%d %d - - - -\n", i,
48289 + hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED);
48290 + }
48291 + EPRINTK("======================\n");
48292 +}
48293 +EXPORT_SYMBOL_GPL(cuckoo_hash_dump);
48294 +#endif
48295 Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.h
48296 ===================================================================
48297 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
48298 +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.h 2008-02-20 09:32:49.000000000 +0100
48299 @@ -0,0 +1,227 @@
48300 +/****************************************************************************
48301 + * Solarflare driver for Xen network acceleration
48302 + *
48303 + * Copyright 2006-2008: Solarflare Communications Inc,
48304 + * 9501 Jeronimo Road, Suite 250,
48305 + * Irvine, CA 92618, USA
48306 + *
48307 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
48308 + *
48309 + * This program is free software; you can redistribute it and/or modify it
48310 + * under the terms of the GNU General Public License version 2 as published
48311 + * by the Free Software Foundation, incorporated herein by reference.
48312 + *
48313 + * This program is distributed in the hope that it will be useful,
48314 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
48315 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
48316 + * GNU General Public License for more details.
48317 + *
48318 + * You should have received a copy of the GNU General Public License
48319 + * along with this program; if not, write to the Free Software
48320 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
48321 + ****************************************************************************
48322 + */
48323 +
48324 +/*
48325 + * A cuckoo hash table consists of two sub tables. Each entry can
48326 + * hash to a position in each table. If, on insertion, its position is
48327 + * found to be occupied, the existing element is moved to its other
48328 + * location. This recurses until success or a loop is found. If a
48329 + * loop is found the table is rehashed.
48330 + *
48331 + * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf
48332 + */
48333 +
48334 +#ifndef NET_ACCEL_CUCKOO_HASH_H
48335 +#define NET_ACCEL_CUCKOO_HASH_H
48336 +
48337 +/*! Type used for hash table keys of ip pairs */
48338 +typedef struct {
48339 + u32 local_ip;
48340 + //u32 remote_ip;
48341 + u16 local_port;
48342 + //u16 remote_port;
48343 + /* Technically only 1 bit, but use 16 to make key a round
48344 + number size */
48345 + u16 proto;
48346 +} cuckoo_hash_ip_key;
48347 +
48348 +/*! Type used for hash table keys of mac addresses */
48349 +typedef u64 cuckoo_hash_mac_key;
48350 +
48351 +/*! This type is designed to be large enough to hold all supported key
48352 + * sizes to avoid having to malloc storage for them.
48353 + */
48354 +typedef u64 cuckoo_hash_key;
48355 +
48356 +/*! Type used for the values stored in the hash table */
48357 +typedef int cuckoo_hash_value;
48358 +
48359 +/*! Type used for the hash used to index the table */
48360 +typedef u32 cuckoo_hash;
48361 +
48362 +/*! How long to spend displacing values when adding before giving up
48363 + * and rehashing */
48364 +#define CUCKOO_HASH_MAX_LOOP (hashtab->length)
48365 +
48366 +/*! State of hash table entry */
48367 +typedef enum {
48368 + CUCKOO_HASH_STATE_VACANT = 0,
48369 + CUCKOO_HASH_STATE_OCCUPIED
48370 +} cuckoo_hash_state;
48371 +
48372 +/*! An entry in the hash table */
48373 +typedef struct {
48374 + cuckoo_hash_state state;
48375 + cuckoo_hash_key key;
48376 + cuckoo_hash_value value;
48377 +} cuckoo_hash_entry;
48378 +
48379 +/*! A cuckoo hash table */
48380 +typedef struct {
48381 + /*! The length of each table (NB. there are two tables of this
48382 + * length) */
48383 + unsigned length;
48384 +	/*! log2 of the length of each table (length == 1 << length_bits) */
48385 + unsigned length_bits;
48386 + /*! The length of the key in bytes */
48387 + unsigned key_length;
48388 + /*! The number of entries currently stored in the table */
48389 + unsigned entries;
48390 + /*! Index into table used by cuckoo_hash_iterate */
48391 + unsigned iterate_index;
48392 +
48393 + /* parameter of hash functions */
48394 + /*! The "a" parameter of the first hash function */
48395 + cuckoo_hash_key a0;
48396 + /*! The "a" parameter of the second hash function */
48397 + cuckoo_hash_key a1;
48398 +
48399 + /*! The first table */
48400 + cuckoo_hash_entry *table0;
48401 + /*! The second table */
48402 + cuckoo_hash_entry *table1;
48403 +} cuckoo_hash_table;
48404 +
48405 +/*! Initialise the cuckoo hash table
48406 + *
48407 + * \param hashtab A pointer to an uninitialised hash table structure
48408 + * \param length_bits The number of elements in each table equals
48409 + * 2**length_bits
48410 + * \param key_length The length of the key in bytes
48411 + *
48412 + * \return 0 on success, -ENOMEM if it couldn't allocate the tables
48413 + */
48414 +extern
48415 +int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits,
48416 + unsigned key_length);
48417 +
48418 +
48419 +/*! Destroy a hash table
48420 + *
48421 + * \param hashtab A hash table that has previously been passed to a
48422 + * successful call of cuckoo_hash_init()
48423 + */
48424 +extern
48425 +void cuckoo_hash_destroy(cuckoo_hash_table *hashtab);
48426 +
48427 +
48428 +/*! Lookup an entry in the hash table
48429 + *
48430 + * \param hashtab The hash table in which to look.
48431 + * \param key Pointer to a mac address to use as the key
48432 + * \param value On exit set to the value stored if key was present
48433 + *
48434 + * \return 0 if not present in the table, non-zero if it is (and value
48435 + * is set accordingly)
48436 + */
48437 +extern
48438 +int cuckoo_hash_lookup(cuckoo_hash_table *hashtab,
48439 + cuckoo_hash_key *key,
48440 + cuckoo_hash_value *value);
48441 +
48442 +/*! Add an entry to the hash table. Key must not be a duplicate of
48443 + * anything already in the table. If this is a risk, see
48444 + * cuckoo_hash_add_check
48445 + *
48446 + * \param hashtab The hash table to add the entry to
48447 + * \param key Pointer to a mac address to use as a key
48448 + * \param value The value to store
48449 + * \param can_rehash Flag to allow the add function to rehash the
48450 + * table if necessary
48451 + *
48452 + * \return 0 on success, non-zero on failure. -ENOSPC means it just
48453 + * couldn't find anywhere to put it - this is bad and probably means
48454 + * an entry has been dropped on the floor (but the entry you just
48455 + * tried to add may now be included)
48456 + */
48457 +extern
48458 +int cuckoo_hash_add(cuckoo_hash_table *hashtab,
48459 + cuckoo_hash_key *key,
48460 + cuckoo_hash_value value,
48461 + int can_rehash);
48462 +
48463 +/*! Same as cuckoo_hash_add but first checks to ensure entry is not
48464 + * already there
48465 + * \return -EBUSY if already there
48466 + */
48467 +
48468 +extern
48469 +int cuckoo_hash_add_check(cuckoo_hash_table *hashtab,
48470 + cuckoo_hash_key *key,
48471 + cuckoo_hash_value value,
48472 + int can_rehash);
48473 +/*! Remove an entry from the table
48474 + *
48475 + * \param hashtab The hash table to remove the entry from
48476 + * \param key The key that was used to previously add the entry
48477 + *
48478 + * \return 0 on success, -EINVAL if the entry couldn't be found
48479 + */
48480 +extern
48481 +int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key);
48482 +
48483 +
48484 +/*! Helper for those using mac addresses to convert to a key for the
48485 + * hash table
48486 + */
48487 +static inline cuckoo_hash_mac_key cuckoo_mac_to_key(const u8 *mac)
48488 +{
48489 + return (cuckoo_hash_mac_key)(mac[0])
48490 + | (cuckoo_hash_mac_key)(mac[1]) << 8
48491 + | (cuckoo_hash_mac_key)(mac[2]) << 16
48492 + | (cuckoo_hash_mac_key)(mac[3]) << 24
48493 + | (cuckoo_hash_mac_key)(mac[4]) << 32
48494 + | (cuckoo_hash_mac_key)(mac[5]) << 40;
48495 +}
48496 +
48497 +
48498 +/*! Update an entry already in the hash table to take a new value
48499 + *
48500 + * \param hashtab The hash table to add the entry to
48501 + * \param key Pointer to a mac address to use as a key
48502 + * \param value The value to store
48503 + *
48504 + * \return 0 on success, non-zero on failure.
48505 + */
48506 +int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
48507 + cuckoo_hash_value value);
48508 +
48509 +
48510 +/*! Go through the hash table and return all used entries (one per call)
48511 + *
48512 + * \param hashtab The hash table to iterate over
48513 + * \param key Pointer to a key to take the returned key
48514 + * \param value Pointer to a value to take the returned value
48515 + *
48516 + * \return 0 on success (key, value set), non-zero on failure.
48517 + */
48518 +int cuckoo_hash_iterate(cuckoo_hash_table *hashtab,
48519 + cuckoo_hash_key *key, cuckoo_hash_value *value);
48520 +void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab);
48521 +
48522 +/* debug, not compiled by default */
48523 +void cuckoo_hash_valid(cuckoo_hash_table *hashtab);
48524 +void cuckoo_hash_dump(cuckoo_hash_table *hashtab);
48525 +
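+/*
+ * Illustrative usage sketch (the value 42 and the table size of 2^4
+ * entries per sub-table are arbitrary example choices; cuckoo_hash_init()
+ * allocates with GFP_KERNEL, so this must run in a context that may sleep):
+ */
+static inline void cuckoo_hash_example_usage(const u8 *mac)
+{
+	cuckoo_hash_table tab;
+	cuckoo_hash_key key, it_key;
+	cuckoo_hash_value value, it_value;
+
+	if (cuckoo_hash_init(&tab, 4, sizeof(cuckoo_hash_mac_key)) != 0)
+		return;
+
+	key = cuckoo_mac_to_key(mac);
+	if (cuckoo_hash_add_check(&tab, &key, 42, 1 /* can_rehash */) == 0) {
+		/* value is set to 42 by the lookup */
+		if (cuckoo_hash_lookup(&tab, &key, &value))
+			cuckoo_hash_update(&tab, &key, 43);
+	}
+
+	/* Walk every occupied entry in both sub-tables */
+	cuckoo_hash_iterate_reset(&tab);
+	while (cuckoo_hash_iterate(&tab, &it_key, &it_value) == 0)
+		;
+
+	cuckoo_hash_remove(&tab, &key);
+	cuckoo_hash_destroy(&tab);
+}
+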
48526 +#endif /* NET_ACCEL_CUCKOO_HASH_H */
48527 Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.c
48528 ===================================================================
48529 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
48530 +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.c 2008-02-20 09:32:49.000000000 +0100
48531 @@ -0,0 +1,301 @@
48532 +/****************************************************************************
48533 + * Solarflare driver for Xen network acceleration
48534 + *
48535 + * Copyright 2006-2008: Solarflare Communications Inc,
48536 + * 9501 Jeronimo Road, Suite 250,
48537 + * Irvine, CA 92618, USA
48538 + *
48539 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
48540 + *
48541 + * This program is free software; you can redistribute it and/or modify it
48542 + * under the terms of the GNU General Public License version 2 as published
48543 + * by the Free Software Foundation, incorporated herein by reference.
48544 + *
48545 + * This program is distributed in the hope that it will be useful,
48546 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
48547 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
48548 + * GNU General Public License for more details.
48549 + *
48550 + * You should have received a copy of the GNU General Public License
48551 + * along with this program; if not, write to the Free Software
48552 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
48553 + ****************************************************************************
48554 + */
48555 +
48556 +#include <xen/evtchn.h>
48557 +
48558 +#include "accel_util.h"
48559 +#include "accel_msg_iface.h"
48560 +
48561 +#define NET_ACCEL_MSG_Q_SIZE (1024)
48562 +#define NET_ACCEL_MSG_Q_MASK (NET_ACCEL_MSG_Q_SIZE - 1)
48563 +
48564 +#ifdef NDEBUG
48565 +#define NET_ACCEL_CHECK_MAGIC(_p, _errval)
48566 +#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id)
48567 +#else
48568 +#define NET_ACCEL_CHECK_MAGIC(_p, _errval) \
48569 + if (_p->magic != NET_ACCEL_MSG_MAGIC) { \
48570 + printk(KERN_ERR "%s: passed invalid shared page %p!\n", \
48571 + __FUNCTION__, _p); \
48572 + return _errval; \
48573 + }
48574 +#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id) \
48575 + printk(_t ": queue %d write %x read %x base %x limit %x\n", \
48576 + _id, _q->write, _q->read, _q->base, _q->limit);
48577 +#endif
48578 +
48579 +/*
48580 + * We've been passed at least 2 pages. 1 control page and 1 or more
48581 + * data pages.
48582 + */
48583 +int net_accel_msg_init_page(void *mem, int len, int up)
48584 +{
48585 + struct net_accel_shared_page *shared_page =
48586 + (struct net_accel_shared_page*)mem;
48587 +
48588 + if ((unsigned long)shared_page & NET_ACCEL_MSG_Q_MASK)
48589 + return -EINVAL;
48590 +
48591 + shared_page->magic = NET_ACCEL_MSG_MAGIC;
48592 +
48593 + shared_page->aflags = 0;
48594 +
48595 + shared_page->net_dev_up = up;
48596 +
48597 + return 0;
48598 +}
48599 +EXPORT_SYMBOL_GPL(net_accel_msg_init_page);
48600 +
48601 +
48602 +void net_accel_msg_init_queue(sh_msg_fifo2 *queue,
48603 + struct net_accel_msg_queue *indices,
48604 + struct net_accel_msg *base, int size)
48605 +{
48606 + queue->fifo = base;
48607 + spin_lock_init(&queue->lock);
48608 + sh_fifo2_init(queue, size-1, &indices->read, &indices->write);
48609 +}
48610 +EXPORT_SYMBOL_GPL(net_accel_msg_init_queue);
48611 +
48612 +
48613 +static inline int _net_accel_msg_send(struct net_accel_shared_page *sp,
48614 + sh_msg_fifo2 *queue,
48615 + struct net_accel_msg *msg,
48616 + int is_reply)
48617 +{
48618 + int rc = 0;
48619 + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
48620 + rmb();
48621 + if (is_reply) {
48622 + EPRINTK_ON(sh_fifo2_is_full(queue));
48623 + sh_fifo2_put(queue, *msg);
48624 + } else {
48625 + if (sh_fifo2_not_half_full(queue)) {
48626 + sh_fifo2_put(queue, *msg);
48627 + } else {
48628 + rc = -ENOSPC;
48629 + }
48630 + }
48631 + wmb();
48632 + return rc;
48633 +}
48634 +
48635 +/* Notify after a batch of messages have been sent */
48636 +void net_accel_msg_notify(int irq)
48637 +{
48638 + notify_remote_via_irq(irq);
48639 +}
48640 +EXPORT_SYMBOL_GPL(net_accel_msg_notify);
48641 +
48642 +/*
48643 + * Send a message on the specified FIFO. Returns 0 on success, -errno
48644 + * on failure. The message in msg is copied to the current slot of the
48645 + * FIFO.
48646 + */
48647 +int net_accel_msg_send(struct net_accel_shared_page *sp, sh_msg_fifo2 *q,
48648 + struct net_accel_msg *msg)
48649 +{
48650 + unsigned long flags;
48651 + int rc;
48652 + net_accel_msg_lock_queue(q, &flags);
48653 + rc = _net_accel_msg_send(sp, q, msg, 0);
48654 + net_accel_msg_unlock_queue(q, &flags);
48655 + return rc;
48656 +}
48657 +EXPORT_SYMBOL_GPL(net_accel_msg_send);
48658 +
48659 +
48660 +/* As net_accel_msg_send but also posts a notification to the far end. */
48661 +int net_accel_msg_send_notify(struct net_accel_shared_page *sp, int irq,
48662 + sh_msg_fifo2 *q, struct net_accel_msg *msg)
48663 +{
48664 + unsigned long flags;
48665 + int rc;
48666 + net_accel_msg_lock_queue(q, &flags);
48667 + rc = _net_accel_msg_send(sp, q, msg, 0);
48668 + net_accel_msg_unlock_queue(q, &flags);
48669 + if (rc >= 0)
48670 + notify_remote_via_irq(irq);
48671 + return rc;
48672 +}
48673 +EXPORT_SYMBOL_GPL(net_accel_msg_send_notify);
48674 +
48675 +
48676 +int net_accel_msg_reply(struct net_accel_shared_page *sp, sh_msg_fifo2 *q,
48677 + struct net_accel_msg *msg)
48678 +{
48679 + unsigned long flags;
48680 + int rc;
48681 + net_accel_msg_lock_queue(q, &flags);
48682 + rc = _net_accel_msg_send(sp, q, msg, 1);
48683 + net_accel_msg_unlock_queue(q, &flags);
48684 + return rc;
48685 +}
48686 +EXPORT_SYMBOL_GPL(net_accel_msg_reply);
48687 +
48688 +
48689 +/* As net_accel_msg_send but also posts a notification to the far end. */
48690 +int net_accel_msg_reply_notify(struct net_accel_shared_page *sp, int irq,
48691 + sh_msg_fifo2 *q, struct net_accel_msg *msg)
48692 +{
48693 + unsigned long flags;
48694 + int rc;
48695 + net_accel_msg_lock_queue(q, &flags);
48696 + rc = _net_accel_msg_send(sp, q, msg, 1);
48697 + net_accel_msg_unlock_queue(q, &flags);
48698 + if (rc >= 0)
48699 + notify_remote_via_irq(irq);
48700 + return rc;
48701 +}
48702 +EXPORT_SYMBOL_GPL(net_accel_msg_reply_notify);
48703 +
48704 +
48705 +/*
48706 + * Look at a received message, if any, so a decision can be made about
48707 + * whether to read it now or not. Cookie is a bit of debug which is
48708 + * set here and checked when passed to net_accel_msg_recv_next()
48709 + */
48710 +int net_accel_msg_peek(struct net_accel_shared_page *sp,
48711 + sh_msg_fifo2 *queue,
48712 + struct net_accel_msg *msg, int *cookie)
48713 +{
48714 + unsigned long flags;
48715 + int rc = 0;
48716 + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
48717 + net_accel_msg_lock_queue(queue, &flags);
48718 + rmb();
48719 + if (sh_fifo2_is_empty(queue)) {
48720 + rc = -ENOENT;
48721 + } else {
48722 + *msg = sh_fifo2_peek(queue);
48723 + *cookie = *(queue->fifo_rd_i);
48724 + }
48725 + net_accel_msg_unlock_queue(queue, &flags);
48726 + return rc;
48727 +}
48728 +EXPORT_SYMBOL_GPL(net_accel_msg_peek);
48729 +
48730 +
48731 +/*
48732 + * Move the queue onto the next element, used after finished with a
48733 + * peeked msg
48734 + */
48735 +int net_accel_msg_recv_next(struct net_accel_shared_page *sp,
48736 + sh_msg_fifo2 *queue, int cookie)
48737 +{
48738 + unsigned long flags;
48739 + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
48740 + net_accel_msg_lock_queue(queue, &flags);
48741 + rmb();
48742 + /* Mustn't be empty */
48743 + BUG_ON(sh_fifo2_is_empty(queue));
48744 + /*
48745 + * Check cookie matches, i.e. we're advancing over the same message
48746 + * as was obtained using peek
48747 + */
48748 + BUG_ON(cookie != *(queue->fifo_rd_i));
48749 + sh_fifo2_rd_next(queue);
48750 + wmb();
48751 + net_accel_msg_unlock_queue(queue, &flags);
48752 + return 0;
48753 +}
48754 +EXPORT_SYMBOL_GPL(net_accel_msg_recv_next);
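+
+/*
+ * Illustrative sketch of the peek/recv_next protocol above (queue and
+ * shared page are assumed to have been set up elsewhere): look at the
+ * message at the head of the FIFO, handle it, then advance past it.
+ */
+static inline void net_accel_msg_example_drain(struct net_accel_shared_page *sp,
+					       sh_msg_fifo2 *q)
+{
+	struct net_accel_msg msg;
+	int cookie;
+
+	while (net_accel_msg_peek(sp, q, &msg, &cookie) == 0) {
+		/* ... inspect msg.id and act on the message ... */
+		net_accel_msg_recv_next(sp, q, cookie);
+	}
+}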
48755 +
48756 +
48757 +/*
48758 + * Receive a message on the specified FIFO. Returns 0 on success,
48759 + * -errno on failure.
48760 + */
48761 +int net_accel_msg_recv(struct net_accel_shared_page *sp, sh_msg_fifo2 *queue,
48762 + struct net_accel_msg *msg)
48763 +{
48764 + unsigned long flags;
48765 + int rc = 0;
48766 + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
48767 + net_accel_msg_lock_queue(queue, &flags);
48768 + rmb();
48769 + if (sh_fifo2_is_empty(queue)) {
48770 + rc = -ENOENT;
48771 + } else {
48772 + sh_fifo2_get(queue, msg);
48773 + }
48774 + wmb();
48775 + net_accel_msg_unlock_queue(queue, &flags);
48776 + return rc;
48777 +}
48778 +EXPORT_SYMBOL_GPL(net_accel_msg_recv);
48779 +
48780 +
48781 +/*
48782 + * Start sending a message without copying. Returns a pointer to a message
48783 + * that will be filled out in place. The queue is locked until the message
48784 + * is sent.
48785 + */
48786 +struct net_accel_msg *net_accel_msg_start_send(struct net_accel_shared_page *sp,
48787 + sh_msg_fifo2 *queue, unsigned long *flags)
48788 +{
48789 + struct net_accel_msg *msg;
48790 + NET_ACCEL_CHECK_MAGIC(sp, NULL);
48791 + net_accel_msg_lock_queue(queue, flags);
48792 + rmb();
48793 + if (sh_fifo2_not_half_full(queue)) {
48794 + msg = sh_fifo2_pokep(queue);
48795 + } else {
48796 + net_accel_msg_unlock_queue(queue, flags);
48797 + msg = NULL;
48798 + }
48799 + return msg;
48800 +}
48801 +EXPORT_SYMBOL_GPL(net_accel_msg_start_send);
48802 +
48803 +
48804 +static inline void _msg_complete(struct net_accel_shared_page *sp,
48805 + sh_msg_fifo2 *queue,
48806 + unsigned long *flags)
48807 +{
48808 + sh_fifo2_wr_next(queue);
48809 + net_accel_msg_unlock_queue(queue, flags);
48810 +}
48811 +
48812 +/*
48813 + * Complete the sending of a message started with net_accel_msg_start_send. The
48814 + * message is implicit since the queue was locked by _start
48815 + */
48816 +void net_accel_msg_complete_send(struct net_accel_shared_page *sp,
48817 + sh_msg_fifo2 *queue,
48818 + unsigned long *flags)
48819 +{
48820 + _msg_complete(sp, queue, flags);
48821 +}
48822 +EXPORT_SYMBOL_GPL(net_accel_msg_complete_send);
48823 +
48824 +/* As net_accel_msg_complete_send but does the notify. */
48825 +void net_accel_msg_complete_send_notify(struct net_accel_shared_page *sp,
48826 + sh_msg_fifo2 *queue,
48827 + unsigned long *flags, int irq)
48828 +{
48829 + _msg_complete(sp, queue, flags);
48830 + notify_remote_via_irq(irq);
48831 +}
48832 +EXPORT_SYMBOL_GPL(net_accel_msg_complete_send_notify);
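+
+/*
+ * Illustrative sketch tying the zero-copy send primitives together
+ * (NET_ACCEL_MSG_HELLO and max_pages = 64 are example choices; the shared
+ * page, queue and irq are assumed to be set up already): reserve a slot,
+ * fill it in place, then complete the send and notify the peer.
+ */
+static inline int net_accel_msg_example_hello(struct net_accel_shared_page *sp,
+					      sh_msg_fifo2 *q, int irq)
+{
+	unsigned long flags;
+	struct net_accel_msg *msg;
+
+	msg = net_accel_msg_start_send(sp, q, &flags);
+	if (msg == NULL)
+		return -ENOSPC;
+
+	net_accel_msg_init(msg, NET_ACCEL_MSG_HELLO);
+	msg->u.hello.version = NET_ACCEL_MSG_VERSION;
+	msg->u.hello.max_pages = 64;
+
+	net_accel_msg_complete_send_notify(sp, q, &flags, irq);
+	return 0;
+}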
48833 Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.h
48834 ===================================================================
48835 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
48836 +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.h 2008-02-20 09:32:49.000000000 +0100
48837 @@ -0,0 +1,414 @@
48838 +/****************************************************************************
48839 + * Solarflare driver for Xen network acceleration
48840 + *
48841 + * Copyright 2006-2008: Solarflare Communications Inc,
48842 + * 9501 Jeronimo Road, Suite 250,
48843 + * Irvine, CA 92618, USA
48844 + *
48845 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
48846 + *
48847 + * This program is free software; you can redistribute it and/or modify it
48848 + * under the terms of the GNU General Public License version 2 as published
48849 + * by the Free Software Foundation, incorporated herein by reference.
48850 + *
48851 + * This program is distributed in the hope that it will be useful,
48852 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
48853 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
48854 + * GNU General Public License for more details.
48855 + *
48856 + * You should have received a copy of the GNU General Public License
48857 + * along with this program; if not, write to the Free Software
48858 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
48859 + ****************************************************************************
48860 + */
48861 +
48862 +#ifndef NET_ACCEL_MSG_IFACE_H
48863 +#define NET_ACCEL_MSG_IFACE_H
48864 +
48865 +#include <linux/ip.h>
48866 +#include <linux/tcp.h>
48867 +#include <linux/udp.h>
48868 +#include <linux/in.h>
48869 +#include <linux/netdevice.h>
48870 +#include <linux/etherdevice.h>
48871 +
48872 +#include "accel_shared_fifo.h"
48873 +
48874 +#define NET_ACCEL_MSG_MAGIC (0x85465479)
48875 +
48876 +/*! We talk version 0.010 of the interdomain protocol */
48877 +#define NET_ACCEL_MSG_VERSION (0x00001000)
48878 +
48879 +/*! Shared memory portion of inter-domain FIFO */
48880 +struct net_accel_msg_queue {
48881 + u32 read;
48882 + u32 write;
48883 +};
48884 +
48885 +
48886 +/*
48887 + * The aflags in the following structure is used as follows:
48888 + *
48889 + * - each bit is set when one of the corresponding variables is
48890 + * changed by either end.
48891 + *
48892 + * - the end that has made the change then forwards an IRQ to the
48893 + * other
48894 + *
48895 + * - the IRQ handler deals with these bits either on the fast path, or
48896 + * for less common changes, by jumping onto the slow path.
48897 + *
48898 + * - once it has seen a change, it clears the relevant bit.
48899 + *
48900 + * aflags is accessed atomically using clear_bit, test_bit,
48901 + * test_and_set_bit etc
48902 + */
48903 +
48904 +/*
48905 + * The following are used to signify to the other domain when the queue
48906 + * they want to use is full, and when it is no longer full. Could be
48907 + * compressed to use fewer bits but done this way for simplicity and
48908 + * clarity
48909 + */
48910 +
48911 +/* "dom0->domU queue" is full */
48912 +#define NET_ACCEL_MSG_AFLAGS_QUEUE0FULL 0x1
48913 +#define NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B 0
48914 +/* "dom0->domU queue" is not full */
48915 +#define NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL 0x2
48916 +#define NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B 1
48917 +/* "domU->dom0 queue" is full */
48918 +#define NET_ACCEL_MSG_AFLAGS_QUEUEUFULL 0x4
48919 +#define NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B 2
48920 +/* "domU->dom0 queue" is not full */
48921 +#define NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL 0x8
48922 +#define NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B 3
48923 +/* dom0 -> domU net_dev up/down events */
48924 +#define NET_ACCEL_MSG_AFLAGS_NETUPDOWN 0x10
48925 +#define NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B 4
48926 +
48927 +/*
48928 + * Masks used to test if there are any messages for domU and dom0
48929 + * respectively
48930 + */
48931 +#define NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK \
48932 + (NET_ACCEL_MSG_AFLAGS_QUEUE0FULL | \
48933 + NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL | \
48934 + NET_ACCEL_MSG_AFLAGS_NETUPDOWN)
48935 +#define NET_ACCEL_MSG_AFLAGS_TO_DOM0_MASK \
48936 + (NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL | \
48937 + NET_ACCEL_MSG_AFLAGS_QUEUEUFULL)
48938 +
48939 +/*! The shared data structure used for inter-VM communication. */
48940 +struct net_accel_shared_page {
48941 + /*! Sanity check */
48942 + u32 magic;
48943 + /*! Used by host/Dom0 */
48944 + struct net_accel_msg_queue queue0;
48945 + /*! Used by guest/DomU */
48946 + struct net_accel_msg_queue queue1;
48947 + /*! Atomic flags, used to communicate simple state changes */
48948 + u32 aflags;
48949 + /*! State of net_dev used for acceleration */
48950 + u32 net_dev_up;
48951 +};
48952 +
48953 +
48954 +enum net_accel_hw_type {
48955 + /*! Not a virtualisable NIC: use slow path. */
48956 + NET_ACCEL_MSG_HWTYPE_NONE = 0,
48957 + /*! NIC is Falcon-based */
48958 + NET_ACCEL_MSG_HWTYPE_FALCON_A = 1,
48959 + NET_ACCEL_MSG_HWTYPE_FALCON_B = 2,
48960 +};
48961 +
48962 +/*! The maximum number of pages used by an event queue. */
48963 +#define EF_HW_FALCON_EVQ_PAGES 8
48964 +
48965 +struct net_accel_hw_falcon_b {
48966 + /* VI */
48967 + /*! Grant for Tx DMA Q */
48968 + u32 txdmaq_gnt;
48969 + /*! Grant for Rx DMA Q */
48970 + u32 rxdmaq_gnt;
48971 + /*! Machine frame number for Tx/Rx doorbell page */
48972 + u32 doorbell_mfn;
48973 + /*! Grant for Tx/Rx doorbell page */
48974 + u32 doorbell_gnt;
48975 +
48976 + /* Event Q */
48977 + /*! Grants for the pages of the EVQ */
48978 + u32 evq_mem_gnts[EF_HW_FALCON_EVQ_PAGES];
48979 + u32 evq_offs;
48980 + /*! log2(pages in event Q) */
48981 + u32 evq_order;
48982 + /*! Capacity in events */
48983 + u32 evq_capacity;
48984 + /*! Eventq pointer register physical address */
48985 + u32 evq_rptr;
48986 + /*! Interface instance */
48987 + u32 instance;
48988 + /*! Capacity of RX queue */
48989 + u32 rx_capacity;
48990 + /*! Capacity of TX queue */
48991 + u32 tx_capacity;
48992 +
48993 + /* NIC */
48994 + s32 nic_arch;
48995 + s32 nic_revision;
48996 + u8 nic_variant;
48997 +};
48998 +
48999 +struct net_accel_hw_falcon_a {
49000 + struct net_accel_hw_falcon_b common;
49001 + u32 evq_rptr_gnt;
49002 +};
49003 +
49004 +
49005 +/*! Description of the hardware that the DomU is being given. */
49006 +struct net_accel_msg_hw {
49007 + u32 type; /*!< Hardware type */
49008 + union {
49009 + struct net_accel_hw_falcon_a falcon_a;
49010 + struct net_accel_hw_falcon_b falcon_b;
49011 + } resources;
49012 +};
49013 +
49014 +/*! Start-of-day handshake message. Dom0 fills in its version and
49015 + * sends, DomU checks, inserts its version and replies
49016 + */
49017 +struct net_accel_msg_hello {
49018 + /*! Sender's version (set by each side in turn) */
49019 + u32 version;
49020 + /*! max pages allocated/allowed for buffers */
49021 + u32 max_pages;
49022 +};
49023 +
49024 +/*! Maximum number of page requests that can fit in a message. */
49025 +#define NET_ACCEL_MSG_MAX_PAGE_REQ (8)
49026 +
49027 +/*! Request for NIC buffers. DomU fills out pages and grants (and,
49028 + * optionally, reqid); dom0 fills out buf and sends the reply
49029 + */
49030 +struct net_accel_msg_map_buffers {
49031 + u32 reqid; /*!< Optional request ID */
49032 + u32 pages; /*!< Number of pages to map */
49033 + u32 grants[NET_ACCEL_MSG_MAX_PAGE_REQ]; /*!< Grant ids to map */
49034 + u32 buf; /*!< NIC buffer address of pages obtained */
49035 +};
49036 +
49037 +/*! Notification of a change to local mac address, used to filter
49038 + locally destined packets off the fast path */
49039 +struct net_accel_msg_localmac {
49040 + u32 flags; /*!< Should this be added or removed? */
49041 + u8 mac[ETH_ALEN]; /*!< The mac address to filter onto slow path */
49042 +};
49043 +
49044 +struct net_accel_msg_fastpath {
49045 + u32 flags; /*!< Should this be added or removed? */
49046 + u8 mac[ETH_ALEN];/*!< The mac address to filter onto fast path */
49047 + u16 port; /*!< The port of the connection */
49048 + u32 ip; /*!< The IP address of the connection */
49049 + u8 proto; /*!< The protocol of connection (TCP/UDP) */
49050 +};
49051 +
49052 +/*! Values for struct ef_msg_localmac/fastpath.flags */
49053 +#define NET_ACCEL_MSG_ADD 0x1
49054 +#define NET_ACCEL_MSG_REMOVE 0x2
49055 +
49056 +/*! Overall message structure */
49057 +struct net_accel_msg {
49058 +	/*! ID specifying type of message */
49059 + u32 id;
49060 + union {
49061 + /*! handshake */
49062 + struct net_accel_msg_hello hello;
49063 + /*! hardware description */
49064 + struct net_accel_msg_hw hw;
49065 + /*! buffer map request */
49066 + struct net_accel_msg_map_buffers mapbufs;
49067 + /*! mac address of a local interface */
49068 + struct net_accel_msg_localmac localmac;
49069 + /*! address of a new fastpath connection */
49070 + struct net_accel_msg_fastpath fastpath;
49071 + /*! make the message a fixed size */
49072 + u8 pad[128 - sizeof(u32)];
49073 + } u;
49074 +};
49075 +
49076 +
49077 +#define NET_ACCEL_MSG_HW_TO_MSG(_u) container_of(_u, struct net_accel_msg, u.hw)
49078 +
49079 +/*! Inter-domain message FIFO */
49080 +typedef struct {
49081 + struct net_accel_msg *fifo;
49082 + u32 fifo_mask;
49083 + u32 *fifo_rd_i;
49084 + u32 *fifo_wr_i;
49085 + spinlock_t lock;
49086 + u32 is_locked; /* Debug flag */
49087 +} sh_msg_fifo2;
49088 +
49089 +
49090 +#define NET_ACCEL_MSG_OFFSET_MASK PAGE_MASK
49091 +
49092 +/* Modifiers */
49093 +#define NET_ACCEL_MSG_REPLY (0x80000000)
49094 +#define NET_ACCEL_MSG_ERROR (0x40000000)
49095 +
49096 +/* Dom0 -> DomU and reply. Handshake/version check. */
49097 +#define NET_ACCEL_MSG_HELLO (0x00000001)
49098 +/* Dom0 -> DomU : hardware setup (VI info.) */
49099 +#define NET_ACCEL_MSG_SETHW (0x00000002)
49100 +/*
49101 + * Dom0 -> DomU. Notification of a local mac to add/remove from slow
49102 + * path filter
49103 + */
49104 +#define NET_ACCEL_MSG_LOCALMAC (0x00000003)
49105 +/*
49106 + * DomU -> Dom0 and reply. Request for buffer table entries for
49107 + * preallocated pages.
49108 + */
49109 +#define NET_ACCEL_MSG_MAPBUF (0x00000004)
49110 +/*
49111 + * Dom0 -> DomU. Notification of a local mac to add/remove from fast
49112 + * path filter
49113 + */
49114 +#define NET_ACCEL_MSG_FASTPATH (0x00000005)
49115 +
49116 +/*! Initialise a message and set the type
49117 + * \param message : the message
49118 + * \param code : the message type
49119 + */
49120 +static inline void net_accel_msg_init(struct net_accel_msg *msg, int code) {
49121 + msg->id = (u32)code;
49122 +}
49123 +
49124 +/*! initialise a shared page structure
49125 + * \param shared_page : mapped memory in which the structure resides
49126 + * \param len : size of the message FIFO area that follows
49127 + * \param up : initial up/down state of netdev
49128 + * \return 0 or an error code
49129 + */
49130 +extern int net_accel_msg_init_page(void *shared_page, int len, int up);
49131 +
49132 +/*! initialise a message queue
49133 + * \param queue : the message FIFO to initialise
49134 + * \param indices : the read and write indices in shared memory
49135 + * \param base : the start of the memory area for the FIFO
49136 + * \param size : the size of the FIFO in bytes
49137 + */
49138 +extern void net_accel_msg_init_queue(sh_msg_fifo2 *queue,
49139 + struct net_accel_msg_queue *indices,
49140 + struct net_accel_msg *base, int size);
49141 +
49142 +/* Notify after a batch of messages have been sent */
49143 +extern void net_accel_msg_notify(int irq);
49144 +
49145 +/*! Send a message on the specified FIFO. The message is copied to the
49146 + * current slot of the FIFO.
49147 + * \param sp : pointer to shared page
49148 + * \param q : pointer to message FIFO to use
49149 + * \param msg : pointer to message
49150 + * \return 0 on success, -errno on failure
49151 + */
49152 +extern int net_accel_msg_send(struct net_accel_shared_page *sp,
49153 + sh_msg_fifo2 *q,
49154 + struct net_accel_msg *msg);
49155 +extern int net_accel_msg_reply(struct net_accel_shared_page *sp,
49156 + sh_msg_fifo2 *q,
49157 + struct net_accel_msg *msg);
49158 +
49159 +/*! As net_accel_msg_send but also posts a notification to the far end. */
49160 +extern int net_accel_msg_send_notify(struct net_accel_shared_page *sp,
49161 + int irq, sh_msg_fifo2 *q,
49162 + struct net_accel_msg *msg);
49163 +/*! As net_accel_msg_send but also posts a notification to the far end. */
49164 +extern int net_accel_msg_reply_notify(struct net_accel_shared_page *sp,
49165 + int irq, sh_msg_fifo2 *q,
49166 + struct net_accel_msg *msg);
49167 +
49168 +/*! Receive a message on the specified FIFO. Returns 0 on success,
49169 + * -errno on failure.
49170 + */
49171 +extern int net_accel_msg_recv(struct net_accel_shared_page *sp,
49172 + sh_msg_fifo2 *q,
49173 + struct net_accel_msg *msg);
49174 +
49175 +/*! Look at a received message, if any, so a decision can be made
49176 + * about whether to read it now or not. Cookie is a bit of debug
49177 + * which is set here and checked when passed to
49178 + * net_accel_msg_recv_next()
49179 + */
49180 +extern int net_accel_msg_peek(struct net_accel_shared_page *sp,
49181 + sh_msg_fifo2 *queue,
49182 + struct net_accel_msg *msg, int *cookie);
49183 +/*! Move the queue onto the next element, used after finished with a
49184 + * peeked msg
49185 + */
49186 +extern int net_accel_msg_recv_next(struct net_accel_shared_page *sp,
49187 + sh_msg_fifo2 *queue, int cookie);
49188 +
49189 +/*! Start sending a message without copying. Returns a pointer to a
49190 + * message that will be filled out in place. The queue is locked
49191 + * until the message is sent.
49192 + */
49193 +extern
49194 +struct net_accel_msg *net_accel_msg_start_send(struct net_accel_shared_page *sp,
49195 + sh_msg_fifo2 *queue,
49196 + unsigned long *flags);
49197 +
49198 +
49199 +/*! Complete the sending of a message started with
49200 + * net_accel_msg_start_send. The message is implicit since the queue
49201 + * was locked by _start
49202 + */
49203 +extern void net_accel_msg_complete_send(struct net_accel_shared_page *sp,
49204 + sh_msg_fifo2 *queue,
49205 + unsigned long *flags);
49206 +
49207 +/*! As net_accel_msg_complete_send but does the notify. */
49208 +extern void net_accel_msg_complete_send_notify(struct net_accel_shared_page *sp,
49209 + sh_msg_fifo2 *queue,
49210 + unsigned long *flags, int irq);
49211 +
49212 +/*! Lock the queue so that multiple "_locked" functions can be called
49213 + * without the queue being modified by others
49214 + */
49215 +static inline
49216 +void net_accel_msg_lock_queue(sh_msg_fifo2 *queue, unsigned long *flags)
49217 +{
49218 + spin_lock_irqsave(&queue->lock, (*flags));
49219 + rmb();
49220 + BUG_ON(queue->is_locked);
49221 + queue->is_locked = 1;
49222 +}
49223 +
49224 +/*! Unlock the queue */
49225 +static inline
49226 +void net_accel_msg_unlock_queue(sh_msg_fifo2 *queue, unsigned long *flags)
49227 +{
49228 + BUG_ON(!queue->is_locked);
49229 + queue->is_locked = 0;
49230 + wmb();
49231 + spin_unlock_irqrestore(&queue->lock, (*flags));
49232 +}
49233 +
49234 +/*! Give up without sending a message that was started with
49235 + * net_accel_msg_start_send()
49236 + */
49237 +static inline
49238 +void net_accel_msg_abort_send(struct net_accel_shared_page *sp,
49239 + sh_msg_fifo2 *queue, unsigned long *flags)
49240 +{
49241 + net_accel_msg_unlock_queue(queue, flags);
49242 +}
49243 +
49244 +/*! Test the queue to ensure there is sufficient space */
49245 +static inline
49246 +int net_accel_msg_check_space(sh_msg_fifo2 *queue, unsigned space)
49247 +{
49248 + return sh_fifo2_space(queue) >= space;
49249 +}
49250 +
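+/*
+ * Illustrative sketch of dispatching received messages by ID (the
+ * handling of each message type is elided; queue and shared page are
+ * assumed to be initialised already):
+ */
+static inline void net_accel_msg_example_dispatch(struct net_accel_shared_page *sp,
+						  sh_msg_fifo2 *q)
+{
+	struct net_accel_msg msg;
+
+	while (net_accel_msg_recv(sp, q, &msg) == 0) {
+		switch (msg.id & ~(NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_ERROR)) {
+		case NET_ACCEL_MSG_HELLO:
+			/* check msg.u.hello.version against our own */
+			break;
+		case NET_ACCEL_MSG_MAPBUF:
+			/* msg.u.mapbufs.buf holds the NIC buffer address */
+			break;
+		default:
+			break;
+		}
+	}
+}
+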
49251 +#endif /* NET_ACCEL_MSG_IFACE_H */
49252 Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_shared_fifo.h
49253 ===================================================================
49254 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
49255 +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_shared_fifo.h 2008-02-20 09:32:49.000000000 +0100
49256 @@ -0,0 +1,127 @@
49257 +/****************************************************************************
49258 + * Solarflare driver for Xen network acceleration
49259 + *
49260 + * Copyright 2006-2008: Solarflare Communications Inc,
49261 + * 9501 Jeronimo Road, Suite 250,
49262 + * Irvine, CA 92618, USA
49263 + *
49264 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
49265 + *
49266 + * This program is free software; you can redistribute it and/or modify it
49267 + * under the terms of the GNU General Public License version 2 as published
49268 + * by the Free Software Foundation, incorporated herein by reference.
49269 + *
49270 + * This program is distributed in the hope that it will be useful,
49271 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
49272 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
49273 + * GNU General Public License for more details.
49274 + *
49275 + * You should have received a copy of the GNU General Public License
49276 + * along with this program; if not, write to the Free Software
49277 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
49278 + ****************************************************************************
49279 + */
49280 +
49281 +#ifndef NET_ACCEL_SHARED_FIFO_H
49282 +#define NET_ACCEL_SHARED_FIFO_H
49283 +
49284 +/*
49285 + * This is based on fifo.h, but handles sharing between address spaces
49286 + * that don't trust each other, by splitting out the read and write
49287 + * indices. This costs at least one pointer indirection more than the
49288 + * vanilla version per access.
49289 + */
49290 +
49291 +typedef struct {
49292 + char* fifo;
49293 + unsigned fifo_mask;
49294 + unsigned *fifo_rd_i;
49295 + unsigned *fifo_wr_i;
49296 +} sh_byte_fifo2;
49297 +
49298 +#define SH_FIFO2_M(f, x) ((x) & ((f)->fifo_mask))
49299 +
49300 +static inline unsigned log2_ge(unsigned long n, unsigned min_order) {
49301 + unsigned order = min_order;
49302 + while((1ul << order) < n) ++order;
49303 + return order;
49304 +}
49305 +
49306 +static inline unsigned long pow2(unsigned order) {
49307 + return (1ul << order);
49308 +}
49309 +
49310 +#define is_pow2(x) (pow2(log2_ge((x), 0)) == (x))
49311 +
49312 +#define sh_fifo2_valid(f) ((f) && (f)->fifo && (f)->fifo_mask > 0 && \
49313 + is_pow2((f)->fifo_mask+1u))
49314 +
49315 +#define sh_fifo2_init(f, cap, _rptr, _wptr) \
49316 + do { \
49317 + BUG_ON(!is_pow2((cap) + 1)); \
49318 + (f)->fifo_rd_i = _rptr; \
49319 + (f)->fifo_wr_i = _wptr; \
49320 + *(f)->fifo_rd_i = *(f)->fifo_wr_i = 0u; \
49321 + (f)->fifo_mask = (cap); \
49322 + } while(0)
49323 +
49324 +#define sh_fifo2_num(f) SH_FIFO2_M((f),*(f)->fifo_wr_i - *(f)->fifo_rd_i)
49325 +#define sh_fifo2_space(f) SH_FIFO2_M((f),*(f)->fifo_rd_i - *(f)->fifo_wr_i-1u)
49326 +#define sh_fifo2_is_empty(f) (sh_fifo2_num(f)==0)
49327 +#define sh_fifo2_not_empty(f) (sh_fifo2_num(f)!=0)
49328 +#define sh_fifo2_is_full(f) (sh_fifo2_space(f)==0u)
49329 +#define sh_fifo2_not_full(f) (sh_fifo2_space(f)!=0u)
49330 +#define sh_fifo2_buf_size(f) ((f)->fifo_mask + 1u)
49331 +#define sh_fifo2_capacity(f) ((f)->fifo_mask)
49332 +#define sh_fifo2_end(f) ((f)->fifo + sh_fifo2_buf_size(f))
49333 +#define sh_fifo2_not_half_full(f) (sh_fifo2_space(f) > (sh_fifo2_capacity(f) >> 1))
49334 +
49335 +#define sh_fifo2_peek(f) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_rd_i)])
49336 +#define sh_fifo2_peekp(f) ((f)->fifo + SH_FIFO2_M((f), *(f)->fifo_rd_i))
49337 +#define sh_fifo2_poke(f) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_wr_i)])
49338 +#define sh_fifo2_pokep(f) ((f)->fifo + SH_FIFO2_M((f), *(f)->fifo_wr_i))
49339 +#define sh_fifo2_peek_i(f,i) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_rd_i+(i))])
49340 +#define sh_fifo2_poke_i(f,i) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_wr_i+(i))])
49341 +
49342 +#define sh_fifo2_rd_next(f) \
49343 + do {*(f)->fifo_rd_i = *(f)->fifo_rd_i + 1u;} while(0)
49344 +#define sh_fifo2_wr_next(f) \
49345 + do {*(f)->fifo_wr_i = *(f)->fifo_wr_i + 1u;} while(0)
49346 +#define sh_fifo2_rd_adv(f, n) \
49347 + do {*(f)->fifo_rd_i = *(f)->fifo_rd_i + (n);} while(0)
49348 +#define sh_fifo2_wr_adv(f, n) \
49349 + do {*(f)->fifo_wr_i = *(f)->fifo_wr_i + (n);} while(0)
49350 +
49351 +#define sh_fifo2_put(f, v) \
49352 + do {sh_fifo2_poke(f) = (v); wmb(); sh_fifo2_wr_next(f);} while(0)
49353 +
49354 +#define sh_fifo2_get(f, pv) \
49355 + do {*(pv) = sh_fifo2_peek(f); mb(); sh_fifo2_rd_next(f);} while(0)
49356 +
49357 +static inline unsigned sh_fifo2_contig_num(sh_byte_fifo2 *f)
49358 +{
49359 + unsigned fifo_wr_i = SH_FIFO2_M(f, *f->fifo_wr_i);
49360 + unsigned fifo_rd_i = SH_FIFO2_M(f, *f->fifo_rd_i);
49361 +
49362 + return (fifo_wr_i >= fifo_rd_i)
49363 + ? fifo_wr_i - fifo_rd_i
49364 + : f->fifo_mask + 1u - fifo_rd_i;
49365 +}
49366 +
49367 +static inline unsigned sh_fifo2_contig_space(sh_byte_fifo2 *f)
49368 +{
49369 + unsigned fifo_wr_i = SH_FIFO2_M(f, *f->fifo_wr_i);
49370 + unsigned fifo_rd_i = SH_FIFO2_M(f, *f->fifo_rd_i);
49371 +
49372 + return (fifo_rd_i > fifo_wr_i)
49373 + ? fifo_rd_i - fifo_wr_i - 1
49374 + : (f->fifo_mask + 1u - fifo_wr_i
49375 + /*
49376 + * The last byte can't be used if the read pointer
49377 + * is at zero.
49378 + */
49379 + - (fifo_rd_i==0));
49380 +}
49381 +
49382 +
49383 +#endif /* NET_ACCEL_SHARED_FIFO_H */
49384 Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_util.c
49385 ===================================================================
49386 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
49387 +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_util.c 2008-02-20 09:32:49.000000000 +0100
49388 @@ -0,0 +1,333 @@
49389 +/****************************************************************************
49390 + * Solarflare driver for Xen network acceleration
49391 + *
49392 + * Copyright 2006-2008: Solarflare Communications Inc,
49393 + * 9501 Jeronimo Road, Suite 250,
49394 + * Irvine, CA 92618, USA
49395 + *
49396 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
49397 + *
49398 + * This program is free software; you can redistribute it and/or modify it
49399 + * under the terms of the GNU General Public License version 2 as published
49400 + * by the Free Software Foundation, incorporated herein by reference.
49401 + *
49402 + * This program is distributed in the hope that it will be useful,
49403 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
49404 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
49405 + * GNU General Public License for more details.
49406 + *
49407 + * You should have received a copy of the GNU General Public License
49408 + * along with this program; if not, write to the Free Software
49409 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
49410 + ****************************************************************************
49411 + */
49412 +
49413 +#include <linux/if_ether.h>
49414 +#include <asm/io.h>
49415 +#include <asm/pgtable.h>
49416 +#include <asm/hypercall.h>
49417 +#include <xen/xenbus.h>
49418 +#include <xen/driver_util.h>
49419 +#include <xen/gnttab.h>
49420 +
49421 +#include "accel_util.h"
49422 +
49423 +#ifdef EFX_GCOV
49424 +#include "gcov.h"
49425 +
49426 +static int __init net_accel_init(void)
49427 +{
49428 + gcov_provider_init(THIS_MODULE);
49429 + return 0;
49430 +}
49431 +module_init(net_accel_init);
49432 +
49433 +static void __exit net_accel_exit(void)
49434 +{
49435 + gcov_provider_fini(THIS_MODULE);
49436 +}
49437 +module_exit(net_accel_exit);
49438 +#endif
49439 +
49440 +/* Shutdown remote domain that is misbehaving */
49441 +int net_accel_shutdown_remote(int domain)
49442 +{
49443 + struct sched_remote_shutdown sched_shutdown = {
49444 + .domain_id = domain,
49445 + .reason = SHUTDOWN_crash
49446 + };
49447 +
49448 + EPRINTK("Crashing domain %d\n", domain);
49449 +
49450 + return HYPERVISOR_sched_op(SCHEDOP_remote_shutdown, &sched_shutdown);
49451 +}
49452 +EXPORT_SYMBOL(net_accel_shutdown_remote);
49453 +
49454 +
49455 +/* Based on xenbus_backend_client.c:xenbus_map_ring() */
49456 +static int net_accel_map_grant(struct xenbus_device *dev, int gnt_ref,
49457 + grant_handle_t *handle, void *vaddr,
49458 + u64 *dev_bus_addr, unsigned flags)
49459 +{
49460 + struct gnttab_map_grant_ref op;
49461 +
49462 + gnttab_set_map_op(&op, (unsigned long)vaddr, flags,
49463 + gnt_ref, dev->otherend_id);
49464 +
49465 + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
49466 +
49467 + if (op.status != GNTST_okay) {
49468 + xenbus_dev_error
49469 + (dev, op.status,
49470 + "failed mapping in shared page %d from domain %d\n",
49471 + gnt_ref, dev->otherend_id);
49472 + } else {
49473 + *handle = op.handle;
49474 + if (dev_bus_addr)
49475 + *dev_bus_addr = op.dev_bus_addr;
49476 + }
49477 +
49478 + return op.status;
49479 +}
49480 +
49481 +
49482 +/* Based on xenbus_backend_client.c:xenbus_unmap_ring() */
49483 +static int net_accel_unmap_grant(struct xenbus_device *dev,
49484 + grant_handle_t handle,
49485 + void *vaddr, u64 dev_bus_addr,
49486 + unsigned flags)
49487 +{
49488 + struct gnttab_unmap_grant_ref op;
49489 +
49490 + gnttab_set_unmap_op(&op, (unsigned long)vaddr, flags, handle);
49491 +
49492 + if (dev_bus_addr)
49493 + op.dev_bus_addr = dev_bus_addr;
49494 +
49495 + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
49496 +
49497 + if (op.status != GNTST_okay)
49498 + xenbus_dev_error(dev, op.status,
49499 + "failed unmapping page at handle %d error %d\n",
49500 + handle, op.status);
49501 +
49502 + return op.status;
49503 +}
49504 +
49505 +
49506 +int net_accel_map_device_page(struct xenbus_device *dev,
49507 + int gnt_ref, grant_handle_t *handle,
49508 + u64 *dev_bus_addr)
49509 +{
49510 + return net_accel_map_grant(dev, gnt_ref, handle, 0, dev_bus_addr,
49511 + GNTMAP_device_map);
49512 +}
49513 +EXPORT_SYMBOL_GPL(net_accel_map_device_page);
49514 +
49515 +
49516 +int net_accel_unmap_device_page(struct xenbus_device *dev,
49517 + grant_handle_t handle, u64 dev_bus_addr)
49518 +{
49519 + return net_accel_unmap_grant(dev, handle, 0, dev_bus_addr,
49520 + GNTMAP_device_map);
49521 +}
49522 +EXPORT_SYMBOL_GPL(net_accel_unmap_device_page);
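+
+/*
+ * Illustrative pairing of the two helpers above (sketch only; gnt_ref is
+ * assumed to be a grant reference obtained from the other end):
+ *
+ *	grant_handle_t handle;
+ *	u64 bus_addr;
+ *
+ *	if (net_accel_map_device_page(dev, gnt_ref, &handle, &bus_addr) != 0)
+ *		return;
+ *	// ... use bus_addr to program the device ...
+ *	net_accel_unmap_device_page(dev, handle, bus_addr);
+ */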
49523 +
49524 +
49525 +struct net_accel_valloc_grant_mapping {
49526 + struct vm_struct *vm;
49527 + int pages;
49528 + grant_handle_t grant_handles[0];
49529 +};
49530 +
49531 +/* Map a series of grants into a contiguous virtual area */
49532 +static void *net_accel_map_grants_valloc(struct xenbus_device *dev,
49533 + unsigned *grants, int npages,
49534 + unsigned flags, void **priv)
49535 +{
49536 + struct net_accel_valloc_grant_mapping *map;
49537 + struct vm_struct *vm;
49538 + void *addr;
49539 + int i, j, rc;
49540 +
49541 + vm = alloc_vm_area(PAGE_SIZE * npages);
49542 + if (vm == NULL) {
49543 + EPRINTK("No memory from alloc_vm_area.\n");
49544 + return NULL;
49545 + }
49546 + /*
49547 + * Get a structure in which we will record all the info needed
49548 + * to undo the mapping.
49549 + */
49550 + map = kzalloc(sizeof(struct net_accel_valloc_grant_mapping) +
49551 + npages * sizeof(grant_handle_t), GFP_KERNEL);
49552 + if (map == NULL) {
49553 + EPRINTK("No memory for net_accel_valloc_grant_mapping\n");
49554 + free_vm_area(vm);
49555 + return NULL;
49556 + }
49557 + map->vm = vm;
49558 + map->pages = npages;
49559 +
49560 + /* Do the actual mapping */
49561 + addr = vm->addr;
49562 + for (i = 0; i < npages; i++) {
49563 + rc = net_accel_map_grant(dev, grants[i], map->grant_handles + i,
49564 + addr, NULL, flags);
49565 + if (rc != 0)
49566 + goto undo;
49567 + addr = (void*)((unsigned long)addr + PAGE_SIZE);
49568 + }
49569 +
49570 + if (priv)
49571 + *priv = (void *)map;
49572 + else
49573 + kfree(map);
49574 +
49575 + return vm->addr;
49576 +
49577 + undo:
49578 + EPRINTK("Aborting contig map due to single map failure %d (%d of %d)\n",
49579 + rc, i+1, npages);
49580 + for (j = 0; j < i; j++) {
49581 + addr = (void*)((unsigned long)vm->addr + (j * PAGE_SIZE));
49582 + net_accel_unmap_grant(dev, map->grant_handles[j], addr, 0,
49583 + flags);
49584 + }
49585 + free_vm_area(vm);
49586 + kfree(map);
49587 + return NULL;
49588 +}
49589 +
49590 +/* Undo the result of the mapping */
49591 +static void net_accel_unmap_grants_vfree(struct xenbus_device *dev,
49592 + unsigned flags, void *priv)
49593 +{
49594 + struct net_accel_valloc_grant_mapping *map =
49595 + (struct net_accel_valloc_grant_mapping *)priv;
49596 +
49597 + void *addr = map->vm->addr;
49598 + int npages = map->pages;
49599 + int i;
49600 +
49601 + for (i = 0; i < npages; i++) {
49602 + net_accel_unmap_grant(dev, map->grant_handles[i], addr, 0,
49603 + flags);
49604 + addr = (void*)((unsigned long)addr + PAGE_SIZE);
49605 + }
49606 + free_vm_area(map->vm);
49607 + kfree(map);
49608 +}
49609 +
49610 +
49611 +void *net_accel_map_grants_contig(struct xenbus_device *dev,
49612 + unsigned *grants, int npages,
49613 + void **priv)
49614 +{
49615 + return net_accel_map_grants_valloc(dev, grants, npages,
49616 + GNTMAP_host_map, priv);
49617 +}
49618 +EXPORT_SYMBOL(net_accel_map_grants_contig);
49619 +
49620 +
49621 +void net_accel_unmap_grants_contig(struct xenbus_device *dev,
49622 + void *priv)
49623 +{
49624 + net_accel_unmap_grants_vfree(dev, GNTMAP_host_map, priv);
49625 +}
49626 +EXPORT_SYMBOL(net_accel_unmap_grants_contig);
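+
+/*
+ * Typical use of the contiguous-mapping helpers (sketch only; "grants"
+ * is assumed to be an array of npages grant references published by the
+ * frontend):
+ *
+ *	void *priv;
+ *	void *addr = net_accel_map_grants_contig(dev, grants, npages, &priv);
+ *
+ *	if (addr == NULL)
+ *		return;
+ *	// ... addr covers npages contiguously mapped pages ...
+ *	net_accel_unmap_grants_contig(dev, priv);
+ */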
49627 +
49628 +
49629 +void *net_accel_map_iomem_page(struct xenbus_device *dev, int gnt_ref,
49630 + void **priv)
49631 +{
49632 + return net_accel_map_grants_valloc(dev, &gnt_ref, 1,
49633 + GNTMAP_host_map, priv);
49634 +}
49635 +EXPORT_SYMBOL(net_accel_map_iomem_page);
49636 +
49637 +
49638 +void net_accel_unmap_iomem_page(struct xenbus_device *dev, void *priv)
49639 +{
49640 + net_accel_unmap_grants_vfree(dev, GNTMAP_host_map, priv);
49641 +}
49642 +EXPORT_SYMBOL(net_accel_unmap_iomem_page);
49643 +
49644 +
49645 +int net_accel_grant_page(struct xenbus_device *dev, unsigned long mfn,
49646 + int is_iomem)
49647 +{
49648 + int err = gnttab_grant_foreign_access(dev->otherend_id, mfn,
49649 + is_iomem ? GTF_PCD : 0);
49650 + if (err < 0)
49651 + xenbus_dev_error(dev, err, "failed granting access to page\n");
49652 + return err;
49653 +}
49654 +EXPORT_SYMBOL_GPL(net_accel_grant_page);
49655 +
49656 +
49657 +int net_accel_ungrant_page(grant_ref_t gntref)
49658 +{
49659 + if (unlikely(gnttab_query_foreign_access(gntref) != 0)) {
49660 + EPRINTK("%s: remote domain still using grant %d\n", __FUNCTION__,
49661 + gntref);
49662 + return -EBUSY;
49663 + }
49664 +
49665 + gnttab_end_foreign_access(gntref, 0);
49666 + return 0;
49667 +}
49668 +EXPORT_SYMBOL_GPL(net_accel_ungrant_page);
49669 +
49670 +
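+/*
+ * Read the frontend's MAC address from the device's "mac" xenstore node.
+ * The node is expected to hold six colon-separated hex octets, e.g.
+ * "00:16:3e:12:34:56" (example value only).
+ */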
49671 +int net_accel_xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
49672 +{
49673 + char *s, *e, *macstr;
49674 + int i;
49675 +
49676 + macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
49677 + if (IS_ERR(macstr))
49678 + return PTR_ERR(macstr);
49679 +
49680 + for (i = 0; i < ETH_ALEN; i++) {
49681 + mac[i] = simple_strtoul(s, &e, 16);
49682 + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
49683 + kfree(macstr);
49684 + return -ENOENT;
49685 + }
49686 + s = e+1;
49687 + }
49688 +
49689 + kfree(macstr);
49690 + return 0;
49691 +}
49692 +EXPORT_SYMBOL_GPL(net_accel_xen_net_read_mac);
49693 +
49694 +
49695 +void net_accel_update_state(struct xenbus_device *dev, int state)
49696 +{
49697 + struct xenbus_transaction tr;
49698 + int err;
49699 +
49700 + DPRINTK("%s: setting accelstate to %s\n", __FUNCTION__,
49701 + xenbus_strstate(state));
49702 +
49703 + if (xenbus_exists(XBT_NIL, dev->nodename, "")) {
49704 + VPRINTK("%s: nodename %s\n", __FUNCTION__, dev->nodename);
49705 + again:
49706 + err = xenbus_transaction_start(&tr);
49707 + if (err == 0)
49708 + err = xenbus_printf(tr, dev->nodename, "accelstate",
49709 + "%d", state);
49710 + if (err != 0) {
49711 + xenbus_transaction_end(tr, 1);
49712 + } else {
49713 + err = xenbus_transaction_end(tr, 0);
49714 + if (err == -EAGAIN)
49715 + goto again;
49716 + }
49717 + }
49718 +}
49719 +EXPORT_SYMBOL_GPL(net_accel_update_state);
49720 +
49721 +MODULE_LICENSE("GPL");
49722 Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_util.h
49723 ===================================================================
49724 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
49725 +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_util.h 2008-02-20 09:32:49.000000000 +0100
49726 @@ -0,0 +1,127 @@
49727 +/****************************************************************************
49728 + * Solarflare driver for Xen network acceleration
49729 + *
49730 + * Copyright 2006-2008: Solarflare Communications Inc,
49731 + * 9501 Jeronimo Road, Suite 250,
49732 + * Irvine, CA 92618, USA
49733 + *
49734 + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
49735 + *
49736 + * This program is free software; you can redistribute it and/or modify it
49737 + * under the terms of the GNU General Public License version 2 as published
49738 + * by the Free Software Foundation, incorporated herein by reference.
49739 + *
49740 + * This program is distributed in the hope that it will be useful,
49741 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
49742 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
49743 + * GNU General Public License for more details.
49744 + *
49745 + * You should have received a copy of the GNU General Public License
49746 + * along with this program; if not, write to the Free Software
49747 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
49748 + ****************************************************************************
49749 + */
49750 +
49751 +#ifndef NETBACK_ACCEL_UTIL_H
49752 +#define NETBACK_ACCEL_UTIL_H
49753 +
49754 +#ifdef DPRINTK
49755 +#undef DPRINTK
49756 +#endif
49757 +
49758 +#define FILE_LEAF strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__
49759 +
49760 +#if 1
49761 +#define VPRINTK(_f, _a...)
49762 +#else
49763 +#define VPRINTK(_f, _a...) \
49764 + printk("(file=%s, line=%d) " _f, \
49765 + FILE_LEAF , __LINE__ , ## _a )
49766 +#endif
49767 +
49768 +#if 1
49769 +#define DPRINTK(_f, _a...)
49770 +#else
49771 +#define DPRINTK(_f, _a...) \
49772 + printk("(file=%s, line=%d) " _f, \
49773 + FILE_LEAF , __LINE__ , ## _a )
49774 +#endif
49775 +
49776 +#define EPRINTK(_f, _a...) \
49777 + printk("(file=%s, line=%d) " _f, \
49778 + FILE_LEAF , __LINE__ , ## _a )
49779 +
49780 +#define EPRINTK_ON(exp) \
49781 + do { \
49782 + if (exp) \
49783 + EPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \
49784 + } while(0)
49785 +
49786 +#define DPRINTK_ON(exp) \
49787 + do { \
49788 + if (exp) \
49789 + DPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \
49790 + } while(0)
49791 +
49792 +#define MAC_FMT "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"
49793 +#define MAC_ARG(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5]
49794 +
49795 +#include <xen/xenbus.h>
49796 +
49797 +/*! Map a set of pages from another domain
49798 + * \param dev The xenbus device context
49799 + * \param priv Out: private data needed to undo the mapping
49800 + */
49801 +extern
49802 +void *net_accel_map_grants_contig(struct xenbus_device *dev,
49803 + unsigned *grants, int npages,
49804 + void **priv);
49805 +
49806 +/*! Unmap a set of pages mapped using net_accel_map_grants_contig.
49807 + * \param dev The xenbus device context
49808 + * \param priv The private data returned by the mapping function
49809 + */
49810 +extern
49811 +void net_accel_unmap_grants_contig(struct xenbus_device *dev, void *priv);
49812 +
49813 +/*! Read the MAC address of a device from xenstore */
49814 +extern
49815 +int net_accel_xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
49816 +
49817 +/*! Update the accelstate field for a device in xenstore */
49818 +extern
49819 +void net_accel_update_state(struct xenbus_device *dev, int state);
49820 +
49821 +/* These four map/unmap functions are based on
49822 + * xenbus_backend_client.c:xenbus_map_ring(). However, they are not
49823 + * used for ring buffers, instead just to map pages between domains,
49824 + * or to map a page so that it is accessible by a device
49825 + */
49826 +extern
49827 +int net_accel_map_device_page(struct xenbus_device *dev,
49828 + int gnt_ref, grant_handle_t *handle,
49829 + u64 *dev_bus_addr);
49830 +extern
49831 +int net_accel_unmap_device_page(struct xenbus_device *dev,
49832 + grant_handle_t handle, u64 dev_bus_addr);
49833 +extern
49834 +void *net_accel_map_iomem_page(struct xenbus_device *dev, int gnt_ref,
49835 + void **priv);
49836 +extern
49837 +void net_accel_unmap_iomem_page(struct xenbus_device *dev, void *priv);
49838 +
49839 +/*! Grant a page to remote domain */
49840 +extern
49841 +int net_accel_grant_page(struct xenbus_device *dev, unsigned long mfn,
49842 + int is_iomem);
49843 +/*! Undo a net_accel_grant_page */
49844 +extern
49845 +int net_accel_ungrant_page(grant_ref_t gntref);
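+
+/*
+ * Sketch of the grant/ungrant pairing (illustrative only; mfn is assumed
+ * to be the machine frame number of a page owned by this domain):
+ *
+ *	int gntref = net_accel_grant_page(dev, mfn, 0);
+ *
+ *	if (gntref < 0)
+ *		return gntref;
+ *	// ... the other end maps the page using gntref ...
+ *	net_accel_ungrant_page(gntref);
+ */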
49846 +
49847 +
49848 +/*! Shutdown remote domain that is misbehaving */
49849 +extern
49850 +int net_accel_shutdown_remote(int domain);
49851 +
49852 +
49853 +#endif
49854 Index: head-2008-11-25/drivers/xen/tpmback/Makefile
49855 ===================================================================
49856 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
49857 +++ head-2008-11-25/drivers/xen/tpmback/Makefile 2007-06-12 13:13:45.000000000 +0200
49858 @@ -0,0 +1,4 @@
49859 +
49860 +obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmbk.o
49861 +
49862 +tpmbk-y += tpmback.o interface.o xenbus.o
49863 Index: head-2008-11-25/drivers/xen/tpmback/common.h
49864 ===================================================================
49865 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
49866 +++ head-2008-11-25/drivers/xen/tpmback/common.h 2007-06-12 13:13:45.000000000 +0200
49867 @@ -0,0 +1,85 @@
49868 +/******************************************************************************
49869 + * drivers/xen/tpmback/common.h
49870 + */
49871 +
49872 +#ifndef __TPM__BACKEND__COMMON_H__
49873 +#define __TPM__BACKEND__COMMON_H__
49874 +
49875 +#include <linux/version.h>
49876 +#include <linux/module.h>
49877 +#include <linux/interrupt.h>
49878 +#include <linux/slab.h>
49879 +#include <xen/evtchn.h>
49880 +#include <xen/driver_util.h>
49881 +#include <xen/interface/grant_table.h>
49882 +#include <xen/interface/io/tpmif.h>
49883 +#include <asm/io.h>
49884 +#include <asm/pgalloc.h>
49885 +
49886 +#define DPRINTK(_f, _a...) \
49887 + pr_debug("(file=%s, line=%d) " _f, \
49888 + __FILE__ , __LINE__ , ## _a )
49889 +
49890 +struct backend_info;
49891 +
49892 +typedef struct tpmif_st {
49893 + struct list_head tpmif_list;
49894 + /* Unique identifier for this interface. */
49895 + domid_t domid;
49896 + unsigned int handle;
49897 +
49898 + /* Physical parameters of the comms window. */
49899 + unsigned int irq;
49900 +
49901 + /* The shared rings and indexes. */
49902 + tpmif_tx_interface_t *tx;
49903 + struct vm_struct *tx_area;
49904 +
49905 + /* Miscellaneous private stuff. */
49906 + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
49907 + int active;
49908 +
49909 + struct tpmif_st *hash_next;
49910 + struct list_head list; /* scheduling list */
49911 + atomic_t refcnt;
49912 +
49913 + struct backend_info *bi;
49914 +
49915 + grant_handle_t shmem_handle;
49916 + grant_ref_t shmem_ref;
49917 + struct page **mmap_pages;
49918 +
49919 + char devname[20];
49920 +} tpmif_t;
49921 +
49922 +void tpmif_disconnect_complete(tpmif_t * tpmif);
49923 +tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi);
49924 +void tpmif_interface_init(void);
49925 +void tpmif_interface_exit(void);
49926 +void tpmif_schedule_work(tpmif_t * tpmif);
49927 +void tpmif_deschedule_work(tpmif_t * tpmif);
49928 +void tpmif_xenbus_init(void);
49929 +void tpmif_xenbus_exit(void);
49930 +int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
49931 +irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
49932 +
49933 +long int tpmback_get_instance(struct backend_info *bi);
49934 +
49935 +int vtpm_release_packets(tpmif_t * tpmif, int send_msgs);
49936 +
49937 +
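+/*
+ * Interface reference counting: tpmif_get() takes a reference on a
+ * tpmif_t and tpmif_put() drops it; when the count reaches zero the
+ * interface is torn down via tpmif_disconnect_complete().
+ */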
49938 +#define tpmif_get(_b) (atomic_inc(&(_b)->refcnt))
49939 +#define tpmif_put(_b) \
49940 + do { \
49941 + if (atomic_dec_and_test(&(_b)->refcnt)) \
49942 + tpmif_disconnect_complete(_b); \
49943 + } while (0)
49944 +
49945 +extern int num_frontends;
49946 +
49947 +static inline unsigned long idx_to_kaddr(tpmif_t *t, unsigned int idx)
49948 +{
49949 + return (unsigned long)pfn_to_kaddr(page_to_pfn(t->mmap_pages[idx]));
49950 +}
49951 +
49952 +#endif /* __TPM__BACKEND__COMMON_H__ */
49953 Index: head-2008-11-25/drivers/xen/tpmback/interface.c
49954 ===================================================================
49955 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
49956 +++ head-2008-11-25/drivers/xen/tpmback/interface.c 2008-01-21 11:15:26.000000000 +0100
49957 @@ -0,0 +1,168 @@
49958 + /*****************************************************************************
49959 + * drivers/xen/tpmback/interface.c
49960 + *
49961 + * Virtual TPM interface management.
49962 + *
49963 + * Copyright (c) 2005, IBM Corporation
49964 + *
49965 + * Author: Stefan Berger, stefanb@us.ibm.com
49966 + *
49967 + * This code has been derived from drivers/xen/netback/interface.c
49968 + * Copyright (c) 2004, Keir Fraser
49969 + */
49970 +
49971 +#include "common.h"
49972 +#include <xen/balloon.h>
49973 +#include <xen/gnttab.h>
49974 +
49975 +static kmem_cache_t *tpmif_cachep;
49976 +int num_frontends = 0;
49977 +
49978 +LIST_HEAD(tpmif_list);
49979 +
49980 +static tpmif_t *alloc_tpmif(domid_t domid, struct backend_info *bi)
49981 +{
49982 + tpmif_t *tpmif;
49983 +
49984 + tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL);
49985 + if (tpmif == NULL)
49986 + goto out_of_memory;
49987 +
49988 + memset(tpmif, 0, sizeof (*tpmif));
49989 + tpmif->domid = domid;
49990 + tpmif->status = DISCONNECTED;
49991 + tpmif->bi = bi;
49992 + snprintf(tpmif->devname, sizeof(tpmif->devname), "tpmif%d", domid);
49993 + atomic_set(&tpmif->refcnt, 1);
49994 +
49995 + tpmif->mmap_pages = alloc_empty_pages_and_pagevec(TPMIF_TX_RING_SIZE);
49996 + if (tpmif->mmap_pages == NULL)
49997 + goto out_of_memory;
49998 +
49999 + list_add(&tpmif->tpmif_list, &tpmif_list);
50000 + num_frontends++;
50001 +
50002 + return tpmif;
50003 +
50004 + out_of_memory:
50005 + if (tpmif != NULL)
50006 + kmem_cache_free(tpmif_cachep, tpmif);
50007 + printk("%s: out of memory\n", __FUNCTION__);
50008 + return ERR_PTR(-ENOMEM);
50009 +}
50010 +
50011 +static void free_tpmif(tpmif_t * tpmif)
50012 +{
50013 + num_frontends--;
50014 + list_del(&tpmif->tpmif_list);
50015 + free_empty_pages_and_pagevec(tpmif->mmap_pages, TPMIF_TX_RING_SIZE);
50016 + kmem_cache_free(tpmif_cachep, tpmif);
50017 +}
50018 +
50019 +tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi)
50020 +{
50021 + tpmif_t *tpmif;
50022 +
50023 + list_for_each_entry(tpmif, &tpmif_list, tpmif_list) {
50024 + if (tpmif->bi == bi) {
50025 + if (tpmif->domid == domid) {
50026 + tpmif_get(tpmif);
50027 + return tpmif;
50028 + } else {
50029 + return ERR_PTR(-EEXIST);
50030 + }
50031 + }
50032 + }
50033 +
50034 + return alloc_tpmif(domid, bi);
50035 +}
50036 +
50037 +static int map_frontend_page(tpmif_t *tpmif, unsigned long shared_page)
50038 +{
50039 + struct gnttab_map_grant_ref op;
50040 +
50041 + gnttab_set_map_op(&op, (unsigned long)tpmif->tx_area->addr,
50042 + GNTMAP_host_map, shared_page, tpmif->domid);
50043 +
50044 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
50045 + BUG();
50046 +
50047 + if (op.status) {
50048 + DPRINTK(" Grant table operation failure !\n");
50049 + return op.status;
50050 + }
50051 +
50052 + tpmif->shmem_ref = shared_page;
50053 + tpmif->shmem_handle = op.handle;
50054 +
50055 + return 0;
50056 +}
50057 +
50058 +static void unmap_frontend_page(tpmif_t *tpmif)
50059 +{
50060 + struct gnttab_unmap_grant_ref op;
50061 +
50062 + gnttab_set_unmap_op(&op, (unsigned long)tpmif->tx_area->addr,
50063 + GNTMAP_host_map, tpmif->shmem_handle);
50064 +
50065 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
50066 + BUG();
50067 +}
50068 +
50069 +int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn)
50070 +{
50071 + int err;
50072 +
50073 + if (tpmif->irq)
50074 + return 0;
50075 +
50076 + if ((tpmif->tx_area = alloc_vm_area(PAGE_SIZE)) == NULL)
50077 + return -ENOMEM;
50078 +
50079 + err = map_frontend_page(tpmif, shared_page);
50080 + if (err) {
50081 + free_vm_area(tpmif->tx_area);
50082 + return err;
50083 + }
50084 +
50085 + tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr;
50086 + memset(tpmif->tx, 0, PAGE_SIZE);
50087 +
50088 + err = bind_interdomain_evtchn_to_irqhandler(
50089 + tpmif->domid, evtchn, tpmif_be_int, 0, tpmif->devname, tpmif);
50090 + if (err < 0) {
50091 + unmap_frontend_page(tpmif);
50092 + free_vm_area(tpmif->tx_area);
50093 + return err;
50094 + }
50095 + tpmif->irq = err;
50096 +
50097 + tpmif->shmem_ref = shared_page;
50098 + tpmif->active = 1;
50099 +
50100 + return 0;
50101 +}
50102 +
50103 +void tpmif_disconnect_complete(tpmif_t *tpmif)
50104 +{
50105 + if (tpmif->irq)
50106 + unbind_from_irqhandler(tpmif->irq, tpmif);
50107 +
50108 + if (tpmif->tx) {
50109 + unmap_frontend_page(tpmif);
50110 + free_vm_area(tpmif->tx_area);
50111 + }
50112 +
50113 + free_tpmif(tpmif);
50114 +}
50115 +
50116 +void __init tpmif_interface_init(void)
50117 +{
50118 + tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t),
50119 + 0, 0, NULL, NULL);
50120 +}
50121 +
50122 +void __exit tpmif_interface_exit(void)
50123 +{
50124 + kmem_cache_destroy(tpmif_cachep);
50125 +}
50126 Index: head-2008-11-25/drivers/xen/tpmback/tpmback.c
50127 ===================================================================
50128 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
50129 +++ head-2008-11-25/drivers/xen/tpmback/tpmback.c 2007-06-12 13:13:45.000000000 +0200
50130 @@ -0,0 +1,944 @@
50131 +/******************************************************************************
50132 + * drivers/xen/tpmback/tpmback.c
50133 + *
50134 + * Copyright (c) 2005, IBM Corporation
50135 + *
50136 + * Author: Stefan Berger, stefanb@us.ibm.com
50137 + * Grant table support: Mahadevan Gomathisankaran
50138 + *
50139 + * This code has been derived from drivers/xen/netback/netback.c
50140 + * Copyright (c) 2002-2004, K A Fraser
50141 + *
50142 + */
50143 +
50144 +#include "common.h"
50145 +#include <xen/evtchn.h>
50146 +
50147 +#include <linux/types.h>
50148 +#include <linux/list.h>
50149 +#include <linux/miscdevice.h>
50150 +#include <linux/poll.h>
50151 +#include <asm/uaccess.h>
50152 +#include <xen/xenbus.h>
50153 +#include <xen/interface/grant_table.h>
50154 +#include <xen/gnttab.h>
50155 +
50156 +/* local data structures */
50157 +struct data_exchange {
50158 + struct list_head pending_pak;
50159 + struct list_head current_pak;
50160 + unsigned int copied_so_far;
50161 + u8 has_opener:1;
50162 + u8 aborted:1;
50163 + rwlock_t pak_lock; // protects all of the previous fields
50164 + wait_queue_head_t wait_queue;
50165 +};
50166 +
50167 +struct vtpm_resp_hdr {
50168 + uint32_t instance_no;
50169 + uint16_t tag_no;
50170 + uint32_t len_no;
50171 + uint32_t ordinal_no;
50172 +} __attribute__ ((packed));
50173 +
50174 +struct packet {
50175 + struct list_head next;
50176 + unsigned int data_len;
50177 + u8 *data_buffer;
50178 + tpmif_t *tpmif;
50179 + u32 tpm_instance;
50180 + u8 req_tag;
50181 + u32 last_read;
50182 + u8 flags;
50183 + struct timer_list processing_timer;
50184 +};
50185 +
50186 +enum {
50187 + PACKET_FLAG_DISCARD_RESPONSE = 1,
50188 +};
50189 +
50190 +/* local variables */
50191 +static struct data_exchange dataex;
50192 +
50193 +/* local function prototypes */
50194 +static int _packet_write(struct packet *pak,
50195 + const char *data, size_t size, int userbuffer);
50196 +static void processing_timeout(unsigned long ptr);
50197 +static int packet_read_shmem(struct packet *pak,
50198 + tpmif_t * tpmif,
50199 + u32 offset,
50200 + char *buffer, int isuserbuffer, u32 left);
50201 +static int vtpm_queue_packet(struct packet *pak);
50202 +
50203 +/***************************************************************
50204 + Buffer copying for user and kernel space buffers.
50205 +***************************************************************/
50206 +static inline int copy_from_buffer(void *to,
50207 + const void *from, unsigned long size,
50208 + int isuserbuffer)
50209 +{
50210 + if (isuserbuffer) {
50211 + if (copy_from_user(to, (void __user *)from, size))
50212 + return -EFAULT;
50213 + } else {
50214 + memcpy(to, from, size);
50215 + }
50216 + return 0;
50217 +}
50218 +
50219 +static inline int copy_to_buffer(void *to,
50220 + const void *from, unsigned long size,
50221 + int isuserbuffer)
50222 +{
50223 + if (isuserbuffer) {
50224 + if (copy_to_user((void __user *)to, from, size))
50225 + return -EFAULT;
50226 + } else {
50227 + memcpy(to, from, size);
50228 + }
50229 + return 0;
50230 +}
50231 +
50232 +
50233 +static void dataex_init(struct data_exchange *dataex)
50234 +{
50235 + INIT_LIST_HEAD(&dataex->pending_pak);
50236 + INIT_LIST_HEAD(&dataex->current_pak);
50237 + dataex->has_opener = 0;
50238 + rwlock_init(&dataex->pak_lock);
50239 + init_waitqueue_head(&dataex->wait_queue);
50240 +}
50241 +
50242 +/***************************************************************
50243 + Packet-related functions
50244 +***************************************************************/
50245 +
50246 +static struct packet *packet_find_instance(struct list_head *head,
50247 + u32 tpm_instance)
50248 +{
50249 + struct packet *pak;
50250 + struct list_head *p;
50251 +
50252 + /*
50253 + * traverse the list of packets and return the first
50254 + * one with the given instance number
50255 + */
50256 + list_for_each(p, head) {
50257 + pak = list_entry(p, struct packet, next);
50258 +
50259 + if (pak->tpm_instance == tpm_instance) {
50260 + return pak;
50261 + }
50262 + }
50263 + return NULL;
50264 +}
50265 +
50266 +static struct packet *packet_find_packet(struct list_head *head, void *packet)
50267 +{
50268 + struct packet *pak;
50269 + struct list_head *p;
50270 +
50271 + /*
50272 + * traverse the list of packets and return the first
50273 + * one that matches the given packet pointer
50274 + */
50275 + list_for_each(p, head) {
50276 + pak = list_entry(p, struct packet, next);
50277 +
50278 + if (pak == packet) {
50279 + return pak;
50280 + }
50281 + }
50282 + return NULL;
50283 +}
50284 +
50285 +static struct packet *packet_alloc(tpmif_t * tpmif,
50286 + u32 size, u8 req_tag, u8 flags)
50287 +{
50288 + struct packet *pak = NULL;
50289 + pak = kzalloc(sizeof (struct packet), GFP_ATOMIC);
50290 + if (NULL != pak) {
50291 + if (tpmif) {
50292 + pak->tpmif = tpmif;
50293 + pak->tpm_instance = tpmback_get_instance(tpmif->bi);
50294 + tpmif_get(tpmif);
50295 + }
50296 + pak->data_len = size;
50297 + pak->req_tag = req_tag;
50298 + pak->last_read = 0;
50299 + pak->flags = flags;
50300 +
50301 + /*
50302 + * cannot do tpmif_get(tpmif); bad things happen
50303 + * on the last tpmif_put()
50304 + */
50305 + init_timer(&pak->processing_timer);
50306 + pak->processing_timer.function = processing_timeout;
50307 + pak->processing_timer.data = (unsigned long)pak;
50308 + }
50309 + return pak;
50310 +}
50311 +
50312 +static inline void packet_reset(struct packet *pak)
50313 +{
50314 + pak->last_read = 0;
50315 +}
50316 +
50317 +static void packet_free(struct packet *pak)
50318 +{
50319 + if (timer_pending(&pak->processing_timer)) {
50320 + BUG();
50321 + }
50322 +
50323 + if (pak->tpmif)
50324 + tpmif_put(pak->tpmif);
50325 + kfree(pak->data_buffer);
50326 + /*
50327 + * cannot do tpmif_put(pak->tpmif); bad things happen
50328 + * on the last tpmif_put()
50329 + */
50330 + kfree(pak);
50331 +}
50332 +
50333 +
50334 +/*
50335 + * Write data to the shared memory and send it to the FE.
50336 + */
50337 +static int packet_write(struct packet *pak,
50338 + const char *data, size_t size, int isuserbuffer)
50339 +{
50340 + int rc = 0;
50341 +
50342 + if (0 != (pak->flags & PACKET_FLAG_DISCARD_RESPONSE)) {
50343 + /* Don't send a response to this packet. Just acknowledge it. */
50344 + rc = size;
50345 + } else {
50346 + rc = _packet_write(pak, data, size, isuserbuffer);
50347 + }
50348 +
50349 + return rc;
50350 +}
50351 +
50352 +int _packet_write(struct packet *pak,
50353 + const char *data, size_t size, int isuserbuffer)
50354 +{
50355 + /*
50356 + * Write into the shared memory pages directly
50357 + * and send it to the front end.
50358 + */
50359 + tpmif_t *tpmif = pak->tpmif;
50360 + grant_handle_t handle;
50361 + int rc = 0;
50362 + unsigned int i = 0;
50363 + unsigned int offset = 0;
50364 +
50365 + if (tpmif == NULL) {
50366 + return -EFAULT;
50367 + }
50368 +
50369 + if (tpmif->status == DISCONNECTED) {
50370 + return size;
50371 + }
50372 +
50373 + while (offset < size && i < TPMIF_TX_RING_SIZE) {
50374 + unsigned int tocopy;
50375 + struct gnttab_map_grant_ref map_op;
50376 + struct gnttab_unmap_grant_ref unmap_op;
50377 + tpmif_tx_request_t *tx;
50378 +
50379 + tx = &tpmif->tx->ring[i].req;
50380 +
50381 + if (0 == tx->addr) {
50382 + DPRINTK("ERROR: Buffer for outgoing packet NULL?! i=%d\n", i);
50383 + return 0;
50384 + }
50385 +
50386 + gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
50387 + GNTMAP_host_map, tx->ref, tpmif->domid);
50388 +
50389 + if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
50390 + &map_op, 1))) {
50391 + BUG();
50392 + }
50393 +
50394 + handle = map_op.handle;
50395 +
50396 + if (map_op.status) {
50397 + DPRINTK(" Grant table operation failure !\n");
50398 + return 0;
50399 + }
50400 +
50401 + tocopy = min_t(size_t, size - offset, PAGE_SIZE);
50402 +
50403 + if (copy_from_buffer((void *)(idx_to_kaddr(tpmif, i) |
50404 + (tx->addr & ~PAGE_MASK)),
50405 + &data[offset], tocopy, isuserbuffer)) {
50406 + tpmif_put(tpmif);
50407 + return -EFAULT;
50408 + }
50409 + tx->size = tocopy;
50410 +
50411 + gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
50412 + GNTMAP_host_map, handle);
50413 +
50414 + if (unlikely
50415 + (HYPERVISOR_grant_table_op
50416 + (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) {
50417 + BUG();
50418 + }
50419 +
50420 + offset += tocopy;
50421 + i++;
50422 + }
50423 +
50424 + rc = offset;
50425 + DPRINTK("Notifying frontend via irq %d\n", tpmif->irq);
50426 + notify_remote_via_irq(tpmif->irq);
50427 +
50428 + return rc;
50429 +}
50430 +
50431 +/*
50432 + * Read data from the shared memory and copy it directly into the
50433 + * provided buffer. Advance the last_read indicator which tells
50434 + * how many bytes have already been read.
50435 + */
50436 +static int packet_read(struct packet *pak, size_t numbytes,
50437 + char *buffer, size_t buffersize, int isuserbuffer)
50438 +{
50439 + tpmif_t *tpmif = pak->tpmif;
50440 +
50441 + /*
50442 + * Read 'numbytes' of data from the buffer. The first 4
50443 + * bytes are the instance number in network byte order,
50444 + * after that come the data from the shared memory buffer.
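+ *
+ * What ends up in the caller's buffer is, roughly (sketch only):
+ *
+ *   | instance number (4 bytes, network order) | packet data ... |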
50445 + */
50446 + u32 to_copy;
50447 + u32 offset = 0;
50448 + u32 room_left = buffersize;
50449 +
50450 + if (pak->last_read < 4) {
50451 + /*
50452 + * copy the instance number into the buffer
50453 + */
50454 + u32 instance_no = htonl(pak->tpm_instance);
50455 + u32 last_read = pak->last_read;
50456 +
50457 + to_copy = min_t(size_t, 4 - last_read, numbytes);
50458 +
50459 + if (copy_to_buffer(&buffer[0],
50460 + &(((u8 *) & instance_no)[last_read]),
50461 + to_copy, isuserbuffer)) {
50462 + return -EFAULT;
50463 + }
50464 +
50465 + pak->last_read += to_copy;
50466 + offset += to_copy;
50467 + room_left -= to_copy;
50468 + }
50469 +
50470 + /*
50471 + * If the packet has a data buffer appended, read from it...
50472 + */
50473 +
50474 + if (room_left > 0) {
50475 + if (pak->data_buffer) {
50476 + u32 to_copy = min_t(u32, pak->data_len - offset, room_left);
50477 + u32 last_read = pak->last_read - 4;
50478 +
50479 + if (copy_to_buffer(&buffer[offset],
50480 + &pak->data_buffer[last_read],
50481 + to_copy, isuserbuffer)) {
50482 + return -EFAULT;
50483 + }
50484 + pak->last_read += to_copy;
50485 + offset += to_copy;
50486 + } else {
50487 + offset = packet_read_shmem(pak,
50488 + tpmif,
50489 + offset,
50490 + buffer,
50491 + isuserbuffer, room_left);
50492 + }
50493 + }
50494 + return offset;
50495 +}
50496 +
50497 +static int packet_read_shmem(struct packet *pak,
50498 + tpmif_t * tpmif,
50499 + u32 offset, char *buffer, int isuserbuffer,
50500 + u32 room_left)
50501 +{
50502 + u32 last_read = pak->last_read - 4;
50503 + u32 i = (last_read / PAGE_SIZE);
50504 + u32 pg_offset = last_read & (PAGE_SIZE - 1);
50505 + u32 to_copy;
50506 + grant_handle_t handle;
50507 +
50508 + tpmif_tx_request_t *tx;
50509 +
50510 + tx = &tpmif->tx->ring[0].req;
50511 + /*
50512 + * Start copying data at the page with index 'i'
50513 + * and within that page at offset 'pg_offset'.
50514 + * Copy a maximum of 'room_left' bytes.
50515 + */
50516 + to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left);
50517 + while (to_copy > 0) {
50518 + void *src;
50519 + struct gnttab_map_grant_ref map_op;
50520 + struct gnttab_unmap_grant_ref unmap_op;
50521 +
50522 + tx = &tpmif->tx->ring[i].req;
50523 +
50524 + gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
50525 + GNTMAP_host_map, tx->ref, tpmif->domid);
50526 +
50527 + if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
50528 + &map_op, 1))) {
50529 + BUG();
50530 + }
50531 +
50532 + if (map_op.status) {
50533 + DPRINTK(" Grant table operation failure !\n");
50534 + return -EFAULT;
50535 + }
50536 +
50537 + handle = map_op.handle;
50538 +
50539 + if (to_copy > tx->size) {
50540 + /*
50541 + * User requests more than what's available
50542 + */
50543 + to_copy = min_t(u32, tx->size, to_copy);
50544 + }
50545 +
50546 + DPRINTK("Copying from mapped memory at %08lx\n",
50547 + (unsigned long)(idx_to_kaddr(tpmif, i) |
50548 + (tx->addr & ~PAGE_MASK)));
50549 +
50550 + src = (void *)(idx_to_kaddr(tpmif, i) |
50551 + ((tx->addr & ~PAGE_MASK) + pg_offset));
50552 + if (copy_to_buffer(&buffer[offset],
50553 + src, to_copy, isuserbuffer)) {
50554 + return -EFAULT;
50555 + }
50556 +
50557 + DPRINTK("Data from TPM-FE of domain %d are %d %d %d %d\n",
50558 + tpmif->domid, buffer[offset], buffer[offset + 1],
50559 + buffer[offset + 2], buffer[offset + 3]);
50560 +
50561 + gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
50562 + GNTMAP_host_map, handle);
50563 +
50564 + if (unlikely
50565 + (HYPERVISOR_grant_table_op
50566 + (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) {
50567 + BUG();
50568 + }
50569 +
50570 + offset += to_copy;
50571 + pg_offset = 0;
50572 + last_read += to_copy;
50573 + room_left -= to_copy;
50574 +
50575 + to_copy = min_t(u32, PAGE_SIZE, room_left);
50576 + i++;
50577 + } /* while (to_copy > 0) */
50578 + /*
50579 + * Adjust the last_read pointer
50580 + */
50581 + pak->last_read = last_read + 4;
50582 + return offset;
50583 +}
50584 +
50585 +/* ============================================================
50586 + * The file layer for reading data from this device
50587 + * ============================================================
50588 + */
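+
+/*
+ * Rough picture of how a userspace TPM emulator is expected to drive this
+ * device (sketch only; the device node path is an assumption and depends
+ * on how the misc device registered below, minor 225, name "vtpm", is
+ * exposed):
+ *
+ *	fd = open("/dev/vtpm", O_RDWR);
+ *	n = read(fd, buf, sizeof(buf));   // 4-byte instance + TPM request
+ *	// ... process the request ...
+ *	write(fd, resp, resp_len);        // same 4-byte instance + response
+ */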
50589 +static int vtpm_op_open(struct inode *inode, struct file *f)
50590 +{
50591 + int rc = 0;
50592 + unsigned long flags;
50593 +
50594 + write_lock_irqsave(&dataex.pak_lock, flags);
50595 + if (dataex.has_opener == 0) {
50596 + dataex.has_opener = 1;
50597 + } else {
50598 + rc = -EPERM;
50599 + }
50600 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50601 + return rc;
50602 +}
50603 +
50604 +static ssize_t vtpm_op_read(struct file *file,
50605 + char __user * data, size_t size, loff_t * offset)
50606 +{
50607 + int ret_size = -ENODATA;
50608 + struct packet *pak = NULL;
50609 + unsigned long flags;
50610 +
50611 + write_lock_irqsave(&dataex.pak_lock, flags);
50612 + if (dataex.aborted) {
50613 + dataex.aborted = 0;
50614 + dataex.copied_so_far = 0;
50615 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50616 + return -EIO;
50617 + }
50618 +
50619 + if (list_empty(&dataex.pending_pak)) {
50620 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50621 + wait_event_interruptible(dataex.wait_queue,
50622 + !list_empty(&dataex.pending_pak));
50623 + write_lock_irqsave(&dataex.pak_lock, flags);
50624 + dataex.copied_so_far = 0;
50625 + }
50626 +
50627 + if (!list_empty(&dataex.pending_pak)) {
50628 + unsigned int left;
50629 +
50630 + pak = list_entry(dataex.pending_pak.next, struct packet, next);
50631 + left = pak->data_len - dataex.copied_so_far;
50632 + list_del(&pak->next);
50633 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50634 +
50635 + DPRINTK("size given by app: %d, available: %d\n", size, left);
50636 +
50637 + ret_size = min_t(size_t, size, left);
50638 +
50639 + ret_size = packet_read(pak, ret_size, data, size, 1);
50640 +
50641 + write_lock_irqsave(&dataex.pak_lock, flags);
50642 +
50643 + if (ret_size < 0) {
50644 + del_singleshot_timer_sync(&pak->processing_timer);
50645 + packet_free(pak);
50646 + dataex.copied_so_far = 0;
50647 + } else {
50648 + DPRINTK("Copied %d bytes to user buffer\n", ret_size);
50649 +
50650 + dataex.copied_so_far += ret_size;
50651 + if (dataex.copied_so_far >= pak->data_len + 4) {
50652 + DPRINTK("All data from this packet given to app.\n");
50653 + /* All data given to app */
50654 +
50655 + del_singleshot_timer_sync(&pak->
50656 + processing_timer);
50657 + list_add_tail(&pak->next, &dataex.current_pak);
50658 + /*
50659 + * The more frontends that are handled at the same time,
50660 + * the more time we give the TPM to process the request.
50661 + */
50662 + mod_timer(&pak->processing_timer,
50663 + jiffies + (num_frontends * 60 * HZ));
50664 + dataex.copied_so_far = 0;
50665 + } else {
50666 + list_add(&pak->next, &dataex.pending_pak);
50667 + }
50668 + }
50669 + }
50670 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50671 +
50672 + DPRINTK("Returning result from read to app: %d\n", ret_size);
50673 +
50674 + return ret_size;
50675 +}
50676 +
50677 +/*
50678 + * Write operation - only works after a previous read operation!
50679 + */
50680 +static ssize_t vtpm_op_write(struct file *file,
50681 + const char __user * data, size_t size,
50682 + loff_t * offset)
50683 +{
50684 + struct packet *pak;
50685 + int rc = 0;
50686 + unsigned int off = 4;
50687 + unsigned long flags;
50688 + struct vtpm_resp_hdr vrh;
50689 +
50690 + /*
50691 + * Minimum required packet size is:
50692 + * 4 bytes for instance number
50693 + * 2 bytes for tag
50694 + * 4 bytes for paramSize
50695 + * 4 bytes for the ordinal
50696 + * sum: 14 bytes
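+ * (= sizeof(struct vtpm_resp_hdr), which is packed)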
50697 + */
50698 + if (size < sizeof (vrh))
50699 + return -EFAULT;
50700 +
50701 + if (copy_from_user(&vrh, data, sizeof (vrh)))
50702 + return -EFAULT;
50703 +
50704 + /* malformed packet? */
50705 + if ((off + ntohl(vrh.len_no)) != size)
50706 + return -EFAULT;
50707 +
50708 + write_lock_irqsave(&dataex.pak_lock, flags);
50709 + pak = packet_find_instance(&dataex.current_pak,
50710 + ntohl(vrh.instance_no));
50711 +
50712 + if (pak == NULL) {
50713 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50714 + DPRINTK(KERN_ALERT "No associated packet! (inst=%d)\n",
50715 + ntohl(vrh.instance_no));
50716 + return -EFAULT;
50717 + }
50718 +
50719 + del_singleshot_timer_sync(&pak->processing_timer);
50720 + list_del(&pak->next);
50721 +
50722 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50723 +
50724 + /*
50725 + * The first 'off' bytes hold the instance number - skip them.
50726 + */
50727 + size -= off;
50728 +
50729 + rc = packet_write(pak, &data[off], size, 1);
50730 +
50731 + if (rc > 0) {
50732 + /* add back the 'off' header bytes that were not written */
50733 + rc += off;
50734 + }
50735 + packet_free(pak);
50736 + return rc;
50737 +}
50738 +
50739 +static int vtpm_op_release(struct inode *inode, struct file *file)
50740 +{
50741 + unsigned long flags;
50742 +
50743 + vtpm_release_packets(NULL, 1);
50744 + write_lock_irqsave(&dataex.pak_lock, flags);
50745 + dataex.has_opener = 0;
50746 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50747 + return 0;
50748 +}
50749 +
50750 +static unsigned int vtpm_op_poll(struct file *file,
50751 + struct poll_table_struct *pts)
50752 +{
50753 + unsigned int flags = POLLOUT | POLLWRNORM;
50754 +
50755 + poll_wait(file, &dataex.wait_queue, pts);
50756 + if (!list_empty(&dataex.pending_pak)) {
50757 + flags |= POLLIN | POLLRDNORM;
50758 + }
50759 + return flags;
50760 +}
50761 +
50762 +static const struct file_operations vtpm_ops = {
50763 + .owner = THIS_MODULE,
50764 + .llseek = no_llseek,
50765 + .open = vtpm_op_open,
50766 + .read = vtpm_op_read,
50767 + .write = vtpm_op_write,
50768 + .release = vtpm_op_release,
50769 + .poll = vtpm_op_poll,
50770 +};
50771 +
50772 +static struct miscdevice vtpms_miscdevice = {
50773 + .minor = 225,
50774 + .name = "vtpm",
50775 + .fops = &vtpm_ops,
50776 +};
50777 +
50778 +/***************************************************************
50779 + Utility functions
50780 +***************************************************************/
50781 +
50782 +static int tpm_send_fail_message(struct packet *pak, u8 req_tag)
50783 +{
50784 + int rc;
50785 + static const unsigned char tpm_error_message_fail[] = {
50786 + 0x00, 0x00,
50787 + 0x00, 0x00, 0x00, 0x0a,
50788 + 0x00, 0x00, 0x00, 0x09 /* TPM_FAIL */
50789 + };
50790 + unsigned char buffer[sizeof (tpm_error_message_fail)];
50791 +
50792 + memcpy(buffer, tpm_error_message_fail,
50793 + sizeof (tpm_error_message_fail));
50794 + /*
50795 + * Insert the right response tag depending on the given tag
50796 + * All response tags are '+3' to the request tag.
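+ * (e.g. a request tag of 0x00C1, TPM_TAG_RQU_COMMAND, becomes the
+ * response tag 0x00C4, TPM_TAG_RSP_COMMAND).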
50797 + */
50798 + buffer[1] = req_tag + 3;
50799 +
50800 + /*
50801 + * Write the data to shared memory and notify the front-end
50802 + */
50803 + rc = packet_write(pak, buffer, sizeof (buffer), 0);
50804 +
50805 + return rc;
50806 +}
50807 +
50808 +static int _vtpm_release_packets(struct list_head *head,
50809 + tpmif_t * tpmif, int send_msgs)
50810 +{
50811 + int aborted = 0;
50812 + int c = 0;
50813 + struct packet *pak;
50814 + struct list_head *pos, *tmp;
50815 +
50816 + list_for_each_safe(pos, tmp, head) {
50817 + pak = list_entry(pos, struct packet, next);
50818 + c += 1;
50819 +
50820 + if (tpmif == NULL || pak->tpmif == tpmif) {
50821 + int can_send = 0;
50822 +
50823 + del_singleshot_timer_sync(&pak->processing_timer);
50824 + list_del(&pak->next);
50825 +
50826 + if (pak->tpmif && pak->tpmif->status == CONNECTED) {
50827 + can_send = 1;
50828 + }
50829 +
50830 + if (send_msgs && can_send) {
50831 + tpm_send_fail_message(pak, pak->req_tag);
50832 + }
50833 + packet_free(pak);
50834 + if (c == 1)
50835 + aborted = 1;
50836 + }
50837 + }
50838 + return aborted;
50839 +}
50840 +
50841 +int vtpm_release_packets(tpmif_t * tpmif, int send_msgs)
50842 +{
50843 + unsigned long flags;
50844 +
50845 + write_lock_irqsave(&dataex.pak_lock, flags);
50846 +
50847 + dataex.aborted = _vtpm_release_packets(&dataex.pending_pak,
50848 + tpmif,
50849 + send_msgs);
50850 + _vtpm_release_packets(&dataex.current_pak, tpmif, send_msgs);
50851 +
50852 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50853 + return 0;
50854 +}
50855 +
50856 +static int vtpm_queue_packet(struct packet *pak)
50857 +{
50858 + int rc = 0;
50859 +
50860 + if (dataex.has_opener) {
50861 + unsigned long flags;
50862 +
50863 + write_lock_irqsave(&dataex.pak_lock, flags);
50864 + list_add_tail(&pak->next, &dataex.pending_pak);
50865 + /* give the TPM some time to pick up the request */
50866 + mod_timer(&pak->processing_timer, jiffies + (30 * HZ));
50867 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50868 +
50869 + wake_up_interruptible(&dataex.wait_queue);
50870 + } else {
50871 + rc = -EFAULT;
50872 + }
50873 + return rc;
50874 +}
50875 +
50876 +static int vtpm_receive(tpmif_t * tpmif, u32 size)
50877 +{
50878 + int rc = 0;
50879 + unsigned char buffer[10];
50880 + __be32 *native_size;
50881 + struct packet *pak = packet_alloc(tpmif, size, 0, 0);
50882 +
50883 + if (!pak)
50884 + return -ENOMEM;
50885 + /*
50886 + * Read 10 bytes from the received buffer to test its
50887 + * content for validity.
50888 + */
50889 + if (sizeof (buffer) != packet_read(pak,
50890 + sizeof (buffer), buffer,
50891 + sizeof (buffer), 0)) {
50892 + goto failexit;
50893 + }
50894 + /*
50895 + * Reset the packet read pointer so we can read all its
50896 + * contents again.
50897 + */
50898 + packet_reset(pak);
50899 +
50900 + native_size = (__force __be32 *) (&buffer[4 + 2]);
50901 + /*
50902 + * Verify that the size of the packet is correct
50903 + * as indicated and that there's actually someone reading packets.
50904 + * The minimum size of the packet is '10' for tag, size indicator
50905 + * and ordinal.
50906 + */
50907 + if (size < 10 ||
50908 + be32_to_cpu(*native_size) != size ||
50909 + 0 == dataex.has_opener || tpmif->status != CONNECTED) {
50910 + rc = -EINVAL;
50911 + goto failexit;
50912 + } else {
50913 + rc = vtpm_queue_packet(pak);
50914 + if (rc < 0)
50915 + goto failexit;
50916 + }
50917 + return 0;
50918 +
50919 + failexit:
50920 + if (pak) {
50921 + tpm_send_fail_message(pak, buffer[4 + 1]);
50922 + packet_free(pak);
50923 + }
50924 + return rc;
50925 +}
50926 +
50927 +/*
50928 + * Timeout function that gets invoked when a packet has not been processed
50929 + * during the timeout period.
50930 + * The packet must be on a list when this function is invoked. This
50931 + * also means that once it's taken off a list, the timer must be
50932 + * destroyed as well.
50933 + */
50934 +static void processing_timeout(unsigned long ptr)
50935 +{
50936 + struct packet *pak = (struct packet *)ptr;
50937 + unsigned long flags;
50938 +
50939 + write_lock_irqsave(&dataex.pak_lock, flags);
50940 + /*
50941 + * Check whether the packet is still on one of
50942 + * the lists.
50943 + */
50944 + if (pak == packet_find_packet(&dataex.pending_pak, pak) ||
50945 + pak == packet_find_packet(&dataex.current_pak, pak)) {
50946 + if ((pak->flags & PACKET_FLAG_DISCARD_RESPONSE) == 0) {
50947 + tpm_send_fail_message(pak, pak->req_tag);
50948 + }
50949 + /* discard future responses */
50950 + pak->flags |= PACKET_FLAG_DISCARD_RESPONSE;
50951 + }
50952 +
50953 + write_unlock_irqrestore(&dataex.pak_lock, flags);
50954 +}
50955 +
50956 +static void tpm_tx_action(unsigned long unused);
50957 +static DECLARE_TASKLET(tpm_tx_tasklet, tpm_tx_action, 0);
50958 +
50959 +static struct list_head tpm_schedule_list;
50960 +static spinlock_t tpm_schedule_list_lock;
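+
+/*
+ * Work is deferred out of interrupt context: tpmif_be_int() below puts
+ * the interface on tpm_schedule_list and schedules tpm_tx_tasklet, and
+ * tpm_tx_action() then takes each interface off the list and hands its
+ * request up via vtpm_receive().
+ */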
50961 +
50962 +static inline void maybe_schedule_tx_action(void)
50963 +{
50964 + smp_mb();
50965 + tasklet_schedule(&tpm_tx_tasklet);
50966 +}
50967 +
50968 +static inline int __on_tpm_schedule_list(tpmif_t * tpmif)
50969 +{
50970 + return tpmif->list.next != NULL;
50971 +}
50972 +
50973 +static void remove_from_tpm_schedule_list(tpmif_t * tpmif)
50974 +{
50975 + spin_lock_irq(&tpm_schedule_list_lock);
50976 + if (likely(__on_tpm_schedule_list(tpmif))) {
50977 + list_del(&tpmif->list);
50978 + tpmif->list.next = NULL;
50979 + tpmif_put(tpmif);
50980 + }
50981 + spin_unlock_irq(&tpm_schedule_list_lock);
50982 +}
50983 +
50984 +static void add_to_tpm_schedule_list_tail(tpmif_t * tpmif)
50985 +{
50986 + if (__on_tpm_schedule_list(tpmif))
50987 + return;
50988 +
50989 + spin_lock_irq(&tpm_schedule_list_lock);
50990 + if (!__on_tpm_schedule_list(tpmif) && tpmif->active) {
50991 + list_add_tail(&tpmif->list, &tpm_schedule_list);
50992 + tpmif_get(tpmif);
50993 + }
50994 + spin_unlock_irq(&tpm_schedule_list_lock);
50995 +}
50996 +
50997 +void tpmif_schedule_work(tpmif_t * tpmif)
50998 +{
50999 + add_to_tpm_schedule_list_tail(tpmif);
51000 + maybe_schedule_tx_action();
51001 +}
51002 +
51003 +void tpmif_deschedule_work(tpmif_t * tpmif)
51004 +{
51005 + remove_from_tpm_schedule_list(tpmif);
51006 +}
51007 +
51008 +static void tpm_tx_action(unsigned long unused)
51009 +{
51010 + struct list_head *ent;
51011 + tpmif_t *tpmif;
51012 + tpmif_tx_request_t *tx;
51013 +
51014 + DPRINTK("%s: Getting data from front-end(s)!\n", __FUNCTION__);
51015 +
51016 + while (!list_empty(&tpm_schedule_list)) {
51017 + /* Get a tpmif from the list with work to do. */
51018 + ent = tpm_schedule_list.next;
51019 + tpmif = list_entry(ent, tpmif_t, list);
51020 + tpmif_get(tpmif);
51021 + remove_from_tpm_schedule_list(tpmif);
51022 +
51023 + tx = &tpmif->tx->ring[0].req;
51024 +
51025 + /* pass it up */
51026 + vtpm_receive(tpmif, tx->size);
51027 +
51028 + tpmif_put(tpmif);
51029 + }
51030 +}
51031 +
51032 +irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
51033 +{
51034 + tpmif_t *tpmif = (tpmif_t *) dev_id;
51035 +
51036 + add_to_tpm_schedule_list_tail(tpmif);
51037 + maybe_schedule_tx_action();
51038 + return IRQ_HANDLED;
51039 +}
51040 +
51041 +static int __init tpmback_init(void)
51042 +{
51043 + int rc;
51044 +
51045 + if ((rc = misc_register(&vtpms_miscdevice)) != 0) {
51046 + printk(KERN_ALERT
51047 + "Could not register misc device for TPM BE.\n");
51048 + return rc;
51049 + }
51050 +
51051 + dataex_init(&dataex);
51052 +
51053 + spin_lock_init(&tpm_schedule_list_lock);
51054 + INIT_LIST_HEAD(&tpm_schedule_list);
51055 +
51056 + tpmif_interface_init();
51057 + tpmif_xenbus_init();
51058 +
51059 + printk(KERN_ALERT "Successfully initialized TPM backend driver.\n");
51060 +
51061 + return 0;
51062 +}
51063 +
51064 +module_init(tpmback_init);
51065 +
51066 +void __exit tpmback_exit(void)
51067 +{
51068 + vtpm_release_packets(NULL, 0);
51069 + tpmif_xenbus_exit();
51070 + tpmif_interface_exit();
51071 + misc_deregister(&vtpms_miscdevice);
51072 +}
51073 +
51074 +MODULE_LICENSE("Dual BSD/GPL");
51075 Index: head-2008-11-25/drivers/xen/tpmback/xenbus.c
51076 ===================================================================
51077 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
51078 +++ head-2008-11-25/drivers/xen/tpmback/xenbus.c 2008-03-06 08:54:32.000000000 +0100
51079 @@ -0,0 +1,289 @@
51080 +/* Xenbus code for tpmif backend
51081 + Copyright (C) 2005 IBM Corporation
51082 + Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
51083 +
51084 + This program is free software; you can redistribute it and/or modify
51085 + it under the terms of the GNU General Public License as published by
51086 + the Free Software Foundation; either version 2 of the License, or
51087 + (at your option) any later version.
51088 +
51089 + This program is distributed in the hope that it will be useful,
51090 + but WITHOUT ANY WARRANTY; without even the implied warranty of
51091 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
51092 + GNU General Public License for more details.
51093 +
51094 + You should have received a copy of the GNU General Public License
51095 + along with this program; if not, write to the Free Software
51096 + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
51097 +*/
51098 +#include <stdarg.h>
51099 +#include <linux/module.h>
51100 +#include <xen/xenbus.h>
51101 +#include "common.h"
51102 +
51103 +struct backend_info
51104 +{
51105 + struct xenbus_device *dev;
51106 +
51107 + /* our communications channel */
51108 + tpmif_t *tpmif;
51109 +
51110 + long int frontend_id;
51111 + long int instance; // instance of TPM
51112 + u8 is_instance_set; // whether instance number has been set
51113 +
51114 + /* watch front end for changes */
51115 + struct xenbus_watch backend_watch;
51116 +};
51117 +
51118 +static void maybe_connect(struct backend_info *be);
51119 +static void connect(struct backend_info *be);
51120 +static int connect_ring(struct backend_info *be);
51121 +static void backend_changed(struct xenbus_watch *watch,
51122 + const char **vec, unsigned int len);
51123 +static void frontend_changed(struct xenbus_device *dev,
51124 + enum xenbus_state frontend_state);
51125 +
51126 +long int tpmback_get_instance(struct backend_info *bi)
51127 +{
51128 + long int res = -1;
51129 + if (bi && bi->is_instance_set)
51130 + res = bi->instance;
51131 + return res;
51132 +}
51133 +
51134 +static int tpmback_remove(struct xenbus_device *dev)
51135 +{
51136 + struct backend_info *be = dev->dev.driver_data;
51137 +
51138 + if (!be) return 0;
51139 +
51140 + if (be->backend_watch.node) {
51141 + unregister_xenbus_watch(&be->backend_watch);
51142 + kfree(be->backend_watch.node);
51143 + be->backend_watch.node = NULL;
51144 + }
51145 + if (be->tpmif) {
51146 + be->tpmif->bi = NULL;
51147 + vtpm_release_packets(be->tpmif, 0);
51148 + tpmif_put(be->tpmif);
51149 + be->tpmif = NULL;
51150 + }
51151 + kfree(be);
51152 + dev->dev.driver_data = NULL;
51153 + return 0;
51154 +}
51155 +
51156 +static int tpmback_probe(struct xenbus_device *dev,
51157 + const struct xenbus_device_id *id)
51158 +{
51159 + int err;
51160 + struct backend_info *be = kzalloc(sizeof(struct backend_info),
51161 + GFP_KERNEL);
51162 +
51163 + if (!be) {
51164 + xenbus_dev_fatal(dev, -ENOMEM,
51165 + "allocating backend structure");
51166 + return -ENOMEM;
51167 + }
51168 +
51169 + be->is_instance_set = 0;
51170 + be->dev = dev;
51171 + dev->dev.driver_data = be;
51172 +
51173 + err = xenbus_watch_path2(dev, dev->nodename,
51174 + "instance", &be->backend_watch,
51175 + backend_changed);
51176 + if (err) {
51177 + goto fail;
51178 + }
51179 +
51180 + err = xenbus_switch_state(dev, XenbusStateInitWait);
51181 + if (err) {
51182 + goto fail;
51183 + }
51184 + return 0;
51185 +fail:
51186 + tpmback_remove(dev);
51187 + return err;
51188 +}
51189 +
51190 +
51191 +static void backend_changed(struct xenbus_watch *watch,
51192 + const char **vec, unsigned int len)
51193 +{
51194 + int err;
51195 + long instance;
51196 + struct backend_info *be
51197 + = container_of(watch, struct backend_info, backend_watch);
51198 + struct xenbus_device *dev = be->dev;
51199 +
51200 + err = xenbus_scanf(XBT_NIL, dev->nodename,
51201 + "instance","%li", &instance);
51202 + if (XENBUS_EXIST_ERR(err)) {
51203 + return;
51204 + }
51205 +
51206 + if (err != 1) {
51207 + xenbus_dev_fatal(dev, err, "reading instance");
51208 + return;
51209 + }
51210 +
51211 + if (be->is_instance_set == 0) {
51212 + be->instance = instance;
51213 + be->is_instance_set = 1;
51214 + }
51215 +}
51216 +
51217 +
51218 +static void frontend_changed(struct xenbus_device *dev,
51219 + enum xenbus_state frontend_state)
51220 +{
51221 + struct backend_info *be = dev->dev.driver_data;
51222 + int err;
51223 +
51224 + switch (frontend_state) {
51225 + case XenbusStateInitialising:
51226 + case XenbusStateInitialised:
51227 + break;
51228 +
51229 + case XenbusStateConnected:
51230 + err = connect_ring(be);
51231 + if (err) {
51232 + return;
51233 + }
51234 + maybe_connect(be);
51235 + break;
51236 +
51237 + case XenbusStateClosing:
51238 + be->instance = -1;
51239 + xenbus_switch_state(dev, XenbusStateClosing);
51240 + break;
51241 +
51242 + case XenbusStateUnknown: /* keep it here */
51243 + case XenbusStateClosed:
51244 + xenbus_switch_state(dev, XenbusStateClosed);
51245 + device_unregister(&be->dev->dev);
51246 + tpmback_remove(dev);
51247 + break;
51248 +
51249 + default:
51250 + xenbus_dev_fatal(dev, -EINVAL,
51251 + "saw state %d at frontend",
51252 + frontend_state);
51253 + break;
51254 + }
51255 +}
51256 +
51257 +
51258 +
51259 +static void maybe_connect(struct backend_info *be)
51260 +{
51261 + if (be->tpmif == NULL || be->tpmif->status == CONNECTED)
51262 + return;
51263 +
51264 + connect(be);
51265 +}
51266 +
51267 +
51268 +static void connect(struct backend_info *be)
51269 +{
51270 + struct xenbus_transaction xbt;
51271 + int err;
51272 + struct xenbus_device *dev = be->dev;
51273 + unsigned long ready = 1;
51274 +
51275 +again:
51276 + err = xenbus_transaction_start(&xbt);
51277 + if (err) {
51278 + xenbus_dev_fatal(be->dev, err, "starting transaction");
51279 + return;
51280 + }
51281 +
51282 + err = xenbus_printf(xbt, be->dev->nodename,
51283 + "ready", "%lu", ready);
51284 + if (err) {
51285 + xenbus_dev_fatal(be->dev, err, "writing 'ready'");
51286 + goto abort;
51287 + }
51288 +
51289 + err = xenbus_transaction_end(xbt, 0);
51290 + if (err == -EAGAIN)
51291 + goto again;
51292 + if (err)
51293 + xenbus_dev_fatal(be->dev, err, "end of transaction");
51294 +
51295 + err = xenbus_switch_state(dev, XenbusStateConnected);
51296 + if (!err)
51297 + be->tpmif->status = CONNECTED;
51298 + return;
51299 +abort:
51300 + xenbus_transaction_end(xbt, 1);
51301 +}
51302 +
51303 +
51304 +static int connect_ring(struct backend_info *be)
51305 +{
51306 + struct xenbus_device *dev = be->dev;
51307 + unsigned long ring_ref;
51308 + unsigned int evtchn;
51309 + int err;
51310 +
51311 + err = xenbus_gather(XBT_NIL, dev->otherend,
51312 + "ring-ref", "%lu", &ring_ref,
51313 + "event-channel", "%u", &evtchn, NULL);
51314 + if (err) {
51315 + xenbus_dev_error(dev, err,
51316 + "reading %s/ring-ref and event-channel",
51317 + dev->otherend);
51318 + return err;
51319 + }
51320 +
51321 + if (!be->tpmif) {
51322 + be->tpmif = tpmif_find(dev->otherend_id, be);
51323 + if (IS_ERR(be->tpmif)) {
51324 + err = PTR_ERR(be->tpmif);
51325 + be->tpmif = NULL;
51326 + xenbus_dev_fatal(dev,err,"creating vtpm interface");
51327 + return err;
51328 + }
51329 + }
51330 +
51331 + if (be->tpmif != NULL) {
51332 + err = tpmif_map(be->tpmif, ring_ref, evtchn);
51333 + if (err) {
51334 + xenbus_dev_error(dev, err,
51335 + "mapping shared-frame %lu port %u",
51336 + ring_ref, evtchn);
51337 + return err;
51338 + }
51339 + }
51340 + return 0;
51341 +}
51342 +
51343 +
51344 +static const struct xenbus_device_id tpmback_ids[] = {
51345 + { "vtpm" },
51346 + { "" }
51347 +};
51348 +
51349 +
51350 +static struct xenbus_driver tpmback = {
51351 + .name = "vtpm",
51352 + .owner = THIS_MODULE,
51353 + .ids = tpmback_ids,
51354 + .probe = tpmback_probe,
51355 + .remove = tpmback_remove,
51356 + .otherend_changed = frontend_changed,
51357 +};
51358 +
51359 +
51360 +void tpmif_xenbus_init(void)
51361 +{
51362 + xenbus_register_backend(&tpmback);
51363 +}
51364 +
51365 +void tpmif_xenbus_exit(void)
51366 +{
51367 + xenbus_unregister_driver(&tpmback);
51368 +}
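connect() above uses the standard xenbus transaction idiom: start a transaction, write the nodes, try to commit, and redo the whole sequence when the commit returns -EAGAIN because another writer raced with it; the abort path matches the abort: label. A minimal stand-alone sketch of that control flow, with xbt_start/xbt_write/xbt_end as invented stand-ins for the real xenbus_transaction_start/xenbus_printf/xenbus_transaction_end calls:

#include <stdio.h>
#include <errno.h>

static int attempts;

/* Stand-ins: the first commit attempt pretends to race with another writer. */
static int xbt_start(void) { return 0; }
static int xbt_write(const char *node, const char *val)
{ printf("write %s = %s\n", node, val); return 0; }
static int xbt_end(int do_abort)
{ return (!do_abort && attempts++ == 0) ? -EAGAIN : 0; }

int main(void)
{
        int err;

again:
        if ((err = xbt_start()))
                return err;
        if ((err = xbt_write("ready", "1"))) {
                xbt_end(1);             /* abort the transaction on failure */
                return err;
        }
        err = xbt_end(0);               /* try to commit */
        if (err == -EAGAIN)
                goto again;             /* conflict: redo the whole transaction */
        return err;
}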
51369 Index: head-2008-11-25/drivers/xen/util.c
51370 ===================================================================
51371 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
51372 +++ head-2008-11-25/drivers/xen/util.c 2007-07-10 09:42:30.000000000 +0200
51373 @@ -0,0 +1,65 @@
51374 +#include <linux/mm.h>
51375 +#include <linux/module.h>
51376 +#include <linux/slab.h>
51377 +#include <linux/vmalloc.h>
51378 +#include <asm/uaccess.h>
51379 +#include <xen/driver_util.h>
51380 +
51381 +struct class *get_xen_class(void)
51382 +{
51383 + static struct class *xen_class;
51384 +
51385 + if (xen_class)
51386 + return xen_class;
51387 +
51388 + xen_class = class_create(THIS_MODULE, "xen");
51389 + if (IS_ERR(xen_class)) {
51390 + printk(KERN_WARNING "Failed to create xen sysfs class.\n");
51391 + xen_class = NULL;
51392 + }
51393 +
51394 + return xen_class;
51395 +}
51396 +EXPORT_SYMBOL_GPL(get_xen_class);
51397 +
51398 +#ifdef CONFIG_X86
51399 +static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
51400 +{
51401 + /* apply_to_page_range() does all the hard work. */
51402 + return 0;
51403 +}
51404 +
51405 +struct vm_struct *alloc_vm_area(unsigned long size)
51406 +{
51407 + struct vm_struct *area;
51408 +
51409 + area = get_vm_area(size, VM_IOREMAP);
51410 + if (area == NULL)
51411 + return NULL;
51412 +
51413 + /*
51414 + * This ensures that page tables are constructed for this region
51415 + * of kernel virtual address space and mapped into init_mm.
51416 + */
51417 + if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
51418 + area->size, f, NULL)) {
51419 + free_vm_area(area);
51420 + return NULL;
51421 + }
51422 +
51423 + /* Map page directories into every address space. */
51424 + vmalloc_sync_all();
51425 +
51426 + return area;
51427 +}
51428 +EXPORT_SYMBOL_GPL(alloc_vm_area);
51429 +
51430 +void free_vm_area(struct vm_struct *area)
51431 +{
51432 + struct vm_struct *ret;
51433 + ret = remove_vm_area(area->addr);
51434 + BUG_ON(ret != area);
51435 + kfree(area);
51436 +}
51437 +EXPORT_SYMBOL_GPL(free_vm_area);
51438 +#endif /* CONFIG_X86 */
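get_xen_class() above is a lazily created singleton: the first caller creates the sysfs class, later callers get the cached pointer, and a failed creation leaves the cached pointer NULL so the next caller retries. A stand-alone sketch of that shape; class_create_stub and struct fake_class are invented stand-ins, not kernel APIs:

#include <stdio.h>
#include <stdlib.h>

struct fake_class { const char *name; };

/* Invented stand-in for class_create(); returns NULL on failure. */
static struct fake_class *class_create_stub(const char *name)
{
        struct fake_class *c = malloc(sizeof(*c));

        if (c)
                c->name = name;
        return c;
}

static struct fake_class *get_class(void)
{
        static struct fake_class *cached;

        if (cached)
                return cached;          /* created earlier: reuse it */
        cached = class_create_stub("xen");
        if (!cached)
                fprintf(stderr, "failed to create class\n");
        return cached;                  /* NULL on failure, retried next call */
}

int main(void)
{
        printf("first:  %p\n", (void *)get_class());
        printf("second: %p\n", (void *)get_class());   /* same pointer */
        return 0;
}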
51439 Index: head-2008-11-25/drivers/xen/xenbus/xenbus_backend_client.c
51440 ===================================================================
51441 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
51442 +++ head-2008-11-25/drivers/xen/xenbus/xenbus_backend_client.c 2007-06-12 13:13:45.000000000 +0200
51443 @@ -0,0 +1,147 @@
51444 +/******************************************************************************
51445 + * Backend-client-facing interface for the Xenbus driver. In other words, the
51446 + * interface between the Xenbus and the device-specific code in the backend
51447 + * driver.
51448 + *
51449 + * Copyright (C) 2005-2006 XenSource Ltd
51450 + *
51451 + * This program is free software; you can redistribute it and/or
51452 + * modify it under the terms of the GNU General Public License version 2
51453 + * as published by the Free Software Foundation; or, when distributed
51454 + * separately from the Linux kernel or incorporated into other
51455 + * software packages, subject to the following license:
51456 + *
51457 + * Permission is hereby granted, free of charge, to any person obtaining a copy
51458 + * of this source file (the "Software"), to deal in the Software without
51459 + * restriction, including without limitation the rights to use, copy, modify,
51460 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
51461 + * and to permit persons to whom the Software is furnished to do so, subject to
51462 + * the following conditions:
51463 + *
51464 + * The above copyright notice and this permission notice shall be included in
51465 + * all copies or substantial portions of the Software.
51466 + *
51467 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
51468 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51469 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
51470 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51471 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
51472 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
51473 + * IN THE SOFTWARE.
51474 + */
51475 +
51476 +#include <linux/err.h>
51477 +#include <xen/gnttab.h>
51478 +#include <xen/xenbus.h>
51479 +#include <xen/driver_util.h>
51480 +
51481 +/* Based on Rusty Russell's skeleton driver's map_page */
51482 +struct vm_struct *xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref)
51483 +{
51484 + struct gnttab_map_grant_ref op;
51485 + struct vm_struct *area;
51486 +
51487 + area = alloc_vm_area(PAGE_SIZE);
51488 + if (!area)
51489 + return ERR_PTR(-ENOMEM);
51490 +
51491 + gnttab_set_map_op(&op, (unsigned long)area->addr, GNTMAP_host_map,
51492 + gnt_ref, dev->otherend_id);
51493 +
51494 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
51495 + BUG();
51496 +
51497 + if (op.status != GNTST_okay) {
51498 + free_vm_area(area);
51499 + xenbus_dev_fatal(dev, op.status,
51500 + "mapping in shared page %d from domain %d",
51501 + gnt_ref, dev->otherend_id);
51502 + BUG_ON(!IS_ERR(ERR_PTR(op.status)));
51503 + return ERR_PTR(op.status);
51504 + }
51505 +
51506 + /* Stuff the handle in an unused field */
51507 + area->phys_addr = (unsigned long)op.handle;
51508 +
51509 + return area;
51510 +}
51511 +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
51512 +
51513 +
51514 +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
51515 + grant_handle_t *handle, void *vaddr)
51516 +{
51517 + struct gnttab_map_grant_ref op;
51518 +
51519 + gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map,
51520 + gnt_ref, dev->otherend_id);
51521 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
51522 + BUG();
51523 +
51524 + if (op.status != GNTST_okay) {
51525 + xenbus_dev_fatal(dev, op.status,
51526 + "mapping in shared page %d from domain %d",
51527 + gnt_ref, dev->otherend_id);
51528 + } else
51529 + *handle = op.handle;
51530 +
51531 + return op.status;
51532 +}
51533 +EXPORT_SYMBOL_GPL(xenbus_map_ring);
51534 +
51535 +
51536 +/* Based on Rusty Russell's skeleton driver's unmap_page */
51537 +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, struct vm_struct *area)
51538 +{
51539 + struct gnttab_unmap_grant_ref op;
51540 +
51541 + gnttab_set_unmap_op(&op, (unsigned long)area->addr, GNTMAP_host_map,
51542 + (grant_handle_t)area->phys_addr);
51543 +
51544 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
51545 + BUG();
51546 +
51547 + if (op.status == GNTST_okay)
51548 + free_vm_area(area);
51549 + else
51550 + xenbus_dev_error(dev, op.status,
51551 + "unmapping page at handle %d error %d",
51552 + (int16_t)area->phys_addr, op.status);
51553 +
51554 + return op.status;
51555 +}
51556 +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
51557 +
51558 +
51559 +int xenbus_unmap_ring(struct xenbus_device *dev,
51560 + grant_handle_t handle, void *vaddr)
51561 +{
51562 + struct gnttab_unmap_grant_ref op;
51563 +
51564 + gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map,
51565 + handle);
51566 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
51567 + BUG();
51568 +
51569 + if (op.status != GNTST_okay)
51570 + xenbus_dev_error(dev, op.status,
51571 + "unmapping page at handle %d error %d",
51572 + handle, op.status);
51573 +
51574 + return op.status;
51575 +}
51576 +EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
51577 +
51578 +int xenbus_dev_is_online(struct xenbus_device *dev)
51579 +{
51580 + int rc, val;
51581 +
51582 + rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
51583 + if (rc != 1)
51584 + val = 0; /* no online node present */
51585 +
51586 + return val;
51587 +}
51588 +EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
51589 +
51590 +MODULE_LICENSE("Dual BSD/GPL");
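The helpers above come in pairs: a backend maps the frontend's ring with xenbus_map_ring_valloc() when it connects and hands the same vm_struct back to xenbus_unmap_ring_vfree() when it disconnects, because the grant handle needed for the unmap is stashed in area->phys_addr. A stand-alone sketch of that caller-side lifecycle; map_ring/unmap_ring and the local vm_struct are stand-ins, not the real calls or kernel types:

#include <stdio.h>
#include <stdlib.h>

/* Stand-in mirroring the two vm_struct fields used above. */
struct vm_struct { void *addr; unsigned long phys_addr; };

/* Stand-in for xenbus_map_ring_valloc(): "map" a page and stash a handle. */
static struct vm_struct *map_ring(int gnt_ref)
{
        struct vm_struct *area = malloc(sizeof(*area));

        if (!area)
                return NULL;
        area->addr = calloc(1, 4096);   /* plays the role of the mapped page */
        if (!area->addr) {
                free(area);
                return NULL;
        }
        area->phys_addr = 42;           /* plays the role of the grant handle */
        printf("mapped grant ref %d\n", gnt_ref);
        return area;
}

/* Stand-in for xenbus_unmap_ring_vfree(): needs the same vm_struct back. */
static void unmap_ring(struct vm_struct *area)
{
        printf("unmapped handle %lu\n", area->phys_addr);
        free(area->addr);
        free(area);
}

int main(void)
{
        struct vm_struct *ring = map_ring(7);

        if (!ring)
                return 1;
        /* ... service requests on ring->addr while connected ... */
        unmap_ring(ring);
        return 0;
}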
51591 Index: head-2008-11-25/drivers/xen/xenbus/xenbus_dev.c
51592 ===================================================================
51593 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
51594 +++ head-2008-11-25/drivers/xen/xenbus/xenbus_dev.c 2008-07-21 11:00:33.000000000 +0200
51595 @@ -0,0 +1,408 @@
51596 +/*
51597 + * xenbus_dev.c
51598 + *
51599 + * Driver giving user-space access to the kernel's xenbus connection
51600 + * to xenstore.
51601 + *
51602 + * Copyright (c) 2005, Christian Limpach
51603 + * Copyright (c) 2005, Rusty Russell, IBM Corporation
51604 + *
51605 + * This program is free software; you can redistribute it and/or
51606 + * modify it under the terms of the GNU General Public License version 2
51607 + * as published by the Free Software Foundation; or, when distributed
51608 + * separately from the Linux kernel or incorporated into other
51609 + * software packages, subject to the following license:
51610 + *
51611 + * Permission is hereby granted, free of charge, to any person obtaining a copy
51612 + * of this source file (the "Software"), to deal in the Software without
51613 + * restriction, including without limitation the rights to use, copy, modify,
51614 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
51615 + * and to permit persons to whom the Software is furnished to do so, subject to
51616 + * the following conditions:
51617 + *
51618 + * The above copyright notice and this permission notice shall be included in
51619 + * all copies or substantial portions of the Software.
51620 + *
51621 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
51622 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51623 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
51624 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51625 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
51626 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
51627 + * IN THE SOFTWARE.
51628 + */
51629 +
51630 +#include <linux/kernel.h>
51631 +#include <linux/errno.h>
51632 +#include <linux/uio.h>
51633 +#include <linux/notifier.h>
51634 +#include <linux/wait.h>
51635 +#include <linux/fs.h>
51636 +#include <linux/poll.h>
51637 +#include <linux/mutex.h>
51638 +
51639 +#include "xenbus_comms.h"
51640 +
51641 +#include <asm/uaccess.h>
51642 +#include <asm/hypervisor.h>
51643 +#include <xen/xenbus.h>
51644 +#include <xen/xen_proc.h>
51645 +#include <asm/hypervisor.h>
51646 +
51647 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
51648 +#include <xen/platform-compat.h>
51649 +#endif
51650 +
51651 +struct xenbus_dev_transaction {
51652 + struct list_head list;
51653 + struct xenbus_transaction handle;
51654 +};
51655 +
51656 +struct read_buffer {
51657 + struct list_head list;
51658 + unsigned int cons;
51659 + unsigned int len;
51660 + char msg[];
51661 +};
51662 +
51663 +struct xenbus_dev_data {
51664 + /* In-progress transaction. */
51665 + struct list_head transactions;
51666 +
51667 + /* Active watches. */
51668 + struct list_head watches;
51669 +
51670 + /* Partial request. */
51671 + unsigned int len;
51672 + union {
51673 + struct xsd_sockmsg msg;
51674 + char buffer[PAGE_SIZE];
51675 + } u;
51676 +
51677 + /* Response queue. */
51678 + struct list_head read_buffers;
51679 + wait_queue_head_t read_waitq;
51680 +
51681 + struct mutex reply_mutex;
51682 +};
51683 +
51684 +static struct proc_dir_entry *xenbus_dev_intf;
51685 +
51686 +static ssize_t xenbus_dev_read(struct file *filp,
51687 + char __user *ubuf,
51688 + size_t len, loff_t *ppos)
51689 +{
51690 + struct xenbus_dev_data *u = filp->private_data;
51691 + struct read_buffer *rb;
51692 + int i, ret;
51693 +
51694 + mutex_lock(&u->reply_mutex);
51695 + while (list_empty(&u->read_buffers)) {
51696 + mutex_unlock(&u->reply_mutex);
51697 + ret = wait_event_interruptible(u->read_waitq,
51698 + !list_empty(&u->read_buffers));
51699 + if (ret)
51700 + return ret;
51701 + mutex_lock(&u->reply_mutex);
51702 + }
51703 +
51704 + rb = list_entry(u->read_buffers.next, struct read_buffer, list);
51705 + for (i = 0; i < len;) {
51706 + put_user(rb->msg[rb->cons], ubuf + i);
51707 + i++;
51708 + rb->cons++;
51709 + if (rb->cons == rb->len) {
51710 + list_del(&rb->list);
51711 + kfree(rb);
51712 + if (list_empty(&u->read_buffers))
51713 + break;
51714 + rb = list_entry(u->read_buffers.next,
51715 + struct read_buffer, list);
51716 + }
51717 + }
51718 + mutex_unlock(&u->reply_mutex);
51719 +
51720 + return i;
51721 +}
51722 +
51723 +static void queue_reply(struct xenbus_dev_data *u,
51724 + char *data, unsigned int len)
51725 +{
51726 + struct read_buffer *rb;
51727 +
51728 + if (len == 0)
51729 + return;
51730 +
51731 + rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
51732 + BUG_ON(rb == NULL);
51733 +
51734 + rb->cons = 0;
51735 + rb->len = len;
51736 +
51737 + memcpy(rb->msg, data, len);
51738 +
51739 + list_add_tail(&rb->list, &u->read_buffers);
51740 +
51741 + wake_up(&u->read_waitq);
51742 +}
51743 +
51744 +struct watch_adapter
51745 +{
51746 + struct list_head list;
51747 + struct xenbus_watch watch;
51748 + struct xenbus_dev_data *dev_data;
51749 + char *token;
51750 +};
51751 +
51752 +static void free_watch_adapter (struct watch_adapter *watch)
51753 +{
51754 + kfree(watch->watch.node);
51755 + kfree(watch->token);
51756 + kfree(watch);
51757 +}
51758 +
51759 +static void watch_fired(struct xenbus_watch *watch,
51760 + const char **vec,
51761 + unsigned int len)
51762 +{
51763 + struct watch_adapter *adap =
51764 + container_of(watch, struct watch_adapter, watch);
51765 + struct xsd_sockmsg hdr;
51766 + const char *path, *token;
51767 + int path_len, tok_len, body_len, data_len = 0;
51768 +
51769 + path = vec[XS_WATCH_PATH];
51770 + token = adap->token;
51771 +
51772 + path_len = strlen(path) + 1;
51773 + tok_len = strlen(token) + 1;
51774 + if (len > 2)
51775 + data_len = vec[len] - vec[2] + 1;
51776 + body_len = path_len + tok_len + data_len;
51777 +
51778 + hdr.type = XS_WATCH_EVENT;
51779 + hdr.len = body_len;
51780 +
51781 + mutex_lock(&adap->dev_data->reply_mutex);
51782 + queue_reply(adap->dev_data, (char *)&hdr, sizeof(hdr));
51783 + queue_reply(adap->dev_data, (char *)path, path_len);
51784 + queue_reply(adap->dev_data, (char *)token, tok_len);
51785 + if (len > 2)
51786 + queue_reply(adap->dev_data, (char *)vec[2], data_len);
51787 + mutex_unlock(&adap->dev_data->reply_mutex);
51788 +}
51789 +
51790 +static LIST_HEAD(watch_list);
51791 +
51792 +static ssize_t xenbus_dev_write(struct file *filp,
51793 + const char __user *ubuf,
51794 + size_t len, loff_t *ppos)
51795 +{
51796 + struct xenbus_dev_data *u = filp->private_data;
51797 + struct xenbus_dev_transaction *trans = NULL;
51798 + uint32_t msg_type;
51799 + void *reply;
51800 + char *path, *token;
51801 + struct watch_adapter *watch, *tmp_watch;
51802 + int err, rc = len;
51803 +
51804 + if ((len + u->len) > sizeof(u->u.buffer)) {
51805 + rc = -EINVAL;
51806 + goto out;
51807 + }
51808 +
51809 + if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) {
51810 + rc = -EFAULT;
51811 + goto out;
51812 + }
51813 +
51814 + u->len += len;
51815 + if ((u->len < sizeof(u->u.msg)) ||
51816 + (u->len < (sizeof(u->u.msg) + u->u.msg.len)))
51817 + return rc;
51818 +
51819 + msg_type = u->u.msg.type;
51820 +
51821 + switch (msg_type) {
51822 + case XS_TRANSACTION_START:
51823 + case XS_TRANSACTION_END:
51824 + case XS_DIRECTORY:
51825 + case XS_READ:
51826 + case XS_GET_PERMS:
51827 + case XS_RELEASE:
51828 + case XS_GET_DOMAIN_PATH:
51829 + case XS_WRITE:
51830 + case XS_MKDIR:
51831 + case XS_RM:
51832 + case XS_SET_PERMS:
51833 + if (msg_type == XS_TRANSACTION_START) {
51834 + trans = kmalloc(sizeof(*trans), GFP_KERNEL);
51835 + if (!trans) {
51836 + rc = -ENOMEM;
51837 + goto out;
51838 + }
51839 + }
51840 +
51841 + reply = xenbus_dev_request_and_reply(&u->u.msg);
51842 + if (IS_ERR(reply)) {
51843 + kfree(trans);
51844 + rc = PTR_ERR(reply);
51845 + goto out;
51846 + }
51847 +
51848 + if (msg_type == XS_TRANSACTION_START) {
51849 + trans->handle.id = simple_strtoul(reply, NULL, 0);
51850 + list_add(&trans->list, &u->transactions);
51851 + } else if (msg_type == XS_TRANSACTION_END) {
51852 + list_for_each_entry(trans, &u->transactions, list)
51853 + if (trans->handle.id == u->u.msg.tx_id)
51854 + break;
51855 + BUG_ON(&trans->list == &u->transactions);
51856 + list_del(&trans->list);
51857 + kfree(trans);
51858 + }
51859 + mutex_lock(&u->reply_mutex);
51860 + queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
51861 + queue_reply(u, (char *)reply, u->u.msg.len);
51862 + mutex_unlock(&u->reply_mutex);
51863 + kfree(reply);
51864 + break;
51865 +
51866 + case XS_WATCH:
51867 + case XS_UNWATCH: {
51868 + static const char *XS_RESP = "OK";
51869 + struct xsd_sockmsg hdr;
51870 +
51871 + path = u->u.buffer + sizeof(u->u.msg);
51872 + token = memchr(path, 0, u->u.msg.len);
51873 + if (token == NULL) {
51874 + rc = -EILSEQ;
51875 + goto out;
51876 + }
51877 + token++;
51878 +
51879 + if (msg_type == XS_WATCH) {
51880 + watch = kzalloc(sizeof(*watch), GFP_KERNEL);
51881 + watch->watch.node = kmalloc(strlen(path)+1,
51882 + GFP_KERNEL);
51883 + strcpy((char *)watch->watch.node, path);
51884 + watch->watch.callback = watch_fired;
51885 + watch->token = kmalloc(strlen(token)+1, GFP_KERNEL);
51886 + strcpy(watch->token, token);
51887 + watch->dev_data = u;
51888 +
51889 + err = register_xenbus_watch(&watch->watch);
51890 + if (err) {
51891 + free_watch_adapter(watch);
51892 + rc = err;
51893 + goto out;
51894 + }
51895 +
51896 + list_add(&watch->list, &u->watches);
51897 + } else {
51898 + list_for_each_entry_safe(watch, tmp_watch,
51899 + &u->watches, list) {
51900 + if (!strcmp(watch->token, token) &&
51901 + !strcmp(watch->watch.node, path))
51902 + {
51903 + unregister_xenbus_watch(&watch->watch);
51904 + list_del(&watch->list);
51905 + free_watch_adapter(watch);
51906 + break;
51907 + }
51908 + }
51909 + }
51910 +
51911 + hdr.type = msg_type;
51912 + hdr.len = strlen(XS_RESP) + 1;
51913 + mutex_lock(&u->reply_mutex);
51914 + queue_reply(u, (char *)&hdr, sizeof(hdr));
51915 + queue_reply(u, (char *)XS_RESP, hdr.len);
51916 + mutex_unlock(&u->reply_mutex);
51917 + break;
51918 + }
51919 +
51920 + default:
51921 + rc = -EINVAL;
51922 + break;
51923 + }
51924 +
51925 + out:
51926 + u->len = 0;
51927 + return rc;
51928 +}
51929 +
51930 +static int xenbus_dev_open(struct inode *inode, struct file *filp)
51931 +{
51932 + struct xenbus_dev_data *u;
51933 +
51934 + if (xen_store_evtchn == 0)
51935 + return -ENOENT;
51936 +
51937 + nonseekable_open(inode, filp);
51938 +
51939 + u = kzalloc(sizeof(*u), GFP_KERNEL);
51940 + if (u == NULL)
51941 + return -ENOMEM;
51942 +
51943 + INIT_LIST_HEAD(&u->transactions);
51944 + INIT_LIST_HEAD(&u->watches);
51945 + INIT_LIST_HEAD(&u->read_buffers);
51946 + init_waitqueue_head(&u->read_waitq);
51947 +
51948 + mutex_init(&u->reply_mutex);
51949 +
51950 + filp->private_data = u;
51951 +
51952 + return 0;
51953 +}
51954 +
51955 +static int xenbus_dev_release(struct inode *inode, struct file *filp)
51956 +{
51957 + struct xenbus_dev_data *u = filp->private_data;
51958 + struct xenbus_dev_transaction *trans, *tmp;
51959 + struct watch_adapter *watch, *tmp_watch;
51960 +
51961 + list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
51962 + xenbus_transaction_end(trans->handle, 1);
51963 + list_del(&trans->list);
51964 + kfree(trans);
51965 + }
51966 +
51967 + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
51968 + unregister_xenbus_watch(&watch->watch);
51969 + list_del(&watch->list);
51970 + free_watch_adapter(watch);
51971 + }
51972 +
51973 + kfree(u);
51974 +
51975 + return 0;
51976 +}
51977 +
51978 +static unsigned int xenbus_dev_poll(struct file *file, poll_table *wait)
51979 +{
51980 + struct xenbus_dev_data *u = file->private_data;
51981 +
51982 + poll_wait(file, &u->read_waitq, wait);
51983 + if (!list_empty(&u->read_buffers))
51984 + return POLLIN | POLLRDNORM;
51985 + return 0;
51986 +}
51987 +
51988 +static const struct file_operations xenbus_dev_file_ops = {
51989 + .read = xenbus_dev_read,
51990 + .write = xenbus_dev_write,
51991 + .open = xenbus_dev_open,
51992 + .release = xenbus_dev_release,
51993 + .poll = xenbus_dev_poll,
51994 +};
51995 +
51996 +int xenbus_dev_init(void)
51997 +{
51998 + xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400);
51999 + if (xenbus_dev_intf)
52000 + xenbus_dev_intf->proc_fops = &xenbus_dev_file_ops;
52001 +
52002 + return 0;
52003 +}
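xenbus_dev_write() above buffers partial writes in u->u.buffer and only dispatches once a complete message has arrived, i.e. the header plus msg.len payload bytes. A stand-alone sketch of just that framing rule; struct hdr merely mirrors the header shape for illustration and the values fed in main() are arbitrary demo data:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Mirrors the shape of the header used above: type, ids, payload length. */
struct hdr { uint32_t type, req_id, tx_id, len; };

static unsigned char buf[4096];
static size_t have;

/* Feed bytes in; returns 1 when a full message was buffered and consumed,
 * 0 while more bytes are needed, -1 if the message would overflow buf. */
static int feed(const void *data, size_t n)
{
        struct hdr h;

        if (have + n > sizeof(buf))
                return -1;              /* mirrors the -EINVAL check above */
        memcpy(buf + have, data, n);
        have += n;

        if (have < sizeof(h))
                return 0;               /* header still incomplete */
        memcpy(&h, buf, sizeof(h));
        if (have < sizeof(h) + h.len)
                return 0;               /* payload still incomplete */

        printf("dispatch type %u with %u payload bytes\n",
               (unsigned)h.type, (unsigned)h.len);
        have = 0;                       /* ready for the next message */
        return 1;
}

int main(void)
{
        struct hdr h = { 1 /* arbitrary demo type */, 0, 0, 5 };

        feed(&h, sizeof(h));            /* header only: nothing dispatched */
        feed("node", 5);                /* 5 bytes incl. NUL complete the message */
        return 0;
}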
52004 Index: head-2008-11-25/drivers/xen/xenbus/xenbus_probe_backend.c
52005 ===================================================================
52006 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
52007 +++ head-2008-11-25/drivers/xen/xenbus/xenbus_probe_backend.c 2008-01-21 11:15:26.000000000 +0100
52008 @@ -0,0 +1,292 @@
52009 +/******************************************************************************
52010 + * Talks to Xen Store to figure out what devices we have (backend half).
52011 + *
52012 + * Copyright (C) 2005 Rusty Russell, IBM Corporation
52013 + * Copyright (C) 2005 Mike Wray, Hewlett-Packard
52014 + * Copyright (C) 2005, 2006 XenSource Ltd
52015 + * Copyright (C) 2007 Solarflare Communications, Inc.
52016 + *
52017 + * This program is free software; you can redistribute it and/or
52018 + * modify it under the terms of the GNU General Public License version 2
52019 + * as published by the Free Software Foundation; or, when distributed
52020 + * separately from the Linux kernel or incorporated into other
52021 + * software packages, subject to the following license:
52022 + *
52023 + * Permission is hereby granted, free of charge, to any person obtaining a copy
52024 + * of this source file (the "Software"), to deal in the Software without
52025 + * restriction, including without limitation the rights to use, copy, modify,
52026 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
52027 + * and to permit persons to whom the Software is furnished to do so, subject to
52028 + * the following conditions:
52029 + *
52030 + * The above copyright notice and this permission notice shall be included in
52031 + * all copies or substantial portions of the Software.
52032 + *
52033 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
52034 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
52035 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52036 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
52037 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
52038 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
52039 + * IN THE SOFTWARE.
52040 + */
52041 +
52042 +#define DPRINTK(fmt, args...) \
52043 + pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
52044 + __FUNCTION__, __LINE__, ##args)
52045 +
52046 +#include <linux/kernel.h>
52047 +#include <linux/err.h>
52048 +#include <linux/string.h>
52049 +#include <linux/ctype.h>
52050 +#include <linux/fcntl.h>
52051 +#include <linux/mm.h>
52052 +#include <linux/notifier.h>
52053 +
52054 +#include <asm/io.h>
52055 +#include <asm/page.h>
52056 +#include <asm/maddr.h>
52057 +#include <asm/pgtable.h>
52058 +#include <asm/hypervisor.h>
52059 +#include <xen/xenbus.h>
52060 +#include <xen/xen_proc.h>
52061 +#include <xen/evtchn.h>
52062 +#include <xen/features.h>
52063 +
52064 +#include "xenbus_comms.h"
52065 +#include "xenbus_probe.h"
52066 +
52067 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
52068 +#include <xen/platform-compat.h>
52069 +#endif
52070 +
52071 +static int xenbus_uevent_backend(struct device *dev, char **envp,
52072 + int num_envp, char *buffer, int buffer_size);
52073 +static int xenbus_probe_backend(const char *type, const char *domid);
52074 +
52075 +extern int read_otherend_details(struct xenbus_device *xendev,
52076 + char *id_node, char *path_node);
52077 +
52078 +static int read_frontend_details(struct xenbus_device *xendev)
52079 +{
52080 + return read_otherend_details(xendev, "frontend-id", "frontend");
52081 +}
52082 +
52083 +/* backend/<type>/<fe-domid>/<id> => <type>-<fe-domid>-<id> */
52084 +static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
52085 +{
52086 + int domid, err;
52087 + const char *devid, *type, *frontend;
52088 + unsigned int typelen;
52089 +
52090 + type = strchr(nodename, '/');
52091 + if (!type)
52092 + return -EINVAL;
52093 + type++;
52094 + typelen = strcspn(type, "/");
52095 + if (!typelen || type[typelen] != '/')
52096 + return -EINVAL;
52097 +
52098 + devid = strrchr(nodename, '/') + 1;
52099 +
52100 + err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
52101 + "frontend", NULL, &frontend,
52102 + NULL);
52103 + if (err)
52104 + return err;
52105 + if (strlen(frontend) == 0)
52106 + err = -ERANGE;
52107 + if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
52108 + err = -ENOENT;
52109 + kfree(frontend);
52110 +
52111 + if (err)
52112 + return err;
52113 +
52114 + if (snprintf(bus_id, BUS_ID_SIZE,
52115 + "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
52116 + return -ENOSPC;
52117 + return 0;
52118 +}
52119 +
52120 +static struct xen_bus_type xenbus_backend = {
52121 + .root = "backend",
52122 + .levels = 3, /* backend/type/<frontend>/<id> */
52123 + .get_bus_id = backend_bus_id,
52124 + .probe = xenbus_probe_backend,
52125 + .error = -ENODEV,
52126 + .bus = {
52127 + .name = "xen-backend",
52128 + .match = xenbus_match,
52129 + .probe = xenbus_dev_probe,
52130 + .remove = xenbus_dev_remove,
52131 +// .shutdown = xenbus_dev_shutdown,
52132 + .uevent = xenbus_uevent_backend,
52133 + },
52134 + .dev = {
52135 + .bus_id = "xen-backend",
52136 + },
52137 +};
52138 +
52139 +static int xenbus_uevent_backend(struct device *dev, char **envp,
52140 + int num_envp, char *buffer, int buffer_size)
52141 +{
52142 + struct xenbus_device *xdev;
52143 + struct xenbus_driver *drv;
52144 + int i = 0;
52145 + int length = 0;
52146 +
52147 + DPRINTK("");
52148 +
52149 + if (dev == NULL)
52150 + return -ENODEV;
52151 +
52152 + xdev = to_xenbus_device(dev);
52153 + if (xdev == NULL)
52154 + return -ENODEV;
52155 +
52156 + /* stuff we want to pass to /sbin/hotplug */
52157 + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
52158 + "XENBUS_TYPE=%s", xdev->devicetype);
52159 +
52160 + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
52161 + "XENBUS_PATH=%s", xdev->nodename);
52162 +
52163 + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
52164 + "XENBUS_BASE_PATH=%s", xenbus_backend.root);
52165 +
52166 + /* terminate, set to next free slot, shrink available space */
52167 + envp[i] = NULL;
52168 + envp = &envp[i];
52169 + num_envp -= i;
52170 + buffer = &buffer[length];
52171 + buffer_size -= length;
52172 +
52173 + if (dev->driver) {
52174 + drv = to_xenbus_driver(dev->driver);
52175 + if (drv && drv->uevent)
52176 + return drv->uevent(xdev, envp, num_envp, buffer,
52177 + buffer_size);
52178 + }
52179 +
52180 + return 0;
52181 +}
52182 +
52183 +int xenbus_register_backend(struct xenbus_driver *drv)
52184 +{
52185 + drv->read_otherend_details = read_frontend_details;
52186 +
52187 + return xenbus_register_driver_common(drv, &xenbus_backend);
52188 +}
52189 +EXPORT_SYMBOL_GPL(xenbus_register_backend);
52190 +
52191 +/* backend/<typename>/<frontend-domid>/<name> */
52192 +static int xenbus_probe_backend_unit(const char *dir,
52193 + const char *type,
52194 + const char *name)
52195 +{
52196 + char *nodename;
52197 + int err;
52198 +
52199 + nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
52200 + if (!nodename)
52201 + return -ENOMEM;
52202 +
52203 + DPRINTK("%s\n", nodename);
52204 +
52205 + err = xenbus_probe_node(&xenbus_backend, type, nodename);
52206 + kfree(nodename);
52207 + return err;
52208 +}
52209 +
52210 +/* backend/<typename>/<frontend-domid> */
52211 +static int xenbus_probe_backend(const char *type, const char *domid)
52212 +{
52213 + char *nodename;
52214 + int err = 0;
52215 + char **dir;
52216 + unsigned int i, dir_n = 0;
52217 +
52218 + DPRINTK("");
52219 +
52220 + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_backend.root, type, domid);
52221 + if (!nodename)
52222 + return -ENOMEM;
52223 +
52224 + dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
52225 + if (IS_ERR(dir)) {
52226 + kfree(nodename);
52227 + return PTR_ERR(dir);
52228 + }
52229 +
52230 + for (i = 0; i < dir_n; i++) {
52231 + err = xenbus_probe_backend_unit(nodename, type, dir[i]);
52232 + if (err)
52233 + break;
52234 + }
52235 + kfree(dir);
52236 + kfree(nodename);
52237 + return err;
52238 +}
52239 +
52240 +static void backend_changed(struct xenbus_watch *watch,
52241 + const char **vec, unsigned int len)
52242 +{
52243 + DPRINTK("");
52244 +
52245 + dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
52246 +}
52247 +
52248 +static struct xenbus_watch be_watch = {
52249 + .node = "backend",
52250 + .callback = backend_changed,
52251 +};
52252 +
52253 +void xenbus_backend_suspend(int (*fn)(struct device *, void *))
52254 +{
52255 + DPRINTK("");
52256 + if (!xenbus_backend.error)
52257 + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
52258 +}
52259 +
52260 +void xenbus_backend_resume(int (*fn)(struct device *, void *))
52261 +{
52262 + DPRINTK("");
52263 + if (!xenbus_backend.error)
52264 + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
52265 +}
52266 +
52267 +void xenbus_backend_probe_and_watch(void)
52268 +{
52269 + xenbus_probe_devices(&xenbus_backend);
52270 + register_xenbus_watch(&be_watch);
52271 +}
52272 +
52273 +void xenbus_backend_bus_register(void)
52274 +{
52275 + xenbus_backend.error = bus_register(&xenbus_backend.bus);
52276 + if (xenbus_backend.error)
52277 + printk(KERN_WARNING
52278 + "XENBUS: Error registering backend bus: %i\n",
52279 + xenbus_backend.error);
52280 +}
52281 +
52282 +void xenbus_backend_device_register(void)
52283 +{
52284 + if (xenbus_backend.error)
52285 + return;
52286 +
52287 + xenbus_backend.error = device_register(&xenbus_backend.dev);
52288 + if (xenbus_backend.error) {
52289 + bus_unregister(&xenbus_backend.bus);
52290 + printk(KERN_WARNING
52291 + "XENBUS: Error registering backend device: %i\n",
52292 + xenbus_backend.error);
52293 + }
52294 +}
52295 +
52296 +int xenbus_for_each_backend(void *arg, int (*fn)(struct device *, void *))
52297 +{
52298 + return bus_for_each_dev(&xenbus_backend.bus, NULL, arg, fn);
52299 +}
52300 +EXPORT_SYMBOL_GPL(xenbus_for_each_backend);
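backend_bus_id() above turns a xenstore path of the form backend/<type>/<fe-domid>/<id> into the bus id <type>-<fe-domid>-<id>, reading the domid and the frontend path from xenstore along the way. The string half of that transformation can be sketched on its own; BUS_ID_SIZE is assumed to be 20 as in 2.6-era kernels, and the domid is passed in rather than read from the store:

#include <stdio.h>
#include <string.h>

#define BUS_ID_SIZE 20  /* assumption: matches the 2.6-era constant */

/* Pure-string sketch: "backend/vif/7/0" plus frontend-id 7 -> "vif-7-0". */
static int make_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename, int domid)
{
        const char *type, *devid;
        size_t typelen;

        type = strchr(nodename, '/');
        if (!type)
                return -1;
        type++;
        typelen = strcspn(type, "/");
        if (!typelen || type[typelen] != '/')
                return -1;
        devid = strrchr(nodename, '/') + 1;

        if (snprintf(bus_id, BUS_ID_SIZE, "%.*s-%i-%s",
                     (int)typelen, type, domid, devid) >= BUS_ID_SIZE)
                return -1;              /* would have been truncated */
        return 0;
}

int main(void)
{
        char id[BUS_ID_SIZE];

        if (!make_bus_id(id, "backend/vif/7/0", 7))
                printf("%s\n", id);     /* prints vif-7-0 */
        return 0;
}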
52301 Index: head-2008-11-25/drivers/xen/xenoprof/xenoprofile.c
52302 ===================================================================
52303 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
52304 +++ head-2008-11-25/drivers/xen/xenoprof/xenoprofile.c 2008-09-15 13:40:15.000000000 +0200
52305 @@ -0,0 +1,545 @@
52306 +/**
52307 + * @file xenoprofile.c
52308 + *
52309 + * @remark Copyright 2002 OProfile authors
52310 + * @remark Read the file COPYING
52311 + *
52312 + * @author John Levon <levon@movementarian.org>
52313 + *
52314 + * Modified by Aravind Menon and Jose Renato Santos for Xen
52315 + * These modifications are:
52316 + * Copyright (C) 2005 Hewlett-Packard Co.
52317 + *
52318 + * Separated out arch-generic part
52319 + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
52320 + * VA Linux Systems Japan K.K.
52321 + */
52322 +
52323 +#include <linux/init.h>
52324 +#include <linux/notifier.h>
52325 +#include <linux/smp.h>
52326 +#include <linux/oprofile.h>
52327 +#include <linux/sysdev.h>
52328 +#include <linux/slab.h>
52329 +#include <linux/interrupt.h>
52330 +#include <linux/vmalloc.h>
52331 +#include <asm/pgtable.h>
52332 +#include <xen/evtchn.h>
52333 +#include <xen/xenoprof.h>
52334 +#include <xen/driver_util.h>
52335 +#include <xen/interface/xen.h>
52336 +#include <xen/interface/xenoprof.h>
52337 +#include "../../../drivers/oprofile/cpu_buffer.h"
52338 +#include "../../../drivers/oprofile/event_buffer.h"
52339 +
52340 +#define MAX_XENOPROF_SAMPLES 16
52341 +
52342 +/* sample buffers shared with Xen */
52343 +static xenoprof_buf_t *xenoprof_buf[MAX_VIRT_CPUS];
52344 +/* Shared buffer area */
52345 +static struct xenoprof_shared_buffer shared_buffer;
52346 +
52347 +/* Passive sample buffers shared with Xen */
52348 +static xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS];
52349 +/* Passive shared buffer area */
52350 +static struct xenoprof_shared_buffer p_shared_buffer[MAX_OPROF_DOMAINS];
52351 +
52352 +static int xenoprof_start(void);
52353 +static void xenoprof_stop(void);
52354 +
52355 +static int xenoprof_enabled = 0;
52356 +static int xenoprof_is_primary = 0;
52357 +static int active_defined;
52358 +
52359 +extern unsigned long backtrace_depth;
52360 +
52361 +/* Number of buffers in shared area (one per VCPU) */
52362 +static int nbuf;
52363 +/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
52364 +static int ovf_irq[NR_CPUS];
52365 +/* cpu model type string - copied from Xen on XENOPROF_init command */
52366 +static char cpu_type[XENOPROF_CPU_TYPE_SIZE];
52367 +
52368 +#ifdef CONFIG_PM
52369 +
52370 +static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
52371 +{
52372 + if (xenoprof_enabled == 1)
52373 + xenoprof_stop();
52374 + return 0;
52375 +}
52376 +
52377 +
52378 +static int xenoprof_resume(struct sys_device * dev)
52379 +{
52380 + if (xenoprof_enabled == 1)
52381 + xenoprof_start();
52382 + return 0;
52383 +}
52384 +
52385 +
52386 +static struct sysdev_class oprofile_sysclass = {
52387 + set_kset_name("oprofile"),
52388 + .resume = xenoprof_resume,
52389 + .suspend = xenoprof_suspend
52390 +};
52391 +
52392 +
52393 +static struct sys_device device_oprofile = {
52394 + .id = 0,
52395 + .cls = &oprofile_sysclass,
52396 +};
52397 +
52398 +
52399 +static int __init init_driverfs(void)
52400 +{
52401 + int error;
52402 + if (!(error = sysdev_class_register(&oprofile_sysclass)))
52403 + error = sysdev_register(&device_oprofile);
52404 + return error;
52405 +}
52406 +
52407 +
52408 +static void exit_driverfs(void)
52409 +{
52410 + sysdev_unregister(&device_oprofile);
52411 + sysdev_class_unregister(&oprofile_sysclass);
52412 +}
52413 +
52414 +#else
52415 +#define init_driverfs() do { } while (0)
52416 +#define exit_driverfs() do { } while (0)
52417 +#endif /* CONFIG_PM */
52418 +
52419 +static unsigned long long oprofile_samples;
52420 +static unsigned long long p_oprofile_samples;
52421 +
52422 +static unsigned int pdomains;
52423 +static struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS];
52424 +
52425 +/* Check whether the given entry is an escape code */
52426 +static int xenoprof_is_escape(xenoprof_buf_t * buf, int tail)
52427 +{
52428 + return (buf->event_log[tail].eip == XENOPROF_ESCAPE_CODE);
52429 +}
52430 +
52431 +/* Get the event at the given entry */
52432 +static uint8_t xenoprof_get_event(xenoprof_buf_t * buf, int tail)
52433 +{
52434 + return (buf->event_log[tail].event);
52435 +}
52436 +
52437 +static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive)
52438 +{
52439 + int head, tail, size;
52440 + int tracing = 0;
52441 +
52442 + head = buf->event_head;
52443 + tail = buf->event_tail;
52444 + size = buf->event_size;
52445 +
52446 + while (tail != head) {
52447 + if (xenoprof_is_escape(buf, tail) &&
52448 + xenoprof_get_event(buf, tail) == XENOPROF_TRACE_BEGIN) {
52449 + tracing=1;
52450 + oprofile_add_pc(ESCAPE_CODE, buf->event_log[tail].mode,
52451 + CPU_TRACE_BEGIN);
52452 + if (!is_passive)
52453 + oprofile_samples++;
52454 + else
52455 + p_oprofile_samples++;
52456 +
52457 + } else {
52458 + oprofile_add_pc(buf->event_log[tail].eip,
52459 + buf->event_log[tail].mode,
52460 + buf->event_log[tail].event);
52461 + if (!tracing) {
52462 + if (!is_passive)
52463 + oprofile_samples++;
52464 + else
52465 + p_oprofile_samples++;
52466 + }
52467 +
52468 + }
52469 + tail++;
52470 + if(tail==size)
52471 + tail=0;
52472 + }
52473 + buf->event_tail = tail;
52474 +}
52475 +
52476 +static void xenoprof_handle_passive(void)
52477 +{
52478 + int i, j;
52479 + int flag_domain, flag_switch = 0;
52480 +
52481 + for (i = 0; i < pdomains; i++) {
52482 + flag_domain = 0;
52483 + for (j = 0; j < passive_domains[i].nbuf; j++) {
52484 + xenoprof_buf_t *buf = p_xenoprof_buf[i][j];
52485 + if (buf->event_head == buf->event_tail)
52486 + continue;
52487 + if (!flag_domain) {
52488 + if (!oprofile_add_domain_switch(
52489 + passive_domains[i].domain_id))
52490 + goto done;
52491 + flag_domain = 1;
52492 + }
52493 + xenoprof_add_pc(buf, 1);
52494 + flag_switch = 1;
52495 + }
52496 + }
52497 +done:
52498 + if (flag_switch)
52499 + oprofile_add_domain_switch(COORDINATOR_DOMAIN);
52500 +}
52501 +
52502 +static irqreturn_t
52503 +xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
52504 +{
52505 + struct xenoprof_buf * buf;
52506 + static unsigned long flag;
52507 +
52508 + buf = xenoprof_buf[smp_processor_id()];
52509 +
52510 + xenoprof_add_pc(buf, 0);
52511 +
52512 + if (xenoprof_is_primary && !test_and_set_bit(0, &flag)) {
52513 + xenoprof_handle_passive();
52514 + smp_mb__before_clear_bit();
52515 + clear_bit(0, &flag);
52516 + }
52517 +
52518 + return IRQ_HANDLED;
52519 +}
52520 +
52521 +
52522 +static void unbind_virq(void)
52523 +{
52524 + unsigned int i;
52525 +
52526 + for_each_online_cpu(i) {
52527 + if (ovf_irq[i] >= 0) {
52528 + unbind_from_irqhandler(ovf_irq[i], NULL);
52529 + ovf_irq[i] = -1;
52530 + }
52531 + }
52532 +}
52533 +
52534 +
52535 +static int bind_virq(void)
52536 +{
52537 + unsigned int i;
52538 + int result;
52539 +
52540 + for_each_online_cpu(i) {
52541 + result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
52542 + i,
52543 + xenoprof_ovf_interrupt,
52544 + SA_INTERRUPT,
52545 + "xenoprof",
52546 + NULL);
52547 +
52548 + if (result < 0) {
52549 + unbind_virq();
52550 + return result;
52551 + }
52552 +
52553 + ovf_irq[i] = result;
52554 + }
52555 +
52556 + return 0;
52557 +}
52558 +
52559 +
52560 +static void unmap_passive_list(void)
52561 +{
52562 + int i;
52563 + for (i = 0; i < pdomains; i++)
52564 + xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]);
52565 + pdomains = 0;
52566 +}
52567 +
52568 +
52569 +static int map_xenoprof_buffer(int max_samples)
52570 +{
52571 + struct xenoprof_get_buffer get_buffer;
52572 + struct xenoprof_buf *buf;
52573 + int ret, i;
52574 +
52575 + if ( shared_buffer.buffer )
52576 + return 0;
52577 +
52578 + get_buffer.max_samples = max_samples;
52579 + ret = xenoprof_arch_map_shared_buffer(&get_buffer, &shared_buffer);
52580 + if (ret)
52581 + return ret;
52582 + nbuf = get_buffer.nbuf;
52583 +
52584 + for (i=0; i< nbuf; i++) {
52585 + buf = (struct xenoprof_buf*)
52586 + &shared_buffer.buffer[i * get_buffer.bufsize];
52587 + BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
52588 + xenoprof_buf[buf->vcpu_id] = buf;
52589 + }
52590 +
52591 + return 0;
52592 +}
52593 +
52594 +
52595 +static int xenoprof_setup(void)
52596 +{
52597 + int ret;
52598 +
52599 + if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) )
52600 + return ret;
52601 +
52602 + if ( (ret = bind_virq()) )
52603 + return ret;
52604 +
52605 + if (xenoprof_is_primary) {
52606 + /* Define dom0 as an active domain if not done yet */
52607 + if (!active_defined) {
52608 + domid_t domid;
52609 + ret = HYPERVISOR_xenoprof_op(
52610 + XENOPROF_reset_active_list, NULL);
52611 + if (ret)
52612 + goto err;
52613 + domid = 0;
52614 + ret = HYPERVISOR_xenoprof_op(
52615 + XENOPROF_set_active, &domid);
52616 + if (ret)
52617 + goto err;
52618 + active_defined = 1;
52619 + }
52620 +
52621 + if (backtrace_depth > 0) {
52622 + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_backtrace,
52623 + &backtrace_depth);
52624 + if (ret)
52625 + backtrace_depth = 0;
52626 + }
52627 +
52628 + ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL);
52629 + if (ret)
52630 + goto err;
52631 +
52632 + xenoprof_arch_counter();
52633 + ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL);
52634 + if (ret)
52635 + goto err;
52636 + }
52637 +
52638 + ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL);
52639 + if (ret)
52640 + goto err;
52641 +
52642 + xenoprof_enabled = 1;
52643 + return 0;
52644 + err:
52645 + unbind_virq();
52646 + return ret;
52647 +}
52648 +
52649 +
52650 +static void xenoprof_shutdown(void)
52651 +{
52652 + xenoprof_enabled = 0;
52653 +
52654 + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL));
52655 +
52656 + if (xenoprof_is_primary) {
52657 + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_release_counters,
52658 + NULL));
52659 + active_defined = 0;
52660 + }
52661 +
52662 + unbind_virq();
52663 +
52664 + xenoprof_arch_unmap_shared_buffer(&shared_buffer);
52665 + if (xenoprof_is_primary)
52666 + unmap_passive_list();
52667 +}
52668 +
52669 +
52670 +static int xenoprof_start(void)
52671 +{
52672 + int ret = 0;
52673 +
52674 + if (xenoprof_is_primary)
52675 + ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL);
52676 + if (!ret)
52677 + xenoprof_arch_start();
52678 + return ret;
52679 +}
52680 +
52681 +
52682 +static void xenoprof_stop(void)
52683 +{
52684 + if (xenoprof_is_primary)
52685 + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL));
52686 + xenoprof_arch_stop();
52687 +}
52688 +
52689 +
52690 +static int xenoprof_set_active(int * active_domains,
52691 + unsigned int adomains)
52692 +{
52693 + int ret = 0;
52694 + int i;
52695 + int set_dom0 = 0;
52696 + domid_t domid;
52697 +
52698 + if (!xenoprof_is_primary)
52699 + return 0;
52700 +
52701 + if (adomains > MAX_OPROF_DOMAINS)
52702 + return -E2BIG;
52703 +
52704 + ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
52705 + if (ret)
52706 + return ret;
52707 +
52708 + for (i=0; i<adomains; i++) {
52709 + domid = active_domains[i];
52710 + if (domid != active_domains[i]) {
52711 + ret = -EINVAL;
52712 + goto out;
52713 + }
52714 + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
52715 + if (ret)
52716 + goto out;
52717 + if (active_domains[i] == 0)
52718 + set_dom0 = 1;
52719 + }
52720 + /* dom0 must always be active but may not be in the list */
52721 + if (!set_dom0) {
52722 + domid = 0;
52723 + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
52724 + }
52725 +
52726 +out:
52727 + if (ret)
52728 + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list,
52729 + NULL));
52730 + active_defined = !ret;
52731 + return ret;
52732 +}
52733 +
52734 +static int xenoprof_set_passive(int * p_domains,
52735 + unsigned int pdoms)
52736 +{
52737 + int ret;
52738 + unsigned int i, j;
52739 + struct xenoprof_buf *buf;
52740 +
52741 + if (!xenoprof_is_primary)
52742 + return 0;
52743 +
52744 + if (pdoms > MAX_OPROF_DOMAINS)
52745 + return -E2BIG;
52746 +
52747 + ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL);
52748 + if (ret)
52749 + return ret;
52750 + unmap_passive_list();
52751 +
52752 + for (i = 0; i < pdoms; i++) {
52753 + passive_domains[i].domain_id = p_domains[i];
52754 + passive_domains[i].max_samples = 2048;
52755 + ret = xenoprof_arch_set_passive(&passive_domains[i],
52756 + &p_shared_buffer[i]);
52757 + if (ret)
52758 + goto out;
52759 + for (j = 0; j < passive_domains[i].nbuf; j++) {
52760 + buf = (struct xenoprof_buf *)
52761 + &p_shared_buffer[i].buffer[
52762 + j * passive_domains[i].bufsize];
52763 + BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
52764 + p_xenoprof_buf[i][buf->vcpu_id] = buf;
52765 + }
52766 + }
52767 +
52768 + pdomains = pdoms;
52769 + return 0;
52770 +
52771 +out:
52772 + for (j = 0; j < i; j++)
52773 + xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[j]);
52774 +
52775 + return ret;
52776 +}
52777 +
52778 +
52779 +/* The dummy backtrace function to keep oprofile happy.
52780 + * The real backtrace is done in Xen.
52781 + */
52782 +static void xenoprof_dummy_backtrace(struct pt_regs * const regs,
52783 + unsigned int depth)
52784 +{
52785 + /* this should never be called */
52786 + BUG();
52787 + return;
52788 +}
52789 +
52790 +
52791 +static struct oprofile_operations xenoprof_ops = {
52792 +#ifdef HAVE_XENOPROF_CREATE_FILES
52793 + .create_files = xenoprof_create_files,
52794 +#endif
52795 + .set_active = xenoprof_set_active,
52796 + .set_passive = xenoprof_set_passive,
52797 + .setup = xenoprof_setup,
52798 + .shutdown = xenoprof_shutdown,
52799 + .start = xenoprof_start,
52800 + .stop = xenoprof_stop,
52801 + .backtrace = xenoprof_dummy_backtrace
52802 +};
52803 +
52804 +
52805 +/* in order to get driverfs right */
52806 +static int using_xenoprof;
52807 +
52808 +int __init xenoprofile_init(struct oprofile_operations * ops)
52809 +{
52810 + struct xenoprof_init init;
52811 + unsigned int i;
52812 + int ret;
52813 +
52814 + ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
52815 + if (!ret) {
52816 + xenoprof_arch_init_counter(&init);
52817 + xenoprof_is_primary = init.is_primary;
52818 +
52819 + /* cpu_type is detected by Xen */
52820 + cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
52821 + strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
52822 + xenoprof_ops.cpu_type = cpu_type;
52823 +
52824 + init_driverfs();
52825 + using_xenoprof = 1;
52826 + *ops = xenoprof_ops;
52827 +
52828 + for (i=0; i<NR_CPUS; i++)
52829 + ovf_irq[i] = -1;
52830 +
52831 + active_defined = 0;
52832 + }
52833 +
52834 + printk(KERN_INFO "%s: ret %d, events %d, xenoprof_is_primary %d\n",
52835 + __func__, ret, init.num_events, xenoprof_is_primary);
52836 + return ret;
52837 +}
52838 +
52839 +
52840 +void xenoprofile_exit(void)
52841 +{
52842 + if (using_xenoprof)
52843 + exit_driverfs();
52844 +
52845 + xenoprof_arch_unmap_shared_buffer(&shared_buffer);
52846 + if (xenoprof_is_primary) {
52847 + unmap_passive_list();
52848 + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL));
52849 + }
52850 +}
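xenoprof_add_pc() above drains a per-VCPU ring that Xen fills: the consumer walks event_tail toward event_head, wrapping at event_size, and writes the new tail back when it is done. A minimal stand-alone sketch of that wraparound logic; the escape/trace records handled by the real loop are left out and the struct below is an invented stand-in:

#include <stdio.h>

#define RING_SIZE 8

struct ring {
        int head, tail, size;
        int sample[RING_SIZE];
};

static void consume(struct ring *r)
{
        int tail = r->tail;

        while (tail != r->head) {
                printf("sample %d\n", r->sample[tail]);
                tail++;
                if (tail == r->size)
                        tail = 0;       /* wrap, as in xenoprof_add_pc() */
        }
        r->tail = tail;                 /* publish the consumed slots */
}

int main(void)
{
        struct ring r = { .head = 2, .tail = 6, .size = RING_SIZE,
                          .sample = { 0, 1, 2, 3, 4, 5, 6, 7 } };

        consume(&r);                    /* prints samples 6, 7, 0, 1 */
        return 0;
}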