1 Subject: xen3 xen-drivers
2 From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd)
3 Patch-mainline: obsolete
4 Acked-by: jbeulich@novell.com
5
6 Index: head-2008-11-25/drivers/xen/balloon/Makefile
7 ===================================================================
8 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
9 +++ head-2008-11-25/drivers/xen/balloon/Makefile 2007-06-12 13:13:44.000000000 +0200
10 @@ -0,0 +1,2 @@
11 +
12 +obj-y := balloon.o sysfs.o
13 Index: head-2008-11-25/drivers/xen/balloon/balloon.c
14 ===================================================================
15 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
16 +++ head-2008-11-25/drivers/xen/balloon/balloon.c 2008-07-21 11:00:33.000000000 +0200
17 @@ -0,0 +1,724 @@
18 +/******************************************************************************
19 + * balloon.c
20 + *
21 + * Xen balloon driver - enables returning/claiming memory to/from Xen.
22 + *
23 + * Copyright (c) 2003, B Dragovic
24 + * Copyright (c) 2003-2004, M Williamson, K Fraser
25 + * Copyright (c) 2005 Dan M. Smith, IBM Corporation
26 + *
27 + * This program is free software; you can redistribute it and/or
28 + * modify it under the terms of the GNU General Public License version 2
29 + * as published by the Free Software Foundation; or, when distributed
30 + * separately from the Linux kernel or incorporated into other
31 + * software packages, subject to the following license:
32 + *
33 + * Permission is hereby granted, free of charge, to any person obtaining a copy
34 + * of this source file (the "Software"), to deal in the Software without
35 + * restriction, including without limitation the rights to use, copy, modify,
36 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
37 + * and to permit persons to whom the Software is furnished to do so, subject to
38 + * the following conditions:
39 + *
40 + * The above copyright notice and this permission notice shall be included in
41 + * all copies or substantial portions of the Software.
42 + *
43 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
44 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
45 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
46 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
47 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
48 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
49 + * IN THE SOFTWARE.
50 + */
51 +
52 +#include <linux/kernel.h>
53 +#include <linux/module.h>
54 +#include <linux/sched.h>
55 +#include <linux/errno.h>
56 +#include <linux/mm.h>
57 +#include <linux/mman.h>
58 +#include <linux/smp_lock.h>
59 +#include <linux/pagemap.h>
60 +#include <linux/bootmem.h>
61 +#include <linux/highmem.h>
62 +#include <linux/vmalloc.h>
63 +#include <linux/mutex.h>
64 +#include <xen/xen_proc.h>
65 +#include <asm/hypervisor.h>
66 +#include <xen/balloon.h>
67 +#include <xen/interface/memory.h>
68 +#include <asm/maddr.h>
69 +#include <asm/page.h>
70 +#include <asm/pgalloc.h>
71 +#include <asm/pgtable.h>
72 +#include <asm/uaccess.h>
73 +#include <asm/tlb.h>
74 +#include <linux/highmem.h>
75 +#include <linux/list.h>
76 +#include <xen/xenbus.h>
77 +#include "common.h"
78 +
79 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
80 +#include <xen/platform-compat.h>
81 +#endif
82 +
83 +#ifdef CONFIG_PROC_FS
84 +static struct proc_dir_entry *balloon_pde;
85 +#endif
86 +
87 +static DEFINE_MUTEX(balloon_mutex);
88 +
89 +/*
90 + * Protects atomic reservation decrease/increase against concurrent increases.
91 + * Also protects non-atomic updates of current_pages and driver_pages, and
92 + * balloon lists.
93 + */
94 +DEFINE_SPINLOCK(balloon_lock);
95 +
96 +struct balloon_stats balloon_stats;
97 +
98 +/* We increase/decrease in batches which fit in a page */
99 +static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
100 +
101 +/* VM /proc information for memory */
102 +extern unsigned long totalram_pages;
103 +
104 +#ifndef MODULE
105 +extern unsigned long totalhigh_pages;
106 +#define inc_totalhigh_pages() (totalhigh_pages++)
107 +#define dec_totalhigh_pages() (totalhigh_pages--)
108 +#else
109 +#define inc_totalhigh_pages() ((void)0)
110 +#define dec_totalhigh_pages() ((void)0)
111 +#endif
112 +
113 +/* List of ballooned pages, threaded through the mem_map array. */
114 +static LIST_HEAD(ballooned_pages);
115 +
116 +/* Main work function, always executed in process context. */
117 +static void balloon_process(void *unused);
118 +static DECLARE_WORK(balloon_worker, balloon_process, NULL);
119 +static struct timer_list balloon_timer;
120 +
121 +/* When ballooning out (allocating memory to return to Xen) we don't really
122 + want the kernel to try too hard since that can trigger the oom killer. */
123 +#define GFP_BALLOON \
124 + (GFP_HIGHUSER|__GFP_NOWARN|__GFP_NORETRY|__GFP_NOMEMALLOC|__GFP_COLD)
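Annotation (not part of the patch): a flag-by-flag reading of GFP_BALLOON, based on the 2.6.18-era meanings of these GFP bits:

	/*
	 * GFP_HIGHUSER     - any zone will do; the page is only handed back to Xen.
	 * __GFP_NOWARN     - suppress allocation-failure warnings in the log.
	 * __GFP_NORETRY    - fail fast instead of retrying into OOM territory.
	 * __GFP_NOMEMALLOC - never dip into the emergency reserves.
	 * __GFP_COLD       - prefer cache-cold pages; their contents are discarded.
	 */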
125 +
126 +#define PAGE_TO_LIST(p) (&(p)->lru)
127 +#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
128 +#define UNLIST_PAGE(p) \
129 + do { \
130 + list_del(PAGE_TO_LIST(p)); \
131 + PAGE_TO_LIST(p)->next = NULL; \
132 + PAGE_TO_LIST(p)->prev = NULL; \
133 +	} while (0)
134 +
135 +#define IPRINTK(fmt, args...) \
136 + printk(KERN_INFO "xen_mem: " fmt, ##args)
137 +#define WPRINTK(fmt, args...) \
138 + printk(KERN_WARNING "xen_mem: " fmt, ##args)
139 +
140 +/* balloon_append: add the given page to the balloon. */
141 +static void balloon_append(struct page *page)
142 +{
143 + /* Lowmem is re-populated first, so highmem pages go at list tail. */
144 + if (PageHighMem(page)) {
145 + list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
146 + bs.balloon_high++;
147 + dec_totalhigh_pages();
148 + } else {
149 + list_add(PAGE_TO_LIST(page), &ballooned_pages);
150 + bs.balloon_low++;
151 + }
152 +}
153 +
154 +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
155 +static struct page *balloon_retrieve(void)
156 +{
157 + struct page *page;
158 +
159 + if (list_empty(&ballooned_pages))
160 + return NULL;
161 +
162 + page = LIST_TO_PAGE(ballooned_pages.next);
163 + UNLIST_PAGE(page);
164 +
165 + if (PageHighMem(page)) {
166 + bs.balloon_high--;
167 + inc_totalhigh_pages();
168 + }
169 + else
170 + bs.balloon_low--;
171 +
172 + return page;
173 +}
174 +
175 +static struct page *balloon_first_page(void)
176 +{
177 + if (list_empty(&ballooned_pages))
178 + return NULL;
179 + return LIST_TO_PAGE(ballooned_pages.next);
180 +}
181 +
182 +static struct page *balloon_next_page(struct page *page)
183 +{
184 + struct list_head *next = PAGE_TO_LIST(page)->next;
185 + if (next == &ballooned_pages)
186 + return NULL;
187 + return LIST_TO_PAGE(next);
188 +}
189 +
190 +static inline void balloon_free_page(struct page *page)
191 +{
192 +#ifndef MODULE
193 + if (put_page_testzero(page))
194 + free_cold_page(page);
195 +#else
196 + /* free_cold_page() is not being exported. */
197 + __free_page(page);
198 +#endif
199 +}
200 +
201 +static void balloon_alarm(unsigned long unused)
202 +{
203 + schedule_work(&balloon_worker);
204 +}
205 +
206 +static unsigned long current_target(void)
207 +{
208 + unsigned long target = min(bs.target_pages, bs.hard_limit);
209 + if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
210 + target = bs.current_pages + bs.balloon_low + bs.balloon_high;
211 + return target;
212 +}
213 +
214 +static unsigned long minimum_target(void)
215 +{
216 +#ifndef CONFIG_XEN
217 +#define max_pfn num_physpages
218 +#endif
219 + unsigned long min_pages, curr_pages = current_target();
220 +
221 +#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
222 +	/* Simple continuous piecewise linear function:
223 + * max MiB -> min MiB gradient
224 + * 0 0
225 + * 16 16
226 + * 32 24
227 + * 128 72 (1/2)
228 + * 512 168 (1/4)
229 + * 2048 360 (1/8)
230 + * 8192 552 (1/32)
231 + * 32768 1320
232 + * 131072 4392
233 + */
234 + if (max_pfn < MB2PAGES(128))
235 + min_pages = MB2PAGES(8) + (max_pfn >> 1);
236 + else if (max_pfn < MB2PAGES(512))
237 + min_pages = MB2PAGES(40) + (max_pfn >> 2);
238 + else if (max_pfn < MB2PAGES(2048))
239 + min_pages = MB2PAGES(104) + (max_pfn >> 3);
240 + else
241 + min_pages = MB2PAGES(296) + (max_pfn >> 5);
242 +#undef MB2PAGES
243 +
244 + /* Don't enforce growth */
245 + return min(min_pages, curr_pages);
246 +#ifndef CONFIG_XEN
247 +#undef max_pfn
248 +#endif
249 +}
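A worked example of the piecewise floor above (annotation; assumes PAGE_SHIFT == 12, i.e. 4 KiB pages):

	/*
	 * MB2PAGES(mb) = mb << 8 = mb * 256 pages.
	 * A 1024 MiB domain has max_pfn = 1024 * 256 = 262144, which lands in
	 * the 512..2048 MiB band:
	 *   min_pages = MB2PAGES(104) + (max_pfn >> 3)
	 *             = 26624 + 32768 = 59392 pages = 232 MiB,
	 * consistent with the 1/8 gradient between the "512 -> 168" and
	 * "2048 -> 360" rows of the table.
	 */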
250 +
251 +static int increase_reservation(unsigned long nr_pages)
252 +{
253 + unsigned long pfn, i, flags;
254 + struct page *page;
255 + long rc;
256 + struct xen_memory_reservation reservation = {
257 + .address_bits = 0,
258 + .extent_order = 0,
259 + .domid = DOMID_SELF
260 + };
261 +
262 + if (nr_pages > ARRAY_SIZE(frame_list))
263 + nr_pages = ARRAY_SIZE(frame_list);
264 +
265 + balloon_lock(flags);
266 +
267 + page = balloon_first_page();
268 + for (i = 0; i < nr_pages; i++) {
269 + BUG_ON(page == NULL);
270 +		frame_list[i] = page_to_pfn(page);
271 + page = balloon_next_page(page);
272 + }
273 +
274 + set_xen_guest_handle(reservation.extent_start, frame_list);
275 + reservation.nr_extents = nr_pages;
276 + rc = HYPERVISOR_memory_op(
277 + XENMEM_populate_physmap, &reservation);
278 + if (rc < nr_pages) {
279 + if (rc > 0) {
280 + int ret;
281 +
282 + /* We hit the Xen hard limit: reprobe. */
283 + reservation.nr_extents = rc;
284 + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
285 + &reservation);
286 + BUG_ON(ret != rc);
287 + }
288 + if (rc >= 0)
289 + bs.hard_limit = (bs.current_pages + rc -
290 + bs.driver_pages);
291 + goto out;
292 + }
293 +
294 + for (i = 0; i < nr_pages; i++) {
295 + page = balloon_retrieve();
296 + BUG_ON(page == NULL);
297 +
298 + pfn = page_to_pfn(page);
299 + BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
300 + phys_to_machine_mapping_valid(pfn));
301 +
302 + set_phys_to_machine(pfn, frame_list[i]);
303 +
304 +#ifdef CONFIG_XEN
305 + /* Link back into the page tables if not highmem. */
306 + if (pfn < max_low_pfn) {
307 + int ret;
308 + ret = HYPERVISOR_update_va_mapping(
309 + (unsigned long)__va(pfn << PAGE_SHIFT),
310 + pfn_pte_ma(frame_list[i], PAGE_KERNEL),
311 + 0);
312 + BUG_ON(ret);
313 + }
314 +#endif
315 +
316 + /* Relinquish the page back to the allocator. */
317 + ClearPageReserved(page);
318 + init_page_count(page);
319 + balloon_free_page(page);
320 + }
321 +
322 + bs.current_pages += nr_pages;
323 + totalram_pages = bs.current_pages;
324 +
325 + out:
326 + balloon_unlock(flags);
327 +
328 + return 0;
329 +}
330 +
331 +static int decrease_reservation(unsigned long nr_pages)
332 +{
333 + unsigned long pfn, i, flags;
334 + struct page *page;
335 + void *v;
336 + int need_sleep = 0;
337 + int ret;
338 + struct xen_memory_reservation reservation = {
339 + .address_bits = 0,
340 + .extent_order = 0,
341 + .domid = DOMID_SELF
342 + };
343 +
344 + if (nr_pages > ARRAY_SIZE(frame_list))
345 + nr_pages = ARRAY_SIZE(frame_list);
346 +
347 + for (i = 0; i < nr_pages; i++) {
348 + if ((page = alloc_page(GFP_BALLOON)) == NULL) {
349 + nr_pages = i;
350 + need_sleep = 1;
351 + break;
352 + }
353 +
354 + pfn = page_to_pfn(page);
355 + frame_list[i] = pfn_to_mfn(pfn);
356 +
357 + if (!PageHighMem(page)) {
358 + v = phys_to_virt(pfn << PAGE_SHIFT);
359 + scrub_pages(v, 1);
360 +#ifdef CONFIG_XEN
361 + ret = HYPERVISOR_update_va_mapping(
362 + (unsigned long)v, __pte_ma(0), 0);
363 + BUG_ON(ret);
364 +#endif
365 + }
366 +#ifdef CONFIG_XEN_SCRUB_PAGES
367 + else {
368 + v = kmap(page);
369 + scrub_pages(v, 1);
370 + kunmap(page);
371 + }
372 +#endif
373 + }
374 +
375 +#ifdef CONFIG_XEN
376 + /* Ensure that ballooned highmem pages don't have kmaps. */
377 + kmap_flush_unused();
378 + flush_tlb_all();
379 +#endif
380 +
381 + balloon_lock(flags);
382 +
383 + /* No more mappings: invalidate P2M and add to balloon. */
384 + for (i = 0; i < nr_pages; i++) {
385 + pfn = mfn_to_pfn(frame_list[i]);
386 + set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
387 + balloon_append(pfn_to_page(pfn));
388 + }
389 +
390 + set_xen_guest_handle(reservation.extent_start, frame_list);
391 + reservation.nr_extents = nr_pages;
392 + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
393 + BUG_ON(ret != nr_pages);
394 +
395 + bs.current_pages -= nr_pages;
396 + totalram_pages = bs.current_pages;
397 +
398 + balloon_unlock(flags);
399 +
400 + return need_sleep;
401 +}
402 +
403 +/*
404 + * We avoid multiple worker processes conflicting via the balloon mutex.
405 + * We may of course race updates of the target counts (which are protected
406 + * by the balloon lock), or with changes to the Xen hard limit, but we will
407 + * recover from these in time.
408 + */
409 +static void balloon_process(void *unused)
410 +{
411 + int need_sleep = 0;
412 + long credit;
413 +
414 + mutex_lock(&balloon_mutex);
415 +
416 + do {
417 + credit = current_target() - bs.current_pages;
418 + if (credit > 0)
419 + need_sleep = (increase_reservation(credit) != 0);
420 + if (credit < 0)
421 + need_sleep = (decrease_reservation(-credit) != 0);
422 +
423 +#ifndef CONFIG_PREEMPT
424 + if (need_resched())
425 + schedule();
426 +#endif
427 + } while ((credit != 0) && !need_sleep);
428 +
429 + /* Schedule more work if there is some still to be done. */
430 + if (current_target() != bs.current_pages)
431 + mod_timer(&balloon_timer, jiffies + HZ);
432 +
433 + mutex_unlock(&balloon_mutex);
434 +}
435 +
436 +/* Resets the Xen limit, sets new target, and kicks off processing. */
437 +void balloon_set_new_target(unsigned long target)
438 +{
439 + /* No need for lock. Not read-modify-write updates. */
440 + bs.hard_limit = ~0UL;
441 + bs.target_pages = max(target, minimum_target());
442 + schedule_work(&balloon_worker);
443 +}
444 +
445 +static struct xenbus_watch target_watch =
446 +{
447 + .node = "memory/target"
448 +};
449 +
450 +/* React to a change in the target key */
451 +static void watch_target(struct xenbus_watch *watch,
452 + const char **vec, unsigned int len)
453 +{
454 + unsigned long long new_target;
455 + int err;
456 +
457 + err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
458 + if (err != 1) {
459 + /* This is ok (for domain0 at least) - so just return */
460 + return;
461 + }
462 +
463 + /* The given memory/target value is in KiB, so it needs converting to
464 + * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
465 + */
466 + balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
467 +}
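Annotation: the unit conversion spelled out (assuming 4 KiB pages):

	/*
	 * memory/target is in KiB and one page is 1 << (PAGE_SHIFT - 10) KiB,
	 * so pages = kib >> (PAGE_SHIFT - 10). With PAGE_SHIFT == 12, a target
	 * of 524288 KiB (512 MiB) becomes 524288 >> 2 = 131072 pages.
	 */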
468 +
469 +static int balloon_init_watcher(struct notifier_block *notifier,
470 + unsigned long event,
471 + void *data)
472 +{
473 + int err;
474 +
475 + err = register_xenbus_watch(&target_watch);
476 + if (err)
477 + printk(KERN_ERR "Failed to set balloon watcher\n");
478 +
479 + return NOTIFY_DONE;
480 +}
481 +
482 +#ifdef CONFIG_PROC_FS
483 +static int balloon_write(struct file *file, const char __user *buffer,
484 + unsigned long count, void *data)
485 +{
486 + char memstring[64], *endchar;
487 + unsigned long long target_bytes;
488 +
489 + if (!capable(CAP_SYS_ADMIN))
490 + return -EPERM;
491 +
492 + if (count <= 1)
493 + return -EBADMSG; /* runt */
494 +	if (count >= sizeof(memstring))
495 + return -EFBIG; /* too long */
496 +
497 + if (copy_from_user(memstring, buffer, count))
498 + return -EFAULT;
499 +	memstring[count] = '\0';
500 +
501 + target_bytes = memparse(memstring, &endchar);
502 + balloon_set_new_target(target_bytes >> PAGE_SHIFT);
503 +
504 + return count;
505 +}
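Usage note (annotation, not part of the patch): the buffer goes through memparse(), which understands size suffixes:

	/* echo 512M > /proc/xen/balloon   requests a 512 MiB target;
	 * the parsed byte count is converted to pages via >> PAGE_SHIFT. */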
506 +
507 +static int balloon_read(char *page, char **start, off_t off,
508 + int count, int *eof, void *data)
509 +{
510 + int len;
511 +
512 + len = sprintf(
513 + page,
514 + "Current allocation: %8lu kB\n"
515 + "Requested target: %8lu kB\n"
516 + "Low-mem balloon: %8lu kB\n"
517 + "High-mem balloon: %8lu kB\n"
518 + "Driver pages: %8lu kB\n"
519 + "Xen hard limit: ",
520 + PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages),
521 + PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high),
522 + PAGES2KB(bs.driver_pages));
523 +
524 + if (bs.hard_limit != ~0UL)
525 + len += sprintf(page + len, "%8lu kB\n",
526 + PAGES2KB(bs.hard_limit));
527 + else
528 + len += sprintf(page + len, " ??? kB\n");
529 +
530 + *eof = 1;
531 + return len;
532 +}
533 +#endif
534 +
535 +static struct notifier_block xenstore_notifier;
536 +
537 +static int __init balloon_init(void)
538 +{
539 +#if defined(CONFIG_X86) && defined(CONFIG_XEN)
540 + unsigned long pfn;
541 + struct page *page;
542 +#endif
543 +
544 + if (!is_running_on_xen())
545 + return -ENODEV;
546 +
547 + IPRINTK("Initialising balloon driver.\n");
548 +
549 +#ifdef CONFIG_XEN
550 + bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
551 + totalram_pages = bs.current_pages;
552 +#else
553 + bs.current_pages = totalram_pages;
554 +#endif
555 + bs.target_pages = bs.current_pages;
556 + bs.balloon_low = 0;
557 + bs.balloon_high = 0;
558 + bs.driver_pages = 0UL;
559 + bs.hard_limit = ~0UL;
560 +
561 + init_timer(&balloon_timer);
562 + balloon_timer.data = 0;
563 + balloon_timer.function = balloon_alarm;
564 +
565 +#ifdef CONFIG_PROC_FS
566 + if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
567 + WPRINTK("Unable to create /proc/xen/balloon.\n");
568 + return -1;
569 + }
570 +
571 + balloon_pde->read_proc = balloon_read;
572 + balloon_pde->write_proc = balloon_write;
573 +#endif
574 + balloon_sysfs_init();
575 +
576 +#if defined(CONFIG_X86) && defined(CONFIG_XEN)
577 + /* Initialise the balloon with excess memory space. */
578 + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
579 + page = pfn_to_page(pfn);
580 + if (!PageReserved(page))
581 + balloon_append(page);
582 + }
583 +#endif
584 +
585 + target_watch.callback = watch_target;
586 + xenstore_notifier.notifier_call = balloon_init_watcher;
587 +
588 + register_xenstore_notifier(&xenstore_notifier);
589 +
590 + return 0;
591 +}
592 +
593 +subsys_initcall(balloon_init);
594 +
595 +static void __exit balloon_exit(void)
596 +{
597 + /* XXX - release balloon here */
598 + return;
599 +}
600 +
601 +module_exit(balloon_exit);
602 +
603 +void balloon_update_driver_allowance(long delta)
604 +{
605 + unsigned long flags;
606 +
607 + balloon_lock(flags);
608 + bs.driver_pages += delta;
609 + balloon_unlock(flags);
610 +}
611 +
612 +#ifdef CONFIG_XEN
613 +static int dealloc_pte_fn(
614 + pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
615 +{
616 + unsigned long mfn = pte_mfn(*pte);
617 + int ret;
618 + struct xen_memory_reservation reservation = {
619 + .nr_extents = 1,
620 + .extent_order = 0,
621 + .domid = DOMID_SELF
622 + };
623 + set_xen_guest_handle(reservation.extent_start, &mfn);
624 + set_pte_at(&init_mm, addr, pte, __pte_ma(0));
625 + set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
626 + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
627 + BUG_ON(ret != 1);
628 + return 0;
629 +}
630 +#endif
631 +
632 +struct page **alloc_empty_pages_and_pagevec(int nr_pages)
633 +{
634 + unsigned long flags;
635 + void *v;
636 + struct page *page, **pagevec;
637 + int i, ret;
638 +
639 + pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
640 + if (pagevec == NULL)
641 + return NULL;
642 +
643 + for (i = 0; i < nr_pages; i++) {
644 + page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
645 + if (page == NULL)
646 + goto err;
647 +
648 + v = page_address(page);
649 + scrub_pages(v, 1);
650 +
651 + balloon_lock(flags);
652 +
653 + if (xen_feature(XENFEAT_auto_translated_physmap)) {
654 + unsigned long gmfn = page_to_pfn(page);
655 + struct xen_memory_reservation reservation = {
656 + .nr_extents = 1,
657 + .extent_order = 0,
658 + .domid = DOMID_SELF
659 + };
660 + set_xen_guest_handle(reservation.extent_start, &gmfn);
661 + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
662 + &reservation);
663 + if (ret == 1)
664 + ret = 0; /* success */
665 + } else {
666 +#ifdef CONFIG_XEN
667 + ret = apply_to_page_range(&init_mm, (unsigned long)v,
668 + PAGE_SIZE, dealloc_pte_fn,
669 + NULL);
670 +#else
671 + /* Cannot handle non-auto translate mode. */
672 + ret = 1;
673 +#endif
674 + }
675 +
676 + if (ret != 0) {
677 + balloon_unlock(flags);
678 + balloon_free_page(page);
679 + goto err;
680 + }
681 +
682 + totalram_pages = --bs.current_pages;
683 +
684 + balloon_unlock(flags);
685 + }
686 +
687 + out:
688 + schedule_work(&balloon_worker);
689 +#ifdef CONFIG_XEN
690 + flush_tlb_all();
691 +#endif
692 + return pagevec;
693 +
694 + err:
695 + balloon_lock(flags);
696 + while (--i >= 0)
697 + balloon_append(pagevec[i]);
698 + balloon_unlock(flags);
699 + kfree(pagevec);
700 + pagevec = NULL;
701 + goto out;
702 +}
703 +
704 +void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
705 +{
706 + unsigned long flags;
707 + int i;
708 +
709 + if (pagevec == NULL)
710 + return;
711 +
712 + balloon_lock(flags);
713 + for (i = 0; i < nr_pages; i++) {
714 + BUG_ON(page_count(pagevec[i]) != 1);
715 + balloon_append(pagevec[i]);
716 + }
717 + balloon_unlock(flags);
718 +
719 + kfree(pagevec);
720 +
721 + schedule_work(&balloon_worker);
722 +}
723 +
724 +void balloon_release_driver_page(struct page *page)
725 +{
726 + unsigned long flags;
727 +
728 + balloon_lock(flags);
729 + balloon_append(page);
730 + bs.driver_pages--;
731 + balloon_unlock(flags);
732 +
733 + schedule_work(&balloon_worker);
734 +}
735 +
736 +EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
737 +EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
738 +EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
739 +EXPORT_SYMBOL_GPL(balloon_release_driver_page);
740 +
741 +MODULE_LICENSE("Dual BSD/GPL");
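A hedged sketch of how another backend driver might consume the exports above (an assumption from the interface names; the real consumers, e.g. netback, live elsewhere in this patch series):

	/*
	 * struct page **pv = alloc_empty_pages_and_pagevec(nr);
	 * if (pv != NULL)
	 *         balloon_update_driver_allowance(nr);   /* account pages taken */
	 * ...
	 * free_empty_pages_and_pagevec(pv, nr);
	 * balloon_update_driver_allowance(-nr);
	 */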
742 Index: head-2008-11-25/drivers/xen/balloon/common.h
743 ===================================================================
744 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
745 +++ head-2008-11-25/drivers/xen/balloon/common.h 2007-06-12 13:13:44.000000000 +0200
746 @@ -0,0 +1,58 @@
747 +/******************************************************************************
748 + * balloon/common.h
749 + *
750 + * This program is free software; you can redistribute it and/or
751 + * modify it under the terms of the GNU General Public License version 2
752 + * as published by the Free Software Foundation; or, when distributed
753 + * separately from the Linux kernel or incorporated into other
754 + * software packages, subject to the following license:
755 + *
756 + * Permission is hereby granted, free of charge, to any person obtaining a copy
757 + * of this source file (the "Software"), to deal in the Software without
758 + * restriction, including without limitation the rights to use, copy, modify,
759 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
760 + * and to permit persons to whom the Software is furnished to do so, subject to
761 + * the following conditions:
762 + *
763 + * The above copyright notice and this permission notice shall be included in
764 + * all copies or substantial portions of the Software.
765 + *
766 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
767 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
768 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
769 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
770 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
771 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
772 + * IN THE SOFTWARE.
773 + */
774 +
775 +#ifndef __XEN_BALLOON_COMMON_H__
776 +#define __XEN_BALLOON_COMMON_H__
777 +
778 +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
779 +
780 +struct balloon_stats {
781 + /* We aim for 'current allocation' == 'target allocation'. */
782 + unsigned long current_pages;
783 + unsigned long target_pages;
784 + /* We may hit the hard limit in Xen. If we do then we remember it. */
785 + unsigned long hard_limit;
786 + /*
787 + * Drivers may alter the memory reservation independently, but they
788 + * must inform the balloon driver so we avoid hitting the hard limit.
789 + */
790 + unsigned long driver_pages;
791 + /* Number of pages in high- and low-memory balloons. */
792 + unsigned long balloon_low;
793 + unsigned long balloon_high;
794 +};
795 +
796 +extern struct balloon_stats balloon_stats;
797 +#define bs balloon_stats
798 +
799 +int balloon_sysfs_init(void);
800 +void balloon_sysfs_exit(void);
801 +
802 +void balloon_set_new_target(unsigned long target);
803 +
804 +#endif /* __XEN_BALLOON_COMMON_H__ */
805 Index: head-2008-11-25/drivers/xen/balloon/sysfs.c
806 ===================================================================
807 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
808 +++ head-2008-11-25/drivers/xen/balloon/sysfs.c 2008-04-02 12:34:02.000000000 +0200
809 @@ -0,0 +1,170 @@
810 +/******************************************************************************
811 + * balloon/sysfs.c
812 + *
813 + * Xen balloon driver - sysfs interfaces.
814 + *
815 + * This program is free software; you can redistribute it and/or
816 + * modify it under the terms of the GNU General Public License version 2
817 + * as published by the Free Software Foundation; or, when distributed
818 + * separately from the Linux kernel or incorporated into other
819 + * software packages, subject to the following license:
820 + *
821 + * Permission is hereby granted, free of charge, to any person obtaining a copy
822 + * of this source file (the "Software"), to deal in the Software without
823 + * restriction, including without limitation the rights to use, copy, modify,
824 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
825 + * and to permit persons to whom the Software is furnished to do so, subject to
826 + * the following conditions:
827 + *
828 + * The above copyright notice and this permission notice shall be included in
829 + * all copies or substantial portions of the Software.
830 + *
831 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
832 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
833 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
834 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
835 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
836 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
837 + * IN THE SOFTWARE.
838 + */
839 +
840 +#include <linux/capability.h>
841 +#include <linux/errno.h>
842 +#include <linux/stat.h>
843 +#include <linux/string.h>
844 +#include <linux/sysdev.h>
845 +#include "common.h"
846 +
847 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
848 +#include <xen/platform-compat.h>
849 +#endif
850 +
851 +#define BALLOON_CLASS_NAME "xen_memory"
852 +
853 +#define BALLOON_SHOW(name, format, args...) \
854 + static ssize_t show_##name(struct sys_device *dev, \
855 + char *buf) \
856 + { \
857 + return sprintf(buf, format, ##args); \
858 + } \
859 + static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
860 +
861 +BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages));
862 +BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low));
863 +BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
864 +BALLOON_SHOW(hard_limit_kb,
865 + (bs.hard_limit!=~0UL) ? "%lu\n" : "???\n",
866 + (bs.hard_limit!=~0UL) ? PAGES2KB(bs.hard_limit) : 0);
867 +BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
868 +
869 +static ssize_t show_target_kb(struct sys_device *dev, char *buf)
870 +{
871 + return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
872 +}
873 +
874 +static ssize_t store_target_kb(struct sys_device *dev,
875 + const char *buf,
876 + size_t count)
877 +{
878 + char memstring[64], *endchar;
879 + unsigned long long target_bytes;
880 +
881 + if (!capable(CAP_SYS_ADMIN))
882 + return -EPERM;
883 +
884 + if (count <= 1)
885 + return -EBADMSG; /* runt */
886 + if (count > sizeof(memstring))
887 + return -EFBIG; /* too long */
888 +	strlcpy(memstring, buf, sizeof(memstring));
889 +
890 + target_bytes = memparse(memstring, &endchar);
891 + balloon_set_new_target(target_bytes >> PAGE_SHIFT);
892 +
893 + return count;
894 +}
895 +
896 +static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
897 + show_target_kb, store_target_kb);
898 +
899 +static struct sysdev_attribute *balloon_attrs[] = {
900 + &attr_target_kb,
901 +};
902 +
903 +static struct attribute *balloon_info_attrs[] = {
904 + &attr_current_kb.attr,
905 + &attr_low_kb.attr,
906 + &attr_high_kb.attr,
907 + &attr_hard_limit_kb.attr,
908 + &attr_driver_kb.attr,
909 + NULL
910 +};
911 +
912 +static struct attribute_group balloon_info_group = {
913 + .name = "info",
914 + .attrs = balloon_info_attrs,
915 +};
916 +
917 +static struct sysdev_class balloon_sysdev_class = {
918 + set_kset_name(BALLOON_CLASS_NAME),
919 +};
920 +
921 +static struct sys_device balloon_sysdev;
922 +
923 +static int register_balloon(struct sys_device *sysdev)
924 +{
925 + int i, error;
926 +
927 + error = sysdev_class_register(&balloon_sysdev_class);
928 + if (error)
929 + return error;
930 +
931 + sysdev->id = 0;
932 + sysdev->cls = &balloon_sysdev_class;
933 +
934 + error = sysdev_register(sysdev);
935 + if (error) {
936 + sysdev_class_unregister(&balloon_sysdev_class);
937 + return error;
938 + }
939 +
940 + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
941 + error = sysdev_create_file(sysdev, balloon_attrs[i]);
942 + if (error)
943 + goto fail;
944 + }
945 +
946 + error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
947 + if (error)
948 + goto fail;
949 +
950 + return 0;
951 +
952 + fail:
953 + while (--i >= 0)
954 + sysdev_remove_file(sysdev, balloon_attrs[i]);
955 + sysdev_unregister(sysdev);
956 + sysdev_class_unregister(&balloon_sysdev_class);
957 + return error;
958 +}
959 +
960 +static void unregister_balloon(struct sys_device *sysdev)
961 +{
962 + int i;
963 +
964 + sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
965 + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
966 + sysdev_remove_file(sysdev, balloon_attrs[i]);
967 + sysdev_unregister(sysdev);
968 + sysdev_class_unregister(&balloon_sysdev_class);
969 +}
970 +
971 +int balloon_sysfs_init(void)
972 +{
973 + return register_balloon(&balloon_sysdev);
974 +}
975 +
976 +void balloon_sysfs_exit(void)
977 +{
978 + unregister_balloon(&balloon_sysdev);
979 +}
980 Index: head-2008-11-25/drivers/xen/blkback/Makefile
981 ===================================================================
982 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
983 +++ head-2008-11-25/drivers/xen/blkback/Makefile 2007-06-12 13:13:44.000000000 +0200
984 @@ -0,0 +1,3 @@
985 +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o
986 +
987 +blkbk-y := blkback.o xenbus.o interface.o vbd.o
988 Index: head-2008-11-25/drivers/xen/blkback/blkback.c
989 ===================================================================
990 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
991 +++ head-2008-11-25/drivers/xen/blkback/blkback.c 2008-11-10 11:44:21.000000000 +0100
992 @@ -0,0 +1,656 @@
993 +/******************************************************************************
994 + * arch/xen/drivers/blkif/backend/main.c
995 + *
996 + * Back-end of the driver for virtual block devices. This portion of the
997 + * driver exports a 'unified' block-device interface that can be accessed
998 + * by any operating system that implements a compatible front end. A
999 + * reference front-end implementation can be found in:
1000 + * arch/xen/drivers/blkif/frontend
1001 + *
1002 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
1003 + * Copyright (c) 2005, Christopher Clark
1004 + *
1005 + * This program is free software; you can redistribute it and/or
1006 + * modify it under the terms of the GNU General Public License version 2
1007 + * as published by the Free Software Foundation; or, when distributed
1008 + * separately from the Linux kernel or incorporated into other
1009 + * software packages, subject to the following license:
1010 + *
1011 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1012 + * of this source file (the "Software"), to deal in the Software without
1013 + * restriction, including without limitation the rights to use, copy, modify,
1014 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1015 + * and to permit persons to whom the Software is furnished to do so, subject to
1016 + * the following conditions:
1017 + *
1018 + * The above copyright notice and this permission notice shall be included in
1019 + * all copies or substantial portions of the Software.
1020 + *
1021 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1022 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1023 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1024 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1025 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1026 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1027 + * IN THE SOFTWARE.
1028 + */
1029 +
1030 +#include <linux/spinlock.h>
1031 +#include <linux/kthread.h>
1032 +#include <linux/list.h>
1033 +#include <linux/delay.h>
1034 +#include <xen/balloon.h>
1035 +#include <asm/hypervisor.h>
1036 +#include "common.h"
1037 +
1038 +/*
1039 + * These are rather arbitrary. They are fairly large because adjacent requests
1040 + * pulled from a communication ring are quite likely to end up being part of
1041 + * the same scatter/gather request at the disc.
1042 + *
1043 + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
1044 + *
1045 + * This will increase the chances of being able to write whole tracks.
1046 + * 64 should be enough to keep us competitive with Linux.
1047 + */
1048 +static int blkif_reqs = 64;
1049 +module_param_named(reqs, blkif_reqs, int, 0);
1050 +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
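Usage note (annotation): the Makefile above builds the module as blkbk.o, so the parameter would be given as, e.g.:

	/* modprobe blkbk reqs=128 */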
1051 +
1052 +/* Run-time switchable: /sys/module/blkback/parameters/ */
1053 +static unsigned int log_stats = 0;
1054 +static unsigned int debug_lvl = 0;
1055 +module_param(log_stats, int, 0644);
1056 +module_param(debug_lvl, int, 0644);
1057 +
1058 +/*
1059 + * Each outstanding request that we've passed to the lower device layers has a
1060 + * 'pending_req' allocated to it. Each buffer_head that completes decrements
1061 + * the pendcnt towards zero. When it hits zero, the specified domain has a
1062 + * response queued for it, with the saved 'id' passed back.
1063 + */
1064 +typedef struct {
1065 + blkif_t *blkif;
1066 + u64 id;
1067 + int nr_pages;
1068 + atomic_t pendcnt;
1069 + unsigned short operation;
1070 + int status;
1071 + struct list_head free_list;
1072 +} pending_req_t;
1073 +
1074 +static pending_req_t *pending_reqs;
1075 +static struct list_head pending_free;
1076 +static DEFINE_SPINLOCK(pending_free_lock);
1077 +static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
1078 +
1079 +#define BLKBACK_INVALID_HANDLE (~0)
1080 +
1081 +static struct page **pending_pages;
1082 +static grant_handle_t *pending_grant_handles;
1083 +
1084 +static inline int vaddr_pagenr(pending_req_t *req, int seg)
1085 +{
1086 + return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
1087 +}
1088 +
1089 +static inline unsigned long vaddr(pending_req_t *req, int seg)
1090 +{
1091 + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
1092 + return (unsigned long)pfn_to_kaddr(pfn);
1093 +}
1094 +
1095 +#define pending_handle(_req, _seg) \
1096 + (pending_grant_handles[vaddr_pagenr(_req, _seg)])
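Annotation: a worked example of the (request, segment) -> page bookkeeping above, assuming the classic blkif ABI value BLKIF_MAX_SEGMENTS_PER_REQUEST == 11:

	/*
	 * Segment 5 of the request at pending_reqs[3] is page number
	 * 3 * 11 + 5 = 38: vaddr() maps it through pending_pages[38], and
	 * pending_handle() stores its grant handle in pending_grant_handles[38].
	 */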
1097 +
1098 +
1099 +static int do_block_io_op(blkif_t *blkif);
1100 +static void dispatch_rw_block_io(blkif_t *blkif,
1101 + blkif_request_t *req,
1102 + pending_req_t *pending_req);
1103 +static void make_response(blkif_t *blkif, u64 id,
1104 + unsigned short op, int st);
1105 +
1106 +/******************************************************************
1107 + * misc small helpers
1108 + */
1109 +static pending_req_t* alloc_req(void)
1110 +{
1111 + pending_req_t *req = NULL;
1112 + unsigned long flags;
1113 +
1114 + spin_lock_irqsave(&pending_free_lock, flags);
1115 + if (!list_empty(&pending_free)) {
1116 + req = list_entry(pending_free.next, pending_req_t, free_list);
1117 + list_del(&req->free_list);
1118 + }
1119 + spin_unlock_irqrestore(&pending_free_lock, flags);
1120 + return req;
1121 +}
1122 +
1123 +static void free_req(pending_req_t *req)
1124 +{
1125 + unsigned long flags;
1126 + int was_empty;
1127 +
1128 + spin_lock_irqsave(&pending_free_lock, flags);
1129 + was_empty = list_empty(&pending_free);
1130 + list_add(&req->free_list, &pending_free);
1131 + spin_unlock_irqrestore(&pending_free_lock, flags);
1132 + if (was_empty)
1133 + wake_up(&pending_free_wq);
1134 +}
1135 +
1136 +static void unplug_queue(blkif_t *blkif)
1137 +{
1138 + if (blkif->plug == NULL)
1139 + return;
1140 + if (blkif->plug->unplug_fn)
1141 + blkif->plug->unplug_fn(blkif->plug);
1142 + blk_put_queue(blkif->plug);
1143 + blkif->plug = NULL;
1144 +}
1145 +
1146 +static void plug_queue(blkif_t *blkif, struct block_device *bdev)
1147 +{
1148 + request_queue_t *q = bdev_get_queue(bdev);
1149 +
1150 + if (q == blkif->plug)
1151 + return;
1152 + unplug_queue(blkif);
1153 + blk_get_queue(q);
1154 + blkif->plug = q;
1155 +}
1156 +
1157 +static void fast_flush_area(pending_req_t *req)
1158 +{
1159 + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1160 + unsigned int i, invcount = 0;
1161 + grant_handle_t handle;
1162 + int ret;
1163 +
1164 + for (i = 0; i < req->nr_pages; i++) {
1165 + handle = pending_handle(req, i);
1166 + if (handle == BLKBACK_INVALID_HANDLE)
1167 + continue;
1168 + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
1169 + GNTMAP_host_map, handle);
1170 + pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
1171 + invcount++;
1172 + }
1173 +
1174 + ret = HYPERVISOR_grant_table_op(
1175 + GNTTABOP_unmap_grant_ref, unmap, invcount);
1176 + BUG_ON(ret);
1177 +}
1178 +
1179 +/******************************************************************
1180 + * SCHEDULER FUNCTIONS
1181 + */
1182 +
1183 +static void print_stats(blkif_t *blkif)
1184 +{
1185 + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
1186 + current->comm, blkif->st_oo_req,
1187 + blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
1188 + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
1189 + blkif->st_rd_req = 0;
1190 + blkif->st_wr_req = 0;
1191 + blkif->st_oo_req = 0;
1192 +}
1193 +
1194 +int blkif_schedule(void *arg)
1195 +{
1196 + blkif_t *blkif = arg;
1197 +
1198 + blkif_get(blkif);
1199 +
1200 + if (debug_lvl)
1201 + printk(KERN_DEBUG "%s: started\n", current->comm);
1202 +
1203 + while (!kthread_should_stop()) {
1204 + if (try_to_freeze())
1205 + continue;
1206 +
1207 + wait_event_interruptible(
1208 + blkif->wq,
1209 + blkif->waiting_reqs || kthread_should_stop());
1210 + wait_event_interruptible(
1211 + pending_free_wq,
1212 + !list_empty(&pending_free) || kthread_should_stop());
1213 +
1214 + blkif->waiting_reqs = 0;
1215 + smp_mb(); /* clear flag *before* checking for work */
1216 +
1217 + if (do_block_io_op(blkif))
1218 + blkif->waiting_reqs = 1;
1219 + unplug_queue(blkif);
1220 +
1221 + if (log_stats && time_after(jiffies, blkif->st_print))
1222 + print_stats(blkif);
1223 + }
1224 +
1225 + if (log_stats)
1226 + print_stats(blkif);
1227 + if (debug_lvl)
1228 + printk(KERN_DEBUG "%s: exiting\n", current->comm);
1229 +
1230 + blkif->xenblkd = NULL;
1231 + blkif_put(blkif);
1232 +
1233 + return 0;
1234 +}
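Annotation: my reading of why the smp_mb() in the loop above is required:

	/*
	 * blkif_notify_work() sets waiting_reqs = 1 and then wakes the thread;
	 * the thread clears the flag and only afterwards re-reads the ring in
	 * do_block_io_op(). If the clear were reordered after the ring check, a
	 * request posted in between would set a flag we then wipe out, i.e. a
	 * lost wakeup until the next notification.
	 */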
1235 +
1236 +/******************************************************************
1237 + * COMPLETION CALLBACK -- Called as bh->b_end_io()
1238 + */
1239 +
1240 +static void __end_block_io_op(pending_req_t *pending_req, int error)
1241 +{
1242 + /* An error fails the entire request. */
1243 + if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
1244 + (error == -EOPNOTSUPP)) {
1245 + DPRINTK("blkback: write barrier op failed, not supported\n");
1246 + blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
1247 + pending_req->status = BLKIF_RSP_EOPNOTSUPP;
1248 + } else if (error) {
1249 + DPRINTK("Buffer not up-to-date at end of operation, "
1250 + "error=%d\n", error);
1251 + pending_req->status = BLKIF_RSP_ERROR;
1252 + }
1253 +
1254 + if (atomic_dec_and_test(&pending_req->pendcnt)) {
1255 + fast_flush_area(pending_req);
1256 + make_response(pending_req->blkif, pending_req->id,
1257 + pending_req->operation, pending_req->status);
1258 + blkif_put(pending_req->blkif);
1259 + free_req(pending_req);
1260 + }
1261 +}
1262 +
1263 +static int end_block_io_op(struct bio *bio, unsigned int done, int error)
1264 +{
1265 + if (bio->bi_size != 0)
1266 + return 1;
1267 + __end_block_io_op(bio->bi_private, error);
1268 + bio_put(bio);
1269 + return error;
1270 +}
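Annotation (an assumption about the 2.6.18-era block API):

	/* bi_end_io may be called for partial completions in this kernel
	 * generation; bi_size drops to zero only when the whole bio is done,
	 * so the early 'return 1' ensures __end_block_io_op() runs once per bio. */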
1271 +
1272 +
1273 +/******************************************************************************
1274 + * NOTIFICATION FROM GUEST OS.
1275 + */
1276 +
1277 +static void blkif_notify_work(blkif_t *blkif)
1278 +{
1279 + blkif->waiting_reqs = 1;
1280 + wake_up(&blkif->wq);
1281 +}
1282 +
1283 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
1284 +{
1285 + blkif_notify_work(dev_id);
1286 + return IRQ_HANDLED;
1287 +}
1288 +
1289 +
1290 +
1291 +/******************************************************************
1292 + * DOWNWARD CALLS -- These interface with the block-device layer proper.
1293 + */
1294 +
1295 +static int do_block_io_op(blkif_t *blkif)
1296 +{
1297 + blkif_back_rings_t *blk_rings = &blkif->blk_rings;
1298 + blkif_request_t req;
1299 + pending_req_t *pending_req;
1300 + RING_IDX rc, rp;
1301 + int more_to_do = 0;
1302 +
1303 + rc = blk_rings->common.req_cons;
1304 + rp = blk_rings->common.sring->req_prod;
1305 + rmb(); /* Ensure we see queued requests up to 'rp'. */
1306 +
1307 + while (rc != rp) {
1308 +
1309 + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
1310 + break;
1311 +
1312 + pending_req = alloc_req();
1313 + if (NULL == pending_req) {
1314 + blkif->st_oo_req++;
1315 + more_to_do = 1;
1316 + break;
1317 + }
1318 +
1319 + if (kthread_should_stop()) {
1320 + more_to_do = 1;
1321 + break;
1322 + }
1323 +
1324 + switch (blkif->blk_protocol) {
1325 + case BLKIF_PROTOCOL_NATIVE:
1326 + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
1327 + break;
1328 + case BLKIF_PROTOCOL_X86_32:
1329 + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
1330 + break;
1331 + case BLKIF_PROTOCOL_X86_64:
1332 + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
1333 + break;
1334 + default:
1335 + BUG();
1336 + }
1337 + blk_rings->common.req_cons = ++rc; /* before make_response() */
1338 +
1339 + /* Apply all sanity checks to /private copy/ of request. */
1340 + barrier();
1341 +
1342 + switch (req.operation) {
1343 + case BLKIF_OP_READ:
1344 + blkif->st_rd_req++;
1345 + dispatch_rw_block_io(blkif, &req, pending_req);
1346 + break;
1347 + case BLKIF_OP_WRITE_BARRIER:
1348 + blkif->st_br_req++;
1349 + /* fall through */
1350 + case BLKIF_OP_WRITE:
1351 + blkif->st_wr_req++;
1352 + dispatch_rw_block_io(blkif, &req, pending_req);
1353 + break;
1354 + default:
1355 + /* A good sign something is wrong: sleep for a while to
1356 + * avoid excessive CPU consumption by a bad guest. */
1357 + msleep(1);
1358 + DPRINTK("error: unknown block io operation [%d]\n",
1359 + req.operation);
1360 + make_response(blkif, req.id, req.operation,
1361 + BLKIF_RSP_ERROR);
1362 + free_req(pending_req);
1363 + break;
1364 + }
1365 +
1366 + /* Yield point for this unbounded loop. */
1367 + cond_resched();
1368 + }
1369 +
1370 + return more_to_do;
1371 +}
1372 +
1373 +static void dispatch_rw_block_io(blkif_t *blkif,
1374 + blkif_request_t *req,
1375 + pending_req_t *pending_req)
1376 +{
1377 + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
1378 + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1379 + struct phys_req preq;
1380 + struct {
1381 + unsigned long buf; unsigned int nsec;
1382 + } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1383 + unsigned int nseg;
1384 + struct bio *bio = NULL;
1385 + int ret, i;
1386 + int operation;
1387 +
1388 + switch (req->operation) {
1389 + case BLKIF_OP_READ:
1390 + operation = READ;
1391 + break;
1392 + case BLKIF_OP_WRITE:
1393 + operation = WRITE;
1394 + break;
1395 + case BLKIF_OP_WRITE_BARRIER:
1396 + operation = WRITE_BARRIER;
1397 + break;
1398 + default:
1399 + operation = 0; /* make gcc happy */
1400 + BUG();
1401 + }
1402 +
1403 + /* Check that number of segments is sane. */
1404 + nseg = req->nr_segments;
1405 + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
1406 + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
1407 + DPRINTK("Bad number of segments in request (%d)\n", nseg);
1408 + goto fail_response;
1409 + }
1410 +
1411 + preq.dev = req->handle;
1412 + preq.sector_number = req->sector_number;
1413 + preq.nr_sects = 0;
1414 +
1415 + pending_req->blkif = blkif;
1416 + pending_req->id = req->id;
1417 + pending_req->operation = req->operation;
1418 + pending_req->status = BLKIF_RSP_OKAY;
1419 + pending_req->nr_pages = nseg;
1420 +
1421 + for (i = 0; i < nseg; i++) {
1422 + uint32_t flags;
1423 +
1424 + seg[i].nsec = req->seg[i].last_sect -
1425 + req->seg[i].first_sect + 1;
1426 +
1427 + if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
1428 + (req->seg[i].last_sect < req->seg[i].first_sect))
1429 + goto fail_response;
1430 + preq.nr_sects += seg[i].nsec;
1431 +
1432 + flags = GNTMAP_host_map;
1433 + if (operation != READ)
1434 + flags |= GNTMAP_readonly;
1435 + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
1436 + req->seg[i].gref, blkif->domid);
1437 + }
1438 +
1439 + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
1440 + BUG_ON(ret);
1441 +
1442 + for (i = 0; i < nseg; i++) {
1443 + if (unlikely(map[i].status != 0)) {
1444 + DPRINTK("invalid buffer -- could not remap it\n");
1445 + map[i].handle = BLKBACK_INVALID_HANDLE;
1446 + ret |= 1;
1447 + }
1448 +
1449 + pending_handle(pending_req, i) = map[i].handle;
1450 +
1451 + if (ret)
1452 + continue;
1453 +
1454 + set_phys_to_machine(__pa(vaddr(
1455 + pending_req, i)) >> PAGE_SHIFT,
1456 + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
1457 + seg[i].buf = map[i].dev_bus_addr |
1458 + (req->seg[i].first_sect << 9);
1459 + }
1460 +
1461 + if (ret)
1462 + goto fail_flush;
1463 +
1464 + if (vbd_translate(&preq, blkif, operation) != 0) {
1465 + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
1466 + operation == READ ? "read" : "write",
1467 + preq.sector_number,
1468 + preq.sector_number + preq.nr_sects, preq.dev);
1469 + goto fail_flush;
1470 + }
1471 +
1472 + plug_queue(blkif, preq.bdev);
1473 + atomic_set(&pending_req->pendcnt, 1);
1474 + blkif_get(blkif);
1475 +
1476 + for (i = 0; i < nseg; i++) {
1477 + if (((int)preq.sector_number|(int)seg[i].nsec) &
1478 + ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
1479 + DPRINTK("Misaligned I/O request from domain %d",
1480 + blkif->domid);
1481 + goto fail_put_bio;
1482 + }
1483 +
1484 + while ((bio == NULL) ||
1485 + (bio_add_page(bio,
1486 + virt_to_page(vaddr(pending_req, i)),
1487 + seg[i].nsec << 9,
1488 + seg[i].buf & ~PAGE_MASK) == 0)) {
1489 + if (bio) {
1490 + atomic_inc(&pending_req->pendcnt);
1491 + submit_bio(operation, bio);
1492 + }
1493 +
1494 + bio = bio_alloc(GFP_KERNEL, nseg-i);
1495 + if (unlikely(bio == NULL))
1496 + goto fail_put_bio;
1497 +
1498 + bio->bi_bdev = preq.bdev;
1499 + bio->bi_private = pending_req;
1500 + bio->bi_end_io = end_block_io_op;
1501 + bio->bi_sector = preq.sector_number;
1502 + }
1503 +
1504 + preq.sector_number += seg[i].nsec;
1505 + }
1506 +
1507 + if (!bio) {
1508 + BUG_ON(operation != WRITE_BARRIER);
1509 + bio = bio_alloc(GFP_KERNEL, 0);
1510 + if (unlikely(bio == NULL))
1511 + goto fail_put_bio;
1512 +
1513 + bio->bi_bdev = preq.bdev;
1514 + bio->bi_private = pending_req;
1515 + bio->bi_end_io = end_block_io_op;
1516 + bio->bi_sector = -1;
1517 + }
1518 +
1519 + submit_bio(operation, bio);
1520 +
1521 + if (operation == READ)
1522 + blkif->st_rd_sect += preq.nr_sects;
1523 + else if (operation == WRITE || operation == WRITE_BARRIER)
1524 + blkif->st_wr_sect += preq.nr_sects;
1525 +
1526 + return;
1527 +
1528 + fail_flush:
1529 + fast_flush_area(pending_req);
1530 + fail_response:
1531 + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
1532 + free_req(pending_req);
1533 + msleep(1); /* back off a bit */
1534 + return;
1535 +
1536 + fail_put_bio:
1537 + __end_block_io_op(pending_req, -EINVAL);
1538 + if (bio)
1539 + bio_put(bio);
1540 + unplug_queue(blkif);
1541 + msleep(1); /* back off a bit */
1542 + return;
1543 +}
1544 +
1545 +
1546 +
1547 +/******************************************************************
1548 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
1549 + */
1550 +
1551 +
1552 +static void make_response(blkif_t *blkif, u64 id,
1553 + unsigned short op, int st)
1554 +{
1555 + blkif_response_t resp;
1556 + unsigned long flags;
1557 + blkif_back_rings_t *blk_rings = &blkif->blk_rings;
1558 + int more_to_do = 0;
1559 + int notify;
1560 +
1561 + resp.id = id;
1562 + resp.operation = op;
1563 + resp.status = st;
1564 +
1565 + spin_lock_irqsave(&blkif->blk_ring_lock, flags);
1566 + /* Place on the response ring for the relevant domain. */
1567 + switch (blkif->blk_protocol) {
1568 + case BLKIF_PROTOCOL_NATIVE:
1569 + memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
1570 + &resp, sizeof(resp));
1571 + break;
1572 + case BLKIF_PROTOCOL_X86_32:
1573 + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
1574 + &resp, sizeof(resp));
1575 + break;
1576 + case BLKIF_PROTOCOL_X86_64:
1577 + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
1578 + &resp, sizeof(resp));
1579 + break;
1580 + default:
1581 + BUG();
1582 + }
1583 + blk_rings->common.rsp_prod_pvt++;
1584 + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
1585 + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
1586 + /*
1587 + * Tail check for pending requests. Allows frontend to avoid
1588 + * notifications if requests are already in flight (lower
1589 + * overheads and promotes batching).
1590 + */
1591 + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
1592 +
1593 + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
1594 + more_to_do = 1;
1595 + }
1596 +
1597 + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
1598 +
1599 + if (more_to_do)
1600 + blkif_notify_work(blkif);
1601 + if (notify)
1602 + notify_remote_via_irq(blkif->irq);
1603 +}
1604 +
1605 +static int __init blkif_init(void)
1606 +{
1607 + int i, mmap_pages;
1608 +
1609 + if (!is_running_on_xen())
1610 + return -ENODEV;
1611 +
1612 + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
1613 +
1614 + pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
1615 + blkif_reqs, GFP_KERNEL);
1616 + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
1617 + mmap_pages, GFP_KERNEL);
1618 + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
1619 +
1620 + if (!pending_reqs || !pending_grant_handles || !pending_pages)
1621 + goto out_of_memory;
1622 +
1623 + for (i = 0; i < mmap_pages; i++)
1624 + pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
1625 +
1626 + blkif_interface_init();
1627 +
1628 +	memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
1629 + INIT_LIST_HEAD(&pending_free);
1630 +
1631 + for (i = 0; i < blkif_reqs; i++)
1632 + list_add_tail(&pending_reqs[i].free_list, &pending_free);
1633 +
1634 + blkif_xenbus_init();
1635 +
1636 + return 0;
1637 +
1638 + out_of_memory:
1639 + kfree(pending_reqs);
1640 + kfree(pending_grant_handles);
1641 + free_empty_pages_and_pagevec(pending_pages, mmap_pages);
1642 + printk("%s: out of memory\n", __FUNCTION__);
1643 + return -ENOMEM;
1644 +}
1645 +
1646 +module_init(blkif_init);
1647 +
1648 +MODULE_LICENSE("Dual BSD/GPL");
1649 Index: head-2008-11-25/drivers/xen/blkback/common.h
1650 ===================================================================
1651 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
1652 +++ head-2008-11-25/drivers/xen/blkback/common.h 2008-05-08 14:02:04.000000000 +0200
1653 @@ -0,0 +1,139 @@
1654 +/*
1655 + * This program is free software; you can redistribute it and/or
1656 + * modify it under the terms of the GNU General Public License version 2
1657 + * as published by the Free Software Foundation; or, when distributed
1658 + * separately from the Linux kernel or incorporated into other
1659 + * software packages, subject to the following license:
1660 + *
1661 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1662 + * of this source file (the "Software"), to deal in the Software without
1663 + * restriction, including without limitation the rights to use, copy, modify,
1664 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1665 + * and to permit persons to whom the Software is furnished to do so, subject to
1666 + * the following conditions:
1667 + *
1668 + * The above copyright notice and this permission notice shall be included in
1669 + * all copies or substantial portions of the Software.
1670 + *
1671 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1672 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1673 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1674 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1675 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1676 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1677 + * IN THE SOFTWARE.
1678 + */
1679 +
1680 +#ifndef __BLKIF__BACKEND__COMMON_H__
1681 +#define __BLKIF__BACKEND__COMMON_H__
1682 +
1683 +#include <linux/version.h>
1684 +#include <linux/module.h>
1685 +#include <linux/interrupt.h>
1686 +#include <linux/slab.h>
1687 +#include <linux/blkdev.h>
1688 +#include <linux/vmalloc.h>
1689 +#include <linux/wait.h>
1690 +#include <asm/io.h>
1691 +#include <asm/setup.h>
1692 +#include <asm/pgalloc.h>
1693 +#include <xen/evtchn.h>
1694 +#include <asm/hypervisor.h>
1695 +#include <xen/blkif.h>
1696 +#include <xen/gnttab.h>
1697 +#include <xen/driver_util.h>
1698 +#include <xen/xenbus.h>
1699 +
1700 +#define DPRINTK(_f, _a...) \
1701 + pr_debug("(file=%s, line=%d) " _f, \
1702 + __FILE__ , __LINE__ , ## _a )
1703 +
1704 +struct vbd {
1705 + blkif_vdev_t handle; /* what the domain refers to this vbd as */
1706 + unsigned char readonly; /* Non-zero -> read-only */
1707 + unsigned char type; /* VDISK_xxx */
1708 + u32 pdevice; /* phys device that this vbd maps to */
1709 + struct block_device *bdev;
1710 +};
1711 +
1712 +struct backend_info;
1713 +
1714 +typedef struct blkif_st {
1715 + /* Unique identifier for this interface. */
1716 + domid_t domid;
1717 + unsigned int handle;
1718 + /* Physical parameters of the comms window. */
1719 + unsigned int irq;
1720 + /* Comms information. */
1721 + enum blkif_protocol blk_protocol;
1722 + blkif_back_rings_t blk_rings;
1723 + struct vm_struct *blk_ring_area;
1724 + /* The VBD attached to this interface. */
1725 + struct vbd vbd;
1726 + /* Back pointer to the backend_info. */
1727 + struct backend_info *be;
1728 + /* Private fields. */
1729 + spinlock_t blk_ring_lock;
1730 + atomic_t refcnt;
1731 +
1732 + wait_queue_head_t wq;
1733 + struct task_struct *xenblkd;
1734 + unsigned int waiting_reqs;
1735 + request_queue_t *plug;
1736 +
1737 + /* statistics */
1738 + unsigned long st_print;
1739 + int st_rd_req;
1740 + int st_wr_req;
1741 + int st_oo_req;
1742 + int st_br_req;
1743 + int st_rd_sect;
1744 + int st_wr_sect;
1745 +
1746 + wait_queue_head_t waiting_to_free;
1747 +
1748 + grant_handle_t shmem_handle;
1749 + grant_ref_t shmem_ref;
1750 +} blkif_t;
1751 +
1752 +blkif_t *blkif_alloc(domid_t domid);
1753 +void blkif_disconnect(blkif_t *blkif);
1754 +void blkif_free(blkif_t *blkif);
1755 +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
1756 +
1757 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
1758 +#define blkif_put(_b) \
1759 + do { \
1760 + if (atomic_dec_and_test(&(_b)->refcnt)) \
1761 + wake_up(&(_b)->waiting_to_free);\
1762 + } while (0)
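Annotation: a sketch of the intended teardown handshake (an assumption; the disconnect path itself lives in interface.c, beyond this excerpt):

	/*
	 * atomic_dec(&blkif->refcnt);              /* drop the owner's reference */
	 * wait_event(blkif->waiting_to_free,
	 *            atomic_read(&blkif->refcnt) == 0);
	 * /* every in-flight request has now called blkif_put(); safe to free */
	 */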
1763 +
1764 +/* Create a vbd. */
1765 +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
1766 + unsigned minor, int readonly, int cdrom);
1767 +void vbd_free(struct vbd *vbd);
1768 +
1769 +unsigned long long vbd_size(struct vbd *vbd);
1770 +unsigned int vbd_info(struct vbd *vbd);
1771 +unsigned long vbd_secsize(struct vbd *vbd);
1772 +
1773 +struct phys_req {
1774 + unsigned short dev;
1775 + unsigned short nr_sects;
1776 + struct block_device *bdev;
1777 + blkif_sector_t sector_number;
1778 +};
1779 +
1780 +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
1781 +
1782 +void blkif_interface_init(void);
1783 +
1784 +void blkif_xenbus_init(void);
1785 +
1786 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
1787 +int blkif_schedule(void *arg);
1788 +
1789 +int blkback_barrier(struct xenbus_transaction xbt,
1790 + struct backend_info *be, int state);
1791 +
1792 +#endif /* __BLKIF__BACKEND__COMMON_H__ */
1793 Index: head-2008-11-25/drivers/xen/blkback/interface.c
1794 ===================================================================
1795 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
1796 +++ head-2008-11-25/drivers/xen/blkback/interface.c 2007-06-12 13:13:44.000000000 +0200
1797 @@ -0,0 +1,181 @@
1798 +/******************************************************************************
1799 + * arch/xen/drivers/blkif/backend/interface.c
1800 + *
1801 + * Block-device interface management.
1802 + *
1803 + * Copyright (c) 2004, Keir Fraser
1804 + *
1805 + * This program is free software; you can redistribute it and/or
1806 + * modify it under the terms of the GNU General Public License version 2
1807 + * as published by the Free Software Foundation; or, when distributed
1808 + * separately from the Linux kernel or incorporated into other
1809 + * software packages, subject to the following license:
1810 + *
1811 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1812 + * of this source file (the "Software"), to deal in the Software without
1813 + * restriction, including without limitation the rights to use, copy, modify,
1814 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1815 + * and to permit persons to whom the Software is furnished to do so, subject to
1816 + * the following conditions:
1817 + *
1818 + * The above copyright notice and this permission notice shall be included in
1819 + * all copies or substantial portions of the Software.
1820 + *
1821 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1822 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1823 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1824 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1825 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1826 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1827 + * IN THE SOFTWARE.
1828 + */
1829 +
1830 +#include "common.h"
1831 +#include <xen/evtchn.h>
1832 +#include <linux/kthread.h>
1833 +
1834 +static kmem_cache_t *blkif_cachep;
1835 +
1836 +blkif_t *blkif_alloc(domid_t domid)
1837 +{
1838 + blkif_t *blkif;
1839 +
1840 + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
1841 + if (!blkif)
1842 + return ERR_PTR(-ENOMEM);
1843 +
1844 + memset(blkif, 0, sizeof(*blkif));
1845 + blkif->domid = domid;
1846 + spin_lock_init(&blkif->blk_ring_lock);
1847 + atomic_set(&blkif->refcnt, 1);
1848 + init_waitqueue_head(&blkif->wq);
1849 + blkif->st_print = jiffies;
1850 + init_waitqueue_head(&blkif->waiting_to_free);
1851 +
1852 + return blkif;
1853 +}
1854 +
1855 +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
1856 +{
1857 + struct gnttab_map_grant_ref op;
1858 +
1859 + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
1860 + GNTMAP_host_map, shared_page, blkif->domid);
1861 +
1862 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
1863 + BUG();
1864 +
1865 + if (op.status) {
1866 +		DPRINTK("Grant table operation failure!\n");
1867 + return op.status;
1868 + }
1869 +
1870 + blkif->shmem_ref = shared_page;
1871 + blkif->shmem_handle = op.handle;
1872 +
1873 + return 0;
1874 +}
1875 +
1876 +static void unmap_frontend_page(blkif_t *blkif)
1877 +{
1878 + struct gnttab_unmap_grant_ref op;
1879 +
1880 + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
1881 + GNTMAP_host_map, blkif->shmem_handle);
1882 +
1883 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
1884 + BUG();
1885 +}
1886 +
1887 +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
1888 +{
1889 + int err;
1890 +
1891 +	/* Already connected? */
1892 + if (blkif->irq)
1893 + return 0;
1894 +
1895 +	if ((blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL)
1896 + return -ENOMEM;
1897 +
1898 + err = map_frontend_page(blkif, shared_page);
1899 + if (err) {
1900 + free_vm_area(blkif->blk_ring_area);
1901 + return err;
1902 + }
1903 +
1904 + switch (blkif->blk_protocol) {
1905 + case BLKIF_PROTOCOL_NATIVE:
1906 + {
1907 + blkif_sring_t *sring;
1908 + sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
1909 + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
1910 + break;
1911 + }
1912 + case BLKIF_PROTOCOL_X86_32:
1913 + {
1914 + blkif_x86_32_sring_t *sring_x86_32;
1915 + sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr;
1916 + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
1917 + break;
1918 + }
1919 + case BLKIF_PROTOCOL_X86_64:
1920 + {
1921 + blkif_x86_64_sring_t *sring_x86_64;
1922 + sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr;
1923 + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
1924 + break;
1925 + }
1926 + default:
1927 + BUG();
1928 + }
1929 +
1930 + err = bind_interdomain_evtchn_to_irqhandler(
1931 + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif);
1932 + if (err < 0)
1933 + {
1934 + unmap_frontend_page(blkif);
1935 + free_vm_area(blkif->blk_ring_area);
1936 + blkif->blk_rings.common.sring = NULL;
1937 + return err;
1938 + }
1939 + blkif->irq = err;
1940 +
1941 + return 0;
1942 +}
1943 +
1944 +void blkif_disconnect(blkif_t *blkif)
1945 +{
1946 + if (blkif->xenblkd) {
1947 + kthread_stop(blkif->xenblkd);
1948 + blkif->xenblkd = NULL;
1949 + }
1950 +
1951 + atomic_dec(&blkif->refcnt);
1952 + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
1953 + atomic_inc(&blkif->refcnt);
1954 +
1955 + if (blkif->irq) {
1956 + unbind_from_irqhandler(blkif->irq, blkif);
1957 + blkif->irq = 0;
1958 + }
1959 +
1960 + if (blkif->blk_rings.common.sring) {
1961 + unmap_frontend_page(blkif);
1962 + free_vm_area(blkif->blk_ring_area);
1963 + blkif->blk_rings.common.sring = NULL;
1964 + }
1965 +}
1966 +
1967 +void blkif_free(blkif_t *blkif)
1968 +{
1969 + if (!atomic_dec_and_test(&blkif->refcnt))
1970 + BUG();
1971 + kmem_cache_free(blkif_cachep, blkif);
1972 +}
1973 +
1974 +void __init blkif_interface_init(void)
1975 +{
1976 + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
1977 + 0, 0, NULL, NULL);
1978 +}
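
Taken together, blkif_alloc(), blkif_map(), blkif_disconnect() and blkif_free() form the backend interface lifecycle. A minimal sketch of the expected call order, assuming ring_ref and evtchn were already read from xenstore (error handling elided; in this patch it is xenbus.c below that actually drives these steps):

	blkif_t *blkif = blkif_alloc(domid);	/* refcnt starts at 1 */
	if (!IS_ERR(blkif)) {
		err = blkif_map(blkif, ring_ref, evtchn); /* map ring, bind irq */
		/* ... xenblkd serves requests ... */
		blkif_disconnect(blkif);	/* stop kthread, drain refs, unmap */
		blkif_free(blkif);		/* drops the initial reference */
	}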
1979 Index: head-2008-11-25/drivers/xen/blkback/vbd.c
1980 ===================================================================
1981 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
1982 +++ head-2008-11-25/drivers/xen/blkback/vbd.c 2008-05-08 14:02:04.000000000 +0200
1983 @@ -0,0 +1,118 @@
1984 +/******************************************************************************
1985 + * blkback/vbd.c
1986 + *
1987 + * Routines for managing virtual block devices (VBDs).
1988 + *
1989 + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
1990 + *
1991 + * This program is free software; you can redistribute it and/or
1992 + * modify it under the terms of the GNU General Public License version 2
1993 + * as published by the Free Software Foundation; or, when distributed
1994 + * separately from the Linux kernel or incorporated into other
1995 + * software packages, subject to the following license:
1996 + *
1997 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1998 + * of this source file (the "Software"), to deal in the Software without
1999 + * restriction, including without limitation the rights to use, copy, modify,
2000 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
2001 + * and to permit persons to whom the Software is furnished to do so, subject to
2002 + * the following conditions:
2003 + *
2004 + * The above copyright notice and this permission notice shall be included in
2005 + * all copies or substantial portions of the Software.
2006 + *
2007 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2008 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2009 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2010 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2011 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2012 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2013 + * IN THE SOFTWARE.
2014 + */
2015 +
2016 +#include "common.h"
2017 +
2018 +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
2019 + (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
2020 +
2021 +unsigned long long vbd_size(struct vbd *vbd)
2022 +{
2023 + return vbd_sz(vbd);
2024 +}
2025 +
2026 +unsigned int vbd_info(struct vbd *vbd)
2027 +{
2028 +	return vbd->type | (vbd->readonly ? VDISK_READONLY : 0);
2029 +}
2030 +
2031 +unsigned long vbd_secsize(struct vbd *vbd)
2032 +{
2033 + return bdev_hardsect_size(vbd->bdev);
2034 +}
2035 +
2036 +int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
2037 + unsigned minor, int readonly, int cdrom)
2038 +{
2039 + struct vbd *vbd;
2040 + struct block_device *bdev;
2041 +
2042 + vbd = &blkif->vbd;
2043 + vbd->handle = handle;
2044 + vbd->readonly = readonly;
2045 + vbd->type = 0;
2046 +
2047 + vbd->pdevice = MKDEV(major, minor);
2048 +
2049 + bdev = open_by_devnum(vbd->pdevice,
2050 + vbd->readonly ? FMODE_READ : FMODE_WRITE);
2051 +
2052 + if (IS_ERR(bdev)) {
2053 +		DPRINTK("vbd_create: device %08x could not be opened.\n",
2054 + vbd->pdevice);
2055 + return -ENOENT;
2056 + }
2057 +
2058 + vbd->bdev = bdev;
2059 +
2060 + if (vbd->bdev->bd_disk == NULL) {
2061 +		DPRINTK("vbd_create: device %08x doesn't exist.\n",
2062 + vbd->pdevice);
2063 + vbd_free(vbd);
2064 + return -ENOENT;
2065 + }
2066 +
2067 + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
2068 + vbd->type |= VDISK_CDROM;
2069 + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
2070 + vbd->type |= VDISK_REMOVABLE;
2071 +
2072 + DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
2073 + handle, blkif->domid);
2074 + return 0;
2075 +}
2076 +
2077 +void vbd_free(struct vbd *vbd)
2078 +{
2079 + if (vbd->bdev)
2080 + blkdev_put(vbd->bdev);
2081 + vbd->bdev = NULL;
2082 +}
2083 +
2084 +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
2085 +{
2086 + struct vbd *vbd = &blkif->vbd;
2087 + int rc = -EACCES;
2088 +
2089 + if ((operation != READ) && vbd->readonly)
2090 + goto out;
2091 +
2092 + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
2093 + goto out;
2094 +
2095 + req->dev = vbd->pdevice;
2096 + req->bdev = vbd->bdev;
2097 + rc = 0;
2098 +
2099 + out:
2100 + return rc;
2101 +}
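
A hedged sketch of how the data path is expected to call vbd_translate() (blkback.c, not part of this hunk, is the real caller; the request fields and make_response() shown here are assumptions for illustration only):

	struct phys_req preq;

	preq.dev           = req->handle;
	preq.sector_number = req->sector_number;
	preq.nr_sects      = nsects;		/* summed over all segments */
	if (vbd_translate(&preq, blkif, operation) != 0)
		/* past end of device, or write to a read-only vbd */
		make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);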
2102 Index: head-2008-11-25/drivers/xen/blkback/xenbus.c
2103 ===================================================================
2104 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
2105 +++ head-2008-11-25/drivers/xen/blkback/xenbus.c 2008-05-08 14:02:04.000000000 +0200
2106 @@ -0,0 +1,541 @@
2107 +/* Xenbus code for blkif backend
2108 + Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
2109 + Copyright (C) 2005 XenSource Ltd
2110 +
2111 + This program is free software; you can redistribute it and/or modify
2112 + it under the terms of the GNU General Public License as published by
2113 + the Free Software Foundation; either version 2 of the License, or
2114 + (at your option) any later version.
2115 +
2116 + This program is distributed in the hope that it will be useful,
2117 + but WITHOUT ANY WARRANTY; without even the implied warranty of
2118 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2119 + GNU General Public License for more details.
2120 +
2121 + You should have received a copy of the GNU General Public License
2122 + along with this program; if not, write to the Free Software
2123 + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2124 +*/
2125 +
2126 +#include <stdarg.h>
2127 +#include <linux/module.h>
2128 +#include <linux/kthread.h>
2129 +#include "common.h"
2130 +
2131 +#undef DPRINTK
2132 +#define DPRINTK(fmt, args...) \
2133 + pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \
2134 + __FUNCTION__, __LINE__, ##args)
2135 +
2136 +struct backend_info
2137 +{
2138 + struct xenbus_device *dev;
2139 + blkif_t *blkif;
2140 + struct xenbus_watch backend_watch;
2141 + unsigned major;
2142 + unsigned minor;
2143 + char *mode;
2144 +};
2145 +
2146 +static void connect(struct backend_info *);
2147 +static int connect_ring(struct backend_info *);
2148 +static void backend_changed(struct xenbus_watch *, const char **,
2149 + unsigned int);
2150 +
2151 +static int blkback_name(blkif_t *blkif, char *buf)
2152 +{
2153 + char *devpath, *devname;
2154 + struct xenbus_device *dev = blkif->be->dev;
2155 +
2156 + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
2157 + if (IS_ERR(devpath))
2158 + return PTR_ERR(devpath);
2159 +
2160 + if ((devname = strstr(devpath, "/dev/")) != NULL)
2161 + devname += strlen("/dev/");
2162 + else
2163 + devname = devpath;
2164 +
2165 + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
2166 + kfree(devpath);
2167 +
2168 + return 0;
2169 +}
2170 +
2171 +static void update_blkif_status(blkif_t *blkif)
2172 +{
2173 + int err;
2174 + char name[TASK_COMM_LEN];
2175 +
2176 + /* Not ready to connect? */
2177 + if (!blkif->irq || !blkif->vbd.bdev)
2178 + return;
2179 +
2180 + /* Already connected? */
2181 + if (blkif->be->dev->state == XenbusStateConnected)
2182 + return;
2183 +
2184 +	/* Attempt to connect: exit if we fail. */
2185 + connect(blkif->be);
2186 + if (blkif->be->dev->state != XenbusStateConnected)
2187 + return;
2188 +
2189 + err = blkback_name(blkif, name);
2190 + if (err) {
2191 + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
2192 + return;
2193 + }
2194 +
2195 + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name);
2196 + if (IS_ERR(blkif->xenblkd)) {
2197 + err = PTR_ERR(blkif->xenblkd);
2198 + blkif->xenblkd = NULL;
2199 + xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
2200 + }
2201 +}
2202 +
2203 +
2204 +/****************************************************************
2205 + * sysfs interface for VBD I/O requests
2206 + */
2207 +
2208 +#define VBD_SHOW(name, format, args...) \
2209 + static ssize_t show_##name(struct device *_dev, \
2210 + struct device_attribute *attr, \
2211 + char *buf) \
2212 + { \
2213 + struct xenbus_device *dev = to_xenbus_device(_dev); \
2214 + struct backend_info *be = dev->dev.driver_data; \
2215 + \
2216 + return sprintf(buf, format, ##args); \
2217 + } \
2218 + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
2219 +
2220 +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
2221 +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
2222 +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
2223 +VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
2224 +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
2225 +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
2226 +
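For reference, one VBD_SHOW() invocation above expands to roughly the following (reconstructed by hand; whitespace differs from real preprocessor output):

	static ssize_t show_oo_req(struct device *_dev,
				   struct device_attribute *attr, char *buf)
	{
		struct xenbus_device *dev = to_xenbus_device(_dev);
		struct backend_info *be = dev->dev.driver_data;

		return sprintf(buf, "%d\n", be->blkif->st_oo_req);
	}
	static DEVICE_ATTR(oo_req, S_IRUGO, show_oo_req, NULL);
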
2227 +static struct attribute *vbdstat_attrs[] = {
2228 + &dev_attr_oo_req.attr,
2229 + &dev_attr_rd_req.attr,
2230 + &dev_attr_wr_req.attr,
2231 + &dev_attr_br_req.attr,
2232 + &dev_attr_rd_sect.attr,
2233 + &dev_attr_wr_sect.attr,
2234 + NULL
2235 +};
2236 +
2237 +static struct attribute_group vbdstat_group = {
2238 + .name = "statistics",
2239 + .attrs = vbdstat_attrs,
2240 +};
2241 +
2242 +VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
2243 +VBD_SHOW(mode, "%s\n", be->mode);
2244 +
2245 +int xenvbd_sysfs_addif(struct xenbus_device *dev)
2246 +{
2247 + int error;
2248 +
2249 + error = device_create_file(&dev->dev, &dev_attr_physical_device);
2250 + if (error)
2251 + goto fail1;
2252 +
2253 + error = device_create_file(&dev->dev, &dev_attr_mode);
2254 + if (error)
2255 + goto fail2;
2256 +
2257 + error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
2258 + if (error)
2259 + goto fail3;
2260 +
2261 + return 0;
2262 +
2263 +fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
2264 +fail2: device_remove_file(&dev->dev, &dev_attr_mode);
2265 +fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
2266 + return error;
2267 +}
2268 +
2269 +void xenvbd_sysfs_delif(struct xenbus_device *dev)
2270 +{
2271 + sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
2272 + device_remove_file(&dev->dev, &dev_attr_mode);
2273 + device_remove_file(&dev->dev, &dev_attr_physical_device);
2274 +}
2275 +
2276 +static int blkback_remove(struct xenbus_device *dev)
2277 +{
2278 + struct backend_info *be = dev->dev.driver_data;
2279 +
2280 + DPRINTK("");
2281 +
2282 + if (be->major || be->minor)
2283 + xenvbd_sysfs_delif(dev);
2284 +
2285 + if (be->backend_watch.node) {
2286 + unregister_xenbus_watch(&be->backend_watch);
2287 + kfree(be->backend_watch.node);
2288 + be->backend_watch.node = NULL;
2289 + }
2290 +
2291 + if (be->blkif) {
2292 + blkif_disconnect(be->blkif);
2293 + vbd_free(&be->blkif->vbd);
2294 + blkif_free(be->blkif);
2295 + be->blkif = NULL;
2296 + }
2297 +
2298 + kfree(be);
2299 + dev->dev.driver_data = NULL;
2300 + return 0;
2301 +}
2302 +
2303 +int blkback_barrier(struct xenbus_transaction xbt,
2304 + struct backend_info *be, int state)
2305 +{
2306 + struct xenbus_device *dev = be->dev;
2307 + int err;
2308 +
2309 + err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
2310 + "%d", state);
2311 + if (err)
2312 + xenbus_dev_fatal(dev, err, "writing feature-barrier");
2313 +
2314 + return err;
2315 +}
2316 +
2317 +/**
2318 + * Entry point to this code when a new device is created. Allocate the basic
2319 + * structures, and watch the store waiting for the hotplug scripts to tell us
2320 + * the device's physical major and minor numbers. Switch to InitWait.
2321 + */
2322 +static int blkback_probe(struct xenbus_device *dev,
2323 + const struct xenbus_device_id *id)
2324 +{
2325 + int err;
2326 + struct backend_info *be = kzalloc(sizeof(struct backend_info),
2327 + GFP_KERNEL);
2328 + if (!be) {
2329 + xenbus_dev_fatal(dev, -ENOMEM,
2330 + "allocating backend structure");
2331 + return -ENOMEM;
2332 + }
2333 + be->dev = dev;
2334 + dev->dev.driver_data = be;
2335 +
2336 + be->blkif = blkif_alloc(dev->otherend_id);
2337 + if (IS_ERR(be->blkif)) {
2338 + err = PTR_ERR(be->blkif);
2339 + be->blkif = NULL;
2340 + xenbus_dev_fatal(dev, err, "creating block interface");
2341 + goto fail;
2342 + }
2343 +
2344 + /* setup back pointer */
2345 + be->blkif->be = be;
2346 +
2347 + err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
2348 + &be->backend_watch, backend_changed);
2349 + if (err)
2350 + goto fail;
2351 +
2352 + err = xenbus_switch_state(dev, XenbusStateInitWait);
2353 + if (err)
2354 + goto fail;
2355 +
2356 + return 0;
2357 +
2358 +fail:
2359 + DPRINTK("failed");
2360 + blkback_remove(dev);
2361 + return err;
2362 +}
2363 +
2364 +
2365 +/**
2366 + * Callback received when the hotplug scripts have placed the physical-device
2367 + * node. Read it and the mode node, and create a vbd. If the frontend is
2368 + * ready, connect.
2369 + */
2370 +static void backend_changed(struct xenbus_watch *watch,
2371 + const char **vec, unsigned int len)
2372 +{
2373 + int err;
2374 + unsigned major;
2375 + unsigned minor;
2376 + struct backend_info *be
2377 + = container_of(watch, struct backend_info, backend_watch);
2378 + struct xenbus_device *dev = be->dev;
2379 + int cdrom = 0;
2380 + char *device_type;
2381 +
2382 + DPRINTK("");
2383 +
2384 + err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
2385 + &major, &minor);
2386 + if (XENBUS_EXIST_ERR(err)) {
2387 + /* Since this watch will fire once immediately after it is
2388 + registered, we expect this. Ignore it, and wait for the
2389 + hotplug scripts. */
2390 + return;
2391 + }
2392 + if (err != 2) {
2393 + xenbus_dev_fatal(dev, err, "reading physical-device");
2394 + return;
2395 + }
2396 +
2397 + if ((be->major || be->minor) &&
2398 + ((be->major != major) || (be->minor != minor))) {
2399 + printk(KERN_WARNING
2400 + "blkback: changing physical device (from %x:%x to "
2401 + "%x:%x) not supported.\n", be->major, be->minor,
2402 + major, minor);
2403 + return;
2404 + }
2405 +
2406 + be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
2407 + if (IS_ERR(be->mode)) {
2408 + err = PTR_ERR(be->mode);
2409 + be->mode = NULL;
2410 + xenbus_dev_fatal(dev, err, "reading mode");
2411 + return;
2412 + }
2413 +
2414 + device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
2415 + if (!IS_ERR(device_type)) {
2416 + cdrom = strcmp(device_type, "cdrom") == 0;
2417 + kfree(device_type);
2418 + }
2419 +
2420 + if (be->major == 0 && be->minor == 0) {
2421 + /* Front end dir is a number, which is used as the handle. */
2422 +
2423 + char *p = strrchr(dev->otherend, '/') + 1;
2424 + long handle = simple_strtoul(p, NULL, 0);
2425 +
2426 + be->major = major;
2427 + be->minor = minor;
2428 +
2429 + err = vbd_create(be->blkif, handle, major, minor,
2430 + (NULL == strchr(be->mode, 'w')), cdrom);
2431 + if (err) {
2432 + be->major = be->minor = 0;
2433 + xenbus_dev_fatal(dev, err, "creating vbd structure");
2434 + return;
2435 + }
2436 +
2437 + err = xenvbd_sysfs_addif(dev);
2438 + if (err) {
2439 + vbd_free(&be->blkif->vbd);
2440 + be->major = be->minor = 0;
2441 + xenbus_dev_fatal(dev, err, "creating sysfs entries");
2442 + return;
2443 + }
2444 +
2445 + /* We're potentially connected now */
2446 + update_blkif_status(be->blkif);
2447 + }
2448 +}
2449 +
2450 +
2451 +/**
2452 + * Callback received when the frontend's state changes.
2453 + */
2454 +static void frontend_changed(struct xenbus_device *dev,
2455 + enum xenbus_state frontend_state)
2456 +{
2457 + struct backend_info *be = dev->dev.driver_data;
2458 + int err;
2459 +
2460 + DPRINTK("%s", xenbus_strstate(frontend_state));
2461 +
2462 + switch (frontend_state) {
2463 + case XenbusStateInitialising:
2464 + if (dev->state == XenbusStateClosed) {
2465 + printk(KERN_INFO "%s: %s: prepare for reconnect\n",
2466 + __FUNCTION__, dev->nodename);
2467 + xenbus_switch_state(dev, XenbusStateInitWait);
2468 + }
2469 + break;
2470 +
2471 + case XenbusStateInitialised:
2472 + case XenbusStateConnected:
2473 + /* Ensure we connect even when two watches fire in
2474 +		   close succession and we miss the intermediate value
2475 + of frontend_state. */
2476 + if (dev->state == XenbusStateConnected)
2477 + break;
2478 +
2479 + err = connect_ring(be);
2480 + if (err)
2481 + break;
2482 + update_blkif_status(be->blkif);
2483 + break;
2484 +
2485 + case XenbusStateClosing:
2486 + blkif_disconnect(be->blkif);
2487 + xenbus_switch_state(dev, XenbusStateClosing);
2488 + break;
2489 +
2490 + case XenbusStateClosed:
2491 + xenbus_switch_state(dev, XenbusStateClosed);
2492 + if (xenbus_dev_is_online(dev))
2493 + break;
2494 + /* fall through if not online */
2495 + case XenbusStateUnknown:
2496 + device_unregister(&dev->dev);
2497 + break;
2498 +
2499 + default:
2500 + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
2501 + frontend_state);
2502 + break;
2503 + }
2504 +}
2505 +
2506 +
2507 +/* ** Connection ** */
2508 +
2509 +
2510 +/**
2511 + * Write the physical details regarding the block device to the store, and
2512 + * switch to Connected state.
2513 + */
2514 +static void connect(struct backend_info *be)
2515 +{
2516 + struct xenbus_transaction xbt;
2517 + int err;
2518 + struct xenbus_device *dev = be->dev;
2519 +
2520 + DPRINTK("%s", dev->otherend);
2521 +
2522 + /* Supply the information about the device the frontend needs */
2523 +again:
2524 + err = xenbus_transaction_start(&xbt);
2525 + if (err) {
2526 + xenbus_dev_fatal(dev, err, "starting transaction");
2527 + return;
2528 + }
2529 +
2530 + err = blkback_barrier(xbt, be, 1);
2531 + if (err)
2532 + goto abort;
2533 +
2534 + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
2535 + vbd_size(&be->blkif->vbd));
2536 + if (err) {
2537 + xenbus_dev_fatal(dev, err, "writing %s/sectors",
2538 + dev->nodename);
2539 + goto abort;
2540 + }
2541 +
2542 + /* FIXME: use a typename instead */
2543 + err = xenbus_printf(xbt, dev->nodename, "info", "%u",
2544 + vbd_info(&be->blkif->vbd));
2545 + if (err) {
2546 + xenbus_dev_fatal(dev, err, "writing %s/info",
2547 + dev->nodename);
2548 + goto abort;
2549 + }
2550 + err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
2551 + vbd_secsize(&be->blkif->vbd));
2552 + if (err) {
2553 + xenbus_dev_fatal(dev, err, "writing %s/sector-size",
2554 + dev->nodename);
2555 + goto abort;
2556 + }
2557 +
2558 + err = xenbus_transaction_end(xbt, 0);
2559 + if (err == -EAGAIN)
2560 + goto again;
2561 + if (err)
2562 + xenbus_dev_fatal(dev, err, "ending transaction");
2563 +
2564 + err = xenbus_switch_state(dev, XenbusStateConnected);
2565 + if (err)
2566 + xenbus_dev_fatal(dev, err, "switching to Connected state",
2567 + dev->nodename);
2568 +
2569 + return;
2570 + abort:
2571 + xenbus_transaction_end(xbt, 1);
2572 +}
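
The again:/-EAGAIN loop in connect() is the standard xenbus transaction idiom: xenbus_transaction_end(xbt, 0) returns -EAGAIN when another writer raced with the transaction, and the whole read-modify-write block must be replayed. Stripped to its skeleton (the node name is illustrative):

	again:
		err = xenbus_transaction_start(&xbt);
		if (err)
			return;				/* could not start */
		err = xenbus_printf(xbt, dev->nodename, "some-node", "%d", 1);
		if (err) {
			xenbus_transaction_end(xbt, 1);	/* 1 == abort */
			return;
		}
		err = xenbus_transaction_end(xbt, 0);	/* 0 == commit */
		if (err == -EAGAIN)
			goto again;		/* raced; replay from the top */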
2573 +
2574 +
2575 +static int connect_ring(struct backend_info *be)
2576 +{
2577 + struct xenbus_device *dev = be->dev;
2578 + unsigned long ring_ref;
2579 + unsigned int evtchn;
2580 + char protocol[64] = "";
2581 + int err;
2582 +
2583 + DPRINTK("%s", dev->otherend);
2584 +
2585 + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref,
2586 + "event-channel", "%u", &evtchn, NULL);
2587 + if (err) {
2588 + xenbus_dev_fatal(dev, err,
2589 + "reading %s/ring-ref and event-channel",
2590 + dev->otherend);
2591 + return err;
2592 + }
2593 +
2594 + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
2595 + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
2596 + "%63s", protocol, NULL);
2597 + if (err)
2598 + strcpy(protocol, "unspecified, assuming native");
2599 + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
2600 + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
2601 + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
2602 + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
2603 + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
2604 + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
2605 + else {
2606 + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
2607 + return -1;
2608 + }
2609 + printk(KERN_INFO
2610 + "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
2611 + ring_ref, evtchn, be->blkif->blk_protocol, protocol);
2612 +
2613 + /* Map the shared frame, irq etc. */
2614 + err = blkif_map(be->blkif, ring_ref, evtchn);
2615 + if (err) {
2616 + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
2617 + ring_ref, evtchn);
2618 + return err;
2619 + }
2620 +
2621 + return 0;
2622 +}
2623 +
2624 +
2625 +/* ** Driver Registration ** */
2626 +
2627 +
2628 +static const struct xenbus_device_id blkback_ids[] = {
2629 + { "vbd" },
2630 + { "" }
2631 +};
2632 +
2633 +
2634 +static struct xenbus_driver blkback = {
2635 + .name = "vbd",
2636 + .owner = THIS_MODULE,
2637 + .ids = blkback_ids,
2638 + .probe = blkback_probe,
2639 + .remove = blkback_remove,
2640 + .otherend_changed = frontend_changed
2641 +};
2642 +
2643 +
2644 +void blkif_xenbus_init(void)
2645 +{
2646 + xenbus_register_backend(&blkback);
2647 +}
2648 Index: head-2008-11-25/drivers/xen/blkfront/Makefile
2649 ===================================================================
2650 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
2651 +++ head-2008-11-25/drivers/xen/blkfront/Makefile 2007-06-12 13:13:44.000000000 +0200
2652 @@ -0,0 +1,5 @@
2653 +
2654 +obj-$(CONFIG_XEN_BLKDEV_FRONTEND) := xenblk.o
2655 +
2656 +xenblk-objs := blkfront.o vbd.o
2657 +
2658 Index: head-2008-11-25/drivers/xen/blkfront/blkfront.c
2659 ===================================================================
2660 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
2661 +++ head-2008-11-25/drivers/xen/blkfront/blkfront.c 2008-08-07 12:44:36.000000000 +0200
2662 @@ -0,0 +1,936 @@
2663 +/******************************************************************************
2664 + * blkfront.c
2665 + *
2666 + * XenLinux virtual block-device driver.
2667 + *
2668 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
2669 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
2670 + * Copyright (c) 2004, Christian Limpach
2671 + * Copyright (c) 2004, Andrew Warfield
2672 + * Copyright (c) 2005, Christopher Clark
2673 + * Copyright (c) 2005, XenSource Ltd
2674 + *
2675 + * This program is free software; you can redistribute it and/or
2676 + * modify it under the terms of the GNU General Public License version 2
2677 + * as published by the Free Software Foundation; or, when distributed
2678 + * separately from the Linux kernel or incorporated into other
2679 + * software packages, subject to the following license:
2680 + *
2681 + * Permission is hereby granted, free of charge, to any person obtaining a copy
2682 + * of this source file (the "Software"), to deal in the Software without
2683 + * restriction, including without limitation the rights to use, copy, modify,
2684 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
2685 + * and to permit persons to whom the Software is furnished to do so, subject to
2686 + * the following conditions:
2687 + *
2688 + * The above copyright notice and this permission notice shall be included in
2689 + * all copies or substantial portions of the Software.
2690 + *
2691 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2692 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2693 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2694 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2695 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2696 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2697 + * IN THE SOFTWARE.
2698 + */
2699 +
2700 +#include <linux/version.h>
2701 +#include "block.h"
2702 +#include <linux/cdrom.h>
2703 +#include <linux/sched.h>
2704 +#include <linux/interrupt.h>
2705 +#include <scsi/scsi.h>
2706 +#include <xen/evtchn.h>
2707 +#include <xen/xenbus.h>
2708 +#include <xen/interface/grant_table.h>
2709 +#include <xen/interface/io/protocols.h>
2710 +#include <xen/gnttab.h>
2711 +#include <asm/hypervisor.h>
2712 +#include <asm/maddr.h>
2713 +
2714 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
2715 +#include <xen/platform-compat.h>
2716 +#endif
2717 +
2718 +#define BLKIF_STATE_DISCONNECTED 0
2719 +#define BLKIF_STATE_CONNECTED 1
2720 +#define BLKIF_STATE_SUSPENDED 2
2721 +
2722 +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
2723 + (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
2724 +#define GRANT_INVALID_REF 0
2725 +
2726 +static void connect(struct blkfront_info *);
2727 +static void blkfront_closing(struct xenbus_device *);
2728 +static int blkfront_remove(struct xenbus_device *);
2729 +static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
2730 +static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
2731 +
2732 +static void kick_pending_request_queues(struct blkfront_info *);
2733 +
2734 +static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
2735 +static void blkif_restart_queue(void *arg);
2736 +static void blkif_recover(struct blkfront_info *);
2737 +static void blkif_completion(struct blk_shadow *);
2738 +static void blkif_free(struct blkfront_info *, int);
2739 +
2740 +
2741 +/**
2742 + * Entry point to this code when a new device is created. Allocate the basic
2743 + * structures and the ring buffer for communication with the backend, and
2744 + * inform the backend of the appropriate details for those. Switch to
2745 + * Initialised state.
2746 + */
2747 +static int blkfront_probe(struct xenbus_device *dev,
2748 + const struct xenbus_device_id *id)
2749 +{
2750 + int err, vdevice, i;
2751 + struct blkfront_info *info;
2752 +
2753 + /* FIXME: Use dynamic device id if this is not set. */
2754 + err = xenbus_scanf(XBT_NIL, dev->nodename,
2755 + "virtual-device", "%i", &vdevice);
2756 + if (err != 1) {
2757 + /* go looking in the extended area instead */
2758 + err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
2759 + "%i", &vdevice);
2760 + if (err != 1) {
2761 + xenbus_dev_fatal(dev, err, "reading virtual-device");
2762 + return err;
2763 + }
2764 + }
2765 +
2766 + info = kzalloc(sizeof(*info), GFP_KERNEL);
2767 + if (!info) {
2768 + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
2769 + return -ENOMEM;
2770 + }
2771 +
2772 + info->xbdev = dev;
2773 + info->vdevice = vdevice;
2774 + info->connected = BLKIF_STATE_DISCONNECTED;
2775 + INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
2776 +
2777 + for (i = 0; i < BLK_RING_SIZE; i++)
2778 + info->shadow[i].req.id = i+1;
2779 + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
2780 +
2781 + /* Front end dir is a number, which is used as the id. */
2782 + info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
2783 + dev->dev.driver_data = info;
2784 +
2785 + err = talk_to_backend(dev, info);
2786 + if (err) {
2787 + kfree(info);
2788 + dev->dev.driver_data = NULL;
2789 + return err;
2790 + }
2791 +
2792 + return 0;
2793 +}
2794 +
2795 +
2796 +/**
2797 + * We are reconnecting to the backend, due to a suspend/resume, or a backend
2798 + * driver restart. We tear down our blkif structure and recreate it, but
2799 + * leave the device-layer structures intact so that this is transparent to the
2800 + * rest of the kernel.
2801 + */
2802 +static int blkfront_resume(struct xenbus_device *dev)
2803 +{
2804 + struct blkfront_info *info = dev->dev.driver_data;
2805 + int err;
2806 +
2807 + DPRINTK("blkfront_resume: %s\n", dev->nodename);
2808 +
2809 + blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
2810 +
2811 + err = talk_to_backend(dev, info);
2812 + if (info->connected == BLKIF_STATE_SUSPENDED && !err)
2813 + blkif_recover(info);
2814 +
2815 + return err;
2816 +}
2817 +
2818 +
2819 +/* Common code used when first setting up, and when resuming. */
2820 +static int talk_to_backend(struct xenbus_device *dev,
2821 + struct blkfront_info *info)
2822 +{
2823 + const char *message = NULL;
2824 + struct xenbus_transaction xbt;
2825 + int err;
2826 +
2827 + /* Create shared ring, alloc event channel. */
2828 + err = setup_blkring(dev, info);
2829 + if (err)
2830 + goto out;
2831 +
2832 +again:
2833 + err = xenbus_transaction_start(&xbt);
2834 + if (err) {
2835 + xenbus_dev_fatal(dev, err, "starting transaction");
2836 + goto destroy_blkring;
2837 + }
2838 +
2839 + err = xenbus_printf(xbt, dev->nodename,
2840 + "ring-ref","%u", info->ring_ref);
2841 + if (err) {
2842 + message = "writing ring-ref";
2843 + goto abort_transaction;
2844 + }
2845 + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
2846 + irq_to_evtchn_port(info->irq));
2847 + if (err) {
2848 + message = "writing event-channel";
2849 + goto abort_transaction;
2850 + }
2851 + err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
2852 + XEN_IO_PROTO_ABI_NATIVE);
2853 + if (err) {
2854 + message = "writing protocol";
2855 + goto abort_transaction;
2856 + }
2857 +
2858 + err = xenbus_transaction_end(xbt, 0);
2859 + if (err) {
2860 + if (err == -EAGAIN)
2861 + goto again;
2862 + xenbus_dev_fatal(dev, err, "completing transaction");
2863 + goto destroy_blkring;
2864 + }
2865 +
2866 + xenbus_switch_state(dev, XenbusStateInitialised);
2867 +
2868 + return 0;
2869 +
2870 + abort_transaction:
2871 + xenbus_transaction_end(xbt, 1);
2872 + if (message)
2873 + xenbus_dev_fatal(dev, err, "%s", message);
2874 + destroy_blkring:
2875 + blkif_free(info, 0);
2876 + out:
2877 + return err;
2878 +}
2879 +
2880 +
2881 +static int setup_blkring(struct xenbus_device *dev,
2882 + struct blkfront_info *info)
2883 +{
2884 + blkif_sring_t *sring;
2885 + int err;
2886 +
2887 + info->ring_ref = GRANT_INVALID_REF;
2888 +
2889 + sring = (blkif_sring_t *)__get_free_page(GFP_NOIO | __GFP_HIGH);
2890 + if (!sring) {
2891 + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
2892 + return -ENOMEM;
2893 + }
2894 + SHARED_RING_INIT(sring);
2895 + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
2896 +
2897 + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
2898 + if (err < 0) {
2899 + free_page((unsigned long)sring);
2900 + info->ring.sring = NULL;
2901 + goto fail;
2902 + }
2903 + info->ring_ref = err;
2904 +
2905 + err = bind_listening_port_to_irqhandler(
2906 + dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
2907 + if (err <= 0) {
2908 + xenbus_dev_fatal(dev, err,
2909 + "bind_listening_port_to_irqhandler");
2910 + goto fail;
2911 + }
2912 + info->irq = err;
2913 +
2914 + return 0;
2915 +fail:
2916 + blkif_free(info, 0);
2917 + return err;
2918 +}
2919 +
2920 +
2921 +/**
2922 + * Callback received when the backend's state changes.
2923 + */
2924 +static void backend_changed(struct xenbus_device *dev,
2925 + enum xenbus_state backend_state)
2926 +{
2927 + struct blkfront_info *info = dev->dev.driver_data;
2928 + struct block_device *bd;
2929 +
2930 + DPRINTK("blkfront:backend_changed.\n");
2931 +
2932 + switch (backend_state) {
2933 + case XenbusStateInitialising:
2934 + case XenbusStateInitWait:
2935 + case XenbusStateInitialised:
2936 + case XenbusStateReconfiguring:
2937 + case XenbusStateReconfigured:
2938 + case XenbusStateUnknown:
2939 + case XenbusStateClosed:
2940 + break;
2941 +
2942 + case XenbusStateConnected:
2943 + connect(info);
2944 + break;
2945 +
2946 + case XenbusStateClosing:
2947 + bd = bdget(info->dev);
2948 + if (bd == NULL)
2949 + xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
2950 +
2951 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
2952 + down(&bd->bd_sem);
2953 +#else
2954 + mutex_lock(&bd->bd_mutex);
2955 +#endif
2956 + if (info->users > 0)
2957 + xenbus_dev_error(dev, -EBUSY,
2958 + "Device in use; refusing to close");
2959 + else
2960 + blkfront_closing(dev);
2961 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
2962 + up(&bd->bd_sem);
2963 +#else
2964 + mutex_unlock(&bd->bd_mutex);
2965 +#endif
2966 + bdput(bd);
2967 + break;
2968 + }
2969 +}
2970 +
2971 +
2972 +/* ** Connection ** */
2973 +
2974 +
2975 +/*
2976 + * Invoked when the backend is finally 'ready' (and has told us the
2977 + * details about the physical device - #sectors, size, etc).
2978 + */
2979 +static void connect(struct blkfront_info *info)
2980 +{
2981 + unsigned long long sectors;
2982 + unsigned long sector_size;
2983 + unsigned int binfo;
2984 + int err;
2985 +
2986 + if ((info->connected == BLKIF_STATE_CONNECTED) ||
2987 + (info->connected == BLKIF_STATE_SUSPENDED) )
2988 + return;
2989 +
2990 + DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
2991 +
2992 + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
2993 + "sectors", "%Lu", &sectors,
2994 + "info", "%u", &binfo,
2995 + "sector-size", "%lu", &sector_size,
2996 + NULL);
2997 + if (err) {
2998 + xenbus_dev_fatal(info->xbdev, err,
2999 + "reading backend fields at %s",
3000 + info->xbdev->otherend);
3001 + return;
3002 + }
3003 +
3004 + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
3005 + "feature-barrier", "%lu", &info->feature_barrier,
3006 + NULL);
3007 + if (err)
3008 + info->feature_barrier = 0;
3009 +
3010 + err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
3011 + if (err) {
3012 + xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
3013 + info->xbdev->otherend);
3014 + return;
3015 + }
3016 +
3017 + err = xlvbd_sysfs_addif(info);
3018 + if (err) {
3019 + xenbus_dev_fatal(info->xbdev, err, "xlvbd_sysfs_addif at %s",
3020 + info->xbdev->otherend);
3021 + return;
3022 + }
3023 +
3024 + (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
3025 +
3026 + /* Kick pending requests. */
3027 + spin_lock_irq(&blkif_io_lock);
3028 + info->connected = BLKIF_STATE_CONNECTED;
3029 + kick_pending_request_queues(info);
3030 + spin_unlock_irq(&blkif_io_lock);
3031 +
3032 + add_disk(info->gd);
3033 +
3034 + info->is_ready = 1;
3035 +}
3036 +
3037 +/**
3038 + * Handle the change of state of the backend to Closing. We must delete our
3039 + * device-layer structures now, to ensure that writes are flushed through to
3040 + * the backend.  Once this is done, we can switch to Closed in
3041 + * acknowledgement.
3042 + */
3043 +static void blkfront_closing(struct xenbus_device *dev)
3044 +{
3045 + struct blkfront_info *info = dev->dev.driver_data;
3046 + unsigned long flags;
3047 +
3048 + DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
3049 +
3050 + if (info->rq == NULL)
3051 + goto out;
3052 +
3053 + spin_lock_irqsave(&blkif_io_lock, flags);
3054 + /* No more blkif_request(). */
3055 + blk_stop_queue(info->rq);
3056 + /* No more gnttab callback work. */
3057 + gnttab_cancel_free_callback(&info->callback);
3058 + spin_unlock_irqrestore(&blkif_io_lock, flags);
3059 +
3060 + /* Flush gnttab callback work. Must be done with no locks held. */
3061 + flush_scheduled_work();
3062 +
3063 + xlvbd_sysfs_delif(info);
3064 +
3065 + xlvbd_del(info);
3066 +
3067 + out:
3068 + xenbus_frontend_closed(dev);
3069 +}
3070 +
3071 +
3072 +static int blkfront_remove(struct xenbus_device *dev)
3073 +{
3074 + struct blkfront_info *info = dev->dev.driver_data;
3075 +
3076 + DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
3077 +
3078 + blkif_free(info, 0);
3079 +
3080 + kfree(info);
3081 +
3082 + return 0;
3083 +}
3084 +
3085 +
3086 +static inline int GET_ID_FROM_FREELIST(
3087 + struct blkfront_info *info)
3088 +{
3089 + unsigned long free = info->shadow_free;
3090 + BUG_ON(free > BLK_RING_SIZE);
3091 + info->shadow_free = info->shadow[free].req.id;
3092 + info->shadow[free].req.id = 0x0fffffee; /* debug */
3093 + return free;
3094 +}
3095 +
3096 +static inline void ADD_ID_TO_FREELIST(
3097 + struct blkfront_info *info, unsigned long id)
3098 +{
3099 + info->shadow[id].req.id = info->shadow_free;
3100 + info->shadow[id].request = 0;
3101 + info->shadow_free = id;
3102 +}
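
GET_ID_FROM_FREELIST()/ADD_ID_TO_FREELIST() thread a free list through the shadow array itself: each free slot's req.id holds the index of the next free slot, and info->shadow_free is the head (blkfront_probe() above seeds the chain as 1, 2, ..., terminated by 0x0fffffff). An illustrative round trip, assuming shadow_free == 3 and shadow[3].req.id == 7:

	id = GET_ID_FROM_FREELIST(info);  /* id == 3; shadow_free becomes 7 */
	/* ... shadow[3] now tracks an in-flight request ... */
	ADD_ID_TO_FREELIST(info, id);	  /* shadow[3].req.id = 7 again,
					     shadow_free back to 3 */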
3103 +
3104 +static inline void flush_requests(struct blkfront_info *info)
3105 +{
3106 + int notify;
3107 +
3108 + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
3109 +
3110 + if (notify)
3111 + notify_remote_via_irq(info->irq);
3112 +}
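
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() implements the standard Xen shared-ring producer protocol. Paraphrasing the macro from the public io/ring.h of this era (a reading aid, not a replacement; the real macro uses temporaries and barriers as below):

	RING_IDX old = info->ring.sring->req_prod;
	RING_IDX new = info->ring.req_prod_pvt;
	wmb();				/* requests visible before req_prod */
	info->ring.sring->req_prod = new;
	mb();				/* req_prod visible before req_event */
	notify = ((RING_IDX)(new - info->ring.sring->req_event) <
		  (RING_IDX)(new - old)); /* notify only if we crossed req_event */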
3113 +
3114 +static void kick_pending_request_queues(struct blkfront_info *info)
3115 +{
3116 + if (!RING_FULL(&info->ring)) {
3117 + /* Re-enable calldowns. */
3118 + blk_start_queue(info->rq);
3119 + /* Kick things off immediately. */
3120 + do_blkif_request(info->rq);
3121 + }
3122 +}
3123 +
3124 +static void blkif_restart_queue(void *arg)
3125 +{
3126 + struct blkfront_info *info = (struct blkfront_info *)arg;
3127 + spin_lock_irq(&blkif_io_lock);
3128 + if (info->connected == BLKIF_STATE_CONNECTED)
3129 + kick_pending_request_queues(info);
3130 + spin_unlock_irq(&blkif_io_lock);
3131 +}
3132 +
3133 +static void blkif_restart_queue_callback(void *arg)
3134 +{
3135 + struct blkfront_info *info = (struct blkfront_info *)arg;
3136 + schedule_work(&info->work);
3137 +}
3138 +
3139 +int blkif_open(struct inode *inode, struct file *filep)
3140 +{
3141 + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
3142 + info->users++;
3143 + return 0;
3144 +}
3145 +
3146 +
3147 +int blkif_release(struct inode *inode, struct file *filep)
3148 +{
3149 + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
3150 + info->users--;
3151 + if (info->users == 0) {
3152 + /* Check whether we have been instructed to close. We will
3153 + have ignored this request initially, as the device was
3154 + still mounted. */
3155 + struct xenbus_device * dev = info->xbdev;
3156 + enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
3157 +
3158 + if (state == XenbusStateClosing && info->is_ready)
3159 + blkfront_closing(dev);
3160 + }
3161 + return 0;
3162 +}
3163 +
3164 +
3165 +int blkif_ioctl(struct inode *inode, struct file *filep,
3166 + unsigned command, unsigned long argument)
3167 +{
3168 + int i;
3169 +
3170 + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
3171 + command, (long)argument, inode->i_rdev);
3172 +
3173 + switch (command) {
3174 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
3175 + case HDIO_GETGEO: {
3176 + struct block_device *bd = inode->i_bdev;
3177 + struct hd_geometry geo;
3178 + int ret;
3179 +
3180 + if (!argument)
3181 + return -EINVAL;
3182 +
3183 + geo.start = get_start_sect(bd);
3184 + ret = blkif_getgeo(bd, &geo);
3185 + if (ret)
3186 + return ret;
3187 +
3188 + if (copy_to_user((struct hd_geometry __user *)argument, &geo,
3189 + sizeof(geo)))
3190 + return -EFAULT;
3191 +
3192 + return 0;
3193 + }
3194 +#endif
3195 + case CDROMMULTISESSION:
3196 + DPRINTK("FIXME: support multisession CDs later\n");
3197 + for (i = 0; i < sizeof(struct cdrom_multisession); i++)
3198 + if (put_user(0, (char __user *)(argument + i)))
3199 + return -EFAULT;
3200 + return 0;
3201 +
3202 + case CDROM_GET_CAPABILITY: {
3203 + struct blkfront_info *info =
3204 + inode->i_bdev->bd_disk->private_data;
3205 + struct gendisk *gd = info->gd;
3206 + if (gd->flags & GENHD_FL_CD)
3207 + return 0;
3208 + return -EINVAL;
3209 + }
3210 + default:
3211 + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
3212 + command);*/
3213 + return -EINVAL; /* same return as native Linux */
3214 + }
3215 +
3216 + return 0;
3217 +}
3218 +
3219 +
3220 +int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
3221 +{
3222 + /* We don't have real geometry info, but let's at least return
3223 + values consistent with the size of the device */
3224 + sector_t nsect = get_capacity(bd->bd_disk);
3225 + sector_t cylinders = nsect;
3226 +
3227 + hg->heads = 0xff;
3228 + hg->sectors = 0x3f;
3229 + sector_div(cylinders, hg->heads * hg->sectors);
3230 + hg->cylinders = cylinders;
3231 + if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
3232 + hg->cylinders = 0xffff;
3233 + return 0;
3234 +}
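
A worked instance of the fake geometry above (illustrative numbers): for a 1 GiB disk, nsect = 2097152; with 255 heads and 63 sectors per track, cylinders = 2097152 / (255 * 63) = 130 after sector_div()'s truncation, and since (130 + 1) * 255 * 63 = 2104515 >= 2097152, the 0xffff clamp is not applied.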
3235 +
3236 +
3237 +/*
3238 + * blkif_queue_request
3239 + *
3240 + * request block io
3241 + *
3242 + * id: for guest use only.
3243 + * operation: BLKIF_OP_{READ,WRITE,PROBE}
3244 + * buffer: buffer to read/write into. this should be a
3245 + * virtual address in the guest os.
3246 + */
3247 +static int blkif_queue_request(struct request *req)
3248 +{
3249 + struct blkfront_info *info = req->rq_disk->private_data;
3250 + unsigned long buffer_mfn;
3251 + blkif_request_t *ring_req;
3252 + struct bio *bio;
3253 + struct bio_vec *bvec;
3254 + int idx;
3255 + unsigned long id;
3256 + unsigned int fsect, lsect;
3257 + int ref;
3258 + grant_ref_t gref_head;
3259 +
3260 + if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
3261 + return 1;
3262 +
3263 + if (gnttab_alloc_grant_references(
3264 + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
3265 + gnttab_request_free_callback(
3266 + &info->callback,
3267 + blkif_restart_queue_callback,
3268 + info,
3269 + BLKIF_MAX_SEGMENTS_PER_REQUEST);
3270 + return 1;
3271 + }
3272 +
3273 + /* Fill out a communications ring structure. */
3274 + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
3275 + id = GET_ID_FROM_FREELIST(info);
3276 + info->shadow[id].request = (unsigned long)req;
3277 +
3278 + ring_req->id = id;
3279 + ring_req->sector_number = (blkif_sector_t)req->sector;
3280 + ring_req->handle = info->handle;
3281 +
3282 + ring_req->operation = rq_data_dir(req) ?
3283 + BLKIF_OP_WRITE : BLKIF_OP_READ;
3284 + if (blk_barrier_rq(req))
3285 + ring_req->operation = BLKIF_OP_WRITE_BARRIER;
3286 +
3287 + ring_req->nr_segments = 0;
3288 + rq_for_each_bio (bio, req) {
3289 + bio_for_each_segment (bvec, bio, idx) {
3290 + BUG_ON(ring_req->nr_segments
3291 + == BLKIF_MAX_SEGMENTS_PER_REQUEST);
3292 + buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
3293 + fsect = bvec->bv_offset >> 9;
3294 + lsect = fsect + (bvec->bv_len >> 9) - 1;
3295 + /* install a grant reference. */
3296 + ref = gnttab_claim_grant_reference(&gref_head);
3297 + BUG_ON(ref == -ENOSPC);
3298 +
3299 + gnttab_grant_foreign_access_ref(
3300 + ref,
3301 + info->xbdev->otherend_id,
3302 + buffer_mfn,
3303 + rq_data_dir(req) ? GTF_readonly : 0 );
3304 +
3305 + info->shadow[id].frame[ring_req->nr_segments] =
3306 + mfn_to_pfn(buffer_mfn);
3307 +
3308 + ring_req->seg[ring_req->nr_segments] =
3309 + (struct blkif_request_segment) {
3310 + .gref = ref,
3311 + .first_sect = fsect,
3312 + .last_sect = lsect };
3313 +
3314 + ring_req->nr_segments++;
3315 + }
3316 + }
3317 +
3318 + info->ring.req_prod_pvt++;
3319 +
3320 + /* Keep a private copy so we can reissue requests when recovering. */
3321 + info->shadow[id].req = *ring_req;
3322 +
3323 + gnttab_free_grant_references(gref_head);
3324 +
3325 + return 0;
3326 +}
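
The fsect/lsect arithmetic above maps each bio segment onto 512-byte sectors within its granted page: with illustrative values bv_offset = 1024 and bv_len = 2048, fsect = 1024 >> 9 = 2 and lsect = 2 + (2048 >> 9) - 1 = 5, so the segment covers sectors 2 through 5 of the frame referenced by gref.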
3327 +
3328 +/*
3329 + * do_blkif_request
3330 + * read a block; request is in a request queue
3331 + */
3332 +void do_blkif_request(request_queue_t *rq)
3333 +{
3334 + struct blkfront_info *info = NULL;
3335 + struct request *req;
3336 + int queued;
3337 +
3338 + DPRINTK("Entered do_blkif_request\n");
3339 +
3340 + queued = 0;
3341 +
3342 + while ((req = elv_next_request(rq)) != NULL) {
3343 + info = req->rq_disk->private_data;
3344 + if (!blk_fs_request(req)) {
3345 + end_request(req, 0);
3346 + continue;
3347 + }
3348 +
3349 + if (RING_FULL(&info->ring))
3350 + goto wait;
3351 +
3352 + DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
3353 + "(%u/%li) buffer:%p [%s]\n",
3354 + req, req->cmd, (long long)req->sector,
3355 + req->current_nr_sectors,
3356 + req->nr_sectors, req->buffer,
3357 + rq_data_dir(req) ? "write" : "read");
3358 +
3359 +
3360 + blkdev_dequeue_request(req);
3361 + if (blkif_queue_request(req)) {
3362 + blk_requeue_request(rq, req);
3363 + wait:
3364 + /* Avoid pointless unplugs. */
3365 + blk_stop_queue(rq);
3366 + break;
3367 + }
3368 +
3369 + queued++;
3370 + }
3371 +
3372 + if (queued != 0)
3373 + flush_requests(info);
3374 +}
3375 +
3376 +
3377 +static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
3378 +{
3379 + struct request *req;
3380 + blkif_response_t *bret;
3381 + RING_IDX i, rp;
3382 + unsigned long flags;
3383 + struct blkfront_info *info = (struct blkfront_info *)dev_id;
3384 + int uptodate;
3385 +
3386 + spin_lock_irqsave(&blkif_io_lock, flags);
3387 +
3388 + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
3389 + spin_unlock_irqrestore(&blkif_io_lock, flags);
3390 + return IRQ_HANDLED;
3391 + }
3392 +
3393 + again:
3394 + rp = info->ring.sring->rsp_prod;
3395 + rmb(); /* Ensure we see queued responses up to 'rp'. */
3396 +
3397 + for (i = info->ring.rsp_cons; i != rp; i++) {
3398 + unsigned long id;
3399 + int ret;
3400 +
3401 + bret = RING_GET_RESPONSE(&info->ring, i);
3402 + id = bret->id;
3403 + req = (struct request *)info->shadow[id].request;
3404 +
3405 + blkif_completion(&info->shadow[id]);
3406 +
3407 + ADD_ID_TO_FREELIST(info, id);
3408 +
3409 + uptodate = (bret->status == BLKIF_RSP_OKAY);
3410 + switch (bret->operation) {
3411 + case BLKIF_OP_WRITE_BARRIER:
3412 + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
3413 + printk("blkfront: %s: write barrier op failed\n",
3414 + info->gd->disk_name);
3415 + uptodate = -EOPNOTSUPP;
3416 + info->feature_barrier = 0;
3417 + xlvbd_barrier(info);
3418 + }
3419 + /* fall through */
3420 + case BLKIF_OP_READ:
3421 + case BLKIF_OP_WRITE:
3422 + if (unlikely(bret->status != BLKIF_RSP_OKAY))
3423 + DPRINTK("Bad return from blkdev data "
3424 + "request: %x\n", bret->status);
3425 +
3426 + ret = end_that_request_first(req, uptodate,
3427 + req->hard_nr_sectors);
3428 + BUG_ON(ret);
3429 + end_that_request_last(req, uptodate);
3430 + break;
3431 + default:
3432 + BUG();
3433 + }
3434 + }
3435 +
3436 + info->ring.rsp_cons = i;
3437 +
3438 + if (i != info->ring.req_prod_pvt) {
3439 + int more_to_do;
3440 + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
3441 + if (more_to_do)
3442 + goto again;
3443 + } else
3444 + info->ring.sring->rsp_event = i + 1;
3445 +
3446 + kick_pending_request_queues(info);
3447 +
3448 + spin_unlock_irqrestore(&blkif_io_lock, flags);
3449 +
3450 + return IRQ_HANDLED;
3451 +}
3452 +
3453 +static void blkif_free(struct blkfront_info *info, int suspend)
3454 +{
3455 + /* Prevent new requests being issued until we fix things up. */
3456 + spin_lock_irq(&blkif_io_lock);
3457 + info->connected = suspend ?
3458 + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
3459 + /* No more blkif_request(). */
3460 + if (info->rq)
3461 + blk_stop_queue(info->rq);
3462 + /* No more gnttab callback work. */
3463 + gnttab_cancel_free_callback(&info->callback);
3464 + spin_unlock_irq(&blkif_io_lock);
3465 +
3466 + /* Flush gnttab callback work. Must be done with no locks held. */
3467 + flush_scheduled_work();
3468 +
3469 + /* Free resources associated with old device channel. */
3470 + if (info->ring_ref != GRANT_INVALID_REF) {
3471 + gnttab_end_foreign_access(info->ring_ref,
3472 + (unsigned long)info->ring.sring);
3473 + info->ring_ref = GRANT_INVALID_REF;
3474 + info->ring.sring = NULL;
3475 + }
3476 + if (info->irq)
3477 + unbind_from_irqhandler(info->irq, info);
3478 + info->irq = 0;
3479 +}
3480 +
3481 +static void blkif_completion(struct blk_shadow *s)
3482 +{
3483 + int i;
3484 + for (i = 0; i < s->req.nr_segments; i++)
3485 + gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
3486 +}
3487 +
3488 +static void blkif_recover(struct blkfront_info *info)
3489 +{
3490 + int i;
3491 + blkif_request_t *req;
3492 + struct blk_shadow *copy;
3493 + int j;
3494 +
3495 + /* Stage 1: Make a safe copy of the shadow state. */
3496 + copy = kmalloc(sizeof(info->shadow), GFP_NOIO | __GFP_NOFAIL | __GFP_HIGH);
3497 + memcpy(copy, info->shadow, sizeof(info->shadow));
3498 +
3499 + /* Stage 2: Set up free list. */
3500 + memset(&info->shadow, 0, sizeof(info->shadow));
3501 + for (i = 0; i < BLK_RING_SIZE; i++)
3502 + info->shadow[i].req.id = i+1;
3503 + info->shadow_free = info->ring.req_prod_pvt;
3504 + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
3505 +
3506 + /* Stage 3: Find pending requests and requeue them. */
3507 + for (i = 0; i < BLK_RING_SIZE; i++) {
3508 + /* Not in use? */
3509 + if (copy[i].request == 0)
3510 + continue;
3511 +
3512 + /* Grab a request slot and copy shadow state into it. */
3513 + req = RING_GET_REQUEST(
3514 + &info->ring, info->ring.req_prod_pvt);
3515 + *req = copy[i].req;
3516 +
3517 + /* We get a new request id, and must reset the shadow state. */
3518 + req->id = GET_ID_FROM_FREELIST(info);
3519 + memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
3520 +
3521 + /* Rewrite any grant references invalidated by susp/resume. */
3522 + for (j = 0; j < req->nr_segments; j++)
3523 + gnttab_grant_foreign_access_ref(
3524 + req->seg[j].gref,
3525 + info->xbdev->otherend_id,
3526 + pfn_to_mfn(info->shadow[req->id].frame[j]),
3527 + rq_data_dir((struct request *)
3528 + info->shadow[req->id].request) ?
3529 + GTF_readonly : 0);
3530 + info->shadow[req->id].req = *req;
3531 +
3532 + info->ring.req_prod_pvt++;
3533 + }
3534 +
3535 + kfree(copy);
3536 +
3537 + (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
3538 +
3539 + spin_lock_irq(&blkif_io_lock);
3540 +
3541 + /* Now safe for us to use the shared ring */
3542 + info->connected = BLKIF_STATE_CONNECTED;
3543 +
3544 + /* Send off requeued requests */
3545 + flush_requests(info);
3546 +
3547 + /* Kick any other new requests queued since we resumed */
3548 + kick_pending_request_queues(info);
3549 +
3550 + spin_unlock_irq(&blkif_io_lock);
3551 +}
3552 +
3553 +int blkfront_is_ready(struct xenbus_device *dev)
3554 +{
3555 + struct blkfront_info *info = dev->dev.driver_data;
3556 +
3557 + return info->is_ready;
3558 +}
3559 +
3560 +
3561 +/* ** Driver Registration ** */
3562 +
3563 +
3564 +static const struct xenbus_device_id blkfront_ids[] = {
3565 + { "vbd" },
3566 + { "" }
3567 +};
3568 +MODULE_ALIAS("xen:vbd");
3569 +
3570 +static struct xenbus_driver blkfront = {
3571 + .name = "vbd",
3572 + .owner = THIS_MODULE,
3573 + .ids = blkfront_ids,
3574 + .probe = blkfront_probe,
3575 + .remove = blkfront_remove,
3576 + .resume = blkfront_resume,
3577 + .otherend_changed = backend_changed,
3578 + .is_ready = blkfront_is_ready,
3579 +};
3580 +
3581 +
3582 +static int __init xlblk_init(void)
3583 +{
3584 + if (!is_running_on_xen())
3585 + return -ENODEV;
3586 +
3587 + return xenbus_register_frontend(&blkfront);
3588 +}
3589 +module_init(xlblk_init);
3590 +
3591 +
3592 +static void __exit xlblk_exit(void)
3593 +{
3594 +	xenbus_unregister_driver(&blkfront);
3595 +}
3596 +module_exit(xlblk_exit);
3597 +
3598 +MODULE_LICENSE("Dual BSD/GPL");
3599 Index: head-2008-11-25/drivers/xen/blkfront/block.h
3600 ===================================================================
3601 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
3602 +++ head-2008-11-25/drivers/xen/blkfront/block.h 2008-08-07 12:44:36.000000000 +0200
3603 @@ -0,0 +1,158 @@
3604 +/******************************************************************************
3605 + * block.h
3606 + *
3607 + * Shared definitions between all levels of XenLinux Virtual block devices.
3608 + *
3609 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
3610 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
3611 + * Copyright (c) 2004-2005, Christian Limpach
3612 + *
3613 + * This program is free software; you can redistribute it and/or
3614 + * modify it under the terms of the GNU General Public License version 2
3615 + * as published by the Free Software Foundation; or, when distributed
3616 + * separately from the Linux kernel or incorporated into other
3617 + * software packages, subject to the following license:
3618 + *
3619 + * Permission is hereby granted, free of charge, to any person obtaining a copy
3620 + * of this source file (the "Software"), to deal in the Software without
3621 + * restriction, including without limitation the rights to use, copy, modify,
3622 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
3623 + * and to permit persons to whom the Software is furnished to do so, subject to
3624 + * the following conditions:
3625 + *
3626 + * The above copyright notice and this permission notice shall be included in
3627 + * all copies or substantial portions of the Software.
3628 + *
3629 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3630 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3631 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
3632 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
3633 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
3634 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
3635 + * IN THE SOFTWARE.
3636 + */
3637 +
3638 +#ifndef __XEN_DRIVERS_BLOCK_H__
3639 +#define __XEN_DRIVERS_BLOCK_H__