]>
Commit | Line | Data |
---|---|---|
cc90b958 BS |
1 | Subject: xen3 xen-drivers |
2 | From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd) | |
3 | Patch-mainline: obsolete | |
4 | Acked-by: jbeulich@novell.com | |
5 | ||
6 | Index: head-2008-11-25/drivers/xen/balloon/Makefile | |
7 | =================================================================== | |
8 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
9 | +++ head-2008-11-25/drivers/xen/balloon/Makefile 2007-06-12 13:13:44.000000000 +0200 | |
10 | @@ -0,0 +1,2 @@ | |
11 | + | |
12 | +obj-y := balloon.o sysfs.o | |
13 | Index: head-2008-11-25/drivers/xen/balloon/balloon.c | |
14 | =================================================================== | |
15 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
16 | +++ head-2008-11-25/drivers/xen/balloon/balloon.c 2008-07-21 11:00:33.000000000 +0200 | |
17 | @@ -0,0 +1,724 @@ | |
18 | +/****************************************************************************** | |
19 | + * balloon.c | |
20 | + * | |
21 | + * Xen balloon driver - enables returning/claiming memory to/from Xen. | |
22 | + * | |
23 | + * Copyright (c) 2003, B Dragovic | |
24 | + * Copyright (c) 2003-2004, M Williamson, K Fraser | |
25 | + * Copyright (c) 2005 Dan M. Smith, IBM Corporation | |
26 | + * | |
27 | + * This program is free software; you can redistribute it and/or | |
28 | + * modify it under the terms of the GNU General Public License version 2 | |
29 | + * as published by the Free Software Foundation; or, when distributed | |
30 | + * separately from the Linux kernel or incorporated into other | |
31 | + * software packages, subject to the following license: | |
32 | + * | |
33 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
34 | + * of this source file (the "Software"), to deal in the Software without | |
35 | + * restriction, including without limitation the rights to use, copy, modify, | |
36 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
37 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
38 | + * the following conditions: | |
39 | + * | |
40 | + * The above copyright notice and this permission notice shall be included in | |
41 | + * all copies or substantial portions of the Software. | |
42 | + * | |
43 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
44 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
45 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
46 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
47 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
48 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
49 | + * IN THE SOFTWARE. | |
50 | + */ | |
51 | + | |
52 | +#include <linux/kernel.h> | |
53 | +#include <linux/module.h> | |
54 | +#include <linux/sched.h> | |
55 | +#include <linux/errno.h> | |
56 | +#include <linux/mm.h> | |
57 | +#include <linux/mman.h> | |
58 | +#include <linux/smp_lock.h> | |
59 | +#include <linux/pagemap.h> | |
60 | +#include <linux/bootmem.h> | |
61 | +#include <linux/highmem.h> | |
62 | +#include <linux/vmalloc.h> | |
63 | +#include <linux/mutex.h> | |
64 | +#include <xen/xen_proc.h> | |
65 | +#include <asm/hypervisor.h> | |
66 | +#include <xen/balloon.h> | |
67 | +#include <xen/interface/memory.h> | |
68 | +#include <asm/maddr.h> | |
69 | +#include <asm/page.h> | |
70 | +#include <asm/pgalloc.h> | |
71 | +#include <asm/pgtable.h> | |
72 | +#include <asm/uaccess.h> | |
73 | +#include <asm/tlb.h> | |
74 | +#include <linux/highmem.h> | |
75 | +#include <linux/list.h> | |
76 | +#include <xen/xenbus.h> | |
77 | +#include "common.h" | |
78 | + | |
79 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
80 | +#include <xen/platform-compat.h> | |
81 | +#endif | |
82 | + | |
83 | +#ifdef CONFIG_PROC_FS | |
84 | +static struct proc_dir_entry *balloon_pde; | |
85 | +#endif | |
86 | + | |
87 | +static DEFINE_MUTEX(balloon_mutex); | |
88 | + | |
89 | +/* | |
90 | + * Protects atomic reservation decrease/increase against concurrent increases. | |
91 | + * Also protects non-atomic updates of current_pages and driver_pages, and | |
92 | + * balloon lists. | |
93 | + */ | |
94 | +DEFINE_SPINLOCK(balloon_lock); | |
95 | + | |
96 | +struct balloon_stats balloon_stats; | |
97 | + | |
98 | +/* We increase/decrease in batches which fit in a page */ | |
99 | +static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; | |
100 | + | |
101 | +/* VM /proc information for memory */ | |
102 | +extern unsigned long totalram_pages; | |
103 | + | |
104 | +#ifndef MODULE | |
105 | +extern unsigned long totalhigh_pages; | |
106 | +#define inc_totalhigh_pages() (totalhigh_pages++) | |
107 | +#define dec_totalhigh_pages() (totalhigh_pages--) | |
108 | +#else | |
109 | +#define inc_totalhigh_pages() ((void)0) | |
110 | +#define dec_totalhigh_pages() ((void)0) | |
111 | +#endif | |
112 | + | |
113 | +/* List of ballooned pages, threaded through the mem_map array. */ | |
114 | +static LIST_HEAD(ballooned_pages); | |
115 | + | |
116 | +/* Main work function, always executed in process context. */ | |
117 | +static void balloon_process(void *unused); | |
118 | +static DECLARE_WORK(balloon_worker, balloon_process, NULL); | |
119 | +static struct timer_list balloon_timer; | |
120 | + | |
121 | +/* When ballooning out (allocating memory to return to Xen) we don't really | |
122 | + want the kernel to try too hard since that can trigger the oom killer. */ | |
123 | +#define GFP_BALLOON \ | |
124 | + (GFP_HIGHUSER|__GFP_NOWARN|__GFP_NORETRY|__GFP_NOMEMALLOC|__GFP_COLD) | |
125 | + | |
126 | +#define PAGE_TO_LIST(p) (&(p)->lru) | |
127 | +#define LIST_TO_PAGE(l) list_entry((l), struct page, lru) | |
128 | +#define UNLIST_PAGE(p) \ | |
129 | + do { \ | |
130 | + list_del(PAGE_TO_LIST(p)); \ | |
131 | + PAGE_TO_LIST(p)->next = NULL; \ | |
132 | + PAGE_TO_LIST(p)->prev = NULL; \ | |
133 | + } while(0) | |
134 | + | |
135 | +#define IPRINTK(fmt, args...) \ | |
136 | + printk(KERN_INFO "xen_mem: " fmt, ##args) | |
137 | +#define WPRINTK(fmt, args...) \ | |
138 | + printk(KERN_WARNING "xen_mem: " fmt, ##args) | |
139 | + | |
140 | +/* balloon_append: add the given page to the balloon. */ | |
141 | +static void balloon_append(struct page *page) | |
142 | +{ | |
143 | + /* Lowmem is re-populated first, so highmem pages go at list tail. */ | |
144 | + if (PageHighMem(page)) { | |
145 | + list_add_tail(PAGE_TO_LIST(page), &ballooned_pages); | |
146 | + bs.balloon_high++; | |
147 | + dec_totalhigh_pages(); | |
148 | + } else { | |
149 | + list_add(PAGE_TO_LIST(page), &ballooned_pages); | |
150 | + bs.balloon_low++; | |
151 | + } | |
152 | +} | |
153 | + | |
154 | +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ | |
155 | +static struct page *balloon_retrieve(void) | |
156 | +{ | |
157 | + struct page *page; | |
158 | + | |
159 | + if (list_empty(&ballooned_pages)) | |
160 | + return NULL; | |
161 | + | |
162 | + page = LIST_TO_PAGE(ballooned_pages.next); | |
163 | + UNLIST_PAGE(page); | |
164 | + | |
165 | + if (PageHighMem(page)) { | |
166 | + bs.balloon_high--; | |
167 | + inc_totalhigh_pages(); | |
168 | + } | |
169 | + else | |
170 | + bs.balloon_low--; | |
171 | + | |
172 | + return page; | |
173 | +} | |
174 | + | |
175 | +static struct page *balloon_first_page(void) | |
176 | +{ | |
177 | + if (list_empty(&ballooned_pages)) | |
178 | + return NULL; | |
179 | + return LIST_TO_PAGE(ballooned_pages.next); | |
180 | +} | |
181 | + | |
182 | +static struct page *balloon_next_page(struct page *page) | |
183 | +{ | |
184 | + struct list_head *next = PAGE_TO_LIST(page)->next; | |
185 | + if (next == &ballooned_pages) | |
186 | + return NULL; | |
187 | + return LIST_TO_PAGE(next); | |
188 | +} | |
189 | + | |
190 | +static inline void balloon_free_page(struct page *page) | |
191 | +{ | |
192 | +#ifndef MODULE | |
193 | + if (put_page_testzero(page)) | |
194 | + free_cold_page(page); | |
195 | +#else | |
196 | + /* free_cold_page() is not being exported. */ | |
197 | + __free_page(page); | |
198 | +#endif | |
199 | +} | |
200 | + | |
201 | +static void balloon_alarm(unsigned long unused) | |
202 | +{ | |
203 | + schedule_work(&balloon_worker); | |
204 | +} | |
205 | + | |
206 | +static unsigned long current_target(void) | |
207 | +{ | |
208 | + unsigned long target = min(bs.target_pages, bs.hard_limit); | |
209 | + if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) | |
210 | + target = bs.current_pages + bs.balloon_low + bs.balloon_high; | |
211 | + return target; | |
212 | +} | |
213 | + | |
214 | +static unsigned long minimum_target(void) | |
215 | +{ | |
216 | +#ifndef CONFIG_XEN | |
217 | +#define max_pfn num_physpages | |
218 | +#endif | |
219 | + unsigned long min_pages, curr_pages = current_target(); | |
220 | + | |
221 | +#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) | |
222 | +	/* Simple continuous piecewise linear function: | |
223 | + * max MiB -> min MiB gradient | |
224 | + * 0 0 | |
225 | + * 16 16 | |
226 | + * 32 24 | |
227 | + * 128 72 (1/2) | |
228 | + * 512 168 (1/4) | |
229 | + * 2048 360 (1/8) | |
230 | + * 8192 552 (1/32) | |
231 | + * 32768 1320 | |
232 | + * 131072 4392 | |
233 | + */ | |
234 | + if (max_pfn < MB2PAGES(128)) | |
235 | + min_pages = MB2PAGES(8) + (max_pfn >> 1); | |
236 | + else if (max_pfn < MB2PAGES(512)) | |
237 | + min_pages = MB2PAGES(40) + (max_pfn >> 2); | |
238 | + else if (max_pfn < MB2PAGES(2048)) | |
239 | + min_pages = MB2PAGES(104) + (max_pfn >> 3); | |
240 | + else | |
241 | + min_pages = MB2PAGES(296) + (max_pfn >> 5); | |
242 | +#undef MB2PAGES | |
243 | + | |
244 | + /* Don't enforce growth */ | |
245 | + return min(min_pages, curr_pages); | |
246 | +#ifndef CONFIG_XEN | |
247 | +#undef max_pfn | |
248 | +#endif | |
249 | +} | |
250 | + | |
251 | +static int increase_reservation(unsigned long nr_pages) | |
252 | +{ | |
253 | + unsigned long pfn, i, flags; | |
254 | + struct page *page; | |
255 | + long rc; | |
256 | + struct xen_memory_reservation reservation = { | |
257 | + .address_bits = 0, | |
258 | + .extent_order = 0, | |
259 | + .domid = DOMID_SELF | |
260 | + }; | |
261 | + | |
262 | + if (nr_pages > ARRAY_SIZE(frame_list)) | |
263 | + nr_pages = ARRAY_SIZE(frame_list); | |
264 | + | |
265 | + balloon_lock(flags); | |
266 | + | |
267 | + page = balloon_first_page(); | |
268 | + for (i = 0; i < nr_pages; i++) { | |
269 | + BUG_ON(page == NULL); | |
270 | +		frame_list[i] = page_to_pfn(page); | |
271 | + page = balloon_next_page(page); | |
272 | + } | |
273 | + | |
274 | + set_xen_guest_handle(reservation.extent_start, frame_list); | |
275 | + reservation.nr_extents = nr_pages; | |
276 | + rc = HYPERVISOR_memory_op( | |
277 | + XENMEM_populate_physmap, &reservation); | |
278 | + if (rc < nr_pages) { | |
279 | + if (rc > 0) { | |
280 | + int ret; | |
281 | + | |
282 | + /* We hit the Xen hard limit: reprobe. */ | |
283 | + reservation.nr_extents = rc; | |
284 | + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | |
285 | + &reservation); | |
286 | + BUG_ON(ret != rc); | |
287 | + } | |
288 | + if (rc >= 0) | |
289 | + bs.hard_limit = (bs.current_pages + rc - | |
290 | + bs.driver_pages); | |
291 | + goto out; | |
292 | + } | |
293 | + | |
294 | + for (i = 0; i < nr_pages; i++) { | |
295 | + page = balloon_retrieve(); | |
296 | + BUG_ON(page == NULL); | |
297 | + | |
298 | + pfn = page_to_pfn(page); | |
299 | + BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && | |
300 | + phys_to_machine_mapping_valid(pfn)); | |
301 | + | |
302 | + set_phys_to_machine(pfn, frame_list[i]); | |
303 | + | |
304 | +#ifdef CONFIG_XEN | |
305 | + /* Link back into the page tables if not highmem. */ | |
306 | + if (pfn < max_low_pfn) { | |
307 | + int ret; | |
308 | + ret = HYPERVISOR_update_va_mapping( | |
309 | + (unsigned long)__va(pfn << PAGE_SHIFT), | |
310 | + pfn_pte_ma(frame_list[i], PAGE_KERNEL), | |
311 | + 0); | |
312 | + BUG_ON(ret); | |
313 | + } | |
314 | +#endif | |
315 | + | |
316 | + /* Relinquish the page back to the allocator. */ | |
317 | + ClearPageReserved(page); | |
318 | + init_page_count(page); | |
319 | + balloon_free_page(page); | |
320 | + } | |
321 | + | |
322 | + bs.current_pages += nr_pages; | |
323 | + totalram_pages = bs.current_pages; | |
324 | + | |
325 | + out: | |
326 | + balloon_unlock(flags); | |
327 | + | |
328 | + return 0; | |
329 | +} | |
330 | + | |
331 | +static int decrease_reservation(unsigned long nr_pages) | |
332 | +{ | |
333 | + unsigned long pfn, i, flags; | |
334 | + struct page *page; | |
335 | + void *v; | |
336 | + int need_sleep = 0; | |
337 | + int ret; | |
338 | + struct xen_memory_reservation reservation = { | |
339 | + .address_bits = 0, | |
340 | + .extent_order = 0, | |
341 | + .domid = DOMID_SELF | |
342 | + }; | |
343 | + | |
344 | + if (nr_pages > ARRAY_SIZE(frame_list)) | |
345 | + nr_pages = ARRAY_SIZE(frame_list); | |
346 | + | |
347 | + for (i = 0; i < nr_pages; i++) { | |
348 | + if ((page = alloc_page(GFP_BALLOON)) == NULL) { | |
349 | + nr_pages = i; | |
350 | + need_sleep = 1; | |
351 | + break; | |
352 | + } | |
353 | + | |
354 | + pfn = page_to_pfn(page); | |
355 | + frame_list[i] = pfn_to_mfn(pfn); | |
356 | + | |
357 | + if (!PageHighMem(page)) { | |
358 | + v = phys_to_virt(pfn << PAGE_SHIFT); | |
359 | + scrub_pages(v, 1); | |
360 | +#ifdef CONFIG_XEN | |
361 | + ret = HYPERVISOR_update_va_mapping( | |
362 | + (unsigned long)v, __pte_ma(0), 0); | |
363 | + BUG_ON(ret); | |
364 | +#endif | |
365 | + } | |
366 | +#ifdef CONFIG_XEN_SCRUB_PAGES | |
367 | + else { | |
368 | + v = kmap(page); | |
369 | + scrub_pages(v, 1); | |
370 | + kunmap(page); | |
371 | + } | |
372 | +#endif | |
373 | + } | |
374 | + | |
375 | +#ifdef CONFIG_XEN | |
376 | + /* Ensure that ballooned highmem pages don't have kmaps. */ | |
377 | + kmap_flush_unused(); | |
378 | + flush_tlb_all(); | |
379 | +#endif | |
380 | + | |
381 | + balloon_lock(flags); | |
382 | + | |
383 | + /* No more mappings: invalidate P2M and add to balloon. */ | |
384 | + for (i = 0; i < nr_pages; i++) { | |
385 | + pfn = mfn_to_pfn(frame_list[i]); | |
386 | + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | |
387 | + balloon_append(pfn_to_page(pfn)); | |
388 | + } | |
389 | + | |
390 | + set_xen_guest_handle(reservation.extent_start, frame_list); | |
391 | + reservation.nr_extents = nr_pages; | |
392 | + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | |
393 | + BUG_ON(ret != nr_pages); | |
394 | + | |
395 | + bs.current_pages -= nr_pages; | |
396 | + totalram_pages = bs.current_pages; | |
397 | + | |
398 | + balloon_unlock(flags); | |
399 | + | |
400 | + return need_sleep; | |
401 | +} | |
402 | + | |
403 | +/* | |
404 | + * We avoid multiple worker processes conflicting via the balloon mutex. | |
405 | + * We may of course race updates of the target counts (which are protected | |
406 | + * by the balloon lock), or with changes to the Xen hard limit, but we will | |
407 | + * recover from these in time. | |
408 | + */ | |
409 | +static void balloon_process(void *unused) | |
410 | +{ | |
411 | + int need_sleep = 0; | |
412 | + long credit; | |
413 | + | |
414 | + mutex_lock(&balloon_mutex); | |
415 | + | |
416 | + do { | |
417 | + credit = current_target() - bs.current_pages; | |
418 | + if (credit > 0) | |
419 | + need_sleep = (increase_reservation(credit) != 0); | |
420 | + if (credit < 0) | |
421 | + need_sleep = (decrease_reservation(-credit) != 0); | |
422 | + | |
423 | +#ifndef CONFIG_PREEMPT | |
424 | + if (need_resched()) | |
425 | + schedule(); | |
426 | +#endif | |
427 | + } while ((credit != 0) && !need_sleep); | |
428 | + | |
429 | + /* Schedule more work if there is some still to be done. */ | |
430 | + if (current_target() != bs.current_pages) | |
431 | + mod_timer(&balloon_timer, jiffies + HZ); | |
432 | + | |
433 | + mutex_unlock(&balloon_mutex); | |
434 | +} | |
435 | + | |
436 | +/* Resets the Xen limit, sets new target, and kicks off processing. */ | |
437 | +void balloon_set_new_target(unsigned long target) | |
438 | +{ | |
439 | + /* No need for lock. Not read-modify-write updates. */ | |
440 | + bs.hard_limit = ~0UL; | |
441 | + bs.target_pages = max(target, minimum_target()); | |
442 | + schedule_work(&balloon_worker); | |
443 | +} | |
444 | + | |
445 | +static struct xenbus_watch target_watch = | |
446 | +{ | |
447 | + .node = "memory/target" | |
448 | +}; | |
449 | + | |
450 | +/* React to a change in the target key */ | |
451 | +static void watch_target(struct xenbus_watch *watch, | |
452 | + const char **vec, unsigned int len) | |
453 | +{ | |
454 | + unsigned long long new_target; | |
455 | + int err; | |
456 | + | |
457 | + err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); | |
458 | + if (err != 1) { | |
459 | + /* This is ok (for domain0 at least) - so just return */ | |
460 | + return; | |
461 | + } | |
462 | + | |
463 | + /* The given memory/target value is in KiB, so it needs converting to | |
464 | + * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. | |
465 | + */ | |
466 | + balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); | |
467 | +} | |
468 | + | |
469 | +static int balloon_init_watcher(struct notifier_block *notifier, | |
470 | + unsigned long event, | |
471 | + void *data) | |
472 | +{ | |
473 | + int err; | |
474 | + | |
475 | + err = register_xenbus_watch(&target_watch); | |
476 | + if (err) | |
477 | + printk(KERN_ERR "Failed to set balloon watcher\n"); | |
478 | + | |
479 | + return NOTIFY_DONE; | |
480 | +} | |
481 | + | |
482 | +#ifdef CONFIG_PROC_FS | |
483 | +static int balloon_write(struct file *file, const char __user *buffer, | |
484 | + unsigned long count, void *data) | |
485 | +{ | |
486 | + char memstring[64], *endchar; | |
487 | + unsigned long long target_bytes; | |
488 | + | |
489 | + if (!capable(CAP_SYS_ADMIN)) | |
490 | + return -EPERM; | |
491 | + | |
492 | + if (count <= 1) | |
493 | + return -EBADMSG; /* runt */ | |
494 | + if (count > sizeof(memstring)) | |
495 | + return -EFBIG; /* too long */ | |
496 | + | |
497 | + if (copy_from_user(memstring, buffer, count)) | |
498 | + return -EFAULT; | |
499 | + memstring[sizeof(memstring)-1] = '\0'; | |
500 | + | |
501 | + target_bytes = memparse(memstring, &endchar); | |
502 | + balloon_set_new_target(target_bytes >> PAGE_SHIFT); | |
503 | + | |
504 | + return count; | |
505 | +} | |
506 | + | |
507 | +static int balloon_read(char *page, char **start, off_t off, | |
508 | + int count, int *eof, void *data) | |
509 | +{ | |
510 | + int len; | |
511 | + | |
512 | + len = sprintf( | |
513 | + page, | |
514 | + "Current allocation: %8lu kB\n" | |
515 | + "Requested target: %8lu kB\n" | |
516 | + "Low-mem balloon: %8lu kB\n" | |
517 | + "High-mem balloon: %8lu kB\n" | |
518 | + "Driver pages: %8lu kB\n" | |
519 | + "Xen hard limit: ", | |
520 | + PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages), | |
521 | + PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high), | |
522 | + PAGES2KB(bs.driver_pages)); | |
523 | + | |
524 | + if (bs.hard_limit != ~0UL) | |
525 | + len += sprintf(page + len, "%8lu kB\n", | |
526 | + PAGES2KB(bs.hard_limit)); | |
527 | + else | |
528 | + len += sprintf(page + len, " ??? kB\n"); | |
529 | + | |
530 | + *eof = 1; | |
531 | + return len; | |
532 | +} | |
533 | +#endif | |
534 | + | |
535 | +static struct notifier_block xenstore_notifier; | |
536 | + | |
537 | +static int __init balloon_init(void) | |
538 | +{ | |
539 | +#if defined(CONFIG_X86) && defined(CONFIG_XEN) | |
540 | + unsigned long pfn; | |
541 | + struct page *page; | |
542 | +#endif | |
543 | + | |
544 | + if (!is_running_on_xen()) | |
545 | + return -ENODEV; | |
546 | + | |
547 | + IPRINTK("Initialising balloon driver.\n"); | |
548 | + | |
549 | +#ifdef CONFIG_XEN | |
550 | + bs.current_pages = min(xen_start_info->nr_pages, max_pfn); | |
551 | + totalram_pages = bs.current_pages; | |
552 | +#else | |
553 | + bs.current_pages = totalram_pages; | |
554 | +#endif | |
555 | + bs.target_pages = bs.current_pages; | |
556 | + bs.balloon_low = 0; | |
557 | + bs.balloon_high = 0; | |
558 | + bs.driver_pages = 0UL; | |
559 | + bs.hard_limit = ~0UL; | |
560 | + | |
561 | + init_timer(&balloon_timer); | |
562 | + balloon_timer.data = 0; | |
563 | + balloon_timer.function = balloon_alarm; | |
564 | + | |
565 | +#ifdef CONFIG_PROC_FS | |
566 | + if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) { | |
567 | + WPRINTK("Unable to create /proc/xen/balloon.\n"); | |
568 | + return -1; | |
569 | + } | |
570 | + | |
571 | + balloon_pde->read_proc = balloon_read; | |
572 | + balloon_pde->write_proc = balloon_write; | |
573 | +#endif | |
574 | + balloon_sysfs_init(); | |
575 | + | |
576 | +#if defined(CONFIG_X86) && defined(CONFIG_XEN) | |
577 | + /* Initialise the balloon with excess memory space. */ | |
578 | + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { | |
579 | + page = pfn_to_page(pfn); | |
580 | + if (!PageReserved(page)) | |
581 | + balloon_append(page); | |
582 | + } | |
583 | +#endif | |
584 | + | |
585 | + target_watch.callback = watch_target; | |
586 | + xenstore_notifier.notifier_call = balloon_init_watcher; | |
587 | + | |
588 | + register_xenstore_notifier(&xenstore_notifier); | |
589 | + | |
590 | + return 0; | |
591 | +} | |
592 | + | |
593 | +subsys_initcall(balloon_init); | |
594 | + | |
595 | +static void __exit balloon_exit(void) | |
596 | +{ | |
597 | + /* XXX - release balloon here */ | |
598 | + return; | |
599 | +} | |
600 | + | |
601 | +module_exit(balloon_exit); | |
602 | + | |
603 | +void balloon_update_driver_allowance(long delta) | |
604 | +{ | |
605 | + unsigned long flags; | |
606 | + | |
607 | + balloon_lock(flags); | |
608 | + bs.driver_pages += delta; | |
609 | + balloon_unlock(flags); | |
610 | +} | |
611 | + | |
612 | +#ifdef CONFIG_XEN | |
613 | +static int dealloc_pte_fn( | |
614 | + pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | |
615 | +{ | |
616 | + unsigned long mfn = pte_mfn(*pte); | |
617 | + int ret; | |
618 | + struct xen_memory_reservation reservation = { | |
619 | + .nr_extents = 1, | |
620 | + .extent_order = 0, | |
621 | + .domid = DOMID_SELF | |
622 | + }; | |
623 | + set_xen_guest_handle(reservation.extent_start, &mfn); | |
624 | + set_pte_at(&init_mm, addr, pte, __pte_ma(0)); | |
625 | + set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); | |
626 | + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | |
627 | + BUG_ON(ret != 1); | |
628 | + return 0; | |
629 | +} | |
630 | +#endif | |
631 | + | |
632 | +struct page **alloc_empty_pages_and_pagevec(int nr_pages) | |
633 | +{ | |
634 | + unsigned long flags; | |
635 | + void *v; | |
636 | + struct page *page, **pagevec; | |
637 | + int i, ret; | |
638 | + | |
639 | + pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); | |
640 | + if (pagevec == NULL) | |
641 | + return NULL; | |
642 | + | |
643 | + for (i = 0; i < nr_pages; i++) { | |
644 | + page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD); | |
645 | + if (page == NULL) | |
646 | + goto err; | |
647 | + | |
648 | + v = page_address(page); | |
649 | + scrub_pages(v, 1); | |
650 | + | |
651 | + balloon_lock(flags); | |
652 | + | |
653 | + if (xen_feature(XENFEAT_auto_translated_physmap)) { | |
654 | + unsigned long gmfn = page_to_pfn(page); | |
655 | + struct xen_memory_reservation reservation = { | |
656 | + .nr_extents = 1, | |
657 | + .extent_order = 0, | |
658 | + .domid = DOMID_SELF | |
659 | + }; | |
660 | + set_xen_guest_handle(reservation.extent_start, &gmfn); | |
661 | + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | |
662 | + &reservation); | |
663 | + if (ret == 1) | |
664 | + ret = 0; /* success */ | |
665 | + } else { | |
666 | +#ifdef CONFIG_XEN | |
667 | + ret = apply_to_page_range(&init_mm, (unsigned long)v, | |
668 | + PAGE_SIZE, dealloc_pte_fn, | |
669 | + NULL); | |
670 | +#else | |
671 | + /* Cannot handle non-auto translate mode. */ | |
672 | + ret = 1; | |
673 | +#endif | |
674 | + } | |
675 | + | |
676 | + if (ret != 0) { | |
677 | + balloon_unlock(flags); | |
678 | + balloon_free_page(page); | |
679 | + goto err; | |
680 | + } | |
681 | + | |
682 | + totalram_pages = --bs.current_pages; | |
683 | + | |
684 | + balloon_unlock(flags); | |
685 | + } | |
686 | + | |
687 | + out: | |
688 | + schedule_work(&balloon_worker); | |
689 | +#ifdef CONFIG_XEN | |
690 | + flush_tlb_all(); | |
691 | +#endif | |
692 | + return pagevec; | |
693 | + | |
694 | + err: | |
695 | + balloon_lock(flags); | |
696 | + while (--i >= 0) | |
697 | + balloon_append(pagevec[i]); | |
698 | + balloon_unlock(flags); | |
699 | + kfree(pagevec); | |
700 | + pagevec = NULL; | |
701 | + goto out; | |
702 | +} | |
703 | + | |
704 | +void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) | |
705 | +{ | |
706 | + unsigned long flags; | |
707 | + int i; | |
708 | + | |
709 | + if (pagevec == NULL) | |
710 | + return; | |
711 | + | |
712 | + balloon_lock(flags); | |
713 | + for (i = 0; i < nr_pages; i++) { | |
714 | + BUG_ON(page_count(pagevec[i]) != 1); | |
715 | + balloon_append(pagevec[i]); | |
716 | + } | |
717 | + balloon_unlock(flags); | |
718 | + | |
719 | + kfree(pagevec); | |
720 | + | |
721 | + schedule_work(&balloon_worker); | |
722 | +} | |
723 | + | |
724 | +void balloon_release_driver_page(struct page *page) | |
725 | +{ | |
726 | + unsigned long flags; | |
727 | + | |
728 | + balloon_lock(flags); | |
729 | + balloon_append(page); | |
730 | + bs.driver_pages--; | |
731 | + balloon_unlock(flags); | |
732 | + | |
733 | + schedule_work(&balloon_worker); | |
734 | +} | |
735 | + | |
736 | +EXPORT_SYMBOL_GPL(balloon_update_driver_allowance); | |
737 | +EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec); | |
738 | +EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec); | |
739 | +EXPORT_SYMBOL_GPL(balloon_release_driver_page); | |
740 | + | |
741 | +MODULE_LICENSE("Dual BSD/GPL"); | |
742 | Index: head-2008-11-25/drivers/xen/balloon/common.h | |
743 | =================================================================== | |
744 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
745 | +++ head-2008-11-25/drivers/xen/balloon/common.h 2007-06-12 13:13:44.000000000 +0200 | |
746 | @@ -0,0 +1,58 @@ | |
747 | +/****************************************************************************** | |
748 | + * balloon/common.h | |
749 | + * | |
750 | + * This program is free software; you can redistribute it and/or | |
751 | + * modify it under the terms of the GNU General Public License version 2 | |
752 | + * as published by the Free Software Foundation; or, when distributed | |
753 | + * separately from the Linux kernel or incorporated into other | |
754 | + * software packages, subject to the following license: | |
755 | + * | |
756 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
757 | + * of this source file (the "Software"), to deal in the Software without | |
758 | + * restriction, including without limitation the rights to use, copy, modify, | |
759 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
760 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
761 | + * the following conditions: | |
762 | + * | |
763 | + * The above copyright notice and this permission notice shall be included in | |
764 | + * all copies or substantial portions of the Software. | |
765 | + * | |
766 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
767 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
768 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
769 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
770 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
771 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
772 | + * IN THE SOFTWARE. | |
773 | + */ | |
774 | + | |
775 | +#ifndef __XEN_BALLOON_COMMON_H__ | |
776 | +#define __XEN_BALLOON_COMMON_H__ | |
777 | + | |
778 | +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) | |
779 | + | |
780 | +struct balloon_stats { | |
781 | + /* We aim for 'current allocation' == 'target allocation'. */ | |
782 | + unsigned long current_pages; | |
783 | + unsigned long target_pages; | |
784 | + /* We may hit the hard limit in Xen. If we do then we remember it. */ | |
785 | + unsigned long hard_limit; | |
786 | + /* | |
787 | + * Drivers may alter the memory reservation independently, but they | |
788 | + * must inform the balloon driver so we avoid hitting the hard limit. | |
789 | + */ | |
790 | + unsigned long driver_pages; | |
791 | + /* Number of pages in high- and low-memory balloons. */ | |
792 | + unsigned long balloon_low; | |
793 | + unsigned long balloon_high; | |
794 | +}; | |
795 | + | |
796 | +extern struct balloon_stats balloon_stats; | |
797 | +#define bs balloon_stats | |
798 | + | |
799 | +int balloon_sysfs_init(void); | |
800 | +void balloon_sysfs_exit(void); | |
801 | + | |
802 | +void balloon_set_new_target(unsigned long target); | |
803 | + | |
804 | +#endif /* __XEN_BALLOON_COMMON_H__ */ | |
805 | Index: head-2008-11-25/drivers/xen/balloon/sysfs.c | |
806 | =================================================================== | |
807 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
808 | +++ head-2008-11-25/drivers/xen/balloon/sysfs.c 2008-04-02 12:34:02.000000000 +0200 | |
809 | @@ -0,0 +1,170 @@ | |
810 | +/****************************************************************************** | |
811 | + * balloon/sysfs.c | |
812 | + * | |
813 | + * Xen balloon driver - sysfs interfaces. | |
814 | + * | |
815 | + * This program is free software; you can redistribute it and/or | |
816 | + * modify it under the terms of the GNU General Public License version 2 | |
817 | + * as published by the Free Software Foundation; or, when distributed | |
818 | + * separately from the Linux kernel or incorporated into other | |
819 | + * software packages, subject to the following license: | |
820 | + * | |
821 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
822 | + * of this source file (the "Software"), to deal in the Software without | |
823 | + * restriction, including without limitation the rights to use, copy, modify, | |
824 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
825 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
826 | + * the following conditions: | |
827 | + * | |
828 | + * The above copyright notice and this permission notice shall be included in | |
829 | + * all copies or substantial portions of the Software. | |
830 | + * | |
831 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
832 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
833 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
834 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
835 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
836 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
837 | + * IN THE SOFTWARE. | |
838 | + */ | |
839 | + | |
840 | +#include <linux/capability.h> | |
841 | +#include <linux/errno.h> | |
842 | +#include <linux/stat.h> | |
843 | +#include <linux/string.h> | |
844 | +#include <linux/sysdev.h> | |
845 | +#include "common.h" | |
846 | + | |
847 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
848 | +#include <xen/platform-compat.h> | |
849 | +#endif | |
850 | + | |
851 | +#define BALLOON_CLASS_NAME "xen_memory" | |
852 | + | |
853 | +#define BALLOON_SHOW(name, format, args...) \ | |
854 | + static ssize_t show_##name(struct sys_device *dev, \ | |
855 | + char *buf) \ | |
856 | + { \ | |
857 | + return sprintf(buf, format, ##args); \ | |
858 | + } \ | |
859 | + static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) | |
860 | + | |
861 | +BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages)); | |
862 | +BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low)); | |
863 | +BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high)); | |
864 | +BALLOON_SHOW(hard_limit_kb, | |
865 | + (bs.hard_limit!=~0UL) ? "%lu\n" : "???\n", | |
866 | + (bs.hard_limit!=~0UL) ? PAGES2KB(bs.hard_limit) : 0); | |
867 | +BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages)); | |
868 | + | |
869 | +static ssize_t show_target_kb(struct sys_device *dev, char *buf) | |
870 | +{ | |
871 | + return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages)); | |
872 | +} | |
873 | + | |
874 | +static ssize_t store_target_kb(struct sys_device *dev, | |
875 | + const char *buf, | |
876 | + size_t count) | |
877 | +{ | |
878 | + char memstring[64], *endchar; | |
879 | + unsigned long long target_bytes; | |
880 | + | |
881 | + if (!capable(CAP_SYS_ADMIN)) | |
882 | + return -EPERM; | |
883 | + | |
884 | + if (count <= 1) | |
885 | + return -EBADMSG; /* runt */ | |
886 | + if (count > sizeof(memstring)) | |
887 | + return -EFBIG; /* too long */ | |
888 | + strcpy(memstring, buf); | |
889 | + | |
890 | + target_bytes = memparse(memstring, &endchar); | |
891 | + balloon_set_new_target(target_bytes >> PAGE_SHIFT); | |
892 | + | |
893 | + return count; | |
894 | +} | |
895 | + | |
896 | +static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, | |
897 | + show_target_kb, store_target_kb); | |
898 | + | |
899 | +static struct sysdev_attribute *balloon_attrs[] = { | |
900 | + &attr_target_kb, | |
901 | +}; | |
902 | + | |
903 | +static struct attribute *balloon_info_attrs[] = { | |
904 | + &attr_current_kb.attr, | |
905 | + &attr_low_kb.attr, | |
906 | + &attr_high_kb.attr, | |
907 | + &attr_hard_limit_kb.attr, | |
908 | + &attr_driver_kb.attr, | |
909 | + NULL | |
910 | +}; | |
911 | + | |
912 | +static struct attribute_group balloon_info_group = { | |
913 | + .name = "info", | |
914 | + .attrs = balloon_info_attrs, | |
915 | +}; | |
916 | + | |
917 | +static struct sysdev_class balloon_sysdev_class = { | |
918 | + set_kset_name(BALLOON_CLASS_NAME), | |
919 | +}; | |
920 | + | |
921 | +static struct sys_device balloon_sysdev; | |
922 | + | |
923 | +static int register_balloon(struct sys_device *sysdev) | |
924 | +{ | |
925 | + int i, error; | |
926 | + | |
927 | + error = sysdev_class_register(&balloon_sysdev_class); | |
928 | + if (error) | |
929 | + return error; | |
930 | + | |
931 | + sysdev->id = 0; | |
932 | + sysdev->cls = &balloon_sysdev_class; | |
933 | + | |
934 | + error = sysdev_register(sysdev); | |
935 | + if (error) { | |
936 | + sysdev_class_unregister(&balloon_sysdev_class); | |
937 | + return error; | |
938 | + } | |
939 | + | |
940 | + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { | |
941 | + error = sysdev_create_file(sysdev, balloon_attrs[i]); | |
942 | + if (error) | |
943 | + goto fail; | |
944 | + } | |
945 | + | |
946 | + error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); | |
947 | + if (error) | |
948 | + goto fail; | |
949 | + | |
950 | + return 0; | |
951 | + | |
952 | + fail: | |
953 | + while (--i >= 0) | |
954 | + sysdev_remove_file(sysdev, balloon_attrs[i]); | |
955 | + sysdev_unregister(sysdev); | |
956 | + sysdev_class_unregister(&balloon_sysdev_class); | |
957 | + return error; | |
958 | +} | |
959 | + | |
960 | +static void unregister_balloon(struct sys_device *sysdev) | |
961 | +{ | |
962 | + int i; | |
963 | + | |
964 | + sysfs_remove_group(&sysdev->kobj, &balloon_info_group); | |
965 | + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) | |
966 | + sysdev_remove_file(sysdev, balloon_attrs[i]); | |
967 | + sysdev_unregister(sysdev); | |
968 | + sysdev_class_unregister(&balloon_sysdev_class); | |
969 | +} | |
970 | + | |
971 | +int balloon_sysfs_init(void) | |
972 | +{ | |
973 | + return register_balloon(&balloon_sysdev); | |
974 | +} | |
975 | + | |
976 | +void balloon_sysfs_exit(void) | |
977 | +{ | |
978 | + unregister_balloon(&balloon_sysdev); | |
979 | +} | |
980 | Index: head-2008-11-25/drivers/xen/blkback/Makefile | |
981 | =================================================================== | |
982 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
983 | +++ head-2008-11-25/drivers/xen/blkback/Makefile 2007-06-12 13:13:44.000000000 +0200 | |
984 | @@ -0,0 +1,3 @@ | |
985 | +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o | |
986 | + | |
987 | +blkbk-y := blkback.o xenbus.o interface.o vbd.o | |
988 | Index: head-2008-11-25/drivers/xen/blkback/blkback.c | |
989 | =================================================================== | |
990 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
991 | +++ head-2008-11-25/drivers/xen/blkback/blkback.c 2008-11-10 11:44:21.000000000 +0100 | |
992 | @@ -0,0 +1,656 @@ | |
993 | +/****************************************************************************** | |
994 | + * arch/xen/drivers/blkif/backend/main.c | |
995 | + * | |
996 | + * Back-end of the driver for virtual block devices. This portion of the | |
997 | + * driver exports a 'unified' block-device interface that can be accessed | |
998 | + * by any operating system that implements a compatible front end. A | |
999 | + * reference front-end implementation can be found in: | |
1000 | + * arch/xen/drivers/blkif/frontend | |
1001 | + * | |
1002 | + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand | |
1003 | + * Copyright (c) 2005, Christopher Clark | |
1004 | + * | |
1005 | + * This program is free software; you can redistribute it and/or | |
1006 | + * modify it under the terms of the GNU General Public License version 2 | |
1007 | + * as published by the Free Software Foundation; or, when distributed | |
1008 | + * separately from the Linux kernel or incorporated into other | |
1009 | + * software packages, subject to the following license: | |
1010 | + * | |
1011 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
1012 | + * of this source file (the "Software"), to deal in the Software without | |
1013 | + * restriction, including without limitation the rights to use, copy, modify, | |
1014 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
1015 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
1016 | + * the following conditions: | |
1017 | + * | |
1018 | + * The above copyright notice and this permission notice shall be included in | |
1019 | + * all copies or substantial portions of the Software. | |
1020 | + * | |
1021 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
1022 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
1023 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
1024 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
1025 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
1026 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
1027 | + * IN THE SOFTWARE. | |
1028 | + */ | |
1029 | + | |
1030 | +#include <linux/spinlock.h> | |
1031 | +#include <linux/kthread.h> | |
1032 | +#include <linux/list.h> | |
1033 | +#include <linux/delay.h> | |
1034 | +#include <xen/balloon.h> | |
1035 | +#include <asm/hypervisor.h> | |
1036 | +#include "common.h" | |
1037 | + | |
1038 | +/* | |
1039 | + * These are rather arbitrary. They are fairly large because adjacent requests | |
1040 | + * pulled from a communication ring are quite likely to end up being part of | |
1041 | + * the same scatter/gather request at the disc. | |
1042 | + * | |
1043 | + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** | |
1044 | + * | |
1045 | + * This will increase the chances of being able to write whole tracks. | |
1046 | + * 64 should be enough to keep us competitive with Linux. | |
1047 | + */ | |
1048 | +static int blkif_reqs = 64; | |
1049 | +module_param_named(reqs, blkif_reqs, int, 0); | |
1050 | +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); | |
1051 | + | |
1052 | +/* Run-time switchable: /sys/module/blkback/parameters/ */ | |
1053 | +static unsigned int log_stats = 0; | |
1054 | +static unsigned int debug_lvl = 0; | |
1055 | +module_param(log_stats, int, 0644); | |
1056 | +module_param(debug_lvl, int, 0644); | |
1057 | + | |
1058 | +/* | |
1059 | + * Each outstanding request that we've passed to the lower device layers has a | |
1060 | + * 'pending_req' allocated to it. Each buffer_head that completes decrements | |
1061 | + * the pendcnt towards zero. When it hits zero, the specified domain has a | |
1062 | + * response queued for it, with the saved 'id' passed back. | |
1063 | + */ | |
1064 | +typedef struct { | |
1065 | + blkif_t *blkif; | |
1066 | + u64 id; | |
1067 | + int nr_pages; | |
1068 | + atomic_t pendcnt; | |
1069 | + unsigned short operation; | |
1070 | + int status; | |
1071 | + struct list_head free_list; | |
1072 | +} pending_req_t; | |
1073 | + | |
1074 | +static pending_req_t *pending_reqs; | |
1075 | +static struct list_head pending_free; | |
1076 | +static DEFINE_SPINLOCK(pending_free_lock); | |
1077 | +static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); | |
1078 | + | |
1079 | +#define BLKBACK_INVALID_HANDLE (~0) | |
1080 | + | |
1081 | +static struct page **pending_pages; | |
1082 | +static grant_handle_t *pending_grant_handles; | |
1083 | + | |
1084 | +static inline int vaddr_pagenr(pending_req_t *req, int seg) | |
1085 | +{ | |
1086 | + return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; | |
1087 | +} | |
1088 | + | |
1089 | +static inline unsigned long vaddr(pending_req_t *req, int seg) | |
1090 | +{ | |
1091 | + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); | |
1092 | + return (unsigned long)pfn_to_kaddr(pfn); | |
1093 | +} | |
1094 | + | |
1095 | +#define pending_handle(_req, _seg) \ | |
1096 | + (pending_grant_handles[vaddr_pagenr(_req, _seg)]) | |
1097 | + | |
1098 | + | |
1099 | +static int do_block_io_op(blkif_t *blkif); | |
1100 | +static void dispatch_rw_block_io(blkif_t *blkif, | |
1101 | + blkif_request_t *req, | |
1102 | + pending_req_t *pending_req); | |
1103 | +static void make_response(blkif_t *blkif, u64 id, | |
1104 | + unsigned short op, int st); | |
1105 | + | |
1106 | +/****************************************************************** | |
1107 | + * misc small helpers | |
1108 | + */ | |
1109 | +static pending_req_t* alloc_req(void) | |
1110 | +{ | |
1111 | + pending_req_t *req = NULL; | |
1112 | + unsigned long flags; | |
1113 | + | |
1114 | + spin_lock_irqsave(&pending_free_lock, flags); | |
1115 | + if (!list_empty(&pending_free)) { | |
1116 | + req = list_entry(pending_free.next, pending_req_t, free_list); | |
1117 | + list_del(&req->free_list); | |
1118 | + } | |
1119 | + spin_unlock_irqrestore(&pending_free_lock, flags); | |
1120 | + return req; | |
1121 | +} | |
1122 | + | |
1123 | +static void free_req(pending_req_t *req) | |
1124 | +{ | |
1125 | + unsigned long flags; | |
1126 | + int was_empty; | |
1127 | + | |
1128 | + spin_lock_irqsave(&pending_free_lock, flags); | |
1129 | + was_empty = list_empty(&pending_free); | |
1130 | + list_add(&req->free_list, &pending_free); | |
1131 | + spin_unlock_irqrestore(&pending_free_lock, flags); | |
1132 | + if (was_empty) | |
1133 | + wake_up(&pending_free_wq); | |
1134 | +} | |
1135 | + | |
1136 | +static void unplug_queue(blkif_t *blkif) | |
1137 | +{ | |
1138 | + if (blkif->plug == NULL) | |
1139 | + return; | |
1140 | + if (blkif->plug->unplug_fn) | |
1141 | + blkif->plug->unplug_fn(blkif->plug); | |
1142 | + blk_put_queue(blkif->plug); | |
1143 | + blkif->plug = NULL; | |
1144 | +} | |
1145 | + | |
1146 | +static void plug_queue(blkif_t *blkif, struct block_device *bdev) | |
1147 | +{ | |
1148 | + request_queue_t *q = bdev_get_queue(bdev); | |
1149 | + | |
1150 | + if (q == blkif->plug) | |
1151 | + return; | |
1152 | + unplug_queue(blkif); | |
1153 | + blk_get_queue(q); | |
1154 | + blkif->plug = q; | |
1155 | +} | |
1156 | + | |
1157 | +static void fast_flush_area(pending_req_t *req) | |
1158 | +{ | |
1159 | + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
1160 | + unsigned int i, invcount = 0; | |
1161 | + grant_handle_t handle; | |
1162 | + int ret; | |
1163 | + | |
1164 | + for (i = 0; i < req->nr_pages; i++) { | |
1165 | + handle = pending_handle(req, i); | |
1166 | + if (handle == BLKBACK_INVALID_HANDLE) | |
1167 | + continue; | |
1168 | + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), | |
1169 | + GNTMAP_host_map, handle); | |
1170 | + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; | |
1171 | + invcount++; | |
1172 | + } | |
1173 | + | |
1174 | + ret = HYPERVISOR_grant_table_op( | |
1175 | + GNTTABOP_unmap_grant_ref, unmap, invcount); | |
1176 | + BUG_ON(ret); | |
1177 | +} | |
1178 | + | |
1179 | +/****************************************************************** | |
1180 | + * SCHEDULER FUNCTIONS | |
1181 | + */ | |
1182 | + | |
1183 | +static void print_stats(blkif_t *blkif) | |
1184 | +{ | |
1185 | + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", | |
1186 | + current->comm, blkif->st_oo_req, | |
1187 | + blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); | |
1188 | + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); | |
1189 | + blkif->st_rd_req = 0; | |
1190 | + blkif->st_wr_req = 0; | |
1191 | + blkif->st_oo_req = 0; | |
1192 | +} | |
1193 | + | |
1194 | +int blkif_schedule(void *arg) | |
1195 | +{ | |
1196 | + blkif_t *blkif = arg; | |
1197 | + | |
1198 | + blkif_get(blkif); | |
1199 | + | |
1200 | + if (debug_lvl) | |
1201 | + printk(KERN_DEBUG "%s: started\n", current->comm); | |
1202 | + | |
1203 | + while (!kthread_should_stop()) { | |
1204 | + if (try_to_freeze()) | |
1205 | + continue; | |
1206 | + | |
1207 | + wait_event_interruptible( | |
1208 | + blkif->wq, | |
1209 | + blkif->waiting_reqs || kthread_should_stop()); | |
1210 | + wait_event_interruptible( | |
1211 | + pending_free_wq, | |
1212 | + !list_empty(&pending_free) || kthread_should_stop()); | |
1213 | + | |
1214 | + blkif->waiting_reqs = 0; | |
1215 | + smp_mb(); /* clear flag *before* checking for work */ | |
1216 | + | |
1217 | + if (do_block_io_op(blkif)) | |
1218 | + blkif->waiting_reqs = 1; | |
1219 | + unplug_queue(blkif); | |
1220 | + | |
1221 | + if (log_stats && time_after(jiffies, blkif->st_print)) | |
1222 | + print_stats(blkif); | |
1223 | + } | |
1224 | + | |
1225 | + if (log_stats) | |
1226 | + print_stats(blkif); | |
1227 | + if (debug_lvl) | |
1228 | + printk(KERN_DEBUG "%s: exiting\n", current->comm); | |
1229 | + | |
1230 | + blkif->xenblkd = NULL; | |
1231 | + blkif_put(blkif); | |
1232 | + | |
1233 | + return 0; | |
1234 | +} | |
1235 | + | |
1236 | +/****************************************************************** | |
1237 | + * COMPLETION CALLBACK -- Called as bh->b_end_io() | |
1238 | + */ | |
1239 | + | |
1240 | +static void __end_block_io_op(pending_req_t *pending_req, int error) | |
1241 | +{ | |
1242 | + /* An error fails the entire request. */ | |
1243 | + if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && | |
1244 | + (error == -EOPNOTSUPP)) { | |
1245 | + DPRINTK("blkback: write barrier op failed, not supported\n"); | |
1246 | + blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); | |
1247 | + pending_req->status = BLKIF_RSP_EOPNOTSUPP; | |
1248 | + } else if (error) { | |
1249 | + DPRINTK("Buffer not up-to-date at end of operation, " | |
1250 | + "error=%d\n", error); | |
1251 | + pending_req->status = BLKIF_RSP_ERROR; | |
1252 | + } | |
1253 | + | |
1254 | + if (atomic_dec_and_test(&pending_req->pendcnt)) { | |
1255 | + fast_flush_area(pending_req); | |
1256 | + make_response(pending_req->blkif, pending_req->id, | |
1257 | + pending_req->operation, pending_req->status); | |
1258 | + blkif_put(pending_req->blkif); | |
1259 | + free_req(pending_req); | |
1260 | + } | |
1261 | +} | |
1262 | + | |
1263 | +static int end_block_io_op(struct bio *bio, unsigned int done, int error) | |
1264 | +{ | |
1265 | + if (bio->bi_size != 0) | |
1266 | + return 1; | |
1267 | + __end_block_io_op(bio->bi_private, error); | |
1268 | + bio_put(bio); | |
1269 | + return error; | |
1270 | +} | |
1271 | + | |
1272 | + | |
1273 | +/****************************************************************************** | |
1274 | + * NOTIFICATION FROM GUEST OS. | |
1275 | + */ | |
1276 | + | |
1277 | +static void blkif_notify_work(blkif_t *blkif) | |
1278 | +{ | |
1279 | + blkif->waiting_reqs = 1; | |
1280 | + wake_up(&blkif->wq); | |
1281 | +} | |
1282 | + | |
1283 | +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) | |
1284 | +{ | |
1285 | + blkif_notify_work(dev_id); | |
1286 | + return IRQ_HANDLED; | |
1287 | +} | |
1288 | + | |
1289 | + | |
1290 | + | |
1291 | +/****************************************************************** | |
1292 | + * DOWNWARD CALLS -- These interface with the block-device layer proper. | |
1293 | + */ | |
1294 | + | |
1295 | +static int do_block_io_op(blkif_t *blkif) | |
1296 | +{ | |
1297 | + blkif_back_rings_t *blk_rings = &blkif->blk_rings; | |
1298 | + blkif_request_t req; | |
1299 | + pending_req_t *pending_req; | |
1300 | + RING_IDX rc, rp; | |
1301 | + int more_to_do = 0; | |
1302 | + | |
1303 | + rc = blk_rings->common.req_cons; | |
1304 | + rp = blk_rings->common.sring->req_prod; | |
1305 | + rmb(); /* Ensure we see queued requests up to 'rp'. */ | |
1306 | + | |
1307 | + while (rc != rp) { | |
1308 | + | |
1309 | + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) | |
1310 | + break; | |
1311 | + | |
1312 | + pending_req = alloc_req(); | |
1313 | + if (NULL == pending_req) { | |
1314 | + blkif->st_oo_req++; | |
1315 | + more_to_do = 1; | |
1316 | + break; | |
1317 | + } | |
1318 | + | |
1319 | + if (kthread_should_stop()) { | |
1320 | + more_to_do = 1; | |
1321 | + break; | |
1322 | + } | |
1323 | + | |
1324 | + switch (blkif->blk_protocol) { | |
1325 | + case BLKIF_PROTOCOL_NATIVE: | |
1326 | + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); | |
1327 | + break; | |
1328 | + case BLKIF_PROTOCOL_X86_32: | |
1329 | + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); | |
1330 | + break; | |
1331 | + case BLKIF_PROTOCOL_X86_64: | |
1332 | + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); | |
1333 | + break; | |
1334 | + default: | |
1335 | + BUG(); | |
1336 | + } | |
1337 | + blk_rings->common.req_cons = ++rc; /* before make_response() */ | |
1338 | + | |
1339 | + /* Apply all sanity checks to /private copy/ of request. */ | |
1340 | + barrier(); | |
1341 | + | |
1342 | + switch (req.operation) { | |
1343 | + case BLKIF_OP_READ: | |
1344 | + blkif->st_rd_req++; | |
1345 | + dispatch_rw_block_io(blkif, &req, pending_req); | |
1346 | + break; | |
1347 | + case BLKIF_OP_WRITE_BARRIER: | |
1348 | + blkif->st_br_req++; | |
1349 | + /* fall through */ | |
1350 | + case BLKIF_OP_WRITE: | |
1351 | + blkif->st_wr_req++; | |
1352 | + dispatch_rw_block_io(blkif, &req, pending_req); | |
1353 | + break; | |
1354 | + default: | |
1355 | + /* A good sign something is wrong: sleep for a while to | |
1356 | + * avoid excessive CPU consumption by a bad guest. */ | |
1357 | + msleep(1); | |
1358 | + DPRINTK("error: unknown block io operation [%d]\n", | |
1359 | + req.operation); | |
1360 | + make_response(blkif, req.id, req.operation, | |
1361 | + BLKIF_RSP_ERROR); | |
1362 | + free_req(pending_req); | |
1363 | + break; | |
1364 | + } | |
1365 | + | |
1366 | + /* Yield point for this unbounded loop. */ | |
1367 | + cond_resched(); | |
1368 | + } | |
1369 | + | |
1370 | + return more_to_do; | |
1371 | +} | |
1372 | + | |
1373 | +static void dispatch_rw_block_io(blkif_t *blkif, | |
1374 | + blkif_request_t *req, | |
1375 | + pending_req_t *pending_req) | |
1376 | +{ | |
1377 | + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); | |
1378 | + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
1379 | + struct phys_req preq; | |
1380 | + struct { | |
1381 | + unsigned long buf; unsigned int nsec; | |
1382 | + } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
1383 | + unsigned int nseg; | |
1384 | + struct bio *bio = NULL; | |
1385 | + int ret, i; | |
1386 | + int operation; | |
1387 | + | |
1388 | + switch (req->operation) { | |
1389 | + case BLKIF_OP_READ: | |
1390 | + operation = READ; | |
1391 | + break; | |
1392 | + case BLKIF_OP_WRITE: | |
1393 | + operation = WRITE; | |
1394 | + break; | |
1395 | + case BLKIF_OP_WRITE_BARRIER: | |
1396 | + operation = WRITE_BARRIER; | |
1397 | + break; | |
1398 | + default: | |
1399 | + operation = 0; /* make gcc happy */ | |
1400 | + BUG(); | |
1401 | + } | |
1402 | + | |
1403 | + /* Check that number of segments is sane. */ | |
1404 | + nseg = req->nr_segments; | |
1405 | + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || | |
1406 | + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { | |
1407 | + DPRINTK("Bad number of segments in request (%d)\n", nseg); | |
1408 | + goto fail_response; | |
1409 | + } | |
1410 | + | |
1411 | + preq.dev = req->handle; | |
1412 | + preq.sector_number = req->sector_number; | |
1413 | + preq.nr_sects = 0; | |
1414 | + | |
1415 | + pending_req->blkif = blkif; | |
1416 | + pending_req->id = req->id; | |
1417 | + pending_req->operation = req->operation; | |
1418 | + pending_req->status = BLKIF_RSP_OKAY; | |
1419 | + pending_req->nr_pages = nseg; | |
1420 | + | |
1421 | + for (i = 0; i < nseg; i++) { | |
1422 | + uint32_t flags; | |
1423 | + | |
1424 | + seg[i].nsec = req->seg[i].last_sect - | |
1425 | + req->seg[i].first_sect + 1; | |
1426 | + | |
1427 | + if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || | |
1428 | + (req->seg[i].last_sect < req->seg[i].first_sect)) | |
1429 | + goto fail_response; | |
1430 | + preq.nr_sects += seg[i].nsec; | |
1431 | + | |
1432 | + flags = GNTMAP_host_map; | |
1433 | + if (operation != READ) | |
1434 | + flags |= GNTMAP_readonly; | |
1435 | + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, | |
1436 | + req->seg[i].gref, blkif->domid); | |
1437 | + } | |
1438 | + | |
1439 | + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); | |
1440 | + BUG_ON(ret); | |
1441 | + | |
1442 | + for (i = 0; i < nseg; i++) { | |
1443 | + if (unlikely(map[i].status != 0)) { | |
1444 | + DPRINTK("invalid buffer -- could not remap it\n"); | |
1445 | + map[i].handle = BLKBACK_INVALID_HANDLE; | |
1446 | + ret |= 1; | |
1447 | + } | |
1448 | + | |
1449 | + pending_handle(pending_req, i) = map[i].handle; | |
1450 | + | |
1451 | + if (ret) | |
1452 | + continue; | |
1453 | + | |
1454 | + set_phys_to_machine(__pa(vaddr( | |
1455 | + pending_req, i)) >> PAGE_SHIFT, | |
1456 | + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); | |
1457 | + seg[i].buf = map[i].dev_bus_addr | | |
1458 | + (req->seg[i].first_sect << 9); | |
1459 | + } | |
1460 | + | |
1461 | + if (ret) | |
1462 | + goto fail_flush; | |
1463 | + | |
1464 | + if (vbd_translate(&preq, blkif, operation) != 0) { | |
1465 | + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", | |
1466 | + operation == READ ? "read" : "write", | |
1467 | + preq.sector_number, | |
1468 | + preq.sector_number + preq.nr_sects, preq.dev); | |
1469 | + goto fail_flush; | |
1470 | + } | |
1471 | + | |
1472 | + plug_queue(blkif, preq.bdev); | |
1473 | + atomic_set(&pending_req->pendcnt, 1); | |
1474 | + blkif_get(blkif); | |
1475 | + | |
1476 | + for (i = 0; i < nseg; i++) { | |
1477 | + if (((int)preq.sector_number|(int)seg[i].nsec) & | |
1478 | + ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) { | |
1479 | + DPRINTK("Misaligned I/O request from domain %d", | |
1480 | + blkif->domid); | |
1481 | + goto fail_put_bio; | |
1482 | + } | |
1483 | + | |
1484 | + while ((bio == NULL) || | |
1485 | + (bio_add_page(bio, | |
1486 | + virt_to_page(vaddr(pending_req, i)), | |
1487 | + seg[i].nsec << 9, | |
1488 | + seg[i].buf & ~PAGE_MASK) == 0)) { | |
1489 | + if (bio) { | |
1490 | + atomic_inc(&pending_req->pendcnt); | |
1491 | + submit_bio(operation, bio); | |
1492 | + } | |
1493 | + | |
1494 | + bio = bio_alloc(GFP_KERNEL, nseg-i); | |
1495 | + if (unlikely(bio == NULL)) | |
1496 | + goto fail_put_bio; | |
1497 | + | |
1498 | + bio->bi_bdev = preq.bdev; | |
1499 | + bio->bi_private = pending_req; | |
1500 | + bio->bi_end_io = end_block_io_op; | |
1501 | + bio->bi_sector = preq.sector_number; | |
1502 | + } | |
1503 | + | |
1504 | + preq.sector_number += seg[i].nsec; | |
1505 | + } | |
1506 | + | |
1507 | + if (!bio) { | |
1508 | + BUG_ON(operation != WRITE_BARRIER); | |
1509 | + bio = bio_alloc(GFP_KERNEL, 0); | |
1510 | + if (unlikely(bio == NULL)) | |
1511 | + goto fail_put_bio; | |
1512 | + | |
1513 | + bio->bi_bdev = preq.bdev; | |
1514 | + bio->bi_private = pending_req; | |
1515 | + bio->bi_end_io = end_block_io_op; | |
1516 | + bio->bi_sector = -1; | |
1517 | + } | |
1518 | + | |
1519 | + submit_bio(operation, bio); | |
1520 | + | |
1521 | + if (operation == READ) | |
1522 | + blkif->st_rd_sect += preq.nr_sects; | |
1523 | + else if (operation == WRITE || operation == WRITE_BARRIER) | |
1524 | + blkif->st_wr_sect += preq.nr_sects; | |
1525 | + | |
1526 | + return; | |
1527 | + | |
1528 | + fail_flush: | |
1529 | + fast_flush_area(pending_req); | |
1530 | + fail_response: | |
1531 | + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); | |
1532 | + free_req(pending_req); | |
1533 | + msleep(1); /* back off a bit */ | |
1534 | + return; | |
1535 | + | |
1536 | + fail_put_bio: | |
1537 | + __end_block_io_op(pending_req, -EINVAL); | |
1538 | + if (bio) | |
1539 | + bio_put(bio); | |
1540 | + unplug_queue(blkif); | |
1541 | + msleep(1); /* back off a bit */ | |
1542 | + return; | |
1543 | +} | |
1544 | + | |
1545 | + | |
1546 | + | |
1547 | +/****************************************************************** | |
1548 | + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING | |
1549 | + */ | |
1550 | + | |
1551 | + | |
1552 | +static void make_response(blkif_t *blkif, u64 id, | |
1553 | + unsigned short op, int st) | |
1554 | +{ | |
1555 | + blkif_response_t resp; | |
1556 | + unsigned long flags; | |
1557 | + blkif_back_rings_t *blk_rings = &blkif->blk_rings; | |
1558 | + int more_to_do = 0; | |
1559 | + int notify; | |
1560 | + | |
1561 | + resp.id = id; | |
1562 | + resp.operation = op; | |
1563 | + resp.status = st; | |
1564 | + | |
1565 | + spin_lock_irqsave(&blkif->blk_ring_lock, flags); | |
1566 | + /* Place on the response ring for the relevant domain. */ | |
1567 | + switch (blkif->blk_protocol) { | |
1568 | + case BLKIF_PROTOCOL_NATIVE: | |
1569 | + memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), | |
1570 | + &resp, sizeof(resp)); | |
1571 | + break; | |
1572 | + case BLKIF_PROTOCOL_X86_32: | |
1573 | + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), | |
1574 | + &resp, sizeof(resp)); | |
1575 | + break; | |
1576 | + case BLKIF_PROTOCOL_X86_64: | |
1577 | + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), | |
1578 | + &resp, sizeof(resp)); | |
1579 | + break; | |
1580 | + default: | |
1581 | + BUG(); | |
1582 | + } | |
1583 | + blk_rings->common.rsp_prod_pvt++; | |
1584 | + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); | |
1585 | + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { | |
1586 | + /* | |
1587 | + * Tail check for pending requests. Allows frontend to avoid | |
1588 | + * notifications if requests are already in flight (lower | |
1589 | + * overheads and promotes batching). | |
1590 | + */ | |
1591 | + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); | |
1592 | + | |
1593 | + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { | |
1594 | + more_to_do = 1; | |
1595 | + } | |
1596 | + | |
1597 | + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); | |
1598 | + | |
1599 | + if (more_to_do) | |
1600 | + blkif_notify_work(blkif); | |
1601 | + if (notify) | |
1602 | + notify_remote_via_irq(blkif->irq); | |
1603 | +} | |
1604 | + | |
1605 | +static int __init blkif_init(void) | |
1606 | +{ | |
1607 | + int i, mmap_pages; | |
1608 | + | |
1609 | + if (!is_running_on_xen()) | |
1610 | + return -ENODEV; | |
1611 | + | |
1612 | + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; | |
1613 | + | |
1614 | + pending_reqs = kmalloc(sizeof(pending_reqs[0]) * | |
1615 | + blkif_reqs, GFP_KERNEL); | |
1616 | + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * | |
1617 | + mmap_pages, GFP_KERNEL); | |
1618 | + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); | |
1619 | + | |
1620 | + if (!pending_reqs || !pending_grant_handles || !pending_pages) | |
1621 | + goto out_of_memory; | |
1622 | + | |
1623 | + for (i = 0; i < mmap_pages; i++) | |
1624 | + pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; | |
1625 | + | |
1626 | + blkif_interface_init(); | |
1627 | + | |
1628 | + memset(pending_reqs, 0, sizeof(pending_reqs)); | |
1629 | + INIT_LIST_HEAD(&pending_free); | |
1630 | + | |
1631 | + for (i = 0; i < blkif_reqs; i++) | |
1632 | + list_add_tail(&pending_reqs[i].free_list, &pending_free); | |
1633 | + | |
1634 | + blkif_xenbus_init(); | |
1635 | + | |
1636 | + return 0; | |
1637 | + | |
1638 | + out_of_memory: | |
1639 | + kfree(pending_reqs); | |
1640 | + kfree(pending_grant_handles); | |
1641 | + free_empty_pages_and_pagevec(pending_pages, mmap_pages); | |
1642 | + printk("%s: out of memory\n", __FUNCTION__); | |
1643 | + return -ENOMEM; | |
1644 | +} | |
1645 | + | |
1646 | +module_init(blkif_init); | |
1647 | + | |
1648 | +MODULE_LICENSE("Dual BSD/GPL"); | |
1649 | Index: head-2008-11-25/drivers/xen/blkback/common.h | |
1650 | =================================================================== | |
1651 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
1652 | +++ head-2008-11-25/drivers/xen/blkback/common.h 2008-05-08 14:02:04.000000000 +0200 | |
1653 | @@ -0,0 +1,139 @@ | |
1654 | +/* | |
1655 | + * This program is free software; you can redistribute it and/or | |
1656 | + * modify it under the terms of the GNU General Public License version 2 | |
1657 | + * as published by the Free Software Foundation; or, when distributed | |
1658 | + * separately from the Linux kernel or incorporated into other | |
1659 | + * software packages, subject to the following license: | |
1660 | + * | |
1661 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
1662 | + * of this source file (the "Software"), to deal in the Software without | |
1663 | + * restriction, including without limitation the rights to use, copy, modify, | |
1664 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
1665 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
1666 | + * the following conditions: | |
1667 | + * | |
1668 | + * The above copyright notice and this permission notice shall be included in | |
1669 | + * all copies or substantial portions of the Software. | |
1670 | + * | |
1671 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
1672 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
1673 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
1674 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
1675 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
1676 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
1677 | + * IN THE SOFTWARE. | |
1678 | + */ | |
1679 | + | |
1680 | +#ifndef __BLKIF__BACKEND__COMMON_H__ | |
1681 | +#define __BLKIF__BACKEND__COMMON_H__ | |
1682 | + | |
1683 | +#include <linux/version.h> | |
1684 | +#include <linux/module.h> | |
1685 | +#include <linux/interrupt.h> | |
1686 | +#include <linux/slab.h> | |
1687 | +#include <linux/blkdev.h> | |
1688 | +#include <linux/vmalloc.h> | |
1689 | +#include <linux/wait.h> | |
1690 | +#include <asm/io.h> | |
1691 | +#include <asm/setup.h> | |
1692 | +#include <asm/pgalloc.h> | |
1693 | +#include <xen/evtchn.h> | |
1694 | +#include <asm/hypervisor.h> | |
1695 | +#include <xen/blkif.h> | |
1696 | +#include <xen/gnttab.h> | |
1697 | +#include <xen/driver_util.h> | |
1698 | +#include <xen/xenbus.h> | |
1699 | + | |
1700 | +#define DPRINTK(_f, _a...) \ | |
1701 | + pr_debug("(file=%s, line=%d) " _f, \ | |
1702 | + __FILE__ , __LINE__ , ## _a ) | |
1703 | + | |
1704 | +struct vbd { | |
1705 | + blkif_vdev_t handle; /* what the domain refers to this vbd as */ | |
1706 | + unsigned char readonly; /* Non-zero -> read-only */ | |
1707 | + unsigned char type; /* VDISK_xxx */ | |
1708 | + u32 pdevice; /* phys device that this vbd maps to */ | |
1709 | + struct block_device *bdev; | |
1710 | +}; | |
1711 | + | |
1712 | +struct backend_info; | |
1713 | + | |
1714 | +typedef struct blkif_st { | |
1715 | + /* Unique identifier for this interface. */ | |
1716 | + domid_t domid; | |
1717 | + unsigned int handle; | |
1718 | + /* Physical parameters of the comms window. */ | |
1719 | + unsigned int irq; | |
1720 | + /* Comms information. */ | |
1721 | + enum blkif_protocol blk_protocol; | |
1722 | + blkif_back_rings_t blk_rings; | |
1723 | + struct vm_struct *blk_ring_area; | |
1724 | + /* The VBD attached to this interface. */ | |
1725 | + struct vbd vbd; | |
1726 | + /* Back pointer to the backend_info. */ | |
1727 | + struct backend_info *be; | |
1728 | + /* Private fields. */ | |
1729 | + spinlock_t blk_ring_lock; | |
1730 | + atomic_t refcnt; | |
1731 | + | |
1732 | + wait_queue_head_t wq; | |
1733 | + struct task_struct *xenblkd; | |
1734 | + unsigned int waiting_reqs; | |
1735 | + request_queue_t *plug; | |
1736 | + | |
1737 | + /* statistics */ | |
1738 | + unsigned long st_print; | |
1739 | + int st_rd_req; | |
1740 | + int st_wr_req; | |
1741 | + int st_oo_req; | |
1742 | + int st_br_req; | |
1743 | + int st_rd_sect; | |
1744 | + int st_wr_sect; | |
1745 | + | |
1746 | + wait_queue_head_t waiting_to_free; | |
1747 | + | |
1748 | + grant_handle_t shmem_handle; | |
1749 | + grant_ref_t shmem_ref; | |
1750 | +} blkif_t; | |
1751 | + | |
1752 | +blkif_t *blkif_alloc(domid_t domid); | |
1753 | +void blkif_disconnect(blkif_t *blkif); | |
1754 | +void blkif_free(blkif_t *blkif); | |
1755 | +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); | |
1756 | + | |
1757 | +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) | |
1758 | +#define blkif_put(_b) \ | |
1759 | + do { \ | |
1760 | + if (atomic_dec_and_test(&(_b)->refcnt)) \ | |
1761 | + wake_up(&(_b)->waiting_to_free);\ | |
1762 | + } while (0) | |
1763 | + | |
1764 | +/* Create a vbd. */ | |
1765 | +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major, | |
1766 | + unsigned minor, int readonly, int cdrom); | |
1767 | +void vbd_free(struct vbd *vbd); | |
1768 | + | |
1769 | +unsigned long long vbd_size(struct vbd *vbd); | |
1770 | +unsigned int vbd_info(struct vbd *vbd); | |
1771 | +unsigned long vbd_secsize(struct vbd *vbd); | |
1772 | + | |
1773 | +struct phys_req { | |
1774 | + unsigned short dev; | |
1775 | + unsigned short nr_sects; | |
1776 | + struct block_device *bdev; | |
1777 | + blkif_sector_t sector_number; | |
1778 | +}; | |
1779 | + | |
1780 | +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); | |
1781 | + | |
1782 | +void blkif_interface_init(void); | |
1783 | + | |
1784 | +void blkif_xenbus_init(void); | |
1785 | + | |
1786 | +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); | |
1787 | +int blkif_schedule(void *arg); | |
1788 | + | |
1789 | +int blkback_barrier(struct xenbus_transaction xbt, | |
1790 | + struct backend_info *be, int state); | |
1791 | + | |
1792 | +#endif /* __BLKIF__BACKEND__COMMON_H__ */ | |
1793 | Index: head-2008-11-25/drivers/xen/blkback/interface.c | |
1794 | =================================================================== | |
1795 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
1796 | +++ head-2008-11-25/drivers/xen/blkback/interface.c 2007-06-12 13:13:44.000000000 +0200 | |
1797 | @@ -0,0 +1,181 @@ | |
1798 | +/****************************************************************************** | |
1799 | + * arch/xen/drivers/blkif/backend/interface.c | |
1800 | + * | |
1801 | + * Block-device interface management. | |
1802 | + * | |
1803 | + * Copyright (c) 2004, Keir Fraser | |
1804 | + * | |
1805 | + * This program is free software; you can redistribute it and/or | |
1806 | + * modify it under the terms of the GNU General Public License version 2 | |
1807 | + * as published by the Free Software Foundation; or, when distributed | |
1808 | + * separately from the Linux kernel or incorporated into other | |
1809 | + * software packages, subject to the following license: | |
1810 | + * | |
1811 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
1812 | + * of this source file (the "Software"), to deal in the Software without | |
1813 | + * restriction, including without limitation the rights to use, copy, modify, | |
1814 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
1815 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
1816 | + * the following conditions: | |
1817 | + * | |
1818 | + * The above copyright notice and this permission notice shall be included in | |
1819 | + * all copies or substantial portions of the Software. | |
1820 | + * | |
1821 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
1822 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
1823 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
1824 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
1825 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
1826 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
1827 | + * IN THE SOFTWARE. | |
1828 | + */ | |
1829 | + | |
1830 | +#include "common.h" | |
1831 | +#include <xen/evtchn.h> | |
1832 | +#include <linux/kthread.h> | |
1833 | + | |
1834 | +static kmem_cache_t *blkif_cachep; | |
1835 | + | |
1836 | +blkif_t *blkif_alloc(domid_t domid) | |
1837 | +{ | |
1838 | + blkif_t *blkif; | |
1839 | + | |
1840 | + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); | |
1841 | + if (!blkif) | |
1842 | + return ERR_PTR(-ENOMEM); | |
1843 | + | |
1844 | + memset(blkif, 0, sizeof(*blkif)); | |
1845 | + blkif->domid = domid; | |
1846 | + spin_lock_init(&blkif->blk_ring_lock); | |
1847 | + atomic_set(&blkif->refcnt, 1); | |
1848 | + init_waitqueue_head(&blkif->wq); | |
1849 | + blkif->st_print = jiffies; | |
1850 | + init_waitqueue_head(&blkif->waiting_to_free); | |
1851 | + | |
1852 | + return blkif; | |
1853 | +} | |
1854 | + | |
1855 | +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) | |
1856 | +{ | |
1857 | + struct gnttab_map_grant_ref op; | |
1858 | + | |
1859 | + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, | |
1860 | + GNTMAP_host_map, shared_page, blkif->domid); | |
1861 | + | |
1862 | + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | |
1863 | + BUG(); | |
1864 | + | |
1865 | + if (op.status) { | |
1866 | + DPRINTK(" Grant table operation failure !\n"); | |
1867 | + return op.status; | |
1868 | + } | |
1869 | + | |
1870 | + blkif->shmem_ref = shared_page; | |
1871 | + blkif->shmem_handle = op.handle; | |
1872 | + | |
1873 | + return 0; | |
1874 | +} | |
1875 | + | |
1876 | +static void unmap_frontend_page(blkif_t *blkif) | |
1877 | +{ | |
1878 | + struct gnttab_unmap_grant_ref op; | |
1879 | + | |
1880 | + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, | |
1881 | + GNTMAP_host_map, blkif->shmem_handle); | |
1882 | + | |
1883 | + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | |
1884 | + BUG(); | |
1885 | +} | |
1886 | + | |
1887 | +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) | |
1888 | +{ | |
1889 | + int err; | |
1890 | + | |
1891 | + /* Already connected through? */ | |
1892 | + if (blkif->irq) | |
1893 | + return 0; | |
1894 | + | |
1895 | + if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) | |
1896 | + return -ENOMEM; | |
1897 | + | |
1898 | + err = map_frontend_page(blkif, shared_page); | |
1899 | + if (err) { | |
1900 | + free_vm_area(blkif->blk_ring_area); | |
1901 | + return err; | |
1902 | + } | |
1903 | + | |
1904 | + switch (blkif->blk_protocol) { | |
1905 | + case BLKIF_PROTOCOL_NATIVE: | |
1906 | + { | |
1907 | + blkif_sring_t *sring; | |
1908 | + sring = (blkif_sring_t *)blkif->blk_ring_area->addr; | |
1909 | + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); | |
1910 | + break; | |
1911 | + } | |
1912 | + case BLKIF_PROTOCOL_X86_32: | |
1913 | + { | |
1914 | + blkif_x86_32_sring_t *sring_x86_32; | |
1915 | + sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr; | |
1916 | + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); | |
1917 | + break; | |
1918 | + } | |
1919 | + case BLKIF_PROTOCOL_X86_64: | |
1920 | + { | |
1921 | + blkif_x86_64_sring_t *sring_x86_64; | |
1922 | + sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr; | |
1923 | + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); | |
1924 | + break; | |
1925 | + } | |
1926 | + default: | |
1927 | + BUG(); | |
1928 | + } | |
1929 | + | |
1930 | + err = bind_interdomain_evtchn_to_irqhandler( | |
1931 | + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); | |
1932 | + if (err < 0) | |
1933 | + { | |
1934 | + unmap_frontend_page(blkif); | |
1935 | + free_vm_area(blkif->blk_ring_area); | |
1936 | + blkif->blk_rings.common.sring = NULL; | |
1937 | + return err; | |
1938 | + } | |
1939 | + blkif->irq = err; | |
1940 | + | |
1941 | + return 0; | |
1942 | +} | |
1943 | + | |
1944 | +void blkif_disconnect(blkif_t *blkif) | |
1945 | +{ | |
1946 | + if (blkif->xenblkd) { | |
1947 | + kthread_stop(blkif->xenblkd); | |
1948 | + blkif->xenblkd = NULL; | |
1949 | + } | |
1950 | + | |
1951 | + atomic_dec(&blkif->refcnt); | |
1952 | + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); | |
1953 | + atomic_inc(&blkif->refcnt); | |
1954 | + | |
1955 | + if (blkif->irq) { | |
1956 | + unbind_from_irqhandler(blkif->irq, blkif); | |
1957 | + blkif->irq = 0; | |
1958 | + } | |
1959 | + | |
1960 | + if (blkif->blk_rings.common.sring) { | |
1961 | + unmap_frontend_page(blkif); | |
1962 | + free_vm_area(blkif->blk_ring_area); | |
1963 | + blkif->blk_rings.common.sring = NULL; | |
1964 | + } | |
1965 | +} | |
1966 | + | |
1967 | +void blkif_free(blkif_t *blkif) | |
1968 | +{ | |
1969 | + if (!atomic_dec_and_test(&blkif->refcnt)) | |
1970 | + BUG(); | |
1971 | + kmem_cache_free(blkif_cachep, blkif); | |
1972 | +} | |
1973 | + | |
1974 | +void __init blkif_interface_init(void) | |
1975 | +{ | |
1976 | + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), | |
1977 | + 0, 0, NULL, NULL); | |
1978 | +} | |
1979 | Index: head-2008-11-25/drivers/xen/blkback/vbd.c | |
1980 | =================================================================== | |
1981 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
1982 | +++ head-2008-11-25/drivers/xen/blkback/vbd.c 2008-05-08 14:02:04.000000000 +0200 | |
1983 | @@ -0,0 +1,118 @@ | |
1984 | +/****************************************************************************** | |
1985 | + * blkback/vbd.c | |
1986 | + * | |
1987 | + * Routines for managing virtual block devices (VBDs). | |
1988 | + * | |
1989 | + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand | |
1990 | + * | |
1991 | + * This program is free software; you can redistribute it and/or | |
1992 | + * modify it under the terms of the GNU General Public License version 2 | |
1993 | + * as published by the Free Software Foundation; or, when distributed | |
1994 | + * separately from the Linux kernel or incorporated into other | |
1995 | + * software packages, subject to the following license: | |
1996 | + * | |
1997 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
1998 | + * of this source file (the "Software"), to deal in the Software without | |
1999 | + * restriction, including without limitation the rights to use, copy, modify, | |
2000 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
2001 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
2002 | + * the following conditions: | |
2003 | + * | |
2004 | + * The above copyright notice and this permission notice shall be included in | |
2005 | + * all copies or substantial portions of the Software. | |
2006 | + * | |
2007 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
2008 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
2009 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
2010 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
2011 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
2012 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
2013 | + * IN THE SOFTWARE. | |
2014 | + */ | |
2015 | + | |
2016 | +#include "common.h" | |
2017 | + | |
2018 | +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ | |
2019 | + (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity) | |
2020 | + | |
2021 | +unsigned long long vbd_size(struct vbd *vbd) | |
2022 | +{ | |
2023 | + return vbd_sz(vbd); | |
2024 | +} | |
2025 | + | |
2026 | +unsigned int vbd_info(struct vbd *vbd) | |
2027 | +{ | |
2028 | + return vbd->type | (vbd->readonly?VDISK_READONLY:0); | |
2029 | +} | |
2030 | + | |
2031 | +unsigned long vbd_secsize(struct vbd *vbd) | |
2032 | +{ | |
2033 | + return bdev_hardsect_size(vbd->bdev); | |
2034 | +} | |
2035 | + | |
2036 | +int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, | |
2037 | + unsigned minor, int readonly, int cdrom) | |
2038 | +{ | |
2039 | + struct vbd *vbd; | |
2040 | + struct block_device *bdev; | |
2041 | + | |
2042 | + vbd = &blkif->vbd; | |
2043 | + vbd->handle = handle; | |
2044 | + vbd->readonly = readonly; | |
2045 | + vbd->type = 0; | |
2046 | + | |
2047 | + vbd->pdevice = MKDEV(major, minor); | |
2048 | + | |
2049 | + bdev = open_by_devnum(vbd->pdevice, | |
2050 | + vbd->readonly ? FMODE_READ : FMODE_WRITE); | |
2051 | + | |
2052 | + if (IS_ERR(bdev)) { | |
2053 | + DPRINTK("vbd_creat: device %08x could not be opened.\n", | |
2054 | + vbd->pdevice); | |
2055 | + return -ENOENT; | |
2056 | + } | |
2057 | + | |
2058 | + vbd->bdev = bdev; | |
2059 | + | |
2060 | + if (vbd->bdev->bd_disk == NULL) { | |
2061 | + DPRINTK("vbd_creat: device %08x doesn't exist.\n", | |
2062 | + vbd->pdevice); | |
2063 | + vbd_free(vbd); | |
2064 | + return -ENOENT; | |
2065 | + } | |
2066 | + | |
2067 | + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) | |
2068 | + vbd->type |= VDISK_CDROM; | |
2069 | + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) | |
2070 | + vbd->type |= VDISK_REMOVABLE; | |
2071 | + | |
2072 | + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", | |
2073 | + handle, blkif->domid); | |
2074 | + return 0; | |
2075 | +} | |
2076 | + | |
2077 | +void vbd_free(struct vbd *vbd) | |
2078 | +{ | |
2079 | + if (vbd->bdev) | |
2080 | + blkdev_put(vbd->bdev); | |
2081 | + vbd->bdev = NULL; | |
2082 | +} | |
2083 | + | |
2084 | +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) | |
2085 | +{ | |
2086 | + struct vbd *vbd = &blkif->vbd; | |
2087 | + int rc = -EACCES; | |
2088 | + | |
2089 | + if ((operation != READ) && vbd->readonly) | |
2090 | + goto out; | |
2091 | + | |
2092 | + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) | |
2093 | + goto out; | |
2094 | + | |
2095 | + req->dev = vbd->pdevice; | |
2096 | + req->bdev = vbd->bdev; | |
2097 | + rc = 0; | |
2098 | + | |
2099 | + out: | |
2100 | + return rc; | |
2101 | +} | |
2102 | Index: head-2008-11-25/drivers/xen/blkback/xenbus.c | |
2103 | =================================================================== | |
2104 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
2105 | +++ head-2008-11-25/drivers/xen/blkback/xenbus.c 2008-05-08 14:02:04.000000000 +0200 | |
2106 | @@ -0,0 +1,541 @@ | |
2107 | +/* Xenbus code for blkif backend | |
2108 | + Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> | |
2109 | + Copyright (C) 2005 XenSource Ltd | |
2110 | + | |
2111 | + This program is free software; you can redistribute it and/or modify | |
2112 | + it under the terms of the GNU General Public License as published by | |
2113 | + the Free Software Foundation; either version 2 of the License, or | |
2114 | + (at your option) any later version. | |
2115 | + | |
2116 | + This program is distributed in the hope that it will be useful, | |
2117 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
2118 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
2119 | + GNU General Public License for more details. | |
2120 | + | |
2121 | + You should have received a copy of the GNU General Public License | |
2122 | + along with this program; if not, write to the Free Software | |
2123 | + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
2124 | +*/ | |
2125 | + | |
2126 | +#include <stdarg.h> | |
2127 | +#include <linux/module.h> | |
2128 | +#include <linux/kthread.h> | |
2129 | +#include "common.h" | |
2130 | + | |
2131 | +#undef DPRINTK | |
2132 | +#define DPRINTK(fmt, args...) \ | |
2133 | + pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ | |
2134 | + __FUNCTION__, __LINE__, ##args) | |
2135 | + | |
2136 | +struct backend_info | |
2137 | +{ | |
2138 | + struct xenbus_device *dev; | |
2139 | + blkif_t *blkif; | |
2140 | + struct xenbus_watch backend_watch; | |
2141 | + unsigned major; | |
2142 | + unsigned minor; | |
2143 | + char *mode; | |
2144 | +}; | |
2145 | + | |
2146 | +static void connect(struct backend_info *); | |
2147 | +static int connect_ring(struct backend_info *); | |
2148 | +static void backend_changed(struct xenbus_watch *, const char **, | |
2149 | + unsigned int); | |
2150 | + | |
2151 | +static int blkback_name(blkif_t *blkif, char *buf) | |
2152 | +{ | |
2153 | + char *devpath, *devname; | |
2154 | + struct xenbus_device *dev = blkif->be->dev; | |
2155 | + | |
2156 | + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); | |
2157 | + if (IS_ERR(devpath)) | |
2158 | + return PTR_ERR(devpath); | |
2159 | + | |
2160 | + if ((devname = strstr(devpath, "/dev/")) != NULL) | |
2161 | + devname += strlen("/dev/"); | |
2162 | + else | |
2163 | + devname = devpath; | |
2164 | + | |
2165 | + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); | |
2166 | + kfree(devpath); | |
2167 | + | |
2168 | + return 0; | |
2169 | +} | |
2170 | + | |
2171 | +static void update_blkif_status(blkif_t *blkif) | |
2172 | +{ | |
2173 | + int err; | |
2174 | + char name[TASK_COMM_LEN]; | |
2175 | + | |
2176 | + /* Not ready to connect? */ | |
2177 | + if (!blkif->irq || !blkif->vbd.bdev) | |
2178 | + return; | |
2179 | + | |
2180 | + /* Already connected? */ | |
2181 | + if (blkif->be->dev->state == XenbusStateConnected) | |
2182 | + return; | |
2183 | + | |
2184 | + /* Attempt to connect: exit if we fail to. */ | |
2185 | + connect(blkif->be); | |
2186 | + if (blkif->be->dev->state != XenbusStateConnected) | |
2187 | + return; | |
2188 | + | |
2189 | + err = blkback_name(blkif, name); | |
2190 | + if (err) { | |
2191 | + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); | |
2192 | + return; | |
2193 | + } | |
2194 | + | |
2195 | + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); | |
2196 | + if (IS_ERR(blkif->xenblkd)) { | |
2197 | + err = PTR_ERR(blkif->xenblkd); | |
2198 | + blkif->xenblkd = NULL; | |
2199 | + xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); | |
2200 | + } | |
2201 | +} | |
2202 | + | |
2203 | + | |
2204 | +/**************************************************************** | |
2205 | + * sysfs interface for VBD I/O requests | |
2206 | + */ | |
2207 | + | |
2208 | +#define VBD_SHOW(name, format, args...) \ | |
2209 | + static ssize_t show_##name(struct device *_dev, \ | |
2210 | + struct device_attribute *attr, \ | |
2211 | + char *buf) \ | |
2212 | + { \ | |
2213 | + struct xenbus_device *dev = to_xenbus_device(_dev); \ | |
2214 | + struct backend_info *be = dev->dev.driver_data; \ | |
2215 | + \ | |
2216 | + return sprintf(buf, format, ##args); \ | |
2217 | + } \ | |
2218 | + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) | |
2219 | + | |
2220 | +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); | |
2221 | +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); | |
2222 | +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); | |
2223 | +VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); | |
2224 | +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); | |
2225 | +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); | |
2226 | + | |
2227 | +static struct attribute *vbdstat_attrs[] = { | |
2228 | + &dev_attr_oo_req.attr, | |
2229 | + &dev_attr_rd_req.attr, | |
2230 | + &dev_attr_wr_req.attr, | |
2231 | + &dev_attr_br_req.attr, | |
2232 | + &dev_attr_rd_sect.attr, | |
2233 | + &dev_attr_wr_sect.attr, | |
2234 | + NULL | |
2235 | +}; | |
2236 | + | |
2237 | +static struct attribute_group vbdstat_group = { | |
2238 | + .name = "statistics", | |
2239 | + .attrs = vbdstat_attrs, | |
2240 | +}; | |
2241 | + | |
2242 | +VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); | |
2243 | +VBD_SHOW(mode, "%s\n", be->mode); | |
2244 | + | |
2245 | +int xenvbd_sysfs_addif(struct xenbus_device *dev) | |
2246 | +{ | |
2247 | + int error; | |
2248 | + | |
2249 | + error = device_create_file(&dev->dev, &dev_attr_physical_device); | |
2250 | + if (error) | |
2251 | + goto fail1; | |
2252 | + | |
2253 | + error = device_create_file(&dev->dev, &dev_attr_mode); | |
2254 | + if (error) | |
2255 | + goto fail2; | |
2256 | + | |
2257 | + error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); | |
2258 | + if (error) | |
2259 | + goto fail3; | |
2260 | + | |
2261 | + return 0; | |
2262 | + | |
2263 | +fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); | |
2264 | +fail2: device_remove_file(&dev->dev, &dev_attr_mode); | |
2265 | +fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); | |
2266 | + return error; | |
2267 | +} | |
2268 | + | |
2269 | +void xenvbd_sysfs_delif(struct xenbus_device *dev) | |
2270 | +{ | |
2271 | + sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); | |
2272 | + device_remove_file(&dev->dev, &dev_attr_mode); | |
2273 | + device_remove_file(&dev->dev, &dev_attr_physical_device); | |
2274 | +} | |
2275 | + | |
2276 | +static int blkback_remove(struct xenbus_device *dev) | |
2277 | +{ | |
2278 | + struct backend_info *be = dev->dev.driver_data; | |
2279 | + | |
2280 | + DPRINTK(""); | |
2281 | + | |
2282 | + if (be->major || be->minor) | |
2283 | + xenvbd_sysfs_delif(dev); | |
2284 | + | |
2285 | + if (be->backend_watch.node) { | |
2286 | + unregister_xenbus_watch(&be->backend_watch); | |
2287 | + kfree(be->backend_watch.node); | |
2288 | + be->backend_watch.node = NULL; | |
2289 | + } | |
2290 | + | |
2291 | + if (be->blkif) { | |
2292 | + blkif_disconnect(be->blkif); | |
2293 | + vbd_free(&be->blkif->vbd); | |
2294 | + blkif_free(be->blkif); | |
2295 | + be->blkif = NULL; | |
2296 | + } | |
2297 | + | |
2298 | + kfree(be); | |
2299 | + dev->dev.driver_data = NULL; | |
2300 | + return 0; | |
2301 | +} | |
2302 | + | |
2303 | +int blkback_barrier(struct xenbus_transaction xbt, | |
2304 | + struct backend_info *be, int state) | |
2305 | +{ | |
2306 | + struct xenbus_device *dev = be->dev; | |
2307 | + int err; | |
2308 | + | |
2309 | + err = xenbus_printf(xbt, dev->nodename, "feature-barrier", | |
2310 | + "%d", state); | |
2311 | + if (err) | |
2312 | + xenbus_dev_fatal(dev, err, "writing feature-barrier"); | |
2313 | + | |
2314 | + return err; | |
2315 | +} | |
2316 | + | |
2317 | +/** | |
2318 | + * Entry point to this code when a new device is created. Allocate the basic | |
2319 | + * structures, and watch the store waiting for the hotplug scripts to tell us | |
2320 | + * the device's physical major and minor numbers. Switch to InitWait. | |
2321 | + */ | |
2322 | +static int blkback_probe(struct xenbus_device *dev, | |
2323 | + const struct xenbus_device_id *id) | |
2324 | +{ | |
2325 | + int err; | |
2326 | + struct backend_info *be = kzalloc(sizeof(struct backend_info), | |
2327 | + GFP_KERNEL); | |
2328 | + if (!be) { | |
2329 | + xenbus_dev_fatal(dev, -ENOMEM, | |
2330 | + "allocating backend structure"); | |
2331 | + return -ENOMEM; | |
2332 | + } | |
2333 | + be->dev = dev; | |
2334 | + dev->dev.driver_data = be; | |
2335 | + | |
2336 | + be->blkif = blkif_alloc(dev->otherend_id); | |
2337 | + if (IS_ERR(be->blkif)) { | |
2338 | + err = PTR_ERR(be->blkif); | |
2339 | + be->blkif = NULL; | |
2340 | + xenbus_dev_fatal(dev, err, "creating block interface"); | |
2341 | + goto fail; | |
2342 | + } | |
2343 | + | |
2344 | + /* setup back pointer */ | |
2345 | + be->blkif->be = be; | |
2346 | + | |
2347 | + err = xenbus_watch_path2(dev, dev->nodename, "physical-device", | |
2348 | + &be->backend_watch, backend_changed); | |
2349 | + if (err) | |
2350 | + goto fail; | |
2351 | + | |
2352 | + err = xenbus_switch_state(dev, XenbusStateInitWait); | |
2353 | + if (err) | |
2354 | + goto fail; | |
2355 | + | |
2356 | + return 0; | |
2357 | + | |
2358 | +fail: | |
2359 | + DPRINTK("failed"); | |
2360 | + blkback_remove(dev); | |
2361 | + return err; | |
2362 | +} | |
2363 | + | |
2364 | + | |
2365 | +/** | |
2366 | + * Callback received when the hotplug scripts have placed the physical-device | |
2367 | + * node. Read it and the mode node, and create a vbd. If the frontend is | |
2368 | + * ready, connect. | |
2369 | + */ | |
2370 | +static void backend_changed(struct xenbus_watch *watch, | |
2371 | + const char **vec, unsigned int len) | |
2372 | +{ | |
2373 | + int err; | |
2374 | + unsigned major; | |
2375 | + unsigned minor; | |
2376 | + struct backend_info *be | |
2377 | + = container_of(watch, struct backend_info, backend_watch); | |
2378 | + struct xenbus_device *dev = be->dev; | |
2379 | + int cdrom = 0; | |
2380 | + char *device_type; | |
2381 | + | |
2382 | + DPRINTK(""); | |
2383 | + | |
2384 | + err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", | |
2385 | + &major, &minor); | |
2386 | + if (XENBUS_EXIST_ERR(err)) { | |
2387 | + /* Since this watch will fire once immediately after it is | |
2388 | + registered, we expect this. Ignore it, and wait for the | |
2389 | + hotplug scripts. */ | |
2390 | + return; | |
2391 | + } | |
2392 | + if (err != 2) { | |
2393 | + xenbus_dev_fatal(dev, err, "reading physical-device"); | |
2394 | + return; | |
2395 | + } | |
2396 | + | |
2397 | + if ((be->major || be->minor) && | |
2398 | + ((be->major != major) || (be->minor != minor))) { | |
2399 | + printk(KERN_WARNING | |
2400 | + "blkback: changing physical device (from %x:%x to " | |
2401 | + "%x:%x) not supported.\n", be->major, be->minor, | |
2402 | + major, minor); | |
2403 | + return; | |
2404 | + } | |
2405 | + | |
2406 | + be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); | |
2407 | + if (IS_ERR(be->mode)) { | |
2408 | + err = PTR_ERR(be->mode); | |
2409 | + be->mode = NULL; | |
2410 | + xenbus_dev_fatal(dev, err, "reading mode"); | |
2411 | + return; | |
2412 | + } | |
2413 | + | |
2414 | + device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); | |
2415 | + if (!IS_ERR(device_type)) { | |
2416 | + cdrom = strcmp(device_type, "cdrom") == 0; | |
2417 | + kfree(device_type); | |
2418 | + } | |
2419 | + | |
2420 | + if (be->major == 0 && be->minor == 0) { | |
2421 | + /* Front end dir is a number, which is used as the handle. */ | |
2422 | + | |
2423 | + char *p = strrchr(dev->otherend, '/') + 1; | |
2424 | + long handle = simple_strtoul(p, NULL, 0); | |
2425 | + | |
2426 | + be->major = major; | |
2427 | + be->minor = minor; | |
2428 | + | |
2429 | + err = vbd_create(be->blkif, handle, major, minor, | |
2430 | + (NULL == strchr(be->mode, 'w')), cdrom); | |
2431 | + if (err) { | |
2432 | + be->major = be->minor = 0; | |
2433 | + xenbus_dev_fatal(dev, err, "creating vbd structure"); | |
2434 | + return; | |
2435 | + } | |
2436 | + | |
2437 | + err = xenvbd_sysfs_addif(dev); | |
2438 | + if (err) { | |
2439 | + vbd_free(&be->blkif->vbd); | |
2440 | + be->major = be->minor = 0; | |
2441 | + xenbus_dev_fatal(dev, err, "creating sysfs entries"); | |
2442 | + return; | |
2443 | + } | |
2444 | + | |
2445 | + /* We're potentially connected now */ | |
2446 | + update_blkif_status(be->blkif); | |
2447 | + } | |
2448 | +} | |
2449 | + | |
2450 | + | |
2451 | +/** | |
2452 | + * Callback received when the frontend's state changes. | |
2453 | + */ | |
2454 | +static void frontend_changed(struct xenbus_device *dev, | |
2455 | + enum xenbus_state frontend_state) | |
2456 | +{ | |
2457 | + struct backend_info *be = dev->dev.driver_data; | |
2458 | + int err; | |
2459 | + | |
2460 | + DPRINTK("%s", xenbus_strstate(frontend_state)); | |
2461 | + | |
2462 | + switch (frontend_state) { | |
2463 | + case XenbusStateInitialising: | |
2464 | + if (dev->state == XenbusStateClosed) { | |
2465 | + printk(KERN_INFO "%s: %s: prepare for reconnect\n", | |
2466 | + __FUNCTION__, dev->nodename); | |
2467 | + xenbus_switch_state(dev, XenbusStateInitWait); | |
2468 | + } | |
2469 | + break; | |
2470 | + | |
2471 | + case XenbusStateInitialised: | |
2472 | + case XenbusStateConnected: | |
2473 | + /* Ensure we connect even when two watches fire in | |
2474 | + close succession and we miss the intermediate value | |
2475 | + of frontend_state. */ | |
2476 | + if (dev->state == XenbusStateConnected) | |
2477 | + break; | |
2478 | + | |
2479 | + err = connect_ring(be); | |
2480 | + if (err) | |
2481 | + break; | |
2482 | + update_blkif_status(be->blkif); | |
2483 | + break; | |
2484 | + | |
2485 | + case XenbusStateClosing: | |
2486 | + blkif_disconnect(be->blkif); | |
2487 | + xenbus_switch_state(dev, XenbusStateClosing); | |
2488 | + break; | |
2489 | + | |
2490 | + case XenbusStateClosed: | |
2491 | + xenbus_switch_state(dev, XenbusStateClosed); | |
2492 | + if (xenbus_dev_is_online(dev)) | |
2493 | + break; | |
2494 | + /* fall through if not online */ | |
2495 | + case XenbusStateUnknown: | |
2496 | + device_unregister(&dev->dev); | |
2497 | + break; | |
2498 | + | |
2499 | + default: | |
2500 | + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", | |
2501 | + frontend_state); | |
2502 | + break; | |
2503 | + } | |
2504 | +} | |
2505 | + | |
2506 | + | |
2507 | +/* ** Connection ** */ | |
2508 | + | |
2509 | + | |
2510 | +/** | |
2511 | + * Write the physical details regarding the block device to the store, and | |
2512 | + * switch to Connected state. | |
2513 | + */ | |
2514 | +static void connect(struct backend_info *be) | |
2515 | +{ | |
2516 | + struct xenbus_transaction xbt; | |
2517 | + int err; | |
2518 | + struct xenbus_device *dev = be->dev; | |
2519 | + | |
2520 | + DPRINTK("%s", dev->otherend); | |
2521 | + | |
2522 | + /* Supply the information about the device the frontend needs */ | |
2523 | +again: | |
2524 | + err = xenbus_transaction_start(&xbt); | |
2525 | + if (err) { | |
2526 | + xenbus_dev_fatal(dev, err, "starting transaction"); | |
2527 | + return; | |
2528 | + } | |
2529 | + | |
2530 | + err = blkback_barrier(xbt, be, 1); | |
2531 | + if (err) | |
2532 | + goto abort; | |
2533 | + | |
2534 | + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", | |
2535 | + vbd_size(&be->blkif->vbd)); | |
2536 | + if (err) { | |
2537 | + xenbus_dev_fatal(dev, err, "writing %s/sectors", | |
2538 | + dev->nodename); | |
2539 | + goto abort; | |
2540 | + } | |
2541 | + | |
2542 | + /* FIXME: use a typename instead */ | |
2543 | + err = xenbus_printf(xbt, dev->nodename, "info", "%u", | |
2544 | + vbd_info(&be->blkif->vbd)); | |
2545 | + if (err) { | |
2546 | + xenbus_dev_fatal(dev, err, "writing %s/info", | |
2547 | + dev->nodename); | |
2548 | + goto abort; | |
2549 | + } | |
2550 | + err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", | |
2551 | + vbd_secsize(&be->blkif->vbd)); | |
2552 | + if (err) { | |
2553 | + xenbus_dev_fatal(dev, err, "writing %s/sector-size", | |
2554 | + dev->nodename); | |
2555 | + goto abort; | |
2556 | + } | |
2557 | + | |
2558 | + err = xenbus_transaction_end(xbt, 0); | |
2559 | + if (err == -EAGAIN) | |
2560 | + goto again; | |
2561 | + if (err) | |
2562 | + xenbus_dev_fatal(dev, err, "ending transaction"); | |
2563 | + | |
2564 | + err = xenbus_switch_state(dev, XenbusStateConnected); | |
2565 | + if (err) | |
2566 | + xenbus_dev_fatal(dev, err, "switching to Connected state", | |
2567 | + dev->nodename); | |
2568 | + | |
2569 | + return; | |
2570 | + abort: | |
2571 | + xenbus_transaction_end(xbt, 1); | |
2572 | +} | |
2573 | + | |
2574 | + | |
2575 | +static int connect_ring(struct backend_info *be) | |
2576 | +{ | |
2577 | + struct xenbus_device *dev = be->dev; | |
2578 | + unsigned long ring_ref; | |
2579 | + unsigned int evtchn; | |
2580 | + char protocol[64] = ""; | |
2581 | + int err; | |
2582 | + | |
2583 | + DPRINTK("%s", dev->otherend); | |
2584 | + | |
2585 | + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, | |
2586 | + "event-channel", "%u", &evtchn, NULL); | |
2587 | + if (err) { | |
2588 | + xenbus_dev_fatal(dev, err, | |
2589 | + "reading %s/ring-ref and event-channel", | |
2590 | + dev->otherend); | |
2591 | + return err; | |
2592 | + } | |
2593 | + | |
2594 | + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | |
2595 | + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", | |
2596 | + "%63s", protocol, NULL); | |
2597 | + if (err) | |
2598 | + strcpy(protocol, "unspecified, assuming native"); | |
2599 | + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) | |
2600 | + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | |
2601 | + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) | |
2602 | + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; | |
2603 | + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) | |
2604 | + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; | |
2605 | + else { | |
2606 | + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); | |
2607 | + return -1; | |
2608 | + } | |
2609 | + printk(KERN_INFO | |
2610 | + "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", | |
2611 | + ring_ref, evtchn, be->blkif->blk_protocol, protocol); | |
2612 | + | |
2613 | + /* Map the shared frame, irq etc. */ | |
2614 | + err = blkif_map(be->blkif, ring_ref, evtchn); | |
2615 | + if (err) { | |
2616 | + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", | |
2617 | + ring_ref, evtchn); | |
2618 | + return err; | |
2619 | + } | |
2620 | + | |
2621 | + return 0; | |
2622 | +} | |
2623 | + | |
2624 | + | |
2625 | +/* ** Driver Registration ** */ | |
2626 | + | |
2627 | + | |
2628 | +static const struct xenbus_device_id blkback_ids[] = { | |
2629 | + { "vbd" }, | |
2630 | + { "" } | |
2631 | +}; | |
2632 | + | |
2633 | + | |
2634 | +static struct xenbus_driver blkback = { | |
2635 | + .name = "vbd", | |
2636 | + .owner = THIS_MODULE, | |
2637 | + .ids = blkback_ids, | |
2638 | + .probe = blkback_probe, | |
2639 | + .remove = blkback_remove, | |
2640 | + .otherend_changed = frontend_changed | |
2641 | +}; | |
2642 | + | |
2643 | + | |
2644 | +void blkif_xenbus_init(void) | |
2645 | +{ | |
2646 | + xenbus_register_backend(&blkback); | |
2647 | +} | |
2648 | Index: head-2008-11-25/drivers/xen/blkfront/Makefile | |
2649 | =================================================================== | |
2650 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
2651 | +++ head-2008-11-25/drivers/xen/blkfront/Makefile 2007-06-12 13:13:44.000000000 +0200 | |
2652 | @@ -0,0 +1,5 @@ | |
2653 | + | |
2654 | +obj-$(CONFIG_XEN_BLKDEV_FRONTEND) := xenblk.o | |
2655 | + | |
2656 | +xenblk-objs := blkfront.o vbd.o | |
2657 | + | |
2658 | Index: head-2008-11-25/drivers/xen/blkfront/blkfront.c | |
2659 | =================================================================== | |
2660 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
2661 | +++ head-2008-11-25/drivers/xen/blkfront/blkfront.c 2008-08-07 12:44:36.000000000 +0200 | |
2662 | @@ -0,0 +1,936 @@ | |
2663 | +/****************************************************************************** | |
2664 | + * blkfront.c | |
2665 | + * | |
2666 | + * XenLinux virtual block-device driver. | |
2667 | + * | |
2668 | + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand | |
2669 | + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge | |
2670 | + * Copyright (c) 2004, Christian Limpach | |
2671 | + * Copyright (c) 2004, Andrew Warfield | |
2672 | + * Copyright (c) 2005, Christopher Clark | |
2673 | + * Copyright (c) 2005, XenSource Ltd | |
2674 | + * | |
2675 | + * This program is free software; you can redistribute it and/or | |
2676 | + * modify it under the terms of the GNU General Public License version 2 | |
2677 | + * as published by the Free Software Foundation; or, when distributed | |
2678 | + * separately from the Linux kernel or incorporated into other | |
2679 | + * software packages, subject to the following license: | |
2680 | + * | |
2681 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
2682 | + * of this source file (the "Software"), to deal in the Software without | |
2683 | + * restriction, including without limitation the rights to use, copy, modify, | |
2684 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
2685 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
2686 | + * the following conditions: | |
2687 | + * | |
2688 | + * The above copyright notice and this permission notice shall be included in | |
2689 | + * all copies or substantial portions of the Software. | |
2690 | + * | |
2691 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
2692 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
2693 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
2694 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
2695 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
2696 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
2697 | + * IN THE SOFTWARE. | |
2698 | + */ | |
2699 | + | |
2700 | +#include <linux/version.h> | |
2701 | +#include "block.h" | |
2702 | +#include <linux/cdrom.h> | |
2703 | +#include <linux/sched.h> | |
2704 | +#include <linux/interrupt.h> | |
2705 | +#include <scsi/scsi.h> | |
2706 | +#include <xen/evtchn.h> | |
2707 | +#include <xen/xenbus.h> | |
2708 | +#include <xen/interface/grant_table.h> | |
2709 | +#include <xen/interface/io/protocols.h> | |
2710 | +#include <xen/gnttab.h> | |
2711 | +#include <asm/hypervisor.h> | |
2712 | +#include <asm/maddr.h> | |
2713 | + | |
2714 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
2715 | +#include <xen/platform-compat.h> | |
2716 | +#endif | |
2717 | + | |
2718 | +#define BLKIF_STATE_DISCONNECTED 0 | |
2719 | +#define BLKIF_STATE_CONNECTED 1 | |
2720 | +#define BLKIF_STATE_SUSPENDED 2 | |
2721 | + | |
2722 | +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ | |
2723 | + (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) | |
2724 | +#define GRANT_INVALID_REF 0 | |
2725 | + | |
2726 | +static void connect(struct blkfront_info *); | |
2727 | +static void blkfront_closing(struct xenbus_device *); | |
2728 | +static int blkfront_remove(struct xenbus_device *); | |
2729 | +static int talk_to_backend(struct xenbus_device *, struct blkfront_info *); | |
2730 | +static int setup_blkring(struct xenbus_device *, struct blkfront_info *); | |
2731 | + | |
2732 | +static void kick_pending_request_queues(struct blkfront_info *); | |
2733 | + | |
2734 | +static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs); | |
2735 | +static void blkif_restart_queue(void *arg); | |
2736 | +static void blkif_recover(struct blkfront_info *); | |
2737 | +static void blkif_completion(struct blk_shadow *); | |
2738 | +static void blkif_free(struct blkfront_info *, int); | |
2739 | + | |
2740 | + | |
2741 | +/** | |
2742 | + * Entry point to this code when a new device is created. Allocate the basic | |
2743 | + * structures and the ring buffer for communication with the backend, and | |
2744 | + * inform the backend of the appropriate details for those. Switch to | |
2745 | + * Initialised state. | |
2746 | + */ | |
2747 | +static int blkfront_probe(struct xenbus_device *dev, | |
2748 | + const struct xenbus_device_id *id) | |
2749 | +{ | |
2750 | + int err, vdevice, i; | |
2751 | + struct blkfront_info *info; | |
2752 | + | |
2753 | + /* FIXME: Use dynamic device id if this is not set. */ | |
2754 | + err = xenbus_scanf(XBT_NIL, dev->nodename, | |
2755 | + "virtual-device", "%i", &vdevice); | |
2756 | + if (err != 1) { | |
2757 | + /* go looking in the extended area instead */ | |
2758 | + err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", | |
2759 | + "%i", &vdevice); | |
2760 | + if (err != 1) { | |
2761 | + xenbus_dev_fatal(dev, err, "reading virtual-device"); | |
2762 | + return err; | |
2763 | + } | |
2764 | + } | |
2765 | + | |
2766 | + info = kzalloc(sizeof(*info), GFP_KERNEL); | |
2767 | + if (!info) { | |
2768 | + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); | |
2769 | + return -ENOMEM; | |
2770 | + } | |
2771 | + | |
2772 | + info->xbdev = dev; | |
2773 | + info->vdevice = vdevice; | |
2774 | + info->connected = BLKIF_STATE_DISCONNECTED; | |
2775 | + INIT_WORK(&info->work, blkif_restart_queue, (void *)info); | |
2776 | + | |
2777 | + for (i = 0; i < BLK_RING_SIZE; i++) | |
2778 | + info->shadow[i].req.id = i+1; | |
2779 | + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; | |
2780 | + | |
2781 | + /* Front end dir is a number, which is used as the id. */ | |
2782 | + info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); | |
2783 | + dev->dev.driver_data = info; | |
2784 | + | |
2785 | + err = talk_to_backend(dev, info); | |
2786 | + if (err) { | |
2787 | + kfree(info); | |
2788 | + dev->dev.driver_data = NULL; | |
2789 | + return err; | |
2790 | + } | |
2791 | + | |
2792 | + return 0; | |
2793 | +} | |
2794 | + | |
2795 | + | |
2796 | +/** | |
2797 | + * We are reconnecting to the backend, due to a suspend/resume, or a backend | |
2798 | + * driver restart. We tear down our blkif structure and recreate it, but | |
2799 | + * leave the device-layer structures intact so that this is transparent to the | |
2800 | + * rest of the kernel. | |
2801 | + */ | |
2802 | +static int blkfront_resume(struct xenbus_device *dev) | |
2803 | +{ | |
2804 | + struct blkfront_info *info = dev->dev.driver_data; | |
2805 | + int err; | |
2806 | + | |
2807 | + DPRINTK("blkfront_resume: %s\n", dev->nodename); | |
2808 | + | |
2809 | + blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); | |
2810 | + | |
2811 | + err = talk_to_backend(dev, info); | |
2812 | + if (info->connected == BLKIF_STATE_SUSPENDED && !err) | |
2813 | + blkif_recover(info); | |
2814 | + | |
2815 | + return err; | |
2816 | +} | |
2817 | + | |
2818 | + | |
2819 | +/* Common code used when first setting up, and when resuming. */ | |
2820 | +static int talk_to_backend(struct xenbus_device *dev, | |
2821 | + struct blkfront_info *info) | |
2822 | +{ | |
2823 | + const char *message = NULL; | |
2824 | + struct xenbus_transaction xbt; | |
2825 | + int err; | |
2826 | + | |
2827 | + /* Create shared ring, alloc event channel. */ | |
2828 | + err = setup_blkring(dev, info); | |
2829 | + if (err) | |
2830 | + goto out; | |
2831 | + | |
2832 | +again: | |
2833 | + err = xenbus_transaction_start(&xbt); | |
2834 | + if (err) { | |
2835 | + xenbus_dev_fatal(dev, err, "starting transaction"); | |
2836 | + goto destroy_blkring; | |
2837 | + } | |
2838 | + | |
2839 | + err = xenbus_printf(xbt, dev->nodename, | |
2840 | + "ring-ref","%u", info->ring_ref); | |
2841 | + if (err) { | |
2842 | + message = "writing ring-ref"; | |
2843 | + goto abort_transaction; | |
2844 | + } | |
2845 | + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", | |
2846 | + irq_to_evtchn_port(info->irq)); | |
2847 | + if (err) { | |
2848 | + message = "writing event-channel"; | |
2849 | + goto abort_transaction; | |
2850 | + } | |
2851 | + err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", | |
2852 | + XEN_IO_PROTO_ABI_NATIVE); | |
2853 | + if (err) { | |
2854 | + message = "writing protocol"; | |
2855 | + goto abort_transaction; | |
2856 | + } | |
2857 | + | |
2858 | + err = xenbus_transaction_end(xbt, 0); | |
2859 | + if (err) { | |
2860 | + if (err == -EAGAIN) | |
2861 | + goto again; | |
2862 | + xenbus_dev_fatal(dev, err, "completing transaction"); | |
2863 | + goto destroy_blkring; | |
2864 | + } | |
2865 | + | |
2866 | + xenbus_switch_state(dev, XenbusStateInitialised); | |
2867 | + | |
2868 | + return 0; | |
2869 | + | |
2870 | + abort_transaction: | |
2871 | + xenbus_transaction_end(xbt, 1); | |
2872 | + if (message) | |
2873 | + xenbus_dev_fatal(dev, err, "%s", message); | |
2874 | + destroy_blkring: | |
2875 | + blkif_free(info, 0); | |
2876 | + out: | |
2877 | + return err; | |
2878 | +} | |
2879 | + | |
2880 | + | |
2881 | +static int setup_blkring(struct xenbus_device *dev, | |
2882 | + struct blkfront_info *info) | |
2883 | +{ | |
2884 | + blkif_sring_t *sring; | |
2885 | + int err; | |
2886 | + | |
2887 | + info->ring_ref = GRANT_INVALID_REF; | |
2888 | + | |
2889 | + sring = (blkif_sring_t *)__get_free_page(GFP_NOIO | __GFP_HIGH); | |
2890 | + if (!sring) { | |
2891 | + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); | |
2892 | + return -ENOMEM; | |
2893 | + } | |
2894 | + SHARED_RING_INIT(sring); | |
2895 | + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); | |
2896 | + | |
2897 | + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); | |
2898 | + if (err < 0) { | |
2899 | + free_page((unsigned long)sring); | |
2900 | + info->ring.sring = NULL; | |
2901 | + goto fail; | |
2902 | + } | |
2903 | + info->ring_ref = err; | |
2904 | + | |
2905 | + err = bind_listening_port_to_irqhandler( | |
2906 | + dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); | |
2907 | + if (err <= 0) { | |
2908 | + xenbus_dev_fatal(dev, err, | |
2909 | + "bind_listening_port_to_irqhandler"); | |
2910 | + goto fail; | |
2911 | + } | |
2912 | + info->irq = err; | |
2913 | + | |
2914 | + return 0; | |
2915 | +fail: | |
2916 | + blkif_free(info, 0); | |
2917 | + return err; | |
2918 | +} | |
2919 | + | |
2920 | + | |
2921 | +/** | |
2922 | + * Callback received when the backend's state changes. | |
2923 | + */ | |
2924 | +static void backend_changed(struct xenbus_device *dev, | |
2925 | + enum xenbus_state backend_state) | |
2926 | +{ | |
2927 | + struct blkfront_info *info = dev->dev.driver_data; | |
2928 | + struct block_device *bd; | |
2929 | + | |
2930 | + DPRINTK("blkfront:backend_changed.\n"); | |
2931 | + | |
2932 | + switch (backend_state) { | |
2933 | + case XenbusStateInitialising: | |
2934 | + case XenbusStateInitWait: | |
2935 | + case XenbusStateInitialised: | |
2936 | + case XenbusStateReconfiguring: | |
2937 | + case XenbusStateReconfigured: | |
2938 | + case XenbusStateUnknown: | |
2939 | + case XenbusStateClosed: | |
2940 | + break; | |
2941 | + | |
2942 | + case XenbusStateConnected: | |
2943 | + connect(info); | |
2944 | + break; | |
2945 | + | |
2946 | + case XenbusStateClosing: | |
2947 | + bd = bdget(info->dev); | |
2948 | + if (bd == NULL) | |
2949 | + xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); | |
2950 | + | |
2951 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) | |
2952 | + down(&bd->bd_sem); | |
2953 | +#else | |
2954 | + mutex_lock(&bd->bd_mutex); | |
2955 | +#endif | |
2956 | + if (info->users > 0) | |
2957 | + xenbus_dev_error(dev, -EBUSY, | |
2958 | + "Device in use; refusing to close"); | |
2959 | + else | |
2960 | + blkfront_closing(dev); | |
2961 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) | |
2962 | + up(&bd->bd_sem); | |
2963 | +#else | |
2964 | + mutex_unlock(&bd->bd_mutex); | |
2965 | +#endif | |
2966 | + bdput(bd); | |
2967 | + break; | |
2968 | + } | |
2969 | +} | |
2970 | + | |
2971 | + | |
2972 | +/* ** Connection ** */ | |
2973 | + | |
2974 | + | |
2975 | +/* | |
2976 | + * Invoked when the backend is finally 'ready' (and has produced | |
2977 | + * the details about the physical device - #sectors, size, etc). | |
2978 | + */ | |
2979 | +static void connect(struct blkfront_info *info) | |
2980 | +{ | |
2981 | + unsigned long long sectors; | |
2982 | + unsigned long sector_size; | |
2983 | + unsigned int binfo; | |
2984 | + int err; | |
2985 | + | |
2986 | + if ((info->connected == BLKIF_STATE_CONNECTED) || | |
2987 | + (info->connected == BLKIF_STATE_SUSPENDED) ) | |
2988 | + return; | |
2989 | + | |
2990 | + DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend); | |
2991 | + | |
2992 | + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | |
2993 | + "sectors", "%Lu", §ors, | |
2994 | + "info", "%u", &binfo, | |
2995 | + "sector-size", "%lu", §or_size, | |
2996 | + NULL); | |
2997 | + if (err) { | |
2998 | + xenbus_dev_fatal(info->xbdev, err, | |
2999 | + "reading backend fields at %s", | |
3000 | + info->xbdev->otherend); | |
3001 | + return; | |
3002 | + } | |
3003 | + | |
3004 | + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | |
3005 | + "feature-barrier", "%lu", &info->feature_barrier, | |
3006 | + NULL); | |
3007 | + if (err) | |
3008 | + info->feature_barrier = 0; | |
3009 | + | |
3010 | + err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info); | |
3011 | + if (err) { | |
3012 | + xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", | |
3013 | + info->xbdev->otherend); | |
3014 | + return; | |
3015 | + } | |
3016 | + | |
3017 | + err = xlvbd_sysfs_addif(info); | |
3018 | + if (err) { | |
3019 | + xenbus_dev_fatal(info->xbdev, err, "xlvbd_sysfs_addif at %s", | |
3020 | + info->xbdev->otherend); | |
3021 | + return; | |
3022 | + } | |
3023 | + | |
3024 | + (void)xenbus_switch_state(info->xbdev, XenbusStateConnected); | |
3025 | + | |
3026 | + /* Kick pending requests. */ | |
3027 | + spin_lock_irq(&blkif_io_lock); | |
3028 | + info->connected = BLKIF_STATE_CONNECTED; | |
3029 | + kick_pending_request_queues(info); | |
3030 | + spin_unlock_irq(&blkif_io_lock); | |
3031 | + | |
3032 | + add_disk(info->gd); | |
3033 | + | |
3034 | + info->is_ready = 1; | |
3035 | +} | |
3036 | + | |
3037 | +/** | |
3038 | + * Handle the change of state of the backend to Closing. We must delete our | |
3039 | + * device-layer structures now, to ensure that writes are flushed through to | |
3040 | + * the backend. Once this is done, we can switch to Closed in | |
3041 | + * acknowledgement. | |
3042 | + */ | |
3043 | +static void blkfront_closing(struct xenbus_device *dev) | |
3044 | +{ | |
3045 | + struct blkfront_info *info = dev->dev.driver_data; | |
3046 | + unsigned long flags; | |
3047 | + | |
3048 | + DPRINTK("blkfront_closing: %s removed\n", dev->nodename); | |
3049 | + | |
3050 | + if (info->rq == NULL) | |
3051 | + goto out; | |
3052 | + | |
3053 | + spin_lock_irqsave(&blkif_io_lock, flags); | |
3054 | + /* No more blkif_request(). */ | |
3055 | + blk_stop_queue(info->rq); | |
3056 | + /* No more gnttab callback work. */ | |
3057 | + gnttab_cancel_free_callback(&info->callback); | |
3058 | + spin_unlock_irqrestore(&blkif_io_lock, flags); | |
3059 | + | |
3060 | + /* Flush gnttab callback work. Must be done with no locks held. */ | |
3061 | + flush_scheduled_work(); | |
3062 | + | |
3063 | + xlvbd_sysfs_delif(info); | |
3064 | + | |
3065 | + xlvbd_del(info); | |
3066 | + | |
3067 | + out: | |
3068 | + xenbus_frontend_closed(dev); | |
3069 | +} | |
3070 | + | |
3071 | + | |
3072 | +static int blkfront_remove(struct xenbus_device *dev) | |
3073 | +{ | |
3074 | + struct blkfront_info *info = dev->dev.driver_data; | |
3075 | + | |
3076 | + DPRINTK("blkfront_remove: %s removed\n", dev->nodename); | |
3077 | + | |
3078 | + blkif_free(info, 0); | |
3079 | + | |
3080 | + kfree(info); | |
3081 | + | |
3082 | + return 0; | |
3083 | +} | |
3084 | + | |
3085 | + | |
3086 | +static inline int GET_ID_FROM_FREELIST( | |
3087 | + struct blkfront_info *info) | |
3088 | +{ | |
3089 | + unsigned long free = info->shadow_free; | |
3090 | + BUG_ON(free > BLK_RING_SIZE); | |
3091 | + info->shadow_free = info->shadow[free].req.id; | |
3092 | + info->shadow[free].req.id = 0x0fffffee; /* debug */ | |
3093 | + return free; | |
3094 | +} | |
3095 | + | |
3096 | +static inline void ADD_ID_TO_FREELIST( | |
3097 | + struct blkfront_info *info, unsigned long id) | |
3098 | +{ | |
3099 | + info->shadow[id].req.id = info->shadow_free; | |
3100 | + info->shadow[id].request = 0; | |
3101 | + info->shadow_free = id; | |
3102 | +} | |
3103 | + | |
3104 | +static inline void flush_requests(struct blkfront_info *info) | |
3105 | +{ | |
3106 | + int notify; | |
3107 | + | |
3108 | + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); | |
3109 | + | |
3110 | + if (notify) | |
3111 | + notify_remote_via_irq(info->irq); | |
3112 | +} | |
3113 | + | |
3114 | +static void kick_pending_request_queues(struct blkfront_info *info) | |
3115 | +{ | |
3116 | + if (!RING_FULL(&info->ring)) { | |
3117 | + /* Re-enable calldowns. */ | |
3118 | + blk_start_queue(info->rq); | |
3119 | + /* Kick things off immediately. */ | |
3120 | + do_blkif_request(info->rq); | |
3121 | + } | |
3122 | +} | |
3123 | + | |
3124 | +static void blkif_restart_queue(void *arg) | |
3125 | +{ | |
3126 | + struct blkfront_info *info = (struct blkfront_info *)arg; | |
3127 | + spin_lock_irq(&blkif_io_lock); | |
3128 | + if (info->connected == BLKIF_STATE_CONNECTED) | |
3129 | + kick_pending_request_queues(info); | |
3130 | + spin_unlock_irq(&blkif_io_lock); | |
3131 | +} | |
3132 | + | |
3133 | +static void blkif_restart_queue_callback(void *arg) | |
3134 | +{ | |
3135 | + struct blkfront_info *info = (struct blkfront_info *)arg; | |
3136 | + schedule_work(&info->work); | |
3137 | +} | |
3138 | + | |
3139 | +int blkif_open(struct inode *inode, struct file *filep) | |
3140 | +{ | |
3141 | + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; | |
3142 | + info->users++; | |
3143 | + return 0; | |
3144 | +} | |
3145 | + | |
3146 | + | |
3147 | +int blkif_release(struct inode *inode, struct file *filep) | |
3148 | +{ | |
3149 | + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; | |
3150 | + info->users--; | |
3151 | + if (info->users == 0) { | |
3152 | + /* Check whether we have been instructed to close. We will | |
3153 | + have ignored this request initially, as the device was | |
3154 | + still mounted. */ | |
3155 | + struct xenbus_device * dev = info->xbdev; | |
3156 | + enum xenbus_state state = xenbus_read_driver_state(dev->otherend); | |
3157 | + | |
3158 | + if (state == XenbusStateClosing && info->is_ready) | |
3159 | + blkfront_closing(dev); | |
3160 | + } | |
3161 | + return 0; | |
3162 | +} | |
3163 | + | |
3164 | + | |
3165 | +int blkif_ioctl(struct inode *inode, struct file *filep, | |
3166 | + unsigned command, unsigned long argument) | |
3167 | +{ | |
3168 | + int i; | |
3169 | + | |
3170 | + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", | |
3171 | + command, (long)argument, inode->i_rdev); | |
3172 | + | |
3173 | + switch (command) { | |
3174 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) | |
3175 | + case HDIO_GETGEO: { | |
3176 | + struct block_device *bd = inode->i_bdev; | |
3177 | + struct hd_geometry geo; | |
3178 | + int ret; | |
3179 | + | |
3180 | + if (!argument) | |
3181 | + return -EINVAL; | |
3182 | + | |
3183 | + geo.start = get_start_sect(bd); | |
3184 | + ret = blkif_getgeo(bd, &geo); | |
3185 | + if (ret) | |
3186 | + return ret; | |
3187 | + | |
3188 | + if (copy_to_user((struct hd_geometry __user *)argument, &geo, | |
3189 | + sizeof(geo))) | |
3190 | + return -EFAULT; | |
3191 | + | |
3192 | + return 0; | |
3193 | + } | |
3194 | +#endif | |
3195 | + case CDROMMULTISESSION: | |
3196 | + DPRINTK("FIXME: support multisession CDs later\n"); | |
3197 | + for (i = 0; i < sizeof(struct cdrom_multisession); i++) | |
3198 | + if (put_user(0, (char __user *)(argument + i))) | |
3199 | + return -EFAULT; | |
3200 | + return 0; | |
3201 | + | |
3202 | + case CDROM_GET_CAPABILITY: { | |
3203 | + struct blkfront_info *info = | |
3204 | + inode->i_bdev->bd_disk->private_data; | |
3205 | + struct gendisk *gd = info->gd; | |
3206 | + if (gd->flags & GENHD_FL_CD) | |
3207 | + return 0; | |
3208 | + return -EINVAL; | |
3209 | + } | |
3210 | + default: | |
3211 | + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", | |
3212 | + command);*/ | |
3213 | + return -EINVAL; /* same return as native Linux */ | |
3214 | + } | |
3215 | + | |
3216 | + return 0; | |
3217 | +} | |
3218 | + | |
3219 | + | |
3220 | +int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) | |
3221 | +{ | |
3222 | + /* We don't have real geometry info, but let's at least return | |
3223 | + values consistent with the size of the device */ | |
3224 | + sector_t nsect = get_capacity(bd->bd_disk); | |
3225 | + sector_t cylinders = nsect; | |
3226 | + | |
3227 | + hg->heads = 0xff; | |
3228 | + hg->sectors = 0x3f; | |
3229 | + sector_div(cylinders, hg->heads * hg->sectors); | |
3230 | + hg->cylinders = cylinders; | |
3231 | + if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) | |
3232 | + hg->cylinders = 0xffff; | |
3233 | + return 0; | |
3234 | +} | |
3235 | + | |
3236 | + | |
3237 | +/* | |
3238 | + * blkif_queue_request | |
3239 | + * | |
3240 | + * request block io | |
3241 | + * | |
3242 | + * id: for guest use only. | |
3243 | + * operation: BLKIF_OP_{READ,WRITE,PROBE} | |
3244 | + * buffer: buffer to read/write into. this should be a | |
3245 | + * virtual address in the guest os. | |
3246 | + */ | |
3247 | +static int blkif_queue_request(struct request *req) | |
3248 | +{ | |
3249 | + struct blkfront_info *info = req->rq_disk->private_data; | |
3250 | + unsigned long buffer_mfn; | |
3251 | + blkif_request_t *ring_req; | |
3252 | + struct bio *bio; | |
3253 | + struct bio_vec *bvec; | |
3254 | + int idx; | |
3255 | + unsigned long id; | |
3256 | + unsigned int fsect, lsect; | |
3257 | + int ref; | |
3258 | + grant_ref_t gref_head; | |
3259 | + | |
3260 | + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) | |
3261 | + return 1; | |
3262 | + | |
3263 | + if (gnttab_alloc_grant_references( | |
3264 | + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { | |
3265 | + gnttab_request_free_callback( | |
3266 | + &info->callback, | |
3267 | + blkif_restart_queue_callback, | |
3268 | + info, | |
3269 | + BLKIF_MAX_SEGMENTS_PER_REQUEST); | |
3270 | + return 1; | |
3271 | + } | |
3272 | + | |
3273 | + /* Fill out a communications ring structure. */ | |
3274 | + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); | |
3275 | + id = GET_ID_FROM_FREELIST(info); | |
3276 | + info->shadow[id].request = (unsigned long)req; | |
3277 | + | |
3278 | + ring_req->id = id; | |
3279 | + ring_req->sector_number = (blkif_sector_t)req->sector; | |
3280 | + ring_req->handle = info->handle; | |
3281 | + | |
3282 | + ring_req->operation = rq_data_dir(req) ? | |
3283 | + BLKIF_OP_WRITE : BLKIF_OP_READ; | |
3284 | + if (blk_barrier_rq(req)) | |
3285 | + ring_req->operation = BLKIF_OP_WRITE_BARRIER; | |
3286 | + | |
3287 | + ring_req->nr_segments = 0; | |
3288 | + rq_for_each_bio (bio, req) { | |
3289 | + bio_for_each_segment (bvec, bio, idx) { | |
3290 | + BUG_ON(ring_req->nr_segments | |
3291 | + == BLKIF_MAX_SEGMENTS_PER_REQUEST); | |
3292 | + buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT; | |
3293 | + fsect = bvec->bv_offset >> 9; | |
3294 | + lsect = fsect + (bvec->bv_len >> 9) - 1; | |
3295 | + /* install a grant reference. */ | |
3296 | + ref = gnttab_claim_grant_reference(&gref_head); | |
3297 | + BUG_ON(ref == -ENOSPC); | |
3298 | + | |
3299 | + gnttab_grant_foreign_access_ref( | |
3300 | + ref, | |
3301 | + info->xbdev->otherend_id, | |
3302 | + buffer_mfn, | |
3303 | + rq_data_dir(req) ? GTF_readonly : 0 ); | |
3304 | + | |
3305 | + info->shadow[id].frame[ring_req->nr_segments] = | |
3306 | + mfn_to_pfn(buffer_mfn); | |
3307 | + | |
3308 | + ring_req->seg[ring_req->nr_segments] = | |
3309 | + (struct blkif_request_segment) { | |
3310 | + .gref = ref, | |
3311 | + .first_sect = fsect, | |
3312 | + .last_sect = lsect }; | |
3313 | + | |
3314 | + ring_req->nr_segments++; | |
3315 | + } | |
3316 | + } | |
3317 | + | |
3318 | + info->ring.req_prod_pvt++; | |
3319 | + | |
3320 | + /* Keep a private copy so we can reissue requests when recovering. */ | |
3321 | + info->shadow[id].req = *ring_req; | |
3322 | + | |
3323 | + gnttab_free_grant_references(gref_head); | |
3324 | + | |
3325 | + return 0; | |
3326 | +} | |
3327 | + | |
3328 | +/* | |
3329 | + * do_blkif_request | |
3330 | + * read a block; request is in a request queue | |
3331 | + */ | |
3332 | +void do_blkif_request(request_queue_t *rq) | |
3333 | +{ | |
3334 | + struct blkfront_info *info = NULL; | |
3335 | + struct request *req; | |
3336 | + int queued; | |
3337 | + | |
3338 | + DPRINTK("Entered do_blkif_request\n"); | |
3339 | + | |
3340 | + queued = 0; | |
3341 | + | |
3342 | + while ((req = elv_next_request(rq)) != NULL) { | |
3343 | + info = req->rq_disk->private_data; | |
3344 | + if (!blk_fs_request(req)) { | |
3345 | + end_request(req, 0); | |
3346 | + continue; | |
3347 | + } | |
3348 | + | |
3349 | + if (RING_FULL(&info->ring)) | |
3350 | + goto wait; | |
3351 | + | |
3352 | + DPRINTK("do_blk_req %p: cmd %p, sec %llx, " | |
3353 | + "(%u/%li) buffer:%p [%s]\n", | |
3354 | + req, req->cmd, (long long)req->sector, | |
3355 | + req->current_nr_sectors, | |
3356 | + req->nr_sectors, req->buffer, | |
3357 | + rq_data_dir(req) ? "write" : "read"); | |
3358 | + | |
3359 | + | |
3360 | + blkdev_dequeue_request(req); | |
3361 | + if (blkif_queue_request(req)) { | |
3362 | + blk_requeue_request(rq, req); | |
3363 | + wait: | |
3364 | + /* Avoid pointless unplugs. */ | |
3365 | + blk_stop_queue(rq); | |
3366 | + break; | |
3367 | + } | |
3368 | + | |
3369 | + queued++; | |
3370 | + } | |
3371 | + | |
3372 | + if (queued != 0) | |
3373 | + flush_requests(info); | |
3374 | +} | |
3375 | + | |
3376 | + | |
3377 | +static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) | |
3378 | +{ | |
3379 | + struct request *req; | |
3380 | + blkif_response_t *bret; | |
3381 | + RING_IDX i, rp; | |
3382 | + unsigned long flags; | |
3383 | + struct blkfront_info *info = (struct blkfront_info *)dev_id; | |
3384 | + int uptodate; | |
3385 | + | |
3386 | + spin_lock_irqsave(&blkif_io_lock, flags); | |
3387 | + | |
3388 | + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { | |
3389 | + spin_unlock_irqrestore(&blkif_io_lock, flags); | |
3390 | + return IRQ_HANDLED; | |
3391 | + } | |
3392 | + | |
3393 | + again: | |
3394 | + rp = info->ring.sring->rsp_prod; | |
3395 | + rmb(); /* Ensure we see queued responses up to 'rp'. */ | |
3396 | + | |
3397 | + for (i = info->ring.rsp_cons; i != rp; i++) { | |
3398 | + unsigned long id; | |
3399 | + int ret; | |
3400 | + | |
3401 | + bret = RING_GET_RESPONSE(&info->ring, i); | |
3402 | + id = bret->id; | |
3403 | + req = (struct request *)info->shadow[id].request; | |
3404 | + | |
3405 | + blkif_completion(&info->shadow[id]); | |
3406 | + | |
3407 | + ADD_ID_TO_FREELIST(info, id); | |
3408 | + | |
3409 | + uptodate = (bret->status == BLKIF_RSP_OKAY); | |
3410 | + switch (bret->operation) { | |
3411 | + case BLKIF_OP_WRITE_BARRIER: | |
3412 | + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { | |
3413 | + printk("blkfront: %s: write barrier op failed\n", | |
3414 | + info->gd->disk_name); | |
3415 | + uptodate = -EOPNOTSUPP; | |
3416 | + info->feature_barrier = 0; | |
3417 | + xlvbd_barrier(info); | |
3418 | + } | |
3419 | + /* fall through */ | |
3420 | + case BLKIF_OP_READ: | |
3421 | + case BLKIF_OP_WRITE: | |
3422 | + if (unlikely(bret->status != BLKIF_RSP_OKAY)) | |
3423 | + DPRINTK("Bad return from blkdev data " | |
3424 | + "request: %x\n", bret->status); | |
3425 | + | |
3426 | + ret = end_that_request_first(req, uptodate, | |
3427 | + req->hard_nr_sectors); | |
3428 | + BUG_ON(ret); | |
3429 | + end_that_request_last(req, uptodate); | |
3430 | + break; | |
3431 | + default: | |
3432 | + BUG(); | |
3433 | + } | |
3434 | + } | |
3435 | + | |
3436 | + info->ring.rsp_cons = i; | |
3437 | + | |
3438 | + if (i != info->ring.req_prod_pvt) { | |
3439 | + int more_to_do; | |
3440 | + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); | |
3441 | + if (more_to_do) | |
3442 | + goto again; | |
3443 | + } else | |
3444 | + info->ring.sring->rsp_event = i + 1; | |
3445 | + | |
3446 | + kick_pending_request_queues(info); | |
3447 | + | |
3448 | + spin_unlock_irqrestore(&blkif_io_lock, flags); | |
3449 | + | |
3450 | + return IRQ_HANDLED; | |
3451 | +} | |
3452 | + | |
3453 | +static void blkif_free(struct blkfront_info *info, int suspend) | |
3454 | +{ | |
3455 | + /* Prevent new requests being issued until we fix things up. */ | |
3456 | + spin_lock_irq(&blkif_io_lock); | |
3457 | + info->connected = suspend ? | |
3458 | + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; | |
3459 | + /* No more blkif_request(). */ | |
3460 | + if (info->rq) | |
3461 | + blk_stop_queue(info->rq); | |
3462 | + /* No more gnttab callback work. */ | |
3463 | + gnttab_cancel_free_callback(&info->callback); | |
3464 | + spin_unlock_irq(&blkif_io_lock); | |
3465 | + | |
3466 | + /* Flush gnttab callback work. Must be done with no locks held. */ | |
3467 | + flush_scheduled_work(); | |
3468 | + | |
3469 | + /* Free resources associated with old device channel. */ | |
3470 | + if (info->ring_ref != GRANT_INVALID_REF) { | |
3471 | + gnttab_end_foreign_access(info->ring_ref, | |
3472 | + (unsigned long)info->ring.sring); | |
3473 | + info->ring_ref = GRANT_INVALID_REF; | |
3474 | + info->ring.sring = NULL; | |
3475 | + } | |
3476 | + if (info->irq) | |
3477 | + unbind_from_irqhandler(info->irq, info); | |
3478 | + info->irq = 0; | |
3479 | +} | |
3480 | + | |
3481 | +static void blkif_completion(struct blk_shadow *s) | |
3482 | +{ | |
3483 | + int i; | |
3484 | + for (i = 0; i < s->req.nr_segments; i++) | |
3485 | + gnttab_end_foreign_access(s->req.seg[i].gref, 0UL); | |
3486 | +} | |
3487 | + | |
3488 | +static void blkif_recover(struct blkfront_info *info) | |
3489 | +{ | |
3490 | + int i; | |
3491 | + blkif_request_t *req; | |
3492 | + struct blk_shadow *copy; | |
3493 | + int j; | |
3494 | + | |
3495 | + /* Stage 1: Make a safe copy of the shadow state. */ | |
3496 | + copy = kmalloc(sizeof(info->shadow), GFP_NOIO | __GFP_NOFAIL | __GFP_HIGH); | |
3497 | + memcpy(copy, info->shadow, sizeof(info->shadow)); | |
3498 | + | |
3499 | + /* Stage 2: Set up free list. */ | |
3500 | + memset(&info->shadow, 0, sizeof(info->shadow)); | |
3501 | + for (i = 0; i < BLK_RING_SIZE; i++) | |
3502 | + info->shadow[i].req.id = i+1; | |
3503 | + info->shadow_free = info->ring.req_prod_pvt; | |
3504 | + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; | |
3505 | + | |
3506 | + /* Stage 3: Find pending requests and requeue them. */ | |
3507 | + for (i = 0; i < BLK_RING_SIZE; i++) { | |
3508 | + /* Not in use? */ | |
3509 | + if (copy[i].request == 0) | |
3510 | + continue; | |
3511 | + | |
3512 | + /* Grab a request slot and copy shadow state into it. */ | |
3513 | + req = RING_GET_REQUEST( | |
3514 | + &info->ring, info->ring.req_prod_pvt); | |
3515 | + *req = copy[i].req; | |
3516 | + | |
3517 | + /* We get a new request id, and must reset the shadow state. */ | |
3518 | + req->id = GET_ID_FROM_FREELIST(info); | |
3519 | + memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); | |
3520 | + | |
3521 | + /* Rewrite any grant references invalidated by susp/resume. */ | |
3522 | + for (j = 0; j < req->nr_segments; j++) | |
3523 | + gnttab_grant_foreign_access_ref( | |
3524 | + req->seg[j].gref, | |
3525 | + info->xbdev->otherend_id, | |
3526 | + pfn_to_mfn(info->shadow[req->id].frame[j]), | |
3527 | + rq_data_dir((struct request *) | |
3528 | + info->shadow[req->id].request) ? | |
3529 | + GTF_readonly : 0); | |
3530 | + info->shadow[req->id].req = *req; | |
3531 | + | |
3532 | + info->ring.req_prod_pvt++; | |
3533 | + } | |
3534 | + | |
3535 | + kfree(copy); | |
3536 | + | |
3537 | + (void)xenbus_switch_state(info->xbdev, XenbusStateConnected); | |
3538 | + | |
3539 | + spin_lock_irq(&blkif_io_lock); | |
3540 | + | |
3541 | + /* Now safe for us to use the shared ring */ | |
3542 | + info->connected = BLKIF_STATE_CONNECTED; | |
3543 | + | |
3544 | + /* Send off requeued requests */ | |
3545 | + flush_requests(info); | |
3546 | + | |
3547 | + /* Kick any other new requests queued since we resumed */ | |
3548 | + kick_pending_request_queues(info); | |
3549 | + | |
3550 | + spin_unlock_irq(&blkif_io_lock); | |
3551 | +} | |
3552 | + | |
3553 | +int blkfront_is_ready(struct xenbus_device *dev) | |
3554 | +{ | |
3555 | + struct blkfront_info *info = dev->dev.driver_data; | |
3556 | + | |
3557 | + return info->is_ready; | |
3558 | +} | |
3559 | + | |
3560 | + | |
3561 | +/* ** Driver Registration ** */ | |
3562 | + | |
3563 | + | |
3564 | +static const struct xenbus_device_id blkfront_ids[] = { | |
3565 | + { "vbd" }, | |
3566 | + { "" } | |
3567 | +}; | |
3568 | +MODULE_ALIAS("xen:vbd"); | |
3569 | + | |
3570 | +static struct xenbus_driver blkfront = { | |
3571 | + .name = "vbd", | |
3572 | + .owner = THIS_MODULE, | |
3573 | + .ids = blkfront_ids, | |
3574 | + .probe = blkfront_probe, | |
3575 | + .remove = blkfront_remove, | |
3576 | + .resume = blkfront_resume, | |
3577 | + .otherend_changed = backend_changed, | |
3578 | + .is_ready = blkfront_is_ready, | |
3579 | +}; | |
3580 | + | |
3581 | + | |
3582 | +static int __init xlblk_init(void) | |
3583 | +{ | |
3584 | + if (!is_running_on_xen()) | |
3585 | + return -ENODEV; | |
3586 | + | |
3587 | + return xenbus_register_frontend(&blkfront); | |
3588 | +} | |
3589 | +module_init(xlblk_init); | |
3590 | + | |
3591 | + | |
3592 | +static void __exit xlblk_exit(void) | |
3593 | +{ | |
3594 | + return xenbus_unregister_driver(&blkfront); | |
3595 | +} | |
3596 | +module_exit(xlblk_exit); | |
3597 | + | |
3598 | +MODULE_LICENSE("Dual BSD/GPL"); | |
3599 | Index: head-2008-11-25/drivers/xen/blkfront/block.h | |
3600 | =================================================================== | |
3601 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
3602 | +++ head-2008-11-25/drivers/xen/blkfront/block.h 2008-08-07 12:44:36.000000000 +0200 | |
3603 | @@ -0,0 +1,158 @@ | |
3604 | +/****************************************************************************** | |
3605 | + * block.h | |
3606 | + * | |
3607 | + * Shared definitions between all levels of XenLinux Virtual block devices. | |
3608 | + * | |
3609 | + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand | |
3610 | + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge | |
3611 | + * Copyright (c) 2004-2005, Christian Limpach | |
3612 | + * | |
3613 | + * This program is free software; you can redistribute it and/or | |
3614 | + * modify it under the terms of the GNU General Public License version 2 | |
3615 | + * as published by the Free Software Foundation; or, when distributed | |
3616 | + * separately from the Linux kernel or incorporated into other | |
3617 | + * software packages, subject to the following license: | |
3618 | + * | |
3619 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
3620 | + * of this source file (the "Software"), to deal in the Software without | |
3621 | + * restriction, including without limitation the rights to use, copy, modify, | |
3622 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
3623 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
3624 | + * the following conditions: | |
3625 | + * | |
3626 | + * The above copyright notice and this permission notice shall be included in | |
3627 | + * all copies or substantial portions of the Software. | |
3628 | + * | |
3629 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
3630 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
3631 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
3632 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
3633 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
3634 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
3635 | + * IN THE SOFTWARE. | |
3636 | + */ | |
3637 | + | |
3638 | +#ifndef __XEN_DRIVERS_BLOCK_H__ | |
3639 | +#define __XEN_DRIVERS_BLOCK_H__ | |
3640 | + | |
3641 | +#include <linux/version.h> | |
3642 | +#include <linux/module.h> | |
3643 | +#include <linux/kernel.h> | |
3644 | +#include <linux/sched.h> | |
3645 | +#include <linux/slab.h> | |
3646 | +#include <linux/string.h> | |
3647 | +#include <linux/errno.h> | |
3648 | +#include <linux/fs.h> | |
3649 | +#include <linux/hdreg.h> | |
3650 | +#include <linux/blkdev.h> | |
3651 | +#include <linux/major.h> | |
3652 | +#include <asm/hypervisor.h> | |
3653 | +#include <xen/xenbus.h> | |
3654 | +#include <xen/gnttab.h> | |
3655 | +#include <xen/interface/xen.h> | |
3656 | +#include <xen/interface/io/blkif.h> | |
3657 | +#include <xen/interface/io/ring.h> | |
3658 | +#include <asm/io.h> | |
3659 | +#include <asm/atomic.h> | |
3660 | +#include <asm/uaccess.h> | |
3661 | + | |
3662 | +#define DPRINTK(_f, _a...) pr_debug(_f, ## _a) | |
3663 | + | |
3664 | +#if 0 | |
3665 | +#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a) | |
3666 | +#else | |
3667 | +#define DPRINTK_IOCTL(_f, _a...) ((void)0) | |
3668 | +#endif | |
3669 | + | |
3670 | +struct xlbd_type_info | |
3671 | +{ | |
3672 | + int partn_shift; | |
3673 | + int disks_per_major; | |
3674 | + char *devname; | |
3675 | + char *diskname; | |
3676 | +}; | |
3677 | + | |
3678 | +struct xlbd_major_info | |
3679 | +{ | |
3680 | + int major; | |
3681 | + int index; | |
3682 | + int usage; | |
3683 | + struct xlbd_type_info *type; | |
3684 | +}; | |
3685 | + | |
3686 | +struct blk_shadow { | |
3687 | + blkif_request_t req; | |
3688 | + unsigned long request; | |
3689 | + unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
3690 | +}; | |
3691 | + | |
3692 | +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) | |
3693 | + | |
3694 | +/* | |
3695 | + * We have one of these per vbd, whether ide, scsi or 'other'. They | |
3696 | + * hang in private_data off the gendisk structure. We may end up | |
3697 | + * putting all kinds of interesting stuff here :-) | |
3698 | + */ | |
3699 | +struct blkfront_info | |
3700 | +{ | |
3701 | + struct xenbus_device *xbdev; | |
3702 | + dev_t dev; | |
3703 | + struct gendisk *gd; | |
3704 | + int vdevice; | |
3705 | + blkif_vdev_t handle; | |
3706 | + int connected; | |
3707 | + int ring_ref; | |
3708 | + blkif_front_ring_t ring; | |
3709 | + unsigned int irq; | |
3710 | + struct xlbd_major_info *mi; | |
3711 | + request_queue_t *rq; | |
3712 | + struct work_struct work; | |
3713 | + struct gnttab_free_callback callback; | |
3714 | + struct blk_shadow shadow[BLK_RING_SIZE]; | |
3715 | + unsigned long shadow_free; | |
3716 | + int feature_barrier; | |
3717 | + int is_ready; | |
3718 | + | |
3719 | + /** | |
3720 | + * The number of people holding this device open. We won't allow a | |
3721 | + * hot-unplug unless this is 0. | |
3722 | + */ | |
3723 | + int users; | |
3724 | +}; | |
3725 | + | |
3726 | +extern spinlock_t blkif_io_lock; | |
3727 | + | |
3728 | +extern int blkif_open(struct inode *inode, struct file *filep); | |
3729 | +extern int blkif_release(struct inode *inode, struct file *filep); | |
3730 | +extern int blkif_ioctl(struct inode *inode, struct file *filep, | |
3731 | + unsigned command, unsigned long argument); | |
3732 | +extern int blkif_getgeo(struct block_device *, struct hd_geometry *); | |
3733 | +extern int blkif_check(dev_t dev); | |
3734 | +extern int blkif_revalidate(dev_t dev); | |
3735 | +extern void do_blkif_request (request_queue_t *rq); | |
3736 | + | |
3737 | +/* Virtual block-device subsystem. */ | |
3738 | +/* Note that xlvbd_add doesn't call add_disk for you: you're expected | |
3739 | + to call add_disk on info->gd once the disk is properly connected | |
3740 | + up. */ | |
3741 | +int xlvbd_add(blkif_sector_t capacity, int device, | |
3742 | + u16 vdisk_info, u16 sector_size, struct blkfront_info *info); | |
3743 | +void xlvbd_del(struct blkfront_info *info); | |
3744 | +int xlvbd_barrier(struct blkfront_info *info); | |
3745 | + | |
3746 | +#ifdef CONFIG_SYSFS | |
3747 | +int xlvbd_sysfs_addif(struct blkfront_info *info); | |
3748 | +void xlvbd_sysfs_delif(struct blkfront_info *info); | |
3749 | +#else | |
3750 | +static inline int xlvbd_sysfs_addif(struct blkfront_info *info) | |
3751 | +{ | |
3752 | + return 0; | |
3753 | +} | |
3754 | + | |
3755 | +static inline void xlvbd_sysfs_delif(struct blkfront_info *info) | |
3756 | +{ | |
3757 | + ; | |
3758 | +} | |
3759 | +#endif | |
3760 | + | |
3761 | +#endif /* __XEN_DRIVERS_BLOCK_H__ */ | |
3762 | Index: head-2008-11-25/drivers/xen/blkfront/vbd.c | |
3763 | =================================================================== | |
3764 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
3765 | +++ head-2008-11-25/drivers/xen/blkfront/vbd.c 2008-08-07 12:44:36.000000000 +0200 | |
3766 | @@ -0,0 +1,460 @@ | |
3767 | +/****************************************************************************** | |
3768 | + * vbd.c | |
3769 | + * | |
3770 | + * XenLinux virtual block-device driver (xvd). | |
3771 | + * | |
3772 | + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand | |
3773 | + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge | |
3774 | + * Copyright (c) 2004-2005, Christian Limpach | |
3775 | + * | |
3776 | + * This program is free software; you can redistribute it and/or | |
3777 | + * modify it under the terms of the GNU General Public License version 2 | |
3778 | + * as published by the Free Software Foundation; or, when distributed | |
3779 | + * separately from the Linux kernel or incorporated into other | |
3780 | + * software packages, subject to the following license: | |
3781 | + * | |
3782 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
3783 | + * of this source file (the "Software"), to deal in the Software without | |
3784 | + * restriction, including without limitation the rights to use, copy, modify, | |
3785 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
3786 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
3787 | + * the following conditions: | |
3788 | + * | |
3789 | + * The above copyright notice and this permission notice shall be included in | |
3790 | + * all copies or substantial portions of the Software. | |
3791 | + * | |
3792 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
3793 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
3794 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
3795 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
3796 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
3797 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
3798 | + * IN THE SOFTWARE. | |
3799 | + */ | |
3800 | + | |
3801 | +#include "block.h" | |
3802 | +#include <linux/blkdev.h> | |
3803 | +#include <linux/list.h> | |
3804 | + | |
3805 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
3806 | +#include <xen/platform-compat.h> | |
3807 | +#endif | |
3808 | + | |
3809 | +#define BLKIF_MAJOR(dev) ((dev)>>8) | |
3810 | +#define BLKIF_MINOR(dev) ((dev) & 0xff) | |
3811 | + | |
3812 | +#define EXT_SHIFT 28 | |
3813 | +#define EXTENDED (1<<EXT_SHIFT) | |
3814 | +#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) | |
3815 | +#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) | |
3816 | + | |
3817 | +/* | |
3818 | + * For convenience we distinguish between ide, scsi and 'other' (i.e., | |
3819 | + * potentially combinations of the two) in the naming scheme and in a few other | |
3820 | + * places. | |
3821 | + */ | |
3822 | + | |
3823 | +#define NUM_IDE_MAJORS 10 | |
3824 | +#define NUM_SCSI_MAJORS 17 | |
3825 | +#define NUM_VBD_MAJORS 2 | |
3826 | + | |
3827 | +static struct xlbd_type_info xlbd_ide_type = { | |
3828 | + .partn_shift = 6, | |
3829 | + .disks_per_major = 2, | |
3830 | + .devname = "ide", | |
3831 | + .diskname = "hd", | |
3832 | +}; | |
3833 | + | |
3834 | +static struct xlbd_type_info xlbd_scsi_type = { | |
3835 | + .partn_shift = 4, | |
3836 | + .disks_per_major = 16, | |
3837 | + .devname = "sd", | |
3838 | + .diskname = "sd", | |
3839 | +}; | |
3840 | + | |
3841 | +static struct xlbd_type_info xlbd_vbd_type = { | |
3842 | + .partn_shift = 4, | |
3843 | + .disks_per_major = 16, | |
3844 | + .devname = "xvd", | |
3845 | + .diskname = "xvd", | |
3846 | +}; | |
3847 | + | |
3848 | +static struct xlbd_type_info xlbd_vbd_type_ext = { | |
3849 | + .partn_shift = 8, | |
3850 | + .disks_per_major = 256, | |
3851 | + .devname = "xvd", | |
3852 | + .diskname = "xvd", | |
3853 | +}; | |
3854 | + | |
3855 | +static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS + | |
3856 | + NUM_VBD_MAJORS]; | |
3857 | + | |
3858 | +#define XLBD_MAJOR_IDE_START 0 | |
3859 | +#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS) | |
3860 | +#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) | |
3861 | + | |
3862 | +#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1 | |
3863 | +#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1 | |
3864 | +#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1 | |
3865 | + | |
3866 | +static struct block_device_operations xlvbd_block_fops = | |
3867 | +{ | |
3868 | + .owner = THIS_MODULE, | |
3869 | + .open = blkif_open, | |
3870 | + .release = blkif_release, | |
3871 | + .ioctl = blkif_ioctl, | |
3872 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) | |
3873 | + .getgeo = blkif_getgeo | |
3874 | +#endif | |
3875 | +}; | |
3876 | + | |
3877 | +DEFINE_SPINLOCK(blkif_io_lock); | |
3878 | + | |
3879 | +static struct xlbd_major_info * | |
3880 | +xlbd_alloc_major_info(int major, int minor, int index) | |
3881 | +{ | |
3882 | + struct xlbd_major_info *ptr; | |
3883 | + int do_register; | |
3884 | + | |
3885 | + ptr = kzalloc(sizeof(struct xlbd_major_info), GFP_KERNEL); | |
3886 | + if (ptr == NULL) | |
3887 | + return NULL; | |
3888 | + | |
3889 | + ptr->major = major; | |
3890 | + do_register = 1; | |
3891 | + | |
3892 | + switch (index) { | |
3893 | + case XLBD_MAJOR_IDE_RANGE: | |
3894 | + ptr->type = &xlbd_ide_type; | |
3895 | + ptr->index = index - XLBD_MAJOR_IDE_START; | |
3896 | + break; | |
3897 | + case XLBD_MAJOR_SCSI_RANGE: | |
3898 | + ptr->type = &xlbd_scsi_type; | |
3899 | + ptr->index = index - XLBD_MAJOR_SCSI_START; | |
3900 | + break; | |
3901 | + case XLBD_MAJOR_VBD_RANGE: | |
3902 | + ptr->index = 0; | |
3903 | + if ((index - XLBD_MAJOR_VBD_START) == 0) | |
3904 | + ptr->type = &xlbd_vbd_type; | |
3905 | + else | |
3906 | + ptr->type = &xlbd_vbd_type_ext; | |
3907 | + | |
3908 | + /* | |
3909 | + * if someone already registered block major 202, | |
3910 | + * don't try to register it again | |
3911 | + */ | |
3912 | + if (major_info[XLBD_MAJOR_VBD_START] != NULL) | |
3913 | + do_register = 0; | |
3914 | + break; | |
3915 | + } | |
3916 | + | |
3917 | + if (do_register) { | |
3918 | + if (register_blkdev(ptr->major, ptr->type->devname)) { | |
3919 | + kfree(ptr); | |
3920 | + return NULL; | |
3921 | + } | |
3922 | + | |
3923 | + printk("xen-vbd: registered block device major %i\n", ptr->major); | |
3924 | + } | |
3925 | + | |
3926 | + major_info[index] = ptr; | |
3927 | + return ptr; | |
3928 | +} | |
3929 | + | |
3930 | +static struct xlbd_major_info * | |
3931 | +xlbd_get_major_info(int major, int minor, int vdevice) | |
3932 | +{ | |
3933 | + struct xlbd_major_info *mi; | |
3934 | + int index; | |
3935 | + | |
3936 | + switch (major) { | |
3937 | + case IDE0_MAJOR: index = 0; break; | |
3938 | + case IDE1_MAJOR: index = 1; break; | |
3939 | + case IDE2_MAJOR: index = 2; break; | |
3940 | + case IDE3_MAJOR: index = 3; break; | |
3941 | + case IDE4_MAJOR: index = 4; break; | |
3942 | + case IDE5_MAJOR: index = 5; break; | |
3943 | + case IDE6_MAJOR: index = 6; break; | |
3944 | + case IDE7_MAJOR: index = 7; break; | |
3945 | + case IDE8_MAJOR: index = 8; break; | |
3946 | + case IDE9_MAJOR: index = 9; break; | |
3947 | + case SCSI_DISK0_MAJOR: index = 10; break; | |
3948 | + case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR: | |
3949 | + index = 11 + major - SCSI_DISK1_MAJOR; | |
3950 | + break; | |
3951 | + case SCSI_DISK8_MAJOR ... SCSI_DISK15_MAJOR: | |
3952 | + index = 18 + major - SCSI_DISK8_MAJOR; | |
3953 | + break; | |
3954 | + case SCSI_CDROM_MAJOR: index = 26; break; | |
3955 | + default: | |
3956 | + if (!VDEV_IS_EXTENDED(vdevice)) | |
3957 | + index = 27; | |
3958 | + else | |
3959 | + index = 28; | |
3960 | + break; | |
3961 | + } | |
3962 | + | |
3963 | + mi = ((major_info[index] != NULL) ? major_info[index] : | |
3964 | + xlbd_alloc_major_info(major, minor, index)); | |
3965 | + if (mi) | |
3966 | + mi->usage++; | |
3967 | + return mi; | |
3968 | +} | |
3969 | + | |
3970 | +static void | |
3971 | +xlbd_put_major_info(struct xlbd_major_info *mi) | |
3972 | +{ | |
3973 | + mi->usage--; | |
3974 | + /* XXX: release major if 0 */ | |
3975 | +} | |
3976 | + | |
3977 | +static int | |
3978 | +xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) | |
3979 | +{ | |
3980 | + request_queue_t *rq; | |
3981 | + | |
3982 | + rq = blk_init_queue(do_blkif_request, &blkif_io_lock); | |
3983 | + if (rq == NULL) | |
3984 | + return -1; | |
3985 | + | |
3986 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) | |
3987 | + elevator_init(rq, "noop"); | |
3988 | +#else | |
3989 | + elevator_init(rq, &elevator_noop); | |
3990 | +#endif | |
3991 | + | |
3992 | + /* Hard sector size and max sectors impersonate the equiv. hardware. */ | |
3993 | + blk_queue_hardsect_size(rq, sector_size); | |
3994 | + blk_queue_max_sectors(rq, 512); | |
3995 | + | |
3996 | + /* Each segment in a request is up to an aligned page in size. */ | |
3997 | + blk_queue_segment_boundary(rq, PAGE_SIZE - 1); | |
3998 | + blk_queue_max_segment_size(rq, PAGE_SIZE); | |
3999 | + | |
4000 | + /* Ensure a merged request will fit in a single I/O ring slot. */ | |
4001 | + blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); | |
4002 | + blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); | |
4003 | + | |
4004 | + /* Make sure buffer addresses are sector-aligned. */ | |
4005 | + blk_queue_dma_alignment(rq, 511); | |
4006 | + | |
4007 | + /* Make sure we don't use bounce buffers. */ | |
4008 | + blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); | |
4009 | + | |
4010 | + gd->queue = rq; | |
4011 | + | |
4012 | + return 0; | |
4013 | +} | |
4014 | + | |
4015 | +static int | |
4016 | +xlvbd_alloc_gendisk(int major, int minor, blkif_sector_t capacity, int vdevice, | |
4017 | + u16 vdisk_info, u16 sector_size, | |
4018 | + struct blkfront_info *info) | |
4019 | +{ | |
4020 | + struct gendisk *gd; | |
4021 | + struct xlbd_major_info *mi; | |
4022 | + int nr_minors = 1; | |
4023 | + int err = -ENODEV; | |
4024 | + unsigned int offset; | |
4025 | + | |
4026 | + BUG_ON(info->gd != NULL); | |
4027 | + BUG_ON(info->mi != NULL); | |
4028 | + BUG_ON(info->rq != NULL); | |
4029 | + | |
4030 | + mi = xlbd_get_major_info(major, minor, vdevice); | |
4031 | + if (mi == NULL) | |
4032 | + goto out; | |
4033 | + info->mi = mi; | |
4034 | + | |
4035 | + if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0) | |
4036 | + nr_minors = 1 << mi->type->partn_shift; | |
4037 | + | |
4038 | + gd = alloc_disk(nr_minors); | |
4039 | + if (gd == NULL) | |
4040 | + goto out; | |
4041 | + | |
4042 | + offset = mi->index * mi->type->disks_per_major + | |
4043 | + (minor >> mi->type->partn_shift); | |
4044 | + if (nr_minors > 1) { | |
4045 | + if (offset < 26) { | |
4046 | + sprintf(gd->disk_name, "%s%c", | |
4047 | + mi->type->diskname, 'a' + offset ); | |
4048 | + } | |
4049 | + else { | |
4050 | + sprintf(gd->disk_name, "%s%c%c", | |
4051 | + mi->type->diskname, | |
4052 | + 'a' + ((offset/26)-1), 'a' + (offset%26) ); | |
4053 | + } | |
4054 | + } | |
4055 | + else { | |
4056 | + if (offset < 26) { | |
4057 | + sprintf(gd->disk_name, "%s%c%d", | |
4058 | + mi->type->diskname, | |
4059 | + 'a' + offset, | |
4060 | + minor & ((1 << mi->type->partn_shift) - 1)); | |
4061 | + } | |
4062 | + else { | |
4063 | + sprintf(gd->disk_name, "%s%c%c%d", | |
4064 | + mi->type->diskname, | |
4065 | + 'a' + ((offset/26)-1), 'a' + (offset%26), | |
4066 | + minor & ((1 << mi->type->partn_shift) - 1)); | |
4067 | + } | |
4068 | + } | |
4069 | + | |
4070 | + gd->major = mi->major; | |
4071 | + gd->first_minor = minor; | |
4072 | + gd->fops = &xlvbd_block_fops; | |
4073 | + gd->private_data = info; | |
4074 | + gd->driverfs_dev = &(info->xbdev->dev); | |
4075 | + set_capacity(gd, capacity); | |
4076 | + | |
4077 | + if (xlvbd_init_blk_queue(gd, sector_size)) { | |
4078 | + del_gendisk(gd); | |
4079 | + goto out; | |
4080 | + } | |
4081 | + | |
4082 | + info->rq = gd->queue; | |
4083 | + info->gd = gd; | |
4084 | + | |
4085 | + if (info->feature_barrier) | |
4086 | + xlvbd_barrier(info); | |
4087 | + | |
4088 | + if (vdisk_info & VDISK_READONLY) | |
4089 | + set_disk_ro(gd, 1); | |
4090 | + | |
4091 | + if (vdisk_info & VDISK_REMOVABLE) | |
4092 | + gd->flags |= GENHD_FL_REMOVABLE; | |
4093 | + | |
4094 | + if (vdisk_info & VDISK_CDROM) | |
4095 | + gd->flags |= GENHD_FL_CD; | |
4096 | + | |
4097 | + return 0; | |
4098 | + | |
4099 | + out: | |
4100 | + if (mi) | |
4101 | + xlbd_put_major_info(mi); | |
4102 | + info->mi = NULL; | |
4103 | + return err; | |
4104 | +} | |
4105 | + | |
4106 | +int | |
4107 | +xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info, | |
4108 | + u16 sector_size, struct blkfront_info *info) | |
4109 | +{ | |
4110 | + struct block_device *bd; | |
4111 | + int err = 0; | |
4112 | + int major, minor; | |
4113 | + | |
4114 | + if ((vdevice>>EXT_SHIFT) > 1) { | |
4115 | + /* this is above the extended range; something is wrong */ | |
4116 | + printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice); | |
4117 | + return -ENODEV; | |
4118 | + } | |
4119 | + | |
4120 | + if (!VDEV_IS_EXTENDED(vdevice)) { | |
4121 | + major = BLKIF_MAJOR(vdevice); | |
4122 | + minor = BLKIF_MINOR(vdevice); | |
4123 | + } | |
4124 | + else { | |
4125 | + major = 202; | |
4126 | + minor = BLKIF_MINOR_EXT(vdevice); | |
4127 | + } | |
4128 | + | |
4129 | + info->dev = MKDEV(major, minor); | |
4130 | + bd = bdget(info->dev); | |
4131 | + if (bd == NULL) | |
4132 | + return -ENODEV; | |
4133 | + | |
4134 | + err = xlvbd_alloc_gendisk(major, minor, capacity, vdevice, vdisk_info, | |
4135 | + sector_size, info); | |
4136 | + | |
4137 | + bdput(bd); | |
4138 | + return err; | |
4139 | +} | |
4140 | + | |
4141 | +void | |
4142 | +xlvbd_del(struct blkfront_info *info) | |
4143 | +{ | |
4144 | + if (info->mi == NULL) | |
4145 | + return; | |
4146 | + | |
4147 | + BUG_ON(info->gd == NULL); | |
4148 | + del_gendisk(info->gd); | |
4149 | + put_disk(info->gd); | |
4150 | + info->gd = NULL; | |
4151 | + | |
4152 | + xlbd_put_major_info(info->mi); | |
4153 | + info->mi = NULL; | |
4154 | + | |
4155 | + BUG_ON(info->rq == NULL); | |
4156 | + blk_cleanup_queue(info->rq); | |
4157 | + info->rq = NULL; | |
4158 | +} | |
4159 | + | |
4160 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) | |
4161 | +int | |
4162 | +xlvbd_barrier(struct blkfront_info *info) | |
4163 | +{ | |
4164 | + int err; | |
4165 | + | |
4166 | + err = blk_queue_ordered(info->rq, | |
4167 | + info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL); | |
4168 | + if (err) | |
4169 | + return err; | |
4170 | + printk(KERN_INFO "blkfront: %s: barriers %s\n", | |
4171 | + info->gd->disk_name, info->feature_barrier ? "enabled" : "disabled"); | |
4172 | + return 0; | |
4173 | +} | |
4174 | +#else | |
4175 | +int | |
4176 | +xlvbd_barrier(struct blkfront_info *info) | |
4177 | +{ | |
4178 | + printk(KERN_INFO "blkfront: %s: barriers disabled\n", info->gd->disk_name); | |
4179 | + return -ENOSYS; | |
4180 | +} | |
4181 | +#endif | |
4182 | + | |
4183 | +#ifdef CONFIG_SYSFS | |
4184 | +static ssize_t show_media(struct device *dev, | |
4185 | + struct device_attribute *attr, char *buf) | |
4186 | +{ | |
4187 | + struct xenbus_device *xendev = to_xenbus_device(dev); | |
4188 | + struct blkfront_info *info = xendev->dev.driver_data; | |
4189 | + | |
4190 | + if (info->gd->flags & GENHD_FL_CD) | |
4191 | + return sprintf(buf, "cdrom\n"); | |
4192 | + return sprintf(buf, "disk\n"); | |
4193 | +} | |
4194 | + | |
4195 | +static struct device_attribute xlvbd_attrs[] = { | |
4196 | + __ATTR(media, S_IRUGO, show_media, NULL), | |
4197 | +}; | |
4198 | + | |
4199 | +int xlvbd_sysfs_addif(struct blkfront_info *info) | |
4200 | +{ | |
4201 | + int i; | |
4202 | + int error = 0; | |
4203 | + | |
4204 | + for (i = 0; i < ARRAY_SIZE(xlvbd_attrs); i++) { | |
4205 | + error = device_create_file(info->gd->driverfs_dev, | |
4206 | + &xlvbd_attrs[i]); | |
4207 | + if (error) | |
4208 | + goto fail; | |
4209 | + } | |
4210 | + return 0; | |
4211 | + | |
4212 | +fail: | |
4213 | + while (--i >= 0) | |
4214 | + device_remove_file(info->gd->driverfs_dev, &xlvbd_attrs[i]); | |
4215 | + return error; | |
4216 | +} | |
4217 | + | |
4218 | +void xlvbd_sysfs_delif(struct blkfront_info *info) | |
4219 | +{ | |
4220 | + int i; | |
4221 | + | |
4222 | + for (i = 0; i < ARRAY_SIZE(xlvbd_attrs); i++) | |
4223 | + device_remove_file(info->gd->driverfs_dev, &xlvbd_attrs[i]); | |
4224 | +} | |
4225 | + | |
4226 | +#endif /* CONFIG_SYSFS */ | |
4227 | Index: head-2008-11-25/drivers/xen/blktap/Makefile | |
4228 | =================================================================== | |
4229 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
4230 | +++ head-2008-11-25/drivers/xen/blktap/Makefile 2007-06-12 13:13:44.000000000 +0200 | |
4231 | @@ -0,0 +1,5 @@ | |
4232 | +LINUXINCLUDE += -I../xen/include/public/io | |
4233 | + | |
4234 | +obj-$(CONFIG_XEN_BLKDEV_TAP) := xenblktap.o | |
4235 | + | |
4236 | +xenblktap-y := xenbus.o interface.o blktap.o | |
4237 | Index: head-2008-11-25/drivers/xen/blktap/blktap.c | |
4238 | =================================================================== | |
4239 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
4240 | +++ head-2008-11-25/drivers/xen/blktap/blktap.c 2008-11-10 11:44:21.000000000 +0100 | |
4241 | @@ -0,0 +1,1704 @@ | |
4242 | +/****************************************************************************** | |
4243 | + * drivers/xen/blktap/blktap.c | |
4244 | + * | |
4245 | + * Back-end driver for user level virtual block devices. This portion of the | |
4246 | + * driver exports a 'unified' block-device interface that can be accessed | |
4247 | + * by any operating system that implements a compatible front end. Requests | |
4248 | + * are remapped to a user-space memory region. | |
4249 | + * | |
4250 | + * Based on the blkback driver code. | |
4251 | + * | |
4252 | + * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield | |
4253 | + * | |
4254 | + * Clean ups and fix ups: | |
4255 | + * Copyright (c) 2006, Steven Rostedt - Red Hat, Inc. | |
4256 | + * | |
4257 | + * This program is free software; you can redistribute it and/or | |
4258 | + * modify it under the terms of the GNU General Public License version 2 | |
4259 | + * as published by the Free Software Foundation; or, when distributed | |
4260 | + * separately from the Linux kernel or incorporated into other | |
4261 | + * software packages, subject to the following license: | |
4262 | + * | |
4263 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
4264 | + * of this source file (the "Software"), to deal in the Software without | |
4265 | + * restriction, including without limitation the rights to use, copy, modify, | |
4266 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
4267 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
4268 | + * the following conditions: | |
4269 | + * | |
4270 | + * The above copyright notice and this permission notice shall be included in | |
4271 | + * all copies or substantial portions of the Software. | |
4272 | + * | |
4273 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
4274 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
4275 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
4276 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
4277 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
4278 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
4279 | + * IN THE SOFTWARE. | |
4280 | + */ | |
4281 | + | |
4282 | +#include <linux/spinlock.h> | |
4283 | +#include <linux/kthread.h> | |
4284 | +#include <linux/list.h> | |
4285 | +#include <asm/hypervisor.h> | |
4286 | +#include "common.h" | |
4287 | +#include <xen/balloon.h> | |
4288 | +#include <xen/driver_util.h> | |
4289 | +#include <linux/kernel.h> | |
4290 | +#include <linux/fs.h> | |
4291 | +#include <linux/mm.h> | |
4292 | +#include <linux/errno.h> | |
4293 | +#include <linux/major.h> | |
4294 | +#include <linux/gfp.h> | |
4295 | +#include <linux/poll.h> | |
4296 | +#include <linux/delay.h> | |
4297 | +#include <asm/tlbflush.h> | |
4298 | + | |
4299 | +#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */ | |
4300 | +#define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */ | |
4301 | + | |
4302 | +/* | |
4303 | + * The maximum number of requests that can be outstanding at any time | |
4304 | + * is determined by | |
4305 | + * | |
4306 | + * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] | |
4307 | + * | |
4308 | + * where mmap_alloc < MAX_DYNAMIC_MEM. | |
4309 | + * | |
4310 | + * TODO: | |
4311 | + * mmap_alloc is initialised to 2 and should be adjustable on the fly via | |
4312 | + * sysfs. | |
4313 | + */ | |
4314 | +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) | |
4315 | +#define MAX_DYNAMIC_MEM BLK_RING_SIZE | |
4316 | +#define MAX_PENDING_REQS BLK_RING_SIZE | |
4317 | +#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) | |
4318 | +#define MMAP_VADDR(_start, _req,_seg) \ | |
4319 | + (_start + \ | |
4320 | + ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ | |
4321 | + ((_seg) * PAGE_SIZE)) | |
4322 | +static int blkif_reqs = MAX_PENDING_REQS; | |
4323 | +static int mmap_pages = MMAP_PAGES; | |
4324 | + | |
4325 | +#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we | |
4326 | + * have a bunch of pages reserved for shared | |
4327 | + * memory rings. | |
4328 | + */ | |
4329 | + | |
4330 | +/*Data struct handed back to userspace for tapdisk device to VBD mapping*/ | |
4331 | +typedef struct domid_translate { | |
4332 | + unsigned short domid; | |
4333 | + unsigned short busid; | |
4334 | +} domid_translate_t ; | |
4335 | + | |
4336 | +typedef struct domid_translate_ext { | |
4337 | + unsigned short domid; | |
4338 | + u32 busid; | |
4339 | +} domid_translate_ext_t ; | |
4340 | + | |
4341 | +/*Data struct associated with each of the tapdisk devices*/ | |
4342 | +typedef struct tap_blkif { | |
4343 | + struct vm_area_struct *vma; /*Shared memory area */ | |
4344 | + unsigned long rings_vstart; /*Kernel memory mapping */ | |
4345 | + unsigned long user_vstart; /*User memory mapping */ | |
4346 | + unsigned long dev_inuse; /*One process opens device at a time. */ | |
4347 | + unsigned long dev_pending; /*In process of being opened */ | |
4348 | + unsigned long ring_ok; /*make this ring->state */ | |
4349 | + blkif_front_ring_t ufe_ring; /*Rings up to user space. */ | |
4350 | + wait_queue_head_t wait; /*for poll */ | |
4351 | + unsigned long mode; /*current switching mode */ | |
4352 | + int minor; /*Minor number for tapdisk device */ | |
4353 | + pid_t pid; /*tapdisk process id */ | |
4354 | + enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace | |
4355 | + shutdown */ | |
4356 | + unsigned long *idx_map; /*Record the user ring id to kern | |
4357 | + [req id, idx] tuple */ | |
4358 | + blkif_t *blkif; /*Associate blkif with tapdev */ | |
4359 | + struct domid_translate_ext trans; /*Translation from domid to bus. */ | |
4360 | +} tap_blkif_t; | |
4361 | + | |
4362 | +static struct tap_blkif *tapfds[MAX_TAP_DEV]; | |
4363 | +static int blktap_next_minor; | |
4364 | + | |
4365 | +module_param(blkif_reqs, int, 0); | |
4366 | +/* Run-time switchable: /sys/module/blktap/parameters/ */ | |
4367 | +static unsigned int log_stats = 0; | |
4368 | +static unsigned int debug_lvl = 0; | |
4369 | +module_param(log_stats, int, 0644); | |
4370 | +module_param(debug_lvl, int, 0644); | |
4371 | + | |
4372 | +/* | |
4373 | + * Each outstanding request that we've passed to the lower device layers has a | |
4374 | + * 'pending_req' allocated to it. Each buffer_head that completes decrements | |
4375 | + * the pendcnt towards zero. When it hits zero, the specified domain has a | |
4376 | + * response queued for it, with the saved 'id' passed back. | |
4377 | + */ | |
4378 | +typedef struct { | |
4379 | + blkif_t *blkif; | |
4380 | + u64 id; | |
4381 | + unsigned short mem_idx; | |
4382 | + int nr_pages; | |
4383 | + atomic_t pendcnt; | |
4384 | + unsigned short operation; | |
4385 | + int status; | |
4386 | + struct list_head free_list; | |
4387 | + int inuse; | |
4388 | +} pending_req_t; | |
4389 | + | |
4390 | +static pending_req_t *pending_reqs[MAX_PENDING_REQS]; | |
4391 | +static struct list_head pending_free; | |
4392 | +static DEFINE_SPINLOCK(pending_free_lock); | |
4393 | +static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq); | |
4394 | +static int alloc_pending_reqs; | |
4395 | + | |
4396 | +typedef unsigned int PEND_RING_IDX; | |
4397 | + | |
4398 | +static inline int MASK_PEND_IDX(int i) { | |
4399 | + return (i & (MAX_PENDING_REQS-1)); | |
4400 | +} | |
4401 | + | |
4402 | +static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) { | |
4403 | + return (req - pending_reqs[idx]); | |
4404 | +} | |
4405 | + | |
4406 | +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) | |
4407 | + | |
4408 | +#define BLKBACK_INVALID_HANDLE (~0) | |
4409 | + | |
4410 | +static struct page **foreign_pages[MAX_DYNAMIC_MEM]; | |
4411 | +static inline unsigned long idx_to_kaddr( | |
4412 | + unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx) | |
4413 | +{ | |
4414 | + unsigned int arr_idx = req_idx*BLKIF_MAX_SEGMENTS_PER_REQUEST + sg_idx; | |
4415 | + unsigned long pfn = page_to_pfn(foreign_pages[mmap_idx][arr_idx]); | |
4416 | + return (unsigned long)pfn_to_kaddr(pfn); | |
4417 | +} | |
4418 | + | |
4419 | +static unsigned short mmap_alloc = 0; | |
4420 | +static unsigned short mmap_lock = 0; | |
4421 | +static unsigned short mmap_inuse = 0; | |
4422 | + | |
4423 | +/****************************************************************** | |
4424 | + * GRANT HANDLES | |
4425 | + */ | |
4426 | + | |
4427 | +/* When using grant tables to map a frame for device access then the | |
4428 | + * handle returned must be used to unmap the frame. This is needed to | |
4429 | + * drop the ref count on the frame. | |
4430 | + */ | |
4431 | +struct grant_handle_pair | |
4432 | +{ | |
4433 | + grant_handle_t kernel; | |
4434 | + grant_handle_t user; | |
4435 | +}; | |
4436 | +#define INVALID_GRANT_HANDLE 0xFFFF | |
4437 | + | |
4438 | +static struct grant_handle_pair | |
4439 | + pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES]; | |
4440 | +#define pending_handle(_id, _idx, _i) \ | |
4441 | + (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \ | |
4442 | + + (_i)]) | |
4443 | + | |
4444 | + | |
4445 | +static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/ | |
4446 | + | |
4447 | +#define BLKTAP_MINOR 0 /*/dev/xen/blktap has a dynamic major */ | |
4448 | +#define BLKTAP_DEV_DIR "/dev/xen" | |
4449 | + | |
4450 | +static int blktap_major; | |
4451 | + | |
4452 | +/* blktap IOCTLs: */ | |
4453 | +#define BLKTAP_IOCTL_KICK_FE 1 | |
4454 | +#define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */ | |
4455 | +#define BLKTAP_IOCTL_SETMODE 3 | |
4456 | +#define BLKTAP_IOCTL_SENDPID 4 | |
4457 | +#define BLKTAP_IOCTL_NEWINTF 5 | |
4458 | +#define BLKTAP_IOCTL_MINOR 6 | |
4459 | +#define BLKTAP_IOCTL_MAJOR 7 | |
4460 | +#define BLKTAP_QUERY_ALLOC_REQS 8 | |
4461 | +#define BLKTAP_IOCTL_FREEINTF 9 | |
4462 | +#define BLKTAP_IOCTL_NEWINTF_EXT 50 | |
4463 | +#define BLKTAP_IOCTL_PRINT_IDXS 100 | |
4464 | + | |
4465 | +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ | |
4466 | +#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ | |
4467 | +#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 | |
4468 | +#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */ | |
4469 | + | |
4470 | +#define BLKTAP_MODE_INTERPOSE \ | |
4471 | + (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) | |
4472 | + | |
4473 | + | |
4474 | +static inline int BLKTAP_MODE_VALID(unsigned long arg) | |
4475 | +{ | |
4476 | + return ((arg == BLKTAP_MODE_PASSTHROUGH ) || | |
4477 | + (arg == BLKTAP_MODE_INTERCEPT_FE) || | |
4478 | + (arg == BLKTAP_MODE_INTERPOSE )); | |
4479 | +} | |
4480 | + | |
4481 | +/* Requests passing through the tap to userspace are re-assigned an ID. | |
4482 | + * We must record a mapping between the BE [IDX,ID] tuple and the userspace | |
4483 | + * ring ID. | |
4484 | + */ | |
4485 | + | |
4486 | +static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx) | |
4487 | +{ | |
4488 | + return ((fe_dom << 16) | MASK_PEND_IDX(idx)); | |
4489 | +} | |
4490 | + | |
4491 | +extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id) | |
4492 | +{ | |
4493 | + return (PEND_RING_IDX)(id & 0x0000ffff); | |
4494 | +} | |
4495 | + | |
4496 | +extern inline int ID_TO_MIDX(unsigned long id) | |
4497 | +{ | |
4498 | + return (int)(id >> 16); | |
4499 | +} | |
4500 | + | |
4501 | +#define INVALID_REQ 0xdead0000 | |
4502 | + | |
4503 | +/*TODO: Convert to a free list*/ | |
4504 | +static inline int GET_NEXT_REQ(unsigned long *idx_map) | |
4505 | +{ | |
4506 | + int i; | |
4507 | + for (i = 0; i < MAX_PENDING_REQS; i++) | |
4508 | + if (idx_map[i] == INVALID_REQ) | |
4509 | + return i; | |
4510 | + | |
4511 | + return INVALID_REQ; | |
4512 | +} | |
4513 | + | |
4514 | +static inline int OFFSET_TO_USR_IDX(int offset) | |
4515 | +{ | |
4516 | + return offset / BLKIF_MAX_SEGMENTS_PER_REQUEST; | |
4517 | +} | |
4518 | + | |
4519 | +static inline int OFFSET_TO_SEG(int offset) | |
4520 | +{ | |
4521 | + return offset % BLKIF_MAX_SEGMENTS_PER_REQUEST; | |
4522 | +} | |
4523 | + | |
4524 | + | |
4525 | +#define BLKTAP_INVALID_HANDLE(_g) \ | |
4526 | + (((_g->kernel) == INVALID_GRANT_HANDLE) && \ | |
4527 | + ((_g->user) == INVALID_GRANT_HANDLE)) | |
4528 | + | |
4529 | +#define BLKTAP_INVALIDATE_HANDLE(_g) do { \ | |
4530 | + (_g)->kernel = INVALID_GRANT_HANDLE; (_g)->user = INVALID_GRANT_HANDLE; \ | |
4531 | + } while(0) | |
4532 | + | |
4533 | + | |
4534 | +/****************************************************************** | |
4535 | + * BLKTAP VM OPS | |
4536 | + */ | |
4537 | + | |
4538 | +static struct page *blktap_nopage(struct vm_area_struct *vma, | |
4539 | + unsigned long address, | |
4540 | + int *type) | |
4541 | +{ | |
4542 | + /* | |
4543 | + * if the page has not been mapped in by the driver then return | |
4544 | + * NOPAGE_SIGBUS to the domain. | |
4545 | + */ | |
4546 | + | |
4547 | + return NOPAGE_SIGBUS; | |
4548 | +} | |
4549 | + | |
4550 | +static pte_t blktap_clear_pte(struct vm_area_struct *vma, | |
4551 | + unsigned long uvaddr, | |
4552 | + pte_t *ptep, int is_fullmm) | |
4553 | +{ | |
4554 | + pte_t copy; | |
4555 | + tap_blkif_t *info; | |
4556 | + int offset, seg, usr_idx, pending_idx, mmap_idx; | |
4557 | + unsigned long uvstart = vma->vm_start + (RING_PAGES << PAGE_SHIFT); | |
4558 | + unsigned long kvaddr; | |
4559 | + struct page **map; | |
4560 | + struct page *pg; | |
4561 | + struct grant_handle_pair *khandle; | |
4562 | + struct gnttab_unmap_grant_ref unmap[2]; | |
4563 | + int count = 0; | |
4564 | + | |
4565 | + /* | |
4566 | + * If the address is before the start of the grant mapped region or | |
4567 | + * if vm_file is NULL (meaning mmap failed and we have nothing to do) | |
4568 | + */ | |
4569 | + if (uvaddr < uvstart || vma->vm_file == NULL) | |
4570 | + return ptep_get_and_clear_full(vma->vm_mm, uvaddr, | |
4571 | + ptep, is_fullmm); | |
4572 | + | |
4573 | + info = vma->vm_file->private_data; | |
4574 | + map = vma->vm_private_data; | |
4575 | + | |
4576 | + /* TODO Should these be changed to if statements? */ | |
4577 | + BUG_ON(!info); | |
4578 | + BUG_ON(!info->idx_map); | |
4579 | + BUG_ON(!map); | |
4580 | + | |
4581 | + offset = (int) ((uvaddr - uvstart) >> PAGE_SHIFT); | |
4582 | + usr_idx = OFFSET_TO_USR_IDX(offset); | |
4583 | + seg = OFFSET_TO_SEG(offset); | |
4584 | + | |
4585 | + pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx])); | |
4586 | + mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); | |
4587 | + | |
4588 | + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg); | |
4589 | + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); | |
4590 | + ClearPageReserved(pg); | |
4591 | + map[offset + RING_PAGES] = NULL; | |
4592 | + | |
4593 | + khandle = &pending_handle(mmap_idx, pending_idx, seg); | |
4594 | + | |
4595 | + if (khandle->kernel != INVALID_GRANT_HANDLE) { | |
4596 | + gnttab_set_unmap_op(&unmap[count], kvaddr, | |
4597 | + GNTMAP_host_map, khandle->kernel); | |
4598 | + count++; | |
4599 | + | |
4600 | + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, | |
4601 | + INVALID_P2M_ENTRY); | |
4602 | + } | |
4603 | + | |
4604 | + if (khandle->user != INVALID_GRANT_HANDLE) { | |
4605 | + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); | |
4606 | + | |
4607 | + copy = *ptep; | |
4608 | + gnttab_set_unmap_op(&unmap[count], virt_to_machine(ptep), | |
4609 | + GNTMAP_host_map | |
4610 | + | GNTMAP_application_map | |
4611 | + | GNTMAP_contains_pte, | |
4612 | + khandle->user); | |
4613 | + count++; | |
4614 | + } else { | |
4615 | + BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap)); | |
4616 | + | |
4617 | + /* USING SHADOW PAGE TABLES. */ | |
4618 | + copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep, | |
4619 | + is_fullmm); | |
4620 | + } | |
4621 | + | |
4622 | + if (count) { | |
4623 | + BLKTAP_INVALIDATE_HANDLE(khandle); | |
4624 | + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, | |
4625 | + unmap, count)) | |
4626 | + BUG(); | |
4627 | + } | |
4628 | + | |
4629 | + return copy; | |
4630 | +} | |
4631 | + | |
4632 | +struct vm_operations_struct blktap_vm_ops = { | |
4633 | + nopage: blktap_nopage, | |
4634 | + zap_pte: blktap_clear_pte, | |
4635 | +}; | |
4636 | + | |
4637 | +/****************************************************************** | |
4638 | + * BLKTAP FILE OPS | |
4639 | + */ | |
4640 | + | |
4641 | +/*Function Declarations*/ | |
4642 | +static tap_blkif_t *get_next_free_dev(void); | |
4643 | +static int blktap_open(struct inode *inode, struct file *filp); | |
4644 | +static int blktap_release(struct inode *inode, struct file *filp); | |
4645 | +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma); | |
4646 | +static int blktap_ioctl(struct inode *inode, struct file *filp, | |
4647 | + unsigned int cmd, unsigned long arg); | |
4648 | +static unsigned int blktap_poll(struct file *file, poll_table *wait); | |
4649 | + | |
4650 | +static const struct file_operations blktap_fops = { | |
4651 | + .owner = THIS_MODULE, | |
4652 | + .poll = blktap_poll, | |
4653 | + .ioctl = blktap_ioctl, | |
4654 | + .open = blktap_open, | |
4655 | + .release = blktap_release, | |
4656 | + .mmap = blktap_mmap, | |
4657 | +}; | |
4658 | + | |
4659 | + | |
4660 | +static tap_blkif_t *get_next_free_dev(void) | |
4661 | +{ | |
4662 | + struct class *class; | |
4663 | + tap_blkif_t *info; | |
4664 | + int minor; | |
4665 | + | |
4666 | + /* | |
4667 | + * This is called only from the ioctl, which | |
4668 | + * means we should always have interrupts enabled. | |
4669 | + */ | |
4670 | + BUG_ON(irqs_disabled()); | |
4671 | + | |
4672 | + spin_lock_irq(&pending_free_lock); | |
4673 | + | |
4674 | + /* tapfds[0] is always NULL */ | |
4675 | + | |
4676 | + for (minor = 1; minor < blktap_next_minor; minor++) { | |
4677 | + info = tapfds[minor]; | |
4678 | + /* we could have failed a previous attempt. */ | |
4679 | + if (!info || | |
4680 | + ((info->dev_inuse == 0) && | |
4681 | + (info->dev_pending == 0)) ) { | |
4682 | + info->dev_pending = 1; | |
4683 | + goto found; | |
4684 | + } | |
4685 | + } | |
4686 | + info = NULL; | |
4687 | + minor = -1; | |
4688 | + | |
4689 | + /* | |
4690 | + * We didn't find free device. If we can still allocate | |
4691 | + * more, then we grab the next device minor that is | |
4692 | + * available. This is done while we are still under | |
4693 | + * the protection of the pending_free_lock. | |
4694 | + */ | |
4695 | + if (blktap_next_minor < MAX_TAP_DEV) | |
4696 | + minor = blktap_next_minor++; | |
4697 | +found: | |
4698 | + spin_unlock_irq(&pending_free_lock); | |
4699 | + | |
4700 | + if (!info && minor > 0) { | |
4701 | + info = kzalloc(sizeof(*info), GFP_KERNEL); | |
4702 | + if (unlikely(!info)) { | |
4703 | + /* | |
4704 | + * If we failed here, try to put back | |
4705 | + * the next minor number. But if one | |
4706 | + * was just taken, then we just lose this | |
4707 | + * minor. We can try to allocate this | |
4708 | + * minor again later. | |
4709 | + */ | |
4710 | + spin_lock_irq(&pending_free_lock); | |
4711 | + if (blktap_next_minor == minor+1) | |
4712 | + blktap_next_minor--; | |
4713 | + spin_unlock_irq(&pending_free_lock); | |
4714 | + goto out; | |
4715 | + } | |
4716 | + | |
4717 | + info->minor = minor; | |
4718 | + /* | |
4719 | + * Make sure that we have a minor before others can | |
4720 | + * see us. | |
4721 | + */ | |
4722 | + wmb(); | |
4723 | + tapfds[minor] = info; | |
4724 | + | |
4725 | + if ((class = get_xen_class()) != NULL) | |
4726 | + class_device_create(class, NULL, | |
4727 | + MKDEV(blktap_major, minor), NULL, | |
4728 | + "blktap%d", minor); | |
4729 | + } | |
4730 | + | |
4731 | +out: | |
4732 | + return info; | |
4733 | +} | |
4734 | + | |
4735 | +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) | |
4736 | +{ | |
4737 | + tap_blkif_t *info; | |
4738 | + int i; | |
4739 | + | |
4740 | + for (i = 1; i < blktap_next_minor; i++) { | |
4741 | + info = tapfds[i]; | |
4742 | + if ( info && | |
4743 | + (info->trans.domid == domid) && | |
4744 | + (info->trans.busid == xenbus_id) ) { | |
4745 | + info->blkif = blkif; | |
4746 | + info->status = RUNNING; | |
4747 | + return i; | |
4748 | + } | |
4749 | + } | |
4750 | + return -1; | |
4751 | +} | |
4752 | + | |
4753 | +void signal_tapdisk(int idx) | |
4754 | +{ | |
4755 | + tap_blkif_t *info; | |
4756 | + struct task_struct *ptask; | |
4757 | + | |
4758 | + /* | |
4759 | + * if the userland tools set things up wrong, this could be negative; | |
4760 | + * just don't try to signal in this case | |
4761 | + */ | |
4762 | + if (idx < 0) | |
4763 | + return; | |
4764 | + | |
4765 | + info = tapfds[idx]; | |
4766 | + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) | |
4767 | + return; | |
4768 | + | |
4769 | + if (info->pid > 0) { | |
4770 | + ptask = find_task_by_pid(info->pid); | |
4771 | + if (ptask) | |
4772 | + info->status = CLEANSHUTDOWN; | |
4773 | + } | |
4774 | + info->blkif = NULL; | |
4775 | + | |
4776 | + return; | |
4777 | +} | |
4778 | + | |
4779 | +static int blktap_open(struct inode *inode, struct file *filp) | |
4780 | +{ | |
4781 | + blkif_sring_t *sring; | |
4782 | + int idx = iminor(inode) - BLKTAP_MINOR; | |
4783 | + tap_blkif_t *info; | |
4784 | + int i; | |
4785 | + | |
4786 | + /* ctrl device, treat differently */ | |
4787 | + if (!idx) | |
4788 | + return 0; | |
4789 | + | |
4790 | + info = tapfds[idx]; | |
4791 | + | |
4792 | + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) { | |
4793 | + WPRINTK("Unable to open device /dev/xen/blktap%d\n", | |
4794 | + idx); | |
4795 | + return -ENODEV; | |
4796 | + } | |
4797 | + | |
4798 | + DPRINTK("Opening device /dev/xen/blktap%d\n",idx); | |
4799 | + | |
4800 | + /*Only one process can access device at a time*/ | |
4801 | + if (test_and_set_bit(0, &info->dev_inuse)) | |
4802 | + return -EBUSY; | |
4803 | + | |
4804 | + info->dev_pending = 0; | |
4805 | + | |
4806 | + /* Allocate the fe ring. */ | |
4807 | + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); | |
4808 | + if (sring == NULL) | |
4809 | + goto fail_nomem; | |
4810 | + | |
4811 | + SetPageReserved(virt_to_page(sring)); | |
4812 | + | |
4813 | + SHARED_RING_INIT(sring); | |
4814 | + FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE); | |
4815 | + | |
4816 | + filp->private_data = info; | |
4817 | + info->vma = NULL; | |
4818 | + | |
4819 | + info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, | |
4820 | + GFP_KERNEL); | |
4821 | + | |
4822 | + if (info->idx_map == NULL) | |
4823 | + goto fail_nomem; | |
4824 | + | |
4825 | + if (idx > 0) { | |
4826 | + init_waitqueue_head(&info->wait); | |
4827 | + for (i = 0; i < MAX_PENDING_REQS; i++) | |
4828 | + info->idx_map[i] = INVALID_REQ; | |
4829 | + } | |
4830 | + | |
4831 | + DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx); | |
4832 | + return 0; | |
4833 | + | |
4834 | + fail_nomem: | |
4835 | + return -ENOMEM; | |
4836 | +} | |
4837 | + | |
4838 | +static int blktap_release(struct inode *inode, struct file *filp) | |
4839 | +{ | |
4840 | + tap_blkif_t *info = filp->private_data; | |
4841 | + | |
4842 | + /* check for control device */ | |
4843 | + if (!info) | |
4844 | + return 0; | |
4845 | + | |
4846 | + info->dev_inuse = 0; | |
4847 | + DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor); | |
4848 | + | |
4849 | + /* Free the ring page. */ | |
4850 | + ClearPageReserved(virt_to_page(info->ufe_ring.sring)); | |
4851 | + free_page((unsigned long) info->ufe_ring.sring); | |
4852 | + | |
4853 | + /* Clear any active mappings and free foreign map table */ | |
4854 | + if (info->vma) { | |
4855 | + struct mm_struct *mm = info->vma->vm_mm; | |
4856 | + | |
4857 | + down_write(&mm->mmap_sem); | |
4858 | + zap_page_range( | |
4859 | + info->vma, info->vma->vm_start, | |
4860 | + info->vma->vm_end - info->vma->vm_start, NULL); | |
4861 | + up_write(&mm->mmap_sem); | |
4862 | + | |
4863 | + kfree(info->vma->vm_private_data); | |
4864 | + | |
4865 | + info->vma = NULL; | |
4866 | + } | |
4867 | + | |
4868 | + if (info->idx_map) { | |
4869 | + kfree(info->idx_map); | |
4870 | + info->idx_map = NULL; | |
4871 | + } | |
4872 | + | |
4873 | + if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) { | |
4874 | + if (info->blkif->xenblkd != NULL) { | |
4875 | + kthread_stop(info->blkif->xenblkd); | |
4876 | + info->blkif->xenblkd = NULL; | |
4877 | + } | |
4878 | + info->status = CLEANSHUTDOWN; | |
4879 | + } | |
4880 | + | |
4881 | + return 0; | |
4882 | +} | |
4883 | + | |
4884 | + | |
4885 | +/* Note on mmap: | |
4886 | + * We need to map pages to user space in a way that will allow the block | |
4887 | + * subsystem set up direct IO to them. This couldn't be done before, because | |
4888 | + * there isn't really a sane way to translate a user virtual address down to a | |
4889 | + * physical address when the page belongs to another domain. | |
4890 | + * | |
4891 | + * My first approach was to map the page in to kernel memory, add an entry | |
4892 | + * for it in the physical frame list (using alloc_lomem_region as in blkback) | |
4893 | + * and then attempt to map that page up to user space. This is disallowed | |
4894 | + * by xen though, which realizes that we don't really own the machine frame | |
4895 | + * underlying the physical page. | |
4896 | + * | |
4897 | + * The new approach is to provide explicit support for this in xen linux. | |
4898 | + * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages | |
4899 | + * mapped from other vms. vma->vm_private_data is set up as a mapping | |
4900 | + * from pages to actual page structs. There is a new clause in get_user_pages | |
4901 | + * that does the right thing for this sort of mapping. | |
4902 | + */ | |
4903 | +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) | |
4904 | +{ | |
4905 | + int size; | |
4906 | + struct page **map; | |
4907 | + int i; | |
4908 | + tap_blkif_t *info = filp->private_data; | |
4909 | + int ret; | |
4910 | + | |
4911 | + if (info == NULL) { | |
4912 | + WPRINTK("blktap: mmap, retrieving idx failed\n"); | |
4913 | + return -ENOMEM; | |
4914 | + } | |
4915 | + | |
4916 | + vma->vm_flags |= VM_RESERVED; | |
4917 | + vma->vm_ops = &blktap_vm_ops; | |
4918 | + | |
4919 | + size = vma->vm_end - vma->vm_start; | |
4920 | + if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) { | |
4921 | + WPRINTK("you _must_ map exactly %d pages!\n", | |
4922 | + mmap_pages + RING_PAGES); | |
4923 | + return -EAGAIN; | |
4924 | + } | |
4925 | + | |
4926 | + size >>= PAGE_SHIFT; | |
4927 | + info->rings_vstart = vma->vm_start; | |
4928 | + info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT); | |
4929 | + | |
4930 | + /* Map the ring pages to the start of the region and reserve it. */ | |
4931 | + if (xen_feature(XENFEAT_auto_translated_physmap)) | |
4932 | + ret = vm_insert_page(vma, vma->vm_start, | |
4933 | + virt_to_page(info->ufe_ring.sring)); | |
4934 | + else | |
4935 | + ret = remap_pfn_range(vma, vma->vm_start, | |
4936 | + __pa(info->ufe_ring.sring) >> PAGE_SHIFT, | |
4937 | + PAGE_SIZE, vma->vm_page_prot); | |
4938 | + if (ret) { | |
4939 | + WPRINTK("Mapping user ring failed!\n"); | |
4940 | + goto fail; | |
4941 | + } | |
4942 | + | |
4943 | + /* Mark this VM as containing foreign pages, and set up mappings. */ | |
4944 | + map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) | |
4945 | + * sizeof(struct page *), | |
4946 | + GFP_KERNEL); | |
4947 | + if (map == NULL) { | |
4948 | + WPRINTK("Couldn't alloc VM_FOREIGN map.\n"); | |
4949 | + goto fail; | |
4950 | + } | |
4951 | + | |
4952 | + for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++) | |
4953 | + map[i] = NULL; | |
4954 | + | |
4955 | + vma->vm_private_data = map; | |
4956 | + vma->vm_flags |= VM_FOREIGN; | |
4957 | + vma->vm_flags |= VM_DONTCOPY; | |
4958 | + | |
4959 | +#ifdef CONFIG_X86 | |
4960 | + vma->vm_mm->context.has_foreign_mappings = 1; | |
4961 | +#endif | |
4962 | + | |
4963 | + info->vma = vma; | |
4964 | + info->ring_ok = 1; | |
4965 | + return 0; | |
4966 | + fail: | |
4967 | + /* Clear any active mappings. */ | |
4968 | + zap_page_range(vma, vma->vm_start, | |
4969 | + vma->vm_end - vma->vm_start, NULL); | |
4970 | + | |
4971 | + return -ENOMEM; | |
4972 | +} | |
4973 | + | |
4974 | + | |
4975 | +static int blktap_ioctl(struct inode *inode, struct file *filp, | |
4976 | + unsigned int cmd, unsigned long arg) | |
4977 | +{ | |
4978 | + tap_blkif_t *info = filp->private_data; | |
4979 | + | |
4980 | + switch(cmd) { | |
4981 | + case BLKTAP_IOCTL_KICK_FE: | |
4982 | + { | |
4983 | + /* There are fe messages to process. */ | |
4984 | + return blktap_read_ufe_ring(info); | |
4985 | + } | |
4986 | + case BLKTAP_IOCTL_SETMODE: | |
4987 | + { | |
4988 | + if (info) { | |
4989 | + if (BLKTAP_MODE_VALID(arg)) { | |
4990 | + info->mode = arg; | |
4991 | + /* XXX: may need to flush rings here. */ | |
4992 | + DPRINTK("blktap: set mode to %lx\n", | |
4993 | + arg); | |
4994 | + return 0; | |
4995 | + } | |
4996 | + } | |
4997 | + return 0; | |
4998 | + } | |
4999 | + case BLKTAP_IOCTL_PRINT_IDXS: | |
5000 | + { | |
5001 | + if (info) { | |
5002 | + printk("User Rings: \n-----------\n"); | |
5003 | + printk("UF: rsp_cons: %2d, req_prod_prv: %2d " | |
5004 | + "| req_prod: %2d, rsp_prod: %2d\n", | |
5005 | + info->ufe_ring.rsp_cons, | |
5006 | + info->ufe_ring.req_prod_pvt, | |
5007 | + info->ufe_ring.sring->req_prod, | |
5008 | + info->ufe_ring.sring->rsp_prod); | |
5009 | + } | |
5010 | + return 0; | |
5011 | + } | |
5012 | + case BLKTAP_IOCTL_SENDPID: | |
5013 | + { | |
5014 | + if (info) { | |
5015 | + info->pid = (pid_t)arg; | |
5016 | + DPRINTK("blktap: pid received %d\n", | |
5017 | + info->pid); | |
5018 | + } | |
5019 | + return 0; | |
5020 | + } | |
5021 | + case BLKTAP_IOCTL_NEWINTF: | |
5022 | + { | |
5023 | + uint64_t val = (uint64_t)arg; | |
5024 | + domid_translate_t *tr = (domid_translate_t *)&val; | |
5025 | + | |
5026 | + DPRINTK("NEWINTF Req for domid %d and bus id %d\n", | |
5027 | + tr->domid, tr->busid); | |
5028 | + info = get_next_free_dev(); | |
5029 | + if (!info) { | |
5030 | + WPRINTK("Error initialising /dev/xen/blktap - " | |
5031 | + "No more devices\n"); | |
5032 | + return -1; | |
5033 | + } | |
5034 | + info->trans.domid = tr->domid; | |
5035 | + info->trans.busid = tr->busid; | |
5036 | + return info->minor; | |
5037 | + } | |
5038 | + case BLKTAP_IOCTL_NEWINTF_EXT: | |
5039 | + { | |
5040 | + void __user *udata = (void __user *) arg; | |
5041 | + domid_translate_ext_t tr; | |
5042 | + | |
5043 | + if (copy_from_user(&tr, udata, sizeof(domid_translate_ext_t))) | |
5044 | + return -EFAULT; | |
5045 | + | |
5046 | + DPRINTK("NEWINTF_EXT Req for domid %d and bus id %d\n", | |
5047 | + tr.domid, tr.busid); | |
5048 | + info = get_next_free_dev(); | |
5049 | + if (!info) { | |
5050 | + WPRINTK("Error initialising /dev/xen/blktap - " | |
5051 | + "No more devices\n"); | |
5052 | + return -1; | |
5053 | + } | |
5054 | + info->trans.domid = tr.domid; | |
5055 | + info->trans.busid = tr.busid; | |
5056 | + return info->minor; | |
5057 | + } | |
5058 | + case BLKTAP_IOCTL_FREEINTF: | |
5059 | + { | |
5060 | + unsigned long dev = arg; | |
5061 | + unsigned long flags; | |
5062 | + | |
5063 | + info = tapfds[dev]; | |
5064 | + | |
5065 | + if ((dev > MAX_TAP_DEV) || !info) | |
5066 | + return 0; /* should this be an error? */ | |
5067 | + | |
5068 | + spin_lock_irqsave(&pending_free_lock, flags); | |
5069 | + if (info->dev_pending) | |
5070 | + info->dev_pending = 0; | |
5071 | + spin_unlock_irqrestore(&pending_free_lock, flags); | |
5072 | + | |
5073 | + return 0; | |
5074 | + } | |
5075 | + case BLKTAP_IOCTL_MINOR: | |
5076 | + { | |
5077 | + unsigned long dev = arg; | |
5078 | + | |
5079 | + info = tapfds[dev]; | |
5080 | + | |
5081 | + if ((dev > MAX_TAP_DEV) || !info) | |
5082 | + return -EINVAL; | |
5083 | + | |
5084 | + return info->minor; | |
5085 | + } | |
5086 | + case BLKTAP_IOCTL_MAJOR: | |
5087 | + return blktap_major; | |
5088 | + | |
5089 | + case BLKTAP_QUERY_ALLOC_REQS: | |
5090 | + { | |
5091 | + WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n", | |
5092 | + alloc_pending_reqs, blkif_reqs); | |
5093 | + return (alloc_pending_reqs/blkif_reqs) * 100; | |
5094 | + } | |
5095 | + } | |
5096 | + return -ENOIOCTLCMD; | |
5097 | +} | |
5098 | + | |
5099 | +static unsigned int blktap_poll(struct file *filp, poll_table *wait) | |
5100 | +{ | |
5101 | + tap_blkif_t *info = filp->private_data; | |
5102 | + | |
5103 | + /* do not work on the control device */ | |
5104 | + if (!info) | |
5105 | + return 0; | |
5106 | + | |
5107 | + poll_wait(filp, &info->wait, wait); | |
5108 | + if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) { | |
5109 | + RING_PUSH_REQUESTS(&info->ufe_ring); | |
5110 | + return POLLIN | POLLRDNORM; | |
5111 | + } | |
5112 | + return 0; | |
5113 | +} | |
5114 | + | |
5115 | +void blktap_kick_user(int idx) | |
5116 | +{ | |
5117 | + tap_blkif_t *info; | |
5118 | + | |
5119 | + info = tapfds[idx]; | |
5120 | + | |
5121 | + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) | |
5122 | + return; | |
5123 | + | |
5124 | + wake_up_interruptible(&info->wait); | |
5125 | + | |
5126 | + return; | |
5127 | +} | |
5128 | + | |
5129 | +static int do_block_io_op(blkif_t *blkif); | |
5130 | +static void dispatch_rw_block_io(blkif_t *blkif, | |
5131 | + blkif_request_t *req, | |
5132 | + pending_req_t *pending_req); | |
5133 | +static void make_response(blkif_t *blkif, u64 id, | |
5134 | + unsigned short op, int st); | |
5135 | + | |
5136 | +/****************************************************************** | |
5137 | + * misc small helpers | |
5138 | + */ | |
5139 | +static int req_increase(void) | |
5140 | +{ | |
5141 | + int i, j; | |
5142 | + | |
5143 | + if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock) | |
5144 | + return -EINVAL; | |
5145 | + | |
5146 | + pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) | |
5147 | + * blkif_reqs, GFP_KERNEL); | |
5148 | + foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages); | |
5149 | + | |
5150 | + if (!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc]) | |
5151 | + goto out_of_memory; | |
5152 | + | |
5153 | + DPRINTK("%s: reqs=%d, pages=%d\n", | |
5154 | + __FUNCTION__, blkif_reqs, mmap_pages); | |
5155 | + | |
5156 | + for (i = 0; i < MAX_PENDING_REQS; i++) { | |
5157 | + list_add_tail(&pending_reqs[mmap_alloc][i].free_list, | |
5158 | + &pending_free); | |
5159 | + pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc; | |
5160 | + for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++) | |
5161 | + BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc, | |
5162 | + i, j)); | |
5163 | + } | |
5164 | + | |
5165 | + mmap_alloc++; | |
5166 | + DPRINTK("# MMAPs increased to %d\n",mmap_alloc); | |
5167 | + return 0; | |
5168 | + | |
5169 | + out_of_memory: | |
5170 | + free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages); | |
5171 | + kfree(pending_reqs[mmap_alloc]); | |
5172 | + WPRINTK("%s: out of memory\n", __FUNCTION__); | |
5173 | + return -ENOMEM; | |
5174 | +} | |
5175 | + | |
5176 | +static void mmap_req_del(int mmap) | |
5177 | +{ | |
5178 | + BUG_ON(!spin_is_locked(&pending_free_lock)); | |
5179 | + | |
5180 | + kfree(pending_reqs[mmap]); | |
5181 | + pending_reqs[mmap] = NULL; | |
5182 | + | |
5183 | + free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages); | |
5184 | + foreign_pages[mmap] = NULL; | |
5185 | + | |
5186 | + mmap_lock = 0; | |
5187 | + DPRINTK("# MMAPs decreased to %d\n",mmap_alloc); | |
5188 | + mmap_alloc--; | |
5189 | +} | |
5190 | + | |
5191 | +static pending_req_t* alloc_req(void) | |
5192 | +{ | |
5193 | + pending_req_t *req = NULL; | |
5194 | + unsigned long flags; | |
5195 | + | |
5196 | + spin_lock_irqsave(&pending_free_lock, flags); | |
5197 | + | |
5198 | + if (!list_empty(&pending_free)) { | |
5199 | + req = list_entry(pending_free.next, pending_req_t, free_list); | |
5200 | + list_del(&req->free_list); | |
5201 | + } | |
5202 | + | |
5203 | + if (req) { | |
5204 | + req->inuse = 1; | |
5205 | + alloc_pending_reqs++; | |
5206 | + } | |
5207 | + spin_unlock_irqrestore(&pending_free_lock, flags); | |
5208 | + | |
5209 | + return req; | |
5210 | +} | |
5211 | + | |
5212 | +static void free_req(pending_req_t *req) | |
5213 | +{ | |
5214 | + unsigned long flags; | |
5215 | + int was_empty; | |
5216 | + | |
5217 | + spin_lock_irqsave(&pending_free_lock, flags); | |
5218 | + | |
5219 | + alloc_pending_reqs--; | |
5220 | + req->inuse = 0; | |
5221 | + if (mmap_lock && (req->mem_idx == mmap_alloc-1)) { | |
5222 | + mmap_inuse--; | |
5223 | + if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1); | |
5224 | + spin_unlock_irqrestore(&pending_free_lock, flags); | |
5225 | + return; | |
5226 | + } | |
5227 | + was_empty = list_empty(&pending_free); | |
5228 | + list_add(&req->free_list, &pending_free); | |
5229 | + | |
5230 | + spin_unlock_irqrestore(&pending_free_lock, flags); | |
5231 | + | |
5232 | + if (was_empty) | |
5233 | + wake_up(&pending_free_wq); | |
5234 | +} | |
5235 | + | |
5236 | +static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, | |
5237 | + int tapidx) | |
5238 | +{ | |
5239 | + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; | |
5240 | + unsigned int i, invcount = 0, locked = 0; | |
5241 | + struct grant_handle_pair *khandle; | |
5242 | + uint64_t ptep; | |
5243 | + int ret, mmap_idx; | |
5244 | + unsigned long kvaddr, uvaddr; | |
5245 | + tap_blkif_t *info; | |
5246 | + struct mm_struct *mm; | |
5247 | + | |
5248 | + | |
5249 | + info = tapfds[tapidx]; | |
5250 | + | |
5251 | + if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) { | |
5252 | + WPRINTK("fast_flush: Couldn't get info!\n"); | |
5253 | + return; | |
5254 | + } | |
5255 | + | |
5256 | + mm = info->vma ? info->vma->vm_mm : NULL; | |
5257 | + | |
5258 | + if (info->vma != NULL && | |
5259 | + xen_feature(XENFEAT_auto_translated_physmap)) { | |
5260 | + down_write(&mm->mmap_sem); | |
5261 | + zap_page_range(info->vma, | |
5262 | + MMAP_VADDR(info->user_vstart, u_idx, 0), | |
5263 | + req->nr_pages << PAGE_SHIFT, NULL); | |
5264 | + up_write(&mm->mmap_sem); | |
5265 | + return; | |
5266 | + } | |
5267 | + | |
5268 | + mmap_idx = req->mem_idx; | |
5269 | + | |
5270 | + for (i = 0; i < req->nr_pages; i++) { | |
5271 | + kvaddr = idx_to_kaddr(mmap_idx, k_idx, i); | |
5272 | + uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i); | |
5273 | + | |
5274 | + khandle = &pending_handle(mmap_idx, k_idx, i); | |
5275 | + | |
5276 | + if (khandle->kernel != INVALID_GRANT_HANDLE) { | |
5277 | + gnttab_set_unmap_op(&unmap[invcount], | |
5278 | + idx_to_kaddr(mmap_idx, k_idx, i), | |
5279 | + GNTMAP_host_map, khandle->kernel); | |
5280 | + invcount++; | |
5281 | + | |
5282 | + set_phys_to_machine( | |
5283 | + __pa(idx_to_kaddr(mmap_idx, k_idx, i)) | |
5284 | + >> PAGE_SHIFT, INVALID_P2M_ENTRY); | |
5285 | + } | |
5286 | + | |
5287 | + if (khandle->user != INVALID_GRANT_HANDLE) { | |
5288 | + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); | |
5289 | + if (!locked++) | |
5290 | + down_write(&mm->mmap_sem); | |
5291 | + if (create_lookup_pte_addr( | |
5292 | + mm, | |
5293 | + MMAP_VADDR(info->user_vstart, u_idx, i), | |
5294 | + &ptep) !=0) { | |
5295 | + up_write(&mm->mmap_sem); | |
5296 | + WPRINTK("Couldn't get a pte addr!\n"); | |
5297 | + return; | |
5298 | + } | |
5299 | + | |
5300 | + gnttab_set_unmap_op(&unmap[invcount], ptep, | |
5301 | + GNTMAP_host_map | |
5302 | + | GNTMAP_application_map | |
5303 | + | GNTMAP_contains_pte, | |
5304 | + khandle->user); | |
5305 | + invcount++; | |
5306 | + } | |
5307 | + | |
5308 | + BLKTAP_INVALIDATE_HANDLE(khandle); | |
5309 | + } | |
5310 | + ret = HYPERVISOR_grant_table_op( | |
5311 | + GNTTABOP_unmap_grant_ref, unmap, invcount); | |
5312 | + BUG_ON(ret); | |
5313 | + | |
5314 | + if (info->vma != NULL && | |
5315 | + !xen_feature(XENFEAT_auto_translated_physmap)) { | |
5316 | + if (!locked++) | |
5317 | + down_write(&mm->mmap_sem); | |
5318 | + zap_page_range(info->vma, | |
5319 | + MMAP_VADDR(info->user_vstart, u_idx, 0), | |
5320 | + req->nr_pages << PAGE_SHIFT, NULL); | |
5321 | + } | |
5322 | + | |
5323 | + if (locked) | |
5324 | + up_write(&mm->mmap_sem); | |
5325 | +} | |
5326 | + | |
5327 | +/****************************************************************** | |
5328 | + * SCHEDULER FUNCTIONS | |
5329 | + */ | |
5330 | + | |
5331 | +static void print_stats(blkif_t *blkif) | |
5332 | +{ | |
5333 | + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n", | |
5334 | + current->comm, blkif->st_oo_req, | |
5335 | + blkif->st_rd_req, blkif->st_wr_req); | |
5336 | + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); | |
5337 | + blkif->st_rd_req = 0; | |
5338 | + blkif->st_wr_req = 0; | |
5339 | + blkif->st_oo_req = 0; | |
5340 | +} | |
5341 | + | |
5342 | +int tap_blkif_schedule(void *arg) | |
5343 | +{ | |
5344 | + blkif_t *blkif = arg; | |
5345 | + | |
5346 | + blkif_get(blkif); | |
5347 | + | |
5348 | + if (debug_lvl) | |
5349 | + printk(KERN_DEBUG "%s: started\n", current->comm); | |
5350 | + | |
5351 | + while (!kthread_should_stop()) { | |
5352 | + if (try_to_freeze()) | |
5353 | + continue; | |
5354 | + | |
5355 | + wait_event_interruptible( | |
5356 | + blkif->wq, | |
5357 | + blkif->waiting_reqs || kthread_should_stop()); | |
5358 | + wait_event_interruptible( | |
5359 | + pending_free_wq, | |
5360 | + !list_empty(&pending_free) || kthread_should_stop()); | |
5361 | + | |
5362 | + blkif->waiting_reqs = 0; | |
5363 | + smp_mb(); /* clear flag *before* checking for work */ | |
5364 | + | |
5365 | + if (do_block_io_op(blkif)) | |
5366 | + blkif->waiting_reqs = 1; | |
5367 | + | |
5368 | + if (log_stats && time_after(jiffies, blkif->st_print)) | |
5369 | + print_stats(blkif); | |
5370 | + } | |
5371 | + | |
5372 | + if (log_stats) | |
5373 | + print_stats(blkif); | |
5374 | + if (debug_lvl) | |
5375 | + printk(KERN_DEBUG "%s: exiting\n", current->comm); | |
5376 | + | |
5377 | + blkif->xenblkd = NULL; | |
5378 | + blkif_put(blkif); | |
5379 | + | |
5380 | + return 0; | |
5381 | +} | |
5382 | + | |
5383 | +/****************************************************************** | |
5384 | + * COMPLETION CALLBACK -- Called by user level ioctl() | |
5385 | + */ | |
5386 | + | |
5387 | +static int blktap_read_ufe_ring(tap_blkif_t *info) | |
5388 | +{ | |
5389 | + /* This is called to read responses from the UFE ring. */ | |
5390 | + RING_IDX i, j, rp; | |
5391 | + blkif_response_t *resp; | |
5392 | + blkif_t *blkif=NULL; | |
5393 | + int pending_idx, usr_idx, mmap_idx; | |
5394 | + pending_req_t *pending_req; | |
5395 | + | |
5396 | + if (!info) | |
5397 | + return 0; | |
5398 | + | |
5399 | + /* We currently only forward packets in INTERCEPT_FE mode. */ | |
5400 | + if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE)) | |
5401 | + return 0; | |
5402 | + | |
5403 | + /* for each outstanding message on the UFEring */ | |
5404 | + rp = info->ufe_ring.sring->rsp_prod; | |
5405 | + rmb(); | |
5406 | + | |
5407 | + for (i = info->ufe_ring.rsp_cons; i != rp; i++) { | |
5408 | + blkif_response_t res; | |
5409 | + resp = RING_GET_RESPONSE(&info->ufe_ring, i); | |
5410 | + memcpy(&res, resp, sizeof(res)); | |
5411 | + mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */ | |
5412 | + ++info->ufe_ring.rsp_cons; | |
5413 | + | |
5414 | + /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/ | |
5415 | + usr_idx = (int)res.id; | |
5416 | + pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx])); | |
5417 | + mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); | |
5418 | + | |
5419 | + if ( (mmap_idx >= mmap_alloc) || | |
5420 | + (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) ) | |
5421 | + WPRINTK("Incorrect req map" | |
5422 | + "[%d], internal map [%d,%d (%d)]\n", | |
5423 | + usr_idx, mmap_idx, | |
5424 | + ID_TO_IDX(info->idx_map[usr_idx]), | |
5425 | + MASK_PEND_IDX( | |
5426 | + ID_TO_IDX(info->idx_map[usr_idx]))); | |
5427 | + | |
5428 | + pending_req = &pending_reqs[mmap_idx][pending_idx]; | |
5429 | + blkif = pending_req->blkif; | |
5430 | + | |
5431 | + for (j = 0; j < pending_req->nr_pages; j++) { | |
5432 | + | |
5433 | + unsigned long kvaddr, uvaddr; | |
5434 | + struct page **map = info->vma->vm_private_data; | |
5435 | + struct page *pg; | |
5436 | + int offset; | |
5437 | + | |
5438 | + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j); | |
5439 | + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, j); | |
5440 | + | |
5441 | + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); | |
5442 | + ClearPageReserved(pg); | |
5443 | + offset = (uvaddr - info->vma->vm_start) | |
5444 | + >> PAGE_SHIFT; | |
5445 | + map[offset] = NULL; | |
5446 | + } | |
5447 | + fast_flush_area(pending_req, pending_idx, usr_idx, info->minor); | |
5448 | + info->idx_map[usr_idx] = INVALID_REQ; | |
5449 | + make_response(blkif, pending_req->id, res.operation, | |
5450 | + res.status); | |
5451 | + blkif_put(pending_req->blkif); | |
5452 | + free_req(pending_req); | |
5453 | + } | |
5454 | + | |
5455 | + return 0; | |
5456 | +} | |
5457 | + | |
5458 | + | |
5459 | +/****************************************************************************** | |
5460 | + * NOTIFICATION FROM GUEST OS. | |
5461 | + */ | |
5462 | + | |
5463 | +static void blkif_notify_work(blkif_t *blkif) | |
5464 | +{ | |
5465 | + blkif->waiting_reqs = 1; | |
5466 | + wake_up(&blkif->wq); | |
5467 | +} | |
5468 | + | |
5469 | +irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) | |
5470 | +{ | |
5471 | + blkif_notify_work(dev_id); | |
5472 | + return IRQ_HANDLED; | |
5473 | +} | |
5474 | + | |
5475 | + | |
5476 | + | |
5477 | +/****************************************************************** | |
5478 | + * DOWNWARD CALLS -- These interface with the block-device layer proper. | |
5479 | + */ | |
5480 | +static int print_dbug = 1; | |
5481 | +static int do_block_io_op(blkif_t *blkif) | |
5482 | +{ | |
5483 | + blkif_back_rings_t *blk_rings = &blkif->blk_rings; | |
5484 | + blkif_request_t req; | |
5485 | + pending_req_t *pending_req; | |
5486 | + RING_IDX rc, rp; | |
5487 | + int more_to_do = 0; | |
5488 | + tap_blkif_t *info; | |
5489 | + | |
5490 | + rc = blk_rings->common.req_cons; | |
5491 | + rp = blk_rings->common.sring->req_prod; | |
5492 | + rmb(); /* Ensure we see queued requests up to 'rp'. */ | |
5493 | + | |
5494 | + /*Check blkif has corresponding UE ring*/ | |
5495 | + if (blkif->dev_num < 0) { | |
5496 | + /*oops*/ | |
5497 | + if (print_dbug) { | |
5498 | + WPRINTK("Corresponding UE " | |
5499 | + "ring does not exist!\n"); | |
5500 | + print_dbug = 0; /*We only print this message once*/ | |
5501 | + } | |
5502 | + return 0; | |
5503 | + } | |
5504 | + | |
5505 | + info = tapfds[blkif->dev_num]; | |
5506 | + | |
5507 | + if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) { | |
5508 | + if (print_dbug) { | |
5509 | + WPRINTK("Can't get UE info!\n"); | |
5510 | + print_dbug = 0; | |
5511 | + } | |
5512 | + return 0; | |
5513 | + } | |
5514 | + | |
5515 | + while (rc != rp) { | |
5516 | + | |
5517 | + if (RING_FULL(&info->ufe_ring)) { | |
5518 | + WPRINTK("RING_FULL! More to do\n"); | |
5519 | + more_to_do = 1; | |
5520 | + break; | |
5521 | + } | |
5522 | + | |
5523 | + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) { | |
5524 | + WPRINTK("RING_REQUEST_CONS_OVERFLOW!" | |
5525 | + " More to do\n"); | |
5526 | + more_to_do = 1; | |
5527 | + break; | |
5528 | + } | |
5529 | + | |
5530 | + pending_req = alloc_req(); | |
5531 | + if (NULL == pending_req) { | |
5532 | + blkif->st_oo_req++; | |
5533 | + more_to_do = 1; | |
5534 | + break; | |
5535 | + } | |
5536 | + | |
5537 | + if (kthread_should_stop()) { | |
5538 | + more_to_do = 1; | |
5539 | + break; | |
5540 | + } | |
5541 | + | |
5542 | + switch (blkif->blk_protocol) { | |
5543 | + case BLKIF_PROTOCOL_NATIVE: | |
5544 | + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), | |
5545 | + sizeof(req)); | |
5546 | + break; | |
5547 | + case BLKIF_PROTOCOL_X86_32: | |
5548 | + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); | |
5549 | + break; | |
5550 | + case BLKIF_PROTOCOL_X86_64: | |
5551 | + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); | |
5552 | + break; | |
5553 | + default: | |
5554 | + BUG(); | |
5555 | + } | |
5556 | + blk_rings->common.req_cons = ++rc; /* before make_response() */ | |
5557 | + | |
5558 | + /* Apply all sanity checks to /private copy/ of request. */ | |
5559 | + barrier(); | |
5560 | + | |
5561 | + switch (req.operation) { | |
5562 | + case BLKIF_OP_READ: | |
5563 | + blkif->st_rd_req++; | |
5564 | + dispatch_rw_block_io(blkif, &req, pending_req); | |
5565 | + break; | |
5566 | + | |
5567 | + case BLKIF_OP_WRITE: | |
5568 | + blkif->st_wr_req++; | |
5569 | + dispatch_rw_block_io(blkif, &req, pending_req); | |
5570 | + break; | |
5571 | + | |
5572 | + default: | |
5573 | + /* A good sign something is wrong: sleep for a while to | |
5574 | + * avoid excessive CPU consumption by a bad guest. */ | |
5575 | + msleep(1); | |
5576 | + WPRINTK("unknown operation [%d]\n", | |
5577 | + req.operation); | |
5578 | + make_response(blkif, req.id, req.operation, | |
5579 | + BLKIF_RSP_ERROR); | |
5580 | + free_req(pending_req); | |
5581 | + break; | |
5582 | + } | |
5583 | + | |
5584 | + /* Yield point for this unbounded loop. */ | |
5585 | + cond_resched(); | |
5586 | + } | |
5587 | + | |
5588 | + blktap_kick_user(blkif->dev_num); | |
5589 | + | |
5590 | + return more_to_do; | |
5591 | +} | |
5592 | + | |
5593 | +static void dispatch_rw_block_io(blkif_t *blkif, | |
5594 | + blkif_request_t *req, | |
5595 | + pending_req_t *pending_req) | |
5596 | +{ | |
5597 | + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); | |
5598 | + int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ; | |
5599 | + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; | |
5600 | + unsigned int nseg; | |
5601 | + int ret, i, nr_sects = 0; | |
5602 | + tap_blkif_t *info; | |
5603 | + blkif_request_t *target; | |
5604 | + int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx); | |
5605 | + int usr_idx; | |
5606 | + uint16_t mmap_idx = pending_req->mem_idx; | |
5607 | + struct mm_struct *mm; | |
5608 | + | |
5609 | + if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV) | |
5610 | + goto fail_response; | |
5611 | + | |
5612 | + info = tapfds[blkif->dev_num]; | |
5613 | + if (info == NULL) | |
5614 | + goto fail_response; | |
5615 | + | |
5616 | + /* Check we have space on user ring - should never fail. */ | |
5617 | + usr_idx = GET_NEXT_REQ(info->idx_map); | |
5618 | + if (usr_idx == INVALID_REQ) { | |
5619 | + BUG(); | |
5620 | + goto fail_response; | |
5621 | + } | |
5622 | + | |
5623 | + /* Check that number of segments is sane. */ | |
5624 | + nseg = req->nr_segments; | |
5625 | + if ( unlikely(nseg == 0) || | |
5626 | + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) { | |
5627 | + WPRINTK("Bad number of segments in request (%d)\n", nseg); | |
5628 | + goto fail_response; | |
5629 | + } | |
5630 | + | |
5631 | + /* Make sure userspace is ready. */ | |
5632 | + if (!info->ring_ok) { | |
5633 | + WPRINTK("blktap: ring not ready for requests!\n"); | |
5634 | + goto fail_response; | |
5635 | + } | |
5636 | + | |
5637 | + if (RING_FULL(&info->ufe_ring)) { | |
5638 | + WPRINTK("blktap: fe_ring is full, can't add " | |
5639 | + "IO Request will be dropped. %d %d\n", | |
5640 | + RING_SIZE(&info->ufe_ring), | |
5641 | + RING_SIZE(&blkif->blk_rings.common)); | |
5642 | + goto fail_response; | |
5643 | + } | |
5644 | + | |
5645 | + pending_req->blkif = blkif; | |
5646 | + pending_req->id = req->id; | |
5647 | + pending_req->operation = operation; | |
5648 | + pending_req->status = BLKIF_RSP_OKAY; | |
5649 | + pending_req->nr_pages = nseg; | |
5650 | + op = 0; | |
5651 | + mm = info->vma->vm_mm; | |
5652 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) | |
5653 | + down_write(&mm->mmap_sem); | |
5654 | + for (i = 0; i < nseg; i++) { | |
5655 | + unsigned long uvaddr; | |
5656 | + unsigned long kvaddr; | |
5657 | + uint64_t ptep; | |
5658 | + uint32_t flags; | |
5659 | + | |
5660 | + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i); | |
5661 | + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); | |
5662 | + | |
5663 | + flags = GNTMAP_host_map; | |
5664 | + if (operation == WRITE) | |
5665 | + flags |= GNTMAP_readonly; | |
5666 | + gnttab_set_map_op(&map[op], kvaddr, flags, | |
5667 | + req->seg[i].gref, blkif->domid); | |
5668 | + op++; | |
5669 | + | |
5670 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
5671 | + /* Now map it to user. */ | |
5672 | + ret = create_lookup_pte_addr(mm, uvaddr, &ptep); | |
5673 | + if (ret) { | |
5674 | + up_write(&mm->mmap_sem); | |
5675 | + WPRINTK("Couldn't get a pte addr!\n"); | |
5676 | + goto fail_flush; | |
5677 | + } | |
5678 | + | |
5679 | + flags = GNTMAP_host_map | GNTMAP_application_map | |
5680 | + | GNTMAP_contains_pte; | |
5681 | + if (operation == WRITE) | |
5682 | + flags |= GNTMAP_readonly; | |
5683 | + gnttab_set_map_op(&map[op], ptep, flags, | |
5684 | + req->seg[i].gref, blkif->domid); | |
5685 | + op++; | |
5686 | + } | |
5687 | + | |
5688 | + nr_sects += (req->seg[i].last_sect - | |
5689 | + req->seg[i].first_sect + 1); | |
5690 | + } | |
5691 | + | |
5692 | + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op); | |
5693 | + BUG_ON(ret); | |
5694 | + | |
5695 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
5696 | + up_write(&mm->mmap_sem); | |
5697 | + | |
5698 | + for (i = 0; i < (nseg*2); i+=2) { | |
5699 | + unsigned long uvaddr; | |
5700 | + unsigned long kvaddr; | |
5701 | + unsigned long offset; | |
5702 | + struct page *pg; | |
5703 | + | |
5704 | + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2); | |
5705 | + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i/2); | |
5706 | + | |
5707 | + if (unlikely(map[i].status != 0)) { | |
5708 | + WPRINTK("invalid kernel buffer -- " | |
5709 | + "could not remap it\n"); | |
5710 | + ret |= 1; | |
5711 | + map[i].handle = INVALID_GRANT_HANDLE; | |
5712 | + } | |
5713 | + | |
5714 | + if (unlikely(map[i+1].status != 0)) { | |
5715 | + WPRINTK("invalid user buffer -- " | |
5716 | + "could not remap it\n"); | |
5717 | + ret |= 1; | |
5718 | + map[i+1].handle = INVALID_GRANT_HANDLE; | |
5719 | + } | |
5720 | + | |
5721 | + pending_handle(mmap_idx, pending_idx, i/2).kernel | |
5722 | + = map[i].handle; | |
5723 | + pending_handle(mmap_idx, pending_idx, i/2).user | |
5724 | + = map[i+1].handle; | |
5725 | + | |
5726 | + if (ret) | |
5727 | + continue; | |
5728 | + | |
5729 | + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, | |
5730 | + FOREIGN_FRAME(map[i].dev_bus_addr | |
5731 | + >> PAGE_SHIFT)); | |
5732 | + offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; | |
5733 | + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); | |
5734 | + ((struct page **)info->vma->vm_private_data)[offset] = | |
5735 | + pg; | |
5736 | + } | |
5737 | + } else { | |
5738 | + for (i = 0; i < nseg; i++) { | |
5739 | + unsigned long uvaddr; | |
5740 | + unsigned long kvaddr; | |
5741 | + unsigned long offset; | |
5742 | + struct page *pg; | |
5743 | + | |
5744 | + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i); | |
5745 | + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); | |
5746 | + | |
5747 | + if (unlikely(map[i].status != 0)) { | |
5748 | + WPRINTK("invalid kernel buffer -- " | |
5749 | + "could not remap it\n"); | |
5750 | + ret |= 1; | |
5751 | + map[i].handle = INVALID_GRANT_HANDLE; | |
5752 | + } | |
5753 | + | |
5754 | + pending_handle(mmap_idx, pending_idx, i).kernel | |
5755 | + = map[i].handle; | |
5756 | + | |
5757 | + if (ret) | |
5758 | + continue; | |
5759 | + | |
5760 | + offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; | |
5761 | + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); | |
5762 | + ((struct page **)info->vma->vm_private_data)[offset] = | |
5763 | + pg; | |
5764 | + } | |
5765 | + } | |
5766 | + | |
5767 | + if (ret) | |
5768 | + goto fail_flush; | |
5769 | + | |
5770 | + if (xen_feature(XENFEAT_auto_translated_physmap)) | |
5771 | + down_write(&mm->mmap_sem); | |
5772 | + /* Mark mapped pages as reserved: */ | |
5773 | + for (i = 0; i < req->nr_segments; i++) { | |
5774 | + unsigned long kvaddr; | |
5775 | + struct page *pg; | |
5776 | + | |
5777 | + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); | |
5778 | + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); | |
5779 | + SetPageReserved(pg); | |
5780 | + if (xen_feature(XENFEAT_auto_translated_physmap)) { | |
5781 | + ret = vm_insert_page(info->vma, | |
5782 | + MMAP_VADDR(info->user_vstart, | |
5783 | + usr_idx, i), pg); | |
5784 | + if (ret) { | |
5785 | + up_write(&mm->mmap_sem); | |
5786 | + goto fail_flush; | |
5787 | + } | |
5788 | + } | |
5789 | + } | |
5790 | + if (xen_feature(XENFEAT_auto_translated_physmap)) | |
5791 | + up_write(&mm->mmap_sem); | |
5792 | + | |
5793 | + /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/ | |
5794 | + info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx); | |
5795 | + | |
5796 | + blkif_get(blkif); | |
5797 | + /* Finally, write the request message to the user ring. */ | |
5798 | + target = RING_GET_REQUEST(&info->ufe_ring, | |
5799 | + info->ufe_ring.req_prod_pvt); | |
5800 | + memcpy(target, req, sizeof(*req)); | |
5801 | + target->id = usr_idx; | |
5802 | + wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */ | |
5803 | + info->ufe_ring.req_prod_pvt++; | |
5804 | + | |
5805 | + if (operation == READ) | |
5806 | + blkif->st_rd_sect += nr_sects; | |
5807 | + else if (operation == WRITE) | |
5808 | + blkif->st_wr_sect += nr_sects; | |
5809 | + | |
5810 | + return; | |
5811 | + | |
5812 | + fail_flush: | |
5813 | + WPRINTK("Reached Fail_flush\n"); | |
5814 | + fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num); | |
5815 | + fail_response: | |
5816 | + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); | |
5817 | + free_req(pending_req); | |
5818 | + msleep(1); /* back off a bit */ | |
5819 | +} | |
5820 | + | |
5821 | + | |
5822 | + | |
5823 | +/****************************************************************** | |
5824 | + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING | |
5825 | + */ | |
5826 | + | |
5827 | + | |
5828 | +static void make_response(blkif_t *blkif, u64 id, | |
5829 | + unsigned short op, int st) | |
5830 | +{ | |
5831 | + blkif_response_t resp; | |
5832 | + unsigned long flags; | |
5833 | + blkif_back_rings_t *blk_rings = &blkif->blk_rings; | |
5834 | + int more_to_do = 0; | |
5835 | + int notify; | |
5836 | + | |
5837 | + resp.id = id; | |
5838 | + resp.operation = op; | |
5839 | + resp.status = st; | |
5840 | + | |
5841 | + spin_lock_irqsave(&blkif->blk_ring_lock, flags); | |
5842 | + /* Place on the response ring for the relevant domain. */ | |
5843 | + switch (blkif->blk_protocol) { | |
5844 | + case BLKIF_PROTOCOL_NATIVE: | |
5845 | + memcpy(RING_GET_RESPONSE(&blk_rings->native, | |
5846 | + blk_rings->native.rsp_prod_pvt), | |
5847 | + &resp, sizeof(resp)); | |
5848 | + break; | |
5849 | + case BLKIF_PROTOCOL_X86_32: | |
5850 | + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, | |
5851 | + blk_rings->x86_32.rsp_prod_pvt), | |
5852 | + &resp, sizeof(resp)); | |
5853 | + break; | |
5854 | + case BLKIF_PROTOCOL_X86_64: | |
5855 | + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, | |
5856 | + blk_rings->x86_64.rsp_prod_pvt), | |
5857 | + &resp, sizeof(resp)); | |
5858 | + break; | |
5859 | + default: | |
5860 | + BUG(); | |
5861 | + } | |
5862 | + blk_rings->common.rsp_prod_pvt++; | |
5863 | + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); | |
5864 | + | |
5865 | + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { | |
5866 | + /* | |
5867 | + * Tail check for pending requests. Allows frontend to avoid | |
5868 | + * notifications if requests are already in flight (lower | |
5869 | + * overheads and promotes batching). | |
5870 | + */ | |
5871 | + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); | |
5872 | + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { | |
5873 | + more_to_do = 1; | |
5874 | + } | |
5875 | + | |
5876 | + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); | |
5877 | + if (more_to_do) | |
5878 | + blkif_notify_work(blkif); | |
5879 | + if (notify) | |
5880 | + notify_remote_via_irq(blkif->irq); | |
5881 | +} | |
5882 | + | |
5883 | +static int __init blkif_init(void) | |
5884 | +{ | |
5885 | + int i, ret; | |
5886 | + struct class *class; | |
5887 | + | |
5888 | + if (!is_running_on_xen()) | |
5889 | + return -ENODEV; | |
5890 | + | |
5891 | + INIT_LIST_HEAD(&pending_free); | |
5892 | + for(i = 0; i < 2; i++) { | |
5893 | + ret = req_increase(); | |
5894 | + if (ret) | |
5895 | + break; | |
5896 | + } | |
5897 | + if (i == 0) | |
5898 | + return ret; | |
5899 | + | |
5900 | + tap_blkif_interface_init(); | |
5901 | + | |
5902 | + alloc_pending_reqs = 0; | |
5903 | + | |
5904 | + tap_blkif_xenbus_init(); | |
5905 | + | |
5906 | + /* Dynamically allocate a major for this device */ | |
5907 | + ret = register_chrdev(0, "blktap", &blktap_fops); | |
5908 | + | |
5909 | + if (ret < 0) { | |
5910 | + WPRINTK("Couldn't register /dev/xen/blktap\n"); | |
5911 | + return -ENOMEM; | |
5912 | + } | |
5913 | + | |
5914 | + blktap_major = ret; | |
5915 | + | |
5916 | + /* tapfds[0] is always NULL */ | |
5917 | + blktap_next_minor++; | |
5918 | + | |
5919 | + DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i); | |
5920 | + | |
5921 | + /* Make sure the xen class exists */ | |
5922 | + if ((class = get_xen_class()) != NULL) { | |
5923 | + /* | |
5924 | + * This will allow udev to create the blktap ctrl device. | |
5925 | + * We only want to create blktap0 first. We don't want | |
5926 | + * to flood the sysfs system with needless blktap devices. | |
5927 | + * We only create the device when a request of a new device is | |
5928 | + * made. | |
5929 | + */ | |
5930 | + class_device_create(class, NULL, | |
5931 | + MKDEV(blktap_major, 0), NULL, | |
5932 | + "blktap0"); | |
5933 | + } else { | |
5934 | + /* this is bad, but not fatal */ | |
5935 | + WPRINTK("blktap: sysfs xen_class not created\n"); | |
5936 | + } | |
5937 | + | |
5938 | + DPRINTK("Blktap device successfully created\n"); | |
5939 | + | |
5940 | + return 0; | |
5941 | +} | |
5942 | + | |
5943 | +module_init(blkif_init); | |
5944 | + | |
5945 | +MODULE_LICENSE("Dual BSD/GPL"); | |
5946 | Index: head-2008-11-25/drivers/xen/blktap/common.h | |
5947 | =================================================================== | |
5948 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
5949 | +++ head-2008-11-25/drivers/xen/blktap/common.h 2008-09-15 13:40:15.000000000 +0200 | |
5950 | @@ -0,0 +1,122 @@ | |
5951 | +/* | |
5952 | + * This program is free software; you can redistribute it and/or | |
5953 | + * modify it under the terms of the GNU General Public License version 2 | |
5954 | + * as published by the Free Software Foundation; or, when distributed | |
5955 | + * separately from the Linux kernel or incorporated into other | |
5956 | + * software packages, subject to the following license: | |
5957 | + * | |
5958 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
5959 | + * of this source file (the "Software"), to deal in the Software without | |
5960 | + * restriction, including without limitation the rights to use, copy, modify, | |
5961 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
5962 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
5963 | + * the following conditions: | |
5964 | + * | |
5965 | + * The above copyright notice and this permission notice shall be included in | |
5966 | + * all copies or substantial portions of the Software. | |
5967 | + * | |
5968 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
5969 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
5970 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
5971 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
5972 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
5973 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
5974 | + * IN THE SOFTWARE. | |
5975 | + */ | |
5976 | + | |
5977 | +#ifndef __BLKIF__BACKEND__COMMON_H__ | |
5978 | +#define __BLKIF__BACKEND__COMMON_H__ | |
5979 | + | |
5980 | +#include <linux/version.h> | |
5981 | +#include <linux/module.h> | |
5982 | +#include <linux/interrupt.h> | |
5983 | +#include <linux/slab.h> | |
5984 | +#include <linux/blkdev.h> | |
5985 | +#include <linux/vmalloc.h> | |
5986 | +#include <asm/io.h> | |
5987 | +#include <asm/setup.h> | |
5988 | +#include <asm/pgalloc.h> | |
5989 | +#include <xen/evtchn.h> | |
5990 | +#include <asm/hypervisor.h> | |
5991 | +#include <xen/blkif.h> | |
5992 | +#include <xen/gnttab.h> | |
5993 | +#include <xen/driver_util.h> | |
5994 | + | |
5995 | +#define DPRINTK(_f, _a...) pr_debug("(file=%s, line=%d) " _f, \ | |
5996 | + __FILE__ , __LINE__ , ## _a ) | |
5997 | + | |
5998 | +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) | |
5999 | + | |
6000 | +struct backend_info; | |
6001 | + | |
6002 | +typedef struct blkif_st { | |
6003 | + /* Unique identifier for this interface. */ | |
6004 | + domid_t domid; | |
6005 | + unsigned int handle; | |
6006 | + /* Physical parameters of the comms window. */ | |
6007 | + unsigned int irq; | |
6008 | + /* Comms information. */ | |
6009 | + enum blkif_protocol blk_protocol; | |
6010 | + blkif_back_rings_t blk_rings; | |
6011 | + struct vm_struct *blk_ring_area; | |
6012 | + /* Back pointer to the backend_info. */ | |
6013 | + struct backend_info *be; | |
6014 | + /* Private fields. */ | |
6015 | + spinlock_t blk_ring_lock; | |
6016 | + atomic_t refcnt; | |
6017 | + | |
6018 | + wait_queue_head_t wq; | |
6019 | + struct task_struct *xenblkd; | |
6020 | + unsigned int waiting_reqs; | |
6021 | + request_queue_t *plug; | |
6022 | + | |
6023 | + /* statistics */ | |
6024 | + unsigned long st_print; | |
6025 | + int st_rd_req; | |
6026 | + int st_wr_req; | |
6027 | + int st_oo_req; | |
6028 | + int st_rd_sect; | |
6029 | + int st_wr_sect; | |
6030 | + | |
6031 | + wait_queue_head_t waiting_to_free; | |
6032 | + | |
6033 | + grant_handle_t shmem_handle; | |
6034 | + grant_ref_t shmem_ref; | |
6035 | + | |
6036 | + int dev_num; | |
6037 | + uint64_t sectors; | |
6038 | +} blkif_t; | |
6039 | + | |
6040 | +blkif_t *tap_alloc_blkif(domid_t domid); | |
6041 | +void tap_blkif_free(blkif_t *blkif); | |
6042 | +void tap_blkif_kmem_cache_free(blkif_t *blkif); | |
6043 | +int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, | |
6044 | + unsigned int evtchn); | |
6045 | +void tap_blkif_unmap(blkif_t *blkif); | |
6046 | + | |
6047 | +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) | |
6048 | +#define blkif_put(_b) \ | |
6049 | + do { \ | |
6050 | + if (atomic_dec_and_test(&(_b)->refcnt)) \ | |
6051 | + wake_up(&(_b)->waiting_to_free);\ | |
6052 | + } while (0) | |
6053 | + | |
6054 | + | |
6055 | +struct phys_req { | |
6056 | + unsigned short dev; | |
6057 | + unsigned short nr_sects; | |
6058 | + struct block_device *bdev; | |
6059 | + blkif_sector_t sector_number; | |
6060 | +}; | |
6061 | + | |
6062 | +void tap_blkif_interface_init(void); | |
6063 | + | |
6064 | +void tap_blkif_xenbus_init(void); | |
6065 | + | |
6066 | +irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); | |
6067 | +int tap_blkif_schedule(void *arg); | |
6068 | + | |
6069 | +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif); | |
6070 | +void signal_tapdisk(int idx); | |
6071 | + | |
6072 | +#endif /* __BLKIF__BACKEND__COMMON_H__ */ | |
6073 | Index: head-2008-11-25/drivers/xen/blktap/interface.c | |
6074 | =================================================================== | |
6075 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
6076 | +++ head-2008-11-25/drivers/xen/blktap/interface.c 2008-09-15 13:40:15.000000000 +0200 | |
6077 | @@ -0,0 +1,181 @@ | |
6078 | +/****************************************************************************** | |
6079 | + * drivers/xen/blktap/interface.c | |
6080 | + * | |
6081 | + * Block-device interface management. | |
6082 | + * | |
6083 | + * Copyright (c) 2004, Keir Fraser | |
6084 | + * | |
6085 | + * This program is free software; you can redistribute it and/or | |
6086 | + * modify it under the terms of the GNU General Public License version 2 | |
6087 | + * as published by the Free Software Foundation; or, when distributed | |
6088 | + * separately from the Linux kernel or incorporated into other | |
6089 | + * software packages, subject to the following license: | |
6090 | + * | |
6091 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
6092 | + * of this source file (the "Software"), to deal in the Software without | |
6093 | + * restriction, including without limitation the rights to use, copy, modify, | |
6094 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
6095 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
6096 | + * the following conditions: | |
6097 | + * | |
6098 | + * The above copyright notice and this permission notice shall be included in | |
6099 | + * all copies or substantial portions of the Software. | |
6100 | + * | |
6101 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
6102 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
6103 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
6104 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
6105 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
6106 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
6107 | + * IN THE SOFTWARE. | |
6108 | + | |
6109 | + */ | |
6110 | + | |
6111 | +#include "common.h" | |
6112 | +#include <xen/evtchn.h> | |
6113 | + | |
6114 | +static kmem_cache_t *blkif_cachep; | |
6115 | + | |
6116 | +blkif_t *tap_alloc_blkif(domid_t domid) | |
6117 | +{ | |
6118 | + blkif_t *blkif; | |
6119 | + | |
6120 | + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); | |
6121 | + if (!blkif) | |
6122 | + return ERR_PTR(-ENOMEM); | |
6123 | + | |
6124 | + memset(blkif, 0, sizeof(*blkif)); | |
6125 | + blkif->domid = domid; | |
6126 | + spin_lock_init(&blkif->blk_ring_lock); | |
6127 | + atomic_set(&blkif->refcnt, 1); | |
6128 | + init_waitqueue_head(&blkif->wq); | |
6129 | + blkif->st_print = jiffies; | |
6130 | + init_waitqueue_head(&blkif->waiting_to_free); | |
6131 | + | |
6132 | + return blkif; | |
6133 | +} | |
6134 | + | |
6135 | +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) | |
6136 | +{ | |
6137 | + struct gnttab_map_grant_ref op; | |
6138 | + | |
6139 | + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, | |
6140 | + GNTMAP_host_map, shared_page, blkif->domid); | |
6141 | + | |
6142 | + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | |
6143 | + BUG(); | |
6144 | + | |
6145 | + if (op.status) { | |
6146 | + DPRINTK(" Grant table operation failure !\n"); | |
6147 | + return op.status; | |
6148 | + } | |
6149 | + | |
6150 | + blkif->shmem_ref = shared_page; | |
6151 | + blkif->shmem_handle = op.handle; | |
6152 | + | |
6153 | + return 0; | |
6154 | +} | |
6155 | + | |
6156 | +static void unmap_frontend_page(blkif_t *blkif) | |
6157 | +{ | |
6158 | + struct gnttab_unmap_grant_ref op; | |
6159 | + | |
6160 | + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, | |
6161 | + GNTMAP_host_map, blkif->shmem_handle); | |
6162 | + | |
6163 | + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | |
6164 | + BUG(); | |
6165 | +} | |
6166 | + | |
6167 | +int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, | |
6168 | + unsigned int evtchn) | |
6169 | +{ | |
6170 | + int err; | |
6171 | + | |
6172 | + /* Already connected through? */ | |
6173 | + if (blkif->irq) | |
6174 | + return 0; | |
6175 | + | |
6176 | + if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) | |
6177 | + return -ENOMEM; | |
6178 | + | |
6179 | + err = map_frontend_page(blkif, shared_page); | |
6180 | + if (err) { | |
6181 | + free_vm_area(blkif->blk_ring_area); | |
6182 | + return err; | |
6183 | + } | |
6184 | + | |
6185 | + switch (blkif->blk_protocol) { | |
6186 | + case BLKIF_PROTOCOL_NATIVE: | |
6187 | + { | |
6188 | + blkif_sring_t *sring; | |
6189 | + sring = (blkif_sring_t *)blkif->blk_ring_area->addr; | |
6190 | + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); | |
6191 | + break; | |
6192 | + } | |
6193 | + case BLKIF_PROTOCOL_X86_32: | |
6194 | + { | |
6195 | + blkif_x86_32_sring_t *sring_x86_32; | |
6196 | + sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr; | |
6197 | + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); | |
6198 | + break; | |
6199 | + } | |
6200 | + case BLKIF_PROTOCOL_X86_64: | |
6201 | + { | |
6202 | + blkif_x86_64_sring_t *sring_x86_64; | |
6203 | + sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr; | |
6204 | + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); | |
6205 | + break; | |
6206 | + } | |
6207 | + default: | |
6208 | + BUG(); | |
6209 | + } | |
6210 | + | |
6211 | + err = bind_interdomain_evtchn_to_irqhandler( | |
6212 | + blkif->domid, evtchn, tap_blkif_be_int, | |
6213 | + 0, "blkif-backend", blkif); | |
6214 | + if (err < 0) { | |
6215 | + unmap_frontend_page(blkif); | |
6216 | + free_vm_area(blkif->blk_ring_area); | |
6217 | + blkif->blk_rings.common.sring = NULL; | |
6218 | + return err; | |
6219 | + } | |
6220 | + blkif->irq = err; | |
6221 | + | |
6222 | + return 0; | |
6223 | +} | |
6224 | + | |
6225 | +void tap_blkif_unmap(blkif_t *blkif) | |
6226 | +{ | |
6227 | + if (blkif->irq) { | |
6228 | + unbind_from_irqhandler(blkif->irq, blkif); | |
6229 | + blkif->irq = 0; | |
6230 | + } | |
6231 | + if (blkif->blk_rings.common.sring) { | |
6232 | + unmap_frontend_page(blkif); | |
6233 | + free_vm_area(blkif->blk_ring_area); | |
6234 | + blkif->blk_rings.common.sring = NULL; | |
6235 | + } | |
6236 | +} | |
6237 | + | |
6238 | +void tap_blkif_free(blkif_t *blkif) | |
6239 | +{ | |
6240 | + atomic_dec(&blkif->refcnt); | |
6241 | + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); | |
6242 | + atomic_inc(&blkif->refcnt); | |
6243 | + | |
6244 | + tap_blkif_unmap(blkif); | |
6245 | +} | |
6246 | + | |
6247 | +void tap_blkif_kmem_cache_free(blkif_t *blkif) | |
6248 | +{ | |
6249 | + if (!atomic_dec_and_test(&blkif->refcnt)) | |
6250 | + BUG(); | |
6251 | + kmem_cache_free(blkif_cachep, blkif); | |
6252 | +} | |
6253 | + | |
6254 | +void __init tap_blkif_interface_init(void) | |
6255 | +{ | |
6256 | + blkif_cachep = kmem_cache_create("blktapif_cache", sizeof(blkif_t), | |
6257 | + 0, 0, NULL, NULL); | |
6258 | +} | |
6259 | Index: head-2008-11-25/drivers/xen/blktap/xenbus.c | |
6260 | =================================================================== | |
6261 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
6262 | +++ head-2008-11-25/drivers/xen/blktap/xenbus.c 2008-09-15 13:40:15.000000000 +0200 | |
6263 | @@ -0,0 +1,479 @@ | |
6264 | +/* drivers/xen/blktap/xenbus.c | |
6265 | + * | |
6266 | + * Xenbus code for blktap | |
6267 | + * | |
6268 | + * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield | |
6269 | + * | |
6270 | + * Based on the blkback xenbus code: | |
6271 | + * | |
6272 | + * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> | |
6273 | + * Copyright (C) 2005 XenSource Ltd | |
6274 | + * | |
6275 | + * This program is free software; you can redistribute it and/or | |
6276 | + * modify it under the terms of the GNU General Public License version 2 | |
6277 | + * as published by the Free Software Foundation; or, when distributed | |
6278 | + * separately from the Linux kernel or incorporated into other | |
6279 | + * software packages, subject to the following license: | |
6280 | + * | |
6281 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
6282 | + * of this source file (the "Software"), to deal in the Software without | |
6283 | + * restriction, including without limitation the rights to use, copy, modify, | |
6284 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
6285 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
6286 | + * the following conditions: | |
6287 | + * | |
6288 | + * The above copyright notice and this permission notice shall be included in | |
6289 | + * all copies or substantial portions of the Software. | |
6290 | + * | |
6291 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
6292 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
6293 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
6294 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
6295 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
6296 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
6297 | + * IN THE SOFTWARE. | |
6298 | + */ | |
6299 | + | |
6300 | +#include <stdarg.h> | |
6301 | +#include <linux/module.h> | |
6302 | +#include <linux/kthread.h> | |
6303 | +#include <xen/xenbus.h> | |
6304 | +#include "common.h" | |
6305 | + | |
6306 | + | |
6307 | +struct backend_info | |
6308 | +{ | |
6309 | + struct xenbus_device *dev; | |
6310 | + blkif_t *blkif; | |
6311 | + struct xenbus_watch backend_watch; | |
6312 | + int xenbus_id; | |
6313 | + int group_added; | |
6314 | +}; | |
6315 | + | |
6316 | + | |
6317 | +static void connect(struct backend_info *); | |
6318 | +static int connect_ring(struct backend_info *); | |
6319 | +static int blktap_remove(struct xenbus_device *dev); | |
6320 | +static int blktap_probe(struct xenbus_device *dev, | |
6321 | + const struct xenbus_device_id *id); | |
6322 | +static void tap_backend_changed(struct xenbus_watch *, const char **, | |
6323 | + unsigned int); | |
6324 | +static void tap_frontend_changed(struct xenbus_device *dev, | |
6325 | + enum xenbus_state frontend_state); | |
6326 | + | |
6327 | +static int strsep_len(const char *str, char c, unsigned int len) | |
6328 | +{ | |
6329 | + unsigned int i; | |
6330 | + | |
6331 | + for (i = 0; str[i]; i++) | |
6332 | + if (str[i] == c) { | |
6333 | + if (len == 0) | |
6334 | + return i; | |
6335 | + len--; | |
6336 | + } | |
6337 | + return (len == 0) ? i : -ERANGE; | |
6338 | +} | |
6339 | + | |
6340 | +static long get_id(const char *str) | |
6341 | +{ | |
6342 | + int len,end; | |
6343 | + const char *ptr; | |
6344 | + char *tptr, num[10]; | |
6345 | + | |
6346 | + len = strsep_len(str, '/', 2); | |
6347 | + end = strlen(str); | |
6348 | + if ( (len < 0) || (end < 0) ) return -1; | |
6349 | + | |
6350 | + ptr = str + len + 1; | |
6351 | + strncpy(num,ptr,end - len); | |
6352 | + tptr = num + (end - (len + 1)); | |
6353 | + *tptr = '\0'; | |
6354 | + DPRINTK("Get_id called for %s (%s)\n",str,num); | |
6355 | + | |
6356 | + return simple_strtol(num, NULL, 10); | |
6357 | +} | |
6358 | + | |
6359 | +static int blktap_name(blkif_t *blkif, char *buf) | |
6360 | +{ | |
6361 | + char *devpath, *devname; | |
6362 | + struct xenbus_device *dev = blkif->be->dev; | |
6363 | + | |
6364 | + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); | |
6365 | + if (IS_ERR(devpath)) | |
6366 | + return PTR_ERR(devpath); | |
6367 | + | |
6368 | + if ((devname = strstr(devpath, "/dev/")) != NULL) | |
6369 | + devname += strlen("/dev/"); | |
6370 | + else | |
6371 | + devname = devpath; | |
6372 | + | |
6373 | + snprintf(buf, TASK_COMM_LEN, "blktap.%d.%s", blkif->domid, devname); | |
6374 | + kfree(devpath); | |
6375 | + | |
6376 | + return 0; | |
6377 | +} | |
6378 | + | |
6379 | +/**************************************************************** | |
6380 | + * sysfs interface for I/O requests of blktap device | |
6381 | + */ | |
6382 | + | |
6383 | +#define VBD_SHOW(name, format, args...) \ | |
6384 | + static ssize_t show_##name(struct device *_dev, \ | |
6385 | + struct device_attribute *attr, \ | |
6386 | + char *buf) \ | |
6387 | + { \ | |
6388 | + struct xenbus_device *dev = to_xenbus_device(_dev); \ | |
6389 | + struct backend_info *be = dev->dev.driver_data; \ | |
6390 | + \ | |
6391 | + return sprintf(buf, format, ##args); \ | |
6392 | + } \ | |
6393 | + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) | |
6394 | + | |
6395 | +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); | |
6396 | +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); | |
6397 | +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); | |
6398 | +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); | |
6399 | +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); | |
6400 | + | |
6401 | +static struct attribute *tapstat_attrs[] = { | |
6402 | + &dev_attr_oo_req.attr, | |
6403 | + &dev_attr_rd_req.attr, | |
6404 | + &dev_attr_wr_req.attr, | |
6405 | + &dev_attr_rd_sect.attr, | |
6406 | + &dev_attr_wr_sect.attr, | |
6407 | + NULL | |
6408 | +}; | |
6409 | + | |
6410 | +static struct attribute_group tapstat_group = { | |
6411 | + .name = "statistics", | |
6412 | + .attrs = tapstat_attrs, | |
6413 | +}; | |
6414 | + | |
6415 | +int xentap_sysfs_addif(struct xenbus_device *dev) | |
6416 | +{ | |
6417 | + int err; | |
6418 | + struct backend_info *be = dev->dev.driver_data; | |
6419 | + err = sysfs_create_group(&dev->dev.kobj, &tapstat_group); | |
6420 | + if (!err) | |
6421 | + be->group_added = 1; | |
6422 | + return err; | |
6423 | +} | |
6424 | + | |
6425 | +void xentap_sysfs_delif(struct xenbus_device *dev) | |
6426 | +{ | |
6427 | + struct backend_info *be = dev->dev.driver_data; | |
6428 | + sysfs_remove_group(&dev->dev.kobj, &tapstat_group); | |
6429 | + be->group_added = 0; | |
6430 | +} | |
6431 | + | |
6432 | +static int blktap_remove(struct xenbus_device *dev) | |
6433 | +{ | |
6434 | + struct backend_info *be = dev->dev.driver_data; | |
6435 | + | |
6436 | + if (be->group_added) | |
6437 | + xentap_sysfs_delif(be->dev); | |
6438 | + if (be->backend_watch.node) { | |
6439 | + unregister_xenbus_watch(&be->backend_watch); | |
6440 | + kfree(be->backend_watch.node); | |
6441 | + be->backend_watch.node = NULL; | |
6442 | + } | |
6443 | + if (be->blkif) { | |
6444 | + if (be->blkif->xenblkd) | |
6445 | + kthread_stop(be->blkif->xenblkd); | |
6446 | + signal_tapdisk(be->blkif->dev_num); | |
6447 | + tap_blkif_free(be->blkif); | |
6448 | + tap_blkif_kmem_cache_free(be->blkif); | |
6449 | + be->blkif = NULL; | |
6450 | + } | |
6451 | + kfree(be); | |
6452 | + dev->dev.driver_data = NULL; | |
6453 | + return 0; | |
6454 | +} | |
6455 | + | |
6456 | +static void tap_update_blkif_status(blkif_t *blkif) | |
6457 | +{ | |
6458 | + int err; | |
6459 | + char name[TASK_COMM_LEN]; | |
6460 | + | |
6461 | + /* Not ready to connect? */ | |
6462 | + if(!blkif->irq || !blkif->sectors) { | |
6463 | + return; | |
6464 | + } | |
6465 | + | |
6466 | + /* Already connected? */ | |
6467 | + if (blkif->be->dev->state == XenbusStateConnected) | |
6468 | + return; | |
6469 | + | |
6470 | + /* Attempt to connect: exit if we fail to. */ | |
6471 | + connect(blkif->be); | |
6472 | + if (blkif->be->dev->state != XenbusStateConnected) | |
6473 | + return; | |
6474 | + | |
6475 | + err = blktap_name(blkif, name); | |
6476 | + if (err) { | |
6477 | + xenbus_dev_error(blkif->be->dev, err, "get blktap dev name"); | |
6478 | + return; | |
6479 | + } | |
6480 | + | |
6481 | + if (!blkif->be->group_added) { | |
6482 | + err = xentap_sysfs_addif(blkif->be->dev); | |
6483 | + if (err) { | |
6484 | + xenbus_dev_fatal(blkif->be->dev, err, | |
6485 | + "creating sysfs entries"); | |
6486 | + return; | |
6487 | + } | |
6488 | + } | |
6489 | + | |
6490 | + blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, name); | |
6491 | + if (IS_ERR(blkif->xenblkd)) { | |
6492 | + err = PTR_ERR(blkif->xenblkd); | |
6493 | + blkif->xenblkd = NULL; | |
6494 | + xenbus_dev_fatal(blkif->be->dev, err, "start xenblkd"); | |
6495 | + WPRINTK("Error starting thread\n"); | |
6496 | + } | |
6497 | +} | |
6498 | + | |
6499 | +/** | |
6500 | + * Entry point to this code when a new device is created. Allocate | |
6501 | + * the basic structures, and watch the store waiting for the | |
6502 | + * user-space program to tell us the physical device info. Switch to | |
6503 | + * InitWait. | |
6504 | + */ | |
6505 | +static int blktap_probe(struct xenbus_device *dev, | |
6506 | + const struct xenbus_device_id *id) | |
6507 | +{ | |
6508 | + int err; | |
6509 | + struct backend_info *be = kzalloc(sizeof(struct backend_info), | |
6510 | + GFP_KERNEL); | |
6511 | + if (!be) { | |
6512 | + xenbus_dev_fatal(dev, -ENOMEM, | |
6513 | + "allocating backend structure"); | |
6514 | + return -ENOMEM; | |
6515 | + } | |
6516 | + | |
6517 | + be->dev = dev; | |
6518 | + dev->dev.driver_data = be; | |
6519 | + be->xenbus_id = get_id(dev->nodename); | |
6520 | + | |
6521 | + be->blkif = tap_alloc_blkif(dev->otherend_id); | |
6522 | + if (IS_ERR(be->blkif)) { | |
6523 | + err = PTR_ERR(be->blkif); | |
6524 | + be->blkif = NULL; | |
6525 | + xenbus_dev_fatal(dev, err, "creating block interface"); | |
6526 | + goto fail; | |
6527 | + } | |
6528 | + | |
6529 | + /* setup back pointer */ | |
6530 | + be->blkif->be = be; | |
6531 | + be->blkif->sectors = 0; | |
6532 | + | |
6533 | + /* set a watch on disk info, waiting for userspace to update details*/ | |
6534 | + err = xenbus_watch_path2(dev, dev->nodename, "info", | |
6535 | + &be->backend_watch, tap_backend_changed); | |
6536 | + if (err) | |
6537 | + goto fail; | |
6538 | + | |
6539 | + err = xenbus_switch_state(dev, XenbusStateInitWait); | |
6540 | + if (err) | |
6541 | + goto fail; | |
6542 | + return 0; | |
6543 | + | |
6544 | +fail: | |
6545 | + DPRINTK("blktap probe failed\n"); | |
6546 | + blktap_remove(dev); | |
6547 | + return err; | |
6548 | +} | |
6549 | + | |
6550 | + | |
6551 | +/** | |
6552 | + * Callback received when the user space code has placed the device | |
6553 | + * information in xenstore. | |
6554 | + */ | |
6555 | +static void tap_backend_changed(struct xenbus_watch *watch, | |
6556 | + const char **vec, unsigned int len) | |
6557 | +{ | |
6558 | + int err; | |
6559 | + unsigned long info; | |
6560 | + struct backend_info *be | |
6561 | + = container_of(watch, struct backend_info, backend_watch); | |
6562 | + struct xenbus_device *dev = be->dev; | |
6563 | + | |
6564 | + /** | |
6565 | + * Check to see whether userspace code has opened the image | |
6566 | + * and written sector | |
6567 | + * and disk info to xenstore | |
6568 | + */ | |
6569 | + err = xenbus_gather(XBT_NIL, dev->nodename, "info", "%lu", &info, | |
6570 | + NULL); | |
6571 | + if (XENBUS_EXIST_ERR(err)) | |
6572 | + return; | |
6573 | + if (err) { | |
6574 | + xenbus_dev_error(dev, err, "getting info"); | |
6575 | + return; | |
6576 | + } | |
6577 | + | |
6578 | + DPRINTK("Userspace update on disk info, %lu\n",info); | |
6579 | + | |
6580 | + err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu", | |
6581 | + &be->blkif->sectors, NULL); | |
6582 | + | |
6583 | + /* Associate tap dev with domid*/ | |
6584 | + be->blkif->dev_num = dom_to_devid(be->blkif->domid, be->xenbus_id, | |
6585 | + be->blkif); | |
6586 | + DPRINTK("Thread started for domid [%d], connecting disk\n", | |
6587 | + be->blkif->dev_num); | |
6588 | + | |
6589 | + tap_update_blkif_status(be->blkif); | |
6590 | +} | |
6591 | + | |
6592 | +/** | |
6593 | + * Callback received when the frontend's state changes. | |
6594 | + */ | |
6595 | +static void tap_frontend_changed(struct xenbus_device *dev, | |
6596 | + enum xenbus_state frontend_state) | |
6597 | +{ | |
6598 | + struct backend_info *be = dev->dev.driver_data; | |
6599 | + int err; | |
6600 | + | |
6601 | + DPRINTK("\n"); | |
6602 | + | |
6603 | + switch (frontend_state) { | |
6604 | + case XenbusStateInitialising: | |
6605 | + if (dev->state == XenbusStateClosed) { | |
6606 | + printk(KERN_INFO "%s: %s: prepare for reconnect\n", | |
6607 | + __FUNCTION__, dev->nodename); | |
6608 | + xenbus_switch_state(dev, XenbusStateInitWait); | |
6609 | + } | |
6610 | + break; | |
6611 | + | |
6612 | + case XenbusStateInitialised: | |
6613 | + case XenbusStateConnected: | |
6614 | + /* Ensure we connect even when two watches fire in | |
6615 | + close successsion and we miss the intermediate value | |
6616 | + of frontend_state. */ | |
6617 | + if (dev->state == XenbusStateConnected) | |
6618 | + break; | |
6619 | + | |
6620 | + err = connect_ring(be); | |
6621 | + if (err) | |
6622 | + break; | |
6623 | + tap_update_blkif_status(be->blkif); | |
6624 | + break; | |
6625 | + | |
6626 | + case XenbusStateClosing: | |
6627 | + if (be->blkif->xenblkd) { | |
6628 | + kthread_stop(be->blkif->xenblkd); | |
6629 | + be->blkif->xenblkd = NULL; | |
6630 | + } | |
6631 | + tap_blkif_free(be->blkif); | |
6632 | + xenbus_switch_state(dev, XenbusStateClosing); | |
6633 | + break; | |
6634 | + | |
6635 | + case XenbusStateClosed: | |
6636 | + xenbus_switch_state(dev, XenbusStateClosed); | |
6637 | + if (xenbus_dev_is_online(dev)) | |
6638 | + break; | |
6639 | + /* fall through if not online */ | |
6640 | + case XenbusStateUnknown: | |
6641 | + device_unregister(&dev->dev); | |
6642 | + break; | |
6643 | + | |
6644 | + default: | |
6645 | + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", | |
6646 | + frontend_state); | |
6647 | + break; | |
6648 | + } | |
6649 | +} | |
6650 | + | |
6651 | + | |
6652 | +/** | |
6653 | + * Switch to Connected state. | |
6654 | + */ | |
6655 | +static void connect(struct backend_info *be) | |
6656 | +{ | |
6657 | + int err; | |
6658 | + | |
6659 | + struct xenbus_device *dev = be->dev; | |
6660 | + | |
6661 | + err = xenbus_switch_state(dev, XenbusStateConnected); | |
6662 | + if (err) | |
6663 | + xenbus_dev_fatal(dev, err, "switching to Connected state", | |
6664 | + dev->nodename); | |
6665 | + | |
6666 | + return; | |
6667 | +} | |
6668 | + | |
6669 | + | |
6670 | +static int connect_ring(struct backend_info *be) | |
6671 | +{ | |
6672 | + struct xenbus_device *dev = be->dev; | |
6673 | + unsigned long ring_ref; | |
6674 | + unsigned int evtchn; | |
6675 | + char protocol[64]; | |
6676 | + int err; | |
6677 | + | |
6678 | + DPRINTK("%s\n", dev->otherend); | |
6679 | + | |
6680 | + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", | |
6681 | + &ring_ref, "event-channel", "%u", &evtchn, NULL); | |
6682 | + if (err) { | |
6683 | + xenbus_dev_fatal(dev, err, | |
6684 | + "reading %s/ring-ref and event-channel", | |
6685 | + dev->otherend); | |
6686 | + return err; | |
6687 | + } | |
6688 | + | |
6689 | + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | |
6690 | + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", | |
6691 | + "%63s", protocol, NULL); | |
6692 | + if (err) | |
6693 | + strcpy(protocol, "unspecified, assuming native"); | |
6694 | + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) | |
6695 | + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | |
6696 | + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) | |
6697 | + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; | |
6698 | + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) | |
6699 | + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; | |
6700 | + else { | |
6701 | + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); | |
6702 | + return -1; | |
6703 | + } | |
6704 | + printk(KERN_INFO | |
6705 | + "blktap: ring-ref %ld, event-channel %d, protocol %d (%s)\n", | |
6706 | + ring_ref, evtchn, be->blkif->blk_protocol, protocol); | |
6707 | + | |
6708 | + /* Map the shared frame, irq etc. */ | |
6709 | + err = tap_blkif_map(be->blkif, ring_ref, evtchn); | |
6710 | + if (err) { | |
6711 | + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", | |
6712 | + ring_ref, evtchn); | |
6713 | + return err; | |
6714 | + } | |
6715 | + | |
6716 | + return 0; | |
6717 | +} | |
6718 | + | |
6719 | + | |
6720 | +/* ** Driver Registration ** */ | |
6721 | + | |
6722 | + | |
6723 | +static const struct xenbus_device_id blktap_ids[] = { | |
6724 | + { "tap" }, | |
6725 | + { "" } | |
6726 | +}; | |
6727 | + | |
6728 | + | |
6729 | +static struct xenbus_driver blktap = { | |
6730 | + .name = "tap", | |
6731 | + .owner = THIS_MODULE, | |
6732 | + .ids = blktap_ids, | |
6733 | + .probe = blktap_probe, | |
6734 | + .remove = blktap_remove, | |
6735 | + .otherend_changed = tap_frontend_changed | |
6736 | +}; | |
6737 | + | |
6738 | + | |
6739 | +void tap_blkif_xenbus_init(void) | |
6740 | +{ | |
6741 | + xenbus_register_backend(&blktap); | |
6742 | +} | |
6743 | Index: head-2008-11-25/drivers/xen/char/Makefile | |
6744 | =================================================================== | |
6745 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
6746 | +++ head-2008-11-25/drivers/xen/char/Makefile 2007-07-10 09:42:30.000000000 +0200 | |
6747 | @@ -0,0 +1 @@ | |
6748 | +obj-$(CONFIG_XEN_DEVMEM) := mem.o | |
6749 | Index: head-2008-11-25/drivers/xen/char/mem.c | |
6750 | =================================================================== | |
6751 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
6752 | +++ head-2008-11-25/drivers/xen/char/mem.c 2007-08-06 15:10:49.000000000 +0200 | |
6753 | @@ -0,0 +1,190 @@ | |
6754 | +/* | |
6755 | + * Originally from linux/drivers/char/mem.c | |
6756 | + * | |
6757 | + * Copyright (C) 1991, 1992 Linus Torvalds | |
6758 | + * | |
6759 | + * Added devfs support. | |
6760 | + * Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu> | |
6761 | + * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com> | |
6762 | + */ | |
6763 | + | |
6764 | +#include <linux/mm.h> | |
6765 | +#include <linux/miscdevice.h> | |
6766 | +#include <linux/slab.h> | |
6767 | +#include <linux/vmalloc.h> | |
6768 | +#include <linux/mman.h> | |
6769 | +#include <linux/random.h> | |
6770 | +#include <linux/init.h> | |
6771 | +#include <linux/raw.h> | |
6772 | +#include <linux/tty.h> | |
6773 | +#include <linux/capability.h> | |
6774 | +#include <linux/smp_lock.h> | |
6775 | +#include <linux/ptrace.h> | |
6776 | +#include <linux/device.h> | |
6777 | +#include <asm/pgalloc.h> | |
6778 | +#include <asm/uaccess.h> | |
6779 | +#include <asm/io.h> | |
6780 | +#include <asm/hypervisor.h> | |
6781 | + | |
6782 | +static inline int uncached_access(struct file *file) | |
6783 | +{ | |
6784 | + if (file->f_flags & O_SYNC) | |
6785 | + return 1; | |
6786 | + /* Xen sets correct MTRR type on non-RAM for us. */ | |
6787 | + return 0; | |
6788 | +} | |
6789 | + | |
6790 | +/* | |
6791 | + * This funcion reads the *physical* memory. The f_pos points directly to the | |
6792 | + * memory location. | |
6793 | + */ | |
6794 | +static ssize_t read_mem(struct file * file, char __user * buf, | |
6795 | + size_t count, loff_t *ppos) | |
6796 | +{ | |
6797 | + unsigned long p = *ppos, ignored; | |
6798 | + ssize_t read = 0, sz; | |
6799 | + void __iomem *v; | |
6800 | + | |
6801 | + while (count > 0) { | |
6802 | + /* | |
6803 | + * Handle first page in case it's not aligned | |
6804 | + */ | |
6805 | + if (-p & (PAGE_SIZE - 1)) | |
6806 | + sz = -p & (PAGE_SIZE - 1); | |
6807 | + else | |
6808 | + sz = PAGE_SIZE; | |
6809 | + | |
6810 | + sz = min_t(unsigned long, sz, count); | |
6811 | + | |
6812 | + v = ioremap(p, sz); | |
6813 | + if (IS_ERR(v) || v == NULL) { | |
6814 | + /* | |
6815 | + * Some programs (e.g., dmidecode) groove off into | |
6816 | + * weird RAM areas where no tables can possibly exist | |
6817 | + * (because Xen will have stomped on them!). These | |
6818 | + * programs get rather upset if we let them know that | |
6819 | + * Xen failed their access, so we fake out a read of | |
6820 | + * all zeroes. | |
6821 | + */ | |
6822 | + if (clear_user(buf, count)) | |
6823 | + return -EFAULT; | |
6824 | + read += count; | |
6825 | + break; | |
6826 | + } | |
6827 | + | |
6828 | + ignored = copy_to_user(buf, v, sz); | |
6829 | + iounmap(v); | |
6830 | + if (ignored) | |
6831 | + return -EFAULT; | |
6832 | + buf += sz; | |
6833 | + p += sz; | |
6834 | + count -= sz; | |
6835 | + read += sz; | |
6836 | + } | |
6837 | + | |
6838 | + *ppos += read; | |
6839 | + return read; | |
6840 | +} | |
6841 | + | |
6842 | +static ssize_t write_mem(struct file * file, const char __user * buf, | |
6843 | + size_t count, loff_t *ppos) | |
6844 | +{ | |
6845 | + unsigned long p = *ppos, ignored; | |
6846 | + ssize_t written = 0, sz; | |
6847 | + void __iomem *v; | |
6848 | + | |
6849 | + while (count > 0) { | |
6850 | + /* | |
6851 | + * Handle first page in case it's not aligned | |
6852 | + */ | |
6853 | + if (-p & (PAGE_SIZE - 1)) | |
6854 | + sz = -p & (PAGE_SIZE - 1); | |
6855 | + else | |
6856 | + sz = PAGE_SIZE; | |
6857 | + | |
6858 | + sz = min_t(unsigned long, sz, count); | |
6859 | + | |
6860 | + v = ioremap(p, sz); | |
6861 | + if (v == NULL) | |
6862 | + break; | |
6863 | + if (IS_ERR(v)) { | |
6864 | + if (written == 0) | |
6865 | + return PTR_ERR(v); | |
6866 | + break; | |
6867 | + } | |
6868 | + | |
6869 | + ignored = copy_from_user(v, buf, sz); | |
6870 | + iounmap(v); | |
6871 | + if (ignored) { | |
6872 | + written += sz - ignored; | |
6873 | + if (written) | |
6874 | + break; | |
6875 | + return -EFAULT; | |
6876 | + } | |
6877 | + buf += sz; | |
6878 | + p += sz; | |
6879 | + count -= sz; | |
6880 | + written += sz; | |
6881 | + } | |
6882 | + | |
6883 | + *ppos += written; | |
6884 | + return written; | |
6885 | +} | |
6886 | + | |
6887 | +#ifndef ARCH_HAS_DEV_MEM_MMAP_MEM | |
6888 | +static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma) | |
6889 | +{ | |
6890 | + size_t size = vma->vm_end - vma->vm_start; | |
6891 | + | |
6892 | + if (uncached_access(file)) | |
6893 | + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | |
6894 | + | |
6895 | + /* We want to return the real error code, not EAGAIN. */ | |
6896 | + return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, | |
6897 | + size, vma->vm_page_prot, DOMID_IO); | |
6898 | +} | |
6899 | +#endif | |
6900 | + | |
6901 | +/* | |
6902 | + * The memory devices use the full 32/64 bits of the offset, and so we cannot | |
6903 | + * check against negative addresses: they are ok. The return value is weird, | |
6904 | + * though, in that case (0). | |
6905 | + * | |
6906 | + * also note that seeking relative to the "end of file" isn't supported: | |
6907 | + * it has no meaning, so it returns -EINVAL. | |
6908 | + */ | |
6909 | +static loff_t memory_lseek(struct file * file, loff_t offset, int orig) | |
6910 | +{ | |
6911 | + loff_t ret; | |
6912 | + | |
6913 | + mutex_lock(&file->f_dentry->d_inode->i_mutex); | |
6914 | + switch (orig) { | |
6915 | + case 0: | |
6916 | + file->f_pos = offset; | |
6917 | + ret = file->f_pos; | |
6918 | + force_successful_syscall_return(); | |
6919 | + break; | |
6920 | + case 1: | |
6921 | + file->f_pos += offset; | |
6922 | + ret = file->f_pos; | |
6923 | + force_successful_syscall_return(); | |
6924 | + break; | |
6925 | + default: | |
6926 | + ret = -EINVAL; | |
6927 | + } | |
6928 | + mutex_unlock(&file->f_dentry->d_inode->i_mutex); | |
6929 | + return ret; | |
6930 | +} | |
6931 | + | |
6932 | +static int open_mem(struct inode * inode, struct file * filp) | |
6933 | +{ | |
6934 | + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | |
6935 | +} | |
6936 | + | |
6937 | +const struct file_operations mem_fops = { | |
6938 | + .llseek = memory_lseek, | |
6939 | + .read = read_mem, | |
6940 | + .write = write_mem, | |
6941 | + .mmap = xen_mmap_mem, | |
6942 | + .open = open_mem, | |
6943 | +}; | |
6944 | Index: head-2008-11-25/drivers/xen/console/Makefile | |
6945 | =================================================================== | |
6946 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
6947 | +++ head-2008-11-25/drivers/xen/console/Makefile 2007-06-12 13:13:44.000000000 +0200 | |
6948 | @@ -0,0 +1,2 @@ | |
6949 | + | |
6950 | +obj-y := console.o xencons_ring.o | |
6951 | Index: head-2008-11-25/drivers/xen/console/console.c | |
6952 | =================================================================== | |
6953 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
6954 | +++ head-2008-11-25/drivers/xen/console/console.c 2007-10-15 09:39:38.000000000 +0200 | |
6955 | @@ -0,0 +1,731 @@ | |
6956 | +/****************************************************************************** | |
6957 | + * console.c | |
6958 | + * | |
6959 | + * Virtual console driver. | |
6960 | + * | |
6961 | + * Copyright (c) 2002-2004, K A Fraser. | |
6962 | + * | |
6963 | + * This program is free software; you can redistribute it and/or | |
6964 | + * modify it under the terms of the GNU General Public License version 2 | |
6965 | + * as published by the Free Software Foundation; or, when distributed | |
6966 | + * separately from the Linux kernel or incorporated into other | |
6967 | + * software packages, subject to the following license: | |
6968 | + * | |
6969 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
6970 | + * of this source file (the "Software"), to deal in the Software without | |
6971 | + * restriction, including without limitation the rights to use, copy, modify, | |
6972 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
6973 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
6974 | + * the following conditions: | |
6975 | + * | |
6976 | + * The above copyright notice and this permission notice shall be included in | |
6977 | + * all copies or substantial portions of the Software. | |
6978 | + * | |
6979 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
6980 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
6981 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
6982 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
6983 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
6984 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
6985 | + * IN THE SOFTWARE. | |
6986 | + */ | |
6987 | + | |
6988 | +#include <linux/version.h> | |
6989 | +#include <linux/module.h> | |
6990 | +#include <linux/errno.h> | |
6991 | +#include <linux/signal.h> | |
6992 | +#include <linux/sched.h> | |
6993 | +#include <linux/interrupt.h> | |
6994 | +#include <linux/tty.h> | |
6995 | +#include <linux/tty_flip.h> | |
6996 | +#include <linux/serial.h> | |
6997 | +#include <linux/major.h> | |
6998 | +#include <linux/ptrace.h> | |
6999 | +#include <linux/ioport.h> | |
7000 | +#include <linux/mm.h> | |
7001 | +#include <linux/slab.h> | |
7002 | +#include <linux/init.h> | |
7003 | +#include <linux/console.h> | |
7004 | +#include <linux/bootmem.h> | |
7005 | +#include <linux/sysrq.h> | |
7006 | +#include <linux/screen_info.h> | |
7007 | +#include <linux/vt.h> | |
7008 | +#include <asm/io.h> | |
7009 | +#include <asm/irq.h> | |
7010 | +#include <asm/uaccess.h> | |
7011 | +#include <xen/interface/xen.h> | |
7012 | +#include <xen/interface/event_channel.h> | |
7013 | +#include <asm/hypervisor.h> | |
7014 | +#include <xen/evtchn.h> | |
7015 | +#include <xen/xenbus.h> | |
7016 | +#include <xen/xencons.h> | |
7017 | + | |
7018 | +/* | |
7019 | + * Modes: | |
7020 | + * 'xencons=off' [XC_OFF]: Console is disabled. | |
7021 | + * 'xencons=tty' [XC_TTY]: Console attached to '/dev/tty[0-9]+'. | |
7022 | + * 'xencons=ttyS' [XC_SERIAL]: Console attached to '/dev/ttyS[0-9]+'. | |
7023 | + * 'xencons=xvc' [XC_XVC]: Console attached to '/dev/xvc0'. | |
7024 | + * default: XC_XVC | |
7025 | + * | |
7026 | + * NB. In mode XC_TTY, we create dummy consoles for tty2-63. This suppresses | |
7027 | + * warnings from standard distro startup scripts. | |
7028 | + */ | |
7029 | +static enum { | |
7030 | + XC_OFF, XC_TTY, XC_SERIAL, XC_XVC | |
7031 | +} xc_mode = XC_XVC; | |
7032 | +static int xc_num = -1; | |
7033 | + | |
7034 | +/* /dev/xvc0 device number allocated by lanana.org. */ | |
7035 | +#define XEN_XVC_MAJOR 204 | |
7036 | +#define XEN_XVC_MINOR 191 | |
7037 | + | |
7038 | +#ifdef CONFIG_MAGIC_SYSRQ | |
7039 | +static unsigned long sysrq_requested; | |
7040 | +extern int sysrq_enabled; | |
7041 | +#endif | |
7042 | + | |
7043 | +static int __init xencons_setup(char *str) | |
7044 | +{ | |
7045 | + char *q; | |
7046 | + int n; | |
7047 | + extern int console_use_vt; | |
7048 | + | |
7049 | + console_use_vt = 1; | |
7050 | + if (!strncmp(str, "ttyS", 4)) { | |
7051 | + xc_mode = XC_SERIAL; | |
7052 | + str += 4; | |
7053 | + } else if (!strncmp(str, "tty", 3)) { | |
7054 | + xc_mode = XC_TTY; | |
7055 | + str += 3; | |
7056 | + console_use_vt = 0; | |
7057 | + } else if (!strncmp(str, "xvc", 3)) { | |
7058 | + xc_mode = XC_XVC; | |
7059 | + str += 3; | |
7060 | + } else if (!strncmp(str, "off", 3)) { | |
7061 | + xc_mode = XC_OFF; | |
7062 | + str += 3; | |
7063 | + } | |
7064 | + | |
7065 | + n = simple_strtol(str, &q, 10); | |
7066 | + if (q != str) | |
7067 | + xc_num = n; | |
7068 | + | |
7069 | + return 1; | |
7070 | +} | |
7071 | +__setup("xencons=", xencons_setup); | |
7072 | + | |
7073 | +/* The kernel and user-land drivers share a common transmit buffer. */ | |
7074 | +static unsigned int wbuf_size = 4096; | |
7075 | +#define WBUF_MASK(_i) ((_i)&(wbuf_size-1)) | |
7076 | +static char *wbuf; | |
7077 | +static unsigned int wc, wp; /* write_cons, write_prod */ | |
7078 | + | |
7079 | +static int __init xencons_bufsz_setup(char *str) | |
7080 | +{ | |
7081 | + unsigned int goal; | |
7082 | + goal = simple_strtoul(str, NULL, 0); | |
7083 | + if (goal) { | |
7084 | + goal = roundup_pow_of_two(goal); | |
7085 | + if (wbuf_size < goal) | |
7086 | + wbuf_size = goal; | |
7087 | + } | |
7088 | + return 1; | |
7089 | +} | |
7090 | +__setup("xencons_bufsz=", xencons_bufsz_setup); | |
7091 | + | |
7092 | +/* This lock protects accesses to the common transmit buffer. */ | |
7093 | +static DEFINE_SPINLOCK(xencons_lock); | |
7094 | + | |
7095 | +/* Common transmit-kick routine. */ | |
7096 | +static void __xencons_tx_flush(void); | |
7097 | + | |
7098 | +static struct tty_driver *xencons_driver; | |
7099 | + | |
7100 | +/******************** Kernel console driver ********************************/ | |
7101 | + | |
7102 | +static void kcons_write(struct console *c, const char *s, unsigned int count) | |
7103 | +{ | |
7104 | + int i = 0; | |
7105 | + unsigned long flags; | |
7106 | + | |
7107 | + spin_lock_irqsave(&xencons_lock, flags); | |
7108 | + | |
7109 | + while (i < count) { | |
7110 | + for (; i < count; i++) { | |
7111 | + if ((wp - wc) >= (wbuf_size - 1)) | |
7112 | + break; | |
7113 | + if ((wbuf[WBUF_MASK(wp++)] = s[i]) == '\n') | |
7114 | + wbuf[WBUF_MASK(wp++)] = '\r'; | |
7115 | + } | |
7116 | + | |
7117 | + __xencons_tx_flush(); | |
7118 | + } | |
7119 | + | |
7120 | + spin_unlock_irqrestore(&xencons_lock, flags); | |
7121 | +} | |
7122 | + | |
7123 | +static void kcons_write_dom0(struct console *c, const char *s, unsigned int count) | |
7124 | +{ | |
7125 | + | |
7126 | + while (count > 0) { | |
7127 | + int rc; | |
7128 | + rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s); | |
7129 | + if (rc <= 0) | |
7130 | + break; | |
7131 | + count -= rc; | |
7132 | + s += rc; | |
7133 | + } | |
7134 | +} | |
7135 | + | |
7136 | +static struct tty_driver *kcons_device(struct console *c, int *index) | |
7137 | +{ | |
7138 | + *index = 0; | |
7139 | + return xencons_driver; | |
7140 | +} | |
7141 | + | |
7142 | +static struct console kcons_info = { | |
7143 | + .device = kcons_device, | |
7144 | + .flags = CON_PRINTBUFFER | CON_ENABLED, | |
7145 | + .index = -1, | |
7146 | +}; | |
7147 | + | |
7148 | +static int __init xen_console_init(void) | |
7149 | +{ | |
7150 | + if (!is_running_on_xen()) | |
7151 | + goto out; | |
7152 | + | |
7153 | + if (is_initial_xendomain()) { | |
7154 | + kcons_info.write = kcons_write_dom0; | |
7155 | + } else { | |
7156 | + if (!xen_start_info->console.domU.evtchn) | |
7157 | + goto out; | |
7158 | + kcons_info.write = kcons_write; | |
7159 | + } | |
7160 | + | |
7161 | + switch (xc_mode) { | |
7162 | + case XC_XVC: | |
7163 | + strcpy(kcons_info.name, "xvc"); | |
7164 | + if (xc_num == -1) | |
7165 | + xc_num = 0; | |
7166 | + break; | |
7167 | + | |
7168 | + case XC_SERIAL: | |
7169 | + strcpy(kcons_info.name, "ttyS"); | |
7170 | + if (xc_num == -1) | |
7171 | + xc_num = 0; | |
7172 | + break; | |
7173 | + | |
7174 | + case XC_TTY: | |
7175 | + strcpy(kcons_info.name, "tty"); | |
7176 | + if (xc_num == -1) | |
7177 | + xc_num = 1; | |
7178 | + break; | |
7179 | + | |
7180 | + default: | |
7181 | + goto out; | |
7182 | + } | |
7183 | + | |
7184 | + wbuf = alloc_bootmem(wbuf_size); | |
7185 | + | |
7186 | + register_console(&kcons_info); | |
7187 | + | |
7188 | + out: | |
7189 | + return 0; | |
7190 | +} | |
7191 | +console_initcall(xen_console_init); | |
7192 | + | |
7193 | +/*** Useful function for console debugging -- goes straight to Xen. ***/ | |
7194 | +asmlinkage int xprintk(const char *fmt, ...) | |
7195 | +{ | |
7196 | + va_list args; | |
7197 | + int printk_len; | |
7198 | + static char printk_buf[1024]; | |
7199 | + | |
7200 | + /* Emit the output into the temporary buffer */ | |
7201 | + va_start(args, fmt); | |
7202 | + printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args); | |
7203 | + va_end(args); | |
7204 | + | |
7205 | + /* Send the processed output directly to Xen. */ | |
7206 | + kcons_write_dom0(NULL, printk_buf, printk_len); | |
7207 | + | |
7208 | + return 0; | |
7209 | +} | |
7210 | + | |
7211 | +/*** Forcibly flush console data before dying. ***/ | |
7212 | +void xencons_force_flush(void) | |
7213 | +{ | |
7214 | + int sz; | |
7215 | + | |
7216 | + /* Emergency console is synchronous, so there's nothing to flush. */ | |
7217 | + if (!is_running_on_xen() || | |
7218 | + is_initial_xendomain() || | |
7219 | + !xen_start_info->console.domU.evtchn) | |
7220 | + return; | |
7221 | + | |
7222 | + /* Spin until console data is flushed through to the daemon. */ | |
7223 | + while (wc != wp) { | |
7224 | + int sent = 0; | |
7225 | + if ((sz = wp - wc) == 0) | |
7226 | + continue; | |
7227 | + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); | |
7228 | + if (sent > 0) | |
7229 | + wc += sent; | |
7230 | + } | |
7231 | +} | |
7232 | + | |
7233 | + | |
7234 | +void __init dom0_init_screen_info(const struct dom0_vga_console_info *info, size_t size) | |
7235 | +{ | |
7236 | + /* This is drawn from a dump from vgacon:startup in | |
7237 | + * standard Linux. */ | |
7238 | + screen_info.orig_video_mode = 3; | |
7239 | + screen_info.orig_video_isVGA = 1; | |
7240 | + screen_info.orig_video_lines = 25; | |
7241 | + screen_info.orig_video_cols = 80; | |
7242 | + screen_info.orig_video_ega_bx = 3; | |
7243 | + screen_info.orig_video_points = 16; | |
7244 | + screen_info.orig_y = screen_info.orig_video_lines - 1; | |
7245 | + | |
7246 | + switch (info->video_type) { | |
7247 | + case XEN_VGATYPE_TEXT_MODE_3: | |
7248 | + if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3) | |
7249 | + + sizeof(info->u.text_mode_3)) | |
7250 | + break; | |
7251 | + screen_info.orig_video_lines = info->u.text_mode_3.rows; | |
7252 | + screen_info.orig_video_cols = info->u.text_mode_3.columns; | |
7253 | + screen_info.orig_x = info->u.text_mode_3.cursor_x; | |
7254 | + screen_info.orig_y = info->u.text_mode_3.cursor_y; | |
7255 | + screen_info.orig_video_points = | |
7256 | + info->u.text_mode_3.font_height; | |
7257 | + break; | |
7258 | + | |
7259 | + case XEN_VGATYPE_VESA_LFB: | |
7260 | + if (size < offsetof(struct dom0_vga_console_info, | |
7261 | + u.vesa_lfb.gbl_caps)) | |
7262 | + break; | |
7263 | + screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB; | |
7264 | + screen_info.lfb_width = info->u.vesa_lfb.width; | |
7265 | + screen_info.lfb_height = info->u.vesa_lfb.height; | |
7266 | + screen_info.lfb_depth = info->u.vesa_lfb.bits_per_pixel; | |
7267 | + screen_info.lfb_base = info->u.vesa_lfb.lfb_base; | |
7268 | + screen_info.lfb_size = info->u.vesa_lfb.lfb_size; | |
7269 | + screen_info.lfb_linelength = info->u.vesa_lfb.bytes_per_line; | |
7270 | + screen_info.red_size = info->u.vesa_lfb.red_size; | |
7271 | + screen_info.red_pos = info->u.vesa_lfb.red_pos; | |
7272 | + screen_info.green_size = info->u.vesa_lfb.green_size; | |
7273 | + screen_info.green_pos = info->u.vesa_lfb.green_pos; | |
7274 | + screen_info.blue_size = info->u.vesa_lfb.blue_size; | |
7275 | + screen_info.blue_pos = info->u.vesa_lfb.blue_pos; | |
7276 | + screen_info.rsvd_size = info->u.vesa_lfb.rsvd_size; | |
7277 | + screen_info.rsvd_pos = info->u.vesa_lfb.rsvd_pos; | |
7278 | + if (size >= offsetof(struct dom0_vga_console_info, | |
7279 | + u.vesa_lfb.gbl_caps) | |
7280 | + + sizeof(info->u.vesa_lfb.gbl_caps)) | |
7281 | + screen_info.capabilities = info->u.vesa_lfb.gbl_caps; | |
7282 | + if (size >= offsetof(struct dom0_vga_console_info, | |
7283 | + u.vesa_lfb.mode_attrs) | |
7284 | + + sizeof(info->u.vesa_lfb.mode_attrs)) | |
7285 | + screen_info.vesa_attributes = info->u.vesa_lfb.mode_attrs; | |
7286 | + break; | |
7287 | + } | |
7288 | +} | |
7289 | + | |
7290 | + | |
7291 | +/******************** User-space console driver (/dev/console) ************/ | |
7292 | + | |
7293 | +#define DRV(_d) (_d) | |
7294 | +#define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \ | |
7295 | + ((_tty)->index != (xc_num - 1))) | |
7296 | + | |
7297 | +static struct termios *xencons_termios[MAX_NR_CONSOLES]; | |
7298 | +static struct termios *xencons_termios_locked[MAX_NR_CONSOLES]; | |
7299 | +static struct tty_struct *xencons_tty; | |
7300 | +static int xencons_priv_irq; | |
7301 | +static char x_char; | |
7302 | + | |
7303 | +void xencons_rx(char *buf, unsigned len, struct pt_regs *regs) | |
7304 | +{ | |
7305 | + int i; | |
7306 | + unsigned long flags; | |
7307 | + | |
7308 | + spin_lock_irqsave(&xencons_lock, flags); | |
7309 | + if (xencons_tty == NULL) | |
7310 | + goto out; | |
7311 | + | |
7312 | + for (i = 0; i < len; i++) { | |
7313 | +#ifdef CONFIG_MAGIC_SYSRQ | |
7314 | + if (sysrq_enabled) { | |
7315 | + if (buf[i] == '\x0f') { /* ^O */ | |
7316 | + if (!sysrq_requested) { | |
7317 | + sysrq_requested = jiffies; | |
7318 | + continue; /* don't print sysrq key */ | |
7319 | + } | |
7320 | + sysrq_requested = 0; | |
7321 | + } else if (sysrq_requested) { | |
7322 | + unsigned long sysrq_timeout = | |
7323 | + sysrq_requested + HZ*2; | |
7324 | + sysrq_requested = 0; | |
7325 | + if (time_before(jiffies, sysrq_timeout)) { | |
7326 | + spin_unlock_irqrestore( | |
7327 | + &xencons_lock, flags); | |
7328 | + handle_sysrq( | |
7329 | + buf[i], regs, xencons_tty); | |
7330 | + spin_lock_irqsave( | |
7331 | + &xencons_lock, flags); | |
7332 | + continue; | |
7333 | + } | |
7334 | + } | |
7335 | + } | |
7336 | +#endif | |
7337 | + tty_insert_flip_char(xencons_tty, buf[i], 0); | |
7338 | + } | |
7339 | + tty_flip_buffer_push(xencons_tty); | |
7340 | + | |
7341 | + out: | |
7342 | + spin_unlock_irqrestore(&xencons_lock, flags); | |
7343 | +} | |
7344 | + | |
7345 | +static void __xencons_tx_flush(void) | |
7346 | +{ | |
7347 | + int sent, sz, work_done = 0; | |
7348 | + | |
7349 | + if (x_char) { | |
7350 | + if (is_initial_xendomain()) | |
7351 | + kcons_write_dom0(NULL, &x_char, 1); | |
7352 | + else | |
7353 | + while (x_char) | |
7354 | + if (xencons_ring_send(&x_char, 1) == 1) | |
7355 | + break; | |
7356 | + x_char = 0; | |
7357 | + work_done = 1; | |
7358 | + } | |
7359 | + | |
7360 | + while (wc != wp) { | |
7361 | + sz = wp - wc; | |
7362 | + if (sz > (wbuf_size - WBUF_MASK(wc))) | |
7363 | + sz = wbuf_size - WBUF_MASK(wc); | |
7364 | + if (is_initial_xendomain()) { | |
7365 | + kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz); | |
7366 | + wc += sz; | |
7367 | + } else { | |
7368 | + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); | |
7369 | + if (sent == 0) | |
7370 | + break; | |
7371 | + wc += sent; | |
7372 | + } | |
7373 | + work_done = 1; | |
7374 | + } | |
7375 | + | |
7376 | + if (work_done && (xencons_tty != NULL)) { | |
7377 | + wake_up_interruptible(&xencons_tty->write_wait); | |
7378 | + if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && | |
7379 | + (xencons_tty->ldisc.write_wakeup != NULL)) | |
7380 | + (xencons_tty->ldisc.write_wakeup)(xencons_tty); | |
7381 | + } | |
7382 | +} | |
7383 | + | |
7384 | +void xencons_tx(void) | |
7385 | +{ | |
7386 | + unsigned long flags; | |
7387 | + | |
7388 | + spin_lock_irqsave(&xencons_lock, flags); | |
7389 | + __xencons_tx_flush(); | |
7390 | + spin_unlock_irqrestore(&xencons_lock, flags); | |
7391 | +} | |
7392 | + | |
7393 | +/* Privileged receive callback and transmit kicker. */ | |
7394 | +static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id, | |
7395 | + struct pt_regs *regs) | |
7396 | +{ | |
7397 | + static char rbuf[16]; | |
7398 | + int l; | |
7399 | + | |
7400 | + while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0) | |
7401 | + xencons_rx(rbuf, l, regs); | |
7402 | + | |
7403 | + xencons_tx(); | |
7404 | + | |
7405 | + return IRQ_HANDLED; | |
7406 | +} | |
7407 | + | |
7408 | +static int xencons_write_room(struct tty_struct *tty) | |
7409 | +{ | |
7410 | + return wbuf_size - (wp - wc); | |
7411 | +} | |
7412 | + | |
7413 | +static int xencons_chars_in_buffer(struct tty_struct *tty) | |
7414 | +{ | |
7415 | + return wp - wc; | |
7416 | +} | |
7417 | + | |
7418 | +static void xencons_send_xchar(struct tty_struct *tty, char ch) | |
7419 | +{ | |
7420 | + unsigned long flags; | |
7421 | + | |
7422 | + if (DUMMY_TTY(tty)) | |
7423 | + return; | |
7424 | + | |
7425 | + spin_lock_irqsave(&xencons_lock, flags); | |
7426 | + x_char = ch; | |
7427 | + __xencons_tx_flush(); | |
7428 | + spin_unlock_irqrestore(&xencons_lock, flags); | |
7429 | +} | |
7430 | + | |
7431 | +static void xencons_throttle(struct tty_struct *tty) | |
7432 | +{ | |
7433 | + if (DUMMY_TTY(tty)) | |
7434 | + return; | |
7435 | + | |
7436 | + if (I_IXOFF(tty)) | |
7437 | + xencons_send_xchar(tty, STOP_CHAR(tty)); | |
7438 | +} | |
7439 | + | |
7440 | +static void xencons_unthrottle(struct tty_struct *tty) | |
7441 | +{ | |
7442 | + if (DUMMY_TTY(tty)) | |
7443 | + return; | |
7444 | + | |
7445 | + if (I_IXOFF(tty)) { | |
7446 | + if (x_char != 0) | |
7447 | + x_char = 0; | |
7448 | + else | |
7449 | + xencons_send_xchar(tty, START_CHAR(tty)); | |
7450 | + } | |
7451 | +} | |
7452 | + | |
7453 | +static void xencons_flush_buffer(struct tty_struct *tty) | |
7454 | +{ | |
7455 | + unsigned long flags; | |
7456 | + | |
7457 | + if (DUMMY_TTY(tty)) | |
7458 | + return; | |
7459 | + | |
7460 | + spin_lock_irqsave(&xencons_lock, flags); | |
7461 | + wc = wp = 0; | |
7462 | + spin_unlock_irqrestore(&xencons_lock, flags); | |
7463 | +} | |
7464 | + | |
7465 | +static inline int __xencons_put_char(int ch) | |
7466 | +{ | |
7467 | + char _ch = (char)ch; | |
7468 | + if ((wp - wc) == wbuf_size) | |
7469 | + return 0; | |
7470 | + wbuf[WBUF_MASK(wp++)] = _ch; | |
7471 | + return 1; | |
7472 | +} | |
7473 | + | |
7474 | +static int xencons_write( | |
7475 | + struct tty_struct *tty, | |
7476 | + const unsigned char *buf, | |
7477 | + int count) | |
7478 | +{ | |
7479 | + int i; | |
7480 | + unsigned long flags; | |
7481 | + | |
7482 | + if (DUMMY_TTY(tty)) | |
7483 | + return count; | |
7484 | + | |
7485 | + spin_lock_irqsave(&xencons_lock, flags); | |
7486 | + | |
7487 | + for (i = 0; i < count; i++) | |
7488 | + if (!__xencons_put_char(buf[i])) | |
7489 | + break; | |
7490 | + | |
7491 | + if (i != 0) | |
7492 | + __xencons_tx_flush(); | |
7493 | + | |
7494 | + spin_unlock_irqrestore(&xencons_lock, flags); | |
7495 | + | |
7496 | + return i; | |
7497 | +} | |
7498 | + | |
7499 | +static void xencons_put_char(struct tty_struct *tty, u_char ch) | |
7500 | +{ | |
7501 | + unsigned long flags; | |
7502 | + | |
7503 | + if (DUMMY_TTY(tty)) | |
7504 | + return; | |
7505 | + | |
7506 | + spin_lock_irqsave(&xencons_lock, flags); | |
7507 | + (void)__xencons_put_char(ch); | |
7508 | + spin_unlock_irqrestore(&xencons_lock, flags); | |
7509 | +} | |
7510 | + | |
7511 | +static void xencons_flush_chars(struct tty_struct *tty) | |
7512 | +{ | |
7513 | + unsigned long flags; | |
7514 | + | |
7515 | + if (DUMMY_TTY(tty)) | |
7516 | + return; | |
7517 | + | |
7518 | + spin_lock_irqsave(&xencons_lock, flags); | |
7519 | + __xencons_tx_flush(); | |
7520 | + spin_unlock_irqrestore(&xencons_lock, flags); | |
7521 | +} | |
7522 | + | |
7523 | +static void xencons_wait_until_sent(struct tty_struct *tty, int timeout) | |
7524 | +{ | |
7525 | + unsigned long orig_jiffies = jiffies; | |
7526 | + | |
7527 | + if (DUMMY_TTY(tty)) | |
7528 | + return; | |
7529 | + | |
7530 | + while (DRV(tty->driver)->chars_in_buffer(tty)) { | |
7531 | + set_current_state(TASK_INTERRUPTIBLE); | |
7532 | + schedule_timeout(1); | |
7533 | + if (signal_pending(current)) | |
7534 | + break; | |
7535 | + if (timeout && time_after(jiffies, orig_jiffies + timeout)) | |
7536 | + break; | |
7537 | + } | |
7538 | + | |
7539 | + set_current_state(TASK_RUNNING); | |
7540 | +} | |
7541 | + | |
7542 | +static int xencons_open(struct tty_struct *tty, struct file *filp) | |
7543 | +{ | |
7544 | + unsigned long flags; | |
7545 | + | |
7546 | + if (DUMMY_TTY(tty)) | |
7547 | + return 0; | |
7548 | + | |
7549 | + spin_lock_irqsave(&xencons_lock, flags); | |
7550 | + tty->driver_data = NULL; | |
7551 | + if (xencons_tty == NULL) | |
7552 | + xencons_tty = tty; | |
7553 | + __xencons_tx_flush(); | |
7554 | + spin_unlock_irqrestore(&xencons_lock, flags); | |
7555 | + | |
7556 | + return 0; | |
7557 | +} | |
7558 | + | |
7559 | +static void xencons_close(struct tty_struct *tty, struct file *filp) | |
7560 | +{ | |
7561 | + unsigned long flags; | |
7562 | + | |
7563 | + if (DUMMY_TTY(tty)) | |
7564 | + return; | |
7565 | + | |
7566 | + mutex_lock(&tty_mutex); | |
7567 | + | |
7568 | + if (tty->count != 1) { | |
7569 | + mutex_unlock(&tty_mutex); | |
7570 | + return; | |
7571 | + } | |
7572 | + | |
7573 | + /* Prevent other threads from re-opening this tty. */ | |
7574 | + set_bit(TTY_CLOSING, &tty->flags); | |
7575 | + mutex_unlock(&tty_mutex); | |
7576 | + | |
7577 | + tty->closing = 1; | |
7578 | + tty_wait_until_sent(tty, 0); | |
7579 | + if (DRV(tty->driver)->flush_buffer != NULL) | |
7580 | + DRV(tty->driver)->flush_buffer(tty); | |
7581 | + if (tty->ldisc.flush_buffer != NULL) | |
7582 | + tty->ldisc.flush_buffer(tty); | |
7583 | + tty->closing = 0; | |
7584 | + spin_lock_irqsave(&xencons_lock, flags); | |
7585 | + xencons_tty = NULL; | |
7586 | + spin_unlock_irqrestore(&xencons_lock, flags); | |
7587 | +} | |
7588 | + | |
7589 | +static struct tty_operations xencons_ops = { | |
7590 | + .open = xencons_open, | |
7591 | + .close = xencons_close, | |
7592 | + .write = xencons_write, | |
7593 | + .write_room = xencons_write_room, | |
7594 | + .put_char = xencons_put_char, | |
7595 | + .flush_chars = xencons_flush_chars, | |
7596 | + .chars_in_buffer = xencons_chars_in_buffer, | |
7597 | + .send_xchar = xencons_send_xchar, | |
7598 | + .flush_buffer = xencons_flush_buffer, | |
7599 | + .throttle = xencons_throttle, | |
7600 | + .unthrottle = xencons_unthrottle, | |
7601 | + .wait_until_sent = xencons_wait_until_sent, | |
7602 | +}; | |
7603 | + | |
7604 | +static int __init xencons_init(void) | |
7605 | +{ | |
7606 | + int rc; | |
7607 | + | |
7608 | + if (!is_running_on_xen()) | |
7609 | + return -ENODEV; | |
7610 | + | |
7611 | + if (xc_mode == XC_OFF) | |
7612 | + return 0; | |
7613 | + | |
7614 | + if (!is_initial_xendomain()) { | |
7615 | + rc = xencons_ring_init(); | |
7616 | + if (rc) | |
7617 | + return rc; | |
7618 | + } | |
7619 | + | |
7620 | + xencons_driver = alloc_tty_driver((xc_mode == XC_TTY) ? | |
7621 | + MAX_NR_CONSOLES : 1); | |
7622 | + if (xencons_driver == NULL) | |
7623 | + return -ENOMEM; | |
7624 | + | |
7625 | + DRV(xencons_driver)->name = "xencons"; | |
7626 | + DRV(xencons_driver)->major = TTY_MAJOR; | |
7627 | + DRV(xencons_driver)->type = TTY_DRIVER_TYPE_SERIAL; | |
7628 | + DRV(xencons_driver)->subtype = SERIAL_TYPE_NORMAL; | |
7629 | + DRV(xencons_driver)->init_termios = tty_std_termios; | |
7630 | + DRV(xencons_driver)->flags = | |
7631 | + TTY_DRIVER_REAL_RAW | | |
7632 | + TTY_DRIVER_RESET_TERMIOS; | |
7633 | + DRV(xencons_driver)->termios = xencons_termios; | |
7634 | + DRV(xencons_driver)->termios_locked = xencons_termios_locked; | |
7635 | + | |
7636 | + switch (xc_mode) { | |
7637 | + case XC_XVC: | |
7638 | + DRV(xencons_driver)->name = "xvc"; | |
7639 | + DRV(xencons_driver)->major = XEN_XVC_MAJOR; | |
7640 | + DRV(xencons_driver)->minor_start = XEN_XVC_MINOR; | |
7641 | + DRV(xencons_driver)->name_base = xc_num; | |
7642 | + break; | |
7643 | + case XC_SERIAL: | |
7644 | + DRV(xencons_driver)->name = "ttyS"; | |
7645 | + DRV(xencons_driver)->minor_start = 64 + xc_num; | |
7646 | + DRV(xencons_driver)->name_base = xc_num; | |
7647 | + break; | |
7648 | + default: | |
7649 | + DRV(xencons_driver)->name = "tty"; | |
7650 | + DRV(xencons_driver)->minor_start = 1; | |
7651 | + DRV(xencons_driver)->name_base = 1; | |
7652 | + break; | |
7653 | + } | |
7654 | + | |
7655 | + tty_set_operations(xencons_driver, &xencons_ops); | |
7656 | + | |
7657 | + if ((rc = tty_register_driver(DRV(xencons_driver))) != 0) { | |
7658 | + printk("WARNING: Failed to register Xen virtual " | |
7659 | + "console driver as '%s%d'\n", | |
7660 | + DRV(xencons_driver)->name, | |
7661 | + DRV(xencons_driver)->name_base); | |
7662 | + put_tty_driver(xencons_driver); | |
7663 | + xencons_driver = NULL; | |
7664 | + return rc; | |
7665 | + } | |
7666 | + | |
7667 | + if (is_initial_xendomain()) { | |
7668 | + xencons_priv_irq = bind_virq_to_irqhandler( | |
7669 | + VIRQ_CONSOLE, | |
7670 | + 0, | |
7671 | + xencons_priv_interrupt, | |
7672 | + 0, | |
7673 | + "console", | |
7674 | + NULL); | |
7675 | + BUG_ON(xencons_priv_irq < 0); | |
7676 | + } | |
7677 | + | |
7678 | + printk("Xen virtual console successfully installed as %s%d\n", | |
7679 | + DRV(xencons_driver)->name, xc_num); | |
7680 | + | |
7681 | + return 0; | |
7682 | +} | |
7683 | + | |
7684 | +module_init(xencons_init); | |
7685 | + | |
7686 | +MODULE_LICENSE("Dual BSD/GPL"); | |
7687 | Index: head-2008-11-25/drivers/xen/console/xencons_ring.c | |
7688 | =================================================================== | |
7689 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
7690 | +++ head-2008-11-25/drivers/xen/console/xencons_ring.c 2007-06-12 13:13:44.000000000 +0200 | |
7691 | @@ -0,0 +1,143 @@ | |
7692 | +/* | |
7693 | + * This program is free software; you can redistribute it and/or | |
7694 | + * modify it under the terms of the GNU General Public License version 2 | |
7695 | + * as published by the Free Software Foundation; or, when distributed | |
7696 | + * separately from the Linux kernel or incorporated into other | |
7697 | + * software packages, subject to the following license: | |
7698 | + * | |
7699 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
7700 | + * of this source file (the "Software"), to deal in the Software without | |
7701 | + * restriction, including without limitation the rights to use, copy, modify, | |
7702 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
7703 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
7704 | + * the following conditions: | |
7705 | + * | |
7706 | + * The above copyright notice and this permission notice shall be included in | |
7707 | + * all copies or substantial portions of the Software. | |
7708 | + * | |
7709 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
7710 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
7711 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
7712 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
7713 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
7714 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
7715 | + * IN THE SOFTWARE. | |
7716 | + */ | |
7717 | + | |
7718 | +#include <linux/version.h> | |
7719 | +#include <linux/module.h> | |
7720 | +#include <linux/errno.h> | |
7721 | +#include <linux/signal.h> | |
7722 | +#include <linux/sched.h> | |
7723 | +#include <linux/interrupt.h> | |
7724 | +#include <linux/tty.h> | |
7725 | +#include <linux/tty_flip.h> | |
7726 | +#include <linux/serial.h> | |
7727 | +#include <linux/major.h> | |
7728 | +#include <linux/ptrace.h> | |
7729 | +#include <linux/ioport.h> | |
7730 | +#include <linux/mm.h> | |
7731 | +#include <linux/slab.h> | |
7732 | + | |
7733 | +#include <asm/hypervisor.h> | |
7734 | +#include <xen/evtchn.h> | |
7735 | +#include <xen/xencons.h> | |
7736 | +#include <linux/wait.h> | |
7737 | +#include <linux/interrupt.h> | |
7738 | +#include <linux/sched.h> | |
7739 | +#include <linux/err.h> | |
7740 | +#include <xen/interface/io/console.h> | |
7741 | + | |
7742 | +static int xencons_irq; | |
7743 | + | |
7744 | +static inline struct xencons_interface *xencons_interface(void) | |
7745 | +{ | |
7746 | + return mfn_to_virt(xen_start_info->console.domU.mfn); | |
7747 | +} | |
7748 | + | |
7749 | +static inline void notify_daemon(void) | |
7750 | +{ | |
7751 | + /* Use evtchn: this is called early, before irq is set up. */ | |
7752 | + notify_remote_via_evtchn(xen_start_info->console.domU.evtchn); | |
7753 | +} | |
7754 | + | |
7755 | +int xencons_ring_send(const char *data, unsigned len) | |
7756 | +{ | |
7757 | + int sent = 0; | |
7758 | + struct xencons_interface *intf = xencons_interface(); | |
7759 | + XENCONS_RING_IDX cons, prod; | |
7760 | + | |
7761 | + cons = intf->out_cons; | |
7762 | + prod = intf->out_prod; | |
7763 | + mb(); | |
7764 | + BUG_ON((prod - cons) > sizeof(intf->out)); | |
7765 | + | |
7766 | + while ((sent < len) && ((prod - cons) < sizeof(intf->out))) | |
7767 | + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; | |
7768 | + | |
7769 | + wmb(); | |
7770 | + intf->out_prod = prod; | |
7771 | + | |
7772 | + notify_daemon(); | |
7773 | + | |
7774 | + return sent; | |
7775 | +} | |
7776 | + | |
7777 | +static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs) | |
7778 | +{ | |
7779 | + struct xencons_interface *intf = xencons_interface(); | |
7780 | + XENCONS_RING_IDX cons, prod; | |
7781 | + | |
7782 | + cons = intf->in_cons; | |
7783 | + prod = intf->in_prod; | |
7784 | + mb(); | |
7785 | + BUG_ON((prod - cons) > sizeof(intf->in)); | |
7786 | + | |
7787 | + while (cons != prod) { | |
7788 | + xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs); | |
7789 | + cons++; | |
7790 | + } | |
7791 | + | |
7792 | + mb(); | |
7793 | + intf->in_cons = cons; | |
7794 | + | |
7795 | + notify_daemon(); | |
7796 | + | |
7797 | + xencons_tx(); | |
7798 | + | |
7799 | + return IRQ_HANDLED; | |
7800 | +} | |
7801 | + | |
7802 | +int xencons_ring_init(void) | |
7803 | +{ | |
7804 | + int irq; | |
7805 | + | |
7806 | + if (xencons_irq) | |
7807 | + unbind_from_irqhandler(xencons_irq, NULL); | |
7808 | + xencons_irq = 0; | |
7809 | + | |
7810 | + if (!is_running_on_xen() || | |
7811 | + is_initial_xendomain() || | |
7812 | + !xen_start_info->console.domU.evtchn) | |
7813 | + return -ENODEV; | |
7814 | + | |
7815 | + irq = bind_caller_port_to_irqhandler( | |
7816 | + xen_start_info->console.domU.evtchn, | |
7817 | + handle_input, 0, "xencons", NULL); | |
7818 | + if (irq < 0) { | |
7819 | + printk(KERN_ERR "XEN console request irq failed %i\n", irq); | |
7820 | + return irq; | |
7821 | + } | |
7822 | + | |
7823 | + xencons_irq = irq; | |
7824 | + | |
7825 | + /* In case we have in-flight data after save/restore... */ | |
7826 | + notify_daemon(); | |
7827 | + | |
7828 | + return 0; | |
7829 | +} | |
7830 | + | |
7831 | +void xencons_resume(void) | |
7832 | +{ | |
7833 | + (void)xencons_ring_init(); | |
7834 | +} | |
7835 | Index: head-2008-11-25/drivers/xen/core/Makefile | |
7836 | =================================================================== | |
7837 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
7838 | +++ head-2008-11-25/drivers/xen/core/Makefile 2008-07-21 11:00:33.000000000 +0200 | |
7839 | @@ -0,0 +1,14 @@ | |
7840 | +# | |
7841 | +# Makefile for the linux kernel. | |
7842 | +# | |
7843 | + | |
7844 | +obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o firmware.o | |
7845 | + | |
7846 | +obj-$(CONFIG_PCI) += pci.o | |
7847 | +obj-$(CONFIG_PROC_FS) += xen_proc.o | |
7848 | +obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor_sysfs.o | |
7849 | +obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o | |
7850 | +obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o | |
7851 | +obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o | |
7852 | +obj-$(CONFIG_KEXEC) += machine_kexec.o | |
7853 | +obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | |
7854 | Index: head-2008-11-25/drivers/xen/core/cpu_hotplug.c | |
7855 | =================================================================== | |
7856 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
7857 | +++ head-2008-11-25/drivers/xen/core/cpu_hotplug.c 2008-01-21 11:15:26.000000000 +0100 | |
7858 | @@ -0,0 +1,173 @@ | |
7859 | +#include <linux/init.h> | |
7860 | +#include <linux/kernel.h> | |
7861 | +#include <linux/sched.h> | |
7862 | +#include <linux/notifier.h> | |
7863 | +#include <linux/cpu.h> | |
7864 | +#include <xen/cpu_hotplug.h> | |
7865 | +#include <xen/xenbus.h> | |
7866 | + | |
7867 | +/* | |
7868 | + * Set of CPUs that remote admin software will allow us to bring online. | |
7869 | + * Notified to us via xenbus. | |
7870 | + */ | |
7871 | +static cpumask_t xenbus_allowed_cpumask; | |
7872 | + | |
7873 | +/* Set of CPUs that local admin will allow us to bring online. */ | |
7874 | +static cpumask_t local_allowed_cpumask = CPU_MASK_ALL; | |
7875 | + | |
7876 | +static int local_cpu_hotplug_request(void) | |
7877 | +{ | |
7878 | + /* | |
7879 | + * We assume a CPU hotplug request comes from local admin if it is made | |
7880 | + * via a userspace process (i.e., one with a real mm_struct). | |
7881 | + */ | |
7882 | + return (current->mm != NULL); | |
7883 | +} | |
7884 | + | |
7885 | +static void vcpu_hotplug(unsigned int cpu) | |
7886 | +{ | |
7887 | + int err; | |
7888 | + char dir[32], state[32]; | |
7889 | + | |
7890 | + if ((cpu >= NR_CPUS) || !cpu_possible(cpu)) | |
7891 | + return; | |
7892 | + | |
7893 | + sprintf(dir, "cpu/%u", cpu); | |
7894 | + err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); | |
7895 | + if (err != 1) { | |
7896 | + printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); | |
7897 | + return; | |
7898 | + } | |
7899 | + | |
7900 | + if (strcmp(state, "online") == 0) { | |
7901 | + cpu_set(cpu, xenbus_allowed_cpumask); | |
7902 | + (void)cpu_up(cpu); | |
7903 | + } else if (strcmp(state, "offline") == 0) { | |
7904 | + cpu_clear(cpu, xenbus_allowed_cpumask); | |
7905 | + (void)cpu_down(cpu); | |
7906 | + } else { | |
7907 | + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", | |
7908 | + state, cpu); | |
7909 | + } | |
7910 | +} | |
7911 | + | |
7912 | +static void handle_vcpu_hotplug_event( | |
7913 | + struct xenbus_watch *watch, const char **vec, unsigned int len) | |
7914 | +{ | |
7915 | + unsigned int cpu; | |
7916 | + char *cpustr; | |
7917 | + const char *node = vec[XS_WATCH_PATH]; | |
7918 | + | |
7919 | + if ((cpustr = strstr(node, "cpu/")) != NULL) { | |
7920 | + sscanf(cpustr, "cpu/%u", &cpu); | |
7921 | + vcpu_hotplug(cpu); | |
7922 | + } | |
7923 | +} | |
7924 | + | |
7925 | +static int smpboot_cpu_notify(struct notifier_block *notifier, | |
7926 | + unsigned long action, void *hcpu) | |
7927 | +{ | |
7928 | + unsigned int cpu = (long)hcpu; | |
7929 | + | |
7930 | + /* | |
7931 | + * We do this in a callback notifier rather than __cpu_disable() | |
7932 | + * because local_cpu_hotplug_request() does not work in the latter | |
7933 | + * as it's always executed from within a stopmachine kthread. | |
7934 | + */ | |
7935 | + if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request()) | |
7936 | + cpu_clear(cpu, local_allowed_cpumask); | |
7937 | + | |
7938 | + return NOTIFY_OK; | |
7939 | +} | |
7940 | + | |
7941 | +static int setup_cpu_watcher(struct notifier_block *notifier, | |
7942 | + unsigned long event, void *data) | |
7943 | +{ | |
7944 | + unsigned int i; | |
7945 | + | |
7946 | + static struct xenbus_watch cpu_watch = { | |
7947 | + .node = "cpu", | |
7948 | + .callback = handle_vcpu_hotplug_event, | |
7949 | + .flags = XBWF_new_thread }; | |
7950 | + (void)register_xenbus_watch(&cpu_watch); | |
7951 | + | |
7952 | + if (!is_initial_xendomain()) { | |
7953 | + for_each_possible_cpu(i) | |
7954 | + vcpu_hotplug(i); | |
7955 | + printk(KERN_INFO "Brought up %ld CPUs\n", | |
7956 | + (long)num_online_cpus()); | |
7957 | + } | |
7958 | + | |
7959 | + return NOTIFY_DONE; | |
7960 | +} | |
7961 | + | |
7962 | +static int __init setup_vcpu_hotplug_event(void) | |
7963 | +{ | |
7964 | + static struct notifier_block hotplug_cpu = { | |
7965 | + .notifier_call = smpboot_cpu_notify }; | |
7966 | + static struct notifier_block xsn_cpu = { | |
7967 | + .notifier_call = setup_cpu_watcher }; | |
7968 | + | |
7969 | + if (!is_running_on_xen()) | |
7970 | + return -ENODEV; | |
7971 | + | |
7972 | + register_cpu_notifier(&hotplug_cpu); | |
7973 | + register_xenstore_notifier(&xsn_cpu); | |
7974 | + | |
7975 | + return 0; | |
7976 | +} | |
7977 | + | |
7978 | +arch_initcall(setup_vcpu_hotplug_event); | |
7979 | + | |
7980 | +int smp_suspend(void) | |
7981 | +{ | |
7982 | + unsigned int cpu; | |
7983 | + int err; | |
7984 | + | |
7985 | + for_each_online_cpu(cpu) { | |
7986 | + if (cpu == 0) | |
7987 | + continue; | |
7988 | + err = cpu_down(cpu); | |
7989 | + if (err) { | |
7990 | + printk(KERN_CRIT "Failed to take all CPUs " | |
7991 | + "down: %d.\n", err); | |
7992 | + for_each_possible_cpu(cpu) | |
7993 | + vcpu_hotplug(cpu); | |
7994 | + return err; | |
7995 | + } | |
7996 | + } | |
7997 | + | |
7998 | + return 0; | |
7999 | +} | |
8000 | + | |
8001 | +void smp_resume(void) | |
8002 | +{ | |
8003 | + unsigned int cpu; | |
8004 | + | |
8005 | + for_each_possible_cpu(cpu) | |
8006 | + vcpu_hotplug(cpu); | |
8007 | +} | |
8008 | + | |
8009 | +int cpu_up_check(unsigned int cpu) | |
8010 | +{ | |
8011 | + int rc = 0; | |
8012 | + | |
8013 | + if (local_cpu_hotplug_request()) { | |
8014 | + cpu_set(cpu, local_allowed_cpumask); | |
8015 | + if (!cpu_isset(cpu, xenbus_allowed_cpumask)) { | |
8016 | + printk("%s: attempt to bring up CPU %u disallowed by " | |
8017 | + "remote admin.\n", __FUNCTION__, cpu); | |
8018 | + rc = -EBUSY; | |
8019 | + } | |
8020 | + } else if (!cpu_isset(cpu, local_allowed_cpumask) || | |
8021 | + !cpu_isset(cpu, xenbus_allowed_cpumask)) { | |
8022 | + rc = -EBUSY; | |
8023 | + } | |
8024 | + | |
8025 | + return rc; | |
8026 | +} | |
8027 | + | |
8028 | +void init_xenbus_allowed_cpumask(void) | |
8029 | +{ | |
8030 | + xenbus_allowed_cpumask = cpu_present_map; | |
8031 | +} | |
8032 | Index: head-2008-11-25/drivers/xen/core/evtchn.c | |
8033 | =================================================================== | |
8034 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
8035 | +++ head-2008-11-25/drivers/xen/core/evtchn.c 2008-11-10 11:44:21.000000000 +0100 | |
8036 | @@ -0,0 +1,1140 @@ | |
8037 | +/****************************************************************************** | |
8038 | + * evtchn.c | |
8039 | + * | |
8040 | + * Communication via Xen event channels. | |
8041 | + * | |
8042 | + * Copyright (c) 2002-2005, K A Fraser | |
8043 | + * | |
8044 | + * This program is free software; you can redistribute it and/or | |
8045 | + * modify it under the terms of the GNU General Public License version 2 | |
8046 | + * as published by the Free Software Foundation; or, when distributed | |
8047 | + * separately from the Linux kernel or incorporated into other | |
8048 | + * software packages, subject to the following license: | |
8049 | + * | |
8050 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
8051 | + * of this source file (the "Software"), to deal in the Software without | |
8052 | + * restriction, including without limitation the rights to use, copy, modify, | |
8053 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
8054 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
8055 | + * the following conditions: | |
8056 | + * | |
8057 | + * The above copyright notice and this permission notice shall be included in | |
8058 | + * all copies or substantial portions of the Software. | |
8059 | + * | |
8060 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
8061 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
8062 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
8063 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
8064 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
8065 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
8066 | + * IN THE SOFTWARE. | |
8067 | + */ | |
8068 | + | |
8069 | +#include <linux/module.h> | |
8070 | +#include <linux/irq.h> | |
8071 | +#include <linux/interrupt.h> | |
8072 | +#include <linux/sched.h> | |
8073 | +#include <linux/kernel_stat.h> | |
8074 | +#include <linux/version.h> | |
8075 | +#include <asm/atomic.h> | |
8076 | +#include <asm/system.h> | |
8077 | +#include <asm/ptrace.h> | |
8078 | +#include <asm/synch_bitops.h> | |
8079 | +#include <xen/evtchn.h> | |
8080 | +#include <xen/interface/event_channel.h> | |
8081 | +#include <xen/interface/physdev.h> | |
8082 | +#include <asm/hypervisor.h> | |
8083 | +#include <linux/mc146818rtc.h> /* RTC_IRQ */ | |
8084 | + | |
8085 | +/* | |
8086 | + * This lock protects updates to the following mapping and reference-count | |
8087 | + * arrays. The lock does not need to be acquired to read the mapping tables. | |
8088 | + */ | |
8089 | +static DEFINE_SPINLOCK(irq_mapping_update_lock); | |
8090 | + | |
8091 | +/* IRQ <-> event-channel mappings. */ | |
8092 | +static int evtchn_to_irq[NR_EVENT_CHANNELS] = { | |
8093 | + [0 ... NR_EVENT_CHANNELS-1] = -1 }; | |
8094 | + | |
8095 | +/* Packed IRQ information: binding type, sub-type index, and event channel. */ | |
8096 | +static u32 irq_info[NR_IRQS]; | |
8097 | + | |
8098 | +/* Binding types. */ | |
8099 | +enum { | |
8100 | + IRQT_UNBOUND, | |
8101 | + IRQT_PIRQ, | |
8102 | + IRQT_VIRQ, | |
8103 | + IRQT_IPI, | |
8104 | + IRQT_LOCAL_PORT, | |
8105 | + IRQT_CALLER_PORT, | |
8106 | + _IRQT_COUNT | |
8107 | +}; | |
8108 | + | |
8109 | +#define _IRQT_BITS 4 | |
8110 | +#define _EVTCHN_BITS 12 | |
8111 | +#define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS) | |
8112 | + | |
8113 | +/* Constructor for packed IRQ information. */ | |
8114 | +static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn) | |
8115 | +{ | |
8116 | + BUILD_BUG_ON(_IRQT_COUNT > (1U << _IRQT_BITS)); | |
8117 | + | |
8118 | + BUILD_BUG_ON(NR_PIRQS > (1U << _INDEX_BITS)); | |
8119 | + BUILD_BUG_ON(NR_VIRQS > (1U << _INDEX_BITS)); | |
8120 | + BUILD_BUG_ON(NR_IPIS > (1U << _INDEX_BITS)); | |
8121 | + BUG_ON(index >> _INDEX_BITS); | |
8122 | + | |
8123 | + BUILD_BUG_ON(NR_EVENT_CHANNELS > (1U << _EVTCHN_BITS)); | |
8124 | + | |
8125 | + return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn); | |
8126 | +} | |
8127 | + | |
8128 | +/* Convenient shorthand for packed representation of an unbound IRQ. */ | |
8129 | +#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) | |
8130 | + | |
8131 | +/* | |
8132 | + * Accessors for packed IRQ information. | |
8133 | + */ | |
8134 | + | |
8135 | +static inline unsigned int evtchn_from_irq(int irq) | |
8136 | +{ | |
8137 | + return irq_info[irq] & ((1U << _EVTCHN_BITS) - 1); | |
8138 | +} | |
8139 | + | |
8140 | +static inline unsigned int index_from_irq(int irq) | |
8141 | +{ | |
8142 | + return (irq_info[irq] >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1); | |
8143 | +} | |
8144 | + | |
8145 | +static inline unsigned int type_from_irq(int irq) | |
8146 | +{ | |
8147 | + return irq_info[irq] >> (32 - _IRQT_BITS); | |
8148 | +} | |
8149 | + | |
8150 | +/* IRQ <-> VIRQ mapping. */ | |
8151 | +DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; | |
8152 | + | |
8153 | +/* IRQ <-> IPI mapping. */ | |
8154 | +#ifndef NR_IPIS | |
8155 | +#define NR_IPIS 1 | |
8156 | +#endif | |
8157 | +DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1}; | |
8158 | + | |
8159 | +/* Reference counts for bindings to IRQs. */ | |
8160 | +static int irq_bindcount[NR_IRQS]; | |
8161 | + | |
8162 | +/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */ | |
8163 | +static DECLARE_BITMAP(pirq_needs_eoi, NR_PIRQS); | |
8164 | + | |
8165 | +#ifdef CONFIG_SMP | |
8166 | + | |
8167 | +static u8 cpu_evtchn[NR_EVENT_CHANNELS]; | |
8168 | +static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; | |
8169 | + | |
8170 | +static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh, | |
8171 | + unsigned int idx) | |
8172 | +{ | |
8173 | + return (sh->evtchn_pending[idx] & | |
8174 | + cpu_evtchn_mask[cpu][idx] & | |
8175 | + ~sh->evtchn_mask[idx]); | |
8176 | +} | |
8177 | + | |
8178 | +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) | |
8179 | +{ | |
8180 | + shared_info_t *s = HYPERVISOR_shared_info; | |
8181 | + int irq = evtchn_to_irq[chn]; | |
8182 | + | |
8183 | + BUG_ON(!test_bit(chn, s->evtchn_mask)); | |
8184 | + | |
8185 | + if (irq != -1) | |
8186 | + set_native_irq_info(irq, cpumask_of_cpu(cpu)); | |
8187 | + | |
8188 | + clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); | |
8189 | + set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); | |
8190 | + cpu_evtchn[chn] = cpu; | |
8191 | +} | |
8192 | + | |
8193 | +static void init_evtchn_cpu_bindings(void) | |
8194 | +{ | |
8195 | + int i; | |
8196 | + | |
8197 | + /* By default all event channels notify CPU#0. */ | |
8198 | + for (i = 0; i < NR_IRQS; i++) | |
8199 | + set_native_irq_info(i, cpumask_of_cpu(0)); | |
8200 | + | |
8201 | + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); | |
8202 | + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); | |
8203 | +} | |
8204 | + | |
8205 | +static inline unsigned int cpu_from_evtchn(unsigned int evtchn) | |
8206 | +{ | |
8207 | + return cpu_evtchn[evtchn]; | |
8208 | +} | |
8209 | + | |
8210 | +#else | |
8211 | + | |
8212 | +static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh, | |
8213 | + unsigned int idx) | |
8214 | +{ | |
8215 | + return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]); | |
8216 | +} | |
8217 | + | |
8218 | +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) | |
8219 | +{ | |
8220 | +} | |
8221 | + | |
8222 | +static void init_evtchn_cpu_bindings(void) | |
8223 | +{ | |
8224 | +} | |
8225 | + | |
8226 | +static inline unsigned int cpu_from_evtchn(unsigned int evtchn) | |
8227 | +{ | |
8228 | + return 0; | |
8229 | +} | |
8230 | + | |
8231 | +#endif | |
8232 | + | |
8233 | +/* Upcall to generic IRQ layer. */ | |
8234 | +#ifdef CONFIG_X86 | |
8235 | +extern fastcall unsigned int do_IRQ(struct pt_regs *regs); | |
8236 | +void __init xen_init_IRQ(void); | |
8237 | +void __init init_IRQ(void) | |
8238 | +{ | |
8239 | + irq_ctx_init(0); | |
8240 | + xen_init_IRQ(); | |
8241 | +} | |
8242 | +#if defined (__i386__) | |
8243 | +static inline void exit_idle(void) {} | |
8244 | +#define IRQ_REG orig_eax | |
8245 | +#elif defined (__x86_64__) | |
8246 | +#include <asm/idle.h> | |
8247 | +#define IRQ_REG orig_rax | |
8248 | +#endif | |
8249 | +#define do_IRQ(irq, regs) do { \ | |
8250 | + (regs)->IRQ_REG = ~(irq); \ | |
8251 | + do_IRQ((regs)); \ | |
8252 | +} while (0) | |
8253 | +#endif | |
8254 | + | |
8255 | +/* Xen will never allocate port zero for any purpose. */ | |
8256 | +#define VALID_EVTCHN(chn) ((chn) != 0) | |
8257 | + | |
8258 | +/* | |
8259 | + * Force a proper event-channel callback from Xen after clearing the | |
8260 | + * callback mask. We do this in a very simple manner, by making a call | |
8261 | + * down into Xen. The pending flag will be checked by Xen on return. | |
8262 | + */ | |
8263 | +void force_evtchn_callback(void) | |
8264 | +{ | |
8265 | + VOID(HYPERVISOR_xen_version(0, NULL)); | |
8266 | +} | |
8267 | +/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */ | |
8268 | +EXPORT_SYMBOL(force_evtchn_callback); | |
8269 | + | |
8270 | +static DEFINE_PER_CPU(unsigned int, upcall_count) = { 0 }; | |
8271 | +static DEFINE_PER_CPU(unsigned int, last_processed_l1i) = { BITS_PER_LONG - 1 }; | |
8272 | +static DEFINE_PER_CPU(unsigned int, last_processed_l2i) = { BITS_PER_LONG - 1 }; | |
8273 | + | |
8274 | +/* NB. Interrupts are disabled on entry. */ | |
8275 | +asmlinkage void evtchn_do_upcall(struct pt_regs *regs) | |
8276 | +{ | |
8277 | + unsigned long l1, l2; | |
8278 | + unsigned long masked_l1, masked_l2; | |
8279 | + unsigned int l1i, l2i, port, count; | |
8280 | + int irq; | |
8281 | + unsigned int cpu = smp_processor_id(); | |
8282 | + shared_info_t *s = HYPERVISOR_shared_info; | |
8283 | + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; | |
8284 | + | |
8285 | + exit_idle(); | |
8286 | + irq_enter(); | |
8287 | + | |
8288 | + do { | |
8289 | + /* Avoid a callback storm when we reenable delivery. */ | |
8290 | + vcpu_info->evtchn_upcall_pending = 0; | |
8291 | + | |
8292 | + /* Nested invocations bail immediately. */ | |
8293 | + if (unlikely(per_cpu(upcall_count, cpu)++)) | |
8294 | + break; | |
8295 | + | |
8296 | +#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ | |
8297 | + /* Clear master flag /before/ clearing selector flag. */ | |
8298 | + wmb(); | |
8299 | +#endif | |
8300 | + l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); | |
8301 | + | |
8302 | + l1i = per_cpu(last_processed_l1i, cpu); | |
8303 | + l2i = per_cpu(last_processed_l2i, cpu); | |
8304 | + | |
8305 | + while (l1 != 0) { | |
8306 | + | |
8307 | + l1i = (l1i + 1) % BITS_PER_LONG; | |
8308 | + masked_l1 = l1 & ((~0UL) << l1i); | |
8309 | + | |
8310 | + if (masked_l1 == 0) { /* if we masked out all events, wrap around to the beginning */ | |
8311 | + l1i = BITS_PER_LONG - 1; | |
8312 | + l2i = BITS_PER_LONG - 1; | |
8313 | + continue; | |
8314 | + } | |
8315 | + l1i = __ffs(masked_l1); | |
8316 | + | |
8317 | + do { | |
8318 | + l2 = active_evtchns(cpu, s, l1i); | |
8319 | + | |
8320 | + l2i = (l2i + 1) % BITS_PER_LONG; | |
8321 | + masked_l2 = l2 & ((~0UL) << l2i); | |
8322 | + | |
8323 | + if (masked_l2 == 0) { /* if we masked out all events, move on */ | |
8324 | + l2i = BITS_PER_LONG - 1; | |
8325 | + break; | |
8326 | + } | |
8327 | + | |
8328 | + l2i = __ffs(masked_l2); | |
8329 | + | |
8330 | + /* process port */ | |
8331 | + port = (l1i * BITS_PER_LONG) + l2i; | |
8332 | + if ((irq = evtchn_to_irq[port]) != -1) | |
8333 | + do_IRQ(irq, regs); | |
8334 | + else | |
8335 | + evtchn_device_upcall(port); | |
8336 | + | |
8337 | + /* if this is the final port processed, we'll pick up here+1 next time */ | |
8338 | + per_cpu(last_processed_l1i, cpu) = l1i; | |
8339 | + per_cpu(last_processed_l2i, cpu) = l2i; | |
8340 | + | |
8341 | + } while (l2i != BITS_PER_LONG - 1); | |
8342 | + | |
8343 | + l2 = active_evtchns(cpu, s, l1i); | |
8344 | + if (l2 == 0) /* we handled all ports, so we can clear the selector bit */ | |
8345 | + l1 &= ~(1UL << l1i); | |
8346 | + | |
8347 | + } | |
8348 | + | |
8349 | + /* If there were nested callbacks then we have more to do. */ | |
8350 | + count = per_cpu(upcall_count, cpu); | |
8351 | + per_cpu(upcall_count, cpu) = 0; | |
8352 | + } while (unlikely(count != 1)); | |
8353 | + | |
8354 | + irq_exit(); | |
8355 | +} | |
8356 | + | |
8357 | +static int find_unbound_irq(void) | |
8358 | +{ | |
8359 | + static int warned; | |
8360 | + int irq; | |
8361 | + | |
8362 | + for (irq = DYNIRQ_BASE; irq < (DYNIRQ_BASE + NR_DYNIRQS); irq++) | |
8363 | + if (irq_bindcount[irq] == 0) | |
8364 | + return irq; | |
8365 | + | |
8366 | + if (!warned) { | |
8367 | + warned = 1; | |
8368 | + printk(KERN_WARNING "No available IRQ to bind to: " | |
8369 | + "increase NR_DYNIRQS.\n"); | |
8370 | + } | |
8371 | + | |
8372 | + return -ENOSPC; | |
8373 | +} | |
8374 | + | |
8375 | +static int bind_caller_port_to_irq(unsigned int caller_port) | |
8376 | +{ | |
8377 | + int irq; | |
8378 | + | |
8379 | + spin_lock(&irq_mapping_update_lock); | |
8380 | + | |
8381 | + if ((irq = evtchn_to_irq[caller_port]) == -1) { | |
8382 | + if ((irq = find_unbound_irq()) < 0) | |
8383 | + goto out; | |
8384 | + | |
8385 | + evtchn_to_irq[caller_port] = irq; | |
8386 | + irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port); | |
8387 | + } | |
8388 | + | |
8389 | + irq_bindcount[irq]++; | |
8390 | + | |
8391 | + out: | |
8392 | + spin_unlock(&irq_mapping_update_lock); | |
8393 | + return irq; | |
8394 | +} | |
8395 | + | |
8396 | +static int bind_local_port_to_irq(unsigned int local_port) | |
8397 | +{ | |
8398 | + int irq; | |
8399 | + | |
8400 | + spin_lock(&irq_mapping_update_lock); | |
8401 | + | |
8402 | + BUG_ON(evtchn_to_irq[local_port] != -1); | |
8403 | + | |
8404 | + if ((irq = find_unbound_irq()) < 0) { | |
8405 | + struct evtchn_close close = { .port = local_port }; | |
8406 | + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) | |
8407 | + BUG(); | |
8408 | + goto out; | |
8409 | + } | |
8410 | + | |
8411 | + evtchn_to_irq[local_port] = irq; | |
8412 | + irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port); | |
8413 | + irq_bindcount[irq]++; | |
8414 | + | |
8415 | + out: | |
8416 | + spin_unlock(&irq_mapping_update_lock); | |
8417 | + return irq; | |
8418 | +} | |
8419 | + | |
8420 | +static int bind_listening_port_to_irq(unsigned int remote_domain) | |
8421 | +{ | |
8422 | + struct evtchn_alloc_unbound alloc_unbound; | |
8423 | + int err; | |
8424 | + | |
8425 | + alloc_unbound.dom = DOMID_SELF; | |
8426 | + alloc_unbound.remote_dom = remote_domain; | |
8427 | + | |
8428 | + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, | |
8429 | + &alloc_unbound); | |
8430 | + | |
8431 | + return err ? : bind_local_port_to_irq(alloc_unbound.port); | |
8432 | +} | |
8433 | + | |
8434 | +static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, | |
8435 | + unsigned int remote_port) | |
8436 | +{ | |
8437 | + struct evtchn_bind_interdomain bind_interdomain; | |
8438 | + int err; | |
8439 | + | |
8440 | + bind_interdomain.remote_dom = remote_domain; | |
8441 | + bind_interdomain.remote_port = remote_port; | |
8442 | + | |
8443 | + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, | |
8444 | + &bind_interdomain); | |
8445 | + | |
8446 | + return err ? : bind_local_port_to_irq(bind_interdomain.local_port); | |
8447 | +} | |
8448 | + | |
8449 | +static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) | |
8450 | +{ | |
8451 | + struct evtchn_bind_virq bind_virq; | |
8452 | + int evtchn, irq; | |
8453 | + | |
8454 | + spin_lock(&irq_mapping_update_lock); | |
8455 | + | |
8456 | + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) { | |
8457 | + if ((irq = find_unbound_irq()) < 0) | |
8458 | + goto out; | |
8459 | + | |
8460 | + bind_virq.virq = virq; | |
8461 | + bind_virq.vcpu = cpu; | |
8462 | + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | |
8463 | + &bind_virq) != 0) | |
8464 | + BUG(); | |
8465 | + evtchn = bind_virq.port; | |
8466 | + | |
8467 | + evtchn_to_irq[evtchn] = irq; | |
8468 | + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); | |
8469 | + | |
8470 | + per_cpu(virq_to_irq, cpu)[virq] = irq; | |
8471 | + | |
8472 | + bind_evtchn_to_cpu(evtchn, cpu); | |
8473 | + } | |
8474 | + | |
8475 | + irq_bindcount[irq]++; | |
8476 | + | |
8477 | + out: | |
8478 | + spin_unlock(&irq_mapping_update_lock); | |
8479 | + return irq; | |
8480 | +} | |
8481 | + | |
8482 | +static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) | |
8483 | +{ | |
8484 | + struct evtchn_bind_ipi bind_ipi; | |
8485 | + int evtchn, irq; | |
8486 | + | |
8487 | + spin_lock(&irq_mapping_update_lock); | |
8488 | + | |
8489 | + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) { | |
8490 | + if ((irq = find_unbound_irq()) < 0) | |
8491 | + goto out; | |
8492 | + | |
8493 | + bind_ipi.vcpu = cpu; | |
8494 | + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, | |
8495 | + &bind_ipi) != 0) | |
8496 | + BUG(); | |
8497 | + evtchn = bind_ipi.port; | |
8498 | + | |
8499 | + evtchn_to_irq[evtchn] = irq; | |
8500 | + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); | |
8501 | + | |
8502 | + per_cpu(ipi_to_irq, cpu)[ipi] = irq; | |
8503 | + | |
8504 | + bind_evtchn_to_cpu(evtchn, cpu); | |
8505 | + } | |
8506 | + | |
8507 | + irq_bindcount[irq]++; | |
8508 | + | |
8509 | + out: | |
8510 | + spin_unlock(&irq_mapping_update_lock); | |
8511 | + return irq; | |
8512 | +} | |
8513 | + | |
8514 | +static void unbind_from_irq(unsigned int irq) | |
8515 | +{ | |
8516 | + struct evtchn_close close; | |
8517 | + unsigned int cpu; | |
8518 | + int evtchn = evtchn_from_irq(irq); | |
8519 | + | |
8520 | + spin_lock(&irq_mapping_update_lock); | |
8521 | + | |
8522 | + if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { | |
8523 | + close.port = evtchn; | |
8524 | + if ((type_from_irq(irq) != IRQT_CALLER_PORT) && | |
8525 | + HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) | |
8526 | + BUG(); | |
8527 | + | |
8528 | + switch (type_from_irq(irq)) { | |
8529 | + case IRQT_VIRQ: | |
8530 | + per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) | |
8531 | + [index_from_irq(irq)] = -1; | |
8532 | + break; | |
8533 | + case IRQT_IPI: | |
8534 | + per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) | |
8535 | + [index_from_irq(irq)] = -1; | |
8536 | + break; | |
8537 | + default: | |
8538 | + break; | |
8539 | + } | |
8540 | + | |
8541 | + /* Closed ports are implicitly re-bound to VCPU0. */ | |
8542 | + bind_evtchn_to_cpu(evtchn, 0); | |
8543 | + | |
8544 | + evtchn_to_irq[evtchn] = -1; | |
8545 | + irq_info[irq] = IRQ_UNBOUND; | |
8546 | + | |
8547 | + /* Zap stats across IRQ changes of use. */ | |
8548 | + for_each_possible_cpu(cpu) | |
8549 | + kstat_cpu(cpu).irqs[irq] = 0; | |
8550 | + } | |
8551 | + | |
8552 | + spin_unlock(&irq_mapping_update_lock); | |
8553 | +} | |
8554 | + | |
8555 | +int bind_caller_port_to_irqhandler( | |
8556 | + unsigned int caller_port, | |
8557 | + irqreturn_t (*handler)(int, void *, struct pt_regs *), | |
8558 | + unsigned long irqflags, | |
8559 | + const char *devname, | |
8560 | + void *dev_id) | |
8561 | +{ | |
8562 | + int irq, retval; | |
8563 | + | |
8564 | + irq = bind_caller_port_to_irq(caller_port); | |
8565 | + if (irq < 0) | |
8566 | + return irq; | |
8567 | + | |
8568 | + retval = request_irq(irq, handler, irqflags, devname, dev_id); | |
8569 | + if (retval != 0) { | |
8570 | + unbind_from_irq(irq); | |
8571 | + return retval; | |
8572 | + } | |
8573 | + | |
8574 | + return irq; | |
8575 | +} | |
8576 | +EXPORT_SYMBOL_GPL(bind_caller_port_to_irqhandler); | |
8577 | + | |
8578 | +int bind_listening_port_to_irqhandler( | |
8579 | + unsigned int remote_domain, | |
8580 | + irqreturn_t (*handler)(int, void *, struct pt_regs *), | |
8581 | + unsigned long irqflags, | |
8582 | + const char *devname, | |
8583 | + void *dev_id) | |
8584 | +{ | |
8585 | + int irq, retval; | |
8586 | + | |
8587 | + irq = bind_listening_port_to_irq(remote_domain); | |
8588 | + if (irq < 0) | |
8589 | + return irq; | |
8590 | + | |
8591 | + retval = request_irq(irq, handler, irqflags, devname, dev_id); | |
8592 | + if (retval != 0) { | |
8593 | + unbind_from_irq(irq); | |
8594 | + return retval; | |
8595 | + } | |
8596 | + | |
8597 | + return irq; | |
8598 | +} | |
8599 | +EXPORT_SYMBOL_GPL(bind_listening_port_to_irqhandler); | |
8600 | + | |
8601 | +int bind_interdomain_evtchn_to_irqhandler( | |
8602 | + unsigned int remote_domain, | |
8603 | + unsigned int remote_port, | |
8604 | + irqreturn_t (*handler)(int, void *, struct pt_regs *), | |
8605 | + unsigned long irqflags, | |
8606 | + const char *devname, | |
8607 | + void *dev_id) | |
8608 | +{ | |
8609 | + int irq, retval; | |
8610 | + | |
8611 | + irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); | |
8612 | + if (irq < 0) | |
8613 | + return irq; | |
8614 | + | |
8615 | + retval = request_irq(irq, handler, irqflags, devname, dev_id); | |
8616 | + if (retval != 0) { | |
8617 | + unbind_from_irq(irq); | |
8618 | + return retval; | |
8619 | + } | |
8620 | + | |
8621 | + return irq; | |
8622 | +} | |
8623 | +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); | |
8624 | + | |
8625 | +int bind_virq_to_irqhandler( | |
8626 | + unsigned int virq, | |
8627 | + unsigned int cpu, | |
8628 | + irqreturn_t (*handler)(int, void *, struct pt_regs *), | |
8629 | + unsigned long irqflags, | |
8630 | + const char *devname, | |
8631 | + void *dev_id) | |
8632 | +{ | |
8633 | + int irq, retval; | |
8634 | + | |
8635 | + irq = bind_virq_to_irq(virq, cpu); | |
8636 | + if (irq < 0) | |
8637 | + return irq; | |
8638 | + | |
8639 | + retval = request_irq(irq, handler, irqflags, devname, dev_id); | |
8640 | + if (retval != 0) { | |
8641 | + unbind_from_irq(irq); | |
8642 | + return retval; | |
8643 | + } | |
8644 | + | |
8645 | + return irq; | |
8646 | +} | |
8647 | +EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); | |
8648 | + | |
8649 | +int bind_ipi_to_irqhandler( | |
8650 | + unsigned int ipi, | |
8651 | + unsigned int cpu, | |
8652 | + irqreturn_t (*handler)(int, void *, struct pt_regs *), | |
8653 | + unsigned long irqflags, | |
8654 | + const char *devname, | |
8655 | + void *dev_id) | |
8656 | +{ | |
8657 | + int irq, retval; | |
8658 | + | |
8659 | + irq = bind_ipi_to_irq(ipi, cpu); | |
8660 | + if (irq < 0) | |
8661 | + return irq; | |
8662 | + | |
8663 | + retval = request_irq(irq, handler, irqflags, devname, dev_id); | |
8664 | + if (retval != 0) { | |
8665 | + unbind_from_irq(irq); | |
8666 | + return retval; | |
8667 | + } | |
8668 | + | |
8669 | + return irq; | |
8670 | +} | |
8671 | +EXPORT_SYMBOL_GPL(bind_ipi_to_irqhandler); | |
8672 | + | |
8673 | +void unbind_from_irqhandler(unsigned int irq, void *dev_id) | |
8674 | +{ | |
8675 | + free_irq(irq, dev_id); | |
8676 | + unbind_from_irq(irq); | |
8677 | +} | |
8678 | +EXPORT_SYMBOL_GPL(unbind_from_irqhandler); | |
8679 | + | |
8680 | +#ifdef CONFIG_SMP | |
8681 | +void rebind_evtchn_to_cpu(int port, unsigned int cpu) | |
8682 | +{ | |
8683 | + struct evtchn_bind_vcpu ebv = { .port = port, .vcpu = cpu }; | |
8684 | + int masked; | |
8685 | + | |
8686 | + masked = test_and_set_evtchn_mask(port); | |
8687 | + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv) == 0) | |
8688 | + bind_evtchn_to_cpu(port, cpu); | |
8689 | + if (!masked) | |
8690 | + unmask_evtchn(port); | |
8691 | +} | |
8692 | + | |
8693 | +static void rebind_irq_to_cpu(unsigned int irq, unsigned int tcpu) | |
8694 | +{ | |
8695 | + int evtchn = evtchn_from_irq(irq); | |
8696 | + | |
8697 | + if (VALID_EVTCHN(evtchn)) | |
8698 | + rebind_evtchn_to_cpu(evtchn, tcpu); | |
8699 | +} | |
8700 | + | |
8701 | +static void set_affinity_irq(unsigned int irq, cpumask_t dest) | |
8702 | +{ | |
8703 | + unsigned tcpu = first_cpu(dest); | |
8704 | + rebind_irq_to_cpu(irq, tcpu); | |
8705 | +} | |
8706 | +#endif | |
8707 | + | |
8708 | +int resend_irq_on_evtchn(unsigned int irq) | |
8709 | +{ | |
8710 | + int masked, evtchn = evtchn_from_irq(irq); | |
8711 | + shared_info_t *s = HYPERVISOR_shared_info; | |
8712 | + | |
8713 | + if (!VALID_EVTCHN(evtchn)) | |
8714 | + return 1; | |
8715 | + | |
8716 | + masked = test_and_set_evtchn_mask(evtchn); | |
8717 | + synch_set_bit(evtchn, s->evtchn_pending); | |
8718 | + if (!masked) | |
8719 | + unmask_evtchn(evtchn); | |
8720 | + | |
8721 | + return 1; | |
8722 | +} | |
8723 | + | |
8724 | +/* | |
8725 | + * Interface to generic handling in irq.c | |
8726 | + */ | |
8727 | + | |
8728 | +static unsigned int startup_dynirq(unsigned int irq) | |
8729 | +{ | |
8730 | + int evtchn = evtchn_from_irq(irq); | |
8731 | + | |
8732 | + if (VALID_EVTCHN(evtchn)) | |
8733 | + unmask_evtchn(evtchn); | |
8734 | + return 0; | |
8735 | +} | |
8736 | + | |
8737 | +static void shutdown_dynirq(unsigned int irq) | |
8738 | +{ | |
8739 | + int evtchn = evtchn_from_irq(irq); | |
8740 | + | |
8741 | + if (VALID_EVTCHN(evtchn)) | |
8742 | + mask_evtchn(evtchn); | |
8743 | +} | |
8744 | + | |
8745 | +static void enable_dynirq(unsigned int irq) | |
8746 | +{ | |
8747 | + int evtchn = evtchn_from_irq(irq); | |
8748 | + | |
8749 | + if (VALID_EVTCHN(evtchn)) | |
8750 | + unmask_evtchn(evtchn); | |
8751 | +} | |
8752 | + | |
8753 | +static void disable_dynirq(unsigned int irq) | |
8754 | +{ | |
8755 | + int evtchn = evtchn_from_irq(irq); | |
8756 | + | |
8757 | + if (VALID_EVTCHN(evtchn)) | |
8758 | + mask_evtchn(evtchn); | |
8759 | +} | |
8760 | + | |
8761 | +static void ack_dynirq(unsigned int irq) | |
8762 | +{ | |
8763 | + int evtchn = evtchn_from_irq(irq); | |
8764 | + | |
8765 | + move_native_irq(irq); | |
8766 | + | |
8767 | + if (VALID_EVTCHN(evtchn)) { | |
8768 | + mask_evtchn(evtchn); | |
8769 | + clear_evtchn(evtchn); | |
8770 | + } | |
8771 | +} | |
8772 | + | |
8773 | +static void end_dynirq(unsigned int irq) | |
8774 | +{ | |
8775 | + int evtchn = evtchn_from_irq(irq); | |
8776 | + | |
8777 | + if (VALID_EVTCHN(evtchn) && !(irq_desc[irq].status & IRQ_DISABLED)) | |
8778 | + unmask_evtchn(evtchn); | |
8779 | +} | |
8780 | + | |
8781 | +static struct hw_interrupt_type dynirq_type = { | |
8782 | + .typename = "Dynamic-irq", | |
8783 | + .startup = startup_dynirq, | |
8784 | + .shutdown = shutdown_dynirq, | |
8785 | + .enable = enable_dynirq, | |
8786 | + .disable = disable_dynirq, | |
8787 | + .ack = ack_dynirq, | |
8788 | + .end = end_dynirq, | |
8789 | +#ifdef CONFIG_SMP | |
8790 | + .set_affinity = set_affinity_irq, | |
8791 | +#endif | |
8792 | + .retrigger = resend_irq_on_evtchn, | |
8793 | +}; | |
8794 | + | |
8795 | +static inline void pirq_unmask_notify(int irq) | |
8796 | +{ | |
8797 | + struct physdev_eoi eoi = { .irq = evtchn_get_xen_pirq(irq) }; | |
8798 | + if (unlikely(test_bit(irq - PIRQ_BASE, pirq_needs_eoi))) | |
8799 | + VOID(HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi)); | |
8800 | +} | |
8801 | + | |
8802 | +static inline void pirq_query_unmask(int irq) | |
8803 | +{ | |
8804 | + struct physdev_irq_status_query irq_status; | |
8805 | + irq_status.irq = evtchn_get_xen_pirq(irq); | |
8806 | + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) | |
8807 | + irq_status.flags = 0; | |
8808 | + clear_bit(irq - PIRQ_BASE, pirq_needs_eoi); | |
8809 | + if (irq_status.flags & XENIRQSTAT_needs_eoi) | |
8810 | + set_bit(irq - PIRQ_BASE, pirq_needs_eoi); | |
8811 | +} | |
8812 | + | |
8813 | +/* | |
8814 | + * On startup, if there is no action associated with the IRQ then we are | |
8815 | + * probing. In this case we should not share with others as it will confuse us. | |
8816 | + */ | |
8817 | +#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL) | |
8818 | + | |
8819 | +static unsigned int startup_pirq(unsigned int irq) | |
8820 | +{ | |
8821 | + struct evtchn_bind_pirq bind_pirq; | |
8822 | + int evtchn = evtchn_from_irq(irq); | |
8823 | + | |
8824 | + if (VALID_EVTCHN(evtchn)) | |
8825 | + goto out; | |
8826 | + | |
8827 | + bind_pirq.pirq = evtchn_get_xen_pirq(irq); | |
8828 | + /* NB. We are happy to share unless we are probing. */ | |
8829 | + bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; | |
8830 | + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { | |
8831 | + if (!probing_irq(irq)) | |
8832 | + printk(KERN_INFO "Failed to obtain physical IRQ %d\n", | |
8833 | + irq); | |
8834 | + return 0; | |
8835 | + } | |
8836 | + evtchn = bind_pirq.port; | |
8837 | + | |
8838 | + pirq_query_unmask(irq); | |
8839 | + | |
8840 | + evtchn_to_irq[evtchn] = irq; | |
8841 | + bind_evtchn_to_cpu(evtchn, 0); | |
8842 | + irq_info[irq] = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn); | |
8843 | + | |
8844 | + out: | |
8845 | + unmask_evtchn(evtchn); | |
8846 | + pirq_unmask_notify(irq); | |
8847 | + | |
8848 | + return 0; | |
8849 | +} | |
8850 | + | |
8851 | +static void shutdown_pirq(unsigned int irq) | |
8852 | +{ | |
8853 | + struct evtchn_close close; | |
8854 | + int evtchn = evtchn_from_irq(irq); | |
8855 | + | |
8856 | + if (!VALID_EVTCHN(evtchn)) | |
8857 | + return; | |
8858 | + | |
8859 | + mask_evtchn(evtchn); | |
8860 | + | |
8861 | + close.port = evtchn; | |
8862 | + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) | |
8863 | + BUG(); | |
8864 | + | |
8865 | + bind_evtchn_to_cpu(evtchn, 0); | |
8866 | + evtchn_to_irq[evtchn] = -1; | |
8867 | + irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0); | |
8868 | +} | |
8869 | + | |
8870 | +static void enable_pirq(unsigned int irq) | |
8871 | +{ | |
8872 | + startup_pirq(irq); | |
8873 | +} | |
8874 | + | |
8875 | +static void disable_pirq(unsigned int irq) | |
8876 | +{ | |
8877 | +} | |
8878 | + | |
8879 | +static void ack_pirq(unsigned int irq) | |
8880 | +{ | |
8881 | + int evtchn = evtchn_from_irq(irq); | |
8882 | + | |
8883 | + move_native_irq(irq); | |
8884 | + | |
8885 | + if (VALID_EVTCHN(evtchn)) { | |
8886 | + mask_evtchn(evtchn); | |
8887 | + clear_evtchn(evtchn); | |
8888 | + } | |
8889 | +} | |
8890 | + | |
8891 | +static void end_pirq(unsigned int irq) | |
8892 | +{ | |
8893 | + int evtchn = evtchn_from_irq(irq); | |
8894 | + | |
8895 | + if ((irq_desc[irq].status & (IRQ_DISABLED|IRQ_PENDING)) == | |
8896 | + (IRQ_DISABLED|IRQ_PENDING)) { | |
8897 | + shutdown_pirq(irq); | |
8898 | + } else if (VALID_EVTCHN(evtchn)) { | |
8899 | + unmask_evtchn(evtchn); | |
8900 | + pirq_unmask_notify(irq); | |
8901 | + } | |
8902 | +} | |
8903 | + | |
8904 | +static struct hw_interrupt_type pirq_type = { | |
8905 | + .typename = "Phys-irq", | |
8906 | + .startup = startup_pirq, | |
8907 | + .shutdown = shutdown_pirq, | |
8908 | + .enable = enable_pirq, | |
8909 | + .disable = disable_pirq, | |
8910 | + .ack = ack_pirq, | |
8911 | + .end = end_pirq, | |
8912 | +#ifdef CONFIG_SMP | |
8913 | + .set_affinity = set_affinity_irq, | |
8914 | +#endif | |
8915 | + .retrigger = resend_irq_on_evtchn, | |
8916 | +}; | |
8917 | + | |
8918 | +int irq_ignore_unhandled(unsigned int irq) | |
8919 | +{ | |
8920 | + struct physdev_irq_status_query irq_status = { .irq = irq }; | |
8921 | + | |
8922 | + if (!is_running_on_xen()) | |
8923 | + return 0; | |
8924 | + | |
8925 | + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) | |
8926 | + return 0; | |
8927 | + return !!(irq_status.flags & XENIRQSTAT_shared); | |
8928 | +} | |
8929 | + | |
8930 | +void notify_remote_via_irq(int irq) | |
8931 | +{ | |
8932 | + int evtchn = evtchn_from_irq(irq); | |
8933 | + | |
8934 | + if (VALID_EVTCHN(evtchn)) | |
8935 | + notify_remote_via_evtchn(evtchn); | |
8936 | +} | |
8937 | +EXPORT_SYMBOL_GPL(notify_remote_via_irq); | |
8938 | + | |
8939 | +int irq_to_evtchn_port(int irq) | |
8940 | +{ | |
8941 | + return evtchn_from_irq(irq); | |
8942 | +} | |
8943 | +EXPORT_SYMBOL_GPL(irq_to_evtchn_port); | |
8944 | + | |
8945 | +void mask_evtchn(int port) | |
8946 | +{ | |
8947 | + shared_info_t *s = HYPERVISOR_shared_info; | |
8948 | + synch_set_bit(port, s->evtchn_mask); | |
8949 | +} | |
8950 | +EXPORT_SYMBOL_GPL(mask_evtchn); | |
8951 | + | |
8952 | +void unmask_evtchn(int port) | |
8953 | +{ | |
8954 | + shared_info_t *s = HYPERVISOR_shared_info; | |
8955 | + unsigned int cpu = smp_processor_id(); | |
8956 | + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; | |
8957 | + | |
8958 | + BUG_ON(!irqs_disabled()); | |
8959 | + | |
8960 | + /* Slow path (hypercall) if this is a non-local port. */ | |
8961 | + if (unlikely(cpu != cpu_from_evtchn(port))) { | |
8962 | + struct evtchn_unmask unmask = { .port = port }; | |
8963 | + VOID(HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask)); | |
8964 | + return; | |
8965 | + } | |
8966 | + | |
8967 | + synch_clear_bit(port, s->evtchn_mask); | |
8968 | + | |
8969 | + /* Did we miss an interrupt 'edge'? Re-fire if so. */ | |
8970 | + if (synch_test_bit(port, s->evtchn_pending) && | |
8971 | + !synch_test_and_set_bit(port / BITS_PER_LONG, | |
8972 | + &vcpu_info->evtchn_pending_sel)) | |
8973 | + vcpu_info->evtchn_upcall_pending = 1; | |
8974 | +} | |
8975 | +EXPORT_SYMBOL_GPL(unmask_evtchn); | |
8976 | + | |
8977 | +void disable_all_local_evtchn(void) | |
8978 | +{ | |
8979 | + unsigned i, cpu = smp_processor_id(); | |
8980 | + shared_info_t *s = HYPERVISOR_shared_info; | |
8981 | + | |
8982 | + for (i = 0; i < NR_EVENT_CHANNELS; ++i) | |
8983 | + if (cpu_from_evtchn(i) == cpu) | |
8984 | + synch_set_bit(i, &s->evtchn_mask[0]); | |
8985 | +} | |
8986 | + | |
8987 | +static void restore_cpu_virqs(unsigned int cpu) | |
8988 | +{ | |
8989 | + struct evtchn_bind_virq bind_virq; | |
8990 | + int virq, irq, evtchn; | |
8991 | + | |
8992 | + for (virq = 0; virq < NR_VIRQS; virq++) { | |
8993 | + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) | |
8994 | + continue; | |
8995 | + | |
8996 | + BUG_ON(irq_info[irq] != mk_irq_info(IRQT_VIRQ, virq, 0)); | |
8997 | + | |
8998 | + /* Get a new binding from Xen. */ | |
8999 | + bind_virq.virq = virq; | |
9000 | + bind_virq.vcpu = cpu; | |
9001 | + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | |
9002 | + &bind_virq) != 0) | |
9003 | + BUG(); | |
9004 | + evtchn = bind_virq.port; | |
9005 | + | |
9006 | + /* Record the new mapping. */ | |
9007 | + evtchn_to_irq[evtchn] = irq; | |
9008 | + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); | |
9009 | + bind_evtchn_to_cpu(evtchn, cpu); | |
9010 | + | |
9011 | + /* Ready for use. */ | |
9012 | + unmask_evtchn(evtchn); | |
9013 | + } | |
9014 | +} | |
9015 | + | |
9016 | +static void restore_cpu_ipis(unsigned int cpu) | |
9017 | +{ | |
9018 | + struct evtchn_bind_ipi bind_ipi; | |
9019 | + int ipi, irq, evtchn; | |
9020 | + | |
9021 | + for (ipi = 0; ipi < NR_IPIS; ipi++) { | |
9022 | + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) | |
9023 | + continue; | |
9024 | + | |
9025 | + BUG_ON(irq_info[irq] != mk_irq_info(IRQT_IPI, ipi, 0)); | |
9026 | + | |
9027 | + /* Get a new binding from Xen. */ | |
9028 | + bind_ipi.vcpu = cpu; | |
9029 | + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, | |
9030 | + &bind_ipi) != 0) | |
9031 | + BUG(); | |
9032 | + evtchn = bind_ipi.port; | |
9033 | + | |
9034 | + /* Record the new mapping. */ | |
9035 | + evtchn_to_irq[evtchn] = irq; | |
9036 | + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); | |
9037 | + bind_evtchn_to_cpu(evtchn, cpu); | |
9038 | + | |
9039 | + /* Ready for use. */ | |
9040 | + unmask_evtchn(evtchn); | |
9041 | + | |
9042 | + } | |
9043 | +} | |
9044 | + | |
9045 | +void irq_resume(void) | |
9046 | +{ | |
9047 | + unsigned int cpu, irq, evtchn; | |
9048 | + | |
9049 | + init_evtchn_cpu_bindings(); | |
9050 | + | |
9051 | + /* New event-channel space is not 'live' yet. */ | |
9052 | + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) | |
9053 | + mask_evtchn(evtchn); | |
9054 | + | |
9055 | + /* Check that no PIRQs are still bound. */ | |
9056 | + for (irq = PIRQ_BASE; irq < (PIRQ_BASE + NR_PIRQS); irq++) | |
9057 | + BUG_ON(irq_info[irq] != IRQ_UNBOUND); | |
9058 | + | |
9059 | + /* No IRQ <-> event-channel mappings. */ | |
9060 | + for (irq = 0; irq < NR_IRQS; irq++) | |
9061 | + irq_info[irq] &= ~((1U << _EVTCHN_BITS) - 1); | |
9062 | + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) | |
9063 | + evtchn_to_irq[evtchn] = -1; | |
9064 | + | |
9065 | + for_each_possible_cpu(cpu) { | |
9066 | + restore_cpu_virqs(cpu); | |
9067 | + restore_cpu_ipis(cpu); | |
9068 | + } | |
9069 | + | |
9070 | +} | |
9071 | + | |
9072 | +#if defined(CONFIG_X86_IO_APIC) | |
9073 | +#define identity_mapped_irq(irq) (!IO_APIC_IRQ((irq) - PIRQ_BASE)) | |
9074 | +#elif defined(CONFIG_X86) | |
9075 | +#define identity_mapped_irq(irq) (((irq) - PIRQ_BASE) < 16) | |
9076 | +#else | |
9077 | +#define identity_mapped_irq(irq) (1) | |
9078 | +#endif | |
9079 | + | |
9080 | +void evtchn_register_pirq(int irq) | |
9081 | +{ | |
9082 | + BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE > NR_PIRQS); | |
9083 | + if (identity_mapped_irq(irq)) | |
9084 | + return; | |
9085 | + irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0); | |
9086 | + irq_desc[irq].chip = &pirq_type; | |
9087 | +} | |
9088 | + | |
9089 | +int evtchn_map_pirq(int irq, int xen_pirq) | |
9090 | +{ | |
9091 | + if (irq < 0) { | |
9092 | + static DEFINE_SPINLOCK(irq_alloc_lock); | |
9093 | + | |
9094 | + irq = PIRQ_BASE + NR_PIRQS - 1; | |
9095 | + spin_lock(&irq_alloc_lock); | |
9096 | + do { | |
9097 | + if (identity_mapped_irq(irq)) | |
9098 | + continue; | |
9099 | + if (!index_from_irq(irq)) { | |
9100 | + BUG_ON(type_from_irq(irq) != IRQT_UNBOUND); | |
9101 | + irq_info[irq] = mk_irq_info(IRQT_PIRQ, | |
9102 | + xen_pirq, 0); | |
9103 | + break; | |
9104 | + } | |
9105 | + } while (--irq >= PIRQ_BASE); | |
9106 | + spin_unlock(&irq_alloc_lock); | |
9107 | + if (irq < PIRQ_BASE) | |
9108 | + return -ENOSPC; | |
9109 | + irq_desc[irq].chip = &pirq_type; | |
9110 | + } else if (!xen_pirq) { | |
9111 | + if (unlikely(type_from_irq(irq) != IRQT_PIRQ)) | |
9112 | + return -EINVAL; | |
9113 | + irq_desc[irq].chip = &no_irq_type; | |
9114 | + irq_info[irq] = IRQ_UNBOUND; | |
9115 | + return 0; | |
9116 | + } else if (type_from_irq(irq) != IRQT_PIRQ | |
9117 | + || index_from_irq(irq) != xen_pirq) { | |
9118 | + printk(KERN_ERR "IRQ#%d is already mapped to %d:%u - " | |
9119 | + "cannot map to PIRQ#%u\n", | |
9120 | + irq, type_from_irq(irq), index_from_irq(irq), xen_pirq); | |
9121 | + return -EINVAL; | |
9122 | + } | |
9123 | + return index_from_irq(irq) ? irq : -EINVAL; | |
9124 | +} | |
9125 | + | |
9126 | +int evtchn_get_xen_pirq(int irq) | |
9127 | +{ | |
9128 | + if (identity_mapped_irq(irq)) | |
9129 | + return irq; | |
9130 | + BUG_ON(type_from_irq(irq) != IRQT_PIRQ); | |
9131 | + return index_from_irq(irq); | |
9132 | +} | |
9133 | + | |
9134 | +void __init xen_init_IRQ(void) | |
9135 | +{ | |
9136 | + unsigned int i; | |
9137 | + | |
9138 | + init_evtchn_cpu_bindings(); | |
9139 | + | |
9140 | + /* No event channels are 'live' right now. */ | |
9141 | + for (i = 0; i < NR_EVENT_CHANNELS; i++) | |
9142 | + mask_evtchn(i); | |
9143 | + | |
9144 | + /* No IRQ -> event-channel mappings. */ | |
9145 | + for (i = 0; i < NR_IRQS; i++) | |
9146 | + irq_info[i] = IRQ_UNBOUND; | |
9147 | + | |
9148 | + /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ | |
9149 | + for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) { | |
9150 | + irq_bindcount[i] = 0; | |
9151 | + | |
9152 | + irq_desc[i].status = IRQ_DISABLED|IRQ_NOPROBE; | |
9153 | + irq_desc[i].action = NULL; | |
9154 | + irq_desc[i].depth = 1; | |
9155 | + irq_desc[i].chip = &dynirq_type; | |
9156 | + } | |
9157 | + | |
9158 | + /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */ | |
9159 | + for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_PIRQS); i++) { | |
9160 | + irq_bindcount[i] = 1; | |
9161 | + | |
9162 | + if (!identity_mapped_irq(i)) | |
9163 | + continue; | |
9164 | + | |
9165 | +#ifdef RTC_IRQ | |
9166 | + /* If not domain 0, force our RTC driver to fail its probe. */ | |
9167 | + if (i - PIRQ_BASE == RTC_IRQ && !is_initial_xendomain()) | |
9168 | + continue; | |
9169 | +#endif | |
9170 | + | |
9171 | + irq_desc[i].status = IRQ_DISABLED; | |
9172 | + irq_desc[i].action = NULL; | |
9173 | + irq_desc[i].depth = 1; | |
9174 | + irq_desc[i].chip = &pirq_type; | |
9175 | + } | |
9176 | +} | |
9177 | Index: head-2008-11-25/drivers/xen/core/features.c | |
9178 | =================================================================== | |
9179 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
9180 | +++ head-2008-11-25/drivers/xen/core/features.c 2007-06-12 13:13:44.000000000 +0200 | |
9181 | @@ -0,0 +1,34 @@ | |
9182 | +/****************************************************************************** | |
9183 | + * features.c | |
9184 | + * | |
9185 | + * Xen feature flags. | |
9186 | + * | |
9187 | + * Copyright (c) 2006, Ian Campbell, XenSource Inc. | |
9188 | + */ | |
9189 | +#include <linux/types.h> | |
9190 | +#include <linux/cache.h> | |
9191 | +#include <linux/module.h> | |
9192 | +#include <asm/hypervisor.h> | |
9193 | +#include <xen/features.h> | |
9194 | + | |
9195 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
9196 | +#include <xen/platform-compat.h> | |
9197 | +#endif | |
9198 | + | |
9199 | +u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; | |
9200 | +/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */ | |
9201 | +EXPORT_SYMBOL(xen_features); | |
9202 | + | |
9203 | +void setup_xen_features(void) | |
9204 | +{ | |
9205 | + xen_feature_info_t fi; | |
9206 | + int i, j; | |
9207 | + | |
9208 | + for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { | |
9209 | + fi.submap_idx = i; | |
9210 | + if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) | |
9211 | + break; | |
9212 | + for (j=0; j<32; j++) | |
9213 | + xen_features[i*32+j] = !!(fi.submap & 1<<j); | |
9214 | + } | |
9215 | +} | |
9216 | Index: head-2008-11-25/drivers/xen/core/firmware.c | |
9217 | =================================================================== | |
9218 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
9219 | +++ head-2008-11-25/drivers/xen/core/firmware.c 2007-06-22 09:08:06.000000000 +0200 | |
9220 | @@ -0,0 +1,74 @@ | |
9221 | +#include <linux/kernel.h> | |
9222 | +#include <linux/errno.h> | |
9223 | +#include <linux/init.h> | |
9224 | +#include <linux/edd.h> | |
9225 | +#include <video/edid.h> | |
9226 | +#include <xen/interface/platform.h> | |
9227 | +#include <asm/hypervisor.h> | |
9228 | + | |
9229 | +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | |
9230 | +void __init copy_edd(void) | |
9231 | +{ | |
9232 | + int ret; | |
9233 | + struct xen_platform_op op; | |
9234 | + | |
9235 | + if (!is_initial_xendomain()) | |
9236 | + return; | |
9237 | + | |
9238 | + op.cmd = XENPF_firmware_info; | |
9239 | + | |
9240 | + op.u.firmware_info.type = XEN_FW_DISK_INFO; | |
9241 | + for (op.u.firmware_info.index = 0; | |
9242 | + edd.edd_info_nr < EDDMAXNR; | |
9243 | + op.u.firmware_info.index++) { | |
9244 | + struct edd_info *info = edd.edd_info + edd.edd_info_nr; | |
9245 | + | |
9246 | + info->params.length = sizeof(info->params); | |
9247 | + set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params, | |
9248 | + &info->params); | |
9249 | + ret = HYPERVISOR_platform_op(&op); | |
9250 | + if (ret) | |
9251 | + break; | |
9252 | + | |
9253 | +#define C(x) info->x = op.u.firmware_info.u.disk_info.x | |
9254 | + C(device); | |
9255 | + C(version); | |
9256 | + C(interface_support); | |
9257 | + C(legacy_max_cylinder); | |
9258 | + C(legacy_max_head); | |
9259 | + C(legacy_sectors_per_track); | |
9260 | +#undef C | |
9261 | + | |
9262 | + edd.edd_info_nr++; | |
9263 | + } | |
9264 | + | |
9265 | + op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE; | |
9266 | + for (op.u.firmware_info.index = 0; | |
9267 | + edd.mbr_signature_nr < EDD_MBR_SIG_MAX; | |
9268 | + op.u.firmware_info.index++) { | |
9269 | + ret = HYPERVISOR_platform_op(&op); | |
9270 | + if (ret) | |
9271 | + break; | |
9272 | + edd.mbr_signature[edd.mbr_signature_nr++] = | |
9273 | + op.u.firmware_info.u.disk_mbr_signature.mbr_signature; | |
9274 | + } | |
9275 | +} | |
9276 | +#endif | |
9277 | + | |
9278 | +void __init copy_edid(void) | |
9279 | +{ | |
9280 | +#if defined(CONFIG_FIRMWARE_EDID) && defined(CONFIG_X86) | |
9281 | + struct xen_platform_op op; | |
9282 | + | |
9283 | + if (!is_initial_xendomain()) | |
9284 | + return; | |
9285 | + | |
9286 | + op.cmd = XENPF_firmware_info; | |
9287 | + op.u.firmware_info.index = 0; | |
9288 | + op.u.firmware_info.type = XEN_FW_VBEDDC_INFO; | |
9289 | + set_xen_guest_handle(op.u.firmware_info.u.vbeddc_info.edid, | |
9290 | + edid_info.dummy); | |
9291 | + if (HYPERVISOR_platform_op(&op) != 0) | |
9292 | + memset(edid_info.dummy, 0x13, sizeof(edid_info.dummy)); | |
9293 | +#endif | |
9294 | +} | |
9295 | Index: head-2008-11-25/drivers/xen/core/gnttab.c | |
9296 | =================================================================== | |
9297 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
9298 | +++ head-2008-11-25/drivers/xen/core/gnttab.c 2008-11-04 11:13:10.000000000 +0100 | |
9299 | @@ -0,0 +1,772 @@ | |
9300 | +/****************************************************************************** | |
9301 | + * gnttab.c | |
9302 | + * | |
9303 | + * Granting foreign access to our memory reservation. | |
9304 | + * | |
9305 | + * Copyright (c) 2005-2006, Christopher Clark | |
9306 | + * Copyright (c) 2004-2005, K A Fraser | |
9307 | + * | |
9308 | + * This program is free software; you can redistribute it and/or | |
9309 | + * modify it under the terms of the GNU General Public License version 2 | |
9310 | + * as published by the Free Software Foundation; or, when distributed | |
9311 | + * separately from the Linux kernel or incorporated into other | |
9312 | + * software packages, subject to the following license: | |
9313 | + * | |
9314 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
9315 | + * of this source file (the "Software"), to deal in the Software without | |
9316 | + * restriction, including without limitation the rights to use, copy, modify, | |
9317 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
9318 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
9319 | + * the following conditions: | |
9320 | + * | |
9321 | + * The above copyright notice and this permission notice shall be included in | |
9322 | + * all copies or substantial portions of the Software. | |
9323 | + * | |
9324 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
9325 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
9326 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
9327 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
9328 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
9329 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
9330 | + * IN THE SOFTWARE. | |
9331 | + */ | |
9332 | + | |
9333 | +#include <linux/module.h> | |
9334 | +#include <linux/sched.h> | |
9335 | +#include <linux/mm.h> | |
9336 | +#include <linux/seqlock.h> | |
9337 | +#include <xen/interface/xen.h> | |
9338 | +#include <xen/gnttab.h> | |
9339 | +#include <asm/pgtable.h> | |
9340 | +#include <asm/uaccess.h> | |
9341 | +#include <asm/synch_bitops.h> | |
9342 | +#include <asm/io.h> | |
9343 | +#include <xen/interface/memory.h> | |
9344 | +#include <xen/driver_util.h> | |
9345 | +#include <asm/gnttab_dma.h> | |
9346 | + | |
9347 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
9348 | +#include <xen/platform-compat.h> | |
9349 | +#endif | |
9350 | + | |
9351 | +/* External tools reserve first few grant table entries. */ | |
9352 | +#define NR_RESERVED_ENTRIES 8 | |
9353 | +#define GNTTAB_LIST_END 0xffffffff | |
9354 | +#define ENTRIES_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) | |
9355 | + | |
9356 | +static grant_ref_t **gnttab_list; | |
9357 | +static unsigned int nr_grant_frames; | |
9358 | +static unsigned int boot_max_nr_grant_frames; | |
9359 | +static int gnttab_free_count; | |
9360 | +static grant_ref_t gnttab_free_head; | |
9361 | +static DEFINE_SPINLOCK(gnttab_list_lock); | |
9362 | + | |
9363 | +static struct grant_entry *shared; | |
9364 | + | |
9365 | +static struct gnttab_free_callback *gnttab_free_callback_list; | |
9366 | + | |
9367 | +static int gnttab_expand(unsigned int req_entries); | |
9368 | + | |
9369 | +#define RPP (PAGE_SIZE / sizeof(grant_ref_t)) | |
9370 | +#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) | |
9371 | + | |
9372 | +#define nr_freelist_frames(grant_frames) \ | |
9373 | + (((grant_frames) * ENTRIES_PER_GRANT_FRAME + RPP - 1) / RPP) | |
9374 | + | |
9375 | +static int get_free_entries(int count) | |
9376 | +{ | |
9377 | + unsigned long flags; | |
9378 | + int ref, rc; | |
9379 | + grant_ref_t head; | |
9380 | + | |
9381 | + spin_lock_irqsave(&gnttab_list_lock, flags); | |
9382 | + | |
9383 | + if ((gnttab_free_count < count) && | |
9384 | + ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { | |
9385 | + spin_unlock_irqrestore(&gnttab_list_lock, flags); | |
9386 | + return rc; | |
9387 | + } | |
9388 | + | |
9389 | + ref = head = gnttab_free_head; | |
9390 | + gnttab_free_count -= count; | |
9391 | + while (count-- > 1) | |
9392 | + head = gnttab_entry(head); | |
9393 | + gnttab_free_head = gnttab_entry(head); | |
9394 | + gnttab_entry(head) = GNTTAB_LIST_END; | |
9395 | + | |
9396 | + spin_unlock_irqrestore(&gnttab_list_lock, flags); | |
9397 | + | |
9398 | + return ref; | |
9399 | +} | |
9400 | + | |
9401 | +#define get_free_entry() get_free_entries(1) | |
9402 | + | |
9403 | +static void do_free_callbacks(void) | |
9404 | +{ | |
9405 | + struct gnttab_free_callback *callback, *next; | |
9406 | + | |
9407 | + callback = gnttab_free_callback_list; | |
9408 | + gnttab_free_callback_list = NULL; | |
9409 | + | |
9410 | + while (callback != NULL) { | |
9411 | + next = callback->next; | |
9412 | + if (gnttab_free_count >= callback->count) { | |
9413 | + callback->next = NULL; | |
9414 | + callback->queued = 0; | |
9415 | + callback->fn(callback->arg); | |
9416 | + } else { | |
9417 | + callback->next = gnttab_free_callback_list; | |
9418 | + gnttab_free_callback_list = callback; | |
9419 | + } | |
9420 | + callback = next; | |
9421 | + } | |
9422 | +} | |
9423 | + | |
9424 | +static inline void check_free_callbacks(void) | |
9425 | +{ | |
9426 | + if (unlikely(gnttab_free_callback_list)) | |
9427 | + do_free_callbacks(); | |
9428 | +} | |
9429 | + | |
9430 | +static void put_free_entry(grant_ref_t ref) | |
9431 | +{ | |
9432 | + unsigned long flags; | |
9433 | + spin_lock_irqsave(&gnttab_list_lock, flags); | |
9434 | + gnttab_entry(ref) = gnttab_free_head; | |
9435 | + gnttab_free_head = ref; | |
9436 | + gnttab_free_count++; | |
9437 | + check_free_callbacks(); | |
9438 | + spin_unlock_irqrestore(&gnttab_list_lock, flags); | |
9439 | +} | |
9440 | + | |
9441 | +/* | |
9442 | + * Public grant-issuing interface functions | |
9443 | + */ | |
9444 | + | |
9445 | +int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, | |
9446 | + int flags) | |
9447 | +{ | |
9448 | + int ref; | |
9449 | + | |
9450 | + if (unlikely((ref = get_free_entry()) < 0)) | |
9451 | + return -ENOSPC; | |
9452 | + | |
9453 | + shared[ref].frame = frame; | |
9454 | + shared[ref].domid = domid; | |
9455 | + wmb(); | |
9456 | + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing)); | |
9457 | + shared[ref].flags = GTF_permit_access | flags; | |
9458 | + | |
9459 | + return ref; | |
9460 | +} | |
9461 | +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); | |
9462 | + | |
9463 | +void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, | |
9464 | + unsigned long frame, int flags) | |
9465 | +{ | |
9466 | + shared[ref].frame = frame; | |
9467 | + shared[ref].domid = domid; | |
9468 | + wmb(); | |
9469 | + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing)); | |
9470 | + shared[ref].flags = GTF_permit_access | flags; | |
9471 | +} | |
9472 | +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); | |
9473 | + | |
9474 | + | |
9475 | +int gnttab_query_foreign_access(grant_ref_t ref) | |
9476 | +{ | |
9477 | + u16 nflags; | |
9478 | + | |
9479 | + nflags = shared[ref].flags; | |
9480 | + | |
9481 | + return (nflags & (GTF_reading|GTF_writing)); | |
9482 | +} | |
9483 | +EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); | |
9484 | + | |
9485 | +int gnttab_end_foreign_access_ref(grant_ref_t ref) | |
9486 | +{ | |
9487 | + u16 flags, nflags; | |
9488 | + | |
9489 | + nflags = shared[ref].flags; | |
9490 | + do { | |
9491 | + if ((flags = nflags) & (GTF_reading|GTF_writing)) { | |
9492 | + printk(KERN_DEBUG "WARNING: g.e. still in use!\n"); | |
9493 | + return 0; | |
9494 | + } | |
9495 | + } while ((nflags = synch_cmpxchg_subword(&shared[ref].flags, flags, 0)) != | |
9496 | + flags); | |
9497 | + | |
9498 | + return 1; | |
9499 | +} | |
9500 | +EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); | |
9501 | + | |
9502 | +void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page) | |
9503 | +{ | |
9504 | + if (gnttab_end_foreign_access_ref(ref)) { | |
9505 | + put_free_entry(ref); | |
9506 | + if (page != 0) | |
9507 | + free_page(page); | |
9508 | + } else { | |
9509 | + /* XXX This needs to be fixed so that the ref and page are | |
9510 | + placed on a list to be freed up later. */ | |
9511 | + printk(KERN_DEBUG | |
9512 | + "WARNING: leaking g.e. and page still in use!\n"); | |
9513 | + } | |
9514 | +} | |
9515 | +EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); | |
9516 | + | |
9517 | +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) | |
9518 | +{ | |
9519 | + int ref; | |
9520 | + | |
9521 | + if (unlikely((ref = get_free_entry()) < 0)) | |
9522 | + return -ENOSPC; | |
9523 | + gnttab_grant_foreign_transfer_ref(ref, domid, pfn); | |
9524 | + | |
9525 | + return ref; | |
9526 | +} | |
9527 | +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); | |
9528 | + | |
9529 | +void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, | |
9530 | + unsigned long pfn) | |
9531 | +{ | |
9532 | + shared[ref].frame = pfn; | |
9533 | + shared[ref].domid = domid; | |
9534 | + wmb(); | |
9535 | + shared[ref].flags = GTF_accept_transfer; | |
9536 | +} | |
9537 | +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); | |
9538 | + | |
9539 | +unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) | |
9540 | +{ | |
9541 | + unsigned long frame; | |
9542 | + u16 flags; | |
9543 | + | |
9544 | + /* | |
9545 | + * If a transfer is not even yet started, try to reclaim the grant | |
9546 | + * reference and return failure (== 0). | |
9547 | + */ | |
9548 | + while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { | |
9549 | + if (synch_cmpxchg_subword(&shared[ref].flags, flags, 0) == flags) | |
9550 | + return 0; | |
9551 | + cpu_relax(); | |
9552 | + } | |
9553 | + | |
9554 | + /* If a transfer is in progress then wait until it is completed. */ | |
9555 | + while (!(flags & GTF_transfer_completed)) { | |
9556 | + flags = shared[ref].flags; | |
9557 | + cpu_relax(); | |
9558 | + } | |
9559 | + | |
9560 | + /* Read the frame number /after/ reading completion status. */ | |
9561 | + rmb(); | |
9562 | + frame = shared[ref].frame; | |
9563 | + BUG_ON(frame == 0); | |
9564 | + | |
9565 | + return frame; | |
9566 | +} | |
9567 | +EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); | |
9568 | + | |
9569 | +unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) | |
9570 | +{ | |
9571 | + unsigned long frame = gnttab_end_foreign_transfer_ref(ref); | |
9572 | + put_free_entry(ref); | |
9573 | + return frame; | |
9574 | +} | |
9575 | +EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer); | |
9576 | + | |
9577 | +void gnttab_free_grant_reference(grant_ref_t ref) | |
9578 | +{ | |
9579 | + put_free_entry(ref); | |
9580 | +} | |
9581 | +EXPORT_SYMBOL_GPL(gnttab_free_grant_reference); | |
9582 | + | |
9583 | +void gnttab_free_grant_references(grant_ref_t head) | |
9584 | +{ | |
9585 | + grant_ref_t ref; | |
9586 | + unsigned long flags; | |
9587 | + int count = 1; | |
9588 | + if (head == GNTTAB_LIST_END) | |
9589 | + return; | |
9590 | + spin_lock_irqsave(&gnttab_list_lock, flags); | |
9591 | + ref = head; | |
9592 | + while (gnttab_entry(ref) != GNTTAB_LIST_END) { | |
9593 | + ref = gnttab_entry(ref); | |
9594 | + count++; | |
9595 | + } | |
9596 | + gnttab_entry(ref) = gnttab_free_head; | |
9597 | + gnttab_free_head = head; | |
9598 | + gnttab_free_count += count; | |
9599 | + check_free_callbacks(); | |
9600 | + spin_unlock_irqrestore(&gnttab_list_lock, flags); | |
9601 | +} | |
9602 | +EXPORT_SYMBOL_GPL(gnttab_free_grant_references); | |
9603 | + | |
9604 | +int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) | |
9605 | +{ | |
9606 | + int h = get_free_entries(count); | |
9607 | + | |
9608 | + if (h < 0) | |
9609 | + return -ENOSPC; | |
9610 | + | |
9611 | + *head = h; | |
9612 | + | |
9613 | + return 0; | |
9614 | +} | |
9615 | +EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); | |
9616 | + | |
9617 | +int gnttab_empty_grant_references(const grant_ref_t *private_head) | |
9618 | +{ | |
9619 | + return (*private_head == GNTTAB_LIST_END); | |
9620 | +} | |
9621 | +EXPORT_SYMBOL_GPL(gnttab_empty_grant_references); | |
9622 | + | |
9623 | +int gnttab_claim_grant_reference(grant_ref_t *private_head) | |
9624 | +{ | |
9625 | + grant_ref_t g = *private_head; | |
9626 | + if (unlikely(g == GNTTAB_LIST_END)) | |
9627 | + return -ENOSPC; | |
9628 | + *private_head = gnttab_entry(g); | |
9629 | + return g; | |
9630 | +} | |
9631 | +EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); | |
9632 | + | |
9633 | +void gnttab_release_grant_reference(grant_ref_t *private_head, | |
9634 | + grant_ref_t release) | |
9635 | +{ | |
9636 | + gnttab_entry(release) = *private_head; | |
9637 | + *private_head = release; | |
9638 | +} | |
9639 | +EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); | |
9640 | + | |
9641 | +void gnttab_request_free_callback(struct gnttab_free_callback *callback, | |
9642 | + void (*fn)(void *), void *arg, u16 count) | |
9643 | +{ | |
9644 | + unsigned long flags; | |
9645 | + spin_lock_irqsave(&gnttab_list_lock, flags); | |
9646 | + if (callback->queued) | |
9647 | + goto out; | |
9648 | + callback->fn = fn; | |
9649 | + callback->arg = arg; | |
9650 | + callback->count = count; | |
9651 | + callback->queued = 1; | |
9652 | + callback->next = gnttab_free_callback_list; | |
9653 | + gnttab_free_callback_list = callback; | |
9654 | + check_free_callbacks(); | |
9655 | +out: | |
9656 | + spin_unlock_irqrestore(&gnttab_list_lock, flags); | |
9657 | +} | |
9658 | +EXPORT_SYMBOL_GPL(gnttab_request_free_callback); | |
9659 | + | |
9660 | +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) | |
9661 | +{ | |
9662 | + struct gnttab_free_callback **pcb; | |
9663 | + unsigned long flags; | |
9664 | + | |
9665 | + spin_lock_irqsave(&gnttab_list_lock, flags); | |
9666 | + for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { | |
9667 | + if (*pcb == callback) { | |
9668 | + *pcb = callback->next; | |
9669 | + callback->queued = 0; | |
9670 | + break; | |
9671 | + } | |
9672 | + } | |
9673 | + spin_unlock_irqrestore(&gnttab_list_lock, flags); | |
9674 | +} | |
9675 | +EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback); | |
9676 | + | |
9677 | +static int grow_gnttab_list(unsigned int more_frames) | |
9678 | +{ | |
9679 | + unsigned int new_nr_grant_frames, extra_entries, i; | |
9680 | + unsigned int nr_glist_frames, new_nr_glist_frames; | |
9681 | + | |
9682 | + new_nr_grant_frames = nr_grant_frames + more_frames; | |
9683 | + extra_entries = more_frames * ENTRIES_PER_GRANT_FRAME; | |
9684 | + | |
9685 | + nr_glist_frames = nr_freelist_frames(nr_grant_frames); | |
9686 | + new_nr_glist_frames = nr_freelist_frames(new_nr_grant_frames); | |
9687 | + for (i = nr_glist_frames; i < new_nr_glist_frames; i++) { | |
9688 | + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); | |
9689 | + if (!gnttab_list[i]) | |
9690 | + goto grow_nomem; | |
9691 | + } | |
9692 | + | |
9693 | + for (i = ENTRIES_PER_GRANT_FRAME * nr_grant_frames; | |
9694 | + i < ENTRIES_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) | |
9695 | + gnttab_entry(i) = i + 1; | |
9696 | + | |
9697 | + gnttab_entry(i) = gnttab_free_head; | |
9698 | + gnttab_free_head = ENTRIES_PER_GRANT_FRAME * nr_grant_frames; | |
9699 | + gnttab_free_count += extra_entries; | |
9700 | + | |
9701 | + nr_grant_frames = new_nr_grant_frames; | |
9702 | + | |
9703 | + check_free_callbacks(); | |
9704 | + | |
9705 | + return 0; | |
9706 | + | |
9707 | +grow_nomem: | |
9708 | + for ( ; i >= nr_glist_frames; i--) | |
9709 | + free_page((unsigned long) gnttab_list[i]); | |
9710 | + return -ENOMEM; | |
9711 | +} | |
9712 | + | |
9713 | +static unsigned int __max_nr_grant_frames(void) | |
9714 | +{ | |
9715 | + struct gnttab_query_size query; | |
9716 | + int rc; | |
9717 | + | |
9718 | + query.dom = DOMID_SELF; | |
9719 | + | |
9720 | + rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); | |
9721 | + if ((rc < 0) || (query.status != GNTST_okay)) | |
9722 | + return 4; /* Legacy max supported number of frames */ | |
9723 | + | |
9724 | + return query.max_nr_frames; | |
9725 | +} | |
9726 | + | |
9727 | +static inline unsigned int max_nr_grant_frames(void) | |
9728 | +{ | |
9729 | + unsigned int xen_max = __max_nr_grant_frames(); | |
9730 | + | |
9731 | + if (xen_max > boot_max_nr_grant_frames) | |
9732 | + return boot_max_nr_grant_frames; | |
9733 | + return xen_max; | |
9734 | +} | |
9735 | + | |
9736 | +#ifdef CONFIG_XEN | |
9737 | + | |
9738 | +static DEFINE_SEQLOCK(gnttab_dma_lock); | |
9739 | + | |
9740 | +#ifdef CONFIG_X86 | |
9741 | +static int map_pte_fn(pte_t *pte, struct page *pmd_page, | |
9742 | + unsigned long addr, void *data) | |
9743 | +{ | |
9744 | + unsigned long **frames = (unsigned long **)data; | |
9745 | + | |
9746 | + set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); | |
9747 | + (*frames)++; | |
9748 | + return 0; | |
9749 | +} | |
9750 | + | |
9751 | +static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | |
9752 | + unsigned long addr, void *data) | |
9753 | +{ | |
9754 | + | |
9755 | + set_pte_at(&init_mm, addr, pte, __pte(0)); | |
9756 | + return 0; | |
9757 | +} | |
9758 | + | |
9759 | +void *arch_gnttab_alloc_shared(unsigned long *frames) | |
9760 | +{ | |
9761 | + struct vm_struct *area; | |
9762 | + area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames()); | |
9763 | + BUG_ON(area == NULL); | |
9764 | + return area->addr; | |
9765 | +} | |
9766 | +#endif /* CONFIG_X86 */ | |
9767 | + | |
9768 | +static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | |
9769 | +{ | |
9770 | + struct gnttab_setup_table setup; | |
9771 | + unsigned long *frames; | |
9772 | + unsigned int nr_gframes = end_idx + 1; | |
9773 | + int rc; | |
9774 | + | |
9775 | + frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); | |
9776 | + if (!frames) | |
9777 | + return -ENOMEM; | |
9778 | + | |
9779 | + setup.dom = DOMID_SELF; | |
9780 | + setup.nr_frames = nr_gframes; | |
9781 | + set_xen_guest_handle(setup.frame_list, frames); | |
9782 | + | |
9783 | + rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); | |
9784 | + if (rc == -ENOSYS) { | |
9785 | + kfree(frames); | |
9786 | + return -ENOSYS; | |
9787 | + } | |
9788 | + | |
9789 | + BUG_ON(rc || setup.status); | |
9790 | + | |
9791 | + if (shared == NULL) | |
9792 | + shared = arch_gnttab_alloc_shared(frames); | |
9793 | + | |
9794 | +#ifdef CONFIG_X86 | |
9795 | + rc = apply_to_page_range(&init_mm, (unsigned long)shared, | |
9796 | + PAGE_SIZE * nr_gframes, | |
9797 | + map_pte_fn, &frames); | |
9798 | + BUG_ON(rc); | |
9799 | + frames -= nr_gframes; /* adjust after map_pte_fn() */ | |
9800 | +#endif /* CONFIG_X86 */ | |
9801 | + | |
9802 | + kfree(frames); | |
9803 | + | |
9804 | + return 0; | |
9805 | +} | |
9806 | + | |
9807 | +static void gnttab_page_free(struct page *page) | |
9808 | +{ | |
9809 | + ClearPageForeign(page); | |
9810 | + gnttab_reset_grant_page(page); | |
9811 | + put_page(page); | |
9812 | +} | |
9813 | + | |
9814 | +/* | |
9815 | + * Must not be called with IRQs off. This should only be used on the | |
9816 | + * slow path. | |
9817 | + * | |
9818 | + * Copy a foreign granted page to local memory. | |
9819 | + */ | |
9820 | +int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep) | |
9821 | +{ | |
9822 | + struct gnttab_unmap_and_replace unmap; | |
9823 | + mmu_update_t mmu; | |
9824 | + struct page *page; | |
9825 | + struct page *new_page; | |
9826 | + void *new_addr; | |
9827 | + void *addr; | |
9828 | + paddr_t pfn; | |
9829 | + maddr_t mfn; | |
9830 | + maddr_t new_mfn; | |
9831 | + int err; | |
9832 | + | |
9833 | + page = *pagep; | |
9834 | + if (!get_page_unless_zero(page)) | |
9835 | + return -ENOENT; | |
9836 | + | |
9837 | + err = -ENOMEM; | |
9838 | + new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); | |
9839 | + if (!new_page) | |
9840 | + goto out; | |
9841 | + | |
9842 | + new_addr = page_address(new_page); | |
9843 | + addr = page_address(page); | |
9844 | + memcpy(new_addr, addr, PAGE_SIZE); | |
9845 | + | |
9846 | + pfn = page_to_pfn(page); | |
9847 | + mfn = pfn_to_mfn(pfn); | |
9848 | + new_mfn = virt_to_mfn(new_addr); | |
9849 | + | |
9850 | + write_seqlock(&gnttab_dma_lock); | |
9851 | + | |
9852 | + /* Make seq visible before checking page_mapped. */ | |
9853 | + smp_mb(); | |
9854 | + | |
9855 | + /* Has the page been DMA-mapped? */ | |
9856 | + if (unlikely(page_mapped(page))) { | |
9857 | + write_sequnlock(&gnttab_dma_lock); | |
9858 | + put_page(new_page); | |
9859 | + err = -EBUSY; | |
9860 | + goto out; | |
9861 | + } | |
9862 | + | |
9863 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) | |
9864 | + set_phys_to_machine(pfn, new_mfn); | |
9865 | + | |
9866 | + gnttab_set_replace_op(&unmap, (unsigned long)addr, | |
9867 | + (unsigned long)new_addr, ref); | |
9868 | + | |
9869 | + err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, | |
9870 | + &unmap, 1); | |
9871 | + BUG_ON(err); | |
9872 | + BUG_ON(unmap.status); | |
9873 | + | |
9874 | + write_sequnlock(&gnttab_dma_lock); | |
9875 | + | |
9876 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
9877 | + set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY); | |
9878 | + | |
9879 | + mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; | |
9880 | + mmu.val = pfn; | |
9881 | + err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF); | |
9882 | + BUG_ON(err); | |
9883 | + } | |
9884 | + | |
9885 | + new_page->mapping = page->mapping; | |
9886 | + new_page->index = page->index; | |
9887 | + set_bit(PG_foreign, &new_page->flags); | |
9888 | + *pagep = new_page; | |
9889 | + | |
9890 | + SetPageForeign(page, gnttab_page_free); | |
9891 | + page->mapping = NULL; | |
9892 | + | |
9893 | +out: | |
9894 | + put_page(page); | |
9895 | + return err; | |
9896 | +} | |
9897 | +EXPORT_SYMBOL_GPL(gnttab_copy_grant_page); | |
9898 | + | |
9899 | +void gnttab_reset_grant_page(struct page *page) | |
9900 | +{ | |
9901 | + init_page_count(page); | |
9902 | + reset_page_mapcount(page); | |
9903 | +} | |
9904 | +EXPORT_SYMBOL_GPL(gnttab_reset_grant_page); | |
9905 | + | |
9906 | +/* | |
9907 | + * Keep track of foreign pages marked as PageForeign so that we don't | |
9908 | + * return them to the remote domain prematurely. | |
9909 | + * | |
9910 | + * PageForeign pages are pinned down by increasing their mapcount. | |
9911 | + * | |
9912 | + * All other pages are simply returned as is. | |
9913 | + */ | |
9914 | +void __gnttab_dma_map_page(struct page *page) | |
9915 | +{ | |
9916 | + unsigned int seq; | |
9917 | + | |
9918 | + if (!is_running_on_xen() || !PageForeign(page)) | |
9919 | + return; | |
9920 | + | |
9921 | + do { | |
9922 | + seq = read_seqbegin(&gnttab_dma_lock); | |
9923 | + | |
9924 | + if (gnttab_dma_local_pfn(page)) | |
9925 | + break; | |
9926 | + | |
9927 | + atomic_set(&page->_mapcount, 0); | |
9928 | + | |
9929 | + /* Make _mapcount visible before read_seqretry. */ | |
9930 | + smp_mb(); | |
9931 | + } while (unlikely(read_seqretry(&gnttab_dma_lock, seq))); | |
9932 | +} | |
9933 | + | |
9934 | +int gnttab_resume(void) | |
9935 | +{ | |
9936 | + if (max_nr_grant_frames() < nr_grant_frames) | |
9937 | + return -ENOSYS; | |
9938 | + return gnttab_map(0, nr_grant_frames - 1); | |
9939 | +} | |
9940 | + | |
9941 | +int gnttab_suspend(void) | |
9942 | +{ | |
9943 | +#ifdef CONFIG_X86 | |
9944 | + apply_to_page_range(&init_mm, (unsigned long)shared, | |
9945 | + PAGE_SIZE * nr_grant_frames, | |
9946 | + unmap_pte_fn, NULL); | |
9947 | +#endif | |
9948 | + return 0; | |
9949 | +} | |
9950 | + | |
9951 | +#else /* !CONFIG_XEN */ | |
9952 | + | |
9953 | +#include <platform-pci.h> | |
9954 | + | |
9955 | +static unsigned long resume_frames; | |
9956 | + | |
9957 | +static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | |
9958 | +{ | |
9959 | + struct xen_add_to_physmap xatp; | |
9960 | + unsigned int i = end_idx; | |
9961 | + | |
9962 | + /* Loop backwards, so that the first hypercall has the largest index, | |
9963 | + * ensuring that the table will grow only once. | |
9964 | + */ | |
9965 | + do { | |
9966 | + xatp.domid = DOMID_SELF; | |
9967 | + xatp.idx = i; | |
9968 | + xatp.space = XENMAPSPACE_grant_table; | |
9969 | + xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; | |
9970 | + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | |
9971 | + BUG(); | |
9972 | + } while (i-- > start_idx); | |
9973 | + | |
9974 | + return 0; | |
9975 | +} | |
9976 | + | |
9977 | +int gnttab_resume(void) | |
9978 | +{ | |
9979 | + unsigned int max_nr_gframes, nr_gframes; | |
9980 | + | |
9981 | + nr_gframes = nr_grant_frames; | |
9982 | + max_nr_gframes = max_nr_grant_frames(); | |
9983 | + if (max_nr_gframes < nr_gframes) | |
9984 | + return -ENOSYS; | |
9985 | + | |
9986 | + if (!resume_frames) { | |
9987 | + resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); | |
9988 | + shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes); | |
9989 | + if (shared == NULL) { | |
9990 | + printk("error to ioremap gnttab share frames\n"); | |
9991 | + return -1; | |
9992 | + } | |
9993 | + } | |
9994 | + | |
9995 | + gnttab_map(0, nr_gframes - 1); | |
9996 | + | |
9997 | + return 0; | |
9998 | +} | |
9999 | + | |
10000 | +#endif /* !CONFIG_XEN */ | |
10001 | + | |
10002 | +static int gnttab_expand(unsigned int req_entries) | |
10003 | +{ | |
10004 | + int rc; | |
10005 | + unsigned int cur, extra; | |
10006 | + | |
10007 | + cur = nr_grant_frames; | |
10008 | + extra = ((req_entries + (ENTRIES_PER_GRANT_FRAME-1)) / | |
10009 | + ENTRIES_PER_GRANT_FRAME); | |
10010 | + if (cur + extra > max_nr_grant_frames()) | |
10011 | + return -ENOSPC; | |
10012 | + | |
10013 | + if ((rc = gnttab_map(cur, cur + extra - 1)) == 0) | |
10014 | + rc = grow_gnttab_list(extra); | |
10015 | + | |
10016 | + return rc; | |
10017 | +} | |
10018 | + | |
10019 | +int __devinit gnttab_init(void) | |
10020 | +{ | |
10021 | + int i; | |
10022 | + unsigned int max_nr_glist_frames, nr_glist_frames; | |
10023 | + unsigned int nr_init_grefs; | |
10024 | + | |
10025 | + if (!is_running_on_xen()) | |
10026 | + return -ENODEV; | |
10027 | + | |
10028 | + nr_grant_frames = 1; | |
10029 | + boot_max_nr_grant_frames = __max_nr_grant_frames(); | |
10030 | + | |
10031 | + /* Determine the maximum number of frames required for the | |
10032 | + * grant reference free list on the current hypervisor. | |
10033 | + */ | |
10034 | + max_nr_glist_frames = nr_freelist_frames(boot_max_nr_grant_frames); | |
10035 | + | |
10036 | + gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), | |
10037 | + GFP_KERNEL); | |
10038 | + if (gnttab_list == NULL) | |
10039 | + return -ENOMEM; | |
10040 | + | |
10041 | + nr_glist_frames = nr_freelist_frames(nr_grant_frames); | |
10042 | + for (i = 0; i < nr_glist_frames; i++) { | |
10043 | + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); | |
10044 | + if (gnttab_list[i] == NULL) | |
10045 | + goto ini_nomem; | |
10046 | + } | |
10047 | + | |
10048 | + if (gnttab_resume() < 0) | |
10049 | + return -ENODEV; | |
10050 | + | |
10051 | + nr_init_grefs = nr_grant_frames * ENTRIES_PER_GRANT_FRAME; | |
10052 | + | |
10053 | + for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) | |
10054 | + gnttab_entry(i) = i + 1; | |
10055 | + | |
10056 | + gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; | |
10057 | + gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; | |
10058 | + gnttab_free_head = NR_RESERVED_ENTRIES; | |
10059 | + | |
10060 | + return 0; | |
10061 | + | |
10062 | + ini_nomem: | |
10063 | + for (i--; i >= 0; i--) | |
10064 | + free_page((unsigned long)gnttab_list[i]); | |
10065 | + kfree(gnttab_list); | |
10066 | + return -ENOMEM; | |
10067 | +} | |
10068 | + | |
10069 | +#ifdef CONFIG_XEN | |
10070 | +core_initcall(gnttab_init); | |
10071 | +#endif | |
10072 | Index: head-2008-11-25/drivers/xen/core/hypervisor_sysfs.c | |
10073 | =================================================================== | |
10074 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
10075 | +++ head-2008-11-25/drivers/xen/core/hypervisor_sysfs.c 2007-07-10 09:42:30.000000000 +0200 | |
10076 | @@ -0,0 +1,57 @@ | |
10077 | +/* | |
10078 | + * copyright (c) 2006 IBM Corporation | |
10079 | + * Authored by: Mike D. Day <ncmike@us.ibm.com> | |
10080 | + * | |
10081 | + * This program is free software; you can redistribute it and/or modify | |
10082 | + * it under the terms of the GNU General Public License version 2 as | |
10083 | + * published by the Free Software Foundation. | |
10084 | + */ | |
10085 | + | |
10086 | +#include <linux/kernel.h> | |
10087 | +#include <linux/module.h> | |
10088 | +#include <linux/kobject.h> | |
10089 | +#include <xen/hypervisor_sysfs.h> | |
10090 | +#include <asm/hypervisor.h> | |
10091 | + | |
10092 | +static ssize_t hyp_sysfs_show(struct kobject *kobj, | |
10093 | + struct attribute *attr, | |
10094 | + char *buffer) | |
10095 | +{ | |
10096 | + struct hyp_sysfs_attr *hyp_attr; | |
10097 | + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); | |
10098 | + if (hyp_attr->show) | |
10099 | + return hyp_attr->show(hyp_attr, buffer); | |
10100 | + return 0; | |
10101 | +} | |
10102 | + | |
10103 | +static ssize_t hyp_sysfs_store(struct kobject *kobj, | |
10104 | + struct attribute *attr, | |
10105 | + const char *buffer, | |
10106 | + size_t len) | |
10107 | +{ | |
10108 | + struct hyp_sysfs_attr *hyp_attr; | |
10109 | + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); | |
10110 | + if (hyp_attr->store) | |
10111 | + return hyp_attr->store(hyp_attr, buffer, len); | |
10112 | + return 0; | |
10113 | +} | |
10114 | + | |
10115 | +static struct sysfs_ops hyp_sysfs_ops = { | |
10116 | + .show = hyp_sysfs_show, | |
10117 | + .store = hyp_sysfs_store, | |
10118 | +}; | |
10119 | + | |
10120 | +static struct kobj_type hyp_sysfs_kobj_type = { | |
10121 | + .sysfs_ops = &hyp_sysfs_ops, | |
10122 | +}; | |
10123 | + | |
10124 | +static int __init hypervisor_subsys_init(void) | |
10125 | +{ | |
10126 | + if (!is_running_on_xen()) | |
10127 | + return -ENODEV; | |
10128 | + | |
10129 | + hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type; | |
10130 | + return 0; | |
10131 | +} | |
10132 | + | |
10133 | +device_initcall(hypervisor_subsys_init); | |
10134 | Index: head-2008-11-25/drivers/xen/core/machine_kexec.c | |
10135 | =================================================================== | |
10136 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
10137 | +++ head-2008-11-25/drivers/xen/core/machine_kexec.c 2008-10-13 13:43:45.000000000 +0200 | |
10138 | @@ -0,0 +1,222 @@ | |
10139 | +/* | |
10140 | + * drivers/xen/core/machine_kexec.c | |
10141 | + * handle transition of Linux booting another kernel | |
10142 | + */ | |
10143 | + | |
10144 | +#include <linux/kexec.h> | |
10145 | +#include <xen/interface/kexec.h> | |
10146 | +#include <linux/mm.h> | |
10147 | +#include <linux/bootmem.h> | |
10148 | + | |
10149 | +extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, | |
10150 | + struct kimage *image); | |
10151 | +extern int machine_kexec_setup_resources(struct resource *hypervisor, | |
10152 | + struct resource *phys_cpus, | |
10153 | + int nr_phys_cpus); | |
10154 | +extern void machine_kexec_register_resources(struct resource *res); | |
10155 | + | |
10156 | +static int __initdata xen_max_nr_phys_cpus; | |
10157 | +static struct resource xen_hypervisor_res; | |
10158 | +static struct resource *xen_phys_cpus; | |
10159 | + | |
10160 | +size_t vmcoreinfo_size_xen; | |
10161 | +unsigned long paddr_vmcoreinfo_xen; | |
10162 | + | |
10163 | +void __init xen_machine_kexec_setup_resources(void) | |
10164 | +{ | |
10165 | + xen_kexec_range_t range; | |
10166 | + struct resource *res; | |
10167 | + int k = 0; | |
10168 | + int rc; | |
10169 | + | |
10170 | + if (!is_initial_xendomain()) | |
10171 | + return; | |
10172 | + | |
10173 | + /* determine maximum number of physical cpus */ | |
10174 | + | |
10175 | + while (1) { | |
10176 | + memset(&range, 0, sizeof(range)); | |
10177 | + range.range = KEXEC_RANGE_MA_CPU; | |
10178 | + range.nr = k; | |
10179 | + | |
10180 | + if(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)) | |
10181 | + break; | |
10182 | + | |
10183 | + k++; | |
10184 | + } | |
10185 | + | |
10186 | + if (k == 0) | |
10187 | + return; | |
10188 | + | |
10189 | + xen_max_nr_phys_cpus = k; | |
10190 | + | |
10191 | + /* allocate xen_phys_cpus */ | |
10192 | + | |
10193 | + xen_phys_cpus = alloc_bootmem_low(k * sizeof(struct resource)); | |
10194 | + BUG_ON(xen_phys_cpus == NULL); | |
10195 | + | |
10196 | + /* fill in xen_phys_cpus with per-cpu crash note information */ | |
10197 | + | |
10198 | + for (k = 0; k < xen_max_nr_phys_cpus; k++) { | |
10199 | + memset(&range, 0, sizeof(range)); | |
10200 | + range.range = KEXEC_RANGE_MA_CPU; | |
10201 | + range.nr = k; | |
10202 | + | |
10203 | + if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)) | |
10204 | + goto err; | |
10205 | + | |
10206 | + res = xen_phys_cpus + k; | |
10207 | + | |
10208 | + memset(res, 0, sizeof(*res)); | |
10209 | + res->name = "Crash note"; | |
10210 | + res->start = range.start; | |
10211 | + res->end = range.start + range.size - 1; | |
10212 | + res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; | |
10213 | + } | |
10214 | + | |
10215 | + /* fill in xen_hypervisor_res with hypervisor machine address range */ | |
10216 | + | |
10217 | + memset(&range, 0, sizeof(range)); | |
10218 | + range.range = KEXEC_RANGE_MA_XEN; | |
10219 | + | |
10220 | + if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)) | |
10221 | + goto err; | |
10222 | + | |
10223 | + xen_hypervisor_res.name = "Hypervisor code and data"; | |
10224 | + xen_hypervisor_res.start = range.start; | |
10225 | + xen_hypervisor_res.end = range.start + range.size - 1; | |
10226 | + xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM; | |
10227 | + | |
10228 | + /* fill in crashk_res if range is reserved by hypervisor */ | |
10229 | + | |
10230 | + memset(&range, 0, sizeof(range)); | |
10231 | + range.range = KEXEC_RANGE_MA_CRASH; | |
10232 | + | |
10233 | + if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)) | |
10234 | + goto err; | |
10235 | + | |
10236 | + if (range.size) { | |
10237 | + crashk_res.start = range.start; | |
10238 | + crashk_res.end = range.start + range.size - 1; | |
10239 | + } | |
10240 | + | |
10241 | + /* get physical address of vmcoreinfo */ | |
10242 | + memset(&range, 0, sizeof(range)); | |
10243 | + range.range = KEXEC_RANGE_MA_VMCOREINFO; | |
10244 | + | |
10245 | + rc = HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range); | |
10246 | + | |
10247 | + if (rc == 0) { | |
10248 | + /* Hypercall succeeded */ | |
10249 | + vmcoreinfo_size_xen = range.size; | |
10250 | + paddr_vmcoreinfo_xen = range.start; | |
10251 | + | |
10252 | + } else { | |
10253 | + /* Hypercall failed. | |
10254 | + * Indicate not to create sysfs file by resetting globals | |
10255 | + */ | |
10256 | + vmcoreinfo_size_xen = 0; | |
10257 | + paddr_vmcoreinfo_xen = 0; | |
10258 | + | |
10259 | + /* The KEXEC_CMD_kexec_get_range hypercall did not implement | |
10260 | + * KEXEC_RANGE_MA_VMCOREINFO until Xen 3.3. | |
10261 | + * Do not bail out if it fails for this reason. | |
10262 | + */ | |
10263 | + if (rc != -EINVAL) | |
10264 | + return; | |
10265 | + } | |
10266 | + | |
10267 | + if (machine_kexec_setup_resources(&xen_hypervisor_res, xen_phys_cpus, | |
10268 | + xen_max_nr_phys_cpus)) | |
10269 | + goto err; | |
10270 | + | |
10271 | + return; | |
10272 | + | |
10273 | + err: | |
10274 | + /* | |
10275 | + * It isn't possible to free xen_phys_cpus this early in the | |
10276 | + * boot. Failure at this stage is unexpected and the amount of | |
10277 | + * memory is small therefore we tolerate the potential leak. | |
10278 | + */ | |
10279 | + xen_max_nr_phys_cpus = 0; | |
10280 | + return; | |
10281 | +} | |
10282 | + | |
10283 | +void __init xen_machine_kexec_register_resources(struct resource *res) | |
10284 | +{ | |
10285 | + request_resource(res, &xen_hypervisor_res); | |
10286 | + machine_kexec_register_resources(res); | |
10287 | +} | |
10288 | + | |
10289 | +static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image) | |
10290 | +{ | |
10291 | + machine_kexec_setup_load_arg(xki, image); | |
10292 | + | |
10293 | + xki->indirection_page = image->head; | |
10294 | + xki->start_address = image->start; | |
10295 | +} | |
10296 | + | |
10297 | +/* | |
10298 | + * Load the image into xen so xen can kdump itself | |
10299 | + * This might have been done in prepare, but prepare | |
10300 | + * is currently called too early. It might make sense | |
10301 | + * to move prepare, but for now, just add an extra hook. | |
10302 | + */ | |
10303 | +int xen_machine_kexec_load(struct kimage *image) | |
10304 | +{ | |
10305 | + xen_kexec_load_t xkl; | |
10306 | + | |
10307 | + memset(&xkl, 0, sizeof(xkl)); | |
10308 | + xkl.type = image->type; | |
10309 | + setup_load_arg(&xkl.image, image); | |
10310 | + return HYPERVISOR_kexec_op(KEXEC_CMD_kexec_load, &xkl); | |
10311 | +} | |
10312 | + | |
10313 | +/* | |
10314 | + * Unload the image that was stored by machine_kexec_load() | |
10315 | + * This might have been done in machine_kexec_cleanup() but it | |
10316 | + * is called too late, and its possible xen could try and kdump | |
10317 | + * using resources that have been freed. | |
10318 | + */ | |
10319 | +void xen_machine_kexec_unload(struct kimage *image) | |
10320 | +{ | |
10321 | + xen_kexec_load_t xkl; | |
10322 | + | |
10323 | + memset(&xkl, 0, sizeof(xkl)); | |
10324 | + xkl.type = image->type; | |
10325 | + WARN_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_unload, &xkl)); | |
10326 | +} | |
10327 | + | |
10328 | +/* | |
10329 | + * Do not allocate memory (or fail in any way) in machine_kexec(). | |
10330 | + * We are past the point of no return, committed to rebooting now. | |
10331 | + * | |
10332 | + * This has the hypervisor move to the prefered reboot CPU, | |
10333 | + * stop all CPUs and kexec. That is it combines machine_shutdown() | |
10334 | + * and machine_kexec() in Linux kexec terms. | |
10335 | + */ | |
10336 | +NORET_TYPE void machine_kexec(struct kimage *image) | |
10337 | +{ | |
10338 | + xen_kexec_exec_t xke; | |
10339 | + | |
10340 | + memset(&xke, 0, sizeof(xke)); | |
10341 | + xke.type = image->type; | |
10342 | + VOID(HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &xke)); | |
10343 | + panic("KEXEC_CMD_kexec hypercall should not return\n"); | |
10344 | +} | |
10345 | + | |
10346 | +void machine_shutdown(void) | |
10347 | +{ | |
10348 | + /* do nothing */ | |
10349 | +} | |
10350 | + | |
10351 | + | |
10352 | +/* | |
10353 | + * Local variables: | |
10354 | + * c-file-style: "linux" | |
10355 | + * indent-tabs-mode: t | |
10356 | + * c-indent-level: 8 | |
10357 | + * c-basic-offset: 8 | |
10358 | + * tab-width: 8 | |
10359 | + * End: | |
10360 | + */ | |
10361 | Index: head-2008-11-25/drivers/xen/core/machine_reboot.c | |
10362 | =================================================================== | |
10363 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
10364 | +++ head-2008-11-25/drivers/xen/core/machine_reboot.c 2008-09-01 12:07:31.000000000 +0200 | |
10365 | @@ -0,0 +1,247 @@ | |
10366 | +#include <linux/version.h> | |
10367 | +#include <linux/kernel.h> | |
10368 | +#include <linux/mm.h> | |
10369 | +#include <linux/unistd.h> | |
10370 | +#include <linux/module.h> | |
10371 | +#include <linux/reboot.h> | |
10372 | +#include <linux/sysrq.h> | |
10373 | +#include <linux/stringify.h> | |
10374 | +#include <linux/stop_machine.h> | |
10375 | +#include <asm/irq.h> | |
10376 | +#include <asm/mmu_context.h> | |
10377 | +#include <xen/evtchn.h> | |
10378 | +#include <asm/hypervisor.h> | |
10379 | +#include <xen/xenbus.h> | |
10380 | +#include <linux/cpu.h> | |
10381 | +#include <xen/gnttab.h> | |
10382 | +#include <xen/xencons.h> | |
10383 | +#include <xen/cpu_hotplug.h> | |
10384 | +#include <xen/interface/vcpu.h> | |
10385 | + | |
10386 | +#if defined(__i386__) || defined(__x86_64__) | |
10387 | + | |
10388 | +/* | |
10389 | + * Power off function, if any | |
10390 | + */ | |
10391 | +void (*pm_power_off)(void); | |
10392 | +EXPORT_SYMBOL(pm_power_off); | |
10393 | + | |
10394 | +void machine_emergency_restart(void) | |
10395 | +{ | |
10396 | + /* We really want to get pending console data out before we die. */ | |
10397 | + xencons_force_flush(); | |
10398 | + HYPERVISOR_shutdown(SHUTDOWN_reboot); | |
10399 | +} | |
10400 | + | |
10401 | +void machine_restart(char * __unused) | |
10402 | +{ | |
10403 | + machine_emergency_restart(); | |
10404 | +} | |
10405 | + | |
10406 | +void machine_halt(void) | |
10407 | +{ | |
10408 | + machine_power_off(); | |
10409 | +} | |
10410 | + | |
10411 | +void machine_power_off(void) | |
10412 | +{ | |
10413 | + /* We really want to get pending console data out before we die. */ | |
10414 | + xencons_force_flush(); | |
10415 | + if (pm_power_off) | |
10416 | + pm_power_off(); | |
10417 | + HYPERVISOR_shutdown(SHUTDOWN_poweroff); | |
10418 | +} | |
10419 | + | |
10420 | +int reboot_thru_bios = 0; /* for dmi_scan.c */ | |
10421 | +EXPORT_SYMBOL(machine_restart); | |
10422 | +EXPORT_SYMBOL(machine_halt); | |
10423 | +EXPORT_SYMBOL(machine_power_off); | |
10424 | + | |
10425 | +static void pre_suspend(void) | |
10426 | +{ | |
10427 | + HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; | |
10428 | + WARN_ON(HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO), | |
10429 | + __pte_ma(0), 0)); | |
10430 | + | |
10431 | + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); | |
10432 | + xen_start_info->console.domU.mfn = | |
10433 | + mfn_to_pfn(xen_start_info->console.domU.mfn); | |
10434 | +} | |
10435 | + | |
10436 | +static void post_suspend(int suspend_cancelled) | |
10437 | +{ | |
10438 | + int i, j, k, fpp; | |
10439 | + unsigned long shinfo_mfn; | |
10440 | + extern unsigned long max_pfn; | |
10441 | + extern unsigned long *pfn_to_mfn_frame_list_list; | |
10442 | + extern unsigned long *pfn_to_mfn_frame_list[]; | |
10443 | + | |
10444 | + if (suspend_cancelled) { | |
10445 | + xen_start_info->store_mfn = | |
10446 | + pfn_to_mfn(xen_start_info->store_mfn); | |
10447 | + xen_start_info->console.domU.mfn = | |
10448 | + pfn_to_mfn(xen_start_info->console.domU.mfn); | |
10449 | + } else { | |
10450 | +#ifdef CONFIG_SMP | |
10451 | + cpu_initialized_map = cpu_online_map; | |
10452 | +#endif | |
10453 | + } | |
10454 | + | |
10455 | + shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT; | |
10456 | + if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO), | |
10457 | + pfn_pte_ma(shinfo_mfn, PAGE_KERNEL), | |
10458 | + 0)) | |
10459 | + BUG(); | |
10460 | + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); | |
10461 | + | |
10462 | + memset(empty_zero_page, 0, PAGE_SIZE); | |
10463 | + | |
10464 | + fpp = PAGE_SIZE/sizeof(unsigned long); | |
10465 | + for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) { | |
10466 | + if ((j % fpp) == 0) { | |
10467 | + k++; | |
10468 | + pfn_to_mfn_frame_list_list[k] = | |
10469 | + virt_to_mfn(pfn_to_mfn_frame_list[k]); | |
10470 | + j = 0; | |
10471 | + } | |
10472 | + pfn_to_mfn_frame_list[k][j] = | |
10473 | + virt_to_mfn(&phys_to_machine_mapping[i]); | |
10474 | + } | |
10475 | + HYPERVISOR_shared_info->arch.max_pfn = max_pfn; | |
10476 | + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | |
10477 | + virt_to_mfn(pfn_to_mfn_frame_list_list); | |
10478 | +} | |
10479 | + | |
10480 | +#else /* !(defined(__i386__) || defined(__x86_64__)) */ | |
10481 | + | |
10482 | +#ifndef HAVE_XEN_PRE_SUSPEND | |
10483 | +#define xen_pre_suspend() ((void)0) | |
10484 | +#endif | |
10485 | + | |
10486 | +#ifndef HAVE_XEN_POST_SUSPEND | |
10487 | +#define xen_post_suspend(x) ((void)0) | |
10488 | +#endif | |
10489 | + | |
10490 | +#define switch_idle_mm() ((void)0) | |
10491 | +#define mm_pin_all() ((void)0) | |
10492 | +#define pre_suspend() xen_pre_suspend() | |
10493 | +#define post_suspend(x) xen_post_suspend(x) | |
10494 | + | |
10495 | +#endif | |
10496 | + | |
10497 | +struct suspend { | |
10498 | + int fast_suspend; | |
10499 | + void (*resume_notifier)(int); | |
10500 | +}; | |
10501 | + | |
10502 | +static int take_machine_down(void *_suspend) | |
10503 | +{ | |
10504 | + struct suspend *suspend = _suspend; | |
10505 | + int suspend_cancelled, err; | |
10506 | + extern void time_resume(void); | |
10507 | + | |
10508 | + if (suspend->fast_suspend) { | |
10509 | + BUG_ON(!irqs_disabled()); | |
10510 | + } else { | |
10511 | + BUG_ON(irqs_disabled()); | |
10512 | + | |
10513 | + for (;;) { | |
10514 | + err = smp_suspend(); | |
10515 | + if (err) | |
10516 | + return err; | |
10517 | + | |
10518 | + xenbus_suspend(); | |
10519 | + preempt_disable(); | |
10520 | + | |
10521 | + if (num_online_cpus() == 1) | |
10522 | + break; | |
10523 | + | |
10524 | + preempt_enable(); | |
10525 | + xenbus_suspend_cancel(); | |
10526 | + } | |
10527 | + | |
10528 | + local_irq_disable(); | |
10529 | + } | |
10530 | + | |
10531 | + mm_pin_all(); | |
10532 | + gnttab_suspend(); | |
10533 | + pre_suspend(); | |
10534 | + | |
10535 | + /* | |
10536 | + * This hypercall returns 1 if suspend was cancelled or the domain was | |
10537 | + * merely checkpointed, and 0 if it is resuming in a new domain. | |
10538 | + */ | |
10539 | + suspend_cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); | |
10540 | + | |
10541 | + suspend->resume_notifier(suspend_cancelled); | |
10542 | + post_suspend(suspend_cancelled); | |
10543 | + gnttab_resume(); | |
10544 | + if (!suspend_cancelled) { | |
10545 | + irq_resume(); | |
10546 | +#ifdef __x86_64__ | |
10547 | + /* | |
10548 | + * Older versions of Xen do not save/restore the user %cr3. | |
10549 | + * We do it here just in case, but there's no need if we are | |
10550 | + * in fast-suspend mode as that implies a new enough Xen. | |
10551 | + */ | |
10552 | + if (!suspend->fast_suspend) | |
10553 | + xen_new_user_pt(__pa(__user_pgd( | |
10554 | + current->active_mm->pgd))); | |
10555 | +#endif | |
10556 | + } | |
10557 | + time_resume(); | |
10558 | + | |
10559 | + if (!suspend->fast_suspend) | |
10560 | + local_irq_enable(); | |
10561 | + | |
10562 | + return suspend_cancelled; | |
10563 | +} | |
10564 | + | |
10565 | +int __xen_suspend(int fast_suspend, void (*resume_notifier)(int)) | |
10566 | +{ | |
10567 | + int err, suspend_cancelled; | |
10568 | + struct suspend suspend; | |
10569 | + | |
10570 | + BUG_ON(smp_processor_id() != 0); | |
10571 | + BUG_ON(in_interrupt()); | |
10572 | + | |
10573 | +#if defined(__i386__) || defined(__x86_64__) | |
10574 | + if (xen_feature(XENFEAT_auto_translated_physmap)) { | |
10575 | + printk(KERN_WARNING "Cannot suspend in " | |
10576 | + "auto_translated_physmap mode.\n"); | |
10577 | + return -EOPNOTSUPP; | |
10578 | + } | |
10579 | +#endif | |
10580 | + | |
10581 | + /* If we are definitely UP then 'slow mode' is actually faster. */ | |
10582 | + if (num_possible_cpus() == 1) | |
10583 | + fast_suspend = 0; | |
10584 | + | |
10585 | + suspend.fast_suspend = fast_suspend; | |
10586 | + suspend.resume_notifier = resume_notifier; | |
10587 | + | |
10588 | + if (fast_suspend) { | |
10589 | + xenbus_suspend(); | |
10590 | + err = stop_machine_run(take_machine_down, &suspend, 0); | |
10591 | + if (err < 0) | |
10592 | + xenbus_suspend_cancel(); | |
10593 | + } else { | |
10594 | + err = take_machine_down(&suspend); | |
10595 | + } | |
10596 | + | |
10597 | + if (err < 0) | |
10598 | + return err; | |
10599 | + | |
10600 | + suspend_cancelled = err; | |
10601 | + if (!suspend_cancelled) { | |
10602 | + xencons_resume(); | |
10603 | + xenbus_resume(); | |
10604 | + } else { | |
10605 | + xenbus_suspend_cancel(); | |
10606 | + } | |
10607 | + | |
10608 | + if (!fast_suspend) | |
10609 | + smp_resume(); | |
10610 | + | |
10611 | + return 0; | |
10612 | +} | |
10613 | Index: head-2008-11-25/drivers/xen/core/pci.c | |
10614 | =================================================================== | |
10615 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
10616 | +++ head-2008-11-25/drivers/xen/core/pci.c 2008-11-10 11:44:21.000000000 +0100 | |
10617 | @@ -0,0 +1,59 @@ | |
10618 | +/* | |
10619 | + * vim:shiftwidth=8:noexpandtab | |
10620 | + */ | |
10621 | + | |
10622 | +#include <linux/kernel.h> | |
10623 | +#include <linux/init.h> | |
10624 | +#include <linux/pci.h> | |
10625 | +#include <xen/interface/physdev.h> | |
10626 | + | |
10627 | +static int (*pci_bus_probe)(struct device *dev); | |
10628 | +static int (*pci_bus_remove)(struct device *dev); | |
10629 | + | |
10630 | +static int pci_bus_probe_wrapper(struct device *dev) | |
10631 | +{ | |
10632 | + int r; | |
10633 | + struct pci_dev *pci_dev = to_pci_dev(dev); | |
10634 | + struct physdev_manage_pci manage_pci; | |
10635 | + manage_pci.bus = pci_dev->bus->number; | |
10636 | + manage_pci.devfn = pci_dev->devfn; | |
10637 | + | |
10638 | + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci); | |
10639 | + if (r && r != -ENOSYS) | |
10640 | + return r; | |
10641 | + | |
10642 | + r = pci_bus_probe(dev); | |
10643 | + return r; | |
10644 | +} | |
10645 | + | |
10646 | +static int pci_bus_remove_wrapper(struct device *dev) | |
10647 | +{ | |
10648 | + int r; | |
10649 | + struct pci_dev *pci_dev = to_pci_dev(dev); | |
10650 | + struct physdev_manage_pci manage_pci; | |
10651 | + manage_pci.bus = pci_dev->bus->number; | |
10652 | + manage_pci.devfn = pci_dev->devfn; | |
10653 | + | |
10654 | + r = pci_bus_remove(dev); | |
10655 | + /* dev and pci_dev are no longer valid!! */ | |
10656 | + | |
10657 | + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, | |
10658 | + &manage_pci)); | |
10659 | + return r; | |
10660 | +} | |
10661 | + | |
10662 | +static int __init hook_pci_bus(void) | |
10663 | +{ | |
10664 | + if (!is_running_on_xen() || !is_initial_xendomain()) | |
10665 | + return 0; | |
10666 | + | |
10667 | + pci_bus_probe = pci_bus_type.probe; | |
10668 | + pci_bus_type.probe = pci_bus_probe_wrapper; | |
10669 | + | |
10670 | + pci_bus_remove = pci_bus_type.remove; | |
10671 | + pci_bus_type.remove = pci_bus_remove_wrapper; | |
10672 | + | |
10673 | + return 0; | |
10674 | +} | |
10675 | + | |
10676 | +core_initcall(hook_pci_bus); | |
10677 | Index: head-2008-11-25/drivers/xen/core/reboot.c | |
10678 | =================================================================== | |
10679 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
10680 | +++ head-2008-11-25/drivers/xen/core/reboot.c 2008-08-07 12:44:36.000000000 +0200 | |
10681 | @@ -0,0 +1,335 @@ | |
10682 | +#define __KERNEL_SYSCALLS__ | |
10683 | +#include <linux/version.h> | |
10684 | +#include <linux/kernel.h> | |
10685 | +#include <linux/unistd.h> | |
10686 | +#include <linux/module.h> | |
10687 | +#include <linux/reboot.h> | |
10688 | +#include <linux/sysrq.h> | |
10689 | +#include <asm/hypervisor.h> | |
10690 | +#include <xen/xenbus.h> | |
10691 | +#include <xen/evtchn.h> | |
10692 | +#include <linux/kmod.h> | |
10693 | +#include <linux/slab.h> | |
10694 | +#include <linux/workqueue.h> | |
10695 | + | |
10696 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
10697 | +#include <xen/platform-compat.h> | |
10698 | +#endif | |
10699 | + | |
10700 | +MODULE_LICENSE("Dual BSD/GPL"); | |
10701 | + | |
10702 | +#define SHUTDOWN_INVALID -1 | |
10703 | +#define SHUTDOWN_POWEROFF 0 | |
10704 | +#define SHUTDOWN_SUSPEND 2 | |
10705 | +#define SHUTDOWN_RESUMING 3 | |
10706 | +#define SHUTDOWN_HALT 4 | |
10707 | + | |
10708 | +/* Ignore multiple shutdown requests. */ | |
10709 | +static int shutting_down = SHUTDOWN_INVALID; | |
10710 | + | |
10711 | +/* Was last suspend request cancelled? */ | |
10712 | +static int suspend_cancelled; | |
10713 | + | |
10714 | +/* Can we leave APs online when we suspend? */ | |
10715 | +static int fast_suspend; | |
10716 | + | |
10717 | +static void __shutdown_handler(void *unused); | |
10718 | +static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); | |
10719 | + | |
10720 | +static int setup_suspend_evtchn(void); | |
10721 | + | |
10722 | +int __xen_suspend(int fast_suspend, void (*resume_notifier)(int)); | |
10723 | + | |
10724 | +static int shutdown_process(void *__unused) | |
10725 | +{ | |
10726 | + static char *envp[] = { "HOME=/", "TERM=linux", | |
10727 | + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; | |
10728 | + static char *poweroff_argv[] = { "/sbin/poweroff", NULL }; | |
10729 | + | |
10730 | + extern asmlinkage long sys_reboot(int magic1, int magic2, | |
10731 | + unsigned int cmd, void *arg); | |
10732 | + | |
10733 | + if ((shutting_down == SHUTDOWN_POWEROFF) || | |
10734 | + (shutting_down == SHUTDOWN_HALT)) { | |
10735 | + if (call_usermodehelper("/sbin/poweroff", poweroff_argv, | |
10736 | + envp, 0) < 0) { | |
10737 | +#ifdef CONFIG_XEN | |
10738 | + sys_reboot(LINUX_REBOOT_MAGIC1, | |
10739 | + LINUX_REBOOT_MAGIC2, | |
10740 | + LINUX_REBOOT_CMD_POWER_OFF, | |
10741 | + NULL); | |
10742 | +#endif /* CONFIG_XEN */ | |
10743 | + } | |
10744 | + } | |
10745 | + | |
10746 | + shutting_down = SHUTDOWN_INVALID; /* could try again */ | |
10747 | + | |
10748 | + return 0; | |
10749 | +} | |
10750 | + | |
10751 | +static void xen_resume_notifier(int _suspend_cancelled) | |
10752 | +{ | |
10753 | + int old_state = xchg(&shutting_down, SHUTDOWN_RESUMING); | |
10754 | + BUG_ON(old_state != SHUTDOWN_SUSPEND); | |
10755 | + suspend_cancelled = _suspend_cancelled; | |
10756 | +} | |
10757 | + | |
10758 | +static int xen_suspend(void *__unused) | |
10759 | +{ | |
10760 | + int err, old_state; | |
10761 | + | |
10762 | + daemonize("suspend"); | |
10763 | + err = set_cpus_allowed(current, cpumask_of_cpu(0)); | |
10764 | + if (err) { | |
10765 | + printk(KERN_ERR "Xen suspend can't run on CPU0 (%d)\n", err); | |
10766 | + goto fail; | |
10767 | + } | |
10768 | + | |
10769 | + do { | |
10770 | + err = __xen_suspend(fast_suspend, xen_resume_notifier); | |
10771 | + if (err) { | |
10772 | + printk(KERN_ERR "Xen suspend failed (%d)\n", err); | |
10773 | + goto fail; | |
10774 | + } | |
10775 | + if (!suspend_cancelled) | |
10776 | + setup_suspend_evtchn(); | |
10777 | + old_state = cmpxchg( | |
10778 | + &shutting_down, SHUTDOWN_RESUMING, SHUTDOWN_INVALID); | |
10779 | + } while (old_state == SHUTDOWN_SUSPEND); | |
10780 | + | |
10781 | + switch (old_state) { | |
10782 | + case SHUTDOWN_INVALID: | |
10783 | + case SHUTDOWN_SUSPEND: | |
10784 | + BUG(); | |
10785 | + case SHUTDOWN_RESUMING: | |
10786 | + break; | |
10787 | + default: | |
10788 | + schedule_work(&shutdown_work); | |
10789 | + break; | |
10790 | + } | |
10791 | + | |
10792 | + return 0; | |
10793 | + | |
10794 | + fail: | |
10795 | + old_state = xchg(&shutting_down, SHUTDOWN_INVALID); | |
10796 | + BUG_ON(old_state != SHUTDOWN_SUSPEND); | |
10797 | + return 0; | |
10798 | +} | |
10799 | + | |
10800 | +static void switch_shutdown_state(int new_state) | |
10801 | +{ | |
10802 | + int prev_state, old_state = SHUTDOWN_INVALID; | |
10803 | + | |
10804 | + /* We only drive shutdown_state into an active state. */ | |
10805 | + if (new_state == SHUTDOWN_INVALID) | |
10806 | + return; | |
10807 | + | |
10808 | + do { | |
10809 | + /* We drop this transition if already in an active state. */ | |
10810 | + if ((old_state != SHUTDOWN_INVALID) && | |
10811 | + (old_state != SHUTDOWN_RESUMING)) | |
10812 | + return; | |
10813 | + /* Attempt to transition. */ | |
10814 | + prev_state = old_state; | |
10815 | + old_state = cmpxchg(&shutting_down, old_state, new_state); | |
10816 | + } while (old_state != prev_state); | |
10817 | + | |
10818 | + /* Either we kick off the work, or we leave it to xen_suspend(). */ | |
10819 | + if (old_state == SHUTDOWN_INVALID) | |
10820 | + schedule_work(&shutdown_work); | |
10821 | + else | |
10822 | + BUG_ON(old_state != SHUTDOWN_RESUMING); | |
10823 | +} | |
10824 | + | |
10825 | +static void __shutdown_handler(void *unused) | |
10826 | +{ | |
10827 | + int err; | |
10828 | + | |
10829 | + err = kernel_thread((shutting_down == SHUTDOWN_SUSPEND) ? | |
10830 | + xen_suspend : shutdown_process, | |
10831 | + NULL, CLONE_FS | CLONE_FILES); | |
10832 | + | |
10833 | + if (err < 0) { | |
10834 | + printk(KERN_WARNING "Error creating shutdown process (%d): " | |
10835 | + "retrying...\n", -err); | |
10836 | + schedule_delayed_work(&shutdown_work, HZ/2); | |
10837 | + } | |
10838 | +} | |
10839 | + | |
10840 | +static void shutdown_handler(struct xenbus_watch *watch, | |
10841 | + const char **vec, unsigned int len) | |
10842 | +{ | |
10843 | + extern void ctrl_alt_del(void); | |
10844 | + char *str; | |
10845 | + struct xenbus_transaction xbt; | |
10846 | + int err, new_state = SHUTDOWN_INVALID; | |
10847 | + | |
10848 | + if ((shutting_down != SHUTDOWN_INVALID) && | |
10849 | + (shutting_down != SHUTDOWN_RESUMING)) | |
10850 | + return; | |
10851 | + | |
10852 | + again: | |
10853 | + err = xenbus_transaction_start(&xbt); | |
10854 | + if (err) | |
10855 | + return; | |
10856 | + | |
10857 | + str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); | |
10858 | + /* Ignore read errors and empty reads. */ | |
10859 | + if (XENBUS_IS_ERR_READ(str)) { | |
10860 | + xenbus_transaction_end(xbt, 1); | |
10861 | + return; | |
10862 | + } | |
10863 | + | |
10864 | + xenbus_write(xbt, "control", "shutdown", ""); | |
10865 | + | |
10866 | + err = xenbus_transaction_end(xbt, 0); | |
10867 | + if (err == -EAGAIN) { | |
10868 | + kfree(str); | |
10869 | + goto again; | |
10870 | + } | |
10871 | + | |
10872 | + if (strcmp(str, "poweroff") == 0) | |
10873 | + new_state = SHUTDOWN_POWEROFF; | |
10874 | + else if (strcmp(str, "reboot") == 0) | |
10875 | + ctrl_alt_del(); | |
10876 | + else if (strcmp(str, "suspend") == 0) | |
10877 | + new_state = SHUTDOWN_SUSPEND; | |
10878 | + else if (strcmp(str, "halt") == 0) | |
10879 | + new_state = SHUTDOWN_HALT; | |
10880 | + else | |
10881 | + printk("Ignoring shutdown request: %s\n", str); | |
10882 | + | |
10883 | + switch_shutdown_state(new_state); | |
10884 | + | |
10885 | + kfree(str); | |
10886 | +} | |
10887 | + | |
10888 | +static void sysrq_handler(struct xenbus_watch *watch, const char **vec, | |
10889 | + unsigned int len) | |
10890 | +{ | |
10891 | + char sysrq_key = '\0'; | |
10892 | + struct xenbus_transaction xbt; | |
10893 | + int err; | |
10894 | + | |
10895 | + again: | |
10896 | + err = xenbus_transaction_start(&xbt); | |
10897 | + if (err) | |
10898 | + return; | |
10899 | + if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { | |
10900 | + printk(KERN_ERR "Unable to read sysrq code in " | |
10901 | + "control/sysrq\n"); | |
10902 | + xenbus_transaction_end(xbt, 1); | |
10903 | + return; | |
10904 | + } | |
10905 | + | |
10906 | + if (sysrq_key != '\0') | |
10907 | + xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); | |
10908 | + | |
10909 | + err = xenbus_transaction_end(xbt, 0); | |
10910 | + if (err == -EAGAIN) | |
10911 | + goto again; | |
10912 | + | |
10913 | +#ifdef CONFIG_MAGIC_SYSRQ | |
10914 | + if (sysrq_key != '\0') | |
10915 | + handle_sysrq(sysrq_key, NULL, NULL); | |
10916 | +#endif | |
10917 | +} | |
10918 | + | |
10919 | +static struct xenbus_watch shutdown_watch = { | |
10920 | + .node = "control/shutdown", | |
10921 | + .callback = shutdown_handler | |
10922 | +}; | |
10923 | + | |
10924 | +static struct xenbus_watch sysrq_watch = { | |
10925 | + .node = "control/sysrq", | |
10926 | + .callback = sysrq_handler | |
10927 | +}; | |
10928 | + | |
10929 | +static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs *ptregs) | |
10930 | +{ | |
10931 | + switch_shutdown_state(SHUTDOWN_SUSPEND); | |
10932 | + return IRQ_HANDLED; | |
10933 | +} | |
10934 | + | |
10935 | +static int setup_suspend_evtchn(void) | |
10936 | +{ | |
10937 | + static int irq; | |
10938 | + int port; | |
10939 | + char portstr[16]; | |
10940 | + | |
10941 | + if (irq > 0) | |
10942 | + unbind_from_irqhandler(irq, NULL); | |
10943 | + | |
10944 | + irq = bind_listening_port_to_irqhandler(0, suspend_int, 0, "suspend", | |
10945 | + NULL); | |
10946 | + if (irq <= 0) | |
10947 | + return -1; | |
10948 | + | |
10949 | + port = irq_to_evtchn_port(irq); | |
10950 | + printk(KERN_INFO "suspend: event channel %d\n", port); | |
10951 | + sprintf(portstr, "%d", port); | |
10952 | + xenbus_write(XBT_NIL, "device/suspend", "event-channel", portstr); | |
10953 | + | |
10954 | + return 0; | |
10955 | +} | |
10956 | + | |
10957 | +static int setup_shutdown_watcher(void) | |
10958 | +{ | |
10959 | + int err; | |
10960 | + | |
10961 | + xenbus_scanf(XBT_NIL, "control", | |
10962 | + "platform-feature-multiprocessor-suspend", | |
10963 | + "%d", &fast_suspend); | |
10964 | + | |
10965 | + err = register_xenbus_watch(&shutdown_watch); | |
10966 | + if (err) { | |
10967 | + printk(KERN_ERR "Failed to set shutdown watcher\n"); | |
10968 | + return err; | |
10969 | + } | |
10970 | + | |
10971 | + err = register_xenbus_watch(&sysrq_watch); | |
10972 | + if (err) { | |
10973 | + printk(KERN_ERR "Failed to set sysrq watcher\n"); | |
10974 | + return err; | |
10975 | + } | |
10976 | + | |
10977 | + /* suspend event channel */ | |
10978 | + err = setup_suspend_evtchn(); | |
10979 | + if (err) { | |
10980 | + printk(KERN_ERR "Failed to register suspend event channel\n"); | |
10981 | + return err; | |
10982 | + } | |
10983 | + | |
10984 | + return 0; | |
10985 | +} | |
10986 | + | |
10987 | +#ifdef CONFIG_XEN | |
10988 | + | |
10989 | +static int shutdown_event(struct notifier_block *notifier, | |
10990 | + unsigned long event, | |
10991 | + void *data) | |
10992 | +{ | |
10993 | + setup_shutdown_watcher(); | |
10994 | + return NOTIFY_DONE; | |
10995 | +} | |
10996 | + | |
10997 | +static int __init setup_shutdown_event(void) | |
10998 | +{ | |
10999 | + static struct notifier_block xenstore_notifier = { | |
11000 | + .notifier_call = shutdown_event | |
11001 | + }; | |
11002 | + register_xenstore_notifier(&xenstore_notifier); | |
11003 | + | |
11004 | + return 0; | |
11005 | +} | |
11006 | + | |
11007 | +subsys_initcall(setup_shutdown_event); | |
11008 | + | |
11009 | +#else /* !defined(CONFIG_XEN) */ | |
11010 | + | |
11011 | +int xen_reboot_init(void) | |
11012 | +{ | |
11013 | + return setup_shutdown_watcher(); | |
11014 | +} | |
11015 | + | |
11016 | +#endif /* !defined(CONFIG_XEN) */ | |
11017 | Index: head-2008-11-25/drivers/xen/core/smpboot.c | |
11018 | =================================================================== | |
11019 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
11020 | +++ head-2008-11-25/drivers/xen/core/smpboot.c 2008-03-06 08:54:32.000000000 +0100 | |
11021 | @@ -0,0 +1,464 @@ | |
11022 | +/* | |
11023 | + * Xen SMP booting functions | |
11024 | + * | |
11025 | + * See arch/i386/kernel/smpboot.c for copyright and credits for derived | |
11026 | + * portions of this file. | |
11027 | + */ | |
11028 | + | |
11029 | +#include <linux/module.h> | |
11030 | +#include <linux/init.h> | |
11031 | +#include <linux/kernel.h> | |
11032 | +#include <linux/mm.h> | |
11033 | +#include <linux/sched.h> | |
11034 | +#include <linux/kernel_stat.h> | |
11035 | +#include <linux/smp_lock.h> | |
11036 | +#include <linux/irq.h> | |
11037 | +#include <linux/bootmem.h> | |
11038 | +#include <linux/notifier.h> | |
11039 | +#include <linux/cpu.h> | |
11040 | +#include <linux/percpu.h> | |
11041 | +#include <asm/desc.h> | |
11042 | +#include <asm/arch_hooks.h> | |
11043 | +#include <asm/pgalloc.h> | |
11044 | +#include <xen/evtchn.h> | |
11045 | +#include <xen/interface/vcpu.h> | |
11046 | +#include <xen/cpu_hotplug.h> | |
11047 | +#include <xen/xenbus.h> | |
11048 | + | |
11049 | +extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *); | |
11050 | +extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *); | |
11051 | + | |
11052 | +extern int local_setup_timer(unsigned int cpu); | |
11053 | +extern void local_teardown_timer(unsigned int cpu); | |
11054 | + | |
11055 | +extern void hypervisor_callback(void); | |
11056 | +extern void failsafe_callback(void); | |
11057 | +extern void system_call(void); | |
11058 | +extern void smp_trap_init(trap_info_t *); | |
11059 | + | |
11060 | +/* Number of siblings per CPU package */ | |
11061 | +int smp_num_siblings = 1; | |
11062 | + | |
11063 | +cpumask_t cpu_online_map; | |
11064 | +EXPORT_SYMBOL(cpu_online_map); | |
11065 | +cpumask_t cpu_possible_map; | |
11066 | +EXPORT_SYMBOL(cpu_possible_map); | |
11067 | +cpumask_t cpu_initialized_map; | |
11068 | + | |
11069 | +struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | |
11070 | +EXPORT_SYMBOL(cpu_data); | |
11071 | + | |
11072 | +#ifdef CONFIG_HOTPLUG_CPU | |
11073 | +DEFINE_PER_CPU(int, cpu_state) = { 0 }; | |
11074 | +#endif | |
11075 | + | |
11076 | +static DEFINE_PER_CPU(int, resched_irq); | |
11077 | +static DEFINE_PER_CPU(int, callfunc_irq); | |
11078 | +static char resched_name[NR_CPUS][15]; | |
11079 | +static char callfunc_name[NR_CPUS][15]; | |
11080 | + | |
11081 | +u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; | |
11082 | + | |
11083 | +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; | |
11084 | +cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; | |
11085 | +EXPORT_SYMBOL(cpu_core_map); | |
11086 | + | |
11087 | +#if defined(__i386__) | |
11088 | +u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff }; | |
11089 | +EXPORT_SYMBOL(x86_cpu_to_apicid); | |
11090 | +#elif !defined(CONFIG_X86_IO_APIC) | |
11091 | +unsigned int maxcpus = NR_CPUS; | |
11092 | +#endif | |
11093 | + | |
11094 | +void __init prefill_possible_map(void) | |
11095 | +{ | |
11096 | + int i, rc; | |
11097 | + | |
11098 | + for_each_possible_cpu(i) | |
11099 | + if (i != smp_processor_id()) | |
11100 | + return; | |
11101 | + | |
11102 | + for (i = 0; i < NR_CPUS; i++) { | |
11103 | + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); | |
11104 | + if (rc >= 0) | |
11105 | + cpu_set(i, cpu_possible_map); | |
11106 | + } | |
11107 | +} | |
11108 | + | |
11109 | +void __init smp_alloc_memory(void) | |
11110 | +{ | |
11111 | +} | |
11112 | + | |
11113 | +static inline void | |
11114 | +set_cpu_sibling_map(unsigned int cpu) | |
11115 | +{ | |
11116 | + cpu_data[cpu].phys_proc_id = cpu; | |
11117 | + cpu_data[cpu].cpu_core_id = 0; | |
11118 | + | |
11119 | + cpu_sibling_map[cpu] = cpumask_of_cpu(cpu); | |
11120 | + cpu_core_map[cpu] = cpumask_of_cpu(cpu); | |
11121 | + | |
11122 | + cpu_data[cpu].booted_cores = 1; | |
11123 | +} | |
11124 | + | |
11125 | +static void | |
11126 | +remove_siblinginfo(unsigned int cpu) | |
11127 | +{ | |
11128 | + cpu_data[cpu].phys_proc_id = BAD_APICID; | |
11129 | + cpu_data[cpu].cpu_core_id = BAD_APICID; | |
11130 | + | |
11131 | + cpus_clear(cpu_sibling_map[cpu]); | |
11132 | + cpus_clear(cpu_core_map[cpu]); | |
11133 | + | |
11134 | + cpu_data[cpu].booted_cores = 0; | |
11135 | +} | |
11136 | + | |
11137 | +static int __cpuinit xen_smp_intr_init(unsigned int cpu) | |
11138 | +{ | |
11139 | + int rc; | |
11140 | + | |
11141 | + per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; | |
11142 | + | |
11143 | + sprintf(resched_name[cpu], "resched%u", cpu); | |
11144 | + rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, | |
11145 | + cpu, | |
11146 | + smp_reschedule_interrupt, | |
11147 | + SA_INTERRUPT, | |
11148 | + resched_name[cpu], | |
11149 | + NULL); | |
11150 | + if (rc < 0) | |
11151 | + goto fail; | |
11152 | + per_cpu(resched_irq, cpu) = rc; | |
11153 | + | |
11154 | + sprintf(callfunc_name[cpu], "callfunc%u", cpu); | |
11155 | + rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, | |
11156 | + cpu, | |
11157 | + smp_call_function_interrupt, | |
11158 | + SA_INTERRUPT, | |
11159 | + callfunc_name[cpu], | |
11160 | + NULL); | |
11161 | + if (rc < 0) | |
11162 | + goto fail; | |
11163 | + per_cpu(callfunc_irq, cpu) = rc; | |
11164 | + | |
11165 | + if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0)) | |
11166 | + goto fail; | |
11167 | + | |
11168 | + return 0; | |
11169 | + | |
11170 | + fail: | |
11171 | + if (per_cpu(resched_irq, cpu) >= 0) | |
11172 | + unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | |
11173 | + if (per_cpu(callfunc_irq, cpu) >= 0) | |
11174 | + unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | |
11175 | + return rc; | |
11176 | +} | |
11177 | + | |
11178 | +#ifdef CONFIG_HOTPLUG_CPU | |
11179 | +static void xen_smp_intr_exit(unsigned int cpu) | |
11180 | +{ | |
11181 | + if (cpu != 0) | |
11182 | + local_teardown_timer(cpu); | |
11183 | + | |
11184 | + unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); | |
11185 | + unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | |
11186 | +} | |
11187 | +#endif | |
11188 | + | |
11189 | +void __cpuinit cpu_bringup(void) | |
11190 | +{ | |
11191 | + cpu_init(); | |
11192 | + identify_cpu(cpu_data + smp_processor_id()); | |
11193 | + touch_softlockup_watchdog(); | |
11194 | + preempt_disable(); | |
11195 | + local_irq_enable(); | |
11196 | +} | |
11197 | + | |
11198 | +static void __cpuinit cpu_bringup_and_idle(void) | |
11199 | +{ | |
11200 | + cpu_bringup(); | |
11201 | + cpu_idle(); | |
11202 | +} | |
11203 | + | |
11204 | +static void __cpuinit cpu_initialize_context(unsigned int cpu) | |
11205 | +{ | |
11206 | + /* vcpu_guest_context_t is too large to allocate on the stack. | |
11207 | + * Hence we allocate statically and protect it with a lock */ | |
11208 | + static vcpu_guest_context_t ctxt; | |
11209 | + static DEFINE_SPINLOCK(ctxt_lock); | |
11210 | + | |
11211 | + struct task_struct *idle = idle_task(cpu); | |
11212 | +#ifdef __x86_64__ | |
11213 | + struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu]; | |
11214 | +#else | |
11215 | + struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | |
11216 | +#endif | |
11217 | + | |
11218 | + if (cpu_test_and_set(cpu, cpu_initialized_map)) | |
11219 | + return; | |
11220 | + | |
11221 | + spin_lock(&ctxt_lock); | |
11222 | + | |
11223 | + memset(&ctxt, 0, sizeof(ctxt)); | |
11224 | + | |
11225 | + ctxt.flags = VGCF_IN_KERNEL; | |
11226 | + ctxt.user_regs.ds = __USER_DS; | |
11227 | + ctxt.user_regs.es = __USER_DS; | |
11228 | + ctxt.user_regs.fs = 0; | |
11229 | + ctxt.user_regs.gs = 0; | |
11230 | + ctxt.user_regs.ss = __KERNEL_DS; | |
11231 | + ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle; | |
11232 | + ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */ | |
11233 | + | |
11234 | + memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); | |
11235 | + | |
11236 | + smp_trap_init(ctxt.trap_ctxt); | |
11237 | + | |
11238 | + ctxt.ldt_ents = 0; | |
11239 | + | |
11240 | + ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address); | |
11241 | + ctxt.gdt_ents = gdt_descr->size / 8; | |
11242 | + | |
11243 | +#ifdef __i386__ | |
11244 | + ctxt.user_regs.cs = __KERNEL_CS; | |
11245 | + ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs); | |
11246 | + | |
11247 | + ctxt.kernel_ss = __KERNEL_DS; | |
11248 | + ctxt.kernel_sp = idle->thread.esp0; | |
11249 | + | |
11250 | + ctxt.event_callback_cs = __KERNEL_CS; | |
11251 | + ctxt.event_callback_eip = (unsigned long)hypervisor_callback; | |
11252 | + ctxt.failsafe_callback_cs = __KERNEL_CS; | |
11253 | + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; | |
11254 | + | |
11255 | + ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | |
11256 | +#else /* __x86_64__ */ | |
11257 | + ctxt.user_regs.cs = __KERNEL_CS; | |
11258 | + ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); | |
11259 | + | |
11260 | + ctxt.kernel_ss = __KERNEL_DS; | |
11261 | + ctxt.kernel_sp = idle->thread.rsp0; | |
11262 | + | |
11263 | + ctxt.event_callback_eip = (unsigned long)hypervisor_callback; | |
11264 | + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; | |
11265 | + ctxt.syscall_callback_eip = (unsigned long)system_call; | |
11266 | + | |
11267 | + ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); | |
11268 | + | |
11269 | + ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); | |
11270 | +#endif | |
11271 | + | |
11272 | + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)) | |
11273 | + BUG(); | |
11274 | + | |
11275 | + spin_unlock(&ctxt_lock); | |
11276 | +} | |
11277 | + | |
11278 | +void __init smp_prepare_cpus(unsigned int max_cpus) | |
11279 | +{ | |
11280 | + unsigned int cpu; | |
11281 | + struct task_struct *idle; | |
11282 | + int apicid, acpiid; | |
11283 | + struct vcpu_get_physid cpu_id; | |
11284 | +#ifdef __x86_64__ | |
11285 | + struct desc_ptr *gdt_descr; | |
11286 | +#else | |
11287 | + struct Xgt_desc_struct *gdt_descr; | |
11288 | +#endif | |
11289 | + | |
11290 | + apicid = 0; | |
11291 | + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) { | |
11292 | + apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); | |
11293 | + acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); | |
11294 | +#ifdef CONFIG_ACPI | |
11295 | + if (acpiid != 0xff) | |
11296 | + x86_acpiid_to_apicid[acpiid] = apicid; | |
11297 | +#endif | |
11298 | + } | |
11299 | + boot_cpu_data.apicid = apicid; | |
11300 | + cpu_data[0] = boot_cpu_data; | |
11301 | + | |
11302 | + cpu_2_logical_apicid[0] = apicid; | |
11303 | + x86_cpu_to_apicid[0] = apicid; | |
11304 | + | |
11305 | + current_thread_info()->cpu = 0; | |
11306 | + | |
11307 | + for (cpu = 0; cpu < NR_CPUS; cpu++) { | |
11308 | + cpus_clear(cpu_sibling_map[cpu]); | |
11309 | + cpus_clear(cpu_core_map[cpu]); | |
11310 | + } | |
11311 | + | |
11312 | + set_cpu_sibling_map(0); | |
11313 | + | |
11314 | + if (xen_smp_intr_init(0)) | |
11315 | + BUG(); | |
11316 | + | |
11317 | + cpu_initialized_map = cpumask_of_cpu(0); | |
11318 | + | |
11319 | + /* Restrict the possible_map according to max_cpus. */ | |
11320 | + while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { | |
11321 | + for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--) | |
11322 | + continue; | |
11323 | + cpu_clear(cpu, cpu_possible_map); | |
11324 | + } | |
11325 | + | |
11326 | + for_each_possible_cpu (cpu) { | |
11327 | + if (cpu == 0) | |
11328 | + continue; | |
11329 | + | |
11330 | +#ifdef __x86_64__ | |
11331 | + gdt_descr = &cpu_gdt_descr[cpu]; | |
11332 | +#else | |
11333 | + gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | |
11334 | +#endif | |
11335 | + gdt_descr->address = get_zeroed_page(GFP_KERNEL); | |
11336 | + if (unlikely(!gdt_descr->address)) { | |
11337 | + printk(KERN_CRIT "CPU%d failed to allocate GDT\n", | |
11338 | + cpu); | |
11339 | + continue; | |
11340 | + } | |
11341 | + gdt_descr->size = GDT_SIZE; | |
11342 | + memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE); | |
11343 | + make_page_readonly( | |
11344 | + (void *)gdt_descr->address, | |
11345 | + XENFEAT_writable_descriptor_tables); | |
11346 | + | |
11347 | + apicid = cpu; | |
11348 | + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { | |
11349 | + apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); | |
11350 | + acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); | |
11351 | +#ifdef CONFIG_ACPI | |
11352 | + if (acpiid != 0xff) | |
11353 | + x86_acpiid_to_apicid[acpiid] = apicid; | |
11354 | +#endif | |
11355 | + } | |
11356 | + cpu_data[cpu] = boot_cpu_data; | |
11357 | + cpu_data[cpu].apicid = apicid; | |
11358 | + | |
11359 | + cpu_2_logical_apicid[cpu] = apicid; | |
11360 | + x86_cpu_to_apicid[cpu] = apicid; | |
11361 | + | |
11362 | + idle = fork_idle(cpu); | |
11363 | + if (IS_ERR(idle)) | |
11364 | + panic("failed fork for CPU %d", cpu); | |
11365 | + | |
11366 | +#ifdef __x86_64__ | |
11367 | + cpu_pda(cpu)->pcurrent = idle; | |
11368 | + cpu_pda(cpu)->cpunumber = cpu; | |
11369 | + clear_ti_thread_flag(idle->thread_info, TIF_FORK); | |
11370 | +#endif | |
11371 | + | |
11372 | + irq_ctx_init(cpu); | |
11373 | + | |
11374 | +#ifdef CONFIG_HOTPLUG_CPU | |
11375 | + if (is_initial_xendomain()) | |
11376 | + cpu_set(cpu, cpu_present_map); | |
11377 | +#else | |
11378 | + cpu_set(cpu, cpu_present_map); | |
11379 | +#endif | |
11380 | + } | |
11381 | + | |
11382 | + init_xenbus_allowed_cpumask(); | |
11383 | + | |
11384 | +#ifdef CONFIG_X86_IO_APIC | |
11385 | + /* | |
11386 | + * Here we can be sure that there is an IO-APIC in the system. Let's | |
11387 | + * go and set it up: | |
11388 | + */ | |
11389 | + if (!skip_ioapic_setup && nr_ioapics) | |
11390 | + setup_IO_APIC(); | |
11391 | +#endif | |
11392 | +} | |
11393 | + | |
11394 | +void __devinit smp_prepare_boot_cpu(void) | |
11395 | +{ | |
11396 | + prefill_possible_map(); | |
11397 | +} | |
11398 | + | |
11399 | +#ifdef CONFIG_HOTPLUG_CPU | |
11400 | + | |
11401 | +/* | |
11402 | + * Initialize cpu_present_map late to skip SMP boot code in init/main.c. | |
11403 | + * But do it early enough to catch critical for_each_present_cpu() loops | |
11404 | + * in i386-specific code. | |
11405 | + */ | |
11406 | +static int __init initialize_cpu_present_map(void) | |
11407 | +{ | |
11408 | + cpu_present_map = cpu_possible_map; | |
11409 | + return 0; | |
11410 | +} | |
11411 | +core_initcall(initialize_cpu_present_map); | |
11412 | + | |
11413 | +int __cpu_disable(void) | |
11414 | +{ | |
11415 | + cpumask_t map = cpu_online_map; | |
11416 | + unsigned int cpu = smp_processor_id(); | |
11417 | + | |
11418 | + if (cpu == 0) | |
11419 | + return -EBUSY; | |
11420 | + | |
11421 | + remove_siblinginfo(cpu); | |
11422 | + | |
11423 | + cpu_clear(cpu, map); | |
11424 | + fixup_irqs(map); | |
11425 | + cpu_clear(cpu, cpu_online_map); | |
11426 | + | |
11427 | + return 0; | |
11428 | +} | |
11429 | + | |
11430 | +void __cpu_die(unsigned int cpu) | |
11431 | +{ | |
11432 | + while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { | |
11433 | + current->state = TASK_UNINTERRUPTIBLE; | |
11434 | + schedule_timeout(HZ/10); | |
11435 | + } | |
11436 | + | |
11437 | + xen_smp_intr_exit(cpu); | |
11438 | + | |
11439 | + if (num_online_cpus() == 1) | |
11440 | + alternatives_smp_switch(0); | |
11441 | +} | |
11442 | + | |
11443 | +#endif /* CONFIG_HOTPLUG_CPU */ | |
11444 | + | |
11445 | +int __cpuinit __cpu_up(unsigned int cpu) | |
11446 | +{ | |
11447 | + int rc; | |
11448 | + | |
11449 | + rc = cpu_up_check(cpu); | |
11450 | + if (rc) | |
11451 | + return rc; | |
11452 | + | |
11453 | + cpu_initialize_context(cpu); | |
11454 | + | |
11455 | + if (num_online_cpus() == 1) | |
11456 | + alternatives_smp_switch(1); | |
11457 | + | |
11458 | + /* This must be done before setting cpu_online_map */ | |
11459 | + set_cpu_sibling_map(cpu); | |
11460 | + wmb(); | |
11461 | + | |
11462 | + rc = xen_smp_intr_init(cpu); | |
11463 | + if (rc) { | |
11464 | + remove_siblinginfo(cpu); | |
11465 | + return rc; | |
11466 | + } | |
11467 | + | |
11468 | + cpu_set(cpu, cpu_online_map); | |
11469 | + | |
11470 | + rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); | |
11471 | + BUG_ON(rc); | |
11472 | + | |
11473 | + return 0; | |
11474 | +} | |
11475 | + | |
11476 | +void __init smp_cpus_done(unsigned int max_cpus) | |
11477 | +{ | |
11478 | +} | |
11479 | + | |
11480 | +#ifndef CONFIG_X86_LOCAL_APIC | |
11481 | +int setup_profiling_timer(unsigned int multiplier) | |
11482 | +{ | |
11483 | + return -EINVAL; | |
11484 | +} | |
11485 | +#endif | |
11486 | Index: head-2008-11-25/drivers/xen/core/xen_proc.c | |
11487 | =================================================================== | |
11488 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
11489 | +++ head-2008-11-25/drivers/xen/core/xen_proc.c 2007-06-12 13:13:44.000000000 +0200 | |
11490 | @@ -0,0 +1,23 @@ | |
11491 | + | |
11492 | +#include <linux/module.h> | |
11493 | +#include <linux/proc_fs.h> | |
11494 | +#include <xen/xen_proc.h> | |
11495 | + | |
11496 | +static struct proc_dir_entry *xen_base; | |
11497 | + | |
11498 | +struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode) | |
11499 | +{ | |
11500 | + if ( xen_base == NULL ) | |
11501 | + if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL ) | |
11502 | + panic("Couldn't create /proc/xen"); | |
11503 | + return create_proc_entry(name, mode, xen_base); | |
11504 | +} | |
11505 | + | |
11506 | +EXPORT_SYMBOL_GPL(create_xen_proc_entry); | |
11507 | + | |
11508 | +void remove_xen_proc_entry(const char *name) | |
11509 | +{ | |
11510 | + remove_proc_entry(name, xen_base); | |
11511 | +} | |
11512 | + | |
11513 | +EXPORT_SYMBOL_GPL(remove_xen_proc_entry); | |
11514 | Index: head-2008-11-25/drivers/xen/core/xen_sysfs.c | |
11515 | =================================================================== | |
11516 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
11517 | +++ head-2008-11-25/drivers/xen/core/xen_sysfs.c 2008-10-29 09:55:56.000000000 +0100 | |
11518 | @@ -0,0 +1,427 @@ | |
11519 | +/* | |
11520 | + * copyright (c) 2006 IBM Corporation | |
11521 | + * Authored by: Mike D. Day <ncmike@us.ibm.com> | |
11522 | + * | |
11523 | + * This program is free software; you can redistribute it and/or modify | |
11524 | + * it under the terms of the GNU General Public License version 2 as | |
11525 | + * published by the Free Software Foundation. | |
11526 | + */ | |
11527 | + | |
11528 | +#include <linux/err.h> | |
11529 | +#include <linux/kernel.h> | |
11530 | +#include <linux/module.h> | |
11531 | +#include <linux/init.h> | |
11532 | +#include <asm/hypervisor.h> | |
11533 | +#include <xen/features.h> | |
11534 | +#include <xen/hypervisor_sysfs.h> | |
11535 | +#include <xen/xenbus.h> | |
11536 | +#include <xen/interface/kexec.h> | |
11537 | + | |
11538 | +MODULE_LICENSE("GPL"); | |
11539 | +MODULE_AUTHOR("Mike D. Day <ncmike@us.ibm.com>"); | |
11540 | + | |
11541 | +static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11542 | +{ | |
11543 | + return sprintf(buffer, "xen\n"); | |
11544 | +} | |
11545 | + | |
11546 | +HYPERVISOR_ATTR_RO(type); | |
11547 | + | |
11548 | +static int __init xen_sysfs_type_init(void) | |
11549 | +{ | |
11550 | + return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr); | |
11551 | +} | |
11552 | + | |
11553 | +static void xen_sysfs_type_destroy(void) | |
11554 | +{ | |
11555 | + sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr); | |
11556 | +} | |
11557 | + | |
11558 | +/* xen version attributes */ | |
11559 | +static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11560 | +{ | |
11561 | + int version = HYPERVISOR_xen_version(XENVER_version, NULL); | |
11562 | + if (version) | |
11563 | + return sprintf(buffer, "%d\n", version >> 16); | |
11564 | + return -ENODEV; | |
11565 | +} | |
11566 | + | |
11567 | +HYPERVISOR_ATTR_RO(major); | |
11568 | + | |
11569 | +static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11570 | +{ | |
11571 | + int version = HYPERVISOR_xen_version(XENVER_version, NULL); | |
11572 | + if (version) | |
11573 | + return sprintf(buffer, "%d\n", version & 0xff); | |
11574 | + return -ENODEV; | |
11575 | +} | |
11576 | + | |
11577 | +HYPERVISOR_ATTR_RO(minor); | |
11578 | + | |
11579 | +static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11580 | +{ | |
11581 | + int ret = -ENOMEM; | |
11582 | + char *extra; | |
11583 | + | |
11584 | + extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL); | |
11585 | + if (extra) { | |
11586 | + ret = HYPERVISOR_xen_version(XENVER_extraversion, extra); | |
11587 | + if (!ret) | |
11588 | + ret = sprintf(buffer, "%s\n", extra); | |
11589 | + kfree(extra); | |
11590 | + } | |
11591 | + | |
11592 | + return ret; | |
11593 | +} | |
11594 | + | |
11595 | +HYPERVISOR_ATTR_RO(extra); | |
11596 | + | |
11597 | +static struct attribute *version_attrs[] = { | |
11598 | + &major_attr.attr, | |
11599 | + &minor_attr.attr, | |
11600 | + &extra_attr.attr, | |
11601 | + NULL | |
11602 | +}; | |
11603 | + | |
11604 | +static struct attribute_group version_group = { | |
11605 | + .name = "version", | |
11606 | + .attrs = version_attrs, | |
11607 | +}; | |
11608 | + | |
11609 | +static int __init xen_sysfs_version_init(void) | |
11610 | +{ | |
11611 | + return sysfs_create_group(&hypervisor_subsys.kset.kobj, | |
11612 | + &version_group); | |
11613 | +} | |
11614 | + | |
11615 | +static void xen_sysfs_version_destroy(void) | |
11616 | +{ | |
11617 | + sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group); | |
11618 | +} | |
11619 | + | |
11620 | +/* UUID */ | |
11621 | + | |
11622 | +static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11623 | +{ | |
11624 | + char *vm, *val; | |
11625 | + int ret; | |
11626 | + extern int xenstored_ready; | |
11627 | + | |
11628 | + if (!xenstored_ready) | |
11629 | + return -EBUSY; | |
11630 | + | |
11631 | + vm = xenbus_read(XBT_NIL, "vm", "", NULL); | |
11632 | + if (IS_ERR(vm)) | |
11633 | + return PTR_ERR(vm); | |
11634 | + val = xenbus_read(XBT_NIL, vm, "uuid", NULL); | |
11635 | + kfree(vm); | |
11636 | + if (IS_ERR(val)) | |
11637 | + return PTR_ERR(val); | |
11638 | + ret = sprintf(buffer, "%s\n", val); | |
11639 | + kfree(val); | |
11640 | + return ret; | |
11641 | +} | |
11642 | + | |
11643 | +HYPERVISOR_ATTR_RO(uuid); | |
11644 | + | |
11645 | +static int __init xen_sysfs_uuid_init(void) | |
11646 | +{ | |
11647 | + return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr); | |
11648 | +} | |
11649 | + | |
11650 | +static void xen_sysfs_uuid_destroy(void) | |
11651 | +{ | |
11652 | + sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr); | |
11653 | +} | |
11654 | + | |
11655 | +/* xen compilation attributes */ | |
11656 | + | |
11657 | +static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11658 | +{ | |
11659 | + int ret = -ENOMEM; | |
11660 | + struct xen_compile_info *info; | |
11661 | + | |
11662 | + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); | |
11663 | + if (info) { | |
11664 | + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); | |
11665 | + if (!ret) | |
11666 | + ret = sprintf(buffer, "%s\n", info->compiler); | |
11667 | + kfree(info); | |
11668 | + } | |
11669 | + | |
11670 | + return ret; | |
11671 | +} | |
11672 | + | |
11673 | +HYPERVISOR_ATTR_RO(compiler); | |
11674 | + | |
11675 | +static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11676 | +{ | |
11677 | + int ret = -ENOMEM; | |
11678 | + struct xen_compile_info *info; | |
11679 | + | |
11680 | + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); | |
11681 | + if (info) { | |
11682 | + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); | |
11683 | + if (!ret) | |
11684 | + ret = sprintf(buffer, "%s\n", info->compile_by); | |
11685 | + kfree(info); | |
11686 | + } | |
11687 | + | |
11688 | + return ret; | |
11689 | +} | |
11690 | + | |
11691 | +HYPERVISOR_ATTR_RO(compiled_by); | |
11692 | + | |
11693 | +static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11694 | +{ | |
11695 | + int ret = -ENOMEM; | |
11696 | + struct xen_compile_info *info; | |
11697 | + | |
11698 | + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); | |
11699 | + if (info) { | |
11700 | + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); | |
11701 | + if (!ret) | |
11702 | + ret = sprintf(buffer, "%s\n", info->compile_date); | |
11703 | + kfree(info); | |
11704 | + } | |
11705 | + | |
11706 | + return ret; | |
11707 | +} | |
11708 | + | |
11709 | +HYPERVISOR_ATTR_RO(compile_date); | |
11710 | + | |
11711 | +static struct attribute *xen_compile_attrs[] = { | |
11712 | + &compiler_attr.attr, | |
11713 | + &compiled_by_attr.attr, | |
11714 | + &compile_date_attr.attr, | |
11715 | + NULL | |
11716 | +}; | |
11717 | + | |
11718 | +static struct attribute_group xen_compilation_group = { | |
11719 | + .name = "compilation", | |
11720 | + .attrs = xen_compile_attrs, | |
11721 | +}; | |
11722 | + | |
11723 | +int __init static xen_compilation_init(void) | |
11724 | +{ | |
11725 | + return sysfs_create_group(&hypervisor_subsys.kset.kobj, | |
11726 | + &xen_compilation_group); | |
11727 | +} | |
11728 | + | |
11729 | +static void xen_compilation_destroy(void) | |
11730 | +{ | |
11731 | + sysfs_remove_group(&hypervisor_subsys.kset.kobj, | |
11732 | + &xen_compilation_group); | |
11733 | +} | |
11734 | + | |
11735 | +/* xen properties info */ | |
11736 | + | |
11737 | +static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11738 | +{ | |
11739 | + int ret = -ENOMEM; | |
11740 | + char *caps; | |
11741 | + | |
11742 | + caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL); | |
11743 | + if (caps) { | |
11744 | + ret = HYPERVISOR_xen_version(XENVER_capabilities, caps); | |
11745 | + if (!ret) | |
11746 | + ret = sprintf(buffer, "%s\n", caps); | |
11747 | + kfree(caps); | |
11748 | + } | |
11749 | + | |
11750 | + return ret; | |
11751 | +} | |
11752 | + | |
11753 | +HYPERVISOR_ATTR_RO(capabilities); | |
11754 | + | |
11755 | +static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11756 | +{ | |
11757 | + int ret = -ENOMEM; | |
11758 | + char *cset; | |
11759 | + | |
11760 | + cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL); | |
11761 | + if (cset) { | |
11762 | + ret = HYPERVISOR_xen_version(XENVER_changeset, cset); | |
11763 | + if (!ret) | |
11764 | + ret = sprintf(buffer, "%s\n", cset); | |
11765 | + kfree(cset); | |
11766 | + } | |
11767 | + | |
11768 | + return ret; | |
11769 | +} | |
11770 | + | |
11771 | +HYPERVISOR_ATTR_RO(changeset); | |
11772 | + | |
11773 | +static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11774 | +{ | |
11775 | + int ret = -ENOMEM; | |
11776 | + struct xen_platform_parameters *parms; | |
11777 | + | |
11778 | + parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL); | |
11779 | + if (parms) { | |
11780 | + ret = HYPERVISOR_xen_version(XENVER_platform_parameters, | |
11781 | + parms); | |
11782 | + if (!ret) | |
11783 | + ret = sprintf(buffer, "%lx\n", parms->virt_start); | |
11784 | + kfree(parms); | |
11785 | + } | |
11786 | + | |
11787 | + return ret; | |
11788 | +} | |
11789 | + | |
11790 | +HYPERVISOR_ATTR_RO(virtual_start); | |
11791 | + | |
11792 | +static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11793 | +{ | |
11794 | + int ret; | |
11795 | + | |
11796 | + ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL); | |
11797 | + if (ret > 0) | |
11798 | + ret = sprintf(buffer, "%x\n", ret); | |
11799 | + | |
11800 | + return ret; | |
11801 | +} | |
11802 | + | |
11803 | +HYPERVISOR_ATTR_RO(pagesize); | |
11804 | + | |
11805 | +/* eventually there will be several more features to export */ | |
11806 | +static ssize_t xen_feature_show(int index, char *buffer) | |
11807 | +{ | |
11808 | + int ret = -ENOMEM; | |
11809 | + struct xen_feature_info *info; | |
11810 | + | |
11811 | + info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL); | |
11812 | + if (info) { | |
11813 | + info->submap_idx = index; | |
11814 | + ret = HYPERVISOR_xen_version(XENVER_get_features, info); | |
11815 | + if (!ret) | |
11816 | + ret = sprintf(buffer, "%d\n", info->submap); | |
11817 | + kfree(info); | |
11818 | + } | |
11819 | + | |
11820 | + return ret; | |
11821 | +} | |
11822 | + | |
11823 | +static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer) | |
11824 | +{ | |
11825 | + return xen_feature_show(XENFEAT_writable_page_tables, buffer); | |
11826 | +} | |
11827 | + | |
11828 | +HYPERVISOR_ATTR_RO(writable_pt); | |
11829 | + | |
11830 | +static struct attribute *xen_properties_attrs[] = { | |
11831 | + &capabilities_attr.attr, | |
11832 | + &changeset_attr.attr, | |
11833 | + &virtual_start_attr.attr, | |
11834 | + &pagesize_attr.attr, | |
11835 | + &writable_pt_attr.attr, | |
11836 | + NULL | |
11837 | +}; | |
11838 | + | |
11839 | +static struct attribute_group xen_properties_group = { | |
11840 | + .name = "properties", | |
11841 | + .attrs = xen_properties_attrs, | |
11842 | +}; | |
11843 | + | |
11844 | +static int __init xen_properties_init(void) | |
11845 | +{ | |
11846 | + return sysfs_create_group(&hypervisor_subsys.kset.kobj, | |
11847 | + &xen_properties_group); | |
11848 | +} | |
11849 | + | |
11850 | +static void xen_properties_destroy(void) | |
11851 | +{ | |
11852 | + sysfs_remove_group(&hypervisor_subsys.kset.kobj, | |
11853 | + &xen_properties_group); | |
11854 | +} | |
11855 | + | |
11856 | +#ifdef CONFIG_KEXEC | |
11857 | + | |
11858 | +extern size_t vmcoreinfo_size_xen; | |
11859 | +extern unsigned long paddr_vmcoreinfo_xen; | |
11860 | + | |
11861 | +static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page) | |
11862 | +{ | |
11863 | + return sprintf(page, "%lx %zx\n", | |
11864 | + paddr_vmcoreinfo_xen, vmcoreinfo_size_xen); | |
11865 | +} | |
11866 | + | |
11867 | +HYPERVISOR_ATTR_RO(vmcoreinfo); | |
11868 | + | |
11869 | +static int __init xen_sysfs_vmcoreinfo_init(void) | |
11870 | +{ | |
11871 | + return sysfs_create_file(&hypervisor_subsys.kset.kobj, | |
11872 | + &vmcoreinfo_attr.attr); | |
11873 | +} | |
11874 | + | |
11875 | +static void xen_sysfs_vmcoreinfo_destroy(void) | |
11876 | +{ | |
11877 | + sysfs_remove_file(&hypervisor_subsys.kset.kobj, &vmcoreinfo_attr.attr); | |
11878 | +} | |
11879 | + | |
11880 | +#endif | |
11881 | + | |
11882 | +static int __init hyper_sysfs_init(void) | |
11883 | +{ | |
11884 | + int ret; | |
11885 | + | |
11886 | + if (!is_running_on_xen()) | |
11887 | + return -ENODEV; | |
11888 | + | |
11889 | + ret = xen_sysfs_type_init(); | |
11890 | + if (ret) | |
11891 | + goto out; | |
11892 | + ret = xen_sysfs_version_init(); | |
11893 | + if (ret) | |
11894 | + goto version_out; | |
11895 | + ret = xen_compilation_init(); | |
11896 | + if (ret) | |
11897 | + goto comp_out; | |
11898 | + ret = xen_sysfs_uuid_init(); | |
11899 | + if (ret) | |
11900 | + goto uuid_out; | |
11901 | + ret = xen_properties_init(); | |
11902 | + if (ret) | |
11903 | + goto prop_out; | |
11904 | +#ifdef CONFIG_KEXEC | |
11905 | + if (vmcoreinfo_size_xen != 0) { | |
11906 | + ret = xen_sysfs_vmcoreinfo_init(); | |
11907 | + if (ret) | |
11908 | + goto vmcoreinfo_out; | |
11909 | + } | |
11910 | +#endif | |
11911 | + | |
11912 | + goto out; | |
11913 | + | |
11914 | +#ifdef CONFIG_KEXEC | |
11915 | +vmcoreinfo_out: | |
11916 | +#endif | |
11917 | + xen_properties_destroy(); | |
11918 | +prop_out: | |
11919 | + xen_sysfs_uuid_destroy(); | |
11920 | +uuid_out: | |
11921 | + xen_compilation_destroy(); | |
11922 | +comp_out: | |
11923 | + xen_sysfs_version_destroy(); | |
11924 | +version_out: | |
11925 | + xen_sysfs_type_destroy(); | |
11926 | +out: | |
11927 | + return ret; | |
11928 | +} | |
11929 | + | |
11930 | +static void __exit hyper_sysfs_exit(void) | |
11931 | +{ | |
11932 | +#ifdef CONFIG_KEXEC | |
11933 | + if (vmcoreinfo_size_xen != 0) | |
11934 | + xen_sysfs_vmcoreinfo_destroy(); | |
11935 | +#endif | |
11936 | + xen_properties_destroy(); | |
11937 | + xen_compilation_destroy(); | |
11938 | + xen_sysfs_uuid_destroy(); | |
11939 | + xen_sysfs_version_destroy(); | |
11940 | + xen_sysfs_type_destroy(); | |
11941 | + | |
11942 | +} | |
11943 | + | |
11944 | +module_init(hyper_sysfs_init); | |
11945 | +module_exit(hyper_sysfs_exit); | |
11946 | Index: head-2008-11-25/drivers/xen/core/xencomm.c | |
11947 | =================================================================== | |
11948 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
11949 | +++ head-2008-11-25/drivers/xen/core/xencomm.c 2007-11-12 08:41:05.000000000 +0100 | |
11950 | @@ -0,0 +1,229 @@ | |
11951 | +/* | |
11952 | + * This program is free software; you can redistribute it and/or modify | |
11953 | + * it under the terms of the GNU General Public License as published by | |
11954 | + * the Free Software Foundation; either version 2 of the License, or | |
11955 | + * (at your option) any later version. | |
11956 | + * | |
11957 | + * This program is distributed in the hope that it will be useful, | |
11958 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11959 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11960 | + * GNU General Public License for more details. | |
11961 | + * | |
11962 | + * You should have received a copy of the GNU General Public License | |
11963 | + * along with this program; if not, write to the Free Software | |
11964 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
11965 | + * | |
11966 | + * Copyright (C) IBM Corp. 2006 | |
11967 | + * | |
11968 | + * Authors: Hollis Blanchard <hollisb@us.ibm.com> | |
11969 | + */ | |
11970 | + | |
11971 | +#include <linux/gfp.h> | |
11972 | +#include <linux/mm.h> | |
11973 | +#include <asm/page.h> | |
11974 | +#include <xen/xencomm.h> | |
11975 | +#include <xen/interface/xen.h> | |
11976 | +#ifdef __ia64__ | |
11977 | +#include <asm/xen/xencomm.h> /* for is_kern_addr() */ | |
11978 | +#endif | |
11979 | + | |
11980 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
11981 | +#include <xen/platform-compat.h> | |
11982 | +#endif | |
11983 | + | |
11984 | +static int xencomm_init(struct xencomm_desc *desc, | |
11985 | + void *buffer, unsigned long bytes) | |
11986 | +{ | |
11987 | + unsigned long recorded = 0; | |
11988 | + int i = 0; | |
11989 | + | |
11990 | + while ((recorded < bytes) && (i < desc->nr_addrs)) { | |
11991 | + unsigned long vaddr = (unsigned long)buffer + recorded; | |
11992 | + unsigned long paddr; | |
11993 | + int offset; | |
11994 | + int chunksz; | |
11995 | + | |
11996 | + offset = vaddr % PAGE_SIZE; /* handle partial pages */ | |
11997 | + chunksz = min(PAGE_SIZE - offset, bytes - recorded); | |
11998 | + | |
11999 | + paddr = xencomm_vtop(vaddr); | |
12000 | + if (paddr == ~0UL) { | |
12001 | + printk("%s: couldn't translate vaddr %lx\n", | |
12002 | + __func__, vaddr); | |
12003 | + return -EINVAL; | |
12004 | + } | |
12005 | + | |
12006 | + desc->address[i++] = paddr; | |
12007 | + recorded += chunksz; | |
12008 | + } | |
12009 | + | |
12010 | + if (recorded < bytes) { | |
12011 | + printk("%s: could only translate %ld of %ld bytes\n", | |
12012 | + __func__, recorded, bytes); | |
12013 | + return -ENOSPC; | |
12014 | + } | |
12015 | + | |
12016 | + /* mark remaining addresses invalid (just for safety) */ | |
12017 | + while (i < desc->nr_addrs) | |
12018 | + desc->address[i++] = XENCOMM_INVALID; | |
12019 | + | |
12020 | + desc->magic = XENCOMM_MAGIC; | |
12021 | + | |
12022 | + return 0; | |
12023 | +} | |
12024 | + | |
12025 | +static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask, | |
12026 | + void *buffer, unsigned long bytes) | |
12027 | +{ | |
12028 | + struct xencomm_desc *desc; | |
12029 | + unsigned long buffer_ulong = (unsigned long)buffer; | |
12030 | + unsigned long start = buffer_ulong & PAGE_MASK; | |
12031 | + unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK; | |
12032 | + unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT; | |
12033 | + unsigned long size = sizeof(*desc) + | |
12034 | + sizeof(desc->address[0]) * nr_addrs; | |
12035 | + | |
12036 | + /* | |
12037 | + * slab allocator returns at least sizeof(void*) aligned pointer. | |
12038 | + * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might | |
12039 | + * cross page boundary. | |
12040 | + */ | |
12041 | + if (sizeof(*desc) > sizeof(void*)) { | |
12042 | + unsigned long order = get_order(size); | |
12043 | + desc = (struct xencomm_desc *)__get_free_pages(gfp_mask, | |
12044 | + order); | |
12045 | + if (desc == NULL) | |
12046 | + return NULL; | |
12047 | + | |
12048 | + desc->nr_addrs = | |
12049 | + ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) / | |
12050 | + sizeof(*desc->address); | |
12051 | + } else { | |
12052 | + desc = kmalloc(size, gfp_mask); | |
12053 | + if (desc == NULL) | |
12054 | + return NULL; | |
12055 | + | |
12056 | + desc->nr_addrs = nr_addrs; | |
12057 | + } | |
12058 | + return desc; | |
12059 | +} | |
12060 | + | |
12061 | +void xencomm_free(struct xencomm_handle *desc) | |
12062 | +{ | |
12063 | + if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) { | |
12064 | + struct xencomm_desc *desc__ = (struct xencomm_desc*)desc; | |
12065 | + if (sizeof(*desc__) > sizeof(void*)) { | |
12066 | + unsigned long size = sizeof(*desc__) + | |
12067 | + sizeof(desc__->address[0]) * desc__->nr_addrs; | |
12068 | + unsigned long order = get_order(size); | |
12069 | + free_pages((unsigned long)__va(desc), order); | |
12070 | + } else | |
12071 | + kfree(__va(desc)); | |
12072 | + } | |
12073 | +} | |
12074 | + | |
12075 | +static int xencomm_create(void *buffer, unsigned long bytes, struct xencomm_desc **ret, gfp_t gfp_mask) | |
12076 | +{ | |
12077 | + struct xencomm_desc *desc; | |
12078 | + int rc; | |
12079 | + | |
12080 | + pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes); | |
12081 | + | |
12082 | + if (bytes == 0) { | |
12083 | + /* don't create a descriptor; Xen recognizes NULL. */ | |
12084 | + BUG_ON(buffer != NULL); | |
12085 | + *ret = NULL; | |
12086 | + return 0; | |
12087 | + } | |
12088 | + | |
12089 | + BUG_ON(buffer == NULL); /* 'bytes' is non-zero */ | |
12090 | + | |
12091 | + desc = xencomm_alloc(gfp_mask, buffer, bytes); | |
12092 | + if (!desc) { | |
12093 | + printk("%s failure\n", "xencomm_alloc"); | |
12094 | + return -ENOMEM; | |
12095 | + } | |
12096 | + | |
12097 | + rc = xencomm_init(desc, buffer, bytes); | |
12098 | + if (rc) { | |
12099 | + printk("%s failure: %d\n", "xencomm_init", rc); | |
12100 | + xencomm_free((struct xencomm_handle *)__pa(desc)); | |
12101 | + return rc; | |
12102 | + } | |
12103 | + | |
12104 | + *ret = desc; | |
12105 | + return 0; | |
12106 | +} | |
12107 | + | |
12108 | +/* check if memory address is within VMALLOC region */ | |
12109 | +static int is_phys_contiguous(unsigned long addr) | |
12110 | +{ | |
12111 | + if (!is_kernel_addr(addr)) | |
12112 | + return 0; | |
12113 | + | |
12114 | + return (addr < VMALLOC_START) || (addr >= VMALLOC_END); | |
12115 | +} | |
12116 | + | |
12117 | +static struct xencomm_handle *xencomm_create_inline(void *ptr) | |
12118 | +{ | |
12119 | + unsigned long paddr; | |
12120 | + | |
12121 | + BUG_ON(!is_phys_contiguous((unsigned long)ptr)); | |
12122 | + | |
12123 | + paddr = (unsigned long)xencomm_pa(ptr); | |
12124 | + BUG_ON(paddr & XENCOMM_INLINE_FLAG); | |
12125 | + return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG); | |
12126 | +} | |
12127 | + | |
12128 | +/* "mini" routine, for stack-based communications: */ | |
12129 | +static int xencomm_create_mini(void *buffer, | |
12130 | + unsigned long bytes, struct xencomm_mini *xc_desc, | |
12131 | + struct xencomm_desc **ret) | |
12132 | +{ | |
12133 | + int rc = 0; | |
12134 | + struct xencomm_desc *desc; | |
12135 | + BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0); | |
12136 | + | |
12137 | + desc = (void *)xc_desc; | |
12138 | + | |
12139 | + desc->nr_addrs = XENCOMM_MINI_ADDRS; | |
12140 | + | |
12141 | + if (!(rc = xencomm_init(desc, buffer, bytes))) | |
12142 | + *ret = desc; | |
12143 | + | |
12144 | + return rc; | |
12145 | +} | |
12146 | + | |
12147 | +struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes) | |
12148 | +{ | |
12149 | + int rc; | |
12150 | + struct xencomm_desc *desc; | |
12151 | + | |
12152 | + if (is_phys_contiguous((unsigned long)ptr)) | |
12153 | + return xencomm_create_inline(ptr); | |
12154 | + | |
12155 | + rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL); | |
12156 | + | |
12157 | + if (rc || desc == NULL) | |
12158 | + return NULL; | |
12159 | + | |
12160 | + return xencomm_pa(desc); | |
12161 | +} | |
12162 | + | |
12163 | +struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes, | |
12164 | + struct xencomm_mini *xc_desc) | |
12165 | +{ | |
12166 | + int rc; | |
12167 | + struct xencomm_desc *desc = NULL; | |
12168 | + | |
12169 | + if (is_phys_contiguous((unsigned long)ptr)) | |
12170 | + return xencomm_create_inline(ptr); | |
12171 | + | |
12172 | + rc = xencomm_create_mini(ptr, bytes, xc_desc, | |
12173 | + &desc); | |
12174 | + | |
12175 | + if (rc) | |
12176 | + return NULL; | |
12177 | + | |
12178 | + return xencomm_pa(desc); | |
12179 | +} | |
12180 | Index: head-2008-11-25/drivers/xen/evtchn/Makefile | |
12181 | =================================================================== | |
12182 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
12183 | +++ head-2008-11-25/drivers/xen/evtchn/Makefile 2007-06-12 13:13:44.000000000 +0200 | |
12184 | @@ -0,0 +1,2 @@ | |
12185 | + | |
12186 | +obj-y := evtchn.o | |
12187 | Index: head-2008-11-25/drivers/xen/evtchn/evtchn.c | |
12188 | =================================================================== | |
12189 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
12190 | +++ head-2008-11-25/drivers/xen/evtchn/evtchn.c 2008-08-07 12:44:36.000000000 +0200 | |
12191 | @@ -0,0 +1,560 @@ | |
12192 | +/****************************************************************************** | |
12193 | + * evtchn.c | |
12194 | + * | |
12195 | + * Driver for receiving and demuxing event-channel signals. | |
12196 | + * | |
12197 | + * Copyright (c) 2004-2005, K A Fraser | |
12198 | + * Multi-process extensions Copyright (c) 2004, Steven Smith | |
12199 | + * | |
12200 | + * This program is free software; you can redistribute it and/or | |
12201 | + * modify it under the terms of the GNU General Public License version 2 | |
12202 | + * as published by the Free Software Foundation; or, when distributed | |
12203 | + * separately from the Linux kernel or incorporated into other | |
12204 | + * software packages, subject to the following license: | |
12205 | + * | |
12206 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
12207 | + * of this source file (the "Software"), to deal in the Software without | |
12208 | + * restriction, including without limitation the rights to use, copy, modify, | |
12209 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
12210 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
12211 | + * the following conditions: | |
12212 | + * | |
12213 | + * The above copyright notice and this permission notice shall be included in | |
12214 | + * all copies or substantial portions of the Software. | |
12215 | + * | |
12216 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
12217 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
12218 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
12219 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
12220 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
12221 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
12222 | + * IN THE SOFTWARE. | |
12223 | + */ | |
12224 | + | |
12225 | +#include <linux/module.h> | |
12226 | +#include <linux/kernel.h> | |
12227 | +#include <linux/sched.h> | |
12228 | +#include <linux/slab.h> | |
12229 | +#include <linux/string.h> | |
12230 | +#include <linux/errno.h> | |
12231 | +#include <linux/fs.h> | |
12232 | +#include <linux/errno.h> | |
12233 | +#include <linux/miscdevice.h> | |
12234 | +#include <linux/major.h> | |
12235 | +#include <linux/proc_fs.h> | |
12236 | +#include <linux/stat.h> | |
12237 | +#include <linux/poll.h> | |
12238 | +#include <linux/irq.h> | |
12239 | +#include <linux/init.h> | |
12240 | +#include <linux/gfp.h> | |
12241 | +#include <linux/mutex.h> | |
12242 | +#include <linux/cpu.h> | |
12243 | +#include <xen/evtchn.h> | |
12244 | +#include <xen/public/evtchn.h> | |
12245 | + | |
12246 | +struct per_user_data { | |
12247 | + /* Notification ring, accessed via /dev/xen/evtchn. */ | |
12248 | +#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) | |
12249 | +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) | |
12250 | + evtchn_port_t *ring; | |
12251 | + unsigned int ring_cons, ring_prod, ring_overflow; | |
12252 | + struct mutex ring_cons_mutex; /* protect against concurrent readers */ | |
12253 | + | |
12254 | + /* Processes wait on this queue when ring is empty. */ | |
12255 | + wait_queue_head_t evtchn_wait; | |
12256 | + struct fasync_struct *evtchn_async_queue; | |
12257 | + | |
12258 | + int bind_cpu; | |
12259 | + int nr_event_wrong_delivery; | |
12260 | +}; | |
12261 | + | |
12262 | +/* Who's bound to each port? */ | |
12263 | +static struct per_user_data *port_user[NR_EVENT_CHANNELS]; | |
12264 | +static spinlock_t port_user_lock; | |
12265 | + | |
12266 | +void evtchn_device_upcall(int port) | |
12267 | +{ | |
12268 | + struct per_user_data *u; | |
12269 | + | |
12270 | + spin_lock(&port_user_lock); | |
12271 | + | |
12272 | + mask_evtchn(port); | |
12273 | + clear_evtchn(port); | |
12274 | + | |
12275 | + if ((u = port_user[port]) != NULL) { | |
12276 | + if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { | |
12277 | + u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; | |
12278 | + wmb(); /* Ensure ring contents visible */ | |
12279 | + if (u->ring_cons == u->ring_prod++) { | |
12280 | + wake_up_interruptible(&u->evtchn_wait); | |
12281 | + kill_fasync(&u->evtchn_async_queue, | |
12282 | + SIGIO, POLL_IN); | |
12283 | + } | |
12284 | + } else { | |
12285 | + u->ring_overflow = 1; | |
12286 | + } | |
12287 | + } | |
12288 | + | |
12289 | + spin_unlock(&port_user_lock); | |
12290 | +} | |
12291 | + | |
12292 | +static void evtchn_check_wrong_delivery(struct per_user_data *u) | |
12293 | +{ | |
12294 | + evtchn_port_t port; | |
12295 | + unsigned int current_cpu = smp_processor_id(); | |
12296 | + | |
12297 | + /* Delivered to correct CPU? All is good. */ | |
12298 | + if (u->bind_cpu == current_cpu) { | |
12299 | + u->nr_event_wrong_delivery = 0; | |
12300 | + return; | |
12301 | + } | |
12302 | + | |
12303 | + /* Tolerate up to 100 consecutive misdeliveries. */ | |
12304 | + if (++u->nr_event_wrong_delivery < 100) | |
12305 | + return; | |
12306 | + | |
12307 | + spin_lock_irq(&port_user_lock); | |
12308 | + | |
12309 | + for (port = 0; port < NR_EVENT_CHANNELS; port++) | |
12310 | + if (port_user[port] == u) | |
12311 | + rebind_evtchn_to_cpu(port, current_cpu); | |
12312 | + | |
12313 | + u->bind_cpu = current_cpu; | |
12314 | + u->nr_event_wrong_delivery = 0; | |
12315 | + | |
12316 | + spin_unlock_irq(&port_user_lock); | |
12317 | +} | |
12318 | + | |
12319 | +static ssize_t evtchn_read(struct file *file, char __user *buf, | |
12320 | + size_t count, loff_t *ppos) | |
12321 | +{ | |
12322 | + int rc; | |
12323 | + unsigned int c, p, bytes1 = 0, bytes2 = 0; | |
12324 | + struct per_user_data *u = file->private_data; | |
12325 | + | |
12326 | + /* Whole number of ports. */ | |
12327 | + count &= ~(sizeof(evtchn_port_t)-1); | |
12328 | + | |
12329 | + if (count == 0) | |
12330 | + return 0; | |
12331 | + | |
12332 | + if (count > PAGE_SIZE) | |
12333 | + count = PAGE_SIZE; | |
12334 | + | |
12335 | + for (;;) { | |
12336 | + mutex_lock(&u->ring_cons_mutex); | |
12337 | + | |
12338 | + rc = -EFBIG; | |
12339 | + if (u->ring_overflow) | |
12340 | + goto unlock_out; | |
12341 | + | |
12342 | + if ((c = u->ring_cons) != (p = u->ring_prod)) | |
12343 | + break; | |
12344 | + | |
12345 | + mutex_unlock(&u->ring_cons_mutex); | |
12346 | + | |
12347 | + if (file->f_flags & O_NONBLOCK) | |
12348 | + return -EAGAIN; | |
12349 | + | |
12350 | + rc = wait_event_interruptible( | |
12351 | + u->evtchn_wait, u->ring_cons != u->ring_prod); | |
12352 | + if (rc) | |
12353 | + return rc; | |
12354 | + } | |
12355 | + | |
12356 | + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ | |
12357 | + if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { | |
12358 | + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * | |
12359 | + sizeof(evtchn_port_t); | |
12360 | + bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); | |
12361 | + } else { | |
12362 | + bytes1 = (p - c) * sizeof(evtchn_port_t); | |
12363 | + bytes2 = 0; | |
12364 | + } | |
12365 | + | |
12366 | + /* Truncate chunks according to caller's maximum byte count. */ | |
12367 | + if (bytes1 > count) { | |
12368 | + bytes1 = count; | |
12369 | + bytes2 = 0; | |
12370 | + } else if ((bytes1 + bytes2) > count) { | |
12371 | + bytes2 = count - bytes1; | |
12372 | + } | |
12373 | + | |
12374 | + rc = -EFAULT; | |
12375 | + rmb(); /* Ensure that we see the port before we copy it. */ | |
12376 | + if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) || | |
12377 | + ((bytes2 != 0) && | |
12378 | + copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) | |
12379 | + goto unlock_out; | |
12380 | + | |
12381 | + evtchn_check_wrong_delivery(u); | |
12382 | + | |
12383 | + u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); | |
12384 | + rc = bytes1 + bytes2; | |
12385 | + | |
12386 | + unlock_out: | |
12387 | + mutex_unlock(&u->ring_cons_mutex); | |
12388 | + return rc; | |
12389 | +} | |
12390 | + | |
12391 | +static ssize_t evtchn_write(struct file *file, const char __user *buf, | |
12392 | + size_t count, loff_t *ppos) | |
12393 | +{ | |
12394 | + int rc, i; | |
12395 | + evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL); | |
12396 | + struct per_user_data *u = file->private_data; | |
12397 | + | |
12398 | + if (kbuf == NULL) | |
12399 | + return -ENOMEM; | |
12400 | + | |
12401 | + /* Whole number of ports. */ | |
12402 | + count &= ~(sizeof(evtchn_port_t)-1); | |
12403 | + | |
12404 | + rc = 0; | |
12405 | + if (count == 0) | |
12406 | + goto out; | |
12407 | + | |
12408 | + if (count > PAGE_SIZE) | |
12409 | + count = PAGE_SIZE; | |
12410 | + | |
12411 | + rc = -EFAULT; | |
12412 | + if (copy_from_user(kbuf, buf, count) != 0) | |
12413 | + goto out; | |
12414 | + | |
12415 | + spin_lock_irq(&port_user_lock); | |
12416 | + for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) | |
12417 | + if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) | |
12418 | + unmask_evtchn(kbuf[i]); | |
12419 | + spin_unlock_irq(&port_user_lock); | |
12420 | + | |
12421 | + rc = count; | |
12422 | + | |
12423 | + out: | |
12424 | + free_page((unsigned long)kbuf); | |
12425 | + return rc; | |
12426 | +} | |
12427 | + | |
12428 | +static unsigned int next_bind_cpu(cpumask_t map) | |
12429 | +{ | |
12430 | + static unsigned int bind_cpu; | |
12431 | + bind_cpu = next_cpu(bind_cpu, map); | |
12432 | + if (bind_cpu >= NR_CPUS) | |
12433 | + bind_cpu = first_cpu(map); | |
12434 | + return bind_cpu; | |
12435 | +} | |
12436 | + | |
12437 | +static void evtchn_bind_to_user(struct per_user_data *u, int port) | |
12438 | +{ | |
12439 | + spin_lock_irq(&port_user_lock); | |
12440 | + | |
12441 | + BUG_ON(port_user[port] != NULL); | |
12442 | + port_user[port] = u; | |
12443 | + | |
12444 | + if (u->bind_cpu == -1) | |
12445 | + u->bind_cpu = next_bind_cpu(cpu_online_map); | |
12446 | + | |
12447 | + rebind_evtchn_to_cpu(port, u->bind_cpu); | |
12448 | + | |
12449 | + unmask_evtchn(port); | |
12450 | + | |
12451 | + spin_unlock_irq(&port_user_lock); | |
12452 | +} | |
12453 | + | |
12454 | +static long evtchn_ioctl(struct file *file, | |
12455 | + unsigned int cmd, unsigned long arg) | |
12456 | +{ | |
12457 | + int rc; | |
12458 | + struct per_user_data *u = file->private_data; | |
12459 | + void __user *uarg = (void __user *) arg; | |
12460 | + | |
12461 | + switch (cmd) { | |
12462 | + case IOCTL_EVTCHN_BIND_VIRQ: { | |
12463 | + struct ioctl_evtchn_bind_virq bind; | |
12464 | + struct evtchn_bind_virq bind_virq; | |
12465 | + | |
12466 | + rc = -EFAULT; | |
12467 | + if (copy_from_user(&bind, uarg, sizeof(bind))) | |
12468 | + break; | |
12469 | + | |
12470 | + bind_virq.virq = bind.virq; | |
12471 | + bind_virq.vcpu = 0; | |
12472 | + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | |
12473 | + &bind_virq); | |
12474 | + if (rc != 0) | |
12475 | + break; | |
12476 | + | |
12477 | + rc = bind_virq.port; | |
12478 | + evtchn_bind_to_user(u, rc); | |
12479 | + break; | |
12480 | + } | |
12481 | + | |
12482 | + case IOCTL_EVTCHN_BIND_INTERDOMAIN: { | |
12483 | + struct ioctl_evtchn_bind_interdomain bind; | |
12484 | + struct evtchn_bind_interdomain bind_interdomain; | |
12485 | + | |
12486 | + rc = -EFAULT; | |
12487 | + if (copy_from_user(&bind, uarg, sizeof(bind))) | |
12488 | + break; | |
12489 | + | |
12490 | + bind_interdomain.remote_dom = bind.remote_domain; | |
12491 | + bind_interdomain.remote_port = bind.remote_port; | |
12492 | + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, | |
12493 | + &bind_interdomain); | |
12494 | + if (rc != 0) | |
12495 | + break; | |
12496 | + | |
12497 | + rc = bind_interdomain.local_port; | |
12498 | + evtchn_bind_to_user(u, rc); | |
12499 | + break; | |
12500 | + } | |
12501 | + | |
12502 | + case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { | |
12503 | + struct ioctl_evtchn_bind_unbound_port bind; | |
12504 | + struct evtchn_alloc_unbound alloc_unbound; | |
12505 | + | |
12506 | + rc = -EFAULT; | |
12507 | + if (copy_from_user(&bind, uarg, sizeof(bind))) | |
12508 | + break; | |
12509 | + | |
12510 | + alloc_unbound.dom = DOMID_SELF; | |
12511 | + alloc_unbound.remote_dom = bind.remote_domain; | |
12512 | + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, | |
12513 | + &alloc_unbound); | |
12514 | + if (rc != 0) | |
12515 | + break; | |
12516 | + | |
12517 | + rc = alloc_unbound.port; | |
12518 | + evtchn_bind_to_user(u, rc); | |
12519 | + break; | |
12520 | + } | |
12521 | + | |
12522 | + case IOCTL_EVTCHN_UNBIND: { | |
12523 | + struct ioctl_evtchn_unbind unbind; | |
12524 | + struct evtchn_close close; | |
12525 | + int ret; | |
12526 | + | |
12527 | + rc = -EFAULT; | |
12528 | + if (copy_from_user(&unbind, uarg, sizeof(unbind))) | |
12529 | + break; | |
12530 | + | |
12531 | + rc = -EINVAL; | |
12532 | + if (unbind.port >= NR_EVENT_CHANNELS) | |
12533 | + break; | |
12534 | + | |
12535 | + spin_lock_irq(&port_user_lock); | |
12536 | + | |
12537 | + rc = -ENOTCONN; | |
12538 | + if (port_user[unbind.port] != u) { | |
12539 | + spin_unlock_irq(&port_user_lock); | |
12540 | + break; | |
12541 | + } | |
12542 | + | |
12543 | + port_user[unbind.port] = NULL; | |
12544 | + mask_evtchn(unbind.port); | |
12545 | + rebind_evtchn_to_cpu(unbind.port, 0); | |
12546 | + | |
12547 | + spin_unlock_irq(&port_user_lock); | |
12548 | + | |
12549 | + close.port = unbind.port; | |
12550 | + ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); | |
12551 | + BUG_ON(ret); | |
12552 | + | |
12553 | + rc = 0; | |
12554 | + break; | |
12555 | + } | |
12556 | + | |
12557 | + case IOCTL_EVTCHN_NOTIFY: { | |
12558 | + struct ioctl_evtchn_notify notify; | |
12559 | + | |
12560 | + rc = -EFAULT; | |
12561 | + if (copy_from_user(¬ify, uarg, sizeof(notify))) | |
12562 | + break; | |
12563 | + | |
12564 | + if (notify.port >= NR_EVENT_CHANNELS) { | |
12565 | + rc = -EINVAL; | |
12566 | + } else if (port_user[notify.port] != u) { | |
12567 | + rc = -ENOTCONN; | |
12568 | + } else { | |
12569 | + notify_remote_via_evtchn(notify.port); | |
12570 | + rc = 0; | |
12571 | + } | |
12572 | + break; | |
12573 | + } | |
12574 | + | |
12575 | + case IOCTL_EVTCHN_RESET: { | |
12576 | + /* Initialise the ring to empty. Clear errors. */ | |
12577 | + mutex_lock(&u->ring_cons_mutex); | |
12578 | + spin_lock_irq(&port_user_lock); | |
12579 | + u->ring_cons = u->ring_prod = u->ring_overflow = 0; | |
12580 | + spin_unlock_irq(&port_user_lock); | |
12581 | + mutex_unlock(&u->ring_cons_mutex); | |
12582 | + rc = 0; | |
12583 | + break; | |
12584 | + } | |
12585 | + | |
12586 | + default: | |
12587 | + rc = -ENOSYS; | |
12588 | + break; | |
12589 | + } | |
12590 | + | |
12591 | + return rc; | |
12592 | +} | |
12593 | + | |
12594 | +static unsigned int evtchn_poll(struct file *file, poll_table *wait) | |
12595 | +{ | |
12596 | + unsigned int mask = POLLOUT | POLLWRNORM; | |
12597 | + struct per_user_data *u = file->private_data; | |
12598 | + | |
12599 | + poll_wait(file, &u->evtchn_wait, wait); | |
12600 | + if (u->ring_cons != u->ring_prod) | |
12601 | + mask |= POLLIN | POLLRDNORM; | |
12602 | + if (u->ring_overflow) | |
12603 | + mask = POLLERR; | |
12604 | + return mask; | |
12605 | +} | |
12606 | + | |
12607 | +static int evtchn_fasync(int fd, struct file *filp, int on) | |
12608 | +{ | |
12609 | + struct per_user_data *u = filp->private_data; | |
12610 | + return fasync_helper(fd, filp, on, &u->evtchn_async_queue); | |
12611 | +} | |
12612 | + | |
12613 | +static int evtchn_open(struct inode *inode, struct file *filp) | |
12614 | +{ | |
12615 | + struct per_user_data *u; | |
12616 | + | |
12617 | + if ((u = kmalloc(sizeof(*u), GFP_KERNEL)) == NULL) | |
12618 | + return -ENOMEM; | |
12619 | + | |
12620 | + memset(u, 0, sizeof(*u)); | |
12621 | + init_waitqueue_head(&u->evtchn_wait); | |
12622 | + | |
12623 | + u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL); | |
12624 | + if (u->ring == NULL) { | |
12625 | + kfree(u); | |
12626 | + return -ENOMEM; | |
12627 | + } | |
12628 | + | |
12629 | + mutex_init(&u->ring_cons_mutex); | |
12630 | + | |
12631 | + filp->private_data = u; | |
12632 | + | |
12633 | + u->bind_cpu = -1; | |
12634 | + | |
12635 | + return 0; | |
12636 | +} | |
12637 | + | |
12638 | +static int evtchn_release(struct inode *inode, struct file *filp) | |
12639 | +{ | |
12640 | + int i; | |
12641 | + struct per_user_data *u = filp->private_data; | |
12642 | + struct evtchn_close close; | |
12643 | + | |
12644 | + spin_lock_irq(&port_user_lock); | |
12645 | + | |
12646 | + free_page((unsigned long)u->ring); | |
12647 | + | |
12648 | + for (i = 0; i < NR_EVENT_CHANNELS; i++) { | |
12649 | + int ret; | |
12650 | + if (port_user[i] != u) | |
12651 | + continue; | |
12652 | + | |
12653 | + port_user[i] = NULL; | |
12654 | + mask_evtchn(i); | |
12655 | + rebind_evtchn_to_cpu(i, 0); | |
12656 | + | |
12657 | + close.port = i; | |
12658 | + ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); | |
12659 | + BUG_ON(ret); | |
12660 | + } | |
12661 | + | |
12662 | + spin_unlock_irq(&port_user_lock); | |
12663 | + | |
12664 | + kfree(u); | |
12665 | + | |
12666 | + return 0; | |
12667 | +} | |
12668 | + | |
12669 | +static const struct file_operations evtchn_fops = { | |
12670 | + .owner = THIS_MODULE, | |
12671 | + .read = evtchn_read, | |
12672 | + .write = evtchn_write, | |
12673 | + .unlocked_ioctl = evtchn_ioctl, | |
12674 | + .poll = evtchn_poll, | |
12675 | + .fasync = evtchn_fasync, | |
12676 | + .open = evtchn_open, | |
12677 | + .release = evtchn_release, | |
12678 | +}; | |
12679 | + | |
12680 | +static struct miscdevice evtchn_miscdev = { | |
12681 | + .minor = MISC_DYNAMIC_MINOR, | |
12682 | + .name = "evtchn", | |
12683 | + .fops = &evtchn_fops, | |
12684 | +}; | |
12685 | + | |
12686 | +static int __cpuinit evtchn_cpu_notify(struct notifier_block *nfb, | |
12687 | + unsigned long action, void *hcpu) | |
12688 | +{ | |
12689 | + int hotcpu = (unsigned long)hcpu; | |
12690 | + cpumask_t map = cpu_online_map; | |
12691 | + int port, newcpu; | |
12692 | + struct per_user_data *u; | |
12693 | + | |
12694 | + switch (action) { | |
12695 | + case CPU_DOWN_PREPARE: | |
12696 | + cpu_clear(hotcpu, map); | |
12697 | + spin_lock_irq(&port_user_lock); | |
12698 | + for (port = 0; port < NR_EVENT_CHANNELS; port++) { | |
12699 | + if ((u = port_user[port]) != NULL && | |
12700 | + u->bind_cpu == hotcpu && | |
12701 | + (newcpu = next_bind_cpu(map)) < NR_CPUS) { | |
12702 | + rebind_evtchn_to_cpu(port, newcpu); | |
12703 | + u->bind_cpu = newcpu; | |
12704 | + } | |
12705 | + } | |
12706 | + spin_unlock_irq(&port_user_lock); | |
12707 | + break; | |
12708 | + default: | |
12709 | + return NOTIFY_DONE; | |
12710 | + } | |
12711 | + return NOTIFY_OK; | |
12712 | +} | |
12713 | + | |
12714 | +static struct notifier_block __cpuinitdata evtchn_cpu_nfb = { | |
12715 | + .notifier_call = evtchn_cpu_notify | |
12716 | +}; | |
12717 | + | |
12718 | +static int __init evtchn_init(void) | |
12719 | +{ | |
12720 | + int err; | |
12721 | + | |
12722 | + if (!is_running_on_xen()) | |
12723 | + return -ENODEV; | |
12724 | + | |
12725 | + spin_lock_init(&port_user_lock); | |
12726 | + memset(port_user, 0, sizeof(port_user)); | |
12727 | + | |
12728 | + /* Create '/dev/misc/evtchn'. */ | |
12729 | + err = misc_register(&evtchn_miscdev); | |
12730 | + if (err != 0) { | |
12731 | + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); | |
12732 | + return err; | |
12733 | + } | |
12734 | + | |
12735 | + register_cpu_notifier(&evtchn_cpu_nfb); | |
12736 | + | |
12737 | + printk("Event-channel device installed.\n"); | |
12738 | + | |
12739 | + return 0; | |
12740 | +} | |
12741 | + | |
12742 | +static void __exit evtchn_cleanup(void) | |
12743 | +{ | |
12744 | + misc_deregister(&evtchn_miscdev); | |
12745 | + unregister_cpu_notifier(&evtchn_cpu_nfb); | |
12746 | +} | |
12747 | + | |
12748 | +module_init(evtchn_init); | |
12749 | +module_exit(evtchn_cleanup); | |
12750 | + | |
12751 | +MODULE_LICENSE("Dual BSD/GPL"); | |
12752 | Index: head-2008-11-25/drivers/xen/fbfront/Makefile | |
12753 | =================================================================== | |
12754 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
12755 | +++ head-2008-11-25/drivers/xen/fbfront/Makefile 2007-06-12 13:13:45.000000000 +0200 | |
12756 | @@ -0,0 +1,2 @@ | |
12757 | +obj-$(CONFIG_XEN_FRAMEBUFFER) := xenfb.o | |
12758 | +obj-$(CONFIG_XEN_KEYBOARD) += xenkbd.o | |
12759 | Index: head-2008-11-25/drivers/xen/fbfront/xenfb.c | |
12760 | =================================================================== | |
12761 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
12762 | +++ head-2008-11-25/drivers/xen/fbfront/xenfb.c 2008-11-25 12:22:34.000000000 +0100 | |
12763 | @@ -0,0 +1,887 @@ | |
12764 | +/* | |
12765 | + * linux/drivers/video/xenfb.c -- Xen para-virtual frame buffer device | |
12766 | + * | |
12767 | + * Copyright (C) 2005-2006 Anthony Liguori <aliguori@us.ibm.com> | |
12768 | + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> | |
12769 | + * | |
12770 | + * Based on linux/drivers/video/q40fb.c | |
12771 | + * | |
12772 | + * This file is subject to the terms and conditions of the GNU General Public | |
12773 | + * License. See the file COPYING in the main directory of this archive for | |
12774 | + * more details. | |
12775 | + */ | |
12776 | + | |
12777 | +/* | |
12778 | + * TODO: | |
12779 | + * | |
12780 | + * Switch to grant tables when they become capable of dealing with the | |
12781 | + * frame buffer. | |
12782 | + */ | |
12783 | + | |
12784 | +#include <linux/kernel.h> | |
12785 | +#include <linux/errno.h> | |
12786 | +#include <linux/fb.h> | |
12787 | +#include <linux/module.h> | |
12788 | +#include <linux/vmalloc.h> | |
12789 | +#include <linux/mm.h> | |
12790 | +#include <linux/mutex.h> | |
12791 | +#include <asm/hypervisor.h> | |
12792 | +#include <xen/evtchn.h> | |
12793 | +#include <xen/interface/io/fbif.h> | |
12794 | +#include <xen/interface/io/protocols.h> | |
12795 | +#include <xen/xenbus.h> | |
12796 | +#include <linux/kthread.h> | |
12797 | + | |
12798 | +struct xenfb_mapping | |
12799 | +{ | |
12800 | + struct list_head link; | |
12801 | + struct vm_area_struct *vma; | |
12802 | + atomic_t map_refs; | |
12803 | + int faults; | |
12804 | + struct xenfb_info *info; | |
12805 | +}; | |
12806 | + | |
12807 | +struct xenfb_info | |
12808 | +{ | |
12809 | + struct task_struct *kthread; | |
12810 | + wait_queue_head_t wq; | |
12811 | + | |
12812 | + unsigned char *fb; | |
12813 | + struct fb_info *fb_info; | |
12814 | + struct timer_list refresh; | |
12815 | + int dirty; | |
12816 | + int x1, y1, x2, y2; /* dirty rectangle, | |
12817 | + protected by dirty_lock */ | |
12818 | + spinlock_t dirty_lock; | |
12819 | + struct mutex mm_lock; | |
12820 | + int nr_pages; | |
12821 | + struct page **pages; | |
12822 | + struct list_head mappings; /* protected by mm_lock */ | |
12823 | + | |
12824 | + int irq; | |
12825 | + struct xenfb_page *page; | |
12826 | + unsigned long *mfns; | |
12827 | + int update_wanted; /* XENFB_TYPE_UPDATE wanted */ | |
12828 | + int feature_resize; /* Backend has resize feature */ | |
12829 | + struct xenfb_resize resize; | |
12830 | + int resize_dpy; | |
12831 | + spinlock_t resize_lock; | |
12832 | + | |
12833 | + struct xenbus_device *xbdev; | |
12834 | +}; | |
12835 | + | |
12836 | +/* | |
12837 | + * There are three locks: | |
12838 | + * spinlock resize_lock protecting resize_dpy and resize | |
12839 | + * spinlock dirty_lock protecting the dirty rectangle | |
12840 | + * mutex mm_lock protecting mappings. | |
12841 | + * | |
12842 | + * How the dirty and mapping locks work together | |
12843 | + * | |
12844 | + * The problem is that dirty rectangle and mappings aren't | |
12845 | + * independent: the dirty rectangle must cover all faulted pages in | |
12846 | + * mappings. We need to prove that our locking maintains this | |
12847 | + * invariant. | |
12848 | + * | |
12849 | + * There are several kinds of critical regions: | |
12850 | + * | |
12851 | + * 1. Holding only dirty_lock: xenfb_refresh(). May run in | |
12852 | + * interrupts. Extends the dirty rectangle. Trivially preserves | |
12853 | + * invariant. | |
12854 | + * | |
12855 | + * 2. Holding only mm_lock: xenfb_mmap() and xenfb_vm_close(). Touch | |
12856 | + * only mappings. The former creates unfaulted pages. Preserves | |
12857 | + * invariant. The latter removes pages. Preserves invariant. | |
12858 | + * | |
12859 | + * 3. Holding both locks: xenfb_vm_nopage(). Extends the dirty | |
12860 | + * rectangle and updates mappings consistently. Preserves | |
12861 | + * invariant. | |
12862 | + * | |
12863 | + * 4. The ugliest one: xenfb_update_screen(). Clear the dirty | |
12864 | + * rectangle and update mappings consistently. | |
12865 | + * | |
12866 | + * We can't simply hold both locks, because zap_page_range() cannot | |
12867 | + * be called with a spinlock held. | |
12868 | + * | |
12869 | + * Therefore, we first clear the dirty rectangle with both locks | |
12870 | + * held. Then we unlock dirty_lock and update the mappings. | |
12871 | + * Critical regions that hold only dirty_lock may interfere with | |
12872 | + * that. This can only be region 1: xenfb_refresh(). But that | |
12873 | + * just extends the dirty rectangle, which can't harm the | |
12874 | + * invariant. | |
12875 | + * | |
12876 | + * But FIXME: the invariant is too weak. It misses that the fault | |
12877 | + * record in mappings must be consistent with the mapping of pages in | |
12878 | + * the associated address space! do_no_page() updates the PTE after | |
12879 | + * xenfb_vm_nopage() returns, i.e. outside the critical region. This | |
12880 | + * allows the following race: | |
12881 | + * | |
12882 | + * X writes to some address in the Xen frame buffer | |
12883 | + * Fault - call do_no_page() | |
12884 | + * call xenfb_vm_nopage() | |
12885 | + * grab mm_lock | |
12886 | + * map->faults++; | |
12887 | + * release mm_lock | |
12888 | + * return back to do_no_page() | |
12889 | + * (preempted, or SMP) | |
12890 | + * Xen worker thread runs. | |
12891 | + * grab mm_lock | |
12892 | + * look at mappings | |
12893 | + * find this mapping, zaps its pages (but page not in pte yet) | |
12894 | + * clear map->faults | |
12895 | + * releases mm_lock | |
12896 | + * (back to X process) | |
12897 | + * put page in X's pte | |
12898 | + * | |
12899 | + * Oh well, we wont be updating the writes to this page anytime soon. | |
12900 | + */ | |
12901 | +#define MB_ (1024*1024) | |
12902 | +#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8) | |
12903 | + | |
12904 | +enum {KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT}; | |
12905 | +static int video[KPARAM_CNT] = {2, XENFB_WIDTH, XENFB_HEIGHT}; | |
12906 | +module_param_array(video, int, NULL, 0); | |
12907 | +MODULE_PARM_DESC(video, | |
12908 | + "Size of video memory in MB and width,height in pixels, default = (2,800,600)"); | |
12909 | + | |
12910 | +static int xenfb_fps = 20; | |
12911 | + | |
12912 | +static int xenfb_remove(struct xenbus_device *); | |
12913 | +static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *); | |
12914 | +static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); | |
12915 | +static void xenfb_disconnect_backend(struct xenfb_info *); | |
12916 | + | |
12917 | +static void xenfb_send_event(struct xenfb_info *info, | |
12918 | + union xenfb_out_event *event) | |
12919 | +{ | |
12920 | + __u32 prod; | |
12921 | + | |
12922 | + prod = info->page->out_prod; | |
12923 | + /* caller ensures !xenfb_queue_full() */ | |
12924 | + mb(); /* ensure ring space available */ | |
12925 | + XENFB_OUT_RING_REF(info->page, prod) = *event; | |
12926 | + wmb(); /* ensure ring contents visible */ | |
12927 | + info->page->out_prod = prod + 1; | |
12928 | + | |
12929 | + notify_remote_via_irq(info->irq); | |
12930 | +} | |
12931 | + | |
12932 | +static void xenfb_do_update(struct xenfb_info *info, | |
12933 | + int x, int y, int w, int h) | |
12934 | +{ | |
12935 | + union xenfb_out_event event; | |
12936 | + | |
12937 | + memset(&event, 0, sizeof(event)); | |
12938 | + event.type = XENFB_TYPE_UPDATE; | |
12939 | + event.update.x = x; | |
12940 | + event.update.y = y; | |
12941 | + event.update.width = w; | |
12942 | + event.update.height = h; | |
12943 | + | |
12944 | + /* caller ensures !xenfb_queue_full() */ | |
12945 | + xenfb_send_event(info, &event); | |
12946 | +} | |
12947 | + | |
12948 | +static void xenfb_do_resize(struct xenfb_info *info) | |
12949 | +{ | |
12950 | + union xenfb_out_event event; | |
12951 | + | |
12952 | + memset(&event, 0, sizeof(event)); | |
12953 | + event.resize = info->resize; | |
12954 | + | |
12955 | + /* caller ensures !xenfb_queue_full() */ | |
12956 | + xenfb_send_event(info, &event); | |
12957 | +} | |
12958 | + | |
12959 | +static int xenfb_queue_full(struct xenfb_info *info) | |
12960 | +{ | |
12961 | + __u32 cons, prod; | |
12962 | + | |
12963 | + prod = info->page->out_prod; | |
12964 | + cons = info->page->out_cons; | |
12965 | + return prod - cons == XENFB_OUT_RING_LEN; | |
12966 | +} | |
12967 | + | |
12968 | +static void xenfb_update_screen(struct xenfb_info *info) | |
12969 | +{ | |
12970 | + unsigned long flags; | |
12971 | + int y1, y2, x1, x2; | |
12972 | + struct xenfb_mapping *map; | |
12973 | + | |
12974 | + if (!info->update_wanted) | |
12975 | + return; | |
12976 | + if (xenfb_queue_full(info)) | |
12977 | + return; | |
12978 | + | |
12979 | + mutex_lock(&info->mm_lock); | |
12980 | + | |
12981 | + spin_lock_irqsave(&info->dirty_lock, flags); | |
12982 | + y1 = info->y1; | |
12983 | + y2 = info->y2; | |
12984 | + x1 = info->x1; | |
12985 | + x2 = info->x2; | |
12986 | + info->x1 = info->y1 = INT_MAX; | |
12987 | + info->x2 = info->y2 = 0; | |
12988 | + spin_unlock_irqrestore(&info->dirty_lock, flags); | |
12989 | + | |
12990 | + list_for_each_entry(map, &info->mappings, link) { | |
12991 | + if (!map->faults) | |
12992 | + continue; | |
12993 | + zap_page_range(map->vma, map->vma->vm_start, | |
12994 | + map->vma->vm_end - map->vma->vm_start, NULL); | |
12995 | + map->faults = 0; | |
12996 | + } | |
12997 | + | |
12998 | + mutex_unlock(&info->mm_lock); | |
12999 | + | |
13000 | + if (x2 < x1 || y2 < y1) { | |
13001 | + printk("xenfb_update_screen bogus rect %d %d %d %d\n", | |
13002 | + x1, x2, y1, y2); | |
13003 | + WARN_ON(1); | |
13004 | + } | |
13005 | + xenfb_do_update(info, x1, y1, x2 - x1, y2 - y1); | |
13006 | +} | |
13007 | + | |
13008 | +static void xenfb_handle_resize_dpy(struct xenfb_info *info) | |
13009 | +{ | |
13010 | + unsigned long flags; | |
13011 | + | |
13012 | + spin_lock_irqsave(&info->resize_lock, flags); | |
13013 | + if (info->resize_dpy) { | |
13014 | + if (!xenfb_queue_full(info)) { | |
13015 | + info->resize_dpy = 0; | |
13016 | + xenfb_do_resize(info); | |
13017 | + } | |
13018 | + } | |
13019 | + spin_unlock_irqrestore(&info->resize_lock, flags); | |
13020 | +} | |
13021 | + | |
13022 | +static int xenfb_thread(void *data) | |
13023 | +{ | |
13024 | + struct xenfb_info *info = data; | |
13025 | + | |
13026 | + while (!kthread_should_stop()) { | |
13027 | + xenfb_handle_resize_dpy(info); | |
13028 | + if (info->dirty) { | |
13029 | + info->dirty = 0; | |
13030 | + xenfb_update_screen(info); | |
13031 | + } | |
13032 | + wait_event_interruptible(info->wq, | |
13033 | + kthread_should_stop() || info->dirty); | |
13034 | + try_to_freeze(); | |
13035 | + } | |
13036 | + return 0; | |
13037 | +} | |
13038 | + | |
13039 | +static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green, | |
13040 | + unsigned blue, unsigned transp, | |
13041 | + struct fb_info *info) | |
13042 | +{ | |
13043 | + u32 v; | |
13044 | + | |
13045 | + if (regno > info->cmap.len) | |
13046 | + return 1; | |
13047 | + | |
13048 | + red >>= (16 - info->var.red.length); | |
13049 | + green >>= (16 - info->var.green.length); | |
13050 | + blue >>= (16 - info->var.blue.length); | |
13051 | + | |
13052 | + v = (red << info->var.red.offset) | | |
13053 | + (green << info->var.green.offset) | | |
13054 | + (blue << info->var.blue.offset); | |
13055 | + | |
13056 | + /* FIXME is this sane? check against xxxfb_setcolreg()! */ | |
13057 | + switch (info->var.bits_per_pixel) { | |
13058 | + case 16: | |
13059 | + case 24: | |
13060 | + case 32: | |
13061 | + ((u32 *)info->pseudo_palette)[regno] = v; | |
13062 | + break; | |
13063 | + } | |
13064 | + | |
13065 | + return 0; | |
13066 | +} | |
13067 | + | |
13068 | +static void xenfb_timer(unsigned long data) | |
13069 | +{ | |
13070 | + struct xenfb_info *info = (struct xenfb_info *)data; | |
13071 | + wake_up(&info->wq); | |
13072 | +} | |
13073 | + | |
13074 | +static void __xenfb_refresh(struct xenfb_info *info, | |
13075 | + int x1, int y1, int w, int h) | |
13076 | +{ | |
13077 | + int y2, x2; | |
13078 | + | |
13079 | + y2 = y1 + h; | |
13080 | + x2 = x1 + w; | |
13081 | + | |
13082 | + if (info->y1 > y1) | |
13083 | + info->y1 = y1; | |
13084 | + if (info->y2 < y2) | |
13085 | + info->y2 = y2; | |
13086 | + if (info->x1 > x1) | |
13087 | + info->x1 = x1; | |
13088 | + if (info->x2 < x2) | |
13089 | + info->x2 = x2; | |
13090 | + info->dirty = 1; | |
13091 | + | |
13092 | + if (timer_pending(&info->refresh)) | |
13093 | + return; | |
13094 | + | |
13095 | + mod_timer(&info->refresh, jiffies + HZ/xenfb_fps); | |
13096 | +} | |
13097 | + | |
13098 | +static void xenfb_refresh(struct xenfb_info *info, | |
13099 | + int x1, int y1, int w, int h) | |
13100 | +{ | |
13101 | + unsigned long flags; | |
13102 | + | |
13103 | + spin_lock_irqsave(&info->dirty_lock, flags); | |
13104 | + __xenfb_refresh(info, x1, y1, w, h); | |
13105 | + spin_unlock_irqrestore(&info->dirty_lock, flags); | |
13106 | +} | |
13107 | + | |
13108 | +static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect) | |
13109 | +{ | |
13110 | + struct xenfb_info *info = p->par; | |
13111 | + | |
13112 | + cfb_fillrect(p, rect); | |
13113 | + xenfb_refresh(info, rect->dx, rect->dy, rect->width, rect->height); | |
13114 | +} | |
13115 | + | |
13116 | +static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image) | |
13117 | +{ | |
13118 | + struct xenfb_info *info = p->par; | |
13119 | + | |
13120 | + cfb_imageblit(p, image); | |
13121 | + xenfb_refresh(info, image->dx, image->dy, image->width, image->height); | |
13122 | +} | |
13123 | + | |
13124 | +static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) | |
13125 | +{ | |
13126 | + struct xenfb_info *info = p->par; | |
13127 | + | |
13128 | + cfb_copyarea(p, area); | |
13129 | + xenfb_refresh(info, area->dx, area->dy, area->width, area->height); | |
13130 | +} | |
13131 | + | |
13132 | +static void xenfb_vm_open(struct vm_area_struct *vma) | |
13133 | +{ | |
13134 | + struct xenfb_mapping *map = vma->vm_private_data; | |
13135 | + atomic_inc(&map->map_refs); | |
13136 | +} | |
13137 | + | |
13138 | +static void xenfb_vm_close(struct vm_area_struct *vma) | |
13139 | +{ | |
13140 | + struct xenfb_mapping *map = vma->vm_private_data; | |
13141 | + struct xenfb_info *info = map->info; | |
13142 | + | |
13143 | + mutex_lock(&info->mm_lock); | |
13144 | + if (atomic_dec_and_test(&map->map_refs)) { | |
13145 | + list_del(&map->link); | |
13146 | + kfree(map); | |
13147 | + } | |
13148 | + mutex_unlock(&info->mm_lock); | |
13149 | +} | |
13150 | + | |
13151 | +static struct page *xenfb_vm_nopage(struct vm_area_struct *vma, | |
13152 | + unsigned long vaddr, int *type) | |
13153 | +{ | |
13154 | + struct xenfb_mapping *map = vma->vm_private_data; | |
13155 | + struct xenfb_info *info = map->info; | |
13156 | + int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT; | |
13157 | + unsigned long flags; | |
13158 | + struct page *page; | |
13159 | + int y1, y2; | |
13160 | + | |
13161 | + if (pgnr >= info->nr_pages) | |
13162 | + return NOPAGE_SIGBUS; | |
13163 | + | |
13164 | + mutex_lock(&info->mm_lock); | |
13165 | + spin_lock_irqsave(&info->dirty_lock, flags); | |
13166 | + page = info->pages[pgnr]; | |
13167 | + get_page(page); | |
13168 | + map->faults++; | |
13169 | + | |
13170 | + y1 = pgnr * PAGE_SIZE / info->fb_info->fix.line_length; | |
13171 | + y2 = (pgnr * PAGE_SIZE + PAGE_SIZE - 1) / info->fb_info->fix.line_length; | |
13172 | + if (y2 > info->fb_info->var.yres) | |
13173 | + y2 = info->fb_info->var.yres; | |
13174 | + __xenfb_refresh(info, 0, y1, info->fb_info->var.xres, y2 - y1); | |
13175 | + spin_unlock_irqrestore(&info->dirty_lock, flags); | |
13176 | + mutex_unlock(&info->mm_lock); | |
13177 | + | |
13178 | + if (type) | |
13179 | + *type = VM_FAULT_MINOR; | |
13180 | + | |
13181 | + return page; | |
13182 | +} | |
13183 | + | |
13184 | +static struct vm_operations_struct xenfb_vm_ops = { | |
13185 | + .open = xenfb_vm_open, | |
13186 | + .close = xenfb_vm_close, | |
13187 | + .nopage = xenfb_vm_nopage, | |
13188 | +}; | |
13189 | + | |
13190 | +static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma) | |
13191 | +{ | |
13192 | + struct xenfb_info *info = fb_info->par; | |
13193 | + struct xenfb_mapping *map; | |
13194 | + int map_pages; | |
13195 | + | |
13196 | + if (!(vma->vm_flags & VM_WRITE)) | |
13197 | + return -EINVAL; | |
13198 | + if (!(vma->vm_flags & VM_SHARED)) | |
13199 | + return -EINVAL; | |
13200 | + if (vma->vm_pgoff != 0) | |
13201 | + return -EINVAL; | |
13202 | + | |
13203 | + map_pages = (vma->vm_end - vma->vm_start + PAGE_SIZE-1) >> PAGE_SHIFT; | |
13204 | + if (map_pages > info->nr_pages) | |
13205 | + return -EINVAL; | |
13206 | + | |
13207 | + map = kzalloc(sizeof(*map), GFP_KERNEL); | |
13208 | + if (map == NULL) | |
13209 | + return -ENOMEM; | |
13210 | + | |
13211 | + map->vma = vma; | |
13212 | + map->faults = 0; | |
13213 | + map->info = info; | |
13214 | + atomic_set(&map->map_refs, 1); | |
13215 | + | |
13216 | + mutex_lock(&info->mm_lock); | |
13217 | + list_add(&map->link, &info->mappings); | |
13218 | + mutex_unlock(&info->mm_lock); | |
13219 | + | |
13220 | + vma->vm_ops = &xenfb_vm_ops; | |
13221 | + vma->vm_flags |= (VM_DONTEXPAND | VM_RESERVED); | |
13222 | + vma->vm_private_data = map; | |
13223 | + | |
13224 | + return 0; | |
13225 | +} | |
13226 | + | |
13227 | +static int | |
13228 | +xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) | |
13229 | +{ | |
13230 | + struct xenfb_info *xenfb_info; | |
13231 | + int required_mem_len; | |
13232 | + | |
13233 | + xenfb_info = info->par; | |
13234 | + | |
13235 | + if (!xenfb_info->feature_resize) { | |
13236 | + if (var->xres == video[KPARAM_WIDTH] && | |
13237 | + var->yres == video[KPARAM_HEIGHT] && | |
13238 | + var->bits_per_pixel == xenfb_info->page->depth) { | |
13239 | + return 0; | |
13240 | + } | |
13241 | + return -EINVAL; | |
13242 | + } | |
13243 | + | |
13244 | + /* Can't resize past initial width and height */ | |
13245 | + if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT]) | |
13246 | + return -EINVAL; | |
13247 | + | |
13248 | + required_mem_len = var->xres * var->yres * (xenfb_info->page->depth / 8); | |
13249 | + if (var->bits_per_pixel == xenfb_info->page->depth && | |
13250 | + var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) && | |
13251 | + required_mem_len <= info->fix.smem_len) { | |
13252 | + var->xres_virtual = var->xres; | |
13253 | + var->yres_virtual = var->yres; | |
13254 | + return 0; | |
13255 | + } | |
13256 | + return -EINVAL; | |
13257 | +} | |
13258 | + | |
13259 | +static int xenfb_set_par(struct fb_info *info) | |
13260 | +{ | |
13261 | + struct xenfb_info *xenfb_info; | |
13262 | + unsigned long flags; | |
13263 | + | |
13264 | + xenfb_info = info->par; | |
13265 | + | |
13266 | + spin_lock_irqsave(&xenfb_info->resize_lock, flags); | |
13267 | + xenfb_info->resize.type = XENFB_TYPE_RESIZE; | |
13268 | + xenfb_info->resize.width = info->var.xres; | |
13269 | + xenfb_info->resize.height = info->var.yres; | |
13270 | + xenfb_info->resize.stride = info->fix.line_length; | |
13271 | + xenfb_info->resize.depth = info->var.bits_per_pixel; | |
13272 | + xenfb_info->resize.offset = 0; | |
13273 | + xenfb_info->resize_dpy = 1; | |
13274 | + spin_unlock_irqrestore(&xenfb_info->resize_lock, flags); | |
13275 | + return 0; | |
13276 | +} | |
13277 | + | |
13278 | +static struct fb_ops xenfb_fb_ops = { | |
13279 | + .owner = THIS_MODULE, | |
13280 | + .fb_setcolreg = xenfb_setcolreg, | |
13281 | + .fb_fillrect = xenfb_fillrect, | |
13282 | + .fb_copyarea = xenfb_copyarea, | |
13283 | + .fb_imageblit = xenfb_imageblit, | |
13284 | + .fb_mmap = xenfb_mmap, | |
13285 | + .fb_check_var = xenfb_check_var, | |
13286 | + .fb_set_par = xenfb_set_par, | |
13287 | +}; | |
13288 | + | |
13289 | +static irqreturn_t xenfb_event_handler(int rq, void *dev_id, | |
13290 | + struct pt_regs *regs) | |
13291 | +{ | |
13292 | + /* | |
13293 | + * No in events recognized, simply ignore them all. | |
13294 | + * If you need to recognize some, see xenkbd's input_handler() | |
13295 | + * for how to do that. | |
13296 | + */ | |
13297 | + struct xenfb_info *info = dev_id; | |
13298 | + struct xenfb_page *page = info->page; | |
13299 | + | |
13300 | + if (page->in_cons != page->in_prod) { | |
13301 | + info->page->in_cons = info->page->in_prod; | |
13302 | + notify_remote_via_irq(info->irq); | |
13303 | + } | |
13304 | + return IRQ_HANDLED; | |
13305 | +} | |
13306 | + | |
13307 | +static unsigned long vmalloc_to_mfn(void *address) | |
13308 | +{ | |
13309 | + return pfn_to_mfn(vmalloc_to_pfn(address)); | |
13310 | +} | |
13311 | + | |
13312 | +static int __devinit xenfb_probe(struct xenbus_device *dev, | |
13313 | + const struct xenbus_device_id *id) | |
13314 | +{ | |
13315 | + struct xenfb_info *info; | |
13316 | + struct fb_info *fb_info; | |
13317 | + int fb_size; | |
13318 | + int val; | |
13319 | + int ret; | |
13320 | + | |
13321 | + info = kzalloc(sizeof(*info), GFP_KERNEL); | |
13322 | + if (info == NULL) { | |
13323 | + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); | |
13324 | + return -ENOMEM; | |
13325 | + } | |
13326 | + | |
13327 | + /* Limit kernel param videoram amount to what is in xenstore */ | |
13328 | + if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) { | |
13329 | + if (val < video[KPARAM_MEM]) | |
13330 | + video[KPARAM_MEM] = val; | |
13331 | + } | |
13332 | + | |
13333 | + /* If requested res does not fit in available memory, use default */ | |
13334 | + fb_size = video[KPARAM_MEM] * MB_; | |
13335 | + if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH/8 > fb_size) { | |
13336 | + video[KPARAM_WIDTH] = XENFB_WIDTH; | |
13337 | + video[KPARAM_HEIGHT] = XENFB_HEIGHT; | |
13338 | + fb_size = XENFB_DEFAULT_FB_LEN; | |
13339 | + } | |
13340 | + | |
13341 | + dev->dev.driver_data = info; | |
13342 | + info->xbdev = dev; | |
13343 | + info->irq = -1; | |
13344 | + info->x1 = info->y1 = INT_MAX; | |
13345 | + spin_lock_init(&info->dirty_lock); | |
13346 | + spin_lock_init(&info->resize_lock); | |
13347 | + mutex_init(&info->mm_lock); | |
13348 | + init_waitqueue_head(&info->wq); | |
13349 | + init_timer(&info->refresh); | |
13350 | + info->refresh.function = xenfb_timer; | |
13351 | + info->refresh.data = (unsigned long)info; | |
13352 | + INIT_LIST_HEAD(&info->mappings); | |
13353 | + | |
13354 | + info->fb = vmalloc(fb_size); | |
13355 | + if (info->fb == NULL) | |
13356 | + goto error_nomem; | |
13357 | + memset(info->fb, 0, fb_size); | |
13358 | + | |
13359 | + info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
13360 | + | |
13361 | + info->pages = kmalloc(sizeof(struct page *) * info->nr_pages, | |
13362 | + GFP_KERNEL); | |
13363 | + if (info->pages == NULL) | |
13364 | + goto error_nomem; | |
13365 | + | |
13366 | + info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages); | |
13367 | + if (!info->mfns) | |
13368 | + goto error_nomem; | |
13369 | + | |
13370 | + /* set up shared page */ | |
13371 | + info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | |
13372 | + if (!info->page) | |
13373 | + goto error_nomem; | |
13374 | + | |
13375 | + fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL); | |
13376 | + /* see fishy hackery below */ | |
13377 | + if (fb_info == NULL) | |
13378 | + goto error_nomem; | |
13379 | + | |
13380 | + /* FIXME fishy hackery */ | |
13381 | + fb_info->pseudo_palette = fb_info->par; | |
13382 | + fb_info->par = info; | |
13383 | + /* /FIXME */ | |
13384 | + fb_info->screen_base = info->fb; | |
13385 | + | |
13386 | + fb_info->fbops = &xenfb_fb_ops; | |
13387 | + fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH]; | |
13388 | + fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT]; | |
13389 | + fb_info->var.bits_per_pixel = XENFB_DEPTH; | |
13390 | + | |
13391 | + fb_info->var.red = (struct fb_bitfield){16, 8, 0}; | |
13392 | + fb_info->var.green = (struct fb_bitfield){8, 8, 0}; | |
13393 | + fb_info->var.blue = (struct fb_bitfield){0, 8, 0}; | |
13394 | + | |
13395 | + fb_info->var.activate = FB_ACTIVATE_NOW; | |
13396 | + fb_info->var.height = -1; | |
13397 | + fb_info->var.width = -1; | |
13398 | + fb_info->var.vmode = FB_VMODE_NONINTERLACED; | |
13399 | + | |
13400 | + fb_info->fix.visual = FB_VISUAL_TRUECOLOR; | |
13401 | + fb_info->fix.line_length = fb_info->var.xres * (XENFB_DEPTH / 8); | |
13402 | + fb_info->fix.smem_start = 0; | |
13403 | + fb_info->fix.smem_len = fb_size; | |
13404 | + strcpy(fb_info->fix.id, "xen"); | |
13405 | + fb_info->fix.type = FB_TYPE_PACKED_PIXELS; | |
13406 | + fb_info->fix.accel = FB_ACCEL_NONE; | |
13407 | + | |
13408 | + fb_info->flags = FBINFO_FLAG_DEFAULT; | |
13409 | + | |
13410 | + ret = fb_alloc_cmap(&fb_info->cmap, 256, 0); | |
13411 | + if (ret < 0) { | |
13412 | + framebuffer_release(fb_info); | |
13413 | + xenbus_dev_fatal(dev, ret, "fb_alloc_cmap"); | |
13414 | + goto error; | |
13415 | + } | |
13416 | + | |
13417 | + xenfb_init_shared_page(info, fb_info); | |
13418 | + | |
13419 | + ret = register_framebuffer(fb_info); | |
13420 | + if (ret) { | |
13421 | + fb_dealloc_cmap(&info->fb_info->cmap); | |
13422 | + framebuffer_release(fb_info); | |
13423 | + xenbus_dev_fatal(dev, ret, "register_framebuffer"); | |
13424 | + goto error; | |
13425 | + } | |
13426 | + info->fb_info = fb_info; | |
13427 | + | |
13428 | + ret = xenfb_connect_backend(dev, info); | |
13429 | + if (ret < 0) | |
13430 | + goto error; | |
13431 | + | |
13432 | + /* FIXME should this be delayed until backend XenbusStateConnected? */ | |
13433 | + info->kthread = kthread_run(xenfb_thread, info, "xenfb thread"); | |
13434 | + if (IS_ERR(info->kthread)) { | |
13435 | + ret = PTR_ERR(info->kthread); | |
13436 | + info->kthread = NULL; | |
13437 | + xenbus_dev_fatal(dev, ret, "register_framebuffer"); | |
13438 | + goto error; | |
13439 | + } | |
13440 | + | |
13441 | + return 0; | |
13442 | + | |
13443 | + error_nomem: | |
13444 | + ret = -ENOMEM; | |
13445 | + xenbus_dev_fatal(dev, ret, "allocating device memory"); | |
13446 | + error: | |
13447 | + xenfb_remove(dev); | |
13448 | + return ret; | |
13449 | +} | |
13450 | + | |
13451 | +static int xenfb_resume(struct xenbus_device *dev) | |
13452 | +{ | |
13453 | + struct xenfb_info *info = dev->dev.driver_data; | |
13454 | + | |
13455 | + xenfb_disconnect_backend(info); | |
13456 | + xenfb_init_shared_page(info, info->fb_info); | |
13457 | + return xenfb_connect_backend(dev, info); | |
13458 | +} | |
13459 | + | |
13460 | +static int xenfb_remove(struct xenbus_device *dev) | |
13461 | +{ | |
13462 | + struct xenfb_info *info = dev->dev.driver_data; | |
13463 | + | |
13464 | + del_timer(&info->refresh); | |
13465 | + if (info->kthread) | |
13466 | + kthread_stop(info->kthread); | |
13467 | + xenfb_disconnect_backend(info); | |
13468 | + if (info->fb_info) { | |
13469 | + unregister_framebuffer(info->fb_info); | |
13470 | + fb_dealloc_cmap(&info->fb_info->cmap); | |
13471 | + framebuffer_release(info->fb_info); | |
13472 | + } | |
13473 | + free_page((unsigned long)info->page); | |
13474 | + vfree(info->mfns); | |
13475 | + kfree(info->pages); | |
13476 | + vfree(info->fb); | |
13477 | + kfree(info); | |
13478 | + | |
13479 | + return 0; | |
13480 | +} | |
13481 | + | |
13482 | +static void xenfb_init_shared_page(struct xenfb_info *info, | |
13483 | + struct fb_info * fb_info) | |
13484 | +{ | |
13485 | + int i; | |
13486 | + int epd = PAGE_SIZE / sizeof(info->mfns[0]); | |
13487 | + | |
13488 | + for (i = 0; i < info->nr_pages; i++) | |
13489 | + info->pages[i] = vmalloc_to_page(info->fb + i * PAGE_SIZE); | |
13490 | + | |
13491 | + for (i = 0; i < info->nr_pages; i++) | |
13492 | + info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE); | |
13493 | + | |
13494 | + for (i = 0; i * epd < info->nr_pages; i++) | |
13495 | + info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]); | |
13496 | + | |
13497 | + info->page->width = fb_info->var.xres; | |
13498 | + info->page->height = fb_info->var.yres; | |
13499 | + info->page->depth = fb_info->var.bits_per_pixel; | |
13500 | + info->page->line_length = fb_info->fix.line_length; | |
13501 | + info->page->mem_length = fb_info->fix.smem_len; | |
13502 | + info->page->in_cons = info->page->in_prod = 0; | |
13503 | + info->page->out_cons = info->page->out_prod = 0; | |
13504 | +} | |
13505 | + | |
13506 | +static int xenfb_connect_backend(struct xenbus_device *dev, | |
13507 | + struct xenfb_info *info) | |
13508 | +{ | |
13509 | + int ret; | |
13510 | + struct xenbus_transaction xbt; | |
13511 | + | |
13512 | + ret = bind_listening_port_to_irqhandler( | |
13513 | + dev->otherend_id, xenfb_event_handler, 0, "xenfb", info); | |
13514 | + if (ret < 0) { | |
13515 | + xenbus_dev_fatal(dev, ret, | |
13516 | + "bind_listening_port_to_irqhandler"); | |
13517 | + return ret; | |
13518 | + } | |
13519 | + info->irq = ret; | |
13520 | + | |
13521 | + again: | |
13522 | + ret = xenbus_transaction_start(&xbt); | |
13523 | + if (ret) { | |
13524 | + xenbus_dev_fatal(dev, ret, "starting transaction"); | |
13525 | + return ret; | |
13526 | + } | |
13527 | + ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu", | |
13528 | + virt_to_mfn(info->page)); | |
13529 | + if (ret) | |
13530 | + goto error_xenbus; | |
13531 | + ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", | |
13532 | + irq_to_evtchn_port(info->irq)); | |
13533 | + if (ret) | |
13534 | + goto error_xenbus; | |
13535 | + ret = xenbus_printf(xbt, dev->nodename, "protocol", "%s", | |
13536 | + XEN_IO_PROTO_ABI_NATIVE); | |
13537 | + if (ret) | |
13538 | + goto error_xenbus; | |
13539 | + ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1"); | |
13540 | + if (ret) | |
13541 | + goto error_xenbus; | |
13542 | + ret = xenbus_transaction_end(xbt, 0); | |
13543 | + if (ret) { | |
13544 | + if (ret == -EAGAIN) | |
13545 | + goto again; | |
13546 | + xenbus_dev_fatal(dev, ret, "completing transaction"); | |
13547 | + return ret; | |
13548 | + } | |
13549 | + | |
13550 | + xenbus_switch_state(dev, XenbusStateInitialised); | |
13551 | + return 0; | |
13552 | + | |
13553 | + error_xenbus: | |
13554 | + xenbus_transaction_end(xbt, 1); | |
13555 | + xenbus_dev_fatal(dev, ret, "writing xenstore"); | |
13556 | + return ret; | |
13557 | +} | |
13558 | + | |
13559 | +static void xenfb_disconnect_backend(struct xenfb_info *info) | |
13560 | +{ | |
13561 | + if (info->irq >= 0) | |
13562 | + unbind_from_irqhandler(info->irq, info); | |
13563 | + info->irq = -1; | |
13564 | +} | |
13565 | + | |
13566 | +static void xenfb_backend_changed(struct xenbus_device *dev, | |
13567 | + enum xenbus_state backend_state) | |
13568 | +{ | |
13569 | + struct xenfb_info *info = dev->dev.driver_data; | |
13570 | + int val; | |
13571 | + | |
13572 | + switch (backend_state) { | |
13573 | + case XenbusStateInitialising: | |
13574 | + case XenbusStateInitialised: | |
13575 | + case XenbusStateReconfiguring: | |
13576 | + case XenbusStateReconfigured: | |
13577 | + case XenbusStateUnknown: | |
13578 | + case XenbusStateClosed: | |
13579 | + break; | |
13580 | + | |
13581 | + case XenbusStateInitWait: | |
13582 | + InitWait: | |
13583 | + xenbus_switch_state(dev, XenbusStateConnected); | |
13584 | + break; | |
13585 | + | |
13586 | + case XenbusStateConnected: | |
13587 | + /* | |
13588 | + * Work around xenbus race condition: If backend goes | |
13589 | + * through InitWait to Connected fast enough, we can | |
13590 | + * get Connected twice here. | |
13591 | + */ | |
13592 | + if (dev->state != XenbusStateConnected) | |
13593 | + goto InitWait; /* no InitWait seen yet, fudge it */ | |
13594 | + | |
13595 | + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, | |
13596 | + "request-update", "%d", &val) < 0) | |
13597 | + val = 0; | |
13598 | + if (val) | |
13599 | + info->update_wanted = 1; | |
13600 | + | |
13601 | + if (xenbus_scanf(XBT_NIL, dev->otherend, | |
13602 | + "feature-resize", "%d", &val) < 0) | |
13603 | + val = 0; | |
13604 | + info->feature_resize = val; | |
13605 | + break; | |
13606 | + | |
13607 | + case XenbusStateClosing: | |
13608 | + // FIXME is this safe in any dev->state? | |
13609 | + xenbus_frontend_closed(dev); | |
13610 | + break; | |
13611 | + } | |
13612 | +} | |
13613 | + | |
13614 | +static const struct xenbus_device_id xenfb_ids[] = { | |
13615 | + { "vfb" }, | |
13616 | + { "" } | |
13617 | +}; | |
13618 | +MODULE_ALIAS("xen:vfb"); | |
13619 | + | |
13620 | +static struct xenbus_driver xenfb_driver = { | |
13621 | + .name = "vfb", | |
13622 | + .owner = THIS_MODULE, | |
13623 | + .ids = xenfb_ids, | |
13624 | + .probe = xenfb_probe, | |
13625 | + .remove = xenfb_remove, | |
13626 | + .resume = xenfb_resume, | |
13627 | + .otherend_changed = xenfb_backend_changed, | |
13628 | +}; | |
13629 | + | |
13630 | +static int __init xenfb_init(void) | |
13631 | +{ | |
13632 | + if (!is_running_on_xen()) | |
13633 | + return -ENODEV; | |
13634 | + | |
13635 | + /* Nothing to do if running in dom0. */ | |
13636 | + if (is_initial_xendomain()) | |
13637 | + return -ENODEV; | |
13638 | + | |
13639 | + return xenbus_register_frontend(&xenfb_driver); | |
13640 | +} | |
13641 | + | |
13642 | +static void __exit xenfb_cleanup(void) | |
13643 | +{ | |
13644 | + return xenbus_unregister_driver(&xenfb_driver); | |
13645 | +} | |
13646 | + | |
13647 | +module_init(xenfb_init); | |
13648 | +module_exit(xenfb_cleanup); | |
13649 | + | |
13650 | +MODULE_LICENSE("GPL"); | |
13651 | Index: head-2008-11-25/drivers/xen/fbfront/xenkbd.c | |
13652 | =================================================================== | |
13653 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
13654 | +++ head-2008-11-25/drivers/xen/fbfront/xenkbd.c 2008-04-02 12:34:02.000000000 +0200 | |
13655 | @@ -0,0 +1,354 @@ | |
13656 | +/* | |
13657 | + * linux/drivers/input/keyboard/xenkbd.c -- Xen para-virtual input device | |
13658 | + * | |
13659 | + * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> | |
13660 | + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> | |
13661 | + * | |
13662 | + * Based on linux/drivers/input/mouse/sermouse.c | |
13663 | + * | |
13664 | + * This file is subject to the terms and conditions of the GNU General Public | |
13665 | + * License. See the file COPYING in the main directory of this archive for | |
13666 | + * more details. | |
13667 | + */ | |
13668 | + | |
13669 | +/* | |
13670 | + * TODO: | |
13671 | + * | |
13672 | + * Switch to grant tables together with xenfb.c. | |
13673 | + */ | |
13674 | + | |
13675 | +#include <linux/kernel.h> | |
13676 | +#include <linux/errno.h> | |
13677 | +#include <linux/module.h> | |
13678 | +#include <linux/input.h> | |
13679 | +#include <asm/hypervisor.h> | |
13680 | +#include <xen/evtchn.h> | |
13681 | +#include <xen/interface/io/fbif.h> | |
13682 | +#include <xen/interface/io/kbdif.h> | |
13683 | +#include <xen/xenbus.h> | |
13684 | + | |
13685 | +struct xenkbd_info | |
13686 | +{ | |
13687 | + struct input_dev *kbd; | |
13688 | + struct input_dev *ptr; | |
13689 | + struct xenkbd_page *page; | |
13690 | + int irq; | |
13691 | + struct xenbus_device *xbdev; | |
13692 | + char phys[32]; | |
13693 | +}; | |
13694 | + | |
13695 | +static int xenkbd_remove(struct xenbus_device *); | |
13696 | +static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *); | |
13697 | +static void xenkbd_disconnect_backend(struct xenkbd_info *); | |
13698 | + | |
13699 | +/* | |
13700 | + * Note: if you need to send out events, see xenfb_do_update() for how | |
13701 | + * to do that. | |
13702 | + */ | |
13703 | + | |
13704 | +static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs) | |
13705 | +{ | |
13706 | + struct xenkbd_info *info = dev_id; | |
13707 | + struct xenkbd_page *page = info->page; | |
13708 | + __u32 cons, prod; | |
13709 | + | |
13710 | + prod = page->in_prod; | |
13711 | + if (prod == page->in_cons) | |
13712 | + return IRQ_HANDLED; | |
13713 | + rmb(); /* ensure we see ring contents up to prod */ | |
13714 | + for (cons = page->in_cons; cons != prod; cons++) { | |
13715 | + union xenkbd_in_event *event; | |
13716 | + struct input_dev *dev; | |
13717 | + event = &XENKBD_IN_RING_REF(page, cons); | |
13718 | + | |
13719 | + dev = info->ptr; | |
13720 | + switch (event->type) { | |
13721 | + case XENKBD_TYPE_MOTION: | |
13722 | + if (event->motion.rel_z) | |
13723 | + input_report_rel(dev, REL_WHEEL, | |
13724 | + -event->motion.rel_z); | |
13725 | + input_report_rel(dev, REL_X, event->motion.rel_x); | |
13726 | + input_report_rel(dev, REL_Y, event->motion.rel_y); | |
13727 | + break; | |
13728 | + case XENKBD_TYPE_KEY: | |
13729 | + dev = NULL; | |
13730 | + if (test_bit(event->key.keycode, info->kbd->keybit)) | |
13731 | + dev = info->kbd; | |
13732 | + if (test_bit(event->key.keycode, info->ptr->keybit)) | |
13733 | + dev = info->ptr; | |
13734 | + if (dev) | |
13735 | + input_report_key(dev, event->key.keycode, | |
13736 | + event->key.pressed); | |
13737 | + else | |
13738 | + printk("xenkbd: unhandled keycode 0x%x\n", | |
13739 | + event->key.keycode); | |
13740 | + break; | |
13741 | + case XENKBD_TYPE_POS: | |
13742 | + if (event->pos.rel_z) | |
13743 | + input_report_rel(dev, REL_WHEEL, | |
13744 | + -event->pos.rel_z); | |
13745 | + input_report_abs(dev, ABS_X, event->pos.abs_x); | |
13746 | + input_report_abs(dev, ABS_Y, event->pos.abs_y); | |
13747 | + break; | |
13748 | + } | |
13749 | + if (dev) | |
13750 | + input_sync(dev); | |
13751 | + } | |
13752 | + mb(); /* ensure we got ring contents */ | |
13753 | + page->in_cons = cons; | |
13754 | + notify_remote_via_irq(info->irq); | |
13755 | + | |
13756 | + return IRQ_HANDLED; | |
13757 | +} | |
13758 | + | |
13759 | +int __devinit xenkbd_probe(struct xenbus_device *dev, | |
13760 | + const struct xenbus_device_id *id) | |
13761 | +{ | |
13762 | + int ret, i; | |
13763 | + struct xenkbd_info *info; | |
13764 | + struct input_dev *kbd, *ptr; | |
13765 | + | |
13766 | + info = kzalloc(sizeof(*info), GFP_KERNEL); | |
13767 | + if (!info) { | |
13768 | + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); | |
13769 | + return -ENOMEM; | |
13770 | + } | |
13771 | + dev->dev.driver_data = info; | |
13772 | + info->xbdev = dev; | |
13773 | + snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename); | |
13774 | + | |
13775 | + info->page = (void *)__get_free_page(GFP_KERNEL); | |
13776 | + if (!info->page) | |
13777 | + goto error_nomem; | |
13778 | + info->page->in_cons = info->page->in_prod = 0; | |
13779 | + info->page->out_cons = info->page->out_prod = 0; | |
13780 | + | |
13781 | + /* keyboard */ | |
13782 | + kbd = input_allocate_device(); | |
13783 | + if (!kbd) | |
13784 | + goto error_nomem; | |
13785 | + kbd->name = "Xen Virtual Keyboard"; | |
13786 | + kbd->phys = info->phys; | |
13787 | + kbd->id.bustype = BUS_PCI; | |
13788 | + kbd->id.vendor = 0x5853; | |
13789 | + kbd->id.product = 0xffff; | |
13790 | + kbd->evbit[0] = BIT(EV_KEY); | |
13791 | + for (i = KEY_ESC; i < KEY_UNKNOWN; i++) | |
13792 | + set_bit(i, kbd->keybit); | |
13793 | + for (i = KEY_OK; i < KEY_MAX; i++) | |
13794 | + set_bit(i, kbd->keybit); | |
13795 | + | |
13796 | + ret = input_register_device(kbd); | |
13797 | + if (ret) { | |
13798 | + input_free_device(kbd); | |
13799 | + xenbus_dev_fatal(dev, ret, "input_register_device(kbd)"); | |
13800 | + goto error; | |
13801 | + } | |
13802 | + info->kbd = kbd; | |
13803 | + | |
13804 | + /* pointing device */ | |
13805 | + ptr = input_allocate_device(); | |
13806 | + if (!ptr) | |
13807 | + goto error_nomem; | |
13808 | + ptr->name = "Xen Virtual Pointer"; | |
13809 | + ptr->phys = info->phys; | |
13810 | + ptr->id.bustype = BUS_PCI; | |
13811 | + ptr->id.vendor = 0x5853; | |
13812 | + ptr->id.product = 0xfffe; | |
13813 | + ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); | |
13814 | + for (i = BTN_LEFT; i <= BTN_TASK; i++) | |
13815 | + set_bit(i, ptr->keybit); | |
13816 | + ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL); | |
13817 | + input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); | |
13818 | + input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); | |
13819 | + | |
13820 | + ret = input_register_device(ptr); | |
13821 | + if (ret) { | |
13822 | + input_free_device(ptr); | |
13823 | + xenbus_dev_fatal(dev, ret, "input_register_device(ptr)"); | |
13824 | + goto error; | |
13825 | + } | |
13826 | + info->ptr = ptr; | |
13827 | + | |
13828 | + ret = xenkbd_connect_backend(dev, info); | |
13829 | + if (ret < 0) | |
13830 | + goto error; | |
13831 | + | |
13832 | + return 0; | |
13833 | + | |
13834 | + error_nomem: | |
13835 | + ret = -ENOMEM; | |
13836 | + xenbus_dev_fatal(dev, ret, "allocating device memory"); | |
13837 | + error: | |
13838 | + xenkbd_remove(dev); | |
13839 | + return ret; | |
13840 | +} | |
13841 | + | |
13842 | +static int xenkbd_resume(struct xenbus_device *dev) | |
13843 | +{ | |
13844 | + struct xenkbd_info *info = dev->dev.driver_data; | |
13845 | + | |
13846 | + xenkbd_disconnect_backend(info); | |
13847 | + info->page->in_cons = info->page->in_prod = 0; | |
13848 | + info->page->out_cons = info->page->out_prod = 0; | |
13849 | + return xenkbd_connect_backend(dev, info); | |
13850 | +} | |
13851 | + | |
13852 | +static int xenkbd_remove(struct xenbus_device *dev) | |
13853 | +{ | |
13854 | + struct xenkbd_info *info = dev->dev.driver_data; | |
13855 | + | |
13856 | + xenkbd_disconnect_backend(info); | |
13857 | + input_unregister_device(info->kbd); | |
13858 | + input_unregister_device(info->ptr); | |
13859 | + free_page((unsigned long)info->page); | |
13860 | + kfree(info); | |
13861 | + return 0; | |
13862 | +} | |
13863 | + | |
13864 | +static int xenkbd_connect_backend(struct xenbus_device *dev, | |
13865 | + struct xenkbd_info *info) | |
13866 | +{ | |
13867 | + int ret; | |
13868 | + struct xenbus_transaction xbt; | |
13869 | + | |
13870 | + ret = bind_listening_port_to_irqhandler( | |
13871 | + dev->otherend_id, input_handler, 0, "xenkbd", info); | |
13872 | + if (ret < 0) { | |
13873 | + xenbus_dev_fatal(dev, ret, | |
13874 | + "bind_listening_port_to_irqhandler"); | |
13875 | + return ret; | |
13876 | + } | |
13877 | + info->irq = ret; | |
13878 | + | |
13879 | + again: | |
13880 | + ret = xenbus_transaction_start(&xbt); | |
13881 | + if (ret) { | |
13882 | + xenbus_dev_fatal(dev, ret, "starting transaction"); | |
13883 | + return ret; | |
13884 | + } | |
13885 | + ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu", | |
13886 | + virt_to_mfn(info->page)); | |
13887 | + if (ret) | |
13888 | + goto error_xenbus; | |
13889 | + ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", | |
13890 | + irq_to_evtchn_port(info->irq)); | |
13891 | + if (ret) | |
13892 | + goto error_xenbus; | |
13893 | + ret = xenbus_transaction_end(xbt, 0); | |
13894 | + if (ret) { | |
13895 | + if (ret == -EAGAIN) | |
13896 | + goto again; | |
13897 | + xenbus_dev_fatal(dev, ret, "completing transaction"); | |
13898 | + return ret; | |
13899 | + } | |
13900 | + | |
13901 | + xenbus_switch_state(dev, XenbusStateInitialised); | |
13902 | + return 0; | |
13903 | + | |
13904 | + error_xenbus: | |
13905 | + xenbus_transaction_end(xbt, 1); | |
13906 | + xenbus_dev_fatal(dev, ret, "writing xenstore"); | |
13907 | + return ret; | |
13908 | +} | |
13909 | + | |
13910 | +static void xenkbd_disconnect_backend(struct xenkbd_info *info) | |
13911 | +{ | |
13912 | + if (info->irq >= 0) | |
13913 | + unbind_from_irqhandler(info->irq, info); | |
13914 | + info->irq = -1; | |
13915 | +} | |
13916 | + | |
13917 | +static void xenkbd_backend_changed(struct xenbus_device *dev, | |
13918 | + enum xenbus_state backend_state) | |
13919 | +{ | |
13920 | + struct xenkbd_info *info = dev->dev.driver_data; | |
13921 | + int ret, val; | |
13922 | + | |
13923 | + switch (backend_state) { | |
13924 | + case XenbusStateInitialising: | |
13925 | + case XenbusStateInitialised: | |
13926 | + case XenbusStateReconfiguring: | |
13927 | + case XenbusStateReconfigured: | |
13928 | + case XenbusStateUnknown: | |
13929 | + case XenbusStateClosed: | |
13930 | + break; | |
13931 | + | |
13932 | + case XenbusStateInitWait: | |
13933 | + InitWait: | |
13934 | + ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend, | |
13935 | + "feature-abs-pointer", "%d", &val); | |
13936 | + if (ret < 0) | |
13937 | + val = 0; | |
13938 | + if (val) { | |
13939 | + ret = xenbus_printf(XBT_NIL, info->xbdev->nodename, | |
13940 | + "request-abs-pointer", "1"); | |
13941 | + if (ret) | |
13942 | + ; /* FIXME */ | |
13943 | + } | |
13944 | + xenbus_switch_state(dev, XenbusStateConnected); | |
13945 | + break; | |
13946 | + | |
13947 | + case XenbusStateConnected: | |
13948 | + /* | |
13949 | + * Work around xenbus race condition: If backend goes | |
13950 | + * through InitWait to Connected fast enough, we can | |
13951 | + * get Connected twice here. | |
13952 | + */ | |
13953 | + if (dev->state != XenbusStateConnected) | |
13954 | + goto InitWait; /* no InitWait seen yet, fudge it */ | |
13955 | + | |
13956 | + /* Set input abs params to match backend screen res */ | |
13957 | + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, | |
13958 | + "width", "%d", &val) > 0 ) | |
13959 | + input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0); | |
13960 | + | |
13961 | + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, | |
13962 | + "height", "%d", &val) > 0 ) | |
13963 | + input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0); | |
13964 | + | |
13965 | + break; | |
13966 | + | |
13967 | + case XenbusStateClosing: | |
13968 | + xenbus_frontend_closed(dev); | |
13969 | + break; | |
13970 | + } | |
13971 | +} | |
13972 | + | |
13973 | +static const struct xenbus_device_id xenkbd_ids[] = { | |
13974 | + { "vkbd" }, | |
13975 | + { "" } | |
13976 | +}; | |
13977 | +MODULE_ALIAS("xen:vkbd"); | |
13978 | + | |
13979 | +static struct xenbus_driver xenkbd_driver = { | |
13980 | + .name = "vkbd", | |
13981 | + .owner = THIS_MODULE, | |
13982 | + .ids = xenkbd_ids, | |
13983 | + .probe = xenkbd_probe, | |
13984 | + .remove = xenkbd_remove, | |
13985 | + .resume = xenkbd_resume, | |
13986 | + .otherend_changed = xenkbd_backend_changed, | |
13987 | +}; | |
13988 | + | |
13989 | +static int __init xenkbd_init(void) | |
13990 | +{ | |
13991 | + if (!is_running_on_xen()) | |
13992 | + return -ENODEV; | |
13993 | + | |
13994 | + /* Nothing to do if running in dom0. */ | |
13995 | + if (is_initial_xendomain()) | |
13996 | + return -ENODEV; | |
13997 | + | |
13998 | + return xenbus_register_frontend(&xenkbd_driver); | |
13999 | +} | |
14000 | + | |
14001 | +static void __exit xenkbd_cleanup(void) | |
14002 | +{ | |
14003 | + return xenbus_unregister_driver(&xenkbd_driver); | |
14004 | +} | |
14005 | + | |
14006 | +module_init(xenkbd_init); | |
14007 | +module_exit(xenkbd_cleanup); | |
14008 | + | |
14009 | +MODULE_LICENSE("GPL"); | |
14010 | Index: head-2008-11-25/drivers/xen/gntdev/Makefile | |
14011 | =================================================================== | |
14012 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
14013 | +++ head-2008-11-25/drivers/xen/gntdev/Makefile 2008-01-07 13:19:18.000000000 +0100 | |
14014 | @@ -0,0 +1 @@ | |
14015 | +obj-$(CONFIG_XEN_GRANT_DEV) := gntdev.o | |
14016 | Index: head-2008-11-25/drivers/xen/gntdev/gntdev.c | |
14017 | =================================================================== | |
14018 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
14019 | +++ head-2008-11-25/drivers/xen/gntdev/gntdev.c 2008-07-21 11:00:33.000000000 +0200 | |
14020 | @@ -0,0 +1,1074 @@ | |
14021 | +/****************************************************************************** | |
14022 | + * gntdev.c | |
14023 | + * | |
14024 | + * Device for accessing (in user-space) pages that have been granted by other | |
14025 | + * domains. | |
14026 | + * | |
14027 | + * Copyright (c) 2006-2007, D G Murray. | |
14028 | + * | |
14029 | + * This program is distributed in the hope that it will be useful, | |
14030 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14031 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14032 | + * GNU General Public License for more details. | |
14033 | + * | |
14034 | + * You should have received a copy of the GNU General Public License | |
14035 | + * along with this program; if not, write to the Free Software | |
14036 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
14037 | + */ | |
14038 | + | |
14039 | +#include <asm/atomic.h> | |
14040 | +#include <linux/module.h> | |
14041 | +#include <linux/kernel.h> | |
14042 | +#include <linux/init.h> | |
14043 | +#include <linux/fs.h> | |
14044 | +#include <linux/device.h> | |
14045 | +#include <linux/mm.h> | |
14046 | +#include <linux/mman.h> | |
14047 | +#include <asm/uaccess.h> | |
14048 | +#include <asm/io.h> | |
14049 | +#include <xen/gnttab.h> | |
14050 | +#include <asm/hypervisor.h> | |
14051 | +#include <xen/balloon.h> | |
14052 | +#include <xen/evtchn.h> | |
14053 | +#include <xen/driver_util.h> | |
14054 | + | |
14055 | +#include <linux/types.h> | |
14056 | +#include <xen/public/gntdev.h> | |
14057 | + | |
14058 | + | |
14059 | +#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@cl.cam.ac.uk>" | |
14060 | +#define DRIVER_DESC "User-space granted page access driver" | |
14061 | + | |
14062 | +MODULE_LICENSE("GPL"); | |
14063 | +MODULE_AUTHOR(DRIVER_AUTHOR); | |
14064 | +MODULE_DESCRIPTION(DRIVER_DESC); | |
14065 | + | |
14066 | +#define MAX_GRANTS_LIMIT 1024 | |
14067 | +#define DEFAULT_MAX_GRANTS 128 | |
14068 | + | |
14069 | +/* A slot can be in one of three states: | |
14070 | + * | |
14071 | + * 0. GNTDEV_SLOT_INVALID: | |
14072 | + * This slot is not associated with a grant reference, and is therefore free | |
14073 | + * to be overwritten by a new grant reference. | |
14074 | + * | |
14075 | + * 1. GNTDEV_SLOT_NOT_YET_MAPPED: | |
14076 | + * This slot is associated with a grant reference (via the | |
14077 | + * IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed. | |
14078 | + * | |
14079 | + * 2. GNTDEV_SLOT_MAPPED: | |
14080 | + * This slot is associated with a grant reference, and has been mmap()-ed. | |
14081 | + */ | |
14082 | +typedef enum gntdev_slot_state { | |
14083 | + GNTDEV_SLOT_INVALID = 0, | |
14084 | + GNTDEV_SLOT_NOT_YET_MAPPED, | |
14085 | + GNTDEV_SLOT_MAPPED | |
14086 | +} gntdev_slot_state_t; | |
14087 | + | |
14088 | +#define GNTDEV_INVALID_HANDLE -1 | |
14089 | +#define GNTDEV_FREE_LIST_INVALID -1 | |
14090 | +/* Each opened instance of gntdev is associated with a list of grants, | |
14091 | + * represented by an array of elements of the following type, | |
14092 | + * gntdev_grant_info_t. | |
14093 | + */ | |
14094 | +typedef struct gntdev_grant_info { | |
14095 | + gntdev_slot_state_t state; | |
14096 | + union { | |
14097 | + uint32_t free_list_index; | |
14098 | + struct { | |
14099 | + domid_t domid; | |
14100 | + grant_ref_t ref; | |
14101 | + grant_handle_t kernel_handle; | |
14102 | + grant_handle_t user_handle; | |
14103 | + uint64_t dev_bus_addr; | |
14104 | + } valid; | |
14105 | + } u; | |
14106 | +} gntdev_grant_info_t; | |
14107 | + | |
14108 | +/* Private data structure, which is stored in the file pointer for files | |
14109 | + * associated with this device. | |
14110 | + */ | |
14111 | +typedef struct gntdev_file_private_data { | |
14112 | + | |
14113 | + /* Array of grant information. */ | |
14114 | + gntdev_grant_info_t *grants; | |
14115 | + uint32_t grants_size; | |
14116 | + | |
14117 | + /* Read/write semaphore used to protect the grants array. */ | |
14118 | + struct rw_semaphore grants_sem; | |
14119 | + | |
14120 | + /* An array of indices of free slots in the grants array. | |
14121 | + * N.B. An entry in this list may temporarily have the value | |
14122 | + * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed | |
14123 | + * from the list by the contiguous allocator, but the list has not yet | |
14124 | + * been compressed. However, this is not visible across invocations of | |
14125 | + * the device. | |
14126 | + */ | |
14127 | + int32_t *free_list; | |
14128 | + | |
14129 | + /* The number of free slots in the grants array. */ | |
14130 | + uint32_t free_list_size; | |
14131 | + | |
14132 | + /* Read/write semaphore used to protect the free list. */ | |
14133 | + struct rw_semaphore free_list_sem; | |
14134 | + | |
14135 | + /* Index of the next slot after the most recent contiguous allocation, | |
14136 | + * for use in a next-fit allocator. | |
14137 | + */ | |
14138 | + uint32_t next_fit_index; | |
14139 | + | |
14140 | + /* Used to map grants into the kernel, before mapping them into user | |
14141 | + * space. | |
14142 | + */ | |
14143 | + struct page **foreign_pages; | |
14144 | + | |
14145 | +} gntdev_file_private_data_t; | |
14146 | + | |
14147 | +/* Module lifecycle operations. */ | |
14148 | +static int __init gntdev_init(void); | |
14149 | +static void __exit gntdev_exit(void); | |
14150 | + | |
14151 | +module_init(gntdev_init); | |
14152 | +module_exit(gntdev_exit); | |
14153 | + | |
14154 | +/* File operations. */ | |
14155 | +static int gntdev_open(struct inode *inode, struct file *flip); | |
14156 | +static int gntdev_release(struct inode *inode, struct file *flip); | |
14157 | +static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma); | |
14158 | +static long gntdev_ioctl(struct file *flip, | |
14159 | + unsigned int cmd, unsigned long arg); | |
14160 | + | |
14161 | +static const struct file_operations gntdev_fops = { | |
14162 | + .owner = THIS_MODULE, | |
14163 | + .open = gntdev_open, | |
14164 | + .release = gntdev_release, | |
14165 | + .mmap = gntdev_mmap, | |
14166 | + .unlocked_ioctl = gntdev_ioctl | |
14167 | +}; | |
14168 | + | |
14169 | +/* VM operations. */ | |
14170 | +static void gntdev_vma_close(struct vm_area_struct *vma); | |
14171 | +static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr, | |
14172 | + pte_t *ptep, int is_fullmm); | |
14173 | + | |
14174 | +static struct vm_operations_struct gntdev_vmops = { | |
14175 | + .close = gntdev_vma_close, | |
14176 | + .zap_pte = gntdev_clear_pte | |
14177 | +}; | |
14178 | + | |
14179 | +/* Global variables. */ | |
14180 | + | |
14181 | +/* The driver major number, for use when unregistering the driver. */ | |
14182 | +static int gntdev_major; | |
14183 | + | |
14184 | +#define GNTDEV_NAME "gntdev" | |
14185 | + | |
14186 | +/* Memory mapping functions | |
14187 | + * ------------------------ | |
14188 | + * | |
14189 | + * Every granted page is mapped into both kernel and user space, and the two | |
14190 | + * following functions return the respective virtual addresses of these pages. | |
14191 | + * | |
14192 | + * When shadow paging is disabled, the granted page is mapped directly into | |
14193 | + * user space; when it is enabled, it is mapped into the kernel and remapped | |
14194 | + * into user space using vm_insert_page() (see gntdev_mmap(), below). | |
14195 | + */ | |
14196 | + | |
14197 | +/* Returns the virtual address (in user space) of the @page_index'th page | |
14198 | + * in the given VM area. | |
14199 | + */ | |
14200 | +static inline unsigned long get_user_vaddr (struct vm_area_struct *vma, | |
14201 | + int page_index) | |
14202 | +{ | |
14203 | + return (unsigned long) vma->vm_start + (page_index << PAGE_SHIFT); | |
14204 | +} | |
14205 | + | |
14206 | +/* Returns the virtual address (in kernel space) of the @slot_index'th page | |
14207 | + * mapped by the gntdev instance that owns the given private data struct. | |
14208 | + */ | |
14209 | +static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv, | |
14210 | + int slot_index) | |
14211 | +{ | |
14212 | + unsigned long pfn; | |
14213 | + void *kaddr; | |
14214 | + pfn = page_to_pfn(priv->foreign_pages[slot_index]); | |
14215 | + kaddr = pfn_to_kaddr(pfn); | |
14216 | + return (unsigned long) kaddr; | |
14217 | +} | |
14218 | + | |
14219 | +/* Helper functions. */ | |
14220 | + | |
14221 | +/* Adds information about a grant reference to the list of grants in the file's | |
14222 | + * private data structure. Returns non-zero on failure. On success, sets the | |
14223 | + * value of *offset to the offset that should be mmap()-ed in order to map the | |
14224 | + * grant reference. | |
14225 | + */ | |
14226 | +static int add_grant_reference(struct file *flip, | |
14227 | + struct ioctl_gntdev_grant_ref *op, | |
14228 | + uint64_t *offset) | |
14229 | +{ | |
14230 | + gntdev_file_private_data_t *private_data | |
14231 | + = (gntdev_file_private_data_t *) flip->private_data; | |
14232 | + | |
14233 | + uint32_t slot_index; | |
14234 | + | |
14235 | + if (unlikely(private_data->free_list_size == 0)) { | |
14236 | + return -ENOMEM; | |
14237 | + } | |
14238 | + | |
14239 | + slot_index = private_data->free_list[--private_data->free_list_size]; | |
14240 | + private_data->free_list[private_data->free_list_size] | |
14241 | + = GNTDEV_FREE_LIST_INVALID; | |
14242 | + | |
14243 | + /* Copy the grant information into file's private data. */ | |
14244 | + private_data->grants[slot_index].state = GNTDEV_SLOT_NOT_YET_MAPPED; | |
14245 | + private_data->grants[slot_index].u.valid.domid = op->domid; | |
14246 | + private_data->grants[slot_index].u.valid.ref = op->ref; | |
14247 | + | |
14248 | + /* The offset is calculated as the index of the chosen entry in the | |
14249 | + * file's private data's array of grant information. This is then | |
14250 | + * shifted to give an offset into the virtual "file address space". | |
14251 | + */ | |
14252 | + *offset = slot_index << PAGE_SHIFT; | |
14253 | + | |
14254 | + return 0; | |
14255 | +} | |
14256 | + | |
14257 | +/* Adds the @count grant references to the contiguous range in the slot array | |
14258 | + * beginning at @first_slot. It is assumed that @first_slot was returned by a | |
14259 | + * previous invocation of find_contiguous_free_range(), during the same | |
14260 | + * invocation of the driver. | |
14261 | + */ | |
14262 | +static int add_grant_references(struct file *flip, | |
14263 | + int count, | |
14264 | + struct ioctl_gntdev_grant_ref *ops, | |
14265 | + uint32_t first_slot) | |
14266 | +{ | |
14267 | + gntdev_file_private_data_t *private_data | |
14268 | + = (gntdev_file_private_data_t *) flip->private_data; | |
14269 | + int i; | |
14270 | + | |
14271 | + for (i = 0; i < count; ++i) { | |
14272 | + | |
14273 | + /* First, mark the slot's entry in the free list as invalid. */ | |
14274 | + int free_list_index = | |
14275 | + private_data->grants[first_slot+i].u.free_list_index; | |
14276 | + private_data->free_list[free_list_index] = | |
14277 | + GNTDEV_FREE_LIST_INVALID; | |
14278 | + | |
14279 | + /* Now, update the slot. */ | |
14280 | + private_data->grants[first_slot+i].state = | |
14281 | + GNTDEV_SLOT_NOT_YET_MAPPED; | |
14282 | + private_data->grants[first_slot+i].u.valid.domid = | |
14283 | + ops[i].domid; | |
14284 | + private_data->grants[first_slot+i].u.valid.ref = ops[i].ref; | |
14285 | + } | |
14286 | + | |
14287 | + return 0; | |
14288 | +} | |
14289 | + | |
14290 | +/* Scans through the free list for @flip, removing entries that are marked as | |
14291 | + * GNTDEV_SLOT_INVALID. This will reduce the recorded size of the free list to | |
14292 | + * the number of valid entries. | |
14293 | + */ | |
14294 | +static void compress_free_list(struct file *flip) | |
14295 | +{ | |
14296 | + gntdev_file_private_data_t *private_data | |
14297 | + = (gntdev_file_private_data_t *) flip->private_data; | |
14298 | + int i, j = 0, old_size, slot_index; | |
14299 | + | |
14300 | + old_size = private_data->free_list_size; | |
14301 | + for (i = 0; i < old_size; ++i) { | |
14302 | + if (private_data->free_list[i] != GNTDEV_FREE_LIST_INVALID) { | |
14303 | + if (i > j) { | |
14304 | + slot_index = private_data->free_list[i]; | |
14305 | + private_data->free_list[j] = slot_index; | |
14306 | + private_data->grants[slot_index].u | |
14307 | + .free_list_index = j; | |
14308 | + private_data->free_list[i] | |
14309 | + = GNTDEV_FREE_LIST_INVALID; | |
14310 | + } | |
14311 | + ++j; | |
14312 | + } else { | |
14313 | + --private_data->free_list_size; | |
14314 | + } | |
14315 | + } | |
14316 | +} | |
14317 | + | |
14318 | +/* Searches the grant array in the private data of @flip for a range of | |
14319 | + * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state. | |
14320 | + * | |
14321 | + * Returns the index of the first slot if a range is found, otherwise -ENOMEM. | |
14322 | + */ | |
14323 | +static int find_contiguous_free_range(struct file *flip, | |
14324 | + uint32_t num_slots) | |
14325 | +{ | |
14326 | + gntdev_file_private_data_t *private_data | |
14327 | + = (gntdev_file_private_data_t *) flip->private_data; | |
14328 | + | |
14329 | + int i; | |
14330 | + int start_index = private_data->next_fit_index; | |
14331 | + int range_start = 0, range_length; | |
14332 | + | |
14333 | + if (private_data->free_list_size < num_slots) { | |
14334 | + return -ENOMEM; | |
14335 | + } | |
14336 | + | |
14337 | + /* First search from the start_index to the end of the array. */ | |
14338 | + range_length = 0; | |
14339 | + for (i = start_index; i < private_data->grants_size; ++i) { | |
14340 | + if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) { | |
14341 | + if (range_length == 0) { | |
14342 | + range_start = i; | |
14343 | + } | |
14344 | + ++range_length; | |
14345 | + if (range_length == num_slots) { | |
14346 | + return range_start; | |
14347 | + } | |
14348 | + } | |
14349 | + } | |
14350 | + | |
14351 | + /* Now search from the start of the array to the start_index. */ | |
14352 | + range_length = 0; | |
14353 | + for (i = 0; i < start_index; ++i) { | |
14354 | + if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) { | |
14355 | + if (range_length == 0) { | |
14356 | + range_start = i; | |
14357 | + } | |
14358 | + ++range_length; | |
14359 | + if (range_length == num_slots) { | |
14360 | + return range_start; | |
14361 | + } | |
14362 | + } | |
14363 | + } | |
14364 | + | |
14365 | + return -ENOMEM; | |
14366 | +} | |
14367 | + | |
14368 | +static int init_private_data(gntdev_file_private_data_t *priv, | |
14369 | + uint32_t max_grants) | |
14370 | +{ | |
14371 | + int i; | |
14372 | + | |
14373 | + /* Allocate space for the kernel-mapping of granted pages. */ | |
14374 | + priv->foreign_pages = | |
14375 | + alloc_empty_pages_and_pagevec(max_grants); | |
14376 | + if (!priv->foreign_pages) | |
14377 | + goto nomem_out; | |
14378 | + | |
14379 | + /* Allocate the grant list and free-list. */ | |
14380 | + priv->grants = kmalloc(max_grants * sizeof(gntdev_grant_info_t), | |
14381 | + GFP_KERNEL); | |
14382 | + if (!priv->grants) | |
14383 | + goto nomem_out2; | |
14384 | + priv->free_list = kmalloc(max_grants * sizeof(int32_t), GFP_KERNEL); | |
14385 | + if (!priv->free_list) | |
14386 | + goto nomem_out3; | |
14387 | + | |
14388 | + /* Initialise the free-list, which contains all slots at first. */ | |
14389 | + for (i = 0; i < max_grants; ++i) { | |
14390 | + priv->free_list[max_grants - i - 1] = i; | |
14391 | + priv->grants[i].state = GNTDEV_SLOT_INVALID; | |
14392 | + priv->grants[i].u.free_list_index = max_grants - i - 1; | |
14393 | + } | |
14394 | + priv->grants_size = max_grants; | |
14395 | + priv->free_list_size = max_grants; | |
14396 | + priv->next_fit_index = 0; | |
14397 | + | |
14398 | + return 0; | |
14399 | + | |
14400 | +nomem_out3: | |
14401 | + kfree(priv->grants); | |
14402 | +nomem_out2: | |
14403 | + free_empty_pages_and_pagevec(priv->foreign_pages, max_grants); | |
14404 | +nomem_out: | |
14405 | + return -ENOMEM; | |
14406 | + | |
14407 | +} | |
14408 | + | |
14409 | +/* Interface functions. */ | |
14410 | + | |
14411 | +/* Initialises the driver. Called when the module is loaded. */ | |
14412 | +static int __init gntdev_init(void) | |
14413 | +{ | |
14414 | + struct class *class; | |
14415 | + struct class_device *device; | |
14416 | + | |
14417 | + if (!is_running_on_xen()) { | |
14418 | + printk(KERN_ERR "You must be running Xen to use gntdev\n"); | |
14419 | + return -ENODEV; | |
14420 | + } | |
14421 | + | |
14422 | + gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops); | |
14423 | + if (gntdev_major < 0) | |
14424 | + { | |
14425 | + printk(KERN_ERR "Could not register gntdev device\n"); | |
14426 | + return -ENOMEM; | |
14427 | + } | |
14428 | + | |
14429 | + /* Note that if the sysfs code fails, we will still initialise the | |
14430 | + * device, and output the major number so that the device can be | |
14431 | + * created manually using mknod. | |
14432 | + */ | |
14433 | + if ((class = get_xen_class()) == NULL) { | |
14434 | + printk(KERN_ERR "Error setting up xen_class\n"); | |
14435 | + printk(KERN_ERR "gntdev created with major number = %d\n", | |
14436 | + gntdev_major); | |
14437 | + return 0; | |
14438 | + } | |
14439 | + | |
14440 | + device = class_device_create(class, NULL, MKDEV(gntdev_major, 0), | |
14441 | + NULL, GNTDEV_NAME); | |
14442 | + if (IS_ERR(device)) { | |
14443 | + printk(KERN_ERR "Error creating gntdev device in xen_class\n"); | |
14444 | + printk(KERN_ERR "gntdev created with major number = %d\n", | |
14445 | + gntdev_major); | |
14446 | + return 0; | |
14447 | + } | |
14448 | + | |
14449 | + return 0; | |
14450 | +} | |
14451 | + | |
14452 | +/* Cleans up and unregisters the driver. Called when the driver is unloaded. | |
14453 | + */ | |
14454 | +static void __exit gntdev_exit(void) | |
14455 | +{ | |
14456 | + struct class *class; | |
14457 | + if ((class = get_xen_class()) != NULL) | |
14458 | + class_device_destroy(class, MKDEV(gntdev_major, 0)); | |
14459 | + unregister_chrdev(gntdev_major, GNTDEV_NAME); | |
14460 | +} | |
14461 | + | |
14462 | +/* Called when the device is opened. */ | |
14463 | +static int gntdev_open(struct inode *inode, struct file *flip) | |
14464 | +{ | |
14465 | + gntdev_file_private_data_t *private_data; | |
14466 | + | |
14467 | + try_module_get(THIS_MODULE); | |
14468 | + | |
14469 | + /* Allocate space for the per-instance private data. */ | |
14470 | + private_data = kmalloc(sizeof(*private_data), GFP_KERNEL); | |
14471 | + if (!private_data) | |
14472 | + goto nomem_out; | |
14473 | + | |
14474 | + /* These will be lazily initialised by init_private_data. */ | |
14475 | + private_data->grants = NULL; | |
14476 | + private_data->free_list = NULL; | |
14477 | + private_data->foreign_pages = NULL; | |
14478 | + | |
14479 | + init_rwsem(&private_data->grants_sem); | |
14480 | + init_rwsem(&private_data->free_list_sem); | |
14481 | + | |
14482 | + flip->private_data = private_data; | |
14483 | + | |
14484 | + return 0; | |
14485 | + | |
14486 | +nomem_out: | |
14487 | + return -ENOMEM; | |
14488 | +} | |
14489 | + | |
14490 | +/* Called when the device is closed. | |
14491 | + */ | |
14492 | +static int gntdev_release(struct inode *inode, struct file *flip) | |
14493 | +{ | |
14494 | + if (flip->private_data) { | |
14495 | + gntdev_file_private_data_t *private_data = | |
14496 | + (gntdev_file_private_data_t *) flip->private_data; | |
14497 | + if (private_data->foreign_pages) | |
14498 | + free_empty_pages_and_pagevec | |
14499 | + (private_data->foreign_pages, | |
14500 | + private_data->grants_size); | |
14501 | + if (private_data->grants) | |
14502 | + kfree(private_data->grants); | |
14503 | + if (private_data->free_list) | |
14504 | + kfree(private_data->free_list); | |
14505 | + kfree(private_data); | |
14506 | + } | |
14507 | + module_put(THIS_MODULE); | |
14508 | + return 0; | |
14509 | +} | |
14510 | + | |
14511 | +/* Called when an attempt is made to mmap() the device. The private data from | |
14512 | + * @flip contains the list of grant references that can be mapped. The vm_pgoff | |
14513 | + * field of @vma contains the index into that list that refers to the grant | |
14514 | + * reference that will be mapped. Only mappings that are a multiple of | |
14515 | + * PAGE_SIZE are handled. | |
14516 | + */ | |
14517 | +static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma) | |
14518 | +{ | |
14519 | + struct gnttab_map_grant_ref op; | |
14520 | + unsigned long slot_index = vma->vm_pgoff; | |
14521 | + unsigned long kernel_vaddr, user_vaddr; | |
14522 | + uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | |
14523 | + uint64_t ptep; | |
14524 | + int ret; | |
14525 | + int flags; | |
14526 | + int i; | |
14527 | + struct page *page; | |
14528 | + gntdev_file_private_data_t *private_data = flip->private_data; | |
14529 | + | |
14530 | + if (unlikely(!private_data)) { | |
14531 | + printk(KERN_ERR "File's private data is NULL.\n"); | |
14532 | + return -EINVAL; | |
14533 | + } | |
14534 | + | |
14535 | + /* Test to make sure that the grants array has been initialised. */ | |
14536 | + down_read(&private_data->grants_sem); | |
14537 | + if (unlikely(!private_data->grants)) { | |
14538 | + up_read(&private_data->grants_sem); | |
14539 | + printk(KERN_ERR "Attempted to mmap before ioctl.\n"); | |
14540 | + return -EINVAL; | |
14541 | + } | |
14542 | + up_read(&private_data->grants_sem); | |
14543 | + | |
14544 | + if (unlikely((size <= 0) || | |
14545 | + (size + slot_index) > private_data->grants_size)) { | |
14546 | + printk(KERN_ERR "Invalid number of pages or offset" | |
14547 | + "(num_pages = %d, first_slot = %ld).\n", | |
14548 | + size, slot_index); | |
14549 | + return -ENXIO; | |
14550 | + } | |
14551 | + | |
14552 | + if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) { | |
14553 | + printk(KERN_ERR "Writable mappings must be shared.\n"); | |
14554 | + return -EINVAL; | |
14555 | + } | |
14556 | + | |
14557 | + /* Slots must be in the NOT_YET_MAPPED state. */ | |
14558 | + down_write(&private_data->grants_sem); | |
14559 | + for (i = 0; i < size; ++i) { | |
14560 | + if (private_data->grants[slot_index + i].state != | |
14561 | + GNTDEV_SLOT_NOT_YET_MAPPED) { | |
14562 | + printk(KERN_ERR "Slot (index = %ld) is in the wrong " | |
14563 | + "state (%d).\n", slot_index + i, | |
14564 | + private_data->grants[slot_index + i].state); | |
14565 | + up_write(&private_data->grants_sem); | |
14566 | + return -EINVAL; | |
14567 | + } | |
14568 | + } | |
14569 | + | |
14570 | + /* Install the hook for unmapping. */ | |
14571 | + vma->vm_ops = &gntdev_vmops; | |
14572 | + | |
14573 | + /* The VM area contains pages from another VM. */ | |
14574 | + vma->vm_flags |= VM_FOREIGN; | |
14575 | + vma->vm_private_data = kzalloc(size * sizeof(struct page *), | |
14576 | + GFP_KERNEL); | |
14577 | + if (vma->vm_private_data == NULL) { | |
14578 | + printk(KERN_ERR "Couldn't allocate mapping structure for VM " | |
14579 | + "area.\n"); | |
14580 | + return -ENOMEM; | |
14581 | + } | |
14582 | + | |
14583 | + /* This flag prevents Bad PTE errors when the memory is unmapped. */ | |
14584 | + vma->vm_flags |= VM_RESERVED; | |
14585 | + | |
14586 | + /* This flag prevents this VM area being copied on a fork(). A better | |
14587 | + * behaviour might be to explicitly carry out the appropriate mappings | |
14588 | + * on fork(), but I don't know if there's a hook for this. | |
14589 | + */ | |
14590 | + vma->vm_flags |= VM_DONTCOPY; | |
14591 | + | |
14592 | +#ifdef CONFIG_X86 | |
14593 | + /* This flag ensures that the page tables are not unpinned before the | |
14594 | + * VM area is unmapped. Therefore Xen still recognises the PTE as | |
14595 | + * belonging to an L1 pagetable, and the grant unmap operation will | |
14596 | + * succeed, even if the process does not exit cleanly. | |
14597 | + */ | |
14598 | + vma->vm_mm->context.has_foreign_mappings = 1; | |
14599 | +#endif | |
14600 | + | |
14601 | + for (i = 0; i < size; ++i) { | |
14602 | + | |
14603 | + flags = GNTMAP_host_map; | |
14604 | + if (!(vma->vm_flags & VM_WRITE)) | |
14605 | + flags |= GNTMAP_readonly; | |
14606 | + | |
14607 | + kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i); | |
14608 | + user_vaddr = get_user_vaddr(vma, i); | |
14609 | + page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT); | |
14610 | + | |
14611 | + gnttab_set_map_op(&op, kernel_vaddr, flags, | |
14612 | + private_data->grants[slot_index+i] | |
14613 | + .u.valid.ref, | |
14614 | + private_data->grants[slot_index+i] | |
14615 | + .u.valid.domid); | |
14616 | + | |
14617 | + /* Carry out the mapping of the grant reference. */ | |
14618 | + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, | |
14619 | + &op, 1); | |
14620 | + BUG_ON(ret); | |
14621 | + if (op.status) { | |
14622 | + printk(KERN_ERR "Error mapping the grant reference " | |
14623 | + "into the kernel (%d). domid = %d; ref = %d\n", | |
14624 | + op.status, | |
14625 | + private_data->grants[slot_index+i] | |
14626 | + .u.valid.domid, | |
14627 | + private_data->grants[slot_index+i] | |
14628 | + .u.valid.ref); | |
14629 | + goto undo_map_out; | |
14630 | + } | |
14631 | + | |
14632 | + /* Store a reference to the page that will be mapped into user | |
14633 | + * space. | |
14634 | + */ | |
14635 | + ((struct page **) vma->vm_private_data)[i] = page; | |
14636 | + | |
14637 | + /* Mark mapped page as reserved. */ | |
14638 | + SetPageReserved(page); | |
14639 | + | |
14640 | + /* Record the grant handle, for use in the unmap operation. */ | |
14641 | + private_data->grants[slot_index+i].u.valid.kernel_handle = | |
14642 | + op.handle; | |
14643 | + private_data->grants[slot_index+i].u.valid.dev_bus_addr = | |
14644 | + op.dev_bus_addr; | |
14645 | + | |
14646 | + private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED; | |
14647 | + private_data->grants[slot_index+i].u.valid.user_handle = | |
14648 | + GNTDEV_INVALID_HANDLE; | |
14649 | + | |
14650 | + /* Now perform the mapping to user space. */ | |
14651 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
14652 | + | |
14653 | + /* NOT USING SHADOW PAGE TABLES. */ | |
14654 | + /* In this case, we map the grant(s) straight into user | |
14655 | + * space. | |
14656 | + */ | |
14657 | + | |
14658 | + /* Get the machine address of the PTE for the user | |
14659 | + * page. | |
14660 | + */ | |
14661 | + if ((ret = create_lookup_pte_addr(vma->vm_mm, | |
14662 | + vma->vm_start | |
14663 | + + (i << PAGE_SHIFT), | |
14664 | + &ptep))) | |
14665 | + { | |
14666 | + printk(KERN_ERR "Error obtaining PTE pointer " | |
14667 | + "(%d).\n", ret); | |
14668 | + goto undo_map_out; | |
14669 | + } | |
14670 | + | |
14671 | + /* Configure the map operation. */ | |
14672 | + | |
14673 | + /* The reference is to be used by host CPUs. */ | |
14674 | + flags = GNTMAP_host_map; | |
14675 | + | |
14676 | + /* Specifies a user space mapping. */ | |
14677 | + flags |= GNTMAP_application_map; | |
14678 | + | |
14679 | + /* The map request contains the machine address of the | |
14680 | + * PTE to update. | |
14681 | + */ | |
14682 | + flags |= GNTMAP_contains_pte; | |
14683 | + | |
14684 | + if (!(vma->vm_flags & VM_WRITE)) | |
14685 | + flags |= GNTMAP_readonly; | |
14686 | + | |
14687 | + gnttab_set_map_op(&op, ptep, flags, | |
14688 | + private_data->grants[slot_index+i] | |
14689 | + .u.valid.ref, | |
14690 | + private_data->grants[slot_index+i] | |
14691 | + .u.valid.domid); | |
14692 | + | |
14693 | + /* Carry out the mapping of the grant reference. */ | |
14694 | + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, | |
14695 | + &op, 1); | |
14696 | + BUG_ON(ret); | |
14697 | + if (op.status) { | |
14698 | + printk(KERN_ERR "Error mapping the grant " | |
14699 | + "reference into user space (%d). domid " | |
14700 | + "= %d; ref = %d\n", op.status, | |
14701 | + private_data->grants[slot_index+i].u | |
14702 | + .valid.domid, | |
14703 | + private_data->grants[slot_index+i].u | |
14704 | + .valid.ref); | |
14705 | + goto undo_map_out; | |
14706 | + } | |
14707 | + | |
14708 | + /* Record the grant handle, for use in the unmap | |
14709 | + * operation. | |
14710 | + */ | |
14711 | + private_data->grants[slot_index+i].u. | |
14712 | + valid.user_handle = op.handle; | |
14713 | + | |
14714 | + /* Update p2m structure with the new mapping. */ | |
14715 | + set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT, | |
14716 | + FOREIGN_FRAME(private_data-> | |
14717 | + grants[slot_index+i] | |
14718 | + .u.valid.dev_bus_addr | |
14719 | + >> PAGE_SHIFT)); | |
14720 | + } else { | |
14721 | + /* USING SHADOW PAGE TABLES. */ | |
14722 | + /* In this case, we simply insert the page into the VM | |
14723 | + * area. */ | |
14724 | + ret = vm_insert_page(vma, user_vaddr, page); | |
14725 | + } | |
14726 | + | |
14727 | + } | |
14728 | + | |
14729 | + up_write(&private_data->grants_sem); | |
14730 | + return 0; | |
14731 | + | |
14732 | +undo_map_out: | |
14733 | + /* If we have a mapping failure, the unmapping will be taken care of | |
14734 | + * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte(). | |
14735 | + * All we need to do here is free the vma_private_data. | |
14736 | + */ | |
14737 | + kfree(vma->vm_private_data); | |
14738 | + | |
14739 | + /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file | |
14740 | + * to NULL on failure. However, we need this in gntdev_clear_pte() to | |
14741 | + * unmap the grants. Therefore, we smuggle a reference to the file's | |
14742 | + * private data in the VM area's private data pointer. | |
14743 | + */ | |
14744 | + vma->vm_private_data = private_data; | |
14745 | + | |
14746 | + up_write(&private_data->grants_sem); | |
14747 | + | |
14748 | + return -ENOMEM; | |
14749 | +} | |
14750 | + | |
14751 | +static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr, | |
14752 | + pte_t *ptep, int is_fullmm) | |
14753 | +{ | |
14754 | + int slot_index, ret; | |
14755 | + pte_t copy; | |
14756 | + struct gnttab_unmap_grant_ref op; | |
14757 | + gntdev_file_private_data_t *private_data; | |
14758 | + | |
14759 | + /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file | |
14760 | + * to NULL on failure. However, we need this in gntdev_clear_pte() to | |
14761 | + * unmap the grants. Therefore, we smuggle a reference to the file's | |
14762 | + * private data in the VM area's private data pointer. | |
14763 | + */ | |
14764 | + if (vma->vm_file) { | |
14765 | + private_data = (gntdev_file_private_data_t *) | |
14766 | + vma->vm_file->private_data; | |
14767 | + } else if (vma->vm_private_data) { | |
14768 | + private_data = (gntdev_file_private_data_t *) | |
14769 | + vma->vm_private_data; | |
14770 | + } else { | |
14771 | + private_data = NULL; /* gcc warning */ | |
14772 | + BUG(); | |
14773 | + } | |
14774 | + | |
14775 | + /* Copy the existing value of the PTE for returning. */ | |
14776 | + copy = *ptep; | |
14777 | + | |
14778 | + /* Calculate the grant relating to this PTE. */ | |
14779 | + slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); | |
14780 | + | |
14781 | + /* Only unmap grants if the slot has been mapped. This could be being | |
14782 | + * called from a failing mmap(). | |
14783 | + */ | |
14784 | + if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) { | |
14785 | + | |
14786 | + /* First, we clear the user space mapping, if it has been made. | |
14787 | + */ | |
14788 | + if (private_data->grants[slot_index].u.valid.user_handle != | |
14789 | + GNTDEV_INVALID_HANDLE && | |
14790 | + !xen_feature(XENFEAT_auto_translated_physmap)) { | |
14791 | + /* NOT USING SHADOW PAGE TABLES. */ | |
14792 | + gnttab_set_unmap_op(&op, virt_to_machine(ptep), | |
14793 | + GNTMAP_contains_pte, | |
14794 | + private_data->grants[slot_index] | |
14795 | + .u.valid.user_handle); | |
14796 | + ret = HYPERVISOR_grant_table_op( | |
14797 | + GNTTABOP_unmap_grant_ref, &op, 1); | |
14798 | + BUG_ON(ret); | |
14799 | + if (op.status) | |
14800 | + printk("User unmap grant status = %d\n", | |
14801 | + op.status); | |
14802 | + } else { | |
14803 | + /* USING SHADOW PAGE TABLES. */ | |
14804 | + pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm); | |
14805 | + } | |
14806 | + | |
14807 | + /* Finally, we unmap the grant from kernel space. */ | |
14808 | + gnttab_set_unmap_op(&op, | |
14809 | + get_kernel_vaddr(private_data, slot_index), | |
14810 | + GNTMAP_host_map, | |
14811 | + private_data->grants[slot_index].u.valid | |
14812 | + .kernel_handle); | |
14813 | + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, | |
14814 | + &op, 1); | |
14815 | + BUG_ON(ret); | |
14816 | + if (op.status) | |
14817 | + printk("Kernel unmap grant status = %d\n", op.status); | |
14818 | + | |
14819 | + | |
14820 | + /* Return slot to the not-yet-mapped state, so that it may be | |
14821 | + * mapped again, or removed by a subsequent ioctl. | |
14822 | + */ | |
14823 | + private_data->grants[slot_index].state = | |
14824 | + GNTDEV_SLOT_NOT_YET_MAPPED; | |
14825 | + | |
14826 | + /* Invalidate the physical to machine mapping for this page. */ | |
14827 | + set_phys_to_machine(__pa(get_kernel_vaddr(private_data, | |
14828 | + slot_index)) | |
14829 | + >> PAGE_SHIFT, INVALID_P2M_ENTRY); | |
14830 | + | |
14831 | + } else { | |
14832 | + pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm); | |
14833 | + } | |
14834 | + | |
14835 | + return copy; | |
14836 | +} | |
14837 | + | |
14838 | +/* "Destructor" for a VM area. | |
14839 | + */ | |
14840 | +static void gntdev_vma_close(struct vm_area_struct *vma) { | |
14841 | + if (vma->vm_private_data) { | |
14842 | + kfree(vma->vm_private_data); | |
14843 | + } | |
14844 | +} | |
14845 | + | |
14846 | +/* Called when an ioctl is made on the device. | |
14847 | + */ | |
14848 | +static long gntdev_ioctl(struct file *flip, | |
14849 | + unsigned int cmd, unsigned long arg) | |
14850 | +{ | |
14851 | + int rc = 0; | |
14852 | + gntdev_file_private_data_t *private_data = | |
14853 | + (gntdev_file_private_data_t *) flip->private_data; | |
14854 | + | |
14855 | + /* On the first invocation, we will lazily initialise the grant array | |
14856 | + * and free-list. | |
14857 | + */ | |
14858 | + if (unlikely(!private_data->grants) | |
14859 | + && likely(cmd != IOCTL_GNTDEV_SET_MAX_GRANTS)) { | |
14860 | + down_write(&private_data->grants_sem); | |
14861 | + | |
14862 | + if (unlikely(private_data->grants)) { | |
14863 | + up_write(&private_data->grants_sem); | |
14864 | + goto private_data_initialised; | |
14865 | + } | |
14866 | + | |
14867 | + /* Just use the default. Setting to a non-default is handled | |
14868 | + * in the ioctl switch. | |
14869 | + */ | |
14870 | + rc = init_private_data(private_data, DEFAULT_MAX_GRANTS); | |
14871 | + | |
14872 | + up_write(&private_data->grants_sem); | |
14873 | + | |
14874 | + if (rc) { | |
14875 | + printk (KERN_ERR "Initialising gntdev private data " | |
14876 | + "failed.\n"); | |
14877 | + return rc; | |
14878 | + } | |
14879 | + } | |
14880 | + | |
14881 | +private_data_initialised: | |
14882 | + switch (cmd) { | |
14883 | + case IOCTL_GNTDEV_MAP_GRANT_REF: | |
14884 | + { | |
14885 | + struct ioctl_gntdev_map_grant_ref op; | |
14886 | + down_write(&private_data->grants_sem); | |
14887 | + down_write(&private_data->free_list_sem); | |
14888 | + | |
14889 | + if ((rc = copy_from_user(&op, (void __user *) arg, | |
14890 | + sizeof(op)))) { | |
14891 | + rc = -EFAULT; | |
14892 | + goto map_out; | |
14893 | + } | |
14894 | + if (unlikely(op.count <= 0)) { | |
14895 | + rc = -EINVAL; | |
14896 | + goto map_out; | |
14897 | + } | |
14898 | + | |
14899 | + if (op.count == 1) { | |
14900 | + if ((rc = add_grant_reference(flip, &op.refs[0], | |
14901 | + &op.index)) < 0) { | |
14902 | + printk(KERN_ERR "Adding grant reference " | |
14903 | + "failed (%d).\n", rc); | |
14904 | + goto map_out; | |
14905 | + } | |
14906 | + } else { | |
14907 | + struct ioctl_gntdev_grant_ref *refs, *u; | |
14908 | + refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL); | |
14909 | + if (!refs) { | |
14910 | + rc = -ENOMEM; | |
14911 | + goto map_out; | |
14912 | + } | |
14913 | + u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs; | |
14914 | + if ((rc = copy_from_user(refs, | |
14915 | + (void __user *)u, | |
14916 | + sizeof(*refs) * op.count))) { | |
14917 | + printk(KERN_ERR "Copying refs from user failed" | |
14918 | + " (%d).\n", rc); | |
14919 | + rc = -EINVAL; | |
14920 | + goto map_out; | |
14921 | + } | |
14922 | + if ((rc = find_contiguous_free_range(flip, op.count)) | |
14923 | + < 0) { | |
14924 | + printk(KERN_ERR "Finding contiguous range " | |
14925 | + "failed (%d).\n", rc); | |
14926 | + kfree(refs); | |
14927 | + goto map_out; | |
14928 | + } | |
14929 | + op.index = rc << PAGE_SHIFT; | |
14930 | + if ((rc = add_grant_references(flip, op.count, | |
14931 | + refs, rc))) { | |
14932 | + printk(KERN_ERR "Adding grant references " | |
14933 | + "failed (%d).\n", rc); | |
14934 | + kfree(refs); | |
14935 | + goto map_out; | |
14936 | + } | |
14937 | + compress_free_list(flip); | |
14938 | + kfree(refs); | |
14939 | + } | |
14940 | + if ((rc = copy_to_user((void __user *) arg, | |
14941 | + &op, | |
14942 | + sizeof(op)))) { | |
14943 | + printk(KERN_ERR "Copying result back to user failed " | |
14944 | + "(%d)\n", rc); | |
14945 | + rc = -EFAULT; | |
14946 | + goto map_out; | |
14947 | + } | |
14948 | + map_out: | |
14949 | + up_write(&private_data->grants_sem); | |
14950 | + up_write(&private_data->free_list_sem); | |
14951 | + return rc; | |
14952 | + } | |
14953 | + case IOCTL_GNTDEV_UNMAP_GRANT_REF: | |
14954 | + { | |
14955 | + struct ioctl_gntdev_unmap_grant_ref op; | |
14956 | + int i, start_index; | |
14957 | + | |
14958 | + down_write(&private_data->grants_sem); | |
14959 | + down_write(&private_data->free_list_sem); | |
14960 | + | |
14961 | + if ((rc = copy_from_user(&op, | |
14962 | + (void __user *) arg, | |
14963 | + sizeof(op)))) { | |
14964 | + rc = -EFAULT; | |
14965 | + goto unmap_out; | |
14966 | + } | |
14967 | + | |
14968 | + start_index = op.index >> PAGE_SHIFT; | |
14969 | + | |
14970 | + /* First, check that all pages are in the NOT_YET_MAPPED | |
14971 | + * state. | |
14972 | + */ | |
14973 | + for (i = 0; i < op.count; ++i) { | |
14974 | + if (unlikely | |
14975 | + (private_data->grants[start_index + i].state | |
14976 | + != GNTDEV_SLOT_NOT_YET_MAPPED)) { | |
14977 | + if (private_data->grants[start_index + i].state | |
14978 | + == GNTDEV_SLOT_INVALID) { | |
14979 | + printk(KERN_ERR | |
14980 | + "Tried to remove an invalid " | |
14981 | + "grant at offset 0x%x.", | |
14982 | + (start_index + i) | |
14983 | + << PAGE_SHIFT); | |
14984 | + rc = -EINVAL; | |
14985 | + } else { | |
14986 | + printk(KERN_ERR | |
14987 | + "Tried to remove a grant which " | |
14988 | + "is currently mmap()-ed at " | |
14989 | + "offset 0x%x.", | |
14990 | + (start_index + i) | |
14991 | + << PAGE_SHIFT); | |
14992 | + rc = -EBUSY; | |
14993 | + } | |
14994 | + goto unmap_out; | |
14995 | + } | |
14996 | + } | |
14997 | + | |
14998 | + /* Unmap pages and add them to the free list. | |
14999 | + */ | |
15000 | + for (i = 0; i < op.count; ++i) { | |
15001 | + private_data->grants[start_index+i].state = | |
15002 | + GNTDEV_SLOT_INVALID; | |
15003 | + private_data->grants[start_index+i].u.free_list_index = | |
15004 | + private_data->free_list_size; | |
15005 | + private_data->free_list[private_data->free_list_size] = | |
15006 | + start_index + i; | |
15007 | + ++private_data->free_list_size; | |
15008 | + } | |
15009 | + | |
15010 | + unmap_out: | |
15011 | + up_write(&private_data->grants_sem); | |
15012 | + up_write(&private_data->free_list_sem); | |
15013 | + return rc; | |
15014 | + } | |
15015 | + case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: | |
15016 | + { | |
15017 | + struct ioctl_gntdev_get_offset_for_vaddr op; | |
15018 | + struct vm_area_struct *vma; | |
15019 | + unsigned long vaddr; | |
15020 | + | |
15021 | + if ((rc = copy_from_user(&op, | |
15022 | + (void __user *) arg, | |
15023 | + sizeof(op)))) { | |
15024 | + rc = -EFAULT; | |
15025 | + goto get_offset_out; | |
15026 | + } | |
15027 | + vaddr = (unsigned long)op.vaddr; | |
15028 | + | |
15029 | + down_read(¤t->mm->mmap_sem); | |
15030 | + vma = find_vma(current->mm, vaddr); | |
15031 | + if (vma == NULL) { | |
15032 | + rc = -EFAULT; | |
15033 | + goto get_offset_unlock_out; | |
15034 | + } | |
15035 | + if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) { | |
15036 | + printk(KERN_ERR "The vaddr specified does not belong " | |
15037 | + "to a gntdev instance: %#lx\n", vaddr); | |
15038 | + rc = -EFAULT; | |
15039 | + goto get_offset_unlock_out; | |
15040 | + } | |
15041 | + if (vma->vm_start != vaddr) { | |
15042 | + printk(KERN_ERR "The vaddr specified in an " | |
15043 | + "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at " | |
15044 | + "the start of the VM area. vma->vm_start = " | |
15045 | + "%#lx; vaddr = %#lx\n", | |
15046 | + vma->vm_start, vaddr); | |
15047 | + rc = -EFAULT; | |
15048 | + goto get_offset_unlock_out; | |
15049 | + } | |
15050 | + op.offset = vma->vm_pgoff << PAGE_SHIFT; | |
15051 | + op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | |
15052 | + up_read(¤t->mm->mmap_sem); | |
15053 | + if ((rc = copy_to_user((void __user *) arg, | |
15054 | + &op, | |
15055 | + sizeof(op)))) { | |
15056 | + rc = -EFAULT; | |
15057 | + goto get_offset_out; | |
15058 | + } | |
15059 | + goto get_offset_out; | |
15060 | + get_offset_unlock_out: | |
15061 | + up_read(¤t->mm->mmap_sem); | |
15062 | + get_offset_out: | |
15063 | + return rc; | |
15064 | + } | |
15065 | + case IOCTL_GNTDEV_SET_MAX_GRANTS: | |
15066 | + { | |
15067 | + struct ioctl_gntdev_set_max_grants op; | |
15068 | + if ((rc = copy_from_user(&op, | |
15069 | + (void __user *) arg, | |
15070 | + sizeof(op)))) { | |
15071 | + rc = -EFAULT; | |
15072 | + goto set_max_out; | |
15073 | + } | |
15074 | + down_write(&private_data->grants_sem); | |
15075 | + if (private_data->grants) { | |
15076 | + rc = -EBUSY; | |
15077 | + goto set_max_unlock_out; | |
15078 | + } | |
15079 | + if (op.count > MAX_GRANTS_LIMIT) { | |
15080 | + rc = -EINVAL; | |
15081 | + goto set_max_unlock_out; | |
15082 | + } | |
15083 | + rc = init_private_data(private_data, op.count); | |
15084 | + set_max_unlock_out: | |
15085 | + up_write(&private_data->grants_sem); | |
15086 | + set_max_out: | |
15087 | + return rc; | |
15088 | + } | |
15089 | + default: | |
15090 | + return -ENOIOCTLCMD; | |
15091 | + } | |
15092 | + | |
15093 | + return 0; | |
15094 | +} | |
15095 | Index: head-2008-11-25/drivers/xen/netback/Makefile | |
15096 | =================================================================== | |
15097 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
15098 | +++ head-2008-11-25/drivers/xen/netback/Makefile 2007-07-12 08:54:23.000000000 +0200 | |
15099 | @@ -0,0 +1,5 @@ | |
15100 | +obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o | |
15101 | +obj-$(CONFIG_XEN_NETDEV_LOOPBACK) += netloop.o | |
15102 | + | |
15103 | +netbk-y := netback.o xenbus.o interface.o accel.o | |
15104 | +netloop-y := loopback.o | |
15105 | Index: head-2008-11-25/drivers/xen/netback/accel.c | |
15106 | =================================================================== | |
15107 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
15108 | +++ head-2008-11-25/drivers/xen/netback/accel.c 2008-01-07 13:19:18.000000000 +0100 | |
15109 | @@ -0,0 +1,269 @@ | |
15110 | +/****************************************************************************** | |
15111 | + * drivers/xen/netback/accel.c | |
15112 | + * | |
15113 | + * Interface between backend virtual network device and accelerated plugin. | |
15114 | + * | |
15115 | + * Copyright (C) 2007 Solarflare Communications, Inc | |
15116 | + * | |
15117 | + * This program is free software; you can redistribute it and/or | |
15118 | + * modify it under the terms of the GNU General Public License version 2 | |
15119 | + * as published by the Free Software Foundation; or, when distributed | |
15120 | + * separately from the Linux kernel or incorporated into other | |
15121 | + * software packages, subject to the following license: | |
15122 | + * | |
15123 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
15124 | + * of this source file (the "Software"), to deal in the Software without | |
15125 | + * restriction, including without limitation the rights to use, copy, modify, | |
15126 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
15127 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
15128 | + * the following conditions: | |
15129 | + * | |
15130 | + * The above copyright notice and this permission notice shall be included in | |
15131 | + * all copies or substantial portions of the Software. | |
15132 | + * | |
15133 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15134 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15135 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
15136 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
15137 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
15138 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
15139 | + * IN THE SOFTWARE. | |
15140 | + */ | |
15141 | + | |
15142 | +#include <linux/list.h> | |
15143 | +#include <asm/atomic.h> | |
15144 | +#include <xen/xenbus.h> | |
15145 | +#include <linux/mutex.h> | |
15146 | + | |
15147 | +#include "common.h" | |
15148 | + | |
15149 | +#if 0 | |
15150 | +#undef DPRINTK | |
15151 | +#define DPRINTK(fmt, args...) \ | |
15152 | + printk("netback/accel (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) | |
15153 | +#endif | |
15154 | + | |
15155 | +/* | |
15156 | + * A list of available netback accelerator plugin modules (each list | |
15157 | + * entry is of type struct netback_accelerator) | |
15158 | + */ | |
15159 | +static struct list_head accelerators_list; | |
15160 | +/* Lock used to protect access to accelerators_list */ | |
15161 | +DEFINE_MUTEX(accelerators_mutex); | |
15162 | + | |
15163 | +/* | |
15164 | + * Compare a backend to an accelerator, and decide if they are | |
15165 | + * compatible (i.e. if the accelerator should be used by the | |
15166 | + * backend) | |
15167 | + */ | |
15168 | +static int match_accelerator(struct xenbus_device *xendev, | |
15169 | + struct backend_info *be, | |
15170 | + struct netback_accelerator *accelerator) | |
15171 | +{ | |
15172 | + int rc = 0; | |
15173 | + char *eth_name = xenbus_read(XBT_NIL, xendev->nodename, "accel", NULL); | |
15174 | + | |
15175 | + if (IS_ERR(eth_name)) { | |
15176 | + /* Probably means not present */ | |
15177 | + DPRINTK("%s: no match due to xenbus_read accel error %d\n", | |
15178 | + __FUNCTION__, PTR_ERR(eth_name)); | |
15179 | + return 0; | |
15180 | + } else { | |
15181 | + if (!strcmp(eth_name, accelerator->eth_name)) | |
15182 | + rc = 1; | |
15183 | + kfree(eth_name); | |
15184 | + return rc; | |
15185 | + } | |
15186 | +} | |
15187 | + | |
15188 | + | |
15189 | +static void do_probe(struct backend_info *be, | |
15190 | + struct netback_accelerator *accelerator, | |
15191 | + struct xenbus_device *xendev) | |
15192 | +{ | |
15193 | + be->accelerator = accelerator; | |
15194 | + atomic_inc(&be->accelerator->use_count); | |
15195 | + if (be->accelerator->hooks->probe(xendev) != 0) { | |
15196 | + atomic_dec(&be->accelerator->use_count); | |
15197 | + module_put(be->accelerator->hooks->owner); | |
15198 | + be->accelerator = NULL; | |
15199 | + } | |
15200 | +} | |
15201 | + | |
15202 | + | |
15203 | +/* | |
15204 | + * Notify suitable backends that a new accelerator is available and | |
15205 | + * connected. This will also notify the accelerator plugin module | |
15206 | + * that it is being used for a device through the probe hook. | |
15207 | + */ | |
15208 | +static int netback_accelerator_probe_backend(struct device *dev, void *arg) | |
15209 | +{ | |
15210 | + struct netback_accelerator *accelerator = | |
15211 | + (struct netback_accelerator *)arg; | |
15212 | + struct xenbus_device *xendev = to_xenbus_device(dev); | |
15213 | + | |
15214 | + if (!strcmp("vif", xendev->devicetype)) { | |
15215 | + struct backend_info *be = xendev->dev.driver_data; | |
15216 | + | |
15217 | + if (match_accelerator(xendev, be, accelerator) && | |
15218 | + try_module_get(accelerator->hooks->owner)) { | |
15219 | + do_probe(be, accelerator, xendev); | |
15220 | + } | |
15221 | + } | |
15222 | + return 0; | |
15223 | +} | |
15224 | + | |
15225 | + | |
15226 | +/* | |
15227 | + * Notify suitable backends that an accelerator is unavailable. | |
15228 | + */ | |
15229 | +static int netback_accelerator_remove_backend(struct device *dev, void *arg) | |
15230 | +{ | |
15231 | + struct xenbus_device *xendev = to_xenbus_device(dev); | |
15232 | + struct netback_accelerator *accelerator = | |
15233 | + (struct netback_accelerator *)arg; | |
15234 | + | |
15235 | + if (!strcmp("vif", xendev->devicetype)) { | |
15236 | + struct backend_info *be = xendev->dev.driver_data; | |
15237 | + | |
15238 | + if (be->accelerator == accelerator) { | |
15239 | + be->accelerator->hooks->remove(xendev); | |
15240 | + atomic_dec(&be->accelerator->use_count); | |
15241 | + module_put(be->accelerator->hooks->owner); | |
15242 | + be->accelerator = NULL; | |
15243 | + } | |
15244 | + } | |
15245 | + return 0; | |
15246 | +} | |
15247 | + | |
15248 | + | |
15249 | + | |
15250 | +/* | |
15251 | + * Entry point for an netback accelerator plugin module. Called to | |
15252 | + * advertise its presence, and connect to any suitable backends. | |
15253 | + */ | |
15254 | +int netback_connect_accelerator(unsigned version, int id, const char *eth_name, | |
15255 | + struct netback_accel_hooks *hooks) | |
15256 | +{ | |
15257 | + struct netback_accelerator *new_accelerator; | |
15258 | + unsigned eth_name_len; | |
15259 | + | |
15260 | + if (version != NETBACK_ACCEL_VERSION) { | |
15261 | + if (version > NETBACK_ACCEL_VERSION) { | |
15262 | + /* Caller has higher version number, leave it | |
15263 | + up to them to decide whether to continue. | |
15264 | + They can recall with a lower number if | |
15265 | + they're happy to be compatible with us */ | |
15266 | + return NETBACK_ACCEL_VERSION; | |
15267 | + } else { | |
15268 | + /* We have a more recent version than caller. | |
15269 | + Currently reject, but may in future be able | |
15270 | + to be backwardly compatible */ | |
15271 | + return -EPROTO; | |
15272 | + } | |
15273 | + } | |
15274 | + | |
15275 | + new_accelerator = | |
15276 | + kmalloc(sizeof(struct netback_accelerator), GFP_KERNEL); | |
15277 | + if (!new_accelerator) { | |
15278 | + DPRINTK("%s: failed to allocate memory for accelerator\n", | |
15279 | + __FUNCTION__); | |
15280 | + return -ENOMEM; | |
15281 | + } | |
15282 | + | |
15283 | + new_accelerator->id = id; | |
15284 | + | |
15285 | + eth_name_len = strlen(eth_name)+1; | |
15286 | + new_accelerator->eth_name = kmalloc(eth_name_len, GFP_KERNEL); | |
15287 | + if (!new_accelerator->eth_name) { | |
15288 | + DPRINTK("%s: failed to allocate memory for eth_name string\n", | |
15289 | + __FUNCTION__); | |
15290 | + kfree(new_accelerator); | |
15291 | + return -ENOMEM; | |
15292 | + } | |
15293 | + strlcpy(new_accelerator->eth_name, eth_name, eth_name_len); | |
15294 | + | |
15295 | + new_accelerator->hooks = hooks; | |
15296 | + | |
15297 | + atomic_set(&new_accelerator->use_count, 0); | |
15298 | + | |
15299 | + mutex_lock(&accelerators_mutex); | |
15300 | + list_add(&new_accelerator->link, &accelerators_list); | |
15301 | + | |
15302 | + /* tell existing backends about new plugin */ | |
15303 | + xenbus_for_each_backend(new_accelerator, | |
15304 | + netback_accelerator_probe_backend); | |
15305 | + | |
15306 | + mutex_unlock(&accelerators_mutex); | |
15307 | + | |
15308 | + return 0; | |
15309 | + | |
15310 | +} | |
15311 | +EXPORT_SYMBOL_GPL(netback_connect_accelerator); | |
15312 | + | |
15313 | + | |
15314 | +/* | |
15315 | + * Disconnect an accelerator plugin module that has previously been | |
15316 | + * connected. | |
15317 | + */ | |
15318 | +void netback_disconnect_accelerator(int id, const char *eth_name) | |
15319 | +{ | |
15320 | + struct netback_accelerator *accelerator, *next; | |
15321 | + | |
15322 | + mutex_lock(&accelerators_mutex); | |
15323 | + list_for_each_entry_safe(accelerator, next, &accelerators_list, link) { | |
15324 | + if (!strcmp(eth_name, accelerator->eth_name)) { | |
15325 | + xenbus_for_each_backend | |
15326 | + (accelerator, netback_accelerator_remove_backend); | |
15327 | + BUG_ON(atomic_read(&accelerator->use_count) != 0); | |
15328 | + list_del(&accelerator->link); | |
15329 | + kfree(accelerator->eth_name); | |
15330 | + kfree(accelerator); | |
15331 | + break; | |
15332 | + } | |
15333 | + } | |
15334 | + mutex_unlock(&accelerators_mutex); | |
15335 | +} | |
15336 | +EXPORT_SYMBOL_GPL(netback_disconnect_accelerator); | |
15337 | + | |
15338 | + | |
15339 | +void netback_probe_accelerators(struct backend_info *be, | |
15340 | + struct xenbus_device *dev) | |
15341 | +{ | |
15342 | + struct netback_accelerator *accelerator; | |
15343 | + | |
15344 | + /* | |
15345 | + * Check list of accelerators to see if any is suitable, and | |
15346 | + * use it if it is. | |
15347 | + */ | |
15348 | + mutex_lock(&accelerators_mutex); | |
15349 | + list_for_each_entry(accelerator, &accelerators_list, link) { | |
15350 | + if (match_accelerator(dev, be, accelerator) && | |
15351 | + try_module_get(accelerator->hooks->owner)) { | |
15352 | + do_probe(be, accelerator, dev); | |
15353 | + break; | |
15354 | + } | |
15355 | + } | |
15356 | + mutex_unlock(&accelerators_mutex); | |
15357 | +} | |
15358 | + | |
15359 | + | |
15360 | +void netback_remove_accelerators(struct backend_info *be, | |
15361 | + struct xenbus_device *dev) | |
15362 | +{ | |
15363 | + mutex_lock(&accelerators_mutex); | |
15364 | + /* Notify the accelerator (if any) of this device's removal */ | |
15365 | + if (be->accelerator != NULL) { | |
15366 | + be->accelerator->hooks->remove(dev); | |
15367 | + atomic_dec(&be->accelerator->use_count); | |
15368 | + module_put(be->accelerator->hooks->owner); | |
15369 | + be->accelerator = NULL; | |
15370 | + } | |
15371 | + mutex_unlock(&accelerators_mutex); | |
15372 | +} | |
15373 | + | |
15374 | + | |
15375 | +void netif_accel_init(void) | |
15376 | +{ | |
15377 | + INIT_LIST_HEAD(&accelerators_list); | |
15378 | +} | |
15379 | Index: head-2008-11-25/drivers/xen/netback/common.h | |
15380 | =================================================================== | |
15381 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
15382 | +++ head-2008-11-25/drivers/xen/netback/common.h 2008-01-07 13:19:18.000000000 +0100 | |
15383 | @@ -0,0 +1,217 @@ | |
15384 | +/****************************************************************************** | |
15385 | + * arch/xen/drivers/netif/backend/common.h | |
15386 | + * | |
15387 | + * This program is free software; you can redistribute it and/or | |
15388 | + * modify it under the terms of the GNU General Public License version 2 | |
15389 | + * as published by the Free Software Foundation; or, when distributed | |
15390 | + * separately from the Linux kernel or incorporated into other | |
15391 | + * software packages, subject to the following license: | |
15392 | + * | |
15393 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
15394 | + * of this source file (the "Software"), to deal in the Software without | |
15395 | + * restriction, including without limitation the rights to use, copy, modify, | |
15396 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
15397 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
15398 | + * the following conditions: | |
15399 | + * | |
15400 | + * The above copyright notice and this permission notice shall be included in | |
15401 | + * all copies or substantial portions of the Software. | |
15402 | + * | |
15403 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15404 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15405 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
15406 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
15407 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
15408 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
15409 | + * IN THE SOFTWARE. | |
15410 | + */ | |
15411 | + | |
15412 | +#ifndef __NETIF__BACKEND__COMMON_H__ | |
15413 | +#define __NETIF__BACKEND__COMMON_H__ | |
15414 | + | |
15415 | +#include <linux/version.h> | |
15416 | +#include <linux/module.h> | |
15417 | +#include <linux/interrupt.h> | |
15418 | +#include <linux/slab.h> | |
15419 | +#include <linux/ip.h> | |
15420 | +#include <linux/in.h> | |
15421 | +#include <linux/netdevice.h> | |
15422 | +#include <linux/etherdevice.h> | |
15423 | +#include <linux/wait.h> | |
15424 | +#include <xen/evtchn.h> | |
15425 | +#include <xen/interface/io/netif.h> | |
15426 | +#include <asm/io.h> | |
15427 | +#include <asm/pgalloc.h> | |
15428 | +#include <xen/interface/grant_table.h> | |
15429 | +#include <xen/gnttab.h> | |
15430 | +#include <xen/driver_util.h> | |
15431 | +#include <xen/xenbus.h> | |
15432 | + | |
15433 | +#define DPRINTK(_f, _a...) \ | |
15434 | + pr_debug("(file=%s, line=%d) " _f, \ | |
15435 | + __FILE__ , __LINE__ , ## _a ) | |
15436 | +#define IPRINTK(fmt, args...) \ | |
15437 | + printk(KERN_INFO "xen_net: " fmt, ##args) | |
15438 | +#define WPRINTK(fmt, args...) \ | |
15439 | + printk(KERN_WARNING "xen_net: " fmt, ##args) | |
15440 | + | |
15441 | +typedef struct netif_st { | |
15442 | + /* Unique identifier for this interface. */ | |
15443 | + domid_t domid; | |
15444 | + unsigned int handle; | |
15445 | + | |
15446 | + u8 fe_dev_addr[6]; | |
15447 | + | |
15448 | + /* Physical parameters of the comms window. */ | |
15449 | + grant_handle_t tx_shmem_handle; | |
15450 | + grant_ref_t tx_shmem_ref; | |
15451 | + grant_handle_t rx_shmem_handle; | |
15452 | + grant_ref_t rx_shmem_ref; | |
15453 | + unsigned int irq; | |
15454 | + | |
15455 | + /* The shared rings and indexes. */ | |
15456 | + netif_tx_back_ring_t tx; | |
15457 | + netif_rx_back_ring_t rx; | |
15458 | + struct vm_struct *tx_comms_area; | |
15459 | + struct vm_struct *rx_comms_area; | |
15460 | + | |
15461 | + /* Set of features that can be turned on in dev->features. */ | |
15462 | + int features; | |
15463 | + | |
15464 | + /* Internal feature information. */ | |
15465 | + u8 can_queue:1; /* can queue packets for receiver? */ | |
15466 | + u8 copying_receiver:1; /* copy packets to receiver? */ | |
15467 | + | |
15468 | + /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */ | |
15469 | + RING_IDX rx_req_cons_peek; | |
15470 | + | |
15471 | + /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ | |
15472 | + unsigned long credit_bytes; | |
15473 | + unsigned long credit_usec; | |
15474 | + unsigned long remaining_credit; | |
15475 | + struct timer_list credit_timeout; | |
15476 | + | |
15477 | + /* Enforce draining of the transmit queue. */ | |
15478 | + struct timer_list tx_queue_timeout; | |
15479 | + | |
15480 | + /* Miscellaneous private stuff. */ | |
15481 | + struct list_head list; /* scheduling list */ | |
15482 | + atomic_t refcnt; | |
15483 | + struct net_device *dev; | |
15484 | + struct net_device_stats stats; | |
15485 | + | |
15486 | + unsigned int carrier; | |
15487 | + | |
15488 | + wait_queue_head_t waiting_to_free; | |
15489 | +} netif_t; | |
15490 | + | |
15491 | +/* | |
15492 | + * Implement our own carrier flag: the network stack's version causes delays | |
15493 | + * when the carrier is re-enabled (in particular, dev_activate() may not | |
15494 | + * immediately be called, which can cause packet loss; also the etherbridge | |
15495 | + * can be rather lazy in activating its port). | |
15496 | + */ | |
15497 | +#define netback_carrier_on(netif) ((netif)->carrier = 1) | |
15498 | +#define netback_carrier_off(netif) ((netif)->carrier = 0) | |
15499 | +#define netback_carrier_ok(netif) ((netif)->carrier) | |
15500 | + | |
15501 | +enum { | |
15502 | + NETBK_DONT_COPY_SKB, | |
15503 | + NETBK_DELAYED_COPY_SKB, | |
15504 | + NETBK_ALWAYS_COPY_SKB, | |
15505 | +}; | |
15506 | + | |
15507 | +extern int netbk_copy_skb_mode; | |
15508 | + | |
15509 | +/* Function pointers into netback accelerator plugin modules */ | |
15510 | +struct netback_accel_hooks { | |
15511 | + struct module *owner; | |
15512 | + int (*probe)(struct xenbus_device *dev); | |
15513 | + int (*remove)(struct xenbus_device *dev); | |
15514 | +}; | |
15515 | + | |
15516 | +/* Structure to track the state of a netback accelerator plugin */ | |
15517 | +struct netback_accelerator { | |
15518 | + struct list_head link; | |
15519 | + int id; | |
15520 | + char *eth_name; | |
15521 | + atomic_t use_count; | |
15522 | + struct netback_accel_hooks *hooks; | |
15523 | +}; | |
15524 | + | |
15525 | +struct backend_info { | |
15526 | + struct xenbus_device *dev; | |
15527 | + netif_t *netif; | |
15528 | + enum xenbus_state frontend_state; | |
15529 | + | |
15530 | + /* State relating to the netback accelerator */ | |
15531 | + void *netback_accel_priv; | |
15532 | + /* The accelerator that this backend is currently using */ | |
15533 | + struct netback_accelerator *accelerator; | |
15534 | +}; | |
15535 | + | |
15536 | +#define NETBACK_ACCEL_VERSION 0x00010001 | |
15537 | + | |
15538 | +/* | |
15539 | + * Connect an accelerator plugin module to netback. Returns zero on | |
15540 | + * success, < 0 on error, > 0 (with highest version number supported) | |
15541 | + * if version mismatch. | |
15542 | + */ | |
15543 | +extern int netback_connect_accelerator(unsigned version, | |
15544 | + int id, const char *eth_name, | |
15545 | + struct netback_accel_hooks *hooks); | |
15546 | +/* Disconnect a previously connected accelerator plugin module */ | |
15547 | +extern void netback_disconnect_accelerator(int id, const char *eth_name); | |
15548 | + | |
15549 | + | |
15550 | +extern | |
15551 | +void netback_probe_accelerators(struct backend_info *be, | |
15552 | + struct xenbus_device *dev); | |
15553 | +extern | |
15554 | +void netback_remove_accelerators(struct backend_info *be, | |
15555 | + struct xenbus_device *dev); | |
15556 | +extern | |
15557 | +void netif_accel_init(void); | |
15558 | + | |
15559 | + | |
15560 | +#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) | |
15561 | +#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) | |
15562 | + | |
15563 | +void netif_disconnect(netif_t *netif); | |
15564 | + | |
15565 | +netif_t *netif_alloc(domid_t domid, unsigned int handle); | |
15566 | +int netif_map(netif_t *netif, unsigned long tx_ring_ref, | |
15567 | + unsigned long rx_ring_ref, unsigned int evtchn); | |
15568 | + | |
15569 | +#define netif_get(_b) (atomic_inc(&(_b)->refcnt)) | |
15570 | +#define netif_put(_b) \ | |
15571 | + do { \ | |
15572 | + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ | |
15573 | + wake_up(&(_b)->waiting_to_free); \ | |
15574 | + } while (0) | |
15575 | + | |
15576 | +void netif_xenbus_init(void); | |
15577 | + | |
15578 | +#define netif_schedulable(netif) \ | |
15579 | + (netif_running((netif)->dev) && netback_carrier_ok(netif)) | |
15580 | + | |
15581 | +void netif_schedule_work(netif_t *netif); | |
15582 | +void netif_deschedule_work(netif_t *netif); | |
15583 | + | |
15584 | +int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev); | |
15585 | +struct net_device_stats *netif_be_get_stats(struct net_device *dev); | |
15586 | +irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs); | |
15587 | + | |
15588 | +static inline int netbk_can_queue(struct net_device *dev) | |
15589 | +{ | |
15590 | + netif_t *netif = netdev_priv(dev); | |
15591 | + return netif->can_queue; | |
15592 | +} | |
15593 | + | |
15594 | +static inline int netbk_can_sg(struct net_device *dev) | |
15595 | +{ | |
15596 | + netif_t *netif = netdev_priv(dev); | |
15597 | + return netif->features & NETIF_F_SG; | |
15598 | +} | |
15599 | + | |
15600 | +#endif /* __NETIF__BACKEND__COMMON_H__ */ | |
15601 | Index: head-2008-11-25/drivers/xen/netback/interface.c | |
15602 | =================================================================== | |
15603 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
15604 | +++ head-2008-11-25/drivers/xen/netback/interface.c 2007-06-12 13:13:45.000000000 +0200 | |
15605 | @@ -0,0 +1,336 @@ | |
15606 | +/****************************************************************************** | |
15607 | + * arch/xen/drivers/netif/backend/interface.c | |
15608 | + * | |
15609 | + * Network-device interface management. | |
15610 | + * | |
15611 | + * Copyright (c) 2004-2005, Keir Fraser | |
15612 | + * | |
15613 | + * This program is free software; you can redistribute it and/or | |
15614 | + * modify it under the terms of the GNU General Public License version 2 | |
15615 | + * as published by the Free Software Foundation; or, when distributed | |
15616 | + * separately from the Linux kernel or incorporated into other | |
15617 | + * software packages, subject to the following license: | |
15618 | + * | |
15619 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
15620 | + * of this source file (the "Software"), to deal in the Software without | |
15621 | + * restriction, including without limitation the rights to use, copy, modify, | |
15622 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
15623 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
15624 | + * the following conditions: | |
15625 | + * | |
15626 | + * The above copyright notice and this permission notice shall be included in | |
15627 | + * all copies or substantial portions of the Software. | |
15628 | + * | |
15629 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15630 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15631 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
15632 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
15633 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
15634 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
15635 | + * IN THE SOFTWARE. | |
15636 | + */ | |
15637 | + | |
15638 | +#include "common.h" | |
15639 | +#include <linux/ethtool.h> | |
15640 | +#include <linux/rtnetlink.h> | |
15641 | + | |
15642 | +/* | |
15643 | + * Module parameter 'queue_length': | |
15644 | + * | |
15645 | + * Enables queuing in the network stack when a client has run out of receive | |
15646 | + * descriptors. Although this feature can improve receive bandwidth by avoiding | |
15647 | + * packet loss, it can also result in packets sitting in the 'tx_queue' for | |
15648 | + * unbounded time. This is bad if those packets hold onto foreign resources. | |
15649 | + * For example, consider a packet that holds onto resources belonging to the | |
15650 | + * guest for which it is queued (e.g., packet received on vif1.0, destined for | |
15651 | + * vif1.1 which is not activated in the guest): in this situation the guest | |
15652 | + * will never be destroyed, unless vif1.1 is taken down. To avoid this, we | |
15653 | + * run a timer (tx_queue_timeout) to drain the queue when the interface is | |
15654 | + * blocked. | |
15655 | + */ | |
15656 | +static unsigned long netbk_queue_length = 32; | |
15657 | +module_param_named(queue_length, netbk_queue_length, ulong, 0); | |
15658 | + | |
15659 | +static void __netif_up(netif_t *netif) | |
15660 | +{ | |
15661 | + enable_irq(netif->irq); | |
15662 | + netif_schedule_work(netif); | |
15663 | +} | |
15664 | + | |
15665 | +static void __netif_down(netif_t *netif) | |
15666 | +{ | |
15667 | + disable_irq(netif->irq); | |
15668 | + netif_deschedule_work(netif); | |
15669 | +} | |
15670 | + | |
15671 | +static int net_open(struct net_device *dev) | |
15672 | +{ | |
15673 | + netif_t *netif = netdev_priv(dev); | |
15674 | + if (netback_carrier_ok(netif)) { | |
15675 | + __netif_up(netif); | |
15676 | + netif_start_queue(dev); | |
15677 | + } | |
15678 | + return 0; | |
15679 | +} | |
15680 | + | |
15681 | +static int net_close(struct net_device *dev) | |
15682 | +{ | |
15683 | + netif_t *netif = netdev_priv(dev); | |
15684 | + if (netback_carrier_ok(netif)) | |
15685 | + __netif_down(netif); | |
15686 | + netif_stop_queue(dev); | |
15687 | + return 0; | |
15688 | +} | |
15689 | + | |
15690 | +static int netbk_change_mtu(struct net_device *dev, int mtu) | |
15691 | +{ | |
15692 | + int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; | |
15693 | + | |
15694 | + if (mtu > max) | |
15695 | + return -EINVAL; | |
15696 | + dev->mtu = mtu; | |
15697 | + return 0; | |
15698 | +} | |
15699 | + | |
15700 | +static int netbk_set_sg(struct net_device *dev, u32 data) | |
15701 | +{ | |
15702 | + if (data) { | |
15703 | + netif_t *netif = netdev_priv(dev); | |
15704 | + | |
15705 | + if (!(netif->features & NETIF_F_SG)) | |
15706 | + return -ENOSYS; | |
15707 | + } | |
15708 | + | |
15709 | + return ethtool_op_set_sg(dev, data); | |
15710 | +} | |
15711 | + | |
15712 | +static int netbk_set_tso(struct net_device *dev, u32 data) | |
15713 | +{ | |
15714 | + if (data) { | |
15715 | + netif_t *netif = netdev_priv(dev); | |
15716 | + | |
15717 | + if (!(netif->features & NETIF_F_TSO)) | |
15718 | + return -ENOSYS; | |
15719 | + } | |
15720 | + | |
15721 | + return ethtool_op_set_tso(dev, data); | |
15722 | +} | |
15723 | + | |
15724 | +static struct ethtool_ops network_ethtool_ops = | |
15725 | +{ | |
15726 | + .get_tx_csum = ethtool_op_get_tx_csum, | |
15727 | + .set_tx_csum = ethtool_op_set_tx_csum, | |
15728 | + .get_sg = ethtool_op_get_sg, | |
15729 | + .set_sg = netbk_set_sg, | |
15730 | + .get_tso = ethtool_op_get_tso, | |
15731 | + .set_tso = netbk_set_tso, | |
15732 | + .get_link = ethtool_op_get_link, | |
15733 | +}; | |
15734 | + | |
15735 | +netif_t *netif_alloc(domid_t domid, unsigned int handle) | |
15736 | +{ | |
15737 | + int err = 0; | |
15738 | + struct net_device *dev; | |
15739 | + netif_t *netif; | |
15740 | + char name[IFNAMSIZ] = {}; | |
15741 | + | |
15742 | + snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); | |
15743 | + dev = alloc_netdev(sizeof(netif_t), name, ether_setup); | |
15744 | + if (dev == NULL) { | |
15745 | + DPRINTK("Could not create netif: out of memory\n"); | |
15746 | + return ERR_PTR(-ENOMEM); | |
15747 | + } | |
15748 | + | |
15749 | + netif = netdev_priv(dev); | |
15750 | + memset(netif, 0, sizeof(*netif)); | |
15751 | + netif->domid = domid; | |
15752 | + netif->handle = handle; | |
15753 | + atomic_set(&netif->refcnt, 1); | |
15754 | + init_waitqueue_head(&netif->waiting_to_free); | |
15755 | + netif->dev = dev; | |
15756 | + | |
15757 | + netback_carrier_off(netif); | |
15758 | + | |
15759 | + netif->credit_bytes = netif->remaining_credit = ~0UL; | |
15760 | + netif->credit_usec = 0UL; | |
15761 | + init_timer(&netif->credit_timeout); | |
15762 | + /* Initialize 'expires' now: it's used to track the credit window. */ | |
15763 | + netif->credit_timeout.expires = jiffies; | |
15764 | + | |
15765 | + init_timer(&netif->tx_queue_timeout); | |
15766 | + | |
15767 | + dev->hard_start_xmit = netif_be_start_xmit; | |
15768 | + dev->get_stats = netif_be_get_stats; | |
15769 | + dev->open = net_open; | |
15770 | + dev->stop = net_close; | |
15771 | + dev->change_mtu = netbk_change_mtu; | |
15772 | + dev->features = NETIF_F_IP_CSUM; | |
15773 | + | |
15774 | + SET_ETHTOOL_OPS(dev, &network_ethtool_ops); | |
15775 | + | |
15776 | + dev->tx_queue_len = netbk_queue_length; | |
15777 | + | |
15778 | + /* | |
15779 | + * Initialise a dummy MAC address. We choose the numerically | |
15780 | + * largest non-broadcast address to prevent the address getting | |
15781 | + * stolen by an Ethernet bridge for STP purposes. | |
15782 | + * (FE:FF:FF:FF:FF:FF) | |
15783 | + */ | |
15784 | + memset(dev->dev_addr, 0xFF, ETH_ALEN); | |
15785 | + dev->dev_addr[0] &= ~0x01; | |
15786 | + | |
15787 | + rtnl_lock(); | |
15788 | + err = register_netdevice(dev); | |
15789 | + rtnl_unlock(); | |
15790 | + if (err) { | |
15791 | + DPRINTK("Could not register new net device %s: err=%d\n", | |
15792 | + dev->name, err); | |
15793 | + free_netdev(dev); | |
15794 | + return ERR_PTR(err); | |
15795 | + } | |
15796 | + | |
15797 | + DPRINTK("Successfully created netif\n"); | |
15798 | + return netif; | |
15799 | +} | |
15800 | + | |
15801 | +static int map_frontend_pages( | |
15802 | + netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref) | |
15803 | +{ | |
15804 | + struct gnttab_map_grant_ref op; | |
15805 | + | |
15806 | + gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr, | |
15807 | + GNTMAP_host_map, tx_ring_ref, netif->domid); | |
15808 | + | |
15809 | + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | |
15810 | + BUG(); | |
15811 | + | |
15812 | + if (op.status) { | |
15813 | + DPRINTK(" Gnttab failure mapping tx_ring_ref!\n"); | |
15814 | + return op.status; | |
15815 | + } | |
15816 | + | |
15817 | + netif->tx_shmem_ref = tx_ring_ref; | |
15818 | + netif->tx_shmem_handle = op.handle; | |
15819 | + | |
15820 | + gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr, | |
15821 | + GNTMAP_host_map, rx_ring_ref, netif->domid); | |
15822 | + | |
15823 | + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | |
15824 | + BUG(); | |
15825 | + | |
15826 | + if (op.status) { | |
15827 | + DPRINTK(" Gnttab failure mapping rx_ring_ref!\n"); | |
15828 | + return op.status; | |
15829 | + } | |
15830 | + | |
15831 | + netif->rx_shmem_ref = rx_ring_ref; | |
15832 | + netif->rx_shmem_handle = op.handle; | |
15833 | + | |
15834 | + return 0; | |
15835 | +} | |
15836 | + | |
15837 | +static void unmap_frontend_pages(netif_t *netif) | |
15838 | +{ | |
15839 | + struct gnttab_unmap_grant_ref op; | |
15840 | + | |
15841 | + gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr, | |
15842 | + GNTMAP_host_map, netif->tx_shmem_handle); | |
15843 | + | |
15844 | + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | |
15845 | + BUG(); | |
15846 | + | |
15847 | + gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr, | |
15848 | + GNTMAP_host_map, netif->rx_shmem_handle); | |
15849 | + | |
15850 | + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | |
15851 | + BUG(); | |
15852 | +} | |
15853 | + | |
15854 | +int netif_map(netif_t *netif, unsigned long tx_ring_ref, | |
15855 | + unsigned long rx_ring_ref, unsigned int evtchn) | |
15856 | +{ | |
15857 | + int err = -ENOMEM; | |
15858 | + netif_tx_sring_t *txs; | |
15859 | + netif_rx_sring_t *rxs; | |
15860 | + | |
15861 | + /* Already connected through? */ | |
15862 | + if (netif->irq) | |
15863 | + return 0; | |
15864 | + | |
15865 | + netif->tx_comms_area = alloc_vm_area(PAGE_SIZE); | |
15866 | + if (netif->tx_comms_area == NULL) | |
15867 | + return -ENOMEM; | |
15868 | + netif->rx_comms_area = alloc_vm_area(PAGE_SIZE); | |
15869 | + if (netif->rx_comms_area == NULL) | |
15870 | + goto err_rx; | |
15871 | + | |
15872 | + err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref); | |
15873 | + if (err) | |
15874 | + goto err_map; | |
15875 | + | |
15876 | + err = bind_interdomain_evtchn_to_irqhandler( | |
15877 | + netif->domid, evtchn, netif_be_int, 0, | |
15878 | + netif->dev->name, netif); | |
15879 | + if (err < 0) | |
15880 | + goto err_hypervisor; | |
15881 | + netif->irq = err; | |
15882 | + disable_irq(netif->irq); | |
15883 | + | |
15884 | + txs = (netif_tx_sring_t *)netif->tx_comms_area->addr; | |
15885 | + BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE); | |
15886 | + | |
15887 | + rxs = (netif_rx_sring_t *) | |
15888 | + ((char *)netif->rx_comms_area->addr); | |
15889 | + BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE); | |
15890 | + | |
15891 | + netif->rx_req_cons_peek = 0; | |
15892 | + | |
15893 | + netif_get(netif); | |
15894 | + | |
15895 | + rtnl_lock(); | |
15896 | + netback_carrier_on(netif); | |
15897 | + if (netif_running(netif->dev)) | |
15898 | + __netif_up(netif); | |
15899 | + rtnl_unlock(); | |
15900 | + | |
15901 | + return 0; | |
15902 | +err_hypervisor: | |
15903 | + unmap_frontend_pages(netif); | |
15904 | +err_map: | |
15905 | + free_vm_area(netif->rx_comms_area); | |
15906 | +err_rx: | |
15907 | + free_vm_area(netif->tx_comms_area); | |
15908 | + return err; | |
15909 | +} | |
15910 | + | |
15911 | +void netif_disconnect(netif_t *netif) | |
15912 | +{ | |
15913 | + if (netback_carrier_ok(netif)) { | |
15914 | + rtnl_lock(); | |
15915 | + netback_carrier_off(netif); | |
15916 | + netif_carrier_off(netif->dev); /* discard queued packets */ | |
15917 | + if (netif_running(netif->dev)) | |
15918 | + __netif_down(netif); | |
15919 | + rtnl_unlock(); | |
15920 | + netif_put(netif); | |
15921 | + } | |
15922 | + | |
15923 | + atomic_dec(&netif->refcnt); | |
15924 | + wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0); | |
15925 | + | |
15926 | + del_timer_sync(&netif->credit_timeout); | |
15927 | + del_timer_sync(&netif->tx_queue_timeout); | |
15928 | + | |
15929 | + if (netif->irq) | |
15930 | + unbind_from_irqhandler(netif->irq, netif); | |
15931 | + | |
15932 | + unregister_netdev(netif->dev); | |
15933 | + | |
15934 | + if (netif->tx.sring) { | |
15935 | + unmap_frontend_pages(netif); | |
15936 | + free_vm_area(netif->tx_comms_area); | |
15937 | + free_vm_area(netif->rx_comms_area); | |
15938 | + } | |
15939 | + | |
15940 | + free_netdev(netif->dev); | |
15941 | +} | |
15942 | Index: head-2008-11-25/drivers/xen/netback/loopback.c | |
15943 | =================================================================== | |
15944 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
15945 | +++ head-2008-11-25/drivers/xen/netback/loopback.c 2007-08-06 15:10:49.000000000 +0200 | |
15946 | @@ -0,0 +1,324 @@ | |
15947 | +/****************************************************************************** | |
15948 | + * netback/loopback.c | |
15949 | + * | |
15950 | + * A two-interface loopback device to emulate a local netfront-netback | |
15951 | + * connection. This ensures that local packet delivery looks identical | |
15952 | + * to inter-domain delivery. Most importantly, packets delivered locally | |
15953 | + * originating from other domains will get *copied* when they traverse this | |
15954 | + * driver. This prevents unbounded delays in socket-buffer queues from | |
15955 | + * causing the netback driver to "seize up". | |
15956 | + * | |
15957 | + * This driver creates a symmetric pair of loopback interfaces with names | |
15958 | + * vif0.0 and veth0. The intention is that 'vif0.0' is bound to an Ethernet | |
15959 | + * bridge, just like a proper netback interface, while a local IP interface | |
15960 | + * is configured on 'veth0'. | |
15961 | + * | |
15962 | + * As with a real netback interface, vif0.0 is configured with a suitable | |
15963 | + * dummy MAC address. No default is provided for veth0: a reasonable strategy | |
15964 | + * is to transfer eth0's MAC address to veth0, and give eth0 a dummy address | |
15965 | + * (to avoid confusing the Etherbridge). | |
15966 | + * | |
15967 | + * Copyright (c) 2005 K A Fraser | |
15968 | + * | |
15969 | + * This program is free software; you can redistribute it and/or | |
15970 | + * modify it under the terms of the GNU General Public License version 2 | |
15971 | + * as published by the Free Software Foundation; or, when distributed | |
15972 | + * separately from the Linux kernel or incorporated into other | |
15973 | + * software packages, subject to the following license: | |
15974 | + * | |
15975 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
15976 | + * of this source file (the "Software"), to deal in the Software without | |
15977 | + * restriction, including without limitation the rights to use, copy, modify, | |
15978 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
15979 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
15980 | + * the following conditions: | |
15981 | + * | |
15982 | + * The above copyright notice and this permission notice shall be included in | |
15983 | + * all copies or substantial portions of the Software. | |
15984 | + * | |
15985 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15986 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15987 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
15988 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
15989 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
15990 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
15991 | + * IN THE SOFTWARE. | |
15992 | + */ | |
15993 | + | |
15994 | +#include <linux/module.h> | |
15995 | +#include <linux/netdevice.h> | |
15996 | +#include <linux/inetdevice.h> | |
15997 | +#include <linux/etherdevice.h> | |
15998 | +#include <linux/skbuff.h> | |
15999 | +#include <linux/ethtool.h> | |
16000 | +#include <net/dst.h> | |
16001 | +#include <net/xfrm.h> /* secpath_reset() */ | |
16002 | +#include <asm/hypervisor.h> /* is_initial_xendomain() */ | |
16003 | + | |
16004 | +static int nloopbacks = -1; | |
16005 | +module_param(nloopbacks, int, 0); | |
16006 | +MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create"); | |
16007 | + | |
16008 | +struct net_private { | |
16009 | + struct net_device *loopback_dev; | |
16010 | + struct net_device_stats stats; | |
16011 | +}; | |
16012 | + | |
16013 | +static int loopback_open(struct net_device *dev) | |
16014 | +{ | |
16015 | + struct net_private *np = netdev_priv(dev); | |
16016 | + memset(&np->stats, 0, sizeof(np->stats)); | |
16017 | + netif_start_queue(dev); | |
16018 | + return 0; | |
16019 | +} | |
16020 | + | |
16021 | +static int loopback_close(struct net_device *dev) | |
16022 | +{ | |
16023 | + netif_stop_queue(dev); | |
16024 | + return 0; | |
16025 | +} | |
16026 | + | |
16027 | +#ifdef CONFIG_X86 | |
16028 | +static int is_foreign(unsigned long pfn) | |
16029 | +{ | |
16030 | + /* NB. Play it safe for auto-translation mode. */ | |
16031 | + return (xen_feature(XENFEAT_auto_translated_physmap) || | |
16032 | + (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT)); | |
16033 | +} | |
16034 | +#else | |
16035 | +/* How to detect a foreign mapping? Play it safe. */ | |
16036 | +#define is_foreign(pfn) (1) | |
16037 | +#endif | |
16038 | + | |
16039 | +static int skb_remove_foreign_references(struct sk_buff *skb) | |
16040 | +{ | |
16041 | + struct page *page; | |
16042 | + unsigned long pfn; | |
16043 | + int i, off; | |
16044 | + char *vaddr; | |
16045 | + | |
16046 | + BUG_ON(skb_shinfo(skb)->frag_list); | |
16047 | + | |
16048 | + if (skb_cloned(skb) && | |
16049 | + unlikely(pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) | |
16050 | + return 0; | |
16051 | + | |
16052 | + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | |
16053 | + pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page); | |
16054 | + if (!is_foreign(pfn)) | |
16055 | + continue; | |
16056 | + | |
16057 | + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); | |
16058 | + if (unlikely(!page)) | |
16059 | + return 0; | |
16060 | + | |
16061 | + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); | |
16062 | + off = skb_shinfo(skb)->frags[i].page_offset; | |
16063 | + memcpy(page_address(page) + off, | |
16064 | + vaddr + off, | |
16065 | + skb_shinfo(skb)->frags[i].size); | |
16066 | + kunmap_skb_frag(vaddr); | |
16067 | + | |
16068 | + put_page(skb_shinfo(skb)->frags[i].page); | |
16069 | + skb_shinfo(skb)->frags[i].page = page; | |
16070 | + } | |
16071 | + | |
16072 | + return 1; | |
16073 | +} | |
16074 | + | |
16075 | +static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev) | |
16076 | +{ | |
16077 | + struct net_private *np = netdev_priv(dev); | |
16078 | + | |
16079 | + if (!skb_remove_foreign_references(skb)) { | |
16080 | + np->stats.tx_dropped++; | |
16081 | + dev_kfree_skb(skb); | |
16082 | + return 0; | |
16083 | + } | |
16084 | + | |
16085 | + dst_release(skb->dst); | |
16086 | + skb->dst = NULL; | |
16087 | + | |
16088 | + skb_orphan(skb); | |
16089 | + | |
16090 | + np->stats.tx_bytes += skb->len; | |
16091 | + np->stats.tx_packets++; | |
16092 | + | |
16093 | + /* Switch to loopback context. */ | |
16094 | + dev = np->loopback_dev; | |
16095 | + np = netdev_priv(dev); | |
16096 | + | |
16097 | + np->stats.rx_bytes += skb->len; | |
16098 | + np->stats.rx_packets++; | |
16099 | + | |
16100 | + if (skb->ip_summed == CHECKSUM_HW) { | |
16101 | + /* Defer checksum calculation. */ | |
16102 | + skb->proto_csum_blank = 1; | |
16103 | + /* Must be a local packet: assert its integrity. */ | |
16104 | + skb->proto_data_valid = 1; | |
16105 | + } | |
16106 | + | |
16107 | + skb->ip_summed = skb->proto_data_valid ? | |
16108 | + CHECKSUM_UNNECESSARY : CHECKSUM_NONE; | |
16109 | + | |
16110 | + skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */ | |
16111 | + skb->protocol = eth_type_trans(skb, dev); | |
16112 | + skb->dev = dev; | |
16113 | + dev->last_rx = jiffies; | |
16114 | + | |
16115 | + /* Flush netfilter context: rx'ed skbuffs not expected to have any. */ | |
16116 | + nf_reset(skb); | |
16117 | + secpath_reset(skb); | |
16118 | + | |
16119 | + netif_rx(skb); | |
16120 | + | |
16121 | + return 0; | |
16122 | +} | |
16123 | + | |
16124 | +static struct net_device_stats *loopback_get_stats(struct net_device *dev) | |
16125 | +{ | |
16126 | + struct net_private *np = netdev_priv(dev); | |
16127 | + return &np->stats; | |
16128 | +} | |
16129 | + | |
16130 | +static struct ethtool_ops network_ethtool_ops = | |
16131 | +{ | |
16132 | + .get_tx_csum = ethtool_op_get_tx_csum, | |
16133 | + .set_tx_csum = ethtool_op_set_tx_csum, | |
16134 | + .get_sg = ethtool_op_get_sg, | |
16135 | + .set_sg = ethtool_op_set_sg, | |
16136 | + .get_tso = ethtool_op_get_tso, | |
16137 | + .set_tso = ethtool_op_set_tso, | |
16138 | + .get_link = ethtool_op_get_link, | |
16139 | +}; | |
16140 | + | |
16141 | +/* | |
16142 | + * Nothing to do here. Virtual interface is point-to-point and the | |
16143 | + * physical interface is probably promiscuous anyway. | |
16144 | + */ | |
16145 | +static void loopback_set_multicast_list(struct net_device *dev) | |
16146 | +{ | |
16147 | +} | |
16148 | + | |
16149 | +static void loopback_construct(struct net_device *dev, struct net_device *lo) | |
16150 | +{ | |
16151 | + struct net_private *np = netdev_priv(dev); | |
16152 | + | |
16153 | + np->loopback_dev = lo; | |
16154 | + | |
16155 | + dev->open = loopback_open; | |
16156 | + dev->stop = loopback_close; | |
16157 | + dev->hard_start_xmit = loopback_start_xmit; | |
16158 | + dev->get_stats = loopback_get_stats; | |
16159 | + dev->set_multicast_list = loopback_set_multicast_list; | |
16160 | + dev->change_mtu = NULL; /* allow arbitrary mtu */ | |
16161 | + | |
16162 | + dev->tx_queue_len = 0; | |
16163 | + | |
16164 | + dev->features = (NETIF_F_HIGHDMA | | |
16165 | + NETIF_F_LLTX | | |
16166 | + NETIF_F_TSO | | |
16167 | + NETIF_F_SG | | |
16168 | + NETIF_F_IP_CSUM); | |
16169 | + | |
16170 | + SET_ETHTOOL_OPS(dev, &network_ethtool_ops); | |
16171 | + | |
16172 | + /* | |
16173 | + * We do not set a jumbo MTU on the interface. Otherwise the network | |
16174 | + * stack will try to send large packets that will get dropped by the | |
16175 | + * Ethernet bridge (unless the physical Ethernet interface is | |
16176 | + * configured to transfer jumbo packets). If a larger MTU is desired | |
16177 | + * then the system administrator can specify it using the 'ifconfig' | |
16178 | + * command. | |
16179 | + */ | |
16180 | + /*dev->mtu = 16*1024;*/ | |
16181 | +} | |
16182 | + | |
16183 | +static int __init make_loopback(int i) | |
16184 | +{ | |
16185 | + struct net_device *dev1, *dev2; | |
16186 | + char dev_name[IFNAMSIZ]; | |
16187 | + int err = -ENOMEM; | |
16188 | + | |
16189 | + sprintf(dev_name, "vif0.%d", i); | |
16190 | + dev1 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup); | |
16191 | + if (!dev1) | |
16192 | + return err; | |
16193 | + | |
16194 | + sprintf(dev_name, "veth%d", i); | |
16195 | + dev2 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup); | |
16196 | + if (!dev2) | |
16197 | + goto fail_netdev2; | |
16198 | + | |
16199 | + loopback_construct(dev1, dev2); | |
16200 | + loopback_construct(dev2, dev1); | |
16201 | + | |
16202 | + /* | |
16203 | + * Initialise a dummy MAC address for the 'dummy backend' interface. We | |
16204 | + * choose the numerically largest non-broadcast address to prevent the | |
16205 | + * address getting stolen by an Ethernet bridge for STP purposes. | |
16206 | + */ | |
16207 | + memset(dev1->dev_addr, 0xFF, ETH_ALEN); | |
16208 | + dev1->dev_addr[0] &= ~0x01; | |
16209 | + | |
16210 | + if ((err = register_netdev(dev1)) != 0) | |
16211 | + goto fail; | |
16212 | + | |
16213 | + if ((err = register_netdev(dev2)) != 0) { | |
16214 | + unregister_netdev(dev1); | |
16215 | + goto fail; | |
16216 | + } | |
16217 | + | |
16218 | + return 0; | |
16219 | + | |
16220 | + fail: | |
16221 | + free_netdev(dev2); | |
16222 | + fail_netdev2: | |
16223 | + free_netdev(dev1); | |
16224 | + return err; | |
16225 | +} | |
16226 | + | |
16227 | +static void __exit clean_loopback(int i) | |
16228 | +{ | |
16229 | + struct net_device *dev1, *dev2; | |
16230 | + char dev_name[IFNAMSIZ]; | |
16231 | + | |
16232 | + sprintf(dev_name, "vif0.%d", i); | |
16233 | + dev1 = dev_get_by_name(dev_name); | |
16234 | + sprintf(dev_name, "veth%d", i); | |
16235 | + dev2 = dev_get_by_name(dev_name); | |
16236 | + if (dev1 && dev2) { | |
16237 | + unregister_netdev(dev2); | |
16238 | + unregister_netdev(dev1); | |
16239 | + free_netdev(dev2); | |
16240 | + free_netdev(dev1); | |
16241 | + } | |
16242 | +} | |
16243 | + | |
16244 | +static int __init loopback_init(void) | |
16245 | +{ | |
16246 | + int i, err = 0; | |
16247 | + | |
16248 | + if (nloopbacks == -1) | |
16249 | + nloopbacks = is_initial_xendomain() ? 4 : 0; | |
16250 | + | |
16251 | + for (i = 0; i < nloopbacks; i++) | |
16252 | + if ((err = make_loopback(i)) != 0) | |
16253 | + break; | |
16254 | + | |
16255 | + return err; | |
16256 | +} | |
16257 | + | |
16258 | +module_init(loopback_init); | |
16259 | + | |
16260 | +static void __exit loopback_exit(void) | |
16261 | +{ | |
16262 | + int i; | |
16263 | + | |
16264 | + for (i = nloopbacks; i-- > 0; ) | |
16265 | + clean_loopback(i); | |
16266 | +} | |
16267 | + | |
16268 | +module_exit(loopback_exit); | |
16269 | + | |
16270 | +MODULE_LICENSE("Dual BSD/GPL"); | |
16271 | Index: head-2008-11-25/drivers/xen/netback/netback.c | |
16272 | =================================================================== | |
16273 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
16274 | +++ head-2008-11-25/drivers/xen/netback/netback.c 2008-02-20 09:32:49.000000000 +0100 | |
16275 | @@ -0,0 +1,1614 @@ | |
16276 | +/****************************************************************************** | |
16277 | + * drivers/xen/netback/netback.c | |
16278 | + * | |
16279 | + * Back-end of the driver for virtual network devices. This portion of the | |
16280 | + * driver exports a 'unified' network-device interface that can be accessed | |
16281 | + * by any operating system that implements a compatible front end. A | |
16282 | + * reference front-end implementation can be found in: | |
16283 | + * drivers/xen/netfront/netfront.c | |
16284 | + * | |
16285 | + * Copyright (c) 2002-2005, K A Fraser | |
16286 | + * | |
16287 | + * This program is free software; you can redistribute it and/or | |
16288 | + * modify it under the terms of the GNU General Public License version 2 | |
16289 | + * as published by the Free Software Foundation; or, when distributed | |
16290 | + * separately from the Linux kernel or incorporated into other | |
16291 | + * software packages, subject to the following license: | |
16292 | + * | |
16293 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
16294 | + * of this source file (the "Software"), to deal in the Software without | |
16295 | + * restriction, including without limitation the rights to use, copy, modify, | |
16296 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
16297 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
16298 | + * the following conditions: | |
16299 | + * | |
16300 | + * The above copyright notice and this permission notice shall be included in | |
16301 | + * all copies or substantial portions of the Software. | |
16302 | + * | |
16303 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16304 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16305 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
16306 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
16307 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
16308 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
16309 | + * IN THE SOFTWARE. | |
16310 | + */ | |
16311 | + | |
16312 | +#include "common.h" | |
16313 | +#include <xen/balloon.h> | |
16314 | +#include <xen/interface/memory.h> | |
16315 | + | |
16316 | +/*define NETBE_DEBUG_INTERRUPT*/ | |
16317 | + | |
16318 | +/* extra field used in struct page */ | |
16319 | +#define netif_page_index(pg) (*(long *)&(pg)->mapping) | |
16320 | + | |
16321 | +struct netbk_rx_meta { | |
16322 | + skb_frag_t frag; | |
16323 | + int id; | |
16324 | + u8 copy:1; | |
16325 | +}; | |
16326 | + | |
16327 | +struct netbk_tx_pending_inuse { | |
16328 | + struct list_head list; | |
16329 | + unsigned long alloc_time; | |
16330 | +}; | |
16331 | + | |
16332 | +static void netif_idx_release(u16 pending_idx); | |
16333 | +static void netif_page_release(struct page *page); | |
16334 | +static void make_tx_response(netif_t *netif, | |
16335 | + netif_tx_request_t *txp, | |
16336 | + s8 st); | |
16337 | +static netif_rx_response_t *make_rx_response(netif_t *netif, | |
16338 | + u16 id, | |
16339 | + s8 st, | |
16340 | + u16 offset, | |
16341 | + u16 size, | |
16342 | + u16 flags); | |
16343 | + | |
16344 | +static void net_tx_action(unsigned long unused); | |
16345 | +static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0); | |
16346 | + | |
16347 | +static void net_rx_action(unsigned long unused); | |
16348 | +static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0); | |
16349 | + | |
16350 | +static struct timer_list net_timer; | |
16351 | +static struct timer_list netbk_tx_pending_timer; | |
16352 | + | |
16353 | +#define MAX_PENDING_REQS 256 | |
16354 | + | |
16355 | +static struct sk_buff_head rx_queue; | |
16356 | + | |
16357 | +static struct page **mmap_pages; | |
16358 | +static inline unsigned long idx_to_pfn(unsigned int idx) | |
16359 | +{ | |
16360 | + return page_to_pfn(mmap_pages[idx]); | |
16361 | +} | |
16362 | + | |
16363 | +static inline unsigned long idx_to_kaddr(unsigned int idx) | |
16364 | +{ | |
16365 | + return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx)); | |
16366 | +} | |
16367 | + | |
16368 | +#define PKT_PROT_LEN 64 | |
16369 | + | |
16370 | +static struct pending_tx_info { | |
16371 | + netif_tx_request_t req; | |
16372 | + netif_t *netif; | |
16373 | +} pending_tx_info[MAX_PENDING_REQS]; | |
16374 | +static u16 pending_ring[MAX_PENDING_REQS]; | |
16375 | +typedef unsigned int PEND_RING_IDX; | |
16376 | +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) | |
16377 | +static PEND_RING_IDX pending_prod, pending_cons; | |
16378 | +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) | |
16379 | + | |
16380 | +/* Freed TX SKBs get batched on this ring before return to pending_ring. */ | |
16381 | +static u16 dealloc_ring[MAX_PENDING_REQS]; | |
16382 | +static PEND_RING_IDX dealloc_prod, dealloc_cons; | |
16383 | + | |
16384 | +/* Doubly-linked list of in-use pending entries. */ | |
16385 | +static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; | |
16386 | +static LIST_HEAD(pending_inuse_head); | |
16387 | + | |
16388 | +static struct sk_buff_head tx_queue; | |
16389 | + | |
16390 | +static grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; | |
16391 | +static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS]; | |
16392 | +static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS]; | |
16393 | + | |
16394 | +static struct list_head net_schedule_list; | |
16395 | +static spinlock_t net_schedule_list_lock; | |
16396 | + | |
16397 | +#define MAX_MFN_ALLOC 64 | |
16398 | +static unsigned long mfn_list[MAX_MFN_ALLOC]; | |
16399 | +static unsigned int alloc_index = 0; | |
16400 | + | |
16401 | +/* Setting this allows the safe use of this driver without netloop. */ | |
16402 | +static int MODPARM_copy_skb = 1; | |
16403 | +module_param_named(copy_skb, MODPARM_copy_skb, bool, 0); | |
16404 | +MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop"); | |
16405 | + | |
16406 | +int netbk_copy_skb_mode; | |
16407 | + | |
16408 | +static inline unsigned long alloc_mfn(void) | |
16409 | +{ | |
16410 | + BUG_ON(alloc_index == 0); | |
16411 | + return mfn_list[--alloc_index]; | |
16412 | +} | |
16413 | + | |
16414 | +static int check_mfn(int nr) | |
16415 | +{ | |
16416 | + struct xen_memory_reservation reservation = { | |
16417 | + .extent_order = 0, | |
16418 | + .domid = DOMID_SELF | |
16419 | + }; | |
16420 | + int rc; | |
16421 | + | |
16422 | + if (likely(alloc_index >= nr)) | |
16423 | + return 0; | |
16424 | + | |
16425 | + set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index); | |
16426 | + reservation.nr_extents = MAX_MFN_ALLOC - alloc_index; | |
16427 | + rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation); | |
16428 | + if (likely(rc > 0)) | |
16429 | + alloc_index += rc; | |
16430 | + | |
16431 | + return alloc_index >= nr ? 0 : -ENOMEM; | |
16432 | +} | |
16433 | + | |
16434 | +static inline void maybe_schedule_tx_action(void) | |
16435 | +{ | |
16436 | + smp_mb(); | |
16437 | + if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && | |
16438 | + !list_empty(&net_schedule_list)) | |
16439 | + tasklet_schedule(&net_tx_tasklet); | |
16440 | +} | |
16441 | + | |
16442 | +static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) | |
16443 | +{ | |
16444 | + struct skb_shared_info *ninfo; | |
16445 | + struct sk_buff *nskb; | |
16446 | + unsigned long offset; | |
16447 | + int ret; | |
16448 | + int len; | |
16449 | + int headlen; | |
16450 | + | |
16451 | + BUG_ON(skb_shinfo(skb)->frag_list != NULL); | |
16452 | + | |
16453 | + nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN); | |
16454 | + if (unlikely(!nskb)) | |
16455 | + goto err; | |
16456 | + | |
16457 | + skb_reserve(nskb, 16 + NET_IP_ALIGN); | |
16458 | + headlen = nskb->end - nskb->data; | |
16459 | + if (headlen > skb_headlen(skb)) | |
16460 | + headlen = skb_headlen(skb); | |
16461 | + ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); | |
16462 | + BUG_ON(ret); | |
16463 | + | |
16464 | + ninfo = skb_shinfo(nskb); | |
16465 | + ninfo->gso_size = skb_shinfo(skb)->gso_size; | |
16466 | + ninfo->gso_type = skb_shinfo(skb)->gso_type; | |
16467 | + | |
16468 | + offset = headlen; | |
16469 | + len = skb->len - headlen; | |
16470 | + | |
16471 | + nskb->len = skb->len; | |
16472 | + nskb->data_len = len; | |
16473 | + nskb->truesize += len; | |
16474 | + | |
16475 | + while (len) { | |
16476 | + struct page *page; | |
16477 | + int copy; | |
16478 | + int zero; | |
16479 | + | |
16480 | + if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) { | |
16481 | + dump_stack(); | |
16482 | + goto err_free; | |
16483 | + } | |
16484 | + | |
16485 | + copy = len >= PAGE_SIZE ? PAGE_SIZE : len; | |
16486 | + zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO; | |
16487 | + | |
16488 | + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero); | |
16489 | + if (unlikely(!page)) | |
16490 | + goto err_free; | |
16491 | + | |
16492 | + ret = skb_copy_bits(skb, offset, page_address(page), copy); | |
16493 | + BUG_ON(ret); | |
16494 | + | |
16495 | + ninfo->frags[ninfo->nr_frags].page = page; | |
16496 | + ninfo->frags[ninfo->nr_frags].page_offset = 0; | |
16497 | + ninfo->frags[ninfo->nr_frags].size = copy; | |
16498 | + ninfo->nr_frags++; | |
16499 | + | |
16500 | + offset += copy; | |
16501 | + len -= copy; | |
16502 | + } | |
16503 | + | |
16504 | + offset = nskb->data - skb->data; | |
16505 | + | |
16506 | + nskb->h.raw = skb->h.raw + offset; | |
16507 | + nskb->nh.raw = skb->nh.raw + offset; | |
16508 | + nskb->mac.raw = skb->mac.raw + offset; | |
16509 | + | |
16510 | + return nskb; | |
16511 | + | |
16512 | + err_free: | |
16513 | + kfree_skb(nskb); | |
16514 | + err: | |
16515 | + return NULL; | |
16516 | +} | |
16517 | + | |
16518 | +static inline int netbk_max_required_rx_slots(netif_t *netif) | |
16519 | +{ | |
16520 | + if (netif->features & (NETIF_F_SG|NETIF_F_TSO)) | |
16521 | + return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */ | |
16522 | + return 1; /* all in one */ | |
16523 | +} | |
16524 | + | |
16525 | +static inline int netbk_queue_full(netif_t *netif) | |
16526 | +{ | |
16527 | + RING_IDX peek = netif->rx_req_cons_peek; | |
16528 | + RING_IDX needed = netbk_max_required_rx_slots(netif); | |
16529 | + | |
16530 | + return ((netif->rx.sring->req_prod - peek) < needed) || | |
16531 | + ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed); | |
16532 | +} | |
16533 | + | |
16534 | +static void tx_queue_callback(unsigned long data) | |
16535 | +{ | |
16536 | + netif_t *netif = (netif_t *)data; | |
16537 | + if (netif_schedulable(netif)) | |
16538 | + netif_wake_queue(netif->dev); | |
16539 | +} | |
16540 | + | |
16541 | +int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) | |
16542 | +{ | |
16543 | + netif_t *netif = netdev_priv(dev); | |
16544 | + | |
16545 | + BUG_ON(skb->dev != dev); | |
16546 | + | |
16547 | + /* Drop the packet if the target domain has no receive buffers. */ | |
16548 | + if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif))) | |
16549 | + goto drop; | |
16550 | + | |
16551 | + /* | |
16552 | + * Copy the packet here if it's destined for a flipping interface | |
16553 | + * but isn't flippable (e.g. extra references to data). | |
16554 | + * XXX For now we also copy skbuffs whose head crosses a page | |
16555 | + * boundary, because netbk_gop_skb can't handle them. | |
16556 | + */ | |
16557 | + if (!netif->copying_receiver || | |
16558 | + ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) { | |
16559 | + struct sk_buff *nskb = netbk_copy_skb(skb); | |
16560 | + if ( unlikely(nskb == NULL) ) | |
16561 | + goto drop; | |
16562 | + /* Copy only the header fields we use in this driver. */ | |
16563 | + nskb->dev = skb->dev; | |
16564 | + nskb->ip_summed = skb->ip_summed; | |
16565 | + nskb->proto_data_valid = skb->proto_data_valid; | |
16566 | + dev_kfree_skb(skb); | |
16567 | + skb = nskb; | |
16568 | + } | |
16569 | + | |
16570 | + netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 + | |
16571 | + !!skb_shinfo(skb)->gso_size; | |
16572 | + netif_get(netif); | |
16573 | + | |
16574 | + if (netbk_can_queue(dev) && netbk_queue_full(netif)) { | |
16575 | + netif->rx.sring->req_event = netif->rx_req_cons_peek + | |
16576 | + netbk_max_required_rx_slots(netif); | |
16577 | + mb(); /* request notification /then/ check & stop the queue */ | |
16578 | + if (netbk_queue_full(netif)) { | |
16579 | + netif_stop_queue(dev); | |
16580 | + /* | |
16581 | + * Schedule 500ms timeout to restart the queue, thus | |
16582 | + * ensuring that an inactive queue will be drained. | |
16583 | + * Packets will be immediately be dropped until more | |
16584 | + * receive buffers become available (see | |
16585 | + * netbk_queue_full() check above). | |
16586 | + */ | |
16587 | + netif->tx_queue_timeout.data = (unsigned long)netif; | |
16588 | + netif->tx_queue_timeout.function = tx_queue_callback; | |
16589 | + __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2); | |
16590 | + } | |
16591 | + } | |
16592 | + | |
16593 | + skb_queue_tail(&rx_queue, skb); | |
16594 | + tasklet_schedule(&net_rx_tasklet); | |
16595 | + | |
16596 | + return 0; | |
16597 | + | |
16598 | + drop: | |
16599 | + netif->stats.tx_dropped++; | |
16600 | + dev_kfree_skb(skb); | |
16601 | + return 0; | |
16602 | +} | |
16603 | + | |
16604 | +#if 0 | |
16605 | +static void xen_network_done_notify(void) | |
16606 | +{ | |
16607 | + static struct net_device *eth0_dev = NULL; | |
16608 | + if (unlikely(eth0_dev == NULL)) | |
16609 | + eth0_dev = __dev_get_by_name("eth0"); | |
16610 | + netif_rx_schedule(eth0_dev); | |
16611 | +} | |
16612 | +/* | |
16613 | + * Add following to poll() function in NAPI driver (Tigon3 is example): | |
16614 | + * if ( xen_network_done() ) | |
16615 | + * tg3_enable_ints(tp); | |
16616 | + */ | |
16617 | +int xen_network_done(void) | |
16618 | +{ | |
16619 | + return skb_queue_empty(&rx_queue); | |
16620 | +} | |
16621 | +#endif | |
16622 | + | |
16623 | +struct netrx_pending_operations { | |
16624 | + unsigned trans_prod, trans_cons; | |
16625 | + unsigned mmu_prod, mmu_mcl; | |
16626 | + unsigned mcl_prod, mcl_cons; | |
16627 | + unsigned copy_prod, copy_cons; | |
16628 | + unsigned meta_prod, meta_cons; | |
16629 | + mmu_update_t *mmu; | |
16630 | + gnttab_transfer_t *trans; | |
16631 | + gnttab_copy_t *copy; | |
16632 | + multicall_entry_t *mcl; | |
16633 | + struct netbk_rx_meta *meta; | |
16634 | +}; | |
16635 | + | |
16636 | +/* Set up the grant operations for this fragment. If it's a flipping | |
16637 | + interface, we also set up the unmap request from here. */ | |
16638 | +static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta, | |
16639 | + int i, struct netrx_pending_operations *npo, | |
16640 | + struct page *page, unsigned long size, | |
16641 | + unsigned long offset) | |
16642 | +{ | |
16643 | + mmu_update_t *mmu; | |
16644 | + gnttab_transfer_t *gop; | |
16645 | + gnttab_copy_t *copy_gop; | |
16646 | + multicall_entry_t *mcl; | |
16647 | + netif_rx_request_t *req; | |
16648 | + unsigned long old_mfn, new_mfn; | |
16649 | + | |
16650 | + old_mfn = virt_to_mfn(page_address(page)); | |
16651 | + | |
16652 | + req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i); | |
16653 | + if (netif->copying_receiver) { | |
16654 | + /* The fragment needs to be copied rather than | |
16655 | + flipped. */ | |
16656 | + meta->copy = 1; | |
16657 | + copy_gop = npo->copy + npo->copy_prod++; | |
16658 | + copy_gop->flags = GNTCOPY_dest_gref; | |
16659 | + if (PageForeign(page)) { | |
16660 | + struct pending_tx_info *src_pend = | |
16661 | + &pending_tx_info[netif_page_index(page)]; | |
16662 | + copy_gop->source.domid = src_pend->netif->domid; | |
16663 | + copy_gop->source.u.ref = src_pend->req.gref; | |
16664 | + copy_gop->flags |= GNTCOPY_source_gref; | |
16665 | + } else { | |
16666 | + copy_gop->source.domid = DOMID_SELF; | |
16667 | + copy_gop->source.u.gmfn = old_mfn; | |
16668 | + } | |
16669 | + copy_gop->source.offset = offset; | |
16670 | + copy_gop->dest.domid = netif->domid; | |
16671 | + copy_gop->dest.offset = 0; | |
16672 | + copy_gop->dest.u.ref = req->gref; | |
16673 | + copy_gop->len = size; | |
16674 | + } else { | |
16675 | + meta->copy = 0; | |
16676 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
16677 | + new_mfn = alloc_mfn(); | |
16678 | + | |
16679 | + /* | |
16680 | + * Set the new P2M table entry before | |
16681 | + * reassigning the old data page. Heed the | |
16682 | + * comment in pgtable-2level.h:pte_page(). :-) | |
16683 | + */ | |
16684 | + set_phys_to_machine(page_to_pfn(page), new_mfn); | |
16685 | + | |
16686 | + mcl = npo->mcl + npo->mcl_prod++; | |
16687 | + MULTI_update_va_mapping(mcl, | |
16688 | + (unsigned long)page_address(page), | |
16689 | + pfn_pte_ma(new_mfn, PAGE_KERNEL), | |
16690 | + 0); | |
16691 | + | |
16692 | + mmu = npo->mmu + npo->mmu_prod++; | |
16693 | + mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | | |
16694 | + MMU_MACHPHYS_UPDATE; | |
16695 | + mmu->val = page_to_pfn(page); | |
16696 | + } | |
16697 | + | |
16698 | + gop = npo->trans + npo->trans_prod++; | |
16699 | + gop->mfn = old_mfn; | |
16700 | + gop->domid = netif->domid; | |
16701 | + gop->ref = req->gref; | |
16702 | + } | |
16703 | + return req->id; | |
16704 | +} | |
16705 | + | |
16706 | +static void netbk_gop_skb(struct sk_buff *skb, | |
16707 | + struct netrx_pending_operations *npo) | |
16708 | +{ | |
16709 | + netif_t *netif = netdev_priv(skb->dev); | |
16710 | + int nr_frags = skb_shinfo(skb)->nr_frags; | |
16711 | + int i; | |
16712 | + int extra; | |
16713 | + struct netbk_rx_meta *head_meta, *meta; | |
16714 | + | |
16715 | + head_meta = npo->meta + npo->meta_prod++; | |
16716 | + head_meta->frag.page_offset = skb_shinfo(skb)->gso_type; | |
16717 | + head_meta->frag.size = skb_shinfo(skb)->gso_size; | |
16718 | + extra = !!head_meta->frag.size + 1; | |
16719 | + | |
16720 | + for (i = 0; i < nr_frags; i++) { | |
16721 | + meta = npo->meta + npo->meta_prod++; | |
16722 | + meta->frag = skb_shinfo(skb)->frags[i]; | |
16723 | + meta->id = netbk_gop_frag(netif, meta, i + extra, npo, | |
16724 | + meta->frag.page, | |
16725 | + meta->frag.size, | |
16726 | + meta->frag.page_offset); | |
16727 | + } | |
16728 | + | |
16729 | + /* | |
16730 | + * This must occur at the end to ensure that we don't trash skb_shinfo | |
16731 | + * until we're done. We know that the head doesn't cross a page | |
16732 | + * boundary because such packets get copied in netif_be_start_xmit. | |
16733 | + */ | |
16734 | + head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo, | |
16735 | + virt_to_page(skb->data), | |
16736 | + skb_headlen(skb), | |
16737 | + offset_in_page(skb->data)); | |
16738 | + | |
16739 | + netif->rx.req_cons += nr_frags + extra; | |
16740 | +} | |
16741 | + | |
16742 | +static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta) | |
16743 | +{ | |
16744 | + int i; | |
16745 | + | |
16746 | + for (i = 0; i < nr_frags; i++) | |
16747 | + put_page(meta[i].frag.page); | |
16748 | +} | |
16749 | + | |
16750 | +/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was | |
16751 | + used to set up the operations on the top of | |
16752 | + netrx_pending_operations, which have since been done. Check that | |
16753 | + they didn't give any errors and advance over them. */ | |
16754 | +static int netbk_check_gop(int nr_frags, domid_t domid, | |
16755 | + struct netrx_pending_operations *npo) | |
16756 | +{ | |
16757 | + multicall_entry_t *mcl; | |
16758 | + gnttab_transfer_t *gop; | |
16759 | + gnttab_copy_t *copy_op; | |
16760 | + int status = NETIF_RSP_OKAY; | |
16761 | + int i; | |
16762 | + | |
16763 | + for (i = 0; i <= nr_frags; i++) { | |
16764 | + if (npo->meta[npo->meta_cons + i].copy) { | |
16765 | + copy_op = npo->copy + npo->copy_cons++; | |
16766 | + if (copy_op->status != GNTST_okay) { | |
16767 | + DPRINTK("Bad status %d from copy to DOM%d.\n", | |
16768 | + copy_op->status, domid); | |
16769 | + status = NETIF_RSP_ERROR; | |
16770 | + } | |
16771 | + } else { | |
16772 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
16773 | + mcl = npo->mcl + npo->mcl_cons++; | |
16774 | + /* The update_va_mapping() must not fail. */ | |
16775 | + BUG_ON(mcl->result != 0); | |
16776 | + } | |
16777 | + | |
16778 | + gop = npo->trans + npo->trans_cons++; | |
16779 | + /* Check the reassignment error code. */ | |
16780 | + if (gop->status != 0) { | |
16781 | + DPRINTK("Bad status %d from grant transfer to DOM%u\n", | |
16782 | + gop->status, domid); | |
16783 | + /* | |
16784 | + * Page no longer belongs to us unless | |
16785 | + * GNTST_bad_page, but that should be | |
16786 | + * a fatal error anyway. | |
16787 | + */ | |
16788 | + BUG_ON(gop->status == GNTST_bad_page); | |
16789 | + status = NETIF_RSP_ERROR; | |
16790 | + } | |
16791 | + } | |
16792 | + } | |
16793 | + | |
16794 | + return status; | |
16795 | +} | |
16796 | + | |
16797 | +static void netbk_add_frag_responses(netif_t *netif, int status, | |
16798 | + struct netbk_rx_meta *meta, int nr_frags) | |
16799 | +{ | |
16800 | + int i; | |
16801 | + unsigned long offset; | |
16802 | + | |
16803 | + for (i = 0; i < nr_frags; i++) { | |
16804 | + int id = meta[i].id; | |
16805 | + int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data; | |
16806 | + | |
16807 | + if (meta[i].copy) | |
16808 | + offset = 0; | |
16809 | + else | |
16810 | + offset = meta[i].frag.page_offset; | |
16811 | + make_rx_response(netif, id, status, offset, | |
16812 | + meta[i].frag.size, flags); | |
16813 | + } | |
16814 | +} | |
16815 | + | |
16816 | +static void net_rx_action(unsigned long unused) | |
16817 | +{ | |
16818 | + netif_t *netif = NULL; | |
16819 | + s8 status; | |
16820 | + u16 id, irq, flags; | |
16821 | + netif_rx_response_t *resp; | |
16822 | + multicall_entry_t *mcl; | |
16823 | + struct sk_buff_head rxq; | |
16824 | + struct sk_buff *skb; | |
16825 | + int notify_nr = 0; | |
16826 | + int ret; | |
16827 | + int nr_frags; | |
16828 | + int count; | |
16829 | + unsigned long offset; | |
16830 | + | |
16831 | + /* | |
16832 | + * Putting hundreds of bytes on the stack is considered rude. | |
16833 | + * Static works because a tasklet can only be on one CPU at any time. | |
16834 | + */ | |
16835 | + static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3]; | |
16836 | + static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; | |
16837 | + static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE]; | |
16838 | + static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE]; | |
16839 | + static unsigned char rx_notify[NR_IRQS]; | |
16840 | + static u16 notify_list[NET_RX_RING_SIZE]; | |
16841 | + static struct netbk_rx_meta meta[NET_RX_RING_SIZE]; | |
16842 | + | |
16843 | + struct netrx_pending_operations npo = { | |
16844 | + mmu: rx_mmu, | |
16845 | + trans: grant_trans_op, | |
16846 | + copy: grant_copy_op, | |
16847 | + mcl: rx_mcl, | |
16848 | + meta: meta}; | |
16849 | + | |
16850 | + skb_queue_head_init(&rxq); | |
16851 | + | |
16852 | + count = 0; | |
16853 | + | |
16854 | + while ((skb = skb_dequeue(&rx_queue)) != NULL) { | |
16855 | + nr_frags = skb_shinfo(skb)->nr_frags; | |
16856 | + *(int *)skb->cb = nr_frags; | |
16857 | + | |
16858 | + if (!xen_feature(XENFEAT_auto_translated_physmap) && | |
16859 | + !((netif_t *)netdev_priv(skb->dev))->copying_receiver && | |
16860 | + check_mfn(nr_frags + 1)) { | |
16861 | + /* Memory squeeze? Back off for an arbitrary while. */ | |
16862 | + if ( net_ratelimit() ) | |
16863 | + WPRINTK("Memory squeeze in netback " | |
16864 | + "driver.\n"); | |
16865 | + mod_timer(&net_timer, jiffies + HZ); | |
16866 | + skb_queue_head(&rx_queue, skb); | |
16867 | + break; | |
16868 | + } | |
16869 | + | |
16870 | + netbk_gop_skb(skb, &npo); | |
16871 | + | |
16872 | + count += nr_frags + 1; | |
16873 | + | |
16874 | + __skb_queue_tail(&rxq, skb); | |
16875 | + | |
16876 | + /* Filled the batch queue? */ | |
16877 | + if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE) | |
16878 | + break; | |
16879 | + } | |
16880 | + | |
16881 | + BUG_ON(npo.meta_prod > ARRAY_SIZE(meta)); | |
16882 | + | |
16883 | + npo.mmu_mcl = npo.mcl_prod; | |
16884 | + if (npo.mcl_prod) { | |
16885 | + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); | |
16886 | + BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu)); | |
16887 | + mcl = npo.mcl + npo.mcl_prod++; | |
16888 | + | |
16889 | + BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping); | |
16890 | + mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; | |
16891 | + | |
16892 | + mcl->op = __HYPERVISOR_mmu_update; | |
16893 | + mcl->args[0] = (unsigned long)rx_mmu; | |
16894 | + mcl->args[1] = npo.mmu_prod; | |
16895 | + mcl->args[2] = 0; | |
16896 | + mcl->args[3] = DOMID_SELF; | |
16897 | + } | |
16898 | + | |
16899 | + if (npo.trans_prod) { | |
16900 | + BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op)); | |
16901 | + mcl = npo.mcl + npo.mcl_prod++; | |
16902 | + mcl->op = __HYPERVISOR_grant_table_op; | |
16903 | + mcl->args[0] = GNTTABOP_transfer; | |
16904 | + mcl->args[1] = (unsigned long)grant_trans_op; | |
16905 | + mcl->args[2] = npo.trans_prod; | |
16906 | + } | |
16907 | + | |
16908 | + if (npo.copy_prod) { | |
16909 | + BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op)); | |
16910 | + mcl = npo.mcl + npo.mcl_prod++; | |
16911 | + mcl->op = __HYPERVISOR_grant_table_op; | |
16912 | + mcl->args[0] = GNTTABOP_copy; | |
16913 | + mcl->args[1] = (unsigned long)grant_copy_op; | |
16914 | + mcl->args[2] = npo.copy_prod; | |
16915 | + } | |
16916 | + | |
16917 | + /* Nothing to do? */ | |
16918 | + if (!npo.mcl_prod) | |
16919 | + return; | |
16920 | + | |
16921 | + BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl)); | |
16922 | + | |
16923 | + ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod); | |
16924 | + BUG_ON(ret != 0); | |
16925 | + /* The mmu_machphys_update() must not fail. */ | |
16926 | + BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0); | |
16927 | + | |
16928 | + while ((skb = __skb_dequeue(&rxq)) != NULL) { | |
16929 | + nr_frags = *(int *)skb->cb; | |
16930 | + | |
16931 | + netif = netdev_priv(skb->dev); | |
16932 | + /* We can't rely on skb_release_data to release the | |
16933 | + pages used by fragments for us, since it tries to | |
16934 | + touch the pages in the fraglist. If we're in | |
16935 | + flipping mode, that doesn't work. In copying mode, | |
16936 | + we still have access to all of the pages, and so | |
16937 | + it's safe to let release_data deal with it. */ | |
16938 | + /* (Freeing the fragments is safe since we copy | |
16939 | + non-linear skbs destined for flipping interfaces) */ | |
16940 | + if (!netif->copying_receiver) { | |
16941 | + atomic_set(&(skb_shinfo(skb)->dataref), 1); | |
16942 | + skb_shinfo(skb)->frag_list = NULL; | |
16943 | + skb_shinfo(skb)->nr_frags = 0; | |
16944 | + netbk_free_pages(nr_frags, meta + npo.meta_cons + 1); | |
16945 | + } | |
16946 | + | |
16947 | + netif->stats.tx_bytes += skb->len; | |
16948 | + netif->stats.tx_packets++; | |
16949 | + | |
16950 | + status = netbk_check_gop(nr_frags, netif->domid, &npo); | |
16951 | + | |
16952 | + id = meta[npo.meta_cons].id; | |
16953 | + flags = nr_frags ? NETRXF_more_data : 0; | |
16954 | + | |
16955 | + if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ | |
16956 | + flags |= NETRXF_csum_blank | NETRXF_data_validated; | |
16957 | + else if (skb->proto_data_valid) /* remote but checksummed? */ | |
16958 | + flags |= NETRXF_data_validated; | |
16959 | + | |
16960 | + if (meta[npo.meta_cons].copy) | |
16961 | + offset = 0; | |
16962 | + else | |
16963 | + offset = offset_in_page(skb->data); | |
16964 | + resp = make_rx_response(netif, id, status, offset, | |
16965 | + skb_headlen(skb), flags); | |
16966 | + | |
16967 | + if (meta[npo.meta_cons].frag.size) { | |
16968 | + struct netif_extra_info *gso = | |
16969 | + (struct netif_extra_info *) | |
16970 | + RING_GET_RESPONSE(&netif->rx, | |
16971 | + netif->rx.rsp_prod_pvt++); | |
16972 | + | |
16973 | + resp->flags |= NETRXF_extra_info; | |
16974 | + | |
16975 | + gso->u.gso.size = meta[npo.meta_cons].frag.size; | |
16976 | + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; | |
16977 | + gso->u.gso.pad = 0; | |
16978 | + gso->u.gso.features = 0; | |
16979 | + | |
16980 | + gso->type = XEN_NETIF_EXTRA_TYPE_GSO; | |
16981 | + gso->flags = 0; | |
16982 | + } | |
16983 | + | |
16984 | + netbk_add_frag_responses(netif, status, | |
16985 | + meta + npo.meta_cons + 1, | |
16986 | + nr_frags); | |
16987 | + | |
16988 | + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); | |
16989 | + irq = netif->irq; | |
16990 | + if (ret && !rx_notify[irq]) { | |
16991 | + rx_notify[irq] = 1; | |
16992 | + notify_list[notify_nr++] = irq; | |
16993 | + } | |
16994 | + | |
16995 | + if (netif_queue_stopped(netif->dev) && | |
16996 | + netif_schedulable(netif) && | |
16997 | + !netbk_queue_full(netif)) | |
16998 | + netif_wake_queue(netif->dev); | |
16999 | + | |
17000 | + netif_put(netif); | |
17001 | + dev_kfree_skb(skb); | |
17002 | + npo.meta_cons += nr_frags + 1; | |
17003 | + } | |
17004 | + | |
17005 | + while (notify_nr != 0) { | |
17006 | + irq = notify_list[--notify_nr]; | |
17007 | + rx_notify[irq] = 0; | |
17008 | + notify_remote_via_irq(irq); | |
17009 | + } | |
17010 | + | |
17011 | + /* More work to do? */ | |
17012 | + if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer)) | |
17013 | + tasklet_schedule(&net_rx_tasklet); | |
17014 | +#if 0 | |
17015 | + else | |
17016 | + xen_network_done_notify(); | |
17017 | +#endif | |
17018 | +} | |
17019 | + | |
17020 | +static void net_alarm(unsigned long unused) | |
17021 | +{ | |
17022 | + tasklet_schedule(&net_rx_tasklet); | |
17023 | +} | |
17024 | + | |
17025 | +static void netbk_tx_pending_timeout(unsigned long unused) | |
17026 | +{ | |
17027 | + tasklet_schedule(&net_tx_tasklet); | |
17028 | +} | |
17029 | + | |
17030 | +struct net_device_stats *netif_be_get_stats(struct net_device *dev) | |
17031 | +{ | |
17032 | + netif_t *netif = netdev_priv(dev); | |
17033 | + return &netif->stats; | |
17034 | +} | |
17035 | + | |
17036 | +static int __on_net_schedule_list(netif_t *netif) | |
17037 | +{ | |
17038 | + return netif->list.next != NULL; | |
17039 | +} | |
17040 | + | |
17041 | +static void remove_from_net_schedule_list(netif_t *netif) | |
17042 | +{ | |
17043 | + spin_lock_irq(&net_schedule_list_lock); | |
17044 | + if (likely(__on_net_schedule_list(netif))) { | |
17045 | + list_del(&netif->list); | |
17046 | + netif->list.next = NULL; | |
17047 | + netif_put(netif); | |
17048 | + } | |
17049 | + spin_unlock_irq(&net_schedule_list_lock); | |
17050 | +} | |
17051 | + | |
17052 | +static void add_to_net_schedule_list_tail(netif_t *netif) | |
17053 | +{ | |
17054 | + if (__on_net_schedule_list(netif)) | |
17055 | + return; | |
17056 | + | |
17057 | + spin_lock_irq(&net_schedule_list_lock); | |
17058 | + if (!__on_net_schedule_list(netif) && | |
17059 | + likely(netif_schedulable(netif))) { | |
17060 | + list_add_tail(&netif->list, &net_schedule_list); | |
17061 | + netif_get(netif); | |
17062 | + } | |
17063 | + spin_unlock_irq(&net_schedule_list_lock); | |
17064 | +} | |
17065 | + | |
17066 | +/* | |
17067 | + * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER: | |
17068 | + * If this driver is pipelining transmit requests then we can be very | |
17069 | + * aggressive in avoiding new-packet notifications -- frontend only needs to | |
17070 | + * send a notification if there are no outstanding unreceived responses. | |
17071 | + * If we may be buffer transmit buffers for any reason then we must be rather | |
17072 | + * more conservative and treat this as the final check for pending work. | |
17073 | + */ | |
17074 | +void netif_schedule_work(netif_t *netif) | |
17075 | +{ | |
17076 | + int more_to_do; | |
17077 | + | |
17078 | +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER | |
17079 | + more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx); | |
17080 | +#else | |
17081 | + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); | |
17082 | +#endif | |
17083 | + | |
17084 | + if (more_to_do) { | |
17085 | + add_to_net_schedule_list_tail(netif); | |
17086 | + maybe_schedule_tx_action(); | |
17087 | + } | |
17088 | +} | |
17089 | + | |
17090 | +void netif_deschedule_work(netif_t *netif) | |
17091 | +{ | |
17092 | + remove_from_net_schedule_list(netif); | |
17093 | +} | |
17094 | + | |
17095 | + | |
17096 | +static void tx_add_credit(netif_t *netif) | |
17097 | +{ | |
17098 | + unsigned long max_burst, max_credit; | |
17099 | + | |
17100 | + /* | |
17101 | + * Allow a burst big enough to transmit a jumbo packet of up to 128kB. | |
17102 | + * Otherwise the interface can seize up due to insufficient credit. | |
17103 | + */ | |
17104 | + max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size; | |
17105 | + max_burst = min(max_burst, 131072UL); | |
17106 | + max_burst = max(max_burst, netif->credit_bytes); | |
17107 | + | |
17108 | + /* Take care that adding a new chunk of credit doesn't wrap to zero. */ | |
17109 | + max_credit = netif->remaining_credit + netif->credit_bytes; | |
17110 | + if (max_credit < netif->remaining_credit) | |
17111 | + max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ | |
17112 | + | |
17113 | + netif->remaining_credit = min(max_credit, max_burst); | |
17114 | +} | |
17115 | + | |
17116 | +static void tx_credit_callback(unsigned long data) | |
17117 | +{ | |
17118 | + netif_t *netif = (netif_t *)data; | |
17119 | + tx_add_credit(netif); | |
17120 | + netif_schedule_work(netif); | |
17121 | +} | |
17122 | + | |
17123 | +static inline int copy_pending_req(PEND_RING_IDX pending_idx) | |
17124 | +{ | |
17125 | + return gnttab_copy_grant_page(grant_tx_handle[pending_idx], | |
17126 | + &mmap_pages[pending_idx]); | |
17127 | +} | |
17128 | + | |
17129 | +inline static void net_tx_action_dealloc(void) | |
17130 | +{ | |
17131 | + struct netbk_tx_pending_inuse *inuse, *n; | |
17132 | + gnttab_unmap_grant_ref_t *gop; | |
17133 | + u16 pending_idx; | |
17134 | + PEND_RING_IDX dc, dp; | |
17135 | + netif_t *netif; | |
17136 | + int ret; | |
17137 | + LIST_HEAD(list); | |
17138 | + | |
17139 | + dc = dealloc_cons; | |
17140 | + gop = tx_unmap_ops; | |
17141 | + | |
17142 | + /* | |
17143 | + * Free up any grants we have finished using | |
17144 | + */ | |
17145 | + do { | |
17146 | + dp = dealloc_prod; | |
17147 | + | |
17148 | + /* Ensure we see all indices enqueued by netif_idx_release(). */ | |
17149 | + smp_rmb(); | |
17150 | + | |
17151 | + while (dc != dp) { | |
17152 | + unsigned long pfn; | |
17153 | + | |
17154 | + pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; | |
17155 | + list_move_tail(&pending_inuse[pending_idx].list, &list); | |
17156 | + | |
17157 | + pfn = idx_to_pfn(pending_idx); | |
17158 | + /* Already unmapped? */ | |
17159 | + if (!phys_to_machine_mapping_valid(pfn)) | |
17160 | + continue; | |
17161 | + | |
17162 | + gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx), | |
17163 | + GNTMAP_host_map, | |
17164 | + grant_tx_handle[pending_idx]); | |
17165 | + gop++; | |
17166 | + } | |
17167 | + | |
17168 | + if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB || | |
17169 | + list_empty(&pending_inuse_head)) | |
17170 | + break; | |
17171 | + | |
17172 | + /* Copy any entries that have been pending for too long. */ | |
17173 | + list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) { | |
17174 | + if (time_after(inuse->alloc_time + HZ / 2, jiffies)) | |
17175 | + break; | |
17176 | + | |
17177 | + switch (copy_pending_req(inuse - pending_inuse)) { | |
17178 | + case 0: | |
17179 | + list_move_tail(&inuse->list, &list); | |
17180 | + continue; | |
17181 | + case -EBUSY: | |
17182 | + list_del_init(&inuse->list); | |
17183 | + continue; | |
17184 | + case -ENOENT: | |
17185 | + continue; | |
17186 | + } | |
17187 | + | |
17188 | + break; | |
17189 | + } | |
17190 | + } while (dp != dealloc_prod); | |
17191 | + | |
17192 | + dealloc_cons = dc; | |
17193 | + | |
17194 | + ret = HYPERVISOR_grant_table_op( | |
17195 | + GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops); | |
17196 | + BUG_ON(ret); | |
17197 | + | |
17198 | + list_for_each_entry_safe(inuse, n, &list, list) { | |
17199 | + pending_idx = inuse - pending_inuse; | |
17200 | + | |
17201 | + netif = pending_tx_info[pending_idx].netif; | |
17202 | + | |
17203 | + make_tx_response(netif, &pending_tx_info[pending_idx].req, | |
17204 | + NETIF_RSP_OKAY); | |
17205 | + | |
17206 | + /* Ready for next use. */ | |
17207 | + gnttab_reset_grant_page(mmap_pages[pending_idx]); | |
17208 | + | |
17209 | + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; | |
17210 | + | |
17211 | + netif_put(netif); | |
17212 | + | |
17213 | + list_del_init(&inuse->list); | |
17214 | + } | |
17215 | +} | |
17216 | + | |
17217 | +static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end) | |
17218 | +{ | |
17219 | + RING_IDX cons = netif->tx.req_cons; | |
17220 | + | |
17221 | + do { | |
17222 | + make_tx_response(netif, txp, NETIF_RSP_ERROR); | |
17223 | + if (cons >= end) | |
17224 | + break; | |
17225 | + txp = RING_GET_REQUEST(&netif->tx, cons++); | |
17226 | + } while (1); | |
17227 | + netif->tx.req_cons = cons; | |
17228 | + netif_schedule_work(netif); | |
17229 | + netif_put(netif); | |
17230 | +} | |
17231 | + | |
17232 | +static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first, | |
17233 | + netif_tx_request_t *txp, int work_to_do) | |
17234 | +{ | |
17235 | + RING_IDX cons = netif->tx.req_cons; | |
17236 | + int frags = 0; | |
17237 | + | |
17238 | + if (!(first->flags & NETTXF_more_data)) | |
17239 | + return 0; | |
17240 | + | |
17241 | + do { | |
17242 | + if (frags >= work_to_do) { | |
17243 | + DPRINTK("Need more frags\n"); | |
17244 | + return -frags; | |
17245 | + } | |
17246 | + | |
17247 | + if (unlikely(frags >= MAX_SKB_FRAGS)) { | |
17248 | + DPRINTK("Too many frags\n"); | |
17249 | + return -frags; | |
17250 | + } | |
17251 | + | |
17252 | + memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags), | |
17253 | + sizeof(*txp)); | |
17254 | + if (txp->size > first->size) { | |
17255 | + DPRINTK("Frags galore\n"); | |
17256 | + return -frags; | |
17257 | + } | |
17258 | + | |
17259 | + first->size -= txp->size; | |
17260 | + frags++; | |
17261 | + | |
17262 | + if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { | |
17263 | + DPRINTK("txp->offset: %x, size: %u\n", | |
17264 | + txp->offset, txp->size); | |
17265 | + return -frags; | |
17266 | + } | |
17267 | + } while ((txp++)->flags & NETTXF_more_data); | |
17268 | + | |
17269 | + return frags; | |
17270 | +} | |
17271 | + | |
17272 | +static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif, | |
17273 | + struct sk_buff *skb, | |
17274 | + netif_tx_request_t *txp, | |
17275 | + gnttab_map_grant_ref_t *mop) | |
17276 | +{ | |
17277 | + struct skb_shared_info *shinfo = skb_shinfo(skb); | |
17278 | + skb_frag_t *frags = shinfo->frags; | |
17279 | + unsigned long pending_idx = *((u16 *)skb->data); | |
17280 | + int i, start; | |
17281 | + | |
17282 | + /* Skip first skb fragment if it is on same page as header fragment. */ | |
17283 | + start = ((unsigned long)shinfo->frags[0].page == pending_idx); | |
17284 | + | |
17285 | + for (i = start; i < shinfo->nr_frags; i++, txp++) { | |
17286 | + pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)]; | |
17287 | + | |
17288 | + gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx), | |
17289 | + GNTMAP_host_map | GNTMAP_readonly, | |
17290 | + txp->gref, netif->domid); | |
17291 | + | |
17292 | + memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); | |
17293 | + netif_get(netif); | |
17294 | + pending_tx_info[pending_idx].netif = netif; | |
17295 | + frags[i].page = (void *)pending_idx; | |
17296 | + } | |
17297 | + | |
17298 | + return mop; | |
17299 | +} | |
17300 | + | |
17301 | +static int netbk_tx_check_mop(struct sk_buff *skb, | |
17302 | + gnttab_map_grant_ref_t **mopp) | |
17303 | +{ | |
17304 | + gnttab_map_grant_ref_t *mop = *mopp; | |
17305 | + int pending_idx = *((u16 *)skb->data); | |
17306 | + netif_t *netif = pending_tx_info[pending_idx].netif; | |
17307 | + netif_tx_request_t *txp; | |
17308 | + struct skb_shared_info *shinfo = skb_shinfo(skb); | |
17309 | + int nr_frags = shinfo->nr_frags; | |
17310 | + int i, err, start; | |
17311 | + | |
17312 | + /* Check status of header. */ | |
17313 | + err = mop->status; | |
17314 | + if (unlikely(err)) { | |
17315 | + txp = &pending_tx_info[pending_idx].req; | |
17316 | + make_tx_response(netif, txp, NETIF_RSP_ERROR); | |
17317 | + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; | |
17318 | + netif_put(netif); | |
17319 | + } else { | |
17320 | + set_phys_to_machine( | |
17321 | + __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT, | |
17322 | + FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); | |
17323 | + grant_tx_handle[pending_idx] = mop->handle; | |
17324 | + } | |
17325 | + | |
17326 | + /* Skip first skb fragment if it is on same page as header fragment. */ | |
17327 | + start = ((unsigned long)shinfo->frags[0].page == pending_idx); | |
17328 | + | |
17329 | + for (i = start; i < nr_frags; i++) { | |
17330 | + int j, newerr; | |
17331 | + | |
17332 | + pending_idx = (unsigned long)shinfo->frags[i].page; | |
17333 | + | |
17334 | + /* Check error status: if okay then remember grant handle. */ | |
17335 | + newerr = (++mop)->status; | |
17336 | + if (likely(!newerr)) { | |
17337 | + set_phys_to_machine( | |
17338 | + __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT, | |
17339 | + FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); | |
17340 | + grant_tx_handle[pending_idx] = mop->handle; | |
17341 | + /* Had a previous error? Invalidate this fragment. */ | |
17342 | + if (unlikely(err)) | |
17343 | + netif_idx_release(pending_idx); | |
17344 | + continue; | |
17345 | + } | |
17346 | + | |
17347 | + /* Error on this fragment: respond to client with an error. */ | |
17348 | + txp = &pending_tx_info[pending_idx].req; | |
17349 | + make_tx_response(netif, txp, NETIF_RSP_ERROR); | |
17350 | + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; | |
17351 | + netif_put(netif); | |
17352 | + | |
17353 | + /* Not the first error? Preceding frags already invalidated. */ | |
17354 | + if (err) | |
17355 | + continue; | |
17356 | + | |
17357 | + /* First error: invalidate header and preceding fragments. */ | |
17358 | + pending_idx = *((u16 *)skb->data); | |
17359 | + netif_idx_release(pending_idx); | |
17360 | + for (j = start; j < i; j++) { | |
17361 | + pending_idx = (unsigned long)shinfo->frags[i].page; | |
17362 | + netif_idx_release(pending_idx); | |
17363 | + } | |
17364 | + | |
17365 | + /* Remember the error: invalidate all subsequent fragments. */ | |
17366 | + err = newerr; | |
17367 | + } | |
17368 | + | |
17369 | + *mopp = mop + 1; | |
17370 | + return err; | |
17371 | +} | |
17372 | + | |
17373 | +static void netbk_fill_frags(struct sk_buff *skb) | |
17374 | +{ | |
17375 | + struct skb_shared_info *shinfo = skb_shinfo(skb); | |
17376 | + int nr_frags = shinfo->nr_frags; | |
17377 | + int i; | |
17378 | + | |
17379 | + for (i = 0; i < nr_frags; i++) { | |
17380 | + skb_frag_t *frag = shinfo->frags + i; | |
17381 | + netif_tx_request_t *txp; | |
17382 | + unsigned long pending_idx; | |
17383 | + | |
17384 | + pending_idx = (unsigned long)frag->page; | |
17385 | + | |
17386 | + pending_inuse[pending_idx].alloc_time = jiffies; | |
17387 | + list_add_tail(&pending_inuse[pending_idx].list, | |
17388 | + &pending_inuse_head); | |
17389 | + | |
17390 | + txp = &pending_tx_info[pending_idx].req; | |
17391 | + frag->page = virt_to_page(idx_to_kaddr(pending_idx)); | |
17392 | + frag->size = txp->size; | |
17393 | + frag->page_offset = txp->offset; | |
17394 | + | |
17395 | + skb->len += txp->size; | |
17396 | + skb->data_len += txp->size; | |
17397 | + skb->truesize += txp->size; | |
17398 | + } | |
17399 | +} | |
17400 | + | |
17401 | +int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras, | |
17402 | + int work_to_do) | |
17403 | +{ | |
17404 | + struct netif_extra_info extra; | |
17405 | + RING_IDX cons = netif->tx.req_cons; | |
17406 | + | |
17407 | + do { | |
17408 | + if (unlikely(work_to_do-- <= 0)) { | |
17409 | + DPRINTK("Missing extra info\n"); | |
17410 | + return -EBADR; | |
17411 | + } | |
17412 | + | |
17413 | + memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons), | |
17414 | + sizeof(extra)); | |
17415 | + if (unlikely(!extra.type || | |
17416 | + extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { | |
17417 | + netif->tx.req_cons = ++cons; | |
17418 | + DPRINTK("Invalid extra type: %d\n", extra.type); | |
17419 | + return -EINVAL; | |
17420 | + } | |
17421 | + | |
17422 | + memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); | |
17423 | + netif->tx.req_cons = ++cons; | |
17424 | + } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); | |
17425 | + | |
17426 | + return work_to_do; | |
17427 | +} | |
17428 | + | |
17429 | +static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso) | |
17430 | +{ | |
17431 | + if (!gso->u.gso.size) { | |
17432 | + DPRINTK("GSO size must not be zero.\n"); | |
17433 | + return -EINVAL; | |
17434 | + } | |
17435 | + | |
17436 | + /* Currently only TCPv4 S.O. is supported. */ | |
17437 | + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { | |
17438 | + DPRINTK("Bad GSO type %d.\n", gso->u.gso.type); | |
17439 | + return -EINVAL; | |
17440 | + } | |
17441 | + | |
17442 | + skb_shinfo(skb)->gso_size = gso->u.gso.size; | |
17443 | + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; | |
17444 | + | |
17445 | + /* Header must be checked, and gso_segs computed. */ | |
17446 | + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; | |
17447 | + skb_shinfo(skb)->gso_segs = 0; | |
17448 | + | |
17449 | + return 0; | |
17450 | +} | |
17451 | + | |
17452 | +/* Called after netfront has transmitted */ | |
17453 | +static void net_tx_action(unsigned long unused) | |
17454 | +{ | |
17455 | + struct list_head *ent; | |
17456 | + struct sk_buff *skb; | |
17457 | + netif_t *netif; | |
17458 | + netif_tx_request_t txreq; | |
17459 | + netif_tx_request_t txfrags[MAX_SKB_FRAGS]; | |
17460 | + struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; | |
17461 | + u16 pending_idx; | |
17462 | + RING_IDX i; | |
17463 | + gnttab_map_grant_ref_t *mop; | |
17464 | + unsigned int data_len; | |
17465 | + int ret, work_to_do; | |
17466 | + | |
17467 | + if (dealloc_cons != dealloc_prod) | |
17468 | + net_tx_action_dealloc(); | |
17469 | + | |
17470 | + mop = tx_map_ops; | |
17471 | + while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && | |
17472 | + !list_empty(&net_schedule_list)) { | |
17473 | + /* Get a netif from the list with work to do. */ | |
17474 | + ent = net_schedule_list.next; | |
17475 | + netif = list_entry(ent, netif_t, list); | |
17476 | + netif_get(netif); | |
17477 | + remove_from_net_schedule_list(netif); | |
17478 | + | |
17479 | + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); | |
17480 | + if (!work_to_do) { | |
17481 | + netif_put(netif); | |
17482 | + continue; | |
17483 | + } | |
17484 | + | |
17485 | + i = netif->tx.req_cons; | |
17486 | + rmb(); /* Ensure that we see the request before we copy it. */ | |
17487 | + memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq)); | |
17488 | + | |
17489 | + /* Credit-based scheduling. */ | |
17490 | + if (txreq.size > netif->remaining_credit) { | |
17491 | + unsigned long now = jiffies; | |
17492 | + unsigned long next_credit = | |
17493 | + netif->credit_timeout.expires + | |
17494 | + msecs_to_jiffies(netif->credit_usec / 1000); | |
17495 | + | |
17496 | + /* Timer could already be pending in rare cases. */ | |
17497 | + if (timer_pending(&netif->credit_timeout)) { | |
17498 | + netif_put(netif); | |
17499 | + continue; | |
17500 | + } | |
17501 | + | |
17502 | + /* Passed the point where we can replenish credit? */ | |
17503 | + if (time_after_eq(now, next_credit)) { | |
17504 | + netif->credit_timeout.expires = now; | |
17505 | + tx_add_credit(netif); | |
17506 | + } | |
17507 | + | |
17508 | + /* Still too big to send right now? Set a callback. */ | |
17509 | + if (txreq.size > netif->remaining_credit) { | |
17510 | + netif->credit_timeout.data = | |
17511 | + (unsigned long)netif; | |
17512 | + netif->credit_timeout.function = | |
17513 | + tx_credit_callback; | |
17514 | + __mod_timer(&netif->credit_timeout, | |
17515 | + next_credit); | |
17516 | + netif_put(netif); | |
17517 | + continue; | |
17518 | + } | |
17519 | + } | |
17520 | + netif->remaining_credit -= txreq.size; | |
17521 | + | |
17522 | + work_to_do--; | |
17523 | + netif->tx.req_cons = ++i; | |
17524 | + | |
17525 | + memset(extras, 0, sizeof(extras)); | |
17526 | + if (txreq.flags & NETTXF_extra_info) { | |
17527 | + work_to_do = netbk_get_extras(netif, extras, | |
17528 | + work_to_do); | |
17529 | + i = netif->tx.req_cons; | |
17530 | + if (unlikely(work_to_do < 0)) { | |
17531 | + netbk_tx_err(netif, &txreq, i); | |
17532 | + continue; | |
17533 | + } | |
17534 | + } | |
17535 | + | |
17536 | + ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do); | |
17537 | + if (unlikely(ret < 0)) { | |
17538 | + netbk_tx_err(netif, &txreq, i - ret); | |
17539 | + continue; | |
17540 | + } | |
17541 | + i += ret; | |
17542 | + | |
17543 | + if (unlikely(txreq.size < ETH_HLEN)) { | |
17544 | + DPRINTK("Bad packet size: %d\n", txreq.size); | |
17545 | + netbk_tx_err(netif, &txreq, i); | |
17546 | + continue; | |
17547 | + } | |
17548 | + | |
17549 | + /* No crossing a page as the payload mustn't fragment. */ | |
17550 | + if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { | |
17551 | + DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", | |
17552 | + txreq.offset, txreq.size, | |
17553 | + (txreq.offset &~PAGE_MASK) + txreq.size); | |
17554 | + netbk_tx_err(netif, &txreq, i); | |
17555 | + continue; | |
17556 | + } | |
17557 | + | |
17558 | + pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; | |
17559 | + | |
17560 | + data_len = (txreq.size > PKT_PROT_LEN && | |
17561 | + ret < MAX_SKB_FRAGS) ? | |
17562 | + PKT_PROT_LEN : txreq.size; | |
17563 | + | |
17564 | + skb = alloc_skb(data_len + 16 + NET_IP_ALIGN, | |
17565 | + GFP_ATOMIC | __GFP_NOWARN); | |
17566 | + if (unlikely(skb == NULL)) { | |
17567 | + DPRINTK("Can't allocate a skb in start_xmit.\n"); | |
17568 | + netbk_tx_err(netif, &txreq, i); | |
17569 | + break; | |
17570 | + } | |
17571 | + | |
17572 | + /* Packets passed to netif_rx() must have some headroom. */ | |
17573 | + skb_reserve(skb, 16 + NET_IP_ALIGN); | |
17574 | + | |
17575 | + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { | |
17576 | + struct netif_extra_info *gso; | |
17577 | + gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; | |
17578 | + | |
17579 | + if (netbk_set_skb_gso(skb, gso)) { | |
17580 | + kfree_skb(skb); | |
17581 | + netbk_tx_err(netif, &txreq, i); | |
17582 | + continue; | |
17583 | + } | |
17584 | + } | |
17585 | + | |
17586 | + gnttab_set_map_op(mop, idx_to_kaddr(pending_idx), | |
17587 | + GNTMAP_host_map | GNTMAP_readonly, | |
17588 | + txreq.gref, netif->domid); | |
17589 | + mop++; | |
17590 | + | |
17591 | + memcpy(&pending_tx_info[pending_idx].req, | |
17592 | + &txreq, sizeof(txreq)); | |
17593 | + pending_tx_info[pending_idx].netif = netif; | |
17594 | + *((u16 *)skb->data) = pending_idx; | |
17595 | + | |
17596 | + __skb_put(skb, data_len); | |
17597 | + | |
17598 | + skb_shinfo(skb)->nr_frags = ret; | |
17599 | + if (data_len < txreq.size) { | |
17600 | + skb_shinfo(skb)->nr_frags++; | |
17601 | + skb_shinfo(skb)->frags[0].page = | |
17602 | + (void *)(unsigned long)pending_idx; | |
17603 | + } else { | |
17604 | + /* Discriminate from any valid pending_idx value. */ | |
17605 | + skb_shinfo(skb)->frags[0].page = (void *)~0UL; | |
17606 | + } | |
17607 | + | |
17608 | + __skb_queue_tail(&tx_queue, skb); | |
17609 | + | |
17610 | + pending_cons++; | |
17611 | + | |
17612 | + mop = netbk_get_requests(netif, skb, txfrags, mop); | |
17613 | + | |
17614 | + netif->tx.req_cons = i; | |
17615 | + netif_schedule_work(netif); | |
17616 | + | |
17617 | + if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops)) | |
17618 | + break; | |
17619 | + } | |
17620 | + | |
17621 | + if (mop == tx_map_ops) | |
17622 | + return; | |
17623 | + | |
17624 | + ret = HYPERVISOR_grant_table_op( | |
17625 | + GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops); | |
17626 | + BUG_ON(ret); | |
17627 | + | |
17628 | + mop = tx_map_ops; | |
17629 | + while ((skb = __skb_dequeue(&tx_queue)) != NULL) { | |
17630 | + netif_tx_request_t *txp; | |
17631 | + | |
17632 | + pending_idx = *((u16 *)skb->data); | |
17633 | + netif = pending_tx_info[pending_idx].netif; | |
17634 | + txp = &pending_tx_info[pending_idx].req; | |
17635 | + | |
17636 | + /* Check the remap error code. */ | |
17637 | + if (unlikely(netbk_tx_check_mop(skb, &mop))) { | |
17638 | + DPRINTK("netback grant failed.\n"); | |
17639 | + skb_shinfo(skb)->nr_frags = 0; | |
17640 | + kfree_skb(skb); | |
17641 | + continue; | |
17642 | + } | |
17643 | + | |
17644 | + data_len = skb->len; | |
17645 | + memcpy(skb->data, | |
17646 | + (void *)(idx_to_kaddr(pending_idx)|txp->offset), | |
17647 | + data_len); | |
17648 | + if (data_len < txp->size) { | |
17649 | + /* Append the packet payload as a fragment. */ | |
17650 | + txp->offset += data_len; | |
17651 | + txp->size -= data_len; | |
17652 | + } else { | |
17653 | + /* Schedule a response immediately. */ | |
17654 | + netif_idx_release(pending_idx); | |
17655 | + } | |
17656 | + | |
17657 | + /* | |
17658 | + * Old frontends do not assert data_validated but we | |
17659 | + * can infer it from csum_blank so test both flags. | |
17660 | + */ | |
17661 | + if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) { | |
17662 | + skb->ip_summed = CHECKSUM_UNNECESSARY; | |
17663 | + skb->proto_data_valid = 1; | |
17664 | + } else { | |
17665 | + skb->ip_summed = CHECKSUM_NONE; | |
17666 | + skb->proto_data_valid = 0; | |
17667 | + } | |
17668 | + skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank); | |
17669 | + | |
17670 | + netbk_fill_frags(skb); | |
17671 | + | |
17672 | + skb->dev = netif->dev; | |
17673 | + skb->protocol = eth_type_trans(skb, skb->dev); | |
17674 | + | |
17675 | + netif->stats.rx_bytes += skb->len; | |
17676 | + netif->stats.rx_packets++; | |
17677 | + | |
17678 | + if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) && | |
17679 | + unlikely(skb_linearize(skb))) { | |
17680 | + DPRINTK("Can't linearize skb in net_tx_action.\n"); | |
17681 | + kfree_skb(skb); | |
17682 | + continue; | |
17683 | + } | |
17684 | + | |
17685 | + netif_rx(skb); | |
17686 | + netif->dev->last_rx = jiffies; | |
17687 | + } | |
17688 | + | |
17689 | + if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && | |
17690 | + !list_empty(&pending_inuse_head)) { | |
17691 | + struct netbk_tx_pending_inuse *oldest; | |
17692 | + | |
17693 | + oldest = list_entry(pending_inuse_head.next, | |
17694 | + struct netbk_tx_pending_inuse, list); | |
17695 | + mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ); | |
17696 | + } | |
17697 | +} | |
17698 | + | |
17699 | +static void netif_idx_release(u16 pending_idx) | |
17700 | +{ | |
17701 | + static DEFINE_SPINLOCK(_lock); | |
17702 | + unsigned long flags; | |
17703 | + | |
17704 | + spin_lock_irqsave(&_lock, flags); | |
17705 | + dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx; | |
17706 | + /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */ | |
17707 | + smp_wmb(); | |
17708 | + dealloc_prod++; | |
17709 | + spin_unlock_irqrestore(&_lock, flags); | |
17710 | + | |
17711 | + tasklet_schedule(&net_tx_tasklet); | |
17712 | +} | |
17713 | + | |
17714 | +static void netif_page_release(struct page *page) | |
17715 | +{ | |
17716 | + netif_idx_release(netif_page_index(page)); | |
17717 | +} | |
17718 | + | |
17719 | +irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) | |
17720 | +{ | |
17721 | + netif_t *netif = dev_id; | |
17722 | + | |
17723 | + add_to_net_schedule_list_tail(netif); | |
17724 | + maybe_schedule_tx_action(); | |
17725 | + | |
17726 | + if (netif_schedulable(netif) && !netbk_queue_full(netif)) | |
17727 | + netif_wake_queue(netif->dev); | |
17728 | + | |
17729 | + return IRQ_HANDLED; | |
17730 | +} | |
17731 | + | |
17732 | +static void make_tx_response(netif_t *netif, | |
17733 | + netif_tx_request_t *txp, | |
17734 | + s8 st) | |
17735 | +{ | |
17736 | + RING_IDX i = netif->tx.rsp_prod_pvt; | |
17737 | + netif_tx_response_t *resp; | |
17738 | + int notify; | |
17739 | + | |
17740 | + resp = RING_GET_RESPONSE(&netif->tx, i); | |
17741 | + resp->id = txp->id; | |
17742 | + resp->status = st; | |
17743 | + | |
17744 | + if (txp->flags & NETTXF_extra_info) | |
17745 | + RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL; | |
17746 | + | |
17747 | + netif->tx.rsp_prod_pvt = ++i; | |
17748 | + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); | |
17749 | + if (notify) | |
17750 | + notify_remote_via_irq(netif->irq); | |
17751 | + | |
17752 | +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER | |
17753 | + if (i == netif->tx.req_cons) { | |
17754 | + int more_to_do; | |
17755 | + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); | |
17756 | + if (more_to_do) | |
17757 | + add_to_net_schedule_list_tail(netif); | |
17758 | + } | |
17759 | +#endif | |
17760 | +} | |
17761 | + | |
17762 | +static netif_rx_response_t *make_rx_response(netif_t *netif, | |
17763 | + u16 id, | |
17764 | + s8 st, | |
17765 | + u16 offset, | |
17766 | + u16 size, | |
17767 | + u16 flags) | |
17768 | +{ | |
17769 | + RING_IDX i = netif->rx.rsp_prod_pvt; | |
17770 | + netif_rx_response_t *resp; | |
17771 | + | |
17772 | + resp = RING_GET_RESPONSE(&netif->rx, i); | |
17773 | + resp->offset = offset; | |
17774 | + resp->flags = flags; | |
17775 | + resp->id = id; | |
17776 | + resp->status = (s16)size; | |
17777 | + if (st < 0) | |
17778 | + resp->status = (s16)st; | |
17779 | + | |
17780 | + netif->rx.rsp_prod_pvt = ++i; | |
17781 | + | |
17782 | + return resp; | |
17783 | +} | |
17784 | + | |
17785 | +#ifdef NETBE_DEBUG_INTERRUPT | |
17786 | +static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) | |
17787 | +{ | |
17788 | + struct list_head *ent; | |
17789 | + netif_t *netif; | |
17790 | + int i = 0; | |
17791 | + | |
17792 | + printk(KERN_ALERT "netif_schedule_list:\n"); | |
17793 | + spin_lock_irq(&net_schedule_list_lock); | |
17794 | + | |
17795 | + list_for_each (ent, &net_schedule_list) { | |
17796 | + netif = list_entry(ent, netif_t, list); | |
17797 | + printk(KERN_ALERT " %d: private(rx_req_cons=%08x " | |
17798 | + "rx_resp_prod=%08x\n", | |
17799 | + i, netif->rx.req_cons, netif->rx.rsp_prod_pvt); | |
17800 | + printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n", | |
17801 | + netif->tx.req_cons, netif->tx.rsp_prod_pvt); | |
17802 | + printk(KERN_ALERT " shared(rx_req_prod=%08x " | |
17803 | + "rx_resp_prod=%08x\n", | |
17804 | + netif->rx.sring->req_prod, netif->rx.sring->rsp_prod); | |
17805 | + printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n", | |
17806 | + netif->rx.sring->rsp_event, netif->tx.sring->req_prod); | |
17807 | + printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n", | |
17808 | + netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event); | |
17809 | + i++; | |
17810 | + } | |
17811 | + | |
17812 | + spin_unlock_irq(&net_schedule_list_lock); | |
17813 | + printk(KERN_ALERT " ** End of netif_schedule_list **\n"); | |
17814 | + | |
17815 | + return IRQ_HANDLED; | |
17816 | +} | |
17817 | +#endif | |
17818 | + | |
17819 | +static int __init netback_init(void) | |
17820 | +{ | |
17821 | + int i; | |
17822 | + struct page *page; | |
17823 | + | |
17824 | + if (!is_running_on_xen()) | |
17825 | + return -ENODEV; | |
17826 | + | |
17827 | + /* We can increase reservation by this much in net_rx_action(). */ | |
17828 | + balloon_update_driver_allowance(NET_RX_RING_SIZE); | |
17829 | + | |
17830 | + skb_queue_head_init(&rx_queue); | |
17831 | + skb_queue_head_init(&tx_queue); | |
17832 | + | |
17833 | + init_timer(&net_timer); | |
17834 | + net_timer.data = 0; | |
17835 | + net_timer.function = net_alarm; | |
17836 | + | |
17837 | + init_timer(&netbk_tx_pending_timer); | |
17838 | + netbk_tx_pending_timer.data = 0; | |
17839 | + netbk_tx_pending_timer.function = netbk_tx_pending_timeout; | |
17840 | + | |
17841 | + mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); | |
17842 | + if (mmap_pages == NULL) { | |
17843 | + printk("%s: out of memory\n", __FUNCTION__); | |
17844 | + return -ENOMEM; | |
17845 | + } | |
17846 | + | |
17847 | + for (i = 0; i < MAX_PENDING_REQS; i++) { | |
17848 | + page = mmap_pages[i]; | |
17849 | + SetPageForeign(page, netif_page_release); | |
17850 | + netif_page_index(page) = i; | |
17851 | + INIT_LIST_HEAD(&pending_inuse[i].list); | |
17852 | + } | |
17853 | + | |
17854 | + pending_cons = 0; | |
17855 | + pending_prod = MAX_PENDING_REQS; | |
17856 | + for (i = 0; i < MAX_PENDING_REQS; i++) | |
17857 | + pending_ring[i] = i; | |
17858 | + | |
17859 | + spin_lock_init(&net_schedule_list_lock); | |
17860 | + INIT_LIST_HEAD(&net_schedule_list); | |
17861 | + | |
17862 | + netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; | |
17863 | + if (MODPARM_copy_skb) { | |
17864 | + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, | |
17865 | + NULL, 0)) | |
17866 | + netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB; | |
17867 | + else | |
17868 | + netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB; | |
17869 | + } | |
17870 | + | |
17871 | + netif_accel_init(); | |
17872 | + | |
17873 | + netif_xenbus_init(); | |
17874 | + | |
17875 | +#ifdef NETBE_DEBUG_INTERRUPT | |
17876 | + (void)bind_virq_to_irqhandler(VIRQ_DEBUG, | |
17877 | + 0, | |
17878 | + netif_be_dbg, | |
17879 | + SA_SHIRQ, | |
17880 | + "net-be-dbg", | |
17881 | + &netif_be_dbg); | |
17882 | +#endif | |
17883 | + | |
17884 | + return 0; | |
17885 | +} | |
17886 | + | |
17887 | +module_init(netback_init); | |
17888 | + | |
17889 | +MODULE_LICENSE("Dual BSD/GPL"); | |
17890 | Index: head-2008-11-25/drivers/xen/netback/xenbus.c | |
17891 | =================================================================== | |
17892 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
17893 | +++ head-2008-11-25/drivers/xen/netback/xenbus.c 2008-09-01 12:07:31.000000000 +0200 | |
17894 | @@ -0,0 +1,454 @@ | |
17895 | +/* Xenbus code for netif backend | |
17896 | + Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> | |
17897 | + Copyright (C) 2005 XenSource Ltd | |
17898 | + | |
17899 | + This program is free software; you can redistribute it and/or modify | |
17900 | + it under the terms of the GNU General Public License as published by | |
17901 | + the Free Software Foundation; either version 2 of the License, or | |
17902 | + (at your option) any later version. | |
17903 | + | |
17904 | + This program is distributed in the hope that it will be useful, | |
17905 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17906 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17907 | + GNU General Public License for more details. | |
17908 | + | |
17909 | + You should have received a copy of the GNU General Public License | |
17910 | + along with this program; if not, write to the Free Software | |
17911 | + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17912 | +*/ | |
17913 | + | |
17914 | +#include <stdarg.h> | |
17915 | +#include <linux/module.h> | |
17916 | +#include <xen/xenbus.h> | |
17917 | +#include "common.h" | |
17918 | + | |
17919 | +#if 0 | |
17920 | +#undef DPRINTK | |
17921 | +#define DPRINTK(fmt, args...) \ | |
17922 | + printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) | |
17923 | +#endif | |
17924 | + | |
17925 | + | |
17926 | +static int connect_rings(struct backend_info *); | |
17927 | +static void connect(struct backend_info *); | |
17928 | +static void backend_create_netif(struct backend_info *be); | |
17929 | + | |
17930 | +static int netback_remove(struct xenbus_device *dev) | |
17931 | +{ | |
17932 | + struct backend_info *be = dev->dev.driver_data; | |
17933 | + | |
17934 | + netback_remove_accelerators(be, dev); | |
17935 | + | |
17936 | + if (be->netif) { | |
17937 | + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); | |
17938 | + netif_disconnect(be->netif); | |
17939 | + be->netif = NULL; | |
17940 | + } | |
17941 | + kfree(be); | |
17942 | + dev->dev.driver_data = NULL; | |
17943 | + return 0; | |
17944 | +} | |
17945 | + | |
17946 | + | |
17947 | +/** | |
17948 | + * Entry point to this code when a new device is created. Allocate the basic | |
17949 | + * structures and switch to InitWait. | |
17950 | + */ | |
17951 | +static int netback_probe(struct xenbus_device *dev, | |
17952 | + const struct xenbus_device_id *id) | |
17953 | +{ | |
17954 | + const char *message; | |
17955 | + struct xenbus_transaction xbt; | |
17956 | + int err; | |
17957 | + int sg; | |
17958 | + struct backend_info *be = kzalloc(sizeof(struct backend_info), | |
17959 | + GFP_KERNEL); | |
17960 | + if (!be) { | |
17961 | + xenbus_dev_fatal(dev, -ENOMEM, | |
17962 | + "allocating backend structure"); | |
17963 | + return -ENOMEM; | |
17964 | + } | |
17965 | + | |
17966 | + be->dev = dev; | |
17967 | + dev->dev.driver_data = be; | |
17968 | + | |
17969 | + sg = 1; | |
17970 | + if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) | |
17971 | + sg = 0; | |
17972 | + | |
17973 | + do { | |
17974 | + err = xenbus_transaction_start(&xbt); | |
17975 | + if (err) { | |
17976 | + xenbus_dev_fatal(dev, err, "starting transaction"); | |
17977 | + goto fail; | |
17978 | + } | |
17979 | + | |
17980 | + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg); | |
17981 | + if (err) { | |
17982 | + message = "writing feature-sg"; | |
17983 | + goto abort_transaction; | |
17984 | + } | |
17985 | + | |
17986 | + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", | |
17987 | + "%d", sg); | |
17988 | + if (err) { | |
17989 | + message = "writing feature-gso-tcpv4"; | |
17990 | + goto abort_transaction; | |
17991 | + } | |
17992 | + | |
17993 | + /* We support rx-copy path. */ | |
17994 | + err = xenbus_printf(xbt, dev->nodename, | |
17995 | + "feature-rx-copy", "%d", 1); | |
17996 | + if (err) { | |
17997 | + message = "writing feature-rx-copy"; | |
17998 | + goto abort_transaction; | |
17999 | + } | |
18000 | + | |
18001 | + /* | |
18002 | + * We don't support rx-flip path (except old guests who don't | |
18003 | + * grok this feature flag). | |
18004 | + */ | |
18005 | + err = xenbus_printf(xbt, dev->nodename, | |
18006 | + "feature-rx-flip", "%d", 0); | |
18007 | + if (err) { | |
18008 | + message = "writing feature-rx-flip"; | |
18009 | + goto abort_transaction; | |
18010 | + } | |
18011 | + | |
18012 | + err = xenbus_transaction_end(xbt, 0); | |
18013 | + } while (err == -EAGAIN); | |
18014 | + | |
18015 | + if (err) { | |
18016 | + xenbus_dev_fatal(dev, err, "completing transaction"); | |
18017 | + goto fail; | |
18018 | + } | |
18019 | + | |
18020 | + netback_probe_accelerators(be, dev); | |
18021 | + | |
18022 | + err = xenbus_switch_state(dev, XenbusStateInitWait); | |
18023 | + if (err) | |
18024 | + goto fail; | |
18025 | + | |
18026 | + /* This kicks hotplug scripts, so do it immediately. */ | |
18027 | + backend_create_netif(be); | |
18028 | + | |
18029 | + return 0; | |
18030 | + | |
18031 | +abort_transaction: | |
18032 | + xenbus_transaction_end(xbt, 1); | |
18033 | + xenbus_dev_fatal(dev, err, "%s", message); | |
18034 | +fail: | |
18035 | + DPRINTK("failed"); | |
18036 | + netback_remove(dev); | |
18037 | + return err; | |
18038 | +} | |
18039 | + | |
18040 | + | |
18041 | +/** | |
18042 | + * Handle the creation of the hotplug script environment. We add the script | |
18043 | + * and vif variables to the environment, for the benefit of the vif-* hotplug | |
18044 | + * scripts. | |
18045 | + */ | |
18046 | +static int netback_uevent(struct xenbus_device *xdev, char **envp, | |
18047 | + int num_envp, char *buffer, int buffer_size) | |
18048 | +{ | |
18049 | + struct backend_info *be = xdev->dev.driver_data; | |
18050 | + netif_t *netif = be->netif; | |
18051 | + int i = 0, length = 0; | |
18052 | + char *val; | |
18053 | + | |
18054 | + DPRINTK("netback_uevent"); | |
18055 | + | |
18056 | + val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL); | |
18057 | + if (IS_ERR(val)) { | |
18058 | + int err = PTR_ERR(val); | |
18059 | + xenbus_dev_fatal(xdev, err, "reading script"); | |
18060 | + return err; | |
18061 | + } | |
18062 | + else { | |
18063 | + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, | |
18064 | + &length, "script=%s", val); | |
18065 | + kfree(val); | |
18066 | + } | |
18067 | + | |
18068 | + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, | |
18069 | + "vif=%s", netif->dev->name); | |
18070 | + | |
18071 | + envp[i] = NULL; | |
18072 | + | |
18073 | + return 0; | |
18074 | +} | |
18075 | + | |
18076 | + | |
18077 | +static void backend_create_netif(struct backend_info *be) | |
18078 | +{ | |
18079 | + int err; | |
18080 | + long handle; | |
18081 | + struct xenbus_device *dev = be->dev; | |
18082 | + | |
18083 | + if (be->netif != NULL) | |
18084 | + return; | |
18085 | + | |
18086 | + err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle); | |
18087 | + if (err != 1) { | |
18088 | + xenbus_dev_fatal(dev, err, "reading handle"); | |
18089 | + return; | |
18090 | + } | |
18091 | + | |
18092 | + be->netif = netif_alloc(dev->otherend_id, handle); | |
18093 | + if (IS_ERR(be->netif)) { | |
18094 | + err = PTR_ERR(be->netif); | |
18095 | + be->netif = NULL; | |
18096 | + xenbus_dev_fatal(dev, err, "creating interface"); | |
18097 | + return; | |
18098 | + } | |
18099 | + | |
18100 | + kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); | |
18101 | +} | |
18102 | + | |
18103 | + | |
18104 | +/** | |
18105 | + * Callback received when the frontend's state changes. | |
18106 | + */ | |
18107 | +static void frontend_changed(struct xenbus_device *dev, | |
18108 | + enum xenbus_state frontend_state) | |
18109 | +{ | |
18110 | + struct backend_info *be = dev->dev.driver_data; | |
18111 | + | |
18112 | + DPRINTK("%s", xenbus_strstate(frontend_state)); | |
18113 | + | |
18114 | + be->frontend_state = frontend_state; | |
18115 | + | |
18116 | + switch (frontend_state) { | |
18117 | + case XenbusStateInitialising: | |
18118 | + if (dev->state == XenbusStateClosed) { | |
18119 | + printk(KERN_INFO "%s: %s: prepare for reconnect\n", | |
18120 | + __FUNCTION__, dev->nodename); | |
18121 | + xenbus_switch_state(dev, XenbusStateInitWait); | |
18122 | + } | |
18123 | + break; | |
18124 | + | |
18125 | + case XenbusStateInitialised: | |
18126 | + break; | |
18127 | + | |
18128 | + case XenbusStateConnected: | |
18129 | + if (dev->state == XenbusStateConnected) | |
18130 | + break; | |
18131 | + backend_create_netif(be); | |
18132 | + if (be->netif) | |
18133 | + connect(be); | |
18134 | + break; | |
18135 | + | |
18136 | + case XenbusStateClosing: | |
18137 | + if (be->netif) { | |
18138 | + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); | |
18139 | + netif_disconnect(be->netif); | |
18140 | + be->netif = NULL; | |
18141 | + } | |
18142 | + xenbus_switch_state(dev, XenbusStateClosing); | |
18143 | + break; | |
18144 | + | |
18145 | + case XenbusStateClosed: | |
18146 | + xenbus_switch_state(dev, XenbusStateClosed); | |
18147 | + if (xenbus_dev_is_online(dev)) | |
18148 | + break; | |
18149 | + /* fall through if not online */ | |
18150 | + case XenbusStateUnknown: | |
18151 | + device_unregister(&dev->dev); | |
18152 | + break; | |
18153 | + | |
18154 | + default: | |
18155 | + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", | |
18156 | + frontend_state); | |
18157 | + break; | |
18158 | + } | |
18159 | +} | |
18160 | + | |
18161 | + | |
18162 | +static void xen_net_read_rate(struct xenbus_device *dev, | |
18163 | + unsigned long *bytes, unsigned long *usec) | |
18164 | +{ | |
18165 | + char *s, *e; | |
18166 | + unsigned long b, u; | |
18167 | + char *ratestr; | |
18168 | + | |
18169 | + /* Default to unlimited bandwidth. */ | |
18170 | + *bytes = ~0UL; | |
18171 | + *usec = 0; | |
18172 | + | |
18173 | + ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL); | |
18174 | + if (IS_ERR(ratestr)) | |
18175 | + return; | |
18176 | + | |
18177 | + s = ratestr; | |
18178 | + b = simple_strtoul(s, &e, 10); | |
18179 | + if ((s == e) || (*e != ',')) | |
18180 | + goto fail; | |
18181 | + | |
18182 | + s = e + 1; | |
18183 | + u = simple_strtoul(s, &e, 10); | |
18184 | + if ((s == e) || (*e != '\0')) | |
18185 | + goto fail; | |
18186 | + | |
18187 | + *bytes = b; | |
18188 | + *usec = u; | |
18189 | + | |
18190 | + kfree(ratestr); | |
18191 | + return; | |
18192 | + | |
18193 | + fail: | |
18194 | + WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n"); | |
18195 | + kfree(ratestr); | |
18196 | +} | |
18197 | + | |
18198 | +static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) | |
18199 | +{ | |
18200 | + char *s, *e, *macstr; | |
18201 | + int i; | |
18202 | + | |
18203 | + macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); | |
18204 | + if (IS_ERR(macstr)) | |
18205 | + return PTR_ERR(macstr); | |
18206 | + | |
18207 | + for (i = 0; i < ETH_ALEN; i++) { | |
18208 | + mac[i] = simple_strtoul(s, &e, 16); | |
18209 | + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { | |
18210 | + kfree(macstr); | |
18211 | + return -ENOENT; | |
18212 | + } | |
18213 | + s = e+1; | |
18214 | + } | |
18215 | + | |
18216 | + kfree(macstr); | |
18217 | + return 0; | |
18218 | +} | |
18219 | + | |
18220 | +static void connect(struct backend_info *be) | |
18221 | +{ | |
18222 | + int err; | |
18223 | + struct xenbus_device *dev = be->dev; | |
18224 | + | |
18225 | + err = connect_rings(be); | |
18226 | + if (err) | |
18227 | + return; | |
18228 | + | |
18229 | + err = xen_net_read_mac(dev, be->netif->fe_dev_addr); | |
18230 | + if (err) { | |
18231 | + xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); | |
18232 | + return; | |
18233 | + } | |
18234 | + | |
18235 | + xen_net_read_rate(dev, &be->netif->credit_bytes, | |
18236 | + &be->netif->credit_usec); | |
18237 | + be->netif->remaining_credit = be->netif->credit_bytes; | |
18238 | + | |
18239 | + xenbus_switch_state(dev, XenbusStateConnected); | |
18240 | + | |
18241 | + netif_wake_queue(be->netif->dev); | |
18242 | +} | |
18243 | + | |
18244 | + | |
18245 | +static int connect_rings(struct backend_info *be) | |
18246 | +{ | |
18247 | + struct xenbus_device *dev = be->dev; | |
18248 | + unsigned long tx_ring_ref, rx_ring_ref; | |
18249 | + unsigned int evtchn, rx_copy; | |
18250 | + int err; | |
18251 | + int val; | |
18252 | + | |
18253 | + DPRINTK(""); | |
18254 | + | |
18255 | + err = xenbus_gather(XBT_NIL, dev->otherend, | |
18256 | + "tx-ring-ref", "%lu", &tx_ring_ref, | |
18257 | + "rx-ring-ref", "%lu", &rx_ring_ref, | |
18258 | + "event-channel", "%u", &evtchn, NULL); | |
18259 | + if (err) { | |
18260 | + xenbus_dev_fatal(dev, err, | |
18261 | + "reading %s/ring-ref and event-channel", | |
18262 | + dev->otherend); | |
18263 | + return err; | |
18264 | + } | |
18265 | + | |
18266 | + err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u", | |
18267 | + &rx_copy); | |
18268 | + if (err == -ENOENT) { | |
18269 | + err = 0; | |
18270 | + rx_copy = 0; | |
18271 | + } | |
18272 | + if (err < 0) { | |
18273 | + xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy", | |
18274 | + dev->otherend); | |
18275 | + return err; | |
18276 | + } | |
18277 | + be->netif->copying_receiver = !!rx_copy; | |
18278 | + | |
18279 | + if (be->netif->dev->tx_queue_len != 0) { | |
18280 | + if (xenbus_scanf(XBT_NIL, dev->otherend, | |
18281 | + "feature-rx-notify", "%d", &val) < 0) | |
18282 | + val = 0; | |
18283 | + if (val) | |
18284 | + be->netif->can_queue = 1; | |
18285 | + else | |
18286 | + /* Must be non-zero for pfifo_fast to work. */ | |
18287 | + be->netif->dev->tx_queue_len = 1; | |
18288 | + } | |
18289 | + | |
18290 | + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0) | |
18291 | + val = 0; | |
18292 | + if (val) { | |
18293 | + be->netif->features |= NETIF_F_SG; | |
18294 | + be->netif->dev->features |= NETIF_F_SG; | |
18295 | + } | |
18296 | + | |
18297 | + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d", | |
18298 | + &val) < 0) | |
18299 | + val = 0; | |
18300 | + if (val) { | |
18301 | + be->netif->features |= NETIF_F_TSO; | |
18302 | + be->netif->dev->features |= NETIF_F_TSO; | |
18303 | + } | |
18304 | + | |
18305 | + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", | |
18306 | + "%d", &val) < 0) | |
18307 | + val = 0; | |
18308 | + if (val) { | |
18309 | + be->netif->features &= ~NETIF_F_IP_CSUM; | |
18310 | + be->netif->dev->features &= ~NETIF_F_IP_CSUM; | |
18311 | + } | |
18312 | + | |
18313 | + /* Map the shared frame, irq etc. */ | |
18314 | + err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn); | |
18315 | + if (err) { | |
18316 | + xenbus_dev_fatal(dev, err, | |
18317 | + "mapping shared-frames %lu/%lu port %u", | |
18318 | + tx_ring_ref, rx_ring_ref, evtchn); | |
18319 | + return err; | |
18320 | + } | |
18321 | + return 0; | |
18322 | +} | |
18323 | + | |
18324 | + | |
18325 | +/* ** Driver Registration ** */ | |
18326 | + | |
18327 | + | |
18328 | +static const struct xenbus_device_id netback_ids[] = { | |
18329 | + { "vif" }, | |
18330 | + { "" } | |
18331 | +}; | |
18332 | + | |
18333 | + | |
18334 | +static struct xenbus_driver netback = { | |
18335 | + .name = "vif", | |
18336 | + .owner = THIS_MODULE, | |
18337 | + .ids = netback_ids, | |
18338 | + .probe = netback_probe, | |
18339 | + .remove = netback_remove, | |
18340 | + .uevent = netback_uevent, | |
18341 | + .otherend_changed = frontend_changed, | |
18342 | +}; | |
18343 | + | |
18344 | + | |
18345 | +void netif_xenbus_init(void) | |
18346 | +{ | |
18347 | + xenbus_register_backend(&netback); | |
18348 | +} | |
18349 | Index: head-2008-11-25/drivers/xen/netfront/Makefile | |
18350 | =================================================================== | |
18351 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
18352 | +++ head-2008-11-25/drivers/xen/netfront/Makefile 2007-07-12 08:54:23.000000000 +0200 | |
18353 | @@ -0,0 +1,4 @@ | |
18354 | + | |
18355 | +obj-$(CONFIG_XEN_NETDEV_FRONTEND) := xennet.o | |
18356 | + | |
18357 | +xennet-objs := netfront.o accel.o | |
18358 | Index: head-2008-11-25/drivers/xen/netfront/accel.c | |
18359 | =================================================================== | |
18360 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
18361 | +++ head-2008-11-25/drivers/xen/netfront/accel.c 2008-08-07 12:44:36.000000000 +0200 | |
18362 | @@ -0,0 +1,824 @@ | |
18363 | +/****************************************************************************** | |
18364 | + * Virtual network driver for conversing with remote driver backends. | |
18365 | + * | |
18366 | + * Copyright (C) 2007 Solarflare Communications, Inc. | |
18367 | + * | |
18368 | + * This program is free software; you can redistribute it and/or | |
18369 | + * modify it under the terms of the GNU General Public License version 2 | |
18370 | + * as published by the Free Software Foundation; or, when distributed | |
18371 | + * separately from the Linux kernel or incorporated into other | |
18372 | + * software packages, subject to the following license: | |
18373 | + * | |
18374 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
18375 | + * of this source file (the "Software"), to deal in the Software without | |
18376 | + * restriction, including without limitation the rights to use, copy, modify, | |
18377 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
18378 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
18379 | + * the following conditions: | |
18380 | + * | |
18381 | + * The above copyright notice and this permission notice shall be included in | |
18382 | + * all copies or substantial portions of the Software. | |
18383 | + * | |
18384 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
18385 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18386 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
18387 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
18388 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
18389 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
18390 | + * IN THE SOFTWARE. | |
18391 | + */ | |
18392 | + | |
18393 | +#include <linux/netdevice.h> | |
18394 | +#include <linux/skbuff.h> | |
18395 | +#include <linux/list.h> | |
18396 | +#include <linux/mutex.h> | |
18397 | +#include <asm/hypervisor.h> | |
18398 | +#include <xen/xenbus.h> | |
18399 | + | |
18400 | +#include "netfront.h" | |
18401 | + | |
18402 | +#define DPRINTK(fmt, args...) \ | |
18403 | + pr_debug("netfront/accel (%s:%d) " fmt, \ | |
18404 | + __FUNCTION__, __LINE__, ##args) | |
18405 | +#define IPRINTK(fmt, args...) \ | |
18406 | + printk(KERN_INFO "netfront/accel: " fmt, ##args) | |
18407 | +#define WPRINTK(fmt, args...) \ | |
18408 | + printk(KERN_WARNING "netfront/accel: " fmt, ##args) | |
18409 | + | |
18410 | +static int netfront_remove_accelerator(struct netfront_info *np, | |
18411 | + struct xenbus_device *dev); | |
18412 | +static int netfront_load_accelerator(struct netfront_info *np, | |
18413 | + struct xenbus_device *dev, | |
18414 | + const char *frontend); | |
18415 | + | |
18416 | +/* | |
18417 | + * List of all netfront accelerator plugin modules available. Each | |
18418 | + * list entry is of type struct netfront_accelerator. | |
18419 | + */ | |
18420 | +static struct list_head accelerators_list; | |
18421 | + | |
18422 | +/* Lock to protect access to accelerators_list */ | |
18423 | +static spinlock_t accelerators_lock; | |
18424 | + | |
18425 | +/* Workqueue to process acceleration configuration changes */ | |
18426 | +struct workqueue_struct *accel_watch_workqueue; | |
18427 | + | |
18428 | +/* Mutex to prevent concurrent loads and suspends, etc. */ | |
18429 | +DEFINE_MUTEX(accelerator_mutex); | |
18430 | + | |
18431 | +void netif_init_accel(void) | |
18432 | +{ | |
18433 | + INIT_LIST_HEAD(&accelerators_list); | |
18434 | + spin_lock_init(&accelerators_lock); | |
18435 | + | |
18436 | + accel_watch_workqueue = create_workqueue("net_accel"); | |
18437 | +} | |
18438 | + | |
18439 | +void netif_exit_accel(void) | |
18440 | +{ | |
18441 | + struct netfront_accelerator *accelerator, *tmp; | |
18442 | + unsigned long flags; | |
18443 | + | |
18444 | + flush_workqueue(accel_watch_workqueue); | |
18445 | + destroy_workqueue(accel_watch_workqueue); | |
18446 | + | |
18447 | + spin_lock_irqsave(&accelerators_lock, flags); | |
18448 | + | |
18449 | + list_for_each_entry_safe(accelerator, tmp, &accelerators_list, link) { | |
18450 | + BUG_ON(!list_empty(&accelerator->vif_states)); | |
18451 | + | |
18452 | + list_del(&accelerator->link); | |
18453 | + kfree(accelerator->frontend); | |
18454 | + kfree(accelerator); | |
18455 | + } | |
18456 | + | |
18457 | + spin_unlock_irqrestore(&accelerators_lock, flags); | |
18458 | +} | |
18459 | + | |
18460 | + | |
18461 | +/* | |
18462 | + * Watch the configured accelerator and change plugin if it's modified | |
18463 | + */ | |
18464 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
18465 | +static void accel_watch_work(struct work_struct *context) | |
18466 | +#else | |
18467 | +static void accel_watch_work(void *context) | |
18468 | +#endif | |
18469 | +{ | |
18470 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
18471 | + struct netfront_accel_vif_state *vif_state = | |
18472 | + container_of(context, struct netfront_accel_vif_state, | |
18473 | + accel_work); | |
18474 | +#else | |
18475 | + struct netfront_accel_vif_state *vif_state = | |
18476 | + (struct netfront_accel_vif_state *)context; | |
18477 | +#endif | |
18478 | + struct netfront_info *np = vif_state->np; | |
18479 | + char *accel_frontend; | |
18480 | + int accel_len, rc = -1; | |
18481 | + | |
18482 | + mutex_lock(&accelerator_mutex); | |
18483 | + | |
18484 | + accel_frontend = xenbus_read(XBT_NIL, np->xbdev->otherend, | |
18485 | + "accel-frontend", &accel_len); | |
18486 | + if (IS_ERR(accel_frontend)) { | |
18487 | + accel_frontend = NULL; | |
18488 | + netfront_remove_accelerator(np, np->xbdev); | |
18489 | + } else { | |
18490 | + /* If this is the first time, request the accelerator, | |
18491 | + otherwise only request one if it has changed */ | |
18492 | + if (vif_state->accel_frontend == NULL) { | |
18493 | + rc = netfront_load_accelerator(np, np->xbdev, | |
18494 | + accel_frontend); | |
18495 | + } else { | |
18496 | + if (strncmp(vif_state->accel_frontend, accel_frontend, | |
18497 | + accel_len)) { | |
18498 | + netfront_remove_accelerator(np, np->xbdev); | |
18499 | + rc = netfront_load_accelerator(np, np->xbdev, | |
18500 | + accel_frontend); | |
18501 | + } | |
18502 | + } | |
18503 | + } | |
18504 | + | |
18505 | + /* Get rid of previous state and replace with the new name */ | |
18506 | + if (vif_state->accel_frontend != NULL) | |
18507 | + kfree(vif_state->accel_frontend); | |
18508 | + vif_state->accel_frontend = accel_frontend; | |
18509 | + | |
18510 | + mutex_unlock(&accelerator_mutex); | |
18511 | + | |
18512 | + if (rc == 0) { | |
18513 | + DPRINTK("requesting module %s\n", accel_frontend); | |
18514 | + request_module("%s", accel_frontend); | |
18515 | + /* | |
18516 | + * Module should now call netfront_accelerator_loaded() once | |
18517 | + * it's up and running, and we can continue from there | |
18518 | + */ | |
18519 | + } | |
18520 | +} | |
18521 | + | |
18522 | + | |
18523 | +static void accel_watch_changed(struct xenbus_watch *watch, | |
18524 | + const char **vec, unsigned int len) | |
18525 | +{ | |
18526 | + struct netfront_accel_vif_state *vif_state = | |
18527 | + container_of(watch, struct netfront_accel_vif_state, | |
18528 | + accel_watch); | |
18529 | + queue_work(accel_watch_workqueue, &vif_state->accel_work); | |
18530 | +} | |
18531 | + | |
18532 | + | |
18533 | +void netfront_accelerator_add_watch(struct netfront_info *np) | |
18534 | +{ | |
18535 | + int err; | |
18536 | + | |
18537 | + /* Check we're not trying to overwrite an existing watch */ | |
18538 | + BUG_ON(np->accel_vif_state.accel_watch.node != NULL); | |
18539 | + | |
18540 | + /* Get a watch on the accelerator plugin */ | |
18541 | + err = xenbus_watch_path2(np->xbdev, np->xbdev->otherend, | |
18542 | + "accel-frontend", | |
18543 | + &np->accel_vif_state.accel_watch, | |
18544 | + accel_watch_changed); | |
18545 | + if (err) { | |
18546 | + DPRINTK("%s: Failed to register accel watch: %d\n", | |
18547 | + __FUNCTION__, err); | |
18548 | + np->accel_vif_state.accel_watch.node = NULL; | |
18549 | + } | |
18550 | +} | |
18551 | + | |
18552 | + | |
18553 | +static | |
18554 | +void netfront_accelerator_remove_watch(struct netfront_info *np) | |
18555 | +{ | |
18556 | + struct netfront_accel_vif_state *vif_state = &np->accel_vif_state; | |
18557 | + | |
18558 | + /* Get rid of watch on accelerator plugin */ | |
18559 | + if (vif_state->accel_watch.node != NULL) { | |
18560 | + unregister_xenbus_watch(&vif_state->accel_watch); | |
18561 | + kfree(vif_state->accel_watch.node); | |
18562 | + vif_state->accel_watch.node = NULL; | |
18563 | + | |
18564 | + flush_workqueue(accel_watch_workqueue); | |
18565 | + | |
18566 | + /* Clean up any state left from watch */ | |
18567 | + if (vif_state->accel_frontend != NULL) { | |
18568 | + kfree(vif_state->accel_frontend); | |
18569 | + vif_state->accel_frontend = NULL; | |
18570 | + } | |
18571 | + } | |
18572 | +} | |
18573 | + | |
18574 | + | |
18575 | +/* | |
18576 | + * Initialise the accel_vif_state field in the netfront state | |
18577 | + */ | |
18578 | +void init_accelerator_vif(struct netfront_info *np, | |
18579 | + struct xenbus_device *dev) | |
18580 | +{ | |
18581 | + np->accelerator = NULL; | |
18582 | + | |
18583 | + /* It's assumed that these things don't change */ | |
18584 | + np->accel_vif_state.np = np; | |
18585 | + np->accel_vif_state.dev = dev; | |
18586 | + | |
18587 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
18588 | + INIT_WORK(&np->accel_vif_state.accel_work, accel_watch_work); | |
18589 | +#else | |
18590 | + INIT_WORK(&np->accel_vif_state.accel_work, accel_watch_work, | |
18591 | + &np->accel_vif_state); | |
18592 | +#endif | |
18593 | +} | |
18594 | + | |
18595 | + | |
18596 | +/* | |
18597 | + * Compare a frontend description string against an accelerator to see | |
18598 | + * if they match. Would ultimately be nice to replace the string with | |
18599 | + * a unique numeric identifier for each accelerator. | |
18600 | + */ | |
18601 | +static int match_accelerator(const char *frontend, | |
18602 | + struct netfront_accelerator *accelerator) | |
18603 | +{ | |
18604 | + return strcmp(frontend, accelerator->frontend) == 0; | |
18605 | +} | |
18606 | + | |
18607 | + | |
18608 | +/* | |
18609 | + * Add a frontend vif to the list of vifs that is using a netfront | |
18610 | + * accelerator plugin module. | |
18611 | + */ | |
18612 | +static void add_accelerator_vif(struct netfront_accelerator *accelerator, | |
18613 | + struct netfront_info *np) | |
18614 | +{ | |
18615 | + unsigned long flags; | |
18616 | + | |
18617 | + /* Need lock to write list */ | |
18618 | + spin_lock_irqsave(&accelerator->vif_states_lock, flags); | |
18619 | + | |
18620 | + if (np->accelerator == NULL) { | |
18621 | + np->accelerator = accelerator; | |
18622 | + | |
18623 | + list_add(&np->accel_vif_state.link, &accelerator->vif_states); | |
18624 | + } else { | |
18625 | + /* | |
18626 | + * May get here legitimately if suspend_cancel is | |
18627 | + * called, but in that case configuration should not | |
18628 | + * have changed | |
18629 | + */ | |
18630 | + BUG_ON(np->accelerator != accelerator); | |
18631 | + } | |
18632 | + | |
18633 | + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); | |
18634 | +} | |
18635 | + | |
18636 | + | |
18637 | +/* | |
18638 | + * Initialise the state to track an accelerator plugin module. | |
18639 | + */ | |
18640 | +static int init_accelerator(const char *frontend, | |
18641 | + struct netfront_accelerator **result, | |
18642 | + struct netfront_accel_hooks *hooks) | |
18643 | +{ | |
18644 | + struct netfront_accelerator *accelerator = | |
18645 | + kmalloc(sizeof(struct netfront_accelerator), GFP_KERNEL); | |
18646 | + unsigned long flags; | |
18647 | + int frontend_len; | |
18648 | + | |
18649 | + if (!accelerator) { | |
18650 | + DPRINTK("no memory for accelerator\n"); | |
18651 | + return -ENOMEM; | |
18652 | + } | |
18653 | + | |
18654 | + frontend_len = strlen(frontend) + 1; | |
18655 | + accelerator->frontend = kmalloc(frontend_len, GFP_KERNEL); | |
18656 | + if (!accelerator->frontend) { | |
18657 | + DPRINTK("no memory for accelerator\n"); | |
18658 | + kfree(accelerator); | |
18659 | + return -ENOMEM; | |
18660 | + } | |
18661 | + strlcpy(accelerator->frontend, frontend, frontend_len); | |
18662 | + | |
18663 | + INIT_LIST_HEAD(&accelerator->vif_states); | |
18664 | + spin_lock_init(&accelerator->vif_states_lock); | |
18665 | + | |
18666 | + accelerator->hooks = hooks; | |
18667 | + | |
18668 | + spin_lock_irqsave(&accelerators_lock, flags); | |
18669 | + list_add(&accelerator->link, &accelerators_list); | |
18670 | + spin_unlock_irqrestore(&accelerators_lock, flags); | |
18671 | + | |
18672 | + *result = accelerator; | |
18673 | + | |
18674 | + return 0; | |
18675 | +} | |
18676 | + | |
18677 | + | |
18678 | +/* | |
18679 | + * Modify the hooks stored in the per-vif state to match that in the | |
18680 | + * netfront accelerator's state. | |
18681 | + */ | |
18682 | +static void | |
18683 | +accelerator_set_vif_state_hooks(struct netfront_accel_vif_state *vif_state) | |
18684 | +{ | |
18685 | + /* This function must be called with the vif_states_lock held */ | |
18686 | + | |
18687 | + DPRINTK("%p\n",vif_state); | |
18688 | + | |
18689 | + /* Make sure there are no data path operations going on */ | |
18690 | + netif_poll_disable(vif_state->np->netdev); | |
18691 | + netif_tx_lock_bh(vif_state->np->netdev); | |
18692 | + | |
18693 | + vif_state->hooks = vif_state->np->accelerator->hooks; | |
18694 | + | |
18695 | + netif_tx_unlock_bh(vif_state->np->netdev); | |
18696 | + netif_poll_enable(vif_state->np->netdev); | |
18697 | +} | |
18698 | + | |
18699 | + | |
18700 | +static void accelerator_probe_new_vif(struct netfront_info *np, | |
18701 | + struct xenbus_device *dev, | |
18702 | + struct netfront_accelerator *accelerator) | |
18703 | +{ | |
18704 | + struct netfront_accel_hooks *hooks; | |
18705 | + unsigned long flags; | |
18706 | + | |
18707 | + DPRINTK("\n"); | |
18708 | + | |
18709 | + /* Include this frontend device on the accelerator's list */ | |
18710 | + add_accelerator_vif(accelerator, np); | |
18711 | + | |
18712 | + hooks = accelerator->hooks; | |
18713 | + | |
18714 | + if (hooks) { | |
18715 | + if (hooks->new_device(np->netdev, dev) == 0) { | |
18716 | + spin_lock_irqsave | |
18717 | + (&accelerator->vif_states_lock, flags); | |
18718 | + | |
18719 | + accelerator_set_vif_state_hooks(&np->accel_vif_state); | |
18720 | + | |
18721 | + spin_unlock_irqrestore | |
18722 | + (&accelerator->vif_states_lock, flags); | |
18723 | + } | |
18724 | + } | |
18725 | + | |
18726 | + return; | |
18727 | +} | |
18728 | + | |
18729 | + | |
18730 | +/* | |
18731 | + * Request that a particular netfront accelerator plugin is loaded. | |
18732 | + * Usually called as a result of the vif configuration specifying | |
18733 | + * which one to use. Must be called with accelerator_mutex held | |
18734 | + */ | |
18735 | +static int netfront_load_accelerator(struct netfront_info *np, | |
18736 | + struct xenbus_device *dev, | |
18737 | + const char *frontend) | |
18738 | +{ | |
18739 | + struct netfront_accelerator *accelerator; | |
18740 | + int rc = 0; | |
18741 | + | |
18742 | + DPRINTK(" %s\n", frontend); | |
18743 | + | |
18744 | + /* | |
18745 | + * Look at list of loaded accelerators to see if the requested | |
18746 | + * one is already there | |
18747 | + */ | |
18748 | + list_for_each_entry(accelerator, &accelerators_list, link) { | |
18749 | + if (match_accelerator(frontend, accelerator)) { | |
18750 | + accelerator_probe_new_vif(np, dev, accelerator); | |
18751 | + return 0; | |
18752 | + } | |
18753 | + } | |
18754 | + | |
18755 | + /* Couldn't find it, so create a new one and load the module */ | |
18756 | + if ((rc = init_accelerator(frontend, &accelerator, NULL)) < 0) { | |
18757 | + return rc; | |
18758 | + } | |
18759 | + | |
18760 | + /* Include this frontend device on the accelerator's list */ | |
18761 | + add_accelerator_vif(accelerator, np); | |
18762 | + | |
18763 | + return rc; | |
18764 | +} | |
18765 | + | |
18766 | + | |
18767 | +/* | |
18768 | + * Go through all the netfront vifs and see if they have requested | |
18769 | + * this accelerator. Notify the accelerator plugin of the relevant | |
18770 | + * device if so. Called when an accelerator plugin module is first | |
18771 | + * loaded and connects to netfront. | |
18772 | + */ | |
18773 | +static void | |
18774 | +accelerator_probe_vifs(struct netfront_accelerator *accelerator, | |
18775 | + struct netfront_accel_hooks *hooks) | |
18776 | +{ | |
18777 | + struct netfront_accel_vif_state *vif_state, *tmp; | |
18778 | + unsigned long flags; | |
18779 | + | |
18780 | + DPRINTK("%p\n", accelerator); | |
18781 | + | |
18782 | + /* | |
18783 | + * Store the hooks for future calls to probe a new device, and | |
18784 | + * to wire into the vif_state once the accelerator plugin is | |
18785 | + * ready to accelerate each vif | |
18786 | + */ | |
18787 | + BUG_ON(hooks == NULL); | |
18788 | + accelerator->hooks = hooks; | |
18789 | + | |
18790 | + /* | |
18791 | + * currently hold accelerator_mutex, so don't need | |
18792 | + * vif_states_lock to read the list | |
18793 | + */ | |
18794 | + list_for_each_entry_safe(vif_state, tmp, &accelerator->vif_states, | |
18795 | + link) { | |
18796 | + struct netfront_info *np = vif_state->np; | |
18797 | + | |
18798 | + if (hooks->new_device(np->netdev, vif_state->dev) == 0) { | |
18799 | + spin_lock_irqsave | |
18800 | + (&accelerator->vif_states_lock, flags); | |
18801 | + | |
18802 | + accelerator_set_vif_state_hooks(vif_state); | |
18803 | + | |
18804 | + spin_unlock_irqrestore | |
18805 | + (&accelerator->vif_states_lock, flags); | |
18806 | + } | |
18807 | + } | |
18808 | +} | |
18809 | + | |
18810 | + | |
18811 | +/* | |
18812 | + * Called by the netfront accelerator plugin module when it has loaded | |
18813 | + */ | |
18814 | +int netfront_accelerator_loaded(int version, const char *frontend, | |
18815 | + struct netfront_accel_hooks *hooks) | |
18816 | +{ | |
18817 | + struct netfront_accelerator *accelerator; | |
18818 | + | |
18819 | + if (is_initial_xendomain()) | |
18820 | + return -EINVAL; | |
18821 | + | |
18822 | + if (version != NETFRONT_ACCEL_VERSION) { | |
18823 | + if (version > NETFRONT_ACCEL_VERSION) { | |
18824 | + /* Caller has higher version number, leave it | |
18825 | + up to them to decide whether to continue. | |
18826 | + They can re-call with a lower number if | |
18827 | + they're happy to be compatible with us */ | |
18828 | + return NETFRONT_ACCEL_VERSION; | |
18829 | + } else { | |
18830 | + /* We have a more recent version than caller. | |
18831 | + Currently reject, but may in future be able | |
18832 | + to be backwardly compatible */ | |
18833 | + return -EPROTO; | |
18834 | + } | |
18835 | + } | |
18836 | + | |
18837 | + mutex_lock(&accelerator_mutex); | |
18838 | + | |
18839 | + /* | |
18840 | + * Look through list of accelerators to see if it has already | |
18841 | + * been requested | |
18842 | + */ | |
18843 | + list_for_each_entry(accelerator, &accelerators_list, link) { | |
18844 | + if (match_accelerator(frontend, accelerator)) { | |
18845 | + accelerator_probe_vifs(accelerator, hooks); | |
18846 | + goto out; | |
18847 | + } | |
18848 | + } | |
18849 | + | |
18850 | + /* | |
18851 | + * If it wasn't in the list, add it now so that when it is | |
18852 | + * requested the caller will find it | |
18853 | + */ | |
18854 | + DPRINTK("Couldn't find matching accelerator (%s)\n", | |
18855 | + frontend); | |
18856 | + | |
18857 | + init_accelerator(frontend, &accelerator, hooks); | |
18858 | + | |
18859 | + out: | |
18860 | + mutex_unlock(&accelerator_mutex); | |
18861 | + return 0; | |
18862 | +} | |
18863 | +EXPORT_SYMBOL_GPL(netfront_accelerator_loaded); | |
18864 | + | |
18865 | + | |
18866 | +/* | |
18867 | + * Remove the hooks from a single vif state. | |
18868 | + */ | |
18869 | +static void | |
18870 | +accelerator_remove_single_hook(struct netfront_accelerator *accelerator, | |
18871 | + struct netfront_accel_vif_state *vif_state) | |
18872 | +{ | |
18873 | + /* Make sure there are no data path operations going on */ | |
18874 | + netif_poll_disable(vif_state->np->netdev); | |
18875 | + netif_tx_lock_bh(vif_state->np->netdev); | |
18876 | + | |
18877 | + /* | |
18878 | + * Remove the hooks, but leave the vif_state on the | |
18879 | + * accelerator's list as that signifies this vif is | |
18880 | + * interested in using that accelerator if it becomes | |
18881 | + * available again | |
18882 | + */ | |
18883 | + vif_state->hooks = NULL; | |
18884 | + | |
18885 | + netif_tx_unlock_bh(vif_state->np->netdev); | |
18886 | + netif_poll_enable(vif_state->np->netdev); | |
18887 | +} | |
18888 | + | |
18889 | + | |
18890 | +/* | |
18891 | + * Safely remove the accelerator function hooks from a netfront state. | |
18892 | + */ | |
18893 | +static void accelerator_remove_hooks(struct netfront_accelerator *accelerator) | |
18894 | +{ | |
18895 | + struct netfront_accel_hooks *hooks; | |
18896 | + struct netfront_accel_vif_state *vif_state, *tmp; | |
18897 | + unsigned long flags; | |
18898 | + | |
18899 | + /* Mutex is held so don't need vif_states_lock to iterate list */ | |
18900 | + list_for_each_entry_safe(vif_state, tmp, | |
18901 | + &accelerator->vif_states, | |
18902 | + link) { | |
18903 | + spin_lock_irqsave(&accelerator->vif_states_lock, flags); | |
18904 | + | |
18905 | + if(vif_state->hooks) { | |
18906 | + hooks = vif_state->hooks; | |
18907 | + | |
18908 | + /* Last chance to get statistics from the accelerator */ | |
18909 | + hooks->get_stats(vif_state->np->netdev, | |
18910 | + &vif_state->np->stats); | |
18911 | + | |
18912 | + spin_unlock_irqrestore(&accelerator->vif_states_lock, | |
18913 | + flags); | |
18914 | + | |
18915 | + accelerator_remove_single_hook(accelerator, vif_state); | |
18916 | + | |
18917 | + accelerator->hooks->remove(vif_state->dev); | |
18918 | + } else { | |
18919 | + spin_unlock_irqrestore(&accelerator->vif_states_lock, | |
18920 | + flags); | |
18921 | + } | |
18922 | + } | |
18923 | + | |
18924 | + accelerator->hooks = NULL; | |
18925 | +} | |
18926 | + | |
18927 | + | |
18928 | +/* | |
18929 | + * Called by a netfront accelerator when it is unloaded. This safely | |
18930 | + * removes the hooks into the plugin and blocks until all devices have | |
18931 | + * finished using it, so on return it is safe to unload. | |
18932 | + */ | |
18933 | +void netfront_accelerator_stop(const char *frontend) | |
18934 | +{ | |
18935 | + struct netfront_accelerator *accelerator; | |
18936 | + unsigned long flags; | |
18937 | + | |
18938 | + mutex_lock(&accelerator_mutex); | |
18939 | + spin_lock_irqsave(&accelerators_lock, flags); | |
18940 | + | |
18941 | + list_for_each_entry(accelerator, &accelerators_list, link) { | |
18942 | + if (match_accelerator(frontend, accelerator)) { | |
18943 | + spin_unlock_irqrestore(&accelerators_lock, flags); | |
18944 | + | |
18945 | + accelerator_remove_hooks(accelerator); | |
18946 | + | |
18947 | + goto out; | |
18948 | + } | |
18949 | + } | |
18950 | + spin_unlock_irqrestore(&accelerators_lock, flags); | |
18951 | + out: | |
18952 | + mutex_unlock(&accelerator_mutex); | |
18953 | +} | |
18954 | +EXPORT_SYMBOL_GPL(netfront_accelerator_stop); | |
18955 | + | |
18956 | + | |
18957 | +/* Helper for call_remove and do_suspend */ | |
18958 | +static int do_remove(struct netfront_info *np, struct xenbus_device *dev, | |
18959 | + unsigned long *lock_flags) | |
18960 | +{ | |
18961 | + struct netfront_accelerator *accelerator = np->accelerator; | |
18962 | + struct netfront_accel_hooks *hooks; | |
18963 | + int rc = 0; | |
18964 | + | |
18965 | + if (np->accel_vif_state.hooks) { | |
18966 | + hooks = np->accel_vif_state.hooks; | |
18967 | + | |
18968 | + /* Last chance to get statistics from the accelerator */ | |
18969 | + hooks->get_stats(np->netdev, &np->stats); | |
18970 | + | |
18971 | + spin_unlock_irqrestore(&accelerator->vif_states_lock, | |
18972 | + *lock_flags); | |
18973 | + | |
18974 | + /* | |
18975 | + * Try and do the opposite of accelerator_probe_new_vif | |
18976 | + * to ensure there's no state pointing back at the | |
18977 | + * netdev | |
18978 | + */ | |
18979 | + accelerator_remove_single_hook(accelerator, | |
18980 | + &np->accel_vif_state); | |
18981 | + | |
18982 | + rc = accelerator->hooks->remove(dev); | |
18983 | + | |
18984 | + spin_lock_irqsave(&accelerator->vif_states_lock, *lock_flags); | |
18985 | + } | |
18986 | + | |
18987 | + return rc; | |
18988 | +} | |
18989 | + | |
18990 | + | |
18991 | +static int netfront_remove_accelerator(struct netfront_info *np, | |
18992 | + struct xenbus_device *dev) | |
18993 | +{ | |
18994 | + struct netfront_accelerator *accelerator; | |
18995 | + struct netfront_accel_vif_state *tmp_vif_state; | |
18996 | + unsigned long flags; | |
18997 | + int rc = 0; | |
18998 | + | |
18999 | + /* Check that we've got a device that was accelerated */ | |
19000 | + if (np->accelerator == NULL) | |
19001 | + return rc; | |
19002 | + | |
19003 | + accelerator = np->accelerator; | |
19004 | + | |
19005 | + spin_lock_irqsave(&accelerator->vif_states_lock, flags); | |
19006 | + | |
19007 | + list_for_each_entry(tmp_vif_state, &accelerator->vif_states, | |
19008 | + link) { | |
19009 | + if (tmp_vif_state == &np->accel_vif_state) { | |
19010 | + list_del(&np->accel_vif_state.link); | |
19011 | + break; | |
19012 | + } | |
19013 | + } | |
19014 | + | |
19015 | + rc = do_remove(np, dev, &flags); | |
19016 | + | |
19017 | + np->accelerator = NULL; | |
19018 | + | |
19019 | + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); | |
19020 | + | |
19021 | + return rc; | |
19022 | +} | |
19023 | + | |
19024 | + | |
19025 | +int netfront_accelerator_call_remove(struct netfront_info *np, | |
19026 | + struct xenbus_device *dev) | |
19027 | +{ | |
19028 | + int rc; | |
19029 | + netfront_accelerator_remove_watch(np); | |
19030 | + mutex_lock(&accelerator_mutex); | |
19031 | + rc = netfront_remove_accelerator(np, dev); | |
19032 | + mutex_unlock(&accelerator_mutex); | |
19033 | + return rc; | |
19034 | +} | |
19035 | + | |
19036 | + | |
19037 | +int netfront_accelerator_suspend(struct netfront_info *np, | |
19038 | + struct xenbus_device *dev) | |
19039 | +{ | |
19040 | + unsigned long flags; | |
19041 | + int rc = 0; | |
19042 | + | |
19043 | + netfront_accelerator_remove_watch(np); | |
19044 | + | |
19045 | + mutex_lock(&accelerator_mutex); | |
19046 | + | |
19047 | + /* Check that we've got a device that was accelerated */ | |
19048 | + if (np->accelerator == NULL) | |
19049 | + goto out; | |
19050 | + | |
19051 | + /* | |
19052 | + * Call the remove accelerator hook, but leave the vif_state | |
19053 | + * on the accelerator's list in case there is a suspend_cancel. | |
19054 | + */ | |
19055 | + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); | |
19056 | + | |
19057 | + rc = do_remove(np, dev, &flags); | |
19058 | + | |
19059 | + spin_unlock_irqrestore(&np->accelerator->vif_states_lock, flags); | |
19060 | + out: | |
19061 | + mutex_unlock(&accelerator_mutex); | |
19062 | + return rc; | |
19063 | +} | |
19064 | + | |
19065 | + | |
19066 | +int netfront_accelerator_suspend_cancel(struct netfront_info *np, | |
19067 | + struct xenbus_device *dev) | |
19068 | +{ | |
19069 | + /* | |
19070 | + * Setting the watch will cause it to fire and probe the | |
19071 | + * accelerator, so no need to call accelerator_probe_new_vif() | |
19072 | + * directly here | |
19073 | + */ | |
19074 | + if (dev->state == XenbusStateConnected) | |
19075 | + netfront_accelerator_add_watch(np); | |
19076 | + return 0; | |
19077 | +} | |
19078 | + | |
19079 | + | |
19080 | +void netfront_accelerator_resume(struct netfront_info *np, | |
19081 | + struct xenbus_device *dev) | |
19082 | +{ | |
19083 | + struct netfront_accel_vif_state *accel_vif_state = NULL; | |
19084 | + spinlock_t *vif_states_lock; | |
19085 | + unsigned long flags; | |
19086 | + | |
19087 | + mutex_lock(&accelerator_mutex); | |
19088 | + | |
19089 | + /* Check that we've got a device that was accelerated */ | |
19090 | + if(np->accelerator == NULL) | |
19091 | + goto out; | |
19092 | + | |
19093 | + /* Find the vif_state from the accelerator's list */ | |
19094 | + list_for_each_entry(accel_vif_state, &np->accelerator->vif_states, | |
19095 | + link) { | |
19096 | + if (accel_vif_state->dev == dev) { | |
19097 | + BUG_ON(accel_vif_state != &np->accel_vif_state); | |
19098 | + | |
19099 | + vif_states_lock = &np->accelerator->vif_states_lock; | |
19100 | + spin_lock_irqsave(vif_states_lock, flags); | |
19101 | + | |
19102 | + /* | |
19103 | + * Remove it from the accelerator's list so | |
19104 | + * state is consistent for probing new vifs | |
19105 | + * when they get connected | |
19106 | + */ | |
19107 | + list_del(&accel_vif_state->link); | |
19108 | + np->accelerator = NULL; | |
19109 | + | |
19110 | + spin_unlock_irqrestore(vif_states_lock, flags); | |
19111 | + | |
19112 | + break; | |
19113 | + } | |
19114 | + } | |
19115 | + | |
19116 | + out: | |
19117 | + mutex_unlock(&accelerator_mutex); | |
19118 | + return; | |
19119 | +} | |
19120 | + | |
19121 | + | |
19122 | +int netfront_check_accelerator_queue_ready(struct net_device *dev, | |
19123 | + struct netfront_info *np) | |
19124 | +{ | |
19125 | + struct netfront_accelerator *accelerator; | |
19126 | + struct netfront_accel_hooks *hooks; | |
19127 | + int rc = 1; | |
19128 | + unsigned long flags; | |
19129 | + | |
19130 | + accelerator = np->accelerator; | |
19131 | + | |
19132 | + /* Call the check_ready accelerator hook. */ | |
19133 | + if (np->accel_vif_state.hooks && accelerator) { | |
19134 | + spin_lock_irqsave(&accelerator->vif_states_lock, flags); | |
19135 | + hooks = np->accel_vif_state.hooks; | |
19136 | + if (hooks && np->accelerator == accelerator) | |
19137 | + rc = np->accel_vif_state.hooks->check_ready(dev); | |
19138 | + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); | |
19139 | + } | |
19140 | + | |
19141 | + return rc; | |
19142 | +} | |
19143 | + | |
19144 | + | |
19145 | +void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np, | |
19146 | + struct net_device *dev) | |
19147 | +{ | |
19148 | + struct netfront_accelerator *accelerator; | |
19149 | + struct netfront_accel_hooks *hooks; | |
19150 | + unsigned long flags; | |
19151 | + | |
19152 | + accelerator = np->accelerator; | |
19153 | + | |
19154 | + /* Call the stop_napi_interrupts accelerator hook. */ | |
19155 | + if (np->accel_vif_state.hooks && accelerator != NULL) { | |
19156 | + spin_lock_irqsave(&accelerator->vif_states_lock, flags); | |
19157 | + hooks = np->accel_vif_state.hooks; | |
19158 | + if (hooks && np->accelerator == accelerator) | |
19159 | + np->accel_vif_state.hooks->stop_napi_irq(dev); | |
19160 | + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); | |
19161 | + } | |
19162 | +} | |
19163 | + | |
19164 | + | |
19165 | +int netfront_accelerator_call_get_stats(struct netfront_info *np, | |
19166 | + struct net_device *dev) | |
19167 | +{ | |
19168 | + struct netfront_accelerator *accelerator; | |
19169 | + struct netfront_accel_hooks *hooks; | |
19170 | + unsigned long flags; | |
19171 | + int rc = 0; | |
19172 | + | |
19173 | + accelerator = np->accelerator; | |
19174 | + | |
19175 | + /* Call the get_stats accelerator hook. */ | |
19176 | + if (np->accel_vif_state.hooks && accelerator != NULL) { | |
19177 | + spin_lock_irqsave(&accelerator->vif_states_lock, flags); | |
19178 | + hooks = np->accel_vif_state.hooks; | |
19179 | + if (hooks && np->accelerator == accelerator) | |
19180 | + rc = np->accel_vif_state.hooks->get_stats(dev, | |
19181 | + &np->stats); | |
19182 | + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); | |
19183 | + } | |
19184 | + return rc; | |
19185 | +} | |
19186 | + | |
19187 | Index: head-2008-11-25/drivers/xen/netfront/netfront.c | |
19188 | =================================================================== | |
19189 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
19190 | +++ head-2008-11-25/drivers/xen/netfront/netfront.c 2008-07-21 11:00:33.000000000 +0200 | |
19191 | @@ -0,0 +1,2240 @@ | |
19192 | +/****************************************************************************** | |
19193 | + * Virtual network driver for conversing with remote driver backends. | |
19194 | + * | |
19195 | + * Copyright (c) 2002-2005, K A Fraser | |
19196 | + * Copyright (c) 2005, XenSource Ltd | |
19197 | + * Copyright (C) 2007 Solarflare Communications, Inc. | |
19198 | + * | |
19199 | + * This program is free software; you can redistribute it and/or | |
19200 | + * modify it under the terms of the GNU General Public License version 2 | |
19201 | + * as published by the Free Software Foundation; or, when distributed | |
19202 | + * separately from the Linux kernel or incorporated into other | |
19203 | + * software packages, subject to the following license: | |
19204 | + * | |
19205 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
19206 | + * of this source file (the "Software"), to deal in the Software without | |
19207 | + * restriction, including without limitation the rights to use, copy, modify, | |
19208 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
19209 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
19210 | + * the following conditions: | |
19211 | + * | |
19212 | + * The above copyright notice and this permission notice shall be included in | |
19213 | + * all copies or substantial portions of the Software. | |
19214 | + * | |
19215 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19216 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
19217 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
19218 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19219 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
19220 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
19221 | + * IN THE SOFTWARE. | |
19222 | + */ | |
19223 | + | |
19224 | +#include <linux/module.h> | |
19225 | +#include <linux/version.h> | |
19226 | +#include <linux/kernel.h> | |
19227 | +#include <linux/sched.h> | |
19228 | +#include <linux/slab.h> | |
19229 | +#include <linux/string.h> | |
19230 | +#include <linux/errno.h> | |
19231 | +#include <linux/netdevice.h> | |
19232 | +#include <linux/inetdevice.h> | |
19233 | +#include <linux/etherdevice.h> | |
19234 | +#include <linux/skbuff.h> | |
19235 | +#include <linux/init.h> | |
19236 | +#include <linux/bitops.h> | |
19237 | +#include <linux/ethtool.h> | |
19238 | +#include <linux/in.h> | |
19239 | +#include <linux/if_ether.h> | |
19240 | +#include <linux/io.h> | |
19241 | +#include <linux/moduleparam.h> | |
19242 | +#include <net/sock.h> | |
19243 | +#include <net/pkt_sched.h> | |
19244 | +#include <net/arp.h> | |
19245 | +#include <net/route.h> | |
19246 | +#include <asm/uaccess.h> | |
19247 | +#include <xen/evtchn.h> | |
19248 | +#include <xen/xenbus.h> | |
19249 | +#include <xen/interface/io/netif.h> | |
19250 | +#include <xen/interface/memory.h> | |
19251 | +#include <xen/balloon.h> | |
19252 | +#include <asm/page.h> | |
19253 | +#include <asm/maddr.h> | |
19254 | +#include <asm/uaccess.h> | |
19255 | +#include <xen/interface/grant_table.h> | |
19256 | +#include <xen/gnttab.h> | |
19257 | + | |
19258 | +struct netfront_cb { | |
19259 | + struct page *page; | |
19260 | + unsigned offset; | |
19261 | +}; | |
19262 | + | |
19263 | +#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb)) | |
19264 | + | |
19265 | +#include "netfront.h" | |
19266 | + | |
19267 | +/* | |
19268 | + * Mutually-exclusive module options to select receive data path: | |
19269 | + * rx_copy : Packets are copied by network backend into local memory | |
19270 | + * rx_flip : Page containing packet data is transferred to our ownership | |
19271 | + * For fully-virtualised guests there is no option - copying must be used. | |
19272 | + * For paravirtualised guests, flipping is the default. | |
19273 | + */ | |
19274 | +#ifdef CONFIG_XEN | |
19275 | +static int MODPARM_rx_copy = 0; | |
19276 | +module_param_named(rx_copy, MODPARM_rx_copy, bool, 0); | |
19277 | +MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)"); | |
19278 | +static int MODPARM_rx_flip = 0; | |
19279 | +module_param_named(rx_flip, MODPARM_rx_flip, bool, 0); | |
19280 | +MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)"); | |
19281 | +#else | |
19282 | +static const int MODPARM_rx_copy = 1; | |
19283 | +static const int MODPARM_rx_flip = 0; | |
19284 | +#endif | |
19285 | + | |
19286 | +#define RX_COPY_THRESHOLD 256 | |
19287 | + | |
19288 | +/* If we don't have GSO, fake things up so that we never try to use it. */ | |
19289 | +#if defined(NETIF_F_GSO) | |
19290 | +#define HAVE_GSO 1 | |
19291 | +#define HAVE_TSO 1 /* TSO is a subset of GSO */ | |
19292 | +#define HAVE_CSUM_OFFLOAD 1 | |
19293 | +static inline void dev_disable_gso_features(struct net_device *dev) | |
19294 | +{ | |
19295 | + /* Turn off all GSO bits except ROBUST. */ | |
19296 | + dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; | |
19297 | + dev->features |= NETIF_F_GSO_ROBUST; | |
19298 | +} | |
19299 | +#elif defined(NETIF_F_TSO) | |
19300 | +#define HAVE_GSO 0 | |
19301 | +#define HAVE_TSO 1 | |
19302 | + | |
19303 | +/* Some older kernels cannot cope with incorrect checksums, | |
19304 | + * particularly in netfilter. I'm not sure there is 100% correlation | |
19305 | + * with the presence of NETIF_F_TSO but it appears to be a good first | |
19306 | + * approximiation. | |
19307 | + */ | |
19308 | +#define HAVE_CSUM_OFFLOAD 0 | |
19309 | + | |
19310 | +#define gso_size tso_size | |
19311 | +#define gso_segs tso_segs | |
19312 | +static inline void dev_disable_gso_features(struct net_device *dev) | |
19313 | +{ | |
19314 | + /* Turn off all TSO bits. */ | |
19315 | + dev->features &= ~NETIF_F_TSO; | |
19316 | +} | |
19317 | +static inline int skb_is_gso(const struct sk_buff *skb) | |
19318 | +{ | |
19319 | + return skb_shinfo(skb)->tso_size; | |
19320 | +} | |
19321 | +static inline int skb_gso_ok(struct sk_buff *skb, int features) | |
19322 | +{ | |
19323 | + return (features & NETIF_F_TSO); | |
19324 | +} | |
19325 | + | |
19326 | +static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) | |
19327 | +{ | |
19328 | + return skb_is_gso(skb) && | |
19329 | + (!skb_gso_ok(skb, dev->features) || | |
19330 | + unlikely(skb->ip_summed != CHECKSUM_HW)); | |
19331 | +} | |
19332 | +#else | |
19333 | +#define HAVE_GSO 0 | |
19334 | +#define HAVE_TSO 0 | |
19335 | +#define HAVE_CSUM_OFFLOAD 0 | |
19336 | +#define netif_needs_gso(dev, skb) 0 | |
19337 | +#define dev_disable_gso_features(dev) ((void)0) | |
19338 | +#define ethtool_op_set_tso(dev, data) (-ENOSYS) | |
19339 | +#endif | |
19340 | + | |
19341 | +#define GRANT_INVALID_REF 0 | |
19342 | + | |
19343 | +struct netfront_rx_info { | |
19344 | + struct netif_rx_response rx; | |
19345 | + struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; | |
19346 | +}; | |
19347 | + | |
19348 | +/* | |
19349 | + * Implement our own carrier flag: the network stack's version causes delays | |
19350 | + * when the carrier is re-enabled (in particular, dev_activate() may not | |
19351 | + * immediately be called, which can cause packet loss). | |
19352 | + */ | |
19353 | +#define netfront_carrier_on(netif) ((netif)->carrier = 1) | |
19354 | +#define netfront_carrier_off(netif) ((netif)->carrier = 0) | |
19355 | +#define netfront_carrier_ok(netif) ((netif)->carrier) | |
19356 | + | |
19357 | +/* | |
19358 | + * Access macros for acquiring/freeing slots in tx_skbs[]. | |
19359 | + */ | |
19360 | + | |
19361 | +static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id) | |
19362 | +{ | |
19363 | + list[id] = list[0]; | |
19364 | + list[0] = (void *)(unsigned long)id; | |
19365 | +} | |
19366 | + | |
19367 | +static inline unsigned short get_id_from_freelist(struct sk_buff **list) | |
19368 | +{ | |
19369 | + unsigned int id = (unsigned int)(unsigned long)list[0]; | |
19370 | + list[0] = list[id]; | |
19371 | + return id; | |
19372 | +} | |
19373 | + | |
19374 | +static inline int xennet_rxidx(RING_IDX idx) | |
19375 | +{ | |
19376 | + return idx & (NET_RX_RING_SIZE - 1); | |
19377 | +} | |
19378 | + | |
19379 | +static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np, | |
19380 | + RING_IDX ri) | |
19381 | +{ | |
19382 | + int i = xennet_rxidx(ri); | |
19383 | + struct sk_buff *skb = np->rx_skbs[i]; | |
19384 | + np->rx_skbs[i] = NULL; | |
19385 | + return skb; | |
19386 | +} | |
19387 | + | |
19388 | +static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np, | |
19389 | + RING_IDX ri) | |
19390 | +{ | |
19391 | + int i = xennet_rxidx(ri); | |
19392 | + grant_ref_t ref = np->grant_rx_ref[i]; | |
19393 | + np->grant_rx_ref[i] = GRANT_INVALID_REF; | |
19394 | + return ref; | |
19395 | +} | |
19396 | + | |
19397 | +#define DPRINTK(fmt, args...) \ | |
19398 | + pr_debug("netfront (%s:%d) " fmt, \ | |
19399 | + __FUNCTION__, __LINE__, ##args) | |
19400 | +#define IPRINTK(fmt, args...) \ | |
19401 | + printk(KERN_INFO "netfront: " fmt, ##args) | |
19402 | +#define WPRINTK(fmt, args...) \ | |
19403 | + printk(KERN_WARNING "netfront: " fmt, ##args) | |
19404 | + | |
19405 | +static int setup_device(struct xenbus_device *, struct netfront_info *); | |
19406 | +static struct net_device *create_netdev(struct xenbus_device *); | |
19407 | + | |
19408 | +static void end_access(int, void *); | |
19409 | +static void netif_disconnect_backend(struct netfront_info *); | |
19410 | + | |
19411 | +static int network_connect(struct net_device *); | |
19412 | +static void network_tx_buf_gc(struct net_device *); | |
19413 | +static void network_alloc_rx_buffers(struct net_device *); | |
19414 | +static void send_fake_arp(struct net_device *); | |
19415 | + | |
19416 | +static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs); | |
19417 | + | |
19418 | +#ifdef CONFIG_SYSFS | |
19419 | +static int xennet_sysfs_addif(struct net_device *netdev); | |
19420 | +static void xennet_sysfs_delif(struct net_device *netdev); | |
19421 | +#else /* !CONFIG_SYSFS */ | |
19422 | +#define xennet_sysfs_addif(dev) (0) | |
19423 | +#define xennet_sysfs_delif(dev) do { } while(0) | |
19424 | +#endif | |
19425 | + | |
19426 | +static inline int xennet_can_sg(struct net_device *dev) | |
19427 | +{ | |
19428 | + return dev->features & NETIF_F_SG; | |
19429 | +} | |
19430 | + | |
19431 | +/** | |
19432 | + * Entry point to this code when a new device is created. Allocate the basic | |
19433 | + * structures and the ring buffers for communication with the backend, and | |
19434 | + * inform the backend of the appropriate details for those. | |
19435 | + */ | |
19436 | +static int __devinit netfront_probe(struct xenbus_device *dev, | |
19437 | + const struct xenbus_device_id *id) | |
19438 | +{ | |
19439 | + int err; | |
19440 | + struct net_device *netdev; | |
19441 | + struct netfront_info *info; | |
19442 | + | |
19443 | + netdev = create_netdev(dev); | |
19444 | + if (IS_ERR(netdev)) { | |
19445 | + err = PTR_ERR(netdev); | |
19446 | + xenbus_dev_fatal(dev, err, "creating netdev"); | |
19447 | + return err; | |
19448 | + } | |
19449 | + | |
19450 | + info = netdev_priv(netdev); | |
19451 | + dev->dev.driver_data = info; | |
19452 | + | |
19453 | + err = register_netdev(info->netdev); | |
19454 | + if (err) { | |
19455 | + printk(KERN_WARNING "%s: register_netdev err=%d\n", | |
19456 | + __FUNCTION__, err); | |
19457 | + goto fail; | |
19458 | + } | |
19459 | + | |
19460 | + err = xennet_sysfs_addif(info->netdev); | |
19461 | + if (err) { | |
19462 | + unregister_netdev(info->netdev); | |
19463 | + printk(KERN_WARNING "%s: add sysfs failed err=%d\n", | |
19464 | + __FUNCTION__, err); | |
19465 | + goto fail; | |
19466 | + } | |
19467 | + | |
19468 | + return 0; | |
19469 | + | |
19470 | + fail: | |
19471 | + free_netdev(netdev); | |
19472 | + dev->dev.driver_data = NULL; | |
19473 | + return err; | |
19474 | +} | |
19475 | + | |
19476 | +static int __devexit netfront_remove(struct xenbus_device *dev) | |
19477 | +{ | |
19478 | + struct netfront_info *info = dev->dev.driver_data; | |
19479 | + | |
19480 | + DPRINTK("%s\n", dev->nodename); | |
19481 | + | |
19482 | + netfront_accelerator_call_remove(info, dev); | |
19483 | + | |
19484 | + netif_disconnect_backend(info); | |
19485 | + | |
19486 | + del_timer_sync(&info->rx_refill_timer); | |
19487 | + | |
19488 | + xennet_sysfs_delif(info->netdev); | |
19489 | + | |
19490 | + unregister_netdev(info->netdev); | |
19491 | + | |
19492 | + free_netdev(info->netdev); | |
19493 | + | |
19494 | + return 0; | |
19495 | +} | |
19496 | + | |
19497 | + | |
19498 | +static int netfront_suspend(struct xenbus_device *dev) | |
19499 | +{ | |
19500 | + struct netfront_info *info = dev->dev.driver_data; | |
19501 | + return netfront_accelerator_suspend(info, dev); | |
19502 | +} | |
19503 | + | |
19504 | + | |
19505 | +static int netfront_suspend_cancel(struct xenbus_device *dev) | |
19506 | +{ | |
19507 | + struct netfront_info *info = dev->dev.driver_data; | |
19508 | + return netfront_accelerator_suspend_cancel(info, dev); | |
19509 | +} | |
19510 | + | |
19511 | + | |
19512 | +/** | |
19513 | + * We are reconnecting to the backend, due to a suspend/resume, or a backend | |
19514 | + * driver restart. We tear down our netif structure and recreate it, but | |
19515 | + * leave the device-layer structures intact so that this is transparent to the | |
19516 | + * rest of the kernel. | |
19517 | + */ | |
19518 | +static int netfront_resume(struct xenbus_device *dev) | |
19519 | +{ | |
19520 | + struct netfront_info *info = dev->dev.driver_data; | |
19521 | + | |
19522 | + DPRINTK("%s\n", dev->nodename); | |
19523 | + | |
19524 | + netfront_accelerator_resume(info, dev); | |
19525 | + | |
19526 | + netif_disconnect_backend(info); | |
19527 | + return 0; | |
19528 | +} | |
19529 | + | |
19530 | +static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) | |
19531 | +{ | |
19532 | + char *s, *e, *macstr; | |
19533 | + int i; | |
19534 | + | |
19535 | + macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); | |
19536 | + if (IS_ERR(macstr)) | |
19537 | + return PTR_ERR(macstr); | |
19538 | + | |
19539 | + for (i = 0; i < ETH_ALEN; i++) { | |
19540 | + mac[i] = simple_strtoul(s, &e, 16); | |
19541 | + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { | |
19542 | + kfree(macstr); | |
19543 | + return -ENOENT; | |
19544 | + } | |
19545 | + s = e+1; | |
19546 | + } | |
19547 | + | |
19548 | + kfree(macstr); | |
19549 | + return 0; | |
19550 | +} | |
19551 | + | |
19552 | +/* Common code used when first setting up, and when resuming. */ | |
19553 | +static int talk_to_backend(struct xenbus_device *dev, | |
19554 | + struct netfront_info *info) | |
19555 | +{ | |
19556 | + const char *message; | |
19557 | + struct xenbus_transaction xbt; | |
19558 | + int err; | |
19559 | + | |
19560 | + /* Read mac only in the first setup. */ | |
19561 | + if (!is_valid_ether_addr(info->mac)) { | |
19562 | + err = xen_net_read_mac(dev, info->mac); | |
19563 | + if (err) { | |
19564 | + xenbus_dev_fatal(dev, err, "parsing %s/mac", | |
19565 | + dev->nodename); | |
19566 | + goto out; | |
19567 | + } | |
19568 | + } | |
19569 | + | |
19570 | + /* Create shared ring, alloc event channel. */ | |
19571 | + err = setup_device(dev, info); | |
19572 | + if (err) | |
19573 | + goto out; | |
19574 | + | |
19575 | + /* This will load an accelerator if one is configured when the | |
19576 | + * watch fires */ | |
19577 | + netfront_accelerator_add_watch(info); | |
19578 | + | |
19579 | +again: | |
19580 | + err = xenbus_transaction_start(&xbt); | |
19581 | + if (err) { | |
19582 | + xenbus_dev_fatal(dev, err, "starting transaction"); | |
19583 | + goto destroy_ring; | |
19584 | + } | |
19585 | + | |
19586 | + err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u", | |
19587 | + info->tx_ring_ref); | |
19588 | + if (err) { | |
19589 | + message = "writing tx ring-ref"; | |
19590 | + goto abort_transaction; | |
19591 | + } | |
19592 | + err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u", | |
19593 | + info->rx_ring_ref); | |
19594 | + if (err) { | |
19595 | + message = "writing rx ring-ref"; | |
19596 | + goto abort_transaction; | |
19597 | + } | |
19598 | + err = xenbus_printf(xbt, dev->nodename, | |
19599 | + "event-channel", "%u", | |
19600 | + irq_to_evtchn_port(info->irq)); | |
19601 | + if (err) { | |
19602 | + message = "writing event-channel"; | |
19603 | + goto abort_transaction; | |
19604 | + } | |
19605 | + | |
19606 | + err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u", | |
19607 | + info->copying_receiver); | |
19608 | + if (err) { | |
19609 | + message = "writing request-rx-copy"; | |
19610 | + goto abort_transaction; | |
19611 | + } | |
19612 | + | |
19613 | + err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1); | |
19614 | + if (err) { | |
19615 | + message = "writing feature-rx-notify"; | |
19616 | + goto abort_transaction; | |
19617 | + } | |
19618 | + | |
19619 | + err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload", | |
19620 | + "%d", !HAVE_CSUM_OFFLOAD); | |
19621 | + if (err) { | |
19622 | + message = "writing feature-no-csum-offload"; | |
19623 | + goto abort_transaction; | |
19624 | + } | |
19625 | + | |
19626 | + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1); | |
19627 | + if (err) { | |
19628 | + message = "writing feature-sg"; | |
19629 | + goto abort_transaction; | |
19630 | + } | |
19631 | + | |
19632 | + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", | |
19633 | + HAVE_TSO); | |
19634 | + if (err) { | |
19635 | + message = "writing feature-gso-tcpv4"; | |
19636 | + goto abort_transaction; | |
19637 | + } | |
19638 | + | |
19639 | + err = xenbus_transaction_end(xbt, 0); | |
19640 | + if (err) { | |
19641 | + if (err == -EAGAIN) | |
19642 | + goto again; | |
19643 | + xenbus_dev_fatal(dev, err, "completing transaction"); | |
19644 | + goto destroy_ring; | |
19645 | + } | |
19646 | + | |
19647 | + return 0; | |
19648 | + | |
19649 | + abort_transaction: | |
19650 | + xenbus_transaction_end(xbt, 1); | |
19651 | + xenbus_dev_fatal(dev, err, "%s", message); | |
19652 | + destroy_ring: | |
19653 | + netfront_accelerator_call_remove(info, dev); | |
19654 | + netif_disconnect_backend(info); | |
19655 | + out: | |
19656 | + return err; | |
19657 | +} | |
19658 | + | |
19659 | +static int setup_device(struct xenbus_device *dev, struct netfront_info *info) | |
19660 | +{ | |
19661 | + struct netif_tx_sring *txs; | |
19662 | + struct netif_rx_sring *rxs; | |
19663 | + int err; | |
19664 | + struct net_device *netdev = info->netdev; | |
19665 | + | |
19666 | + info->tx_ring_ref = GRANT_INVALID_REF; | |
19667 | + info->rx_ring_ref = GRANT_INVALID_REF; | |
19668 | + info->rx.sring = NULL; | |
19669 | + info->tx.sring = NULL; | |
19670 | + info->irq = 0; | |
19671 | + | |
19672 | + txs = (struct netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); | |
19673 | + if (!txs) { | |
19674 | + err = -ENOMEM; | |
19675 | + xenbus_dev_fatal(dev, err, "allocating tx ring page"); | |
19676 | + goto fail; | |
19677 | + } | |
19678 | + SHARED_RING_INIT(txs); | |
19679 | + FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); | |
19680 | + | |
19681 | + err = xenbus_grant_ring(dev, virt_to_mfn(txs)); | |
19682 | + if (err < 0) { | |
19683 | + free_page((unsigned long)txs); | |
19684 | + goto fail; | |
19685 | + } | |
19686 | + info->tx_ring_ref = err; | |
19687 | + | |
19688 | + rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); | |
19689 | + if (!rxs) { | |
19690 | + err = -ENOMEM; | |
19691 | + xenbus_dev_fatal(dev, err, "allocating rx ring page"); | |
19692 | + goto fail; | |
19693 | + } | |
19694 | + SHARED_RING_INIT(rxs); | |
19695 | + FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); | |
19696 | + | |
19697 | + err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); | |
19698 | + if (err < 0) { | |
19699 | + free_page((unsigned long)rxs); | |
19700 | + goto fail; | |
19701 | + } | |
19702 | + info->rx_ring_ref = err; | |
19703 | + | |
19704 | + memcpy(netdev->dev_addr, info->mac, ETH_ALEN); | |
19705 | + | |
19706 | + err = bind_listening_port_to_irqhandler( | |
19707 | + dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name, | |
19708 | + netdev); | |
19709 | + if (err < 0) | |
19710 | + goto fail; | |
19711 | + info->irq = err; | |
19712 | + | |
19713 | + return 0; | |
19714 | + | |
19715 | + fail: | |
19716 | + return err; | |
19717 | +} | |
19718 | + | |
19719 | +/** | |
19720 | + * Callback received when the backend's state changes. | |
19721 | + */ | |
19722 | +static void backend_changed(struct xenbus_device *dev, | |
19723 | + enum xenbus_state backend_state) | |
19724 | +{ | |
19725 | + struct netfront_info *np = dev->dev.driver_data; | |
19726 | + struct net_device *netdev = np->netdev; | |
19727 | + | |
19728 | + DPRINTK("%s\n", xenbus_strstate(backend_state)); | |
19729 | + | |
19730 | + switch (backend_state) { | |
19731 | + case XenbusStateInitialising: | |
19732 | + case XenbusStateInitialised: | |
19733 | + case XenbusStateConnected: | |
19734 | + case XenbusStateReconfiguring: | |
19735 | + case XenbusStateReconfigured: | |
19736 | + case XenbusStateUnknown: | |
19737 | + case XenbusStateClosed: | |
19738 | + break; | |
19739 | + | |
19740 | + case XenbusStateInitWait: | |
19741 | + if (dev->state != XenbusStateInitialising) | |
19742 | + break; | |
19743 | + if (network_connect(netdev) != 0) | |
19744 | + break; | |
19745 | + xenbus_switch_state(dev, XenbusStateConnected); | |
19746 | + send_fake_arp(netdev); | |
19747 | + break; | |
19748 | + | |
19749 | + case XenbusStateClosing: | |
19750 | + xenbus_frontend_closed(dev); | |
19751 | + break; | |
19752 | + } | |
19753 | +} | |
19754 | + | |
19755 | +/** Send a packet on a net device to encourage switches to learn the | |
19756 | + * MAC. We send a fake ARP request. | |
19757 | + * | |
19758 | + * @param dev device | |
19759 | + * @return 0 on success, error code otherwise | |
19760 | + */ | |
19761 | +static void send_fake_arp(struct net_device *dev) | |
19762 | +{ | |
19763 | +#ifdef CONFIG_INET | |
19764 | + struct sk_buff *skb; | |
19765 | + u32 src_ip, dst_ip; | |
19766 | + | |
19767 | + dst_ip = INADDR_BROADCAST; | |
19768 | + src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK); | |
19769 | + | |
19770 | + /* No IP? Then nothing to do. */ | |
19771 | + if (src_ip == 0) | |
19772 | + return; | |
19773 | + | |
19774 | + skb = arp_create(ARPOP_REPLY, ETH_P_ARP, | |
19775 | + dst_ip, dev, src_ip, | |
19776 | + /*dst_hw*/ NULL, /*src_hw*/ NULL, | |
19777 | + /*target_hw*/ dev->dev_addr); | |
19778 | + if (skb == NULL) | |
19779 | + return; | |
19780 | + | |
19781 | + dev_queue_xmit(skb); | |
19782 | +#endif | |
19783 | +} | |
19784 | + | |
19785 | +static inline int netfront_tx_slot_available(struct netfront_info *np) | |
19786 | +{ | |
19787 | + return ((np->tx.req_prod_pvt - np->tx.rsp_cons) < | |
19788 | + (TX_MAX_TARGET - MAX_SKB_FRAGS - 2)); | |
19789 | +} | |
19790 | + | |
19791 | + | |
19792 | +static inline void network_maybe_wake_tx(struct net_device *dev) | |
19793 | +{ | |
19794 | + struct netfront_info *np = netdev_priv(dev); | |
19795 | + | |
19796 | + if (unlikely(netif_queue_stopped(dev)) && | |
19797 | + netfront_tx_slot_available(np) && | |
19798 | + likely(netif_running(dev)) && | |
19799 | + netfront_check_accelerator_queue_ready(dev, np)) | |
19800 | + netif_wake_queue(dev); | |
19801 | +} | |
19802 | + | |
19803 | + | |
19804 | +int netfront_check_queue_ready(struct net_device *dev) | |
19805 | +{ | |
19806 | + struct netfront_info *np = netdev_priv(dev); | |
19807 | + | |
19808 | + return unlikely(netif_queue_stopped(dev)) && | |
19809 | + netfront_tx_slot_available(np) && | |
19810 | + likely(netif_running(dev)); | |
19811 | +} | |
19812 | +EXPORT_SYMBOL(netfront_check_queue_ready); | |
19813 | + | |
19814 | + | |
19815 | +static int network_open(struct net_device *dev) | |
19816 | +{ | |
19817 | + struct netfront_info *np = netdev_priv(dev); | |
19818 | + | |
19819 | + memset(&np->stats, 0, sizeof(np->stats)); | |
19820 | + | |
19821 | + spin_lock_bh(&np->rx_lock); | |
19822 | + if (netfront_carrier_ok(np)) { | |
19823 | + network_alloc_rx_buffers(dev); | |
19824 | + np->rx.sring->rsp_event = np->rx.rsp_cons + 1; | |
19825 | + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){ | |
19826 | + netfront_accelerator_call_stop_napi_irq(np, dev); | |
19827 | + | |
19828 | + netif_rx_schedule(dev); | |
19829 | + } | |
19830 | + } | |
19831 | + spin_unlock_bh(&np->rx_lock); | |
19832 | + | |
19833 | + network_maybe_wake_tx(dev); | |
19834 | + | |
19835 | + return 0; | |
19836 | +} | |
19837 | + | |
19838 | +static void network_tx_buf_gc(struct net_device *dev) | |
19839 | +{ | |
19840 | + RING_IDX cons, prod; | |
19841 | + unsigned short id; | |
19842 | + struct netfront_info *np = netdev_priv(dev); | |
19843 | + struct sk_buff *skb; | |
19844 | + | |
19845 | + BUG_ON(!netfront_carrier_ok(np)); | |
19846 | + | |
19847 | + do { | |
19848 | + prod = np->tx.sring->rsp_prod; | |
19849 | + rmb(); /* Ensure we see responses up to 'rp'. */ | |
19850 | + | |
19851 | + for (cons = np->tx.rsp_cons; cons != prod; cons++) { | |
19852 | + struct netif_tx_response *txrsp; | |
19853 | + | |
19854 | + txrsp = RING_GET_RESPONSE(&np->tx, cons); | |
19855 | + if (txrsp->status == NETIF_RSP_NULL) | |
19856 | + continue; | |
19857 | + | |
19858 | + id = txrsp->id; | |
19859 | + skb = np->tx_skbs[id]; | |
19860 | + if (unlikely(gnttab_query_foreign_access( | |
19861 | + np->grant_tx_ref[id]) != 0)) { | |
19862 | + printk(KERN_ALERT "network_tx_buf_gc: warning " | |
19863 | + "-- grant still in use by backend " | |
19864 | + "domain.\n"); | |
19865 | + BUG(); | |
19866 | + } | |
19867 | + gnttab_end_foreign_access_ref(np->grant_tx_ref[id]); | |
19868 | + gnttab_release_grant_reference( | |
19869 | + &np->gref_tx_head, np->grant_tx_ref[id]); | |
19870 | + np->grant_tx_ref[id] = GRANT_INVALID_REF; | |
19871 | + add_id_to_freelist(np->tx_skbs, id); | |
19872 | + dev_kfree_skb_irq(skb); | |
19873 | + } | |
19874 | + | |
19875 | + np->tx.rsp_cons = prod; | |
19876 | + | |
19877 | + /* | |
19878 | + * Set a new event, then check for race with update of tx_cons. | |
19879 | + * Note that it is essential to schedule a callback, no matter | |
19880 | + * how few buffers are pending. Even if there is space in the | |
19881 | + * transmit ring, higher layers may be blocked because too much | |
19882 | + * data is outstanding: in such cases notification from Xen is | |
19883 | + * likely to be the only kick that we'll get. | |
19884 | + */ | |
19885 | + np->tx.sring->rsp_event = | |
19886 | + prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; | |
19887 | + mb(); | |
19888 | + } while ((cons == prod) && (prod != np->tx.sring->rsp_prod)); | |
19889 | + | |
19890 | + network_maybe_wake_tx(dev); | |
19891 | +} | |
19892 | + | |
19893 | +static void rx_refill_timeout(unsigned long data) | |
19894 | +{ | |
19895 | + struct net_device *dev = (struct net_device *)data; | |
19896 | + struct netfront_info *np = netdev_priv(dev); | |
19897 | + | |
19898 | + netfront_accelerator_call_stop_napi_irq(np, dev); | |
19899 | + | |
19900 | + netif_rx_schedule(dev); | |
19901 | +} | |
19902 | + | |
19903 | +static void network_alloc_rx_buffers(struct net_device *dev) | |
19904 | +{ | |
19905 | + unsigned short id; | |
19906 | + struct netfront_info *np = netdev_priv(dev); | |
19907 | + struct sk_buff *skb; | |
19908 | + struct page *page; | |
19909 | + int i, batch_target, notify; | |
19910 | + RING_IDX req_prod = np->rx.req_prod_pvt; | |
19911 | + struct xen_memory_reservation reservation; | |
19912 | + grant_ref_t ref; | |
19913 | + unsigned long pfn; | |
19914 | + void *vaddr; | |
19915 | + int nr_flips; | |
19916 | + netif_rx_request_t *req; | |
19917 | + | |
19918 | + if (unlikely(!netfront_carrier_ok(np))) | |
19919 | + return; | |
19920 | + | |
19921 | + /* | |
19922 | + * Allocate skbuffs greedily, even though we batch updates to the | |
19923 | + * receive ring. This creates a less bursty demand on the memory | |
19924 | + * allocator, so should reduce the chance of failed allocation requests | |
19925 | + * both for ourself and for other kernel subsystems. | |
19926 | + */ | |
19927 | + batch_target = np->rx_target - (req_prod - np->rx.rsp_cons); | |
19928 | + for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) { | |
19929 | + /* | |
19930 | + * Allocate an skb and a page. Do not use __dev_alloc_skb as | |
19931 | + * that will allocate page-sized buffers which is not | |
19932 | + * necessary here. | |
19933 | + * 16 bytes added as necessary headroom for netif_receive_skb. | |
19934 | + */ | |
19935 | + skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN, | |
19936 | + GFP_ATOMIC | __GFP_NOWARN); | |
19937 | + if (unlikely(!skb)) | |
19938 | + goto no_skb; | |
19939 | + | |
19940 | + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); | |
19941 | + if (!page) { | |
19942 | + kfree_skb(skb); | |
19943 | +no_skb: | |
19944 | + /* Any skbuffs queued for refill? Force them out. */ | |
19945 | + if (i != 0) | |
19946 | + goto refill; | |
19947 | + /* Could not allocate any skbuffs. Try again later. */ | |
19948 | + mod_timer(&np->rx_refill_timer, | |
19949 | + jiffies + (HZ/10)); | |
19950 | + break; | |
19951 | + } | |
19952 | + | |
19953 | + skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */ | |
19954 | + skb_shinfo(skb)->frags[0].page = page; | |
19955 | + skb_shinfo(skb)->nr_frags = 1; | |
19956 | + __skb_queue_tail(&np->rx_batch, skb); | |
19957 | + } | |
19958 | + | |
19959 | + /* Is the batch large enough to be worthwhile? */ | |
19960 | + if (i < (np->rx_target/2)) { | |
19961 | + if (req_prod > np->rx.sring->req_prod) | |
19962 | + goto push; | |
19963 | + return; | |
19964 | + } | |
19965 | + | |
19966 | + /* Adjust our fill target if we risked running out of buffers. */ | |
19967 | + if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) && | |
19968 | + ((np->rx_target *= 2) > np->rx_max_target)) | |
19969 | + np->rx_target = np->rx_max_target; | |
19970 | + | |
19971 | + refill: | |
19972 | + for (nr_flips = i = 0; ; i++) { | |
19973 | + if ((skb = __skb_dequeue(&np->rx_batch)) == NULL) | |
19974 | + break; | |
19975 | + | |
19976 | + skb->dev = dev; | |
19977 | + | |
19978 | + id = xennet_rxidx(req_prod + i); | |
19979 | + | |
19980 | + BUG_ON(np->rx_skbs[id]); | |
19981 | + np->rx_skbs[id] = skb; | |
19982 | + | |
19983 | + ref = gnttab_claim_grant_reference(&np->gref_rx_head); | |
19984 | + BUG_ON((signed short)ref < 0); | |
19985 | + np->grant_rx_ref[id] = ref; | |
19986 | + | |
19987 | + pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page); | |
19988 | + vaddr = page_address(skb_shinfo(skb)->frags[0].page); | |
19989 | + | |
19990 | + req = RING_GET_REQUEST(&np->rx, req_prod + i); | |
19991 | + if (!np->copying_receiver) { | |
19992 | + gnttab_grant_foreign_transfer_ref(ref, | |
19993 | + np->xbdev->otherend_id, | |
19994 | + pfn); | |
19995 | + np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn); | |
19996 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
19997 | + /* Remove this page before passing | |
19998 | + * back to Xen. */ | |
19999 | + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | |
20000 | + MULTI_update_va_mapping(np->rx_mcl+i, | |
20001 | + (unsigned long)vaddr, | |
20002 | + __pte(0), 0); | |
20003 | + } | |
20004 | + nr_flips++; | |
20005 | + } else { | |
20006 | + gnttab_grant_foreign_access_ref(ref, | |
20007 | + np->xbdev->otherend_id, | |
20008 | + pfn_to_mfn(pfn), | |
20009 | + 0); | |
20010 | + } | |
20011 | + | |
20012 | + req->id = id; | |
20013 | + req->gref = ref; | |
20014 | + } | |
20015 | + | |
20016 | + if ( nr_flips != 0 ) { | |
20017 | + /* Tell the ballon driver what is going on. */ | |
20018 | + balloon_update_driver_allowance(i); | |
20019 | + | |
20020 | + set_xen_guest_handle(reservation.extent_start, | |
20021 | + np->rx_pfn_array); | |
20022 | + reservation.nr_extents = nr_flips; | |
20023 | + reservation.extent_order = 0; | |
20024 | + reservation.address_bits = 0; | |
20025 | + reservation.domid = DOMID_SELF; | |
20026 | + | |
20027 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
20028 | + /* After all PTEs have been zapped, flush the TLB. */ | |
20029 | + np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = | |
20030 | + UVMF_TLB_FLUSH|UVMF_ALL; | |
20031 | + | |
20032 | + /* Give away a batch of pages. */ | |
20033 | + np->rx_mcl[i].op = __HYPERVISOR_memory_op; | |
20034 | + np->rx_mcl[i].args[0] = XENMEM_decrease_reservation; | |
20035 | + np->rx_mcl[i].args[1] = (unsigned long)&reservation; | |
20036 | + | |
20037 | + /* Zap PTEs and give away pages in one big | |
20038 | + * multicall. */ | |
20039 | + if (unlikely(HYPERVISOR_multicall(np->rx_mcl, i+1))) | |
20040 | + BUG(); | |
20041 | + | |
20042 | + /* Check return status of HYPERVISOR_memory_op(). */ | |
20043 | + if (unlikely(np->rx_mcl[i].result != i)) | |
20044 | + panic("Unable to reduce memory reservation\n"); | |
20045 | + while (nr_flips--) | |
20046 | + BUG_ON(np->rx_mcl[nr_flips].result); | |
20047 | + } else { | |
20048 | + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, | |
20049 | + &reservation) != i) | |
20050 | + panic("Unable to reduce memory reservation\n"); | |
20051 | + } | |
20052 | + } else { | |
20053 | + wmb(); | |
20054 | + } | |
20055 | + | |
20056 | + /* Above is a suitable barrier to ensure backend will see requests. */ | |
20057 | + np->rx.req_prod_pvt = req_prod + i; | |
20058 | + push: | |
20059 | + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify); | |
20060 | + if (notify) | |
20061 | + notify_remote_via_irq(np->irq); | |
20062 | +} | |
20063 | + | |
20064 | +static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, | |
20065 | + struct netif_tx_request *tx) | |
20066 | +{ | |
20067 | + struct netfront_info *np = netdev_priv(dev); | |
20068 | + char *data = skb->data; | |
20069 | + unsigned long mfn; | |
20070 | + RING_IDX prod = np->tx.req_prod_pvt; | |
20071 | + int frags = skb_shinfo(skb)->nr_frags; | |
20072 | + unsigned int offset = offset_in_page(data); | |
20073 | + unsigned int len = skb_headlen(skb); | |
20074 | + unsigned int id; | |
20075 | + grant_ref_t ref; | |
20076 | + int i; | |
20077 | + | |
20078 | + while (len > PAGE_SIZE - offset) { | |
20079 | + tx->size = PAGE_SIZE - offset; | |
20080 | + tx->flags |= NETTXF_more_data; | |
20081 | + len -= tx->size; | |
20082 | + data += tx->size; | |
20083 | + offset = 0; | |
20084 | + | |
20085 | + id = get_id_from_freelist(np->tx_skbs); | |
20086 | + np->tx_skbs[id] = skb_get(skb); | |
20087 | + tx = RING_GET_REQUEST(&np->tx, prod++); | |
20088 | + tx->id = id; | |
20089 | + ref = gnttab_claim_grant_reference(&np->gref_tx_head); | |
20090 | + BUG_ON((signed short)ref < 0); | |
20091 | + | |
20092 | + mfn = virt_to_mfn(data); | |
20093 | + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, | |
20094 | + mfn, GTF_readonly); | |
20095 | + | |
20096 | + tx->gref = np->grant_tx_ref[id] = ref; | |
20097 | + tx->offset = offset; | |
20098 | + tx->size = len; | |
20099 | + tx->flags = 0; | |
20100 | + } | |
20101 | + | |
20102 | + for (i = 0; i < frags; i++) { | |
20103 | + skb_frag_t *frag = skb_shinfo(skb)->frags + i; | |
20104 | + | |
20105 | + tx->flags |= NETTXF_more_data; | |
20106 | + | |
20107 | + id = get_id_from_freelist(np->tx_skbs); | |
20108 | + np->tx_skbs[id] = skb_get(skb); | |
20109 | + tx = RING_GET_REQUEST(&np->tx, prod++); | |
20110 | + tx->id = id; | |
20111 | + ref = gnttab_claim_grant_reference(&np->gref_tx_head); | |
20112 | + BUG_ON((signed short)ref < 0); | |
20113 | + | |
20114 | + mfn = pfn_to_mfn(page_to_pfn(frag->page)); | |
20115 | + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, | |
20116 | + mfn, GTF_readonly); | |
20117 | + | |
20118 | + tx->gref = np->grant_tx_ref[id] = ref; | |
20119 | + tx->offset = frag->page_offset; | |
20120 | + tx->size = frag->size; | |
20121 | + tx->flags = 0; | |
20122 | + } | |
20123 | + | |
20124 | + np->tx.req_prod_pvt = prod; | |
20125 | +} | |
20126 | + | |
20127 | +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) | |
20128 | +{ | |
20129 | + unsigned short id; | |
20130 | + struct netfront_info *np = netdev_priv(dev); | |
20131 | + struct netif_tx_request *tx; | |
20132 | + struct netif_extra_info *extra; | |
20133 | + char *data = skb->data; | |
20134 | + RING_IDX i; | |
20135 | + grant_ref_t ref; | |
20136 | + unsigned long mfn; | |
20137 | + int notify; | |
20138 | + int frags = skb_shinfo(skb)->nr_frags; | |
20139 | + unsigned int offset = offset_in_page(data); | |
20140 | + unsigned int len = skb_headlen(skb); | |
20141 | + | |
20142 | + /* Check the fast path, if hooks are available */ | |
20143 | + if (np->accel_vif_state.hooks && | |
20144 | + np->accel_vif_state.hooks->start_xmit(skb, dev)) { | |
20145 | + /* Fast path has sent this packet */ | |
20146 | + return 0; | |
20147 | + } | |
20148 | + | |
20149 | + frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; | |
20150 | + if (unlikely(frags > MAX_SKB_FRAGS + 1)) { | |
20151 | + printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", | |
20152 | + frags); | |
20153 | + dump_stack(); | |
20154 | + goto drop; | |
20155 | + } | |
20156 | + | |
20157 | + spin_lock_irq(&np->tx_lock); | |
20158 | + | |
20159 | + if (unlikely(!netfront_carrier_ok(np) || | |
20160 | + (frags > 1 && !xennet_can_sg(dev)) || | |
20161 | + netif_needs_gso(dev, skb))) { | |
20162 | + spin_unlock_irq(&np->tx_lock); | |
20163 | + goto drop; | |
20164 | + } | |
20165 | + | |
20166 | + i = np->tx.req_prod_pvt; | |
20167 | + | |
20168 | + id = get_id_from_freelist(np->tx_skbs); | |
20169 | + np->tx_skbs[id] = skb; | |
20170 | + | |
20171 | + tx = RING_GET_REQUEST(&np->tx, i); | |
20172 | + | |
20173 | + tx->id = id; | |
20174 | + ref = gnttab_claim_grant_reference(&np->gref_tx_head); | |
20175 | + BUG_ON((signed short)ref < 0); | |
20176 | + mfn = virt_to_mfn(data); | |
20177 | + gnttab_grant_foreign_access_ref( | |
20178 | + ref, np->xbdev->otherend_id, mfn, GTF_readonly); | |
20179 | + tx->gref = np->grant_tx_ref[id] = ref; | |
20180 | + tx->offset = offset; | |
20181 | + tx->size = len; | |
20182 | + | |
20183 | + tx->flags = 0; | |
20184 | + extra = NULL; | |
20185 | + | |
20186 | + if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ | |
20187 | + tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; | |
20188 | +#ifdef CONFIG_XEN | |
20189 | + if (skb->proto_data_valid) /* remote but checksummed? */ | |
20190 | + tx->flags |= NETTXF_data_validated; | |
20191 | +#endif | |
20192 | + | |
20193 | +#if HAVE_TSO | |
20194 | + if (skb_shinfo(skb)->gso_size) { | |
20195 | + struct netif_extra_info *gso = (struct netif_extra_info *) | |
20196 | + RING_GET_REQUEST(&np->tx, ++i); | |
20197 | + | |
20198 | + if (extra) | |
20199 | + extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; | |
20200 | + else | |
20201 | + tx->flags |= NETTXF_extra_info; | |
20202 | + | |
20203 | + gso->u.gso.size = skb_shinfo(skb)->gso_size; | |
20204 | + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; | |
20205 | + gso->u.gso.pad = 0; | |
20206 | + gso->u.gso.features = 0; | |
20207 | + | |
20208 | + gso->type = XEN_NETIF_EXTRA_TYPE_GSO; | |
20209 | + gso->flags = 0; | |
20210 | + extra = gso; | |
20211 | + } | |
20212 | +#endif | |
20213 | + | |
20214 | + np->tx.req_prod_pvt = i + 1; | |
20215 | + | |
20216 | + xennet_make_frags(skb, dev, tx); | |
20217 | + tx->size = skb->len; | |
20218 | + | |
20219 | + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); | |
20220 | + if (notify) | |
20221 | + notify_remote_via_irq(np->irq); | |
20222 | + | |
20223 | + np->stats.tx_bytes += skb->len; | |
20224 | + np->stats.tx_packets++; | |
20225 | + dev->trans_start = jiffies; | |
20226 | + | |
20227 | + /* Note: It is not safe to access skb after network_tx_buf_gc()! */ | |
20228 | + network_tx_buf_gc(dev); | |
20229 | + | |
20230 | + if (!netfront_tx_slot_available(np)) | |
20231 | + netif_stop_queue(dev); | |
20232 | + | |
20233 | + spin_unlock_irq(&np->tx_lock); | |
20234 | + | |
20235 | + return 0; | |
20236 | + | |
20237 | + drop: | |
20238 | + np->stats.tx_dropped++; | |
20239 | + dev_kfree_skb(skb); | |
20240 | + return 0; | |
20241 | +} | |
20242 | + | |
20243 | +static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs) | |
20244 | +{ | |
20245 | + struct net_device *dev = dev_id; | |
20246 | + struct netfront_info *np = netdev_priv(dev); | |
20247 | + unsigned long flags; | |
20248 | + | |
20249 | + spin_lock_irqsave(&np->tx_lock, flags); | |
20250 | + | |
20251 | + if (likely(netfront_carrier_ok(np))) { | |
20252 | + network_tx_buf_gc(dev); | |
20253 | + /* Under tx_lock: protects access to rx shared-ring indexes. */ | |
20254 | + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) { | |
20255 | + netfront_accelerator_call_stop_napi_irq(np, dev); | |
20256 | + | |
20257 | + netif_rx_schedule(dev); | |
20258 | + dev->last_rx = jiffies; | |
20259 | + } | |
20260 | + } | |
20261 | + | |
20262 | + spin_unlock_irqrestore(&np->tx_lock, flags); | |
20263 | + | |
20264 | + return IRQ_HANDLED; | |
20265 | +} | |
20266 | + | |
20267 | +static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb, | |
20268 | + grant_ref_t ref) | |
20269 | +{ | |
20270 | + int new = xennet_rxidx(np->rx.req_prod_pvt); | |
20271 | + | |
20272 | + BUG_ON(np->rx_skbs[new]); | |
20273 | + np->rx_skbs[new] = skb; | |
20274 | + np->grant_rx_ref[new] = ref; | |
20275 | + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; | |
20276 | + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; | |
20277 | + np->rx.req_prod_pvt++; | |
20278 | +} | |
20279 | + | |
20280 | +int xennet_get_extras(struct netfront_info *np, | |
20281 | + struct netif_extra_info *extras, RING_IDX rp) | |
20282 | + | |
20283 | +{ | |
20284 | + struct netif_extra_info *extra; | |
20285 | + RING_IDX cons = np->rx.rsp_cons; | |
20286 | + int err = 0; | |
20287 | + | |
20288 | + do { | |
20289 | + struct sk_buff *skb; | |
20290 | + grant_ref_t ref; | |
20291 | + | |
20292 | + if (unlikely(cons + 1 == rp)) { | |
20293 | + if (net_ratelimit()) | |
20294 | + WPRINTK("Missing extra info\n"); | |
20295 | + err = -EBADR; | |
20296 | + break; | |
20297 | + } | |
20298 | + | |
20299 | + extra = (struct netif_extra_info *) | |
20300 | + RING_GET_RESPONSE(&np->rx, ++cons); | |
20301 | + | |
20302 | + if (unlikely(!extra->type || | |
20303 | + extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { | |
20304 | + if (net_ratelimit()) | |
20305 | + WPRINTK("Invalid extra type: %d\n", | |
20306 | + extra->type); | |
20307 | + err = -EINVAL; | |
20308 | + } else { | |
20309 | + memcpy(&extras[extra->type - 1], extra, | |
20310 | + sizeof(*extra)); | |
20311 | + } | |
20312 | + | |
20313 | + skb = xennet_get_rx_skb(np, cons); | |
20314 | + ref = xennet_get_rx_ref(np, cons); | |
20315 | + xennet_move_rx_slot(np, skb, ref); | |
20316 | + } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); | |
20317 | + | |
20318 | + np->rx.rsp_cons = cons; | |
20319 | + return err; | |
20320 | +} | |
20321 | + | |
20322 | +static int xennet_get_responses(struct netfront_info *np, | |
20323 | + struct netfront_rx_info *rinfo, RING_IDX rp, | |
20324 | + struct sk_buff_head *list, | |
20325 | + int *pages_flipped_p) | |
20326 | +{ | |
20327 | + int pages_flipped = *pages_flipped_p; | |
20328 | + struct mmu_update *mmu; | |
20329 | + struct multicall_entry *mcl; | |
20330 | + struct netif_rx_response *rx = &rinfo->rx; | |
20331 | + struct netif_extra_info *extras = rinfo->extras; | |
20332 | + RING_IDX cons = np->rx.rsp_cons; | |
20333 | + struct sk_buff *skb = xennet_get_rx_skb(np, cons); | |
20334 | + grant_ref_t ref = xennet_get_rx_ref(np, cons); | |
20335 | + int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); | |
20336 | + int frags = 1; | |
20337 | + int err = 0; | |
20338 | + unsigned long ret; | |
20339 | + | |
20340 | + if (rx->flags & NETRXF_extra_info) { | |
20341 | + err = xennet_get_extras(np, extras, rp); | |
20342 | + cons = np->rx.rsp_cons; | |
20343 | + } | |
20344 | + | |
20345 | + for (;;) { | |
20346 | + unsigned long mfn; | |
20347 | + | |
20348 | + if (unlikely(rx->status < 0 || | |
20349 | + rx->offset + rx->status > PAGE_SIZE)) { | |
20350 | + if (net_ratelimit()) | |
20351 | + WPRINTK("rx->offset: %x, size: %u\n", | |
20352 | + rx->offset, rx->status); | |
20353 | + xennet_move_rx_slot(np, skb, ref); | |
20354 | + err = -EINVAL; | |
20355 | + goto next; | |
20356 | + } | |
20357 | + | |
20358 | + /* | |
20359 | + * This definitely indicates a bug, either in this driver or in | |
20360 | + * the backend driver. In future this should flag the bad | |
20361 | + * situation to the system controller to reboot the backed. | |
20362 | + */ | |
20363 | + if (ref == GRANT_INVALID_REF) { | |
20364 | + if (net_ratelimit()) | |
20365 | + WPRINTK("Bad rx response id %d.\n", rx->id); | |
20366 | + err = -EINVAL; | |
20367 | + goto next; | |
20368 | + } | |
20369 | + | |
20370 | + if (!np->copying_receiver) { | |
20371 | + /* Memory pressure, insufficient buffer | |
20372 | + * headroom, ... */ | |
20373 | + if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) { | |
20374 | + if (net_ratelimit()) | |
20375 | + WPRINTK("Unfulfilled rx req " | |
20376 | + "(id=%d, st=%d).\n", | |
20377 | + rx->id, rx->status); | |
20378 | + xennet_move_rx_slot(np, skb, ref); | |
20379 | + err = -ENOMEM; | |
20380 | + goto next; | |
20381 | + } | |
20382 | + | |
20383 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
20384 | + /* Remap the page. */ | |
20385 | + struct page *page = | |
20386 | + skb_shinfo(skb)->frags[0].page; | |
20387 | + unsigned long pfn = page_to_pfn(page); | |
20388 | + void *vaddr = page_address(page); | |
20389 | + | |
20390 | + mcl = np->rx_mcl + pages_flipped; | |
20391 | + mmu = np->rx_mmu + pages_flipped; | |
20392 | + | |
20393 | + MULTI_update_va_mapping(mcl, | |
20394 | + (unsigned long)vaddr, | |
20395 | + pfn_pte_ma(mfn, | |
20396 | + PAGE_KERNEL), | |
20397 | + 0); | |
20398 | + mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) | |
20399 | + | MMU_MACHPHYS_UPDATE; | |
20400 | + mmu->val = pfn; | |
20401 | + | |
20402 | + set_phys_to_machine(pfn, mfn); | |
20403 | + } | |
20404 | + pages_flipped++; | |
20405 | + } else { | |
20406 | + ret = gnttab_end_foreign_access_ref(ref); | |
20407 | + BUG_ON(!ret); | |
20408 | + } | |
20409 | + | |
20410 | + gnttab_release_grant_reference(&np->gref_rx_head, ref); | |
20411 | + | |
20412 | + __skb_queue_tail(list, skb); | |
20413 | + | |
20414 | +next: | |
20415 | + if (!(rx->flags & NETRXF_more_data)) | |
20416 | + break; | |
20417 | + | |
20418 | + if (cons + frags == rp) { | |
20419 | + if (net_ratelimit()) | |
20420 | + WPRINTK("Need more frags\n"); | |
20421 | + err = -ENOENT; | |
20422 | + break; | |
20423 | + } | |
20424 | + | |
20425 | + rx = RING_GET_RESPONSE(&np->rx, cons + frags); | |
20426 | + skb = xennet_get_rx_skb(np, cons + frags); | |
20427 | + ref = xennet_get_rx_ref(np, cons + frags); | |
20428 | + frags++; | |
20429 | + } | |
20430 | + | |
20431 | + if (unlikely(frags > max)) { | |
20432 | + if (net_ratelimit()) | |
20433 | + WPRINTK("Too many frags\n"); | |
20434 | + err = -E2BIG; | |
20435 | + } | |
20436 | + | |
20437 | + if (unlikely(err)) | |
20438 | + np->rx.rsp_cons = cons + frags; | |
20439 | + | |
20440 | + *pages_flipped_p = pages_flipped; | |
20441 | + | |
20442 | + return err; | |
20443 | +} | |
20444 | + | |
20445 | +static RING_IDX xennet_fill_frags(struct netfront_info *np, | |
20446 | + struct sk_buff *skb, | |
20447 | + struct sk_buff_head *list) | |
20448 | +{ | |
20449 | + struct skb_shared_info *shinfo = skb_shinfo(skb); | |
20450 | + int nr_frags = shinfo->nr_frags; | |
20451 | + RING_IDX cons = np->rx.rsp_cons; | |
20452 | + skb_frag_t *frag = shinfo->frags + nr_frags; | |
20453 | + struct sk_buff *nskb; | |
20454 | + | |
20455 | + while ((nskb = __skb_dequeue(list))) { | |
20456 | + struct netif_rx_response *rx = | |
20457 | + RING_GET_RESPONSE(&np->rx, ++cons); | |
20458 | + | |
20459 | + frag->page = skb_shinfo(nskb)->frags[0].page; | |
20460 | + frag->page_offset = rx->offset; | |
20461 | + frag->size = rx->status; | |
20462 | + | |
20463 | + skb->data_len += rx->status; | |
20464 | + | |
20465 | + skb_shinfo(nskb)->nr_frags = 0; | |
20466 | + kfree_skb(nskb); | |
20467 | + | |
20468 | + frag++; | |
20469 | + nr_frags++; | |
20470 | + } | |
20471 | + | |
20472 | + shinfo->nr_frags = nr_frags; | |
20473 | + return cons; | |
20474 | +} | |
20475 | + | |
20476 | +static int xennet_set_skb_gso(struct sk_buff *skb, | |
20477 | + struct netif_extra_info *gso) | |
20478 | +{ | |
20479 | + if (!gso->u.gso.size) { | |
20480 | + if (net_ratelimit()) | |
20481 | + WPRINTK("GSO size must not be zero.\n"); | |
20482 | + return -EINVAL; | |
20483 | + } | |
20484 | + | |
20485 | + /* Currently only TCPv4 S.O. is supported. */ | |
20486 | + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { | |
20487 | + if (net_ratelimit()) | |
20488 | + WPRINTK("Bad GSO type %d.\n", gso->u.gso.type); | |
20489 | + return -EINVAL; | |
20490 | + } | |
20491 | + | |
20492 | +#if HAVE_TSO | |
20493 | + skb_shinfo(skb)->gso_size = gso->u.gso.size; | |
20494 | +#if HAVE_GSO | |
20495 | + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; | |
20496 | + | |
20497 | + /* Header must be checked, and gso_segs computed. */ | |
20498 | + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; | |
20499 | +#endif | |
20500 | + skb_shinfo(skb)->gso_segs = 0; | |
20501 | + | |
20502 | + return 0; | |
20503 | +#else | |
20504 | + if (net_ratelimit()) | |
20505 | + WPRINTK("GSO unsupported by this kernel.\n"); | |
20506 | + return -EINVAL; | |
20507 | +#endif | |
20508 | +} | |
20509 | + | |
20510 | +static int netif_poll(struct net_device *dev, int *pbudget) | |
20511 | +{ | |
20512 | + struct netfront_info *np = netdev_priv(dev); | |
20513 | + struct sk_buff *skb; | |
20514 | + struct netfront_rx_info rinfo; | |
20515 | + struct netif_rx_response *rx = &rinfo.rx; | |
20516 | + struct netif_extra_info *extras = rinfo.extras; | |
20517 | + RING_IDX i, rp; | |
20518 | + struct multicall_entry *mcl; | |
20519 | + int work_done, budget, more_to_do = 1, accel_more_to_do = 1; | |
20520 | + struct sk_buff_head rxq; | |
20521 | + struct sk_buff_head errq; | |
20522 | + struct sk_buff_head tmpq; | |
20523 | + unsigned long flags; | |
20524 | + unsigned int len; | |
20525 | + int pages_flipped = 0; | |
20526 | + int err; | |
20527 | + | |
20528 | + spin_lock(&np->rx_lock); /* no need for spin_lock_bh() in ->poll() */ | |
20529 | + | |
20530 | + if (unlikely(!netfront_carrier_ok(np))) { | |
20531 | + spin_unlock(&np->rx_lock); | |
20532 | + return 0; | |
20533 | + } | |
20534 | + | |
20535 | + skb_queue_head_init(&rxq); | |
20536 | + skb_queue_head_init(&errq); | |
20537 | + skb_queue_head_init(&tmpq); | |
20538 | + | |
20539 | + if ((budget = *pbudget) > dev->quota) | |
20540 | + budget = dev->quota; | |
20541 | + rp = np->rx.sring->rsp_prod; | |
20542 | + rmb(); /* Ensure we see queued responses up to 'rp'. */ | |
20543 | + | |
20544 | + i = np->rx.rsp_cons; | |
20545 | + work_done = 0; | |
20546 | + while ((i != rp) && (work_done < budget)) { | |
20547 | + memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); | |
20548 | + memset(extras, 0, sizeof(rinfo.extras)); | |
20549 | + | |
20550 | + err = xennet_get_responses(np, &rinfo, rp, &tmpq, | |
20551 | + &pages_flipped); | |
20552 | + | |
20553 | + if (unlikely(err)) { | |
20554 | +err: | |
20555 | + while ((skb = __skb_dequeue(&tmpq))) | |
20556 | + __skb_queue_tail(&errq, skb); | |
20557 | + np->stats.rx_errors++; | |
20558 | + i = np->rx.rsp_cons; | |
20559 | + continue; | |
20560 | + } | |
20561 | + | |
20562 | + skb = __skb_dequeue(&tmpq); | |
20563 | + | |
20564 | + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { | |
20565 | + struct netif_extra_info *gso; | |
20566 | + gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; | |
20567 | + | |
20568 | + if (unlikely(xennet_set_skb_gso(skb, gso))) { | |
20569 | + __skb_queue_head(&tmpq, skb); | |
20570 | + np->rx.rsp_cons += skb_queue_len(&tmpq); | |
20571 | + goto err; | |
20572 | + } | |
20573 | + } | |
20574 | + | |
20575 | + NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page; | |
20576 | + NETFRONT_SKB_CB(skb)->offset = rx->offset; | |
20577 | + | |
20578 | + len = rx->status; | |
20579 | + if (len > RX_COPY_THRESHOLD) | |
20580 | + len = RX_COPY_THRESHOLD; | |
20581 | + skb_put(skb, len); | |
20582 | + | |
20583 | + if (rx->status > len) { | |
20584 | + skb_shinfo(skb)->frags[0].page_offset = | |
20585 | + rx->offset + len; | |
20586 | + skb_shinfo(skb)->frags[0].size = rx->status - len; | |
20587 | + skb->data_len = rx->status - len; | |
20588 | + } else { | |
20589 | + skb_shinfo(skb)->frags[0].page = NULL; | |
20590 | + skb_shinfo(skb)->nr_frags = 0; | |
20591 | + } | |
20592 | + | |
20593 | + i = xennet_fill_frags(np, skb, &tmpq); | |
20594 | + | |
20595 | + /* | |
20596 | + * Truesize must approximates the size of true data plus | |
20597 | + * any supervisor overheads. Adding hypervisor overheads | |
20598 | + * has been shown to significantly reduce achievable | |
20599 | + * bandwidth with the default receive buffer size. It is | |
20600 | + * therefore not wise to account for it here. | |
20601 | + * | |
20602 | + * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to | |
20603 | + * RX_COPY_THRESHOLD + the supervisor overheads. Here, we | |
20604 | + * add the size of the data pulled in xennet_fill_frags(). | |
20605 | + * | |
20606 | + * We also adjust for any unused space in the main data | |
20607 | + * area by subtracting (RX_COPY_THRESHOLD - len). This is | |
20608 | + * especially important with drivers which split incoming | |
20609 | + * packets into header and data, using only 66 bytes of | |
20610 | + * the main data area (see the e1000 driver for example.) | |
20611 | + * On such systems, without this last adjustement, our | |
20612 | + * achievable receive throughout using the standard receive | |
20613 | + * buffer size was cut by 25%(!!!). | |
20614 | + */ | |
20615 | + skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len); | |
20616 | + skb->len += skb->data_len; | |
20617 | + | |
20618 | + /* | |
20619 | + * Old backends do not assert data_validated but we | |
20620 | + * can infer it from csum_blank so test both flags. | |
20621 | + */ | |
20622 | + if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) | |
20623 | + skb->ip_summed = CHECKSUM_UNNECESSARY; | |
20624 | + else | |
20625 | + skb->ip_summed = CHECKSUM_NONE; | |
20626 | +#ifdef CONFIG_XEN | |
20627 | + skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE); | |
20628 | + skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank); | |
20629 | +#endif | |
20630 | + np->stats.rx_packets++; | |
20631 | + np->stats.rx_bytes += skb->len; | |
20632 | + | |
20633 | + __skb_queue_tail(&rxq, skb); | |
20634 | + | |
20635 | + np->rx.rsp_cons = ++i; | |
20636 | + work_done++; | |
20637 | + } | |
20638 | + | |
20639 | + if (pages_flipped) { | |
20640 | + /* Some pages are no longer absent... */ | |
20641 | + balloon_update_driver_allowance(-pages_flipped); | |
20642 | + | |
20643 | + /* Do all the remapping work and M2P updates. */ | |
20644 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
20645 | + mcl = np->rx_mcl + pages_flipped; | |
20646 | + mcl->op = __HYPERVISOR_mmu_update; | |
20647 | + mcl->args[0] = (unsigned long)np->rx_mmu; | |
20648 | + mcl->args[1] = pages_flipped; | |
20649 | + mcl->args[2] = 0; | |
20650 | + mcl->args[3] = DOMID_SELF; | |
20651 | + err = HYPERVISOR_multicall_check(np->rx_mcl, | |
20652 | + pages_flipped + 1, | |
20653 | + NULL); | |
20654 | + BUG_ON(err); | |
20655 | + } | |
20656 | + } | |
20657 | + | |
20658 | + while ((skb = __skb_dequeue(&errq))) | |
20659 | + kfree_skb(skb); | |
20660 | + | |
20661 | + while ((skb = __skb_dequeue(&rxq)) != NULL) { | |
20662 | + struct page *page = NETFRONT_SKB_CB(skb)->page; | |
20663 | + void *vaddr = page_address(page); | |
20664 | + unsigned offset = NETFRONT_SKB_CB(skb)->offset; | |
20665 | + | |
20666 | + memcpy(skb->data, vaddr + offset, skb_headlen(skb)); | |
20667 | + | |
20668 | + if (page != skb_shinfo(skb)->frags[0].page) | |
20669 | + __free_page(page); | |
20670 | + | |
20671 | + /* Ethernet work: Delayed to here as it peeks the header. */ | |
20672 | + skb->protocol = eth_type_trans(skb, dev); | |
20673 | + | |
20674 | + /* Pass it up. */ | |
20675 | + netif_receive_skb(skb); | |
20676 | + dev->last_rx = jiffies; | |
20677 | + } | |
20678 | + | |
20679 | + /* If we get a callback with very few responses, reduce fill target. */ | |
20680 | + /* NB. Note exponential increase, linear decrease. */ | |
20681 | + if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > | |
20682 | + ((3*np->rx_target) / 4)) && | |
20683 | + (--np->rx_target < np->rx_min_target)) | |
20684 | + np->rx_target = np->rx_min_target; | |
20685 | + | |
20686 | + network_alloc_rx_buffers(dev); | |
20687 | + | |
20688 | + if (work_done < budget) { | |
20689 | + /* there's some spare capacity, try the accelerated path */ | |
20690 | + int accel_budget = budget - work_done; | |
20691 | + int accel_budget_start = accel_budget; | |
20692 | + | |
20693 | + if (np->accel_vif_state.hooks) { | |
20694 | + accel_more_to_do = | |
20695 | + np->accel_vif_state.hooks->netdev_poll | |
20696 | + (dev, &accel_budget); | |
20697 | + work_done += (accel_budget_start - accel_budget); | |
20698 | + } else | |
20699 | + accel_more_to_do = 0; | |
20700 | + } | |
20701 | + | |
20702 | + *pbudget -= work_done; | |
20703 | + dev->quota -= work_done; | |
20704 | + | |
20705 | + if (work_done < budget) { | |
20706 | + local_irq_save(flags); | |
20707 | + | |
20708 | + RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do); | |
20709 | + | |
20710 | + if (!more_to_do && !accel_more_to_do && | |
20711 | + np->accel_vif_state.hooks) { | |
20712 | + /* | |
20713 | + * Slow path has nothing more to do, see if | |
20714 | + * fast path is likewise | |
20715 | + */ | |
20716 | + accel_more_to_do = | |
20717 | + np->accel_vif_state.hooks->start_napi_irq(dev); | |
20718 | + } | |
20719 | + | |
20720 | + if (!more_to_do && !accel_more_to_do) | |
20721 | + __netif_rx_complete(dev); | |
20722 | + | |
20723 | + local_irq_restore(flags); | |
20724 | + } | |
20725 | + | |
20726 | + spin_unlock(&np->rx_lock); | |
20727 | + | |
20728 | + return more_to_do | accel_more_to_do; | |
20729 | +} | |
20730 | + | |
20731 | +static void netif_release_tx_bufs(struct netfront_info *np) | |
20732 | +{ | |
20733 | + struct sk_buff *skb; | |
20734 | + int i; | |
20735 | + | |
20736 | + for (i = 1; i <= NET_TX_RING_SIZE; i++) { | |
20737 | + if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET) | |
20738 | + continue; | |
20739 | + | |
20740 | + skb = np->tx_skbs[i]; | |
20741 | + gnttab_end_foreign_access_ref(np->grant_tx_ref[i]); | |
20742 | + gnttab_release_grant_reference( | |
20743 | + &np->gref_tx_head, np->grant_tx_ref[i]); | |
20744 | + np->grant_tx_ref[i] = GRANT_INVALID_REF; | |
20745 | + add_id_to_freelist(np->tx_skbs, i); | |
20746 | + dev_kfree_skb_irq(skb); | |
20747 | + } | |
20748 | +} | |
20749 | + | |
20750 | +static void netif_release_rx_bufs_flip(struct netfront_info *np) | |
20751 | +{ | |
20752 | + struct mmu_update *mmu = np->rx_mmu; | |
20753 | + struct multicall_entry *mcl = np->rx_mcl; | |
20754 | + struct sk_buff_head free_list; | |
20755 | + struct sk_buff *skb; | |
20756 | + unsigned long mfn; | |
20757 | + int xfer = 0, noxfer = 0, unused = 0; | |
20758 | + int id, ref, rc; | |
20759 | + | |
20760 | + skb_queue_head_init(&free_list); | |
20761 | + | |
20762 | + spin_lock_bh(&np->rx_lock); | |
20763 | + | |
20764 | + for (id = 0; id < NET_RX_RING_SIZE; id++) { | |
20765 | + if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) { | |
20766 | + unused++; | |
20767 | + continue; | |
20768 | + } | |
20769 | + | |
20770 | + skb = np->rx_skbs[id]; | |
20771 | + mfn = gnttab_end_foreign_transfer_ref(ref); | |
20772 | + gnttab_release_grant_reference(&np->gref_rx_head, ref); | |
20773 | + np->grant_rx_ref[id] = GRANT_INVALID_REF; | |
20774 | + add_id_to_freelist(np->rx_skbs, id); | |
20775 | + | |
20776 | + if (0 == mfn) { | |
20777 | + struct page *page = skb_shinfo(skb)->frags[0].page; | |
20778 | + balloon_release_driver_page(page); | |
20779 | + skb_shinfo(skb)->nr_frags = 0; | |
20780 | + dev_kfree_skb(skb); | |
20781 | + noxfer++; | |
20782 | + continue; | |
20783 | + } | |
20784 | + | |
20785 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
20786 | + /* Remap the page. */ | |
20787 | + struct page *page = skb_shinfo(skb)->frags[0].page; | |
20788 | + unsigned long pfn = page_to_pfn(page); | |
20789 | + void *vaddr = page_address(page); | |
20790 | + | |
20791 | + MULTI_update_va_mapping(mcl, (unsigned long)vaddr, | |
20792 | + pfn_pte_ma(mfn, PAGE_KERNEL), | |
20793 | + 0); | |
20794 | + mcl++; | |
20795 | + mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) | |
20796 | + | MMU_MACHPHYS_UPDATE; | |
20797 | + mmu->val = pfn; | |
20798 | + mmu++; | |
20799 | + | |
20800 | + set_phys_to_machine(pfn, mfn); | |
20801 | + } | |
20802 | + __skb_queue_tail(&free_list, skb); | |
20803 | + xfer++; | |
20804 | + } | |
20805 | + | |
20806 | + DPRINTK("%s: %d xfer, %d noxfer, %d unused\n", | |
20807 | + __FUNCTION__, xfer, noxfer, unused); | |
20808 | + | |
20809 | + if (xfer) { | |
20810 | + /* Some pages are no longer absent... */ | |
20811 | + balloon_update_driver_allowance(-xfer); | |
20812 | + | |
20813 | + if (!xen_feature(XENFEAT_auto_translated_physmap)) { | |
20814 | + /* Do all the remapping work and M2P updates. */ | |
20815 | + mcl->op = __HYPERVISOR_mmu_update; | |
20816 | + mcl->args[0] = (unsigned long)np->rx_mmu; | |
20817 | + mcl->args[1] = mmu - np->rx_mmu; | |
20818 | + mcl->args[2] = 0; | |
20819 | + mcl->args[3] = DOMID_SELF; | |
20820 | + mcl++; | |
20821 | + rc = HYPERVISOR_multicall_check( | |
20822 | + np->rx_mcl, mcl - np->rx_mcl, NULL); | |
20823 | + BUG_ON(rc); | |
20824 | + } | |
20825 | + } | |
20826 | + | |
20827 | + while ((skb = __skb_dequeue(&free_list)) != NULL) | |
20828 | + dev_kfree_skb(skb); | |
20829 | + | |
20830 | + spin_unlock_bh(&np->rx_lock); | |
20831 | +} | |
20832 | + | |
20833 | +static void netif_release_rx_bufs_copy(struct netfront_info *np) | |
20834 | +{ | |
20835 | + struct sk_buff *skb; | |
20836 | + int i, ref; | |
20837 | + int busy = 0, inuse = 0; | |
20838 | + | |
20839 | + spin_lock_bh(&np->rx_lock); | |
20840 | + | |
20841 | + for (i = 0; i < NET_RX_RING_SIZE; i++) { | |
20842 | + ref = np->grant_rx_ref[i]; | |
20843 | + | |
20844 | + if (ref == GRANT_INVALID_REF) | |
20845 | + continue; | |
20846 | + | |
20847 | + inuse++; | |
20848 | + | |
20849 | + skb = np->rx_skbs[i]; | |
20850 | + | |
20851 | + if (!gnttab_end_foreign_access_ref(ref)) | |
20852 | + { | |
20853 | + busy++; | |
20854 | + continue; | |
20855 | + } | |
20856 | + | |
20857 | + gnttab_release_grant_reference(&np->gref_rx_head, ref); | |
20858 | + np->grant_rx_ref[i] = GRANT_INVALID_REF; | |
20859 | + add_id_to_freelist(np->rx_skbs, i); | |
20860 | + | |
20861 | + dev_kfree_skb(skb); | |
20862 | + } | |
20863 | + | |
20864 | + if (busy) | |
20865 | + DPRINTK("%s: Unable to release %d of %d inuse grant references out of %ld total.\n", | |
20866 | + __FUNCTION__, busy, inuse, NET_RX_RING_SIZE); | |
20867 | + | |
20868 | + spin_unlock_bh(&np->rx_lock); | |
20869 | +} | |
20870 | + | |
20871 | +static int network_close(struct net_device *dev) | |
20872 | +{ | |
20873 | + struct netfront_info *np = netdev_priv(dev); | |
20874 | + netif_stop_queue(np->netdev); | |
20875 | + return 0; | |
20876 | +} | |
20877 | + | |
20878 | + | |
20879 | +static struct net_device_stats *network_get_stats(struct net_device *dev) | |
20880 | +{ | |
20881 | + struct netfront_info *np = netdev_priv(dev); | |
20882 | + | |
20883 | + netfront_accelerator_call_get_stats(np, dev); | |
20884 | + return &np->stats; | |
20885 | +} | |
20886 | + | |
20887 | +static int xennet_set_mac_address(struct net_device *dev, void *p) | |
20888 | +{ | |
20889 | + struct netfront_info *np = netdev_priv(dev); | |
20890 | + struct sockaddr *addr = p; | |
20891 | + | |
20892 | + if (netif_running(dev)) | |
20893 | + return -EBUSY; | |
20894 | + | |
20895 | + if (!is_valid_ether_addr(addr->sa_data)) | |
20896 | + return -EADDRNOTAVAIL; | |
20897 | + | |
20898 | + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); | |
20899 | + memcpy(np->mac, addr->sa_data, ETH_ALEN); | |
20900 | + | |
20901 | + return 0; | |
20902 | +} | |
20903 | + | |
20904 | +static int xennet_change_mtu(struct net_device *dev, int mtu) | |
20905 | +{ | |
20906 | + int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; | |
20907 | + | |
20908 | + if (mtu > max) | |
20909 | + return -EINVAL; | |
20910 | + dev->mtu = mtu; | |
20911 | + return 0; | |
20912 | +} | |
20913 | + | |
20914 | +static int xennet_set_sg(struct net_device *dev, u32 data) | |
20915 | +{ | |
20916 | + if (data) { | |
20917 | + struct netfront_info *np = netdev_priv(dev); | |
20918 | + int val; | |
20919 | + | |
20920 | + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg", | |
20921 | + "%d", &val) < 0) | |
20922 | + val = 0; | |
20923 | + if (!val) | |
20924 | + return -ENOSYS; | |
20925 | + } else if (dev->mtu > ETH_DATA_LEN) | |
20926 | + dev->mtu = ETH_DATA_LEN; | |
20927 | + | |
20928 | + return ethtool_op_set_sg(dev, data); | |
20929 | +} | |
20930 | + | |
20931 | +static int xennet_set_tso(struct net_device *dev, u32 data) | |
20932 | +{ | |
20933 | + if (data) { | |
20934 | + struct netfront_info *np = netdev_priv(dev); | |
20935 | + int val; | |
20936 | + | |
20937 | + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, | |
20938 | + "feature-gso-tcpv4", "%d", &val) < 0) | |
20939 | + val = 0; | |
20940 | + if (!val) | |
20941 | + return -ENOSYS; | |
20942 | + } | |
20943 | + | |
20944 | + return ethtool_op_set_tso(dev, data); | |
20945 | +} | |
20946 | + | |
20947 | +static void xennet_set_features(struct net_device *dev) | |
20948 | +{ | |
20949 | + dev_disable_gso_features(dev); | |
20950 | + xennet_set_sg(dev, 0); | |
20951 | + | |
20952 | + /* We need checksum offload to enable scatter/gather and TSO. */ | |
20953 | + if (!(dev->features & NETIF_F_IP_CSUM)) | |
20954 | + return; | |
20955 | + | |
20956 | + if (xennet_set_sg(dev, 1)) | |
20957 | + return; | |
20958 | + | |
20959 | + /* Before 2.6.9 TSO seems to be unreliable so do not enable it | |
20960 | + * on older kernels. | |
20961 | + */ | |
20962 | + if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)) | |
20963 | + xennet_set_tso(dev, 1); | |
20964 | +} | |
20965 | + | |
20966 | +static int network_connect(struct net_device *dev) | |
20967 | +{ | |
20968 | + struct netfront_info *np = netdev_priv(dev); | |
20969 | + int i, requeue_idx, err; | |
20970 | + struct sk_buff *skb; | |
20971 | + grant_ref_t ref; | |
20972 | + netif_rx_request_t *req; | |
20973 | + unsigned int feature_rx_copy, feature_rx_flip; | |
20974 | + | |
20975 | + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, | |
20976 | + "feature-rx-copy", "%u", &feature_rx_copy); | |
20977 | + if (err != 1) | |
20978 | + feature_rx_copy = 0; | |
20979 | + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, | |
20980 | + "feature-rx-flip", "%u", &feature_rx_flip); | |
20981 | + if (err != 1) | |
20982 | + feature_rx_flip = 1; | |
20983 | + | |
20984 | + /* | |
20985 | + * Copy packets on receive path if: | |
20986 | + * (a) This was requested by user, and the backend supports it; or | |
20987 | + * (b) Flipping was requested, but this is unsupported by the backend. | |
20988 | + */ | |
20989 | + np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) || | |
20990 | + (MODPARM_rx_flip && !feature_rx_flip)); | |
20991 | + | |
20992 | + err = talk_to_backend(np->xbdev, np); | |
20993 | + if (err) | |
20994 | + return err; | |
20995 | + | |
20996 | + xennet_set_features(dev); | |
20997 | + | |
20998 | + DPRINTK("device %s has %sing receive path.\n", | |
20999 | + dev->name, np->copying_receiver ? "copy" : "flipp"); | |
21000 | + | |
21001 | + spin_lock_bh(&np->rx_lock); | |
21002 | + spin_lock_irq(&np->tx_lock); | |
21003 | + | |
21004 | + /* | |
21005 | + * Recovery procedure: | |
21006 | + * NB. Freelist index entries are always going to be less than | |
21007 | + * PAGE_OFFSET, whereas pointers to skbs will always be equal or | |
21008 | + * greater than PAGE_OFFSET: we use this property to distinguish | |
21009 | + * them. | |
21010 | + */ | |
21011 | + | |
21012 | + /* Step 1: Discard all pending TX packet fragments. */ | |
21013 | + netif_release_tx_bufs(np); | |
21014 | + | |
21015 | + /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ | |
21016 | + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { | |
21017 | + if (!np->rx_skbs[i]) | |
21018 | + continue; | |
21019 | + | |
21020 | + skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i); | |
21021 | + ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); | |
21022 | + req = RING_GET_REQUEST(&np->rx, requeue_idx); | |
21023 | + | |
21024 | + if (!np->copying_receiver) { | |
21025 | + gnttab_grant_foreign_transfer_ref( | |
21026 | + ref, np->xbdev->otherend_id, | |
21027 | + page_to_pfn(skb_shinfo(skb)->frags->page)); | |
21028 | + } else { | |
21029 | + gnttab_grant_foreign_access_ref( | |
21030 | + ref, np->xbdev->otherend_id, | |
21031 | + pfn_to_mfn(page_to_pfn(skb_shinfo(skb)-> | |
21032 | + frags->page)), | |
21033 | + 0); | |
21034 | + } | |
21035 | + req->gref = ref; | |
21036 | + req->id = requeue_idx; | |
21037 | + | |
21038 | + requeue_idx++; | |
21039 | + } | |
21040 | + | |
21041 | + np->rx.req_prod_pvt = requeue_idx; | |
21042 | + | |
21043 | + /* | |
21044 | + * Step 3: All public and private state should now be sane. Get | |
21045 | + * ready to start sending and receiving packets and give the driver | |
21046 | + * domain a kick because we've probably just requeued some | |
21047 | + * packets. | |
21048 | + */ | |
21049 | + netfront_carrier_on(np); | |
21050 | + notify_remote_via_irq(np->irq); | |
21051 | + network_tx_buf_gc(dev); | |
21052 | + network_alloc_rx_buffers(dev); | |
21053 | + | |
21054 | + spin_unlock_irq(&np->tx_lock); | |
21055 | + spin_unlock_bh(&np->rx_lock); | |
21056 | + | |
21057 | + return 0; | |
21058 | +} | |
21059 | + | |
21060 | +static void netif_uninit(struct net_device *dev) | |
21061 | +{ | |
21062 | + struct netfront_info *np = netdev_priv(dev); | |
21063 | + netif_release_tx_bufs(np); | |
21064 | + if (np->copying_receiver) | |
21065 | + netif_release_rx_bufs_copy(np); | |
21066 | + else | |
21067 | + netif_release_rx_bufs_flip(np); | |
21068 | + gnttab_free_grant_references(np->gref_tx_head); | |
21069 | + gnttab_free_grant_references(np->gref_rx_head); | |
21070 | +} | |
21071 | + | |
21072 | +static struct ethtool_ops network_ethtool_ops = | |
21073 | +{ | |
21074 | + .get_tx_csum = ethtool_op_get_tx_csum, | |
21075 | + .set_tx_csum = ethtool_op_set_tx_csum, | |
21076 | + .get_sg = ethtool_op_get_sg, | |
21077 | + .set_sg = xennet_set_sg, | |
21078 | +#if HAVE_TSO | |
21079 | + .get_tso = ethtool_op_get_tso, | |
21080 | + .set_tso = xennet_set_tso, | |
21081 | +#endif | |
21082 | + .get_link = ethtool_op_get_link, | |
21083 | +}; | |
21084 | + | |
21085 | +#ifdef CONFIG_SYSFS | |
21086 | +static ssize_t show_rxbuf_min(struct class_device *cd, char *buf) | |
21087 | +{ | |
21088 | + struct net_device *netdev = container_of(cd, struct net_device, | |
21089 | + class_dev); | |
21090 | + struct netfront_info *info = netdev_priv(netdev); | |
21091 | + | |
21092 | + return sprintf(buf, "%u\n", info->rx_min_target); | |
21093 | +} | |
21094 | + | |
21095 | +static ssize_t store_rxbuf_min(struct class_device *cd, | |
21096 | + const char *buf, size_t len) | |
21097 | +{ | |
21098 | + struct net_device *netdev = container_of(cd, struct net_device, | |
21099 | + class_dev); | |
21100 | + struct netfront_info *np = netdev_priv(netdev); | |
21101 | + char *endp; | |
21102 | + unsigned long target; | |
21103 | + | |
21104 | + if (!capable(CAP_NET_ADMIN)) | |
21105 | + return -EPERM; | |
21106 | + | |
21107 | + target = simple_strtoul(buf, &endp, 0); | |
21108 | + if (endp == buf) | |
21109 | + return -EBADMSG; | |
21110 | + | |
21111 | + if (target < RX_MIN_TARGET) | |
21112 | + target = RX_MIN_TARGET; | |
21113 | + if (target > RX_MAX_TARGET) | |
21114 | + target = RX_MAX_TARGET; | |
21115 | + | |
21116 | + spin_lock_bh(&np->rx_lock); | |
21117 | + if (target > np->rx_max_target) | |
21118 | + np->rx_max_target = target; | |
21119 | + np->rx_min_target = target; | |
21120 | + if (target > np->rx_target) | |
21121 | + np->rx_target = target; | |
21122 | + | |
21123 | + network_alloc_rx_buffers(netdev); | |
21124 | + | |
21125 | + spin_unlock_bh(&np->rx_lock); | |
21126 | + return len; | |
21127 | +} | |
21128 | + | |
21129 | +static ssize_t show_rxbuf_max(struct class_device *cd, char *buf) | |
21130 | +{ | |
21131 | + struct net_device *netdev = container_of(cd, struct net_device, | |
21132 | + class_dev); | |
21133 | + struct netfront_info *info = netdev_priv(netdev); | |
21134 | + | |
21135 | + return sprintf(buf, "%u\n", info->rx_max_target); | |
21136 | +} | |
21137 | + | |
21138 | +static ssize_t store_rxbuf_max(struct class_device *cd, | |
21139 | + const char *buf, size_t len) | |
21140 | +{ | |
21141 | + struct net_device *netdev = container_of(cd, struct net_device, | |
21142 | + class_dev); | |
21143 | + struct netfront_info *np = netdev_priv(netdev); | |
21144 | + char *endp; | |
21145 | + unsigned long target; | |
21146 | + | |
21147 | + if (!capable(CAP_NET_ADMIN)) | |
21148 | + return -EPERM; | |
21149 | + | |
21150 | + target = simple_strtoul(buf, &endp, 0); | |
21151 | + if (endp == buf) | |
21152 | + return -EBADMSG; | |
21153 | + | |
21154 | + if (target < RX_MIN_TARGET) | |
21155 | + target = RX_MIN_TARGET; | |
21156 | + if (target > RX_MAX_TARGET) | |
21157 | + target = RX_MAX_TARGET; | |
21158 | + | |
21159 | + spin_lock_bh(&np->rx_lock); | |
21160 | + if (target < np->rx_min_target) | |
21161 | + np->rx_min_target = target; | |
21162 | + np->rx_max_target = target; | |
21163 | + if (target < np->rx_target) | |
21164 | + np->rx_target = target; | |
21165 | + | |
21166 | + network_alloc_rx_buffers(netdev); | |
21167 | + | |
21168 | + spin_unlock_bh(&np->rx_lock); | |
21169 | + return len; | |
21170 | +} | |
21171 | + | |
21172 | +static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf) | |
21173 | +{ | |
21174 | + struct net_device *netdev = container_of(cd, struct net_device, | |
21175 | + class_dev); | |
21176 | + struct netfront_info *info = netdev_priv(netdev); | |
21177 | + | |
21178 | + return sprintf(buf, "%u\n", info->rx_target); | |
21179 | +} | |
21180 | + | |
21181 | +static const struct class_device_attribute xennet_attrs[] = { | |
21182 | + __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min), | |
21183 | + __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max), | |
21184 | + __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL), | |
21185 | +}; | |
21186 | + | |
21187 | +static int xennet_sysfs_addif(struct net_device *netdev) | |
21188 | +{ | |
21189 | + int i; | |
21190 | + int error = 0; | |
21191 | + | |
21192 | + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { | |
21193 | + error = class_device_create_file(&netdev->class_dev, | |
21194 | + &xennet_attrs[i]); | |
21195 | + if (error) | |
21196 | + goto fail; | |
21197 | + } | |
21198 | + return 0; | |
21199 | + | |
21200 | + fail: | |
21201 | + while (--i >= 0) | |
21202 | + class_device_remove_file(&netdev->class_dev, | |
21203 | + &xennet_attrs[i]); | |
21204 | + return error; | |
21205 | +} | |
21206 | + | |
21207 | +static void xennet_sysfs_delif(struct net_device *netdev) | |
21208 | +{ | |
21209 | + int i; | |
21210 | + | |
21211 | + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { | |
21212 | + class_device_remove_file(&netdev->class_dev, | |
21213 | + &xennet_attrs[i]); | |
21214 | + } | |
21215 | +} | |
21216 | + | |
21217 | +#endif /* CONFIG_SYSFS */ | |
21218 | + | |
21219 | + | |
21220 | +/* | |
21221 | + * Nothing to do here. Virtual interface is point-to-point and the | |
21222 | + * physical interface is probably promiscuous anyway. | |
21223 | + */ | |
21224 | +static void network_set_multicast_list(struct net_device *dev) | |
21225 | +{ | |
21226 | +} | |
21227 | + | |
21228 | +static struct net_device * __devinit create_netdev(struct xenbus_device *dev) | |
21229 | +{ | |
21230 | + int i, err = 0; | |
21231 | + struct net_device *netdev = NULL; | |
21232 | + struct netfront_info *np = NULL; | |
21233 | + | |
21234 | + netdev = alloc_etherdev(sizeof(struct netfront_info)); | |
21235 | + if (!netdev) { | |
21236 | + printk(KERN_WARNING "%s> alloc_etherdev failed.\n", | |
21237 | + __FUNCTION__); | |
21238 | + return ERR_PTR(-ENOMEM); | |
21239 | + } | |
21240 | + | |
21241 | + np = netdev_priv(netdev); | |
21242 | + np->xbdev = dev; | |
21243 | + | |
21244 | + spin_lock_init(&np->tx_lock); | |
21245 | + spin_lock_init(&np->rx_lock); | |
21246 | + | |
21247 | + init_accelerator_vif(np, dev); | |
21248 | + | |
21249 | + skb_queue_head_init(&np->rx_batch); | |
21250 | + np->rx_target = RX_DFL_MIN_TARGET; | |
21251 | + np->rx_min_target = RX_DFL_MIN_TARGET; | |
21252 | + np->rx_max_target = RX_MAX_TARGET; | |
21253 | + | |
21254 | + init_timer(&np->rx_refill_timer); | |
21255 | + np->rx_refill_timer.data = (unsigned long)netdev; | |
21256 | + np->rx_refill_timer.function = rx_refill_timeout; | |
21257 | + | |
21258 | + /* Initialise {tx,rx}_skbs as a free chain containing every entry. */ | |
21259 | + for (i = 0; i <= NET_TX_RING_SIZE; i++) { | |
21260 | + np->tx_skbs[i] = (void *)((unsigned long) i+1); | |
21261 | + np->grant_tx_ref[i] = GRANT_INVALID_REF; | |
21262 | + } | |
21263 | + | |
21264 | + for (i = 0; i < NET_RX_RING_SIZE; i++) { | |
21265 | + np->rx_skbs[i] = NULL; | |
21266 | + np->grant_rx_ref[i] = GRANT_INVALID_REF; | |
21267 | + } | |
21268 | + | |
21269 | + /* A grant for every tx ring slot */ | |
21270 | + if (gnttab_alloc_grant_references(TX_MAX_TARGET, | |
21271 | + &np->gref_tx_head) < 0) { | |
21272 | + printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); | |
21273 | + err = -ENOMEM; | |
21274 | + goto exit; | |
21275 | + } | |
21276 | + /* A grant for every rx ring slot */ | |
21277 | + if (gnttab_alloc_grant_references(RX_MAX_TARGET, | |
21278 | + &np->gref_rx_head) < 0) { | |
21279 | + printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); | |
21280 | + err = -ENOMEM; | |
21281 | + goto exit_free_tx; | |
21282 | + } | |
21283 | + | |
21284 | + netdev->open = network_open; | |
21285 | + netdev->hard_start_xmit = network_start_xmit; | |
21286 | + netdev->stop = network_close; | |
21287 | + netdev->get_stats = network_get_stats; | |
21288 | + netdev->poll = netif_poll; | |
21289 | + netdev->set_multicast_list = network_set_multicast_list; | |
21290 | + netdev->uninit = netif_uninit; | |
21291 | + netdev->set_mac_address = xennet_set_mac_address; | |
21292 | + netdev->change_mtu = xennet_change_mtu; | |
21293 | + netdev->weight = 64; | |
21294 | + netdev->features = NETIF_F_IP_CSUM; | |
21295 | + | |
21296 | + SET_ETHTOOL_OPS(netdev, &network_ethtool_ops); | |
21297 | + SET_MODULE_OWNER(netdev); | |
21298 | + SET_NETDEV_DEV(netdev, &dev->dev); | |
21299 | + | |
21300 | + np->netdev = netdev; | |
21301 | + | |
21302 | + netfront_carrier_off(np); | |
21303 | + | |
21304 | + return netdev; | |
21305 | + | |
21306 | + exit_free_tx: | |
21307 | + gnttab_free_grant_references(np->gref_tx_head); | |
21308 | + exit: | |
21309 | + free_netdev(netdev); | |
21310 | + return ERR_PTR(err); | |
21311 | +} | |
21312 | + | |
21313 | +#ifdef CONFIG_INET | |
21314 | +/* | |
21315 | + * We use this notifier to send out a fake ARP reply to reset switches and | |
21316 | + * router ARP caches when an IP interface is brought up on a VIF. | |
21317 | + */ | |
21318 | +static int | |
21319 | +inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr) | |
21320 | +{ | |
21321 | + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; | |
21322 | + struct net_device *dev = ifa->ifa_dev->dev; | |
21323 | + | |
21324 | + /* UP event and is it one of our devices? */ | |
21325 | + if (event == NETDEV_UP && dev->open == network_open) | |
21326 | + send_fake_arp(dev); | |
21327 | + | |
21328 | + return NOTIFY_DONE; | |
21329 | +} | |
21330 | + | |
21331 | +static struct notifier_block notifier_inetdev = { | |
21332 | + .notifier_call = inetdev_notify, | |
21333 | + .next = NULL, | |
21334 | + .priority = 0 | |
21335 | +}; | |
21336 | +#endif | |
21337 | + | |
21338 | + | |
21339 | +static void netif_disconnect_backend(struct netfront_info *info) | |
21340 | +{ | |
21341 | + /* Stop old i/f to prevent errors whilst we rebuild the state. */ | |
21342 | + spin_lock_bh(&info->rx_lock); | |
21343 | + spin_lock_irq(&info->tx_lock); | |
21344 | + netfront_carrier_off(info); | |
21345 | + spin_unlock_irq(&info->tx_lock); | |
21346 | + spin_unlock_bh(&info->rx_lock); | |
21347 | + | |
21348 | + if (info->irq) | |
21349 | + unbind_from_irqhandler(info->irq, info->netdev); | |
21350 | + info->irq = 0; | |
21351 | + | |
21352 | + end_access(info->tx_ring_ref, info->tx.sring); | |
21353 | + end_access(info->rx_ring_ref, info->rx.sring); | |
21354 | + info->tx_ring_ref = GRANT_INVALID_REF; | |
21355 | + info->rx_ring_ref = GRANT_INVALID_REF; | |
21356 | + info->tx.sring = NULL; | |
21357 | + info->rx.sring = NULL; | |
21358 | +} | |
21359 | + | |
21360 | + | |
21361 | +static void end_access(int ref, void *page) | |
21362 | +{ | |
21363 | + if (ref != GRANT_INVALID_REF) | |
21364 | + gnttab_end_foreign_access(ref, (unsigned long)page); | |
21365 | +} | |
21366 | + | |
21367 | + | |
21368 | +/* ** Driver registration ** */ | |
21369 | + | |
21370 | + | |
21371 | +static const struct xenbus_device_id netfront_ids[] = { | |
21372 | + { "vif" }, | |
21373 | + { "" } | |
21374 | +}; | |
21375 | +MODULE_ALIAS("xen:vif"); | |
21376 | + | |
21377 | + | |
21378 | +static struct xenbus_driver netfront_driver = { | |
21379 | + .name = "vif", | |
21380 | + .owner = THIS_MODULE, | |
21381 | + .ids = netfront_ids, | |
21382 | + .probe = netfront_probe, | |
21383 | + .remove = __devexit_p(netfront_remove), | |
21384 | + .suspend = netfront_suspend, | |
21385 | + .suspend_cancel = netfront_suspend_cancel, | |
21386 | + .resume = netfront_resume, | |
21387 | + .otherend_changed = backend_changed, | |
21388 | +}; | |
21389 | + | |
21390 | + | |
21391 | +static int __init netif_init(void) | |
21392 | +{ | |
21393 | + if (!is_running_on_xen()) | |
21394 | + return -ENODEV; | |
21395 | + | |
21396 | +#ifdef CONFIG_XEN | |
21397 | + if (MODPARM_rx_flip && MODPARM_rx_copy) { | |
21398 | + WPRINTK("Cannot specify both rx_copy and rx_flip.\n"); | |
21399 | + return -EINVAL; | |
21400 | + } | |
21401 | + | |
21402 | + if (!MODPARM_rx_flip && !MODPARM_rx_copy) | |
21403 | + MODPARM_rx_flip = 1; /* Default is to flip. */ | |
21404 | +#endif | |
21405 | + | |
21406 | + netif_init_accel(); | |
21407 | + | |
21408 | + IPRINTK("Initialising virtual ethernet driver.\n"); | |
21409 | + | |
21410 | +#ifdef CONFIG_INET | |
21411 | + (void)register_inetaddr_notifier(¬ifier_inetdev); | |
21412 | +#endif | |
21413 | + | |
21414 | + return xenbus_register_frontend(&netfront_driver); | |
21415 | +} | |
21416 | +module_init(netif_init); | |
21417 | + | |
21418 | + | |
21419 | +static void __exit netif_exit(void) | |
21420 | +{ | |
21421 | +#ifdef CONFIG_INET | |
21422 | + unregister_inetaddr_notifier(¬ifier_inetdev); | |
21423 | +#endif | |
21424 | + | |
21425 | + netif_exit_accel(); | |
21426 | + | |
21427 | + return xenbus_unregister_driver(&netfront_driver); | |
21428 | +} | |
21429 | +module_exit(netif_exit); | |
21430 | + | |
21431 | +MODULE_LICENSE("Dual BSD/GPL"); | |
21432 | Index: head-2008-11-25/drivers/xen/netfront/netfront.h | |
21433 | =================================================================== | |
21434 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
21435 | +++ head-2008-11-25/drivers/xen/netfront/netfront.h 2008-01-07 13:19:18.000000000 +0100 | |
21436 | @@ -0,0 +1,274 @@ | |
21437 | +/****************************************************************************** | |
21438 | + * Virtual network driver for conversing with remote driver backends. | |
21439 | + * | |
21440 | + * Copyright (c) 2002-2005, K A Fraser | |
21441 | + * Copyright (c) 2005, XenSource Ltd | |
21442 | + * Copyright (C) 2007 Solarflare Communications, Inc. | |
21443 | + * | |
21444 | + * This program is free software; you can redistribute it and/or | |
21445 | + * modify it under the terms of the GNU General Public License version 2 | |
21446 | + * as published by the Free Software Foundation; or, when distributed | |
21447 | + * separately from the Linux kernel or incorporated into other | |
21448 | + * software packages, subject to the following license: | |
21449 | + * | |
21450 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
21451 | + * of this source file (the "Software"), to deal in the Software without | |
21452 | + * restriction, including without limitation the rights to use, copy, modify, | |
21453 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
21454 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
21455 | + * the following conditions: | |
21456 | + * | |
21457 | + * The above copyright notice and this permission notice shall be included in | |
21458 | + * all copies or substantial portions of the Software. | |
21459 | + * | |
21460 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
21461 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
21462 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
21463 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
21464 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
21465 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21466 | + * IN THE SOFTWARE. | |
21467 | + */ | |
21468 | + | |
21469 | +#ifndef NETFRONT_H | |
21470 | +#define NETFRONT_H | |
21471 | + | |
21472 | +#include <xen/interface/io/netif.h> | |
21473 | +#include <linux/netdevice.h> | |
21474 | +#include <linux/skbuff.h> | |
21475 | +#include <linux/list.h> | |
21476 | + | |
21477 | +#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) | |
21478 | +#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE) | |
21479 | + | |
21480 | +#include <xen/xenbus.h> | |
21481 | + | |
21482 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
21483 | +#include <xen/platform-compat.h> | |
21484 | +#endif | |
21485 | + | |
21486 | +/* | |
21487 | + * Function pointer table for hooks into a network acceleration | |
21488 | + * plugin. These are called at appropriate points from the netfront | |
21489 | + * driver | |
21490 | + */ | |
21491 | +struct netfront_accel_hooks { | |
21492 | + /* | |
21493 | + * new_device: Accelerator hook to ask the plugin to support a | |
21494 | + * new network interface | |
21495 | + */ | |
21496 | + int (*new_device)(struct net_device *net_dev, struct xenbus_device *dev); | |
21497 | + /* | |
21498 | + * remove: Opposite of new_device | |
21499 | + */ | |
21500 | + int (*remove)(struct xenbus_device *dev); | |
21501 | + /* | |
21502 | + * The net_device is being polled, check the accelerated | |
21503 | + * hardware for any pending packets | |
21504 | + */ | |
21505 | + int (*netdev_poll)(struct net_device *dev, int *pbudget); | |
21506 | + /* | |
21507 | + * start_xmit: Used to give the accelerated plugin the option | |
21508 | + * of sending a packet. Returns non-zero if has done so, or | |
21509 | + * zero to decline and force the packet onto normal send | |
21510 | + * path | |
21511 | + */ | |
21512 | + int (*start_xmit)(struct sk_buff *skb, struct net_device *dev); | |
21513 | + /* | |
21514 | + * start/stop_napi_interrupts Used by netfront to indicate | |
21515 | + * when napi interrupts should be enabled or disabled | |
21516 | + */ | |
21517 | + int (*start_napi_irq)(struct net_device *dev); | |
21518 | + void (*stop_napi_irq)(struct net_device *dev); | |
21519 | + /* | |
21520 | + * Called before re-enabling the TX queue to check the fast | |
21521 | + * path has slots too | |
21522 | + */ | |
21523 | + int (*check_ready)(struct net_device *dev); | |
21524 | + /* | |
21525 | + * Get the fastpath network statistics | |
21526 | + */ | |
21527 | + int (*get_stats)(struct net_device *dev, | |
21528 | + struct net_device_stats *stats); | |
21529 | +}; | |
21530 | + | |
21531 | + | |
21532 | +/* Version of API/protocol for communication between netfront and | |
21533 | + acceleration plugin supported */ | |
21534 | +#define NETFRONT_ACCEL_VERSION 0x00010003 | |
21535 | + | |
21536 | +/* | |
21537 | + * Per-netfront device state for the accelerator. This is used to | |
21538 | + * allow efficient per-netfront device access to the accelerator | |
21539 | + * hooks | |
21540 | + */ | |
21541 | +struct netfront_accel_vif_state { | |
21542 | + struct list_head link; | |
21543 | + | |
21544 | + struct xenbus_device *dev; | |
21545 | + struct netfront_info *np; | |
21546 | + struct netfront_accel_hooks *hooks; | |
21547 | + | |
21548 | + /* Watch on the accelerator configuration value */ | |
21549 | + struct xenbus_watch accel_watch; | |
21550 | + /* Work item to process change in accelerator */ | |
21551 | + struct work_struct accel_work; | |
21552 | + /* The string from xenbus last time accel_watch fired */ | |
21553 | + char *accel_frontend; | |
21554 | +}; | |
21555 | + | |
21556 | +/* | |
21557 | + * Per-accelerator state stored in netfront. These form a list that | |
21558 | + * is used to track which devices are accelerated by which plugins, | |
21559 | + * and what plugins are available/have been requested | |
21560 | + */ | |
21561 | +struct netfront_accelerator { | |
21562 | + /* Used to make a list */ | |
21563 | + struct list_head link; | |
21564 | + /* ID of the accelerator */ | |
21565 | + int id; | |
21566 | + /* | |
21567 | + * String describing the accelerator. Currently this is the | |
21568 | + * name of the accelerator module. This is provided by the | |
21569 | + * backend accelerator through xenstore | |
21570 | + */ | |
21571 | + char *frontend; | |
21572 | + /* The hooks into the accelerator plugin module */ | |
21573 | + struct netfront_accel_hooks *hooks; | |
21574 | + | |
21575 | + /* | |
21576 | + * List of per-netfront device state (struct | |
21577 | + * netfront_accel_vif_state) for each netfront device that is | |
21578 | + * using this accelerator | |
21579 | + */ | |
21580 | + struct list_head vif_states; | |
21581 | + spinlock_t vif_states_lock; | |
21582 | +}; | |
21583 | + | |
21584 | +struct netfront_info { | |
21585 | + struct list_head list; | |
21586 | + struct net_device *netdev; | |
21587 | + | |
21588 | + struct net_device_stats stats; | |
21589 | + | |
21590 | + struct netif_tx_front_ring tx; | |
21591 | + struct netif_rx_front_ring rx; | |
21592 | + | |
21593 | + spinlock_t tx_lock; | |
21594 | + spinlock_t rx_lock; | |
21595 | + | |
21596 | + unsigned int irq; | |
21597 | + unsigned int copying_receiver; | |
21598 | + unsigned int carrier; | |
21599 | + | |
21600 | + /* Receive-ring batched refills. */ | |
21601 | +#define RX_MIN_TARGET 8 | |
21602 | +#define RX_DFL_MIN_TARGET 64 | |
21603 | +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) | |
21604 | + unsigned rx_min_target, rx_max_target, rx_target; | |
21605 | + struct sk_buff_head rx_batch; | |
21606 | + | |
21607 | + struct timer_list rx_refill_timer; | |
21608 | + | |
21609 | + /* | |
21610 | + * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs | |
21611 | + * is an index into a chain of free entries. | |
21612 | + */ | |
21613 | + struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1]; | |
21614 | + struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; | |
21615 | + | |
21616 | +#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) | |
21617 | + grant_ref_t gref_tx_head; | |
21618 | + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; | |
21619 | + grant_ref_t gref_rx_head; | |
21620 | + grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; | |
21621 | + | |
21622 | + struct xenbus_device *xbdev; | |
21623 | + int tx_ring_ref; | |
21624 | + int rx_ring_ref; | |
21625 | + u8 mac[ETH_ALEN]; | |
21626 | + | |
21627 | + unsigned long rx_pfn_array[NET_RX_RING_SIZE]; | |
21628 | + struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; | |
21629 | + struct mmu_update rx_mmu[NET_RX_RING_SIZE]; | |
21630 | + | |
21631 | + /* Private pointer to state internal to accelerator module */ | |
21632 | + void *accel_priv; | |
21633 | + /* The accelerator used by this netfront device */ | |
21634 | + struct netfront_accelerator *accelerator; | |
21635 | + /* The accelerator state for this netfront device */ | |
21636 | + struct netfront_accel_vif_state accel_vif_state; | |
21637 | +}; | |
21638 | + | |
21639 | + | |
21640 | +/* Exported Functions */ | |
21641 | + | |
21642 | +/* | |
21643 | + * Called by an accelerator plugin module when it has loaded. | |
21644 | + * | |
21645 | + * frontend: the string describing the accelerator, currently the module name | |
21646 | + * hooks: the hooks for netfront to use to call into the accelerator | |
21647 | + * version: the version of API between frontend and plugin requested | |
21648 | + * | |
21649 | + * return: 0 on success, <0 on error, >0 (with version supported) on | |
21650 | + * version mismatch | |
21651 | + */ | |
21652 | +extern int netfront_accelerator_loaded(int version, const char *frontend, | |
21653 | + struct netfront_accel_hooks *hooks); | |
21654 | + | |
21655 | +/* | |
21656 | + * Called by an accelerator plugin module when it is about to unload. | |
21657 | + * | |
21658 | + * frontend: the string describing the accelerator. Must match the | |
21659 | + * one passed to netfront_accelerator_loaded() | |
21660 | + */ | |
21661 | +extern void netfront_accelerator_stop(const char *frontend); | |
21662 | + | |
21663 | +/* | |
21664 | + * Called by an accelerator before waking the net device's TX queue to | |
21665 | + * ensure the slow path has available slots. Returns true if OK to | |
21666 | + * wake, false if still busy | |
21667 | + */ | |
21668 | +extern int netfront_check_queue_ready(struct net_device *net_dev); | |
21669 | + | |
21670 | + | |
21671 | +/* Internal-to-netfront Functions */ | |
21672 | + | |
21673 | +/* | |
21674 | + * Call into accelerator and check to see if it has tx space before we | |
21675 | + * wake the net device's TX queue. Returns true if OK to wake, false | |
21676 | + * if still busy | |
21677 | + */ | |
21678 | +extern | |
21679 | +int netfront_check_accelerator_queue_ready(struct net_device *dev, | |
21680 | + struct netfront_info *np); | |
21681 | +extern | |
21682 | +int netfront_accelerator_call_remove(struct netfront_info *np, | |
21683 | + struct xenbus_device *dev); | |
21684 | +extern | |
21685 | +int netfront_accelerator_suspend(struct netfront_info *np, | |
21686 | + struct xenbus_device *dev); | |
21687 | +extern | |
21688 | +int netfront_accelerator_suspend_cancel(struct netfront_info *np, | |
21689 | + struct xenbus_device *dev); | |
21690 | +extern | |
21691 | +void netfront_accelerator_resume(struct netfront_info *np, | |
21692 | + struct xenbus_device *dev); | |
21693 | +extern | |
21694 | +void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np, | |
21695 | + struct net_device *dev); | |
21696 | +extern | |
21697 | +int netfront_accelerator_call_get_stats(struct netfront_info *np, | |
21698 | + struct net_device *dev); | |
21699 | +extern | |
21700 | +void netfront_accelerator_add_watch(struct netfront_info *np); | |
21701 | + | |
21702 | +extern | |
21703 | +void netif_init_accel(void); | |
21704 | +extern | |
21705 | +void netif_exit_accel(void); | |
21706 | + | |
21707 | +extern | |
21708 | +void init_accelerator_vif(struct netfront_info *np, | |
21709 | + struct xenbus_device *dev); | |
21710 | +#endif /* NETFRONT_H */ | |
21711 | Index: head-2008-11-25/drivers/xen/pciback/Makefile | |
21712 | =================================================================== | |
21713 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
21714 | +++ head-2008-11-25/drivers/xen/pciback/Makefile 2008-07-21 11:00:33.000000000 +0200 | |
21715 | @@ -0,0 +1,17 @@ | |
21716 | +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o | |
21717 | + | |
21718 | +pciback-y := pci_stub.o pciback_ops.o xenbus.o | |
21719 | +pciback-y += conf_space.o conf_space_header.o \ | |
21720 | + conf_space_capability.o \ | |
21721 | + conf_space_capability_vpd.o \ | |
21722 | + conf_space_capability_pm.o \ | |
21723 | + conf_space_quirks.o | |
21724 | +pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o | |
21725 | +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o | |
21726 | +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o | |
21727 | +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o | |
21728 | +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o | |
21729 | + | |
21730 | +ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) | |
21731 | +EXTRA_CFLAGS += -DDEBUG | |
21732 | +endif | |
21733 | Index: head-2008-11-25/drivers/xen/pciback/conf_space.c | |
21734 | =================================================================== | |
21735 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
21736 | +++ head-2008-11-25/drivers/xen/pciback/conf_space.c 2008-10-29 09:55:56.000000000 +0100 | |
21737 | @@ -0,0 +1,426 @@ | |
21738 | +/* | |
21739 | + * PCI Backend - Functions for creating a virtual configuration space for | |
21740 | + * exported PCI Devices. | |
21741 | + * It's dangerous to allow PCI Driver Domains to change their | |
21742 | + * device's resources (memory, i/o ports, interrupts). We need to | |
21743 | + * restrict changes to certain PCI Configuration registers: | |
21744 | + * BARs, INTERRUPT_PIN, most registers in the header... | |
21745 | + * | |
21746 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
21747 | + */ | |
21748 | + | |
21749 | +#include <linux/kernel.h> | |
21750 | +#include <linux/pci.h> | |
21751 | +#include "pciback.h" | |
21752 | +#include "conf_space.h" | |
21753 | +#include "conf_space_quirks.h" | |
21754 | + | |
21755 | +#define DEFINE_PCI_CONFIG(op,size,type) \ | |
21756 | +int pciback_##op##_config_##size \ | |
21757 | +(struct pci_dev *dev, int offset, type value, void *data) \ | |
21758 | +{ \ | |
21759 | + return pci_##op##_config_##size (dev, offset, value); \ | |
21760 | +} | |
21761 | + | |
21762 | +DEFINE_PCI_CONFIG(read, byte, u8 *) | |
21763 | +DEFINE_PCI_CONFIG(read, word, u16 *) | |
21764 | +DEFINE_PCI_CONFIG(read, dword, u32 *) | |
21765 | + | |
21766 | +DEFINE_PCI_CONFIG(write, byte, u8) | |
21767 | +DEFINE_PCI_CONFIG(write, word, u16) | |
21768 | +DEFINE_PCI_CONFIG(write, dword, u32) | |
21769 | + | |
21770 | +static int conf_space_read(struct pci_dev *dev, | |
21771 | + const struct config_field_entry *entry, | |
21772 | + int offset, u32 *value) | |
21773 | +{ | |
21774 | + int ret = 0; | |
21775 | + const struct config_field *field = entry->field; | |
21776 | + | |
21777 | + *value = 0; | |
21778 | + | |
21779 | + switch (field->size) { | |
21780 | + case 1: | |
21781 | + if (field->u.b.read) | |
21782 | + ret = field->u.b.read(dev, offset, (u8 *) value, | |
21783 | + entry->data); | |
21784 | + break; | |
21785 | + case 2: | |
21786 | + if (field->u.w.read) | |
21787 | + ret = field->u.w.read(dev, offset, (u16 *) value, | |
21788 | + entry->data); | |
21789 | + break; | |
21790 | + case 4: | |
21791 | + if (field->u.dw.read) | |
21792 | + ret = field->u.dw.read(dev, offset, value, entry->data); | |
21793 | + break; | |
21794 | + } | |
21795 | + return ret; | |
21796 | +} | |
21797 | + | |
21798 | +static int conf_space_write(struct pci_dev *dev, | |
21799 | + const struct config_field_entry *entry, | |
21800 | + int offset, u32 value) | |
21801 | +{ | |
21802 | + int ret = 0; | |
21803 | + const struct config_field *field = entry->field; | |
21804 | + | |
21805 | + switch (field->size) { | |
21806 | + case 1: | |
21807 | + if (field->u.b.write) | |
21808 | + ret = field->u.b.write(dev, offset, (u8) value, | |
21809 | + entry->data); | |
21810 | + break; | |
21811 | + case 2: | |
21812 | + if (field->u.w.write) | |
21813 | + ret = field->u.w.write(dev, offset, (u16) value, | |
21814 | + entry->data); | |
21815 | + break; | |
21816 | + case 4: | |
21817 | + if (field->u.dw.write) | |
21818 | + ret = field->u.dw.write(dev, offset, value, | |
21819 | + entry->data); | |
21820 | + break; | |
21821 | + } | |
21822 | + return ret; | |
21823 | +} | |
21824 | + | |
21825 | +static inline u32 get_mask(int size) | |
21826 | +{ | |
21827 | + if (size == 1) | |
21828 | + return 0xff; | |
21829 | + else if (size == 2) | |
21830 | + return 0xffff; | |
21831 | + else | |
21832 | + return 0xffffffff; | |
21833 | +} | |
21834 | + | |
21835 | +static inline int valid_request(int offset, int size) | |
21836 | +{ | |
21837 | + /* Validate request (no un-aligned requests) */ | |
21838 | + if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0) | |
21839 | + return 1; | |
21840 | + return 0; | |
21841 | +} | |
21842 | + | |
21843 | +static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask, | |
21844 | + int offset) | |
21845 | +{ | |
21846 | + if (offset >= 0) { | |
21847 | + new_val_mask <<= (offset * 8); | |
21848 | + new_val <<= (offset * 8); | |
21849 | + } else { | |
21850 | + new_val_mask >>= (offset * -8); | |
21851 | + new_val >>= (offset * -8); | |
21852 | + } | |
21853 | + val = (val & ~new_val_mask) | (new_val & new_val_mask); | |
21854 | + | |
21855 | + return val; | |
21856 | +} | |
21857 | + | |
21858 | +static int pcibios_err_to_errno(int err) | |
21859 | +{ | |
21860 | + switch (err) { | |
21861 | + case PCIBIOS_SUCCESSFUL: | |
21862 | + return XEN_PCI_ERR_success; | |
21863 | + case PCIBIOS_DEVICE_NOT_FOUND: | |
21864 | + return XEN_PCI_ERR_dev_not_found; | |
21865 | + case PCIBIOS_BAD_REGISTER_NUMBER: | |
21866 | + return XEN_PCI_ERR_invalid_offset; | |
21867 | + case PCIBIOS_FUNC_NOT_SUPPORTED: | |
21868 | + return XEN_PCI_ERR_not_implemented; | |
21869 | + case PCIBIOS_SET_FAILED: | |
21870 | + return XEN_PCI_ERR_access_denied; | |
21871 | + } | |
21872 | + return err; | |
21873 | +} | |
21874 | + | |
21875 | +int pciback_config_read(struct pci_dev *dev, int offset, int size, | |
21876 | + u32 * ret_val) | |
21877 | +{ | |
21878 | + int err = 0; | |
21879 | + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); | |
21880 | + const struct config_field_entry *cfg_entry; | |
21881 | + const struct config_field *field; | |
21882 | + int req_start, req_end, field_start, field_end; | |
21883 | + /* if read fails for any reason, return 0 (as if device didn't respond) */ | |
21884 | + u32 value = 0, tmp_val; | |
21885 | + | |
21886 | + if (unlikely(verbose_request)) | |
21887 | + printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n", | |
21888 | + pci_name(dev), size, offset); | |
21889 | + | |
21890 | + if (!valid_request(offset, size)) { | |
21891 | + err = XEN_PCI_ERR_invalid_offset; | |
21892 | + goto out; | |
21893 | + } | |
21894 | + | |
21895 | + /* Get the real value first, then modify as appropriate */ | |
21896 | + switch (size) { | |
21897 | + case 1: | |
21898 | + err = pci_read_config_byte(dev, offset, (u8 *) & value); | |
21899 | + break; | |
21900 | + case 2: | |
21901 | + err = pci_read_config_word(dev, offset, (u16 *) & value); | |
21902 | + break; | |
21903 | + case 4: | |
21904 | + err = pci_read_config_dword(dev, offset, &value); | |
21905 | + break; | |
21906 | + } | |
21907 | + | |
21908 | + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { | |
21909 | + field = cfg_entry->field; | |
21910 | + | |
21911 | + req_start = offset; | |
21912 | + req_end = offset + size; | |
21913 | + field_start = OFFSET(cfg_entry); | |
21914 | + field_end = OFFSET(cfg_entry) + field->size; | |
21915 | + | |
21916 | + if ((req_start >= field_start && req_start < field_end) | |
21917 | + || (req_end > field_start && req_end <= field_end)) { | |
21918 | + err = conf_space_read(dev, cfg_entry, field_start, | |
21919 | + &tmp_val); | |
21920 | + if (err) | |
21921 | + goto out; | |
21922 | + | |
21923 | + value = merge_value(value, tmp_val, | |
21924 | + get_mask(field->size), | |
21925 | + field_start - req_start); | |
21926 | + } | |
21927 | + } | |
21928 | + | |
21929 | + out: | |
21930 | + if (unlikely(verbose_request)) | |
21931 | + printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n", | |
21932 | + pci_name(dev), size, offset, value); | |
21933 | + | |
21934 | + *ret_val = value; | |
21935 | + return pcibios_err_to_errno(err); | |
21936 | +} | |
21937 | + | |
21938 | +int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value) | |
21939 | +{ | |
21940 | + int err = 0, handled = 0; | |
21941 | + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); | |
21942 | + const struct config_field_entry *cfg_entry; | |
21943 | + const struct config_field *field; | |
21944 | + u32 tmp_val; | |
21945 | + int req_start, req_end, field_start, field_end; | |
21946 | + | |
21947 | + if (unlikely(verbose_request)) | |
21948 | + printk(KERN_DEBUG | |
21949 | + "pciback: %s: write request %d bytes at 0x%x = %x\n", | |
21950 | + pci_name(dev), size, offset, value); | |
21951 | + | |
21952 | + if (!valid_request(offset, size)) | |
21953 | + return XEN_PCI_ERR_invalid_offset; | |
21954 | + | |
21955 | + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { | |
21956 | + field = cfg_entry->field; | |
21957 | + | |
21958 | + req_start = offset; | |
21959 | + req_end = offset + size; | |
21960 | + field_start = OFFSET(cfg_entry); | |
21961 | + field_end = OFFSET(cfg_entry) + field->size; | |
21962 | + | |
21963 | + if ((req_start >= field_start && req_start < field_end) | |
21964 | + || (req_end > field_start && req_end <= field_end)) { | |
21965 | + tmp_val = 0; | |
21966 | + | |
21967 | + err = pciback_config_read(dev, field_start, | |
21968 | + field->size, &tmp_val); | |
21969 | + if (err) | |
21970 | + break; | |
21971 | + | |
21972 | + tmp_val = merge_value(tmp_val, value, get_mask(size), | |
21973 | + req_start - field_start); | |
21974 | + | |
21975 | + err = conf_space_write(dev, cfg_entry, field_start, | |
21976 | + tmp_val); | |
21977 | + | |
21978 | + /* handled is set true here, but not every byte | |
21979 | + * may have been written! Properly detecting if | |
21980 | + * every byte is handled is unnecessary as the | |
21981 | + * flag is used to detect devices that need | |
21982 | + * special helpers to work correctly. | |
21983 | + */ | |
21984 | + handled = 1; | |
21985 | + } | |
21986 | + } | |
21987 | + | |
21988 | + if (!handled && !err) { | |
21989 | + /* By default, anything not specificially handled above is | |
21990 | + * read-only. The permissive flag changes this behavior so | |
21991 | + * that anything not specifically handled above is writable. | |
21992 | + * This means that some fields may still be read-only because | |
21993 | + * they have entries in the config_field list that intercept | |
21994 | + * the write and do nothing. */ | |
21995 | + if (dev_data->permissive) { | |
21996 | + switch (size) { | |
21997 | + case 1: | |
21998 | + err = pci_write_config_byte(dev, offset, | |
21999 | + (u8) value); | |
22000 | + break; | |
22001 | + case 2: | |
22002 | + err = pci_write_config_word(dev, offset, | |
22003 | + (u16) value); | |
22004 | + break; | |
22005 | + case 4: | |
22006 | + err = pci_write_config_dword(dev, offset, | |
22007 | + (u32) value); | |
22008 | + break; | |
22009 | + } | |
22010 | + } else if (!dev_data->warned_on_write) { | |
22011 | + dev_data->warned_on_write = 1; | |
22012 | + dev_warn(&dev->dev, "Driver tried to write to a " | |
22013 | + "read-only configuration space field at offset " | |
22014 | + "0x%x, size %d. This may be harmless, but if " | |
22015 | + "you have problems with your device:\n" | |
22016 | + "1) see permissive attribute in sysfs\n" | |
22017 | + "2) report problems to the xen-devel " | |
22018 | + "mailing list along with details of your " | |
22019 | + "device obtained from lspci.\n", offset, size); | |
22020 | + } | |
22021 | + } | |
22022 | + | |
22023 | + return pcibios_err_to_errno(err); | |
22024 | +} | |
22025 | + | |
22026 | +void pciback_config_free_dyn_fields(struct pci_dev *dev) | |
22027 | +{ | |
22028 | + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); | |
22029 | + struct config_field_entry *cfg_entry, *t; | |
22030 | + const struct config_field *field; | |
22031 | + | |
22032 | + dev_dbg(&dev->dev, | |
22033 | + "free-ing dynamically allocated virtual configuration space fields\n"); | |
22034 | + | |
22035 | + list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { | |
22036 | + field = cfg_entry->field; | |
22037 | + | |
22038 | + if (field->clean) { | |
22039 | + field->clean((struct config_field *)field); | |
22040 | + | |
22041 | + if (cfg_entry->data) | |
22042 | + kfree(cfg_entry->data); | |
22043 | + | |
22044 | + list_del(&cfg_entry->list); | |
22045 | + kfree(cfg_entry); | |
22046 | + } | |
22047 | + | |
22048 | + } | |
22049 | +} | |
22050 | + | |
22051 | +void pciback_config_reset_dev(struct pci_dev *dev) | |
22052 | +{ | |
22053 | + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); | |
22054 | + const struct config_field_entry *cfg_entry; | |
22055 | + const struct config_field *field; | |
22056 | + | |
22057 | + dev_dbg(&dev->dev, "resetting virtual configuration space\n"); | |
22058 | + | |
22059 | + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { | |
22060 | + field = cfg_entry->field; | |
22061 | + | |
22062 | + if (field->reset) | |
22063 | + field->reset(dev, OFFSET(cfg_entry), cfg_entry->data); | |
22064 | + } | |
22065 | +} | |
22066 | + | |
22067 | +void pciback_config_free_dev(struct pci_dev *dev) | |
22068 | +{ | |
22069 | + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); | |
22070 | + struct config_field_entry *cfg_entry, *t; | |
22071 | + const struct config_field *field; | |
22072 | + | |
22073 | + dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n"); | |
22074 | + | |
22075 | + list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { | |
22076 | + list_del(&cfg_entry->list); | |
22077 | + | |
22078 | + field = cfg_entry->field; | |
22079 | + | |
22080 | + if (field->release) | |
22081 | + field->release(dev, OFFSET(cfg_entry), cfg_entry->data); | |
22082 | + | |
22083 | + kfree(cfg_entry); | |
22084 | + } | |
22085 | +} | |
22086 | + | |
22087 | +int pciback_config_add_field_offset(struct pci_dev *dev, | |
22088 | + const struct config_field *field, | |
22089 | + unsigned int base_offset) | |
22090 | +{ | |
22091 | + int err = 0; | |
22092 | + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); | |
22093 | + struct config_field_entry *cfg_entry; | |
22094 | + void *tmp; | |
22095 | + | |
22096 | + cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL); | |
22097 | + if (!cfg_entry) { | |
22098 | + err = -ENOMEM; | |
22099 | + goto out; | |
22100 | + } | |
22101 | + | |
22102 | + cfg_entry->data = NULL; | |
22103 | + cfg_entry->field = field; | |
22104 | + cfg_entry->base_offset = base_offset; | |
22105 | + | |
22106 | + /* silently ignore duplicate fields */ | |
22107 | + err = pciback_field_is_dup(dev,OFFSET(cfg_entry)); | |
22108 | + if (err) | |
22109 | + goto out; | |
22110 | + | |
22111 | + if (field->init) { | |
22112 | + tmp = field->init(dev, OFFSET(cfg_entry)); | |
22113 | + | |
22114 | + if (IS_ERR(tmp)) { | |
22115 | + err = PTR_ERR(tmp); | |
22116 | + goto out; | |
22117 | + } | |
22118 | + | |
22119 | + cfg_entry->data = tmp; | |
22120 | + } | |
22121 | + | |
22122 | + dev_dbg(&dev->dev, "added config field at offset 0x%02x\n", | |
22123 | + OFFSET(cfg_entry)); | |
22124 | + list_add_tail(&cfg_entry->list, &dev_data->config_fields); | |
22125 | + | |
22126 | + out: | |
22127 | + if (err) | |
22128 | + kfree(cfg_entry); | |
22129 | + | |
22130 | + return err; | |
22131 | +} | |
22132 | + | |
22133 | +/* This sets up the device's virtual configuration space to keep track of | |
22134 | + * certain registers (like the base address registers (BARs) so that we can | |
22135 | + * keep the client from manipulating them directly. | |
22136 | + */ | |
22137 | +int pciback_config_init_dev(struct pci_dev *dev) | |
22138 | +{ | |
22139 | + int err = 0; | |
22140 | + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); | |
22141 | + | |
22142 | + dev_dbg(&dev->dev, "initializing virtual configuration space\n"); | |
22143 | + | |
22144 | + INIT_LIST_HEAD(&dev_data->config_fields); | |
22145 | + | |
22146 | + err = pciback_config_header_add_fields(dev); | |
22147 | + if (err) | |
22148 | + goto out; | |
22149 | + | |
22150 | + err = pciback_config_capability_add_fields(dev); | |
22151 | + if (err) | |
22152 | + goto out; | |
22153 | + | |
22154 | + err = pciback_config_quirks_init(dev); | |
22155 | + | |
22156 | + out: | |
22157 | + return err; | |
22158 | +} | |
22159 | + | |
22160 | +int pciback_config_init(void) | |
22161 | +{ | |
22162 | + return pciback_config_capability_init(); | |
22163 | +} | |
22164 | Index: head-2008-11-25/drivers/xen/pciback/conf_space.h | |
22165 | =================================================================== | |
22166 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
22167 | +++ head-2008-11-25/drivers/xen/pciback/conf_space.h 2008-10-29 09:55:56.000000000 +0100 | |
22168 | @@ -0,0 +1,126 @@ | |
22169 | +/* | |
22170 | + * PCI Backend - Common data structures for overriding the configuration space | |
22171 | + * | |
22172 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
22173 | + */ | |
22174 | + | |
22175 | +#ifndef __XEN_PCIBACK_CONF_SPACE_H__ | |
22176 | +#define __XEN_PCIBACK_CONF_SPACE_H__ | |
22177 | + | |
22178 | +#include <linux/list.h> | |
22179 | +#include <linux/err.h> | |
22180 | + | |
22181 | +/* conf_field_init can return an errno in a ptr with ERR_PTR() */ | |
22182 | +typedef void *(*conf_field_init) (struct pci_dev * dev, int offset); | |
22183 | +typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data); | |
22184 | +typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data); | |
22185 | + | |
22186 | +typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value, | |
22187 | + void *data); | |
22188 | +typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value, | |
22189 | + void *data); | |
22190 | +typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value, | |
22191 | + void *data); | |
22192 | +typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value, | |
22193 | + void *data); | |
22194 | +typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value, | |
22195 | + void *data); | |
22196 | +typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value, | |
22197 | + void *data); | |
22198 | + | |
22199 | +/* These are the fields within the configuration space which we | |
22200 | + * are interested in intercepting reads/writes to and changing their | |
22201 | + * values. | |
22202 | + */ | |
22203 | +struct config_field { | |
22204 | + unsigned int offset; | |
22205 | + unsigned int size; | |
22206 | + unsigned int mask; | |
22207 | + conf_field_init init; | |
22208 | + conf_field_reset reset; | |
22209 | + conf_field_free release; | |
22210 | + void (*clean) (struct config_field * field); | |
22211 | + union { | |
22212 | + struct { | |
22213 | + conf_dword_write write; | |
22214 | + conf_dword_read read; | |
22215 | + } dw; | |
22216 | + struct { | |
22217 | + conf_word_write write; | |
22218 | + conf_word_read read; | |
22219 | + } w; | |
22220 | + struct { | |
22221 | + conf_byte_write write; | |
22222 | + conf_byte_read read; | |
22223 | + } b; | |
22224 | + } u; | |
22225 | + struct list_head list; | |
22226 | +}; | |
22227 | + | |
22228 | +struct config_field_entry { | |
22229 | + struct list_head list; | |
22230 | + const struct config_field *field; | |
22231 | + unsigned int base_offset; | |
22232 | + void *data; | |
22233 | +}; | |
22234 | + | |
22235 | +#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) | |
22236 | + | |
22237 | +/* Add fields to a device - the add_fields macro expects to get a pointer to | |
22238 | + * the first entry in an array (of which the ending is marked by size==0) | |
22239 | + */ | |
22240 | +int pciback_config_add_field_offset(struct pci_dev *dev, | |
22241 | + const struct config_field *field, | |
22242 | + unsigned int offset); | |
22243 | + | |
22244 | +static inline int pciback_config_add_field(struct pci_dev *dev, | |
22245 | + const struct config_field *field) | |
22246 | +{ | |
22247 | + return pciback_config_add_field_offset(dev, field, 0); | |
22248 | +} | |
22249 | + | |
22250 | +static inline int pciback_config_add_fields(struct pci_dev *dev, | |
22251 | + const struct config_field *field) | |
22252 | +{ | |
22253 | + int i, err = 0; | |
22254 | + for (i = 0; field[i].size != 0; i++) { | |
22255 | + err = pciback_config_add_field(dev, &field[i]); | |
22256 | + if (err) | |
22257 | + break; | |
22258 | + } | |
22259 | + return err; | |
22260 | +} | |
22261 | + | |
22262 | +static inline int pciback_config_add_fields_offset(struct pci_dev *dev, | |
22263 | + const struct config_field *field, | |
22264 | + unsigned int offset) | |
22265 | +{ | |
22266 | + int i, err = 0; | |
22267 | + for (i = 0; field[i].size != 0; i++) { | |
22268 | + err = pciback_config_add_field_offset(dev, &field[i], offset); | |
22269 | + if (err) | |
22270 | + break; | |
22271 | + } | |
22272 | + return err; | |
22273 | +} | |
22274 | + | |
22275 | +/* Read/Write the real configuration space */ | |
22276 | +int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value, | |
22277 | + void *data); | |
22278 | +int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value, | |
22279 | + void *data); | |
22280 | +int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value, | |
22281 | + void *data); | |
22282 | +int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value, | |
22283 | + void *data); | |
22284 | +int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value, | |
22285 | + void *data); | |
22286 | +int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value, | |
22287 | + void *data); | |
22288 | + | |
22289 | +int pciback_config_capability_init(void); | |
22290 | + | |
22291 | +int pciback_config_header_add_fields(struct pci_dev *dev); | |
22292 | +int pciback_config_capability_add_fields(struct pci_dev *dev); | |
22293 | + | |
22294 | +#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */ | |
22295 | Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability.c | |
22296 | =================================================================== | |
22297 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
22298 | +++ head-2008-11-25/drivers/xen/pciback/conf_space_capability.c 2008-10-29 09:55:56.000000000 +0100 | |
22299 | @@ -0,0 +1,69 @@ | |
22300 | +/* | |
22301 | + * PCI Backend - Handles the virtual fields found on the capability lists | |
22302 | + * in the configuration space. | |
22303 | + * | |
22304 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
22305 | + */ | |
22306 | + | |
22307 | +#include <linux/kernel.h> | |
22308 | +#include <linux/pci.h> | |
22309 | +#include "pciback.h" | |
22310 | +#include "conf_space.h" | |
22311 | +#include "conf_space_capability.h" | |
22312 | + | |
22313 | +static LIST_HEAD(capabilities); | |
22314 | + | |
22315 | +static const struct config_field caplist_header[] = { | |
22316 | + { | |
22317 | + .offset = PCI_CAP_LIST_ID, | |
22318 | + .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */ | |
22319 | + .u.w.read = pciback_read_config_word, | |
22320 | + .u.w.write = NULL, | |
22321 | + }, | |
22322 | + {} | |
22323 | +}; | |
22324 | + | |
22325 | +static inline void register_capability(struct pciback_config_capability *cap) | |
22326 | +{ | |
22327 | + list_add_tail(&cap->cap_list, &capabilities); | |
22328 | +} | |
22329 | + | |
22330 | +int pciback_config_capability_add_fields(struct pci_dev *dev) | |
22331 | +{ | |
22332 | + int err = 0; | |
22333 | + struct pciback_config_capability *cap; | |
22334 | + int cap_offset; | |
22335 | + | |
22336 | + list_for_each_entry(cap, &capabilities, cap_list) { | |
22337 | + cap_offset = pci_find_capability(dev, cap->capability); | |
22338 | + if (cap_offset) { | |
22339 | + dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n", | |
22340 | + cap->capability, cap_offset); | |
22341 | + | |
22342 | + err = pciback_config_add_fields_offset(dev, | |
22343 | + caplist_header, | |
22344 | + cap_offset); | |
22345 | + if (err) | |
22346 | + goto out; | |
22347 | + err = pciback_config_add_fields_offset(dev, | |
22348 | + cap->fields, | |
22349 | + cap_offset); | |
22350 | + if (err) | |
22351 | + goto out; | |
22352 | + } | |
22353 | + } | |
22354 | + | |
22355 | + out: | |
22356 | + return err; | |
22357 | +} | |
22358 | + | |
22359 | +extern struct pciback_config_capability pciback_config_capability_vpd; | |
22360 | +extern struct pciback_config_capability pciback_config_capability_pm; | |
22361 | + | |
22362 | +int pciback_config_capability_init(void) | |
22363 | +{ | |
22364 | + register_capability(&pciback_config_capability_vpd); | |
22365 | + register_capability(&pciback_config_capability_pm); | |
22366 | + | |
22367 | + return 0; | |
22368 | +} | |
22369 | Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability.h | |
22370 | =================================================================== | |
22371 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
22372 | +++ head-2008-11-25/drivers/xen/pciback/conf_space_capability.h 2008-10-29 09:55:56.000000000 +0100 | |
22373 | @@ -0,0 +1,23 @@ | |
22374 | +/* | |
22375 | + * PCI Backend - Data structures for special overlays for structures on | |
22376 | + * the capability list. | |
22377 | + * | |
22378 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
22379 | + */ | |
22380 | + | |
22381 | +#ifndef __PCIBACK_CONFIG_CAPABILITY_H__ | |
22382 | +#define __PCIBACK_CONFIG_CAPABILITY_H__ | |
22383 | + | |
22384 | +#include <linux/pci.h> | |
22385 | +#include <linux/list.h> | |
22386 | + | |
22387 | +struct pciback_config_capability { | |
22388 | + struct list_head cap_list; | |
22389 | + | |
22390 | + int capability; | |
22391 | + | |
22392 | + /* If the device has the capability found above, add these fields */ | |
22393 | + const struct config_field *fields; | |
22394 | +}; | |
22395 | + | |
22396 | +#endif | |
22397 | Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability_msi.c | |
22398 | =================================================================== | |
22399 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
22400 | +++ head-2008-11-25/drivers/xen/pciback/conf_space_capability_msi.c 2008-09-15 13:40:15.000000000 +0200 | |
22401 | @@ -0,0 +1,79 @@ | |
22402 | +/* | |
22403 | + * PCI Backend -- Configuration overlay for MSI capability | |
22404 | + */ | |
22405 | +#include <linux/pci.h> | |
22406 | +#include <linux/slab.h> | |
22407 | +#include "conf_space.h" | |
22408 | +#include "conf_space_capability.h" | |
22409 | +#include <xen/interface/io/pciif.h> | |
22410 | +#include "pciback.h" | |
22411 | + | |
22412 | +int pciback_enable_msi(struct pciback_device *pdev, | |
22413 | + struct pci_dev *dev, struct xen_pci_op *op) | |
22414 | +{ | |
22415 | + int otherend = pdev->xdev->otherend_id; | |
22416 | + int status; | |
22417 | + | |
22418 | + status = pci_enable_msi(dev); | |
22419 | + | |
22420 | + if (status) { | |
22421 | + printk("error enable msi for guest %x status %x\n", otherend, status); | |
22422 | + op->value = 0; | |
22423 | + return XEN_PCI_ERR_op_failed; | |
22424 | + } | |
22425 | + | |
22426 | + op->value = dev->irq; | |
22427 | + return 0; | |
22428 | +} | |
22429 | + | |
22430 | +int pciback_disable_msi(struct pciback_device *pdev, | |
22431 | + struct pci_dev *dev, struct xen_pci_op *op) | |
22432 | +{ | |
22433 | + pci_disable_msi(dev); | |
22434 | + | |
22435 | + op->value = dev->irq; | |
22436 | + return 0; | |
22437 | +} | |
22438 | + | |
22439 | +int pciback_enable_msix(struct pciback_device *pdev, | |
22440 | + struct pci_dev *dev, struct xen_pci_op *op) | |
22441 | +{ | |
22442 | + int i, result; | |
22443 | + struct msix_entry *entries; | |
22444 | + | |
22445 | + if (op->value > SH_INFO_MAX_VEC) | |
22446 | + return -EINVAL; | |
22447 | + | |
22448 | + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); | |
22449 | + if (entries == NULL) | |
22450 | + return -ENOMEM; | |
22451 | + | |
22452 | + for (i = 0; i < op->value; i++) { | |
22453 | + entries[i].entry = op->msix_entries[i].entry; | |
22454 | + entries[i].vector = op->msix_entries[i].vector; | |
22455 | + } | |
22456 | + | |
22457 | + result = pci_enable_msix(dev, entries, op->value); | |
22458 | + | |
22459 | + for (i = 0; i < op->value; i++) { | |
22460 | + op->msix_entries[i].entry = entries[i].entry; | |
22461 | + op->msix_entries[i].vector = entries[i].vector; | |
22462 | + } | |
22463 | + | |
22464 | + kfree(entries); | |
22465 | + | |
22466 | + op->value = result; | |
22467 | + | |
22468 | + return result; | |
22469 | +} | |
22470 | + | |
22471 | +int pciback_disable_msix(struct pciback_device *pdev, | |
22472 | + struct pci_dev *dev, struct xen_pci_op *op) | |
22473 | +{ | |
22474 | + | |
22475 | + pci_disable_msix(dev); | |
22476 | + | |
22477 | + op->value = dev->irq; | |
22478 | + return 0; | |
22479 | +} | |
22480 | + | |
22481 | Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability_pm.c | |
22482 | =================================================================== | |
22483 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
22484 | +++ head-2008-11-25/drivers/xen/pciback/conf_space_capability_pm.c 2008-10-29 09:55:56.000000000 +0100 | |
22485 | @@ -0,0 +1,126 @@ | |
22486 | +/* | |
22487 | + * PCI Backend - Configuration space overlay for power management | |
22488 | + * | |
22489 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
22490 | + */ | |
22491 | + | |
22492 | +#include <linux/pci.h> | |
22493 | +#include "conf_space.h" | |
22494 | +#include "conf_space_capability.h" | |
22495 | + | |
22496 | +static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value, | |
22497 | + void *data) | |
22498 | +{ | |
22499 | + int err; | |
22500 | + u16 real_value; | |
22501 | + | |
22502 | + err = pci_read_config_word(dev, offset, &real_value); | |
22503 | + if (err) | |
22504 | + goto out; | |
22505 | + | |
22506 | + *value = real_value & ~PCI_PM_CAP_PME_MASK; | |
22507 | + | |
22508 | + out: | |
22509 | + return err; | |
22510 | +} | |
22511 | + | |
22512 | +/* PM_OK_BITS specifies the bits that the driver domain is allowed to change. | |
22513 | + * Can't allow driver domain to enable PMEs - they're shared */ | |
22514 | +#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK) | |
22515 | + | |
22516 | +static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, | |
22517 | + void *data) | |
22518 | +{ | |
22519 | + int err; | |
22520 | + u16 old_value; | |
22521 | + pci_power_t new_state, old_state; | |
22522 | + | |
22523 | + err = pci_read_config_word(dev, offset, &old_value); | |
22524 | + if (err) | |
22525 | + goto out; | |
22526 | + | |
22527 | + old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); | |
22528 | + new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); | |
22529 | + | |
22530 | + new_value &= PM_OK_BITS; | |
22531 | + if ((old_value & PM_OK_BITS) != new_value) { | |
22532 | + new_value = (old_value & ~PM_OK_BITS) | new_value; | |
22533 | + err = pci_write_config_word(dev, offset, new_value); | |
22534 | + if (err) | |
22535 | + goto out; | |
22536 | + } | |
22537 | + | |
22538 | + /* Let pci core handle the power management change */ | |
22539 | + dev_dbg(&dev->dev, "set power state to %x\n", new_state); | |
22540 | + err = pci_set_power_state(dev, new_state); | |
22541 | + if (err) { | |
22542 | + err = PCIBIOS_SET_FAILED; | |
22543 | + goto out; | |
22544 | + } | |
22545 | + | |
22546 | + /* | |
22547 | + * Device may lose PCI config info on D3->D0 transition. This | |
22548 | + * is a problem for some guests which will not reset BARs. Even | |
22549 | + * those that have a go will be foiled by our BAR-write handler | |
22550 | + * which will discard the write! Since Linux won't re-init | |
22551 | + * the config space automatically in all cases, we do it here. | |
22552 | + * Future: Should we re-initialise all first 64 bytes of config space? | |
22553 | + */ | |
22554 | + if (new_state == PCI_D0 && | |
22555 | + (old_state == PCI_D3hot || old_state == PCI_D3cold) && | |
22556 | + !(old_value & PCI_PM_CTRL_NO_SOFT_RESET)) | |
22557 | + pci_restore_bars(dev); | |
22558 | + | |
22559 | + out: | |
22560 | + return err; | |
22561 | +} | |
22562 | + | |
22563 | +/* Ensure PMEs are disabled */ | |
22564 | +static void *pm_ctrl_init(struct pci_dev *dev, int offset) | |
22565 | +{ | |
22566 | + int err; | |
22567 | + u16 value; | |
22568 | + | |
22569 | + err = pci_read_config_word(dev, offset, &value); | |
22570 | + if (err) | |
22571 | + goto out; | |
22572 | + | |
22573 | + if (value & PCI_PM_CTRL_PME_ENABLE) { | |
22574 | + value &= ~PCI_PM_CTRL_PME_ENABLE; | |
22575 | + err = pci_write_config_word(dev, offset, value); | |
22576 | + } | |
22577 | + | |
22578 | + out: | |
22579 | + return ERR_PTR(err); | |
22580 | +} | |
22581 | + | |
22582 | +static const struct config_field caplist_pm[] = { | |
22583 | + { | |
22584 | + .offset = PCI_PM_PMC, | |
22585 | + .size = 2, | |
22586 | + .u.w.read = pm_caps_read, | |
22587 | + }, | |
22588 | + { | |
22589 | + .offset = PCI_PM_CTRL, | |
22590 | + .size = 2, | |
22591 | + .init = pm_ctrl_init, | |
22592 | + .u.w.read = pciback_read_config_word, | |
22593 | + .u.w.write = pm_ctrl_write, | |
22594 | + }, | |
22595 | + { | |
22596 | + .offset = PCI_PM_PPB_EXTENSIONS, | |
22597 | + .size = 1, | |
22598 | + .u.b.read = pciback_read_config_byte, | |
22599 | + }, | |
22600 | + { | |
22601 | + .offset = PCI_PM_DATA_REGISTER, | |
22602 | + .size = 1, | |
22603 | + .u.b.read = pciback_read_config_byte, | |
22604 | + }, | |
22605 | + {} | |
22606 | +}; | |
22607 | + | |
22608 | +struct pciback_config_capability pciback_config_capability_pm = { | |
22609 | + .capability = PCI_CAP_ID_PM, | |
22610 | + .fields = caplist_pm, | |
22611 | +}; | |
22612 | Index: head-2008-11-25/drivers/xen/pciback/conf_space_capability_vpd.c | |
22613 | =================================================================== | |
22614 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
22615 | +++ head-2008-11-25/drivers/xen/pciback/conf_space_capability_vpd.c 2008-10-29 09:55:56.000000000 +0100 | |
22616 | @@ -0,0 +1,40 @@ | |
22617 | +/* | |
22618 | + * PCI Backend - Configuration space overlay for Vital Product Data | |
22619 | + * | |
22620 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
22621 | + */ | |
22622 | + | |
22623 | +#include <linux/pci.h> | |
22624 | +#include "conf_space.h" | |
22625 | +#include "conf_space_capability.h" | |
22626 | + | |
22627 | +static int vpd_address_write(struct pci_dev *dev, int offset, u16 value, | |
22628 | + void *data) | |
22629 | +{ | |
22630 | + /* Disallow writes to the vital product data */ | |
22631 | + if (value & PCI_VPD_ADDR_F) | |
22632 | + return PCIBIOS_SET_FAILED; | |
22633 | + else | |
22634 | + return pci_write_config_word(dev, offset, value); | |
22635 | +} | |
22636 | + | |
22637 | +static const struct config_field caplist_vpd[] = { | |
22638 | + { | |
22639 | + .offset = PCI_VPD_ADDR, | |
22640 | + .size = 2, | |
22641 | + .u.w.read = pciback_read_config_word, | |
22642 | + .u.w.write = vpd_address_write, | |
22643 | + }, | |
22644 | + { | |
22645 | + .offset = PCI_VPD_DATA, | |
22646 | + .size = 4, | |
22647 | + .u.dw.read = pciback_read_config_dword, | |
22648 | + .u.dw.write = NULL, | |
22649 | + }, | |
22650 | + {} | |
22651 | +}; | |
22652 | + | |
22653 | +struct pciback_config_capability pciback_config_capability_vpd = { | |
22654 | + .capability = PCI_CAP_ID_VPD, | |
22655 | + .fields = caplist_vpd, | |
22656 | +}; | |
22657 | Index: head-2008-11-25/drivers/xen/pciback/conf_space_header.c | |
22658 | =================================================================== | |
22659 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
22660 | +++ head-2008-11-25/drivers/xen/pciback/conf_space_header.c 2008-10-29 09:55:56.000000000 +0100 | |
22661 | @@ -0,0 +1,317 @@ | |
22662 | +/* | |
22663 | + * PCI Backend - Handles the virtual fields in the configuration space headers. | |
22664 | + * | |
22665 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
22666 | + */ | |
22667 | + | |
22668 | +#include <linux/kernel.h> | |
22669 | +#include <linux/pci.h> | |
22670 | +#include "pciback.h" | |
22671 | +#include "conf_space.h" | |
22672 | + | |
22673 | +struct pci_bar_info { | |
22674 | + u32 val; | |
22675 | + u32 len_val; | |
22676 | + int which; | |
22677 | +}; | |
22678 | + | |
22679 | +#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) | |
22680 | +#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) | |
22681 | + | |
22682 | +static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) | |
22683 | +{ | |
22684 | + int err; | |
22685 | + | |
22686 | + if (!dev->is_enabled && is_enable_cmd(value)) { | |
22687 | + if (unlikely(verbose_request)) | |
22688 | + printk(KERN_DEBUG "pciback: %s: enable\n", | |
22689 | + pci_name(dev)); | |
22690 | + err = pci_enable_device(dev); | |
22691 | + if (err) | |
22692 | + return err; | |
22693 | + } else if (dev->is_enabled && !is_enable_cmd(value)) { | |
22694 | + if (unlikely(verbose_request)) | |
22695 | + printk(KERN_DEBUG "pciback: %s: disable\n", | |
22696 | + pci_name(dev)); | |
22697 | + pci_disable_device(dev); | |
22698 | + } | |
22699 | + | |
22700 | + if (!dev->is_busmaster && is_master_cmd(value)) { | |
22701 | + if (unlikely(verbose_request)) | |
22702 | + printk(KERN_DEBUG "pciback: %s: set bus master\n", | |
22703 | + pci_name(dev)); | |
22704 | + pci_set_master(dev); | |
22705 | + } | |
22706 | + | |
22707 | + if (value & PCI_COMMAND_INVALIDATE) { | |
22708 | + if (unlikely(verbose_request)) | |
22709 | + printk(KERN_DEBUG | |
22710 | + "pciback: %s: enable memory-write-invalidate\n", | |
22711 | + pci_name(dev)); | |
22712 | + err = pci_set_mwi(dev); | |
22713 | + if (err) { | |
22714 | + printk(KERN_WARNING | |
22715 | + "pciback: %s: cannot enable memory-write-invalidate (%d)\n", | |
22716 | + pci_name(dev), err); | |
22717 | + value &= ~PCI_COMMAND_INVALIDATE; | |
22718 | + } | |
22719 | + } | |
22720 | + | |
22721 | + return pci_write_config_word(dev, offset, value); | |
22722 | +} | |
22723 | + | |
22724 | +static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data) | |
22725 | +{ | |
22726 | + struct pci_bar_info *bar = data; | |
22727 | + | |
22728 | + if (unlikely(!bar)) { | |
22729 | + printk(KERN_WARNING "pciback: driver data not found for %s\n", | |
22730 | + pci_name(dev)); | |
22731 | + return XEN_PCI_ERR_op_failed; | |
22732 | + } | |
22733 | + | |
22734 | + /* A write to obtain the length must happen as a 32-bit write. | |
22735 | + * This does not (yet) support writing individual bytes | |
22736 | + */ | |
22737 | + if (value == ~PCI_ROM_ADDRESS_ENABLE) | |
22738 | + bar->which = 1; | |
22739 | + else { | |
22740 | + u32 tmpval; | |
22741 | + pci_read_config_dword(dev, offset, &tmpval); | |
22742 | + if (tmpval != bar->val && value == bar->val) { | |
22743 | + /* Allow restoration of bar value. */ | |
22744 | + pci_write_config_dword(dev, offset, bar->val); | |
22745 | + } | |
22746 | + bar->which = 0; | |
22747 | + } | |
22748 | + | |
22749 | + /* Do we need to support enabling/disabling the rom address here? */ | |
22750 | + | |
22751 | + return 0; | |
22752 | +} | |
22753 | + | |
22754 | +/* For the BARs, only allow writes which write ~0 or | |
22755 | + * the correct resource information | |
22756 | + * (Needed for when the driver probes the resource usage) | |
22757 | + */ | |
22758 | +static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data) | |
22759 | +{ | |
22760 | + struct pci_bar_info *bar = data; | |
22761 | + | |
22762 | + if (unlikely(!bar)) { | |
22763 | + printk(KERN_WARNING "pciback: driver data not found for %s\n", | |
22764 | + pci_name(dev)); | |
22765 | + return XEN_PCI_ERR_op_failed; | |
22766 | + } | |
22767 | + | |
22768 | + /* A write to obtain the length must happen as a 32-bit write. | |
22769 | + * This does not (yet) support writing individual bytes | |
22770 | + */ | |
22771 | + if (value == ~0) | |
22772 | + bar->which = 1; | |
22773 | + else { | |
22774 | + u32 tmpval; | |
22775 | + pci_read_config_dword(dev, offset, &tmpval); | |
22776 | + if (tmpval != bar->val && value == bar->val) { | |
22777 | + /* Allow restoration of bar value. */ | |
22778 | + pci_write_config_dword(dev, offset, bar->val); | |
22779 | + } | |
22780 | + bar->which = 0; | |
22781 | + } | |
22782 | + | |
22783 | + return 0; | |
22784 | +} | |
22785 | + | |
22786 | +static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) | |
22787 | +{ | |
22788 | + struct pci_bar_info *bar = data; | |
22789 | + | |
22790 | + if (unlikely(!bar)) { | |
22791 | + printk(KERN_WARNING "pciback: driver data not found for %s\n", | |
22792 | + pci_name(dev)); | |
22793 | + return XEN_PCI_ERR_op_failed; | |
22794 | + } | |
22795 | + | |
22796 | + *value = bar->which ? bar->len_val : bar->val; | |
22797 | + | |
22798 | + return 0; | |
22799 | +} | |
22800 | + | |
22801 | +static inline void read_dev_bar(struct pci_dev *dev, | |
22802 | + struct pci_bar_info *bar_info, int offset, | |
22803 | + u32 len_mask) | |
22804 | +{ | |
22805 | + pci_read_config_dword(dev, offset, &bar_info->val); | |
22806 | + pci_write_config_dword(dev, offset, len_mask); | |
22807 | + pci_read_config_dword(dev, offset, &bar_info->len_val); | |
22808 | + pci_write_config_dword(dev, offset, bar_info->val); | |
22809 | +} | |
22810 | + | |
22811 | +static void *bar_init(struct pci_dev *dev, int offset) | |
22812 | +{ | |
22813 | + struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); | |
22814 | + | |
22815 | + if (!bar) | |
22816 | + return ERR_PTR(-ENOMEM); | |
22817 | + | |
22818 | + read_dev_bar(dev, bar, offset, ~0); | |
22819 | + bar->which = 0; | |
22820 | + | |
22821 | + return bar; | |
22822 | +} | |
22823 | + | |
22824 | +static void *rom_init(struct pci_dev *dev, int offset) | |
22825 | +{ | |
22826 | + struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); | |
22827 | + | |
22828 | + if (!bar) | |
22829 | + return ERR_PTR(-ENOMEM); | |
22830 | + | |
22831 | + read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE); | |
22832 | + bar->which = 0; | |
22833 | + | |
22834 | + return bar; | |
22835 | +} | |
22836 | + | |
22837 | +static void bar_reset(struct pci_dev *dev, int offset, void *data) | |
22838 | +{ | |
22839 | + struct pci_bar_info *bar = data; | |
22840 | + | |
22841 | + bar->which = 0; | |
22842 | +} | |
22843 | + | |
22844 | +static void bar_release(struct pci_dev *dev, int offset, void *data) | |
22845 | +{ | |
22846 | + kfree(data); | |
22847 | +} | |
22848 | + | |
22849 | +static int interrupt_read(struct pci_dev *dev, int offset, u8 * value, | |
22850 | + void *data) | |
22851 | +{ | |
22852 | + *value = (u8) dev->irq; | |
22853 | + | |
22854 | + return 0; | |
22855 | +} | |
22856 | + | |
22857 | +static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data) | |
22858 | +{ | |
22859 | + u8 cur_value; | |
22860 | + int err; | |
22861 | + | |
22862 | + err = pci_read_config_byte(dev, offset, &cur_value); | |
22863 | + if (err) | |
22864 | + goto out; | |
22865 | + | |
22866 | + if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START) | |
22867 | + || value == PCI_BIST_START) | |
22868 | + err = pci_write_config_byte(dev, offset, value); | |
22869 | + | |
22870 | + out: | |
22871 | + return err; | |
22872 | +} | |
22873 | + | |
22874 | +static const struct config_field header_common[] = { | |
22875 | + { | |
22876 | + .offset = PCI_COMMAND, | |
22877 | + .size = 2, | |
22878 | + .u.w.read = pciback_read_config_word, | |
22879 | + .u.w.write = command_write, | |
22880 | + }, | |
22881 | + { | |
22882 | + .offset = PCI_INTERRUPT_LINE, | |
22883 | + .size = 1, | |
22884 | + .u.b.read = interrupt_read, | |
22885 | + }, | |
22886 | + { | |
22887 | + .offset = PCI_INTERRUPT_PIN, | |
22888 | + .size = 1, | |
22889 | + .u.b.read = pciback_read_config_byte, | |
22890 | + }, | |
22891 | + { | |
22892 | + /* Any side effects of letting driver domain control cache line? */ | |
22893 | + .offset = PCI_CACHE_LINE_SIZE, | |
22894 | + .size = 1, | |
22895 | + .u.b.read = pciback_read_config_byte, | |
22896 | + .u.b.write = pciback_write_config_byte, | |
22897 | + }, | |
22898 | + { | |
22899 | + .offset = PCI_LATENCY_TIMER, | |
22900 | + .size = 1, | |
22901 | + .u.b.read = pciback_read_config_byte, | |
22902 | + }, | |
22903 | + { | |
22904 | + .offset = PCI_BIST, | |
22905 | + .size = 1, | |
22906 | + .u.b.read = pciback_read_config_byte, | |
22907 | + .u.b.write = bist_write, | |
22908 | + }, | |
22909 | + {} | |
22910 | +}; | |
22911 | + | |
22912 | +#define CFG_FIELD_BAR(reg_offset) \ | |
22913 | + { \ | |
22914 | + .offset = reg_offset, \ | |
22915 | + .size = 4, \ | |
22916 | + .init = bar_init, \ | |
22917 | + .reset = bar_reset, \ | |
22918 | + .release = bar_release, \ | |
22919 | + .u.dw.read = bar_read, \ | |
22920 | + .u.dw.write = bar_write, \ | |
22921 | + } | |
22922 | + | |
22923 | +#define CFG_FIELD_ROM(reg_offset) \ | |
22924 | + { \ | |
22925 | + .offset = reg_offset, \ | |
22926 | + .size = 4, \ | |
22927 | + .init = rom_init, \ | |
22928 | + .reset = bar_reset, \ | |
22929 | + .release = bar_release, \ | |
22930 | + .u.dw.read = bar_read, \ | |
22931 | + .u.dw.write = rom_write, \ | |
22932 | + } | |
22933 | + | |
22934 | +static const struct config_field header_0[] = { | |
22935 | + CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), | |
22936 | + CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), | |
22937 | + CFG_FIELD_BAR(PCI_BASE_ADDRESS_2), | |
22938 | + CFG_FIELD_BAR(PCI_BASE_ADDRESS_3), | |
22939 | + CFG_FIELD_BAR(PCI_BASE_ADDRESS_4), | |
22940 | + CFG_FIELD_BAR(PCI_BASE_ADDRESS_5), | |
22941 | + CFG_FIELD_ROM(PCI_ROM_ADDRESS), | |
22942 | + {} | |
22943 | +}; | |
22944 | + | |
22945 | +static const struct config_field header_1[] = { | |
22946 | + CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), | |
22947 | + CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), | |
22948 | + CFG_FIELD_ROM(PCI_ROM_ADDRESS1), | |
22949 | + {} | |
22950 | +}; | |
22951 | + | |
22952 | +int pciback_config_header_add_fields(struct pci_dev *dev) | |
22953 | +{ | |
22954 | + int err; | |
22955 | + | |
22956 | + err = pciback_config_add_fields(dev, header_common); | |
22957 | + if (err) | |
22958 | + goto out; | |
22959 | + | |
22960 | + switch (dev->hdr_type) { | |
22961 | + case PCI_HEADER_TYPE_NORMAL: | |
22962 | + err = pciback_config_add_fields(dev, header_0); | |
22963 | + break; | |
22964 | + | |
22965 | + case PCI_HEADER_TYPE_BRIDGE: | |
22966 | + err = pciback_config_add_fields(dev, header_1); | |
22967 | + break; | |
22968 | + | |
22969 | + default: | |
22970 | + err = -EINVAL; | |
22971 | + printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n", | |
22972 | + pci_name(dev), dev->hdr_type); | |
22973 | + break; | |
22974 | + } | |
22975 | + | |
22976 | + out: | |
22977 | + return err; | |
22978 | +} | |
22979 | Index: head-2008-11-25/drivers/xen/pciback/conf_space_quirks.c | |
22980 | =================================================================== | |
22981 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
22982 | +++ head-2008-11-25/drivers/xen/pciback/conf_space_quirks.c 2007-06-12 13:13:45.000000000 +0200 | |
22983 | @@ -0,0 +1,126 @@ | |
22984 | +/* | |
22985 | + * PCI Backend - Handle special overlays for broken devices. | |
22986 | + * | |
22987 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
22988 | + * Author: Chris Bookholt <hap10@epoch.ncsc.mil> | |
22989 | + */ | |
22990 | + | |
22991 | +#include <linux/kernel.h> | |
22992 | +#include <linux/pci.h> | |
22993 | +#include "pciback.h" | |
22994 | +#include "conf_space.h" | |
22995 | +#include "conf_space_quirks.h" | |
22996 | + | |
22997 | +LIST_HEAD(pciback_quirks); | |
22998 | + | |
22999 | +struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev) | |
23000 | +{ | |
23001 | + struct pciback_config_quirk *tmp_quirk; | |
23002 | + | |
23003 | + list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list) | |
23004 | + if (pci_match_id(&tmp_quirk->devid, dev)) | |
23005 | + goto out; | |
23006 | + tmp_quirk = NULL; | |
23007 | + printk(KERN_DEBUG | |
23008 | + "quirk didn't match any device pciback knows about\n"); | |
23009 | + out: | |
23010 | + return tmp_quirk; | |
23011 | +} | |
23012 | + | |
23013 | +static inline void register_quirk(struct pciback_config_quirk *quirk) | |
23014 | +{ | |
23015 | + list_add_tail(&quirk->quirks_list, &pciback_quirks); | |
23016 | +} | |
23017 | + | |
23018 | +int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg) | |
23019 | +{ | |
23020 | + int ret = 0; | |
23021 | + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); | |
23022 | + struct config_field_entry *cfg_entry; | |
23023 | + | |
23024 | + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { | |
23025 | + if ( OFFSET(cfg_entry) == reg) { | |
23026 | + ret = 1; | |
23027 | + break; | |
23028 | + } | |
23029 | + } | |
23030 | + return ret; | |
23031 | +} | |
23032 | + | |
23033 | +int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field | |
23034 | + *field) | |
23035 | +{ | |
23036 | + int err = 0; | |
23037 | + | |
23038 | + switch (field->size) { | |
23039 | + case 1: | |
23040 | + field->u.b.read = pciback_read_config_byte; | |
23041 | + field->u.b.write = pciback_write_config_byte; | |
23042 | + break; | |
23043 | + case 2: | |
23044 | + field->u.w.read = pciback_read_config_word; | |
23045 | + field->u.w.write = pciback_write_config_word; | |
23046 | + break; | |
23047 | + case 4: | |
23048 | + field->u.dw.read = pciback_read_config_dword; | |
23049 | + field->u.dw.write = pciback_write_config_dword; | |
23050 | + break; | |
23051 | + default: | |
23052 | + err = -EINVAL; | |
23053 | + goto out; | |
23054 | + } | |
23055 | + | |
23056 | + pciback_config_add_field(dev, field); | |
23057 | + | |
23058 | + out: | |
23059 | + return err; | |
23060 | +} | |
23061 | + | |
23062 | +int pciback_config_quirks_init(struct pci_dev *dev) | |
23063 | +{ | |
23064 | + struct pciback_config_quirk *quirk; | |
23065 | + int ret = 0; | |
23066 | + | |
23067 | + quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC); | |
23068 | + if (!quirk) { | |
23069 | + ret = -ENOMEM; | |
23070 | + goto out; | |
23071 | + } | |
23072 | + | |
23073 | + quirk->devid.vendor = dev->vendor; | |
23074 | + quirk->devid.device = dev->device; | |
23075 | + quirk->devid.subvendor = dev->subsystem_vendor; | |
23076 | + quirk->devid.subdevice = dev->subsystem_device; | |
23077 | + quirk->devid.class = 0; | |
23078 | + quirk->devid.class_mask = 0; | |
23079 | + quirk->devid.driver_data = 0UL; | |
23080 | + | |
23081 | + quirk->pdev = dev; | |
23082 | + | |
23083 | + register_quirk(quirk); | |
23084 | + out: | |
23085 | + return ret; | |
23086 | +} | |
23087 | + | |
23088 | +void pciback_config_field_free(struct config_field *field) | |
23089 | +{ | |
23090 | + kfree(field); | |
23091 | +} | |
23092 | + | |
23093 | +int pciback_config_quirk_release(struct pci_dev *dev) | |
23094 | +{ | |
23095 | + struct pciback_config_quirk *quirk; | |
23096 | + int ret = 0; | |
23097 | + | |
23098 | + quirk = pciback_find_quirk(dev); | |
23099 | + if (!quirk) { | |
23100 | + ret = -ENXIO; | |
23101 | + goto out; | |
23102 | + } | |
23103 | + | |
23104 | + list_del(&quirk->quirks_list); | |
23105 | + kfree(quirk); | |
23106 | + | |
23107 | + out: | |
23108 | + return ret; | |
23109 | +} | |
23110 | Index: head-2008-11-25/drivers/xen/pciback/conf_space_quirks.h | |
23111 | =================================================================== | |
23112 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
23113 | +++ head-2008-11-25/drivers/xen/pciback/conf_space_quirks.h 2007-06-12 13:13:45.000000000 +0200 | |
23114 | @@ -0,0 +1,35 @@ | |
23115 | +/* | |
23116 | + * PCI Backend - Data structures for special overlays for broken devices. | |
23117 | + * | |
23118 | + * Ryan Wilson <hap9@epoch.ncsc.mil> | |
23119 | + * Chris Bookholt <hap10@epoch.ncsc.mil> | |
23120 | + */ | |
23121 | + | |
23122 | +#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ | |
23123 | +#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ | |
23124 | + | |
23125 | +#include <linux/pci.h> | |
23126 | +#include <linux/list.h> | |
23127 | + | |
23128 | +struct pciback_config_quirk { | |
23129 | + struct list_head quirks_list; | |
23130 | + struct pci_device_id devid; | |
23131 | + struct pci_dev *pdev; | |
23132 | +}; | |
23133 | + | |
23134 | +struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev); | |
23135 | + | |
23136 | +int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field | |
23137 | + *field); | |
23138 | + | |
23139 | +int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg); | |
23140 | + | |
23141 | +int pciback_config_quirks_init(struct pci_dev *dev); | |
23142 | + | |
23143 | +void pciback_config_field_free(struct config_field *field); | |
23144 | + | |
23145 | +int pciback_config_quirk_release(struct pci_dev *dev); | |
23146 | + | |
23147 | +int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg); | |
23148 | + | |
23149 | +#endif | |
23150 | Index: head-2008-11-25/drivers/xen/pciback/controller.c | |
23151 | =================================================================== | |
23152 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
23153 | +++ head-2008-11-25/drivers/xen/pciback/controller.c 2008-02-26 10:54:11.000000000 +0100 | |
23154 | @@ -0,0 +1,408 @@ | |
23155 | +/* | |
23156 | + * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. | |
23157 | + * Alex Williamson <alex.williamson@hp.com> | |
23158 | + * | |
23159 | + * PCI "Controller" Backend - virtualize PCI bus topology based on PCI | |
23160 | + * controllers. Devices under the same PCI controller are exposed on the | |
23161 | + * same virtual domain:bus. Within a bus, device slots are virtualized | |
23162 | + * to compact the bus. | |
23163 | + * | |
23164 | + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
23165 | + * This program is free software; you can redistribute it and/or modify | |
23166 | + * it under the terms of the GNU General Public License as published by | |
23167 | + * the Free Software Foundation; either version 2 of the License, or | |
23168 | + * (at your option) any later version. | |
23169 | + * | |
23170 | + * This program is distributed in the hope that it will be useful, | |
23171 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23172 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
23173 | + * GNU General Public License for more details. | |
23174 | + * | |
23175 | + * You should have received a copy of the GNU General Public License | |
23176 | + * along with this program; if not, write to the Free Software | |
23177 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
23178 | + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
23179 | + */ | |
23180 | + | |
23181 | +#include <linux/acpi.h> | |
23182 | +#include <linux/list.h> | |
23183 | +#include <linux/pci.h> | |
23184 | +#include <linux/spinlock.h> | |
23185 | +#include "pciback.h" | |
23186 | + | |
23187 | +#define PCI_MAX_BUSSES 255 | |
23188 | +#define PCI_MAX_SLOTS 32 | |
23189 | + | |
23190 | +struct controller_dev_entry { | |
23191 | + struct list_head list; | |
23192 | + struct pci_dev *dev; | |
23193 | + unsigned int devfn; | |
23194 | +}; | |
23195 | + | |
23196 | +struct controller_list_entry { | |
23197 | + struct list_head list; | |
23198 | + struct pci_controller *controller; | |
23199 | + unsigned int domain; | |
23200 | + unsigned int bus; | |
23201 | + unsigned int next_devfn; | |
23202 | + struct list_head dev_list; | |
23203 | +}; | |
23204 | + | |
23205 | +struct controller_dev_data { | |
23206 | + struct list_head list; | |
23207 | + unsigned int next_domain; | |
23208 | + unsigned int next_bus; | |
23209 | + spinlock_t lock; | |
23210 | +}; | |
23211 | + | |
23212 | +struct walk_info { | |
23213 | + struct pciback_device *pdev; | |
23214 | + int resource_count; | |
23215 | + int root_num; | |
23216 | +}; | |
23217 | + | |
23218 | +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, | |
23219 | + unsigned int domain, unsigned int bus, | |
23220 | + unsigned int devfn) | |
23221 | +{ | |
23222 | + struct controller_dev_data *dev_data = pdev->pci_dev_data; | |
23223 | + struct controller_dev_entry *dev_entry; | |
23224 | + struct controller_list_entry *cntrl_entry; | |
23225 | + struct pci_dev *dev = NULL; | |
23226 | + unsigned long flags; | |
23227 | + | |
23228 | + spin_lock_irqsave(&dev_data->lock, flags); | |
23229 | + | |
23230 | + list_for_each_entry(cntrl_entry, &dev_data->list, list) { | |
23231 | + if (cntrl_entry->domain != domain || | |
23232 | + cntrl_entry->bus != bus) | |
23233 | + continue; | |
23234 | + | |
23235 | + list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { | |
23236 | + if (devfn == dev_entry->devfn) { | |
23237 | + dev = dev_entry->dev; | |
23238 | + goto found; | |
23239 | + } | |
23240 | + } | |
23241 | + } | |
23242 | +found: | |
23243 | + spin_unlock_irqrestore(&dev_data->lock, flags); | |
23244 | + | |
23245 | + return dev; | |
23246 | +} | |
23247 | + | |
23248 | +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, | |
23249 | + int devid, publish_pci_dev_cb publish_cb) | |
23250 | +{ | |
23251 | + struct controller_dev_data *dev_data = pdev->pci_dev_data; | |
23252 | + struct controller_dev_entry *dev_entry; | |
23253 | + struct controller_list_entry *cntrl_entry; | |
23254 | + struct pci_controller *dev_controller = PCI_CONTROLLER(dev); | |
23255 | + unsigned long flags; | |
23256 | + int ret = 0, found = 0; | |
23257 | + | |
23258 | + spin_lock_irqsave(&dev_data->lock, flags); | |
23259 | + | |
23260 | + /* Look to see if we already have a domain:bus for this controller */ | |
23261 | + list_for_each_entry(cntrl_entry, &dev_data->list, list) { | |
23262 | + if (cntrl_entry->controller == dev_controller) { | |
23263 | + found = 1; | |
23264 | + break; | |
23265 | + } | |
23266 | + } | |
23267 | + | |
23268 | + if (!found) { | |
23269 | + cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC); | |
23270 | + if (!cntrl_entry) { | |
23271 | + ret = -ENOMEM; | |
23272 | + goto out; | |
23273 | + } | |
23274 | + | |
23275 | + cntrl_entry->controller = dev_controller; | |
23276 | + cntrl_entry->next_devfn = PCI_DEVFN(0, 0); | |
23277 | + | |
23278 | + cntrl_entry->domain = dev_data->next_domain; | |
23279 | + cntrl_entry->bus = dev_data->next_bus++; | |
23280 | + if (dev_data->next_bus > PCI_MAX_BUSSES) { | |
23281 | + dev_data->next_domain++; | |
23282 | + dev_data->next_bus = 0; | |
23283 | + } | |
23284 | + | |
23285 | + INIT_LIST_HEAD(&cntrl_entry->dev_list); | |
23286 | + | |
23287 | + list_add_tail(&cntrl_entry->list, &dev_data->list); | |
23288 | + } | |
23289 | + | |
23290 | + if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) { | |
23291 | + /* | |
23292 | + * While it seems unlikely, this can actually happen if | |
23293 | + * a controller has P2P bridges under it. | |
23294 | + */ | |
23295 | + xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x " | |
23296 | + "is full, no room to export %04x:%02x:%02x.%x", | |
23297 | + cntrl_entry->domain, cntrl_entry->bus, | |
23298 | + pci_domain_nr(dev->bus), dev->bus->number, | |
23299 | + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); | |
23300 | + ret = -ENOSPC; | |
23301 | + goto out; | |
23302 | + } | |
23303 | + | |
23304 | + dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC); | |
23305 | + if (!dev_entry) { | |
23306 | + if (list_empty(&cntrl_entry->dev_list)) { | |
23307 | + list_del(&cntrl_entry->list); | |
23308 | + kfree(cntrl_entry); | |
23309 | + } | |
23310 | + ret = -ENOMEM; | |
23311 | + goto out; | |
23312 | + } | |
23313 | + | |
23314 | + dev_entry->dev = dev; | |
23315 | + dev_entry->devfn = cntrl_entry->next_devfn; | |
23316 | + | |
23317 | + list_add_tail(&dev_entry->list, &cntrl_entry->dev_list); | |
23318 | + | |
23319 | + cntrl_entry->next_devfn += PCI_DEVFN(1, 0); | |
23320 | + | |
23321 | +out: | |
23322 | + spin_unlock_irqrestore(&dev_data->lock, flags); | |
23323 | + | |
23324 | + /* TODO: Publish virtual domain:bus:slot.func here. */ | |
23325 | + | |
23326 | + return ret; | |
23327 | +} | |
23328 | + | |
23329 | +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) | |
23330 | +{ | |
23331 | + struct controller_dev_data *dev_data = pdev->pci_dev_data; | |
23332 | + struct controller_list_entry *cntrl_entry; | |
23333 | + struct controller_dev_entry *dev_entry = NULL; | |
23334 | + struct pci_dev *found_dev = NULL; | |
23335 | + unsigned long flags; | |
23336 | + | |
23337 | + spin_lock_irqsave(&dev_data->lock, flags); | |
23338 | + | |
23339 | + list_for_each_entry(cntrl_entry, &dev_data->list, list) { | |
23340 | + if (cntrl_entry->controller != PCI_CONTROLLER(dev)) | |
23341 | + continue; | |
23342 | + | |
23343 | + list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { | |
23344 | + if (dev_entry->dev == dev) { | |
23345 | + found_dev = dev_entry->dev; | |
23346 | + break; | |
23347 | + } | |
23348 | + } | |
23349 | + } | |
23350 | + | |
23351 | + if (!found_dev) { | |
23352 | + spin_unlock_irqrestore(&dev_data->lock, flags); | |
23353 | + return; | |
23354 | + } | |
23355 | + | |
23356 | + list_del(&dev_entry->list); | |
23357 | + kfree(dev_entry); | |
23358 | + | |
23359 | + if (list_empty(&cntrl_entry->dev_list)) { | |
23360 | + list_del(&cntrl_entry->list); | |
23361 | + kfree(cntrl_entry); | |
23362 | + } | |
23363 | + | |
23364 | + spin_unlock_irqrestore(&dev_data->lock, flags); | |
23365 | + pcistub_put_pci_dev(found_dev); | |
23366 | +} | |
23367 | + | |
23368 | +int pciback_init_devices(struct pciback_device *pdev) | |
23369 | +{ | |
23370 | + struct controller_dev_data *dev_data; | |
23371 | + | |
23372 | + dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL); | |
23373 | + if (!dev_data) | |
23374 | + return -ENOMEM; | |
23375 | + | |
23376 | + spin_lock_init(&dev_data->lock); | |
23377 | + | |
23378 | + INIT_LIST_HEAD(&dev_data->list); | |
23379 | + | |
23380 | + /* Starting domain:bus numbers */ | |
23381 | + dev_data->next_domain = 0; | |
23382 | + dev_data->next_bus = 0; | |
23383 | + | |
23384 | + pdev->pci_dev_data = dev_data; | |
23385 | + | |
23386 | + return 0; | |
23387 | +} | |
23388 | + | |
23389 | +static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data) | |
23390 | +{ | |
23391 | + struct walk_info *info = data; | |
23392 | + struct acpi_resource_address64 addr; | |
23393 | + acpi_status status; | |
23394 | + int i, len, err; | |
23395 | + char str[32], tmp[3]; | |
23396 | + unsigned char *ptr, *buf; | |
23397 | + | |
23398 | + status = acpi_resource_to_address64(res, &addr); | |
23399 | + | |
23400 | + /* Do we care about this range? Let's check. */ | |
23401 | + if (!ACPI_SUCCESS(status) || | |
23402 | + !(addr.resource_type == ACPI_MEMORY_RANGE || | |
23403 | + addr.resource_type == ACPI_IO_RANGE) || | |
23404 | + !addr.address_length || addr.producer_consumer != ACPI_PRODUCER) | |
23405 | + return AE_OK; | |
23406 | + | |
23407 | + /* | |
23408 | + * Furthermore, we really only care to tell the guest about | |
23409 | + * address ranges that require address translation of some sort. | |
23410 | + */ | |
23411 | + if (!(addr.resource_type == ACPI_MEMORY_RANGE && | |
23412 | + addr.info.mem.translation) && | |
23413 | + !(addr.resource_type == ACPI_IO_RANGE && | |
23414 | + addr.info.io.translation)) | |
23415 | + return AE_OK; | |
23416 | + | |
23417 | + /* Store the resource in xenbus for the guest */ | |
23418 | + len = snprintf(str, sizeof(str), "root-%d-resource-%d", | |
23419 | + info->root_num, info->resource_count); | |
23420 | + if (unlikely(len >= (sizeof(str) - 1))) | |
23421 | + return AE_OK; | |
23422 | + | |
23423 | + buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL); | |
23424 | + if (!buf) | |
23425 | + return AE_OK; | |
23426 | + | |
23427 | + /* Clean out resource_source */ | |
23428 | + res->data.address64.resource_source.index = 0xFF; | |
23429 | + res->data.address64.resource_source.string_length = 0; | |
23430 | + res->data.address64.resource_source.string_ptr = NULL; | |
23431 | + | |
23432 | + ptr = (unsigned char *)res; | |
23433 | + | |
23434 | + /* Turn the acpi_resource into an ASCII byte stream */ | |
23435 | + for (i = 0; i < sizeof(*res); i++) { | |
23436 | + snprintf(tmp, sizeof(tmp), "%02x", ptr[i]); | |
23437 | + strncat(buf, tmp, 2); | |
23438 | + } | |
23439 | + | |
23440 | + err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename, | |
23441 | + str, "%s", buf); | |
23442 | + | |
23443 | + if (!err) | |
23444 | + info->resource_count++; | |
23445 | + | |
23446 | + kfree(buf); | |
23447 | + | |
23448 | + return AE_OK; | |
23449 | +} | |
23450 | + | |
23451 | +int pciback_publish_pci_roots(struct pciback_device *pdev, | |
23452 | + publish_pci_root_cb publish_root_cb) | |
23453 | +{ | |
23454 | + struct controller_dev_data *dev_data = pdev->pci_dev_data; | |
23455 | + struct controller_list_entry *cntrl_entry; | |
23456 | + int i, root_num, len, err = 0; | |
23457 | + unsigned int domain, bus; | |
23458 | + char str[64]; | |
23459 | + struct walk_info info; | |
23460 | + | |
23461 | + spin_lock(&dev_data->lock); | |
23462 | + | |
23463 | + list_for_each_entry(cntrl_entry, &dev_data->list, list) { | |
23464 | + /* First publish all the domain:bus info */ | |
23465 | + err = publish_root_cb(pdev, cntrl_entry->domain, | |
23466 | + cntrl_entry->bus); | |
23467 | + if (err) | |
23468 | + goto out; | |
23469 | + | |
23470 | + /* | |
23471 | + * Now figure out which root-%d this belongs to | |
23472 | + * so we can associate resources with it. | |
23473 | + */ | |
23474 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, | |
23475 | + "root_num", "%d", &root_num); | |
23476 | + | |
23477 | + if (err != 1) | |
23478 | + goto out; | |
23479 | + | |
23480 | + for (i = 0; i < root_num; i++) { | |
23481 | + len = snprintf(str, sizeof(str), "root-%d", i); | |
23482 | + if (unlikely(len >= (sizeof(str) - 1))) { | |
23483 | + err = -ENOMEM; | |
23484 | + goto out; | |
23485 | + } | |
23486 | + | |
23487 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, | |
23488 | + str, "%x:%x", &domain, &bus); | |
23489 | + if (err != 2) | |
23490 | + goto out; | |
23491 | + | |
23492 | + /* Is this the one we just published? */ | |
23493 | + if (domain == cntrl_entry->domain && | |
23494 | + bus == cntrl_entry->bus) | |
23495 | + break; | |
23496 | + } | |
23497 | + | |
23498 | + if (i == root_num) | |
23499 | + goto out; | |
23500 | + | |
23501 | + info.pdev = pdev; | |
23502 | + info.resource_count = 0; | |
23503 | + info.root_num = i; | |
23504 | + | |
23505 | + /* Let ACPI do the heavy lifting on decoding resources */ | |
23506 | + acpi_walk_resources(cntrl_entry->controller->acpi_handle, | |
23507 | + METHOD_NAME__CRS, write_xenbus_resource, | |
23508 | + &info); | |
23509 | + | |
23510 | + /* No resouces. OK. On to the next one */ | |
23511 | + if (!info.resource_count) | |
23512 | + continue; | |
23513 | + | |
23514 | + /* Store the number of resources we wrote for this root-%d */ | |
23515 | + len = snprintf(str, sizeof(str), "root-%d-resources", i); | |
23516 | + if (unlikely(len >= (sizeof(str) - 1))) { | |
23517 | + err = -ENOMEM; | |
23518 | + goto out; | |
23519 | + } | |
23520 | + | |
23521 | + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, | |
23522 | + "%d", info.resource_count); | |
23523 | + if (err) | |
23524 | + goto out; | |
23525 | + } | |
23526 | + | |
23527 | + /* Finally, write some magic to synchronize with the guest. */ | |
23528 | + len = snprintf(str, sizeof(str), "root-resource-magic"); | |
23529 | + if (unlikely(len >= (sizeof(str) - 1))) { | |
23530 | + err = -ENOMEM; | |
23531 | + goto out; | |
23532 | + } | |
23533 | + | |
23534 | + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, | |
23535 | + "%lx", (sizeof(struct acpi_resource) * 2) + 1); | |
23536 | + | |
23537 | +out: | |
23538 | + spin_unlock(&dev_data->lock); | |
23539 | + | |
23540 | + return err; | |
23541 | +} | |
23542 | + | |
23543 | +void pciback_release_devices(struct pciback_device *pdev) | |
23544 | +{ | |
23545 | + struct controller_dev_data *dev_data = pdev->pci_dev_data; | |
23546 | + struct controller_list_entry *cntrl_entry, *c; | |
23547 | + struct controller_dev_entry *dev_entry, *d; | |
23548 | + | |
23549 | + list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) { | |
23550 | + list_for_each_entry_safe(dev_entry, d, | |
23551 | + &cntrl_entry->dev_list, list) { | |
23552 | + list_del(&dev_entry->list); | |
23553 | + pcistub_put_pci_dev(dev_entry->dev); | |
23554 | + kfree(dev_entry); | |
23555 | + } | |
23556 | + list_del(&cntrl_entry->list); | |
23557 | + kfree(cntrl_entry); | |
23558 | + } | |
23559 | + | |
23560 | + kfree(dev_data); | |
23561 | + pdev->pci_dev_data = NULL; | |
23562 | +} | |
23563 | Index: head-2008-11-25/drivers/xen/pciback/passthrough.c | |
23564 | =================================================================== | |
23565 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
23566 | +++ head-2008-11-25/drivers/xen/pciback/passthrough.c 2008-04-02 12:34:02.000000000 +0200 | |
23567 | @@ -0,0 +1,166 @@ | |
23568 | +/* | |
23569 | + * PCI Backend - Provides restricted access to the real PCI bus topology | |
23570 | + * to the frontend | |
23571 | + * | |
23572 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
23573 | + */ | |
23574 | + | |
23575 | +#include <linux/list.h> | |
23576 | +#include <linux/pci.h> | |
23577 | +#include <linux/spinlock.h> | |
23578 | +#include "pciback.h" | |
23579 | + | |
23580 | +struct passthrough_dev_data { | |
23581 | + /* Access to dev_list must be protected by lock */ | |
23582 | + struct list_head dev_list; | |
23583 | + spinlock_t lock; | |
23584 | +}; | |
23585 | + | |
23586 | +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, | |
23587 | + unsigned int domain, unsigned int bus, | |
23588 | + unsigned int devfn) | |
23589 | +{ | |
23590 | + struct passthrough_dev_data *dev_data = pdev->pci_dev_data; | |
23591 | + struct pci_dev_entry *dev_entry; | |
23592 | + struct pci_dev *dev = NULL; | |
23593 | + unsigned long flags; | |
23594 | + | |
23595 | + spin_lock_irqsave(&dev_data->lock, flags); | |
23596 | + | |
23597 | + list_for_each_entry(dev_entry, &dev_data->dev_list, list) { | |
23598 | + if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus) | |
23599 | + && bus == (unsigned int)dev_entry->dev->bus->number | |
23600 | + && devfn == dev_entry->dev->devfn) { | |
23601 | + dev = dev_entry->dev; | |
23602 | + break; | |
23603 | + } | |
23604 | + } | |
23605 | + | |
23606 | + spin_unlock_irqrestore(&dev_data->lock, flags); | |
23607 | + | |
23608 | + return dev; | |
23609 | +} | |
23610 | + | |
23611 | +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, | |
23612 | + int devid, publish_pci_dev_cb publish_cb) | |
23613 | +{ | |
23614 | + struct passthrough_dev_data *dev_data = pdev->pci_dev_data; | |
23615 | + struct pci_dev_entry *dev_entry; | |
23616 | + unsigned long flags; | |
23617 | + unsigned int domain, bus, devfn; | |
23618 | + int err; | |
23619 | + | |
23620 | + dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); | |
23621 | + if (!dev_entry) | |
23622 | + return -ENOMEM; | |
23623 | + dev_entry->dev = dev; | |
23624 | + | |
23625 | + spin_lock_irqsave(&dev_data->lock, flags); | |
23626 | + list_add_tail(&dev_entry->list, &dev_data->dev_list); | |
23627 | + spin_unlock_irqrestore(&dev_data->lock, flags); | |
23628 | + | |
23629 | + /* Publish this device. */ | |
23630 | + domain = (unsigned int)pci_domain_nr(dev->bus); | |
23631 | + bus = (unsigned int)dev->bus->number; | |
23632 | + devfn = dev->devfn; | |
23633 | + err = publish_cb(pdev, domain, bus, devfn, devid); | |
23634 | + | |
23635 | + return err; | |
23636 | +} | |
23637 | + | |
23638 | +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) | |
23639 | +{ | |
23640 | + struct passthrough_dev_data *dev_data = pdev->pci_dev_data; | |
23641 | + struct pci_dev_entry *dev_entry, *t; | |
23642 | + struct pci_dev *found_dev = NULL; | |
23643 | + unsigned long flags; | |
23644 | + | |
23645 | + spin_lock_irqsave(&dev_data->lock, flags); | |
23646 | + | |
23647 | + list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { | |
23648 | + if (dev_entry->dev == dev) { | |
23649 | + list_del(&dev_entry->list); | |
23650 | + found_dev = dev_entry->dev; | |
23651 | + kfree(dev_entry); | |
23652 | + } | |
23653 | + } | |
23654 | + | |
23655 | + spin_unlock_irqrestore(&dev_data->lock, flags); | |
23656 | + | |
23657 | + if (found_dev) | |
23658 | + pcistub_put_pci_dev(found_dev); | |
23659 | +} | |
23660 | + | |
23661 | +int pciback_init_devices(struct pciback_device *pdev) | |
23662 | +{ | |
23663 | + struct passthrough_dev_data *dev_data; | |
23664 | + | |
23665 | + dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL); | |
23666 | + if (!dev_data) | |
23667 | + return -ENOMEM; | |
23668 | + | |
23669 | + spin_lock_init(&dev_data->lock); | |
23670 | + | |
23671 | + INIT_LIST_HEAD(&dev_data->dev_list); | |
23672 | + | |
23673 | + pdev->pci_dev_data = dev_data; | |
23674 | + | |
23675 | + return 0; | |
23676 | +} | |
23677 | + | |
23678 | +int pciback_publish_pci_roots(struct pciback_device *pdev, | |
23679 | + publish_pci_root_cb publish_root_cb) | |
23680 | +{ | |
23681 | + int err = 0; | |
23682 | + struct passthrough_dev_data *dev_data = pdev->pci_dev_data; | |
23683 | + struct pci_dev_entry *dev_entry, *e; | |
23684 | + struct pci_dev *dev; | |
23685 | + int found; | |
23686 | + unsigned int domain, bus; | |
23687 | + | |
23688 | + spin_lock(&dev_data->lock); | |
23689 | + | |
23690 | + list_for_each_entry(dev_entry, &dev_data->dev_list, list) { | |
23691 | + /* Only publish this device as a root if none of its | |
23692 | + * parent bridges are exported | |
23693 | + */ | |
23694 | + found = 0; | |
23695 | + dev = dev_entry->dev->bus->self; | |
23696 | + for (; !found && dev != NULL; dev = dev->bus->self) { | |
23697 | + list_for_each_entry(e, &dev_data->dev_list, list) { | |
23698 | + if (dev == e->dev) { | |
23699 | + found = 1; | |
23700 | + break; | |
23701 | + } | |
23702 | + } | |
23703 | + } | |
23704 | + | |
23705 | + domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus); | |
23706 | + bus = (unsigned int)dev_entry->dev->bus->number; | |
23707 | + | |
23708 | + if (!found) { | |
23709 | + err = publish_root_cb(pdev, domain, bus); | |
23710 | + if (err) | |
23711 | + break; | |
23712 | + } | |
23713 | + } | |
23714 | + | |
23715 | + spin_unlock(&dev_data->lock); | |
23716 | + | |
23717 | + return err; | |
23718 | +} | |
23719 | + | |
23720 | +void pciback_release_devices(struct pciback_device *pdev) | |
23721 | +{ | |
23722 | + struct passthrough_dev_data *dev_data = pdev->pci_dev_data; | |
23723 | + struct pci_dev_entry *dev_entry, *t; | |
23724 | + | |
23725 | + list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { | |
23726 | + list_del(&dev_entry->list); | |
23727 | + pcistub_put_pci_dev(dev_entry->dev); | |
23728 | + kfree(dev_entry); | |
23729 | + } | |
23730 | + | |
23731 | + kfree(dev_data); | |
23732 | + pdev->pci_dev_data = NULL; | |
23733 | +} | |
23734 | Index: head-2008-11-25/drivers/xen/pciback/pci_stub.c | |
23735 | =================================================================== | |
23736 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
23737 | +++ head-2008-11-25/drivers/xen/pciback/pci_stub.c 2008-10-29 09:55:56.000000000 +0100 | |
23738 | @@ -0,0 +1,948 @@ | |
23739 | +/* | |
23740 | + * PCI Stub Driver - Grabs devices in backend to be exported later | |
23741 | + * | |
23742 | + * Ryan Wilson <hap9@epoch.ncsc.mil> | |
23743 | + * Chris Bookholt <hap10@epoch.ncsc.mil> | |
23744 | + */ | |
23745 | +#include <linux/module.h> | |
23746 | +#include <linux/init.h> | |
23747 | +#include <linux/list.h> | |
23748 | +#include <linux/spinlock.h> | |
23749 | +#include <linux/kref.h> | |
23750 | +#include <asm/atomic.h> | |
23751 | +#include "pciback.h" | |
23752 | +#include "conf_space.h" | |
23753 | +#include "conf_space_quirks.h" | |
23754 | + | |
23755 | +static char *pci_devs_to_hide = NULL; | |
23756 | +module_param_named(hide, pci_devs_to_hide, charp, 0444); | |
23757 | + | |
23758 | +struct pcistub_device_id { | |
23759 | + struct list_head slot_list; | |
23760 | + int domain; | |
23761 | + unsigned char bus; | |
23762 | + unsigned int devfn; | |
23763 | +}; | |
23764 | +static LIST_HEAD(pcistub_device_ids); | |
23765 | +static DEFINE_SPINLOCK(device_ids_lock); | |
23766 | + | |
23767 | +struct pcistub_device { | |
23768 | + struct kref kref; | |
23769 | + struct list_head dev_list; | |
23770 | + spinlock_t lock; | |
23771 | + | |
23772 | + struct pci_dev *dev; | |
23773 | + struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */ | |
23774 | +}; | |
23775 | + | |
23776 | +/* Access to pcistub_devices & seized_devices lists and the initialize_devices | |
23777 | + * flag must be locked with pcistub_devices_lock | |
23778 | + */ | |
23779 | +static DEFINE_SPINLOCK(pcistub_devices_lock); | |
23780 | +static LIST_HEAD(pcistub_devices); | |
23781 | + | |
23782 | +/* wait for device_initcall before initializing our devices | |
23783 | + * (see pcistub_init_devices_late) | |
23784 | + */ | |
23785 | +static int initialize_devices = 0; | |
23786 | +static LIST_HEAD(seized_devices); | |
23787 | + | |
23788 | +static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) | |
23789 | +{ | |
23790 | + struct pcistub_device *psdev; | |
23791 | + | |
23792 | + dev_dbg(&dev->dev, "pcistub_device_alloc\n"); | |
23793 | + | |
23794 | + psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC); | |
23795 | + if (!psdev) | |
23796 | + return NULL; | |
23797 | + | |
23798 | + psdev->dev = pci_dev_get(dev); | |
23799 | + if (!psdev->dev) { | |
23800 | + kfree(psdev); | |
23801 | + return NULL; | |
23802 | + } | |
23803 | + | |
23804 | + kref_init(&psdev->kref); | |
23805 | + spin_lock_init(&psdev->lock); | |
23806 | + | |
23807 | + return psdev; | |
23808 | +} | |
23809 | + | |
23810 | +/* Don't call this directly as it's called by pcistub_device_put */ | |
23811 | +static void pcistub_device_release(struct kref *kref) | |
23812 | +{ | |
23813 | + struct pcistub_device *psdev; | |
23814 | + | |
23815 | + psdev = container_of(kref, struct pcistub_device, kref); | |
23816 | + | |
23817 | + dev_dbg(&psdev->dev->dev, "pcistub_device_release\n"); | |
23818 | + | |
23819 | + /* Clean-up the device */ | |
23820 | + pciback_reset_device(psdev->dev); | |
23821 | + pciback_config_free_dyn_fields(psdev->dev); | |
23822 | + pciback_config_free_dev(psdev->dev); | |
23823 | + kfree(pci_get_drvdata(psdev->dev)); | |
23824 | + pci_set_drvdata(psdev->dev, NULL); | |
23825 | + | |
23826 | + pci_dev_put(psdev->dev); | |
23827 | + | |
23828 | + kfree(psdev); | |
23829 | +} | |
23830 | + | |
23831 | +static inline void pcistub_device_get(struct pcistub_device *psdev) | |
23832 | +{ | |
23833 | + kref_get(&psdev->kref); | |
23834 | +} | |
23835 | + | |
23836 | +static inline void pcistub_device_put(struct pcistub_device *psdev) | |
23837 | +{ | |
23838 | + kref_put(&psdev->kref, pcistub_device_release); | |
23839 | +} | |
23840 | + | |
23841 | +static struct pcistub_device *pcistub_device_find(int domain, int bus, | |
23842 | + int slot, int func) | |
23843 | +{ | |
23844 | + struct pcistub_device *psdev = NULL; | |
23845 | + unsigned long flags; | |
23846 | + | |
23847 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
23848 | + | |
23849 | + list_for_each_entry(psdev, &pcistub_devices, dev_list) { | |
23850 | + if (psdev->dev != NULL | |
23851 | + && domain == pci_domain_nr(psdev->dev->bus) | |
23852 | + && bus == psdev->dev->bus->number | |
23853 | + && PCI_DEVFN(slot, func) == psdev->dev->devfn) { | |
23854 | + pcistub_device_get(psdev); | |
23855 | + goto out; | |
23856 | + } | |
23857 | + } | |
23858 | + | |
23859 | + /* didn't find it */ | |
23860 | + psdev = NULL; | |
23861 | + | |
23862 | + out: | |
23863 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
23864 | + return psdev; | |
23865 | +} | |
23866 | + | |
23867 | +static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev, | |
23868 | + struct pcistub_device *psdev) | |
23869 | +{ | |
23870 | + struct pci_dev *pci_dev = NULL; | |
23871 | + unsigned long flags; | |
23872 | + | |
23873 | + pcistub_device_get(psdev); | |
23874 | + | |
23875 | + spin_lock_irqsave(&psdev->lock, flags); | |
23876 | + if (!psdev->pdev) { | |
23877 | + psdev->pdev = pdev; | |
23878 | + pci_dev = psdev->dev; | |
23879 | + } | |
23880 | + spin_unlock_irqrestore(&psdev->lock, flags); | |
23881 | + | |
23882 | + if (!pci_dev) | |
23883 | + pcistub_device_put(psdev); | |
23884 | + | |
23885 | + return pci_dev; | |
23886 | +} | |
23887 | + | |
23888 | +struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, | |
23889 | + int domain, int bus, | |
23890 | + int slot, int func) | |
23891 | +{ | |
23892 | + struct pcistub_device *psdev; | |
23893 | + struct pci_dev *found_dev = NULL; | |
23894 | + unsigned long flags; | |
23895 | + | |
23896 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
23897 | + | |
23898 | + list_for_each_entry(psdev, &pcistub_devices, dev_list) { | |
23899 | + if (psdev->dev != NULL | |
23900 | + && domain == pci_domain_nr(psdev->dev->bus) | |
23901 | + && bus == psdev->dev->bus->number | |
23902 | + && PCI_DEVFN(slot, func) == psdev->dev->devfn) { | |
23903 | + found_dev = pcistub_device_get_pci_dev(pdev, psdev); | |
23904 | + break; | |
23905 | + } | |
23906 | + } | |
23907 | + | |
23908 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
23909 | + return found_dev; | |
23910 | +} | |
23911 | + | |
23912 | +struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev, | |
23913 | + struct pci_dev *dev) | |
23914 | +{ | |
23915 | + struct pcistub_device *psdev; | |
23916 | + struct pci_dev *found_dev = NULL; | |
23917 | + unsigned long flags; | |
23918 | + | |
23919 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
23920 | + | |
23921 | + list_for_each_entry(psdev, &pcistub_devices, dev_list) { | |
23922 | + if (psdev->dev == dev) { | |
23923 | + found_dev = pcistub_device_get_pci_dev(pdev, psdev); | |
23924 | + break; | |
23925 | + } | |
23926 | + } | |
23927 | + | |
23928 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
23929 | + return found_dev; | |
23930 | +} | |
23931 | + | |
23932 | +void pcistub_put_pci_dev(struct pci_dev *dev) | |
23933 | +{ | |
23934 | + struct pcistub_device *psdev, *found_psdev = NULL; | |
23935 | + unsigned long flags; | |
23936 | + | |
23937 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
23938 | + | |
23939 | + list_for_each_entry(psdev, &pcistub_devices, dev_list) { | |
23940 | + if (psdev->dev == dev) { | |
23941 | + found_psdev = psdev; | |
23942 | + break; | |
23943 | + } | |
23944 | + } | |
23945 | + | |
23946 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
23947 | + | |
23948 | + /* Cleanup our device | |
23949 | + * (so it's ready for the next domain) | |
23950 | + */ | |
23951 | + pciback_reset_device(found_psdev->dev); | |
23952 | + pciback_config_free_dyn_fields(found_psdev->dev); | |
23953 | + pciback_config_reset_dev(found_psdev->dev); | |
23954 | + | |
23955 | + spin_lock_irqsave(&found_psdev->lock, flags); | |
23956 | + found_psdev->pdev = NULL; | |
23957 | + spin_unlock_irqrestore(&found_psdev->lock, flags); | |
23958 | + | |
23959 | + pcistub_device_put(found_psdev); | |
23960 | +} | |
23961 | + | |
23962 | +static int __devinit pcistub_match_one(struct pci_dev *dev, | |
23963 | + struct pcistub_device_id *pdev_id) | |
23964 | +{ | |
23965 | + /* Match the specified device by domain, bus, slot, func and also if | |
23966 | + * any of the device's parent bridges match. | |
23967 | + */ | |
23968 | + for (; dev != NULL; dev = dev->bus->self) { | |
23969 | + if (pci_domain_nr(dev->bus) == pdev_id->domain | |
23970 | + && dev->bus->number == pdev_id->bus | |
23971 | + && dev->devfn == pdev_id->devfn) | |
23972 | + return 1; | |
23973 | + | |
23974 | + /* Sometimes topmost bridge links to itself. */ | |
23975 | + if (dev == dev->bus->self) | |
23976 | + break; | |
23977 | + } | |
23978 | + | |
23979 | + return 0; | |
23980 | +} | |
23981 | + | |
23982 | +static int __devinit pcistub_match(struct pci_dev *dev) | |
23983 | +{ | |
23984 | + struct pcistub_device_id *pdev_id; | |
23985 | + unsigned long flags; | |
23986 | + int found = 0; | |
23987 | + | |
23988 | + spin_lock_irqsave(&device_ids_lock, flags); | |
23989 | + list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) { | |
23990 | + if (pcistub_match_one(dev, pdev_id)) { | |
23991 | + found = 1; | |
23992 | + break; | |
23993 | + } | |
23994 | + } | |
23995 | + spin_unlock_irqrestore(&device_ids_lock, flags); | |
23996 | + | |
23997 | + return found; | |
23998 | +} | |
23999 | + | |
24000 | +static int __devinit pcistub_init_device(struct pci_dev *dev) | |
24001 | +{ | |
24002 | + struct pciback_dev_data *dev_data; | |
24003 | + int err = 0; | |
24004 | + | |
24005 | + dev_dbg(&dev->dev, "initializing...\n"); | |
24006 | + | |
24007 | + /* The PCI backend is not intended to be a module (or to work with | |
24008 | + * removable PCI devices (yet). If it were, pciback_config_free() | |
24009 | + * would need to be called somewhere to free the memory allocated | |
24010 | + * here and then to call kfree(pci_get_drvdata(psdev->dev)). | |
24011 | + */ | |
24012 | + dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC); | |
24013 | + if (!dev_data) { | |
24014 | + err = -ENOMEM; | |
24015 | + goto out; | |
24016 | + } | |
24017 | + pci_set_drvdata(dev, dev_data); | |
24018 | + | |
24019 | + dev_dbg(&dev->dev, "initializing config\n"); | |
24020 | + err = pciback_config_init_dev(dev); | |
24021 | + if (err) | |
24022 | + goto out; | |
24023 | + | |
24024 | + /* HACK: Force device (& ACPI) to determine what IRQ it's on - we | |
24025 | + * must do this here because pcibios_enable_device may specify | |
24026 | + * the pci device's true irq (and possibly its other resources) | |
24027 | + * if they differ from what's in the configuration space. | |
24028 | + * This makes the assumption that the device's resources won't | |
24029 | + * change after this point (otherwise this code may break!) | |
24030 | + */ | |
24031 | + dev_dbg(&dev->dev, "enabling device\n"); | |
24032 | + err = pci_enable_device(dev); | |
24033 | + if (err) | |
24034 | + goto config_release; | |
24035 | + | |
24036 | + /* Now disable the device (this also ensures some private device | |
24037 | + * data is setup before we export) | |
24038 | + */ | |
24039 | + dev_dbg(&dev->dev, "reset device\n"); | |
24040 | + pciback_reset_device(dev); | |
24041 | + | |
24042 | + return 0; | |
24043 | + | |
24044 | + config_release: | |
24045 | + pciback_config_free_dev(dev); | |
24046 | + | |
24047 | + out: | |
24048 | + pci_set_drvdata(dev, NULL); | |
24049 | + kfree(dev_data); | |
24050 | + return err; | |
24051 | +} | |
24052 | + | |
24053 | +/* | |
24054 | + * Because some initialization still happens on | |
24055 | + * devices during fs_initcall, we need to defer | |
24056 | + * full initialization of our devices until | |
24057 | + * device_initcall. | |
24058 | + */ | |
24059 | +static int __init pcistub_init_devices_late(void) | |
24060 | +{ | |
24061 | + struct pcistub_device *psdev; | |
24062 | + unsigned long flags; | |
24063 | + int err = 0; | |
24064 | + | |
24065 | + pr_debug("pciback: pcistub_init_devices_late\n"); | |
24066 | + | |
24067 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
24068 | + | |
24069 | + while (!list_empty(&seized_devices)) { | |
24070 | + psdev = container_of(seized_devices.next, | |
24071 | + struct pcistub_device, dev_list); | |
24072 | + list_del(&psdev->dev_list); | |
24073 | + | |
24074 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
24075 | + | |
24076 | + err = pcistub_init_device(psdev->dev); | |
24077 | + if (err) { | |
24078 | + dev_err(&psdev->dev->dev, | |
24079 | + "error %d initializing device\n", err); | |
24080 | + kfree(psdev); | |
24081 | + psdev = NULL; | |
24082 | + } | |
24083 | + | |
24084 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
24085 | + | |
24086 | + if (psdev) | |
24087 | + list_add_tail(&psdev->dev_list, &pcistub_devices); | |
24088 | + } | |
24089 | + | |
24090 | + initialize_devices = 1; | |
24091 | + | |
24092 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
24093 | + | |
24094 | + return 0; | |
24095 | +} | |
24096 | + | |
24097 | +static int __devinit pcistub_seize(struct pci_dev *dev) | |
24098 | +{ | |
24099 | + struct pcistub_device *psdev; | |
24100 | + unsigned long flags; | |
24101 | + int err = 0; | |
24102 | + | |
24103 | + psdev = pcistub_device_alloc(dev); | |
24104 | + if (!psdev) | |
24105 | + return -ENOMEM; | |
24106 | + | |
24107 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
24108 | + | |
24109 | + if (initialize_devices) { | |
24110 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
24111 | + | |
24112 | + /* don't want irqs disabled when calling pcistub_init_device */ | |
24113 | + err = pcistub_init_device(psdev->dev); | |
24114 | + | |
24115 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
24116 | + | |
24117 | + if (!err) | |
24118 | + list_add(&psdev->dev_list, &pcistub_devices); | |
24119 | + } else { | |
24120 | + dev_dbg(&dev->dev, "deferring initialization\n"); | |
24121 | + list_add(&psdev->dev_list, &seized_devices); | |
24122 | + } | |
24123 | + | |
24124 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
24125 | + | |
24126 | + if (err) | |
24127 | + pcistub_device_put(psdev); | |
24128 | + | |
24129 | + return err; | |
24130 | +} | |
24131 | + | |
24132 | +static int __devinit pcistub_probe(struct pci_dev *dev, | |
24133 | + const struct pci_device_id *id) | |
24134 | +{ | |
24135 | + int err = 0; | |
24136 | + | |
24137 | + dev_dbg(&dev->dev, "probing...\n"); | |
24138 | + | |
24139 | + if (pcistub_match(dev)) { | |
24140 | + | |
24141 | + if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL | |
24142 | + && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { | |
24143 | + dev_err(&dev->dev, "can't export pci devices that " | |
24144 | + "don't have a normal (0) or bridge (1) " | |
24145 | + "header type!\n"); | |
24146 | + err = -ENODEV; | |
24147 | + goto out; | |
24148 | + } | |
24149 | + | |
24150 | + dev_info(&dev->dev, "seizing device\n"); | |
24151 | + err = pcistub_seize(dev); | |
24152 | + } else | |
24153 | + /* Didn't find the device */ | |
24154 | + err = -ENODEV; | |
24155 | + | |
24156 | + out: | |
24157 | + return err; | |
24158 | +} | |
24159 | + | |
24160 | +static void pcistub_remove(struct pci_dev *dev) | |
24161 | +{ | |
24162 | + struct pcistub_device *psdev, *found_psdev = NULL; | |
24163 | + unsigned long flags; | |
24164 | + | |
24165 | + dev_dbg(&dev->dev, "removing\n"); | |
24166 | + | |
24167 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
24168 | + | |
24169 | + pciback_config_quirk_release(dev); | |
24170 | + | |
24171 | + list_for_each_entry(psdev, &pcistub_devices, dev_list) { | |
24172 | + if (psdev->dev == dev) { | |
24173 | + found_psdev = psdev; | |
24174 | + break; | |
24175 | + } | |
24176 | + } | |
24177 | + | |
24178 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
24179 | + | |
24180 | + if (found_psdev) { | |
24181 | + dev_dbg(&dev->dev, "found device to remove - in use? %p\n", | |
24182 | + found_psdev->pdev); | |
24183 | + | |
24184 | + if (found_psdev->pdev) { | |
24185 | + printk(KERN_WARNING "pciback: ****** removing device " | |
24186 | + "%s while still in-use! ******\n", | |
24187 | + pci_name(found_psdev->dev)); | |
24188 | + printk(KERN_WARNING "pciback: ****** driver domain may " | |
24189 | + "still access this device's i/o resources!\n"); | |
24190 | + printk(KERN_WARNING "pciback: ****** shutdown driver " | |
24191 | + "domain before binding device\n"); | |
24192 | + printk(KERN_WARNING "pciback: ****** to other drivers " | |
24193 | + "or domains\n"); | |
24194 | + | |
24195 | + pciback_release_pci_dev(found_psdev->pdev, | |
24196 | + found_psdev->dev); | |
24197 | + } | |
24198 | + | |
24199 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
24200 | + list_del(&found_psdev->dev_list); | |
24201 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
24202 | + | |
24203 | + /* the final put for releasing from the list */ | |
24204 | + pcistub_device_put(found_psdev); | |
24205 | + } | |
24206 | +} | |
24207 | + | |
24208 | +static const struct pci_device_id pcistub_ids[] = { | |
24209 | + { | |
24210 | + .vendor = PCI_ANY_ID, | |
24211 | + .device = PCI_ANY_ID, | |
24212 | + .subvendor = PCI_ANY_ID, | |
24213 | + .subdevice = PCI_ANY_ID, | |
24214 | + }, | |
24215 | + {0,}, | |
24216 | +}; | |
24217 | + | |
24218 | +/* | |
24219 | + * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't | |
24220 | + * for a normal device. I don't want it to be loaded automatically. | |
24221 | + */ | |
24222 | + | |
24223 | +static struct pci_driver pciback_pci_driver = { | |
24224 | + .name = "pciback", | |
24225 | + .id_table = pcistub_ids, | |
24226 | + .probe = pcistub_probe, | |
24227 | + .remove = pcistub_remove, | |
24228 | +}; | |
24229 | + | |
24230 | +static inline int str_to_slot(const char *buf, int *domain, int *bus, | |
24231 | + int *slot, int *func) | |
24232 | +{ | |
24233 | + int err; | |
24234 | + | |
24235 | + err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func); | |
24236 | + if (err == 4) | |
24237 | + return 0; | |
24238 | + else if (err < 0) | |
24239 | + return -EINVAL; | |
24240 | + | |
24241 | + /* try again without domain */ | |
24242 | + *domain = 0; | |
24243 | + err = sscanf(buf, " %x:%x.%x", bus, slot, func); | |
24244 | + if (err == 3) | |
24245 | + return 0; | |
24246 | + | |
24247 | + return -EINVAL; | |
24248 | +} | |
24249 | + | |
24250 | +static inline int str_to_quirk(const char *buf, int *domain, int *bus, int | |
24251 | + *slot, int *func, int *reg, int *size, int *mask) | |
24252 | +{ | |
24253 | + int err; | |
24254 | + | |
24255 | + err = | |
24256 | + sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot, | |
24257 | + func, reg, size, mask); | |
24258 | + if (err == 7) | |
24259 | + return 0; | |
24260 | + return -EINVAL; | |
24261 | +} | |
24262 | + | |
24263 | +static int pcistub_device_id_add(int domain, int bus, int slot, int func) | |
24264 | +{ | |
24265 | + struct pcistub_device_id *pci_dev_id; | |
24266 | + unsigned long flags; | |
24267 | + | |
24268 | + pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); | |
24269 | + if (!pci_dev_id) | |
24270 | + return -ENOMEM; | |
24271 | + | |
24272 | + pci_dev_id->domain = domain; | |
24273 | + pci_dev_id->bus = bus; | |
24274 | + pci_dev_id->devfn = PCI_DEVFN(slot, func); | |
24275 | + | |
24276 | + pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n", | |
24277 | + domain, bus, slot, func); | |
24278 | + | |
24279 | + spin_lock_irqsave(&device_ids_lock, flags); | |
24280 | + list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids); | |
24281 | + spin_unlock_irqrestore(&device_ids_lock, flags); | |
24282 | + | |
24283 | + return 0; | |
24284 | +} | |
24285 | + | |
24286 | +static int pcistub_device_id_remove(int domain, int bus, int slot, int func) | |
24287 | +{ | |
24288 | + struct pcistub_device_id *pci_dev_id, *t; | |
24289 | + int devfn = PCI_DEVFN(slot, func); | |
24290 | + int err = -ENOENT; | |
24291 | + unsigned long flags; | |
24292 | + | |
24293 | + spin_lock_irqsave(&device_ids_lock, flags); | |
24294 | + list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) { | |
24295 | + | |
24296 | + if (pci_dev_id->domain == domain | |
24297 | + && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) { | |
24298 | + /* Don't break; here because it's possible the same | |
24299 | + * slot could be in the list more than once | |
24300 | + */ | |
24301 | + list_del(&pci_dev_id->slot_list); | |
24302 | + kfree(pci_dev_id); | |
24303 | + | |
24304 | + err = 0; | |
24305 | + | |
24306 | + pr_debug("pciback: removed %04x:%02x:%02x.%01x from " | |
24307 | + "seize list\n", domain, bus, slot, func); | |
24308 | + } | |
24309 | + } | |
24310 | + spin_unlock_irqrestore(&device_ids_lock, flags); | |
24311 | + | |
24312 | + return err; | |
24313 | +} | |
24314 | + | |
24315 | +static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, | |
24316 | + int size, int mask) | |
24317 | +{ | |
24318 | + int err = 0; | |
24319 | + struct pcistub_device *psdev; | |
24320 | + struct pci_dev *dev; | |
24321 | + struct config_field *field; | |
24322 | + | |
24323 | + psdev = pcistub_device_find(domain, bus, slot, func); | |
24324 | + if (!psdev || !psdev->dev) { | |
24325 | + err = -ENODEV; | |
24326 | + goto out; | |
24327 | + } | |
24328 | + dev = psdev->dev; | |
24329 | + | |
24330 | + field = kzalloc(sizeof(*field), GFP_ATOMIC); | |
24331 | + if (!field) { | |
24332 | + err = -ENOMEM; | |
24333 | + goto out; | |
24334 | + } | |
24335 | + | |
24336 | + field->offset = reg; | |
24337 | + field->size = size; | |
24338 | + field->mask = mask; | |
24339 | + field->init = NULL; | |
24340 | + field->reset = NULL; | |
24341 | + field->release = NULL; | |
24342 | + field->clean = pciback_config_field_free; | |
24343 | + | |
24344 | + err = pciback_config_quirks_add_field(dev, field); | |
24345 | + if (err) | |
24346 | + kfree(field); | |
24347 | + out: | |
24348 | + return err; | |
24349 | +} | |
24350 | + | |
24351 | +static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf, | |
24352 | + size_t count) | |
24353 | +{ | |
24354 | + int domain, bus, slot, func; | |
24355 | + int err; | |
24356 | + | |
24357 | + err = str_to_slot(buf, &domain, &bus, &slot, &func); | |
24358 | + if (err) | |
24359 | + goto out; | |
24360 | + | |
24361 | + err = pcistub_device_id_add(domain, bus, slot, func); | |
24362 | + | |
24363 | + out: | |
24364 | + if (!err) | |
24365 | + err = count; | |
24366 | + return err; | |
24367 | +} | |
24368 | + | |
24369 | +DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); | |
24370 | + | |
24371 | +static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, | |
24372 | + size_t count) | |
24373 | +{ | |
24374 | + int domain, bus, slot, func; | |
24375 | + int err; | |
24376 | + | |
24377 | + err = str_to_slot(buf, &domain, &bus, &slot, &func); | |
24378 | + if (err) | |
24379 | + goto out; | |
24380 | + | |
24381 | + err = pcistub_device_id_remove(domain, bus, slot, func); | |
24382 | + | |
24383 | + out: | |
24384 | + if (!err) | |
24385 | + err = count; | |
24386 | + return err; | |
24387 | +} | |
24388 | + | |
24389 | +DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); | |
24390 | + | |
24391 | +static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) | |
24392 | +{ | |
24393 | + struct pcistub_device_id *pci_dev_id; | |
24394 | + size_t count = 0; | |
24395 | + unsigned long flags; | |
24396 | + | |
24397 | + spin_lock_irqsave(&device_ids_lock, flags); | |
24398 | + list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) { | |
24399 | + if (count >= PAGE_SIZE) | |
24400 | + break; | |
24401 | + | |
24402 | + count += scnprintf(buf + count, PAGE_SIZE - count, | |
24403 | + "%04x:%02x:%02x.%01x\n", | |
24404 | + pci_dev_id->domain, pci_dev_id->bus, | |
24405 | + PCI_SLOT(pci_dev_id->devfn), | |
24406 | + PCI_FUNC(pci_dev_id->devfn)); | |
24407 | + } | |
24408 | + spin_unlock_irqrestore(&device_ids_lock, flags); | |
24409 | + | |
24410 | + return count; | |
24411 | +} | |
24412 | + | |
24413 | +DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); | |
24414 | + | |
24415 | +static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, | |
24416 | + size_t count) | |
24417 | +{ | |
24418 | + int domain, bus, slot, func, reg, size, mask; | |
24419 | + int err; | |
24420 | + | |
24421 | + err = str_to_quirk(buf, &domain, &bus, &slot, &func, ®, &size, | |
24422 | + &mask); | |
24423 | + if (err) | |
24424 | + goto out; | |
24425 | + | |
24426 | + err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask); | |
24427 | + | |
24428 | + out: | |
24429 | + if (!err) | |
24430 | + err = count; | |
24431 | + return err; | |
24432 | +} | |
24433 | + | |
24434 | +static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf) | |
24435 | +{ | |
24436 | + int count = 0; | |
24437 | + unsigned long flags; | |
24438 | + extern struct list_head pciback_quirks; | |
24439 | + struct pciback_config_quirk *quirk; | |
24440 | + struct pciback_dev_data *dev_data; | |
24441 | + const struct config_field *field; | |
24442 | + const struct config_field_entry *cfg_entry; | |
24443 | + | |
24444 | + spin_lock_irqsave(&device_ids_lock, flags); | |
24445 | + list_for_each_entry(quirk, &pciback_quirks, quirks_list) { | |
24446 | + if (count >= PAGE_SIZE) | |
24447 | + goto out; | |
24448 | + | |
24449 | + count += scnprintf(buf + count, PAGE_SIZE - count, | |
24450 | + "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n", | |
24451 | + quirk->pdev->bus->number, | |
24452 | + PCI_SLOT(quirk->pdev->devfn), | |
24453 | + PCI_FUNC(quirk->pdev->devfn), | |
24454 | + quirk->devid.vendor, quirk->devid.device, | |
24455 | + quirk->devid.subvendor, | |
24456 | + quirk->devid.subdevice); | |
24457 | + | |
24458 | + dev_data = pci_get_drvdata(quirk->pdev); | |
24459 | + | |
24460 | + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { | |
24461 | + field = cfg_entry->field; | |
24462 | + if (count >= PAGE_SIZE) | |
24463 | + goto out; | |
24464 | + | |
24465 | + count += scnprintf(buf + count, PAGE_SIZE - count, | |
24466 | + "\t\t%08x:%01x:%08x\n", | |
24467 | + cfg_entry->base_offset + field->offset, | |
24468 | + field->size, field->mask); | |
24469 | + } | |
24470 | + } | |
24471 | + | |
24472 | + out: | |
24473 | + spin_unlock_irqrestore(&device_ids_lock, flags); | |
24474 | + | |
24475 | + return count; | |
24476 | +} | |
24477 | + | |
24478 | +DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add); | |
24479 | + | |
24480 | +static ssize_t permissive_add(struct device_driver *drv, const char *buf, | |
24481 | + size_t count) | |
24482 | +{ | |
24483 | + int domain, bus, slot, func; | |
24484 | + int err; | |
24485 | + struct pcistub_device *psdev; | |
24486 | + struct pciback_dev_data *dev_data; | |
24487 | + err = str_to_slot(buf, &domain, &bus, &slot, &func); | |
24488 | + if (err) | |
24489 | + goto out; | |
24490 | + psdev = pcistub_device_find(domain, bus, slot, func); | |
24491 | + if (!psdev) { | |
24492 | + err = -ENODEV; | |
24493 | + goto out; | |
24494 | + } | |
24495 | + if (!psdev->dev) { | |
24496 | + err = -ENODEV; | |
24497 | + goto release; | |
24498 | + } | |
24499 | + dev_data = pci_get_drvdata(psdev->dev); | |
24500 | + /* the driver data for a device should never be null at this point */ | |
24501 | + if (!dev_data) { | |
24502 | + err = -ENXIO; | |
24503 | + goto release; | |
24504 | + } | |
24505 | + if (!dev_data->permissive) { | |
24506 | + dev_data->permissive = 1; | |
24507 | + /* Let user know that what they're doing could be unsafe */ | |
24508 | + dev_warn(&psdev->dev->dev, | |
24509 | + "enabling permissive mode configuration space accesses!\n"); | |
24510 | + dev_warn(&psdev->dev->dev, | |
24511 | + "permissive mode is potentially unsafe!\n"); | |
24512 | + } | |
24513 | + release: | |
24514 | + pcistub_device_put(psdev); | |
24515 | + out: | |
24516 | + if (!err) | |
24517 | + err = count; | |
24518 | + return err; | |
24519 | +} | |
24520 | + | |
24521 | +static ssize_t permissive_show(struct device_driver *drv, char *buf) | |
24522 | +{ | |
24523 | + struct pcistub_device *psdev; | |
24524 | + struct pciback_dev_data *dev_data; | |
24525 | + size_t count = 0; | |
24526 | + unsigned long flags; | |
24527 | + spin_lock_irqsave(&pcistub_devices_lock, flags); | |
24528 | + list_for_each_entry(psdev, &pcistub_devices, dev_list) { | |
24529 | + if (count >= PAGE_SIZE) | |
24530 | + break; | |
24531 | + if (!psdev->dev) | |
24532 | + continue; | |
24533 | + dev_data = pci_get_drvdata(psdev->dev); | |
24534 | + if (!dev_data || !dev_data->permissive) | |
24535 | + continue; | |
24536 | + count += | |
24537 | + scnprintf(buf + count, PAGE_SIZE - count, "%s\n", | |
24538 | + pci_name(psdev->dev)); | |
24539 | + } | |
24540 | + spin_unlock_irqrestore(&pcistub_devices_lock, flags); | |
24541 | + return count; | |
24542 | +} | |
24543 | + | |
24544 | +DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add); | |
24545 | + | |
24546 | +#ifdef CONFIG_PCI_MSI | |
24547 | + | |
24548 | +int pciback_get_owner(struct pci_dev *dev) | |
24549 | +{ | |
24550 | + struct pcistub_device *psdev; | |
24551 | + | |
24552 | + psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number, | |
24553 | + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); | |
24554 | + | |
24555 | + if (!psdev || !psdev->pdev) | |
24556 | + return -1; | |
24557 | + | |
24558 | + return psdev->pdev->xdev->otherend_id; | |
24559 | +} | |
24560 | +#endif | |
24561 | + | |
24562 | +static void pcistub_exit(void) | |
24563 | +{ | |
24564 | + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot); | |
24565 | + driver_remove_file(&pciback_pci_driver.driver, | |
24566 | + &driver_attr_remove_slot); | |
24567 | + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots); | |
24568 | + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks); | |
24569 | + driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive); | |
24570 | + | |
24571 | + pci_unregister_driver(&pciback_pci_driver); | |
24572 | + WARN_ON(unregister_msi_get_owner(pciback_get_owner)); | |
24573 | +} | |
24574 | + | |
24575 | +static int __init pcistub_init(void) | |
24576 | +{ | |
24577 | + int pos = 0; | |
24578 | + int err = 0; | |
24579 | + int domain, bus, slot, func; | |
24580 | + int parsed; | |
24581 | + | |
24582 | + if (pci_devs_to_hide && *pci_devs_to_hide) { | |
24583 | + do { | |
24584 | + parsed = 0; | |
24585 | + | |
24586 | + err = sscanf(pci_devs_to_hide + pos, | |
24587 | + " (%x:%x:%x.%x) %n", | |
24588 | + &domain, &bus, &slot, &func, &parsed); | |
24589 | + if (err != 4) { | |
24590 | + domain = 0; | |
24591 | + err = sscanf(pci_devs_to_hide + pos, | |
24592 | + " (%x:%x.%x) %n", | |
24593 | + &bus, &slot, &func, &parsed); | |
24594 | + if (err != 3) | |
24595 | + goto parse_error; | |
24596 | + } | |
24597 | + | |
24598 | + err = pcistub_device_id_add(domain, bus, slot, func); | |
24599 | + if (err) | |
24600 | + goto out; | |
24601 | + | |
24602 | + /* if parsed<=0, we've reached the end of the string */ | |
24603 | + pos += parsed; | |
24604 | + } while (parsed > 0 && pci_devs_to_hide[pos]); | |
24605 | + } | |
24606 | + | |
24607 | + /* If we're the first PCI Device Driver to register, we're the | |
24608 | + * first one to get offered PCI devices as they become | |
24609 | + * available (and thus we can be the first to grab them) | |
24610 | + */ | |
24611 | + err = pci_register_driver(&pciback_pci_driver); | |
24612 | + if (err < 0) | |
24613 | + goto out; | |
24614 | + | |
24615 | + err = driver_create_file(&pciback_pci_driver.driver, | |
24616 | + &driver_attr_new_slot); | |
24617 | + if (!err) | |
24618 | + err = driver_create_file(&pciback_pci_driver.driver, | |
24619 | + &driver_attr_remove_slot); | |
24620 | + if (!err) | |
24621 | + err = driver_create_file(&pciback_pci_driver.driver, | |
24622 | + &driver_attr_slots); | |
24623 | + if (!err) | |
24624 | + err = driver_create_file(&pciback_pci_driver.driver, | |
24625 | + &driver_attr_quirks); | |
24626 | + if (!err) | |
24627 | + err = driver_create_file(&pciback_pci_driver.driver, | |
24628 | + &driver_attr_permissive); | |
24629 | + | |
24630 | + if (!err) | |
24631 | + err = register_msi_get_owner(pciback_get_owner); | |
24632 | + if (err) | |
24633 | + pcistub_exit(); | |
24634 | + | |
24635 | + out: | |
24636 | + return err; | |
24637 | + | |
24638 | + parse_error: | |
24639 | + printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n", | |
24640 | + pci_devs_to_hide + pos); | |
24641 | + return -EINVAL; | |
24642 | +} | |
24643 | + | |
24644 | +#ifndef MODULE | |
24645 | +/* | |
24646 | + * fs_initcall happens before device_initcall | |
24647 | + * so pciback *should* get called first (b/c we | |
24648 | + * want to suck up any device before other drivers | |
24649 | + * get a chance by being the first pci device | |
24650 | + * driver to register) | |
24651 | + */ | |
24652 | +fs_initcall(pcistub_init); | |
24653 | +#endif | |
24654 | + | |
24655 | +static int __init pciback_init(void) | |
24656 | +{ | |
24657 | + int err; | |
24658 | + | |
24659 | + err = pciback_config_init(); | |
24660 | + if (err) | |
24661 | + return err; | |
24662 | + | |
24663 | +#ifdef MODULE | |
24664 | + err = pcistub_init(); | |
24665 | + if (err < 0) | |
24666 | + return err; | |
24667 | +#endif | |
24668 | + | |
24669 | + pcistub_init_devices_late(); | |
24670 | + err = pciback_xenbus_register(); | |
24671 | + if (err) | |
24672 | + pcistub_exit(); | |
24673 | + | |
24674 | + return err; | |
24675 | +} | |
24676 | + | |
24677 | +static void __exit pciback_cleanup(void) | |
24678 | +{ | |
24679 | + pciback_xenbus_unregister(); | |
24680 | + pcistub_exit(); | |
24681 | +} | |
24682 | + | |
24683 | +module_init(pciback_init); | |
24684 | +module_exit(pciback_cleanup); | |
24685 | + | |
24686 | +MODULE_LICENSE("Dual BSD/GPL"); | |
24687 | Index: head-2008-11-25/drivers/xen/pciback/pciback.h | |
24688 | =================================================================== | |
24689 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
24690 | +++ head-2008-11-25/drivers/xen/pciback/pciback.h 2008-07-21 11:00:33.000000000 +0200 | |
24691 | @@ -0,0 +1,111 @@ | |
24692 | +/* | |
24693 | + * PCI Backend Common Data Structures & Function Declarations | |
24694 | + * | |
24695 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
24696 | + */ | |
24697 | +#ifndef __XEN_PCIBACK_H__ | |
24698 | +#define __XEN_PCIBACK_H__ | |
24699 | + | |
24700 | +#include <linux/pci.h> | |
24701 | +#include <linux/interrupt.h> | |
24702 | +#include <xen/xenbus.h> | |
24703 | +#include <linux/list.h> | |
24704 | +#include <linux/spinlock.h> | |
24705 | +#include <linux/workqueue.h> | |
24706 | +#include <asm/atomic.h> | |
24707 | +#include <xen/interface/io/pciif.h> | |
24708 | + | |
24709 | +struct pci_dev_entry { | |
24710 | + struct list_head list; | |
24711 | + struct pci_dev *dev; | |
24712 | +}; | |
24713 | + | |
24714 | +#define _PDEVF_op_active (0) | |
24715 | +#define PDEVF_op_active (1<<(_PDEVF_op_active)) | |
24716 | + | |
24717 | +struct pciback_device { | |
24718 | + void *pci_dev_data; | |
24719 | + spinlock_t dev_lock; | |
24720 | + | |
24721 | + struct xenbus_device *xdev; | |
24722 | + | |
24723 | + struct xenbus_watch be_watch; | |
24724 | + u8 be_watching; | |
24725 | + | |
24726 | + int evtchn_irq; | |
24727 | + | |
24728 | + struct vm_struct *sh_area; | |
24729 | + struct xen_pci_sharedinfo *sh_info; | |
24730 | + | |
24731 | + unsigned long flags; | |
24732 | + | |
24733 | + struct work_struct op_work; | |
24734 | +}; | |
24735 | + | |
24736 | +struct pciback_dev_data { | |
24737 | + struct list_head config_fields; | |
24738 | + int permissive; | |
24739 | + int warned_on_write; | |
24740 | +}; | |
24741 | + | |
24742 | +/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */ | |
24743 | +struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, | |
24744 | + int domain, int bus, | |
24745 | + int slot, int func); | |
24746 | +struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev, | |
24747 | + struct pci_dev *dev); | |
24748 | +void pcistub_put_pci_dev(struct pci_dev *dev); | |
24749 | + | |
24750 | +/* Ensure a device is turned off or reset */ | |
24751 | +void pciback_reset_device(struct pci_dev *pdev); | |
24752 | + | |
24753 | +/* Access a virtual configuration space for a PCI device */ | |
24754 | +int pciback_config_init(void); | |
24755 | +int pciback_config_init_dev(struct pci_dev *dev); | |
24756 | +void pciback_config_free_dyn_fields(struct pci_dev *dev); | |
24757 | +void pciback_config_reset_dev(struct pci_dev *dev); | |
24758 | +void pciback_config_free_dev(struct pci_dev *dev); | |
24759 | +int pciback_config_read(struct pci_dev *dev, int offset, int size, | |
24760 | + u32 * ret_val); | |
24761 | +int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value); | |
24762 | + | |
24763 | +/* Handle requests for specific devices from the frontend */ | |
24764 | +typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev, | |
24765 | + unsigned int domain, unsigned int bus, | |
24766 | + unsigned int devfn, unsigned int devid); | |
24767 | +typedef int (*publish_pci_root_cb) (struct pciback_device * pdev, | |
24768 | + unsigned int domain, unsigned int bus); | |
24769 | +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, | |
24770 | + int devid, publish_pci_dev_cb publish_cb); | |
24771 | +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev); | |
24772 | +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, | |
24773 | + unsigned int domain, unsigned int bus, | |
24774 | + unsigned int devfn); | |
24775 | +int pciback_init_devices(struct pciback_device *pdev); | |
24776 | +int pciback_publish_pci_roots(struct pciback_device *pdev, | |
24777 | + publish_pci_root_cb cb); | |
24778 | +void pciback_release_devices(struct pciback_device *pdev); | |
24779 | + | |
24780 | +/* Handles events from front-end */ | |
24781 | +irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs); | |
24782 | +void pciback_do_op(void *data); | |
24783 | + | |
24784 | +int pciback_xenbus_register(void); | |
24785 | +void pciback_xenbus_unregister(void); | |
24786 | + | |
24787 | +#ifdef CONFIG_PCI_MSI | |
24788 | +int pciback_enable_msi(struct pciback_device *pdev, | |
24789 | + struct pci_dev *dev, struct xen_pci_op *op); | |
24790 | + | |
24791 | +int pciback_disable_msi(struct pciback_device *pdev, | |
24792 | + struct pci_dev *dev, struct xen_pci_op *op); | |
24793 | + | |
24794 | + | |
24795 | +int pciback_enable_msix(struct pciback_device *pdev, | |
24796 | + struct pci_dev *dev, struct xen_pci_op *op); | |
24797 | + | |
24798 | +int pciback_disable_msix(struct pciback_device *pdev, | |
24799 | + struct pci_dev *dev, struct xen_pci_op *op); | |
24800 | +#endif | |
24801 | +extern int verbose_request; | |
24802 | +#endif | |
24803 | Index: head-2008-11-25/drivers/xen/pciback/pciback_ops.c | |
24804 | =================================================================== | |
24805 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
24806 | +++ head-2008-11-25/drivers/xen/pciback/pciback_ops.c 2008-07-21 11:00:33.000000000 +0200 | |
24807 | @@ -0,0 +1,117 @@ | |
24808 | +/* | |
24809 | + * PCI Backend Operations - respond to PCI requests from Frontend | |
24810 | + * | |
24811 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
24812 | + */ | |
24813 | +#include <linux/module.h> | |
24814 | +#include <asm/bitops.h> | |
24815 | +#include <xen/evtchn.h> | |
24816 | +#include "pciback.h" | |
24817 | + | |
24818 | +int verbose_request = 0; | |
24819 | +module_param(verbose_request, int, 0644); | |
24820 | + | |
24821 | +/* Ensure a device is "turned off" and ready to be exported. | |
24822 | + * (Also see pciback_config_reset to ensure virtual configuration space is | |
24823 | + * ready to be re-exported) | |
24824 | + */ | |
24825 | +void pciback_reset_device(struct pci_dev *dev) | |
24826 | +{ | |
24827 | + u16 cmd; | |
24828 | + | |
24829 | + /* Disable devices (but not bridges) */ | |
24830 | + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { | |
24831 | + pci_disable_device(dev); | |
24832 | + | |
24833 | + pci_write_config_word(dev, PCI_COMMAND, 0); | |
24834 | + | |
24835 | + dev->is_enabled = 0; | |
24836 | + dev->is_busmaster = 0; | |
24837 | + } else { | |
24838 | + pci_read_config_word(dev, PCI_COMMAND, &cmd); | |
24839 | + if (cmd & (PCI_COMMAND_INVALIDATE)) { | |
24840 | + cmd &= ~(PCI_COMMAND_INVALIDATE); | |
24841 | + pci_write_config_word(dev, PCI_COMMAND, cmd); | |
24842 | + | |
24843 | + dev->is_busmaster = 0; | |
24844 | + } | |
24845 | + } | |
24846 | +} | |
24847 | + | |
24848 | +static inline void test_and_schedule_op(struct pciback_device *pdev) | |
24849 | +{ | |
24850 | + /* Check that frontend is requesting an operation and that we are not | |
24851 | + * already processing a request */ | |
24852 | + if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) | |
24853 | + && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) | |
24854 | + schedule_work(&pdev->op_work); | |
24855 | +} | |
24856 | + | |
24857 | +/* Performing the configuration space reads/writes must not be done in atomic | |
24858 | + * context because some of the pci_* functions can sleep (mostly due to ACPI | |
24859 | + * use of semaphores). This function is intended to be called from a work | |
24860 | + * queue in process context taking a struct pciback_device as a parameter */ | |
24861 | +void pciback_do_op(void *data) | |
24862 | +{ | |
24863 | + struct pciback_device *pdev = data; | |
24864 | + struct pci_dev *dev; | |
24865 | + struct xen_pci_op *op = &pdev->sh_info->op; | |
24866 | + | |
24867 | + dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn); | |
24868 | + | |
24869 | + if (dev == NULL) | |
24870 | + op->err = XEN_PCI_ERR_dev_not_found; | |
24871 | + else | |
24872 | + { | |
24873 | + switch (op->cmd) | |
24874 | + { | |
24875 | + case XEN_PCI_OP_conf_read: | |
24876 | + op->err = pciback_config_read(dev, | |
24877 | + op->offset, op->size, &op->value); | |
24878 | + break; | |
24879 | + case XEN_PCI_OP_conf_write: | |
24880 | + op->err = pciback_config_write(dev, | |
24881 | + op->offset, op->size, op->value); | |
24882 | + break; | |
24883 | +#ifdef CONFIG_PCI_MSI | |
24884 | + case XEN_PCI_OP_enable_msi: | |
24885 | + op->err = pciback_enable_msi(pdev, dev, op); | |
24886 | + break; | |
24887 | + case XEN_PCI_OP_disable_msi: | |
24888 | + op->err = pciback_disable_msi(pdev, dev, op); | |
24889 | + break; | |
24890 | + case XEN_PCI_OP_enable_msix: | |
24891 | + op->err = pciback_enable_msix(pdev, dev, op); | |
24892 | + break; | |
24893 | + case XEN_PCI_OP_disable_msix: | |
24894 | + op->err = pciback_disable_msix(pdev, dev, op); | |
24895 | + break; | |
24896 | +#endif | |
24897 | + default: | |
24898 | + op->err = XEN_PCI_ERR_not_implemented; | |
24899 | + break; | |
24900 | + } | |
24901 | + } | |
24902 | + /* Tell the driver domain that we're done. */ | |
24903 | + wmb(); | |
24904 | + clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); | |
24905 | + notify_remote_via_irq(pdev->evtchn_irq); | |
24906 | + | |
24907 | + /* Mark that we're done. */ | |
24908 | + smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */ | |
24909 | + clear_bit(_PDEVF_op_active, &pdev->flags); | |
24910 | + smp_mb__after_clear_bit(); /* /before/ final check for work */ | |
24911 | + | |
24912 | + /* Check to see if the driver domain tried to start another request in | |
24913 | + * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. */ | |
24914 | + test_and_schedule_op(pdev); | |
24915 | +} | |
24916 | + | |
24917 | +irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs) | |
24918 | +{ | |
24919 | + struct pciback_device *pdev = dev_id; | |
24920 | + | |
24921 | + test_and_schedule_op(pdev); | |
24922 | + | |
24923 | + return IRQ_HANDLED; | |
24924 | +} | |
24925 | Index: head-2008-11-25/drivers/xen/pciback/slot.c | |
24926 | =================================================================== | |
24927 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
24928 | +++ head-2008-11-25/drivers/xen/pciback/slot.c 2008-02-26 10:54:11.000000000 +0100 | |
24929 | @@ -0,0 +1,157 @@ | |
24930 | +/* | |
24931 | + * PCI Backend - Provides a Virtual PCI bus (with real devices) | |
24932 | + * to the frontend | |
24933 | + * | |
24934 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> (vpci.c) | |
24935 | + * Author: Tristan Gingold <tristan.gingold@bull.net>, from vpci.c | |
24936 | + */ | |
24937 | + | |
24938 | +#include <linux/list.h> | |
24939 | +#include <linux/slab.h> | |
24940 | +#include <linux/pci.h> | |
24941 | +#include <linux/spinlock.h> | |
24942 | +#include "pciback.h" | |
24943 | + | |
24944 | +/* There are at most 32 slots in a pci bus. */ | |
24945 | +#define PCI_SLOT_MAX 32 | |
24946 | + | |
24947 | +#define PCI_BUS_NBR 2 | |
24948 | + | |
24949 | +struct slot_dev_data { | |
24950 | + /* Access to dev_list must be protected by lock */ | |
24951 | + struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX]; | |
24952 | + spinlock_t lock; | |
24953 | +}; | |
24954 | + | |
24955 | +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, | |
24956 | + unsigned int domain, unsigned int bus, | |
24957 | + unsigned int devfn) | |
24958 | +{ | |
24959 | + struct pci_dev *dev = NULL; | |
24960 | + struct slot_dev_data *slot_dev = pdev->pci_dev_data; | |
24961 | + unsigned long flags; | |
24962 | + | |
24963 | + if (domain != 0 || PCI_FUNC(devfn) != 0) | |
24964 | + return NULL; | |
24965 | + | |
24966 | + if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR) | |
24967 | + return NULL; | |
24968 | + | |
24969 | + spin_lock_irqsave(&slot_dev->lock, flags); | |
24970 | + dev = slot_dev->slots[bus][PCI_SLOT(devfn)]; | |
24971 | + spin_unlock_irqrestore(&slot_dev->lock, flags); | |
24972 | + | |
24973 | + return dev; | |
24974 | +} | |
24975 | + | |
24976 | +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, | |
24977 | + int devid, publish_pci_dev_cb publish_cb) | |
24978 | +{ | |
24979 | + int err = 0, slot, bus; | |
24980 | + struct slot_dev_data *slot_dev = pdev->pci_dev_data; | |
24981 | + unsigned long flags; | |
24982 | + | |
24983 | + if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { | |
24984 | + err = -EFAULT; | |
24985 | + xenbus_dev_fatal(pdev->xdev, err, | |
24986 | + "Can't export bridges on the virtual PCI bus"); | |
24987 | + goto out; | |
24988 | + } | |
24989 | + | |
24990 | + spin_lock_irqsave(&slot_dev->lock, flags); | |
24991 | + | |
24992 | + /* Assign to a new slot on the virtual PCI bus */ | |
24993 | + for (bus = 0; bus < PCI_BUS_NBR; bus++) | |
24994 | + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | |
24995 | + if (slot_dev->slots[bus][slot] == NULL) { | |
24996 | + printk(KERN_INFO | |
24997 | + "pciback: slot: %s: assign to virtual slot %d, bus %d\n", | |
24998 | + pci_name(dev), slot, bus); | |
24999 | + slot_dev->slots[bus][slot] = dev; | |
25000 | + goto unlock; | |
25001 | + } | |
25002 | + } | |
25003 | + | |
25004 | + err = -ENOMEM; | |
25005 | + xenbus_dev_fatal(pdev->xdev, err, | |
25006 | + "No more space on root virtual PCI bus"); | |
25007 | + | |
25008 | + unlock: | |
25009 | + spin_unlock_irqrestore(&slot_dev->lock, flags); | |
25010 | + | |
25011 | + /* Publish this device. */ | |
25012 | + if(!err) | |
25013 | + err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid); | |
25014 | + | |
25015 | + out: | |
25016 | + return err; | |
25017 | +} | |
25018 | + | |
25019 | +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) | |
25020 | +{ | |
25021 | + int slot, bus; | |
25022 | + struct slot_dev_data *slot_dev = pdev->pci_dev_data; | |
25023 | + struct pci_dev *found_dev = NULL; | |
25024 | + unsigned long flags; | |
25025 | + | |
25026 | + spin_lock_irqsave(&slot_dev->lock, flags); | |
25027 | + | |
25028 | + for (bus = 0; bus < PCI_BUS_NBR; bus++) | |
25029 | + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | |
25030 | + if (slot_dev->slots[bus][slot] == dev) { | |
25031 | + slot_dev->slots[bus][slot] = NULL; | |
25032 | + found_dev = dev; | |
25033 | + goto out; | |
25034 | + } | |
25035 | + } | |
25036 | + | |
25037 | + out: | |
25038 | + spin_unlock_irqrestore(&slot_dev->lock, flags); | |
25039 | + | |
25040 | + if (found_dev) | |
25041 | + pcistub_put_pci_dev(found_dev); | |
25042 | +} | |
25043 | + | |
25044 | +int pciback_init_devices(struct pciback_device *pdev) | |
25045 | +{ | |
25046 | + int slot, bus; | |
25047 | + struct slot_dev_data *slot_dev; | |
25048 | + | |
25049 | + slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL); | |
25050 | + if (!slot_dev) | |
25051 | + return -ENOMEM; | |
25052 | + | |
25053 | + spin_lock_init(&slot_dev->lock); | |
25054 | + | |
25055 | + for (bus = 0; bus < PCI_BUS_NBR; bus++) | |
25056 | + for (slot = 0; slot < PCI_SLOT_MAX; slot++) | |
25057 | + slot_dev->slots[bus][slot] = NULL; | |
25058 | + | |
25059 | + pdev->pci_dev_data = slot_dev; | |
25060 | + | |
25061 | + return 0; | |
25062 | +} | |
25063 | + | |
25064 | +int pciback_publish_pci_roots(struct pciback_device *pdev, | |
25065 | + publish_pci_root_cb publish_cb) | |
25066 | +{ | |
25067 | + /* The Virtual PCI bus has only one root */ | |
25068 | + return publish_cb(pdev, 0, 0); | |
25069 | +} | |
25070 | + | |
25071 | +void pciback_release_devices(struct pciback_device *pdev) | |
25072 | +{ | |
25073 | + int slot, bus; | |
25074 | + struct slot_dev_data *slot_dev = pdev->pci_dev_data; | |
25075 | + struct pci_dev *dev; | |
25076 | + | |
25077 | + for (bus = 0; bus < PCI_BUS_NBR; bus++) | |
25078 | + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | |
25079 | + dev = slot_dev->slots[bus][slot]; | |
25080 | + if (dev != NULL) | |
25081 | + pcistub_put_pci_dev(dev); | |
25082 | + } | |
25083 | + | |
25084 | + kfree(slot_dev); | |
25085 | + pdev->pci_dev_data = NULL; | |
25086 | +} | |
25087 | Index: head-2008-11-25/drivers/xen/pciback/vpci.c | |
25088 | =================================================================== | |
25089 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
25090 | +++ head-2008-11-25/drivers/xen/pciback/vpci.c 2008-02-26 10:54:11.000000000 +0100 | |
25091 | @@ -0,0 +1,212 @@ | |
25092 | +/* | |
25093 | + * PCI Backend - Provides a Virtual PCI bus (with real devices) | |
25094 | + * to the frontend | |
25095 | + * | |
25096 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
25097 | + */ | |
25098 | + | |
25099 | +#include <linux/list.h> | |
25100 | +#include <linux/slab.h> | |
25101 | +#include <linux/pci.h> | |
25102 | +#include <linux/spinlock.h> | |
25103 | +#include "pciback.h" | |
25104 | + | |
25105 | +#define PCI_SLOT_MAX 32 | |
25106 | + | |
25107 | +struct vpci_dev_data { | |
25108 | + /* Access to dev_list must be protected by lock */ | |
25109 | + struct list_head dev_list[PCI_SLOT_MAX]; | |
25110 | + spinlock_t lock; | |
25111 | +}; | |
25112 | + | |
25113 | +static inline struct list_head *list_first(struct list_head *head) | |
25114 | +{ | |
25115 | + return head->next; | |
25116 | +} | |
25117 | + | |
25118 | +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, | |
25119 | + unsigned int domain, unsigned int bus, | |
25120 | + unsigned int devfn) | |
25121 | +{ | |
25122 | + struct pci_dev_entry *entry; | |
25123 | + struct pci_dev *dev = NULL; | |
25124 | + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; | |
25125 | + unsigned long flags; | |
25126 | + | |
25127 | + if (domain != 0 || bus != 0) | |
25128 | + return NULL; | |
25129 | + | |
25130 | + if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { | |
25131 | + spin_lock_irqsave(&vpci_dev->lock, flags); | |
25132 | + | |
25133 | + list_for_each_entry(entry, | |
25134 | + &vpci_dev->dev_list[PCI_SLOT(devfn)], | |
25135 | + list) { | |
25136 | + if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) { | |
25137 | + dev = entry->dev; | |
25138 | + break; | |
25139 | + } | |
25140 | + } | |
25141 | + | |
25142 | + spin_unlock_irqrestore(&vpci_dev->lock, flags); | |
25143 | + } | |
25144 | + return dev; | |
25145 | +} | |
25146 | + | |
25147 | +static inline int match_slot(struct pci_dev *l, struct pci_dev *r) | |
25148 | +{ | |
25149 | + if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus) | |
25150 | + && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn)) | |
25151 | + return 1; | |
25152 | + | |
25153 | + return 0; | |
25154 | +} | |
25155 | + | |
25156 | +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, | |
25157 | + int devid, publish_pci_dev_cb publish_cb) | |
25158 | +{ | |
25159 | + int err = 0, slot, func; | |
25160 | + struct pci_dev_entry *t, *dev_entry; | |
25161 | + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; | |
25162 | + unsigned long flags; | |
25163 | + | |
25164 | + if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { | |
25165 | + err = -EFAULT; | |
25166 | + xenbus_dev_fatal(pdev->xdev, err, | |
25167 | + "Can't export bridges on the virtual PCI bus"); | |
25168 | + goto out; | |
25169 | + } | |
25170 | + | |
25171 | + dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); | |
25172 | + if (!dev_entry) { | |
25173 | + err = -ENOMEM; | |
25174 | + xenbus_dev_fatal(pdev->xdev, err, | |
25175 | + "Error adding entry to virtual PCI bus"); | |
25176 | + goto out; | |
25177 | + } | |
25178 | + | |
25179 | + dev_entry->dev = dev; | |
25180 | + | |
25181 | + spin_lock_irqsave(&vpci_dev->lock, flags); | |
25182 | + | |
25183 | + /* Keep multi-function devices together on the virtual PCI bus */ | |
25184 | + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | |
25185 | + if (!list_empty(&vpci_dev->dev_list[slot])) { | |
25186 | + t = list_entry(list_first(&vpci_dev->dev_list[slot]), | |
25187 | + struct pci_dev_entry, list); | |
25188 | + | |
25189 | + if (match_slot(dev, t->dev)) { | |
25190 | + pr_info("pciback: vpci: %s: " | |
25191 | + "assign to virtual slot %d func %d\n", | |
25192 | + pci_name(dev), slot, | |
25193 | + PCI_FUNC(dev->devfn)); | |
25194 | + list_add_tail(&dev_entry->list, | |
25195 | + &vpci_dev->dev_list[slot]); | |
25196 | + func = PCI_FUNC(dev->devfn); | |
25197 | + goto unlock; | |
25198 | + } | |
25199 | + } | |
25200 | + } | |
25201 | + | |
25202 | + /* Assign to a new slot on the virtual PCI bus */ | |
25203 | + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | |
25204 | + if (list_empty(&vpci_dev->dev_list[slot])) { | |
25205 | + printk(KERN_INFO | |
25206 | + "pciback: vpci: %s: assign to virtual slot %d\n", | |
25207 | + pci_name(dev), slot); | |
25208 | + list_add_tail(&dev_entry->list, | |
25209 | + &vpci_dev->dev_list[slot]); | |
25210 | + func = PCI_FUNC(dev->devfn); | |
25211 | + goto unlock; | |
25212 | + } | |
25213 | + } | |
25214 | + | |
25215 | + err = -ENOMEM; | |
25216 | + xenbus_dev_fatal(pdev->xdev, err, | |
25217 | + "No more space on root virtual PCI bus"); | |
25218 | + | |
25219 | + unlock: | |
25220 | + spin_unlock_irqrestore(&vpci_dev->lock, flags); | |
25221 | + | |
25222 | + /* Publish this device. */ | |
25223 | + if(!err) | |
25224 | + err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid); | |
25225 | + | |
25226 | + out: | |
25227 | + return err; | |
25228 | +} | |
25229 | + | |
25230 | +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) | |
25231 | +{ | |
25232 | + int slot; | |
25233 | + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; | |
25234 | + struct pci_dev *found_dev = NULL; | |
25235 | + unsigned long flags; | |
25236 | + | |
25237 | + spin_lock_irqsave(&vpci_dev->lock, flags); | |
25238 | + | |
25239 | + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | |
25240 | + struct pci_dev_entry *e, *tmp; | |
25241 | + list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], | |
25242 | + list) { | |
25243 | + if (e->dev == dev) { | |
25244 | + list_del(&e->list); | |
25245 | + found_dev = e->dev; | |
25246 | + kfree(e); | |
25247 | + goto out; | |
25248 | + } | |
25249 | + } | |
25250 | + } | |
25251 | + | |
25252 | + out: | |
25253 | + spin_unlock_irqrestore(&vpci_dev->lock, flags); | |
25254 | + | |
25255 | + if (found_dev) | |
25256 | + pcistub_put_pci_dev(found_dev); | |
25257 | +} | |
25258 | + | |
25259 | +int pciback_init_devices(struct pciback_device *pdev) | |
25260 | +{ | |
25261 | + int slot; | |
25262 | + struct vpci_dev_data *vpci_dev; | |
25263 | + | |
25264 | + vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL); | |
25265 | + if (!vpci_dev) | |
25266 | + return -ENOMEM; | |
25267 | + | |
25268 | + spin_lock_init(&vpci_dev->lock); | |
25269 | + | |
25270 | + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | |
25271 | + INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); | |
25272 | + } | |
25273 | + | |
25274 | + pdev->pci_dev_data = vpci_dev; | |
25275 | + | |
25276 | + return 0; | |
25277 | +} | |
25278 | + | |
25279 | +int pciback_publish_pci_roots(struct pciback_device *pdev, | |
25280 | + publish_pci_root_cb publish_cb) | |
25281 | +{ | |
25282 | + /* The Virtual PCI bus has only one root */ | |
25283 | + return publish_cb(pdev, 0, 0); | |
25284 | +} | |
25285 | + | |
25286 | +void pciback_release_devices(struct pciback_device *pdev) | |
25287 | +{ | |
25288 | + int slot; | |
25289 | + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; | |
25290 | + | |
25291 | + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { | |
25292 | + struct pci_dev_entry *e, *tmp; | |
25293 | + list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], | |
25294 | + list) { | |
25295 | + list_del(&e->list); | |
25296 | + pcistub_put_pci_dev(e->dev); | |
25297 | + kfree(e); | |
25298 | + } | |
25299 | + } | |
25300 | + | |
25301 | + kfree(vpci_dev); | |
25302 | + pdev->pci_dev_data = NULL; | |
25303 | +} | |
25304 | Index: head-2008-11-25/drivers/xen/pciback/xenbus.c | |
25305 | =================================================================== | |
25306 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
25307 | +++ head-2008-11-25/drivers/xen/pciback/xenbus.c 2008-07-21 11:00:33.000000000 +0200 | |
25308 | @@ -0,0 +1,704 @@ | |
25309 | +/* | |
25310 | + * PCI Backend Xenbus Setup - handles setup with frontend and xend | |
25311 | + * | |
25312 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
25313 | + */ | |
25314 | +#include <linux/module.h> | |
25315 | +#include <linux/init.h> | |
25316 | +#include <linux/list.h> | |
25317 | +#include <linux/vmalloc.h> | |
25318 | +#include <xen/xenbus.h> | |
25319 | +#include <xen/evtchn.h> | |
25320 | +#include "pciback.h" | |
25321 | + | |
25322 | +#define INVALID_EVTCHN_IRQ (-1) | |
25323 | + | |
25324 | +static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) | |
25325 | +{ | |
25326 | + struct pciback_device *pdev; | |
25327 | + | |
25328 | + pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL); | |
25329 | + if (pdev == NULL) | |
25330 | + goto out; | |
25331 | + dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); | |
25332 | + | |
25333 | + pdev->xdev = xdev; | |
25334 | + xdev->dev.driver_data = pdev; | |
25335 | + | |
25336 | + spin_lock_init(&pdev->dev_lock); | |
25337 | + | |
25338 | + pdev->sh_area = NULL; | |
25339 | + pdev->sh_info = NULL; | |
25340 | + pdev->evtchn_irq = INVALID_EVTCHN_IRQ; | |
25341 | + pdev->be_watching = 0; | |
25342 | + | |
25343 | + INIT_WORK(&pdev->op_work, pciback_do_op, pdev); | |
25344 | + | |
25345 | + if (pciback_init_devices(pdev)) { | |
25346 | + kfree(pdev); | |
25347 | + pdev = NULL; | |
25348 | + } | |
25349 | + out: | |
25350 | + return pdev; | |
25351 | +} | |
25352 | + | |
25353 | +static void pciback_disconnect(struct pciback_device *pdev) | |
25354 | +{ | |
25355 | + spin_lock(&pdev->dev_lock); | |
25356 | + | |
25357 | + /* Ensure the guest can't trigger our handler before removing devices */ | |
25358 | + if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) { | |
25359 | + unbind_from_irqhandler(pdev->evtchn_irq, pdev); | |
25360 | + pdev->evtchn_irq = INVALID_EVTCHN_IRQ; | |
25361 | + } | |
25362 | + | |
25363 | + /* If the driver domain started an op, make sure we complete it or | |
25364 | + * delete it before releasing the shared memory */ | |
25365 | + cancel_delayed_work(&pdev->op_work); | |
25366 | + flush_scheduled_work(); | |
25367 | + | |
25368 | + if (pdev->sh_info != NULL) { | |
25369 | + xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area); | |
25370 | + pdev->sh_info = NULL; | |
25371 | + } | |
25372 | + | |
25373 | + spin_unlock(&pdev->dev_lock); | |
25374 | +} | |
25375 | + | |
25376 | +static void free_pdev(struct pciback_device *pdev) | |
25377 | +{ | |
25378 | + if (pdev->be_watching) | |
25379 | + unregister_xenbus_watch(&pdev->be_watch); | |
25380 | + | |
25381 | + pciback_disconnect(pdev); | |
25382 | + | |
25383 | + pciback_release_devices(pdev); | |
25384 | + | |
25385 | + pdev->xdev->dev.driver_data = NULL; | |
25386 | + pdev->xdev = NULL; | |
25387 | + | |
25388 | + kfree(pdev); | |
25389 | +} | |
25390 | + | |
25391 | +static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, | |
25392 | + int remote_evtchn) | |
25393 | +{ | |
25394 | + int err = 0; | |
25395 | + struct vm_struct *area; | |
25396 | + | |
25397 | + dev_dbg(&pdev->xdev->dev, | |
25398 | + "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", | |
25399 | + gnt_ref, remote_evtchn); | |
25400 | + | |
25401 | + area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref); | |
25402 | + if (IS_ERR(area)) { | |
25403 | + err = PTR_ERR(area); | |
25404 | + goto out; | |
25405 | + } | |
25406 | + pdev->sh_area = area; | |
25407 | + pdev->sh_info = area->addr; | |
25408 | + | |
25409 | + err = bind_interdomain_evtchn_to_irqhandler( | |
25410 | + pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, | |
25411 | + SA_SAMPLE_RANDOM, "pciback", pdev); | |
25412 | + if (err < 0) { | |
25413 | + xenbus_dev_fatal(pdev->xdev, err, | |
25414 | + "Error binding event channel to IRQ"); | |
25415 | + goto out; | |
25416 | + } | |
25417 | + pdev->evtchn_irq = err; | |
25418 | + err = 0; | |
25419 | + | |
25420 | + dev_dbg(&pdev->xdev->dev, "Attached!\n"); | |
25421 | + out: | |
25422 | + return err; | |
25423 | +} | |
25424 | + | |
25425 | +static int pciback_attach(struct pciback_device *pdev) | |
25426 | +{ | |
25427 | + int err = 0; | |
25428 | + int gnt_ref, remote_evtchn; | |
25429 | + char *magic = NULL; | |
25430 | + | |
25431 | + spin_lock(&pdev->dev_lock); | |
25432 | + | |
25433 | + /* Make sure we only do this setup once */ | |
25434 | + if (xenbus_read_driver_state(pdev->xdev->nodename) != | |
25435 | + XenbusStateInitialised) | |
25436 | + goto out; | |
25437 | + | |
25438 | + /* Wait for frontend to state that it has published the configuration */ | |
25439 | + if (xenbus_read_driver_state(pdev->xdev->otherend) != | |
25440 | + XenbusStateInitialised) | |
25441 | + goto out; | |
25442 | + | |
25443 | + dev_dbg(&pdev->xdev->dev, "Reading frontend config\n"); | |
25444 | + | |
25445 | + err = xenbus_gather(XBT_NIL, pdev->xdev->otherend, | |
25446 | + "pci-op-ref", "%u", &gnt_ref, | |
25447 | + "event-channel", "%u", &remote_evtchn, | |
25448 | + "magic", NULL, &magic, NULL); | |
25449 | + if (err) { | |
25450 | + /* If configuration didn't get read correctly, wait longer */ | |
25451 | + xenbus_dev_fatal(pdev->xdev, err, | |
25452 | + "Error reading configuration from frontend"); | |
25453 | + goto out; | |
25454 | + } | |
25455 | + | |
25456 | + if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { | |
25457 | + xenbus_dev_fatal(pdev->xdev, -EFAULT, | |
25458 | + "version mismatch (%s/%s) with pcifront - " | |
25459 | + "halting pciback", | |
25460 | + magic, XEN_PCI_MAGIC); | |
25461 | + goto out; | |
25462 | + } | |
25463 | + | |
25464 | + err = pciback_do_attach(pdev, gnt_ref, remote_evtchn); | |
25465 | + if (err) | |
25466 | + goto out; | |
25467 | + | |
25468 | + dev_dbg(&pdev->xdev->dev, "Connecting...\n"); | |
25469 | + | |
25470 | + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); | |
25471 | + if (err) | |
25472 | + xenbus_dev_fatal(pdev->xdev, err, | |
25473 | + "Error switching to connected state!"); | |
25474 | + | |
25475 | + dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err); | |
25476 | + out: | |
25477 | + spin_unlock(&pdev->dev_lock); | |
25478 | + | |
25479 | + if (magic) | |
25480 | + kfree(magic); | |
25481 | + | |
25482 | + return err; | |
25483 | +} | |
25484 | + | |
25485 | +static int pciback_publish_pci_dev(struct pciback_device *pdev, | |
25486 | + unsigned int domain, unsigned int bus, | |
25487 | + unsigned int devfn, unsigned int devid) | |
25488 | +{ | |
25489 | + int err; | |
25490 | + int len; | |
25491 | + char str[64]; | |
25492 | + | |
25493 | + len = snprintf(str, sizeof(str), "vdev-%d", devid); | |
25494 | + if (unlikely(len >= (sizeof(str) - 1))) { | |
25495 | + err = -ENOMEM; | |
25496 | + goto out; | |
25497 | + } | |
25498 | + | |
25499 | + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, | |
25500 | + "%04x:%02x:%02x.%02x", domain, bus, | |
25501 | + PCI_SLOT(devfn), PCI_FUNC(devfn)); | |
25502 | + | |
25503 | + out: | |
25504 | + return err; | |
25505 | +} | |
25506 | + | |
25507 | +static int pciback_export_device(struct pciback_device *pdev, | |
25508 | + int domain, int bus, int slot, int func, | |
25509 | + int devid) | |
25510 | +{ | |
25511 | + struct pci_dev *dev; | |
25512 | + int err = 0; | |
25513 | + | |
25514 | + dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n", | |
25515 | + domain, bus, slot, func); | |
25516 | + | |
25517 | + dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func); | |
25518 | + if (!dev) { | |
25519 | + err = -EINVAL; | |
25520 | + xenbus_dev_fatal(pdev->xdev, err, | |
25521 | + "Couldn't locate PCI device " | |
25522 | + "(%04x:%02x:%02x.%01x)! " | |
25523 | + "perhaps already in-use?", | |
25524 | + domain, bus, slot, func); | |
25525 | + goto out; | |
25526 | + } | |
25527 | + | |
25528 | + err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev); | |
25529 | + if (err) | |
25530 | + goto out; | |
25531 | + | |
25532 | + /* TODO: It'd be nice to export a bridge and have all of its children | |
25533 | + * get exported with it. This may be best done in xend (which will | |
25534 | + * have to calculate resource usage anyway) but we probably want to | |
25535 | + * put something in here to ensure that if a bridge gets given to a | |
25536 | + * driver domain, that all devices under that bridge are not given | |
25537 | + * to other driver domains (as he who controls the bridge can disable | |
25538 | + * it and stop the other devices from working). | |
25539 | + */ | |
25540 | + out: | |
25541 | + return err; | |
25542 | +} | |
25543 | + | |
25544 | +static int pciback_remove_device(struct pciback_device *pdev, | |
25545 | + int domain, int bus, int slot, int func) | |
25546 | +{ | |
25547 | + int err = 0; | |
25548 | + struct pci_dev *dev; | |
25549 | + | |
25550 | + dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n", | |
25551 | + domain, bus, slot, func); | |
25552 | + | |
25553 | + dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func)); | |
25554 | + if (!dev) { | |
25555 | + err = -EINVAL; | |
25556 | + dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device " | |
25557 | + "(%04x:%02x:%02x.%01x)! not owned by this domain\n", | |
25558 | + domain, bus, slot, func); | |
25559 | + goto out; | |
25560 | + } | |
25561 | + | |
25562 | + pciback_release_pci_dev(pdev, dev); | |
25563 | + | |
25564 | + out: | |
25565 | + return err; | |
25566 | +} | |
25567 | + | |
25568 | +static int pciback_publish_pci_root(struct pciback_device *pdev, | |
25569 | + unsigned int domain, unsigned int bus) | |
25570 | +{ | |
25571 | + unsigned int d, b; | |
25572 | + int i, root_num, len, err; | |
25573 | + char str[64]; | |
25574 | + | |
25575 | + dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n"); | |
25576 | + | |
25577 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, | |
25578 | + "root_num", "%d", &root_num); | |
25579 | + if (err == 0 || err == -ENOENT) | |
25580 | + root_num = 0; | |
25581 | + else if (err < 0) | |
25582 | + goto out; | |
25583 | + | |
25584 | + /* Verify that we haven't already published this pci root */ | |
25585 | + for (i = 0; i < root_num; i++) { | |
25586 | + len = snprintf(str, sizeof(str), "root-%d", i); | |
25587 | + if (unlikely(len >= (sizeof(str) - 1))) { | |
25588 | + err = -ENOMEM; | |
25589 | + goto out; | |
25590 | + } | |
25591 | + | |
25592 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, | |
25593 | + str, "%x:%x", &d, &b); | |
25594 | + if (err < 0) | |
25595 | + goto out; | |
25596 | + if (err != 2) { | |
25597 | + err = -EINVAL; | |
25598 | + goto out; | |
25599 | + } | |
25600 | + | |
25601 | + if (d == domain && b == bus) { | |
25602 | + err = 0; | |
25603 | + goto out; | |
25604 | + } | |
25605 | + } | |
25606 | + | |
25607 | + len = snprintf(str, sizeof(str), "root-%d", root_num); | |
25608 | + if (unlikely(len >= (sizeof(str) - 1))) { | |
25609 | + err = -ENOMEM; | |
25610 | + goto out; | |
25611 | + } | |
25612 | + | |
25613 | + dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n", | |
25614 | + root_num, domain, bus); | |
25615 | + | |
25616 | + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, | |
25617 | + "%04x:%02x", domain, bus); | |
25618 | + if (err) | |
25619 | + goto out; | |
25620 | + | |
25621 | + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, | |
25622 | + "root_num", "%d", (root_num + 1)); | |
25623 | + | |
25624 | + out: | |
25625 | + return err; | |
25626 | +} | |
25627 | + | |
25628 | +static int pciback_reconfigure(struct pciback_device *pdev) | |
25629 | +{ | |
25630 | + int err = 0; | |
25631 | + int num_devs; | |
25632 | + int domain, bus, slot, func; | |
25633 | + int substate; | |
25634 | + int i, len; | |
25635 | + char state_str[64]; | |
25636 | + char dev_str[64]; | |
25637 | + | |
25638 | + spin_lock(&pdev->dev_lock); | |
25639 | + | |
25640 | + dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); | |
25641 | + | |
25642 | + /* Make sure we only reconfigure once */ | |
25643 | + if (xenbus_read_driver_state(pdev->xdev->nodename) != | |
25644 | + XenbusStateReconfiguring) | |
25645 | + goto out; | |
25646 | + | |
25647 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", | |
25648 | + &num_devs); | |
25649 | + if (err != 1) { | |
25650 | + if (err >= 0) | |
25651 | + err = -EINVAL; | |
25652 | + xenbus_dev_fatal(pdev->xdev, err, | |
25653 | + "Error reading number of devices"); | |
25654 | + goto out; | |
25655 | + } | |
25656 | + | |
25657 | + for (i = 0; i < num_devs; i++) { | |
25658 | + len = snprintf(state_str, sizeof(state_str), "state-%d", i); | |
25659 | + if (unlikely(len >= (sizeof(state_str) - 1))) { | |
25660 | + err = -ENOMEM; | |
25661 | + xenbus_dev_fatal(pdev->xdev, err, | |
25662 | + "String overflow while reading " | |
25663 | + "configuration"); | |
25664 | + goto out; | |
25665 | + } | |
25666 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str, | |
25667 | + "%d", &substate); | |
25668 | + if (err != 1) | |
25669 | + substate = XenbusStateUnknown; | |
25670 | + | |
25671 | + switch (substate) { | |
25672 | + case XenbusStateInitialising: | |
25673 | + dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i); | |
25674 | + | |
25675 | + len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); | |
25676 | + if (unlikely(len >= (sizeof(dev_str) - 1))) { | |
25677 | + err = -ENOMEM; | |
25678 | + xenbus_dev_fatal(pdev->xdev, err, | |
25679 | + "String overflow while " | |
25680 | + "reading configuration"); | |
25681 | + goto out; | |
25682 | + } | |
25683 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, | |
25684 | + dev_str, "%x:%x:%x.%x", | |
25685 | + &domain, &bus, &slot, &func); | |
25686 | + if (err < 0) { | |
25687 | + xenbus_dev_fatal(pdev->xdev, err, | |
25688 | + "Error reading device " | |
25689 | + "configuration"); | |
25690 | + goto out; | |
25691 | + } | |
25692 | + if (err != 4) { | |
25693 | + err = -EINVAL; | |
25694 | + xenbus_dev_fatal(pdev->xdev, err, | |
25695 | + "Error parsing pci device " | |
25696 | + "configuration"); | |
25697 | + goto out; | |
25698 | + } | |
25699 | + | |
25700 | + err = pciback_export_device(pdev, domain, bus, slot, | |
25701 | + func, i); | |
25702 | + if (err) | |
25703 | + goto out; | |
25704 | + | |
25705 | + /* Publish pci roots. */ | |
25706 | + err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root); | |
25707 | + if (err) { | |
25708 | + xenbus_dev_fatal(pdev->xdev, err, | |
25709 | + "Error while publish PCI root" | |
25710 | + "buses for frontend"); | |
25711 | + goto out; | |
25712 | + } | |
25713 | + | |
25714 | + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, | |
25715 | + state_str, "%d", | |
25716 | + XenbusStateInitialised); | |
25717 | + if (err) { | |
25718 | + xenbus_dev_fatal(pdev->xdev, err, | |
25719 | + "Error switching substate of " | |
25720 | + "dev-%d\n", i); | |
25721 | + goto out; | |
25722 | + } | |
25723 | + break; | |
25724 | + | |
25725 | + case XenbusStateClosing: | |
25726 | + dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i); | |
25727 | + | |
25728 | + len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i); | |
25729 | + if (unlikely(len >= (sizeof(dev_str) - 1))) { | |
25730 | + err = -ENOMEM; | |
25731 | + xenbus_dev_fatal(pdev->xdev, err, | |
25732 | + "String overflow while " | |
25733 | + "reading configuration"); | |
25734 | + goto out; | |
25735 | + } | |
25736 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, | |
25737 | + dev_str, "%x:%x:%x.%x", | |
25738 | + &domain, &bus, &slot, &func); | |
25739 | + if (err < 0) { | |
25740 | + xenbus_dev_fatal(pdev->xdev, err, | |
25741 | + "Error reading device " | |
25742 | + "configuration"); | |
25743 | + goto out; | |
25744 | + } | |
25745 | + if (err != 4) { | |
25746 | + err = -EINVAL; | |
25747 | + xenbus_dev_fatal(pdev->xdev, err, | |
25748 | + "Error parsing pci device " | |
25749 | + "configuration"); | |
25750 | + goto out; | |
25751 | + } | |
25752 | + | |
25753 | + err = pciback_remove_device(pdev, domain, bus, slot, | |
25754 | + func); | |
25755 | + if(err) | |
25756 | + goto out; | |
25757 | + | |
25758 | + /* TODO: If at some point we implement support for pci | |
25759 | + * root hot-remove on pcifront side, we'll need to | |
25760 | + * remove unnecessary xenstore nodes of pci roots here. | |
25761 | + */ | |
25762 | + | |
25763 | + break; | |
25764 | + | |
25765 | + default: | |
25766 | + break; | |
25767 | + } | |
25768 | + } | |
25769 | + | |
25770 | + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured); | |
25771 | + if (err) { | |
25772 | + xenbus_dev_fatal(pdev->xdev, err, | |
25773 | + "Error switching to reconfigured state!"); | |
25774 | + goto out; | |
25775 | + } | |
25776 | + | |
25777 | + out: | |
25778 | + spin_unlock(&pdev->dev_lock); | |
25779 | + | |
25780 | + return 0; | |
25781 | +} | |
25782 | + | |
25783 | +static void pciback_frontend_changed(struct xenbus_device *xdev, | |
25784 | + enum xenbus_state fe_state) | |
25785 | +{ | |
25786 | + struct pciback_device *pdev = xdev->dev.driver_data; | |
25787 | + | |
25788 | + dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state); | |
25789 | + | |
25790 | + switch (fe_state) { | |
25791 | + case XenbusStateInitialised: | |
25792 | + pciback_attach(pdev); | |
25793 | + break; | |
25794 | + | |
25795 | + case XenbusStateReconfiguring: | |
25796 | + pciback_reconfigure(pdev); | |
25797 | + break; | |
25798 | + | |
25799 | + case XenbusStateConnected: | |
25800 | + /* pcifront switched its state from reconfiguring to connected. | |
25801 | + * Then switch to connected state. | |
25802 | + */ | |
25803 | + xenbus_switch_state(xdev, XenbusStateConnected); | |
25804 | + break; | |
25805 | + | |
25806 | + case XenbusStateClosing: | |
25807 | + pciback_disconnect(pdev); | |
25808 | + xenbus_switch_state(xdev, XenbusStateClosing); | |
25809 | + break; | |
25810 | + | |
25811 | + case XenbusStateClosed: | |
25812 | + pciback_disconnect(pdev); | |
25813 | + xenbus_switch_state(xdev, XenbusStateClosed); | |
25814 | + if (xenbus_dev_is_online(xdev)) | |
25815 | + break; | |
25816 | + /* fall through if not online */ | |
25817 | + case XenbusStateUnknown: | |
25818 | + dev_dbg(&xdev->dev, "frontend is gone! unregister device\n"); | |
25819 | + device_unregister(&xdev->dev); | |
25820 | + break; | |
25821 | + | |
25822 | + default: | |
25823 | + break; | |
25824 | + } | |
25825 | +} | |
25826 | + | |
25827 | +static int pciback_setup_backend(struct pciback_device *pdev) | |
25828 | +{ | |
25829 | + /* Get configuration from xend (if available now) */ | |
25830 | + int domain, bus, slot, func; | |
25831 | + int err = 0; | |
25832 | + int i, num_devs; | |
25833 | + char dev_str[64]; | |
25834 | + char state_str[64]; | |
25835 | + | |
25836 | + spin_lock(&pdev->dev_lock); | |
25837 | + | |
25838 | + /* It's possible we could get the call to setup twice, so make sure | |
25839 | + * we're not already connected. | |
25840 | + */ | |
25841 | + if (xenbus_read_driver_state(pdev->xdev->nodename) != | |
25842 | + XenbusStateInitWait) | |
25843 | + goto out; | |
25844 | + | |
25845 | + dev_dbg(&pdev->xdev->dev, "getting be setup\n"); | |
25846 | + | |
25847 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", | |
25848 | + &num_devs); | |
25849 | + if (err != 1) { | |
25850 | + if (err >= 0) | |
25851 | + err = -EINVAL; | |
25852 | + xenbus_dev_fatal(pdev->xdev, err, | |
25853 | + "Error reading number of devices"); | |
25854 | + goto out; | |
25855 | + } | |
25856 | + | |
25857 | + for (i = 0; i < num_devs; i++) { | |
25858 | + int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); | |
25859 | + if (unlikely(l >= (sizeof(dev_str) - 1))) { | |
25860 | + err = -ENOMEM; | |
25861 | + xenbus_dev_fatal(pdev->xdev, err, | |
25862 | + "String overflow while reading " | |
25863 | + "configuration"); | |
25864 | + goto out; | |
25865 | + } | |
25866 | + | |
25867 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str, | |
25868 | + "%x:%x:%x.%x", &domain, &bus, &slot, &func); | |
25869 | + if (err < 0) { | |
25870 | + xenbus_dev_fatal(pdev->xdev, err, | |
25871 | + "Error reading device configuration"); | |
25872 | + goto out; | |
25873 | + } | |
25874 | + if (err != 4) { | |
25875 | + err = -EINVAL; | |
25876 | + xenbus_dev_fatal(pdev->xdev, err, | |
25877 | + "Error parsing pci device " | |
25878 | + "configuration"); | |
25879 | + goto out; | |
25880 | + } | |
25881 | + | |
25882 | + err = pciback_export_device(pdev, domain, bus, slot, func, i); | |
25883 | + if (err) | |
25884 | + goto out; | |
25885 | + | |
25886 | + /* Switch substate of this device. */ | |
25887 | + l = snprintf(state_str, sizeof(state_str), "state-%d", i); | |
25888 | + if (unlikely(l >= (sizeof(state_str) - 1))) { | |
25889 | + err = -ENOMEM; | |
25890 | + xenbus_dev_fatal(pdev->xdev, err, | |
25891 | + "String overflow while reading " | |
25892 | + "configuration"); | |
25893 | + goto out; | |
25894 | + } | |
25895 | + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str, | |
25896 | + "%d", XenbusStateInitialised); | |
25897 | + if (err) { | |
25898 | + xenbus_dev_fatal(pdev->xdev, err, "Error switching " | |
25899 | + "substate of dev-%d\n", i); | |
25900 | + goto out; | |
25901 | + } | |
25902 | + } | |
25903 | + | |
25904 | + err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root); | |
25905 | + if (err) { | |
25906 | + xenbus_dev_fatal(pdev->xdev, err, | |
25907 | + "Error while publish PCI root buses " | |
25908 | + "for frontend"); | |
25909 | + goto out; | |
25910 | + } | |
25911 | + | |
25912 | + err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised); | |
25913 | + if (err) | |
25914 | + xenbus_dev_fatal(pdev->xdev, err, | |
25915 | + "Error switching to initialised state!"); | |
25916 | + | |
25917 | + out: | |
25918 | + spin_unlock(&pdev->dev_lock); | |
25919 | + | |
25920 | + if (!err) | |
25921 | + /* see if pcifront is already configured (if not, we'll wait) */ | |
25922 | + pciback_attach(pdev); | |
25923 | + | |
25924 | + return err; | |
25925 | +} | |
25926 | + | |
25927 | +static void pciback_be_watch(struct xenbus_watch *watch, | |
25928 | + const char **vec, unsigned int len) | |
25929 | +{ | |
25930 | + struct pciback_device *pdev = | |
25931 | + container_of(watch, struct pciback_device, be_watch); | |
25932 | + | |
25933 | + switch (xenbus_read_driver_state(pdev->xdev->nodename)) { | |
25934 | + case XenbusStateInitWait: | |
25935 | + pciback_setup_backend(pdev); | |
25936 | + break; | |
25937 | + | |
25938 | + default: | |
25939 | + break; | |
25940 | + } | |
25941 | +} | |
25942 | + | |
25943 | +static int pciback_xenbus_probe(struct xenbus_device *dev, | |
25944 | + const struct xenbus_device_id *id) | |
25945 | +{ | |
25946 | + int err = 0; | |
25947 | + struct pciback_device *pdev = alloc_pdev(dev); | |
25948 | + | |
25949 | + if (pdev == NULL) { | |
25950 | + err = -ENOMEM; | |
25951 | + xenbus_dev_fatal(dev, err, | |
25952 | + "Error allocating pciback_device struct"); | |
25953 | + goto out; | |
25954 | + } | |
25955 | + | |
25956 | + /* wait for xend to configure us */ | |
25957 | + err = xenbus_switch_state(dev, XenbusStateInitWait); | |
25958 | + if (err) | |
25959 | + goto out; | |
25960 | + | |
25961 | + /* watch the backend node for backend configuration information */ | |
25962 | + err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, | |
25963 | + pciback_be_watch); | |
25964 | + if (err) | |
25965 | + goto out; | |
25966 | + pdev->be_watching = 1; | |
25967 | + | |
25968 | + /* We need to force a call to our callback here in case | |
25969 | + * xend already configured us! | |
25970 | + */ | |
25971 | + pciback_be_watch(&pdev->be_watch, NULL, 0); | |
25972 | + | |
25973 | + out: | |
25974 | + return err; | |
25975 | +} | |
25976 | + | |
25977 | +static int pciback_xenbus_remove(struct xenbus_device *dev) | |
25978 | +{ | |
25979 | + struct pciback_device *pdev = dev->dev.driver_data; | |
25980 | + | |
25981 | + if (pdev != NULL) | |
25982 | + free_pdev(pdev); | |
25983 | + | |
25984 | + return 0; | |
25985 | +} | |
25986 | + | |
25987 | +static const struct xenbus_device_id xenpci_ids[] = { | |
25988 | + {"pci"}, | |
25989 | + {{0}}, | |
25990 | +}; | |
25991 | + | |
25992 | +static struct xenbus_driver xenbus_pciback_driver = { | |
25993 | + .name = "pciback", | |
25994 | + .owner = THIS_MODULE, | |
25995 | + .ids = xenpci_ids, | |
25996 | + .probe = pciback_xenbus_probe, | |
25997 | + .remove = pciback_xenbus_remove, | |
25998 | + .otherend_changed = pciback_frontend_changed, | |
25999 | +}; | |
26000 | + | |
26001 | +int __init pciback_xenbus_register(void) | |
26002 | +{ | |
26003 | + if (!is_running_on_xen()) | |
26004 | + return -ENODEV; | |
26005 | + | |
26006 | + return xenbus_register_backend(&xenbus_pciback_driver); | |
26007 | +} | |
26008 | + | |
26009 | +void __exit pciback_xenbus_unregister(void) | |
26010 | +{ | |
26011 | + xenbus_unregister_driver(&xenbus_pciback_driver); | |
26012 | +} | |
26013 | Index: head-2008-11-25/drivers/xen/pcifront/Makefile | |
26014 | =================================================================== | |
26015 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
26016 | +++ head-2008-11-25/drivers/xen/pcifront/Makefile 2007-06-12 13:13:45.000000000 +0200 | |
26017 | @@ -0,0 +1,7 @@ | |
26018 | +obj-y += pcifront.o | |
26019 | + | |
26020 | +pcifront-y := pci_op.o xenbus.o pci.o | |
26021 | + | |
26022 | +ifeq ($(CONFIG_XEN_PCIDEV_FE_DEBUG),y) | |
26023 | +EXTRA_CFLAGS += -DDEBUG | |
26024 | +endif | |
26025 | Index: head-2008-11-25/drivers/xen/pcifront/pci.c | |
26026 | =================================================================== | |
26027 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
26028 | +++ head-2008-11-25/drivers/xen/pcifront/pci.c 2007-06-12 13:13:45.000000000 +0200 | |
26029 | @@ -0,0 +1,46 @@ | |
26030 | +/* | |
26031 | + * PCI Frontend Operations - ensure only one PCI frontend runs at a time | |
26032 | + * | |
26033 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
26034 | + */ | |
26035 | +#include <linux/module.h> | |
26036 | +#include <linux/init.h> | |
26037 | +#include <linux/pci.h> | |
26038 | +#include <linux/spinlock.h> | |
26039 | +#include "pcifront.h" | |
26040 | + | |
26041 | +DEFINE_SPINLOCK(pcifront_dev_lock); | |
26042 | +static struct pcifront_device *pcifront_dev = NULL; | |
26043 | + | |
26044 | +int pcifront_connect(struct pcifront_device *pdev) | |
26045 | +{ | |
26046 | + int err = 0; | |
26047 | + | |
26048 | + spin_lock(&pcifront_dev_lock); | |
26049 | + | |
26050 | + if (!pcifront_dev) { | |
26051 | + dev_info(&pdev->xdev->dev, "Installing PCI frontend\n"); | |
26052 | + pcifront_dev = pdev; | |
26053 | + } | |
26054 | + else { | |
26055 | + dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); | |
26056 | + err = -EEXIST; | |
26057 | + } | |
26058 | + | |
26059 | + spin_unlock(&pcifront_dev_lock); | |
26060 | + | |
26061 | + return err; | |
26062 | +} | |
26063 | + | |
26064 | +void pcifront_disconnect(struct pcifront_device *pdev) | |
26065 | +{ | |
26066 | + spin_lock(&pcifront_dev_lock); | |
26067 | + | |
26068 | + if (pdev == pcifront_dev) { | |
26069 | + dev_info(&pdev->xdev->dev, | |
26070 | + "Disconnecting PCI Frontend Buses\n"); | |
26071 | + pcifront_dev = NULL; | |
26072 | + } | |
26073 | + | |
26074 | + spin_unlock(&pcifront_dev_lock); | |
26075 | +} | |
26076 | Index: head-2008-11-25/drivers/xen/pcifront/pci_op.c | |
26077 | =================================================================== | |
26078 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
26079 | +++ head-2008-11-25/drivers/xen/pcifront/pci_op.c 2008-07-21 11:00:33.000000000 +0200 | |
26080 | @@ -0,0 +1,551 @@ | |
26081 | +/* | |
26082 | + * PCI Frontend Operations - Communicates with frontend | |
26083 | + * | |
26084 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
26085 | + */ | |
26086 | +#include <linux/module.h> | |
26087 | +#include <linux/version.h> | |
26088 | +#include <linux/init.h> | |
26089 | +#include <linux/pci.h> | |
26090 | +#include <linux/spinlock.h> | |
26091 | +#include <linux/time.h> | |
26092 | +#include <xen/evtchn.h> | |
26093 | +#include "pcifront.h" | |
26094 | + | |
26095 | +static int verbose_request = 0; | |
26096 | +module_param(verbose_request, int, 0644); | |
26097 | + | |
26098 | +#ifdef __ia64__ | |
26099 | +static void pcifront_init_sd(struct pcifront_sd *sd, | |
26100 | + unsigned int domain, unsigned int bus, | |
26101 | + struct pcifront_device *pdev) | |
26102 | +{ | |
26103 | + int err, i, j, k, len, root_num, res_count; | |
26104 | + struct acpi_resource res; | |
26105 | + unsigned int d, b, byte; | |
26106 | + unsigned long magic; | |
26107 | + char str[64], tmp[3]; | |
26108 | + unsigned char *buf, *bufp; | |
26109 | + u8 *ptr; | |
26110 | + | |
26111 | + memset(sd, 0, sizeof(*sd)); | |
26112 | + | |
26113 | + sd->segment = domain; | |
26114 | + sd->node = -1; /* Revisit for NUMA */ | |
26115 | + sd->platform_data = pdev; | |
26116 | + | |
26117 | + /* Look for resources for this controller in xenbus. */ | |
26118 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "root_num", | |
26119 | + "%d", &root_num); | |
26120 | + if (err != 1) | |
26121 | + return; | |
26122 | + | |
26123 | + for (i = 0; i < root_num; i++) { | |
26124 | + len = snprintf(str, sizeof(str), "root-%d", i); | |
26125 | + if (unlikely(len >= (sizeof(str) - 1))) | |
26126 | + return; | |
26127 | + | |
26128 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, | |
26129 | + str, "%x:%x", &d, &b); | |
26130 | + if (err != 2) | |
26131 | + return; | |
26132 | + | |
26133 | + if (d == domain && b == bus) | |
26134 | + break; | |
26135 | + } | |
26136 | + | |
26137 | + if (i == root_num) | |
26138 | + return; | |
26139 | + | |
26140 | + len = snprintf(str, sizeof(str), "root-resource-magic"); | |
26141 | + | |
26142 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, | |
26143 | + str, "%lx", &magic); | |
26144 | + | |
26145 | + if (err != 1) | |
26146 | + return; /* No resources, nothing to do */ | |
26147 | + | |
26148 | + if (magic != (sizeof(res) * 2) + 1) { | |
26149 | + printk(KERN_WARNING "pcifront: resource magic mismatch\n"); | |
26150 | + return; | |
26151 | + } | |
26152 | + | |
26153 | + len = snprintf(str, sizeof(str), "root-%d-resources", i); | |
26154 | + if (unlikely(len >= (sizeof(str) - 1))) | |
26155 | + return; | |
26156 | + | |
26157 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, | |
26158 | + str, "%d", &res_count); | |
26159 | + | |
26160 | + if (err != 1) | |
26161 | + return; /* No resources, nothing to do */ | |
26162 | + | |
26163 | + sd->window = kzalloc(sizeof(*sd->window) * res_count, GFP_KERNEL); | |
26164 | + if (!sd->window) | |
26165 | + return; | |
26166 | + | |
26167 | + /* magic is also the size of the byte stream in xenbus */ | |
26168 | + buf = kmalloc(magic, GFP_KERNEL); | |
26169 | + if (!buf) { | |
26170 | + kfree(sd->window); | |
26171 | + sd->window = NULL; | |
26172 | + return; | |
26173 | + } | |
26174 | + | |
26175 | + /* Read the resources out of xenbus */ | |
26176 | + for (j = 0; j < res_count; j++) { | |
26177 | + memset(&res, 0, sizeof(res)); | |
26178 | + memset(buf, 0, magic); | |
26179 | + | |
26180 | + len = snprintf(str, sizeof(str), "root-%d-resource-%d", i, j); | |
26181 | + if (unlikely(len >= (sizeof(str) - 1))) | |
26182 | + return; | |
26183 | + | |
26184 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, | |
26185 | + "%s", buf); | |
26186 | + if (err != 1) { | |
26187 | + printk(KERN_WARNING "pcifront: error reading " | |
26188 | + "resource %d on bus %04x:%02x\n", | |
26189 | + j, domain, bus); | |
26190 | + continue; | |
26191 | + } | |
26192 | + | |
26193 | + bufp = buf; | |
26194 | + ptr = (u8 *)&res; | |
26195 | + memset(tmp, 0, sizeof(tmp)); | |
26196 | + | |
26197 | + /* Copy ASCII byte stream into structure */ | |
26198 | + for (k = 0; k < magic - 1; k += 2) { | |
26199 | + memcpy(tmp, bufp, 2); | |
26200 | + bufp += 2; | |
26201 | + | |
26202 | + sscanf(tmp, "%02x", &byte); | |
26203 | + *ptr = byte; | |
26204 | + ptr++; | |
26205 | + } | |
26206 | + | |
26207 | + xen_add_resource(sd, domain, bus, &res); | |
26208 | + sd->windows++; | |
26209 | + } | |
26210 | + kfree(buf); | |
26211 | +} | |
26212 | +#endif | |
26213 | + | |
26214 | +static int errno_to_pcibios_err(int errno) | |
26215 | +{ | |
26216 | + switch (errno) { | |
26217 | + case XEN_PCI_ERR_success: | |
26218 | + return PCIBIOS_SUCCESSFUL; | |
26219 | + | |
26220 | + case XEN_PCI_ERR_dev_not_found: | |
26221 | + return PCIBIOS_DEVICE_NOT_FOUND; | |
26222 | + | |
26223 | + case XEN_PCI_ERR_invalid_offset: | |
26224 | + case XEN_PCI_ERR_op_failed: | |
26225 | + return PCIBIOS_BAD_REGISTER_NUMBER; | |
26226 | + | |
26227 | + case XEN_PCI_ERR_not_implemented: | |
26228 | + return PCIBIOS_FUNC_NOT_SUPPORTED; | |
26229 | + | |
26230 | + case XEN_PCI_ERR_access_denied: | |
26231 | + return PCIBIOS_SET_FAILED; | |
26232 | + } | |
26233 | + return errno; | |
26234 | +} | |
26235 | + | |
26236 | +static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) | |
26237 | +{ | |
26238 | + int err = 0; | |
26239 | + struct xen_pci_op *active_op = &pdev->sh_info->op; | |
26240 | + unsigned long irq_flags; | |
26241 | + evtchn_port_t port = pdev->evtchn; | |
26242 | + s64 ns, ns_timeout; | |
26243 | + struct timeval tv; | |
26244 | + | |
26245 | + spin_lock_irqsave(&pdev->sh_info_lock, irq_flags); | |
26246 | + | |
26247 | + memcpy(active_op, op, sizeof(struct xen_pci_op)); | |
26248 | + | |
26249 | + /* Go */ | |
26250 | + wmb(); | |
26251 | + set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); | |
26252 | + notify_remote_via_evtchn(port); | |
26253 | + | |
26254 | + /* | |
26255 | + * We set a poll timeout of 3 seconds but give up on return after | |
26256 | + * 2 seconds. It is better to time out too late rather than too early | |
26257 | + * (in the latter case we end up continually re-executing poll() with a | |
26258 | + * timeout in the past). 1s difference gives plenty of slack for error. | |
26259 | + */ | |
26260 | + do_gettimeofday(&tv); | |
26261 | + ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC; | |
26262 | + | |
26263 | + clear_evtchn(port); | |
26264 | + | |
26265 | + while (test_bit(_XEN_PCIF_active, | |
26266 | + (unsigned long *)&pdev->sh_info->flags)) { | |
26267 | + if (HYPERVISOR_poll(&port, 1, jiffies + 3*HZ)) | |
26268 | + BUG(); | |
26269 | + clear_evtchn(port); | |
26270 | + do_gettimeofday(&tv); | |
26271 | + ns = timeval_to_ns(&tv); | |
26272 | + if (ns > ns_timeout) { | |
26273 | + dev_err(&pdev->xdev->dev, | |
26274 | + "pciback not responding!!!\n"); | |
26275 | + clear_bit(_XEN_PCIF_active, | |
26276 | + (unsigned long *)&pdev->sh_info->flags); | |
26277 | + err = XEN_PCI_ERR_dev_not_found; | |
26278 | + goto out; | |
26279 | + } | |
26280 | + } | |
26281 | + | |
26282 | + memcpy(op, active_op, sizeof(struct xen_pci_op)); | |
26283 | + | |
26284 | + err = op->err; | |
26285 | + out: | |
26286 | + spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags); | |
26287 | + return err; | |
26288 | +} | |
26289 | + | |
26290 | +/* Access to this function is spinlocked in drivers/pci/access.c */ | |
26291 | +static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn, | |
26292 | + int where, int size, u32 * val) | |
26293 | +{ | |
26294 | + int err = 0; | |
26295 | + struct xen_pci_op op = { | |
26296 | + .cmd = XEN_PCI_OP_conf_read, | |
26297 | + .domain = pci_domain_nr(bus), | |
26298 | + .bus = bus->number, | |
26299 | + .devfn = devfn, | |
26300 | + .offset = where, | |
26301 | + .size = size, | |
26302 | + }; | |
26303 | + struct pcifront_sd *sd = bus->sysdata; | |
26304 | + struct pcifront_device *pdev = pcifront_get_pdev(sd); | |
26305 | + | |
26306 | + if (verbose_request) | |
26307 | + dev_info(&pdev->xdev->dev, | |
26308 | + "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n", | |
26309 | + pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), | |
26310 | + PCI_FUNC(devfn), where, size); | |
26311 | + | |
26312 | + err = do_pci_op(pdev, &op); | |
26313 | + | |
26314 | + if (likely(!err)) { | |
26315 | + if (verbose_request) | |
26316 | + dev_info(&pdev->xdev->dev, "read got back value %x\n", | |
26317 | + op.value); | |
26318 | + | |
26319 | + *val = op.value; | |
26320 | + } else if (err == -ENODEV) { | |
26321 | + /* No device here, pretend that it just returned 0 */ | |
26322 | + err = 0; | |
26323 | + *val = 0; | |
26324 | + } | |
26325 | + | |
26326 | + return errno_to_pcibios_err(err); | |
26327 | +} | |
26328 | + | |
26329 | +/* Access to this function is spinlocked in drivers/pci/access.c */ | |
26330 | +static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn, | |
26331 | + int where, int size, u32 val) | |
26332 | +{ | |
26333 | + struct xen_pci_op op = { | |
26334 | + .cmd = XEN_PCI_OP_conf_write, | |
26335 | + .domain = pci_domain_nr(bus), | |
26336 | + .bus = bus->number, | |
26337 | + .devfn = devfn, | |
26338 | + .offset = where, | |
26339 | + .size = size, | |
26340 | + .value = val, | |
26341 | + }; | |
26342 | + struct pcifront_sd *sd = bus->sysdata; | |
26343 | + struct pcifront_device *pdev = pcifront_get_pdev(sd); | |
26344 | + | |
26345 | + if (verbose_request) | |
26346 | + dev_info(&pdev->xdev->dev, | |
26347 | + "write dev=%04x:%02x:%02x.%01x - " | |
26348 | + "offset %x size %d val %x\n", | |
26349 | + pci_domain_nr(bus), bus->number, | |
26350 | + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); | |
26351 | + | |
26352 | + return errno_to_pcibios_err(do_pci_op(pdev, &op)); | |
26353 | +} | |
26354 | + | |
26355 | +struct pci_ops pcifront_bus_ops = { | |
26356 | + .read = pcifront_bus_read, | |
26357 | + .write = pcifront_bus_write, | |
26358 | +}; | |
26359 | + | |
26360 | +#ifdef CONFIG_PCI_MSI | |
26361 | +int pci_frontend_enable_msix(struct pci_dev *dev, | |
26362 | + struct msix_entry *entries, | |
26363 | + int nvec) | |
26364 | +{ | |
26365 | + int err; | |
26366 | + int i; | |
26367 | + struct xen_pci_op op = { | |
26368 | + .cmd = XEN_PCI_OP_enable_msix, | |
26369 | + .domain = pci_domain_nr(dev->bus), | |
26370 | + .bus = dev->bus->number, | |
26371 | + .devfn = dev->devfn, | |
26372 | + .value = nvec, | |
26373 | + }; | |
26374 | + struct pcifront_sd *sd = dev->bus->sysdata; | |
26375 | + struct pcifront_device *pdev = pcifront_get_pdev(sd); | |
26376 | + | |
26377 | + if (nvec > SH_INFO_MAX_VEC) { | |
26378 | + printk("too much vector for pci frontend%x\n", nvec); | |
26379 | + return -EINVAL; | |
26380 | + } | |
26381 | + | |
26382 | + for (i = 0; i < nvec; i++) { | |
26383 | + op.msix_entries[i].entry = entries[i].entry; | |
26384 | + op.msix_entries[i].vector = entries[i].vector; | |
26385 | + } | |
26386 | + | |
26387 | + err = do_pci_op(pdev, &op); | |
26388 | + | |
26389 | + if (!err) { | |
26390 | + if (!op.value) { | |
26391 | + /* we get the result */ | |
26392 | + for ( i = 0; i < nvec; i++) | |
26393 | + entries[i].vector = op.msix_entries[i].vector; | |
26394 | + return 0; | |
26395 | + } | |
26396 | + else { | |
26397 | + printk("enable msix get value %x\n", op.value); | |
26398 | + return op.value; | |
26399 | + } | |
26400 | + } | |
26401 | + else { | |
26402 | + printk("enable msix get err %x\n", err); | |
26403 | + return err; | |
26404 | + } | |
26405 | +} | |
26406 | + | |
26407 | +void pci_frontend_disable_msix(struct pci_dev* dev) | |
26408 | +{ | |
26409 | + int err; | |
26410 | + struct xen_pci_op op = { | |
26411 | + .cmd = XEN_PCI_OP_disable_msix, | |
26412 | + .domain = pci_domain_nr(dev->bus), | |
26413 | + .bus = dev->bus->number, | |
26414 | + .devfn = dev->devfn, | |
26415 | + }; | |
26416 | + struct pcifront_sd *sd = dev->bus->sysdata; | |
26417 | + struct pcifront_device *pdev = pcifront_get_pdev(sd); | |
26418 | + | |
26419 | + err = do_pci_op(pdev, &op); | |
26420 | + | |
26421 | + /* What should do for error ? */ | |
26422 | + if (err) | |
26423 | + printk("pci_disable_msix get err %x\n", err); | |
26424 | +} | |
26425 | + | |
26426 | +int pci_frontend_enable_msi(struct pci_dev *dev) | |
26427 | +{ | |
26428 | + int err; | |
26429 | + struct xen_pci_op op = { | |
26430 | + .cmd = XEN_PCI_OP_enable_msi, | |
26431 | + .domain = pci_domain_nr(dev->bus), | |
26432 | + .bus = dev->bus->number, | |
26433 | + .devfn = dev->devfn, | |
26434 | + }; | |
26435 | + struct pcifront_sd *sd = dev->bus->sysdata; | |
26436 | + struct pcifront_device *pdev = pcifront_get_pdev(sd); | |
26437 | + | |
26438 | + err = do_pci_op(pdev, &op); | |
26439 | + if (likely(!err)) { | |
26440 | + dev->irq = op.value; | |
26441 | + } | |
26442 | + else { | |
26443 | + printk("pci frontend enable msi failed for dev %x:%x \n", | |
26444 | + op.bus, op.devfn); | |
26445 | + err = -EINVAL; | |
26446 | + } | |
26447 | + return err; | |
26448 | +} | |
26449 | + | |
26450 | +void pci_frontend_disable_msi(struct pci_dev* dev) | |
26451 | +{ | |
26452 | + int err; | |
26453 | + struct xen_pci_op op = { | |
26454 | + .cmd = XEN_PCI_OP_disable_msi, | |
26455 | + .domain = pci_domain_nr(dev->bus), | |
26456 | + .bus = dev->bus->number, | |
26457 | + .devfn = dev->devfn, | |
26458 | + }; | |
26459 | + struct pcifront_sd *sd = dev->bus->sysdata; | |
26460 | + struct pcifront_device *pdev = pcifront_get_pdev(sd); | |
26461 | + | |
26462 | + err = do_pci_op(pdev, &op); | |
26463 | + if (err == XEN_PCI_ERR_dev_not_found) { | |
26464 | + /* XXX No response from backend, what shall we do? */ | |
26465 | + printk("get no response from backend for disable MSI\n"); | |
26466 | + return; | |
26467 | + } | |
26468 | + if (likely(!err)) | |
26469 | + dev->irq = op.value; | |
26470 | + else | |
26471 | + /* how can pciback notify us fail? */ | |
26472 | + printk("get fake response frombackend \n"); | |
26473 | +} | |
26474 | +#endif /* CONFIG_PCI_MSI */ | |
26475 | + | |
26476 | +/* Claim resources for the PCI frontend as-is, backend won't allow changes */ | |
26477 | +static void pcifront_claim_resource(struct pci_dev *dev, void *data) | |
26478 | +{ | |
26479 | + struct pcifront_device *pdev = data; | |
26480 | + int i; | |
26481 | + struct resource *r; | |
26482 | + | |
26483 | + for (i = 0; i < PCI_NUM_RESOURCES; i++) { | |
26484 | + r = &dev->resource[i]; | |
26485 | + | |
26486 | + if (!r->parent && r->start && r->flags) { | |
26487 | + dev_dbg(&pdev->xdev->dev, "claiming resource %s/%d\n", | |
26488 | + pci_name(dev), i); | |
26489 | + pci_claim_resource(dev, i); | |
26490 | + } | |
26491 | + } | |
26492 | +} | |
26493 | + | |
26494 | +int __devinit pcifront_scan_root(struct pcifront_device *pdev, | |
26495 | + unsigned int domain, unsigned int bus) | |
26496 | +{ | |
26497 | + struct pci_bus *b; | |
26498 | + struct pcifront_sd *sd = NULL; | |
26499 | + struct pci_bus_entry *bus_entry = NULL; | |
26500 | + int err = 0; | |
26501 | + | |
26502 | +#ifndef CONFIG_PCI_DOMAINS | |
26503 | + if (domain != 0) { | |
26504 | + dev_err(&pdev->xdev->dev, | |
26505 | + "PCI Root in non-zero PCI Domain! domain=%d\n", domain); | |
26506 | + dev_err(&pdev->xdev->dev, | |
26507 | + "Please compile with CONFIG_PCI_DOMAINS\n"); | |
26508 | + err = -EINVAL; | |
26509 | + goto err_out; | |
26510 | + } | |
26511 | +#endif | |
26512 | + | |
26513 | + dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n", | |
26514 | + domain, bus); | |
26515 | + | |
26516 | + bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL); | |
26517 | + sd = kmalloc(sizeof(*sd), GFP_KERNEL); | |
26518 | + if (!bus_entry || !sd) { | |
26519 | + err = -ENOMEM; | |
26520 | + goto err_out; | |
26521 | + } | |
26522 | + pcifront_init_sd(sd, domain, bus, pdev); | |
26523 | + | |
26524 | + b = pci_scan_bus_parented(&pdev->xdev->dev, bus, | |
26525 | + &pcifront_bus_ops, sd); | |
26526 | + if (!b) { | |
26527 | + dev_err(&pdev->xdev->dev, | |
26528 | + "Error creating PCI Frontend Bus!\n"); | |
26529 | + err = -ENOMEM; | |
26530 | + goto err_out; | |
26531 | + } | |
26532 | + | |
26533 | + pcifront_setup_root_resources(b, sd); | |
26534 | + bus_entry->bus = b; | |
26535 | + | |
26536 | + list_add(&bus_entry->list, &pdev->root_buses); | |
26537 | + | |
26538 | + /* Claim resources before going "live" with our devices */ | |
26539 | + pci_walk_bus(b, pcifront_claim_resource, pdev); | |
26540 | + | |
26541 | + pci_bus_add_devices(b); | |
26542 | + | |
26543 | + return 0; | |
26544 | + | |
26545 | + err_out: | |
26546 | + kfree(bus_entry); | |
26547 | + kfree(sd); | |
26548 | + | |
26549 | + return err; | |
26550 | +} | |
26551 | + | |
26552 | +int __devinit pcifront_rescan_root(struct pcifront_device *pdev, | |
26553 | + unsigned int domain, unsigned int bus) | |
26554 | +{ | |
26555 | + struct pci_bus *b; | |
26556 | + struct pci_dev *d; | |
26557 | + unsigned int devfn; | |
26558 | + | |
26559 | +#ifndef CONFIG_PCI_DOMAINS | |
26560 | + if (domain != 0) { | |
26561 | + dev_err(&pdev->xdev->dev, | |
26562 | + "PCI Root in non-zero PCI Domain! domain=%d\n", domain); | |
26563 | + dev_err(&pdev->xdev->dev, | |
26564 | + "Please compile with CONFIG_PCI_DOMAINS\n"); | |
26565 | + return -EINVAL; | |
26566 | + } | |
26567 | +#endif | |
26568 | + | |
26569 | + dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n", | |
26570 | + domain, bus); | |
26571 | + | |
26572 | + b = pci_find_bus(domain, bus); | |
26573 | + if(!b) | |
26574 | + /* If the bus is unknown, create it. */ | |
26575 | + return pcifront_scan_root(pdev, domain, bus); | |
26576 | + | |
26577 | + /* Rescan the bus for newly attached functions and add. | |
26578 | + * We omit handling of PCI bridge attachment because pciback prevents | |
26579 | + * bridges from being exported. | |
26580 | + */ | |
26581 | + for (devfn = 0; devfn < 0x100; devfn++) { | |
26582 | + d = pci_get_slot(b, devfn); | |
26583 | + if(d) { | |
26584 | + /* Device is already known. */ | |
26585 | + pci_dev_put(d); | |
26586 | + continue; | |
26587 | + } | |
26588 | + | |
26589 | + d = pci_scan_single_device(b, devfn); | |
26590 | + if (d) { | |
26591 | + dev_info(&pdev->xdev->dev, "New device on " | |
26592 | + "%04x:%02x:%02x.%02x found.\n", domain, bus, | |
26593 | + PCI_SLOT(devfn), PCI_FUNC(devfn)); | |
26594 | + pci_bus_add_device(d); | |
26595 | + } | |
26596 | + } | |
26597 | + | |
26598 | + return 0; | |
26599 | +} | |
26600 | + | |
26601 | +static void free_root_bus_devs(struct pci_bus *bus) | |
26602 | +{ | |
26603 | + struct pci_dev *dev; | |
26604 | + | |
26605 | + while (!list_empty(&bus->devices)) { | |
26606 | + dev = container_of(bus->devices.next, struct pci_dev, | |
26607 | + bus_list); | |
26608 | + dev_dbg(&dev->dev, "removing device\n"); | |
26609 | + pci_remove_bus_device(dev); | |
26610 | + } | |
26611 | +} | |
26612 | + | |
26613 | +void pcifront_free_roots(struct pcifront_device *pdev) | |
26614 | +{ | |
26615 | + struct pci_bus_entry *bus_entry, *t; | |
26616 | + | |
26617 | + dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n"); | |
26618 | + | |
26619 | + list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) { | |
26620 | + list_del(&bus_entry->list); | |
26621 | + | |
26622 | + free_root_bus_devs(bus_entry->bus); | |
26623 | + | |
26624 | + kfree(bus_entry->bus->sysdata); | |
26625 | + | |
26626 | + device_unregister(bus_entry->bus->bridge); | |
26627 | + pci_remove_bus(bus_entry->bus); | |
26628 | + | |
26629 | + kfree(bus_entry); | |
26630 | + } | |
26631 | +} | |
26632 | Index: head-2008-11-25/drivers/xen/pcifront/pcifront.h | |
26633 | =================================================================== | |
26634 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
26635 | +++ head-2008-11-25/drivers/xen/pcifront/pcifront.h 2008-02-26 10:54:11.000000000 +0100 | |
26636 | @@ -0,0 +1,42 @@ | |
26637 | +/* | |
26638 | + * PCI Frontend - Common data structures & function declarations | |
26639 | + * | |
26640 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
26641 | + */ | |
26642 | +#ifndef __XEN_PCIFRONT_H__ | |
26643 | +#define __XEN_PCIFRONT_H__ | |
26644 | + | |
26645 | +#include <linux/spinlock.h> | |
26646 | +#include <linux/pci.h> | |
26647 | +#include <xen/xenbus.h> | |
26648 | +#include <xen/interface/io/pciif.h> | |
26649 | +#include <xen/pcifront.h> | |
26650 | + | |
26651 | +struct pci_bus_entry { | |
26652 | + struct list_head list; | |
26653 | + struct pci_bus *bus; | |
26654 | +}; | |
26655 | + | |
26656 | +struct pcifront_device { | |
26657 | + struct xenbus_device *xdev; | |
26658 | + struct list_head root_buses; | |
26659 | + spinlock_t dev_lock; | |
26660 | + | |
26661 | + int evtchn; | |
26662 | + int gnt_ref; | |
26663 | + | |
26664 | + /* Lock this when doing any operations in sh_info */ | |
26665 | + spinlock_t sh_info_lock; | |
26666 | + struct xen_pci_sharedinfo *sh_info; | |
26667 | +}; | |
26668 | + | |
26669 | +int pcifront_connect(struct pcifront_device *pdev); | |
26670 | +void pcifront_disconnect(struct pcifront_device *pdev); | |
26671 | + | |
26672 | +int pcifront_scan_root(struct pcifront_device *pdev, | |
26673 | + unsigned int domain, unsigned int bus); | |
26674 | +int pcifront_rescan_root(struct pcifront_device *pdev, | |
26675 | + unsigned int domain, unsigned int bus); | |
26676 | +void pcifront_free_roots(struct pcifront_device *pdev); | |
26677 | + | |
26678 | +#endif /* __XEN_PCIFRONT_H__ */ | |
26679 | Index: head-2008-11-25/drivers/xen/pcifront/xenbus.c | |
26680 | =================================================================== | |
26681 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
26682 | +++ head-2008-11-25/drivers/xen/pcifront/xenbus.c 2008-07-21 11:00:33.000000000 +0200 | |
26683 | @@ -0,0 +1,455 @@ | |
26684 | +/* | |
26685 | + * PCI Frontend Xenbus Setup - handles setup with backend (imports page/evtchn) | |
26686 | + * | |
26687 | + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
26688 | + */ | |
26689 | +#include <linux/module.h> | |
26690 | +#include <linux/init.h> | |
26691 | +#include <linux/mm.h> | |
26692 | +#include <xen/xenbus.h> | |
26693 | +#include <xen/gnttab.h> | |
26694 | +#include "pcifront.h" | |
26695 | + | |
26696 | +#ifndef __init_refok | |
26697 | +#define __init_refok | |
26698 | +#endif | |
26699 | + | |
26700 | +#define INVALID_GRANT_REF (0) | |
26701 | +#define INVALID_EVTCHN (-1) | |
26702 | + | |
26703 | +static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev) | |
26704 | +{ | |
26705 | + struct pcifront_device *pdev; | |
26706 | + | |
26707 | + pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL); | |
26708 | + if (pdev == NULL) | |
26709 | + goto out; | |
26710 | + | |
26711 | + pdev->sh_info = | |
26712 | + (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL); | |
26713 | + if (pdev->sh_info == NULL) { | |
26714 | + kfree(pdev); | |
26715 | + pdev = NULL; | |
26716 | + goto out; | |
26717 | + } | |
26718 | + pdev->sh_info->flags = 0; | |
26719 | + | |
26720 | + xdev->dev.driver_data = pdev; | |
26721 | + pdev->xdev = xdev; | |
26722 | + | |
26723 | + INIT_LIST_HEAD(&pdev->root_buses); | |
26724 | + | |
26725 | + spin_lock_init(&pdev->dev_lock); | |
26726 | + spin_lock_init(&pdev->sh_info_lock); | |
26727 | + | |
26728 | + pdev->evtchn = INVALID_EVTCHN; | |
26729 | + pdev->gnt_ref = INVALID_GRANT_REF; | |
26730 | + | |
26731 | + dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n", | |
26732 | + pdev, pdev->sh_info); | |
26733 | + out: | |
26734 | + return pdev; | |
26735 | +} | |
26736 | + | |
26737 | +static void free_pdev(struct pcifront_device *pdev) | |
26738 | +{ | |
26739 | + dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev); | |
26740 | + | |
26741 | + pcifront_free_roots(pdev); | |
26742 | + | |
26743 | + if (pdev->evtchn != INVALID_EVTCHN) | |
26744 | + xenbus_free_evtchn(pdev->xdev, pdev->evtchn); | |
26745 | + | |
26746 | + if (pdev->gnt_ref != INVALID_GRANT_REF) | |
26747 | + gnttab_end_foreign_access(pdev->gnt_ref, | |
26748 | + (unsigned long)pdev->sh_info); | |
26749 | + | |
26750 | + pdev->xdev->dev.driver_data = NULL; | |
26751 | + | |
26752 | + kfree(pdev); | |
26753 | +} | |
26754 | + | |
26755 | +static int pcifront_publish_info(struct pcifront_device *pdev) | |
26756 | +{ | |
26757 | + int err = 0; | |
26758 | + struct xenbus_transaction trans; | |
26759 | + | |
26760 | + err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); | |
26761 | + if (err < 0) | |
26762 | + goto out; | |
26763 | + | |
26764 | + pdev->gnt_ref = err; | |
26765 | + | |
26766 | + err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); | |
26767 | + if (err) | |
26768 | + goto out; | |
26769 | + | |
26770 | + do_publish: | |
26771 | + err = xenbus_transaction_start(&trans); | |
26772 | + if (err) { | |
26773 | + xenbus_dev_fatal(pdev->xdev, err, | |
26774 | + "Error writing configuration for backend " | |
26775 | + "(start transaction)"); | |
26776 | + goto out; | |
26777 | + } | |
26778 | + | |
26779 | + err = xenbus_printf(trans, pdev->xdev->nodename, | |
26780 | + "pci-op-ref", "%u", pdev->gnt_ref); | |
26781 | + if (!err) | |
26782 | + err = xenbus_printf(trans, pdev->xdev->nodename, | |
26783 | + "event-channel", "%u", pdev->evtchn); | |
26784 | + if (!err) | |
26785 | + err = xenbus_printf(trans, pdev->xdev->nodename, | |
26786 | + "magic", XEN_PCI_MAGIC); | |
26787 | + | |
26788 | + if (err) { | |
26789 | + xenbus_transaction_end(trans, 1); | |
26790 | + xenbus_dev_fatal(pdev->xdev, err, | |
26791 | + "Error writing configuration for backend"); | |
26792 | + goto out; | |
26793 | + } else { | |
26794 | + err = xenbus_transaction_end(trans, 0); | |
26795 | + if (err == -EAGAIN) | |
26796 | + goto do_publish; | |
26797 | + else if (err) { | |
26798 | + xenbus_dev_fatal(pdev->xdev, err, | |
26799 | + "Error completing transaction " | |
26800 | + "for backend"); | |
26801 | + goto out; | |
26802 | + } | |
26803 | + } | |
26804 | + | |
26805 | + xenbus_switch_state(pdev->xdev, XenbusStateInitialised); | |
26806 | + | |
26807 | + dev_dbg(&pdev->xdev->dev, "publishing successful!\n"); | |
26808 | + | |
26809 | + out: | |
26810 | + return err; | |
26811 | +} | |
26812 | + | |
26813 | +static int __devinit pcifront_try_connect(struct pcifront_device *pdev) | |
26814 | +{ | |
26815 | + int err = -EFAULT; | |
26816 | + int i, num_roots, len; | |
26817 | + char str[64]; | |
26818 | + unsigned int domain, bus; | |
26819 | + | |
26820 | + spin_lock(&pdev->dev_lock); | |
26821 | + | |
26822 | + /* Only connect once */ | |
26823 | + if (xenbus_read_driver_state(pdev->xdev->nodename) != | |
26824 | + XenbusStateInitialised) | |
26825 | + goto out; | |
26826 | + | |
26827 | + err = pcifront_connect(pdev); | |
26828 | + if (err) { | |
26829 | + xenbus_dev_fatal(pdev->xdev, err, | |
26830 | + "Error connecting PCI Frontend"); | |
26831 | + goto out; | |
26832 | + } | |
26833 | + | |
26834 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, | |
26835 | + "root_num", "%d", &num_roots); | |
26836 | + if (err == -ENOENT) { | |
26837 | + xenbus_dev_error(pdev->xdev, err, | |
26838 | + "No PCI Roots found, trying 0000:00"); | |
26839 | + err = pcifront_scan_root(pdev, 0, 0); | |
26840 | + num_roots = 0; | |
26841 | + } else if (err != 1) { | |
26842 | + if (err == 0) | |
26843 | + err = -EINVAL; | |
26844 | + xenbus_dev_fatal(pdev->xdev, err, | |
26845 | + "Error reading number of PCI roots"); | |
26846 | + goto out; | |
26847 | + } | |
26848 | + | |
26849 | + for (i = 0; i < num_roots; i++) { | |
26850 | + len = snprintf(str, sizeof(str), "root-%d", i); | |
26851 | + if (unlikely(len >= (sizeof(str) - 1))) { | |
26852 | + err = -ENOMEM; | |
26853 | + goto out; | |
26854 | + } | |
26855 | + | |
26856 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, | |
26857 | + "%x:%x", &domain, &bus); | |
26858 | + if (err != 2) { | |
26859 | + if (err >= 0) | |
26860 | + err = -EINVAL; | |
26861 | + xenbus_dev_fatal(pdev->xdev, err, | |
26862 | + "Error reading PCI root %d", i); | |
26863 | + goto out; | |
26864 | + } | |
26865 | + | |
26866 | + err = pcifront_scan_root(pdev, domain, bus); | |
26867 | + if (err) { | |
26868 | + xenbus_dev_fatal(pdev->xdev, err, | |
26869 | + "Error scanning PCI root %04x:%02x", | |
26870 | + domain, bus); | |
26871 | + goto out; | |
26872 | + } | |
26873 | + } | |
26874 | + | |
26875 | + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); | |
26876 | + if (err) | |
26877 | + goto out; | |
26878 | + | |
26879 | + out: | |
26880 | + spin_unlock(&pdev->dev_lock); | |
26881 | + return err; | |
26882 | +} | |
26883 | + | |
26884 | +static int pcifront_try_disconnect(struct pcifront_device *pdev) | |
26885 | +{ | |
26886 | + int err = 0; | |
26887 | + enum xenbus_state prev_state; | |
26888 | + | |
26889 | + spin_lock(&pdev->dev_lock); | |
26890 | + | |
26891 | + prev_state = xenbus_read_driver_state(pdev->xdev->nodename); | |
26892 | + | |
26893 | + if (prev_state >= XenbusStateClosing) | |
26894 | + goto out; | |
26895 | + | |
26896 | + if(prev_state == XenbusStateConnected) { | |
26897 | + pcifront_free_roots(pdev); | |
26898 | + pcifront_disconnect(pdev); | |
26899 | + } | |
26900 | + | |
26901 | + err = xenbus_switch_state(pdev->xdev, XenbusStateClosed); | |
26902 | + | |
26903 | + out: | |
26904 | + spin_unlock(&pdev->dev_lock); | |
26905 | + | |
26906 | + return err; | |
26907 | +} | |
26908 | + | |
26909 | +static int __devinit pcifront_attach_devices(struct pcifront_device *pdev) | |
26910 | +{ | |
26911 | + int err = -EFAULT; | |
26912 | + int i, num_roots, len; | |
26913 | + unsigned int domain, bus; | |
26914 | + char str[64]; | |
26915 | + | |
26916 | + spin_lock(&pdev->dev_lock); | |
26917 | + | |
26918 | + if (xenbus_read_driver_state(pdev->xdev->nodename) != | |
26919 | + XenbusStateReconfiguring) | |
26920 | + goto out; | |
26921 | + | |
26922 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, | |
26923 | + "root_num", "%d", &num_roots); | |
26924 | + if (err == -ENOENT) { | |
26925 | + xenbus_dev_error(pdev->xdev, err, | |
26926 | + "No PCI Roots found, trying 0000:00"); | |
26927 | + err = pcifront_rescan_root(pdev, 0, 0); | |
26928 | + num_roots = 0; | |
26929 | + } else if (err != 1) { | |
26930 | + if (err == 0) | |
26931 | + err = -EINVAL; | |
26932 | + xenbus_dev_fatal(pdev->xdev, err, | |
26933 | + "Error reading number of PCI roots"); | |
26934 | + goto out; | |
26935 | + } | |
26936 | + | |
26937 | + for (i = 0; i < num_roots; i++) { | |
26938 | + len = snprintf(str, sizeof(str), "root-%d", i); | |
26939 | + if (unlikely(len >= (sizeof(str) - 1))) { | |
26940 | + err = -ENOMEM; | |
26941 | + goto out; | |
26942 | + } | |
26943 | + | |
26944 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, | |
26945 | + "%x:%x", &domain, &bus); | |
26946 | + if (err != 2) { | |
26947 | + if (err >= 0) | |
26948 | + err = -EINVAL; | |
26949 | + xenbus_dev_fatal(pdev->xdev, err, | |
26950 | + "Error reading PCI root %d", i); | |
26951 | + goto out; | |
26952 | + } | |
26953 | + | |
26954 | + err = pcifront_rescan_root(pdev, domain, bus); | |
26955 | + if (err) { | |
26956 | + xenbus_dev_fatal(pdev->xdev, err, | |
26957 | + "Error scanning PCI root %04x:%02x", | |
26958 | + domain, bus); | |
26959 | + goto out; | |
26960 | + } | |
26961 | + } | |
26962 | + | |
26963 | + xenbus_switch_state(pdev->xdev, XenbusStateConnected); | |
26964 | + | |
26965 | + out: | |
26966 | + spin_unlock(&pdev->dev_lock); | |
26967 | + return err; | |
26968 | +} | |
26969 | + | |
26970 | +static int pcifront_detach_devices(struct pcifront_device *pdev) | |
26971 | +{ | |
26972 | + int err = 0; | |
26973 | + int i, num_devs; | |
26974 | + unsigned int domain, bus, slot, func; | |
26975 | + struct pci_bus *pci_bus; | |
26976 | + struct pci_dev *pci_dev; | |
26977 | + char str[64]; | |
26978 | + | |
26979 | + spin_lock(&pdev->dev_lock); | |
26980 | + | |
26981 | + if (xenbus_read_driver_state(pdev->xdev->nodename) != | |
26982 | + XenbusStateConnected) | |
26983 | + goto out; | |
26984 | + | |
26985 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d", | |
26986 | + &num_devs); | |
26987 | + if (err != 1) { | |
26988 | + if (err >= 0) | |
26989 | + err = -EINVAL; | |
26990 | + xenbus_dev_fatal(pdev->xdev, err, | |
26991 | + "Error reading number of PCI devices"); | |
26992 | + goto out; | |
26993 | + } | |
26994 | + | |
26995 | + /* Find devices being detached and remove them. */ | |
26996 | + for (i = 0; i < num_devs; i++) { | |
26997 | + int l, state; | |
26998 | + l = snprintf(str, sizeof(str), "state-%d", i); | |
26999 | + if (unlikely(l >= (sizeof(str) - 1))) { | |
27000 | + err = -ENOMEM; | |
27001 | + goto out; | |
27002 | + } | |
27003 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d", | |
27004 | + &state); | |
27005 | + if (err != 1) | |
27006 | + state = XenbusStateUnknown; | |
27007 | + | |
27008 | + if (state != XenbusStateClosing) | |
27009 | + continue; | |
27010 | + | |
27011 | + /* Remove device. */ | |
27012 | + l = snprintf(str, sizeof(str), "vdev-%d", i); | |
27013 | + if (unlikely(l >= (sizeof(str) - 1))) { | |
27014 | + err = -ENOMEM; | |
27015 | + goto out; | |
27016 | + } | |
27017 | + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, | |
27018 | + "%x:%x:%x.%x", &domain, &bus, &slot, &func); | |
27019 | + if (err != 4) { | |
27020 | + if (err >= 0) | |
27021 | + err = -EINVAL; | |
27022 | + xenbus_dev_fatal(pdev->xdev, err, | |
27023 | + "Error reading PCI device %d", i); | |
27024 | + goto out; | |
27025 | + } | |
27026 | + | |
27027 | + pci_bus = pci_find_bus(domain, bus); | |
27028 | + if(!pci_bus) { | |
27029 | + dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n", | |
27030 | + domain, bus); | |
27031 | + continue; | |
27032 | + } | |
27033 | + pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func)); | |
27034 | + if(!pci_dev) { | |
27035 | + dev_dbg(&pdev->xdev->dev, | |
27036 | + "Cannot get PCI device %04x:%02x:%02x.%02x\n", | |
27037 | + domain, bus, slot, func); | |
27038 | + continue; | |
27039 | + } | |
27040 | + pci_remove_bus_device(pci_dev); | |
27041 | + pci_dev_put(pci_dev); | |
27042 | + | |
27043 | + dev_dbg(&pdev->xdev->dev, | |
27044 | + "PCI device %04x:%02x:%02x.%02x removed.\n", | |
27045 | + domain, bus, slot, func); | |
27046 | + } | |
27047 | + | |
27048 | + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring); | |
27049 | + | |
27050 | + out: | |
27051 | + spin_unlock(&pdev->dev_lock); | |
27052 | + return err; | |
27053 | +} | |
27054 | + | |
27055 | +static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev, | |
27056 | + enum xenbus_state be_state) | |
27057 | +{ | |
27058 | + struct pcifront_device *pdev = xdev->dev.driver_data; | |
27059 | + | |
27060 | + switch (be_state) { | |
27061 | + case XenbusStateUnknown: | |
27062 | + case XenbusStateInitialising: | |
27063 | + case XenbusStateInitWait: | |
27064 | + case XenbusStateInitialised: | |
27065 | + case XenbusStateClosed: | |
27066 | + break; | |
27067 | + | |
27068 | + case XenbusStateConnected: | |
27069 | + pcifront_try_connect(pdev); | |
27070 | + break; | |
27071 | + | |
27072 | + case XenbusStateClosing: | |
27073 | + dev_warn(&xdev->dev, "backend going away!\n"); | |
27074 | + pcifront_try_disconnect(pdev); | |
27075 | + break; | |
27076 | + | |
27077 | + case XenbusStateReconfiguring: | |
27078 | + pcifront_detach_devices(pdev); | |
27079 | + break; | |
27080 | + | |
27081 | + case XenbusStateReconfigured: | |
27082 | + pcifront_attach_devices(pdev); | |
27083 | + break; | |
27084 | + } | |
27085 | +} | |
27086 | + | |
27087 | +static int pcifront_xenbus_probe(struct xenbus_device *xdev, | |
27088 | + const struct xenbus_device_id *id) | |
27089 | +{ | |
27090 | + int err = 0; | |
27091 | + struct pcifront_device *pdev = alloc_pdev(xdev); | |
27092 | + | |
27093 | + if (pdev == NULL) { | |
27094 | + err = -ENOMEM; | |
27095 | + xenbus_dev_fatal(xdev, err, | |
27096 | + "Error allocating pcifront_device struct"); | |
27097 | + goto out; | |
27098 | + } | |
27099 | + | |
27100 | + err = pcifront_publish_info(pdev); | |
27101 | + | |
27102 | + out: | |
27103 | + return err; | |
27104 | +} | |
27105 | + | |
27106 | +static int pcifront_xenbus_remove(struct xenbus_device *xdev) | |
27107 | +{ | |
27108 | + if (xdev->dev.driver_data) | |
27109 | + free_pdev(xdev->dev.driver_data); | |
27110 | + | |
27111 | + return 0; | |
27112 | +} | |
27113 | + | |
27114 | +static const struct xenbus_device_id xenpci_ids[] = { | |
27115 | + {"pci"}, | |
27116 | + {{0}}, | |
27117 | +}; | |
27118 | +MODULE_ALIAS("xen:pci"); | |
27119 | + | |
27120 | +static struct xenbus_driver xenbus_pcifront_driver = { | |
27121 | + .name = "pcifront", | |
27122 | + .owner = THIS_MODULE, | |
27123 | + .ids = xenpci_ids, | |
27124 | + .probe = pcifront_xenbus_probe, | |
27125 | + .remove = pcifront_xenbus_remove, | |
27126 | + .otherend_changed = pcifront_backend_changed, | |
27127 | +}; | |
27128 | + | |
27129 | +static int __init pcifront_init(void) | |
27130 | +{ | |
27131 | + if (!is_running_on_xen()) | |
27132 | + return -ENODEV; | |
27133 | + | |
27134 | + return xenbus_register_frontend(&xenbus_pcifront_driver); | |
27135 | +} | |
27136 | + | |
27137 | +/* Initialize after the Xen PCI Frontend Stub is initialized */ | |
27138 | +subsys_initcall(pcifront_init); | |
27139 | Index: head-2008-11-25/drivers/xen/privcmd/Makefile | |
27140 | =================================================================== | |
27141 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
27142 | +++ head-2008-11-25/drivers/xen/privcmd/Makefile 2007-07-10 09:42:30.000000000 +0200 | |
27143 | @@ -0,0 +1,3 @@ | |
27144 | + | |
27145 | +obj-y += privcmd.o | |
27146 | +obj-$(CONFIG_COMPAT) += compat_privcmd.o | |
27147 | Index: head-2008-11-25/drivers/xen/privcmd/compat_privcmd.c | |
27148 | =================================================================== | |
27149 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
27150 | +++ head-2008-11-25/drivers/xen/privcmd/compat_privcmd.c 2007-07-10 09:42:30.000000000 +0200 | |
27151 | @@ -0,0 +1,73 @@ | |
27152 | +/* | |
27153 | + * This program is free software; you can redistribute it and/or modify | |
27154 | + * it under the terms of the GNU General Public License as published by | |
27155 | + * the Free Software Foundation; either version 2 of the License, or | |
27156 | + * (at your option) any later version. | |
27157 | + * | |
27158 | + * This program is distributed in the hope that it will be useful, | |
27159 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
27160 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
27161 | + * GNU General Public License for more details. | |
27162 | + * | |
27163 | + * You should have received a copy of the GNU General Public License | |
27164 | + * along with this program; if not, write to the Free Software | |
27165 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
27166 | + * | |
27167 | + * Copyright (C) IBM Corp. 2006 | |
27168 | + * | |
27169 | + * Authors: Jimi Xenidis <jimix@watson.ibm.com> | |
27170 | + */ | |
27171 | + | |
27172 | +#include <linux/config.h> | |
27173 | +#include <linux/compat.h> | |
27174 | +#include <linux/ioctl.h> | |
27175 | +#include <linux/syscalls.h> | |
27176 | +#include <asm/hypervisor.h> | |
27177 | +#include <asm/uaccess.h> | |
27178 | +#include <xen/public/privcmd.h> | |
27179 | +#include <xen/compat_ioctl.h> | |
27180 | + | |
27181 | +int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg) | |
27182 | +{ | |
27183 | + int ret; | |
27184 | + | |
27185 | + switch (cmd) { | |
27186 | + case IOCTL_PRIVCMD_MMAP_32: { | |
27187 | + struct privcmd_mmap *p; | |
27188 | + struct privcmd_mmap_32 *p32; | |
27189 | + struct privcmd_mmap_32 n32; | |
27190 | + | |
27191 | + p32 = compat_ptr(arg); | |
27192 | + p = compat_alloc_user_space(sizeof(*p)); | |
27193 | + if (copy_from_user(&n32, p32, sizeof(n32)) || | |
27194 | + put_user(n32.num, &p->num) || | |
27195 | + put_user(n32.dom, &p->dom) || | |
27196 | + put_user(compat_ptr(n32.entry), &p->entry)) | |
27197 | + return -EFAULT; | |
27198 | + | |
27199 | + ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAP, (unsigned long)p); | |
27200 | + } | |
27201 | + break; | |
27202 | + case IOCTL_PRIVCMD_MMAPBATCH_32: { | |
27203 | + struct privcmd_mmapbatch *p; | |
27204 | + struct privcmd_mmapbatch_32 *p32; | |
27205 | + struct privcmd_mmapbatch_32 n32; | |
27206 | + | |
27207 | + p32 = compat_ptr(arg); | |
27208 | + p = compat_alloc_user_space(sizeof(*p)); | |
27209 | + if (copy_from_user(&n32, p32, sizeof(n32)) || | |
27210 | + put_user(n32.num, &p->num) || | |
27211 | + put_user(n32.dom, &p->dom) || | |
27212 | + put_user(n32.addr, &p->addr) || | |
27213 | + put_user(compat_ptr(n32.arr), &p->arr)) | |
27214 | + return -EFAULT; | |
27215 | + | |
27216 | + ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, (unsigned long)p); | |
27217 | + } | |
27218 | + break; | |
27219 | + default: | |
27220 | + ret = -EINVAL; | |
27221 | + break; | |
27222 | + } | |
27223 | + return ret; | |
27224 | +} | |
27225 | Index: head-2008-11-25/drivers/xen/privcmd/privcmd.c | |
27226 | =================================================================== | |
27227 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
27228 | +++ head-2008-11-25/drivers/xen/privcmd/privcmd.c 2008-07-21 11:00:33.000000000 +0200 | |
27229 | @@ -0,0 +1,356 @@ | |
27230 | +/****************************************************************************** | |
27231 | + * privcmd.c | |
27232 | + * | |
27233 | + * Interface to privileged domain-0 commands. | |
27234 | + * | |
27235 | + * Copyright (c) 2002-2004, K A Fraser, B Dragovic | |
27236 | + */ | |
27237 | + | |
27238 | +#include <linux/kernel.h> | |
27239 | +#include <linux/sched.h> | |
27240 | +#include <linux/slab.h> | |
27241 | +#include <linux/string.h> | |
27242 | +#include <linux/errno.h> | |
27243 | +#include <linux/mm.h> | |
27244 | +#include <linux/mman.h> | |
27245 | +#include <linux/swap.h> | |
27246 | +#include <linux/smp_lock.h> | |
27247 | +#include <linux/highmem.h> | |
27248 | +#include <linux/pagemap.h> | |
27249 | +#include <linux/seq_file.h> | |
27250 | +#include <asm/hypervisor.h> | |
27251 | + | |
27252 | +#include <asm/pgalloc.h> | |
27253 | +#include <asm/pgtable.h> | |
27254 | +#include <asm/uaccess.h> | |
27255 | +#include <asm/tlb.h> | |
27256 | +#include <asm/hypervisor.h> | |
27257 | +#include <xen/public/privcmd.h> | |
27258 | +#include <xen/interface/xen.h> | |
27259 | +#include <xen/xen_proc.h> | |
27260 | +#include <xen/features.h> | |
27261 | + | |
27262 | +static struct proc_dir_entry *privcmd_intf; | |
27263 | +static struct proc_dir_entry *capabilities_intf; | |
27264 | + | |
27265 | +#ifndef HAVE_ARCH_PRIVCMD_MMAP | |
27266 | +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); | |
27267 | +#endif | |
27268 | + | |
27269 | +static long privcmd_ioctl(struct file *file, | |
27270 | + unsigned int cmd, unsigned long data) | |
27271 | +{ | |
27272 | + int ret = -ENOSYS; | |
27273 | + void __user *udata = (void __user *) data; | |
27274 | + | |
27275 | + switch (cmd) { | |
27276 | + case IOCTL_PRIVCMD_HYPERCALL: { | |
27277 | + privcmd_hypercall_t hypercall; | |
27278 | + | |
27279 | + if (copy_from_user(&hypercall, udata, sizeof(hypercall))) | |
27280 | + return -EFAULT; | |
27281 | + | |
27282 | +#if defined(__i386__) | |
27283 | + if (hypercall.op >= (PAGE_SIZE >> 5)) | |
27284 | + break; | |
27285 | + __asm__ __volatile__ ( | |
27286 | + "pushl %%ebx; pushl %%ecx; pushl %%edx; " | |
27287 | + "pushl %%esi; pushl %%edi; " | |
27288 | + "movl 8(%%eax),%%ebx ;" | |
27289 | + "movl 16(%%eax),%%ecx ;" | |
27290 | + "movl 24(%%eax),%%edx ;" | |
27291 | + "movl 32(%%eax),%%esi ;" | |
27292 | + "movl 40(%%eax),%%edi ;" | |
27293 | + "movl (%%eax),%%eax ;" | |
27294 | + "shll $5,%%eax ;" | |
27295 | + "addl $hypercall_page,%%eax ;" | |
27296 | + "call *%%eax ;" | |
27297 | + "popl %%edi; popl %%esi; popl %%edx; " | |
27298 | + "popl %%ecx; popl %%ebx" | |
27299 | + : "=a" (ret) : "0" (&hypercall) : "memory" ); | |
27300 | +#elif defined (__x86_64__) | |
27301 | + if (hypercall.op < (PAGE_SIZE >> 5)) { | |
27302 | + long ign1, ign2, ign3; | |
27303 | + __asm__ __volatile__ ( | |
27304 | + "movq %8,%%r10; movq %9,%%r8;" | |
27305 | + "shll $5,%%eax ;" | |
27306 | + "addq $hypercall_page,%%rax ;" | |
27307 | + "call *%%rax" | |
27308 | + : "=a" (ret), "=D" (ign1), | |
27309 | + "=S" (ign2), "=d" (ign3) | |
27310 | + : "0" ((unsigned int)hypercall.op), | |
27311 | + "1" (hypercall.arg[0]), | |
27312 | + "2" (hypercall.arg[1]), | |
27313 | + "3" (hypercall.arg[2]), | |
27314 | + "g" (hypercall.arg[3]), | |
27315 | + "g" (hypercall.arg[4]) | |
27316 | + : "r8", "r10", "memory" ); | |
27317 | + } | |
27318 | +#else | |
27319 | + ret = privcmd_hypercall(&hypercall); | |
27320 | +#endif | |
27321 | + } | |
27322 | + break; | |
27323 | + | |
27324 | + case IOCTL_PRIVCMD_MMAP: { | |
27325 | +#define MMAP_NR_PER_PAGE (int)((PAGE_SIZE-sizeof(struct list_head))/sizeof(privcmd_mmap_entry_t)) | |
27326 | + privcmd_mmap_t mmapcmd; | |
27327 | + privcmd_mmap_entry_t *msg; | |
27328 | + privcmd_mmap_entry_t __user *p; | |
27329 | + struct mm_struct *mm = current->mm; | |
27330 | + struct vm_area_struct *vma; | |
27331 | + unsigned long va; | |
27332 | + int i, rc; | |
27333 | + LIST_HEAD(pagelist); | |
27334 | + struct list_head *l,*l2; | |
27335 | + | |
27336 | + if (!is_initial_xendomain()) | |
27337 | + return -EPERM; | |
27338 | + | |
27339 | + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) | |
27340 | + return -EFAULT; | |
27341 | + | |
27342 | + p = mmapcmd.entry; | |
27343 | + for (i = 0; i < mmapcmd.num;) { | |
27344 | + int nr = min(mmapcmd.num - i, MMAP_NR_PER_PAGE); | |
27345 | + | |
27346 | + rc = -ENOMEM; | |
27347 | + l = (struct list_head *) __get_free_page(GFP_KERNEL); | |
27348 | + if (l == NULL) | |
27349 | + goto mmap_out; | |
27350 | + | |
27351 | + INIT_LIST_HEAD(l); | |
27352 | + list_add_tail(l, &pagelist); | |
27353 | + msg = (privcmd_mmap_entry_t*)(l + 1); | |
27354 | + | |
27355 | + rc = -EFAULT; | |
27356 | + if (copy_from_user(msg, p, nr*sizeof(*msg))) | |
27357 | + goto mmap_out; | |
27358 | + i += nr; | |
27359 | + p += nr; | |
27360 | + } | |
27361 | + | |
27362 | + l = pagelist.next; | |
27363 | + msg = (privcmd_mmap_entry_t*)(l + 1); | |
27364 | + | |
27365 | + down_write(&mm->mmap_sem); | |
27366 | + | |
27367 | + vma = find_vma(mm, msg->va); | |
27368 | + rc = -EINVAL; | |
27369 | + if (!vma || (msg->va != vma->vm_start) || | |
27370 | + !privcmd_enforce_singleshot_mapping(vma)) | |
27371 | + goto mmap_out; | |
27372 | + | |
27373 | + va = vma->vm_start; | |
27374 | + | |
27375 | + i = 0; | |
27376 | + list_for_each(l, &pagelist) { | |
27377 | + int nr = i + min(mmapcmd.num - i, MMAP_NR_PER_PAGE); | |
27378 | + | |
27379 | + msg = (privcmd_mmap_entry_t*)(l + 1); | |
27380 | + while (i<nr) { | |
27381 | + | |
27382 | + /* Do not allow range to wrap the address space. */ | |
27383 | + rc = -EINVAL; | |
27384 | + if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || | |
27385 | + ((unsigned long)(msg->npages << PAGE_SHIFT) >= -va)) | |
27386 | + goto mmap_out; | |
27387 | + | |
27388 | + /* Range chunks must be contiguous in va space. */ | |
27389 | + if ((msg->va != va) || | |
27390 | + ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) | |
27391 | + goto mmap_out; | |
27392 | + | |
27393 | + if ((rc = direct_remap_pfn_range( | |
27394 | + vma, | |
27395 | + msg->va & PAGE_MASK, | |
27396 | + msg->mfn, | |
27397 | + msg->npages << PAGE_SHIFT, | |
27398 | + vma->vm_page_prot, | |
27399 | + mmapcmd.dom)) < 0) | |
27400 | + goto mmap_out; | |
27401 | + | |
27402 | + va += msg->npages << PAGE_SHIFT; | |
27403 | + msg++; | |
27404 | + i++; | |
27405 | + } | |
27406 | + } | |
27407 | + | |
27408 | + rc = 0; | |
27409 | + | |
27410 | + mmap_out: | |
27411 | + up_write(&mm->mmap_sem); | |
27412 | + list_for_each_safe(l,l2,&pagelist) | |
27413 | + free_page((unsigned long)l); | |
27414 | + ret = rc; | |
27415 | + } | |
27416 | +#undef MMAP_NR_PER_PAGE | |
27417 | + break; | |
27418 | + | |
27419 | + case IOCTL_PRIVCMD_MMAPBATCH: { | |
27420 | +#define MMAPBATCH_NR_PER_PAGE (unsigned long)((PAGE_SIZE-sizeof(struct list_head))/sizeof(unsigned long)) | |
27421 | + privcmd_mmapbatch_t m; | |
27422 | + struct mm_struct *mm = current->mm; | |
27423 | + struct vm_area_struct *vma; | |
27424 | + xen_pfn_t __user *p; | |
27425 | + unsigned long addr, *mfn, nr_pages; | |
27426 | + int i; | |
27427 | + LIST_HEAD(pagelist); | |
27428 | + struct list_head *l, *l2; | |
27429 | + | |
27430 | + if (!is_initial_xendomain()) | |
27431 | + return -EPERM; | |
27432 | + | |
27433 | + if (copy_from_user(&m, udata, sizeof(m))) | |
27434 | + return -EFAULT; | |
27435 | + | |
27436 | + nr_pages = m.num; | |
27437 | + if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) | |
27438 | + return -EINVAL; | |
27439 | + | |
27440 | + p = m.arr; | |
27441 | + for (i=0; i<nr_pages; ) { | |
27442 | + int nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE); | |
27443 | + | |
27444 | + ret = -ENOMEM; | |
27445 | + l = (struct list_head *)__get_free_page(GFP_KERNEL); | |
27446 | + if (l == NULL) | |
27447 | + goto mmapbatch_out; | |
27448 | + | |
27449 | + INIT_LIST_HEAD(l); | |
27450 | + list_add_tail(l, &pagelist); | |
27451 | + | |
27452 | + mfn = (unsigned long*)(l + 1); | |
27453 | + ret = -EFAULT; | |
27454 | + if (copy_from_user(mfn, p, nr*sizeof(*mfn))) | |
27455 | + goto mmapbatch_out; | |
27456 | + | |
27457 | + i += nr; p+= nr; | |
27458 | + } | |
27459 | + | |
27460 | + down_write(&mm->mmap_sem); | |
27461 | + | |
27462 | + vma = find_vma(mm, m.addr); | |
27463 | + ret = -EINVAL; | |
27464 | + if (!vma || | |
27465 | + (m.addr != vma->vm_start) || | |
27466 | + ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || | |
27467 | + !privcmd_enforce_singleshot_mapping(vma)) { | |
27468 | + up_write(&mm->mmap_sem); | |
27469 | + goto mmapbatch_out; | |
27470 | + } | |
27471 | + | |
27472 | + p = m.arr; | |
27473 | + addr = m.addr; | |
27474 | + i = 0; | |
27475 | + ret = 0; | |
27476 | + list_for_each(l, &pagelist) { | |
27477 | + int nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE); | |
27478 | + mfn = (unsigned long *)(l + 1); | |
27479 | + | |
27480 | + while (i<nr) { | |
27481 | + if(direct_remap_pfn_range(vma, addr & PAGE_MASK, | |
27482 | + *mfn, PAGE_SIZE, | |
27483 | + vma->vm_page_prot, m.dom) < 0) { | |
27484 | + *mfn |= 0xf0000000U; | |
27485 | + ret++; | |
27486 | + } | |
27487 | + mfn++; i++; addr += PAGE_SIZE; | |
27488 | + } | |
27489 | + } | |
27490 | + | |
27491 | + up_write(&mm->mmap_sem); | |
27492 | + if (ret > 0) { | |
27493 | + p = m.arr; | |
27494 | + i = 0; | |
27495 | + ret = 0; | |
27496 | + list_for_each(l, &pagelist) { | |
27497 | + int nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE); | |
27498 | + mfn = (unsigned long *)(l + 1); | |
27499 | + if (copy_to_user(p, mfn, nr*sizeof(*mfn))) | |
27500 | + ret = -EFAULT; | |
27501 | + i += nr; p += nr; | |
27502 | + } | |
27503 | + } | |
27504 | + mmapbatch_out: | |
27505 | + list_for_each_safe(l,l2,&pagelist) | |
27506 | + free_page((unsigned long)l); | |
27507 | +#undef MMAPBATCH_NR_PER_PAGE | |
27508 | + } | |
27509 | + break; | |
27510 | + | |
27511 | + default: | |
27512 | + ret = -EINVAL; | |
27513 | + break; | |
27514 | + } | |
27515 | + | |
27516 | + return ret; | |
27517 | +} | |
27518 | + | |
27519 | +#ifndef HAVE_ARCH_PRIVCMD_MMAP | |
27520 | +static struct page *privcmd_nopage(struct vm_area_struct *vma, | |
27521 | + unsigned long address, | |
27522 | + int *type) | |
27523 | +{ | |
27524 | + return NOPAGE_SIGBUS; | |
27525 | +} | |
27526 | + | |
27527 | +static struct vm_operations_struct privcmd_vm_ops = { | |
27528 | + .nopage = privcmd_nopage | |
27529 | +}; | |
27530 | + | |
27531 | +static int privcmd_mmap(struct file * file, struct vm_area_struct * vma) | |
27532 | +{ | |
27533 | + /* Unsupported for auto-translate guests. */ | |
27534 | + if (xen_feature(XENFEAT_auto_translated_physmap)) | |
27535 | + return -ENOSYS; | |
27536 | + | |
27537 | + /* DONTCOPY is essential for Xen as copy_page_range is broken. */ | |
27538 | + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; | |
27539 | + vma->vm_ops = &privcmd_vm_ops; | |
27540 | + vma->vm_private_data = NULL; | |
27541 | + | |
27542 | + return 0; | |
27543 | +} | |
27544 | + | |
27545 | +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) | |
27546 | +{ | |
27547 | + return (xchg(&vma->vm_private_data, (void *)1) == NULL); | |
27548 | +} | |
27549 | +#endif | |
27550 | + | |
27551 | +static const struct file_operations privcmd_file_ops = { | |
27552 | + .unlocked_ioctl = privcmd_ioctl, | |
27553 | + .mmap = privcmd_mmap, | |
27554 | +}; | |
27555 | + | |
27556 | +static int capabilities_read(char *page, char **start, off_t off, | |
27557 | + int count, int *eof, void *data) | |
27558 | +{ | |
27559 | + int len = 0; | |
27560 | + *page = 0; | |
27561 | + | |
27562 | + if (is_initial_xendomain()) | |
27563 | + len = sprintf( page, "control_d\n" ); | |
27564 | + | |
27565 | + *eof = 1; | |
27566 | + return len; | |
27567 | +} | |
27568 | + | |
27569 | +static int __init privcmd_init(void) | |
27570 | +{ | |
27571 | + if (!is_running_on_xen()) | |
27572 | + return -ENODEV; | |
27573 | + | |
27574 | + privcmd_intf = create_xen_proc_entry("privcmd", 0400); | |
27575 | + if (privcmd_intf != NULL) | |
27576 | + privcmd_intf->proc_fops = &privcmd_file_ops; | |
27577 | + | |
27578 | + capabilities_intf = create_xen_proc_entry("capabilities", 0400 ); | |
27579 | + if (capabilities_intf != NULL) | |
27580 | + capabilities_intf->read_proc = capabilities_read; | |
27581 | + | |
27582 | + return 0; | |
27583 | +} | |
27584 | + | |
27585 | +__initcall(privcmd_init); | |
27586 | Index: head-2008-11-25/drivers/xen/scsiback/Makefile | |
27587 | =================================================================== | |
27588 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
27589 | +++ head-2008-11-25/drivers/xen/scsiback/Makefile 2008-07-21 11:00:33.000000000 +0200 | |
27590 | @@ -0,0 +1,4 @@ | |
27591 | +obj-$(CONFIG_XEN_SCSI_BACKEND) := xen-scsibk.o | |
27592 | + | |
27593 | +xen-scsibk-y := interface.o scsiback.o xenbus.o translate.o emulate.o | |
27594 | + | |
27595 | Index: head-2008-11-25/drivers/xen/scsiback/common.h | |
27596 | =================================================================== | |
27597 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
27598 | +++ head-2008-11-25/drivers/xen/scsiback/common.h 2008-07-21 11:00:33.000000000 +0200 | |
27599 | @@ -0,0 +1,181 @@ | |
27600 | +/* | |
27601 | + * Copyright (c) 2008, FUJITSU Limited | |
27602 | + * | |
27603 | + * Based on the blkback driver code. | |
27604 | + * | |
27605 | + * This program is free software; you can redistribute it and/or | |
27606 | + * modify it under the terms of the GNU General Public License version 2 | |
27607 | + * as published by the Free Software Foundation; or, when distributed | |
27608 | + * separately from the Linux kernel or incorporated into other | |
27609 | + * software packages, subject to the following license: | |
27610 | + * | |
27611 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
27612 | + * of this source file (the "Software"), to deal in the Software without | |
27613 | + * restriction, including without limitation the rights to use, copy, modify, | |
27614 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
27615 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
27616 | + * the following conditions: | |
27617 | + * | |
27618 | + * The above copyright notice and this permission notice shall be included in | |
27619 | + * all copies or substantial portions of the Software. | |
27620 | + * | |
27621 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
27622 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
27623 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
27624 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
27625 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
27626 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
27627 | + * IN THE SOFTWARE. | |
27628 | + */ | |
27629 | + | |
27630 | +#ifndef __SCSIIF__BACKEND__COMMON_H__ | |
27631 | +#define __SCSIIF__BACKEND__COMMON_H__ | |
27632 | + | |
27633 | +#include <linux/version.h> | |
27634 | +#include <linux/module.h> | |
27635 | +#include <linux/interrupt.h> | |
27636 | +#include <linux/slab.h> | |
27637 | +#include <linux/vmalloc.h> | |
27638 | +#include <linux/wait.h> | |
27639 | +#include <linux/sched.h> | |
27640 | +#include <linux/kthread.h> | |
27641 | +#include <linux/blkdev.h> | |
27642 | +#include <linux/list.h> | |
27643 | +#include <linux/kthread.h> | |
27644 | +#include <scsi/scsi.h> | |
27645 | +#include <scsi/scsi_cmnd.h> | |
27646 | +#include <scsi/scsi_host.h> | |
27647 | +#include <scsi/scsi_device.h> | |
27648 | +#include <scsi/scsi_dbg.h> | |
27649 | +#include <scsi/scsi_eh.h> | |
27650 | +#include <asm/io.h> | |
27651 | +#include <asm/setup.h> | |
27652 | +#include <asm/pgalloc.h> | |
27653 | +#include <asm/delay.h> | |
27654 | +#include <xen/evtchn.h> | |
27655 | +#include <asm/hypervisor.h> | |
27656 | +#include <xen/gnttab.h> | |
27657 | +#include <xen/driver_util.h> | |
27658 | +#include <xen/xenbus.h> | |
27659 | +#include <xen/interface/io/ring.h> | |
27660 | +#include <xen/interface/grant_table.h> | |
27661 | +#include <xen/interface/io/vscsiif.h> | |
27662 | + | |
27663 | + | |
27664 | +#define DPRINTK(_f, _a...) \ | |
27665 | + pr_debug("(file=%s, line=%d) " _f, \ | |
27666 | + __FILE__ , __LINE__ , ## _a ) | |
27667 | + | |
27668 | +struct ids_tuple { | |
27669 | + unsigned int hst; /* host */ | |
27670 | + unsigned int chn; /* channel */ | |
27671 | + unsigned int tgt; /* target */ | |
27672 | + unsigned int lun; /* LUN */ | |
27673 | +}; | |
27674 | + | |
27675 | +struct v2p_entry { | |
27676 | + struct ids_tuple v; /* translate from */ | |
27677 | + struct scsi_device *sdev; /* translate to */ | |
27678 | + struct list_head l; | |
27679 | +}; | |
27680 | + | |
27681 | +struct vscsibk_info { | |
27682 | + struct xenbus_device *dev; | |
27683 | + | |
27684 | + domid_t domid; | |
27685 | + unsigned int evtchn; | |
27686 | + unsigned int irq; | |
27687 | + | |
27688 | + struct vscsiif_back_ring ring; | |
27689 | + struct vm_struct *ring_area; | |
27690 | + grant_handle_t shmem_handle; | |
27691 | + grant_ref_t shmem_ref; | |
27692 | + | |
27693 | + spinlock_t ring_lock; | |
27694 | + atomic_t nr_unreplied_reqs; | |
27695 | + | |
27696 | + spinlock_t v2p_lock; | |
27697 | + struct list_head v2p_entry_lists; | |
27698 | + | |
27699 | + struct task_struct *kthread; | |
27700 | + wait_queue_head_t waiting_to_free; | |
27701 | + wait_queue_head_t wq; | |
27702 | + unsigned int waiting_reqs; | |
27703 | + struct page **mmap_pages; | |
27704 | + | |
27705 | +}; | |
27706 | + | |
27707 | +typedef struct { | |
27708 | + unsigned char act; | |
27709 | + struct vscsibk_info *info; | |
27710 | + struct scsi_device *sdev; | |
27711 | + | |
27712 | + uint16_t rqid; | |
27713 | + | |
27714 | + uint8_t nr_segments; | |
27715 | + uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; | |
27716 | + uint8_t cmd_len; | |
27717 | + | |
27718 | + uint8_t sc_data_direction; | |
27719 | + uint16_t timeout_per_command; | |
27720 | + | |
27721 | + uint32_t request_bufflen; | |
27722 | + struct scatterlist *sgl; | |
27723 | + grant_ref_t gref[VSCSIIF_SG_TABLESIZE]; | |
27724 | + | |
27725 | + int32_t rslt; | |
27726 | + uint32_t resid; | |
27727 | + uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE]; | |
27728 | + | |
27729 | + struct list_head free_list; | |
27730 | +} pending_req_t; | |
27731 | + | |
27732 | + | |
27733 | + | |
27734 | +#define scsiback_get(_b) (atomic_inc(&(_b)->nr_unreplied_reqs)) | |
27735 | +#define scsiback_put(_b) \ | |
27736 | + do { \ | |
27737 | + if (atomic_dec_and_test(&(_b)->nr_unreplied_reqs)) \ | |
27738 | + wake_up(&(_b)->waiting_to_free);\ | |
27739 | + } while (0) | |
27740 | + | |
27741 | +#define VSCSIIF_TIMEOUT (900*HZ) | |
27742 | + | |
27743 | + | |
27744 | +irqreturn_t scsiback_intr(int, void *, struct pt_regs *); | |
27745 | +int scsiback_init_sring(struct vscsibk_info *info, | |
27746 | + unsigned long ring_ref, unsigned int evtchn); | |
27747 | +int scsiback_schedule(void *data); | |
27748 | + | |
27749 | + | |
27750 | +struct vscsibk_info *vscsibk_info_alloc(domid_t domid); | |
27751 | +void scsiback_free(struct vscsibk_info *info); | |
27752 | +void scsiback_disconnect(struct vscsibk_info *info); | |
27753 | +int __init scsiback_interface_init(void); | |
27754 | +void scsiback_interface_exit(void); | |
27755 | +int scsiback_xenbus_init(void); | |
27756 | +void scsiback_xenbus_unregister(void); | |
27757 | + | |
27758 | +void scsiback_init_translation_table(struct vscsibk_info *info); | |
27759 | + | |
27760 | +int scsiback_add_translation_entry(struct vscsibk_info *info, | |
27761 | + struct scsi_device *sdev, struct ids_tuple *v); | |
27762 | + | |
27763 | +int scsiback_del_translation_entry(struct vscsibk_info *info, | |
27764 | + struct ids_tuple *v); | |
27765 | +struct scsi_device *scsiback_do_translation(struct vscsibk_info *info, | |
27766 | + struct ids_tuple *v); | |
27767 | +void scsiback_release_translation_entry(struct vscsibk_info *info); | |
27768 | + | |
27769 | + | |
27770 | +void scsiback_cmd_exec(pending_req_t *pending_req); | |
27771 | +void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result, | |
27772 | + uint32_t resid, pending_req_t *pending_req); | |
27773 | +void scsiback_fast_flush_area(pending_req_t *req); | |
27774 | + | |
27775 | +void scsiback_rsp_emulation(pending_req_t *pending_req); | |
27776 | +void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req); | |
27777 | +void scsiback_emulation_init(void); | |
27778 | + | |
27779 | + | |
27780 | +#endif /* __SCSIIF__BACKEND__COMMON_H__ */ | |
27781 | Index: head-2008-11-25/drivers/xen/scsiback/emulate.c | |
27782 | =================================================================== | |
27783 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
27784 | +++ head-2008-11-25/drivers/xen/scsiback/emulate.c 2008-08-07 12:44:36.000000000 +0200 | |
27785 | @@ -0,0 +1,454 @@ | |
27786 | +/* | |
27787 | + * Xen SCSI backend driver | |
27788 | + * | |
27789 | + * Copyright (c) 2008, FUJITSU Limited | |
27790 | + * | |
27791 | + * This program is free software; you can redistribute it and/or | |
27792 | + * modify it under the terms of the GNU General Public License version 2 | |
27793 | + * as published by the Free Software Foundation; or, when distributed | |
27794 | + * separately from the Linux kernel or incorporated into other | |
27795 | + * software packages, subject to the following license: | |
27796 | + * | |
27797 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
27798 | + * of this source file (the "Software"), to deal in the Software without | |
27799 | + * restriction, including without limitation the rights to use, copy, modify, | |
27800 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
27801 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
27802 | + * the following conditions: | |
27803 | + * | |
27804 | + * The above copyright notice and this permission notice shall be included in | |
27805 | + * all copies or substantial portions of the Software. | |
27806 | + * | |
27807 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
27808 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
27809 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
27810 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
27811 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
27812 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
27813 | + * IN THE SOFTWARE. | |
27814 | + */ | |
27815 | + | |
27816 | +#include <scsi/scsi.h> | |
27817 | +#include <scsi/scsi_cmnd.h> | |
27818 | +#include <scsi/scsi_device.h> | |
27819 | +#include "common.h" | |
27820 | + | |
27821 | +/* Following SCSI commands are not defined in scsi/scsi.h */ | |
27822 | +#define EXTENDED_COPY 0x83 /* EXTENDED COPY command */ | |
27823 | +#define REPORT_ALIASES 0xa3 /* REPORT ALIASES command */ | |
27824 | +#define CHANGE_ALIASES 0xa4 /* CHANGE ALIASES command */ | |
27825 | +#define SET_PRIORITY 0xa4 /* SET PRIORITY command */ | |
27826 | + | |
27827 | + | |
27828 | +/* | |
27829 | + The bitmap in order to control emulation. | |
27830 | + (Bit 3 to 7 are reserved for future use.) | |
27831 | +*/ | |
27832 | +#define VSCSIIF_NEED_CMD_EXEC 0x01 /* If this bit is set, cmd exec */ | |
27833 | + /* is required. */ | |
27834 | +#define VSCSIIF_NEED_EMULATE_REQBUF 0x02 /* If this bit is set, need */ | |
27835 | + /* emulation reqest buff before */ | |
27836 | + /* cmd exec. */ | |
27837 | +#define VSCSIIF_NEED_EMULATE_RSPBUF 0x04 /* If this bit is set, need */ | |
27838 | + /* emulation resp buff after */ | |
27839 | + /* cmd exec. */ | |
27840 | + | |
27841 | +/* Additional Sense Code (ASC) used */ | |
27842 | +#define NO_ADDITIONAL_SENSE 0x0 | |
27843 | +#define LOGICAL_UNIT_NOT_READY 0x4 | |
27844 | +#define UNRECOVERED_READ_ERR 0x11 | |
27845 | +#define PARAMETER_LIST_LENGTH_ERR 0x1a | |
27846 | +#define INVALID_OPCODE 0x20 | |
27847 | +#define ADDR_OUT_OF_RANGE 0x21 | |
27848 | +#define INVALID_FIELD_IN_CDB 0x24 | |
27849 | +#define INVALID_FIELD_IN_PARAM_LIST 0x26 | |
27850 | +#define POWERON_RESET 0x29 | |
27851 | +#define SAVING_PARAMS_UNSUP 0x39 | |
27852 | +#define THRESHOLD_EXCEEDED 0x5d | |
27853 | +#define LOW_POWER_COND_ON 0x5e | |
27854 | + | |
27855 | + | |
27856 | + | |
27857 | +/* Number os SCSI op_code */ | |
27858 | +#define VSCSI_MAX_SCSI_OP_CODE 256 | |
27859 | +static unsigned char bitmap[VSCSI_MAX_SCSI_OP_CODE]; | |
27860 | + | |
27861 | + | |
27862 | + | |
27863 | +/* | |
27864 | + Emulation routines for each SCSI op_code. | |
27865 | +*/ | |
27866 | +static void (*pre_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *); | |
27867 | +static void (*post_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *); | |
27868 | + | |
27869 | + | |
27870 | +static const int check_condition_result = | |
27871 | + (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; | |
27872 | + | |
27873 | +static void scsiback_mk_sense_buffer(uint8_t *data, uint8_t key, | |
27874 | + uint8_t asc, uint8_t asq) | |
27875 | +{ | |
27876 | + data[0] = 0x70; /* fixed, current */ | |
27877 | + data[2] = key; | |
27878 | + data[7] = 0xa; /* implies 18 byte sense buffer */ | |
27879 | + data[12] = asc; | |
27880 | + data[13] = asq; | |
27881 | +} | |
27882 | + | |
27883 | +static void resp_not_supported_cmd(pending_req_t *pending_req, void *data) | |
27884 | +{ | |
27885 | + scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST, | |
27886 | + INVALID_OPCODE, 0); | |
27887 | + pending_req->resid = 0; | |
27888 | + pending_req->rslt = check_condition_result; | |
27889 | +} | |
27890 | + | |
27891 | + | |
27892 | +static int __copy_to_sg(struct scatterlist *sg, unsigned int nr_sg, | |
27893 | + void *buf, unsigned int buflen) | |
27894 | +{ | |
27895 | + void *from = buf; | |
27896 | + void *to; | |
27897 | + unsigned int from_rest = buflen; | |
27898 | + unsigned int to_capa; | |
27899 | + unsigned int copy_size = 0; | |
27900 | + unsigned int i; | |
27901 | + unsigned long pfn; | |
27902 | + | |
27903 | + for (i = 0; i < nr_sg; i++) { | |
27904 | + if (sg->page == NULL) { | |
27905 | + printk(KERN_WARNING "%s: inconsistent length field in " | |
27906 | + "scatterlist\n", __FUNCTION__); | |
27907 | + return -ENOMEM; | |
27908 | + } | |
27909 | + | |
27910 | + to_capa = sg->length; | |
27911 | + copy_size = min_t(unsigned int, to_capa, from_rest); | |
27912 | + | |
27913 | + pfn = page_to_pfn(sg->page); | |
27914 | + to = pfn_to_kaddr(pfn) + (sg->offset); | |
27915 | + memcpy(to, from, copy_size); | |
27916 | + | |
27917 | + from_rest -= copy_size; | |
27918 | + if (from_rest == 0) { | |
27919 | + return 0; | |
27920 | + } | |
27921 | + | |
27922 | + sg++; | |
27923 | + from += copy_size; | |
27924 | + } | |
27925 | + | |
27926 | + printk(KERN_WARNING "%s: no space in scatterlist\n", | |
27927 | + __FUNCTION__); | |
27928 | + return -ENOMEM; | |
27929 | +} | |
27930 | + | |
27931 | +static int __copy_from_sg(struct scatterlist *sg, unsigned int nr_sg, | |
27932 | + void *buf, unsigned int buflen) | |
27933 | +{ | |
27934 | + void *from; | |
27935 | + void *to = buf; | |
27936 | + unsigned int from_rest; | |
27937 | + unsigned int to_capa = buflen; | |
27938 | + unsigned int copy_size; | |
27939 | + unsigned int i; | |
27940 | + unsigned long pfn; | |
27941 | + | |
27942 | + for (i = 0; i < nr_sg; i++) { | |
27943 | + if (sg->page == NULL) { | |
27944 | + printk(KERN_WARNING "%s: inconsistent length field in " | |
27945 | + "scatterlist\n", __FUNCTION__); | |
27946 | + return -ENOMEM; | |
27947 | + } | |
27948 | + | |
27949 | + from_rest = sg->length; | |
27950 | + if ((from_rest > 0) && (to_capa < from_rest)) { | |
27951 | + printk(KERN_WARNING | |
27952 | + "%s: no space in destination buffer\n", | |
27953 | + __FUNCTION__); | |
27954 | + return -ENOMEM; | |
27955 | + } | |
27956 | + copy_size = from_rest; | |
27957 | + | |
27958 | + pfn = page_to_pfn(sg->page); | |
27959 | + from = pfn_to_kaddr(pfn) + (sg->offset); | |
27960 | + memcpy(to, from, copy_size); | |
27961 | + | |
27962 | + to_capa -= copy_size; | |
27963 | + | |
27964 | + sg++; | |
27965 | + to += copy_size; | |
27966 | + } | |
27967 | + | |
27968 | + return 0; | |
27969 | +} | |
27970 | + | |
27971 | +static int __nr_luns_under_host(struct vscsibk_info *info) | |
27972 | +{ | |
27973 | + struct v2p_entry *entry; | |
27974 | + struct list_head *head = &(info->v2p_entry_lists); | |
27975 | + unsigned long flags; | |
27976 | + int lun_cnt = 0; | |
27977 | + | |
27978 | + spin_lock_irqsave(&info->v2p_lock, flags); | |
27979 | + list_for_each_entry(entry, head, l) { | |
27980 | + lun_cnt++; | |
27981 | + } | |
27982 | + spin_unlock_irqrestore(&info->v2p_lock, flags); | |
27983 | + | |
27984 | + return (lun_cnt); | |
27985 | +} | |
27986 | + | |
27987 | + | |
27988 | +/* REPORT LUNS Define*/ | |
27989 | +#define VSCSI_REPORT_LUNS_HEADER 8 | |
27990 | +#define VSCSI_REPORT_LUNS_RETRY 3 | |
27991 | + | |
27992 | +/* quoted scsi_debug.c/resp_report_luns() */ | |
27993 | +static void __report_luns(pending_req_t *pending_req, void *data) | |
27994 | +{ | |
27995 | + struct vscsibk_info *info = pending_req->info; | |
27996 | + unsigned int channel = pending_req->sdev->channel; | |
27997 | + unsigned int target = pending_req->sdev->id; | |
27998 | + unsigned int nr_seg = pending_req->nr_segments; | |
27999 | + unsigned char *cmd = (unsigned char *)pending_req->cmnd; | |
28000 | + | |
28001 | + unsigned char *buff = NULL; | |
28002 | + unsigned char alloc_len; | |
28003 | + unsigned int alloc_luns = 0; | |
28004 | + unsigned int req_bufflen = 0; | |
28005 | + unsigned int actual_len = 0; | |
28006 | + unsigned int retry_cnt = 0; | |
28007 | + int select_report = (int)cmd[2]; | |
28008 | + int i, lun_cnt = 0, lun, upper, err = 0; | |
28009 | + | |
28010 | + struct v2p_entry *entry; | |
28011 | + struct list_head *head = &(info->v2p_entry_lists); | |
28012 | + unsigned long flags; | |
28013 | + | |
28014 | + struct scsi_lun *one_lun; | |
28015 | + | |
28016 | + req_bufflen = cmd[9] + (cmd[8] << 8) + (cmd[7] << 16) + (cmd[6] << 24); | |
28017 | + if ((req_bufflen < 4) || (select_report != 0)) | |
28018 | + goto fail; | |
28019 | + | |
28020 | + alloc_luns = __nr_luns_under_host(info); | |
28021 | + alloc_len = sizeof(struct scsi_lun) * alloc_luns | |
28022 | + + VSCSI_REPORT_LUNS_HEADER; | |
28023 | +retry: | |
28024 | + if ((buff = kmalloc(alloc_len, GFP_KERNEL)) == NULL) { | |
28025 | + printk(KERN_ERR "scsiback:%s kmalloc err\n", __FUNCTION__); | |
28026 | + goto fail; | |
28027 | + } | |
28028 | + | |
28029 | + memset(buff, 0, alloc_len); | |
28030 | + | |
28031 | + one_lun = (struct scsi_lun *) &buff[8]; | |
28032 | + spin_lock_irqsave(&info->v2p_lock, flags); | |
28033 | + list_for_each_entry(entry, head, l) { | |
28034 | + if ((entry->v.chn == channel) && | |
28035 | + (entry->v.tgt == target)) { | |
28036 | + | |
28037 | + /* check overflow */ | |
28038 | + if (lun_cnt >= alloc_luns) { | |
28039 | + spin_unlock_irqrestore(&info->v2p_lock, | |
28040 | + flags); | |
28041 | + | |
28042 | + if (retry_cnt < VSCSI_REPORT_LUNS_RETRY) { | |
28043 | + retry_cnt++; | |
28044 | + if (buff) | |
28045 | + kfree(buff); | |
28046 | + goto retry; | |
28047 | + } | |
28048 | + | |
28049 | + goto fail; | |
28050 | + } | |
28051 | + | |
28052 | + lun = entry->v.lun; | |
28053 | + upper = (lun >> 8) & 0x3f; | |
28054 | + if (upper) | |
28055 | + one_lun[lun_cnt].scsi_lun[0] = upper; | |
28056 | + one_lun[lun_cnt].scsi_lun[1] = lun & 0xff; | |
28057 | + lun_cnt++; | |
28058 | + } | |
28059 | + } | |
28060 | + | |
28061 | + spin_unlock_irqrestore(&info->v2p_lock, flags); | |
28062 | + | |
28063 | + buff[2] = ((sizeof(struct scsi_lun) * lun_cnt) >> 8) & 0xff; | |
28064 | + buff[3] = (sizeof(struct scsi_lun) * lun_cnt) & 0xff; | |
28065 | + | |
28066 | + actual_len = lun_cnt * sizeof(struct scsi_lun) | |
28067 | + + VSCSI_REPORT_LUNS_HEADER; | |
28068 | + req_bufflen = 0; | |
28069 | + for (i = 0; i < nr_seg; i++) | |
28070 | + req_bufflen += pending_req->sgl[i].length; | |
28071 | + | |
28072 | + err = __copy_to_sg(pending_req->sgl, nr_seg, buff, | |
28073 | + min(req_bufflen, actual_len)); | |
28074 | + if (err) | |
28075 | + goto fail; | |
28076 | + | |
28077 | + memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE); | |
28078 | + pending_req->rslt = 0x00; | |
28079 | + pending_req->resid = req_bufflen - min(req_bufflen, actual_len); | |
28080 | + | |
28081 | + kfree(buff); | |
28082 | + return; | |
28083 | + | |
28084 | +fail: | |
28085 | + scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST, | |
28086 | + INVALID_FIELD_IN_CDB, 0); | |
28087 | + pending_req->rslt = check_condition_result; | |
28088 | + pending_req->resid = 0; | |
28089 | + if (buff) | |
28090 | + kfree(buff); | |
28091 | + return; | |
28092 | +} | |
28093 | + | |
28094 | + | |
28095 | + | |
28096 | +int __pre_do_emulation(pending_req_t *pending_req, void *data) | |
28097 | +{ | |
28098 | + uint8_t op_code = pending_req->cmnd[0]; | |
28099 | + | |
28100 | + if ((bitmap[op_code] & VSCSIIF_NEED_EMULATE_REQBUF) && | |
28101 | + pre_function[op_code] != NULL) { | |
28102 | + pre_function[op_code](pending_req, data); | |
28103 | + } | |
28104 | + | |
28105 | + /* | |
28106 | + 0: no need for native driver call, so should return immediately. | |
28107 | + 1: non emulation or should call native driver | |
28108 | + after modifing the request buffer. | |
28109 | + */ | |
28110 | + return !!(bitmap[op_code] & VSCSIIF_NEED_CMD_EXEC); | |
28111 | +} | |
28112 | + | |
28113 | +void scsiback_rsp_emulation(pending_req_t *pending_req) | |
28114 | +{ | |
28115 | + uint8_t op_code = pending_req->cmnd[0]; | |
28116 | + | |
28117 | + if ((bitmap[op_code] & VSCSIIF_NEED_EMULATE_RSPBUF) && | |
28118 | + post_function[op_code] != NULL) { | |
28119 | + post_function[op_code](pending_req, NULL); | |
28120 | + } | |
28121 | + | |
28122 | + return; | |
28123 | +} | |
28124 | + | |
28125 | + | |
28126 | +void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req) | |
28127 | +{ | |
28128 | + if (__pre_do_emulation(pending_req, NULL)) { | |
28129 | + scsiback_cmd_exec(pending_req); | |
28130 | + } | |
28131 | + else { | |
28132 | + scsiback_fast_flush_area(pending_req); | |
28133 | + scsiback_do_resp_with_sense(pending_req->sense_buffer, | |
28134 | + pending_req->rslt, pending_req->resid, pending_req); | |
28135 | + } | |
28136 | +} | |
28137 | + | |
28138 | + | |
28139 | +/* | |
28140 | + Following are not customizable functions. | |
28141 | +*/ | |
28142 | +void scsiback_emulation_init(void) | |
28143 | +{ | |
28144 | + int i; | |
28145 | + | |
28146 | + /* Initialize to default state */ | |
28147 | + for (i = 0; i < VSCSI_MAX_SCSI_OP_CODE; i++) { | |
28148 | + bitmap[i] = (VSCSIIF_NEED_EMULATE_REQBUF | | |
28149 | + VSCSIIF_NEED_EMULATE_RSPBUF); | |
28150 | + pre_function[i] = resp_not_supported_cmd; | |
28151 | + post_function[i] = NULL; | |
28152 | + /* means, | |
28153 | + - no need for pre-emulation | |
28154 | + - no need for post-emulation | |
28155 | + - call native driver | |
28156 | + */ | |
28157 | + } | |
28158 | + | |
28159 | + /* | |
28160 | + Register appropriate functions below as you need. | |
28161 | + (See scsi/scsi.h for definition of SCSI op_code.) | |
28162 | + */ | |
28163 | + | |
28164 | + /* | |
28165 | + This command is Non emulation. | |
28166 | + */ | |
28167 | + bitmap[TEST_UNIT_READY] = VSCSIIF_NEED_CMD_EXEC; | |
28168 | + pre_function[TEST_UNIT_READY] = NULL; | |
28169 | + post_function[TEST_UNIT_READY] = NULL; | |
28170 | + | |
28171 | + bitmap[REZERO_UNIT] = VSCSIIF_NEED_CMD_EXEC; | |
28172 | + pre_function[REZERO_UNIT] = NULL; | |
28173 | + post_function[REZERO_UNIT] = NULL; | |
28174 | + | |
28175 | + bitmap[REQUEST_SENSE] = VSCSIIF_NEED_CMD_EXEC; | |
28176 | + pre_function[REQUEST_SENSE] = NULL; | |
28177 | + post_function[REQUEST_SENSE] = NULL; | |
28178 | + | |
28179 | + bitmap[FORMAT_UNIT] = VSCSIIF_NEED_CMD_EXEC; | |
28180 | + pre_function[FORMAT_UNIT] = NULL; | |
28181 | + post_function[FORMAT_UNIT] = NULL; | |
28182 | + | |
28183 | + bitmap[READ_BLOCK_LIMITS] = VSCSIIF_NEED_CMD_EXEC; | |
28184 | + pre_function[READ_BLOCK_LIMITS] = NULL; | |
28185 | + post_function[READ_BLOCK_LIMITS] = NULL; | |
28186 | + | |
28187 | + bitmap[READ_6] = VSCSIIF_NEED_CMD_EXEC; | |
28188 | + pre_function[READ_6] = NULL; | |
28189 | + post_function[READ_6] = NULL; | |
28190 | + | |
28191 | + bitmap[WRITE_6] = VSCSIIF_NEED_CMD_EXEC; | |
28192 | + pre_function[WRITE_6] = NULL; | |
28193 | + post_function[WRITE_6] = NULL; | |
28194 | + | |
28195 | + bitmap[WRITE_FILEMARKS] = VSCSIIF_NEED_CMD_EXEC; | |
28196 | + pre_function[WRITE_FILEMARKS] = NULL; | |
28197 | + post_function[WRITE_FILEMARKS] = NULL; | |
28198 | + | |
28199 | + bitmap[SPACE] = VSCSIIF_NEED_CMD_EXEC; | |
28200 | + pre_function[SPACE] = NULL; | |
28201 | + post_function[SPACE] = NULL; | |
28202 | + | |
28203 | + bitmap[INQUIRY] = VSCSIIF_NEED_CMD_EXEC; | |
28204 | + pre_function[INQUIRY] = NULL; | |
28205 | + post_function[INQUIRY] = NULL; | |
28206 | + | |
28207 | + bitmap[ERASE] = VSCSIIF_NEED_CMD_EXEC; | |
28208 | + pre_function[ERASE] = NULL; | |
28209 | + post_function[ERASE] = NULL; | |
28210 | + | |
28211 | + bitmap[MODE_SENSE] = VSCSIIF_NEED_CMD_EXEC; | |
28212 | + pre_function[MODE_SENSE] = NULL; | |
28213 | + post_function[MODE_SENSE] = NULL; | |
28214 | + | |
28215 | + bitmap[SEND_DIAGNOSTIC] = VSCSIIF_NEED_CMD_EXEC; | |
28216 | + pre_function[SEND_DIAGNOSTIC] = NULL; | |
28217 | + post_function[SEND_DIAGNOSTIC] = NULL; | |
28218 | + | |
28219 | + bitmap[READ_CAPACITY] = VSCSIIF_NEED_CMD_EXEC; | |
28220 | + pre_function[READ_CAPACITY] = NULL; | |
28221 | + post_function[READ_CAPACITY] = NULL; | |
28222 | + | |
28223 | + bitmap[READ_10] = VSCSIIF_NEED_CMD_EXEC; | |
28224 | + pre_function[READ_10] = NULL; | |
28225 | + post_function[READ_10] = NULL; | |
28226 | + | |
28227 | + bitmap[WRITE_10] = VSCSIIF_NEED_CMD_EXEC; | |
28228 | + pre_function[WRITE_10] = NULL; | |
28229 | + post_function[WRITE_10] = NULL; | |
28230 | + | |
28231 | + /* | |
28232 | + This command is Full emulation. | |
28233 | + */ | |
28234 | + pre_function[REPORT_LUNS] = __report_luns; | |
28235 | + bitmap[REPORT_LUNS] = (VSCSIIF_NEED_EMULATE_REQBUF | | |
28236 | + VSCSIIF_NEED_EMULATE_RSPBUF); | |
28237 | + | |
28238 | + return; | |
28239 | +} | |
28240 | Index: head-2008-11-25/drivers/xen/scsiback/interface.c | |
28241 | =================================================================== | |
28242 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
28243 | +++ head-2008-11-25/drivers/xen/scsiback/interface.c 2008-07-21 11:00:33.000000000 +0200 | |
28244 | @@ -0,0 +1,182 @@ | |
28245 | +/* | |
28246 | + * interface management. | |
28247 | + * | |
28248 | + * Copyright (c) 2008, FUJITSU Limited | |
28249 | + * | |
28250 | + * Based on the blkback driver code. | |
28251 | + * | |
28252 | + * This program is free software; you can redistribute it and/or | |
28253 | + * modify it under the terms of the GNU General Public License version 2 | |
28254 | + * as published by the Free Software Foundation; or, when distributed | |
28255 | + * separately from the Linux kernel or incorporated into other | |
28256 | + * software packages, subject to the following license: | |
28257 | + * | |
28258 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
28259 | + * of this source file (the "Software"), to deal in the Software without | |
28260 | + * restriction, including without limitation the rights to use, copy, modify, | |
28261 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
28262 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
28263 | + * the following conditions: | |
28264 | + * | |
28265 | + * The above copyright notice and this permission notice shall be included in | |
28266 | + * all copies or substantial portions of the Software. | |
28267 | + * | |
28268 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
28269 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
28270 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
28271 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
28272 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
28273 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
28274 | + * IN THE SOFTWARE. | |
28275 | + */ | |
28276 | + | |
28277 | +#include <scsi/scsi.h> | |
28278 | +#include <scsi/scsi_host.h> | |
28279 | +#include <scsi/scsi_device.h> | |
28280 | +#include "common.h" | |
28281 | + | |
28282 | +#include <xen/evtchn.h> | |
28283 | +#include <linux/kthread.h> | |
28284 | + | |
28285 | + | |
28286 | +static kmem_cache_t *scsiback_cachep; | |
28287 | + | |
28288 | +struct vscsibk_info *vscsibk_info_alloc(domid_t domid) | |
28289 | +{ | |
28290 | + struct vscsibk_info *info; | |
28291 | + | |
28292 | + info = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL); | |
28293 | + if (!info) | |
28294 | + return ERR_PTR(-ENOMEM); | |
28295 | + | |
28296 | + memset(info, 0, sizeof(*info)); | |
28297 | + info->domid = domid; | |
28298 | + spin_lock_init(&info->ring_lock); | |
28299 | + atomic_set(&info->nr_unreplied_reqs, 0); | |
28300 | + init_waitqueue_head(&info->wq); | |
28301 | + init_waitqueue_head(&info->waiting_to_free); | |
28302 | + | |
28303 | + return info; | |
28304 | +} | |
28305 | + | |
28306 | +static int map_frontend_page( struct vscsibk_info *info, | |
28307 | + unsigned long ring_ref) | |
28308 | +{ | |
28309 | + struct gnttab_map_grant_ref op; | |
28310 | + int err; | |
28311 | + | |
28312 | + gnttab_set_map_op(&op, (unsigned long)info->ring_area->addr, | |
28313 | + GNTMAP_host_map, ring_ref, | |
28314 | + info->domid); | |
28315 | + | |
28316 | + err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); | |
28317 | + BUG_ON(err); | |
28318 | + | |
28319 | + if (op.status) { | |
28320 | + printk(KERN_ERR "scsiback: Grant table operation failure !\n"); | |
28321 | + return op.status; | |
28322 | + } | |
28323 | + | |
28324 | + info->shmem_ref = ring_ref; | |
28325 | + info->shmem_handle = op.handle; | |
28326 | + | |
28327 | + return (GNTST_okay); | |
28328 | +} | |
28329 | + | |
28330 | +static void unmap_frontend_page(struct vscsibk_info *info) | |
28331 | +{ | |
28332 | + struct gnttab_unmap_grant_ref op; | |
28333 | + int err; | |
28334 | + | |
28335 | + gnttab_set_unmap_op(&op, (unsigned long)info->ring_area->addr, | |
28336 | + GNTMAP_host_map, info->shmem_handle); | |
28337 | + | |
28338 | + err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); | |
28339 | + BUG_ON(err); | |
28340 | + | |
28341 | +} | |
28342 | + | |
28343 | +int scsiback_init_sring(struct vscsibk_info *info, | |
28344 | + unsigned long ring_ref, unsigned int evtchn) | |
28345 | +{ | |
28346 | + struct vscsiif_sring *sring; | |
28347 | + int err; | |
28348 | + | |
28349 | + if (info->irq) { | |
28350 | + printk(KERN_ERR "scsiback: Already connected through?\n"); | |
28351 | + return -1; | |
28352 | + } | |
28353 | + | |
28354 | + info->ring_area = alloc_vm_area(PAGE_SIZE); | |
28355 | + if (!info) | |
28356 | + return -ENOMEM; | |
28357 | + | |
28358 | + err = map_frontend_page(info, ring_ref); | |
28359 | + if (err) | |
28360 | + goto free_vm; | |
28361 | + | |
28362 | + sring = (struct vscsiif_sring *) info->ring_area->addr; | |
28363 | + BACK_RING_INIT(&info->ring, sring, PAGE_SIZE); | |
28364 | + | |
28365 | + err = bind_interdomain_evtchn_to_irqhandler( | |
28366 | + info->domid, evtchn, | |
28367 | + scsiback_intr, 0, "vscsiif-backend", info); | |
28368 | + | |
28369 | + if (err < 0) | |
28370 | + goto unmap_page; | |
28371 | + | |
28372 | + info->irq = err; | |
28373 | + | |
28374 | + return 0; | |
28375 | + | |
28376 | +unmap_page: | |
28377 | + unmap_frontend_page(info); | |
28378 | +free_vm: | |
28379 | + free_vm_area(info->ring_area); | |
28380 | + | |
28381 | + return err; | |
28382 | +} | |
28383 | + | |
28384 | +void scsiback_disconnect(struct vscsibk_info *info) | |
28385 | +{ | |
28386 | + if (info->kthread) { | |
28387 | + kthread_stop(info->kthread); | |
28388 | + info->kthread = NULL; | |
28389 | + } | |
28390 | + | |
28391 | + wait_event(info->waiting_to_free, | |
28392 | + atomic_read(&info->nr_unreplied_reqs) == 0); | |
28393 | + | |
28394 | + if (info->irq) { | |
28395 | + unbind_from_irqhandler(info->irq, info); | |
28396 | + info->irq = 0; | |
28397 | + } | |
28398 | + | |
28399 | + if (info->ring.sring) { | |
28400 | + unmap_frontend_page(info); | |
28401 | + free_vm_area(info->ring_area); | |
28402 | + info->ring.sring = NULL; | |
28403 | + } | |
28404 | +} | |
28405 | + | |
28406 | +void scsiback_free(struct vscsibk_info *info) | |
28407 | +{ | |
28408 | + kmem_cache_free(scsiback_cachep, info); | |
28409 | +} | |
28410 | + | |
28411 | +int __init scsiback_interface_init(void) | |
28412 | +{ | |
28413 | + scsiback_cachep = kmem_cache_create("vscsiif_cache", | |
28414 | + sizeof(struct vscsibk_info), 0, 0, NULL, NULL); | |
28415 | + if (!scsiback_cachep) { | |
28416 | + printk(KERN_ERR "scsiback: can't init scsi cache\n"); | |
28417 | + return -ENOMEM; | |
28418 | + } | |
28419 | + | |
28420 | + return 0; | |
28421 | +} | |
28422 | + | |
28423 | +void scsiback_interface_exit(void) | |
28424 | +{ | |
28425 | + kmem_cache_destroy(scsiback_cachep); | |
28426 | +} | |
28427 | Index: head-2008-11-25/drivers/xen/scsiback/scsiback.c | |
28428 | =================================================================== | |
28429 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
28430 | +++ head-2008-11-25/drivers/xen/scsiback/scsiback.c 2008-07-21 11:00:33.000000000 +0200 | |
28431 | @@ -0,0 +1,717 @@ | |
28432 | +/* | |
28433 | + * Xen SCSI backend driver | |
28434 | + * | |
28435 | + * Copyright (c) 2008, FUJITSU Limited | |
28436 | + * | |
28437 | + * Based on the blkback driver code. | |
28438 | + * | |
28439 | + * This program is free software; you can redistribute it and/or | |
28440 | + * modify it under the terms of the GNU General Public License version 2 | |
28441 | + * as published by the Free Software Foundation; or, when distributed | |
28442 | + * separately from the Linux kernel or incorporated into other | |
28443 | + * software packages, subject to the following license: | |
28444 | + * | |
28445 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
28446 | + * of this source file (the "Software"), to deal in the Software without | |
28447 | + * restriction, including without limitation the rights to use, copy, modify, | |
28448 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
28449 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
28450 | + * the following conditions: | |
28451 | + * | |
28452 | + * The above copyright notice and this permission notice shall be included in | |
28453 | + * all copies or substantial portions of the Software. | |
28454 | + * | |
28455 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
28456 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
28457 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
28458 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
28459 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
28460 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
28461 | + * IN THE SOFTWARE. | |
28462 | + */ | |
28463 | + | |
28464 | +#include <linux/spinlock.h> | |
28465 | +#include <linux/kthread.h> | |
28466 | +#include <linux/list.h> | |
28467 | +#include <linux/delay.h> | |
28468 | +#include <xen/balloon.h> | |
28469 | +#include <asm/hypervisor.h> | |
28470 | +#include <scsi/scsi.h> | |
28471 | +#include <scsi/scsi_cmnd.h> | |
28472 | +#include <scsi/scsi_host.h> | |
28473 | +#include <scsi/scsi_device.h> | |
28474 | +#include <scsi/scsi_dbg.h> | |
28475 | +#include <scsi/scsi_eh.h> | |
28476 | + | |
28477 | +#include "common.h" | |
28478 | + | |
28479 | + | |
28480 | +struct list_head pending_free; | |
28481 | +DEFINE_SPINLOCK(pending_free_lock); | |
28482 | +DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); | |
28483 | + | |
28484 | +int vscsiif_reqs = VSCSIIF_BACK_MAX_PENDING_REQS; | |
28485 | +module_param_named(reqs, vscsiif_reqs, int, 0); | |
28486 | +MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate"); | |
28487 | + | |
28488 | +static unsigned int log_print_stat = 0; | |
28489 | +module_param(log_print_stat, int, 0644); | |
28490 | + | |
28491 | +#define SCSIBACK_INVALID_HANDLE (~0) | |
28492 | + | |
28493 | +static pending_req_t *pending_reqs; | |
28494 | +static struct page **pending_pages; | |
28495 | +static grant_handle_t *pending_grant_handles; | |
28496 | + | |
28497 | +static int vaddr_pagenr(pending_req_t *req, int seg) | |
28498 | +{ | |
28499 | + return (req - pending_reqs) * VSCSIIF_SG_TABLESIZE + seg; | |
28500 | +} | |
28501 | + | |
28502 | +static unsigned long vaddr(pending_req_t *req, int seg) | |
28503 | +{ | |
28504 | + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); | |
28505 | + return (unsigned long)pfn_to_kaddr(pfn); | |
28506 | +} | |
28507 | + | |
28508 | +#define pending_handle(_req, _seg) \ | |
28509 | + (pending_grant_handles[vaddr_pagenr(_req, _seg)]) | |
28510 | + | |
28511 | + | |
28512 | +void scsiback_fast_flush_area(pending_req_t *req) | |
28513 | +{ | |
28514 | + struct gnttab_unmap_grant_ref unmap[VSCSIIF_SG_TABLESIZE]; | |
28515 | + unsigned int i, invcount = 0; | |
28516 | + grant_handle_t handle; | |
28517 | + int err; | |
28518 | + | |
28519 | + if (req->nr_segments) { | |
28520 | + for (i = 0; i < req->nr_segments; i++) { | |
28521 | + handle = pending_handle(req, i); | |
28522 | + if (handle == SCSIBACK_INVALID_HANDLE) | |
28523 | + continue; | |
28524 | + gnttab_set_unmap_op(&unmap[i], vaddr(req, i), | |
28525 | + GNTMAP_host_map, handle); | |
28526 | + pending_handle(req, i) = SCSIBACK_INVALID_HANDLE; | |
28527 | + invcount++; | |
28528 | + } | |
28529 | + | |
28530 | + err = HYPERVISOR_grant_table_op( | |
28531 | + GNTTABOP_unmap_grant_ref, unmap, invcount); | |
28532 | + BUG_ON(err); | |
28533 | + kfree(req->sgl); | |
28534 | + } | |
28535 | + | |
28536 | + return; | |
28537 | +} | |
28538 | + | |
28539 | + | |
28540 | +static pending_req_t * alloc_req(struct vscsibk_info *info) | |
28541 | +{ | |
28542 | + pending_req_t *req = NULL; | |
28543 | + unsigned long flags; | |
28544 | + | |
28545 | + spin_lock_irqsave(&pending_free_lock, flags); | |
28546 | + if (!list_empty(&pending_free)) { | |
28547 | + req = list_entry(pending_free.next, pending_req_t, free_list); | |
28548 | + list_del(&req->free_list); | |
28549 | + } | |
28550 | + spin_unlock_irqrestore(&pending_free_lock, flags); | |
28551 | + return req; | |
28552 | +} | |
28553 | + | |
28554 | + | |
28555 | +static void free_req(pending_req_t *req) | |
28556 | +{ | |
28557 | + unsigned long flags; | |
28558 | + int was_empty; | |
28559 | + | |
28560 | + spin_lock_irqsave(&pending_free_lock, flags); | |
28561 | + was_empty = list_empty(&pending_free); | |
28562 | + list_add(&req->free_list, &pending_free); | |
28563 | + spin_unlock_irqrestore(&pending_free_lock, flags); | |
28564 | + if (was_empty) | |
28565 | + wake_up(&pending_free_wq); | |
28566 | +} | |
28567 | + | |
28568 | + | |
28569 | +static void scsiback_notify_work(struct vscsibk_info *info) | |
28570 | +{ | |
28571 | + info->waiting_reqs = 1; | |
28572 | + wake_up(&info->wq); | |
28573 | +} | |
28574 | + | |
28575 | +void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result, | |
28576 | + uint32_t resid, pending_req_t *pending_req) | |
28577 | +{ | |
28578 | + vscsiif_response_t *ring_res; | |
28579 | + struct vscsibk_info *info = pending_req->info; | |
28580 | + int notify; | |
28581 | + int more_to_do = 1; | |
28582 | + unsigned long flags; | |
28583 | + | |
28584 | + DPRINTK("%s\n",__FUNCTION__); | |
28585 | + | |
28586 | + spin_lock_irqsave(&info->ring_lock, flags); | |
28587 | + | |
28588 | + ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt); | |
28589 | + info->ring.rsp_prod_pvt++; | |
28590 | + | |
28591 | + ring_res->rslt = result; | |
28592 | + ring_res->rqid = pending_req->rqid; | |
28593 | + | |
28594 | + if (sense_buffer != NULL) { | |
28595 | + memcpy(ring_res->sense_buffer, sense_buffer, | |
28596 | + VSCSIIF_SENSE_BUFFERSIZE); | |
28597 | + ring_res->sense_len = VSCSIIF_SENSE_BUFFERSIZE; | |
28598 | + } else { | |
28599 | + ring_res->sense_len = 0; | |
28600 | + } | |
28601 | + | |
28602 | + ring_res->residual_len = resid; | |
28603 | + | |
28604 | + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info->ring, notify); | |
28605 | + if (info->ring.rsp_prod_pvt == info->ring.req_cons) { | |
28606 | + RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do); | |
28607 | + } else if (RING_HAS_UNCONSUMED_REQUESTS(&info->ring)) { | |
28608 | + more_to_do = 1; | |
28609 | + } | |
28610 | + | |
28611 | + spin_unlock_irqrestore(&info->ring_lock, flags); | |
28612 | + | |
28613 | + if (more_to_do) | |
28614 | + scsiback_notify_work(info); | |
28615 | + | |
28616 | + if (notify) | |
28617 | + notify_remote_via_irq(info->irq); | |
28618 | + | |
28619 | + free_req(pending_req); | |
28620 | +} | |
28621 | + | |
28622 | +static void scsiback_print_status(char *sense_buffer, int errors, | |
28623 | + pending_req_t *pending_req) | |
28624 | +{ | |
28625 | + struct scsi_device *sdev = pending_req->sdev; | |
28626 | + | |
28627 | + printk(KERN_ERR "scsiback: %d:%d:%d:%d ",sdev->host->host_no, | |
28628 | + sdev->channel, sdev->id, sdev->lun); | |
28629 | + printk(KERN_ERR "status = 0x%02x, message = 0x%02x, host = 0x%02x, driver = 0x%02x\n", | |
28630 | + status_byte(errors), msg_byte(errors), | |
28631 | + host_byte(errors), driver_byte(errors)); | |
28632 | + | |
28633 | + printk(KERN_ERR "scsiback: cmnd[0]=0x%02X\n", | |
28634 | + pending_req->cmnd[0]); | |
28635 | + | |
28636 | + if (CHECK_CONDITION & status_byte(errors)) | |
28637 | + __scsi_print_sense("scsiback", sense_buffer, SCSI_SENSE_BUFFERSIZE); | |
28638 | +} | |
28639 | + | |
28640 | + | |
28641 | +static void scsiback_cmd_done(struct request *req, int errors) | |
28642 | +{ | |
28643 | + pending_req_t *pending_req = req->end_io_data; | |
28644 | + unsigned char *sense_buffer; | |
28645 | + unsigned int resid; | |
28646 | + | |
28647 | + sense_buffer = req->sense; | |
28648 | + resid = req->data_len; | |
28649 | + | |
28650 | + if (errors != 0) { | |
28651 | + if (log_print_stat) | |
28652 | + scsiback_print_status(sense_buffer, errors, pending_req); | |
28653 | + } | |
28654 | + | |
28655 | + scsiback_rsp_emulation(pending_req); | |
28656 | + | |
28657 | + scsiback_fast_flush_area(pending_req); | |
28658 | + scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req); | |
28659 | + scsiback_put(pending_req->info); | |
28660 | + | |
28661 | + __blk_put_request(req->q, req); | |
28662 | +} | |
28663 | + | |
28664 | + | |
28665 | +static int scsiback_gnttab_data_map(vscsiif_request_t *ring_req, | |
28666 | + pending_req_t *pending_req) | |
28667 | +{ | |
28668 | + u32 flags; | |
28669 | + int write; | |
28670 | + int i, err = 0; | |
28671 | + unsigned int data_len = 0; | |
28672 | + struct gnttab_map_grant_ref map[VSCSIIF_SG_TABLESIZE]; | |
28673 | + struct vscsibk_info *info = pending_req->info; | |
28674 | + | |
28675 | + int data_dir = (int)pending_req->sc_data_direction; | |
28676 | + unsigned int nr_segments = (unsigned int)pending_req->nr_segments; | |
28677 | + | |
28678 | + write = (data_dir == DMA_TO_DEVICE); | |
28679 | + | |
28680 | + if (nr_segments) { | |
28681 | + /* free of (sgl) in fast_flush_area()*/ | |
28682 | + pending_req->sgl = kmalloc(sizeof(struct scatterlist) * nr_segments, | |
28683 | + GFP_KERNEL); | |
28684 | + if (!pending_req->sgl) { | |
28685 | + printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__); | |
28686 | + return -ENOMEM; | |
28687 | + } | |
28688 | + | |
28689 | + for (i = 0; i < nr_segments; i++) { | |
28690 | + flags = GNTMAP_host_map; | |
28691 | + if (write) | |
28692 | + flags |= GNTMAP_readonly; | |
28693 | + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, | |
28694 | + ring_req->seg[i].gref, | |
28695 | + info->domid); | |
28696 | + } | |
28697 | + | |
28698 | + err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nr_segments); | |
28699 | + BUG_ON(err); | |
28700 | + | |
28701 | + for (i = 0; i < nr_segments; i++) { | |
28702 | + if (unlikely(map[i].status != 0)) { | |
28703 | + printk(KERN_ERR "scsiback: invalid buffer -- could not remap it\n"); | |
28704 | + map[i].handle = SCSIBACK_INVALID_HANDLE; | |
28705 | + err |= 1; | |
28706 | + } | |
28707 | + | |
28708 | + pending_handle(pending_req, i) = map[i].handle; | |
28709 | + | |
28710 | + if (err) | |
28711 | + continue; | |
28712 | + | |
28713 | + set_phys_to_machine(__pa(vaddr( | |
28714 | + pending_req, i)) >> PAGE_SHIFT, | |
28715 | + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); | |
28716 | + | |
28717 | + pending_req->sgl[i].page = virt_to_page(vaddr(pending_req, i)); | |
28718 | + pending_req->sgl[i].offset = ring_req->seg[i].offset; | |
28719 | + pending_req->sgl[i].length = ring_req->seg[i].length; | |
28720 | + data_len += pending_req->sgl[i].length; | |
28721 | + | |
28722 | + barrier(); | |
28723 | + if (pending_req->sgl[i].offset >= PAGE_SIZE || | |
28724 | + pending_req->sgl[i].length > PAGE_SIZE || | |
28725 | + pending_req->sgl[i].offset + pending_req->sgl[i].length > PAGE_SIZE) | |
28726 | + err |= 1; | |
28727 | + | |
28728 | + } | |
28729 | + | |
28730 | + if (err) | |
28731 | + goto fail_flush; | |
28732 | + } | |
28733 | + | |
28734 | + pending_req->request_bufflen = data_len; | |
28735 | + | |
28736 | + return 0; | |
28737 | + | |
28738 | +fail_flush: | |
28739 | + scsiback_fast_flush_area(pending_req); | |
28740 | + return -ENOMEM; | |
28741 | +} | |
28742 | + | |
28743 | +/* quoted scsi_lib.c/scsi_merge_bio */ | |
28744 | +static int scsiback_merge_bio(struct request *rq, struct bio *bio) | |
28745 | +{ | |
28746 | + struct request_queue *q = rq->q; | |
28747 | + | |
28748 | + bio->bi_flags &= ~(1 << BIO_SEG_VALID); | |
28749 | + if (rq_data_dir(rq) == WRITE) | |
28750 | + bio->bi_rw |= (1 << BIO_RW); | |
28751 | + | |
28752 | + blk_queue_bounce(q, &bio); | |
28753 | + | |
28754 | + if (!rq->bio) | |
28755 | + blk_rq_bio_prep(q, rq, bio); | |
28756 | + else if (!q->back_merge_fn(q, rq, bio)) | |
28757 | + return -EINVAL; | |
28758 | + else { | |
28759 | + rq->biotail->bi_next = bio; | |
28760 | + rq->biotail = bio; | |
28761 | + rq->hard_nr_sectors += bio_sectors(bio); | |
28762 | + rq->nr_sectors = rq->hard_nr_sectors; | |
28763 | + } | |
28764 | + | |
28765 | + return 0; | |
28766 | +} | |
28767 | + | |
28768 | + | |
28769 | +/* quoted scsi_lib.c/scsi_bi_endio */ | |
28770 | +static int scsiback_bi_endio(struct bio *bio, unsigned int bytes_done, int error) | |
28771 | +{ | |
28772 | + if (bio->bi_size) | |
28773 | + return 1; | |
28774 | + | |
28775 | + bio_put(bio); | |
28776 | + return 0; | |
28777 | +} | |
28778 | + | |
28779 | + | |
28780 | + | |
28781 | +/* quoted scsi_lib.c/scsi_req_map_sg . */ | |
28782 | +static int request_map_sg(struct request *rq, pending_req_t *pending_req, unsigned int count) | |
28783 | +{ | |
28784 | + struct request_queue *q = rq->q; | |
28785 | + int nr_pages; | |
28786 | + unsigned int nsegs = count; | |
28787 | + | |
28788 | + unsigned int data_len = 0, len, bytes, off; | |
28789 | + struct page *page; | |
28790 | + struct bio *bio = NULL; | |
28791 | + int i, err, nr_vecs = 0; | |
28792 | + | |
28793 | + for (i = 0; i < nsegs; i++) { | |
28794 | + page = pending_req->sgl[i].page; | |
28795 | + off = (unsigned int)pending_req->sgl[i].offset; | |
28796 | + len = (unsigned int)pending_req->sgl[i].length; | |
28797 | + data_len += len; | |
28798 | + | |
28799 | + nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
28800 | + while (len > 0) { | |
28801 | + bytes = min_t(unsigned int, len, PAGE_SIZE - off); | |
28802 | + | |
28803 | + if (!bio) { | |
28804 | + nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages); | |
28805 | + nr_pages -= nr_vecs; | |
28806 | + bio = bio_alloc(GFP_KERNEL, nr_vecs); | |
28807 | + if (!bio) { | |
28808 | + err = -ENOMEM; | |
28809 | + goto free_bios; | |
28810 | + } | |
28811 | + bio->bi_end_io = scsiback_bi_endio; | |
28812 | + } | |
28813 | + | |
28814 | + if (bio_add_pc_page(q, bio, page, bytes, off) != | |
28815 | + bytes) { | |
28816 | + bio_put(bio); | |
28817 | + err = -EINVAL; | |
28818 | + goto free_bios; | |
28819 | + } | |
28820 | + | |
28821 | + if (bio->bi_vcnt >= nr_vecs) { | |
28822 | + err = scsiback_merge_bio(rq, bio); | |
28823 | + if (err) { | |
28824 | + bio_endio(bio, bio->bi_size, 0); | |
28825 | + goto free_bios; | |
28826 | + } | |
28827 | + bio = NULL; | |
28828 | + } | |
28829 | + | |
28830 | + page++; | |
28831 | + len -= bytes; | |
28832 | + off = 0; | |
28833 | + } | |
28834 | + } | |
28835 | + | |
28836 | + rq->buffer = rq->data = NULL; | |
28837 | + rq->data_len = data_len; | |
28838 | + | |
28839 | + return 0; | |
28840 | + | |
28841 | +free_bios: | |
28842 | + while ((bio = rq->bio) != NULL) { | |
28843 | + rq->bio = bio->bi_next; | |
28844 | + /* | |
28845 | + * call endio instead of bio_put incase it was bounced | |
28846 | + */ | |
28847 | + bio_endio(bio, bio->bi_size, 0); | |
28848 | + } | |
28849 | + | |
28850 | + return err; | |
28851 | +} | |
28852 | + | |
28853 | + | |
28854 | +void scsiback_cmd_exec(pending_req_t *pending_req) | |
28855 | +{ | |
28856 | + int cmd_len = (int)pending_req->cmd_len; | |
28857 | + int data_dir = (int)pending_req->sc_data_direction; | |
28858 | + unsigned int nr_segments = (unsigned int)pending_req->nr_segments; | |
28859 | + unsigned int timeout; | |
28860 | + struct request *rq; | |
28861 | + int write; | |
28862 | + | |
28863 | + DPRINTK("%s\n",__FUNCTION__); | |
28864 | + | |
28865 | + /* because it doesn't timeout backend earlier than frontend.*/ | |
28866 | + if (pending_req->timeout_per_command) | |
28867 | + timeout = pending_req->timeout_per_command * HZ; | |
28868 | + else | |
28869 | + timeout = VSCSIIF_TIMEOUT; | |
28870 | + | |
28871 | + write = (data_dir == DMA_TO_DEVICE); | |
28872 | + rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL); | |
28873 | + | |
28874 | + rq->flags |= REQ_BLOCK_PC; | |
28875 | + rq->cmd_len = cmd_len; | |
28876 | + memcpy(rq->cmd, pending_req->cmnd, cmd_len); | |
28877 | + | |
28878 | + memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE); | |
28879 | + rq->sense = pending_req->sense_buffer; | |
28880 | + rq->sense_len = 0; | |
28881 | + | |
28882 | + /* not allowed to retry in backend. */ | |
28883 | + rq->retries = 0; | |
28884 | + rq->timeout = timeout; | |
28885 | + rq->end_io_data = pending_req; | |
28886 | + | |
28887 | + if (nr_segments) { | |
28888 | + | |
28889 | + if (request_map_sg(rq, pending_req, nr_segments)) { | |
28890 | + printk(KERN_ERR "scsiback: SG Request Map Error\n"); | |
28891 | + return; | |
28892 | + } | |
28893 | + } | |
28894 | + | |
28895 | + scsiback_get(pending_req->info); | |
28896 | + blk_execute_rq_nowait(rq->q, NULL, rq, 1, scsiback_cmd_done); | |
28897 | + | |
28898 | + return ; | |
28899 | +} | |
28900 | + | |
28901 | + | |
28902 | +static void scsiback_device_reset_exec(pending_req_t *pending_req) | |
28903 | +{ | |
28904 | + struct vscsibk_info *info = pending_req->info; | |
28905 | + int err; | |
28906 | + struct scsi_device *sdev = pending_req->sdev; | |
28907 | + | |
28908 | + scsiback_get(info); | |
28909 | + err = scsi_reset_provider(sdev, SCSI_TRY_RESET_DEVICE); | |
28910 | + | |
28911 | + scsiback_do_resp_with_sense(NULL, err, 0, pending_req); | |
28912 | + scsiback_put(info); | |
28913 | + | |
28914 | + return; | |
28915 | +} | |
28916 | + | |
28917 | + | |
28918 | +irqreturn_t scsiback_intr(int irq, void *dev_id, struct pt_regs *regs) | |
28919 | +{ | |
28920 | + scsiback_notify_work((struct vscsibk_info *)dev_id); | |
28921 | + return IRQ_HANDLED; | |
28922 | +} | |
28923 | + | |
28924 | +static int prepare_pending_reqs(struct vscsibk_info *info, | |
28925 | + vscsiif_request_t *ring_req, pending_req_t *pending_req) | |
28926 | +{ | |
28927 | + struct scsi_device *sdev; | |
28928 | + struct ids_tuple vir; | |
28929 | + int err = -EINVAL; | |
28930 | + | |
28931 | + DPRINTK("%s\n",__FUNCTION__); | |
28932 | + | |
28933 | + pending_req->rqid = ring_req->rqid; | |
28934 | + pending_req->act = ring_req->act; | |
28935 | + | |
28936 | + pending_req->info = info; | |
28937 | + | |
28938 | + vir.chn = ring_req->channel; | |
28939 | + vir.tgt = ring_req->id; | |
28940 | + vir.lun = ring_req->lun; | |
28941 | + | |
28942 | + rmb(); | |
28943 | + sdev = scsiback_do_translation(info, &vir); | |
28944 | + if (!sdev) { | |
28945 | + pending_req->sdev = NULL; | |
28946 | + DPRINTK("scsiback: doesn't exist.\n"); | |
28947 | + err = -ENODEV; | |
28948 | + goto invalid_value; | |
28949 | + } | |
28950 | + pending_req->sdev = sdev; | |
28951 | + | |
28952 | + /* request range check from frontend */ | |
28953 | + pending_req->sc_data_direction = ring_req->sc_data_direction; | |
28954 | + barrier(); | |
28955 | + if ((pending_req->sc_data_direction != DMA_BIDIRECTIONAL) && | |
28956 | + (pending_req->sc_data_direction != DMA_TO_DEVICE) && | |
28957 | + (pending_req->sc_data_direction != DMA_FROM_DEVICE) && | |
28958 | + (pending_req->sc_data_direction != DMA_NONE)) { | |
28959 | + DPRINTK("scsiback: invalid parameter data_dir = %d\n", | |
28960 | + pending_req->sc_data_direction); | |
28961 | + err = -EINVAL; | |
28962 | + goto invalid_value; | |
28963 | + } | |
28964 | + | |
28965 | + pending_req->nr_segments = ring_req->nr_segments; | |
28966 | + barrier(); | |
28967 | + if (pending_req->nr_segments > VSCSIIF_SG_TABLESIZE) { | |
28968 | + DPRINTK("scsiback: invalid parameter nr_seg = %d\n", | |
28969 | + pending_req->nr_segments); | |
28970 | + err = -EINVAL; | |
28971 | + goto invalid_value; | |
28972 | + } | |
28973 | + | |
28974 | + pending_req->cmd_len = ring_req->cmd_len; | |
28975 | + barrier(); | |
28976 | + if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) { | |
28977 | + DPRINTK("scsiback: invalid parameter cmd_len = %d\n", | |
28978 | + pending_req->cmd_len); | |
28979 | + err = -EINVAL; | |
28980 | + goto invalid_value; | |
28981 | + } | |
28982 | + memcpy(pending_req->cmnd, ring_req->cmnd, pending_req->cmd_len); | |
28983 | + | |
28984 | + pending_req->timeout_per_command = ring_req->timeout_per_command; | |
28985 | + | |
28986 | + if(scsiback_gnttab_data_map(ring_req, pending_req)) { | |
28987 | + DPRINTK("scsiback: invalid buffer\n"); | |
28988 | + err = -EINVAL; | |
28989 | + goto invalid_value; | |
28990 | + } | |
28991 | + | |
28992 | + return 0; | |
28993 | + | |
28994 | +invalid_value: | |
28995 | + return err; | |
28996 | +} | |
28997 | + | |
28998 | + | |
28999 | +static int scsiback_do_cmd_fn(struct vscsibk_info *info) | |
29000 | +{ | |
29001 | + struct vscsiif_back_ring *ring = &info->ring; | |
29002 | + vscsiif_request_t *ring_req; | |
29003 | + | |
29004 | + pending_req_t *pending_req; | |
29005 | + RING_IDX rc, rp; | |
29006 | + int err, more_to_do = 0; | |
29007 | + | |
29008 | + DPRINTK("%s\n",__FUNCTION__); | |
29009 | + | |
29010 | + rc = ring->req_cons; | |
29011 | + rp = ring->sring->req_prod; | |
29012 | + rmb(); | |
29013 | + | |
29014 | + while ((rc != rp)) { | |
29015 | + if (RING_REQUEST_CONS_OVERFLOW(ring, rc)) | |
29016 | + break; | |
29017 | + pending_req = alloc_req(info); | |
29018 | + if (NULL == pending_req) { | |
29019 | + more_to_do = 1; | |
29020 | + break; | |
29021 | + } | |
29022 | + | |
29023 | + ring_req = RING_GET_REQUEST(ring, rc); | |
29024 | + ring->req_cons = ++rc; | |
29025 | + | |
29026 | + err = prepare_pending_reqs(info, ring_req, | |
29027 | + pending_req); | |
29028 | + if (err == -EINVAL) { | |
29029 | + scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24), | |
29030 | + 0, pending_req); | |
29031 | + continue; | |
29032 | + } else if (err == -ENODEV) { | |
29033 | + scsiback_do_resp_with_sense(NULL, (DID_NO_CONNECT << 16), | |
29034 | + 0, pending_req); | |
29035 | + continue; | |
29036 | + } | |
29037 | + | |
29038 | + if (pending_req->act == VSCSIIF_ACT_SCSI_CDB) { | |
29039 | + scsiback_req_emulation_or_cmdexec(pending_req); | |
29040 | + } else if (pending_req->act == VSCSIIF_ACT_SCSI_RESET) { | |
29041 | + scsiback_device_reset_exec(pending_req); | |
29042 | + } else { | |
29043 | + printk(KERN_ERR "scsiback: invalid parameter for request\n"); | |
29044 | + scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24), | |
29045 | + 0, pending_req); | |
29046 | + continue; | |
29047 | + } | |
29048 | + } | |
29049 | + | |
29050 | + if (RING_HAS_UNCONSUMED_REQUESTS(ring)) | |
29051 | + more_to_do = 1; | |
29052 | + | |
29053 | + /* Yield point for this unbounded loop. */ | |
29054 | + cond_resched(); | |
29055 | + | |
29056 | + return more_to_do; | |
29057 | +} | |
29058 | + | |
29059 | + | |
29060 | +int scsiback_schedule(void *data) | |
29061 | +{ | |
29062 | + struct vscsibk_info *info = (struct vscsibk_info *)data; | |
29063 | + | |
29064 | + DPRINTK("%s\n",__FUNCTION__); | |
29065 | + | |
29066 | + while (!kthread_should_stop()) { | |
29067 | + wait_event_interruptible( | |
29068 | + info->wq, | |
29069 | + info->waiting_reqs || kthread_should_stop()); | |
29070 | + wait_event_interruptible( | |
29071 | + pending_free_wq, | |
29072 | + !list_empty(&pending_free) || kthread_should_stop()); | |
29073 | + | |
29074 | + info->waiting_reqs = 0; | |
29075 | + smp_mb(); | |
29076 | + | |
29077 | + if (scsiback_do_cmd_fn(info)) | |
29078 | + info->waiting_reqs = 1; | |
29079 | + } | |
29080 | + | |
29081 | + return 0; | |
29082 | +} | |
29083 | + | |
29084 | + | |
29085 | +static int __init scsiback_init(void) | |
29086 | +{ | |
29087 | + int i, mmap_pages; | |
29088 | + | |
29089 | + if (!is_running_on_xen()) | |
29090 | + return -ENODEV; | |
29091 | + | |
29092 | + mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE; | |
29093 | + | |
29094 | + pending_reqs = kmalloc(sizeof(pending_reqs[0]) * | |
29095 | + vscsiif_reqs, GFP_KERNEL); | |
29096 | + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * | |
29097 | + mmap_pages, GFP_KERNEL); | |
29098 | + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); | |
29099 | + | |
29100 | + if (!pending_reqs || !pending_grant_handles || !pending_pages) | |
29101 | + goto out_of_memory; | |
29102 | + | |
29103 | + for (i = 0; i < mmap_pages; i++) | |
29104 | + pending_grant_handles[i] = SCSIBACK_INVALID_HANDLE; | |
29105 | + | |
29106 | + if (scsiback_interface_init() < 0) | |
29107 | + goto out_of_kmem; | |
29108 | + | |
29109 | + memset(pending_reqs, 0, sizeof(pending_reqs)); | |
29110 | + INIT_LIST_HEAD(&pending_free); | |
29111 | + | |
29112 | + for (i = 0; i < vscsiif_reqs; i++) | |
29113 | + list_add_tail(&pending_reqs[i].free_list, &pending_free); | |
29114 | + | |
29115 | + if (scsiback_xenbus_init()) | |
29116 | + goto out_of_xenbus; | |
29117 | + | |
29118 | + scsiback_emulation_init(); | |
29119 | + | |
29120 | + return 0; | |
29121 | + | |
29122 | +out_of_xenbus: | |
29123 | + scsiback_xenbus_unregister(); | |
29124 | +out_of_kmem: | |
29125 | + scsiback_interface_exit(); | |
29126 | +out_of_memory: | |
29127 | + kfree(pending_reqs); | |
29128 | + kfree(pending_grant_handles); | |
29129 | + free_empty_pages_and_pagevec(pending_pages, mmap_pages); | |
29130 | + printk(KERN_ERR "scsiback: %s: out of memory\n", __FUNCTION__); | |
29131 | + return -ENOMEM; | |
29132 | +} | |
29133 | + | |
29134 | +static void __exit scsiback_exit(void) | |
29135 | +{ | |
29136 | + scsiback_xenbus_unregister(); | |
29137 | + scsiback_interface_exit(); | |
29138 | + kfree(pending_reqs); | |
29139 | + kfree(pending_grant_handles); | |
29140 | + free_empty_pages_and_pagevec(pending_pages, (vscsiif_reqs * VSCSIIF_SG_TABLESIZE)); | |
29141 | + | |
29142 | +} | |
29143 | + | |
29144 | +module_init(scsiback_init); | |
29145 | +module_exit(scsiback_exit); | |
29146 | + | |
29147 | +MODULE_DESCRIPTION("Xen SCSI backend driver"); | |
29148 | +MODULE_LICENSE("Dual BSD/GPL"); | |
29149 | Index: head-2008-11-25/drivers/xen/scsiback/translate.c | |
29150 | =================================================================== | |
29151 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
29152 | +++ head-2008-11-25/drivers/xen/scsiback/translate.c 2008-07-21 11:00:33.000000000 +0200 | |
29153 | @@ -0,0 +1,168 @@ | |
29154 | +/* | |
29155 | + * Xen SCSI backend driver | |
29156 | + * | |
29157 | + * Copyright (c) 2008, FUJITSU Limited | |
29158 | + * | |
29159 | + * This program is free software; you can redistribute it and/or | |
29160 | + * modify it under the terms of the GNU General Public License version 2 | |
29161 | + * as published by the Free Software Foundation; or, when distributed | |
29162 | + * separately from the Linux kernel or incorporated into other | |
29163 | + * software packages, subject to the following license: | |
29164 | + * | |
29165 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
29166 | + * of this source file (the "Software"), to deal in the Software without | |
29167 | + * restriction, including without limitation the rights to use, copy, modify, | |
29168 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
29169 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
29170 | + * the following conditions: | |
29171 | + * | |
29172 | + * The above copyright notice and this permission notice shall be included in | |
29173 | + * all copies or substantial portions of the Software. | |
29174 | + * | |
29175 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
29176 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
29177 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
29178 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
29179 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
29180 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
29181 | + * IN THE SOFTWARE. | |
29182 | + */ | |
29183 | + | |
29184 | +#include <linux/list.h> | |
29185 | +#include <linux/gfp.h> | |
29186 | + | |
29187 | +#include "common.h" | |
29188 | + | |
29189 | +/* | |
29190 | + Initialize the translation entry list | |
29191 | +*/ | |
29192 | +void scsiback_init_translation_table(struct vscsibk_info *info) | |
29193 | +{ | |
29194 | + INIT_LIST_HEAD(&info->v2p_entry_lists); | |
29195 | + spin_lock_init(&info->v2p_lock); | |
29196 | +} | |
29197 | + | |
29198 | + | |
29199 | +/* | |
29200 | + Add a new translation entry | |
29201 | +*/ | |
29202 | +int scsiback_add_translation_entry(struct vscsibk_info *info, | |
29203 | + struct scsi_device *sdev, struct ids_tuple *v) | |
29204 | +{ | |
29205 | + int err = 0; | |
29206 | + struct v2p_entry *entry; | |
29207 | + struct v2p_entry *new; | |
29208 | + struct list_head *head = &(info->v2p_entry_lists); | |
29209 | + unsigned long flags; | |
29210 | + | |
29211 | + spin_lock_irqsave(&info->v2p_lock, flags); | |
29212 | + | |
29213 | + /* Check double assignment to identical virtual ID */ | |
29214 | + list_for_each_entry(entry, head, l) { | |
29215 | + if ((entry->v.chn == v->chn) && | |
29216 | + (entry->v.tgt == v->tgt) && | |
29217 | + (entry->v.lun == v->lun)) { | |
29218 | + printk(KERN_WARNING "scsiback: Virtual ID is already used. " | |
29219 | + "Assignment was not performed.\n"); | |
29220 | + err = -EEXIST; | |
29221 | + goto out; | |
29222 | + } | |
29223 | + | |
29224 | + } | |
29225 | + | |
29226 | + /* Create a new translation entry and add to the list */ | |
29227 | + if ((new = kmalloc(sizeof(struct v2p_entry), GFP_ATOMIC)) == NULL) { | |
29228 | + printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__); | |
29229 | + err = -ENOMEM; | |
29230 | + goto out; | |
29231 | + } | |
29232 | + new->v = *v; | |
29233 | + new->sdev = sdev; | |
29234 | + list_add_tail(&new->l, head); | |
29235 | + | |
29236 | +out: | |
29237 | + spin_unlock_irqrestore(&info->v2p_lock, flags); | |
29238 | + return err; | |
29239 | +} | |
29240 | + | |
29241 | + | |
29242 | +/* | |
29243 | + Delete the translation entry specfied | |
29244 | +*/ | |
29245 | +int scsiback_del_translation_entry(struct vscsibk_info *info, | |
29246 | + struct ids_tuple *v) | |
29247 | +{ | |
29248 | + struct v2p_entry *entry; | |
29249 | + struct list_head *head = &(info->v2p_entry_lists); | |
29250 | + unsigned long flags; | |
29251 | + | |
29252 | + spin_lock_irqsave(&info->v2p_lock, flags); | |
29253 | + /* Find out the translation entry specified */ | |
29254 | + list_for_each_entry(entry, head, l) { | |
29255 | + if ((entry->v.chn == v->chn) && | |
29256 | + (entry->v.tgt == v->tgt) && | |
29257 | + (entry->v.lun == v->lun)) { | |
29258 | + goto found; | |
29259 | + } | |
29260 | + } | |
29261 | + | |
29262 | + spin_unlock_irqrestore(&info->v2p_lock, flags); | |
29263 | + return 1; | |
29264 | + | |
29265 | +found: | |
29266 | + /* Delete the translation entry specfied */ | |
29267 | + scsi_device_put(entry->sdev); | |
29268 | + list_del(&entry->l); | |
29269 | + kfree(entry); | |
29270 | + | |
29271 | + spin_unlock_irqrestore(&info->v2p_lock, flags); | |
29272 | + return 0; | |
29273 | +} | |
29274 | + | |
29275 | + | |
29276 | +/* | |
29277 | + Perform virtual to physical translation | |
29278 | +*/ | |
29279 | +struct scsi_device *scsiback_do_translation(struct vscsibk_info *info, | |
29280 | + struct ids_tuple *v) | |
29281 | +{ | |
29282 | + struct v2p_entry *entry; | |
29283 | + struct list_head *head = &(info->v2p_entry_lists); | |
29284 | + struct scsi_device *sdev = NULL; | |
29285 | + unsigned long flags; | |
29286 | + | |
29287 | + spin_lock_irqsave(&info->v2p_lock, flags); | |
29288 | + list_for_each_entry(entry, head, l) { | |
29289 | + if ((entry->v.chn == v->chn) && | |
29290 | + (entry->v.tgt == v->tgt) && | |
29291 | + (entry->v.lun == v->lun)) { | |
29292 | + sdev = entry->sdev; | |
29293 | + goto out; | |
29294 | + } | |
29295 | + } | |
29296 | +out: | |
29297 | + spin_unlock_irqrestore(&info->v2p_lock, flags); | |
29298 | + return sdev; | |
29299 | +} | |
29300 | + | |
29301 | + | |
29302 | +/* | |
29303 | + Release the translation entry specfied | |
29304 | +*/ | |
29305 | +void scsiback_release_translation_entry(struct vscsibk_info *info) | |
29306 | +{ | |
29307 | + struct v2p_entry *entry, *tmp; | |
29308 | + struct list_head *head = &(info->v2p_entry_lists); | |
29309 | + unsigned long flags; | |
29310 | + | |
29311 | + spin_lock_irqsave(&info->v2p_lock, flags); | |
29312 | + list_for_each_entry_safe(entry, tmp, head, l) { | |
29313 | + scsi_device_put(entry->sdev); | |
29314 | + list_del(&entry->l); | |
29315 | + kfree(entry); | |
29316 | + } | |
29317 | + | |
29318 | + spin_unlock_irqrestore(&info->v2p_lock, flags); | |
29319 | + return; | |
29320 | + | |
29321 | +} | |
29322 | Index: head-2008-11-25/drivers/xen/scsiback/xenbus.c | |
29323 | =================================================================== | |
29324 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
29325 | +++ head-2008-11-25/drivers/xen/scsiback/xenbus.c 2008-07-21 11:00:33.000000000 +0200 | |
29326 | @@ -0,0 +1,368 @@ | |
29327 | +/* | |
29328 | + * Xen SCSI backend driver | |
29329 | + * | |
29330 | + * Copyright (c) 2008, FUJITSU Limited | |
29331 | + * | |
29332 | + * Based on the blkback driver code. | |
29333 | + * | |
29334 | + * This program is free software; you can redistribute it and/or | |
29335 | + * modify it under the terms of the GNU General Public License version 2 | |
29336 | + * as published by the Free Software Foundation; or, when distributed | |
29337 | + * separately from the Linux kernel or incorporated into other | |
29338 | + * software packages, subject to the following license: | |
29339 | + * | |
29340 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
29341 | + * of this source file (the "Software"), to deal in the Software without | |
29342 | + * restriction, including without limitation the rights to use, copy, modify, | |
29343 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
29344 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
29345 | + * the following conditions: | |
29346 | + * | |
29347 | + * The above copyright notice and this permission notice shall be included in | |
29348 | + * all copies or substantial portions of the Software. | |
29349 | + * | |
29350 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
29351 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
29352 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
29353 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
29354 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
29355 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
29356 | + * IN THE SOFTWARE. | |
29357 | + */ | |
29358 | + | |
29359 | +#include <stdarg.h> | |
29360 | +#include <linux/module.h> | |
29361 | +#include <linux/kthread.h> | |
29362 | +#include <scsi/scsi.h> | |
29363 | +#include <scsi/scsi_host.h> | |
29364 | +#include <scsi/scsi_device.h> | |
29365 | + | |
29366 | +#include "common.h" | |
29367 | + | |
29368 | +struct backend_info | |
29369 | +{ | |
29370 | + struct xenbus_device *dev; | |
29371 | + struct vscsibk_info *info; | |
29372 | +}; | |
29373 | + | |
29374 | + | |
29375 | +static int __vscsiif_name(struct backend_info *be, char *buf) | |
29376 | +{ | |
29377 | + struct xenbus_device *dev = be->dev; | |
29378 | + unsigned int domid, id; | |
29379 | + | |
29380 | + sscanf(dev->nodename, "backend/vscsi/%u/%u", &domid, &id); | |
29381 | + snprintf(buf, TASK_COMM_LEN, "vscsi.%u.%u", be->info->domid, id); | |
29382 | + | |
29383 | + return 0; | |
29384 | +} | |
29385 | + | |
29386 | +static int scsiback_map(struct backend_info *be) | |
29387 | +{ | |
29388 | + struct xenbus_device *dev = be->dev; | |
29389 | + unsigned long ring_ref; | |
29390 | + unsigned int evtchn; | |
29391 | + int err; | |
29392 | + char name[TASK_COMM_LEN]; | |
29393 | + | |
29394 | + err = xenbus_gather(XBT_NIL, dev->otherend, | |
29395 | + "ring-ref", "%lu", &ring_ref, | |
29396 | + "event-channel", "%u", &evtchn, NULL); | |
29397 | + if (err) { | |
29398 | + xenbus_dev_fatal(dev, err, "reading %s ring", dev->otherend); | |
29399 | + return err; | |
29400 | + } | |
29401 | + | |
29402 | + err = scsiback_init_sring(be->info, ring_ref, evtchn); | |
29403 | + if (err) | |
29404 | + return err; | |
29405 | + | |
29406 | + err = __vscsiif_name(be, name); | |
29407 | + if (err) { | |
29408 | + xenbus_dev_error(dev, err, "get scsiback dev name"); | |
29409 | + return err; | |
29410 | + } | |
29411 | + | |
29412 | + be->info->kthread = kthread_run(scsiback_schedule, be->info, name); | |
29413 | + if (IS_ERR(be->info->kthread)) { | |
29414 | + err = PTR_ERR(be->info->kthread); | |
29415 | + be->info->kthread = NULL; | |
29416 | + xenbus_dev_error(be->dev, err, "start vscsiif"); | |
29417 | + return err; | |
29418 | + } | |
29419 | + | |
29420 | + return 0; | |
29421 | +} | |
29422 | + | |
29423 | + | |
29424 | +struct scsi_device *scsiback_get_scsi_device(struct ids_tuple *phy) | |
29425 | +{ | |
29426 | + struct Scsi_Host *shost; | |
29427 | + struct scsi_device *sdev = NULL; | |
29428 | + | |
29429 | + shost = scsi_host_lookup(phy->hst); | |
29430 | + if (IS_ERR(shost)) { | |
29431 | + printk(KERN_ERR "scsiback: host%d doesn't exist.\n", | |
29432 | + phy->hst); | |
29433 | + return NULL; | |
29434 | + } | |
29435 | + sdev = scsi_device_lookup(shost, phy->chn, phy->tgt, phy->lun); | |
29436 | + if (!sdev) { | |
29437 | + printk(KERN_ERR "scsiback: %d:%d:%d:%d doesn't exist.\n", | |
29438 | + phy->hst, phy->chn, phy->tgt, phy->lun); | |
29439 | + scsi_host_put(shost); | |
29440 | + return NULL; | |
29441 | + } | |
29442 | + | |
29443 | + scsi_host_put(shost); | |
29444 | + return (sdev); | |
29445 | +} | |
29446 | + | |
29447 | +#define VSCSIBACK_OP_ADD_OR_DEL_LUN 1 | |
29448 | +#define VSCSIBACK_OP_UPDATEDEV_STATE 2 | |
29449 | + | |
29450 | + | |
29451 | +static void scsiback_do_lun_hotplug(struct backend_info *be, int op) | |
29452 | +{ | |
29453 | + int i, err = 0; | |
29454 | + struct ids_tuple phy, vir; | |
29455 | + int device_state; | |
29456 | + char str[64], state_str[64]; | |
29457 | + char **dir; | |
29458 | + unsigned int dir_n = 0; | |
29459 | + struct xenbus_device *dev = be->dev; | |
29460 | + struct scsi_device *sdev; | |
29461 | + | |
29462 | + dir = xenbus_directory(XBT_NIL, dev->nodename, "vscsi-devs", &dir_n); | |
29463 | + if (IS_ERR(dir)) | |
29464 | + return; | |
29465 | + | |
29466 | + for (i = 0; i < dir_n; i++) { | |
29467 | + | |
29468 | + /* read status */ | |
29469 | + snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]); | |
29470 | + err = xenbus_scanf(XBT_NIL, dev->nodename, state_str, "%u", | |
29471 | + &device_state); | |
29472 | + if (XENBUS_EXIST_ERR(err)) | |
29473 | + continue; | |
29474 | + | |
29475 | + /* physical SCSI device */ | |
29476 | + snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", dir[i]); | |
29477 | + err = xenbus_scanf(XBT_NIL, dev->nodename, str, | |
29478 | + "%u:%u:%u:%u", &phy.hst, &phy.chn, &phy.tgt, &phy.lun); | |
29479 | + if (XENBUS_EXIST_ERR(err)) { | |
29480 | + xenbus_printf(XBT_NIL, dev->nodename, state_str, | |
29481 | + "%d", XenbusStateClosed); | |
29482 | + continue; | |
29483 | + } | |
29484 | + | |
29485 | + /* virtual SCSI device */ | |
29486 | + snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]); | |
29487 | + err = xenbus_scanf(XBT_NIL, dev->nodename, str, | |
29488 | + "%u:%u:%u:%u", &vir.hst, &vir.chn, &vir.tgt, &vir.lun); | |
29489 | + if (XENBUS_EXIST_ERR(err)) { | |
29490 | + xenbus_printf(XBT_NIL, dev->nodename, state_str, | |
29491 | + "%d", XenbusStateClosed); | |
29492 | + continue; | |
29493 | + } | |
29494 | + | |
29495 | + switch (op) { | |
29496 | + case VSCSIBACK_OP_ADD_OR_DEL_LUN: | |
29497 | + if (device_state == XenbusStateInitialising) { | |
29498 | + sdev = scsiback_get_scsi_device(&phy); | |
29499 | + if (!sdev) | |
29500 | + xenbus_printf(XBT_NIL, dev->nodename, state_str, | |
29501 | + "%d", XenbusStateClosed); | |
29502 | + else { | |
29503 | + err = scsiback_add_translation_entry(be->info, sdev, &vir); | |
29504 | + if (!err) { | |
29505 | + if (xenbus_printf(XBT_NIL, dev->nodename, state_str, | |
29506 | + "%d", XenbusStateInitialised)) { | |
29507 | + printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str); | |
29508 | + scsiback_del_translation_entry(be->info, &vir); | |
29509 | + } | |
29510 | + } else { | |
29511 | + scsi_device_put(sdev); | |
29512 | + xenbus_printf(XBT_NIL, dev->nodename, state_str, | |
29513 | + "%d", XenbusStateClosed); | |
29514 | + } | |
29515 | + } | |
29516 | + } | |
29517 | + | |
29518 | + if (device_state == XenbusStateClosing) { | |
29519 | + if (!scsiback_del_translation_entry(be->info, &vir)) { | |
29520 | + if (xenbus_printf(XBT_NIL, dev->nodename, state_str, | |
29521 | + "%d", XenbusStateClosed)) | |
29522 | + printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str); | |
29523 | + } | |
29524 | + } | |
29525 | + break; | |
29526 | + | |
29527 | + case VSCSIBACK_OP_UPDATEDEV_STATE: | |
29528 | + if (device_state == XenbusStateInitialised) { | |
29529 | + /* modify vscsi-devs/dev-x/state */ | |
29530 | + if (xenbus_printf(XBT_NIL, dev->nodename, state_str, | |
29531 | + "%d", XenbusStateConnected)) { | |
29532 | + printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str); | |
29533 | + scsiback_del_translation_entry(be->info, &vir); | |
29534 | + xenbus_printf(XBT_NIL, dev->nodename, state_str, | |
29535 | + "%d", XenbusStateClosed); | |
29536 | + } | |
29537 | + } | |
29538 | + break; | |
29539 | + /*When it is necessary, processing is added here.*/ | |
29540 | + default: | |
29541 | + break; | |
29542 | + } | |
29543 | + } | |
29544 | + | |
29545 | + kfree(dir); | |
29546 | + return ; | |
29547 | +} | |
29548 | + | |
29549 | + | |
29550 | +static void scsiback_frontend_changed(struct xenbus_device *dev, | |
29551 | + enum xenbus_state frontend_state) | |
29552 | +{ | |
29553 | + struct backend_info *be = dev->dev.driver_data; | |
29554 | + int err; | |
29555 | + | |
29556 | + switch (frontend_state) { | |
29557 | + case XenbusStateInitialising: | |
29558 | + break; | |
29559 | + case XenbusStateInitialised: | |
29560 | + err = scsiback_map(be); | |
29561 | + if (err) | |
29562 | + break; | |
29563 | + | |
29564 | + scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN); | |
29565 | + xenbus_switch_state(dev, XenbusStateConnected); | |
29566 | + | |
29567 | + break; | |
29568 | + case XenbusStateConnected: | |
29569 | + | |
29570 | + scsiback_do_lun_hotplug(be, VSCSIBACK_OP_UPDATEDEV_STATE); | |
29571 | + | |
29572 | + if (dev->state == XenbusStateConnected) | |
29573 | + break; | |
29574 | + | |
29575 | + xenbus_switch_state(dev, XenbusStateConnected); | |
29576 | + | |
29577 | + break; | |
29578 | + | |
29579 | + case XenbusStateClosing: | |
29580 | + scsiback_disconnect(be->info); | |
29581 | + xenbus_switch_state(dev, XenbusStateClosing); | |
29582 | + break; | |
29583 | + | |
29584 | + case XenbusStateClosed: | |
29585 | + xenbus_switch_state(dev, XenbusStateClosed); | |
29586 | + if (xenbus_dev_is_online(dev)) | |
29587 | + break; | |
29588 | + /* fall through if not online */ | |
29589 | + case XenbusStateUnknown: | |
29590 | + device_unregister(&dev->dev); | |
29591 | + break; | |
29592 | + | |
29593 | + case XenbusStateReconfiguring: | |
29594 | + scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN); | |
29595 | + | |
29596 | + xenbus_switch_state(dev, XenbusStateReconfigured); | |
29597 | + | |
29598 | + break; | |
29599 | + | |
29600 | + default: | |
29601 | + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", | |
29602 | + frontend_state); | |
29603 | + break; | |
29604 | + } | |
29605 | +} | |
29606 | + | |
29607 | + | |
29608 | +static int scsiback_remove(struct xenbus_device *dev) | |
29609 | +{ | |
29610 | + struct backend_info *be = dev->dev.driver_data; | |
29611 | + | |
29612 | + if (be->info) { | |
29613 | + scsiback_disconnect(be->info); | |
29614 | + scsiback_release_translation_entry(be->info); | |
29615 | + scsiback_free(be->info); | |
29616 | + be->info = NULL; | |
29617 | + } | |
29618 | + | |
29619 | + kfree(be); | |
29620 | + dev->dev.driver_data = NULL; | |
29621 | + | |
29622 | + return 0; | |
29623 | +} | |
29624 | + | |
29625 | + | |
29626 | +static int scsiback_probe(struct xenbus_device *dev, | |
29627 | + const struct xenbus_device_id *id) | |
29628 | +{ | |
29629 | + int err; | |
29630 | + | |
29631 | + struct backend_info *be = kzalloc(sizeof(struct backend_info), | |
29632 | + GFP_KERNEL); | |
29633 | + | |
29634 | + DPRINTK("%p %d\n", dev, dev->otherend_id); | |
29635 | + | |
29636 | + if (!be) { | |
29637 | + xenbus_dev_fatal(dev, -ENOMEM, | |
29638 | + "allocating backend structure"); | |
29639 | + return -ENOMEM; | |
29640 | + } | |
29641 | + be->dev = dev; | |
29642 | + dev->dev.driver_data = be; | |
29643 | + | |
29644 | + be->info = vscsibk_info_alloc(dev->otherend_id); | |
29645 | + if (IS_ERR(be->info)) { | |
29646 | + err = PTR_ERR(be->info); | |
29647 | + be->info = NULL; | |
29648 | + xenbus_dev_fatal(dev, err, "creating scsihost interface"); | |
29649 | + goto fail; | |
29650 | + } | |
29651 | + | |
29652 | + be->info->dev = dev; | |
29653 | + be->info->irq = 0; | |
29654 | + | |
29655 | + scsiback_init_translation_table(be->info); | |
29656 | + | |
29657 | + err = xenbus_switch_state(dev, XenbusStateInitWait); | |
29658 | + if (err) | |
29659 | + goto fail; | |
29660 | + | |
29661 | + return 0; | |
29662 | + | |
29663 | + | |
29664 | +fail: | |
29665 | + printk(KERN_WARNING "scsiback: %s failed\n",__FUNCTION__); | |
29666 | + scsiback_remove(dev); | |
29667 | + | |
29668 | + return err; | |
29669 | +} | |
29670 | + | |
29671 | + | |
29672 | +static struct xenbus_device_id scsiback_ids[] = { | |
29673 | + { "vscsi" }, | |
29674 | + { "" } | |
29675 | +}; | |
29676 | + | |
29677 | +static struct xenbus_driver scsiback = { | |
29678 | + .name = "vscsi", | |
29679 | + .owner = THIS_MODULE, | |
29680 | + .ids = scsiback_ids, | |
29681 | + .probe = scsiback_probe, | |
29682 | + .remove = scsiback_remove, | |
29683 | + .otherend_changed = scsiback_frontend_changed | |
29684 | +}; | |
29685 | + | |
29686 | +int scsiback_xenbus_init(void) | |
29687 | +{ | |
29688 | + return xenbus_register_backend(&scsiback); | |
29689 | +} | |
29690 | + | |
29691 | +void scsiback_xenbus_unregister(void) | |
29692 | +{ | |
29693 | + xenbus_unregister_driver(&scsiback); | |
29694 | +} | |
29695 | Index: head-2008-11-25/drivers/xen/scsifront/Makefile | |
29696 | =================================================================== | |
29697 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
29698 | +++ head-2008-11-25/drivers/xen/scsifront/Makefile 2008-07-21 11:00:33.000000000 +0200 | |
29699 | @@ -0,0 +1,3 @@ | |
29700 | + | |
29701 | +obj-$(CONFIG_XEN_SCSI_FRONTEND) := xenscsi.o | |
29702 | +xenscsi-objs := scsifront.o xenbus.o | |
29703 | Index: head-2008-11-25/drivers/xen/scsifront/common.h | |
29704 | =================================================================== | |
29705 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
29706 | +++ head-2008-11-25/drivers/xen/scsifront/common.h 2008-07-21 11:00:33.000000000 +0200 | |
29707 | @@ -0,0 +1,129 @@ | |
29708 | +/* | |
29709 | + * Xen SCSI frontend driver | |
29710 | + * | |
29711 | + * Copyright (c) 2008, FUJITSU Limited | |
29712 | + * | |
29713 | + * This program is free software; you can redistribute it and/or | |
29714 | + * modify it under the terms of the GNU General Public License version 2 | |
29715 | + * as published by the Free Software Foundation; or, when distributed | |
29716 | + * separately from the Linux kernel or incorporated into other | |
29717 | + * software packages, subject to the following license: | |
29718 | + * | |
29719 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
29720 | + * of this source file (the "Software"), to deal in the Software without | |
29721 | + * restriction, including without limitation the rights to use, copy, modify, | |
29722 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
29723 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
29724 | + * the following conditions: | |
29725 | + * | |
29726 | + * The above copyright notice and this permission notice shall be included in | |
29727 | + * all copies or substantial portions of the Software. | |
29728 | + * | |
29729 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
29730 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
29731 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
29732 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
29733 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
29734 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
29735 | + * IN THE SOFTWARE. | |
29736 | + */ | |
29737 | + | |
29738 | +#ifndef __XEN_DRIVERS_SCSIFRONT_H__ | |
29739 | +#define __XEN_DRIVERS_SCSIFRONT_H__ | |
29740 | + | |
29741 | +#include <linux/version.h> | |
29742 | +#include <linux/module.h> | |
29743 | +#include <linux/kernel.h> | |
29744 | +#include <linux/device.h> | |
29745 | +#include <linux/kthread.h> | |
29746 | +#include <linux/wait.h> | |
29747 | +#include <linux/interrupt.h> | |
29748 | +#include <linux/spinlock.h> | |
29749 | +#include <linux/sched.h> | |
29750 | +#include <linux/blkdev.h> | |
29751 | +#include <scsi/scsi_cmnd.h> | |
29752 | +#include <scsi/scsi_device.h> | |
29753 | +#include <scsi/scsi.h> | |
29754 | +#include <scsi/scsi_host.h> | |
29755 | +#include <xen/xenbus.h> | |
29756 | +#include <xen/gnttab.h> | |
29757 | +#include <xen/evtchn.h> | |
29758 | +#include <xen/interface/xen.h> | |
29759 | +#include <xen/interface/io/ring.h> | |
29760 | +#include <xen/interface/io/vscsiif.h> | |
29761 | +#include <asm/delay.h> | |
29762 | + | |
29763 | + | |
29764 | +#define GRANT_INVALID_REF 0 | |
29765 | +#define VSCSI_IN_ABORT 1 | |
29766 | +#define VSCSI_IN_RESET 2 | |
29767 | + | |
29768 | +/* tuning point*/ | |
29769 | +#define VSCSIIF_DEFAULT_CMD_PER_LUN 10 | |
29770 | +#define VSCSIIF_MAX_TARGET 64 | |
29771 | +#define VSCSIIF_MAX_LUN 255 | |
29772 | + | |
29773 | +#define VSCSIIF_RING_SIZE \ | |
29774 | + __RING_SIZE((struct vscsiif_sring *)0, PAGE_SIZE) | |
29775 | +#define VSCSIIF_MAX_REQS VSCSIIF_RING_SIZE | |
29776 | + | |
29777 | +struct vscsifrnt_shadow { | |
29778 | + uint16_t next_free; | |
29779 | + | |
29780 | + /* command between backend and frontend | |
29781 | + * VSCSIIF_ACT_SCSI_CDB or VSCSIIF_ACT_SCSI_RESET */ | |
29782 | + unsigned char act; | |
29783 | + | |
29784 | + /* do reset function */ | |
29785 | + wait_queue_head_t wq_reset; /* reset work queue */ | |
29786 | + int wait_reset; /* reset work queue condition */ | |
29787 | + int32_t rslt_reset; /* reset response status */ | |
29788 | + /* (SUCESS or FAILED) */ | |
29789 | + | |
29790 | + /* for DMA_TO_DEVICE(1), DMA_FROM_DEVICE(2), DMA_NONE(3) | |
29791 | + requests */ | |
29792 | + unsigned int sc_data_direction; | |
29793 | + | |
29794 | + /* Number of pieces of scatter-gather */ | |
29795 | + unsigned int nr_segments; | |
29796 | + | |
29797 | + /* requested struct scsi_cmnd is stored from kernel */ | |
29798 | + unsigned long req_scsi_cmnd; | |
29799 | + int gref[VSCSIIF_SG_TABLESIZE]; | |
29800 | +}; | |
29801 | + | |
29802 | +struct vscsifrnt_info { | |
29803 | + struct xenbus_device *dev; | |
29804 | + | |
29805 | + struct Scsi_Host *host; | |
29806 | + | |
29807 | + spinlock_t io_lock; | |
29808 | + spinlock_t shadow_lock; | |
29809 | + unsigned int evtchn; | |
29810 | + unsigned int irq; | |
29811 | + | |
29812 | + grant_ref_t ring_ref; | |
29813 | + struct vscsiif_front_ring ring; | |
29814 | + struct vscsiif_response ring_res; | |
29815 | + | |
29816 | + struct vscsifrnt_shadow shadow[VSCSIIF_MAX_REQS]; | |
29817 | + uint32_t shadow_free; | |
29818 | + | |
29819 | + struct task_struct *kthread; | |
29820 | + wait_queue_head_t wq; | |
29821 | + unsigned int waiting_resp; | |
29822 | + | |
29823 | +}; | |
29824 | + | |
29825 | +#define DPRINTK(_f, _a...) \ | |
29826 | + pr_debug("(file=%s, line=%d) " _f, \ | |
29827 | + __FILE__ , __LINE__ , ## _a ) | |
29828 | + | |
29829 | +int scsifront_xenbus_init(void); | |
29830 | +void scsifront_xenbus_unregister(void); | |
29831 | +int scsifront_schedule(void *data); | |
29832 | +irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs); | |
29833 | +int scsifront_cmd_done(struct vscsifrnt_info *info); | |
29834 | + | |
29835 | + | |
29836 | +#endif /* __XEN_DRIVERS_SCSIFRONT_H__ */ | |
29837 | Index: head-2008-11-25/drivers/xen/scsifront/scsifront.c | |
29838 | =================================================================== | |
29839 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
29840 | +++ head-2008-11-25/drivers/xen/scsifront/scsifront.c 2008-07-21 11:00:33.000000000 +0200 | |
29841 | @@ -0,0 +1,511 @@ | |
29842 | +/* | |
29843 | + * Xen SCSI frontend driver | |
29844 | + * | |
29845 | + * Copyright (c) 2008, FUJITSU Limited | |
29846 | + * | |
29847 | + * This program is free software; you can redistribute it and/or | |
29848 | + * modify it under the terms of the GNU General Public License version 2 | |
29849 | + * as published by the Free Software Foundation; or, when distributed | |
29850 | + * separately from the Linux kernel or incorporated into other | |
29851 | + * software packages, subject to the following license: | |
29852 | + * | |
29853 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
29854 | + * of this source file (the "Software"), to deal in the Software without | |
29855 | + * restriction, including without limitation the rights to use, copy, modify, | |
29856 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
29857 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
29858 | + * the following conditions: | |
29859 | + * | |
29860 | + * The above copyright notice and this permission notice shall be included in | |
29861 | + * all copies or substantial portions of the Software. | |
29862 | + * | |
29863 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
29864 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
29865 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
29866 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
29867 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
29868 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
29869 | + * IN THE SOFTWARE. | |
29870 | + */ | |
29871 | + | |
29872 | + | |
29873 | +#include <linux/version.h> | |
29874 | +#include "common.h" | |
29875 | + | |
29876 | +static int get_id_from_freelist(struct vscsifrnt_info *info) | |
29877 | +{ | |
29878 | + unsigned long flags; | |
29879 | + uint32_t free; | |
29880 | + | |
29881 | + spin_lock_irqsave(&info->shadow_lock, flags); | |
29882 | + | |
29883 | + free = info->shadow_free; | |
29884 | + BUG_ON(free > VSCSIIF_MAX_REQS); | |
29885 | + info->shadow_free = info->shadow[free].next_free; | |
29886 | + info->shadow[free].next_free = 0x0fff; | |
29887 | + | |
29888 | + info->shadow[free].wait_reset = 0; | |
29889 | + | |
29890 | + spin_unlock_irqrestore(&info->shadow_lock, flags); | |
29891 | + | |
29892 | + return free; | |
29893 | +} | |
29894 | + | |
29895 | +static void add_id_to_freelist(struct vscsifrnt_info *info, uint32_t id) | |
29896 | +{ | |
29897 | + unsigned long flags; | |
29898 | + | |
29899 | + spin_lock_irqsave(&info->shadow_lock, flags); | |
29900 | + | |
29901 | + info->shadow[id].next_free = info->shadow_free; | |
29902 | + info->shadow[id].req_scsi_cmnd = 0; | |
29903 | + info->shadow_free = id; | |
29904 | + | |
29905 | + spin_unlock_irqrestore(&info->shadow_lock, flags); | |
29906 | +} | |
29907 | + | |
29908 | + | |
29909 | +struct vscsiif_request * scsifront_pre_request(struct vscsifrnt_info *info) | |
29910 | +{ | |
29911 | + struct vscsiif_front_ring *ring = &(info->ring); | |
29912 | + vscsiif_request_t *ring_req; | |
29913 | + uint32_t id; | |
29914 | + | |
29915 | + ring_req = RING_GET_REQUEST(&(info->ring), ring->req_prod_pvt); | |
29916 | + | |
29917 | + ring->req_prod_pvt++; | |
29918 | + | |
29919 | + id = get_id_from_freelist(info); /* use id by response */ | |
29920 | + ring_req->rqid = (uint16_t)id; | |
29921 | + | |
29922 | + return ring_req; | |
29923 | +} | |
29924 | + | |
29925 | + | |
29926 | +static void scsifront_notify_work(struct vscsifrnt_info *info) | |
29927 | +{ | |
29928 | + info->waiting_resp = 1; | |
29929 | + wake_up(&info->wq); | |
29930 | +} | |
29931 | + | |
29932 | + | |
29933 | +static void scsifront_do_request(struct vscsifrnt_info *info) | |
29934 | +{ | |
29935 | + struct vscsiif_front_ring *ring = &(info->ring); | |
29936 | + unsigned int irq = info->irq; | |
29937 | + int notify; | |
29938 | + | |
29939 | + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify); | |
29940 | + if (notify) | |
29941 | + notify_remote_via_irq(irq); | |
29942 | +} | |
29943 | + | |
29944 | +irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs) | |
29945 | +{ | |
29946 | + scsifront_notify_work((struct vscsifrnt_info *)dev_id); | |
29947 | + return IRQ_HANDLED; | |
29948 | +} | |
29949 | + | |
29950 | + | |
29951 | +static void scsifront_gnttab_done(struct vscsifrnt_shadow *s, uint32_t id) | |
29952 | +{ | |
29953 | + int i; | |
29954 | + | |
29955 | + if (s->sc_data_direction == DMA_NONE) | |
29956 | + return; | |
29957 | + | |
29958 | + if (s->nr_segments) { | |
29959 | + for (i = 0; i < s->nr_segments; i++) { | |
29960 | + if (unlikely(gnttab_query_foreign_access( | |
29961 | + s->gref[i]) != 0)) { | |
29962 | + printk(KERN_ALERT "scsifront: " | |
29963 | + "grant still in use by backend.\n"); | |
29964 | + BUG(); | |
29965 | + } | |
29966 | + gnttab_end_foreign_access(s->gref[i], 0UL); | |
29967 | + } | |
29968 | + } | |
29969 | + | |
29970 | + return; | |
29971 | +} | |
29972 | + | |
29973 | + | |
29974 | +static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info, | |
29975 | + vscsiif_response_t *ring_res) | |
29976 | +{ | |
29977 | + struct scsi_cmnd *sc; | |
29978 | + uint32_t id; | |
29979 | + uint8_t sense_len; | |
29980 | + | |
29981 | + id = ring_res->rqid; | |
29982 | + sc = (struct scsi_cmnd *)info->shadow[id].req_scsi_cmnd; | |
29983 | + | |
29984 | + if (sc == NULL) | |
29985 | + BUG(); | |
29986 | + | |
29987 | + scsifront_gnttab_done(&info->shadow[id], id); | |
29988 | + add_id_to_freelist(info, id); | |
29989 | + | |
29990 | + sc->result = ring_res->rslt; | |
29991 | + sc->resid = ring_res->residual_len; | |
29992 | + | |
29993 | + if (ring_res->sense_len > VSCSIIF_SENSE_BUFFERSIZE) | |
29994 | + sense_len = VSCSIIF_SENSE_BUFFERSIZE; | |
29995 | + else | |
29996 | + sense_len = ring_res->sense_len; | |
29997 | + | |
29998 | + if (sense_len) | |
29999 | + memcpy(sc->sense_buffer, ring_res->sense_buffer, sense_len); | |
30000 | + | |
30001 | + sc->scsi_done(sc); | |
30002 | + | |
30003 | + return; | |
30004 | +} | |
30005 | + | |
30006 | + | |
30007 | +static void scsifront_sync_cmd_done(struct vscsifrnt_info *info, | |
30008 | + vscsiif_response_t *ring_res) | |
30009 | +{ | |
30010 | + uint16_t id = ring_res->rqid; | |
30011 | + unsigned long flags; | |
30012 | + | |
30013 | + spin_lock_irqsave(&info->shadow_lock, flags); | |
30014 | + info->shadow[id].wait_reset = 1; | |
30015 | + info->shadow[id].rslt_reset = ring_res->rslt; | |
30016 | + spin_unlock_irqrestore(&info->shadow_lock, flags); | |
30017 | + | |
30018 | + wake_up(&(info->shadow[id].wq_reset)); | |
30019 | +} | |
30020 | + | |
30021 | + | |
30022 | +int scsifront_cmd_done(struct vscsifrnt_info *info) | |
30023 | +{ | |
30024 | + vscsiif_response_t *ring_res; | |
30025 | + | |
30026 | + RING_IDX i, rp; | |
30027 | + int more_to_do = 0; | |
30028 | + unsigned long flags; | |
30029 | + | |
30030 | + spin_lock_irqsave(&info->io_lock, flags); | |
30031 | + | |
30032 | + rp = info->ring.sring->rsp_prod; | |
30033 | + rmb(); | |
30034 | + for (i = info->ring.rsp_cons; i != rp; i++) { | |
30035 | + | |
30036 | + ring_res = RING_GET_RESPONSE(&info->ring, i); | |
30037 | + | |
30038 | + if (info->shadow[ring_res->rqid].act == VSCSIIF_ACT_SCSI_CDB) | |
30039 | + scsifront_cdb_cmd_done(info, ring_res); | |
30040 | + else | |
30041 | + scsifront_sync_cmd_done(info, ring_res); | |
30042 | + } | |
30043 | + | |
30044 | + info->ring.rsp_cons = i; | |
30045 | + | |
30046 | + if (i != info->ring.req_prod_pvt) { | |
30047 | + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); | |
30048 | + } else { | |
30049 | + info->ring.sring->rsp_event = i + 1; | |
30050 | + } | |
30051 | + | |
30052 | + spin_unlock_irqrestore(&info->io_lock, flags); | |
30053 | + | |
30054 | + | |
30055 | + /* Yield point for this unbounded loop. */ | |
30056 | + cond_resched(); | |
30057 | + | |
30058 | + return more_to_do; | |
30059 | +} | |
30060 | + | |
30061 | + | |
30062 | + | |
30063 | + | |
30064 | +int scsifront_schedule(void *data) | |
30065 | +{ | |
30066 | + struct vscsifrnt_info *info = (struct vscsifrnt_info *)data; | |
30067 | + | |
30068 | + while (!kthread_should_stop()) { | |
30069 | + wait_event_interruptible( | |
30070 | + info->wq, | |
30071 | + info->waiting_resp || kthread_should_stop()); | |
30072 | + | |
30073 | + info->waiting_resp = 0; | |
30074 | + smp_mb(); | |
30075 | + | |
30076 | + if (scsifront_cmd_done(info)) | |
30077 | + info->waiting_resp = 1; | |
30078 | + } | |
30079 | + | |
30080 | + return 0; | |
30081 | +} | |
30082 | + | |
30083 | + | |
30084 | + | |
30085 | +static int map_data_for_request(struct vscsifrnt_info *info, | |
30086 | + struct scsi_cmnd *sc, vscsiif_request_t *ring_req, uint32_t id) | |
30087 | +{ | |
30088 | + grant_ref_t gref_head; | |
30089 | + struct page *page; | |
30090 | + int err, i, ref, ref_cnt = 0; | |
30091 | + int write = (sc->sc_data_direction == DMA_TO_DEVICE); | |
30092 | + int nr_pages, off, len, bytes; | |
30093 | + unsigned long buffer_pfn; | |
30094 | + unsigned int data_len = 0; | |
30095 | + | |
30096 | + if (sc->sc_data_direction == DMA_NONE) | |
30097 | + return 0; | |
30098 | + | |
30099 | + err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head); | |
30100 | + if (err) { | |
30101 | + printk(KERN_ERR "scsifront: gnttab_alloc_grant_references() error\n"); | |
30102 | + return -ENOMEM; | |
30103 | + } | |
30104 | + | |
30105 | + if (sc->use_sg) { | |
30106 | + /* quoted scsi_lib.c/scsi_req_map_sg . */ | |
30107 | + struct scatterlist *sg = (struct scatterlist *)sc->request_buffer; | |
30108 | + nr_pages = (sc->request_bufflen + sg[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
30109 | + | |
30110 | + if (nr_pages > VSCSIIF_SG_TABLESIZE) { | |
30111 | + printk(KERN_ERR "scsifront: Unable to map request_buffer for command!\n"); | |
30112 | + ref_cnt = (-E2BIG); | |
30113 | + goto big_to_sg; | |
30114 | + } | |
30115 | + | |
30116 | + for (i = 0; i < sc->use_sg; i++) { | |
30117 | + page = sg[i].page; | |
30118 | + off = sg[i].offset; | |
30119 | + len = sg[i].length; | |
30120 | + data_len += len; | |
30121 | + | |
30122 | + buffer_pfn = page_to_phys(page) >> PAGE_SHIFT; | |
30123 | + | |
30124 | + while (len > 0) { | |
30125 | + bytes = min_t(unsigned int, len, PAGE_SIZE - off); | |
30126 | + | |
30127 | + ref = gnttab_claim_grant_reference(&gref_head); | |
30128 | + BUG_ON(ref == -ENOSPC); | |
30129 | + | |
30130 | + gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id, | |
30131 | + buffer_pfn, write); | |
30132 | + | |
30133 | + info->shadow[id].gref[ref_cnt] = ref; | |
30134 | + ring_req->seg[ref_cnt].gref = ref; | |
30135 | + ring_req->seg[ref_cnt].offset = (uint16_t)off; | |
30136 | + ring_req->seg[ref_cnt].length = (uint16_t)bytes; | |
30137 | + | |
30138 | + buffer_pfn++; | |
30139 | + len -= bytes; | |
30140 | + off = 0; | |
30141 | + ref_cnt++; | |
30142 | + } | |
30143 | + } | |
30144 | + } else if (sc->request_bufflen) { | |
30145 | + unsigned long end = ((unsigned long)sc->request_buffer | |
30146 | + + sc->request_bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
30147 | + unsigned long start = (unsigned long)sc->request_buffer >> PAGE_SHIFT; | |
30148 | + | |
30149 | + page = virt_to_page(sc->request_buffer); | |
30150 | + nr_pages = end - start; | |
30151 | + len = sc->request_bufflen; | |
30152 | + | |
30153 | + if (nr_pages > VSCSIIF_SG_TABLESIZE) { | |
30154 | + ref_cnt = (-E2BIG); | |
30155 | + goto big_to_sg; | |
30156 | + } | |
30157 | + | |
30158 | + buffer_pfn = page_to_phys(page) >> PAGE_SHIFT; | |
30159 | + | |
30160 | + off = offset_in_page((unsigned long)sc->request_buffer); | |
30161 | + for (i = 0; i < nr_pages; i++) { | |
30162 | + bytes = PAGE_SIZE - off; | |
30163 | + | |
30164 | + if (bytes > len) | |
30165 | + bytes = len; | |
30166 | + | |
30167 | + ref = gnttab_claim_grant_reference(&gref_head); | |
30168 | + BUG_ON(ref == -ENOSPC); | |
30169 | + | |
30170 | + gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id, | |
30171 | + buffer_pfn, write); | |
30172 | + | |
30173 | + info->shadow[id].gref[i] = ref; | |
30174 | + ring_req->seg[i].gref = ref; | |
30175 | + ring_req->seg[i].offset = (uint16_t)off; | |
30176 | + ring_req->seg[i].length = (uint16_t)bytes; | |
30177 | + | |
30178 | + buffer_pfn++; | |
30179 | + len -= bytes; | |
30180 | + off = 0; | |
30181 | + ref_cnt++; | |
30182 | + } | |
30183 | + } | |
30184 | + | |
30185 | +big_to_sg: | |
30186 | + | |
30187 | + gnttab_free_grant_references(gref_head); | |
30188 | + | |
30189 | + return ref_cnt; | |
30190 | +} | |
30191 | + | |
30192 | +static int scsifront_queuecommand(struct scsi_cmnd *sc, | |
30193 | + void (*done)(struct scsi_cmnd *)) | |
30194 | +{ | |
30195 | + struct vscsifrnt_info *info = | |
30196 | + (struct vscsifrnt_info *) sc->device->host->hostdata; | |
30197 | + vscsiif_request_t *ring_req; | |
30198 | + int ref_cnt; | |
30199 | + uint16_t rqid; | |
30200 | + | |
30201 | + if (RING_FULL(&info->ring)) { | |
30202 | + goto out_host_busy; | |
30203 | + } | |
30204 | + | |
30205 | + sc->scsi_done = done; | |
30206 | + sc->result = 0; | |
30207 | + | |
30208 | + ring_req = scsifront_pre_request(info); | |
30209 | + rqid = ring_req->rqid; | |
30210 | + ring_req->act = VSCSIIF_ACT_SCSI_CDB; | |
30211 | + | |
30212 | + ring_req->id = sc->device->id; | |
30213 | + ring_req->lun = sc->device->lun; | |
30214 | + ring_req->channel = sc->device->channel; | |
30215 | + ring_req->cmd_len = sc->cmd_len; | |
30216 | + | |
30217 | + BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE); | |
30218 | + | |
30219 | + if ( sc->cmd_len ) | |
30220 | + memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len); | |
30221 | + else | |
30222 | + memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE); | |
30223 | + | |
30224 | + ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction; | |
30225 | + ring_req->timeout_per_command = (sc->timeout_per_command / HZ); | |
30226 | + | |
30227 | + info->shadow[rqid].req_scsi_cmnd = (unsigned long)sc; | |
30228 | + info->shadow[rqid].sc_data_direction = sc->sc_data_direction; | |
30229 | + info->shadow[rqid].act = ring_req->act; | |
30230 | + | |
30231 | + ref_cnt = map_data_for_request(info, sc, ring_req, rqid); | |
30232 | + if (ref_cnt < 0) { | |
30233 | + add_id_to_freelist(info, rqid); | |
30234 | + if (ref_cnt == (-ENOMEM)) | |
30235 | + goto out_host_busy; | |
30236 | + else { | |
30237 | + sc->result = (DID_ERROR << 16); | |
30238 | + goto out_fail_command; | |
30239 | + } | |
30240 | + } | |
30241 | + | |
30242 | + ring_req->nr_segments = (uint8_t)ref_cnt; | |
30243 | + info->shadow[rqid].nr_segments = ref_cnt; | |
30244 | + | |
30245 | + scsifront_do_request(info); | |
30246 | + | |
30247 | + return 0; | |
30248 | + | |
30249 | +out_host_busy: | |
30250 | + return SCSI_MLQUEUE_HOST_BUSY; | |
30251 | + | |
30252 | +out_fail_command: | |
30253 | + done(sc); | |
30254 | + return 0; | |
30255 | +} | |
30256 | + | |
30257 | + | |
30258 | +static int scsifront_eh_abort_handler(struct scsi_cmnd *sc) | |
30259 | +{ | |
30260 | + return (FAILED); | |
30261 | +} | |
30262 | + | |
30263 | +/* vscsi supports only device_reset, because it is each of LUNs */ | |
30264 | +static int scsifront_dev_reset_handler(struct scsi_cmnd *sc) | |
30265 | +{ | |
30266 | + struct Scsi_Host *host = sc->device->host; | |
30267 | + struct vscsifrnt_info *info = | |
30268 | + (struct vscsifrnt_info *) sc->device->host->hostdata; | |
30269 | + | |
30270 | + vscsiif_request_t *ring_req; | |
30271 | + uint16_t rqid; | |
30272 | + int err; | |
30273 | + | |
30274 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) | |
30275 | + spin_lock_irq(host->host_lock); | |
30276 | +#endif | |
30277 | + | |
30278 | + ring_req = scsifront_pre_request(info); | |
30279 | + ring_req->act = VSCSIIF_ACT_SCSI_RESET; | |
30280 | + | |
30281 | + rqid = ring_req->rqid; | |
30282 | + info->shadow[rqid].act = VSCSIIF_ACT_SCSI_RESET; | |
30283 | + | |
30284 | + ring_req->channel = sc->device->channel; | |
30285 | + ring_req->id = sc->device->id; | |
30286 | + ring_req->lun = sc->device->lun; | |
30287 | + ring_req->cmd_len = sc->cmd_len; | |
30288 | + | |
30289 | + if ( sc->cmd_len ) | |
30290 | + memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len); | |
30291 | + else | |
30292 | + memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE); | |
30293 | + | |
30294 | + ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction; | |
30295 | + ring_req->timeout_per_command = (sc->timeout_per_command / HZ); | |
30296 | + ring_req->nr_segments = 0; | |
30297 | + | |
30298 | + scsifront_do_request(info); | |
30299 | + | |
30300 | + spin_unlock_irq(host->host_lock); | |
30301 | + wait_event_interruptible(info->shadow[rqid].wq_reset, | |
30302 | + info->shadow[rqid].wait_reset); | |
30303 | + spin_lock_irq(host->host_lock); | |
30304 | + | |
30305 | + err = info->shadow[rqid].rslt_reset; | |
30306 | + | |
30307 | + add_id_to_freelist(info, rqid); | |
30308 | + | |
30309 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) | |
30310 | + spin_unlock_irq(host->host_lock); | |
30311 | +#endif | |
30312 | + return (err); | |
30313 | +} | |
30314 | + | |
30315 | + | |
30316 | +struct scsi_host_template scsifront_sht = { | |
30317 | + .module = THIS_MODULE, | |
30318 | + .name = "Xen SCSI frontend driver", | |
30319 | + .queuecommand = scsifront_queuecommand, | |
30320 | + .eh_abort_handler = scsifront_eh_abort_handler, | |
30321 | + .eh_device_reset_handler= scsifront_dev_reset_handler, | |
30322 | + .cmd_per_lun = VSCSIIF_DEFAULT_CMD_PER_LUN, | |
30323 | + .can_queue = VSCSIIF_MAX_REQS, | |
30324 | + .this_id = -1, | |
30325 | + .sg_tablesize = VSCSIIF_SG_TABLESIZE, | |
30326 | + .use_clustering = DISABLE_CLUSTERING, | |
30327 | + .proc_name = "scsifront", | |
30328 | +}; | |
30329 | + | |
30330 | + | |
30331 | +static int __init scsifront_init(void) | |
30332 | +{ | |
30333 | + int err; | |
30334 | + | |
30335 | + if (!is_running_on_xen()) | |
30336 | + return -ENODEV; | |
30337 | + | |
30338 | + err = scsifront_xenbus_init(); | |
30339 | + | |
30340 | + return err; | |
30341 | +} | |
30342 | + | |
30343 | +static void __exit scsifront_exit(void) | |
30344 | +{ | |
30345 | + scsifront_xenbus_unregister(); | |
30346 | +} | |
30347 | + | |
30348 | +module_init(scsifront_init); | |
30349 | +module_exit(scsifront_exit); | |
30350 | + | |
30351 | +MODULE_DESCRIPTION("Xen SCSI frontend driver"); | |
30352 | +MODULE_LICENSE("GPL"); | |
30353 | Index: head-2008-11-25/drivers/xen/scsifront/xenbus.c | |
30354 | =================================================================== | |
30355 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
30356 | +++ head-2008-11-25/drivers/xen/scsifront/xenbus.c 2008-07-21 11:00:33.000000000 +0200 | |
30357 | @@ -0,0 +1,421 @@ | |
30358 | +/* | |
30359 | + * Xen SCSI frontend driver | |
30360 | + * | |
30361 | + * Copyright (c) 2008, FUJITSU Limited | |
30362 | + * | |
30363 | + * This program is free software; you can redistribute it and/or | |
30364 | + * modify it under the terms of the GNU General Public License version 2 | |
30365 | + * as published by the Free Software Foundation; or, when distributed | |
30366 | + * separately from the Linux kernel or incorporated into other | |
30367 | + * software packages, subject to the following license: | |
30368 | + * | |
30369 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
30370 | + * of this source file (the "Software"), to deal in the Software without | |
30371 | + * restriction, including without limitation the rights to use, copy, modify, | |
30372 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
30373 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
30374 | + * the following conditions: | |
30375 | + * | |
30376 | + * The above copyright notice and this permission notice shall be included in | |
30377 | + * all copies or substantial portions of the Software. | |
30378 | + * | |
30379 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
30380 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
30381 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
30382 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
30383 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
30384 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
30385 | + * IN THE SOFTWARE. | |
30386 | + */ | |
30387 | + | |
30388 | + | |
30389 | +#include <linux/version.h> | |
30390 | +#include "common.h" | |
30391 | + | |
30392 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) | |
30393 | + #define DEFAULT_TASK_COMM_LEN 16 | |
30394 | +#else | |
30395 | + #define DEFAULT_TASK_COMM_LEN TASK_COMM_LEN | |
30396 | +#endif | |
30397 | + | |
30398 | +extern struct scsi_host_template scsifront_sht; | |
30399 | + | |
30400 | +static void scsifront_free(struct vscsifrnt_info *info) | |
30401 | +{ | |
30402 | + struct Scsi_Host *host = info->host; | |
30403 | + | |
30404 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) | |
30405 | + if (host->shost_state != SHOST_DEL) { | |
30406 | +#else | |
30407 | + if (!test_bit(SHOST_DEL, &host->shost_state)) { | |
30408 | +#endif | |
30409 | + scsi_remove_host(info->host); | |
30410 | + } | |
30411 | + | |
30412 | + if (info->ring_ref != GRANT_INVALID_REF) { | |
30413 | + gnttab_end_foreign_access(info->ring_ref, | |
30414 | + (unsigned long)info->ring.sring); | |
30415 | + info->ring_ref = GRANT_INVALID_REF; | |
30416 | + info->ring.sring = NULL; | |
30417 | + } | |
30418 | + | |
30419 | + if (info->irq) | |
30420 | + unbind_from_irqhandler(info->irq, info); | |
30421 | + info->irq = 0; | |
30422 | + | |
30423 | + scsi_host_put(info->host); | |
30424 | +} | |
30425 | + | |
30426 | + | |
30427 | +static int scsifront_alloc_ring(struct vscsifrnt_info *info) | |
30428 | +{ | |
30429 | + struct xenbus_device *dev = info->dev; | |
30430 | + struct vscsiif_sring *sring; | |
30431 | + int err = -ENOMEM; | |
30432 | + | |
30433 | + | |
30434 | + info->ring_ref = GRANT_INVALID_REF; | |
30435 | + | |
30436 | + /***** Frontend to Backend ring start *****/ | |
30437 | + sring = (struct vscsiif_sring *) __get_free_page(GFP_KERNEL); | |
30438 | + if (!sring) { | |
30439 | + xenbus_dev_fatal(dev, err, "fail to allocate shared ring (Front to Back)"); | |
30440 | + return err; | |
30441 | + } | |
30442 | + SHARED_RING_INIT(sring); | |
30443 | + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); | |
30444 | + | |
30445 | + err = xenbus_grant_ring(dev, virt_to_mfn(sring)); | |
30446 | + if (err < 0) { | |
30447 | + free_page((unsigned long) sring); | |
30448 | + info->ring.sring = NULL; | |
30449 | + xenbus_dev_fatal(dev, err, "fail to grant shared ring (Front to Back)"); | |
30450 | + goto free_sring; | |
30451 | + } | |
30452 | + info->ring_ref = err; | |
30453 | + | |
30454 | + err = bind_listening_port_to_irqhandler( | |
30455 | + dev->otherend_id, scsifront_intr, | |
30456 | + SA_SAMPLE_RANDOM, "scsifront", info); | |
30457 | + | |
30458 | + if (err <= 0) { | |
30459 | + xenbus_dev_fatal(dev, err, "bind_listening_port_to_irqhandler"); | |
30460 | + goto free_sring; | |
30461 | + } | |
30462 | + info->irq = err; | |
30463 | + | |
30464 | + return 0; | |
30465 | + | |
30466 | +/* free resource */ | |
30467 | +free_sring: | |
30468 | + scsifront_free(info); | |
30469 | + | |
30470 | + return err; | |
30471 | +} | |
30472 | + | |
30473 | + | |
30474 | +static int scsifront_init_ring(struct vscsifrnt_info *info) | |
30475 | +{ | |
30476 | + struct xenbus_device *dev = info->dev; | |
30477 | + struct xenbus_transaction xbt; | |
30478 | + int err; | |
30479 | + | |
30480 | + DPRINTK("%s\n",__FUNCTION__); | |
30481 | + | |
30482 | + err = scsifront_alloc_ring(info); | |
30483 | + if (err) | |
30484 | + return err; | |
30485 | + DPRINTK("%u %u\n", info->ring_ref, info->evtchn); | |
30486 | + | |
30487 | +again: | |
30488 | + err = xenbus_transaction_start(&xbt); | |
30489 | + if (err) { | |
30490 | + xenbus_dev_fatal(dev, err, "starting transaction"); | |
30491 | + } | |
30492 | + | |
30493 | + err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u", | |
30494 | + info->ring_ref); | |
30495 | + if (err) { | |
30496 | + xenbus_dev_fatal(dev, err, "%s", "writing ring-ref"); | |
30497 | + goto fail; | |
30498 | + } | |
30499 | + | |
30500 | + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", | |
30501 | + irq_to_evtchn_port(info->irq)); | |
30502 | + | |
30503 | + if (err) { | |
30504 | + xenbus_dev_fatal(dev, err, "%s", "writing event-channel"); | |
30505 | + goto fail; | |
30506 | + } | |
30507 | + | |
30508 | + err = xenbus_transaction_end(xbt, 0); | |
30509 | + if (err) { | |
30510 | + if (err == -EAGAIN) | |
30511 | + goto again; | |
30512 | + xenbus_dev_fatal(dev, err, "completing transaction"); | |
30513 | + goto free_sring; | |
30514 | + } | |
30515 | + | |
30516 | + return 0; | |
30517 | + | |
30518 | +fail: | |
30519 | + xenbus_transaction_end(xbt, 1); | |
30520 | +free_sring: | |
30521 | + /* free resource */ | |
30522 | + scsifront_free(info); | |
30523 | + | |
30524 | + return err; | |
30525 | +} | |
30526 | + | |
30527 | + | |
30528 | +static int scsifront_probe(struct xenbus_device *dev, | |
30529 | + const struct xenbus_device_id *id) | |
30530 | +{ | |
30531 | + struct vscsifrnt_info *info; | |
30532 | + struct Scsi_Host *host; | |
30533 | + int i, err = -ENOMEM; | |
30534 | + char name[DEFAULT_TASK_COMM_LEN]; | |
30535 | + | |
30536 | + host = scsi_host_alloc(&scsifront_sht, sizeof(*info)); | |
30537 | + if (!host) { | |
30538 | + xenbus_dev_fatal(dev, err, "fail to allocate scsi host"); | |
30539 | + return err; | |
30540 | + } | |
30541 | + info = (struct vscsifrnt_info *) host->hostdata; | |
30542 | + info->host = host; | |
30543 | + | |
30544 | + | |
30545 | + dev->dev.driver_data = info; | |
30546 | + info->dev = dev; | |
30547 | + | |
30548 | + for (i = 0; i < VSCSIIF_MAX_REQS; i++) { | |
30549 | + info->shadow[i].next_free = i + 1; | |
30550 | + init_waitqueue_head(&(info->shadow[i].wq_reset)); | |
30551 | + info->shadow[i].wait_reset = 0; | |
30552 | + } | |
30553 | + info->shadow[VSCSIIF_MAX_REQS - 1].next_free = 0x0fff; | |
30554 | + | |
30555 | + err = scsifront_init_ring(info); | |
30556 | + if (err) { | |
30557 | + scsi_host_put(host); | |
30558 | + return err; | |
30559 | + } | |
30560 | + | |
30561 | + init_waitqueue_head(&info->wq); | |
30562 | + spin_lock_init(&info->io_lock); | |
30563 | + spin_lock_init(&info->shadow_lock); | |
30564 | + | |
30565 | + snprintf(name, DEFAULT_TASK_COMM_LEN, "vscsiif.%d", info->host->host_no); | |
30566 | + | |
30567 | + info->kthread = kthread_run(scsifront_schedule, info, name); | |
30568 | + if (IS_ERR(info->kthread)) { | |
30569 | + err = PTR_ERR(info->kthread); | |
30570 | + info->kthread = NULL; | |
30571 | + printk(KERN_ERR "scsifront: kthread start err %d\n", err); | |
30572 | + goto free_sring; | |
30573 | + } | |
30574 | + | |
30575 | + host->max_id = VSCSIIF_MAX_TARGET; | |
30576 | + host->max_channel = 0; | |
30577 | + host->max_lun = VSCSIIF_MAX_LUN; | |
30578 | + host->max_sectors = (VSCSIIF_SG_TABLESIZE - 1) * PAGE_SIZE / 512; | |
30579 | + | |
30580 | + err = scsi_add_host(host, &dev->dev); | |
30581 | + if (err) { | |
30582 | + printk(KERN_ERR "scsifront: fail to add scsi host %d\n", err); | |
30583 | + goto free_sring; | |
30584 | + } | |
30585 | + | |
30586 | + xenbus_switch_state(dev, XenbusStateInitialised); | |
30587 | + | |
30588 | + return 0; | |
30589 | + | |
30590 | +free_sring: | |
30591 | + /* free resource */ | |
30592 | + scsifront_free(info); | |
30593 | + return err; | |
30594 | +} | |
30595 | + | |
30596 | +static int scsifront_remove(struct xenbus_device *dev) | |
30597 | +{ | |
30598 | + struct vscsifrnt_info *info = dev->dev.driver_data; | |
30599 | + | |
30600 | + DPRINTK("%s: %s removed\n",__FUNCTION__ ,dev->nodename); | |
30601 | + | |
30602 | + if (info->kthread) { | |
30603 | + kthread_stop(info->kthread); | |
30604 | + info->kthread = NULL; | |
30605 | + } | |
30606 | + | |
30607 | + scsifront_free(info); | |
30608 | + | |
30609 | + return 0; | |
30610 | +} | |
30611 | + | |
30612 | + | |
30613 | +static int scsifront_disconnect(struct vscsifrnt_info *info) | |
30614 | +{ | |
30615 | + struct xenbus_device *dev = info->dev; | |
30616 | + struct Scsi_Host *host = info->host; | |
30617 | + | |
30618 | + DPRINTK("%s: %s disconnect\n",__FUNCTION__ ,dev->nodename); | |
30619 | + | |
30620 | + /* | |
30621 | + When this function is executed, all devices of | |
30622 | + Frontend have been deleted. | |
30623 | + Therefore, it need not block I/O before remove_host. | |
30624 | + */ | |
30625 | + | |
30626 | + scsi_remove_host(host); | |
30627 | + xenbus_frontend_closed(dev); | |
30628 | + | |
30629 | + return 0; | |
30630 | +} | |
30631 | + | |
30632 | +#define VSCSIFRONT_OP_ADD_LUN 1 | |
30633 | +#define VSCSIFRONT_OP_DEL_LUN 2 | |
30634 | + | |
30635 | +static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) | |
30636 | +{ | |
30637 | + struct xenbus_device *dev = info->dev; | |
30638 | + int i, err = 0; | |
30639 | + char str[64], state_str[64]; | |
30640 | + char **dir; | |
30641 | + unsigned int dir_n = 0; | |
30642 | + unsigned int device_state; | |
30643 | + unsigned int hst, chn, tgt, lun; | |
30644 | + struct scsi_device *sdev; | |
30645 | + | |
30646 | + dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n); | |
30647 | + if (IS_ERR(dir)) | |
30648 | + return; | |
30649 | + | |
30650 | + for (i = 0; i < dir_n; i++) { | |
30651 | + /* read status */ | |
30652 | + snprintf(str, sizeof(str), "vscsi-devs/%s/state", dir[i]); | |
30653 | + err = xenbus_scanf(XBT_NIL, dev->otherend, str, "%u", | |
30654 | + &device_state); | |
30655 | + if (XENBUS_EXIST_ERR(err)) | |
30656 | + continue; | |
30657 | + | |
30658 | + /* virtual SCSI device */ | |
30659 | + snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]); | |
30660 | + err = xenbus_scanf(XBT_NIL, dev->otherend, str, | |
30661 | + "%u:%u:%u:%u", &hst, &chn, &tgt, &lun); | |
30662 | + if (XENBUS_EXIST_ERR(err)) | |
30663 | + continue; | |
30664 | + | |
30665 | + /* front device state path */ | |
30666 | + snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]); | |
30667 | + | |
30668 | + switch (op) { | |
30669 | + case VSCSIFRONT_OP_ADD_LUN: | |
30670 | + if (device_state == XenbusStateInitialised) { | |
30671 | + sdev = scsi_device_lookup(info->host, chn, tgt, lun); | |
30672 | + if (sdev) { | |
30673 | + printk(KERN_ERR "scsifront: Device already in use.\n"); | |
30674 | + scsi_device_put(sdev); | |
30675 | + xenbus_printf(XBT_NIL, dev->nodename, | |
30676 | + state_str, "%d", XenbusStateClosed); | |
30677 | + } else { | |
30678 | + scsi_add_device(info->host, chn, tgt, lun); | |
30679 | + xenbus_printf(XBT_NIL, dev->nodename, | |
30680 | + state_str, "%d", XenbusStateConnected); | |
30681 | + } | |
30682 | + } | |
30683 | + break; | |
30684 | + case VSCSIFRONT_OP_DEL_LUN: | |
30685 | + if (device_state == XenbusStateClosing) { | |
30686 | + sdev = scsi_device_lookup(info->host, chn, tgt, lun); | |
30687 | + if (sdev) { | |
30688 | + scsi_remove_device(sdev); | |
30689 | + scsi_device_put(sdev); | |
30690 | + xenbus_printf(XBT_NIL, dev->nodename, | |
30691 | + state_str, "%d", XenbusStateClosed); | |
30692 | + } | |
30693 | + } | |
30694 | + break; | |
30695 | + default: | |
30696 | + break; | |
30697 | + } | |
30698 | + } | |
30699 | + | |
30700 | + kfree(dir); | |
30701 | + return; | |
30702 | +} | |
30703 | + | |
30704 | + | |
30705 | + | |
30706 | + | |
30707 | +static void scsifront_backend_changed(struct xenbus_device *dev, | |
30708 | + enum xenbus_state backend_state) | |
30709 | +{ | |
30710 | + struct vscsifrnt_info *info = dev->dev.driver_data; | |
30711 | + | |
30712 | + DPRINTK("%p %u %u\n", dev, dev->state, backend_state); | |
30713 | + | |
30714 | + switch (backend_state) { | |
30715 | + case XenbusStateUnknown: | |
30716 | + case XenbusStateInitialising: | |
30717 | + case XenbusStateInitWait: | |
30718 | + case XenbusStateClosed: | |
30719 | + break; | |
30720 | + | |
30721 | + case XenbusStateInitialised: | |
30722 | + break; | |
30723 | + | |
30724 | + case XenbusStateConnected: | |
30725 | + if (xenbus_read_driver_state(dev->nodename) == | |
30726 | + XenbusStateInitialised) { | |
30727 | + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN); | |
30728 | + } | |
30729 | + | |
30730 | + if (dev->state == XenbusStateConnected) | |
30731 | + break; | |
30732 | + | |
30733 | + xenbus_switch_state(dev, XenbusStateConnected); | |
30734 | + break; | |
30735 | + | |
30736 | + case XenbusStateClosing: | |
30737 | + scsifront_disconnect(info); | |
30738 | + break; | |
30739 | + | |
30740 | + case XenbusStateReconfiguring: | |
30741 | + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN); | |
30742 | + xenbus_switch_state(dev, XenbusStateReconfiguring); | |
30743 | + break; | |
30744 | + | |
30745 | + case XenbusStateReconfigured: | |
30746 | + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN); | |
30747 | + xenbus_switch_state(dev, XenbusStateConnected); | |
30748 | + break; | |
30749 | + } | |
30750 | +} | |
30751 | + | |
30752 | + | |
30753 | +static struct xenbus_device_id scsifront_ids[] = { | |
30754 | + { "vscsi" }, | |
30755 | + { "" } | |
30756 | +}; | |
30757 | + | |
30758 | + | |
30759 | +static struct xenbus_driver scsifront_driver = { | |
30760 | + .name = "vscsi", | |
30761 | + .owner = THIS_MODULE, | |
30762 | + .ids = scsifront_ids, | |
30763 | + .probe = scsifront_probe, | |
30764 | + .remove = scsifront_remove, | |
30765 | +/* .resume = scsifront_resume, */ | |
30766 | + .otherend_changed = scsifront_backend_changed, | |
30767 | +}; | |
30768 | + | |
30769 | +int scsifront_xenbus_init(void) | |
30770 | +{ | |
30771 | + return xenbus_register_frontend(&scsifront_driver); | |
30772 | +} | |
30773 | + | |
30774 | +void scsifront_xenbus_unregister(void) | |
30775 | +{ | |
30776 | + xenbus_unregister_driver(&scsifront_driver); | |
30777 | +} | |
30778 | + | |
30779 | Index: head-2008-11-25/drivers/xen/sfc_netback/Makefile | |
30780 | =================================================================== | |
30781 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
30782 | +++ head-2008-11-25/drivers/xen/sfc_netback/Makefile 2008-02-26 10:54:11.000000000 +0100 | |
30783 | @@ -0,0 +1,12 @@ | |
30784 | +EXTRA_CFLAGS += -Idrivers/xen/sfc_netback -Idrivers/xen/sfc_netutil -Idrivers/xen/netback -Idrivers/net/sfc | |
30785 | +EXTRA_CFLAGS += -D__ci_driver__ | |
30786 | +EXTRA_CFLAGS += -DEFX_USE_KCOMPAT | |
30787 | +EXTRA_CFLAGS += -Werror | |
30788 | + | |
30789 | +ifdef GCOV | |
30790 | +EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV | |
30791 | +endif | |
30792 | + | |
30793 | +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_BACKEND) := sfc_netback.o | |
30794 | + | |
30795 | +sfc_netback-objs := accel.o accel_fwd.o accel_msg.o accel_solarflare.o accel_xenbus.o accel_debugfs.o | |
30796 | Index: head-2008-11-25/drivers/xen/sfc_netback/accel.c | |
30797 | =================================================================== | |
30798 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
30799 | +++ head-2008-11-25/drivers/xen/sfc_netback/accel.c 2008-02-26 10:54:11.000000000 +0100 | |
30800 | @@ -0,0 +1,129 @@ | |
30801 | +/**************************************************************************** | |
30802 | + * Solarflare driver for Xen network acceleration | |
30803 | + * | |
30804 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
30805 | + * 9501 Jeronimo Road, Suite 250, | |
30806 | + * Irvine, CA 92618, USA | |
30807 | + * | |
30808 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
30809 | + * | |
30810 | + * This program is free software; you can redistribute it and/or modify it | |
30811 | + * under the terms of the GNU General Public License version 2 as published | |
30812 | + * by the Free Software Foundation, incorporated herein by reference. | |
30813 | + * | |
30814 | + * This program is distributed in the hope that it will be useful, | |
30815 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
30816 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
30817 | + * GNU General Public License for more details. | |
30818 | + * | |
30819 | + * You should have received a copy of the GNU General Public License | |
30820 | + * along with this program; if not, write to the Free Software | |
30821 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
30822 | + **************************************************************************** | |
30823 | + */ | |
30824 | + | |
30825 | +#include "accel.h" | |
30826 | +#include "accel_msg_iface.h" | |
30827 | +#include "accel_solarflare.h" | |
30828 | + | |
30829 | +#include <linux/notifier.h> | |
30830 | + | |
30831 | +#ifdef EFX_GCOV | |
30832 | +#include "gcov.h" | |
30833 | +#endif | |
30834 | + | |
30835 | +static int netback_accel_netdev_event(struct notifier_block *nb, | |
30836 | + unsigned long event, void *ptr) | |
30837 | +{ | |
30838 | + struct net_device *net_dev = (struct net_device *)ptr; | |
30839 | + struct netback_accel *bend; | |
30840 | + | |
30841 | + if ((event == NETDEV_UP) || (event == NETDEV_DOWN)) { | |
30842 | + mutex_lock(&bend_list_mutex); | |
30843 | + bend = bend_list; | |
30844 | + while (bend != NULL) { | |
30845 | + mutex_lock(&bend->bend_mutex); | |
30846 | + /* | |
30847 | + * This happens when the shared pages have | |
30848 | + * been unmapped, but the bend not yet removed | |
30849 | + * from list | |
30850 | + */ | |
30851 | + if (bend->shared_page == NULL) | |
30852 | + goto next; | |
30853 | + | |
30854 | + if (bend->net_dev->ifindex == net_dev->ifindex) | |
30855 | + netback_accel_set_interface_state | |
30856 | + (bend, event == NETDEV_UP); | |
30857 | + | |
30858 | + next: | |
30859 | + mutex_unlock(&bend->bend_mutex); | |
30860 | + bend = bend->next_bend; | |
30861 | + } | |
30862 | + mutex_unlock(&bend_list_mutex); | |
30863 | + } | |
30864 | + | |
30865 | + return NOTIFY_DONE; | |
30866 | +} | |
30867 | + | |
30868 | + | |
30869 | +static struct notifier_block netback_accel_netdev_notifier = { | |
30870 | + .notifier_call = netback_accel_netdev_event, | |
30871 | +}; | |
30872 | + | |
30873 | + | |
30874 | +unsigned sfc_netback_max_pages = NETBACK_ACCEL_DEFAULT_MAX_BUF_PAGES; | |
30875 | +module_param_named(max_pages, sfc_netback_max_pages, uint, 0644); | |
30876 | +MODULE_PARM_DESC(max_pages, | |
30877 | + "The number of buffer pages to enforce on each guest"); | |
30878 | + | |
30879 | +/* Initialise subsystems need for the accelerated fast path */ | |
30880 | +static int __init netback_accel_init(void) | |
30881 | +{ | |
30882 | + int rc = 0; | |
30883 | + | |
30884 | +#ifdef EFX_GCOV | |
30885 | + gcov_provider_init(THIS_MODULE); | |
30886 | +#endif | |
30887 | + | |
30888 | + rc = netback_accel_init_fwd(); | |
30889 | + | |
30890 | + if (rc == 0) | |
30891 | + netback_accel_debugfs_init(); | |
30892 | + | |
30893 | + if (rc == 0) | |
30894 | + rc = netback_accel_sf_init(); | |
30895 | + | |
30896 | + if (rc == 0) | |
30897 | + rc = register_netdevice_notifier | |
30898 | + (&netback_accel_netdev_notifier); | |
30899 | + | |
30900 | + /* | |
30901 | + * What if no device was found, shouldn't we clean up stuff | |
30902 | + * we've allocated for acceleration subsystem? | |
30903 | + */ | |
30904 | + | |
30905 | + return rc; | |
30906 | +} | |
30907 | + | |
30908 | +module_init(netback_accel_init); | |
30909 | + | |
30910 | +static void __exit netback_accel_exit(void) | |
30911 | +{ | |
30912 | + unregister_netdevice_notifier(&netback_accel_netdev_notifier); | |
30913 | + | |
30914 | + netback_accel_sf_shutdown(); | |
30915 | + | |
30916 | + netback_accel_shutdown_bends(); | |
30917 | + | |
30918 | + netback_accel_debugfs_fini(); | |
30919 | + | |
30920 | + netback_accel_shutdown_fwd(); | |
30921 | + | |
30922 | +#ifdef EFX_GCOV | |
30923 | + gcov_provider_fini(THIS_MODULE); | |
30924 | +#endif | |
30925 | +} | |
30926 | + | |
30927 | +module_exit(netback_accel_exit); | |
30928 | + | |
30929 | +MODULE_LICENSE("GPL"); | |
30930 | Index: head-2008-11-25/drivers/xen/sfc_netback/accel.h | |
30931 | =================================================================== | |
30932 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
30933 | +++ head-2008-11-25/drivers/xen/sfc_netback/accel.h 2008-02-26 10:54:11.000000000 +0100 | |
30934 | @@ -0,0 +1,393 @@ | |
30935 | +/**************************************************************************** | |
30936 | + * Solarflare driver for Xen network acceleration | |
30937 | + * | |
30938 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
30939 | + * 9501 Jeronimo Road, Suite 250, | |
30940 | + * Irvine, CA 92618, USA | |
30941 | + * | |
30942 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
30943 | + * | |
30944 | + * This program is free software; you can redistribute it and/or modify it | |
30945 | + * under the terms of the GNU General Public License version 2 as published | |
30946 | + * by the Free Software Foundation, incorporated herein by reference. | |
30947 | + * | |
30948 | + * This program is distributed in the hope that it will be useful, | |
30949 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
30950 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
30951 | + * GNU General Public License for more details. | |
30952 | + * | |
30953 | + * You should have received a copy of the GNU General Public License | |
30954 | + * along with this program; if not, write to the Free Software | |
30955 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
30956 | + **************************************************************************** | |
30957 | + */ | |
30958 | + | |
30959 | +#ifndef NETBACK_ACCEL_H | |
30960 | +#define NETBACK_ACCEL_H | |
30961 | + | |
30962 | +#include <linux/slab.h> | |
30963 | +#include <linux/ip.h> | |
30964 | +#include <linux/tcp.h> | |
30965 | +#include <linux/udp.h> | |
30966 | +#include <linux/in.h> | |
30967 | +#include <linux/netdevice.h> | |
30968 | +#include <linux/etherdevice.h> | |
30969 | +#include <linux/mutex.h> | |
30970 | +#include <linux/wait.h> | |
30971 | + | |
30972 | +#include <xen/xenbus.h> | |
30973 | + | |
30974 | +#include "accel_shared_fifo.h" | |
30975 | +#include "accel_msg_iface.h" | |
30976 | +#include "accel_util.h" | |
30977 | + | |
30978 | +/************************************************************************** | |
30979 | + * Datatypes | |
30980 | + **************************************************************************/ | |
30981 | + | |
30982 | +#define NETBACK_ACCEL_DEFAULT_MAX_FILTERS (8) | |
30983 | +#define NETBACK_ACCEL_DEFAULT_MAX_MCASTS (8) | |
30984 | +#define NETBACK_ACCEL_DEFAULT_MAX_BUF_PAGES (384) | |
30985 | +/* Variable to store module parameter for max_buf_pages */ | |
30986 | +extern unsigned sfc_netback_max_pages; | |
30987 | + | |
30988 | +#define NETBACK_ACCEL_STATS 1 | |
30989 | + | |
30990 | +#if NETBACK_ACCEL_STATS | |
30991 | +#define NETBACK_ACCEL_STATS_OP(x) x | |
30992 | +#else | |
30993 | +#define NETBACK_ACCEL_STATS_OP(x) | |
30994 | +#endif | |
30995 | + | |
30996 | +/*! Statistics for a given backend */ | |
30997 | +struct netback_accel_stats { | |
30998 | + /*! Number of eventq wakeup events */ | |
30999 | + u64 evq_wakeups; | |
31000 | + /*! Number of eventq timeout events */ | |
31001 | + u64 evq_timeouts; | |
31002 | + /*! Number of filters used */ | |
31003 | + u32 num_filters; | |
31004 | + /*! Number of buffer pages registered */ | |
31005 | + u32 num_buffer_pages; | |
31006 | +}; | |
31007 | + | |
31008 | + | |
31009 | +/* Debug fs nodes for each of the above stats */ | |
31010 | +struct netback_accel_dbfs { | |
31011 | + struct dentry *evq_wakeups; | |
31012 | + struct dentry *evq_timeouts; | |
31013 | + struct dentry *num_filters; | |
31014 | + struct dentry *num_buffer_pages; | |
31015 | +}; | |
31016 | + | |
31017 | + | |
31018 | +/*! Resource limits for a given NIC */ | |
31019 | +struct netback_accel_limits { | |
31020 | + int max_filters; /*!< Max. number of filters to use. */ | |
31021 | + int max_mcasts; /*!< Max. number of mcast subscriptions */ | |
31022 | + int max_buf_pages; /*!< Max. number of pages of NIC buffers */ | |
31023 | +}; | |
31024 | + | |
31025 | + | |
31026 | +/*! The state for an instance of the back end driver. */ | |
31027 | +struct netback_accel { | |
31028 | + /*! mutex to protect this state */ | |
31029 | + struct mutex bend_mutex; | |
31030 | + | |
31031 | + /*! Watches on xenstore */ | |
31032 | + struct xenbus_watch domu_accel_watch; | |
31033 | + struct xenbus_watch config_accel_watch; | |
31034 | + | |
31035 | + /*! Pointer to whatever device cookie ties us in to the hypervisor */ | |
31036 | + void *hdev_data; | |
31037 | + | |
31038 | + /*! FIFO indices. Next page is msg FIFOs */ | |
31039 | + struct net_accel_shared_page *shared_page; | |
31040 | + | |
31041 | + /*! Defer control message processing */ | |
31042 | + struct work_struct handle_msg; | |
31043 | + | |
31044 | + /*! Identifies other end VM and interface.*/ | |
31045 | + int far_end; | |
31046 | + int vif_num; | |
31047 | + | |
31048 | + /*!< To unmap the shared pages */ | |
31049 | + void *sh_pages_unmap; | |
31050 | + | |
31051 | + /* Resource tracking */ | |
31052 | + /*! Limits on H/W & Dom0 resources */ | |
31053 | + struct netback_accel_limits quotas; | |
31054 | + | |
31055 | + /* Hardware resources */ | |
31056 | + /*! The H/W type of associated NIC */ | |
31057 | + enum net_accel_hw_type hw_type; | |
31058 | + /*! State of allocation */ | |
31059 | + int hw_state; | |
31060 | + /*! Index into ci_driver.nics[] for this interface */ | |
31061 | + int nic_index; | |
31062 | + /*! How to set up the acceleration for this hardware */ | |
31063 | + int (*accel_setup)(struct netback_accel *); | |
31064 | + /*! And how to stop it. */ | |
31065 | + void (*accel_shutdown)(struct netback_accel *); | |
31066 | + | |
31067 | + /*! The physical/real net_dev for this interface */ | |
31068 | + struct net_device *net_dev; | |
31069 | + | |
31070 | + /*! Magic pointer to locate state in fowarding table */ | |
31071 | + void *fwd_priv; | |
31072 | + | |
31073 | + /*! Message FIFO */ | |
31074 | + sh_msg_fifo2 to_domU; | |
31075 | + /*! Message FIFO */ | |
31076 | + sh_msg_fifo2 from_domU; | |
31077 | + | |
31078 | + /*! General notification channel id */ | |
31079 | + int msg_channel; | |
31080 | + /*! General notification channel irq */ | |
31081 | + int msg_channel_irq; | |
31082 | + | |
31083 | + /*! Event channel id dedicated to network packet interrupts. */ | |
31084 | + int net_channel; | |
31085 | + /*! Event channel irq dedicated to network packets interrupts */ | |
31086 | + int net_channel_irq; | |
31087 | + | |
31088 | + /*! The MAC address the frontend goes by. */ | |
31089 | + u8 mac[ETH_ALEN]; | |
31090 | + /*! Driver name of associated NIC */ | |
31091 | + char *nicname; | |
31092 | + | |
31093 | + /*! Array of pointers to buffer pages mapped */ | |
31094 | + grant_handle_t *buffer_maps; | |
31095 | + u64 *buffer_addrs; | |
31096 | + /*! Index into buffer_maps */ | |
31097 | + int buffer_maps_index; | |
31098 | + /*! Max number of pages that domU is allowed/will request to map */ | |
31099 | + int max_pages; | |
31100 | + | |
31101 | + /*! Pointer to hardware specific private area */ | |
31102 | + void *accel_hw_priv; | |
31103 | + | |
31104 | + /*! Wait queue for changes in accelstate. */ | |
31105 | + wait_queue_head_t state_wait_queue; | |
31106 | + | |
31107 | + /*! Current state of the frontend according to the xenbus | |
31108 | + * watch. */ | |
31109 | + XenbusState frontend_state; | |
31110 | + | |
31111 | + /*! Current state of this backend. */ | |
31112 | + XenbusState backend_state; | |
31113 | + | |
31114 | + /*! Non-zero if the backend is being removed. */ | |
31115 | + int removing; | |
31116 | + | |
31117 | + /*! Non-zero if the setup_vnic has been called. */ | |
31118 | + int vnic_is_setup; | |
31119 | + | |
31120 | +#if NETBACK_ACCEL_STATS | |
31121 | + struct netback_accel_stats stats; | |
31122 | +#endif | |
31123 | +#if defined(CONFIG_DEBUG_FS) | |
31124 | + char *dbfs_dir_name; | |
31125 | + struct dentry *dbfs_dir; | |
31126 | + struct netback_accel_dbfs dbfs; | |
31127 | +#endif | |
31128 | + | |
31129 | + /*! List */ | |
31130 | + struct netback_accel *next_bend; | |
31131 | +}; | |
31132 | + | |
31133 | + | |
31134 | +/* | |
31135 | + * Values for netback_accel.hw_state. States of resource allocation | |
31136 | + * we can go through | |
31137 | + */ | |
31138 | +/*! No hardware has yet been allocated. */ | |
31139 | +#define NETBACK_ACCEL_RES_NONE (0) | |
31140 | +/*! Hardware has been allocated. */ | |
31141 | +#define NETBACK_ACCEL_RES_ALLOC (1) | |
31142 | +#define NETBACK_ACCEL_RES_FILTER (2) | |
31143 | +#define NETBACK_ACCEL_RES_HWINFO (3) | |
31144 | + | |
31145 | +/*! Filtering specification. This assumes that for VNIC support we | |
31146 | + * will always want wildcard entries, so only specifies the | |
31147 | + * destination IP/port | |
31148 | + */ | |
31149 | +struct netback_accel_filter_spec { | |
31150 | + /*! Internal, used to access efx_vi API */ | |
31151 | + void *filter_handle; | |
31152 | + | |
31153 | + /*! Destination IP in network order */ | |
31154 | + u32 destip_be; | |
31155 | + /*! Destination port in network order */ | |
31156 | + u16 destport_be; | |
31157 | + /*! Mac address */ | |
31158 | + u8 mac[ETH_ALEN]; | |
31159 | + /*! TCP or UDP */ | |
31160 | + u8 proto; | |
31161 | +}; | |
31162 | + | |
31163 | + | |
31164 | +/************************************************************************** | |
31165 | + * From accel.c | |
31166 | + **************************************************************************/ | |
31167 | + | |
31168 | +/*! \brief Start up all the acceleration plugins | |
31169 | + * | |
31170 | + * \return 0 on success, an errno on failure | |
31171 | + */ | |
31172 | +extern int netback_accel_init_accel(void); | |
31173 | + | |
31174 | +/*! \brief Shut down all the acceleration plugins | |
31175 | + */ | |
31176 | +extern void netback_accel_shutdown_accel(void); | |
31177 | + | |
31178 | + | |
31179 | +/************************************************************************** | |
31180 | + * From accel_fwd.c | |
31181 | + **************************************************************************/ | |
31182 | + | |
31183 | +/*! \brief Init the forwarding infrastructure | |
31184 | + * \return 0 on success, or -ENOMEM if it couldn't get memory for the | |
31185 | + * forward table | |
31186 | + */ | |
31187 | +extern int netback_accel_init_fwd(void); | |
31188 | + | |
31189 | +/*! \brief Shut down the forwarding and free memory. */ | |
31190 | +extern void netback_accel_shutdown_fwd(void); | |
31191 | + | |
31192 | +/*! Initialise each nic port's fowarding table */ | |
31193 | +extern void *netback_accel_init_fwd_port(void); | |
31194 | +extern void netback_accel_shutdown_fwd_port(void *fwd_priv); | |
31195 | + | |
31196 | +/*! \brief Add an entry to the forwarding table. | |
31197 | + * \param mac : MAC address, used as hash key | |
31198 | + * \param ctxt : value to associate with key (can be NULL, see | |
31199 | + * netback_accel_fwd_set_context) | |
31200 | + * \return 0 on success, -ENOMEM if table was full and could no grow it | |
31201 | + */ | |
31202 | +extern int netback_accel_fwd_add(const __u8 *mac, void *context, | |
31203 | + void *fwd_priv); | |
31204 | + | |
31205 | +/*! \brief Remove an entry from the forwarding table. | |
31206 | + * \param mac : the MAC address to remove | |
31207 | + * \return nothing: it is not an error if the mac was not in the table | |
31208 | + */ | |
31209 | +extern void netback_accel_fwd_remove(const __u8 *mac, void *fwd_priv); | |
31210 | + | |
31211 | +/*! \brief Set the context pointer for an existing fwd table entry. | |
31212 | + * \param mac : key that is already present in the table | |
31213 | + * \param context : new value to associate with key | |
31214 | + * \return 0 on success, -ENOENT if mac not present in table. | |
31215 | + */ | |
31216 | +extern int netback_accel_fwd_set_context(const __u8 *mac, void *context, | |
31217 | + void *fwd_priv); | |
31218 | + | |
31219 | +/************************************************************************** | |
31220 | + * From accel_msg.c | |
31221 | + **************************************************************************/ | |
31222 | + | |
31223 | + | |
31224 | +/*! \brief Send the start-of-day message that handshakes with the VNIC | |
31225 | + * and tells it its MAC address. | |
31226 | + * | |
31227 | + * \param bend The back end driver data structure | |
31228 | + * \param version The version of communication to use, e.g. NET_ACCEL_MSG_VERSION | |
31229 | + */ | |
31230 | +extern void netback_accel_msg_tx_hello(struct netback_accel *bend, | |
31231 | + unsigned version); | |
31232 | + | |
31233 | +/*! \brief Send a "there's a new local mac address" message | |
31234 | + * | |
31235 | + * \param bend The back end driver data structure for the vnic to send | |
31236 | + * the message to | |
31237 | + * \param mac Pointer to the new mac address | |
31238 | + */ | |
31239 | +extern void netback_accel_msg_tx_new_localmac(struct netback_accel *bend, | |
31240 | + const void *mac); | |
31241 | + | |
31242 | +/*! \brief Send a "a mac address that was local has gone away" message | |
31243 | + * | |
31244 | + * \param bend The back end driver data structure for the vnic to send | |
31245 | + * the message to | |
31246 | + * \param mac Pointer to the old mac address | |
31247 | + */ | |
31248 | +extern void netback_accel_msg_tx_old_localmac(struct netback_accel *bend, | |
31249 | + const void *mac); | |
31250 | + | |
31251 | +extern void netback_accel_set_interface_state(struct netback_accel *bend, | |
31252 | + int up); | |
31253 | + | |
31254 | +/*! \brief Process the message queue for a bend that has just | |
31255 | + * interrupted. | |
31256 | + * | |
31257 | + * Demultiplexs an interrupt from the front end driver, taking | |
31258 | + * messages from the fifo and taking appropriate action. | |
31259 | + * | |
31260 | + * \param bend The back end driver data structure | |
31261 | + */ | |
31262 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
31263 | +extern void netback_accel_msg_rx_handler(struct work_struct *arg); | |
31264 | +#else | |
31265 | +extern void netback_accel_msg_rx_handler(void *bend_void); | |
31266 | +#endif | |
31267 | + | |
31268 | +/************************************************************************** | |
31269 | + * From accel_xenbus.c | |
31270 | + **************************************************************************/ | |
31271 | +/*! List of all the bends currently in existence. */ | |
31272 | +extern struct netback_accel *bend_list; | |
31273 | +extern struct mutex bend_list_mutex; | |
31274 | + | |
31275 | +/*! \brief Probe a new network interface. */ | |
31276 | +extern int netback_accel_probe(struct xenbus_device *dev); | |
31277 | + | |
31278 | +/*! \brief Remove a network interface. */ | |
31279 | +extern int netback_accel_remove(struct xenbus_device *dev); | |
31280 | + | |
31281 | +/*! \brief Shutdown all accelerator backends */ | |
31282 | +extern void netback_accel_shutdown_bends(void); | |
31283 | + | |
31284 | +/*! \brief Initiate the xenbus state teardown handshake */ | |
31285 | +extern void netback_accel_set_closing(struct netback_accel *bend); | |
31286 | + | |
31287 | +/************************************************************************** | |
31288 | + * From accel_debugfs.c | |
31289 | + **************************************************************************/ | |
31290 | +/*! Global statistics */ | |
31291 | +struct netback_accel_global_stats { | |
31292 | + /*! Number of TX packets seen through driverlink */ | |
31293 | + u64 dl_tx_packets; | |
31294 | + /*! Number of TX packets seen through driverlink we didn't like */ | |
31295 | + u64 dl_tx_bad_packets; | |
31296 | + /*! Number of RX packets seen through driverlink */ | |
31297 | + u64 dl_rx_packets; | |
31298 | + /*! Number of mac addresses we are forwarding to */ | |
31299 | + u32 num_fwds; | |
31300 | +}; | |
31301 | + | |
31302 | +/*! Debug fs entries for each of the above stats */ | |
31303 | +struct netback_accel_global_dbfs { | |
31304 | + struct dentry *dl_tx_packets; | |
31305 | + struct dentry *dl_tx_bad_packets; | |
31306 | + struct dentry *dl_rx_packets; | |
31307 | + struct dentry *num_fwds; | |
31308 | +}; | |
31309 | + | |
31310 | +#if NETBACK_ACCEL_STATS | |
31311 | +extern struct netback_accel_global_stats global_stats; | |
31312 | +#endif | |
31313 | + | |
31314 | +/*! \brief Initialise the debugfs root and populate with global stats */ | |
31315 | +extern void netback_accel_debugfs_init(void); | |
31316 | + | |
31317 | +/*! \brief Remove our debugfs root directory */ | |
31318 | +extern void netback_accel_debugfs_fini(void); | |
31319 | + | |
31320 | +/*! \brief Add per-bend statistics to debug fs */ | |
31321 | +extern int netback_accel_debugfs_create(struct netback_accel *bend); | |
31322 | +/*! \brief Remove per-bend statistics from debug fs */ | |
31323 | +extern int netback_accel_debugfs_remove(struct netback_accel *bend); | |
31324 | + | |
31325 | +#endif /* NETBACK_ACCEL_H */ | |
31326 | + | |
31327 | + | |
31328 | Index: head-2008-11-25/drivers/xen/sfc_netback/accel_debugfs.c | |
31329 | =================================================================== | |
31330 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
31331 | +++ head-2008-11-25/drivers/xen/sfc_netback/accel_debugfs.c 2008-02-26 10:54:11.000000000 +0100 | |
31332 | @@ -0,0 +1,148 @@ | |
31333 | +/**************************************************************************** | |
31334 | + * Solarflare driver for Xen network acceleration | |
31335 | + * | |
31336 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
31337 | + * 9501 Jeronimo Road, Suite 250, | |
31338 | + * Irvine, CA 92618, USA | |
31339 | + * | |
31340 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
31341 | + * | |
31342 | + * This program is free software; you can redistribute it and/or modify it | |
31343 | + * under the terms of the GNU General Public License version 2 as published | |
31344 | + * by the Free Software Foundation, incorporated herein by reference. | |
31345 | + * | |
31346 | + * This program is distributed in the hope that it will be useful, | |
31347 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
31348 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
31349 | + * GNU General Public License for more details. | |
31350 | + * | |
31351 | + * You should have received a copy of the GNU General Public License | |
31352 | + * along with this program; if not, write to the Free Software | |
31353 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
31354 | + **************************************************************************** | |
31355 | + */ | |
31356 | + | |
31357 | +#include <linux/fs.h> | |
31358 | +#include <linux/debugfs.h> | |
31359 | + | |
31360 | +#include "accel.h" | |
31361 | + | |
31362 | +#if defined(CONFIG_DEBUG_FS) | |
31363 | +static struct dentry *sfc_debugfs_root = NULL; | |
31364 | +#endif | |
31365 | + | |
31366 | +#if NETBACK_ACCEL_STATS | |
31367 | +struct netback_accel_global_stats global_stats; | |
31368 | +#if defined(CONFIG_DEBUG_FS) | |
31369 | +static struct netback_accel_global_dbfs global_dbfs; | |
31370 | +#endif | |
31371 | +#endif | |
31372 | + | |
31373 | +void netback_accel_debugfs_init(void) | |
31374 | +{ | |
31375 | +#if defined(CONFIG_DEBUG_FS) | |
31376 | + sfc_debugfs_root = debugfs_create_dir("sfc_netback", NULL); | |
31377 | + if (sfc_debugfs_root == NULL) | |
31378 | + return; | |
31379 | + | |
31380 | + global_dbfs.num_fwds = debugfs_create_u32 | |
31381 | + ("num_fwds", S_IRUSR | S_IRGRP | S_IROTH, | |
31382 | + sfc_debugfs_root, &global_stats.num_fwds); | |
31383 | + global_dbfs.dl_tx_packets = debugfs_create_u64 | |
31384 | + ("dl_tx_packets", S_IRUSR | S_IRGRP | S_IROTH, | |
31385 | + sfc_debugfs_root, &global_stats.dl_tx_packets); | |
31386 | + global_dbfs.dl_rx_packets = debugfs_create_u64 | |
31387 | + ("dl_rx_packets", S_IRUSR | S_IRGRP | S_IROTH, | |
31388 | + sfc_debugfs_root, &global_stats.dl_rx_packets); | |
31389 | + global_dbfs.dl_tx_bad_packets = debugfs_create_u64 | |
31390 | + ("dl_tx_bad_packets", S_IRUSR | S_IRGRP | S_IROTH, | |
31391 | + sfc_debugfs_root, &global_stats.dl_tx_bad_packets); | |
31392 | +#endif | |
31393 | +} | |
31394 | + | |
31395 | + | |
31396 | +void netback_accel_debugfs_fini(void) | |
31397 | +{ | |
31398 | +#if defined(CONFIG_DEBUG_FS) | |
31399 | + debugfs_remove(global_dbfs.num_fwds); | |
31400 | + debugfs_remove(global_dbfs.dl_tx_packets); | |
31401 | + debugfs_remove(global_dbfs.dl_rx_packets); | |
31402 | + debugfs_remove(global_dbfs.dl_tx_bad_packets); | |
31403 | + | |
31404 | + debugfs_remove(sfc_debugfs_root); | |
31405 | +#endif | |
31406 | +} | |
31407 | + | |
31408 | + | |
31409 | +int netback_accel_debugfs_create(struct netback_accel *bend) | |
31410 | +{ | |
31411 | +#if defined(CONFIG_DEBUG_FS) | |
 31417 | 31412 | +	/* Smallest length is 7 (vif0.0\0) */ | |
31413 | + int length = 7, temp; | |
31414 | + | |
31415 | + if (sfc_debugfs_root == NULL) | |
31416 | + return -ENOENT; | |
31417 | + | |
31418 | + /* Work out length of string representation of far_end and vif_num */ | |
31419 | + temp = bend->far_end; | |
31420 | + while (temp > 9) { | |
31421 | + length++; | |
31422 | + temp = temp / 10; | |
31423 | + } | |
31424 | + temp = bend->vif_num; | |
31425 | + while (temp > 9) { | |
31426 | + length++; | |
31427 | + temp = temp / 10; | |
31428 | + } | |
31429 | + | |
31430 | + bend->dbfs_dir_name = kmalloc(length, GFP_KERNEL); | |
31431 | + if (bend->dbfs_dir_name == NULL) | |
31432 | + return -ENOMEM; | |
31433 | + sprintf(bend->dbfs_dir_name, "vif%d.%d", bend->far_end, bend->vif_num); | |
31434 | + | |
31435 | + bend->dbfs_dir = debugfs_create_dir(bend->dbfs_dir_name, | |
31436 | + sfc_debugfs_root); | |
31437 | + if (bend->dbfs_dir == NULL) { | |
31438 | + kfree(bend->dbfs_dir_name); | |
31439 | + return -ENOMEM; | |
31440 | + } | |
31441 | + | |
31442 | +#if NETBACK_ACCEL_STATS | |
31443 | + bend->dbfs.evq_wakeups = debugfs_create_u64 | |
31444 | + ("evq_wakeups", S_IRUSR | S_IRGRP | S_IROTH, | |
31445 | + bend->dbfs_dir, &bend->stats.evq_wakeups); | |
31446 | + bend->dbfs.evq_timeouts = debugfs_create_u64 | |
31447 | + ("evq_timeouts", S_IRUSR | S_IRGRP | S_IROTH, | |
31448 | + bend->dbfs_dir, &bend->stats.evq_timeouts); | |
31449 | + bend->dbfs.num_filters = debugfs_create_u32 | |
31450 | + ("num_filters", S_IRUSR | S_IRGRP | S_IROTH, | |
31451 | + bend->dbfs_dir, &bend->stats.num_filters); | |
31452 | + bend->dbfs.num_buffer_pages = debugfs_create_u32 | |
31453 | + ("num_buffer_pages", S_IRUSR | S_IRGRP | S_IROTH, | |
31454 | + bend->dbfs_dir, &bend->stats.num_buffer_pages); | |
31455 | +#endif | |
31456 | +#endif | |
31457 | + return 0; | |
31458 | +} | |
31459 | + | |
31460 | + | |
31461 | +int netback_accel_debugfs_remove(struct netback_accel *bend) | |
31462 | +{ | |
31463 | +#if defined(CONFIG_DEBUG_FS) | |
31464 | + if (bend->dbfs_dir != NULL) { | |
31465 | +#if NETBACK_ACCEL_STATS | |
31466 | + debugfs_remove(bend->dbfs.evq_wakeups); | |
31467 | + debugfs_remove(bend->dbfs.evq_timeouts); | |
31468 | + debugfs_remove(bend->dbfs.num_filters); | |
31469 | + debugfs_remove(bend->dbfs.num_buffer_pages); | |
31470 | +#endif | |
31471 | + debugfs_remove(bend->dbfs_dir); | |
31472 | + } | |
31473 | + | |
31474 | + if (bend->dbfs_dir_name) | |
31475 | + kfree(bend->dbfs_dir_name); | |
31476 | +#endif | |
31477 | + return 0; | |
31478 | +} | |
31479 | + | |
31480 | + | |
31481 | Index: head-2008-11-25/drivers/xen/sfc_netback/accel_fwd.c | |
31482 | =================================================================== | |
31483 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
31484 | +++ head-2008-11-25/drivers/xen/sfc_netback/accel_fwd.c 2008-04-02 12:34:02.000000000 +0200 | |
31485 | @@ -0,0 +1,420 @@ | |
31486 | +/**************************************************************************** | |
31487 | + * Solarflare driver for Xen network acceleration | |
31488 | + * | |
31489 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
31490 | + * 9501 Jeronimo Road, Suite 250, | |
31491 | + * Irvine, CA 92618, USA | |
31492 | + * | |
31493 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
31494 | + * | |
31495 | + * This program is free software; you can redistribute it and/or modify it | |
31496 | + * under the terms of the GNU General Public License version 2 as published | |
31497 | + * by the Free Software Foundation, incorporated herein by reference. | |
31498 | + * | |
31499 | + * This program is distributed in the hope that it will be useful, | |
31500 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
31501 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
31502 | + * GNU General Public License for more details. | |
31503 | + * | |
31504 | + * You should have received a copy of the GNU General Public License | |
31505 | + * along with this program; if not, write to the Free Software | |
31506 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
31507 | + **************************************************************************** | |
31508 | + */ | |
31509 | + | |
31510 | +#include "accel.h" | |
31511 | +#include "accel_cuckoo_hash.h" | |
31512 | +#include "accel_util.h" | |
31513 | +#include "accel_solarflare.h" | |
31514 | + | |
31515 | +#include "driverlink_api.h" | |
31516 | + | |
31517 | +#include <linux/if_arp.h> | |
31518 | +#include <linux/skbuff.h> | |
31519 | +#include <linux/list.h> | |
31520 | + | |
31521 | +/* State stored in the forward table */ | |
31522 | +struct fwd_struct { | |
31523 | + struct list_head link; /* Forms list */ | |
31524 | + void * context; | |
31525 | + __u8 valid; | |
31526 | + __u8 mac[ETH_ALEN]; | |
31527 | +}; | |
31528 | + | |
31529 | +/* Max value we support */ | |
31530 | +#define NUM_FWDS_BITS 8 | |
31531 | +#define NUM_FWDS (1 << NUM_FWDS_BITS) | |
31532 | +#define FWD_MASK (NUM_FWDS - 1) | |
31533 | + | |
31534 | +struct port_fwd { | |
31535 | + /* Make a list */ | |
31536 | + struct list_head link; | |
31537 | + /* Hash table to store the fwd_structs */ | |
31538 | + cuckoo_hash_table fwd_hash_table; | |
31539 | + /* The array of fwd_structs */ | |
31540 | + struct fwd_struct *fwd_array; | |
31541 | + /* Linked list of entries in use. */ | |
31542 | + struct list_head fwd_list; | |
31543 | + /* Could do something clever with a reader/writer lock. */ | |
31544 | + spinlock_t fwd_lock; | |
31545 | + /* Make find_free_entry() a bit faster by caching this */ | |
31546 | + int last_free_index; | |
31547 | +}; | |
31548 | + | |
31549 | +/* | |
31550 | + * This is unlocked as it's only called from dl probe and remove, | |
31551 | + * which are themselves synchronised. Could get rid of it entirely as | |
31552 | + * it's never iterated, but useful for debug | |
31553 | + */ | |
31554 | +static struct list_head port_fwds; | |
31555 | + | |
31556 | + | |
31557 | +/* Search the fwd_array for an unused entry */ | |
31558 | +static int fwd_find_free_entry(struct port_fwd *fwd_set) | |
31559 | +{ | |
31560 | + int index = fwd_set->last_free_index; | |
31561 | + | |
31562 | + do { | |
31563 | + if (!fwd_set->fwd_array[index].valid) { | |
31564 | + fwd_set->last_free_index = index; | |
31565 | + return index; | |
31566 | + } | |
31567 | + index++; | |
31568 | + if (index >= NUM_FWDS) | |
31569 | + index = 0; | |
31570 | + } while (index != fwd_set->last_free_index); | |
31571 | + | |
31572 | + return -ENOMEM; | |
31573 | +} | |
31574 | + | |
31575 | + | |
31576 | +/* Look up a MAC in the hash table. Caller should hold table lock. */ | |
31577 | +static inline struct fwd_struct *fwd_find_entry(const __u8 *mac, | |
31578 | + struct port_fwd *fwd_set) | |
31579 | +{ | |
31580 | + cuckoo_hash_value value; | |
31581 | + cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); | |
31582 | + | |
31583 | + if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table, | |
31584 | + (cuckoo_hash_key *)(&key), | |
31585 | + &value)) { | |
31586 | + struct fwd_struct *fwd = &fwd_set->fwd_array[value]; | |
31587 | + DPRINTK_ON(memcmp(fwd->mac, mac, ETH_ALEN) != 0); | |
31588 | + return fwd; | |
31589 | + } | |
31590 | + | |
31591 | + return NULL; | |
31592 | +} | |
31593 | + | |
31594 | + | |
 31599 | 31595 | +/* Initialise each nic port's forwarding table */ | |
31596 | +void *netback_accel_init_fwd_port(void) | |
31597 | +{ | |
31598 | + struct port_fwd *fwd_set; | |
31599 | + | |
31600 | + fwd_set = kzalloc(sizeof(struct port_fwd), GFP_KERNEL); | |
31601 | + if (fwd_set == NULL) { | |
31602 | + return NULL; | |
31603 | + } | |
31604 | + | |
31605 | + spin_lock_init(&fwd_set->fwd_lock); | |
31606 | + | |
31607 | + fwd_set->fwd_array = kzalloc(sizeof (struct fwd_struct) * NUM_FWDS, | |
31608 | + GFP_KERNEL); | |
31609 | + if (fwd_set->fwd_array == NULL) { | |
31610 | + kfree(fwd_set); | |
31611 | + return NULL; | |
31612 | + } | |
31613 | + | |
31614 | + if (cuckoo_hash_init(&fwd_set->fwd_hash_table, NUM_FWDS_BITS, 8) != 0) { | |
31615 | + kfree(fwd_set->fwd_array); | |
31616 | + kfree(fwd_set); | |
31617 | + return NULL; | |
31618 | + } | |
31619 | + | |
31620 | + INIT_LIST_HEAD(&fwd_set->fwd_list); | |
31621 | + | |
31622 | + list_add(&fwd_set->link, &port_fwds); | |
31623 | + | |
31624 | + return fwd_set; | |
31625 | +} | |
31626 | + | |
31627 | + | |
31628 | +void netback_accel_shutdown_fwd_port(void *fwd_priv) | |
31629 | +{ | |
31630 | + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; | |
31631 | + | |
31632 | + BUG_ON(fwd_priv == NULL); | |
31633 | + | |
31634 | + BUG_ON(list_empty(&port_fwds)); | |
31635 | + list_del(&fwd_set->link); | |
31636 | + | |
31637 | + BUG_ON(!list_empty(&fwd_set->fwd_list)); | |
31638 | + | |
31639 | + cuckoo_hash_destroy(&fwd_set->fwd_hash_table); | |
31640 | + kfree(fwd_set->fwd_array); | |
31641 | + kfree(fwd_set); | |
31642 | +} | |
31643 | + | |
31644 | + | |
31645 | +int netback_accel_init_fwd() | |
31646 | +{ | |
31647 | + INIT_LIST_HEAD(&port_fwds); | |
31648 | + return 0; | |
31649 | +} | |
31650 | + | |
31651 | + | |
31652 | +void netback_accel_shutdown_fwd() | |
31653 | +{ | |
31654 | + BUG_ON(!list_empty(&port_fwds)); | |
31655 | +} | |
31656 | + | |
31657 | + | |
31658 | +/* | |
31659 | + * Add an entry to the forwarding table. Returns -ENOMEM if no | |
31660 | + * space. | |
31661 | + */ | |
31662 | +int netback_accel_fwd_add(const __u8 *mac, void *context, void *fwd_priv) | |
31663 | +{ | |
31664 | + struct fwd_struct *fwd; | |
31665 | + int rc = 0, index; | |
31666 | + unsigned long flags; | |
31667 | + cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); | |
31668 | + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; | |
31669 | + | |
31670 | + BUG_ON(fwd_priv == NULL); | |
31671 | + | |
31672 | + DPRINTK("Adding mac " MAC_FMT "\n", MAC_ARG(mac)); | |
31673 | + | |
31674 | + spin_lock_irqsave(&fwd_set->fwd_lock, flags); | |
31675 | + | |
31676 | + if ((rc = fwd_find_free_entry(fwd_set)) < 0 ) { | |
31677 | + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); | |
31678 | + return rc; | |
31679 | + } | |
31680 | + | |
31681 | + index = rc; | |
31682 | + | |
31683 | + /* Shouldn't already be in the table */ | |
31684 | + if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table, | |
31685 | + (cuckoo_hash_key *)(&key), &rc) != 0) { | |
31686 | + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); | |
31687 | + EPRINTK("MAC address " MAC_FMT " already accelerated.\n", | |
31688 | + MAC_ARG(mac)); | |
31689 | + return -EEXIST; | |
31690 | + } | |
31691 | + | |
31692 | + if ((rc = cuckoo_hash_add(&fwd_set->fwd_hash_table, | |
31693 | + (cuckoo_hash_key *)(&key), index, 1)) == 0) { | |
31694 | + fwd = &fwd_set->fwd_array[index]; | |
31695 | + fwd->valid = 1; | |
31696 | + fwd->context = context; | |
31697 | + memcpy(fwd->mac, mac, ETH_ALEN); | |
31698 | + list_add(&fwd->link, &fwd_set->fwd_list); | |
31699 | + NETBACK_ACCEL_STATS_OP(global_stats.num_fwds++); | |
31700 | + } | |
31701 | + | |
31702 | + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); | |
31703 | + | |
31704 | + /* | |
31705 | + * No need to tell frontend that this mac address is local - | |
31706 | + * it should auto-discover through packets on fastpath what is | |
31707 | + * local and what is not, and just being on same server | |
31708 | + * doesn't make it local (it could be on a different | |
31709 | + * bridge) | |
31710 | + */ | |
31711 | + | |
31712 | + return rc; | |
31713 | +} | |
31714 | + | |
31715 | + | |
31716 | +/* remove an entry from the forwarding tables. */ | |
31717 | +void netback_accel_fwd_remove(const __u8 *mac, void *fwd_priv) | |
31718 | +{ | |
31719 | + struct fwd_struct *fwd; | |
31720 | + unsigned long flags; | |
31721 | + cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); | |
31722 | + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; | |
31723 | + | |
31724 | + DPRINTK("Removing mac " MAC_FMT "\n", MAC_ARG(mac)); | |
31725 | + | |
31726 | + BUG_ON(fwd_priv == NULL); | |
31727 | + | |
31728 | + spin_lock_irqsave(&fwd_set->fwd_lock, flags); | |
31729 | + | |
31730 | + fwd = fwd_find_entry(mac, fwd_set); | |
31731 | + if (fwd != NULL) { | |
31732 | + BUG_ON(list_empty(&fwd_set->fwd_list)); | |
31733 | + list_del(&fwd->link); | |
31734 | + | |
31735 | + fwd->valid = 0; | |
31736 | + cuckoo_hash_remove(&fwd_set->fwd_hash_table, | |
31737 | + (cuckoo_hash_key *)(&key)); | |
31738 | + NETBACK_ACCEL_STATS_OP(global_stats.num_fwds--); | |
31739 | + } | |
31740 | + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); | |
31741 | + | |
31742 | + /* | |
31743 | + * No need to tell frontend that this is no longer present - | |
31744 | + * the frontend is currently only interested in remote | |
31745 | + * addresses and it works these out (mostly) by itself | |
31746 | + */ | |
31747 | +} | |
31748 | + | |
31749 | + | |
31750 | +/* Set the context pointer for a hash table entry. */ | |
31751 | +int netback_accel_fwd_set_context(const __u8 *mac, void *context, | |
31752 | + void *fwd_priv) | |
31753 | +{ | |
31754 | + struct fwd_struct *fwd; | |
31755 | + unsigned long flags; | |
31756 | + int rc = -ENOENT; | |
31757 | + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; | |
31758 | + | |
31759 | + BUG_ON(fwd_priv == NULL); | |
31760 | + | |
31761 | + spin_lock_irqsave(&fwd_set->fwd_lock, flags); | |
31762 | + fwd = fwd_find_entry(mac, fwd_set); | |
31763 | + if (fwd != NULL) { | |
31764 | + fwd->context = context; | |
31765 | + rc = 0; | |
31766 | + } | |
31767 | + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); | |
31768 | + return rc; | |
31769 | +} | |
31770 | + | |
31771 | + | |
31772 | +/************************************************************************** | |
31773 | + * Process a received packet | |
31774 | + **************************************************************************/ | |
31775 | + | |
31776 | +/* | |
 31782 | 31777 | + * Returns whether or not we have a match in our forward table for | |
31778 | + * this skb. Must be called with appropriate fwd_lock already held | |
31779 | + */ | |
31780 | +static struct netback_accel *for_a_vnic(struct netback_pkt_buf *skb, | |
31781 | + struct port_fwd *fwd_set) | |
31782 | +{ | |
31783 | + struct fwd_struct *fwd; | |
31784 | + struct netback_accel *retval = NULL; | |
31785 | + | |
31786 | + fwd = fwd_find_entry(skb->mac.raw, fwd_set); | |
31787 | + if (fwd != NULL) | |
31788 | + retval = fwd->context; | |
31789 | + return retval; | |
31790 | +} | |
31791 | + | |
31792 | + | |
31793 | +static inline int packet_is_arp_reply(struct sk_buff *skb) | |
31794 | +{ | |
31795 | + return skb->protocol == ntohs(ETH_P_ARP) | |
31796 | + && skb->nh.arph->ar_op == ntohs(ARPOP_REPLY); | |
31797 | +} | |
31798 | + | |
31799 | + | |
31800 | +static inline void hdr_to_filt(struct ethhdr *ethhdr, struct iphdr *ip, | |
31801 | + struct netback_accel_filter_spec *spec) | |
31802 | +{ | |
31803 | + spec->proto = ip->protocol; | |
31804 | + spec->destip_be = ip->daddr; | |
31805 | + memcpy(spec->mac, ethhdr->h_source, ETH_ALEN); | |
31806 | + | |
31807 | + if (ip->protocol == IPPROTO_TCP) { | |
31808 | + struct tcphdr *tcp = (struct tcphdr *)((char *)ip + 4 * ip->ihl); | |
31809 | + spec->destport_be = tcp->dest; | |
31810 | + } else { | |
31811 | + struct udphdr *udp = (struct udphdr *)((char *)ip + 4 * ip->ihl); | |
31812 | + EPRINTK_ON(ip->protocol != IPPROTO_UDP); | |
31813 | + spec->destport_be = udp->dest; | |
31814 | + } | |
31815 | +} | |
31816 | + | |
31817 | + | |
31818 | +static inline int netback_accel_can_filter(struct netback_pkt_buf *skb) | |
31819 | +{ | |
31820 | + return (skb->protocol == htons(ETH_P_IP) && | |
31821 | + ((skb->nh.iph->protocol == IPPROTO_TCP) || | |
31822 | + (skb->nh.iph->protocol == IPPROTO_UDP))); | |
31823 | +} | |
31824 | + | |
31825 | + | |
31826 | +static inline void netback_accel_filter_packet(struct netback_accel *bend, | |
31827 | + struct netback_pkt_buf *skb) | |
31828 | +{ | |
31829 | + struct netback_accel_filter_spec fs; | |
31830 | + struct ethhdr *eh = (struct ethhdr *)(skb->mac.raw); | |
31831 | + | |
31832 | + hdr_to_filt(eh, skb->nh.iph, &fs); | |
31833 | + | |
31834 | + netback_accel_filter_check_add(bend, &fs); | |
31835 | +} | |
31836 | + | |
31837 | + | |
31838 | +/* | |
31839 | + * Receive a packet and do something appropriate with it. Return true | |
31840 | + * to take exclusive ownership of the packet. This is verging on | |
31841 | + * solarflare specific | |
31842 | + */ | |
31843 | +void netback_accel_rx_packet(struct netback_pkt_buf *skb, void *fwd_priv) | |
31844 | +{ | |
31845 | + struct netback_accel *bend; | |
31846 | + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; | |
31847 | + unsigned long flags; | |
31848 | + | |
31849 | + BUG_ON(fwd_priv == NULL); | |
31850 | + | |
31851 | + /* Checking for bcast is cheaper so do that first */ | |
31852 | + if (is_broadcast_ether_addr(skb->mac.raw)) { | |
31853 | + /* pass through the slow path by not claiming ownership */ | |
31854 | + return; | |
31855 | + } else if (is_multicast_ether_addr(skb->mac.raw)) { | |
31856 | + /* pass through the slow path by not claiming ownership */ | |
31857 | + return; | |
31858 | + } else { | |
31859 | + /* It is unicast */ | |
31860 | + spin_lock_irqsave(&fwd_set->fwd_lock, flags); | |
31861 | + /* We insert filter to pass it off to a VNIC */ | |
31862 | + if ((bend = for_a_vnic(skb, fwd_set)) != NULL) | |
31863 | + if (netback_accel_can_filter(skb)) | |
31864 | + netback_accel_filter_packet(bend, skb); | |
31865 | + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); | |
31866 | + } | |
31867 | + return; | |
31868 | +} | |
31869 | + | |
31870 | + | |
31871 | +void netback_accel_tx_packet(struct sk_buff *skb, void *fwd_priv) | |
31872 | +{ | |
31873 | + __u8 *mac; | |
31874 | + unsigned long flags; | |
31875 | + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; | |
31876 | + struct fwd_struct *fwd; | |
31877 | + | |
31878 | + BUG_ON(fwd_priv == NULL); | |
31879 | + | |
31880 | + if (is_broadcast_ether_addr(skb->mac.raw) && packet_is_arp_reply(skb)) { | |
31881 | + /* | |
31882 | + * update our fast path forwarding to reflect this | |
31883 | + * gratuitous ARP | |
31884 | + */ | |
31885 | + mac = skb->mac.raw+ETH_ALEN; | |
31886 | + | |
31887 | + DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n", | |
31888 | + __FUNCTION__, MAC_ARG(mac)); | |
31889 | + | |
31890 | + spin_lock_irqsave(&fwd_set->fwd_lock, flags); | |
31891 | + /* | |
31892 | + * Might not be local, but let's tell them all it is, | |
31893 | + * and they can restore the fastpath if they continue | |
31894 | + * to get packets that way | |
31895 | + */ | |
31896 | + list_for_each_entry(fwd, &fwd_set->fwd_list, link) { | |
31897 | + struct netback_accel *bend = fwd->context; | |
31898 | + if (bend != NULL) | |
31899 | + netback_accel_msg_tx_new_localmac(bend, mac); | |
31900 | + } | |
31901 | + | |
31902 | + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); | |
31903 | + } | |
31904 | + return; | |
31905 | +} | |
31906 | Index: head-2008-11-25/drivers/xen/sfc_netback/accel_msg.c | |
31907 | =================================================================== | |
31908 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
31909 | +++ head-2008-11-25/drivers/xen/sfc_netback/accel_msg.c 2008-02-20 09:32:49.000000000 +0100 | |
31910 | @@ -0,0 +1,392 @@ | |
31911 | +/**************************************************************************** | |
31912 | + * Solarflare driver for Xen network acceleration | |
31913 | + * | |
31914 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
31915 | + * 9501 Jeronimo Road, Suite 250, | |
31916 | + * Irvine, CA 92618, USA | |
31917 | + * | |
31918 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
31919 | + * | |
31920 | + * This program is free software; you can redistribute it and/or modify it | |
31921 | + * under the terms of the GNU General Public License version 2 as published | |
31922 | + * by the Free Software Foundation, incorporated herein by reference. | |
31923 | + * | |
31924 | + * This program is distributed in the hope that it will be useful, | |
31925 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
31926 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
31927 | + * GNU General Public License for more details. | |
31928 | + * | |
31929 | + * You should have received a copy of the GNU General Public License | |
31930 | + * along with this program; if not, write to the Free Software | |
31931 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
31932 | + **************************************************************************** | |
31933 | + */ | |
31934 | + | |
31935 | +#include <xen/evtchn.h> | |
31936 | + | |
31937 | +#include "accel.h" | |
31938 | +#include "accel_msg_iface.h" | |
31939 | +#include "accel_util.h" | |
31940 | +#include "accel_solarflare.h" | |
31941 | + | |
31942 | +/* Send a HELLO to front end to start things off */ | |
31943 | +void netback_accel_msg_tx_hello(struct netback_accel *bend, unsigned version) | |
31944 | +{ | |
31945 | + unsigned long lock_state; | |
31946 | + struct net_accel_msg *msg = | |
31947 | + net_accel_msg_start_send(bend->shared_page, | |
31948 | + &bend->to_domU, &lock_state); | |
31949 | + /* The queue _cannot_ be full, we're the first users. */ | |
31950 | + EPRINTK_ON(msg == NULL); | |
31951 | + | |
31952 | + if (msg != NULL) { | |
31953 | + net_accel_msg_init(msg, NET_ACCEL_MSG_HELLO); | |
31954 | + msg->u.hello.version = version; | |
31955 | + msg->u.hello.max_pages = bend->quotas.max_buf_pages; | |
31956 | + VPRINTK("Sending hello to channel %d\n", bend->msg_channel); | |
31957 | + net_accel_msg_complete_send_notify(bend->shared_page, | |
31958 | + &bend->to_domU, | |
31959 | + &lock_state, | |
31960 | + bend->msg_channel_irq); | |
31961 | + } | |
31962 | +} | |
31963 | + | |
31964 | +/* Send a local mac message to vnic */ | |
31965 | +static void netback_accel_msg_tx_localmac(struct netback_accel *bend, | |
31966 | + int type, const void *mac) | |
31967 | +{ | |
31968 | + unsigned long lock_state; | |
31969 | + struct net_accel_msg *msg; | |
31970 | + | |
31971 | + BUG_ON(bend == NULL || mac == NULL); | |
31972 | + | |
31973 | + VPRINTK("Sending local mac message: " MAC_FMT "\n", | |
31974 | + MAC_ARG((const char *)mac)); | |
31975 | + | |
31976 | + msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU, | |
31977 | + &lock_state); | |
31978 | + | |
31979 | + if (msg != NULL) { | |
31980 | + net_accel_msg_init(msg, NET_ACCEL_MSG_LOCALMAC); | |
31981 | + msg->u.localmac.flags = type; | |
31982 | + memcpy(msg->u.localmac.mac, mac, ETH_ALEN); | |
31983 | + net_accel_msg_complete_send_notify(bend->shared_page, | |
31984 | + &bend->to_domU, | |
31985 | + &lock_state, | |
31986 | + bend->msg_channel_irq); | |
31987 | + } else { | |
31988 | + /* | |
31989 | + * TODO if this happens we may leave a domU | |
31990 | + * fastpathing packets when they should be delivered | |
31991 | + * locally. Solution is get domU to timeout entries | |
31992 | + * in its fastpath lookup table when it receives no RX | |
31993 | + * traffic | |
31994 | + */ | |
31995 | + EPRINTK("%s: saw full queue, may need ARP timer to recover\n", | |
31996 | + __FUNCTION__); | |
31997 | + } | |
31998 | +} | |
31999 | + | |
32000 | +/* Send an add local mac message to vnic */ | |
32001 | +void netback_accel_msg_tx_new_localmac(struct netback_accel *bend, | |
32002 | + const void *mac) | |
32003 | +{ | |
32004 | + netback_accel_msg_tx_localmac(bend, NET_ACCEL_MSG_ADD, mac); | |
32005 | +} | |
32006 | + | |
32007 | + | |
32008 | +static int netback_accel_msg_rx_buffer_map(struct netback_accel *bend, | |
32009 | + struct net_accel_msg *msg) | |
32010 | +{ | |
32011 | + int log2_pages, rc; | |
32012 | + | |
32013 | + /* Can only allocate in power of two */ | |
32014 | + log2_pages = log2_ge(msg->u.mapbufs.pages, 0); | |
32015 | + if (msg->u.mapbufs.pages != pow2(log2_pages)) { | |
32016 | + EPRINTK("%s: Can only alloc bufs in power of 2 sizes (%d)\n", | |
32017 | + __FUNCTION__, msg->u.mapbufs.pages); | |
32018 | + rc = -EINVAL; | |
32019 | + goto err_out; | |
32020 | + } | |
32021 | + | |
32022 | + /* | |
32023 | + * Sanity. Assumes NET_ACCEL_MSG_MAX_PAGE_REQ is same for | |
32024 | + * both directions/domains | |
32025 | + */ | |
32026 | + if (msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ) { | |
32027 | + EPRINTK("%s: too many pages in a single message: %d %d\n", | |
32028 | + __FUNCTION__, msg->u.mapbufs.pages, | |
32029 | + NET_ACCEL_MSG_MAX_PAGE_REQ); | |
32030 | + rc = -EINVAL; | |
32031 | + goto err_out; | |
32032 | + } | |
32033 | + | |
32034 | + if ((rc = netback_accel_add_buffers(bend, msg->u.mapbufs.pages, | |
32035 | + log2_pages, msg->u.mapbufs.grants, | |
32036 | + &msg->u.mapbufs.buf)) < 0) { | |
32037 | + goto err_out; | |
32038 | + } | |
32039 | + | |
32040 | + msg->id |= NET_ACCEL_MSG_REPLY; | |
32041 | + | |
32042 | + return 0; | |
32043 | + | |
32044 | + err_out: | |
32045 | + EPRINTK("%s: err_out\n", __FUNCTION__); | |
32046 | + msg->id |= NET_ACCEL_MSG_ERROR | NET_ACCEL_MSG_REPLY; | |
32047 | + return rc; | |
32048 | +} | |
32049 | + | |
32050 | + | |
32051 | +/* Hint from frontend that one of our filters is out of date */ | |
32052 | +static int netback_accel_process_fastpath(struct netback_accel *bend, | |
32053 | + struct net_accel_msg *msg) | |
32054 | +{ | |
32055 | + struct netback_accel_filter_spec spec; | |
32056 | + | |
32057 | + if (msg->u.fastpath.flags & NET_ACCEL_MSG_REMOVE) { | |
32058 | + /* | |
32059 | + * Would be nice to BUG() this but would leave us | |
32060 | + * vulnerable to naughty frontend | |
32061 | + */ | |
32062 | + EPRINTK_ON(msg->u.fastpath.flags & NET_ACCEL_MSG_ADD); | |
32063 | + | |
32064 | + memcpy(spec.mac, msg->u.fastpath.mac, ETH_ALEN); | |
32065 | + spec.destport_be = msg->u.fastpath.port; | |
32066 | + spec.destip_be = msg->u.fastpath.ip; | |
32067 | + spec.proto = msg->u.fastpath.proto; | |
32068 | + | |
32069 | + netback_accel_filter_remove_spec(bend, &spec); | |
32070 | + } | |
32071 | + | |
32072 | + return 0; | |
32073 | +} | |
32074 | + | |
32075 | + | |
32076 | +/* Flow control for message queues */ | |
32077 | +inline void set_queue_not_full(struct netback_accel *bend) | |
32078 | +{ | |
32079 | + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B, | |
32080 | + (unsigned long *)&bend->shared_page->aflags)) | |
32081 | + notify_remote_via_irq(bend->msg_channel_irq); | |
32082 | + else | |
32083 | + VPRINTK("queue not full bit already set, not signalling\n"); | |
32084 | +} | |
32085 | + | |
32086 | + | |
32087 | +/* Flow control for message queues */ | |
32088 | +inline void set_queue_full(struct netback_accel *bend) | |
32089 | +{ | |
32090 | + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B, | |
32091 | + (unsigned long *)&bend->shared_page->aflags)) | |
32092 | + notify_remote_via_irq(bend->msg_channel_irq); | |
32093 | + else | |
32094 | + VPRINTK("queue full bit already set, not signalling\n"); | |
32095 | +} | |
32096 | + | |
32097 | + | |
32098 | +void netback_accel_set_interface_state(struct netback_accel *bend, int up) | |
32099 | +{ | |
32100 | + bend->shared_page->net_dev_up = up; | |
32101 | + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B, | |
32102 | + (unsigned long *)&bend->shared_page->aflags)) | |
32103 | + notify_remote_via_irq(bend->msg_channel_irq); | |
32104 | + else | |
32105 | + VPRINTK("interface up/down bit already set, not signalling\n"); | |
32106 | +} | |
32107 | + | |
32108 | + | |
32109 | +static int check_rx_hello_version(unsigned version) | |
32110 | +{ | |
32111 | + /* Should only happen if there's been a version mismatch */ | |
32112 | + BUG_ON(version == NET_ACCEL_MSG_VERSION); | |
32113 | + | |
32114 | + if (version > NET_ACCEL_MSG_VERSION) { | |
32115 | + /* Newer protocol, we must refuse */ | |
32116 | + return -EPROTO; | |
32117 | + } | |
32118 | + | |
32119 | + if (version < NET_ACCEL_MSG_VERSION) { | |
32120 | + /* | |
32121 | + * We are newer, so have discretion to accept if we | |
32122 | + * wish. For now however, just reject | |
32123 | + */ | |
32124 | + return -EPROTO; | |
32125 | + } | |
32126 | + | |
32127 | + return -EINVAL; | |
32128 | +} | |
32129 | + | |
32130 | + | |
32131 | +static int process_rx_msg(struct netback_accel *bend, | |
32132 | + struct net_accel_msg *msg) | |
32133 | +{ | |
32134 | + int err = 0; | |
32135 | + | |
32136 | + switch (msg->id) { | |
32137 | + case NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_HELLO: | |
32138 | + /* Reply to a HELLO; mark ourselves as connected */ | |
32139 | + DPRINTK("got Hello reply, version %.8x\n", | |
32140 | + msg->u.hello.version); | |
32141 | + | |
32142 | + /* | |
32143 | + * Check that we've not successfully done this | |
32144 | + * already. NB no check at the moment that this reply | |
32145 | + * comes after we've actually sent a HELLO as that's | |
32146 | + * not possible with the current code structure | |
32147 | + */ | |
32148 | + if (bend->hw_state != NETBACK_ACCEL_RES_NONE) | |
32149 | + return -EPROTO; | |
32150 | + | |
32151 | + /* Store max_pages for accel_setup */ | |
32152 | + if (msg->u.hello.max_pages > bend->quotas.max_buf_pages) { | |
32153 | + EPRINTK("More pages than quota allows (%d > %d)\n", | |
32154 | + msg->u.hello.max_pages, | |
32155 | + bend->quotas.max_buf_pages); | |
32156 | + /* Force it down to the quota */ | |
32157 | + msg->u.hello.max_pages = bend->quotas.max_buf_pages; | |
32158 | + } | |
32159 | + bend->max_pages = msg->u.hello.max_pages; | |
32160 | + | |
32161 | + /* Set up the hardware visible to the other end */ | |
32162 | + err = bend->accel_setup(bend); | |
32163 | + if (err) { | |
32164 | + /* This is fatal */ | |
32165 | + DPRINTK("Hello gave accel_setup error %d\n", err); | |
32166 | + netback_accel_set_closing(bend); | |
32167 | + } else { | |
32168 | + /* | |
32169 | + * Now add the context so that packet | |
32170 | + * forwarding will commence | |
32171 | + */ | |
32172 | + netback_accel_fwd_set_context(bend->mac, bend, | |
32173 | + bend->fwd_priv); | |
32174 | + } | |
32175 | + break; | |
32176 | + case NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_ERROR: | |
32177 | + EPRINTK("got Hello error, versions us:%.8x them:%.8x\n", | |
32178 | + NET_ACCEL_MSG_VERSION, msg->u.hello.version); | |
32179 | + | |
32180 | + if (bend->hw_state != NETBACK_ACCEL_RES_NONE) | |
32181 | + return -EPROTO; | |
32182 | + | |
32183 | + if (msg->u.hello.version != NET_ACCEL_MSG_VERSION) { | |
32184 | + /* Error is due to version mismatch */ | |
32185 | + err = check_rx_hello_version(msg->u.hello.version); | |
32186 | + if (err == 0) { | |
32187 | + /* | |
32188 | + * It's OK to be compatible, send | |
32189 | + * another hello with compatible version | |
32190 | + */ | |
32191 | + netback_accel_msg_tx_hello | |
32192 | + (bend, msg->u.hello.version); | |
32193 | + } else { | |
32194 | + /* | |
32195 | + * Tell frontend that we're not going to | |
32196 | + * send another HELLO by going to Closing. | |
32197 | + */ | |
32198 | + netback_accel_set_closing(bend); | |
32199 | + } | |
32200 | + } | |
32201 | + break; | |
32202 | + case NET_ACCEL_MSG_MAPBUF: | |
32203 | + VPRINTK("Got mapped buffers request %d\n", | |
32204 | + msg->u.mapbufs.reqid); | |
32205 | + | |
32206 | + if (bend->hw_state == NETBACK_ACCEL_RES_NONE) | |
32207 | + return -EPROTO; | |
32208 | + | |
32209 | + /* | |
32210 | + * Frontend wants a buffer table entry for the | |
32211 | + * supplied pages | |
32212 | + */ | |
32213 | + err = netback_accel_msg_rx_buffer_map(bend, msg); | |
32214 | + if (net_accel_msg_reply_notify(bend->shared_page, | |
32215 | + bend->msg_channel_irq, | |
32216 | + &bend->to_domU, msg)) { | |
32217 | + /* | |
32218 | + * This is fatal as we can't tell the frontend | |
32219 | + * about the problem through the message | |
32220 | + * queue, and so would otherwise stalemate | |
32221 | + */ | |
32222 | + netback_accel_set_closing(bend); | |
32223 | + } | |
32224 | + break; | |
32225 | + case NET_ACCEL_MSG_FASTPATH: | |
32226 | + DPRINTK("Got fastpath request\n"); | |
32227 | + | |
32228 | + if (bend->hw_state == NETBACK_ACCEL_RES_NONE) | |
32229 | + return -EPROTO; | |
32230 | + | |
32231 | + err = netback_accel_process_fastpath(bend, msg); | |
32232 | + break; | |
32233 | + default: | |
32234 | + EPRINTK("Huh? Message code is %x\n", msg->id); | |
32235 | + err = -EPROTO; | |
32236 | + break; | |
32237 | + } | |
32238 | + return err; | |
32239 | +} | |
32240 | + | |
32241 | + | |
32242 | +/* Demultiplex an IRQ from the frontend driver. */ | |
32243 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
32244 | +void netback_accel_msg_rx_handler(struct work_struct *arg) | |
32245 | +#else | |
32246 | +void netback_accel_msg_rx_handler(void *bend_void) | |
32247 | +#endif | |
32248 | +{ | |
32249 | + struct net_accel_msg msg; | |
32250 | + int err, queue_was_full = 0; | |
32251 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
32252 | + struct netback_accel *bend = | |
32253 | + container_of(arg, struct netback_accel, handle_msg); | |
32254 | +#else | |
32255 | + struct netback_accel *bend = (struct netback_accel *)bend_void; | |
32256 | +#endif | |
32257 | + | |
32258 | + mutex_lock(&bend->bend_mutex); | |
32259 | + | |
32260 | + /* | |
32261 | + * This happens when the shared pages have been unmapped, but | |
32262 | + * the workqueue not flushed yet | |
32263 | + */ | |
32264 | + if (bend->shared_page == NULL) | |
32265 | + goto done; | |
32266 | + | |
32267 | + if ((bend->shared_page->aflags & | |
32268 | + NET_ACCEL_MSG_AFLAGS_TO_DOM0_MASK) != 0) { | |
32269 | + if (bend->shared_page->aflags & | |
32270 | + NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL) { | |
32271 | + /* We've been told there may now be space. */ | |
32272 | + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B, | |
32273 | + (unsigned long *)&bend->shared_page->aflags); | |
32274 | + } | |
32275 | + | |
32276 | + if (bend->shared_page->aflags & | |
32277 | + NET_ACCEL_MSG_AFLAGS_QUEUEUFULL) { | |
32278 | + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B, | |
32279 | + (unsigned long *)&bend->shared_page->aflags); | |
32280 | + queue_was_full = 1; | |
32281 | + } | |
32282 | + } | |
32283 | + | |
32284 | + while ((err = net_accel_msg_recv(bend->shared_page, &bend->from_domU, | |
32285 | + &msg)) == 0) { | |
32286 | + err = process_rx_msg(bend, &msg); | |
32287 | + | |
32288 | + if (err != 0) { | |
32289 | + EPRINTK("%s: Error %d\n", __FUNCTION__, err); | |
32290 | + goto err; | |
32291 | + } | |
32292 | + } | |
32293 | + | |
32294 | + err: | |
32295 | + /* There will be space now if we can make any. */ | |
32296 | + if (queue_was_full) | |
32297 | + set_queue_not_full(bend); | |
32298 | + done: | |
32299 | + mutex_unlock(&bend->bend_mutex); | |
32300 | + | |
32301 | + return; | |
32302 | +} | |
32303 | Index: head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.c | |
32304 | =================================================================== | |
32305 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
32306 | +++ head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.c 2008-02-20 09:32:49.000000000 +0100 | |
32307 | @@ -0,0 +1,1253 @@ | |
32308 | +/**************************************************************************** | |
32309 | + * Solarflare driver for Xen network acceleration | |
32310 | + * | |
32311 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
32312 | + * 9501 Jeronimo Road, Suite 250, | |
32313 | + * Irvine, CA 92618, USA | |
32314 | + * | |
32315 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
32316 | + * | |
32317 | + * This program is free software; you can redistribute it and/or modify it | |
32318 | + * under the terms of the GNU General Public License version 2 as published | |
32319 | + * by the Free Software Foundation, incorporated herein by reference. | |
32320 | + * | |
32321 | + * This program is distributed in the hope that it will be useful, | |
32322 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
32323 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
32324 | + * GNU General Public License for more details. | |
32325 | + * | |
32326 | + * You should have received a copy of the GNU General Public License | |
32327 | + * along with this program; if not, write to the Free Software | |
32328 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
32329 | + **************************************************************************** | |
32330 | + */ | |
32331 | + | |
32332 | +#include "common.h" | |
32333 | + | |
32334 | +#include "accel.h" | |
32335 | +#include "accel_solarflare.h" | |
32336 | +#include "accel_msg_iface.h" | |
32337 | +#include "accel_util.h" | |
32338 | + | |
32339 | +#include "accel_cuckoo_hash.h" | |
32340 | + | |
32341 | +#include "ci/driver/resource/efx_vi.h" | |
32342 | + | |
32343 | +#include "ci/efrm/nic_table.h" | |
32344 | +#include "ci/efhw/public.h" | |
32345 | + | |
32346 | +#include <xen/evtchn.h> | |
32347 | +#include <xen/driver_util.h> | |
32348 | +#include <linux/list.h> | |
32349 | +#include <linux/mutex.h> | |
32350 | + | |
32351 | +#include "driverlink_api.h" | |
32352 | + | |
32353 | +#define SF_XEN_RX_USR_BUF_SIZE 2048 | |
32354 | + | |
32355 | +struct falcon_bend_accel_priv { | |
32356 | + struct efx_vi_state *efx_vih; | |
32357 | + | |
32358 | + /*! Array of pointers to dma_map state, used so VNIC can | |
32359 | + * request their removal in a single message | |
32360 | + */ | |
32361 | + struct efx_vi_dma_map_state **dma_maps; | |
32362 | + /*! Index into dma_maps */ | |
32363 | + int dma_maps_index; | |
32364 | + | |
32365 | + /*! Serialises access to filters */ | |
32366 | + spinlock_t filter_lock; | |
32367 | + /*! Bitmap of which filters are free */ | |
32368 | + unsigned long free_filters; | |
32369 | + /*! Used for index normalisation */ | |
32370 | + u32 filter_idx_mask; | |
32371 | + struct netback_accel_filter_spec *fspecs; | |
32372 | + cuckoo_hash_table filter_hash_table; | |
32373 | + | |
32374 | + u32 txdmaq_gnt; | |
32375 | + u32 rxdmaq_gnt; | |
32376 | + u32 doorbell_gnt; | |
32377 | + u32 evq_rptr_gnt; | |
32378 | + u32 evq_mem_gnts[EF_HW_FALCON_EVQ_PAGES]; | |
32379 | + u32 evq_npages; | |
32380 | +}; | |
32381 | + | |
32382 | +/* Forward declaration */ | |
32383 | +static int netback_accel_filter_init(struct netback_accel *); | |
32384 | +static void netback_accel_filter_shutdown(struct netback_accel *); | |
32385 | + | |
32386 | +/************************************************************************** | |
32387 | + * | |
32388 | + * Driverlink stuff | |
32389 | + * | |
32390 | + **************************************************************************/ | |
32391 | + | |
32392 | +struct driverlink_port { | |
32393 | + struct list_head link; | |
32394 | + enum net_accel_hw_type type; | |
32395 | + struct net_device *net_dev; | |
32396 | + struct efx_dl_device *efx_dl_dev; | |
32397 | + int nic_index; | |
32398 | + void *fwd_priv; | |
32399 | +}; | |
32400 | + | |
32401 | +static struct list_head dl_ports; | |
32402 | + | |
32403 | +/* This mutex protects global state, such as the dl_ports list */ | |
32404 | +DEFINE_MUTEX(accel_mutex); | |
32405 | + | |
32406 | +static int init_done = 0; | |
32407 | + | |
32408 | +/* The DL callbacks */ | |
32409 | + | |
32410 | + | |
32411 | +#if defined(EFX_USE_FASTCALL) | |
32412 | +static enum efx_veto fastcall | |
32413 | +#else | |
32414 | +static enum efx_veto | |
32415 | +#endif | |
32416 | +bend_dl_tx_packet(struct efx_dl_device *efx_dl_dev, | |
32417 | + struct sk_buff *skb) | |
32418 | +{ | |
32419 | + struct driverlink_port *port = efx_dl_dev->priv; | |
32420 | + | |
32421 | + BUG_ON(port == NULL); | |
32422 | + | |
32423 | + NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++); | |
32424 | + if (skb->mac.raw != NULL) | |
32425 | + netback_accel_tx_packet(skb, port->fwd_priv); | |
32426 | + else { | |
32427 | + DPRINTK("Ignoring packet with missing mac address\n"); | |
32428 | + NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_bad_packets++); | |
32429 | + } | |
32430 | + return EFX_ALLOW_PACKET; | |
32431 | +} | |
32432 | + | |
32433 | +/* EFX_USE_FASTCALL */ | |
32434 | +#if defined(EFX_USE_FASTCALL) | |
32435 | +static enum efx_veto fastcall | |
32436 | +#else | |
32437 | +static enum efx_veto | |
32438 | +#endif | |
32439 | +bend_dl_rx_packet(struct efx_dl_device *efx_dl_dev, | |
32440 | + const char *pkt_buf, int pkt_len) | |
32441 | +{ | |
32442 | + struct driverlink_port *port = efx_dl_dev->priv; | |
32443 | + struct netback_pkt_buf pkt; | |
32444 | + struct ethhdr *eh; | |
32445 | + | |
32446 | + BUG_ON(port == NULL); | |
32447 | + | |
32448 | + pkt.mac.raw = (char *)pkt_buf; | |
32449 | + pkt.nh.raw = (char *)pkt_buf + ETH_HLEN; | |
32450 | + eh = (struct ethhdr *)pkt_buf; | |
32451 | + pkt.protocol = eh->h_proto; | |
32452 | + | |
32453 | + NETBACK_ACCEL_STATS_OP(global_stats.dl_rx_packets++); | |
32454 | + netback_accel_rx_packet(&pkt, port->fwd_priv); | |
32455 | + return EFX_ALLOW_PACKET; | |
32456 | +} | |
32457 | + | |
32458 | + | |
32459 | +/* Callbacks we'd like to get from the netdriver through driverlink */ | |
32460 | +struct efx_dl_callbacks bend_dl_callbacks = | |
32461 | + { | |
32462 | + .tx_packet = bend_dl_tx_packet, | |
32463 | + .rx_packet = bend_dl_rx_packet, | |
32464 | + }; | |
32465 | + | |
32466 | + | |
32467 | +static struct netback_accel_hooks accel_hooks = { | |
32468 | + THIS_MODULE, | |
32469 | + &netback_accel_probe, | |
32470 | + &netback_accel_remove | |
32471 | +}; | |
32472 | + | |
32473 | + | |
32474 | +/* | |
32475 | + * Handy helper which given an efx_dl_device works out which | |
32476 | + * efab_nic_t index into efrm_nic_table.nics[] it corresponds to | |
32477 | + */ | |
32478 | +static int efx_device_to_efab_nic_index(struct efx_dl_device *efx_dl_dev) | |
32479 | +{ | |
32480 | + int i; | |
32481 | + | |
32482 | + for (i = 0; i < EFHW_MAX_NR_DEVS; i++) { | |
32483 | + struct efhw_nic *nic = efrm_nic_table.nic[i]; | |
32484 | + | |
32485 | + /* | |
32486 | + * It's possible for the nic structure to have not | |
32487 | + * been initialised if the resource driver failed its | |
32488 | + * driverlink probe | |
32489 | + */ | |
32490 | + if (nic == NULL || nic->net_driver_dev == NULL) | |
32491 | + continue; | |
32492 | + | |
32493 | + /* Work out if these are talking about the same NIC */ | |
32494 | + if (nic->net_driver_dev->pci_dev == efx_dl_dev->pci_dev) | |
32495 | + return i; | |
32496 | + } | |
32497 | + | |
32498 | + return -1; | |
32499 | +} | |
32500 | + | |
32501 | + | |
32502 | +/* Driver link probe - register our callbacks */ | |
32503 | +static int bend_dl_probe(struct efx_dl_device *efx_dl_dev, | |
32504 | + const struct net_device *net_dev, | |
32505 | + const struct efx_dl_device_info *dev_info, | |
32506 | + const char* silicon_rev) | |
32507 | +{ | |
32508 | + int rc; | |
32509 | + enum net_accel_hw_type type; | |
32510 | + struct driverlink_port *port; | |
32511 | + | |
32512 | + DPRINTK("%s: %s\n", __FUNCTION__, silicon_rev); | |
32513 | + | |
32514 | + if (strcmp(silicon_rev, "falcon/a1") == 0) | |
32515 | + type = NET_ACCEL_MSG_HWTYPE_FALCON_A; | |
32516 | + else if (strcmp(silicon_rev, "falcon/b0") == 0) | |
32517 | + type = NET_ACCEL_MSG_HWTYPE_FALCON_B; | |
32518 | + else { | |
32519 | + EPRINTK("%s: unsupported silicon %s\n", __FUNCTION__, | |
32520 | + silicon_rev); | |
32521 | + rc = -EINVAL; | |
32522 | + goto fail1; | |
32523 | + } | |
32524 | + | |
32525 | + port = kmalloc(sizeof(struct driverlink_port), GFP_KERNEL); | |
32526 | + if (port == NULL) { | |
32527 | + EPRINTK("%s: no memory for dl probe\n", __FUNCTION__); | |
32528 | + rc = -ENOMEM; | |
32529 | + goto fail1; | |
32530 | + } | |
32531 | + | |
32532 | + port->efx_dl_dev = efx_dl_dev; | |
32533 | + efx_dl_dev->priv = port; | |
32534 | + | |
32535 | + port->nic_index = efx_device_to_efab_nic_index(efx_dl_dev); | |
32536 | + if (port->nic_index < 0) { | |
32537 | + /* | |
32538 | + * This can happen in theory if the resource driver | |
32539 | + * failed to initialise properly | |
32540 | + */ | |
32541 | + EPRINTK("%s: nic structure not found\n", __FUNCTION__); | |
32542 | + rc = -EINVAL; | |
32543 | + goto fail2; | |
32544 | + } | |
32545 | + | |
32546 | + port->fwd_priv = netback_accel_init_fwd_port(); | |
32547 | + if (port->fwd_priv == NULL) { | |
32548 | + EPRINTK("%s: failed to set up forwarding for port\n", | |
32549 | + __FUNCTION__); | |
32550 | + rc = -ENOMEM; | |
32551 | + goto fail2; | |
32552 | + } | |
32553 | + | |
32554 | + rc = efx_dl_register_callbacks(efx_dl_dev, &bend_dl_callbacks); | |
32555 | + if (rc != 0) { | |
32556 | + EPRINTK("%s: register_callbacks failed\n", __FUNCTION__); | |
32557 | + goto fail3; | |
32558 | + } | |
32559 | + | |
32560 | + port->type = type; | |
32561 | + port->net_dev = (struct net_device *)net_dev; | |
32562 | + | |
32563 | + mutex_lock(&accel_mutex); | |
32564 | + list_add(&port->link, &dl_ports); | |
32565 | + mutex_unlock(&accel_mutex); | |
32566 | + | |
32567 | + rc = netback_connect_accelerator(NETBACK_ACCEL_VERSION, 0, | |
32568 | + port->net_dev->name, &accel_hooks); | |
32569 | + | |
32570 | + if (rc < 0) { | |
32571 | + EPRINTK("Xen netback accelerator version mismatch\n"); | |
32572 | + goto fail4; | |
32573 | + } else if (rc > 0) { | |
32574 | + /* | |
32575 | + * In future may want to add backwards compatibility | |
32576 | + * and accept certain subsets of previous versions | |
32577 | + */ | |
32578 | + EPRINTK("Xen netback accelerator version mismatch\n"); | |
32579 | + goto fail4; | |
32580 | + } | |
32581 | + | |
32582 | + return 0; | |
32583 | + | |
32584 | + fail4: | |
32585 | + mutex_lock(&accel_mutex); | |
32586 | + list_del(&port->link); | |
32587 | + mutex_unlock(&accel_mutex); | |
32588 | + | |
32589 | + efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks); | |
32590 | + fail3: | |
32591 | + netback_accel_shutdown_fwd_port(port->fwd_priv); | |
32592 | + fail2: | |
32593 | + efx_dl_dev->priv = NULL; | |
32594 | + kfree(port); | |
32595 | + fail1: | |
32596 | + return rc; | |
32597 | +} | |
32598 | + | |
32599 | + | |
32600 | +static void bend_dl_remove(struct efx_dl_device *efx_dl_dev) | |
32601 | +{ | |
32602 | + struct driverlink_port *port; | |
32603 | + | |
32604 | + DPRINTK("Unregistering driverlink callbacks.\n"); | |
32605 | + | |
32606 | + mutex_lock(&accel_mutex); | |
32607 | + | |
32608 | + port = (struct driverlink_port *)efx_dl_dev->priv; | |
32609 | + | |
32610 | + BUG_ON(list_empty(&dl_ports)); | |
32611 | + BUG_ON(port == NULL); | |
32612 | + BUG_ON(port->efx_dl_dev != efx_dl_dev); | |
32613 | + | |
32614 | + netback_disconnect_accelerator(0, port->net_dev->name); | |
32615 | + | |
32616 | + list_del(&port->link); | |
32617 | + | |
32618 | + mutex_unlock(&accel_mutex); | |
32619 | + | |
32620 | + efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks); | |
32621 | + netback_accel_shutdown_fwd_port(port->fwd_priv); | |
32622 | + | |
32623 | + efx_dl_dev->priv = NULL; | |
32624 | + kfree(port); | |
32625 | + | |
32626 | + return; | |
32627 | +} | |
32628 | + | |
32629 | + | |
32630 | +static struct efx_dl_driver bend_dl_driver = | |
32631 | + { | |
32632 | + .name = "SFC Xen backend", | |
32633 | + .probe = bend_dl_probe, | |
32634 | + .remove = bend_dl_remove, | |
32635 | + }; | |
32636 | + | |
32637 | + | |
32638 | +int netback_accel_sf_init(void) | |
32639 | +{ | |
32640 | + int rc, nic_i; | |
32641 | + struct efhw_nic *nic; | |
32642 | + | |
32643 | + INIT_LIST_HEAD(&dl_ports); | |
32644 | + | |
32645 | + rc = efx_dl_register_driver(&bend_dl_driver); | |
32646 | + /* If we couldn't find the NET driver, give up */ | |
32647 | + if (rc == -ENOENT) | |
32648 | + return rc; | |
32649 | + | |
32650 | + if (rc == 0) { | |
32651 | + EFRM_FOR_EACH_NIC(nic_i, nic) | |
32652 | + falcon_nic_set_rx_usr_buf_size(nic, | |
32653 | + SF_XEN_RX_USR_BUF_SIZE); | |
32654 | + } | |
32655 | + | |
32656 | + init_done = (rc == 0); | |
32657 | + return rc; | |
32658 | +} | |
32659 | + | |
32660 | + | |
32661 | +void netback_accel_sf_shutdown(void) | |
32662 | +{ | |
32663 | + if (!init_done) | |
32664 | + return; | |
32665 | + DPRINTK("Unregistering driverlink driver\n"); | |
32666 | + | |
32667 | + /* | |
32668 | + * This will trigger removal callbacks for all the devices, which | |
32669 | + * will unregister their callbacks, disconnect from netfront, etc. | |
32670 | + */ | |
32671 | + efx_dl_unregister_driver(&bend_dl_driver); | |
32672 | +} | |
32673 | + | |
32674 | + | |
32675 | +int netback_accel_sf_hwtype(struct netback_accel *bend) | |
32676 | +{ | |
32677 | + struct driverlink_port *port; | |
32678 | + | |
32679 | + mutex_lock(&accel_mutex); | |
32680 | + | |
32681 | + list_for_each_entry(port, &dl_ports, link) { | |
32682 | + if (strcmp(bend->nicname, port->net_dev->name) == 0) { | |
32683 | + bend->hw_type = port->type; | |
32684 | + bend->accel_setup = netback_accel_setup_vnic_hw; | |
32685 | + bend->accel_shutdown = netback_accel_shutdown_vnic_hw; | |
32686 | + bend->fwd_priv = port->fwd_priv; | |
32687 | + /* This is just needed to pass to efx_vi_alloc */ | |
32688 | + bend->nic_index = port->nic_index; | |
32689 | + bend->net_dev = port->net_dev; | |
32690 | + mutex_unlock(&accel_mutex); | |
32691 | + return 0; | |
32692 | + } | |
32693 | + } | |
32694 | + | |
32695 | + mutex_unlock(&accel_mutex); | |
32696 | + | |
32697 | + EPRINTK("Failed to identify backend device '%s' with a NIC\n", | |
32698 | + bend->nicname); | |
32699 | + | |
32700 | + return -ENOENT; | |
32701 | +} | |
32702 | + | |
32703 | + | |
32704 | +/**************************************************************************** | |
32705 | + * Resource management code | |
32706 | + ***************************************************************************/ | |
32707 | + | |
32708 | +static int alloc_page_state(struct netback_accel *bend, int max_pages) | |
32709 | +{ | |
32710 | + struct falcon_bend_accel_priv *accel_hw_priv; | |
32711 | + | |
32712 | + if (max_pages < 0 || max_pages > bend->quotas.max_buf_pages) { | |
32713 | + EPRINTK("%s: invalid max_pages: %d\n", __FUNCTION__, max_pages); | |
32714 | + return -EINVAL; | |
32715 | + } | |
32716 | + | |
32717 | + accel_hw_priv = kzalloc(sizeof(struct falcon_bend_accel_priv), | |
32718 | + GFP_KERNEL); | |
32719 | + if (accel_hw_priv == NULL) { | |
32720 | + EPRINTK("%s: no memory for accel_hw_priv\n", __FUNCTION__); | |
32721 | + return -ENOMEM; | |
32722 | + } | |
32723 | + | |
32724 | + accel_hw_priv->dma_maps = kzalloc | |
32725 | + (sizeof(struct efx_vi_dma_map_state **) * | |
32726 | + (max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ), GFP_KERNEL); | |
32727 | + if (accel_hw_priv->dma_maps == NULL) { | |
32728 | + EPRINTK("%s: no memory for dma_maps\n", __FUNCTION__); | |
32729 | + kfree(accel_hw_priv); | |
32730 | + return -ENOMEM; | |
32731 | + } | |
32732 | + | |
32733 | + bend->buffer_maps = kzalloc(sizeof(struct vm_struct *) * max_pages, | |
32734 | + GFP_KERNEL); | |
32735 | + if (bend->buffer_maps == NULL) { | |
32736 | + EPRINTK("%s: no memory for buffer_maps\n", __FUNCTION__); | |
32737 | + kfree(accel_hw_priv->dma_maps); | |
32738 | + kfree(accel_hw_priv); | |
32739 | + return -ENOMEM; | |
32740 | + } | |
32741 | + | |
32742 | + bend->buffer_addrs = kzalloc(sizeof(u64) * max_pages, GFP_KERNEL); | |
32743 | + if (bend->buffer_addrs == NULL) { | |
32744 | + kfree(bend->buffer_maps); | |
32745 | + kfree(accel_hw_priv->dma_maps); | |
32746 | + kfree(accel_hw_priv); | |
32747 | + return -ENOMEM; | |
32748 | + } | |
32749 | + | |
32750 | + bend->accel_hw_priv = accel_hw_priv; | |
32751 | + | |
32752 | + return 0; | |
32753 | +} | |
32754 | + | |
32755 | + | |
32756 | +static int free_page_state(struct netback_accel *bend) | |
32757 | +{ | |
32758 | + struct falcon_bend_accel_priv *accel_hw_priv; | |
32759 | + | |
32760 | + DPRINTK("%s: %p\n", __FUNCTION__, bend); | |
32761 | + | |
32762 | + accel_hw_priv = bend->accel_hw_priv; | |
32763 | + | |
32764 | + if (accel_hw_priv) { | |
32765 | + kfree(accel_hw_priv->dma_maps); | |
32766 | + kfree(bend->buffer_maps); | |
32767 | + kfree(bend->buffer_addrs); | |
32768 | + kfree(accel_hw_priv); | |
32769 | + bend->accel_hw_priv = NULL; | |
32770 | + bend->max_pages = 0; | |
32771 | + } | |
32772 | + | |
32773 | + return 0; | |
32774 | +} | |
32775 | + | |
32776 | + | |
32777 | +/* The timeout event callback for the event q */ | |
32778 | +static void bend_evq_timeout(void *context, int is_timeout) | |
32779 | +{ | |
32780 | + struct netback_accel *bend = (struct netback_accel *)context; | |
32781 | + if (is_timeout) { | |
32782 | + /* Pass event to vnic front end driver */ | |
32783 | + VPRINTK("timeout event to %d\n", bend->net_channel); | |
32784 | + NETBACK_ACCEL_STATS_OP(bend->stats.evq_timeouts++); | |
32785 | + notify_remote_via_irq(bend->net_channel_irq); | |
32786 | + } else { | |
32787 | + /* It's a wakeup event, used by Falcon */ | |
32788 | + VPRINTK("wakeup to %d\n", bend->net_channel); | |
32789 | + NETBACK_ACCEL_STATS_OP(bend->stats.evq_wakeups++); | |
32790 | + notify_remote_via_irq(bend->net_channel_irq); | |
32791 | + } | |
32792 | +} | |
32793 | + | |
32794 | + | |
32795 | +/* | |
32796 | + * Create the eventq and associated gubbins for communication with the | |
32797 | + * front end vnic driver | |
32798 | + */ | |
32799 | +static int ef_get_vnic(struct netback_accel *bend) | |
32800 | +{ | |
32801 | + struct falcon_bend_accel_priv *accel_hw_priv; | |
32802 | + int rc = 0; | |
32803 | + | |
32804 | + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_NONE); | |
32805 | + | |
32806 | + /* Allocate page related state and accel_hw_priv */ | |
32807 | + rc = alloc_page_state(bend, bend->max_pages); | |
32808 | + if (rc != 0) { | |
32809 | + EPRINTK("Failed to allocate page state: %d\n", rc); | |
32810 | + return rc; | |
32811 | + } | |
32812 | + | |
32813 | + accel_hw_priv = bend->accel_hw_priv; | |
32814 | + | |
32815 | + rc = efx_vi_alloc(&accel_hw_priv->efx_vih, bend->nic_index); | |
32816 | + if (rc != 0) { | |
32817 | + EPRINTK("%s: efx_vi_alloc failed %d\n", __FUNCTION__, rc); | |
32818 | + free_page_state(bend); | |
32819 | + return rc; | |
32820 | + } | |
32821 | + | |
32822 | + rc = efx_vi_eventq_register_callback(accel_hw_priv->efx_vih, | |
32823 | + bend_evq_timeout, | |
32824 | + bend); | |
32825 | + if (rc != 0) { | |
32826 | + EPRINTK("%s: register_callback failed %d\n", __FUNCTION__, rc); | |
32827 | + efx_vi_free(accel_hw_priv->efx_vih); | |
32828 | + free_page_state(bend); | |
32829 | + return rc; | |
32830 | + } | |
32831 | + | |
32832 | + bend->hw_state = NETBACK_ACCEL_RES_ALLOC; | |
32833 | + | |
32834 | + return 0; | |
32835 | +} | |
32836 | + | |
32837 | + | |
32838 | +static void ef_free_vnic(struct netback_accel *bend) | |
32839 | +{ | |
32840 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
32841 | + | |
32842 | + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_ALLOC); | |
32843 | + | |
32844 | + efx_vi_eventq_kill_callback(accel_hw_priv->efx_vih); | |
32845 | + | |
32846 | + DPRINTK("Hardware is freeable. Will proceed.\n"); | |
32847 | + | |
32848 | + efx_vi_free(accel_hw_priv->efx_vih); | |
32849 | + accel_hw_priv->efx_vih = NULL; | |
32850 | + | |
32851 | + VPRINTK("Free page state...\n"); | |
32852 | + free_page_state(bend); | |
32853 | + | |
32854 | + bend->hw_state = NETBACK_ACCEL_RES_NONE; | |
32855 | +} | |
32856 | + | |
32857 | + | |
32858 | +static inline void ungrant_or_crash(grant_ref_t gntref, int domain) { | |
32859 | + if (net_accel_ungrant_page(gntref) == -EBUSY) | |
32860 | + net_accel_shutdown_remote(domain); | |
32861 | +} | |
32862 | + | |
32863 | + | |
32864 | +static void netback_accel_release_hwinfo(struct netback_accel *bend) | |
32865 | +{ | |
32866 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
32867 | + int i; | |
32868 | + | |
32869 | + DPRINTK("Remove dma q grants %d %d\n", accel_hw_priv->txdmaq_gnt, | |
32870 | + accel_hw_priv->rxdmaq_gnt); | |
32871 | + ungrant_or_crash(accel_hw_priv->txdmaq_gnt, bend->far_end); | |
32872 | + ungrant_or_crash(accel_hw_priv->rxdmaq_gnt, bend->far_end); | |
32873 | + | |
32874 | + DPRINTK("Remove doorbell grant %d\n", accel_hw_priv->doorbell_gnt); | |
32875 | + ungrant_or_crash(accel_hw_priv->doorbell_gnt, bend->far_end); | |
32876 | + | |
32877 | + if (bend->hw_type == NET_ACCEL_MSG_HWTYPE_FALCON_A) { | |
32878 | + DPRINTK("Remove rptr grant %d\n", accel_hw_priv->evq_rptr_gnt); | |
32879 | + ungrant_or_crash(accel_hw_priv->evq_rptr_gnt, bend->far_end); | |
32880 | + } | |
32881 | + | |
32882 | + for (i = 0; i < accel_hw_priv->evq_npages; i++) { | |
32883 | + DPRINTK("Remove evq grant %d\n", accel_hw_priv->evq_mem_gnts[i]); | |
32884 | + ungrant_or_crash(accel_hw_priv->evq_mem_gnts[i], bend->far_end); | |
32885 | + } | |
32886 | + | |
32887 | + bend->hw_state = NETBACK_ACCEL_RES_FILTER; | |
32888 | + | |
32889 | + return; | |
32890 | +} | |
32891 | + | |
32892 | + | |
32893 | +static int ef_bend_hwinfo_falcon_common(struct netback_accel *bend, | |
32894 | + struct net_accel_hw_falcon_b *hwinfo) | |
32895 | +{ | |
32896 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
32897 | + struct efx_vi_hw_resource_metadata res_mdata; | |
32898 | + struct efx_vi_hw_resource res_array[EFX_VI_HW_RESOURCE_MAXSIZE]; | |
32899 | + int rc, len = EFX_VI_HW_RESOURCE_MAXSIZE, i, pfn = 0; | |
32900 | + unsigned long txdmaq_pfn = 0, rxdmaq_pfn = 0; | |
32901 | + | |
32902 | + rc = efx_vi_hw_resource_get_phys(accel_hw_priv->efx_vih, &res_mdata, | |
32903 | + res_array, &len); | |
32904 | + if (rc != 0) { | |
32905 | + DPRINTK("%s: resource_get_phys returned %d\n", | |
32906 | + __FUNCTION__, rc); | |
32907 | + return rc; | |
32908 | + } | |
32909 | + | |
32910 | + if (res_mdata.version != 0) | |
32911 | + return -EPROTO; | |
32912 | + | |
32913 | + hwinfo->nic_arch = res_mdata.nic_arch; | |
32914 | + hwinfo->nic_variant = res_mdata.nic_variant; | |
32915 | + hwinfo->nic_revision = res_mdata.nic_revision; | |
32916 | + | |
32917 | + hwinfo->evq_order = res_mdata.evq_order; | |
32918 | + hwinfo->evq_offs = res_mdata.evq_offs; | |
32919 | + hwinfo->evq_capacity = res_mdata.evq_capacity; | |
32920 | + hwinfo->instance = res_mdata.instance; | |
32921 | + hwinfo->rx_capacity = res_mdata.rx_capacity; | |
32922 | + hwinfo->tx_capacity = res_mdata.tx_capacity; | |
32923 | + | |
32924 | + VPRINTK("evq_order %d evq_offs %d evq_cap %d inst %d rx_cap %d tx_cap %d\n", | |
32925 | + hwinfo->evq_order, hwinfo->evq_offs, hwinfo->evq_capacity, | |
32926 | + hwinfo->instance, hwinfo->rx_capacity, hwinfo->tx_capacity); | |
32927 | + | |
32928 | + for (i = 0; i < len; i++) { | |
32929 | + struct efx_vi_hw_resource *res = &(res_array[i]); | |
32930 | + switch (res->type) { | |
32931 | + case EFX_VI_HW_RESOURCE_TXDMAQ: | |
32932 | + txdmaq_pfn = page_to_pfn(virt_to_page(res->address)); | |
32933 | + break; | |
32934 | + case EFX_VI_HW_RESOURCE_RXDMAQ: | |
32935 | + rxdmaq_pfn = page_to_pfn(virt_to_page(res->address)); | |
32936 | + break; | |
32937 | + case EFX_VI_HW_RESOURCE_EVQTIMER: | |
32938 | + break; | |
32939 | + case EFX_VI_HW_RESOURCE_EVQRPTR: | |
32940 | + case EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET: | |
32941 | + hwinfo->evq_rptr = res->address; | |
32942 | + break; | |
32943 | + case EFX_VI_HW_RESOURCE_EVQMEMKVA: | |
32944 | + accel_hw_priv->evq_npages = 1 << res_mdata.evq_order; | |
32945 | + pfn = page_to_pfn(virt_to_page(res->address)); | |
32946 | + break; | |
32947 | + case EFX_VI_HW_RESOURCE_BELLPAGE: | |
32948 | + hwinfo->doorbell_mfn = res->address; | |
32949 | + break; | |
32950 | + default: | |
32951 | + EPRINTK("%s: Unknown hardware resource type %d\n", | |
32952 | + __FUNCTION__, res->type); | |
32953 | + break; | |
32954 | + } | |
32955 | + } | |
32956 | + | |
32957 | + VPRINTK("Passing txdmaq page pfn %lx\n", txdmaq_pfn); | |
32958 | + accel_hw_priv->txdmaq_gnt = hwinfo->txdmaq_gnt = | |
32959 | + net_accel_grant_page(bend->hdev_data, pfn_to_mfn(txdmaq_pfn), | |
32960 | + 0); | |
32961 | + | |
32962 | + VPRINTK("Passing rxdmaq page pfn %lx\n", rxdmaq_pfn); | |
32963 | + accel_hw_priv->rxdmaq_gnt = hwinfo->rxdmaq_gnt = | |
32964 | + net_accel_grant_page(bend->hdev_data, pfn_to_mfn(rxdmaq_pfn), | |
32965 | + 0); | |
32966 | + | |
32967 | + VPRINTK("Passing doorbell page mfn %x\n", hwinfo->doorbell_mfn); | |
32968 | + /* Make the relevant H/W pages mappable by the far end */ | |
32969 | + accel_hw_priv->doorbell_gnt = hwinfo->doorbell_gnt = | |
32970 | + net_accel_grant_page(bend->hdev_data, hwinfo->doorbell_mfn, 1); | |
32971 | + | |
32972 | + /* Now do the same for the memory pages */ | |
32973 | + /* Convert the page + length we got back for the evq to grants. */ | |
32974 | + for (i = 0; i < accel_hw_priv->evq_npages; i++) { | |
32975 | + accel_hw_priv->evq_mem_gnts[i] = hwinfo->evq_mem_gnts[i] = | |
32976 | + net_accel_grant_page(bend->hdev_data, pfn_to_mfn(pfn), 0); | |
32977 | + VPRINTK("Got grant %u for evq pfn %x\n", hwinfo->evq_mem_gnts[i], | |
32978 | + pfn); | |
32979 | + pfn++; | |
32980 | + } | |
32981 | + | |
32982 | + return 0; | |
32983 | +} | |
32984 | + | |
32985 | + | |
32986 | +static int ef_bend_hwinfo_falcon_a(struct netback_accel *bend, | |
32987 | + struct net_accel_hw_falcon_a *hwinfo) | |
32988 | +{ | |
32989 | + int rc; | |
32990 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
32991 | + | |
32992 | + if ((rc = ef_bend_hwinfo_falcon_common(bend, &hwinfo->common)) != 0) | |
32993 | + return rc; | |
32994 | + | |
32995 | + /* | |
32996 | + * Note that unlike the above, where the message field is the | |
32997 | + * page number, here evq_rptr is the entire address because | |
32998 | + * it is currently a pointer into the densely mapped timer page. | |
32999 | + */ | |
33000 | + VPRINTK("Passing evq_rptr pfn %x for rptr %x\n", | |
33001 | + hwinfo->common.evq_rptr >> PAGE_SHIFT, | |
33002 | + hwinfo->common.evq_rptr); | |
33003 | + rc = net_accel_grant_page(bend->hdev_data, | |
33004 | + hwinfo->common.evq_rptr >> PAGE_SHIFT, 0); | |
33005 | + if (rc < 0) | |
33006 | + return rc; | |
33007 | + | |
33008 | + accel_hw_priv->evq_rptr_gnt = hwinfo->evq_rptr_gnt = rc; | |
33009 | + VPRINTK("evq_rptr_gnt got %d\n", hwinfo->evq_rptr_gnt); | |
33010 | + | |
33011 | + return 0; | |
33012 | +} | |
33013 | + | |
33014 | + | |
33015 | +static int ef_bend_hwinfo_falcon_b(struct netback_accel *bend, | |
33016 | + struct net_accel_hw_falcon_b *hwinfo) | |
33017 | +{ | |
33018 | + return ef_bend_hwinfo_falcon_common(bend, hwinfo); | |
33019 | +} | |
33020 | + | |
33021 | + | |
33022 | +/* | |
33023 | + * Fill in the message with a description of the hardware resources, based on | |
33024 | + * the H/W type | |
33025 | + */ | |
33026 | +static int netback_accel_hwinfo(struct netback_accel *bend, | |
33027 | + struct net_accel_msg_hw *msgvi) | |
33028 | +{ | |
33029 | + int rc = 0; | |
33030 | + | |
33031 | + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_FILTER); | |
33032 | + | |
33033 | + msgvi->type = bend->hw_type; | |
33034 | + switch (bend->hw_type) { | |
33035 | + case NET_ACCEL_MSG_HWTYPE_FALCON_A: | |
33036 | + rc = ef_bend_hwinfo_falcon_a(bend, &msgvi->resources.falcon_a); | |
33037 | + break; | |
33038 | + case NET_ACCEL_MSG_HWTYPE_FALCON_B: | |
33039 | + rc = ef_bend_hwinfo_falcon_b(bend, &msgvi->resources.falcon_b); | |
33040 | + break; | |
33041 | + case NET_ACCEL_MSG_HWTYPE_NONE: | |
33042 | + /* Nothing to do. The slow path should just work. */ | |
33043 | + break; | |
33044 | + } | |
33045 | + | |
33046 | + if (rc == 0) | |
33047 | + bend->hw_state = NETBACK_ACCEL_RES_HWINFO; | |
33048 | + | |
33049 | + return rc; | |
33050 | +} | |
33051 | + | |
33052 | + | |
33053 | +/* Allocate hardware resources and make them available to the client domain */ | |
33054 | +int netback_accel_setup_vnic_hw(struct netback_accel *bend) | |
33055 | +{ | |
33056 | + struct net_accel_msg msg; | |
33057 | + int err; | |
33058 | + | |
33059 | + /* Allocate the event queue, VI and so on. */ | |
33060 | + err = ef_get_vnic(bend); | |
33061 | + if (err) { | |
33062 | + EPRINTK("Failed to allocate hardware resource for bend:" | |
33063 | + "error %d\n", err); | |
33064 | + return err; | |
33065 | + } | |
33066 | + | |
33067 | + /* Set up the filter management */ | |
33068 | + err = netback_accel_filter_init(bend); | |
33069 | + if (err) { | |
33070 | + EPRINTK("Filter setup failed, error %d", err); | |
33071 | + ef_free_vnic(bend); | |
33072 | + return err; | |
33073 | + } | |
33074 | + | |
33075 | + net_accel_msg_init(&msg, NET_ACCEL_MSG_SETHW); | |
33076 | + | |
33077 | + /* | |
33078 | + * Extract the low-level hardware info we will actually pass to the | |
33079 | + * other end, and set up the grants/ioremap permissions needed | |
33080 | + */ | |
33081 | + err = netback_accel_hwinfo(bend, &msg.u.hw); | |
33082 | + | |
33083 | + if (err != 0) { | |
33084 | + netback_accel_filter_shutdown(bend); | |
33085 | + ef_free_vnic(bend); | |
33086 | + return err; | |
33087 | + } | |
33088 | + | |
33089 | + /* Send the message, this is a reply to a hello-reply */ | |
33090 | + err = net_accel_msg_reply_notify(bend->shared_page, | |
33091 | + bend->msg_channel_irq, | |
33092 | + &bend->to_domU, &msg); | |
33093 | + | |
33094 | + /* | |
33095 | + * The message should succeed as it's logically a reply and we | |
33096 | + * guarantee space for replies, but a misbehaving frontend | |
33097 | + * could result in that behaviour, so be tolerant | |
33098 | + */ | |
33099 | + if (err != 0) { | |
33100 | + netback_accel_release_hwinfo(bend); | |
33101 | + netback_accel_filter_shutdown(bend); | |
33102 | + ef_free_vnic(bend); | |
33103 | + } | |
33104 | + | |
33105 | + return err; | |
33106 | +} | |
33107 | + | |
33108 | + | |
33109 | +/* Free hardware resources */ | |
33110 | +void netback_accel_shutdown_vnic_hw(struct netback_accel *bend) | |
33111 | +{ | |
33112 | + /* | |
33113 | + * Only try and release resources if accel_hw_priv was setup, | |
33114 | + * otherwise there is nothing to do as we're on "null-op" | |
33115 | + * acceleration | |
33116 | + */ | |
33117 | + switch (bend->hw_state) { | |
33118 | + case NETBACK_ACCEL_RES_HWINFO: | |
33119 | + VPRINTK("Release hardware resources\n"); | |
33120 | + netback_accel_release_hwinfo(bend); | |
33121 | + /* deliberate drop through */ | |
33122 | + case NETBACK_ACCEL_RES_FILTER: | |
33123 | + VPRINTK("Free filters...\n"); | |
33124 | + netback_accel_filter_shutdown(bend); | |
33125 | + /* deliberate drop through */ | |
33126 | + case NETBACK_ACCEL_RES_ALLOC: | |
33127 | + VPRINTK("Free vnic...\n"); | |
33128 | + ef_free_vnic(bend); | |
33129 | + /* deliberate drop through */ | |
33130 | + case NETBACK_ACCEL_RES_NONE: | |
33131 | + break; | |
33132 | + default: | |
33133 | + BUG(); | |
33134 | + } | |
33135 | +} | |
33136 | + | |
33137 | +/************************************************************************** | |
33138 | + * | |
33139 | + * Buffer table stuff | |
33140 | + * | |
33141 | + **************************************************************************/ | |
33142 | + | |
33143 | +/* | |
33144 | + * Undo any allocation that netback_accel_msg_rx_buffer_map() has made | |
33145 | + * if it fails half way through | |
33146 | + */ | |
33147 | +static inline void buffer_map_cleanup(struct netback_accel *bend, int i) | |
33148 | +{ | |
33149 | + while (i > 0) { | |
33150 | + i--; | |
33151 | + bend->buffer_maps_index--; | |
33152 | + net_accel_unmap_device_page(bend->hdev_data, | |
33153 | + bend->buffer_maps[bend->buffer_maps_index], | |
33154 | + bend->buffer_addrs[bend->buffer_maps_index]); | |
33155 | + } | |
33156 | +} | |
33157 | + | |
33158 | + | |
33159 | +int netback_accel_add_buffers(struct netback_accel *bend, int pages, int log2_pages, | |
33160 | + u32 *grants, u32 *buf_addr_out) | |
33161 | +{ | |
33162 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
33163 | + unsigned long long addr_array[NET_ACCEL_MSG_MAX_PAGE_REQ]; | |
33164 | + int rc, i, index; | |
33165 | + u64 dev_bus_addr; | |
33166 | + | |
33167 | + /* Make sure we can't overflow the dma_maps array */ | |
33168 | + if (accel_hw_priv->dma_maps_index >= | |
33169 | + bend->max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ) { | |
33170 | + EPRINTK("%s: too many buffer table allocations: %d %d\n", | |
33171 | + __FUNCTION__, accel_hw_priv->dma_maps_index, | |
33172 | + bend->max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ); | |
33173 | + return -EINVAL; | |
33174 | + } | |
33175 | + | |
33176 | + /* Make sure we can't overflow the buffer_maps array */ | |
33177 | + if (bend->buffer_maps_index + pages > bend->max_pages) { | |
33178 | + EPRINTK("%s: too many pages mapped: %d + %d > %d\n", | |
33179 | + __FUNCTION__, bend->buffer_maps_index, | |
33180 | + pages, bend->max_pages); | |
33181 | + return -EINVAL; | |
33182 | + } | |
33183 | + | |
33184 | + for (i = 0; i < pages; i++) { | |
33185 | + VPRINTK("%s: mapping page %d\n", __FUNCTION__, i); | |
33186 | + rc = net_accel_map_device_page | |
33187 | + (bend->hdev_data, grants[i], | |
33188 | + &bend->buffer_maps[bend->buffer_maps_index], | |
33189 | + &dev_bus_addr); | |
33190 | + | |
33191 | + if (rc != 0) { | |
33192 | + EPRINTK("error in net_accel_map_device_page\n"); | |
33193 | + buffer_map_cleanup(bend, i); | |
33194 | + return rc; | |
33195 | + } | |
33196 | + | |
33197 | + bend->buffer_addrs[bend->buffer_maps_index] = dev_bus_addr; | |
33198 | + | |
33199 | + bend->buffer_maps_index++; | |
33200 | + | |
33201 | + addr_array[i] = dev_bus_addr; | |
33202 | + } | |
33203 | + | |
33204 | + VPRINTK("%s: mapping dma addresses to vih %p\n", __FUNCTION__, | |
33205 | + accel_hw_priv->efx_vih); | |
33206 | + | |
33207 | + index = accel_hw_priv->dma_maps_index; | |
33208 | + if ((rc = efx_vi_dma_map_addrs(accel_hw_priv->efx_vih, addr_array, pages, | |
33209 | + &(accel_hw_priv->dma_maps[index]))) < 0) { | |
33210 | + EPRINTK("error in dma_map_pages\n"); | |
33211 | + buffer_map_cleanup(bend, i); | |
33212 | + return rc; | |
33213 | + } | |
33214 | + | |
33215 | + accel_hw_priv->dma_maps_index++; | |
33216 | + NETBACK_ACCEL_STATS_OP(bend->stats.num_buffer_pages += pages); | |
33217 | + | |
33218 | + //DPRINTK("%s: getting map address\n", __FUNCTION__); | |
33219 | + | |
33220 | + *buf_addr_out = efx_vi_dma_get_map_addr(accel_hw_priv->efx_vih, | |
33221 | + accel_hw_priv->dma_maps[index]); | |
33222 | + | |
33223 | + //DPRINTK("%s: done\n", __FUNCTION__); | |
33224 | + | |
33225 | + return 0; | |
33226 | +} | |
33227 | + | |
33228 | + | |
33229 | +int netback_accel_remove_buffers(struct netback_accel *bend) | |
33230 | +{ | |
33231 | + /* Only try to free buffers if accel_hw_priv was setup */ | |
33232 | + if (bend->hw_state != NETBACK_ACCEL_RES_NONE) { | |
33233 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
33234 | + int i; | |
33235 | + | |
33236 | + efx_vi_reset(accel_hw_priv->efx_vih); | |
33237 | + | |
33238 | + while (accel_hw_priv->dma_maps_index > 0) { | |
33239 | + accel_hw_priv->dma_maps_index--; | |
33240 | + i = accel_hw_priv->dma_maps_index; | |
33241 | + efx_vi_dma_unmap_addrs(accel_hw_priv->efx_vih, | |
33242 | + accel_hw_priv->dma_maps[i]); | |
33243 | + } | |
33244 | + | |
33245 | + while (bend->buffer_maps_index > 0) { | |
33246 | + VPRINTK("Unmapping granted buffer %d\n", | |
33247 | + bend->buffer_maps_index); | |
33248 | + bend->buffer_maps_index--; | |
33249 | + i = bend->buffer_maps_index; | |
33250 | + net_accel_unmap_device_page(bend->hdev_data, | |
33251 | + bend->buffer_maps[i], | |
33252 | + bend->buffer_addrs[i]); | |
33253 | + } | |
33254 | + | |
33255 | + NETBACK_ACCEL_STATS_OP(bend->stats.num_buffer_pages = 0); | |
33256 | + } | |
33257 | + | |
33258 | + return 0; | |
33259 | +} | |
33260 | + | |
33261 | +/************************************************************************** | |
33262 | + * | |
33263 | + * Filter stuff | |
33264 | + * | |
33265 | + **************************************************************************/ | |
33266 | + | |
33267 | +static int netback_accel_filter_init(struct netback_accel *bend) | |
33268 | +{ | |
33269 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
33270 | + int i, rc; | |
33271 | + | |
33272 | + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_ALLOC); | |
33273 | + | |
33274 | + spin_lock_init(&accel_hw_priv->filter_lock); | |
33275 | + | |
33276 | + if ((rc = cuckoo_hash_init(&accel_hw_priv->filter_hash_table, | |
33277 | + 5 /* space for 32 filters */, 8)) != 0) { | |
33278 | + EPRINTK("Failed to initialise filter hash table\n"); | |
33279 | + return rc; | |
33280 | + } | |
33281 | + | |
33282 | + accel_hw_priv->fspecs = kzalloc(sizeof(struct netback_accel_filter_spec) * | |
33283 | + bend->quotas.max_filters, | |
33284 | + GFP_KERNEL); | |
33285 | + | |
33286 | + if (accel_hw_priv->fspecs == NULL) { | |
33287 | + EPRINTK("No memory for filter specs.\n"); | |
33288 | + cuckoo_hash_destroy(&accel_hw_priv->filter_hash_table); | |
33289 | + return -ENOMEM; | |
33290 | + } | |
33291 | + | |
33292 | + for (i = 0; i < bend->quotas.max_filters; i++) { | |
33293 | + accel_hw_priv->free_filters |= (1 << i); | |
33294 | + } | |
33295 | + | |
33296 | + /* Base mask on highest set bit in max_filters */ | |
33297 | + accel_hw_priv->filter_idx_mask = (1 << fls(bend->quotas.max_filters)) - 1; | |
33298 | + VPRINTK("filter setup: max is %x mask is %x\n", | |
33299 | + bend->quotas.max_filters, accel_hw_priv->filter_idx_mask); | |
33300 | + | |
33301 | + bend->hw_state = NETBACK_ACCEL_RES_FILTER; | |
33302 | + | |
33303 | + return 0; | |
33304 | +} | |
33305 | + | |
33306 | + | |
33307 | +static inline void make_filter_key(cuckoo_hash_ip_key *key, | |
33308 | + struct netback_accel_filter_spec *filt) | |
33309 | + | |
33310 | +{ | |
33311 | + key->local_ip = filt->destip_be; | |
33312 | + key->local_port = filt->destport_be; | |
33313 | + key->proto = filt->proto; | |
33314 | +} | |
33315 | + | |
33316 | + | |
33317 | +static inline | |
33318 | +void netback_accel_free_filter(struct falcon_bend_accel_priv *accel_hw_priv, | |
33319 | + int filter) | |
33320 | +{ | |
33321 | + cuckoo_hash_ip_key filter_key; | |
33322 | + | |
33323 | + if (!(accel_hw_priv->free_filters & (1 << filter))) { | |
33324 | + efx_vi_filter_stop(accel_hw_priv->efx_vih, | |
33325 | + accel_hw_priv->fspecs[filter].filter_handle); | |
33326 | + make_filter_key(&filter_key, &(accel_hw_priv->fspecs[filter])); | |
33327 | + if (cuckoo_hash_remove(&accel_hw_priv->filter_hash_table, | |
33328 | + (cuckoo_hash_key *)&filter_key)) { | |
33329 | + EPRINTK("%s: Couldn't find filter to remove from table\n", | |
33330 | + __FUNCTION__); | |
33331 | + BUG(); | |
33332 | + } | |
33333 | + } | |
33334 | +} | |
33335 | + | |
33336 | + | |
33337 | +static void netback_accel_filter_shutdown(struct netback_accel *bend) | |
33338 | +{ | |
33339 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
33340 | + int i; | |
33341 | + unsigned long flags; | |
33342 | + | |
33343 | + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_FILTER); | |
33344 | + | |
33345 | + spin_lock_irqsave(&accel_hw_priv->filter_lock, flags); | |
33346 | + | |
33347 | + BUG_ON(accel_hw_priv->fspecs == NULL); | |
33348 | + | |
33349 | + for (i = 0; i < bend->quotas.max_filters; i++) { | |
33350 | + netback_accel_free_filter(accel_hw_priv, i); | |
33351 | + } | |
33352 | + | |
33353 | + kfree(accel_hw_priv->fspecs); | |
33354 | + accel_hw_priv->fspecs = NULL; | |
33355 | + accel_hw_priv->free_filters = 0; | |
33356 | + | |
33357 | + cuckoo_hash_destroy(&accel_hw_priv->filter_hash_table); | |
33358 | + | |
33359 | + spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags); | |
33360 | + | |
33361 | + bend->hw_state = NETBACK_ACCEL_RES_ALLOC; | |
33362 | +} | |
33363 | + | |
33364 | + | |
33365 | +/*! Suggest a filter to replace when we want to insert a new one and have | |
33366 | + * none free. | |
33367 | + */ | |
33368 | +static unsigned get_victim_filter(struct netback_accel *bend) | |
33369 | +{ | |
33370 | + /* | |
33371 | + * We could attempt to get really clever, and may do at some | |
33372 | + * point, but random replacement is v. cheap and low on | |
33373 | + * pathological worst cases. | |
33374 | + */ | |
33375 | + unsigned index, cycles; | |
33376 | + | |
33377 | + rdtscl(cycles); | |
33378 | + | |
33379 | + /* | |
33380 | + * Some doubt about the quality of the bottom few bits, so | |
33381 | + * throw 'em * away | |
33382 | + */ | |
33383 | + index = (cycles >> 4) & ((struct falcon_bend_accel_priv *) | |
33384 | + bend->accel_hw_priv)->filter_idx_mask; | |
33385 | + /* | |
33386 | + * We don't enforce that the number of filters is a power of | |
33387 | + * two, but the masking gets us to within one subtraction of a | |
33388 | + * valid index | |
33389 | + */ | |
33390 | + if (index >= bend->quotas.max_filters) | |
33391 | + index -= bend->quotas.max_filters; | |
33392 | + DPRINTK("backend %s->%d has no free filters. Filter %d will be evicted\n", | |
33393 | + bend->nicname, bend->far_end, index); | |
33394 | + return index; | |
33395 | +} | |
33396 | + | |
33397 | + | |
33398 | +/* Add a filter for the specified IP/port to the backend */ | |
33399 | +int | |
33400 | +netback_accel_filter_check_add(struct netback_accel *bend, | |
33401 | + struct netback_accel_filter_spec *filt) | |
33402 | +{ | |
33403 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
33404 | + struct netback_accel_filter_spec *fs; | |
33405 | + unsigned filter_index; | |
33406 | + unsigned long flags; | |
33407 | + int rc, recycling = 0; | |
33408 | + cuckoo_hash_ip_key filter_key, evict_key; | |
33409 | + | |
33410 | + BUG_ON(filt->proto != IPPROTO_TCP && filt->proto != IPPROTO_UDP); | |
33411 | + | |
33412 | + DPRINTK("Will add %s filter for dst ip %08x and dst port %d\n", | |
33413 | + (filt->proto == IPPROTO_TCP) ? "TCP" : "UDP", | |
33414 | + be32_to_cpu(filt->destip_be), be16_to_cpu(filt->destport_be)); | |
33415 | + | |
33416 | + spin_lock_irqsave(&accel_hw_priv->filter_lock, flags); | |
33417 | + /* | |
33418 | + * Check to see if we're already filtering this IP address and | |
33419 | + * port. Happens if you insert a filter mid-stream as there | |
33420 | + * are many packets backed up to be delivered to dom0 already | |
33421 | + */ | |
33422 | + make_filter_key(&filter_key, filt); | |
33423 | + if (cuckoo_hash_lookup(&accel_hw_priv->filter_hash_table, | |
33424 | + (cuckoo_hash_key *)(&filter_key), | |
33425 | + &filter_index)) { | |
33426 | + DPRINTK("Found matching filter %d already in table\n", | |
33427 | + filter_index); | |
33428 | + rc = -1; | |
33429 | + goto out; | |
33430 | + } | |
33431 | + | |
33432 | + if (accel_hw_priv->free_filters == 0) { | |
33433 | + filter_index = get_victim_filter(bend); | |
33434 | + recycling = 1; | |
33435 | + } else { | |
33436 | + filter_index = __ffs(accel_hw_priv->free_filters); | |
33437 | + clear_bit(filter_index, &accel_hw_priv->free_filters); | |
33438 | + } | |
33439 | + | |
33440 | + fs = &accel_hw_priv->fspecs[filter_index]; | |
33441 | + | |
33442 | + if (recycling) { | |
33443 | + DPRINTK("Removing filter index %d handle %p\n", filter_index, | |
33444 | + fs->filter_handle); | |
33445 | + | |
33446 | + if ((rc = efx_vi_filter_stop(accel_hw_priv->efx_vih, | |
33447 | + fs->filter_handle)) != 0) { | |
33448 | + EPRINTK("Couldn't clear NIC filter table entry %d\n", rc); | |
33449 | + } | |
33450 | + | |
33451 | + make_filter_key(&evict_key, fs); | |
33452 | + if (cuckoo_hash_remove(&accel_hw_priv->filter_hash_table, | |
33453 | + (cuckoo_hash_key *)&evict_key)) { | |
33454 | + EPRINTK("Couldn't find filter to remove from table\n"); | |
33455 | + BUG(); | |
33456 | + } | |
33457 | + NETBACK_ACCEL_STATS_OP(bend->stats.num_filters--); | |
33458 | + } | |
33459 | + | |
33460 | + /* Update the filter spec with new details */ | |
33461 | + *fs = *filt; | |
33462 | + | |
33463 | + if ((rc = cuckoo_hash_add(&accel_hw_priv->filter_hash_table, | |
33464 | + (cuckoo_hash_key *)&filter_key, filter_index, | |
33465 | + 1)) != 0) { | |
33466 | + EPRINTK("Error (%d) adding filter to table\n", rc); | |
33467 | + accel_hw_priv->free_filters |= (1 << filter_index); | |
33468 | + goto out; | |
33469 | + } | |
33470 | + | |
33471 | + rc = efx_vi_filter(accel_hw_priv->efx_vih, filt->proto, filt->destip_be, | |
33472 | + filt->destport_be, | |
33473 | + (struct filter_resource_t **)&fs->filter_handle); | |
33474 | + | |
33475 | + if (rc != 0) { | |
33476 | + EPRINTK("Hardware filter insertion failed. Error %d\n", rc); | |
33477 | + accel_hw_priv->free_filters |= (1 << filter_index); | |
33478 | + cuckoo_hash_remove(&accel_hw_priv->filter_hash_table, | |
33479 | + (cuckoo_hash_key *)&filter_key); | |
33480 | + rc = -1; | |
33481 | + goto out; | |
33482 | + } | |
33483 | + | |
33484 | + NETBACK_ACCEL_STATS_OP(bend->stats.num_filters++); | |
33485 | + | |
33486 | + VPRINTK("%s: success index %d handle %p\n", __FUNCTION__, filter_index, | |
33487 | + fs->filter_handle); | |
33488 | + | |
33489 | + rc = filter_index; | |
33490 | + out: | |
33491 | + spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags); | |
33492 | + return rc; | |
33493 | +} | |
33494 | + | |
33495 | + | |
33496 | +/* Remove a filter entry for the specific device and IP/port */ | |
33497 | +static void netback_accel_filter_remove(struct netback_accel *bend, | |
33498 | + int filter_index) | |
33499 | +{ | |
33500 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
33501 | + | |
33502 | + BUG_ON(accel_hw_priv->free_filters & (1 << filter_index)); | |
33503 | + netback_accel_free_filter(accel_hw_priv, filter_index); | |
33504 | + accel_hw_priv->free_filters |= (1 << filter_index); | |
33505 | +} | |
33506 | + | |
33507 | + | |
33508 | +/* Remove a filter entry for the specific device and IP/port */ | |
33509 | +void netback_accel_filter_remove_spec(struct netback_accel *bend, | |
33510 | + struct netback_accel_filter_spec *filt) | |
33511 | +{ | |
33512 | + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; | |
33513 | + unsigned filter_found; | |
33514 | + unsigned long flags; | |
33515 | + cuckoo_hash_ip_key filter_key; | |
33516 | + struct netback_accel_filter_spec *fs; | |
33517 | + | |
33518 | + if (filt->proto == IPPROTO_TCP) { | |
33519 | + DPRINTK("Remove TCP filter for dst ip %08x and dst port %d\n", | |
33520 | + be32_to_cpu(filt->destip_be), | |
33521 | + be16_to_cpu(filt->destport_be)); | |
33522 | + } else if (filt->proto == IPPROTO_UDP) { | |
33523 | + DPRINTK("Remove UDP filter for dst ip %08x and dst port %d\n", | |
33524 | + be32_to_cpu(filt->destip_be), | |
33525 | + be16_to_cpu(filt->destport_be)); | |
33526 | + } else { | |
33527 | + /* | |
33528 | + * This could be provoked by an evil frontend, so can't | |
33529 | + * BUG(), but harmless as it should fail tests below | |
33530 | + */ | |
33531 | + DPRINTK("Non-TCP/UDP filter dst ip %08x and dst port %d\n", | |
33532 | + be32_to_cpu(filt->destip_be), | |
33533 | + be16_to_cpu(filt->destport_be)); | |
33534 | + } | |
33535 | + | |
33536 | + spin_lock_irqsave(&accel_hw_priv->filter_lock, flags); | |
33537 | + | |
33538 | + make_filter_key(&filter_key, filt); | |
33539 | + if (!cuckoo_hash_lookup(&accel_hw_priv->filter_hash_table, | |
33540 | + (cuckoo_hash_key *)(&filter_key), | |
33541 | + &filter_found)) { | |
33542 | + EPRINTK("Couldn't find matching filter already in table\n"); | |
33543 | + goto out; | |
33544 | + } | |
33545 | + | |
33546 | + /* Do a full check to make sure we've not had a hash collision */ | |
33547 | + fs = &accel_hw_priv->fspecs[filter_found]; | |
33548 | + if (fs->destip_be == filt->destip_be && | |
33549 | + fs->destport_be == filt->destport_be && | |
33550 | + fs->proto == filt->proto && | |
33551 | + !memcmp(fs->mac, filt->mac, ETH_ALEN)) { | |
33552 | + netback_accel_filter_remove(bend, filter_found); | |
33553 | + } else { | |
33554 | + EPRINTK("Entry in hash table does not match filter spec\n"); | |
33555 | + goto out; | |
33556 | + } | |
33557 | + | |
33558 | + out: | |
33559 | + spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags); | |
33560 | +} | |
33561 | Index: head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.h | |
33562 | =================================================================== | |
33563 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
33564 | +++ head-2008-11-25/drivers/xen/sfc_netback/accel_solarflare.h 2008-02-20 09:32:49.000000000 +0100 | |
33565 | @@ -0,0 +1,88 @@ | |
33566 | +/**************************************************************************** | |
33567 | + * Solarflare driver for Xen network acceleration | |
33568 | + * | |
33569 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
33570 | + * 9501 Jeronimo Road, Suite 250, | |
33571 | + * Irvine, CA 92618, USA | |
33572 | + * | |
33573 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
33574 | + * | |
33575 | + * This program is free software; you can redistribute it and/or modify it | |
33576 | + * under the terms of the GNU General Public License version 2 as published | |
33577 | + * by the Free Software Foundation, incorporated herein by reference. | |
33578 | + * | |
33579 | + * This program is distributed in the hope that it will be useful, | |
33580 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
33581 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
33582 | + * GNU General Public License for more details. | |
33583 | + * | |
33584 | + * You should have received a copy of the GNU General Public License | |
33585 | + * along with this program; if not, write to the Free Software | |
33586 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
33587 | + **************************************************************************** | |
33588 | + */ | |
33589 | + | |
33590 | +#ifndef NETBACK_ACCEL_SOLARFLARE_H | |
33591 | +#define NETBACK_ACCEL_SOLARFLARE_H | |
33592 | + | |
33593 | +#include "accel.h" | |
33594 | +#include "accel_msg_iface.h" | |
33595 | + | |
33596 | +#include "driverlink_api.h" | |
33597 | + | |
33598 | +#define MAX_NICS 5 | |
33599 | +#define MAX_PORTS 2 | |
33600 | + | |
33601 | + | |
33602 | +extern int netback_accel_sf_init(void); | |
33603 | +extern void netback_accel_sf_shutdown(void); | |
33604 | +extern int netback_accel_sf_hwtype(struct netback_accel *bend); | |
33605 | + | |
33606 | +extern int netback_accel_sf_char_init(void); | |
33607 | +extern void netback_accel_sf_char_shutdown(void); | |
33608 | + | |
33609 | +extern int netback_accel_setup_vnic_hw(struct netback_accel *bend); | |
33610 | +extern void netback_accel_shutdown_vnic_hw(struct netback_accel *bend); | |
33611 | + | |
33612 | +extern int netback_accel_add_buffers(struct netback_accel *bend, int pages, | |
33613 | + int log2_pages, u32 *grants, | |
33614 | + u32 *buf_addr_out); | |
33615 | +extern int netback_accel_remove_buffers(struct netback_accel *bend); | |
33616 | + | |
33617 | + | |
33618 | +/* Add a filter for the specified IP/port to the backend */ | |
33619 | +extern int | |
33620 | +netback_accel_filter_check_add(struct netback_accel *bend, | |
33621 | + struct netback_accel_filter_spec *filt); | |
33622 | +/* Remove a filter entry for the specific device and IP/port */ | |
33623 | +extern | |
33624 | +void netback_accel_filter_remove_index(struct netback_accel *bend, | |
33625 | + int filter_index); | |
33626 | +extern | |
33627 | +void netback_accel_filter_remove_spec(struct netback_accel *bend, | |
33628 | + struct netback_accel_filter_spec *filt); | |
33629 | + | |
33630 | +/* This is designed to look a bit like a skb */ | |
33631 | +struct netback_pkt_buf { | |
33632 | + union { | |
33633 | + unsigned char *raw; | |
33634 | + } mac; | |
33635 | + union { | |
33636 | + struct iphdr *iph; | |
33637 | + struct arphdr *arph; | |
33638 | + unsigned char *raw; | |
33639 | + } nh; | |
33640 | + int protocol; | |
33641 | +}; | |
33642 | + | |
33643 | +/*! \brief Handle a received packet: insert fast path filters as necessary | |
33644 | + * \param skb The packet buffer | |
33645 | + */ | |
33646 | +extern void netback_accel_rx_packet(struct netback_pkt_buf *skb, void *fwd_priv); | |
33647 | + | |
33648 | +/*! \brief Handle a transmitted packet: update fast path filters as necessary | |
33649 | + * \param skb The packet buffer | |
33650 | + */ | |
33651 | +extern void netback_accel_tx_packet(struct sk_buff *skb, void *fwd_priv); | |
33652 | + | |
33653 | +#endif /* NETBACK_ACCEL_SOLARFLARE_H */ | |
33654 | Index: head-2008-11-25/drivers/xen/sfc_netback/accel_xenbus.c | |
33655 | =================================================================== | |
33656 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
33657 | +++ head-2008-11-25/drivers/xen/sfc_netback/accel_xenbus.c 2008-02-26 10:54:11.000000000 +0100 | |
33658 | @@ -0,0 +1,831 @@ | |
33659 | +/**************************************************************************** | |
33660 | + * Solarflare driver for Xen network acceleration | |
33661 | + * | |
33662 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
33663 | + * 9501 Jeronimo Road, Suite 250, | |
33664 | + * Irvine, CA 92618, USA | |
33665 | + * | |
33666 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
33667 | + * | |
33668 | + * This program is free software; you can redistribute it and/or modify it | |
33669 | + * under the terms of the GNU General Public License version 2 as published | |
33670 | + * by the Free Software Foundation, incorporated herein by reference. | |
33671 | + * | |
33672 | + * This program is distributed in the hope that it will be useful, | |
33673 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
33674 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
33675 | + * GNU General Public License for more details. | |
33676 | + * | |
33677 | + * You should have received a copy of the GNU General Public License | |
33678 | + * along with this program; if not, write to the Free Software | |
33679 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
33680 | + **************************************************************************** | |
33681 | + */ | |
33682 | + | |
33683 | +#include <xen/evtchn.h> | |
33684 | +#include <linux/mutex.h> | |
33685 | + | |
33686 | +/* drivers/xen/netback/common.h */ | |
33687 | +#include "common.h" | |
33688 | + | |
33689 | +#include "accel.h" | |
33690 | +#include "accel_solarflare.h" | |
33691 | +#include "accel_util.h" | |
33692 | + | |
33693 | +#define NODENAME_PATH_FMT "backend/vif/%d/%d" | |
33694 | + | |
33695 | +#define NETBACK_ACCEL_FROM_XENBUS_DEVICE(_dev) (struct netback_accel *) \ | |
33696 | + ((struct backend_info *)(_dev)->dev.driver_data)->netback_accel_priv | |
33697 | + | |
33698 | +/* List of all the bends currently in existence. */ | |
33699 | +struct netback_accel *bend_list = NULL; | |
33700 | +DEFINE_MUTEX(bend_list_mutex); | |
33701 | + | |
33702 | +/* Put in bend_list. Must hold bend_list_mutex */ | |
33703 | +static void link_bend(struct netback_accel *bend) | |
33704 | +{ | |
33705 | + bend->next_bend = bend_list; | |
33706 | + bend_list = bend; | |
33707 | +} | |
33708 | + | |
33709 | +/* Remove from bend_list, Must hold bend_list_mutex */ | |
33710 | +static void unlink_bend(struct netback_accel *bend) | |
33711 | +{ | |
33712 | + struct netback_accel *tmp = bend_list; | |
33713 | + struct netback_accel *prev = NULL; | |
33714 | + while (tmp != NULL) { | |
33715 | + if (tmp == bend) { | |
33716 | + if (prev != NULL) | |
33717 | + prev->next_bend = bend->next_bend; | |
33718 | + else | |
33719 | + bend_list = bend->next_bend; | |
33720 | + return; | |
33721 | + } | |
33722 | + prev = tmp; | |
33723 | + tmp = tmp->next_bend; | |
33724 | + } | |
33725 | +} | |
33726 | + | |
33727 | + | |
33728 | +/* Demultiplex a message IRQ from the frontend driver. */ | |
33729 | +static irqreturn_t msgirq_from_frontend(int irq, void *context, | |
33730 | + struct pt_regs *unused) | |
33731 | +{ | |
33732 | + struct xenbus_device *dev = context; | |
33733 | + struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev); | |
33734 | + VPRINTK("irq %d from device %s\n", irq, dev->nodename); | |
33735 | + schedule_work(&bend->handle_msg); | |
33736 | + return IRQ_HANDLED; | |
33737 | +} | |
33738 | + | |
33739 | + | |
33740 | +/* | |
33741 | + * Demultiplex an IRQ from the frontend driver. This is never used | |
33742 | + * functionally, but we need it to pass to the bind function, and may | |
33743 | + * get called spuriously | |
33744 | + */ | |
33745 | +static irqreturn_t netirq_from_frontend(int irq, void *context, | |
33746 | + struct pt_regs *unused) | |
33747 | +{ | |
33748 | + VPRINTK("netirq %d from device %s\n", irq, | |
33749 | + ((struct xenbus_device *)context)->nodename); | |
33750 | + | |
33751 | + return IRQ_HANDLED; | |
33752 | +} | |
33753 | + | |
33754 | + | |
33755 | +/* Read the limits values of the xenbus structure. */ | |
33756 | +static | |
33757 | +void cfg_hw_quotas(struct xenbus_device *dev, struct netback_accel *bend) | |
33758 | +{ | |
33759 | + int err = xenbus_gather | |
33760 | + (XBT_NIL, dev->nodename, | |
33761 | + "limits/max-filters", "%d", &bend->quotas.max_filters, | |
33762 | + "limits/max-buf-pages", "%d", &bend->quotas.max_buf_pages, | |
33763 | + "limits/max-mcasts", "%d", &bend->quotas.max_mcasts, | |
33764 | + NULL); | |
33765 | + if (err) { | |
33766 | + /* | |
33767 | + * TODO what if they have previously been set by the | |
33768 | + * user? This will overwrite with defaults. Maybe | |
33769 | + * not what we want to do, but useful in startup | |
33770 | + * case | |
33771 | + */ | |
33772 | + DPRINTK("Failed to read quotas from xenbus, using defaults\n"); | |
33773 | + bend->quotas.max_filters = NETBACK_ACCEL_DEFAULT_MAX_FILTERS; | |
33774 | + bend->quotas.max_buf_pages = sfc_netback_max_pages; | |
33775 | + bend->quotas.max_mcasts = NETBACK_ACCEL_DEFAULT_MAX_MCASTS; | |
33776 | + } | |
33777 | + | |
33778 | + return; | |
33779 | +} | |
33780 | + | |
33781 | + | |
33782 | +static void bend_config_accel_change(struct xenbus_watch *watch, | |
33783 | + const char **vec, unsigned int len) | |
33784 | +{ | |
33785 | + struct netback_accel *bend; | |
33786 | + | |
33787 | + bend = container_of(watch, struct netback_accel, config_accel_watch); | |
33788 | + | |
33789 | + mutex_lock(&bend->bend_mutex); | |
33790 | + if (bend->config_accel_watch.node != NULL) { | |
33791 | + struct xenbus_device *dev = | |
33792 | + (struct xenbus_device *)bend->hdev_data; | |
33793 | + DPRINTK("Watch matched, got dev %p otherend %p\n", | |
33794 | + dev, dev->otherend); | |
33795 | + if(!xenbus_exists(XBT_NIL, watch->node, "")) { | |
33796 | + DPRINTK("Ignoring watch as otherend seems invalid\n"); | |
33797 | + goto out; | |
33798 | + } | |
33799 | + | |
33800 | + cfg_hw_quotas(dev, bend); | |
33801 | + } | |
33802 | + out: | |
33803 | + mutex_unlock(&bend->bend_mutex); | |
33804 | + return; | |
33805 | +} | |
33806 | + | |
33807 | + | |
33808 | +/* | |
33809 | + * Setup watch on "limits" in the backend vif info to know when | |
33810 | + * configuration has been set | |
33811 | + */ | |
33812 | +static int setup_config_accel_watch(struct xenbus_device *dev, | |
33813 | + struct netback_accel *bend) | |
33814 | +{ | |
33815 | + int err; | |
33816 | + | |
33817 | + VPRINTK("Setting watch on %s/%s\n", dev->nodename, "limits"); | |
33818 | + | |
33819 | + err = xenbus_watch_path2(dev, dev->nodename, "limits", | |
33820 | + &bend->config_accel_watch, | |
33821 | + bend_config_accel_change); | |
33822 | + | |
33823 | + if (err) { | |
33824 | + EPRINTK("%s: Failed to register xenbus watch: %d\n", | |
33825 | + __FUNCTION__, err); | |
33826 | + bend->config_accel_watch.node = NULL; | |
33827 | + return err; | |
33828 | + } | |
33829 | + return 0; | |
33830 | +} | |
33831 | + | |
33832 | + | |
33833 | +static int | |
33834 | +cfg_frontend_info(struct xenbus_device *dev, struct netback_accel *bend, | |
33835 | + int *grants) | |
33836 | +{ | |
33837 | + /* Get some info from xenbus on the event channel and shmem grant */ | |
33838 | + int err = xenbus_gather(XBT_NIL, dev->otherend, | |
33839 | + "accel-msg-channel", "%u", &bend->msg_channel, | |
33840 | + "accel-ctrl-page", "%d", &(grants[0]), | |
33841 | + "accel-msg-page", "%d", &(grants[1]), | |
33842 | + "accel-net-channel", "%u", &bend->net_channel, | |
33843 | + NULL); | |
33844 | + if (err) | |
33845 | + EPRINTK("failed to read event channels or shmem grant: %d\n", | |
33846 | + err); | |
33847 | + else | |
33848 | + DPRINTK("got event chan %d and net chan %d from frontend\n", | |
33849 | + bend->msg_channel, bend->net_channel); | |
33850 | + return err; | |
33851 | +} | |
33852 | + | |
33853 | + | |
33854 | +/* Setup all the comms needed to chat with the front end driver */ | |
33855 | +static int setup_vnic(struct xenbus_device *dev) | |
33856 | +{ | |
33857 | + struct netback_accel *bend; | |
33858 | + int grants[2], err, msgs_per_queue; | |
33859 | + | |
33860 | + bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev); | |
33861 | + | |
33862 | + err = cfg_frontend_info(dev, bend, grants); | |
33863 | + if (err) | |
33864 | + goto fail1; | |
33865 | + | |
33866 | + /* | |
33867 | + * If we get here, both frontend Connected and configuration | |
33868 | + * options available. All is well. | |
33869 | + */ | |
33870 | + | |
33871 | + /* Get the hardware quotas for the VNIC in question. */ | |
33872 | + cfg_hw_quotas(dev, bend); | |
33873 | + | |
33874 | + /* Set up the deferred work handlers */ | |
33875 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
33876 | + INIT_WORK(&bend->handle_msg, | |
33877 | + netback_accel_msg_rx_handler); | |
33878 | +#else | |
33879 | + INIT_WORK(&bend->handle_msg, | |
33880 | + netback_accel_msg_rx_handler, | |
33881 | + (void*)bend); | |
33882 | +#endif | |
33883 | + | |
33884 | + /* Request the frontend mac */ | |
33885 | + err = net_accel_xen_net_read_mac(dev, bend->mac); | |
33886 | + if (err) | |
33887 | + goto fail2; | |
33888 | + | |
33889 | + /* Set up the shared page. */ | |
33890 | + bend->shared_page = net_accel_map_grants_contig(dev, grants, 2, | |
33891 | + &bend->sh_pages_unmap); | |
33892 | + | |
33893 | + if (bend->shared_page == NULL) { | |
33894 | + EPRINTK("failed to map shared page for %s\n", dev->otherend); | |
33895 | + err = -ENOMEM; | |
33896 | + goto fail2; | |
33897 | + } | |
33898 | + | |
33899 | + /* Initialise the shared page(s) used for comms */ | |
33900 | + net_accel_msg_init_page(bend->shared_page, PAGE_SIZE, | |
33901 | + bend->net_dev->flags & IFF_UP); | |
33902 | + | |
33903 | + msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg); | |
33904 | + | |
33905 | + net_accel_msg_init_queue | |
33906 | + (&bend->to_domU, &bend->shared_page->queue0, | |
33907 | + (struct net_accel_msg *)((__u8*)bend->shared_page + PAGE_SIZE), | |
33908 | + msgs_per_queue); | |
33909 | + | |
33910 | + net_accel_msg_init_queue | |
33911 | + (&bend->from_domU, &bend->shared_page->queue1, | |
33912 | + (struct net_accel_msg *)((__u8*)bend->shared_page + | |
33913 | + (3 * PAGE_SIZE / 2)), | |
33914 | + msgs_per_queue); | |
33915 | + | |
33916 | + /* Bind the message event channel to a handler | |
33917 | + * | |
33918 | + * Note that we will probably get a spurious interrupt when we | |
33919 | + * do this, so it must not be done until we have set up | |
33920 | + * everything we need to handle it. | |
33921 | + */ | |
33922 | + err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id, | |
33923 | + bend->msg_channel, | |
33924 | + msgirq_from_frontend, | |
33925 | + 0, | |
33926 | + "netback_accel", | |
33927 | + dev); | |
33928 | + if (err < 0) { | |
33929 | + EPRINTK("failed to bind event channel: %d\n", err); | |
33930 | + goto fail3; | |
33931 | + } | |
33932 | + else | |
33933 | + bend->msg_channel_irq = err; | |
33934 | + | |
33935 | + /* TODO: No need to bind this evtchn to an irq. */ | |
33936 | + err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id, | |
33937 | + bend->net_channel, | |
33938 | + netirq_from_frontend, | |
33939 | + 0, | |
33940 | + "netback_accel", | |
33941 | + dev); | |
33942 | + if (err < 0) { | |
33943 | + EPRINTK("failed to bind net channel: %d\n", err); | |
33944 | + goto fail4; | |
33945 | + } | |
33946 | + else | |
33947 | + bend->net_channel_irq = err; | |
33948 | + | |
33949 | + /* | |
33950 | + * Grab ourselves an entry in the forwarding hash table. We do | |
33951 | + * this now so we don't have the embarassmesnt of sorting out | |
33952 | + * an allocation failure while at IRQ. Because we pass NULL as | |
33953 | + * the context, the actual hash lookup will succeed for this | |
33954 | + * NIC, but the check for somewhere to forward to will | |
33955 | + * fail. This is necessary to prevent forwarding before | |
33956 | + * hardware resources are set up | |
33957 | + */ | |
33958 | + err = netback_accel_fwd_add(bend->mac, NULL, bend->fwd_priv); | |
33959 | + if (err) { | |
33960 | + EPRINTK("failed to add to fwd hash table\n"); | |
33961 | + goto fail5; | |
33962 | + } | |
33963 | + | |
33964 | + /* | |
33965 | + * Say hello to frontend. Important to do this straight after | |
33966 | + * obtaining the message queue as otherwise we are vulnerable | |
33967 | + * to an evil frontend sending a HELLO-REPLY before we've sent | |
33968 | + * the HELLO and confusing us | |
33969 | + */ | |
33970 | + netback_accel_msg_tx_hello(bend, NET_ACCEL_MSG_VERSION); | |
33971 | + return 0; | |
33972 | + | |
33973 | + fail5: | |
33974 | + unbind_from_irqhandler(bend->net_channel_irq, dev); | |
33975 | + fail4: | |
33976 | + unbind_from_irqhandler(bend->msg_channel_irq, dev); | |
33977 | + fail3: | |
33978 | + net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap); | |
33979 | + bend->shared_page = NULL; | |
33980 | + bend->sh_pages_unmap = NULL; | |
33981 | + fail2: | |
33982 | + fail1: | |
33983 | + return err; | |
33984 | +} | |
33985 | + | |
33986 | + | |
33987 | +static int read_nicname(struct xenbus_device *dev, struct netback_accel *bend) | |
33988 | +{ | |
33989 | + int len; | |
33990 | + | |
33991 | + /* nic name used to select interface used for acceleration */ | |
33992 | + bend->nicname = xenbus_read(XBT_NIL, dev->nodename, "accel", &len); | |
33993 | + if (IS_ERR(bend->nicname)) | |
33994 | + return PTR_ERR(bend->nicname); | |
33995 | + | |
33996 | + return 0; | |
33997 | +} | |
33998 | + | |
33999 | +static const char *frontend_name = "sfc_netfront"; | |
34000 | + | |
34001 | +static int publish_frontend_name(struct xenbus_device *dev) | |
34002 | +{ | |
34003 | + struct xenbus_transaction tr; | |
34004 | + int err; | |
34005 | + | |
34006 | + /* Publish the name of the frontend driver */ | |
34007 | + do { | |
34008 | + err = xenbus_transaction_start(&tr); | |
34009 | + if (err != 0) { | |
34010 | + EPRINTK("%s: transaction start failed\n", __FUNCTION__); | |
34011 | + return err; | |
34012 | + } | |
34013 | + err = xenbus_printf(tr, dev->nodename, "accel-frontend", | |
34014 | + "%s", frontend_name); | |
34015 | + if (err != 0) { | |
34016 | + EPRINTK("%s: xenbus_printf failed\n", __FUNCTION__); | |
34017 | + xenbus_transaction_end(tr, 1); | |
34018 | + return err; | |
34019 | + } | |
34020 | + err = xenbus_transaction_end(tr, 0); | |
34021 | + } while (err == -EAGAIN); | |
34022 | + | |
34023 | + if (err != 0) { | |
34024 | + EPRINTK("failed to end frontend name transaction\n"); | |
34025 | + return err; | |
34026 | + } | |
34027 | + return 0; | |
34028 | +} | |
34029 | + | |
34030 | + | |
34031 | +static int unpublish_frontend_name(struct xenbus_device *dev) | |
34032 | +{ | |
34033 | + struct xenbus_transaction tr; | |
34034 | + int err; | |
34035 | + | |
34036 | + do { | |
34037 | + err = xenbus_transaction_start(&tr); | |
34038 | + if (err != 0) | |
34039 | + break; | |
34040 | + err = xenbus_rm(tr, dev->nodename, "accel-frontend"); | |
34041 | + if (err != 0) { | |
34042 | + xenbus_transaction_end(tr, 1); | |
34043 | + break; | |
34044 | + } | |
34045 | + err = xenbus_transaction_end(tr, 0); | |
34046 | + } while (err == -EAGAIN); | |
34047 | + | |
34048 | + return err; | |
34049 | +} | |
34050 | + | |
34051 | + | |
34052 | +static void cleanup_vnic(struct netback_accel *bend) | |
34053 | +{ | |
34054 | + struct xenbus_device *dev; | |
34055 | + | |
34056 | + dev = (struct xenbus_device *)bend->hdev_data; | |
34057 | + | |
34058 | + DPRINTK("%s: bend %p dev %p\n", __FUNCTION__, bend, dev); | |
34059 | + | |
34060 | + DPRINTK("%s: Remove %p's mac from fwd table...\n", | |
34061 | + __FUNCTION__, bend); | |
34062 | + netback_accel_fwd_remove(bend->mac, bend->fwd_priv); | |
34063 | + | |
34064 | + /* Free buffer table allocations */ | |
34065 | + netback_accel_remove_buffers(bend); | |
34066 | + | |
34067 | + DPRINTK("%s: Release hardware resources...\n", __FUNCTION__); | |
34068 | + if (bend->accel_shutdown) | |
34069 | + bend->accel_shutdown(bend); | |
34070 | + | |
34071 | + if (bend->net_channel_irq) { | |
34072 | + unbind_from_irqhandler(bend->net_channel_irq, dev); | |
34073 | + bend->net_channel_irq = 0; | |
34074 | + } | |
34075 | + | |
34076 | + if (bend->msg_channel_irq) { | |
34077 | + unbind_from_irqhandler(bend->msg_channel_irq, dev); | |
34078 | + bend->msg_channel_irq = 0; | |
34079 | + } | |
34080 | + | |
34081 | + if (bend->sh_pages_unmap) { | |
34082 | + DPRINTK("%s: Unmap grants %p\n", __FUNCTION__, | |
34083 | + bend->sh_pages_unmap); | |
34084 | + net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap); | |
34085 | + bend->sh_pages_unmap = NULL; | |
34086 | + bend->shared_page = NULL; | |
34087 | + } | |
34088 | +} | |
34089 | + | |
34090 | + | |
34091 | +/*************************************************************************/ | |
34092 | + | |
34093 | +/* | |
34094 | + * The following code handles accelstate changes between the frontend | |
34095 | + * and the backend. It calls setup_vnic and cleanup_vnic in matching | |
34096 | + * pairs in response to transitions. | |
34097 | + * | |
34098 | + * Valid state transitions for Dom0 are as follows: | |
34099 | + * | |
34100 | + * Closed->Init on probe or in response to Init from domU | |
34101 | + * Closed->Closing on error/remove | |
34102 | + * | |
34103 | + * Init->Connected in response to Connected from domU | |
34104 | + * Init->Closing on error/remove or in response to Closing from domU | |
34105 | + * | |
34106 | + * Connected->Closing on error/remove or in response to Closing from domU | |
34107 | + * | |
34108 | + * Closing->Closed in response to Closed from domU | |
34109 | + * | |
34110 | + */ | |
34111 | + | |
34112 | + | |
34113 | +static void netback_accel_frontend_changed(struct xenbus_device *dev, | |
34114 | + XenbusState frontend_state) | |
34115 | +{ | |
34116 | + struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev); | |
34117 | + XenbusState backend_state; | |
34118 | + | |
34119 | + DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n", | |
34120 | + __FUNCTION__, xenbus_strstate(bend->frontend_state), | |
34121 | + xenbus_strstate(frontend_state),dev->nodename, dev->otherend); | |
34122 | + | |
34123 | + /* | |
34124 | + * Ignore duplicate state changes. This can happen if the | |
34125 | + * frontend changes state twice in quick succession and the | |
34126 | + * first watch fires in the backend after the second | |
34127 | + * transition has completed. | |
34128 | + */ | |
34129 | + if (bend->frontend_state == frontend_state) | |
34130 | + return; | |
34131 | + | |
34132 | + bend->frontend_state = frontend_state; | |
34133 | + backend_state = bend->backend_state; | |
34134 | + | |
34135 | + switch (frontend_state) { | |
34136 | + case XenbusStateInitialising: | |
34137 | + if (backend_state == XenbusStateClosed && | |
34138 | + !bend->removing) | |
34139 | + backend_state = XenbusStateInitialising; | |
34140 | + break; | |
34141 | + | |
34142 | + case XenbusStateConnected: | |
34143 | + if (backend_state == XenbusStateInitialising) { | |
34144 | + if (!bend->vnic_is_setup && | |
34145 | + setup_vnic(dev) == 0) { | |
34146 | + bend->vnic_is_setup = 1; | |
34147 | + backend_state = XenbusStateConnected; | |
34148 | + } else { | |
34149 | + backend_state = XenbusStateClosing; | |
34150 | + } | |
34151 | + } | |
34152 | + break; | |
34153 | + | |
34154 | + case XenbusStateInitWait: | |
34155 | + case XenbusStateInitialised: | |
34156 | + default: | |
34157 | + DPRINTK("Unknown state %s (%d) from frontend.\n", | |
34158 | + xenbus_strstate(frontend_state), frontend_state); | |
34159 | + /* Unknown state. Fall through. */ | |
34160 | + case XenbusStateClosing: | |
34161 | + if (backend_state != XenbusStateClosed) | |
34162 | + backend_state = XenbusStateClosing; | |
34163 | + | |
34164 | + /* | |
34165 | + * The bend will now persist (with watches active) in | |
34166 | + * case the frontend comes back again, eg. after | |
34167 | + * frontend module reload or suspend/resume | |
34168 | + */ | |
34169 | + | |
34170 | + break; | |
34171 | + | |
34172 | + case XenbusStateUnknown: | |
34173 | + case XenbusStateClosed: | |
34174 | + if (bend->vnic_is_setup) { | |
34175 | + bend->vnic_is_setup = 0; | |
34176 | + cleanup_vnic(bend); | |
34177 | + } | |
34178 | + | |
34179 | + if (backend_state == XenbusStateClosing) | |
34180 | + backend_state = XenbusStateClosed; | |
34181 | + break; | |
34182 | + } | |
34183 | + | |
34184 | + if (backend_state != bend->backend_state) { | |
34185 | + DPRINTK("Switching from state %s (%d) to %s (%d)\n", | |
34186 | + xenbus_strstate(bend->backend_state), | |
34187 | + bend->backend_state, | |
34188 | + xenbus_strstate(backend_state), backend_state); | |
34189 | + bend->backend_state = backend_state; | |
34190 | + net_accel_update_state(dev, backend_state); | |
34191 | + } | |
34192 | + | |
34193 | + wake_up(&bend->state_wait_queue); | |
34194 | +} | |
34195 | + | |
34196 | + | |
34197 | +/* accelstate on the frontend's xenbus node has changed */ | |
34198 | +static void bend_domu_accel_change(struct xenbus_watch *watch, | |
34199 | + const char **vec, unsigned int len) | |
34200 | +{ | |
34201 | + int state; | |
34202 | + struct netback_accel *bend; | |
34203 | + | |
34204 | + bend = container_of(watch, struct netback_accel, domu_accel_watch); | |
34205 | + if (bend->domu_accel_watch.node != NULL) { | |
34206 | + struct xenbus_device *dev = | |
34207 | + (struct xenbus_device *)bend->hdev_data; | |
34208 | + VPRINTK("Watch matched, got dev %p otherend %p\n", | |
34209 | + dev, dev->otherend); | |
34210 | + /* | |
34211 | + * dev->otherend != NULL check to protect against | |
34212 | + * watch firing when domain goes away and we haven't | |
34213 | + * yet cleaned up | |
34214 | + */ | |
34215 | + if (!dev->otherend || | |
34216 | + !xenbus_exists(XBT_NIL, watch->node, "") || | |
34217 | + strncmp(dev->otherend, vec[XS_WATCH_PATH], | |
34218 | + strlen(dev->otherend))) { | |
34219 | + DPRINTK("Ignoring watch as otherend seems invalid\n"); | |
34220 | + return; | |
34221 | + } | |
34222 | + | |
34223 | + mutex_lock(&bend->bend_mutex); | |
34224 | + | |
34225 | + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", | |
34226 | + &state); | |
34227 | + netback_accel_frontend_changed(dev, state); | |
34228 | + | |
34229 | + mutex_unlock(&bend->bend_mutex); | |
34230 | + } | |
34231 | +} | |
34232 | + | |
34233 | +/* Setup watch on frontend's accelstate */ | |
34234 | +static int setup_domu_accel_watch(struct xenbus_device *dev, | |
34235 | + struct netback_accel *bend) | |
34236 | +{ | |
34237 | + int err; | |
34238 | + | |
34239 | + VPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate"); | |
34240 | + | |
34241 | + err = xenbus_watch_path2(dev, dev->otherend, "accelstate", | |
34242 | + &bend->domu_accel_watch, | |
34243 | + bend_domu_accel_change); | |
34244 | + if (err) { | |
34245 | + EPRINTK("%s: Failed to register xenbus watch: %d\n", | |
34246 | + __FUNCTION__, err); | |
34247 | + goto fail; | |
34248 | + } | |
34249 | + return 0; | |
34250 | + fail: | |
34251 | + bend->domu_accel_watch.node = NULL; | |
34252 | + return err; | |
34253 | +} | |
34254 | + | |
34255 | + | |
34256 | +int netback_accel_probe(struct xenbus_device *dev) | |
34257 | +{ | |
34258 | + struct netback_accel *bend; | |
34259 | + struct backend_info *binfo; | |
34260 | + int err; | |
34261 | + | |
34262 | + DPRINTK("%s: passed device %s\n", __FUNCTION__, dev->nodename); | |
34263 | + | |
34264 | + /* Allocate structure to store all our state... */ | |
34265 | + bend = kzalloc(sizeof(struct netback_accel), GFP_KERNEL); | |
34266 | + if (bend == NULL) { | |
34267 | + DPRINTK("%s: no memory for bend\n", __FUNCTION__); | |
34268 | + return -ENOMEM; | |
34269 | + } | |
34270 | + | |
34271 | + mutex_init(&bend->bend_mutex); | |
34272 | + | |
34273 | + mutex_lock(&bend->bend_mutex); | |
34274 | + | |
34275 | + /* ...and store it where we can get at it */ | |
34276 | + binfo = (struct backend_info *) dev->dev.driver_data; | |
34277 | + binfo->netback_accel_priv = bend; | |
34278 | + /* And vice-versa */ | |
34279 | + bend->hdev_data = dev; | |
34280 | + | |
34281 | + DPRINTK("%s: Adding bend %p to list\n", __FUNCTION__, bend); | |
34282 | + | |
34283 | + init_waitqueue_head(&bend->state_wait_queue); | |
34284 | + bend->vnic_is_setup = 0; | |
34285 | + bend->frontend_state = XenbusStateUnknown; | |
34286 | + bend->backend_state = XenbusStateClosed; | |
34287 | + bend->removing = 0; | |
34288 | + | |
34289 | + sscanf(dev->nodename, NODENAME_PATH_FMT, &bend->far_end, | |
34290 | + &bend->vif_num); | |
34291 | + | |
34292 | + err = read_nicname(dev, bend); | |
34293 | + if (err) { | |
34294 | + /* | |
34295 | + * Technically not an error, just means we're not | |
34296 | + * supposed to accelerate this | |
34297 | + */ | |
34298 | + DPRINTK("failed to get device name\n"); | |
34299 | + goto fail_nicname; | |
34300 | + } | |
34301 | + | |
34302 | + /* | |
34303 | + * Look up the device name in the list of NICs provided by | |
34304 | + * driverlink to get the hardware type. | |
34305 | + */ | |
34306 | + err = netback_accel_sf_hwtype(bend); | |
34307 | + if (err) { | |
34308 | + /* | |
34309 | + * Technically not an error, just means we're not | |
34310 | + * supposed to accelerate this, probably belongs to | |
34311 | + * some other backend | |
34312 | + */ | |
34313 | + DPRINTK("failed to match device name\n"); | |
34314 | + goto fail_init_type; | |
34315 | + } | |
34316 | + | |
34317 | + err = publish_frontend_name(dev); | |
34318 | + if (err) | |
34319 | + goto fail_publish; | |
34320 | + | |
34321 | + err = netback_accel_debugfs_create(bend); | |
34322 | + if (err) | |
34323 | + goto fail_debugfs; | |
34324 | + | |
34325 | + mutex_unlock(&bend->bend_mutex); | |
34326 | + | |
34327 | + err = setup_config_accel_watch(dev, bend); | |
34328 | + if (err) | |
34329 | + goto fail_config_watch; | |
34330 | + | |
34331 | + err = setup_domu_accel_watch(dev, bend); | |
34332 | + if (err) | |
34333 | + goto fail_domu_watch; | |
34334 | + | |
34335 | + /* | |
34336 | + * Indicate to the other end that we're ready to start unless | |
34337 | + * the watch has already fired. | |
34338 | + */ | |
34339 | + mutex_lock(&bend->bend_mutex); | |
34340 | + if (bend->backend_state == XenbusStateClosed) { | |
34341 | + bend->backend_state = XenbusStateInitialising; | |
34342 | + net_accel_update_state(dev, XenbusStateInitialising); | |
34343 | + } | |
34344 | + mutex_unlock(&bend->bend_mutex); | |
34345 | + | |
34346 | + mutex_lock(&bend_list_mutex); | |
34347 | + link_bend(bend); | |
34348 | + mutex_unlock(&bend_list_mutex); | |
34349 | + | |
34350 | + return 0; | |
34351 | + | |
34352 | +fail_domu_watch: | |
34353 | + | |
34354 | + unregister_xenbus_watch(&bend->config_accel_watch); | |
34355 | + kfree(bend->config_accel_watch.node); | |
34356 | +fail_config_watch: | |
34357 | + | |
34358 | + /* | |
34359 | + * Flush the scheduled work queue before freeing bend to get | |
34360 | + * rid of any pending netback_accel_msg_rx_handler() | |
34361 | + */ | |
34362 | + flush_scheduled_work(); | |
34363 | + | |
34364 | + mutex_lock(&bend->bend_mutex); | |
34365 | + net_accel_update_state(dev, XenbusStateUnknown); | |
34366 | + netback_accel_debugfs_remove(bend); | |
34367 | +fail_debugfs: | |
34368 | + | |
34369 | + unpublish_frontend_name(dev); | |
34370 | +fail_publish: | |
34371 | + | |
34372 | + /* No need to reverse netback_accel_sf_hwtype. */ | |
34373 | +fail_init_type: | |
34374 | + | |
34375 | + kfree(bend->nicname); | |
34376 | +fail_nicname: | |
34377 | + binfo->netback_accel_priv = NULL; | |
34378 | + mutex_unlock(&bend->bend_mutex); | |
34379 | + kfree(bend); | |
34380 | + return err; | |
34381 | +} | |
34382 | + | |
34383 | + | |
34384 | +int netback_accel_remove(struct xenbus_device *dev) | |
34385 | +{ | |
34386 | + struct backend_info *binfo; | |
34387 | + struct netback_accel *bend; | |
34388 | + int frontend_state; | |
34389 | + | |
34390 | + binfo = (struct backend_info *) dev->dev.driver_data; | |
34391 | + bend = (struct netback_accel *) binfo->netback_accel_priv; | |
34392 | + | |
34393 | + DPRINTK("%s: dev %p bend %p\n", __FUNCTION__, dev, bend); | |
34394 | + | |
34395 | + BUG_ON(bend == NULL); | |
34396 | + | |
34397 | + mutex_lock(&bend_list_mutex); | |
34398 | + unlink_bend(bend); | |
34399 | + mutex_unlock(&bend_list_mutex); | |
34400 | + | |
34401 | + mutex_lock(&bend->bend_mutex); | |
34402 | + | |
34403 | + /* Reject any requests to connect. */ | |
34404 | + bend->removing = 1; | |
34405 | + | |
34406 | + /* | |
34407 | + * Switch to closing to tell the other end that we're going | |
34408 | + * away. | |
34409 | + */ | |
34410 | + if (bend->backend_state != XenbusStateClosing) { | |
34411 | + bend->backend_state = XenbusStateClosing; | |
34412 | + net_accel_update_state(dev, XenbusStateClosing); | |
34413 | + } | |
34414 | + | |
34415 | + frontend_state = (int)XenbusStateUnknown; | |
34416 | + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", | |
34417 | + &frontend_state); | |
34418 | + | |
34419 | + mutex_unlock(&bend->bend_mutex); | |
34420 | + | |
34421 | + /* | |
34422 | + * Wait until this end goes to the closed state. This happens | |
34423 | + * in response to the other end going to the closed state. | |
34424 | + * Don't bother doing this if the other end is already closed | |
34425 | + * because if it is then there is nothing to do. | |
34426 | + */ | |
34427 | + if (frontend_state != (int)XenbusStateClosed && | |
34428 | + frontend_state != (int)XenbusStateUnknown) | |
34429 | + wait_event(bend->state_wait_queue, | |
34430 | + bend->backend_state == XenbusStateClosed); | |
34431 | + | |
34432 | + unregister_xenbus_watch(&bend->domu_accel_watch); | |
34433 | + kfree(bend->domu_accel_watch.node); | |
34434 | + | |
34435 | + unregister_xenbus_watch(&bend->config_accel_watch); | |
34436 | + kfree(bend->config_accel_watch.node); | |
34437 | + | |
34438 | + /* | |
34439 | + * Flush the scheduled work queue before freeing bend to get | |
34440 | + * rid of any pending netback_accel_msg_rx_handler() | |
34441 | + */ | |
34442 | + flush_scheduled_work(); | |
34443 | + | |
34444 | + mutex_lock(&bend->bend_mutex); | |
34445 | + | |
34446 | + /* Tear down the vnic if it was set up. */ | |
34447 | + if (bend->vnic_is_setup) { | |
34448 | + bend->vnic_is_setup = 0; | |
34449 | + cleanup_vnic(bend); | |
34450 | + } | |
34451 | + | |
34452 | + bend->backend_state = XenbusStateUnknown; | |
34453 | + net_accel_update_state(dev, XenbusStateUnknown); | |
34454 | + | |
34455 | + netback_accel_debugfs_remove(bend); | |
34456 | + | |
34457 | + unpublish_frontend_name(dev); | |
34458 | + | |
34459 | + kfree(bend->nicname); | |
34460 | + | |
34461 | + binfo->netback_accel_priv = NULL; | |
34462 | + | |
34463 | + mutex_unlock(&bend->bend_mutex); | |
34464 | + | |
34465 | + kfree(bend); | |
34466 | + | |
34467 | + return 0; | |
34468 | +} | |
34469 | + | |
34470 | + | |
34471 | +void netback_accel_shutdown_bends(void) | |
34472 | +{ | |
34473 | + mutex_lock(&bend_list_mutex); | |
34474 | + /* | |
34475 | + * I think we should have had a remove callback for all | |
34476 | + * interfaces before being allowed to unload the module | |
34477 | + */ | |
34478 | + BUG_ON(bend_list != NULL); | |
34479 | + mutex_unlock(&bend_list_mutex); | |
34480 | +} | |
34481 | + | |
34482 | + | |
34483 | +void netback_accel_set_closing(struct netback_accel *bend) | |
34484 | +{ | |
34485 | + | |
34486 | + bend->backend_state = XenbusStateClosing; | |
34487 | + net_accel_update_state((struct xenbus_device *)bend->hdev_data, | |
34488 | + XenbusStateClosing); | |
34489 | +} | |
34490 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat.h | |
34491 | =================================================================== | |
34492 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
34493 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat.h 2008-02-20 09:32:49.000000000 +0100 | |
34494 | @@ -0,0 +1,53 @@ | |
34495 | +/**************************************************************************** | |
34496 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
34497 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
34498 | + * 9501 Jeronimo Road, Suite 250, | |
34499 | + * Irvine, CA 92618, USA | |
34500 | + * | |
34501 | + * Maintained by Solarflare Communications | |
34502 | + * <linux-xen-drivers@solarflare.com> | |
34503 | + * <onload-dev@solarflare.com> | |
34504 | + * | |
34505 | + * This program is free software; you can redistribute it and/or modify it | |
34506 | + * under the terms of the GNU General Public License version 2 as published | |
34507 | + * by the Free Software Foundation, incorporated herein by reference. | |
34508 | + * | |
34509 | + * This program is distributed in the hope that it will be useful, | |
34510 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
34511 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
34512 | + * GNU General Public License for more details. | |
34513 | + * | |
34514 | + * You should have received a copy of the GNU General Public License | |
34515 | + * along with this program; if not, write to the Free Software | |
34516 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
34517 | + **************************************************************************** | |
34518 | + */ | |
34519 | + | |
34520 | +/* | |
34521 | + * \author djr | |
34522 | + * \brief Compatability layer. Provides definitions of fundamental | |
34523 | + * types and definitions that are used throughout CI source | |
34524 | + * code. It does not introduce any link time dependencies, | |
34525 | + * or include any unnecessary system headers. | |
34526 | + */ | |
34527 | +/*! \cidoxg_include_ci */ | |
34528 | + | |
34529 | +#ifndef __CI_COMPAT_H__ | |
34530 | +#define __CI_COMPAT_H__ | |
34531 | + | |
34532 | +#ifdef __cplusplus | |
34533 | +extern "C" { | |
34534 | +#endif | |
34535 | + | |
34536 | +#include <ci/compat/primitive.h> | |
34537 | +#include <ci/compat/sysdep.h> | |
34538 | +#include <ci/compat/utils.h> | |
34539 | + | |
34540 | + | |
34541 | +#ifdef __cplusplus | |
34542 | +} | |
34543 | +#endif | |
34544 | + | |
34545 | +#endif /* __CI_COMPAT_H__ */ | |
34546 | + | |
34547 | +/*! \cidoxg_end */ | |
34548 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc.h | |
34549 | =================================================================== | |
34550 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
34551 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc.h 2008-02-20 09:32:49.000000000 +0100 | |
34552 | @@ -0,0 +1,158 @@ | |
34553 | +/**************************************************************************** | |
34554 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
34555 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
34556 | + * 9501 Jeronimo Road, Suite 250, | |
34557 | + * Irvine, CA 92618, USA | |
34558 | + * | |
34559 | + * Maintained by Solarflare Communications | |
34560 | + * <linux-xen-drivers@solarflare.com> | |
34561 | + * <onload-dev@solarflare.com> | |
34562 | + * | |
34563 | + * This program is free software; you can redistribute it and/or modify it | |
34564 | + * under the terms of the GNU General Public License version 2 as published | |
34565 | + * by the Free Software Foundation, incorporated herein by reference. | |
34566 | + * | |
34567 | + * This program is distributed in the hope that it will be useful, | |
34568 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
34569 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
34570 | + * GNU General Public License for more details. | |
34571 | + * | |
34572 | + * You should have received a copy of the GNU General Public License | |
34573 | + * along with this program; if not, write to the Free Software | |
34574 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
34575 | + **************************************************************************** | |
34576 | + */ | |
34577 | + | |
34578 | +/*! \cidoxg_include_ci_compat */ | |
34579 | + | |
34580 | +#ifndef __CI_COMPAT_GCC_H__ | |
34581 | +#define __CI_COMPAT_GCC_H__ | |
34582 | + | |
34583 | + | |
34584 | +#define CI_HAVE_INT64 | |
34585 | + | |
34586 | + | |
34587 | +#if defined(__linux__) && defined(__KERNEL__) | |
34588 | + | |
34589 | +# include <linux/types.h> | |
34590 | + | |
34591 | +typedef __u64 ci_uint64; | |
34592 | +typedef __s64 ci_int64; | |
34593 | +# if BITS_PER_LONG == 32 | |
34594 | +typedef __s32 ci_ptr_arith_t; | |
34595 | +typedef __u32 ci_uintptr_t; | |
34596 | +# else | |
34597 | +typedef __s64 ci_ptr_arith_t; | |
34598 | +typedef __u64 ci_uintptr_t; | |
34599 | +# endif | |
34600 | + | |
34601 | + | |
34602 | +/* it's not obvious to me why the below is wrong for x64_64, but | |
34603 | + * gcc seems to complain on this platform | |
34604 | + */ | |
34605 | +# if defined(__ia64__) | |
34606 | +# define CI_PRId64 "ld" | |
34607 | +# define CI_PRIi64 "li" | |
34608 | +# define CI_PRIo64 "lo" | |
34609 | +# define CI_PRIu64 "lu" | |
34610 | +# define CI_PRIx64 "lx" | |
34611 | +# define CI_PRIX64 "lX" | |
34612 | +# else | |
34613 | +# define CI_PRId64 "lld" | |
34614 | +# define CI_PRIi64 "lli" | |
34615 | +# define CI_PRIo64 "llo" | |
34616 | +# define CI_PRIu64 "llu" | |
34617 | +# define CI_PRIx64 "llx" | |
34618 | +# define CI_PRIX64 "llX" | |
34619 | +# endif | |
34620 | + | |
34621 | +# define CI_PRId32 "d" | |
34622 | +# define CI_PRIi32 "i" | |
34623 | +# define CI_PRIo32 "o" | |
34624 | +# define CI_PRIu32 "u" | |
34625 | +# define CI_PRIx32 "x" | |
34626 | +# define CI_PRIX32 "X" | |
34627 | + | |
34628 | +#else | |
34629 | + | |
34630 | +# include <stdint.h> | |
34631 | +# include <inttypes.h> | |
34632 | + | |
34633 | +typedef uint64_t ci_uint64; | |
34634 | +typedef int64_t ci_int64; | |
34635 | +typedef intptr_t ci_ptr_arith_t; | |
34636 | +typedef uintptr_t ci_uintptr_t; | |
34637 | + | |
34638 | +# define CI_PRId64 PRId64 | |
34639 | +# define CI_PRIi64 PRIi64 | |
34640 | +# define CI_PRIo64 PRIo64 | |
34641 | +# define CI_PRIu64 PRIu64 | |
34642 | +# define CI_PRIx64 PRIx64 | |
34643 | +# define CI_PRIX64 PRIX64 | |
34644 | + | |
34645 | +# define CI_PRId32 PRId32 | |
34646 | +# define CI_PRIi32 PRIi32 | |
34647 | +# define CI_PRIo32 PRIo32 | |
34648 | +# define CI_PRIu32 PRIu32 | |
34649 | +# define CI_PRIx32 PRIx32 | |
34650 | +# define CI_PRIX32 PRIX32 | |
34651 | + | |
34652 | +#endif | |
34653 | + | |
34654 | + | |
34655 | +typedef ci_uint64 ci_fixed_descriptor_t; | |
34656 | + | |
34657 | +#define from_fixed_descriptor(desc) ((ci_uintptr_t)(desc)) | |
34658 | +#define to_fixed_descriptor(desc) ((ci_fixed_descriptor_t)(ci_uintptr_t)(desc)) | |
34659 | + | |
34660 | + | |
34661 | +#if __GNUC__ >= 3 && !defined(__cplusplus) | |
34662 | +/* | |
34663 | +** Checks that [p_mbr] has the same type as [&c_type::mbr_name]. | |
34664 | +*/ | |
34665 | +# define CI_CONTAINER(c_type, mbr_name, p_mbr) \ | |
34666 | + __builtin_choose_expr( \ | |
34667 | + __builtin_types_compatible_p(__typeof__(&((c_type*)0)->mbr_name), \ | |
34668 | + __typeof__(p_mbr)), \ | |
34669 | + __CI_CONTAINER(c_type, mbr_name, p_mbr), (void)0) | |
34670 | + | |
34671 | +# define ci_restrict __restrict__ | |
34672 | +#endif | |
34673 | + | |
34674 | + | |
34675 | +#if !defined(__KERNEL__) || defined(__unix__) | |
34676 | +#define CI_HAVE_NPRINTF 1 | |
34677 | +#endif | |
34678 | + | |
34679 | + | |
34680 | +/* At what version was this introduced? */ | |
34681 | +#if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91) | |
34682 | +# define CI_LIKELY(t) __builtin_expect((t), 1) | |
34683 | +# define CI_UNLIKELY(t) __builtin_expect((t), 0) | |
34684 | +#endif | |
34685 | + | |
34686 | +/********************************************************************** | |
34687 | + * Attributes | |
34688 | + */ | |
34689 | +#if __GNUC__ >= 3 && defined(NDEBUG) | |
34690 | +# define CI_HF __attribute__((visibility("hidden"))) | |
34691 | +# define CI_HV __attribute__((visibility("hidden"))) | |
34692 | +#else | |
34693 | +# define CI_HF | |
34694 | +# define CI_HV | |
34695 | +#endif | |
34696 | + | |
34697 | +#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) | |
34698 | +# define ci_noinline static __attribute__((__noinline__)) | |
34699 | +/* (Linux 2.6 defines its own "noinline", so we use the "__noinline__" form) */ | |
34700 | +#else | |
34701 | +# define ci_noinline static | |
34702 | +#endif | |
34703 | + | |
34704 | +#define CI_ALIGN(x) __attribute__ ((aligned (x))) | |
34705 | + | |
34706 | +#define CI_PRINTF_LIKE(a,b) __attribute__((format(printf,a,b))) | |
34707 | + | |
34708 | +#endif /* __CI_COMPAT_GCC_H__ */ | |
34709 | + | |
34710 | +/*! \cidoxg_end */ | |
34711 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc_x86.h | |
34712 | =================================================================== | |
34713 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
34714 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/gcc_x86.h 2008-02-20 09:32:49.000000000 +0100 | |
34715 | @@ -0,0 +1,115 @@ | |
34716 | +/**************************************************************************** | |
34717 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
34718 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
34719 | + * 9501 Jeronimo Road, Suite 250, | |
34720 | + * Irvine, CA 92618, USA | |
34721 | + * | |
34722 | + * Maintained by Solarflare Communications | |
34723 | + * <linux-xen-drivers@solarflare.com> | |
34724 | + * <onload-dev@solarflare.com> | |
34725 | + * | |
34726 | + * This program is free software; you can redistribute it and/or modify it | |
34727 | + * under the terms of the GNU General Public License version 2 as published | |
34728 | + * by the Free Software Foundation, incorporated herein by reference. | |
34729 | + * | |
34730 | + * This program is distributed in the hope that it will be useful, | |
34731 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
34732 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
34733 | + * GNU General Public License for more details. | |
34734 | + * | |
34735 | + * You should have received a copy of the GNU General Public License | |
34736 | + * along with this program; if not, write to the Free Software | |
34737 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
34738 | + **************************************************************************** | |
34739 | + */ | |
34740 | + | |
34741 | +/*! \cidoxg_include_ci_compat */ | |
34742 | + | |
34743 | +#ifndef __CI_COMPAT_GCC_X86_H__ | |
34744 | +#define __CI_COMPAT_GCC_X86_H__ | |
34745 | + | |
34746 | +/* | |
34747 | +** The facts: | |
34748 | +** | |
34749 | +** SSE sfence | |
34750 | +** SSE2 lfence, mfence, pause | |
34751 | +*/ | |
34752 | + | |
34753 | +/* | |
34754 | + Barriers to enforce ordering with respect to: | |
34755 | + | |
34756 | + normal memory use: ci_wmb, ci_rmb, ci_wmb | |
34757 | + IO bus access use: ci_wiob, ci_riob, ci_iob | |
34758 | +*/ | |
34759 | +#if defined(__x86_64__) | |
34760 | +# define ci_x86_mb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory") | |
34761 | +#else | |
34762 | +# define ci_x86_mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory") | |
34763 | +#endif | |
34764 | + | |
34765 | +/* ?? measure the impact of latency of sfence on a modern processor before we | |
34766 | + take a decision on how to integrate with respect to writecombining */ | |
34767 | + | |
34768 | +/* DJR: I don't think we need to add "memory" here. It means the asm does | |
34769 | +** something to memory that GCC doesn't understand. But all this does is | |
34770 | +** commit changes that GCC thinks have already happened. NB. GCC will not | |
34771 | +** reorder across a __volatile__ __asm__ anyway. | |
34772 | +*/ | |
34773 | +#define ci_gcc_fence() __asm__ __volatile__ ("") | |
34774 | + | |
34775 | +#if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) | |
34776 | +# define ci_x86_sfence() __asm__ __volatile__ ("sfence") | |
34777 | +# define ci_x86_lfence() __asm__ __volatile__ ("lfence") | |
34778 | +# define ci_x86_mfence() __asm__ __volatile__ ("mfence") | |
34779 | +#else | |
34780 | +# define ci_x86_sfence() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8") | |
34781 | +# define ci_x86_lfence() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xE8") | |
34782 | +# define ci_x86_mfence() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF0") | |
34783 | +#endif | |
34784 | + | |
34785 | + | |
34786 | +/* x86 processors to P4 Xeon store in-order unless executing streaming | |
34787 | + extensions or when using writecombining | |
34788 | + | |
34789 | + Hence we do not define ci_wmb to use sfence by default. Requirement is that | |
34790 | + we do not use writecombining to memory and any code which uses SSE | |
34791 | + extensions must call sfence directly | |
34792 | + | |
34793 | + We need to track non intel clones which may support out of order store. | |
34794 | + | |
34795 | +*/ | |
34796 | + | |
34797 | +#if CI_CPU_OOS | |
34798 | +# if CI_CPU_HAS_SSE | |
34799 | +# define ci_wmb() ci_x86_sfence() | |
34800 | +# else | |
34801 | +# define ci_wmb() ci_x86_mb() | |
34802 | +# endif | |
34803 | +#else | |
34804 | +# define ci_wmb() ci_gcc_fence() | |
34805 | +#endif | |
34806 | + | |
34807 | +#if CI_CPU_HAS_SSE2 | |
34808 | +# define ci_rmb() ci_x86_lfence() | |
34809 | +# define ci_mb() ci_x86_mfence() | |
34810 | +# define ci_riob() ci_x86_lfence() | |
34811 | +# define ci_wiob() ci_x86_sfence() | |
34812 | +# define ci_iob() ci_x86_mfence() | |
34813 | +#else | |
34814 | +# if CI_CPU_HAS_SSE | |
34815 | +# define ci_wiob() ci_x86_sfence() | |
34816 | +# else | |
34817 | +# define ci_wiob() ci_x86_mb() | |
34818 | +# endif | |
34819 | +# define ci_rmb() ci_x86_mb() | |
34820 | +# define ci_mb() ci_x86_mb() | |
34821 | +# define ci_riob() ci_x86_mb() | |
34822 | +# define ci_iob() ci_x86_mb() | |
34823 | +#endif | |
34824 | + | |
34825 | +typedef unsigned long ci_phys_addr_t; | |
34826 | +#define ci_phys_addr_fmt "%lx" | |
34827 | + | |
34828 | +#endif /* __CI_COMPAT_GCC_X86_H__ */ | |
34829 | + | |
34830 | +/*! \cidoxg_end */ | |
34831 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/primitive.h | |
34832 | =================================================================== | |
34833 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
34834 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/primitive.h 2008-02-20 09:32:49.000000000 +0100 | |
34835 | @@ -0,0 +1,77 @@ | |
34836 | +/**************************************************************************** | |
34837 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
34838 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
34839 | + * 9501 Jeronimo Road, Suite 250, | |
34840 | + * Irvine, CA 92618, USA | |
34841 | + * | |
34842 | + * Maintained by Solarflare Communications | |
34843 | + * <linux-xen-drivers@solarflare.com> | |
34844 | + * <onload-dev@solarflare.com> | |
34845 | + * | |
34846 | + * This program is free software; you can redistribute it and/or modify it | |
34847 | + * under the terms of the GNU General Public License version 2 as published | |
34848 | + * by the Free Software Foundation, incorporated herein by reference. | |
34849 | + * | |
34850 | + * This program is distributed in the hope that it will be useful, | |
34851 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
34852 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
34853 | + * GNU General Public License for more details. | |
34854 | + * | |
34855 | + * You should have received a copy of the GNU General Public License | |
34856 | + * along with this program; if not, write to the Free Software | |
34857 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
34858 | + **************************************************************************** | |
34859 | + */ | |
34860 | +/*! \cidoxg_include_ci_compat */ | |
34861 | + | |
34862 | +#ifndef __CI_COMPAT_PRIMITIVE_H__ | |
34863 | +#define __CI_COMPAT_PRIMITIVE_H__ | |
34864 | + | |
34865 | + | |
34866 | +/********************************************************************** | |
34867 | + * Primitive types. | |
34868 | + */ | |
34869 | + | |
34870 | +typedef unsigned char ci_uint8; | |
34871 | +typedef char ci_int8; | |
34872 | + | |
34873 | +typedef unsigned short ci_uint16; | |
34874 | +typedef short ci_int16; | |
34875 | + | |
34876 | +typedef unsigned int ci_uint32; | |
34877 | +typedef int ci_int32; | |
34878 | + | |
34879 | +/* 64-bit support is platform dependent. */ | |
34880 | + | |
34881 | + | |
34882 | +/********************************************************************** | |
34883 | + * Other fancy types. | |
34884 | + */ | |
34885 | + | |
34886 | +typedef ci_uint8 ci_octet; | |
34887 | + | |
34888 | +typedef enum { | |
34889 | + CI_FALSE = 0, | |
34890 | + CI_TRUE | |
34891 | +} ci_boolean_t; | |
34892 | + | |
34893 | + | |
34894 | +/********************************************************************** | |
34895 | + * Some nice types you'd always assumed were standards. | |
34896 | + * (Really, they are SYSV "standards".) | |
34897 | + */ | |
34898 | + | |
34899 | +#ifdef _WIN32 | |
34900 | +typedef unsigned long ulong; | |
34901 | +typedef unsigned int uint; | |
34902 | +typedef char* caddr_t; | |
34903 | +#elif defined(__linux__) && defined(__KERNEL__) | |
34904 | +#include <linux/types.h> | |
34905 | +#elif defined(__linux__) | |
34906 | +#include <sys/types.h> | |
34907 | +#endif | |
34908 | + | |
34909 | + | |
34910 | +#endif /* __CI_COMPAT_PRIMITIVE_H__ */ | |
34911 | + | |
34912 | +/*! \cidoxg_end */ | |
34913 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/sysdep.h | |
34914 | =================================================================== | |
34915 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
34916 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/sysdep.h 2008-02-20 09:32:49.000000000 +0100 | |
34917 | @@ -0,0 +1,166 @@ | |
34918 | +/**************************************************************************** | |
34919 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
34920 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
34921 | + * 9501 Jeronimo Road, Suite 250, | |
34922 | + * Irvine, CA 92618, USA | |
34923 | + * | |
34924 | + * Maintained by Solarflare Communications | |
34925 | + * <linux-xen-drivers@solarflare.com> | |
34926 | + * <onload-dev@solarflare.com> | |
34927 | + * | |
34928 | + * This program is free software; you can redistribute it and/or modify it | |
34929 | + * under the terms of the GNU General Public License version 2 as published | |
34930 | + * by the Free Software Foundation, incorporated herein by reference. | |
34931 | + * | |
34932 | + * This program is distributed in the hope that it will be useful, | |
34933 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
34934 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
34935 | + * GNU General Public License for more details. | |
34936 | + * | |
34937 | + * You should have received a copy of the GNU General Public License | |
34938 | + * along with this program; if not, write to the Free Software | |
34939 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
34940 | + **************************************************************************** | |
34941 | + */ | |
34942 | + | |
34943 | +/*! \cidoxg_include_ci_compat */ | |
34944 | + | |
34945 | +#ifndef __CI_COMPAT_SYSDEP_H__ | |
34946 | +#define __CI_COMPAT_SYSDEP_H__ | |
34947 | + | |
34948 | + | |
34949 | +/********************************************************************** | |
34950 | + * Platform definition fixups. | |
34951 | + */ | |
34952 | + | |
34953 | +#if defined(__ci_ul_driver__) && !defined(__ci_driver__) | |
34954 | +# define __ci_driver__ | |
34955 | +#endif | |
34956 | + | |
34957 | +#if defined(__ci_driver__) && !defined(__ci_ul_driver__) && \ | |
34958 | + !defined(__KERNEL__) | |
34959 | +# define __KERNEL__ | |
34960 | +#endif | |
34961 | + | |
34962 | + | |
34963 | +/********************************************************************** | |
34964 | + * Sanity checks (no cheating!) | |
34965 | + */ | |
34966 | + | |
34967 | +#if defined(__KERNEL__) && !defined(__ci_driver__) | |
34968 | +# error Insane. | |
34969 | +#endif | |
34970 | + | |
34971 | +#if defined(__KERNEL__) && defined(__ci_ul_driver__) | |
34972 | +# error Madness. | |
34973 | +#endif | |
34974 | + | |
34975 | +#if defined(__unix__) && defined(_WIN32) | |
34976 | +# error Strange. | |
34977 | +#endif | |
34978 | + | |
34979 | +#if defined(__GNUC__) && defined(_MSC_VER) | |
34980 | +# error Crazy. | |
34981 | +#endif | |
34982 | + | |
34983 | + | |
34984 | +/********************************************************************** | |
34985 | + * Compiler and processor dependencies. | |
34986 | + */ | |
34987 | + | |
34988 | +#if defined(__GNUC__) | |
34989 | + | |
34990 | +# include <ci/compat/gcc.h> | |
34991 | + | |
34992 | +# if defined(__i386__) | |
34993 | +# include <ci/compat/x86.h> | |
34994 | +# include <ci/compat/gcc_x86.h> | |
34995 | +# elif defined(__x86_64__) | |
34996 | +# include <ci/compat/x86_64.h> | |
34997 | +# include <ci/compat/gcc_x86.h> | |
34998 | +# elif defined(__PPC__) | |
34999 | +# include <ci/compat/ppc.h> | |
35000 | +# include <ci/compat/gcc_ppc.h> | |
35001 | +# elif defined(__ia64__) | |
35002 | +# include <ci/compat/ia64.h> | |
35003 | +# include <ci/compat/gcc_ia64.h> | |
35004 | +# else | |
35005 | +# error Unknown processor - GNU C | |
35006 | +# endif | |
35007 | + | |
35008 | +#elif defined(_MSC_VER) | |
35009 | + | |
35010 | +# include <ci/compat/msvc.h> | |
35011 | + | |
35012 | +# if defined(__i386__) | |
35013 | +# include <ci/compat/x86.h> | |
35014 | +# include <ci/compat/msvc_x86.h> | |
35015 | +# elif defined(__x86_64__) | |
35016 | +# include <ci/compat/x86_64.h> | |
35017 | +# include <ci/compat/msvc_x86_64.h> | |
35018 | +# else | |
35019 | +# error Unknown processor MSC | |
35020 | +# endif | |
35021 | + | |
35022 | +#elif defined(__PGI) | |
35023 | + | |
35024 | +# include <ci/compat/x86.h> | |
35025 | +# include <ci/compat/pg_x86.h> | |
35026 | + | |
35027 | +#elif defined(__INTEL_COMPILER) | |
35028 | + | |
35029 | +/* Intel compilers v7 claim to be very gcc compatible. */ | |
35030 | +# if __INTEL_COMPILER >= 700 | |
35031 | +# include <ci/compat/gcc.h> | |
35032 | +# include <ci/compat/x86.h> | |
35033 | +# include <ci/compat/gcc_x86.h> | |
35034 | +# else | |
35035 | +# error Old Intel compiler not supported. Yet. | |
35036 | +# endif | |
35037 | + | |
35038 | +#else | |
35039 | +# error Unknown compiler. | |
35040 | +#endif | |
35041 | + | |
35042 | + | |
35043 | +/********************************************************************** | |
35044 | + * Misc stuff (that probably shouldn't be here). | |
35045 | + */ | |
35046 | + | |
35047 | +#ifdef __sun | |
35048 | +# ifdef __KERNEL__ | |
35049 | +# define _KERNEL | |
35050 | +# define _SYSCALL32 | |
35051 | +# ifdef _LP64 | |
35052 | +# define _SYSCALL32_IMPL | |
35053 | +# endif | |
35054 | +# else | |
35055 | +# define _REENTRANT | |
35056 | +# endif | |
35057 | +#endif | |
35058 | + | |
35059 | + | |
35060 | +/********************************************************************** | |
35061 | + * Defaults for anything left undefined. | |
35062 | + */ | |
35063 | + | |
35064 | +#ifndef CI_LIKELY | |
35065 | +# define CI_LIKELY(t) (t) | |
35066 | +# define CI_UNLIKELY(t) (t) | |
35067 | +#endif | |
35068 | + | |
35069 | +#ifndef ci_restrict | |
35070 | +# define ci_restrict | |
35071 | +#endif | |
35072 | + | |
35073 | +#ifndef ci_inline | |
35074 | +# define ci_inline static inline | |
35075 | +#endif | |
35076 | + | |
35077 | +#ifndef ci_noinline | |
35078 | +# define ci_noinline static | |
35079 | +#endif | |
35080 | + | |
35081 | +#endif /* __CI_COMPAT_SYSDEP_H__ */ | |
35082 | + | |
35083 | +/*! \cidoxg_end */ | |
35084 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/utils.h | |
35085 | =================================================================== | |
35086 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
35087 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/utils.h 2008-02-20 09:32:49.000000000 +0100 | |
35088 | @@ -0,0 +1,269 @@ | |
35089 | +/**************************************************************************** | |
35090 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
35091 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
35092 | + * 9501 Jeronimo Road, Suite 250, | |
35093 | + * Irvine, CA 92618, USA | |
35094 | + * | |
35095 | + * Maintained by Solarflare Communications | |
35096 | + * <linux-xen-drivers@solarflare.com> | |
35097 | + * <onload-dev@solarflare.com> | |
35098 | + * | |
35099 | + * This program is free software; you can redistribute it and/or modify it | |
35100 | + * under the terms of the GNU General Public License version 2 as published | |
35101 | + * by the Free Software Foundation, incorporated herein by reference. | |
35102 | + * | |
35103 | + * This program is distributed in the hope that it will be useful, | |
35104 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
35105 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
35106 | + * GNU General Public License for more details. | |
35107 | + * | |
35108 | + * You should have received a copy of the GNU General Public License | |
35109 | + * along with this program; if not, write to the Free Software | |
35110 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
35111 | + **************************************************************************** | |
35112 | + */ | |
35113 | + | |
35114 | +/* | |
35115 | + * \author djr | |
35116 | + * \brief Handy utility macros. | |
35117 | + * \date 2003/01/17 | |
35118 | + */ | |
35119 | + | |
35120 | +/*! \cidoxg_include_ci_compat */ | |
35121 | + | |
35122 | +#ifndef __CI_COMPAT_UTILS_H__ | |
35123 | +#define __CI_COMPAT_UTILS_H__ | |
35124 | + | |
35125 | + | |
35126 | +/********************************************************************** | |
35127 | + * Alignment -- [align] must be a power of 2. | |
35128 | + **********************************************************************/ | |
35129 | + | |
35130 | + /*! Align forward onto next boundary. */ | |
35131 | + | |
35132 | +#define CI_ALIGN_FWD(p, align) (((p)+(align)-1u) & ~((align)-1u)) | |
35133 | + | |
35134 | + | |
35135 | + /*! Align back onto prev boundary. */ | |
35136 | + | |
35137 | +#define CI_ALIGN_BACK(p, align) ((p) & ~((align)-1u)) | |
35138 | + | |
35139 | + | |
35140 | + /*! How far to next boundary? */ | |
35141 | + | |
35142 | +#define CI_ALIGN_NEEDED(p, align, signed_t) (-(signed_t)(p) & ((align)-1u)) | |
35143 | + | |
35144 | + | |
35145 | + /*! How far beyond prev boundary? */ | |
35146 | + | |
35147 | +#define CI_OFFSET(p, align) ((p) & ((align)-1u)) | |
35148 | + | |
35149 | + | |
35150 | + /*! Does object fit in gap before next boundary? */ | |
35151 | + | |
35152 | +#define CI_FITS(p, size, align, signed_t) \ | |
35153 | + (CI_ALIGN_NEEDED((p) + 1, (align), signed_t) + 1 >= (size)) | |
35154 | + | |
35155 | + | |
35156 | + /*! Align forward onto next boundary. */ | |
35157 | + | |
35158 | +#define CI_PTR_ALIGN_FWD(p, align) \ | |
35159 | + ((char*) CI_ALIGN_FWD(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)))) | |
35160 | + | |
35161 | + /*! Align back onto prev boundary. */ | |
35162 | + | |
35163 | +#define CI_PTR_ALIGN_BACK(p, align) \ | |
35164 | + ((char*) CI_ALIGN_BACK(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)))) | |
35165 | + | |
35166 | + /*! How far to next boundary? */ | |
35167 | + | |
35168 | +#define CI_PTR_ALIGN_NEEDED(p, align) \ | |
35169 | + CI_ALIGN_NEEDED(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)), \ | |
35170 | + ci_ptr_arith_t) | |
35171 | + | |
35172 | + /*! How far to next boundary? NZ = not zero i.e. give align if on boundary */ | |
35173 | + | |
35174 | +#define CI_PTR_ALIGN_NEEDED_NZ(p, align) \ | |
35175 | + ((align) - (((char*)p) - \ | |
35176 | + ((char*) CI_ALIGN_BACK(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)))))) | |
35177 | + | |
35178 | + /*! How far beyond prev boundary? */ | |
35179 | + | |
35180 | +#define CI_PTR_OFFSET(p, align) \ | |
35181 | + CI_OFFSET(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))) | |
35182 | + | |
35183 | + | |
35184 | + /* Same as CI_ALIGN_FWD and CI_ALIGN_BACK. */ | |
35185 | + | |
35186 | +#define CI_ROUND_UP(i, align) (((i)+(align)-1u) & ~((align)-1u)) | |
35187 | + | |
35188 | +#define CI_ROUND_DOWN(i, align) ((i) & ~((align)-1u)) | |
35189 | + | |
35190 | + | |
35191 | +/********************************************************************** | |
35192 | + * Byte-order | |
35193 | + **********************************************************************/ | |
35194 | + | |
35195 | +/* These are not flags. They are enumeration values for use with | |
35196 | + * CI_MY_BYTE_ORDER. */ | |
35197 | +#define CI_BIG_ENDIAN 1 | |
35198 | +#define CI_LITTLE_ENDIAN 0 | |
35199 | + | |
35200 | +/* | |
35201 | +** Note that these byte-swapping primitives may leave junk in bits above | |
35202 | +** the range they operate on. | |
35203 | +** | |
35204 | +** The CI_BSWAP_nn() routines require that bits above [nn] are zero. Use | |
35205 | +** CI_BSWAPM_nn(x) if this cannot be guaranteed. | |
35206 | +*/ | |
35207 | + | |
35208 | +/* ?? May be able to improve on some of these with inline assembler on some | |
35209 | +** platforms. | |
35210 | +*/ | |
35211 | + | |
35212 | +#define CI_BSWAP_16(v) ((((v) & 0xff) << 8) | ((v) >> 8)) | |
35213 | +#define CI_BSWAPM_16(v) ((((v) & 0xff) << 8) | (((v) & 0xff00) >> 8)) | |
35214 | + | |
35215 | +#define CI_BSWAP_32(v) (((v) >> 24) | \ | |
35216 | + (((v) & 0x00ff0000) >> 8) | \ | |
35217 | + (((v) & 0x0000ff00) << 8) | \ | |
35218 | + ((v) << 24)) | |
35219 | +#define CI_BSWAPM_32(v) ((((v) & 0xff000000) >> 24) | \ | |
35220 | + (((v) & 0x00ff0000) >> 8) | \ | |
35221 | + (((v) & 0x0000ff00) << 8) | \ | |
35222 | + ((v) << 24)) | |
35223 | + | |
35224 | +#define CI_BSWAP_64(v) (((v) >> 56) | \ | |
35225 | + (((v) & 0x00ff000000000000) >> 40) | \ | |
35226 | + (((v) & 0x0000ff0000000000) >> 24) | \ | |
35227 | + (((v) & 0x000000ff00000000) >> 8) | \ | |
35228 | + (((v) & 0x00000000ff000000) << 8) | \ | |
35229 | + (((v) & 0x0000000000ff0000) << 24) | \ | |
35230 | + (((v) & 0x000000000000ff00) << 40) | \ | |
35231 | + ((v) << 56)) | |
35232 | + | |
35233 | +# define CI_BSWAPPED_16_IF(c,v) ((c) ? CI_BSWAP_16(v) : (v)) | |
35234 | +# define CI_BSWAPPED_32_IF(c,v) ((c) ? CI_BSWAP_32(v) : (v)) | |
35235 | +# define CI_BSWAPPED_64_IF(c,v) ((c) ? CI_BSWAP_64(v) : (v)) | |
35236 | +# define CI_BSWAP_16_IF(c,v) do{ if((c)) (v) = CI_BSWAP_16(v); }while(0) | |
35237 | +# define CI_BSWAP_32_IF(c,v) do{ if((c)) (v) = CI_BSWAP_32(v); }while(0) | |
35238 | +# define CI_BSWAP_64_IF(c,v) do{ if((c)) (v) = CI_BSWAP_64(v); }while(0) | |
35239 | + | |
35240 | +#if (CI_MY_BYTE_ORDER == CI_LITTLE_ENDIAN) | |
35241 | +# define CI_BSWAP_LE16(v) (v) | |
35242 | +# define CI_BSWAP_LE32(v) (v) | |
35243 | +# define CI_BSWAP_LE64(v) (v) | |
35244 | +# define CI_BSWAP_BE16(v) CI_BSWAP_16(v) | |
35245 | +# define CI_BSWAP_BE32(v) CI_BSWAP_32(v) | |
35246 | +# define CI_BSWAP_BE64(v) CI_BSWAP_64(v) | |
35247 | +# define CI_BSWAPM_LE16(v) (v) | |
35248 | +# define CI_BSWAPM_LE32(v) (v) | |
35249 | +# define CI_BSWAPM_LE64(v) (v) | |
35250 | +# define CI_BSWAPM_BE16(v) CI_BSWAPM_16(v) | |
35251 | +# define CI_BSWAPM_BE32(v) CI_BSWAPM_32(v) | |
35252 | +#elif (CI_MY_BYTE_ORDER == CI_BIG_ENDIAN) | |
35253 | +# define CI_BSWAP_BE16(v) (v) | |
35254 | +# define CI_BSWAP_BE32(v) (v) | |
35255 | +# define CI_BSWAP_BE64(v) (v) | |
35256 | +# define CI_BSWAP_LE16(v) CI_BSWAP_16(v) | |
35257 | +# define CI_BSWAP_LE32(v) CI_BSWAP_32(v) | |
35258 | +# define CI_BSWAP_LE64(v) CI_BSWAP_64(v) | |
35259 | +# define CI_BSWAPM_BE16(v) (v) | |
35260 | +# define CI_BSWAPM_BE32(v) (v) | |
35261 | +# define CI_BSWAPM_BE64(v) (v) | |
35262 | +# define CI_BSWAPM_LE16(v) CI_BSWAPM_16(v) | |
35263 | +# define CI_BSWAPM_LE32(v) CI_BSWAPM_32(v) | |
35264 | +#else | |
35265 | +# error Bad endian. | |
35266 | +#endif | |
35267 | + | |
35268 | + | |
35269 | +/********************************************************************** | |
35270 | + * Get pointer to struct from pointer to member | |
35271 | + **********************************************************************/ | |
35272 | + | |
35273 | +#define CI_MEMBER_OFFSET(c_type, mbr_name) \ | |
35274 | + ((ci_uint32) (ci_uintptr_t)(&((c_type*)0)->mbr_name)) | |
35275 | + | |
35276 | +#define CI_MEMBER_SIZE(c_type, mbr_name) \ | |
35277 | + sizeof(((c_type*)0)->mbr_name) | |
35278 | + | |
35279 | +#define __CI_CONTAINER(c_type, mbr_name, p_mbr) \ | |
35280 | + ( (c_type*) ((char*)(p_mbr) - CI_MEMBER_OFFSET(c_type, mbr_name)) ) | |
35281 | + | |
35282 | +#ifndef CI_CONTAINER | |
35283 | +# define CI_CONTAINER(t,m,p) __CI_CONTAINER(t,m,p) | |
35284 | +#endif | |
35285 | + | |
35286 | + | |
35287 | +/********************************************************************** | |
35288 | + * Structure member initialiser. | |
35289 | + **********************************************************************/ | |
35290 | + | |
35291 | +#ifndef CI_STRUCT_MBR | |
35292 | +# define CI_STRUCT_MBR(name, val) .name = val | |
35293 | +#endif | |
35294 | + | |
35295 | + | |
35296 | +/********************************************************************** | |
35297 | + * min / max | |
35298 | + **********************************************************************/ | |
35299 | + | |
35300 | +#define CI_MIN(x,y) (((x) < (y)) ? (x) : (y)) | |
35301 | +#define CI_MAX(x,y) (((x) > (y)) ? (x) : (y)) | |
35302 | + | |
35303 | +/********************************************************************** | |
35304 | + * abs | |
35305 | + **********************************************************************/ | |
35306 | + | |
35307 | +#define CI_ABS(x) (((x) < 0) ? -(x) : (x)) | |
35308 | + | |
35309 | +/********************************************************************** | |
35310 | + * Conditional debugging | |
35311 | + **********************************************************************/ | |
35312 | + | |
35313 | +#ifdef NDEBUG | |
35314 | +# define CI_DEBUG(x) | |
35315 | +# define CI_NDEBUG(x) x | |
35316 | +# define CI_IF_DEBUG(y,n) (n) | |
35317 | +# define CI_DEBUG_ARG(x) | |
35318 | +#else | |
35319 | +# define CI_DEBUG(x) x | |
35320 | +# define CI_NDEBUG(x) | |
35321 | +# define CI_IF_DEBUG(y,n) (y) | |
35322 | +# define CI_DEBUG_ARG(x) ,x | |
35323 | +#endif | |
35324 | + | |
35325 | +#ifdef __KERNEL__ | |
35326 | +#define CI_KERNEL_ARG(x) ,x | |
35327 | +#else | |
35328 | +#define CI_KERNEL_ARG(x) | |
35329 | +#endif | |
35330 | + | |
35331 | +#ifdef _WIN32 | |
35332 | +# define CI_KERNEL_ARG_WIN(x) CI_KERNEL_ARG(x) | |
35333 | +# define CI_ARG_WIN(x) ,x | |
35334 | +#else | |
35335 | +# define CI_KERNEL_ARG_WIN(x) | |
35336 | +# define CI_ARG_WIN(x) | |
35337 | +#endif | |
35338 | + | |
35339 | +#ifdef __unix__ | |
35340 | +# define CI_KERNEL_ARG_UNIX(x) CI_KERNEL_ARG(x) | |
35341 | +# define CI_ARG_UNIX(x) ,x | |
35342 | +#else | |
35343 | +# define CI_KERNEL_ARG_UNIX(x) | |
35344 | +# define CI_ARG_UNIX(x) | |
35345 | +#endif | |
35346 | + | |
35347 | +#ifdef __linux__ | |
35348 | +# define CI_KERNEL_ARG_LINUX(x) CI_KERNEL_ARG(x) | |
35349 | +# define CI_ARG_LINUX(x) ,x | |
35350 | +#else | |
35351 | +# define CI_KERNEL_ARG_LINUX(x) | |
35352 | +# define CI_ARG_LINUX(x) | |
35353 | +#endif | |
35354 | + | |
35355 | + | |
35356 | +#endif /* __CI_COMPAT_UTILS_H__ */ | |
35357 | +/*! \cidoxg_end */ | |
35358 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86.h | |
35359 | =================================================================== | |
35360 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
35361 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86.h 2008-02-20 09:32:49.000000000 +0100 | |
35362 | @@ -0,0 +1,48 @@ | |
35363 | +/**************************************************************************** | |
35364 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
35365 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
35366 | + * 9501 Jeronimo Road, Suite 250, | |
35367 | + * Irvine, CA 92618, USA | |
35368 | + * | |
35369 | + * Maintained by Solarflare Communications | |
35370 | + * <linux-xen-drivers@solarflare.com> | |
35371 | + * <onload-dev@solarflare.com> | |
35372 | + * | |
35373 | + * This program is free software; you can redistribute it and/or modify it | |
35374 | + * under the terms of the GNU General Public License version 2 as published | |
35375 | + * by the Free Software Foundation, incorporated herein by reference. | |
35376 | + * | |
35377 | + * This program is distributed in the hope that it will be useful, | |
35378 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
35379 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
35380 | + * GNU General Public License for more details. | |
35381 | + * | |
35382 | + * You should have received a copy of the GNU General Public License | |
35383 | + * along with this program; if not, write to the Free Software | |
35384 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
35385 | + **************************************************************************** | |
35386 | + */ | |
35387 | + | |
35388 | +/*! \cidoxg_include_ci_compat */ | |
35389 | + | |
35390 | +#ifndef __CI_COMPAT_X86_H__ | |
35391 | +#define __CI_COMPAT_X86_H__ | |
35392 | + | |
35393 | + | |
35394 | +#define CI_MY_BYTE_ORDER CI_LITTLE_ENDIAN | |
35395 | + | |
35396 | +#define CI_WORD_SIZE 4 | |
35397 | +#define CI_PTR_SIZE 4 | |
35398 | + | |
35399 | +#define CI_PAGE_SIZE 4096 | |
35400 | +#define CI_PAGE_SHIFT 12 | |
35401 | +#define CI_PAGE_MASK (~(CI_PAGE_SIZE - 1)) | |
35402 | + | |
35403 | +#define CI_CPU_HAS_SSE 1 /* SSE extensions supported */ | |
35404 | +#define CI_CPU_HAS_SSE2 0 /* SSE2 extensions supported */ | |
35405 | +#define CI_CPU_OOS 0 /* CPU does out of order stores */ | |
35406 | + | |
35407 | + | |
35408 | +#endif /* __CI_COMPAT_X86_H__ */ | |
35409 | + | |
35410 | +/*! \cidoxg_end */ | |
35411 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86_64.h | |
35412 | =================================================================== | |
35413 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
35414 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/compat/x86_64.h 2008-02-20 09:32:49.000000000 +0100 | |
35415 | @@ -0,0 +1,54 @@ | |
35416 | +/**************************************************************************** | |
35417 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
35418 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
35419 | + * 9501 Jeronimo Road, Suite 250, | |
35420 | + * Irvine, CA 92618, USA | |
35421 | + * | |
35422 | + * Maintained by Solarflare Communications | |
35423 | + * <linux-xen-drivers@solarflare.com> | |
35424 | + * <onload-dev@solarflare.com> | |
35425 | + * | |
35426 | + * This program is free software; you can redistribute it and/or modify it | |
35427 | + * under the terms of the GNU General Public License version 2 as published | |
35428 | + * by the Free Software Foundation, incorporated herein by reference. | |
35429 | + * | |
35430 | + * This program is distributed in the hope that it will be useful, | |
35431 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
35432 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
35433 | + * GNU General Public License for more details. | |
35434 | + * | |
35435 | + * You should have received a copy of the GNU General Public License | |
35436 | + * along with this program; if not, write to the Free Software | |
35437 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
35438 | + **************************************************************************** | |
35439 | + */ | |
35440 | + | |
35441 | +/* | |
35442 | + * \author djr | |
35443 | + * \brief Arch stuff for AMD x86_64. | |
35444 | + * \date 2004/08/17 | |
35445 | + */ | |
35446 | + | |
35447 | +/*! \cidoxg_include_ci_compat */ | |
35448 | +#ifndef __CI_COMPAT_X86_64_H__ | |
35449 | +#define __CI_COMPAT_X86_64_H__ | |
35450 | + | |
35451 | + | |
35452 | +#define CI_MY_BYTE_ORDER CI_LITTLE_ENDIAN | |
35453 | + | |
35454 | +#define CI_WORD_SIZE 8 | |
35455 | +#define CI_PTR_SIZE 8 | |
35456 | + | |
35457 | +#define CI_PAGE_SIZE 4096 | |
35458 | +#define CI_PAGE_SHIFT 12 | |
35459 | +#define CI_PAGE_MASK (~(CI_PAGE_SIZE - 1)) | |
35460 | + | |
35461 | +#define CI_CPU_HAS_SSE 1 /* SSE extensions supported */ | |
35462 | + | |
35463 | +/* SSE2 disabled while investigating BUG1060 */ | |
35464 | +#define CI_CPU_HAS_SSE2 0 /* SSE2 extensions supported */ | |
35465 | +#define CI_CPU_OOS 0 /* CPU does out of order stores */ | |
35466 | + | |
35467 | + | |
35468 | +#endif /* __CI_COMPAT_X86_64_H__ */ | |
35469 | +/*! \cidoxg_end */ | |
35470 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/driver/resource/efx_vi.h | |
35471 | =================================================================== | |
35472 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
35473 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/driver/resource/efx_vi.h 2008-02-20 09:32:49.000000000 +0100 | |
35474 | @@ -0,0 +1,276 @@ | |
35475 | +/**************************************************************************** | |
35476 | + * Driver for Solarflare network controllers - | |
35477 | + * resource management for Xen backend, OpenOnload, etc | |
35478 | + * (including support for SFE4001 10GBT NIC) | |
35479 | + * | |
35480 | + * This file contains public EFX VI API to Solarflare resource manager. | |
35481 | + * | |
35482 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
35483 | + * 9501 Jeronimo Road, Suite 250, | |
35484 | + * Irvine, CA 92618, USA | |
35485 | + * | |
35486 | + * Developed and maintained by Solarflare Communications: | |
35487 | + * <linux-xen-drivers@solarflare.com> | |
35488 | + * <onload-dev@solarflare.com> | |
35489 | + * | |
35490 | + * | |
35491 | + * This program is free software; you can redistribute it and/or modify it | |
35492 | + * under the terms of the GNU General Public License version 2 as published | |
35493 | + * by the Free Software Foundation, incorporated herein by reference. | |
35494 | + * | |
35495 | + * This program is distributed in the hope that it will be useful, | |
35496 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
35497 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
35498 | + * GNU General Public License for more details. | |
35499 | + * | |
35500 | + * You should have received a copy of the GNU General Public License | |
35501 | + * along with this program; if not, write to the Free Software | |
35502 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
35503 | + **************************************************************************** | |
35504 | + */ | |
35505 | + | |
35506 | +#ifndef __CI_DRIVER_RESOURCE_EFX_VI_H__ | |
35507 | +#define __CI_DRIVER_RESOURCE_EFX_VI_H__ | |
35508 | + | |
35509 | +/* Default size of event queue in the efx_vi resource. Copied from | |
35510 | + * CI_CFG_NETIF_EVENTQ_SIZE */ | |
35511 | +#define EFX_VI_EVENTQ_SIZE_DEFAULT 1024 | |
35512 | + | |
35513 | +extern int efx_vi_eventq_size; | |
35514 | + | |
35515 | +/************************************************************************** | |
35516 | + * efx_vi_state types, allocation and free | |
35517 | + **************************************************************************/ | |
35518 | + | |
35519 | +/*! Handle for referring to an efx_vi */ | |
35520 | +struct efx_vi_state; | |
35521 | + | |
35522 | +/*! | |
35523 | + * Allocate an efx_vi, including event queue and pt_endpoint | |
35524 | + * | |
35525 | + * \param vih_out Pointer to a handle that is set on success | |
35526 | + * \param nic_index Index of NIC to apply this resource to | |
35527 | + * \return Zero on success (and vih_out set), non-zero on failure. | |
35528 | + */ | |
35529 | +extern int | |
35530 | +efx_vi_alloc(struct efx_vi_state **vih_out, int nic_index); | |
35531 | + | |
35532 | +/*! | |
35533 | + * Free a previously allocated efx_vi | |
35534 | + * | |
35535 | + * \param vih The handle of the efx_vi to free | |
35536 | + */ | |
35537 | +extern void | |
35538 | +efx_vi_free(struct efx_vi_state *vih); | |
35539 | + | |
35540 | +/*! | |
35541 | + * Reset a previously allocated efx_vi | |
35542 | + * | |
35543 | + * \param vih The handle of the efx_vi to reset | |
35544 | + */ | |
35545 | +extern void | |
35546 | +efx_vi_reset(struct efx_vi_state *vih); | |
35547 | + | |
35548 | +/************************************************************************** | |
35549 | + * efx_vi_eventq types and functions | |
35550 | + **************************************************************************/ | |
35551 | + | |
35552 | +/*! | |
35553 | + * Register a function to receive callbacks when event queue timeouts | |
35554 | + * or wakeups occur. Only one function per efx_vi can be registered | |
35555 | + * at once. | |
35556 | + * | |
35557 | + * \param vih The handle to identify the efx_vi | |
35558 | + * \param callback The function to callback | |
35559 | + * \param context An argument to pass to the callback function | |
35560 | + * \return Zero on success, non-zero on failure. | |
35561 | + */ | |
35562 | +extern int | |
35563 | +efx_vi_eventq_register_callback(struct efx_vi_state *vih, | |
35564 | + void (*callback)(void *context, int is_timeout), | |
35565 | + void *context); | |
35566 | + | |
35567 | +/*! | |
35568 | + * Remove the current eventq timeout or wakeup callback function | |
35569 | + * | |
35570 | + * \param vih The handle to identify the efx_vi | |
35571 | + * \return Zero on success, non-zero on failure | |
35572 | + */ | |
35573 | +extern int | |
35574 | +efx_vi_eventq_kill_callback(struct efx_vi_state *vih); | |
35575 | + | |
35576 | +/************************************************************************** | |
35577 | + * efx_vi_dma_map types and functions | |
35578 | + **************************************************************************/ | |
35579 | + | |
35580 | +/*! | |
35581 | + * Handle for referring to an efx_vi DMA mapping | |
35582 | + */ | |
35583 | +struct efx_vi_dma_map_state; | |
35584 | + | |
35585 | +/*! | |
35586 | + * Map a list of buffer pages so they are registered with the hardware | |
35587 | + * | |
35588 | + * \param vih The handle to identify the efx_vi | |
35589 | + * \param pages An array of page pointers to map | |
35590 | + * \param n_pages Length of the page pointer array. Must be a power of two. | |
35591 | + * \param dmh_out Set on success to a handle used to refer to this mapping | |
35592 | + * \return Zero on success, non-zero on failure. | |
35593 | + */ | |
35594 | +extern int | |
35595 | +efx_vi_dma_map_pages(struct efx_vi_state *vih, struct page **pages, | |
35596 | + int n_pages, struct efx_vi_dma_map_state **dmh_out); | |
35597 | +extern int | |
35598 | +efx_vi_dma_map_addrs(struct efx_vi_state *vih, | |
35599 | + unsigned long long *dev_bus_addrs, int n_pages, | |
35600 | + struct efx_vi_dma_map_state **dmh_out); | |
35601 | + | |
35602 | +/*! | |
35603 | + * Unmap a previously mapped set of pages so they are no longer registered | |
35604 | + * with the hardware. | |
35605 | + * | |
35606 | + * \param vih The handle to identify the efx_vi | |
35607 | + * \param dmh The handle to identify the dma mapping | |
35608 | + */ | |
35609 | +extern void | |
35610 | +efx_vi_dma_unmap_pages(struct efx_vi_state *vih, | |
35611 | + struct efx_vi_dma_map_state *dmh); | |
35612 | +extern void | |
35613 | +efx_vi_dma_unmap_addrs(struct efx_vi_state *vih, | |
35614 | + struct efx_vi_dma_map_state *dmh); | |
35615 | + | |
35616 | +/*! | |
35617 | + * Retrieve the buffer address of the mapping | |
35618 | + * | |
35619 | + * \param vih The handle to identify the efx_vi | |
35620 | + * \param dmh The handle to identify the buffer mapping | |
35621 | + * \return The buffer address on success, or zero on failure | |
35622 | + */ | |
35623 | +extern unsigned | |
35624 | +efx_vi_dma_get_map_addr(struct efx_vi_state *vih, | |
35625 | + struct efx_vi_dma_map_state *dmh); | |
35626 | + | |
35627 | +/************************************************************************** | |
35628 | + * efx_vi filter functions | |
35629 | + **************************************************************************/ | |
35630 | + | |
35631 | +#define EFX_VI_STATIC_FILTERS 32 | |
35632 | + | |
35633 | +/*! Handle to refer to a filter instance */ | |
35634 | +struct filter_resource_t; | |
35635 | + | |
35636 | +/*! | |
35637 | + * Allocate and add a filter | |
35638 | + * | |
35639 | + * \param vih The handle to identify the efx_vi | |
35640 | + * \param protocol The protocol of the new filter: UDP or TCP | |
35641 | + * \param ip_addr_be32 The local ip address of the filter | |
35642 | + * \param port_le16 The local port of the filter | |
35643 | + * \param fh_out Set on success to be a handle to refer to this filter | |
35644 | + * \return Zero on success, non-zero on failure. | |
35645 | + */ | |
35646 | +extern int | |
35647 | +efx_vi_filter(struct efx_vi_state *vih, int protocol, unsigned ip_addr_be32, | |
35648 | + int port_le16, struct filter_resource_t **fh_out); | |
35649 | + | |
35650 | +/*! | |
35651 | + * Remove a filter and free resources associated with it | |
35652 | + * | |
35653 | + * \param vih The handle to identify the efx_vi | |
35654 | + * \param fh The handle to identify the filter | |
35655 | + * \return Zero on success, non-zero on failure | |
35656 | + */ | |
35657 | +extern int | |
35658 | +efx_vi_filter_stop(struct efx_vi_state *vih, struct filter_resource_t *fh); | |
35659 | + | |
35660 | +/************************************************************************** | |
35661 | + * efx_vi hw resources types and functions | |
35662 | + **************************************************************************/ | |
35663 | + | |
35664 | +/*! Constants for the type field in efx_vi_hw_resource */ | |
35665 | +#define EFX_VI_HW_RESOURCE_TXDMAQ 0x0 /* PFN of TX DMA Q */ | |
35666 | +#define EFX_VI_HW_RESOURCE_RXDMAQ 0x1 /* PFN of RX DMA Q */ | |
35667 | +#define EFX_VI_HW_RESOURCE_TXBELL 0x2 /* PFN of TX Doorbell (EF1) */ | |
35668 | +#define EFX_VI_HW_RESOURCE_RXBELL 0x3 /* PFN of RX Doorbell (EF1) */ | |
35669 | +#define EFX_VI_HW_RESOURCE_EVQTIMER 0x4 /* Address of event q timer */ | |
35670 | + | |
35671 | +/* Address of event q pointer (EF1) */ | |
35672 | +#define EFX_VI_HW_RESOURCE_EVQPTR 0x5 | |
35673 | +/* Address of register pointer (Falcon A) */ | |
35674 | +#define EFX_VI_HW_RESOURCE_EVQRPTR 0x6 | |
35675 | +/* Offset of register pointer (Falcon B) */ | |
35676 | +#define EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET 0x7 | |
35677 | +/* Address of mem KVA */ | |
35678 | +#define EFX_VI_HW_RESOURCE_EVQMEMKVA 0x8 | |
35679 | +/* PFN of doorbell page (Falcon) */ | |
35680 | +#define EFX_VI_HW_RESOURCE_BELLPAGE 0x9 | |
35681 | + | |
35682 | +/*! How large an array to allocate for the get_() functions - smaller | |
35683 | + than the total number of constants as some are mutually exclusive */ | |
35684 | +#define EFX_VI_HW_RESOURCE_MAXSIZE 0x7 | |
35685 | + | |
35686 | +/*! Constants for the mem_type field in efx_vi_hw_resource */ | |
35687 | +#define EFX_VI_HW_RESOURCE_IOBUFFER 0 /* Host memory */ | |
35688 | +#define EFX_VI_HW_RESOURCE_PERIPHERAL 1 /* Card memory/registers */ | |
35689 | + | |
35690 | +/*! | |
35691 | + * Data structure providing information on a hardware resource mapping | |
35692 | + */ | |
35693 | +struct efx_vi_hw_resource { | |
35694 | + u8 type; /*!< What this resource represents */ | |
35695 | + u8 mem_type; /*!< What type of memory is it in, eg, | |
35696 | + * host or iomem */ | |
35697 | + u8 more_to_follow; /*!< Is this part of a multi-region resource */ | |
35698 | + u32 length; /*!< Length of the resource in bytes */ | |
35699 | + unsigned long address; /*!< Address of this resource */ | |
35700 | +}; | |
35701 | + | |
35702 | +/*! | |
35703 | + * Metadata concerning the list of hardware resource mappings | |
35704 | + */ | |
35705 | +struct efx_vi_hw_resource_metadata { | |
35706 | + int version; | |
35707 | + int evq_order; | |
35708 | + int evq_offs; | |
35709 | + int evq_capacity; | |
35710 | + int instance; | |
35711 | + unsigned rx_capacity; | |
35712 | + unsigned tx_capacity; | |
35713 | + int nic_arch; | |
35714 | + int nic_revision; | |
35715 | + char nic_variant; | |
35716 | +}; | |
35717 | + | |
35718 | +/*! | |
35719 | + * Obtain a list of hardware resource mappings, using virtual addresses | |
35720 | + * | |
35721 | + * \param vih The handle to identify the efx_vi | |
35722 | + * \param mdata Pointer to a structure to receive the metadata | |
35723 | + * \param hw_res_array An array to receive the list of hardware resources | |
35724 | + * \param length The length of hw_res_array. Updated on success to contain | |
35725 | + * the number of entries in the supplied array that were used. | |
35726 | + * \return Zero on success, non-zero on failure | |
35727 | + */ | |
35728 | +extern int | |
35729 | +efx_vi_hw_resource_get_virt(struct efx_vi_state *vih, | |
35730 | + struct efx_vi_hw_resource_metadata *mdata, | |
35731 | + struct efx_vi_hw_resource *hw_res_array, | |
35732 | + int *length); | |
35733 | + | |
35734 | +/*! | |
35735 | + * Obtain a list of hardware resource mappings, using physical addresses | |
35736 | + * | |
35737 | + * \param vih The handle to identify the efx_vi | |
35738 | + * \param mdata Pointer to a structure to receive the metadata | |
35739 | + * \param hw_res_array An array to receive the list of hardware resources | |
35740 | + * \param length The length of hw_res_array. Updated on success to contain | |
35741 | + * the number of entries in the supplied array that were used. | |
35742 | + * \return Zero on success, non-zero on failure | |
35743 | + */ | |
35744 | +extern int | |
35745 | +efx_vi_hw_resource_get_phys(struct efx_vi_state *vih, | |
35746 | + struct efx_vi_hw_resource_metadata *mdata, | |
35747 | + struct efx_vi_hw_resource *hw_res_array, | |
35748 | + int *length); | |
35749 | + | |
35750 | +#endif /* __CI_DRIVER_RESOURCE_EFX_VI_H__ */ | |
35751 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common.h | |
35752 | =================================================================== | |
35753 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
35754 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common.h 2008-02-20 09:32:49.000000000 +0100 | |
35755 | @@ -0,0 +1,102 @@ | |
35756 | +/**************************************************************************** | |
35757 | + * Driver for Solarflare network controllers - | |
35758 | + * resource management for Xen backend, OpenOnload, etc | |
35759 | + * (including support for SFE4001 10GBT NIC) | |
35760 | + * | |
35761 | + * This file provides API of the efhw library which may be used both from | |
35762 | + * the kernel and from the user-space code. | |
35763 | + * | |
35764 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
35765 | + * 9501 Jeronimo Road, Suite 250, | |
35766 | + * Irvine, CA 92618, USA | |
35767 | + * | |
35768 | + * Developed and maintained by Solarflare Communications: | |
35769 | + * <linux-xen-drivers@solarflare.com> | |
35770 | + * <onload-dev@solarflare.com> | |
35771 | + * | |
35772 | + * Certain parts of the driver were implemented by | |
35773 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
35774 | + * OKTET Labs Ltd, Russia, | |
35775 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
35776 | + * by request of Solarflare Communications | |
35777 | + * | |
35778 | + * | |
35779 | + * This program is free software; you can redistribute it and/or modify it | |
35780 | + * under the terms of the GNU General Public License version 2 as published | |
35781 | + * by the Free Software Foundation, incorporated herein by reference. | |
35782 | + * | |
35783 | + * This program is distributed in the hope that it will be useful, | |
35784 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
35785 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
35786 | + * GNU General Public License for more details. | |
35787 | + * | |
35788 | + * You should have received a copy of the GNU General Public License | |
35789 | + * along with this program; if not, write to the Free Software | |
35790 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
35791 | + **************************************************************************** | |
35792 | + */ | |
35793 | + | |
35794 | +#ifndef __CI_EFHW_COMMON_H__ | |
35795 | +#define __CI_EFHW_COMMON_H__ | |
35796 | + | |
35797 | +#include <ci/efhw/common_sysdep.h> | |
35798 | + | |
35799 | +enum efhw_arch { | |
35800 | + EFHW_ARCH_FALCON, | |
35801 | + EFHW_ARCH_SIENA, | |
35802 | +}; | |
35803 | + | |
35804 | +typedef uint32_t efhw_buffer_addr_t; | |
35805 | +#define EFHW_BUFFER_ADDR_FMT "[ba:%"PRIx32"]" | |
35806 | + | |
35807 | +/*! Comment? */ | |
35808 | +typedef union { | |
35809 | + uint64_t u64; | |
35810 | + struct { | |
35811 | + uint32_t a; | |
35812 | + uint32_t b; | |
35813 | + } opaque; | |
35814 | + struct { | |
35815 | + uint32_t code; | |
35816 | + uint32_t status; | |
35817 | + } ev1002; | |
35818 | +} efhw_event_t; | |
35819 | + | |
35820 | +/* Flags for TX/RX queues */ | |
35821 | +#define EFHW_VI_JUMBO_EN 0x01 /*! scatter RX over multiple desc */ | |
35822 | +#define EFHW_VI_ISCSI_RX_HDIG_EN 0x02 /*! iscsi rx header digest */ | |
35823 | +#define EFHW_VI_ISCSI_TX_HDIG_EN 0x04 /*! iscsi tx header digest */ | |
35824 | +#define EFHW_VI_ISCSI_RX_DDIG_EN 0x08 /*! iscsi rx data digest */ | |
35825 | +#define EFHW_VI_ISCSI_TX_DDIG_EN 0x10 /*! iscsi tx data digest */ | |
35826 | +#define EFHW_VI_TX_PHYS_ADDR_EN 0x20 /*! TX physical address mode */ | |
35827 | +#define EFHW_VI_RX_PHYS_ADDR_EN 0x40 /*! RX physical address mode */ | |
35828 | +#define EFHW_VI_RM_WITH_INTERRUPT 0x80 /*! VI with an interrupt */ | |
35829 | +#define EFHW_VI_TX_IP_CSUM_DIS 0x100 /*! enable ip checksum generation */ | |
35830 | +#define EFHW_VI_TX_TCPUDP_CSUM_DIS 0x200 /*! enable tcp/udp checksum | |
35831 | + generation */ | |
35832 | +#define EFHW_VI_TX_TCPUDP_ONLY 0x400 /*! drop non-tcp/udp packets */ | |
35833 | + | |
35834 | +/* Types of hardware filter */ | |
35835 | +/* Each of these values implicitly selects scatter filters on B0 - or in | |
35836 | + EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK if a non-scatter filter is required */ | |
35837 | +#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD (0) /* dest host only */ | |
35838 | +#define EFHW_IP_FILTER_TYPE_UDP_FULL (1) /* dest host and port */ | |
35839 | +#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD (2) /* dest based filter */ | |
35840 | +#define EFHW_IP_FILTER_TYPE_TCP_FULL (3) /* src filter */ | |
35841 | +/* Same again, but with RSS (for B0 only) */ | |
35842 | +#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD_RSS_B0 (4) | |
35843 | +#define EFHW_IP_FILTER_TYPE_UDP_FULL_RSS_B0 (5) | |
35844 | +#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD_RSS_B0 (6) | |
35845 | +#define EFHW_IP_FILTER_TYPE_TCP_FULL_RSS_B0 (7) | |
35846 | + | |
35847 | +#define EFHW_IP_FILTER_TYPE_FULL_MASK (0x1) /* Mask for full / wildcard */ | |
35848 | +#define EFHW_IP_FILTER_TYPE_TCP_MASK (0x2) /* Mask for TCP type */ | |
35849 | +#define EFHW_IP_FILTER_TYPE_RSS_B0_MASK (0x4) /* Mask for B0 RSS enable */ | |
35850 | +#define EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK (0x8) /* Mask for B0 SCATTER dsbl */ | |
35851 | + | |
35852 | +#define EFHW_IP_FILTER_TYPE_MASK (0xffff) /* Mask of types above */ | |
35853 | + | |
35854 | +#define EFHW_IP_FILTER_BROADCAST (0x10000) /* driverlink filter | |
35855 | + support */ | |
35856 | + | |
35857 | +#endif /* __CI_EFHW_COMMON_H__ */ | |
35858 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common_sysdep.h | |
35859 | =================================================================== | |
35860 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
35861 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/common_sysdep.h 2008-02-20 09:32:49.000000000 +0100 | |
35862 | @@ -0,0 +1,67 @@ | |
35863 | +/**************************************************************************** | |
35864 | + * Driver for Solarflare network controllers - | |
35865 | + * resource management for Xen backend, OpenOnload, etc | |
35866 | + * (including support for SFE4001 10GBT NIC) | |
35867 | + * | |
35868 | + * This file provides version-independent Linux kernel API for | |
35869 | + * userland-to-kernel interfaces. | |
35870 | + * Only kernels >=2.6.9 are supported. | |
35871 | + * | |
35872 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
35873 | + * 9501 Jeronimo Road, Suite 250, | |
35874 | + * Irvine, CA 92618, USA | |
35875 | + * | |
35876 | + * Developed and maintained by Solarflare Communications: | |
35877 | + * <linux-xen-drivers@solarflare.com> | |
35878 | + * <onload-dev@solarflare.com> | |
35879 | + * | |
35880 | + * Certain parts of the driver were implemented by | |
35881 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
35882 | + * OKTET Labs Ltd, Russia, | |
35883 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
35884 | + * by request of Solarflare Communications | |
35885 | + * | |
35886 | + * | |
35887 | + * This program is free software; you can redistribute it and/or modify it | |
35888 | + * under the terms of the GNU General Public License version 2 as published | |
35889 | + * by the Free Software Foundation, incorporated herein by reference. | |
35890 | + * | |
35891 | + * This program is distributed in the hope that it will be useful, | |
35892 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
35893 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
35894 | + * GNU General Public License for more details. | |
35895 | + * | |
35896 | + * You should have received a copy of the GNU General Public License | |
35897 | + * along with this program; if not, write to the Free Software | |
35898 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
35899 | + **************************************************************************** | |
35900 | + */ | |
35901 | + | |
35902 | +#ifndef __CI_EFHW_COMMON_LINUX_H__ | |
35903 | +#define __CI_EFHW_COMMON_LINUX_H__ | |
35904 | + | |
35905 | +#include <linux/types.h> | |
35906 | +#include <linux/version.h> | |
35907 | + | |
35908 | +/* Dirty hack, but Linux kernel does not provide DMA_ADDR_T_FMT */ | |
35909 | +#if BITS_PER_LONG == 64 || defined(CONFIG_HIGHMEM64G) | |
35910 | +#define DMA_ADDR_T_FMT "%llx" | |
35911 | +#else | |
35912 | +#define DMA_ADDR_T_FMT "%x" | |
35913 | +#endif | |
35914 | + | |
35915 | +/* Linux kernel also does not provide PRIx32... Sigh. */ | |
35916 | +#define PRIx32 "x" | |
35917 | +#define PRIx64 "llx" | |
35918 | + | |
35919 | + | |
35920 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) | |
35921 | +enum { | |
35922 | + false = 0, | |
35923 | + true = 1 | |
35924 | +}; | |
35925 | + | |
35926 | +typedef _Bool bool; | |
35927 | +#endif /* LINUX_VERSION_CODE < 2.6.19 */ | |
35928 | + | |
35929 | +#endif /* __CI_EFHW_COMMON_LINUX_H__ */ | |
35930 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/debug.h | |
35931 | =================================================================== | |
35932 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
35933 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/debug.h 2008-02-20 09:32:49.000000000 +0100 | |
35934 | @@ -0,0 +1,84 @@ | |
35935 | +/**************************************************************************** | |
35936 | + * Driver for Solarflare network controllers - | |
35937 | + * resource management for Xen backend, OpenOnload, etc | |
35938 | + * (including support for SFE4001 10GBT NIC) | |
35939 | + * | |
35940 | + * This file provides debug-related API for efhw library using Linux kernel | |
35941 | + * primitives. | |
35942 | + * | |
35943 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
35944 | + * 9501 Jeronimo Road, Suite 250, | |
35945 | + * Irvine, CA 92618, USA | |
35946 | + * | |
35947 | + * Developed and maintained by Solarflare Communications: | |
35948 | + * <linux-xen-drivers@solarflare.com> | |
35949 | + * <onload-dev@solarflare.com> | |
35950 | + * | |
35951 | + * Certain parts of the driver were implemented by | |
35952 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
35953 | + * OKTET Labs Ltd, Russia, | |
35954 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
35955 | + * by request of Solarflare Communications | |
35956 | + * | |
35957 | + * | |
35958 | + * This program is free software; you can redistribute it and/or modify it | |
35959 | + * under the terms of the GNU General Public License version 2 as published | |
35960 | + * by the Free Software Foundation, incorporated herein by reference. | |
35961 | + * | |
35962 | + * This program is distributed in the hope that it will be useful, | |
35963 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
35964 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
35965 | + * GNU General Public License for more details. | |
35966 | + * | |
35967 | + * You should have received a copy of the GNU General Public License | |
35968 | + * along with this program; if not, write to the Free Software | |
35969 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
35970 | + **************************************************************************** | |
35971 | + */ | |
35972 | + | |
35973 | +#ifndef __CI_EFHW_DEBUG_LINUX_H__ | |
35974 | +#define __CI_EFHW_DEBUG_LINUX_H__ | |
35975 | + | |
35976 | +#define EFHW_PRINTK_PREFIX "[sfc efhw] " | |
35977 | + | |
35978 | +#define EFHW_PRINTK(level, fmt, ...) \ | |
35979 | + printk(level EFHW_PRINTK_PREFIX fmt "\n", __VA_ARGS__) | |
35980 | + | |
35981 | +/* Following macros should be used with non-zero format parameters | |
35982 | + * due to __VA_ARGS__ limitations. Use "%s" with __FUNCTION__ if you can't | |
35983 | + * find better parameters. */ | |
35984 | +#define EFHW_ERR(fmt, ...) EFHW_PRINTK(KERN_ERR, fmt, __VA_ARGS__) | |
35985 | +#define EFHW_WARN(fmt, ...) EFHW_PRINTK(KERN_WARNING, fmt, __VA_ARGS__) | |
35986 | +#define EFHW_NOTICE(fmt, ...) EFHW_PRINTK(KERN_NOTICE, fmt, __VA_ARGS__) | |
35987 | +#if 0 && !defined(NDEBUG) | |
35988 | +#define EFHW_TRACE(fmt, ...) EFHW_PRINTK(KERN_DEBUG, fmt, __VA_ARGS__) | |
35989 | +#else | |
35990 | +#define EFHW_TRACE(fmt, ...) | |
35991 | +#endif | |
35992 | + | |
35993 | +#ifndef NDEBUG | |
35994 | +#define EFHW_ASSERT(cond) BUG_ON((cond) == 0) | |
35995 | +#define EFHW_DO_DEBUG(expr) expr | |
35996 | +#else | |
35997 | +#define EFHW_ASSERT(cond) | |
35998 | +#define EFHW_DO_DEBUG(expr) | |
35999 | +#endif | |
36000 | + | |
36001 | +#define EFHW_TEST(expr) \ | |
36002 | + do { \ | |
36003 | + if (unlikely(!(expr))) \ | |
36004 | + BUG(); \ | |
36005 | + } while (0) | |
36006 | + | |
36007 | +/* Build time asserts. We paste the line number into the type name | |
36008 | + * so that the macro can be used more than once per file even if the | |
36009 | + * compiler objects to multiple identical typedefs. Collisions | |
36010 | + * between use in different header files is still possible. */ | |
36011 | +#ifndef EFHW_BUILD_ASSERT | |
36012 | +#define __EFHW_BUILD_ASSERT_NAME(_x) __EFHW_BUILD_ASSERT_ILOATHECPP(_x) | |
36013 | +#define __EFHW_BUILD_ASSERT_ILOATHECPP(_x) __EFHW_BUILD_ASSERT__ ##_x | |
36014 | +#define EFHW_BUILD_ASSERT(e) \ | |
36015 | + typedef char __EFHW_BUILD_ASSERT_NAME(__LINE__)[(e) ? 1 : -1] | |
36016 | +#endif | |
36017 | + | |
36018 | +#endif /* __CI_EFHW_DEBUG_LINUX_H__ */ | |
36019 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_config.h | |
36020 | =================================================================== | |
36021 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
36022 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_config.h 2008-02-20 09:32:49.000000000 +0100 | |
36023 | @@ -0,0 +1,43 @@ | |
36024 | +/**************************************************************************** | |
36025 | + * Driver for Solarflare network controllers - | |
36026 | + * resource management for Xen backend, OpenOnload, etc | |
36027 | + * (including support for SFE4001 10GBT NIC) | |
36028 | + * | |
36029 | + * This file provides some limits used in both kernel and userland code. | |
36030 | + * | |
36031 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
36032 | + * 9501 Jeronimo Road, Suite 250, | |
36033 | + * Irvine, CA 92618, USA | |
36034 | + * | |
36035 | + * Developed and maintained by Solarflare Communications: | |
36036 | + * <linux-xen-drivers@solarflare.com> | |
36037 | + * <onload-dev@solarflare.com> | |
36038 | + * | |
36039 | + * Certain parts of the driver were implemented by | |
36040 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
36041 | + * OKTET Labs Ltd, Russia, | |
36042 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
36043 | + * by request of Solarflare Communications | |
36044 | + * | |
36045 | + * | |
36046 | + * This program is free software; you can redistribute it and/or modify it | |
36047 | + * under the terms of the GNU General Public License version 2 as published | |
36048 | + * by the Free Software Foundation, incorporated herein by reference. | |
36049 | + * | |
36050 | + * This program is distributed in the hope that it will be useful, | |
36051 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
36052 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
36053 | + * GNU General Public License for more details. | |
36054 | + * | |
36055 | + * You should have received a copy of the GNU General Public License | |
36056 | + * along with this program; if not, write to the Free Software | |
36057 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
36058 | + **************************************************************************** | |
36059 | + */ | |
36060 | + | |
36061 | +#ifndef __CI_EFHW_EFAB_CONFIG_H__ | |
36062 | +#define __CI_EFHW_EFAB_CONFIG_H__ | |
36063 | + | |
36064 | +#define EFHW_MAX_NR_DEVS 5 /* max number of efhw devices supported */ | |
36065 | + | |
36066 | +#endif /* __CI_EFHW_EFAB_CONFIG_H__ */ | |
36067 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_types.h | |
36068 | =================================================================== | |
36069 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
36070 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/efhw_types.h 2008-02-20 09:32:49.000000000 +0100 | |
36071 | @@ -0,0 +1,342 @@ | |
36072 | +/**************************************************************************** | |
36073 | + * Driver for Solarflare network controllers - | |
36074 | + * resource management for Xen backend, OpenOnload, etc | |
36075 | + * (including support for SFE4001 10GBT NIC) | |
36076 | + * | |
36077 | + * This file provides struct efhw_nic and some related types. | |
36078 | + * | |
36079 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
36080 | + * 9501 Jeronimo Road, Suite 250, | |
36081 | + * Irvine, CA 92618, USA | |
36082 | + * | |
36083 | + * Developed and maintained by Solarflare Communications: | |
36084 | + * <linux-xen-drivers@solarflare.com> | |
36085 | + * <onload-dev@solarflare.com> | |
36086 | + * | |
36087 | + * Certain parts of the driver were implemented by | |
36088 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
36089 | + * OKTET Labs Ltd, Russia, | |
36090 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
36091 | + * by request of Solarflare Communications | |
36092 | + * | |
36093 | + * | |
36094 | + * This program is free software; you can redistribute it and/or modify it | |
36095 | + * under the terms of the GNU General Public License version 2 as published | |
36096 | + * by the Free Software Foundation, incorporated herein by reference. | |
36097 | + * | |
36098 | + * This program is distributed in the hope that it will be useful, | |
36099 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
36100 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
36101 | + * GNU General Public License for more details. | |
36102 | + * | |
36103 | + * You should have received a copy of the GNU General Public License | |
36104 | + * along with this program; if not, write to the Free Software | |
36105 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
36106 | + **************************************************************************** | |
36107 | + */ | |
36108 | + | |
36109 | +#ifndef __CI_EFHW_EFAB_TYPES_H__ | |
36110 | +#define __CI_EFHW_EFAB_TYPES_H__ | |
36111 | + | |
36112 | +#include <ci/efhw/efhw_config.h> | |
36113 | +#include <ci/efhw/hardware_sysdep.h> | |
36114 | +#include <ci/efhw/iopage_types.h> | |
36115 | +#include <ci/efhw/sysdep.h> | |
36116 | + | |
36117 | +/*-------------------------------------------------------------------- | |
36118 | + * | |
36119 | + * hardware limits used in the types | |
36120 | + * | |
36121 | + *--------------------------------------------------------------------*/ | |
36122 | + | |
36123 | +#define EFHW_KEVENTQ_MAX 8 | |
36124 | + | |
36125 | +/*-------------------------------------------------------------------- | |
36126 | + * | |
36127 | + * forward type declarations | |
36128 | + * | |
36129 | + *--------------------------------------------------------------------*/ | |
36130 | + | |
36131 | +struct efhw_nic; | |
36132 | + | |
36133 | +/*-------------------------------------------------------------------- | |
36134 | + * | |
36135 | + * Managed interface | |
36136 | + * | |
36137 | + *--------------------------------------------------------------------*/ | |
36138 | + | |
36139 | +struct efhw_buffer_table_allocation{ | |
36140 | + unsigned base; | |
36141 | + unsigned order; | |
36142 | +}; | |
36143 | + | |
36144 | +struct eventq_resource_hardware { | |
36145 | + /*!iobuffer allocated for eventq - can be larger than eventq */ | |
36146 | + efhw_iopages_t iobuff; | |
36147 | + unsigned iobuff_off; | |
36148 | + struct efhw_buffer_table_allocation buf_tbl_alloc; | |
36149 | + int capacity; /*!< capacity of event queue */ | |
36150 | +}; | |
36151 | + | |
36152 | +/*-------------------------------------------------------------------- | |
36153 | + * | |
36154 | + * event queues and event driven callbacks | |
36155 | + * | |
36156 | + *--------------------------------------------------------------------*/ | |
36157 | + | |
36158 | +struct efhw_keventq { | |
36159 | + volatile int lock; | |
36160 | + caddr_t evq_base; | |
36161 | + int32_t evq_ptr; | |
36162 | + uint32_t evq_mask; | |
36163 | + unsigned instance; | |
36164 | + struct eventq_resource_hardware hw; | |
36165 | + struct efhw_ev_handler *ev_handlers; | |
36166 | +}; | |
36167 | + | |
36168 | +/********************************************************************** | |
36169 | + * Portable HW interface. *************************************** | |
36170 | + **********************************************************************/ | |
36171 | + | |
36172 | +/*-------------------------------------------------------------------- | |
36173 | + * | |
36174 | + * EtherFabric Functional units - configuration and control | |
36175 | + * | |
36176 | + *--------------------------------------------------------------------*/ | |
36177 | + | |
36178 | +struct efhw_func_ops { | |
36179 | + | |
36180 | + /*-------------- Initialisation ------------ */ | |
36181 | + | |
36182 | + /*! close down all hardware functional units - leaves NIC in a safe | |
36183 | + state for driver unload */ | |
36184 | + void (*close_hardware) (struct efhw_nic *nic); | |
36185 | + | |
36186 | + /*! initialise all hardware functional units */ | |
36187 | + int (*init_hardware) (struct efhw_nic *nic, | |
36188 | + struct efhw_ev_handler *, | |
36189 | + const uint8_t *mac_addr); | |
36190 | + | |
36191 | + /*-------------- Interrupt support ------------ */ | |
36192 | + | |
36193 | + /*! Main interrupt routine | |
36194 | + ** This function returns, | |
36195 | + ** - zero, if the IRQ was not generated by EF1 | |
36196 | + ** - non-zero, if EF1 was the source of the IRQ | |
36197 | + ** | |
36198 | + ** | |
36199 | + ** opaque is an OS provided pointer for use by the OS callbacks | |
36200 | + ** e.g in Windows used to indicate DPC scheduled | |
36201 | + */ | |
36202 | + int (*interrupt) (struct efhw_nic *nic); | |
36203 | + | |
36204 | + /*! Enable given interrupt mask for the given IRQ unit */ | |
36205 | + void (*interrupt_enable) (struct efhw_nic *nic, uint idx); | |
36206 | + | |
36207 | + /*! Disable given interrupt mask for the given IRQ unit */ | |
36208 | + void (*interrupt_disable) (struct efhw_nic *nic, uint idx); | |
36209 | + | |
36210 | + /*! Set interrupt moderation strategy for the given IRQ unit | |
36211 | + ** val is in usec | |
36212 | + */ | |
36213 | + void (*set_interrupt_moderation)(struct efhw_nic *nic, | |
36214 | + uint idx, uint val); | |
36215 | + | |
36216 | + /*-------------- Event support ------------ */ | |
36217 | + | |
36218 | + /*! Enable the given event queue | |
36219 | + depending on the underlying implementation (EF1 or Falcon) then | |
36220 | + either a q_base_addr in host memory, or a buffer base id should | |
36221 | + be proivded | |
36222 | + */ | |
36223 | + void (*event_queue_enable) (struct efhw_nic *nic, | |
36224 | + uint evq, /* evnt queue index */ | |
36225 | + uint evq_size, /* units of #entries */ | |
36226 | + dma_addr_t q_base_addr, uint buf_base_id); | |
36227 | + | |
36228 | + /*! Disable the given event queue (and any associated timer) */ | |
36229 | + void (*event_queue_disable) (struct efhw_nic *nic, uint evq, | |
36230 | + int timer_only); | |
36231 | + | |
36232 | + /*! request wakeup from the NIC on a given event Q */ | |
36233 | + void (*wakeup_request) (struct efhw_nic *nic, dma_addr_t q_base_addr, | |
36234 | + int next_i, int evq); | |
36235 | + | |
36236 | + /*! Push a SW event on a given eventQ */ | |
36237 | + void (*sw_event) (struct efhw_nic *nic, int data, int evq); | |
36238 | + | |
36239 | + /*-------------- Filter support ------------ */ | |
36240 | + | |
36241 | + /*! Setup a given filter - The software can request a filter_i, | |
36242 | + * but some EtherFabric implementations will override with | |
36243 | + * a more suitable index | |
36244 | + */ | |
36245 | + int (*ipfilter_set) (struct efhw_nic *nic, int type, | |
36246 | + int *filter_i, int dmaq, | |
36247 | + unsigned saddr_be32, unsigned sport_be16, | |
36248 | + unsigned daddr_be32, unsigned dport_be16); | |
36249 | + | |
36250 | + /*! Attach a given filter to a DMAQ */ | |
36251 | + void (*ipfilter_attach) (struct efhw_nic *nic, int filter_idx, | |
36252 | + int dmaq_idx); | |
36253 | + | |
36254 | + /*! Detach a filter from its DMAQ */ | |
36255 | + void (*ipfilter_detach) (struct efhw_nic *nic, int filter_idx); | |
36256 | + | |
36257 | + /*! Clear down a given filter */ | |
36258 | + void (*ipfilter_clear) (struct efhw_nic *nic, int filter_idx); | |
36259 | + | |
36260 | + /*-------------- DMA support ------------ */ | |
36261 | + | |
36262 | + /*! Initialise NIC state for a given TX DMAQ */ | |
36263 | + void (*dmaq_tx_q_init) (struct efhw_nic *nic, | |
36264 | + uint dmaq, uint evq, uint owner, uint tag, | |
36265 | + uint dmaq_size, uint buf_idx, uint flags); | |
36266 | + | |
36267 | + /*! Initialise NIC state for a given RX DMAQ */ | |
36268 | + void (*dmaq_rx_q_init) (struct efhw_nic *nic, | |
36269 | + uint dmaq, uint evq, uint owner, uint tag, | |
36270 | + uint dmaq_size, uint buf_idx, uint flags); | |
36271 | + | |
36272 | + /*! Disable a given TX DMAQ */ | |
36273 | + void (*dmaq_tx_q_disable) (struct efhw_nic *nic, uint dmaq); | |
36274 | + | |
36275 | + /*! Disable a given RX DMAQ */ | |
36276 | + void (*dmaq_rx_q_disable) (struct efhw_nic *nic, uint dmaq); | |
36277 | + | |
36278 | + /*! Flush a given TX DMA channel */ | |
36279 | + int (*flush_tx_dma_channel) (struct efhw_nic *nic, uint dmaq); | |
36280 | + | |
36281 | + /*! Flush a given RX DMA channel */ | |
36282 | + int (*flush_rx_dma_channel) (struct efhw_nic *nic, uint dmaq); | |
36283 | + | |
36284 | + /*-------------- Buffer table Support ------------ */ | |
36285 | + | |
36286 | + /*! Initialise a buffer table page */ | |
36287 | + void (*buffer_table_set) (struct efhw_nic *nic, | |
36288 | + dma_addr_t dma_addr, | |
36289 | + uint bufsz, uint region, | |
36290 | + int own_id, int buffer_id); | |
36291 | + | |
36292 | + /*! Initialise a block of buffer table pages */ | |
36293 | + void (*buffer_table_set_n) (struct efhw_nic *nic, int buffer_id, | |
36294 | + dma_addr_t dma_addr, | |
36295 | + uint bufsz, uint region, | |
36296 | + int n_pages, int own_id); | |
36297 | + | |
36298 | + /*! Clear a block of buffer table pages */ | |
36299 | + void (*buffer_table_clear) (struct efhw_nic *nic, int buffer_id, | |
36300 | + int num); | |
36301 | + | |
36302 | + /*! Commit a buffer table update */ | |
36303 | + void (*buffer_table_commit) (struct efhw_nic *nic); | |
36304 | + | |
36305 | +}; | |
36306 | + | |
36307 | + | |
36308 | +/*---------------------------------------------------------------------------- | |
36309 | + * | |
36310 | + * NIC type | |
36311 | + * | |
36312 | + *---------------------------------------------------------------------------*/ | |
36313 | + | |
36314 | +struct efhw_device_type { | |
36315 | + int arch; /* enum efhw_arch */ | |
36316 | + char variant; /* 'A', 'B', ... */ | |
36317 | + int revision; /* 0, 1, ... */ | |
36318 | +}; | |
36319 | + | |
36320 | + | |
36321 | +/*---------------------------------------------------------------------------- | |
36322 | + * | |
36323 | + * EtherFabric NIC instance - nic.c for HW independent functions | |
36324 | + * | |
36325 | + *---------------------------------------------------------------------------*/ | |
36326 | + | |
36327 | +/*! */ | |
36328 | +struct efhw_nic { | |
36329 | + /*! zero base index in efrm_nic_table.nic array */ | |
36330 | + volatile int index; | |
36331 | + int ifindex; /*!< OS level nic index */ | |
36332 | +#ifdef HAS_NET_NAMESPACE | |
36333 | + struct net *nd_net; | |
36334 | +#endif | |
36335 | + | |
36336 | + struct efhw_device_type devtype; | |
36337 | + | |
36338 | + /*! Options that can be set by user. */ | |
36339 | + unsigned options; | |
36340 | +# define NIC_OPT_EFTEST 0x1 /* owner is an eftest app */ | |
36341 | + | |
36342 | +# define NIC_OPT_DEFAULT 0 | |
36343 | + | |
36344 | + /*! Internal flags that indicate hardware properties at runtime. */ | |
36345 | + unsigned flags; | |
36346 | +# define NIC_FLAG_NO_INTERRUPT 0x01 /* to be set at init time only */ | |
36347 | +# define NIC_FLAG_TRY_MSI 0x02 | |
36348 | +# define NIC_FLAG_MSI 0x04 | |
36349 | +# define NIC_FLAG_OS_IRQ_EN 0x08 | |
36350 | +# define NIC_FLAG_10G 0x10 | |
36351 | + | |
36352 | + unsigned mtu; /*!< MAC MTU (includes MAC hdr) */ | |
36353 | + | |
36354 | + /* hardware resources */ | |
36355 | + | |
36356 | + /*! I/O address of the start of the bar */ | |
36357 | + efhw_ioaddr_t bar_ioaddr; | |
36358 | + | |
36359 | + /*! Bar number of control aperture. */ | |
36360 | + unsigned ctr_ap_bar; | |
36361 | + /*! Length of control aperture in bytes. */ | |
36362 | + unsigned ctr_ap_bytes; | |
36363 | + | |
36364 | + uint8_t mac_addr[ETH_ALEN]; /*!< mac address */ | |
36365 | + | |
36366 | + /*! EtherFabric Functional Units -- functions */ | |
36367 | + const struct efhw_func_ops *efhw_func; | |
36368 | + | |
36369 | + /* Value read from FPGA version register. Zero for asic. */ | |
36370 | + unsigned fpga_version; | |
36371 | + | |
36372 | + /*! This lock protects a number of misc NIC resources. It should | |
36373 | + * only be used for things that can be at the bottom of the lock | |
36374 | + * order. ie. You mustn't attempt to grab any other lock while | |
36375 | + * holding this one. | |
36376 | + */ | |
36377 | + spinlock_t *reg_lock; | |
36378 | + spinlock_t the_reg_lock; | |
36379 | + | |
36380 | + int buf_commit_outstanding; /*!< outstanding buffer commits */ | |
36381 | + | |
36382 | + /*! interrupt callbacks (hard-irq) */ | |
36383 | + void (*irq_handler) (struct efhw_nic *, int unit); | |
36384 | + | |
36385 | + /*! event queues per driver */ | |
36386 | + struct efhw_keventq evq[EFHW_KEVENTQ_MAX]; | |
36387 | + | |
36388 | +/* for marking when we are not using an IRQ unit | |
36389 | + - 0 is a valid offset to an IRQ unit on EF1! */ | |
36390 | +#define EFHW_IRQ_UNIT_UNUSED 0xffff | |
36391 | + /*! interrupt unit in use */ | |
36392 | + unsigned int irq_unit[EFHW_KEVENTQ_MAX]; | |
36393 | + efhw_iopage_t irq_iobuff; /*!< Falcon SYSERR interrupt */ | |
36394 | + | |
36395 | + /* The new driverlink infrastructure. */ | |
36396 | + struct efx_dl_device *net_driver_dev; | |
36397 | + struct efx_dlfilt_cb_s *dlfilter_cb; | |
36398 | + | |
36399 | + /*! Bit masks of the sizes of event queues and dma queues supported | |
36400 | + * by the nic. */ | |
36401 | + unsigned evq_sizes; | |
36402 | + unsigned rxq_sizes; | |
36403 | + unsigned txq_sizes; | |
36404 | + | |
36405 | + /* Size of filter table (including odd and even banks). */ | |
36406 | + unsigned filter_tbl_size; | |
36407 | +}; | |
36408 | + | |
36409 | + | |
36410 | +#define EFHW_KVA(nic) ((nic)->bar_ioaddr) | |
36411 | + | |
36412 | + | |
36413 | +#endif /* __CI_EFHW_EFHW_TYPES_H__ */ | |
36414 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/hardware_sysdep.h | |
36415 | =================================================================== | |
36416 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
36417 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/hardware_sysdep.h 2008-02-20 09:32:49.000000000 +0100 | |
36418 | @@ -0,0 +1,84 @@ | |
36419 | +/**************************************************************************** | |
36420 | + * Driver for Solarflare network controllers - | |
36421 | + * resource management for Xen backend, OpenOnload, etc | |
36422 | + * (including support for SFE4001 10GBT NIC) | |
36423 | + * | |
36424 | + * This file provides version-independent Linux kernel API for header files | |
36425 | + * with hardware-related definitions (in ci/driver/efab/hardware*). | |
36426 | + * Only kernels >=2.6.9 are supported. | |
36427 | + * | |
36428 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
36429 | + * 9501 Jeronimo Road, Suite 250, | |
36430 | + * Irvine, CA 92618, USA | |
36431 | + * | |
36432 | + * Developed and maintained by Solarflare Communications: | |
36433 | + * <linux-xen-drivers@solarflare.com> | |
36434 | + * <onload-dev@solarflare.com> | |
36435 | + * | |
36436 | + * Certain parts of the driver were implemented by | |
36437 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
36438 | + * OKTET Labs Ltd, Russia, | |
36439 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
36440 | + * by request of Solarflare Communications | |
36441 | + * | |
36442 | + * | |
36443 | + * This program is free software; you can redistribute it and/or modify it | |
36444 | + * under the terms of the GNU General Public License version 2 as published | |
36445 | + * by the Free Software Foundation, incorporated herein by reference. | |
36446 | + * | |
36447 | + * This program is distributed in the hope that it will be useful, | |
36448 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
36449 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
36450 | + * GNU General Public License for more details. | |
36451 | + * | |
36452 | + * You should have received a copy of the GNU General Public License | |
36453 | + * along with this program; if not, write to the Free Software | |
36454 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
36455 | + **************************************************************************** | |
36456 | + */ | |
36457 | + | |
36458 | +#ifndef __CI_EFHW_HARDWARE_LINUX_H__ | |
36459 | +#define __CI_EFHW_HARDWARE_LINUX_H__ | |
36460 | + | |
36461 | +#include <asm/io.h> | |
36462 | + | |
36463 | +#ifdef __LITTLE_ENDIAN | |
36464 | +#define EFHW_IS_LITTLE_ENDIAN | |
36465 | +#elif __BIG_ENDIAN | |
36466 | +#define EFHW_IS_BIG_ENDIAN | |
36467 | +#else | |
36468 | +#error Unknown endianness | |
36469 | +#endif | |
36470 | + | |
36471 | +#ifndef mmiowb | |
36472 | + #if defined(__i386__) || defined(__x86_64__) | |
36473 | + #define mmiowb() | |
36474 | + #elif defined(__ia64__) | |
36475 | + #ifndef ia64_mfa | |
36476 | + #define ia64_mfa() asm volatile ("mf.a" ::: "memory") | |
36477 | + #endif | |
36478 | + #define mmiowb ia64_mfa | |
36479 | + #else | |
36480 | + #error "Need definition for mmiowb()" | |
36481 | + #endif | |
36482 | +#endif | |
36483 | + | |
36484 | +typedef char *efhw_ioaddr_t; | |
36485 | + | |
36486 | +#ifndef readq | |
36487 | +static inline uint64_t __readq(void __iomem *addr) | |
36488 | +{ | |
36489 | + return *(volatile uint64_t *)addr; | |
36490 | +} | |
36491 | +#define readq(x) __readq(x) | |
36492 | +#endif | |
36493 | + | |
36494 | +#ifndef writeq | |
36495 | +static inline void __writeq(uint64_t v, void __iomem *addr) | |
36496 | +{ | |
36497 | + *(volatile uint64_t *)addr = v; | |
36498 | +} | |
36499 | +#define writeq(val, addr) __writeq((val), (addr)) | |
36500 | +#endif | |
36501 | + | |
36502 | +#endif /* __CI_EFHW_HARDWARE_LINUX_H__ */ | |
36503 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/iopage_types.h | |
36504 | =================================================================== | |
36505 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
36506 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/iopage_types.h 2008-02-20 09:32:49.000000000 +0100 | |
36507 | @@ -0,0 +1,188 @@ | |
36508 | +/**************************************************************************** | |
36509 | + * Driver for Solarflare network controllers - | |
36510 | + * resource management for Xen backend, OpenOnload, etc | |
36511 | + * (including support for SFE4001 10GBT NIC) | |
36512 | + * | |
36513 | + * This file provides efhw_page_t and efhw_iopage_t for Linux kernel. | |
36514 | + * | |
36515 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
36516 | + * 9501 Jeronimo Road, Suite 250, | |
36517 | + * Irvine, CA 92618, USA | |
36518 | + * | |
36519 | + * Developed and maintained by Solarflare Communications: | |
36520 | + * <linux-xen-drivers@solarflare.com> | |
36521 | + * <onload-dev@solarflare.com> | |
36522 | + * | |
36523 | + * Certain parts of the driver were implemented by | |
36524 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
36525 | + * OKTET Labs Ltd, Russia, | |
36526 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
36527 | + * by request of Solarflare Communications | |
36528 | + * | |
36529 | + * | |
36530 | + * This program is free software; you can redistribute it and/or modify it | |
36531 | + * under the terms of the GNU General Public License version 2 as published | |
36532 | + * by the Free Software Foundation, incorporated herein by reference. | |
36533 | + * | |
36534 | + * This program is distributed in the hope that it will be useful, | |
36535 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
36536 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
36537 | + * GNU General Public License for more details. | |
36538 | + * | |
36539 | + * You should have received a copy of the GNU General Public License | |
36540 | + * along with this program; if not, write to the Free Software | |
36541 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
36542 | + **************************************************************************** | |
36543 | + */ | |
36544 | + | |
36545 | +#ifndef __CI_EFHW_IOPAGE_LINUX_H__ | |
36546 | +#define __CI_EFHW_IOPAGE_LINUX_H__ | |
36547 | + | |
36548 | +#include <linux/gfp.h> | |
36549 | +#include <linux/hardirq.h> | |
36550 | +#include <ci/efhw/debug.h> | |
36551 | + | |
36552 | +/*-------------------------------------------------------------------- | |
36553 | + * | |
36554 | + * efhw_page_t: A single page of memory. Directly mapped in the driver, | |
36555 | + * and can be mapped to userlevel. | |
36556 | + * | |
36557 | + *--------------------------------------------------------------------*/ | |
36558 | + | |
36559 | +typedef struct { | |
36560 | + unsigned long kva; | |
36561 | +} efhw_page_t; | |
36562 | + | |
36563 | +static inline int efhw_page_alloc(efhw_page_t *p) | |
36564 | +{ | |
36565 | + p->kva = __get_free_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL); | |
36566 | + return p->kva ? 0 : -ENOMEM; | |
36567 | +} | |
36568 | + | |
36569 | +static inline int efhw_page_alloc_zeroed(efhw_page_t *p) | |
36570 | +{ | |
36571 | + p->kva = get_zeroed_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL); | |
36572 | + return p->kva ? 0 : -ENOMEM; | |
36573 | +} | |
36574 | + | |
36575 | +static inline void efhw_page_free(efhw_page_t *p) | |
36576 | +{ | |
36577 | + free_page(p->kva); | |
36578 | + EFHW_DO_DEBUG(memset(p, 0, sizeof(*p))); | |
36579 | +} | |
36580 | + | |
36581 | +static inline char *efhw_page_ptr(efhw_page_t *p) | |
36582 | +{ | |
36583 | + return (char *)p->kva; | |
36584 | +} | |
36585 | + | |
36586 | +static inline unsigned efhw_page_pfn(efhw_page_t *p) | |
36587 | +{ | |
36588 | + return (unsigned)(__pa(p->kva) >> PAGE_SHIFT); | |
36589 | +} | |
36590 | + | |
36591 | +static inline void efhw_page_mark_invalid(efhw_page_t *p) | |
36592 | +{ | |
36593 | + p->kva = 0; | |
36594 | +} | |
36595 | + | |
36596 | +static inline int efhw_page_is_valid(efhw_page_t *p) | |
36597 | +{ | |
36598 | + return p->kva != 0; | |
36599 | +} | |
36600 | + | |
36601 | +static inline void efhw_page_init_from_va(efhw_page_t *p, void *va) | |
36602 | +{ | |
36603 | + p->kva = (unsigned long)va; | |
36604 | +} | |
36605 | + | |
36606 | +/*-------------------------------------------------------------------- | |
36607 | + * | |
36608 | + * efhw_iopage_t: A single page of memory. Directly mapped in the driver, | |
36609 | + * and can be mapped to userlevel. Can also be accessed by the NIC. | |
36610 | + * | |
36611 | + *--------------------------------------------------------------------*/ | |
36612 | + | |
36613 | +typedef struct { | |
36614 | + efhw_page_t p; | |
36615 | + dma_addr_t dma_addr; | |
36616 | +} efhw_iopage_t; | |
36617 | + | |
36618 | +static inline dma_addr_t efhw_iopage_dma_addr(efhw_iopage_t *p) | |
36619 | +{ | |
36620 | + return p->dma_addr; | |
36621 | +} | |
36622 | + | |
36623 | +#define efhw_iopage_ptr(iop) efhw_page_ptr(&(iop)->p) | |
36624 | +#define efhw_iopage_pfn(iop) efhw_page_pfn(&(iop)->p) | |
36625 | +#define efhw_iopage_mark_invalid(iop) efhw_page_mark_invalid(&(iop)->p) | |
36626 | +#define efhw_iopage_is_valid(iop) efhw_page_is_valid(&(iop)->p) | |
36627 | + | |
36628 | +/*-------------------------------------------------------------------- | |
36629 | + * | |
36630 | + * efhw_iopages_t: A set of pages that are contiguous in physical memory. | |
36631 | + * Directly mapped in the driver, and can be mapped to userlevel. Can also | |
36632 | + * be accessed by the NIC. | |
36633 | + * | |
36634 | + * NB. The O/S may be unwilling to allocate many, or even any of these. So | |
36635 | + * only use this type where the NIC really needs a physically contiguous | |
36636 | + * buffer. | |
36637 | + * | |
36638 | + *--------------------------------------------------------------------*/ | |
36639 | + | |
36640 | +typedef struct { | |
36641 | + caddr_t kva; | |
36642 | + unsigned order; | |
36643 | + dma_addr_t dma_addr; | |
36644 | +} efhw_iopages_t; | |
36645 | + | |
36646 | +static inline caddr_t efhw_iopages_ptr(efhw_iopages_t *p) | |
36647 | +{ | |
36648 | + return p->kva; | |
36649 | +} | |
36650 | + | |
36651 | +static inline unsigned efhw_iopages_pfn(efhw_iopages_t *p) | |
36652 | +{ | |
36653 | + return (unsigned)(__pa(p->kva) >> PAGE_SHIFT); | |
36654 | +} | |
36655 | + | |
36656 | +static inline dma_addr_t efhw_iopages_dma_addr(efhw_iopages_t *p) | |
36657 | +{ | |
36658 | + return p->dma_addr; | |
36659 | +} | |
36660 | + | |
36661 | +static inline unsigned efhw_iopages_size(efhw_iopages_t *p) | |
36662 | +{ | |
36663 | + return 1u << (p->order + PAGE_SHIFT); | |
36664 | +} | |
36665 | + | |
36666 | +/* efhw_iopage_t <-> efhw_iopages_t conversions for handling physically | |
36667 | + * contiguous allocations in iobufsets for iSCSI. This allows the | |
36668 | + * essential information about contiguous allocations from | |
36669 | + * efhw_iopages_alloc() to be saved away in the efhw_iopage_t array in an | |
36670 | + * iobufset. (Changing the iobufset resource to use a union type would | |
36671 | + * involve a lot of code changes, and make the iobufset's metadata larger | |
36672 | + * which could be bad as it's supposed to fit into a single page on some | |
36673 | + * platforms.) | |
36674 | + */ | |
36675 | +static inline void | |
36676 | +efhw_iopage_init_from_iopages(efhw_iopage_t *iopage, | |
36677 | + efhw_iopages_t *iopages, unsigned pageno) | |
36678 | +{ | |
36679 | + iopage->p.kva = ((unsigned long)efhw_iopages_ptr(iopages)) | |
36680 | + + (pageno * PAGE_SIZE); | |
36681 | + iopage->dma_addr = efhw_iopages_dma_addr(iopages) + | |
36682 | + (pageno * PAGE_SIZE); | |
36683 | +} | |
36684 | + | |
36685 | +static inline void | |
36686 | +efhw_iopages_init_from_iopage(efhw_iopages_t *iopages, | |
36687 | + efhw_iopage_t *iopage, unsigned order) | |
36688 | +{ | |
36689 | + iopages->kva = (caddr_t) efhw_iopage_ptr(iopage); | |
36690 | + EFHW_ASSERT(iopages->kva); | |
36691 | + iopages->order = order; | |
36692 | + iopages->dma_addr = efhw_iopage_dma_addr(iopage); | |
36693 | +} | |
36694 | + | |
36695 | +#endif /* __CI_EFHW_IOPAGE_LINUX_H__ */ | |
36696 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/public.h | |
36697 | =================================================================== | |
36698 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
36699 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/public.h 2008-02-20 09:32:49.000000000 +0100 | |
36700 | @@ -0,0 +1,83 @@ | |
36701 | +/**************************************************************************** | |
36702 | + * Driver for Solarflare network controllers - | |
36703 | + * resource management for Xen backend, OpenOnload, etc | |
36704 | + * (including support for SFE4001 10GBT NIC) | |
36705 | + * | |
36706 | + * This file provides public API of efhw library exported from the SFC | |
36707 | + * resource driver. | |
36708 | + * | |
36709 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
36710 | + * 9501 Jeronimo Road, Suite 250, | |
36711 | + * Irvine, CA 92618, USA | |
36712 | + * | |
36713 | + * Developed and maintained by Solarflare Communications: | |
36714 | + * <linux-xen-drivers@solarflare.com> | |
36715 | + * <onload-dev@solarflare.com> | |
36716 | + * | |
36717 | + * Certain parts of the driver were implemented by | |
36718 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
36719 | + * OKTET Labs Ltd, Russia, | |
36720 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
36721 | + * by request of Solarflare Communications | |
36722 | + * | |
36723 | + * | |
36724 | + * This program is free software; you can redistribute it and/or modify it | |
36725 | + * under the terms of the GNU General Public License version 2 as published | |
36726 | + * by the Free Software Foundation, incorporated herein by reference. | |
36727 | + * | |
36728 | + * This program is distributed in the hope that it will be useful, | |
36729 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
36730 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
36731 | + * GNU General Public License for more details. | |
36732 | + * | |
36733 | + * You should have received a copy of the GNU General Public License | |
36734 | + * along with this program; if not, write to the Free Software | |
36735 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
36736 | + **************************************************************************** | |
36737 | + */ | |
36738 | + | |
36739 | +#ifndef __CI_EFHW_PUBLIC_H__ | |
36740 | +#define __CI_EFHW_PUBLIC_H__ | |
36741 | + | |
36742 | +#include <ci/efhw/common.h> | |
36743 | +#include <ci/efhw/efhw_types.h> | |
36744 | + | |
36745 | +/*! Returns true if we have some EtherFabric functional units - | |
36746 | + whether configured or not */ | |
36747 | +static inline int efhw_nic_have_functional_units(struct efhw_nic *nic) | |
36748 | +{ | |
36749 | + return nic->efhw_func != 0; | |
36750 | +} | |
36751 | + | |
36752 | +/*! Returns true if the EtherFabric functional units have been configured */ | |
36753 | +static inline int efhw_nic_have_hw(struct efhw_nic *nic) | |
36754 | +{ | |
36755 | + return efhw_nic_have_functional_units(nic) && (EFHW_KVA(nic) != 0); | |
36756 | +} | |
36757 | + | |
36758 | +/*! Helper function to allocate the iobuffer needed by an eventq | |
36759 | + * - it ensures the eventq has the correct alignment for the NIC | |
36760 | + * | |
36761 | + * \param rm Event-queue resource manager | |
36762 | + * \param instance Event-queue instance (index) | |
36763 | + * \param buf_bytes Requested size of eventq | |
36764 | + * \return < 0 if iobuffer allocation fails | |
36765 | + */ | |
36766 | +int efhw_nic_event_queue_alloc_iobuffer(struct efhw_nic *nic, | |
36767 | + struct eventq_resource_hardware *h, | |
36768 | + int evq_instance, unsigned buf_bytes); | |
36769 | + | |
36770 | +extern void falcon_nic_set_rx_usr_buf_size(struct efhw_nic *, | |
36771 | + int rx_usr_buf_size); | |
36772 | + | |
36773 | +extern void | |
36774 | +falcon_nic_rx_filter_ctl_set(struct efhw_nic *nic, uint32_t tcp_full, | |
36775 | + uint32_t tcp_wild, | |
36776 | + uint32_t udp_full, uint32_t udp_wild); | |
36777 | + | |
36778 | +extern void | |
36779 | +falcon_nic_rx_filter_ctl_get(struct efhw_nic *nic, uint32_t *tcp_full, | |
36780 | + uint32_t *tcp_wild, | |
36781 | + uint32_t *udp_full, uint32_t *udp_wild); | |
36782 | + | |
36783 | +#endif /* __CI_EFHW_PUBLIC_H__ */ | |
36784 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/sysdep.h | |
36785 | =================================================================== | |
36786 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
36787 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efhw/sysdep.h 2008-02-20 09:32:49.000000000 +0100 | |
36788 | @@ -0,0 +1,72 @@ | |
36789 | +/**************************************************************************** | |
36790 | + * Driver for Solarflare network controllers - | |
36791 | + * resource management for Xen backend, OpenOnload, etc | |
36792 | + * (including support for SFE4001 10GBT NIC) | |
36793 | + * | |
36794 | + * This file provides version-independent Linux kernel API for efhw library. | |
36795 | + * Only kernels >=2.6.9 are supported. | |
36796 | + * | |
36797 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
36798 | + * 9501 Jeronimo Road, Suite 250, | |
36799 | + * Irvine, CA 92618, USA | |
36800 | + * | |
36801 | + * Developed and maintained by Solarflare Communications: | |
36802 | + * <linux-xen-drivers@solarflare.com> | |
36803 | + * <onload-dev@solarflare.com> | |
36804 | + * | |
36805 | + * Certain parts of the driver were implemented by | |
36806 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
36807 | + * OKTET Labs Ltd, Russia, | |
36808 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
36809 | + * by request of Solarflare Communications | |
36810 | + * | |
36811 | + * | |
36812 | + * This program is free software; you can redistribute it and/or modify it | |
36813 | + * under the terms of the GNU General Public License version 2 as published | |
36814 | + * by the Free Software Foundation, incorporated herein by reference. | |
36815 | + * | |
36816 | + * This program is distributed in the hope that it will be useful, | |
36817 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
36818 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
36819 | + * GNU General Public License for more details. | |
36820 | + * | |
36821 | + * You should have received a copy of the GNU General Public License | |
36822 | + * along with this program; if not, write to the Free Software | |
36823 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
36824 | + **************************************************************************** | |
36825 | + */ | |
36826 | + | |
36827 | +#ifndef __CI_EFHW_SYSDEP_LINUX_H__ | |
36828 | +#define __CI_EFHW_SYSDEP_LINUX_H__ | |
36829 | + | |
36830 | +#include <linux/version.h> | |
36831 | +#include <linux/module.h> | |
36832 | +#include <linux/spinlock.h> | |
36833 | +#include <linux/delay.h> | |
36834 | +#include <linux/if_ether.h> | |
36835 | + | |
36836 | +#include <linux/netdevice.h> /* necessary for etherdevice.h on some kernels */ | |
36837 | +#include <linux/etherdevice.h> | |
36838 | + | |
36839 | +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21) | |
36840 | +static inline int is_local_ether_addr(const u8 *addr) | |
36841 | +{ | |
36842 | + return (0x02 & addr[0]); | |
36843 | +} | |
36844 | +#endif | |
36845 | + | |
36846 | +typedef unsigned long irq_flags_t; | |
36847 | + | |
36848 | +#define spin_lock_destroy(l_) do {} while (0) | |
36849 | + | |
36850 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24) | |
36851 | +#define HAS_NET_NAMESPACE | |
36852 | +#endif | |
36853 | + | |
36854 | +/* Funny, but linux has round_up for x86 only, defined in | |
36855 | + * x86-specific header */ | |
36856 | +#ifndef round_up | |
36857 | +#define round_up(x, y) (((x) + (y) - 1) & ~((y)-1)) | |
36858 | +#endif | |
36859 | + | |
36860 | +#endif /* __CI_EFHW_SYSDEP_LINUX_H__ */ | |
36861 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/nic_table.h | |
36862 | =================================================================== | |
36863 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
36864 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/nic_table.h 2008-02-20 09:32:49.000000000 +0100 | |
36865 | @@ -0,0 +1,98 @@ | |
36866 | +/**************************************************************************** | |
36867 | + * Driver for Solarflare network controllers - | |
36868 | + * resource management for Xen backend, OpenOnload, etc | |
36869 | + * (including support for SFE4001 10GBT NIC) | |
36870 | + * | |
36871 | + * This file provides public API for NIC table. | |
36872 | + * | |
36873 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
36874 | + * 9501 Jeronimo Road, Suite 250, | |
36875 | + * Irvine, CA 92618, USA | |
36876 | + * | |
36877 | + * Developed and maintained by Solarflare Communications: | |
36878 | + * <linux-xen-drivers@solarflare.com> | |
36879 | + * <onload-dev@solarflare.com> | |
36880 | + * | |
36881 | + * Certain parts of the driver were implemented by | |
36882 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
36883 | + * OKTET Labs Ltd, Russia, | |
36884 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
36885 | + * by request of Solarflare Communications | |
36886 | + * | |
36887 | + * | |
36888 | + * This program is free software; you can redistribute it and/or modify it | |
36889 | + * under the terms of the GNU General Public License version 2 as published | |
36890 | + * by the Free Software Foundation, incorporated herein by reference. | |
36891 | + * | |
36892 | + * This program is distributed in the hope that it will be useful, | |
36893 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
36894 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
36895 | + * GNU General Public License for more details. | |
36896 | + * | |
36897 | + * You should have received a copy of the GNU General Public License | |
36898 | + * along with this program; if not, write to the Free Software | |
36899 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
36900 | + **************************************************************************** | |
36901 | + */ | |
36902 | + | |
36903 | +#ifndef __CI_EFRM_NIC_TABLE_H__ | |
36904 | +#define __CI_EFRM_NIC_TABLE_H__ | |
36905 | + | |
36906 | +#include <ci/efhw/efhw_types.h> | |
36907 | +#include <ci/efrm/sysdep.h> | |
36908 | + | |
36909 | +/*-------------------------------------------------------------------- | |
36910 | + * | |
36911 | + * struct efrm_nic_table - top level driver object keeping all NICs - | |
36912 | + * implemented in driver_object.c | |
36913 | + * | |
36914 | + *--------------------------------------------------------------------*/ | |
36915 | + | |
36916 | +/*! Comment? */ | |
36917 | +struct efrm_nic_table { | |
36918 | + /*! nics attached to this driver */ | |
36919 | + struct efhw_nic *nic[EFHW_MAX_NR_DEVS]; | |
36920 | + /*! pointer to an arbitrary struct efhw_nic if one exists; | |
36921 | + * for code which does not care which NIC it wants but | |
36922 | + * still needs one. Note you cannot assume nic[0] exists. */ | |
36923 | + struct efhw_nic *a_nic; | |
36924 | + uint32_t nic_count; /*!< number of nics attached to this driver */ | |
36925 | + spinlock_t lock; /*!< lock for table modifications */ | |
36926 | + atomic_t ref_count; /*!< refcount for users of nic table */ | |
36927 | +}; | |
36928 | + | |
36929 | +/* Resource driver structures used by other drivers as well */ | |
36930 | +extern struct efrm_nic_table efrm_nic_table; | |
36931 | + | |
36932 | +static inline void efrm_nic_table_hold(void) | |
36933 | +{ | |
36934 | + atomic_inc(&efrm_nic_table.ref_count); | |
36935 | +} | |
36936 | + | |
36937 | +static inline void efrm_nic_table_rele(void) | |
36938 | +{ | |
36939 | + atomic_dec(&efrm_nic_table.ref_count); | |
36940 | +} | |
36941 | + | |
36942 | +static inline int efrm_nic_table_held(void) | |
36943 | +{ | |
36944 | + return (atomic_read(&efrm_nic_table.ref_count) != 0); | |
36945 | +} | |
36946 | + | |
36947 | +/* Run code block _x multiple times with variable nic set to each | |
36948 | + * registered NIC in turn. | |
36949 | + * DO NOT "break" out of this loop early. */ | |
36950 | +#define EFRM_FOR_EACH_NIC(_nic_i, _nic) \ | |
36951 | + for ((_nic_i) = (efrm_nic_table_hold(), 0); \ | |
36952 | + (_nic_i) < EFHW_MAX_NR_DEVS || (efrm_nic_table_rele(), 0); \ | |
36953 | + (_nic_i)++) \ | |
36954 | + if (((_nic) = efrm_nic_table.nic[_nic_i])) | |
36955 | + | |
36956 | +#define EFRM_FOR_EACH_NIC_IN_SET(_set, _i, _nic) \ | |
36957 | + for ((_i) = (efrm_nic_table_hold(), 0); \ | |
36958 | + (_i) < EFHW_MAX_NR_DEVS || (efrm_nic_table_rele(), 0); \ | |
36959 | + ++(_i)) \ | |
36960 | + if (((_nic) = efrm_nic_table.nic[_i]) && \ | |
36961 | + efrm_nic_set_read((_set), (_i))) | |
36962 | + | |
36963 | +#endif /* __CI_EFRM_NIC_TABLE_H__ */ | |
36964 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep.h | |
36965 | =================================================================== | |
36966 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
36967 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep.h 2008-02-20 09:32:49.000000000 +0100 | |
36968 | @@ -0,0 +1,54 @@ | |
36969 | +/**************************************************************************** | |
36970 | + * Driver for Solarflare network controllers - | |
36971 | + * resource management for Xen backend, OpenOnload, etc | |
36972 | + * (including support for SFE4001 10GBT NIC) | |
36973 | + * | |
36974 | + * This file provides Linux-like system-independent API for efrm library. | |
36975 | + * | |
36976 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
36977 | + * 9501 Jeronimo Road, Suite 250, | |
36978 | + * Irvine, CA 92618, USA | |
36979 | + * | |
36980 | + * Developed and maintained by Solarflare Communications: | |
36981 | + * <linux-xen-drivers@solarflare.com> | |
36982 | + * <onload-dev@solarflare.com> | |
36983 | + * | |
36984 | + * Certain parts of the driver were implemented by | |
36985 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
36986 | + * OKTET Labs Ltd, Russia, | |
36987 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
36988 | + * by request of Solarflare Communications | |
36989 | + * | |
36990 | + * | |
36991 | + * This program is free software; you can redistribute it and/or modify it | |
36992 | + * under the terms of the GNU General Public License version 2 as published | |
36993 | + * by the Free Software Foundation, incorporated herein by reference. | |
36994 | + * | |
36995 | + * This program is distributed in the hope that it will be useful, | |
36996 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
36997 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
36998 | + * GNU General Public License for more details. | |
36999 | + * | |
37000 | + * You should have received a copy of the GNU General Public License | |
37001 | + * along with this program; if not, write to the Free Software | |
37002 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
37003 | + **************************************************************************** | |
37004 | + */ | |
37005 | + | |
37006 | +#ifndef __CI_EFRM_SYSDEP_H__ | |
37007 | +#define __CI_EFRM_SYSDEP_H__ | |
37008 | + | |
37009 | +/* Spinlocks are defined in efhw/sysdep.h */ | |
37010 | +#include <ci/efhw/sysdep.h> | |
37011 | + | |
37012 | +#if defined(__linux__) && defined(__KERNEL__) | |
37013 | + | |
37014 | +# include <ci/efrm/sysdep_linux.h> | |
37015 | + | |
37016 | +#else | |
37017 | + | |
37018 | +# include <ci/efrm/sysdep_ci2linux.h> | |
37019 | + | |
37020 | +#endif | |
37021 | + | |
37022 | +#endif /* __CI_EFRM_SYSDEP_H__ */ | |
37023 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep_linux.h | |
37024 | =================================================================== | |
37025 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
37026 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/efrm/sysdep_linux.h 2008-02-20 09:32:49.000000000 +0100 | |
37027 | @@ -0,0 +1,248 @@ | |
37028 | +/**************************************************************************** | |
37029 | + * Driver for Solarflare network controllers - | |
37030 | + * resource management for Xen backend, OpenOnload, etc | |
37031 | + * (including support for SFE4001 10GBT NIC) | |
37032 | + * | |
37033 | + * This file provides version-independent Linux kernel API for efrm library. | |
37034 | + * Only kernels >=2.6.9 are supported. | |
37035 | + * | |
37036 | + * Copyright 2005-2007: Solarflare Communications Inc, | |
37037 | + * 9501 Jeronimo Road, Suite 250, | |
37038 | + * Irvine, CA 92618, USA | |
37039 | + * | |
37040 | + * Kfifo API is partially stolen from linux-2.6.22/include/linux/list.h | |
37041 | + * Copyright (C) 2004 Stelian Pop <stelian@popies.net> | |
37042 | + * | |
37043 | + * Developed and maintained by Solarflare Communications: | |
37044 | + * <linux-xen-drivers@solarflare.com> | |
37045 | + * <onload-dev@solarflare.com> | |
37046 | + * | |
37047 | + * Certain parts of the driver were implemented by | |
37048 | + * Alexandra Kossovsky <Alexandra.Kossovsky@oktetlabs.ru> | |
37049 | + * OKTET Labs Ltd, Russia, | |
37050 | + * http://oktetlabs.ru, <info@oktetlabs.ru> | |
37051 | + * by request of Solarflare Communications | |
37052 | + * | |
37053 | + * | |
37054 | + * This program is free software; you can redistribute it and/or modify it | |
37055 | + * under the terms of the GNU General Public License version 2 as published | |
37056 | + * by the Free Software Foundation, incorporated herein by reference. | |
37057 | + * | |
37058 | + * This program is distributed in the hope that it will be useful, | |
37059 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
37060 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
37061 | + * GNU General Public License for more details. | |
37062 | + * | |
37063 | + * You should have received a copy of the GNU General Public License | |
37064 | + * along with this program; if not, write to the Free Software | |
37065 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
37066 | + **************************************************************************** | |
37067 | + */ | |
37068 | + | |
37069 | +#ifndef __CI_EFRM_SYSDEP_LINUX_H__ | |
37070 | +#define __CI_EFRM_SYSDEP_LINUX_H__ | |
37071 | + | |
37072 | +#include <linux/version.h> | |
37073 | +#include <linux/list.h> | |
37074 | +#include <linux/vmalloc.h> | |
37075 | +#include <linux/errno.h> | |
37076 | +#include <linux/string.h> | |
37077 | +#include <linux/workqueue.h> | |
37078 | +#include <linux/gfp.h> | |
37079 | +#include <linux/slab.h> | |
37080 | +#include <linux/hardirq.h> | |
37081 | +#include <linux/kernel.h> | |
37082 | +#include <linux/if_ether.h> | |
37083 | +#include <linux/completion.h> | |
37084 | +#include <linux/in.h> | |
37085 | + | |
37086 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
37087 | +/* get roundup_pow_of_two(), which was in kernel.h in early kernel versions */ | |
37088 | +#include <linux/log2.h> | |
37089 | +#endif | |
37090 | + | |
37091 | +/******************************************************************** | |
37092 | + * | |
37093 | + * List API | |
37094 | + * | |
37095 | + ********************************************************************/ | |
37096 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) | |
37097 | +static inline void | |
37098 | +list_replace_init(struct list_head *old, struct list_head *new) | |
37099 | +{ | |
37100 | + new->next = old->next; | |
37101 | + new->next->prev = new; | |
37102 | + new->prev = old->prev; | |
37103 | + new->prev->next = new; | |
37104 | + INIT_LIST_HEAD(old); | |
37105 | +} | |
37106 | +#endif | |
37107 | + | |
37108 | +static inline struct list_head *list_pop(struct list_head *list) | |
37109 | +{ | |
37110 | + struct list_head *link = list->next; | |
37111 | + list_del(link); | |
37112 | + return link; | |
37113 | +} | |
37114 | + | |
37115 | +static inline struct list_head *list_pop_tail(struct list_head *list) | |
37116 | +{ | |
37117 | + struct list_head *link = list->prev; | |
37118 | + list_del(link); | |
37119 | + return link; | |
37120 | +} | |
37121 | + | |
37122 | +/******************************************************************** | |
37123 | + * | |
37124 | + * Workqueue API | |
37125 | + * | |
37126 | + ********************************************************************/ | |
37127 | + | |
37128 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) | |
37129 | +#define NEED_OLD_WORK_API | |
37130 | + | |
37131 | +/** | |
37132 | + * The old and new work function prototypes just change | |
37133 | + * the type of the pointer in the only argument, so it's | |
37134 | + * safe to cast one function type to the other | |
37135 | + */ | |
37136 | +typedef void (*efrm_old_work_func_t) (void *p); | |
37137 | + | |
37138 | +#undef INIT_WORK | |
37139 | +#define INIT_WORK(_work, _func) \ | |
37140 | + do { \ | |
37141 | + INIT_LIST_HEAD(&(_work)->entry); \ | |
37142 | + (_work)->pending = 0; \ | |
37143 | + PREPARE_WORK((_work), \ | |
37144 | + (efrm_old_work_func_t) (_func), \ | |
37145 | + (_work)); \ | |
37146 | + } while (0) | |
37147 | + | |
37148 | +#endif | |
37149 | + | |
37150 | +/******************************************************************** | |
37151 | + * | |
37152 | + * Kfifo API | |
37153 | + * | |
37154 | + ********************************************************************/ | |
37155 | + | |
37156 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) | |
37157 | + | |
37158 | +#if !defined(RHEL_RELEASE_CODE) || (RHEL_RELEASE_CODE < 1029) | |
37159 | +typedef unsigned gfp_t; | |
37160 | +#endif | |
37161 | + | |
37162 | +#define HAS_NO_KFIFO | |
37163 | + | |
37164 | +struct kfifo { | |
37165 | + unsigned char *buffer; /* the buffer holding the data */ | |
37166 | + unsigned int size; /* the size of the allocated buffer */ | |
37167 | + unsigned int in; /* data is added at offset (in % size) */ | |
37168 | + unsigned int out; /* data is extracted from off. (out % size) */ | |
37169 | + spinlock_t *lock; /* protects concurrent modifications */ | |
37170 | +}; | |
37171 | + | |
37172 | +extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, | |
37173 | + gfp_t gfp_mask, spinlock_t *lock); | |
37174 | +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, | |
37175 | + spinlock_t *lock); | |
37176 | +extern void kfifo_free(struct kfifo *fifo); | |
37177 | +extern unsigned int __kfifo_put(struct kfifo *fifo, | |
37178 | + unsigned char *buffer, unsigned int len); | |
37179 | +extern unsigned int __kfifo_get(struct kfifo *fifo, | |
37180 | + unsigned char *buffer, unsigned int len); | |
37181 | + | |
37182 | +/** | |
37183 | + * kfifo_put - puts some data into the FIFO | |
37184 | + * @fifo: the fifo to be used. | |
37185 | + * @buffer: the data to be added. | |
37186 | + * @len: the length of the data to be added. | |
37187 | + * | |
37188 | + * This function copies at most @len bytes from the @buffer into | |
37189 | + * the FIFO depending on the free space, and returns the number of | |
37190 | + * bytes copied. | |
37191 | + */ | |
37192 | +static inline unsigned int | |
37193 | +kfifo_put(struct kfifo *fifo, unsigned char *buffer, unsigned int len) | |
37194 | +{ | |
37195 | + unsigned long flags; | |
37196 | + unsigned int ret; | |
37197 | + | |
37198 | + spin_lock_irqsave(fifo->lock, flags); | |
37199 | + | |
37200 | + ret = __kfifo_put(fifo, buffer, len); | |
37201 | + | |
37202 | + spin_unlock_irqrestore(fifo->lock, flags); | |
37203 | + | |
37204 | + return ret; | |
37205 | +} | |
37206 | + | |
37207 | +/** | |
37208 | + * kfifo_get - gets some data from the FIFO | |
37209 | + * @fifo: the fifo to be used. | |
37210 | + * @buffer: where the data must be copied. | |
37211 | + * @len: the size of the destination buffer. | |
37212 | + * | |
37213 | + * This function copies at most @len bytes from the FIFO into the | |
37214 | + * @buffer and returns the number of copied bytes. | |
37215 | + */ | |
37216 | +static inline unsigned int | |
37217 | +kfifo_get(struct kfifo *fifo, unsigned char *buffer, unsigned int len) | |
37218 | +{ | |
37219 | + unsigned long flags; | |
37220 | + unsigned int ret; | |
37221 | + | |
37222 | + spin_lock_irqsave(fifo->lock, flags); | |
37223 | + | |
37224 | + ret = __kfifo_get(fifo, buffer, len); | |
37225 | + | |
37226 | + /* | |
37227 | + * optimization: if the FIFO is empty, set the indices to 0 | |
37228 | + * so we don't wrap the next time | |
37229 | + */ | |
37230 | + if (fifo->in == fifo->out) | |
37231 | + fifo->in = fifo->out = 0; | |
37232 | + | |
37233 | + spin_unlock_irqrestore(fifo->lock, flags); | |
37234 | + | |
37235 | + return ret; | |
37236 | +} | |
37237 | + | |
37238 | +/** | |
37239 | + * __kfifo_len - returns the number of bytes available in the FIFO, no locking version | |
37240 | + * @fifo: the fifo to be used. | |
37241 | + */ | |
37242 | +static inline unsigned int __kfifo_len(struct kfifo *fifo) | |
37243 | +{ | |
37244 | + return fifo->in - fifo->out; | |
37245 | +} | |
37246 | + | |
37247 | +/** | |
37248 | + * kfifo_len - returns the number of bytes available in the FIFO | |
37249 | + * @fifo: the fifo to be used. | |
37250 | + */ | |
37251 | +static inline unsigned int kfifo_len(struct kfifo *fifo) | |
37252 | +{ | |
37253 | + unsigned long flags; | |
37254 | + unsigned int ret; | |
37255 | + | |
37256 | + spin_lock_irqsave(fifo->lock, flags); | |
37257 | + | |
37258 | + ret = __kfifo_len(fifo); | |
37259 | + | |
37260 | + spin_unlock_irqrestore(fifo->lock, flags); | |
37261 | + | |
37262 | + return ret; | |
37263 | +} | |
37264 | + | |
37265 | +#else | |
37266 | +#include <linux/kfifo.h> | |
37267 | +#endif | |
37268 | + | |
37269 | +static inline void kfifo_vfree(struct kfifo *fifo) | |
37270 | +{ | |
37271 | + vfree(fifo->buffer); | |
37272 | + kfree(fifo); | |
37273 | +} | |
37274 | + | |
37275 | +#endif /* __CI_EFRM_SYSDEP_LINUX_H__ */ | |
37276 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/config.h | |
37277 | =================================================================== | |
37278 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
37279 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/config.h 2008-02-20 09:32:49.000000000 +0100 | |
37280 | @@ -0,0 +1,49 @@ | |
37281 | +/**************************************************************************** | |
37282 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
37283 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
37284 | + * 9501 Jeronimo Road, Suite 250, | |
37285 | + * Irvine, CA 92618, USA | |
37286 | + * | |
37287 | + * Maintained by Solarflare Communications | |
37288 | + * <linux-xen-drivers@solarflare.com> | |
37289 | + * <onload-dev@solarflare.com> | |
37290 | + * | |
37291 | + * This program is free software; you can redistribute it and/or modify it | |
37292 | + * under the terms of the GNU General Public License version 2 as published | |
37293 | + * by the Free Software Foundation, incorporated herein by reference. | |
37294 | + * | |
37295 | + * This program is distributed in the hope that it will be useful, | |
37296 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
37297 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
37298 | + * GNU General Public License for more details. | |
37299 | + * | |
37300 | + * You should have received a copy of the GNU General Public License | |
37301 | + * along with this program; if not, write to the Free Software | |
37302 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
37303 | + **************************************************************************** | |
37304 | + */ | |
37305 | + | |
37306 | +/*! \cidoxg_include_ci_tools */ | |
37307 | + | |
37308 | +#ifndef __CI_TOOLS_CONFIG_H__ | |
37309 | +#define __CI_TOOLS_CONFIG_H__ | |
37310 | + | |
37311 | + | |
37312 | +/********************************************************************** | |
37313 | + * Debugging. | |
37314 | + */ | |
37315 | + | |
37316 | +#define CI_INCLUDE_ASSERT_VALID 0 | |
37317 | + | |
37318 | +/* Set non-zero to allow info about who has allocated what to appear in | |
37319 | + * /proc/drivers/level5/mem. | |
37320 | + * However - Note that doing so can lead to segfault when you unload the | |
37321 | + * driver, and other weirdness. i.e. I don't think the code for is quite | |
37322 | + * right (written by Oktet, hacked by gel), but it does work well enough to be | |
37323 | + * useful. | |
37324 | + */ | |
37325 | +#define CI_MEMLEAK_DEBUG_ALLOC_TABLE 0 | |
37326 | + | |
37327 | + | |
37328 | +#endif /* __CI_TOOLS_CONFIG_H__ */ | |
37329 | +/*! \cidoxg_end */ | |
37330 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/debug.h | |
37331 | =================================================================== | |
37332 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
37333 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/debug.h 2008-02-20 09:32:49.000000000 +0100 | |
37334 | @@ -0,0 +1,336 @@ | |
37335 | +/**************************************************************************** | |
37336 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
37337 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
37338 | + * 9501 Jeronimo Road, Suite 250, | |
37339 | + * Irvine, CA 92618, USA | |
37340 | + * | |
37341 | + * Maintained by Solarflare Communications | |
37342 | + * <linux-xen-drivers@solarflare.com> | |
37343 | + * <onload-dev@solarflare.com> | |
37344 | + * | |
37345 | + * This program is free software; you can redistribute it and/or modify it | |
37346 | + * under the terms of the GNU General Public License version 2 as published | |
37347 | + * by the Free Software Foundation, incorporated herein by reference. | |
37348 | + * | |
37349 | + * This program is distributed in the hope that it will be useful, | |
37350 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
37351 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
37352 | + * GNU General Public License for more details. | |
37353 | + * | |
37354 | + * You should have received a copy of the GNU General Public License | |
37355 | + * along with this program; if not, write to the Free Software | |
37356 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
37357 | + **************************************************************************** | |
37358 | + */ | |
37359 | + | |
37360 | +/*! \cidoxg_include_ci_tools */ | |
37361 | + | |
37362 | +#ifndef __CI_TOOLS_DEBUG_H__ | |
37363 | +#define __CI_TOOLS_DEBUG_H__ | |
37364 | + | |
37365 | +#define CI_LOG_E(x) x /* errors */ | |
37366 | +#define CI_LOG_W(x) x /* warnings */ | |
37367 | +#define CI_LOG_I(x) x /* information */ | |
37368 | +#define CI_LOG_V(x) x /* verbose */ | |
37369 | + | |
37370 | +/* Build time asserts. We paste the line number into the type name | |
37371 | + * so that the macro can be used more than once per file even if the | |
37372 | + * compiler objects to multiple identical typedefs. Collisions | |
37373 | + * between use in different header files is still possible. */ | |
37374 | +#ifndef CI_BUILD_ASSERT | |
37375 | +#define __CI_BUILD_ASSERT_NAME(_x) __CI_BUILD_ASSERT_ILOATHECPP(_x) | |
37376 | +#define __CI_BUILD_ASSERT_ILOATHECPP(_x) __CI_BUILD_ASSERT__ ##_x | |
37377 | +#define CI_BUILD_ASSERT(e)\ | |
37378 | + typedef char __CI_BUILD_ASSERT_NAME(__LINE__)[(e)?1:-1] | |
37379 | +#endif | |
37380 | + | |
37381 | + | |
37382 | +#ifdef NDEBUG | |
37383 | + | |
37384 | +# define _ci_check(exp, file, line) | |
37385 | +# define _ci_assert2(e, x, y, file, line) | |
37386 | +# define _ci_assert(exp, file, line) | |
37387 | +# define _ci_assert_equal(exp1, exp2, file, line) | |
37388 | +# define _ci_assert_equiv(exp1, exp2, file, line) | |
37389 | +# define _ci_assert_nequal(exp1, exp2, file, line) | |
37390 | +# define _ci_assert_le(exp1, exp2, file, line) | |
37391 | +# define _ci_assert_lt(exp1, exp2, file, line) | |
37392 | +# define _ci_assert_ge(exp1, exp2, file, line) | |
37393 | +# define _ci_assert_gt(exp1, exp2, file, line) | |
37394 | +# define _ci_assert_impl(exp1, exp2, file, line) | |
37395 | + | |
37396 | +# define _ci_verify(exp, file, line) \ | |
37397 | + do { \ | |
37398 | + (void)(exp); \ | |
37399 | + } while (0) | |
37400 | + | |
37401 | +# define CI_DEBUG_TRY(exp) \ | |
37402 | + do { \ | |
37403 | + (void)(exp); \ | |
37404 | + } while (0) | |
37405 | + | |
37406 | +#define CI_TRACE(exp,fmt) | |
37407 | +#define CI_TRACE_INT(integer) | |
37408 | +#define CI_TRACE_INT32(integer) | |
37409 | +#define CI_TRACE_INT64(integer) | |
37410 | +#define CI_TRACE_UINT(integer) | |
37411 | +#define CI_TRACE_UINT32(integer) | |
37412 | +#define CI_TRACE_UINT64(integer) | |
37413 | +#define CI_TRACE_HEX(integer) | |
37414 | +#define CI_TRACE_HEX32(integer) | |
37415 | +#define CI_TRACE_HEX64(integer) | |
37416 | +#define CI_TRACE_PTR(pointer) | |
37417 | +#define CI_TRACE_STRING(string) | |
37418 | +#define CI_TRACE_MAC(mac) | |
37419 | +#define CI_TRACE_IP(ip_be32) | |
37420 | +#define CI_TRACE_ARP(arp_pkt) | |
37421 | + | |
37422 | +#else | |
37423 | + | |
37424 | +# define _CI_ASSERT_FMT "\nfrom %s:%d" | |
37425 | + | |
37426 | +# define _ci_check(exp, file, line) \ | |
37427 | + do { \ | |
37428 | + if (CI_UNLIKELY(!(exp))) \ | |
37429 | + ci_warn(("ci_check(%s)"_CI_ASSERT_FMT, #exp, \ | |
37430 | + (file), (line))); \ | |
37431 | + } while (0) | |
37432 | + | |
37433 | +/* | |
37434 | + * NOTE: ci_fail() emits the file and line where the assert is actually | |
37435 | + * coded. | |
37436 | + */ | |
37437 | + | |
37438 | +# define _ci_assert(exp, file, line) \ | |
37439 | + do { \ | |
37440 | + if (CI_UNLIKELY(!(exp))) \ | |
37441 | + ci_fail(("ci_assert(%s)"_CI_ASSERT_FMT, #exp, \ | |
37442 | + (file), (line))); \ | |
37443 | + } while (0) | |
37444 | + | |
37445 | +# define _ci_assert2(e, x, y, file, line) do { \ | |
37446 | + if(CI_UNLIKELY( ! (e) )) \ | |
37447 | + ci_fail(("ci_assert(%s)\nwhere [%s=%"CI_PRIx64"] " \ | |
37448 | + "[%s=%"CI_PRIx64"]\nat %s:%d\nfrom %s:%d", #e \ | |
37449 | + , #x, (ci_uint64)(ci_uintptr_t)(x) \ | |
37450 | + , #y, (ci_uint64)(ci_uintptr_t)(y), \ | |
37451 | + __FILE__, __LINE__, (file), (line))); \ | |
37452 | + } while (0) | |
37453 | + | |
37454 | +# define _ci_verify(exp, file, line) \ | |
37455 | + do { \ | |
37456 | + if (CI_UNLIKELY(!(exp))) \ | |
37457 | + ci_fail(("ci_verify(%s)"_CI_ASSERT_FMT, #exp, \ | |
37458 | + (file), (line))); \ | |
37459 | + } while (0) | |
37460 | + | |
37461 | +# define _ci_assert_equal(x, y, f, l) _ci_assert2((x)==(y), x, y, (f), (l)) | |
37462 | +# define _ci_assert_nequal(x, y, f, l) _ci_assert2((x)!=(y), x, y, (f), (l)) | |
37463 | +# define _ci_assert_le(x, y, f, l) _ci_assert2((x)<=(y), x, y, (f), (l)) | |
37464 | +# define _ci_assert_lt(x, y, f, l) _ci_assert2((x)< (y), x, y, (f), (l)) | |
37465 | +# define _ci_assert_ge(x, y, f, l) _ci_assert2((x)>=(y), x, y, (f), (l)) | |
37466 | +# define _ci_assert_gt(x, y, f, l) _ci_assert2((x)> (y), x, y, (f), (l)) | |
37467 | +# define _ci_assert_or(x, y, f, l) _ci_assert2((x)||(y), x, y, (f), (l)) | |
37468 | +# define _ci_assert_impl(x, y, f, l) _ci_assert2(!(x) || (y), x, y, (f), (l)) | |
37469 | +# define _ci_assert_equiv(x, y, f, l) _ci_assert2(!(x)== !(y), x, y, (f), (l)) | |
37470 | + | |
37471 | +#define _ci_assert_equal_msg(exp1, exp2, msg, file, line) \ | |
37472 | + do { \ | |
37473 | + if (CI_UNLIKELY((exp1)!=(exp2))) \ | |
37474 | + ci_fail(("ci_assert_equal_msg(%s == %s) were " \ | |
37475 | + "(%"CI_PRIx64":%"CI_PRIx64") with msg[%c%c%c%c]" \ | |
37476 | + _CI_ASSERT_FMT, #exp1, #exp2, \ | |
37477 | + (ci_uint64)(ci_uintptr_t)(exp1), \ | |
37478 | + (ci_uint64)(ci_uintptr_t)(exp2), \ | |
37483 37479 | + (((ci_uint32)msg) >> 24) & 0xff, \ | |
37484 37480 | + (((ci_uint32)msg) >> 16) & 0xff, \ | |
37485 37481 | + (((ci_uint32)msg) >> 8 ) & 0xff, \ | |
37486 37482 | + (((ci_uint32)msg) ) & 0xff, \ | |
37483 | + (file), (line))); \ | |
37484 | + } while (0) | |
37485 | + | |
37486 | +# define CI_DEBUG_TRY(exp) CI_TRY(exp) | |
37487 | + | |
37488 | +#define CI_TRACE(exp,fmt) \ | |
37489 | + ci_log("%s:%d:%s] " #exp "=" fmt, \ | |
37490 | + __FILE__, __LINE__, __FUNCTION__, (exp)) | |
37491 | + | |
37492 | + | |
37493 | +#define CI_TRACE_INT(integer) \ | |
37494 | + ci_log("%s:%d:%s] " #integer "=%d", \ | |
37495 | + __FILE__, __LINE__, __FUNCTION__, (integer)) | |
37496 | + | |
37497 | + | |
37498 | +#define CI_TRACE_INT32(integer) \ | |
37499 | + ci_log("%s:%d:%s] " #integer "=%d", \ | |
37500 | + __FILE__, __LINE__, __FUNCTION__, ((ci_int32)integer)) | |
37501 | + | |
37502 | + | |
37503 | +#define CI_TRACE_INT64(integer) \ | |
37504 | + ci_log("%s:%d:%s] " #integer "=%lld", \ | |
37505 | + __FILE__, __LINE__, __FUNCTION__, ((ci_int64)integer)) | |
37506 | + | |
37507 | + | |
37508 | +#define CI_TRACE_UINT(integer) \ | |
37513 37509 | + ci_log("%s:%d:%s] " #integer "=%u", \ | |
37510 | + __FILE__, __LINE__, __FUNCTION__, (integer)) | |
37511 | + | |
37512 | + | |
37513 | +#define CI_TRACE_UINT32(integer) \ | |
37518 37514 | + ci_log("%s:%d:%s] " #integer "=%u", \ | |
37515 | + __FILE__, __LINE__, __FUNCTION__, ((ci_uint32)integer)) | |
37516 | + | |
37517 | + | |
37518 | +#define CI_TRACE_UINT64(integer) \ | |
37523 37519 | + ci_log("%s:%d:%s] " #integer "=%llu", \ | |
37520 | + __FILE__, __LINE__, __FUNCTION__, ((ci_uint64)integer)) | |
37521 | + | |
37522 | + | |
37523 | +#define CI_TRACE_HEX(integer) \ | |
37524 | + ci_log("%s:%d:%s] " #integer "=0x%x", \ | |
37525 | + __FILE__, __LINE__, __FUNCTION__, (integer)) | |
37526 | + | |
37527 | + | |
37528 | +#define CI_TRACE_HEX32(integer) \ | |
37529 | + ci_log("%s:%d:%s] " #integer "=0x%x", \ | |
37530 | + __FILE__, __LINE__, __FUNCTION__, ((ci_uint32)integer)) | |
37531 | + | |
37532 | + | |
37533 | +#define CI_TRACE_HEX64(integer) \ | |
37534 | + ci_log("%s:%d:%s] " #integer "=0x%llx", \ | |
37535 | + __FILE__, __LINE__, __FUNCTION__, ((ci_uint64)integer)) | |
37536 | + | |
37537 | + | |
37538 | +#define CI_TRACE_PTR(pointer) \ | |
37539 | + ci_log("%s:%d:%s] " #pointer "=0x%p", \ | |
37540 | + __FILE__, __LINE__, __FUNCTION__, (pointer)) | |
37541 | + | |
37542 | + | |
37543 | +#define CI_TRACE_STRING(string) \ | |
37544 | + ci_log("%s:%d:%s] " #string "=%s", \ | |
37545 | + __FILE__, __LINE__, __FUNCTION__, (string)) | |
37546 | + | |
37547 | + | |
37548 | +#define CI_TRACE_MAC(mac) \ | |
37549 | + ci_log("%s:%d:%s] " #mac "=" CI_MAC_PRINTF_FORMAT, \ | |
37550 | + __FILE__, __LINE__, __FUNCTION__, CI_MAC_PRINTF_ARGS(mac)) | |
37551 | + | |
37552 | + | |
37553 | +#define CI_TRACE_IP(ip_be32) \ | |
37554 | + ci_log("%s:%d:%s] " #ip_be32 "=" CI_IP_PRINTF_FORMAT, __FILE__, \ | |
37555 | + __LINE__, __FUNCTION__, CI_IP_PRINTF_ARGS(&(ip_be32))) | |
37556 | + | |
37557 | + | |
37558 | +#define CI_TRACE_ARP(arp_pkt) \ | |
37559 | + ci_log("%s:%d:%s]\n"CI_ARP_PRINTF_FORMAT, \ | |
37560 | + __FILE__, __LINE__, __FUNCTION__, CI_ARP_PRINTF_ARGS(arp_pkt)) | |
37561 | + | |
37562 | +#endif /* NDEBUG */ | |
37563 | + | |
37564 | +#define ci_check(exp) \ | |
37565 | + _ci_check(exp, __FILE__, __LINE__) | |
37566 | + | |
37567 | +#define ci_assert(exp) \ | |
37568 | + _ci_assert(exp, __FILE__, __LINE__) | |
37569 | + | |
37570 | +#define ci_verify(exp) \ | |
37571 | + _ci_verify(exp, __FILE__, __LINE__) | |
37572 | + | |
37573 | +#define ci_assert_equal(exp1, exp2) \ | |
37574 | + _ci_assert_equal(exp1, exp2, __FILE__, __LINE__) | |
37575 | + | |
37576 | +#define ci_assert_equal_msg(exp1, exp2, msg) \ | |
37577 | + _ci_assert_equal_msg(exp1, exp2, msg, __FILE__, __LINE__) | |
37578 | + | |
37579 | +#define ci_assert_nequal(exp1, exp2) \ | |
37580 | + _ci_assert_nequal(exp1, exp2, __FILE__, __LINE__) | |
37581 | + | |
37582 | +#define ci_assert_le(exp1, exp2) \ | |
37583 | + _ci_assert_le(exp1, exp2, __FILE__, __LINE__) | |
37584 | + | |
37585 | +#define ci_assert_lt(exp1, exp2) \ | |
37586 | + _ci_assert_lt(exp1, exp2, __FILE__, __LINE__) | |
37587 | + | |
37588 | +#define ci_assert_ge(exp1, exp2) \ | |
37589 | + _ci_assert_ge(exp1, exp2, __FILE__, __LINE__) | |
37590 | + | |
37591 | +#define ci_assert_gt(exp1, exp2) \ | |
37592 | + _ci_assert_gt(exp1, exp2, __FILE__, __LINE__) | |
37593 | + | |
37594 | +#define ci_assert_impl(exp1, exp2) \ | |
37595 | + _ci_assert_impl(exp1, exp2, __FILE__, __LINE__) | |
37596 | + | |
37597 | +#define ci_assert_equiv(exp1, exp2) \ | |
37598 | + _ci_assert_equiv(exp1, exp2, __FILE__, __LINE__) | |
37599 | + | |
37600 | + | |
37601 | +#define CI_TEST(exp) \ | |
37602 | + do{ \ | |
37603 | + if( CI_UNLIKELY(!(exp)) ) \ | |
37604 | + ci_fail(("CI_TEST(%s)", #exp)); \ | |
37605 | + }while(0) | |
37606 | + | |
37607 | + | |
37608 | +#define CI_TRY(exp) \ | |
37609 | + do{ \ | |
37610 | + int _trc; \ | |
37611 | + _trc=(exp); \ | |
37612 | + if( CI_UNLIKELY(_trc < 0) ) \ | |
37613 | + ci_sys_fail(#exp, _trc); \ | |
37614 | + }while(0) | |
37615 | + | |
37616 | + | |
37617 | +#define CI_TRY_RET(exp) \ | |
37618 | + do{ \ | |
37619 | + int _trc; \ | |
37620 | + _trc=(exp); \ | |
37621 | + if( CI_UNLIKELY(_trc < 0) ) { \ | |
37622 | + ci_log("%s returned %d at %s:%d", #exp, _trc, __FILE__, __LINE__); \ | |
37623 | + return _trc; \ | |
37624 | + } \ | |
37625 | + }while(0) | |
37626 | + | |
37627 | +#define CI_LOGLEVEL_TRY_RET(logfn, exp) \ | |
37628 | + do{ \ | |
37629 | + int _trc; \ | |
37630 | + _trc=(exp); \ | |
37631 | + if( CI_UNLIKELY(_trc < 0) ) { \ | |
37632 | + logfn (ci_log("%s returned %d at %s:%d", #exp, _trc, __FILE__, __LINE__)); \ | |
37633 | + return _trc; \ | |
37634 | + } \ | |
37635 | + }while(0) | |
37636 | + | |
37637 | + | |
37638 | +#define CI_SOCK_TRY(exp) \ | |
37639 | + do{ \ | |
37640 | + ci_sock_err_t _trc; \ | |
37641 | + _trc=(exp); \ | |
37642 | + if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) \ | |
37643 | + ci_sys_fail(#exp, _trc.val); \ | |
37644 | + }while(0) | |
37645 | + | |
37646 | + | |
37647 | +#define CI_SOCK_TRY_RET(exp) \ | |
37648 | + do{ \ | |
37649 | + ci_sock_err_t _trc; \ | |
37650 | + _trc=(exp); \ | |
37651 | + if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) { \ | |
37652 | + ci_log("%s returned %d at %s:%d", #exp, _trc.val, __FILE__, __LINE__); \ | |
37653 | + return ci_sock_errcode(_trc); \ | |
37654 | + } \ | |
37655 | + }while(0) | |
37656 | + | |
37657 | + | |
37658 | +#define CI_SOCK_TRY_SOCK_RET(exp) \ | |
37659 | + do{ \ | |
37660 | + ci_sock_err_t _trc; \ | |
37661 | + _trc=(exp); \ | |
37662 | + if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) { \ | |
37663 | + ci_log("%s returned %d at %s:%d", #exp, _trc.val, __FILE__, __LINE__); \ | |
37664 | + return _trc; \ | |
37665 | + } \ | |
37666 | + }while(0) | |
37667 | + | |
37668 | +#endif /* __CI_TOOLS_DEBUG_H__ */ | |
37669 | + | |
37670 | +/*! \cidoxg_end */ | |
37671 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/log.h | |
37672 | =================================================================== | |
37673 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
37674 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/log.h 2008-02-20 09:32:49.000000000 +0100 | |
37675 | @@ -0,0 +1,262 @@ | |
37676 | +/**************************************************************************** | |
37677 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
37678 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
37679 | + * 9501 Jeronimo Road, Suite 250, | |
37680 | + * Irvine, CA 92618, USA | |
37681 | + * | |
37682 | + * Maintained by Solarflare Communications | |
37683 | + * <linux-xen-drivers@solarflare.com> | |
37684 | + * <onload-dev@solarflare.com> | |
37685 | + * | |
37686 | + * This program is free software; you can redistribute it and/or modify it | |
37687 | + * under the terms of the GNU General Public License version 2 as published | |
37688 | + * by the Free Software Foundation, incorporated herein by reference. | |
37689 | + * | |
37690 | + * This program is distributed in the hope that it will be useful, | |
37691 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
37692 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
37693 | + * GNU General Public License for more details. | |
37694 | + * | |
37695 | + * You should have received a copy of the GNU General Public License | |
37696 | + * along with this program; if not, write to the Free Software | |
37697 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
37698 | + **************************************************************************** | |
37699 | + */ | |
37700 | + | |
37701 | +/* | |
37702 | + * \author djr | |
37703 | + * \brief Functions for logging and pretty-printing. | |
37704 | + * \date 2002/08/07 | |
37705 | + */ | |
37706 | + | |
37707 | +/*! \cidoxg_include_ci_tools */ | |
37708 | + | |
37709 | +#ifndef __CI_TOOLS_LOG_H__ | |
37710 | +#define __CI_TOOLS_LOG_H__ | |
37711 | + | |
37712 | +#include <stdarg.h> | |
37713 | + | |
37714 | + | |
37715 | +/********************************************************************** | |
37716 | + * Logging. | |
37717 | + */ | |
37718 | + | |
37719 | +/* size of internal log buffer */ | |
37720 | +#define CI_LOG_MAX_LINE 512 | |
37721 | +/* uses of ci_log must ensure that all trace messages are shorter than this */ | |
37722 | +#define CI_LOG_MAX_MSG_LENGTH (CI_LOG_MAX_LINE-50) | |
37723 | + | |
37724 | +extern void ci_vlog(const char* fmt, va_list args) CI_HF; | |
37725 | +extern void ci_log(const char* fmt, ...) CI_PRINTF_LIKE(1,2) CI_HF; | |
37726 | + | |
37727 | + /*! Set the prefix for log messages. | |
37728 | + ** | |
37729 | + ** Uses the storage pointed to by \em prefix. Therefore \em prefix must | |
37730 | + ** be allocated on the heap, or statically. | |
37731 | + */ | |
37732 | +extern void ci_set_log_prefix(const char* prefix) CI_HF; | |
37733 | + | |
37734 | +typedef void (*ci_log_fn_t)(const char* msg); | |
37735 | +extern ci_log_fn_t ci_log_fn CI_HV; | |
37736 | + | |
37737 | +/* Log functions. */ | |
37738 | +extern void ci_log_null(const char* msg) CI_HF; | |
37739 | +extern void ci_log_stderr(const char* msg) CI_HF; | |
37740 | +extern void ci_log_stdout(const char* msg) CI_HF; | |
37741 | +extern void ci_log_syslog(const char* msg) CI_HF; | |
37742 | + | |
37743 | +/*! Call the following to install special logging behaviours. */ | |
37744 | +extern void ci_log_buffer_till_fail(void) CI_HF; | |
37745 | +extern void ci_log_buffer_till_exit(void) CI_HF; | |
37746 | + | |
37747 | +extern void __ci_log_unique(const char* msg) CI_HF; | |
37748 | +extern ci_log_fn_t __ci_log_unique_fn CI_HV; | |
37749 | +ci_inline void ci_log_uniquify(void) { | |
37750 | + if( ci_log_fn != __ci_log_unique ) { | |
37751 | + __ci_log_unique_fn = ci_log_fn; | |
37752 | + ci_log_fn = __ci_log_unique; | |
37753 | + } | |
37754 | +} | |
37755 | + | |
37756 | +extern void ci_log_file(const char* msg) CI_HF; | |
37757 | +extern int ci_log_file_fd CI_HV; | |
37758 | + | |
37759 | +extern void __ci_log_nth(const char* msg) CI_HF; | |
37760 | +extern ci_log_fn_t __ci_log_nth_fn CI_HV; | |
37761 | +extern int ci_log_nth_n CI_HV; /* default 100 */ | |
37762 | +ci_inline void ci_log_nth(void) { | |
37763 | + if( ci_log_fn != __ci_log_nth ) { | |
37764 | + __ci_log_nth_fn = ci_log_fn; | |
37765 | + ci_log_fn = __ci_log_nth; | |
37766 | + } | |
37767 | +} | |
37768 | + | |
37769 | +extern int ci_log_level CI_HV; | |
37770 | + | |
37771 | +extern int ci_log_options CI_HV; | |
37772 | +#define CI_LOG_PID 0x1 | |
37773 | +#define CI_LOG_TID 0x2 | |
37774 | +#define CI_LOG_TIME 0x4 | |
37775 | +#define CI_LOG_DELTA 0x8 | |
37776 | + | |
37777 | +/********************************************************************** | |
37778 | + * Used to define which mode we are in | |
37779 | + */ | |
37780 | +#if (defined(_WIN32) && !defined(__KERNEL__)) | |
37781 | +typedef enum { | |
37782 | + ci_log_md_NULL=0, | |
37783 | + ci_log_md_ioctl, | |
37784 | + ci_log_md_stderr, | |
37785 | + ci_log_md_stdout, | |
37786 | + ci_log_md_file, | |
37787 | + ci_log_md_serial, | |
37788 | + ci_log_md_syslog, | |
37789 | + ci_log_md_pidfile | |
37790 | +} ci_log_mode_t; | |
37791 | +extern ci_log_mode_t ci_log_mode; | |
37792 | +#endif | |
37793 | + | |
37794 | +/********************************************************************** | |
37795 | + * Pretty-printing. | |
37796 | + */ | |
37797 | + | |
37798 | +extern char ci_printable_char(char c) CI_HF; | |
37799 | + | |
37800 | +extern void (*ci_hex_dump_formatter)(char* buf, const ci_octet* s, | |
37801 | + int i, int off, int len) CI_HV; | |
37802 | +extern void ci_hex_dump_format_octets(char*,const ci_octet*,int,int,int) CI_HF; | |
37803 | +extern void ci_hex_dump_format_dwords(char*,const ci_octet*,int,int,int) CI_HF; | |
37804 | + | |
37805 | +extern void ci_hex_dump_row(char* buf, volatile const void* s, int len, | |
37806 | + ci_ptr_arith_t address) CI_HF; | |
37807 | + /*!< A row contains up to 16 bytes. Row starts at [address & 15u], so | |
37808 | + ** therefore [len + (address & 15u)] must be <= 16. | |
37809 | + */ | |
37810 | + | |
37811 | +extern void ci_hex_dump(ci_log_fn_t, volatile const void*, | |
37812 | + int len, ci_ptr_arith_t address) CI_HF; | |
37813 | + | |
37814 | +extern int ci_hex_dump_to_raw(const char* src_hex, void* buf, | |
37815 | + unsigned* addr_out_opt, int* skip) CI_HF; | |
37816 | + /*!< Recovers raw data from a single line of a hex dump. [buf] must be at | |
37817 | + ** least 16 bytes long. Returns the number of bytes written to [buf] (in | |
37818 | + ** range 1 -> 16), or -1 if [src_hex] doesn't contain hex data. Does not | |
37819 | + ** cope with missing bytes at the start of a line. | |
37820 | + */ | |
37821 | + | |
37822 | +extern int ci_format_eth_addr(char* buf, const void* eth_mac_addr, | |
37823 | + char sep) CI_HF; | |
37824 | + /*!< This will write 18 characters to <buf> including terminating null. | |
37825 | + ** Returns number of bytes written excluding null. If [sep] is zero, ':' | |
37826 | + ** is used. | |
37827 | + */ | |
37828 | + | |
37829 | +extern int ci_parse_eth_addr(void* eth_mac_addr, | |
37830 | + const char* str, char sep) CI_HF; | |
37831 | + /*!< If [sep] is zero, absolutely any separator is accepted (even | |
37832 | + ** inconsistent separators). Returns 0 on success, -1 on error. | |
37833 | + */ | |
37834 | + | |
37835 | +extern int ci_format_ip4_addr(char* buf, unsigned addr_be32) CI_HF; | |
37836 | + /*!< Formats the IP address (in network endian) in dotted-quad. Returns | |
37837 | + ** the number of bytes written (up to 15), excluding the null. [buf] | |
37838 | + ** must be at least 16 bytes long. | |
37839 | + */ | |
37840 | + | |
37841 | + | |
37842 | +/********************************************************************** | |
37843 | + * Error checking. | |
37844 | + */ | |
37845 | + | |
37846 | +extern void (*ci_fail_stop_fn)(void) CI_HV; | |
37847 | + | |
37848 | +extern void ci_fail_stop(void) CI_HF; | |
37849 | +extern void ci_fail_hang(void) CI_HF; | |
37850 | +extern void ci_fail_bomb(void) CI_HF; | |
37851 | +extern void ci_backtrace(void) CI_HF; | |
37852 | + | |
37853 | +#if defined __linux__ && !defined __KERNEL__ | |
37854 | +extern void ci_fail_abort (void) CI_HF; | |
37855 | +#endif | |
37856 | + | |
37857 | +#ifdef __GNUC__ | |
37858 | +extern void | |
37859 | +__ci_fail(const char*, ...) CI_PRINTF_LIKE(1,2) CI_HF; | |
37860 | +#else | |
37861 | +# if _PREFAST_ | |
37862 | + extern void _declspec(noreturn) __ci_fail(const char* fmt, ...); | |
37863 | +# else | |
37864 | + extern void __ci_fail(const char* fmt, ...); | |
37865 | +# endif | |
37866 | + | |
37867 | +#endif | |
37868 | + | |
37869 | +#define ci_warn(x) \ | |
37870 | + do{ ci_log("WARN at %s:%d", __FILE__, __LINE__); }while(0) | |
37871 | + | |
37872 | +#define ci_fail(x) \ | |
37873 | + do{ ci_log("FAIL at %s:%d", __FILE__, __LINE__); __ci_fail x; }while(0) | |
37874 | + | |
37875 | +extern void __ci_sys_fail(const char* fn, int rc, | |
37876 | + const char* file, int line) CI_HF; | |
37877 | +#define ci_sys_fail(fn, rc) __ci_sys_fail(fn, rc, __FILE__, __LINE__) | |
37878 | + | |
37879 | +/********************************************************************** | |
37880 | + * Logging to buffer (src/citools/log_buffer.c) | |
37881 | + */ | |
37882 | + | |
37883 | +/*! Divert ci_log() messages to the log buffer | |
37884 | + * normally they go to the system console */ | |
37885 | +extern void ci_log_buffer_till_fail(void) CI_HF; | |
37886 | + | |
37887 | +/*! Dump the contents of the log buffer to the system console */ | |
37888 | +extern void ci_log_buffer_dump(void) CI_HF; | |
37889 | + | |
37890 | + | |
37891 | +/********************************************************************** | |
37892 | + * Some useful pretty-printing. | |
37893 | + */ | |
37894 | + | |
37895 | +#ifdef __linux__ | |
37896 | +# define CI_SOCKCALL_FLAGS_FMT "%s%s%s%s%s%s%s%s%s%s%s" | |
37897 | + | |
37898 | +# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \ | |
37899 | + (((x) & MSG_OOB ) ? "OOB " :""), \ | |
37900 | + (((x) & MSG_PEEK ) ? "PEEK " :""), \ | |
37901 | + (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :""), \ | |
37902 | + (((x) & MSG_EOR ) ? "EOR " :""), \ | |
37903 | + (((x) & MSG_CTRUNC ) ? "CTRUNC " :""), \ | |
37904 | + (((x) & MSG_TRUNC ) ? "TRUNC " :""), \ | |
37905 | + (((x) & MSG_WAITALL ) ? "WAITALL " :""), \ | |
37906 | + (((x) & MSG_DONTWAIT ) ? "DONTWAIT " :""), \ | |
37907 | + (((x) & MSG_NOSIGNAL ) ? "NOSIGNAL " :""), \ | |
37908 | + (((x) & MSG_ERRQUEUE ) ? "ERRQUEUE " :""), \ | |
37909 | + (((x) & MSG_CONFIRM ) ? "CONFIRM " :"") | |
37910 | +#endif | |
37911 | + | |
37912 | +#ifdef _WIN32 | |
37913 | +# define CI_SOCKCALL_FLAGS_FMT "%s%s%s" | |
37914 | + | |
37915 | +# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \ | |
37916 | + (((x) & MSG_OOB ) ? "OOB " :""), \ | |
37917 | + (((x) & MSG_PEEK ) ? "PEEK " :""), \ | |
37918 | + (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :"") | |
37919 | +#endif | |
37920 | + | |
37921 | +#ifdef __sun__ | |
37922 | +# define CI_SOCKCALL_FLAGS_FMT "%s%s%s%s%s%s%s%s%s" | |
37923 | + | |
37924 | +# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \ | |
37925 | + (((x) & MSG_OOB ) ? "OOB " :""), \ | |
37926 | + (((x) & MSG_PEEK ) ? "PEEK " :""), \ | |
37927 | + (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :""), \ | |
37928 | + (((x) & MSG_EOR ) ? "EOR " :""), \ | |
37929 | + (((x) & MSG_CTRUNC ) ? "CTRUNC " :""), \ | |
37930 | + (((x) & MSG_TRUNC ) ? "TRUNC " :""), \ | |
37931 | + (((x) & MSG_WAITALL ) ? "WAITALL " :""), \ | |
37932 | + (((x) & MSG_DONTWAIT ) ? "DONTWAIT " :""), \ | |
37933 | + (((x) & MSG_NOTIFICATION) ? "NOTIFICATION" :"") | |
37934 | +#endif | |
37935 | + | |
37936 | +#endif /* __CI_TOOLS_LOG_H__ */ | |
37937 | +/*! \cidoxg_end */ | |
37938 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/gcc_x86.h | |
37939 | =================================================================== | |
37940 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
37941 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/gcc_x86.h 2008-02-20 09:32:49.000000000 +0100 | |
37942 | @@ -0,0 +1,361 @@ | |
37943 | +/**************************************************************************** | |
37944 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
37945 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
37946 | + * 9501 Jeronimo Road, Suite 250, | |
37947 | + * Irvine, CA 92618, USA | |
37948 | + * | |
37949 | + * Maintained by Solarflare Communications | |
37950 | + * <linux-xen-drivers@solarflare.com> | |
37951 | + * <onload-dev@solarflare.com> | |
37952 | + * | |
37953 | + * This program is free software; you can redistribute it and/or modify it | |
37954 | + * under the terms of the GNU General Public License version 2 as published | |
37955 | + * by the Free Software Foundation, incorporated herein by reference. | |
37956 | + * | |
37957 | + * This program is distributed in the hope that it will be useful, | |
37958 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
37959 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
37960 | + * GNU General Public License for more details. | |
37961 | + * | |
37962 | + * You should have received a copy of the GNU General Public License | |
37963 | + * along with this program; if not, write to the Free Software | |
37964 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
37965 | + **************************************************************************** | |
37966 | + */ | |
37967 | + | |
37968 | +/*! \cidoxg_include_ci_tools_platform */ | |
37969 | + | |
37970 | +#ifndef __CI_TOOLS_GCC_X86_H__ | |
37971 | +#define __CI_TOOLS_GCC_X86_H__ | |
37972 | + | |
37973 | + | |
37974 | +/********************************************************************** | |
37975 | + * Free-running cycle counters. | |
37976 | + */ | |
37977 | + | |
37978 | +#define CI_HAVE_FRC64 | |
37979 | +#define CI_HAVE_FRC32 | |
37980 | + | |
37981 | +#define ci_frc32(pval) __asm__ __volatile__("rdtsc" : "=a" (*pval) : : "edx") | |
37982 | + | |
37983 | +#if defined(__x86_64__) | |
37984 | +ci_inline void ci_frc64(ci_uint64* pval) { | |
37985 | + /* temp fix until we figure how to get this out in one bite */ | |
37986 | + ci_uint64 low, high; | |
37987 | + __asm__ __volatile__("rdtsc" : "=a" (low) , "=d" (high)); | |
37988 | + *pval = (high << 32) | low; | |
37989 | +} | |
37990 | + | |
37991 | +#else | |
37992 | +#define ci_frc64(pval) __asm__ __volatile__("rdtsc" : "=A" (*pval)) | |
37993 | +#endif | |
37994 | + | |
37995 | +#define ci_frc_flush() /* ?? Need a pipeline barrier. */ | |
37996 | + | |
37997 | + | |
37998 | +/********************************************************************** | |
37999 | + * Atomic integer. | |
38000 | + */ | |
38001 | + | |
38002 | +/* | |
38003 | +** int ci_atomic_read(a) { return a->n; } | |
38004 | +** void ci_atomic_set(a, v) { a->n = v; } | |
38005 | +** void ci_atomic_inc(a) { ++a->n; } | |
38006 | +** void ci_atomic_dec(a) { --a->n; } | |
38007 | +** int ci_atomic_inc_and_test(a) { return ++a->n == 0; } | |
38008 | +** int ci_atomic_dec_and_test(a) { return --a->n == 0; } | |
38009 | +** void ci_atomic_and(a, v) { a->n &= v; } | |
38010 | +** void ci_atomic_or(a, v) { a->n |= v; } | |
38011 | +*/ | |
38012 | + | |
38013 | +typedef struct { volatile ci_int32 n; } ci_atomic_t; | |
38014 | + | |
38015 | +#define CI_ATOMIC_INITIALISER(i) {(i)} | |
38016 | + | |
38017 | +static inline ci_int32 ci_atomic_read(const ci_atomic_t* a) { return a->n; } | |
38018 | +static inline void ci_atomic_set(ci_atomic_t* a, int v) { a->n = v; ci_wmb(); } | |
38019 | + | |
38020 | +static inline void ci_atomic_inc(ci_atomic_t* a) | |
38021 | +{ __asm__ __volatile__("lock; incl %0" : "+m" (a->n)); } | |
38022 | + | |
38023 | + | |
38024 | +static inline void ci_atomic_dec(ci_atomic_t* a) | |
38025 | +{ __asm__ __volatile__("lock; decl %0" : "+m" (a->n)); } | |
38026 | + | |
38027 | +static inline int ci_atomic_inc_and_test(ci_atomic_t* a) { | |
38028 | + char r; | |
38029 | + __asm__ __volatile__("lock; incl %0; sete %1" | |
38030 | + : "+m" (a->n), "=qm" (r)); | |
38031 | + return r; | |
38032 | +} | |
38033 | + | |
38034 | +static inline int ci_atomic_dec_and_test(ci_atomic_t* a) { | |
38035 | + char r; | |
38036 | + __asm__ __volatile__("lock; decl %0; sete %1" | |
38037 | + : "+m" (a->n), "=qm" (r)); | |
38038 | + return r; | |
38039 | +} | |
38040 | + | |
38041 | +ci_inline int | |
38042 | +ci_atomic_xadd (ci_atomic_t *a, int v) { | |
38043 | + __asm__ ("lock xadd %0, %1" : "=r" (v), "+m" (a->n) : "0" (v)); | |
38044 | + return v; | |
38045 | +} | |
38046 | +ci_inline int | |
38047 | +ci_atomic_xchg (ci_atomic_t *a, int v) { | |
38048 | + __asm__ ("lock xchg %0, %1" : "=r" (v), "+m" (a->n) : "0" (v)); | |
38049 | + return v; | |
38050 | +} | |
38051 | + | |
38052 | +ci_inline void ci_atomic32_or(volatile ci_uint32* p, ci_uint32 mask) | |
38053 | +{ __asm__ __volatile__("lock; orl %1, %0" : "+m" (*p) : "ir" (mask)); } | |
38054 | + | |
38055 | +ci_inline void ci_atomic32_and(volatile ci_uint32* p, ci_uint32 mask) | |
38056 | +{ __asm__ __volatile__("lock; andl %1, %0" : "+m" (*p) : "ir" (mask)); } | |
38057 | + | |
38058 | +ci_inline void ci_atomic32_add(volatile ci_uint32* p, ci_uint32 v) | |
38059 | +{ __asm__ __volatile__("lock; addl %1, %0" : "+m" (*p) : "ir" (v)); } | |
38060 | + | |
38061 | +#define ci_atomic_or(a, v) ci_atomic32_or ((ci_uint32*) &(a)->n, (v)) | |
38062 | +#define ci_atomic_and(a, v) ci_atomic32_and((ci_uint32*) &(a)->n, (v)) | |
38063 | +#define ci_atomic_add(a, v) ci_atomic32_add((ci_uint32*) &(a)->n, (v)) | |
38064 | + | |
38065 | +extern int ci_glibc_uses_nptl (void) CI_HF; | |
38066 | +extern int ci_glibc_nptl_broken(void) CI_HF; | |
38067 | +extern int ci_glibc_gs_get_is_multihreaded_offset (void) CI_HF; | |
38068 | +extern int ci_glibc_gs_is_multihreaded_offset CI_HV; | |
38069 | + | |
38070 | +#if !defined(__x86_64__) | |
38071 | +#ifdef __GLIBC__ | |
38072 | +/* Returns non-zero if the calling process might be multithreaded, returns 0 if | |
38073 | + * it definitely isn't (i.e. if reimplementing this function for other | |
38074 | + * architectures and platforms, you can safely just return 1). | |
38075 | + */ | |
38076 | +static inline int ci_is_multithreaded (void) { | |
38077 | + | |
38078 | + while (1) { | |
38079 | + if (ci_glibc_gs_is_multihreaded_offset >= 0) { | |
38080 | + /* NPTL keeps a variable that tells us this hanging off gs (i.e. in thread- | |
38081 | + * local storage); just return this | |
38082 | + */ | |
38083 | + int r; | |
38084 | + __asm__ __volatile__ ("movl %%gs:(%1), %0" | |
38085 | + : "=r" (r) | |
38086 | + : "r" (ci_glibc_gs_is_multihreaded_offset)); | |
38087 | + return r; | |
38088 | + } | |
38089 | + | |
38090 | + if (ci_glibc_gs_is_multihreaded_offset == -2) { | |
38091 | + /* This means we've already determined that the libc version is NOT good | |
38092 | + * for our funky "is multithreaded" hack | |
38093 | + */ | |
38094 | + return 1; | |
38095 | + } | |
38096 | + | |
38097 | + /* If we get here, it means this is the first time the function has been | |
38098 | + * called -- detect the libc version and go around again. | |
38099 | + */ | |
38100 | + ci_glibc_gs_is_multihreaded_offset = ci_glibc_gs_get_is_multihreaded_offset (); | |
38101 | + | |
38102 | + /* Go around again. We do the test here rather than at the top so that we go | |
38103 | + * quicker in the common case | |
38104 | + */ | |
38105 | + } | |
38106 | +} | |
38107 | + | |
38108 | +#else /* def __GLIBC__ */ | |
38109 | + | |
38110 | +#define ci_is_multithreaded() 1 /* ?? Is this the POSIX way of finding out */ | |
38111 | + /* whether the application is single */ | |
38112 | + /* threaded? */ | |
38113 | + | |
38114 | +#endif /* def __GLIBC__ */ | |
38115 | + | |
38116 | +#else /* defined __x86_64__ */ | |
38117 | + | |
38118 | +static inline int ci_is_multithreaded (void) { | |
38119 | + /* No easy way to tell on x86_64; so assume we're multithreaded */ | |
38120 | + return 1; | |
38121 | +} | |
38122 | + | |
38123 | +#endif /* defined __x86_64__ */ | |
38124 | + | |
38125 | + | |
38126 | +/********************************************************************** | |
38127 | + * Compare and swap. | |
38128 | + */ | |
38129 | + | |
38130 | +#define CI_HAVE_COMPARE_AND_SWAP | |
38131 | + | |
38132 | +ci_inline int ci_cas32_succeed(volatile ci_int32* p, ci_int32 oldval, | |
38133 | + ci_int32 newval) { | |
38134 | + char ret; | |
38135 | + ci_int32 prevval; | |
38136 | + __asm__ __volatile__("lock; cmpxchgl %3, %1; sete %0" | |
38137 | + : "=q"(ret), "+m"(*p), "=a"(prevval) | |
38138 | + : "r"(newval), "a"(oldval)); | |
38139 | + return ret; | |
38140 | +} | |
38141 | + | |
38142 | +ci_inline int ci_cas32_fail(volatile ci_int32* p, ci_int32 oldval, | |
38143 | + ci_int32 newval) { | |
38144 | + char ret; | |
38145 | + ci_int32 prevval; | |
38146 | + __asm__ __volatile__("lock; cmpxchgl %3, %1; setne %0" | |
38147 | + : "=q"(ret), "+m"(*p), "=a"(prevval) | |
38148 | + : "r"(newval), "a"(oldval)); | |
38149 | + return ret; | |
38150 | +} | |
38151 | + | |
38152 | +#ifdef __x86_64__ | |
38153 | +ci_inline int ci_cas64_succeed(volatile ci_int64* p, ci_int64 oldval, | |
38154 | + ci_int64 newval) { | |
38155 | + char ret; | |
38156 | + ci_int64 prevval; | |
38157 | + __asm__ __volatile__("lock; cmpxchgq %3, %1; sete %0" | |
38158 | + : "=q"(ret), "+m"(*p), "=a"(prevval) | |
38159 | + : "r"(newval), "a"(oldval)); | |
38160 | + return ret; | |
38161 | +} | |
38162 | + | |
38163 | +ci_inline int ci_cas64_fail(volatile ci_int64* p, ci_int64 oldval, | |
38164 | + ci_int64 newval) { | |
38165 | + char ret; | |
38166 | + ci_int64 prevval; | |
38167 | + __asm__ __volatile__("lock; cmpxchgq %3, %1; setne %0" | |
38168 | + : "=q"(ret), "+m"(*p), "=a"(prevval) | |
38169 | + : "r"(newval), "a"(oldval)); | |
38170 | + return ret; | |
38171 | +} | |
38172 | +#endif | |
38173 | + | |
38174 | +ci_inline int ci_cas32u_succeed(volatile ci_uint32* p, ci_uint32 oldval, ci_uint32 newval) { | |
38175 | + char ret; | |
38176 | + ci_uint32 prevval; | |
38177 | + __asm__ __volatile__("lock; cmpxchgl %3, %1; sete %0" | |
38178 | + : "=q"(ret), "+m"(*p), "=a"(prevval) | |
38179 | + : "r"(newval), "a"(oldval)); | |
38180 | + return ret; | |
38181 | +} | |
38182 | + | |
38183 | +ci_inline int ci_cas32u_fail(volatile ci_uint32* p, ci_uint32 oldval, ci_uint32 newval) { | |
38184 | + char ret; | |
38185 | + ci_uint32 prevval; | |
38186 | + __asm__ __volatile__("lock; cmpxchgl %3, %1; setne %0" | |
38187 | + : "=q"(ret), "+m"(*p), "=a"(prevval) | |
38188 | + : "r"(newval), "a"(oldval)); | |
38189 | + return ret; | |
38190 | +} | |
38191 | + | |
38192 | +ci_inline int ci_cas64u_succeed(volatile ci_uint64* p, ci_uint64 oldval, | |
38193 | + ci_uint64 newval) { | |
38194 | + char ret; | |
38195 | + ci_uint64 prevval; | |
38196 | + __asm__ __volatile__("lock; cmpxchgq %3, %1; sete %0" | |
38197 | + : "=q"(ret), "+m"(*p), "=a"(prevval) | |
38198 | + : "r"(newval), "a"(oldval)); | |
38199 | + return ret; | |
38200 | +} | |
38201 | + | |
38202 | +ci_inline int ci_cas64u_fail(volatile ci_uint64* p, ci_uint64 oldval, | |
38203 | + ci_uint64 newval) { | |
38204 | + char ret; | |
38205 | + ci_uint64 prevval; | |
38206 | + __asm__ __volatile__("lock; cmpxchgq %3, %1; setne %0" | |
38207 | + : "=q"(ret), "+m"(*p), "=a"(prevval) | |
38208 | + : "r"(newval), "a"(oldval)); | |
38209 | + return ret; | |
38210 | +} | |
38211 | + | |
38212 | +#ifdef __x86_64__ | |
38213 | + | |
38214 | +# define ci_cas_uintptr_succeed(p,o,n) \ | |
38215 | + ci_cas64u_succeed((volatile ci_uint64*) (p), (o), (n)) | |
38216 | +# define ci_cas_uintptr_fail(p,o,n) \ | |
38217 | + ci_cas64u_fail((volatile ci_uint64*) (p), (o), (n)) | |
38218 | + | |
38219 | +#else | |
38220 | + | |
38221 | +# define ci_cas_uintptr_succeed(p,o,n) \ | |
38222 | + ci_cas32u_succeed((volatile ci_uint32*) (p), (o), (n)) | |
38223 | +# define ci_cas_uintptr_fail(p,o,n) \ | |
38224 | + ci_cas32u_fail((volatile ci_uint32*) (p), (o), (n)) | |
38225 | + | |
38226 | +#endif | |
38227 | + | |
38228 | + | |
38229 | +/********************************************************************** | |
38230 | + * Atomic bit field. | |
38231 | + */ | |
38232 | + | |
38233 | +typedef ci_uint32 ci_bits; | |
38234 | +#define CI_BITS_N 32u | |
38235 | + | |
38236 | +#define CI_BITS_DECLARE(name, n) \ | |
38237 | + ci_bits name[((n) + CI_BITS_N - 1u) / CI_BITS_N] | |
38238 | + | |
38239 | +ci_inline void ci_bits_clear_all(volatile ci_bits* b, int n_bits) | |
38240 | +{ memset((void*) b, 0, (n_bits+CI_BITS_N-1u) / CI_BITS_N * sizeof(ci_bits)); } | |
38241 | + | |
38242 | +ci_inline void ci_bit_set(volatile ci_bits* b, int i) { | |
38243 | + __asm__ __volatile__("lock; btsl %1, %0" | |
38244 | + : "=m" (*b) | |
38245 | + : "Ir" (i)); | |
38246 | +} | |
38247 | + | |
38248 | +ci_inline void ci_bit_clear(volatile ci_bits* b, int i) { | |
38249 | + __asm__ __volatile__("lock; btrl %1, %0" | |
38250 | + : "=m" (*b) | |
38251 | + : "Ir" (i)); | |
38252 | +} | |
38253 | + | |
38254 | +ci_inline int ci_bit_test(volatile ci_bits* b, int i) { | |
38255 | + char rc; | |
38256 | + __asm__("btl %2, %1; setc %0" | |
38257 | + : "=r" (rc) | |
38258 | + : "m" (*b), "Ir" (i)); | |
38259 | + return rc; | |
38260 | +} | |
38261 | + | |
38262 | +ci_inline int ci_bit_test_and_set(volatile ci_bits* b, int i) { | |
38263 | + char rc; | |
38264 | + __asm__ __volatile__("lock; btsl %2, %1; setc %0" | |
38265 | + : "=r" (rc), "+m" (*b) | |
38266 | + : "Ir" (i)); | |
38267 | + return rc; | |
38268 | +} | |
38269 | + | |
38270 | +ci_inline int ci_bit_test_and_clear(volatile ci_bits* b, int i) { | |
38271 | + char rc; | |
38272 | + __asm__ __volatile__("lock; btrl %2, %1; setc %0" | |
38273 | + : "=r" (rc), "+m" (*b) | |
38274 | + : "Ir" (i)); | |
38275 | + return rc; | |
38276 | +} | |
38277 | + | |
38278 | +/* These mask ops only work within a single ci_bits word. */ | |
38279 | +#define ci_bit_mask_set(b,m) ci_atomic32_or((b), (m)) | |
38280 | +#define ci_bit_mask_clear(b,m) ci_atomic32_and((b), ~(m)) | |
38281 | + | |
38282 | + | |
38283 | +/********************************************************************** | |
38284 | + * Misc. | |
38285 | + */ | |
38286 | + | |
38287 | +#if __GNUC__ >= 3 | |
38288 | +# define ci_spinloop_pause() __asm__("pause") | |
38289 | +#else | |
38290 | +# define ci_spinloop_pause() __asm__(".byte 0xf3, 0x90") | |
38291 | +#endif | |
38292 | + | |
38293 | + | |
38294 | +#define CI_HAVE_ADDC32 | |
38295 | +#define ci_add_carry32(sum, v) __asm__("addl %1, %0 ;" \ | |
38296 | + "adcl $0, %0 ;" \ | |
38297 | + : "=r" (sum) \ | |
38298 | + : "g" ((ci_uint32) v), "0" (sum)) | |
38299 | + | |
38300 | + | |
38301 | +#endif /* __CI_TOOLS_GCC_X86_H__ */ | |
38302 | + | |
38303 | +/*! \cidoxg_end */ | |
38304 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h | |
38305 | =================================================================== | |
38306 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
38307 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h 2008-02-20 09:32:49.000000000 +0100 | |
38308 | @@ -0,0 +1,362 @@ | |
38309 | +/**************************************************************************** | |
38310 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
38311 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
38312 | + * 9501 Jeronimo Road, Suite 250, | |
38313 | + * Irvine, CA 92618, USA | |
38314 | + * | |
38315 | + * Maintained by Solarflare Communications | |
38316 | + * <linux-xen-drivers@solarflare.com> | |
38317 | + * <onload-dev@solarflare.com> | |
38318 | + * | |
38319 | + * This program is free software; you can redistribute it and/or modify it | |
38320 | + * under the terms of the GNU General Public License version 2 as published | |
38321 | + * by the Free Software Foundation, incorporated herein by reference. | |
38322 | + * | |
38323 | + * This program is distributed in the hope that it will be useful, | |
38324 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
38325 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
38326 | + * GNU General Public License for more details. | |
38327 | + * | |
38328 | + * You should have received a copy of the GNU General Public License | |
38329 | + * along with this program; if not, write to the Free Software | |
38330 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
38331 | + **************************************************************************** | |
38332 | + */ | |
38333 | + | |
38334 | + | |
38335 | +/*! \cidoxg_include_ci_tools_platform */ | |
38336 | + | |
38337 | +#ifndef __CI_TOOLS_LINUX_KERNEL_H__ | |
38338 | +#define __CI_TOOLS_LINUX_KERNEL_H__ | |
38339 | + | |
38340 | +/********************************************************************** | |
38341 | + * Need to know the kernel version. | |
38342 | + */ | |
38343 | + | |
38344 | +#ifndef LINUX_VERSION_CODE | |
38345 | +# include <linux/version.h> | |
38346 | +# ifndef UTS_RELEASE | |
38347 | + /* 2.6.18 onwards defines UTS_RELEASE in a separate header */ | |
38348 | +# include <linux/utsrelease.h> | |
38349 | +# endif | |
38350 | +#endif | |
38351 | + | |
38352 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) || \ | |
38353 | + LINUX_VERSION_CODE >= KERNEL_VERSION(2,7,0) | |
38354 | +# error "Linux 2.6 required" | |
38355 | +#endif | |
38356 | + | |
38357 | + | |
38358 | +#include <linux/slab.h> /* kmalloc / kfree */ | |
38359 | +#include <linux/vmalloc.h> /* vmalloc / vfree */ | |
38360 | +#include <linux/interrupt.h>/* in_interrupt() */ | |
38361 | +#include <linux/in.h> | |
38362 | +#include <linux/in6.h> | |
38363 | +#include <linux/spinlock.h> | |
38364 | +#include <linux/highmem.h> | |
38365 | +#include <linux/smp_lock.h> | |
38366 | +#include <linux/ctype.h> | |
38367 | +#include <linux/uio.h> | |
38368 | +#include <asm/current.h> | |
38369 | +#include <asm/errno.h> | |
38370 | +#include <asm/kmap_types.h> | |
38371 | +#include <asm/semaphore.h> | |
38372 | + | |
38373 | +#include <ci/tools/config.h> | |
38374 | + | |
38375 | +#define ci_in_irq in_irq | |
38376 | +#define ci_in_interrupt in_interrupt | |
38377 | +#define ci_in_atomic in_atomic | |
38378 | + | |
38379 | + | |
38380 | +/********************************************************************** | |
38381 | + * Misc stuff. | |
38382 | + */ | |
38383 | + | |
38384 | +#ifdef BUG | |
38385 | +# define CI_BOMB BUG | |
38386 | +#endif | |
38387 | + | |
38388 | +ci_inline void* __ci_alloc(size_t n) | |
38389 | +{ return kmalloc(n, (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)); } | |
38390 | + | |
38391 | +ci_inline void* __ci_atomic_alloc(size_t n) | |
38392 | +{ return kmalloc(n, GFP_ATOMIC ); } | |
38393 | + | |
38394 | +ci_inline void __ci_free(void* p) { return kfree(p); } | |
38395 | +ci_inline void* __ci_vmalloc(size_t n) { return vmalloc(n); } | |
38396 | +ci_inline void __ci_vfree(void* p) { return vfree(p); } | |
38397 | + | |
38398 | + | |
38399 | +#if CI_MEMLEAK_DEBUG_ALLOC_TABLE | |
38400 | + #define ci_alloc(s) ci_alloc_memleak_debug (s, __FILE__, __LINE__) | |
38401 | + #define ci_atomic_alloc(s) ci_atomic_alloc_memleak_debug(s, __FILE__, __LINE__) | |
38402 | + #define ci_free ci_free_memleak_debug | |
38403 | + #define ci_vmalloc(s) ci_vmalloc_memleak_debug (s, __FILE__,__LINE__) | |
38404 | + #define ci_vfree ci_vfree_memleak_debug | |
38405 | + #define ci_alloc_fn ci_alloc_fn_memleak_debug | |
38406 | + #define ci_vmalloc_fn ci_vmalloc_fn_memleak_debug | |
38407 | +#else /* !CI_MEMLEAK_DEBUG_ALLOC_TABLE */ | |
38408 | + #define ci_alloc_fn __ci_alloc | |
38409 | + #define ci_vmalloc_fn __ci_vmalloc | |
38410 | +#endif | |
38411 | + | |
38412 | +#ifndef ci_alloc | |
38413 | + #define ci_atomic_alloc __ci_atomic_alloc | |
38414 | + #define ci_alloc __ci_alloc | |
38415 | + #define ci_free __ci_free | |
38416 | + #define ci_vmalloc __ci_vmalloc | |
38417 | + #define ci_vmalloc_fn __ci_vmalloc | |
38418 | + #define ci_vfree __ci_vfree | |
38419 | +#endif | |
38420 | + | |
38421 | +#define ci_sprintf sprintf | |
38422 | +#define ci_vsprintf vsprintf | |
38423 | +#define ci_snprintf snprintf | |
38424 | +#define ci_vsnprintf vsnprintf | |
38425 | +#define ci_sscanf sscanf | |
38426 | + | |
38427 | + | |
38428 | +#define CI_LOG_FN_DEFAULT ci_log_syslog | |
38429 | + | |
38430 | + | |
38431 | +/*-------------------------------------------------------------------- | |
38432 | + * | |
38433 | + * irqs_disabled - needed for kmap helpers on some kernels | |
38434 | + * | |
38435 | + *--------------------------------------------------------------------*/ | |
38436 | +#ifdef irqs_disabled | |
38437 | +# define ci_irqs_disabled irqs_disabled | |
38438 | +#else | |
38439 | +# if defined(__i386__) | defined(__x86_64__) | |
38440 | +# define ci_irqs_disabled(x) \ | |
38441 | + ({ \ | |
38442 | + unsigned long flags; \ | |
38443 | + local_save_flags(flags); \ | |
38444 | + !(flags & (1<<9)); \ | |
38445 | + }) | |
38446 | +# else | |
38447 | +# error "Need to implement irqs_disabled() for your architecture" | |
38448 | +# endif | |
38449 | +#endif | |
38450 | + | |
38451 | + | |
38452 | +/********************************************************************** | |
38453 | + * kmap helpers. | |
38454 | + * | |
38455 | + * Use ci_k(un)map for code paths which are not in an atomic context. | |
38456 | + * For atomic code you need to use ci_k(un)map_in_atomic. This will grab | |
38457 | + * one of the per-CPU kmap slots. | |
38458 | + * | |
38459 | + * NB in_interrupt != in_irq. If you don't know the difference then | |
38460 | + * don't use kmap_in_atomic | |
38461 | + * | |
38462 | + * 2.4 allocates kmap slots by function. We are going to re-use the | |
38463 | + * skb module's slot - we also use the same interlock | |
38464 | + * | |
38465 | + * 2.6 allocates kmap slots by type as well as by function. We are | |
38466 | + * going to use the currently (2.6.10) unused SOFTIRQ slot | |
38467 | + * | |
38468 | + */ | |
38469 | + | |
38470 | +ci_inline void* ci_kmap(struct page *page) { | |
38471 | + CI_DEBUG(if( ci_in_atomic() | ci_in_interrupt() | ci_in_irq() ) BUG()); | |
38472 | + return kmap(page); | |
38473 | +} | |
38474 | + | |
38475 | +ci_inline void ci_kunmap(struct page *page) { | |
38476 | + kunmap(page); | |
38477 | +} | |
38478 | + | |
38479 | +#define CI_KM_SLOT KM_SOFTIRQ0 | |
38480 | + | |
38481 | + | |
38482 | +typedef struct semaphore ci_semaphore_t; | |
38483 | + | |
38484 | +ci_inline void | |
38485 | +ci_sem_init (ci_semaphore_t *sem, int val) { | |
38486 | + sema_init (sem, val); | |
38487 | +} | |
38488 | + | |
38489 | +ci_inline void | |
38490 | +ci_sem_down (ci_semaphore_t *sem) { | |
38491 | + down (sem); | |
38492 | +} | |
38493 | + | |
38494 | +ci_inline int | |
38495 | +ci_sem_trydown (ci_semaphore_t *sem) { | |
38496 | + return down_trylock (sem); | |
38497 | +} | |
38498 | + | |
38499 | +ci_inline void | |
38500 | +ci_sem_up (ci_semaphore_t *sem) { | |
38501 | + up (sem); | |
38502 | +} | |
38503 | + | |
38504 | +ci_inline int | |
38505 | +ci_sem_get_count(ci_semaphore_t *sem) { | |
38506 | + return sem->count.counter; | |
38507 | +} | |
38508 | + | |
38509 | +ci_inline void* ci_kmap_in_atomic(struct page *page) | |
38510 | +{ | |
38511 | + CI_DEBUG(if( ci_in_irq() ) BUG()); | |
38512 | + | |
38513 | + /* iSCSI can call without in_interrupt() but with irqs_disabled() | |
38514 | + and in a context that can't sleep, so we need to check that | |
38515 | + too */ | |
38516 | + if(ci_in_interrupt() || ci_irqs_disabled()) | |
38517 | + return kmap_atomic(page, CI_KM_SLOT); | |
38518 | + else | |
38519 | + return kmap(page); | |
38520 | +} | |
38521 | + | |
38522 | +ci_inline void ci_kunmap_in_atomic(struct page *page, void* kaddr) | |
38523 | +{ | |
38524 | + CI_DEBUG(if( ci_in_irq() ) BUG()); | |
38525 | + | |
38526 | + /* iSCSI can call without in_interrupt() but with irqs_disabled() | |
38527 | + and in a context that can't sleep, so we need to check that | |
38528 | + too */ | |
38529 | + if(ci_in_interrupt() || ci_irqs_disabled()) | |
38530 | + kunmap_atomic(kaddr, CI_KM_SLOT); | |
38531 | + else | |
38532 | + kunmap(page); | |
38533 | +} | |
38534 | + | |
38535 | +/********************************************************************** | |
38536 | + * spinlock implementation: used by <ci/tools/spinlock.h> | |
38537 | + */ | |
38538 | + | |
38539 | +#define CI_HAVE_SPINLOCKS | |
38540 | + | |
38541 | +typedef ci_uintptr_t ci_lock_holder_t; | |
38542 | +#define ci_lock_thisthread (ci_lock_holder_t)current | |
38543 | +#define ci_lock_no_holder (ci_lock_holder_t)NULL | |
38544 | + | |
38545 | +typedef spinlock_t ci_lock_i; | |
38546 | +typedef spinlock_t ci_irqlock_i; | |
38547 | +typedef unsigned long ci_irqlock_state_t; | |
38548 | + | |
38549 | +#define IRQLOCK_CYCLES 500000 | |
38550 | + | |
38551 | +#define ci_lock_ctor_i(l) spin_lock_init(l) | |
38552 | +#define ci_lock_dtor_i(l) do{}while(0) | |
38553 | +#define ci_lock_lock_i(l) spin_lock(l) | |
38554 | +#define ci_lock_trylock_i(l) spin_trylock(l) | |
38555 | +#define ci_lock_unlock_i(l) spin_unlock(l) | |
38556 | + | |
38557 | +#define ci_irqlock_ctor_i(l) spin_lock_init(l) | |
38558 | +#define ci_irqlock_dtor_i(l) do{}while(0) | |
38559 | +#define ci_irqlock_lock_i(l,s) spin_lock_irqsave(l,*(s)) | |
38560 | +#define ci_irqlock_unlock_i(l,s) spin_unlock_irqrestore(l, *(s)) | |
38561 | + | |
38562 | + | |
38563 | +/********************************************************************** | |
38564 | + * register access | |
38565 | + */ | |
38566 | + | |
38567 | +#include <asm/io.h> | |
38568 | + | |
38569 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) | |
38570 | +typedef volatile void __iomem* ioaddr_t; | |
38571 | +#else | |
38572 | +typedef unsigned long ioaddr_t; | |
38573 | +#endif | |
38574 | + | |
38575 | + | |
38576 | + | |
38577 | +/********************************************************************** | |
38578 | + * thread implementation -- kernel dependencies probably should be | |
38579 | + * moved to driver/linux_kernel.h | |
38580 | + */ | |
38581 | + | |
38582 | +#define ci_linux_daemonize(name) daemonize(name) | |
38583 | + | |
38584 | +#include <linux/workqueue.h> | |
38585 | + | |
38586 | + | |
38587 | +typedef struct { | |
38588 | + void* (*fn)(void* arg); | |
38589 | + void* arg; | |
38590 | + const char* name; | |
38591 | + int thrd_id; | |
38592 | + struct completion exit_event; | |
38593 | + struct work_struct keventd_witem; | |
38594 | +} ci_kernel_thread_t; | |
38595 | + | |
38596 | + | |
38597 | +typedef ci_kernel_thread_t* cithread_t; | |
38598 | + | |
38599 | + | |
38600 | +extern int cithread_create(cithread_t* tid, void* (*fn)(void*), void* arg, | |
38601 | + const char* name); | |
38602 | +extern int cithread_detach(cithread_t kt); | |
38603 | +extern int cithread_join(cithread_t kt); | |
38604 | + | |
38605 | + | |
38606 | +/* Kernel sysctl variables. */ | |
38607 | +extern int sysctl_tcp_wmem[3]; | |
38608 | +extern int sysctl_tcp_rmem[3]; | |
38609 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) | |
38610 | +#define LINUX_HAS_SYSCTL_MEM_MAX | |
38611 | +extern ci_uint32 sysctl_wmem_max; | |
38612 | +extern ci_uint32 sysctl_rmem_max; | |
38613 | +#endif | |
38614 | + | |
38615 | + | |
38616 | +/*-------------------------------------------------------------------- | |
38617 | + * | |
38618 | + * ci_bigbuf_t: An abstraction of a large buffer. Needed because in the | |
38619 | + * Linux kernel, large buffers need to be allocated with vmalloc(), whereas | |
38620 | + * smaller buffers should use kmalloc(). This abstraction chooses the | |
38621 | + * appropriate mechanism. | |
38622 | + * | |
38623 | + *--------------------------------------------------------------------*/ | |
38624 | + | |
38625 | +typedef struct { | |
38626 | + char* p; | |
38627 | + int is_vmalloc; | |
38628 | +} ci_bigbuf_t; | |
38629 | + | |
38630 | + | |
38631 | +ci_inline int ci_bigbuf_alloc(ci_bigbuf_t* bb, size_t bytes) { | |
38632 | + if( bytes >= CI_PAGE_SIZE && ! ci_in_atomic() ) { | |
38633 | + bb->is_vmalloc = 1; | |
38634 | + if( (bb->p = vmalloc(bytes)) ) return 0; | |
38635 | + } | |
38636 | + bb->is_vmalloc = 0; | |
38637 | + bb->p = kmalloc(bytes, ci_in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); | |
38638 | + return bb->p ? 0 : -ENOMEM; | |
38639 | +} | |
38640 | + | |
38641 | +ci_inline void ci_bigbuf_free(ci_bigbuf_t* bb) { | |
38642 | + if( bb->is_vmalloc ) vfree(bb->p); | |
38643 | + else kfree(bb->p); | |
38644 | +} | |
38645 | + | |
38646 | +ci_inline char* ci_bigbuf_ptr(ci_bigbuf_t* bb) | |
38647 | +{ return bb->p; } | |
38648 | + | |
38649 | +/********************************************************************** | |
38650 | + * struct iovec abstraction (for Windows port) | |
38651 | + */ | |
38652 | + | |
38653 | +typedef struct iovec ci_iovec; | |
38654 | + | |
38655 | +/* Accessors for buffer/length */ | |
38656 | +#define CI_IOVEC_BASE(i) ((i)->iov_base) | |
38657 | +#define CI_IOVEC_LEN(i) ((i)->iov_len) | |
38658 | + | |
38659 | +/********************************************************************** | |
38660 | + * Signals | |
38661 | + */ | |
38662 | + | |
38663 | +ci_inline void | |
38664 | +ci_send_sig(int signum) | |
38665 | +{ | |
38666 | + send_sig(signum, current, 0); | |
38667 | +} | |
38668 | + | |
38669 | +#endif /* __CI_TOOLS_LINUX_KERNEL_H__ */ | |
38670 | +/*! \cidoxg_end */ | |
38671 | Index: head-2008-11-25/drivers/xen/sfc_netback/ci/tools/sysdep.h | |
38672 | =================================================================== | |
38673 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
38674 | +++ head-2008-11-25/drivers/xen/sfc_netback/ci/tools/sysdep.h 2008-02-20 09:32:49.000000000 +0100 | |
38675 | @@ -0,0 +1,132 @@ | |
38676 | +/**************************************************************************** | |
38677 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
38678 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
38679 | + * 9501 Jeronimo Road, Suite 250, | |
38680 | + * Irvine, CA 92618, USA | |
38681 | + * | |
38682 | + * Maintained by Solarflare Communications | |
38683 | + * <linux-xen-drivers@solarflare.com> | |
38684 | + * <onload-dev@solarflare.com> | |
38685 | + * | |
38686 | + * This program is free software; you can redistribute it and/or modify it | |
38687 | + * under the terms of the GNU General Public License version 2 as published | |
38688 | + * by the Free Software Foundation, incorporated herein by reference. | |
38689 | + * | |
38690 | + * This program is distributed in the hope that it will be useful, | |
38691 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
38692 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
38693 | + * GNU General Public License for more details. | |
38694 | + * | |
38695 | + * You should have received a copy of the GNU General Public License | |
38696 | + * along with this program; if not, write to the Free Software | |
38697 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
38698 | + **************************************************************************** | |
38699 | + */ | |
38700 | + | |
38701 | +/*! \cidoxg_include_ci_tools */ | |
38702 | + | |
38703 | +#ifndef __CI_TOOLS_SYSDEP_H__ | |
38704 | +#define __CI_TOOLS_SYSDEP_H__ | |
38705 | + | |
38706 | +/* Make this header self-sufficient */ | |
38707 | +#include <ci/compat.h> | |
38708 | +#include <ci/tools/log.h> | |
38709 | +#include <ci/tools/debug.h> | |
38710 | + | |
38711 | + | |
38712 | +/********************************************************************** | |
38713 | + * Platform dependencies. | |
38714 | + */ | |
38715 | + | |
38716 | +#if defined(__KERNEL__) | |
38717 | + | |
38718 | +# if defined(__linux__) | |
38719 | +# include <ci/tools/platform/linux_kernel.h> | |
38720 | +# elif defined(_WIN32) | |
38721 | +# include <ci/tools/platform/win32_kernel.h> | |
38722 | +# elif defined(__sun__) | |
38723 | +# include <ci/tools/platform/sunos_kernel.h> | |
38724 | +# else | |
38725 | +# error Unknown platform. | |
38726 | +# endif | |
38727 | + | |
38728 | +#elif defined(_WIN32) | |
38729 | + | |
38730 | +# include <ci/tools/platform/win32.h> | |
38731 | + | |
38732 | +#elif defined(__unix__) | |
38733 | + | |
38734 | +# include <ci/tools/platform/unix.h> | |
38735 | + | |
38736 | +#else | |
38737 | + | |
38738 | +# error Unknown platform. | |
38739 | + | |
38740 | +#endif | |
38741 | + | |
38742 | +#if defined(__linux__) | |
38743 | +/*! Linux sendfile() support enable/disable. */ | |
38744 | +# define CI_HAVE_SENDFILE /* provide sendfile i/f */ | |
38745 | + | |
38746 | +# define CI_HAVE_OS_NOPAGE | |
38747 | +#endif | |
38748 | + | |
38749 | +#if defined(__sun__) | |
38750 | +# define CI_HAVE_SENDFILE /* provide sendfile i/f */ | |
38751 | +# define CI_HAVE_SENDFILEV /* provide sendfilev i/f */ | |
38752 | + | |
38753 | +# define CI_IOCTL_SENDFILE /* use efrm CI_SENDFILEV ioctl */ | |
38754 | +#endif | |
38755 | + | |
38756 | +#if defined(_WIN32) | |
38757 | +typedef ci_uint32 ci_uerr_t; /* range of OS user-mode return codes */ | |
38758 | +typedef ci_uint32 ci_kerr_t; /* range of OS kernel-mode return codes */ | |
38759 | +#elif defined(__unix__) | |
38760 | +typedef ci_int32 ci_uerr_t; /* range of OS user-mode return codes */ | |
38761 | +typedef ci_int32 ci_kerr_t; /* range of OS kernel-mode return codes */ | |
38762 | +#endif | |
38763 | + | |
38764 | + | |
38765 | +/********************************************************************** | |
38766 | + * Compiler and processor dependencies. | |
38767 | + */ | |
38768 | + | |
38769 | +#if defined(__GNUC__) | |
38770 | + | |
38771 | +#if defined(__i386__) || defined(__x86_64__) | |
38772 | +# include <ci/tools/platform/gcc_x86.h> | |
38773 | +#elif defined(__PPC__) | |
38774 | +# include <ci/tools/platform/gcc_ppc.h> | |
38775 | +#elif defined(__ia64__) | |
38776 | +# include <ci/tools/platform/gcc_ia64.h> | |
38777 | +#else | |
38778 | +# error Unknown processor. | |
38779 | +#endif | |
38780 | + | |
38781 | +#elif defined(_MSC_VER) | |
38782 | + | |
38783 | +#if defined(__i386__) | |
38784 | +# include <ci/tools/platform/msvc_x86.h> | |
38785 | +# elif defined(__x86_64__) | |
38786 | +# include <ci/tools/platform/msvc_x86_64.h> | |
38787 | +#else | |
38788 | +# error Unknown processor. | |
38789 | +#endif | |
38790 | + | |
38791 | +#elif defined(__PGI) | |
38792 | + | |
38793 | +# include <ci/tools/platform/pg_x86.h> | |
38794 | + | |
38795 | +#elif defined(__INTEL_COMPILER) | |
38796 | + | |
38797 | +/* Intel compilers v7 claim to be very gcc compatible. */ | |
38798 | +# include <ci/tools/platform/gcc_x86.h> | |
38799 | + | |
38800 | +#else | |
38801 | +# error Unknown compiler. | |
38802 | +#endif | |
38803 | + | |
38804 | + | |
38805 | +#endif /* __CI_TOOLS_SYSDEP_H__ */ | |
38806 | + | |
38807 | +/*! \cidoxg_end */ | |
38808 | Index: head-2008-11-25/drivers/xen/sfc_netfront/Makefile | |
38809 | =================================================================== | |
38810 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
38811 | +++ head-2008-11-25/drivers/xen/sfc_netfront/Makefile 2008-02-26 10:54:11.000000000 +0100 | |
38812 | @@ -0,0 +1,11 @@ | |
38813 | +EXTRA_CFLAGS += -Idrivers/xen/sfc_netfront -Idrivers/xen/sfc_netutil -Idrivers/xen/netfront | |
38814 | +EXTRA_CFLAGS += -D__ci_driver__ | |
38815 | +EXTRA_CFLAGS += -Werror | |
38816 | + | |
38817 | +ifdef GCOV | |
38818 | +EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV | |
38819 | +endif | |
38820 | + | |
38821 | +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND) := sfc_netfront.o | |
38822 | + | |
38823 | +sfc_netfront-objs := accel_msg.o accel_bufs.o accel_netfront.o accel_vi.o accel_xenbus.o accel_tso.o accel_ssr.o accel_debugfs.o falcon_event.o falcon_vi.o pt_tx.o vi_init.o | |
38824 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel.h | |
38825 | =================================================================== | |
38826 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
38827 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel.h 2008-02-26 10:54:11.000000000 +0100 | |
38828 | @@ -0,0 +1,477 @@ | |
38829 | +/**************************************************************************** | |
38830 | + * Solarflare driver for Xen network acceleration | |
38831 | + * | |
38832 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
38833 | + * 9501 Jeronimo Road, Suite 250, | |
38834 | + * Irvine, CA 92618, USA | |
38835 | + * | |
38836 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
38837 | + * | |
38838 | + * This program is free software; you can redistribute it and/or modify it | |
38839 | + * under the terms of the GNU General Public License version 2 as published | |
38840 | + * by the Free Software Foundation, incorporated herein by reference. | |
38841 | + * | |
38842 | + * This program is distributed in the hope that it will be useful, | |
38843 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
38844 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
38845 | + * GNU General Public License for more details. | |
38846 | + * | |
38847 | + * You should have received a copy of the GNU General Public License | |
38848 | + * along with this program; if not, write to the Free Software | |
38849 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
38850 | + **************************************************************************** | |
38851 | + */ | |
38852 | + | |
38853 | +#ifndef NETFRONT_ACCEL_H | |
38854 | +#define NETFRONT_ACCEL_H | |
38855 | + | |
38856 | +#include "accel_msg_iface.h" | |
38857 | +#include "accel_cuckoo_hash.h" | |
38858 | +#include "accel_bufs.h" | |
38859 | + | |
38860 | +#include "etherfabric/ef_vi.h" | |
38861 | + | |
38862 | +#include <xen/xenbus.h> | |
38863 | +#include <xen/evtchn.h> | |
38864 | + | |
38865 | +#include <linux/kernel.h> | |
38866 | +#include <linux/list.h> | |
38867 | + | |
38868 | +enum netfront_accel_post_status { | |
38869 | + NETFRONT_ACCEL_STATUS_GOOD, | |
38870 | + NETFRONT_ACCEL_STATUS_BUSY, | |
38871 | + NETFRONT_ACCEL_STATUS_CANT | |
38872 | +}; | |
38873 | + | |
38874 | +#define NETFRONT_ACCEL_STATS 1 | |
38875 | +#if NETFRONT_ACCEL_STATS | |
38876 | +#define NETFRONT_ACCEL_STATS_OP(x) x | |
38877 | +#else | |
38878 | +#define NETFRONT_ACCEL_STATS_OP(x) | |
38879 | +#endif | |
38880 | + | |
38881 | + | |
38882 | +enum netfront_accel_msg_state { | |
38883 | + NETFRONT_ACCEL_MSG_NONE = 0, | |
38884 | + NETFRONT_ACCEL_MSG_HELLO = 1, | |
38885 | + NETFRONT_ACCEL_MSG_HW = 2 | |
38886 | +}; | |
38887 | + | |
38888 | + | |
38889 | +typedef struct { | |
38890 | + u32 in_progress; | |
38891 | + u32 total_len; | |
38892 | + struct sk_buff *skb; | |
38893 | +} netfront_accel_jumbo_state; | |
38894 | + | |
38895 | + | |
38896 | +struct netfront_accel_ssr_state { | |
38897 | + /** List of tracked connections. */ | |
38898 | + struct list_head conns; | |
38899 | + | |
38900 | + /** Free efx_ssr_conn instances. */ | |
38901 | + struct list_head free_conns; | |
38902 | +}; | |
38903 | + | |
38904 | + | |
38905 | +struct netfront_accel_netdev_stats { | |
38906 | + /* Fastpath stats. */ | |
38907 | + u32 fastpath_rx_pkts; | |
38908 | + u32 fastpath_rx_bytes; | |
38909 | + u32 fastpath_rx_errors; | |
38910 | + u32 fastpath_tx_pkts; | |
38911 | + u32 fastpath_tx_bytes; | |
38912 | + u32 fastpath_tx_errors; | |
38913 | +}; | |
38914 | + | |
38915 | + | |
38916 | +struct netfront_accel_netdev_dbfs { | |
38917 | + struct dentry *fastpath_rx_pkts; | |
38918 | + struct dentry *fastpath_rx_bytes; | |
38919 | + struct dentry *fastpath_rx_errors; | |
38920 | + struct dentry *fastpath_tx_pkts; | |
38921 | + struct dentry *fastpath_tx_bytes; | |
38922 | + struct dentry *fastpath_tx_errors; | |
38923 | +}; | |
38924 | + | |
38925 | + | |
38926 | +struct netfront_accel_stats { | |
38927 | + /** Fast path events */ | |
38928 | + u64 fastpath_tx_busy; | |
38929 | + | |
38930 | + /** TX DMA queue status */ | |
38931 | + u64 fastpath_tx_completions; | |
38932 | + | |
38933 | + /** The number of events processed. */ | |
38934 | + u64 event_count; | |
38935 | + | |
38936 | + /** Number of frame trunc events seen on fastpath */ | |
38937 | + u64 fastpath_frm_trunc; | |
38938 | + | |
38939 | + /** Number of no rx descriptor trunc events seen on fastpath */ | |
38940 | + u64 rx_no_desc_trunc; | |
38941 | + | |
38942 | + /** The number of misc bad events (e.g. RX_DISCARD) processed. */ | |
38943 | + u64 bad_event_count; | |
38944 | + | |
38945 | + /** Number of events dealt with in poll loop */ | |
38946 | + u32 events_per_poll_max; | |
38947 | + u32 events_per_poll_tx_max; | |
38948 | + u32 events_per_poll_rx_max; | |
38949 | + | |
38950 | + /** Largest number of concurrently outstanding tx descriptors */ | |
38951 | + u32 fastpath_tx_pending_max; | |
38952 | + | |
38953 | + /** The number of events since the last interrupts. */ | |
38954 | + u32 event_count_since_irq; | |
38955 | + | |
38956 | + /** The max number of events between interrupts. */ | |
38957 | + u32 events_per_irq_max; | |
38958 | + | |
38959 | + /** The number of interrupts. */ | |
38960 | + u64 irq_count; | |
38961 | + | |
38962 | + /** The number of useless interrupts. */ | |
38963 | + u64 useless_irq_count; | |
38964 | + | |
38965 | + /** The number of polls scheduled. */ | |
38966 | + u64 poll_schedule_count; | |
38967 | + | |
38968 | + /** The number of polls called. */ | |
38969 | + u64 poll_call_count; | |
38970 | + | |
38971 | + /** The number of rechecks. */ | |
38972 | + u64 poll_reschedule_count; | |
38973 | + | |
38974 | + /** Number of times we've called netif_stop_queue/netif_wake_queue */ | |
38975 | + u64 queue_stops; | |
38976 | + u64 queue_wakes; | |
38977 | + | |
38978 | + /** SSR stats */ | |
38979 | + u64 ssr_bursts; | |
38980 | + u64 ssr_drop_stream; | |
38981 | + u64 ssr_misorder; | |
38982 | + u64 ssr_slow_start; | |
38983 | + u64 ssr_merges; | |
38984 | + u64 ssr_too_many; | |
38985 | + u64 ssr_new_stream; | |
38986 | +}; | |
38987 | + | |
38988 | + | |
38989 | +struct netfront_accel_dbfs { | |
38990 | + struct dentry *fastpath_tx_busy; | |
38991 | + struct dentry *fastpath_tx_completions; | |
38992 | + struct dentry *fastpath_tx_pending_max; | |
38993 | + struct dentry *fastpath_frm_trunc; | |
38994 | + struct dentry *rx_no_desc_trunc; | |
38995 | + struct dentry *event_count; | |
38996 | + struct dentry *bad_event_count; | |
38997 | + struct dentry *events_per_poll_max; | |
38998 | + struct dentry *events_per_poll_rx_max; | |
38999 | + struct dentry *events_per_poll_tx_max; | |
39000 | + struct dentry *event_count_since_irq; | |
39001 | + struct dentry *events_per_irq_max; | |
39002 | + struct dentry *irq_count; | |
39003 | + struct dentry *useless_irq_count; | |
39004 | + struct dentry *poll_schedule_count; | |
39005 | + struct dentry *poll_call_count; | |
39006 | + struct dentry *poll_reschedule_count; | |
39007 | + struct dentry *queue_stops; | |
39008 | + struct dentry *queue_wakes; | |
39009 | + struct dentry *ssr_bursts; | |
39010 | + struct dentry *ssr_drop_stream; | |
39011 | + struct dentry *ssr_misorder; | |
39012 | + struct dentry *ssr_slow_start; | |
39013 | + struct dentry *ssr_merges; | |
39014 | + struct dentry *ssr_too_many; | |
39015 | + struct dentry *ssr_new_stream; | |
39016 | +}; | |
39017 | + | |
39018 | + | |
39019 | +typedef struct netfront_accel_vnic { | |
39020 | + struct netfront_accel_vnic *next; | |
39021 | + | |
39022 | + struct mutex vnic_mutex; | |
39023 | + | |
39024 | + spinlock_t tx_lock; | |
39025 | + | |
39026 | + struct netfront_accel_bufpages bufpages; | |
39027 | + struct netfront_accel_bufinfo *rx_bufs; | |
39028 | + struct netfront_accel_bufinfo *tx_bufs; | |
39029 | + | |
39030 | + /** Hardware & VI state */ | |
39031 | + ef_vi vi; | |
39032 | + | |
39033 | + ef_vi_state *vi_state; | |
39034 | + | |
39035 | + ef_eventq_state evq_state; | |
39036 | + | |
39037 | + void *evq_mapping; | |
39038 | + | |
39039 | + /** Hardware dependant state */ | |
39040 | + union { | |
39041 | + struct { | |
39042 | + /** Falcon A or B */ | |
39043 | + enum net_accel_hw_type type; | |
39044 | + u32 *evq_rptr; | |
39045 | + u32 *doorbell; | |
39046 | + void *evq_rptr_mapping; | |
39047 | + void *doorbell_mapping; | |
39048 | + void *txdmaq_mapping; | |
39049 | + void *rxdmaq_mapping; | |
39050 | + } falcon; | |
39051 | + } hw; | |
39052 | + | |
39053 | + /** RX DMA queue status */ | |
39054 | + u32 rx_dma_level; | |
39055 | + | |
39056 | + /** Number of RX descriptors waiting to be pushed to the card. */ | |
39057 | + u32 rx_dma_batched; | |
39058 | +#define NETFRONT_ACCEL_RX_DESC_BATCH 16 | |
39059 | + | |
39060 | + /** | |
39061 | + * Hash table of remote mac addresses to decide whether to try | |
39062 | + * fast path | |
39063 | + */ | |
39064 | + cuckoo_hash_table fastpath_table; | |
39065 | + spinlock_t table_lock; | |
39066 | + | |
39067 | + /** the local mac address of virtual interface we're accelerating */ | |
39068 | + u8 mac[ETH_ALEN]; | |
39069 | + | |
39070 | + int rx_pkt_stride; | |
39071 | + int rx_skb_stride; | |
39072 | + | |
39073 | + /** | |
39074 | + * Keep track of fragments of jumbo packets as events are | |
39075 | + * delivered by NIC | |
39076 | + */ | |
39077 | + netfront_accel_jumbo_state jumbo_state; | |
39078 | + | |
39079 | + struct net_device *net_dev; | |
39080 | + | |
39081 | + /** These two gate the enabling of fast path operations */ | |
39082 | + int frontend_ready; | |
39083 | + int backend_netdev_up; | |
39084 | + | |
39085 | + int irq_enabled; | |
39086 | + spinlock_t irq_enabled_lock; | |
39087 | + | |
39088 | + int tx_enabled; | |
39089 | + | |
39090 | + int poll_enabled; | |
39091 | + | |
39092 | + /** A spare slot for a TX packet. This is treated as an extension | |
39093 | + * of the DMA queue. */ | |
39094 | + struct sk_buff *tx_skb; | |
39095 | + | |
39096 | + /** Keep track of fragments of SSR packets */ | |
39097 | + struct netfront_accel_ssr_state ssr_state; | |
39098 | + | |
39099 | + struct xenbus_device *dev; | |
39100 | + | |
39101 | + /** Event channel for messages */ | |
39102 | + int msg_channel; | |
39103 | + int msg_channel_irq; | |
39104 | + | |
39105 | + /** Event channel for network interrupts. */ | |
39106 | + int net_channel; | |
39107 | + int net_channel_irq; | |
39108 | + | |
39109 | + struct net_accel_shared_page *shared_page; | |
39110 | + | |
39111 | + grant_ref_t ctrl_page_gnt; | |
39112 | + grant_ref_t msg_page_gnt; | |
39113 | + | |
39114 | + /** Message Qs, 1 each way. */ | |
39115 | + sh_msg_fifo2 to_dom0; | |
39116 | + sh_msg_fifo2 from_dom0; | |
39117 | + | |
39118 | + enum netfront_accel_msg_state msg_state; | |
39119 | + | |
39120 | + /** Watch on accelstate */ | |
39121 | + struct xenbus_watch backend_accel_watch; | |
39122 | + /** Watch on frontend's MAC address */ | |
39123 | + struct xenbus_watch mac_address_watch; | |
39124 | + | |
39125 | + /** Work to process received irq/msg */ | |
39126 | + struct work_struct msg_from_bend; | |
39127 | + | |
39128 | + /** Wait queue for changes in accelstate. */ | |
39129 | + wait_queue_head_t state_wait_queue; | |
39130 | + | |
39131 | + /** The current accelstate of this driver. */ | |
39132 | + XenbusState frontend_state; | |
39133 | + | |
39134 | + /** The most recent accelstate seen by the xenbus watch. */ | |
39135 | + XenbusState backend_state; | |
39136 | + | |
39137 | + /** Non-zero if we should reject requests to connect. */ | |
39138 | + int removing; | |
39139 | + | |
39140 | + /** Non-zero if the domU shared state has been initialised. */ | |
39141 | + int domU_state_is_setup; | |
39142 | + | |
39143 | + /** Non-zero if the dom0 shared state has been initialised. */ | |
39144 | + int dom0_state_is_setup; | |
39145 | + | |
39146 | + /* Those statistics that are added to the netdev stats */ | |
39147 | + struct netfront_accel_netdev_stats netdev_stats; | |
39148 | + struct netfront_accel_netdev_stats stats_last_read; | |
39149 | +#ifdef CONFIG_DEBUG_FS | |
39150 | + struct netfront_accel_netdev_dbfs netdev_dbfs; | |
39151 | +#endif | |
39152 | + | |
39153 | + /* These statistics are internal and optional */ | |
39154 | +#if NETFRONT_ACCEL_STATS | |
39155 | + struct netfront_accel_stats stats; | |
39156 | +#ifdef CONFIG_DEBUG_FS | |
39157 | + struct netfront_accel_dbfs dbfs; | |
39158 | +#endif | |
39159 | +#endif | |
39160 | + | |
39161 | + /** Debufs fs dir for this interface */ | |
39162 | + struct dentry *dbfs_dir; | |
39163 | +} netfront_accel_vnic; | |
39164 | + | |
39165 | + | |
39166 | +/* Module parameters */ | |
39167 | +extern unsigned sfc_netfront_max_pages; | |
39168 | +extern unsigned sfc_netfront_buffer_split; | |
39169 | + | |
39170 | +extern const char *frontend_name; | |
39171 | +extern struct netfront_accel_hooks accel_hooks; | |
39172 | +extern struct workqueue_struct *netfront_accel_workqueue; | |
39173 | + | |
39174 | + | |
39175 | +extern | |
39176 | +void netfront_accel_vi_ctor(netfront_accel_vnic *vnic); | |
39177 | + | |
39178 | +extern | |
39179 | +int netfront_accel_vi_init(netfront_accel_vnic *vnic, | |
39180 | + struct net_accel_msg_hw *hw_msg); | |
39181 | + | |
39182 | +extern | |
39183 | +void netfront_accel_vi_dtor(netfront_accel_vnic *vnic); | |
39184 | + | |
39185 | + | |
39186 | +/** | |
39187 | + * Add new buffers which have been registered with the NIC. | |
39188 | + * | |
39189 | + * @v vnic The vnic instance to process the response. | |
39190 | + * | |
39191 | + * The buffers contained in the message are added to the buffer pool. | |
39192 | + */ | |
39193 | +extern | |
39194 | +void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx); | |
39195 | + | |
39196 | +/** | |
39197 | + * Put a packet on the tx DMA queue. | |
39198 | + * | |
39199 | + * @v vnic The vnic instance to accept the packet. | |
39200 | + * @v skb A sk_buff to send. | |
39201 | + * | |
39202 | + * Attempt to send a packet. On success, the skb is owned by the DMA | |
39203 | + * queue and will be released when the completion event arrives. | |
39204 | + */ | |
39205 | +extern enum netfront_accel_post_status | |
39206 | +netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, | |
39207 | + struct sk_buff *skb); | |
39208 | + | |
39209 | + | |
39210 | +/** | |
39211 | + * Process events in response to an interrupt. | |
39212 | + * | |
39213 | + * @v vnic The vnic instance to poll. | |
39214 | + * @v rx_packets The maximum number of rx packets to process. | |
39215 | + * @ret rx_done The number of rx packets processed. | |
39216 | + * | |
39217 | + * The vnic will process events until there are no more events | |
39218 | + * remaining or the specified number of rx packets has been processed. | |
39219 | + * The split from the interrupt call is to allow Linux NAPI | |
39220 | + * polling. | |
39221 | + */ | |
39222 | +extern | |
39223 | +int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets); | |
39224 | + | |
39225 | + | |
39226 | +/** | |
39227 | + * Iterate over the fragments of a packet buffer. | |
39228 | + * | |
39229 | + * @v skb The packet buffer to examine. | |
39230 | + * @v idx A variable name for the fragment index. | |
39231 | + * @v data A variable name for the address of the fragment data. | |
39232 | + * @v length A variable name for the fragment length. | |
39233 | + * @v code A section of code to execute for each fragment. | |
39234 | + * | |
39235 | + * This macro iterates over the fragments in a packet buffer and | |
39236 | + * executes the code for each of them. | |
39237 | + */ | |
39238 | +#define NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT(skb, frag_idx, \ | |
39239 | + frag_data, frag_len, \ | |
39240 | + code) \ | |
39241 | + do { \ | |
39242 | + int frag_idx; \ | |
39243 | + void *frag_data; \ | |
39244 | + unsigned int frag_len; \ | |
39245 | + \ | |
39246 | + frag_data = skb->data; \ | |
39247 | + frag_len = skb_headlen(skb); \ | |
39248 | + frag_idx = 0; \ | |
39249 | + while (1) { /* For each fragment */ \ | |
39250 | + code; \ | |
39251 | + if (frag_idx >= skb_shinfo(skb)->nr_frags) { \ | |
39252 | + break; \ | |
39253 | + } else { \ | |
39254 | + skb_frag_t *fragment; \ | |
39255 | + fragment = &skb_shinfo(skb)->frags[frag_idx]; \ | |
39256 | + frag_len = fragment->size; \ | |
39257 | + frag_data = ((void*)page_address(fragment->page) \ | |
39258 | + + fragment->page_offset); \ | |
39259 | + }; \ | |
39260 | + frag_idx++; \ | |
39261 | + } \ | |
39262 | + } while(0) | |
39263 | + | |
39264 | +static inline | |
39265 | +void netfront_accel_disable_net_interrupts(netfront_accel_vnic *vnic) | |
39266 | +{ | |
39267 | + mask_evtchn(vnic->net_channel); | |
39268 | +} | |
39269 | + | |
39270 | +static inline | |
39271 | +void netfront_accel_enable_net_interrupts(netfront_accel_vnic *vnic) | |
39272 | +{ | |
39273 | + unmask_evtchn(vnic->net_channel); | |
39274 | +} | |
39275 | + | |
39276 | +void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac, | |
39277 | + u32 ip, u16 port, u8 protocol); | |
39278 | + | |
39279 | +/* Process an IRQ received from back end driver */ | |
39280 | +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context, | |
39281 | + struct pt_regs *unused); | |
39282 | +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context, | |
39283 | + struct pt_regs *unused); | |
39284 | + | |
39285 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
39286 | +extern void netfront_accel_msg_from_bend(struct work_struct *context); | |
39287 | +#else | |
39288 | +extern void netfront_accel_msg_from_bend(void *context); | |
39289 | +#endif | |
39290 | + | |
39291 | +extern void vnic_stop_fastpath(netfront_accel_vnic *vnic); | |
39292 | + | |
39293 | +extern int netfront_accel_probe(struct net_device *net_dev, | |
39294 | + struct xenbus_device *dev); | |
39295 | +extern int netfront_accel_remove(struct xenbus_device *dev); | |
39296 | +extern void netfront_accel_set_closing(netfront_accel_vnic *vnic); | |
39297 | + | |
39298 | +extern int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic); | |
39299 | + | |
39300 | +extern void netfront_accel_debugfs_init(void); | |
39301 | +extern void netfront_accel_debugfs_fini(void); | |
39302 | +extern int netfront_accel_debugfs_create(netfront_accel_vnic *vnic); | |
39303 | +extern int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic); | |
39304 | + | |
39305 | +#endif /* NETFRONT_ACCEL_H */ | |
39306 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.c | |
39307 | =================================================================== | |
39308 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
39309 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.c 2008-02-26 10:54:12.000000000 +0100 | |
39310 | @@ -0,0 +1,393 @@ | |
39311 | +/**************************************************************************** | |
39312 | + * Solarflare driver for Xen network acceleration | |
39313 | + * | |
39314 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
39315 | + * 9501 Jeronimo Road, Suite 250, | |
39316 | + * Irvine, CA 92618, USA | |
39317 | + * | |
39318 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
39319 | + * | |
39320 | + * This program is free software; you can redistribute it and/or modify it | |
39321 | + * under the terms of the GNU General Public License version 2 as published | |
39322 | + * by the Free Software Foundation, incorporated herein by reference. | |
39323 | + * | |
39324 | + * This program is distributed in the hope that it will be useful, | |
39325 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
39326 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
39327 | + * GNU General Public License for more details. | |
39328 | + * | |
39329 | + * You should have received a copy of the GNU General Public License | |
39330 | + * along with this program; if not, write to the Free Software | |
39331 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
39332 | + **************************************************************************** | |
39333 | + */ | |
39334 | + | |
39335 | +#include <xen/gnttab.h> | |
39336 | + | |
39337 | +#include "accel_bufs.h" | |
39338 | +#include "accel_util.h" | |
39339 | + | |
39340 | +#include "accel.h" | |
39341 | + | |
39342 | + | |
39343 | +static int | |
39344 | +netfront_accel_alloc_buf_desc_blocks(struct netfront_accel_bufinfo *manager, | |
39345 | + int pages) | |
39346 | +{ | |
39347 | + manager->desc_blocks = | |
39348 | + kzalloc(sizeof(struct netfront_accel_pkt_desc *) * | |
39349 | + NETFRONT_ACCEL_BUF_NUM_BLOCKS(pages), GFP_KERNEL); | |
39350 | + if (manager->desc_blocks == NULL) { | |
39351 | + return -ENOMEM; | |
39352 | + } | |
39353 | + | |
39354 | + return 0; | |
39355 | +} | |
39356 | + | |
39357 | +static int | |
39358 | +netfront_accel_alloc_buf_lists(struct netfront_accel_bufpages *bufpages, | |
39359 | + int pages) | |
39360 | +{ | |
39361 | + bufpages->page_list = kmalloc(pages * sizeof(void *), GFP_KERNEL); | |
39362 | + if (bufpages->page_list == NULL) { | |
39363 | + return -ENOMEM; | |
39364 | + } | |
39365 | + | |
39366 | + bufpages->grant_list = kzalloc(pages * sizeof(grant_ref_t), GFP_KERNEL); | |
39367 | + if (bufpages->grant_list == NULL) { | |
39368 | + kfree(bufpages->page_list); | |
39369 | + bufpages->page_list = NULL; | |
39370 | + return -ENOMEM; | |
39371 | + } | |
39372 | + | |
39373 | + return 0; | |
39374 | +} | |
39375 | + | |
39376 | + | |
39377 | +int netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages, | |
39378 | + struct netfront_accel_bufinfo *rx_manager, | |
39379 | + struct netfront_accel_bufinfo *tx_manager, | |
39380 | + int pages) | |
39381 | +{ | |
39382 | + int n, rc; | |
39383 | + | |
39384 | + if ((rc = netfront_accel_alloc_buf_desc_blocks | |
39385 | + (rx_manager, pages - (pages / sfc_netfront_buffer_split))) < 0) { | |
39386 | + goto rx_fail; | |
39387 | + } | |
39388 | + | |
39389 | + if ((rc = netfront_accel_alloc_buf_desc_blocks | |
39390 | + (tx_manager, pages / sfc_netfront_buffer_split)) < 0) { | |
39391 | + goto tx_fail; | |
39392 | + } | |
39393 | + | |
39394 | + if ((rc = netfront_accel_alloc_buf_lists(bufpages, pages)) < 0) { | |
39395 | + goto lists_fail; | |
39396 | + } | |
39397 | + | |
39398 | + for (n = 0; n < pages; n++) { | |
39399 | + void *tmp = (void*)__get_free_page(GFP_KERNEL); | |
39400 | + if (tmp == NULL) | |
39401 | + break; | |
39402 | + | |
39403 | + bufpages->page_list[n] = tmp; | |
39404 | + } | |
39405 | + | |
39406 | + if (n != pages) { | |
39407 | + EPRINTK("%s: not enough pages: %d != %d\n", __FUNCTION__, n, | |
39408 | + pages); | |
39409 | + for (; n >= 0; n--) | |
39410 | + free_page((unsigned long)(bufpages->page_list[n])); | |
39411 | + rc = -ENOMEM; | |
39412 | + goto pages_fail; | |
39413 | + } | |
39414 | + | |
39415 | + bufpages->max_pages = pages; | |
39416 | + bufpages->page_reqs = 0; | |
39417 | + | |
39418 | + return 0; | |
39419 | + | |
39420 | + pages_fail: | |
39421 | + kfree(bufpages->page_list); | |
39422 | + kfree(bufpages->grant_list); | |
39423 | + | |
39424 | + bufpages->page_list = NULL; | |
39425 | + bufpages->grant_list = NULL; | |
39426 | + lists_fail: | |
39427 | + kfree(tx_manager->desc_blocks); | |
39428 | + tx_manager->desc_blocks = NULL; | |
39429 | + | |
39430 | + tx_fail: | |
39431 | + kfree(rx_manager->desc_blocks); | |
39432 | + rx_manager->desc_blocks = NULL; | |
39433 | + rx_fail: | |
39434 | + return rc; | |
39435 | +} | |
39436 | + | |
39437 | + | |
39438 | +void netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages, | |
39439 | + struct netfront_accel_bufinfo *rx_manager, | |
39440 | + struct netfront_accel_bufinfo *tx_manager) | |
39441 | +{ | |
39442 | + int i; | |
39443 | + | |
39444 | + for (i = 0; i < bufpages->max_pages; i++) { | |
39445 | + if (bufpages->grant_list[i] != 0) | |
39446 | + net_accel_ungrant_page(bufpages->grant_list[i]); | |
39447 | + free_page((unsigned long)(bufpages->page_list[i])); | |
39448 | + } | |
39449 | + | |
39450 | + if (bufpages->max_pages) { | |
39451 | + kfree(bufpages->page_list); | |
39452 | + kfree(bufpages->grant_list); | |
39453 | + kfree(rx_manager->desc_blocks); | |
39454 | + kfree(tx_manager->desc_blocks); | |
39455 | + } | |
39456 | +} | |
39457 | + | |
39458 | + | |
39459 | +/* | |
39460 | + * Allocate memory for the buffer manager and create a lock. If no | |
39461 | + * lock is supplied its own is allocated. | |
39462 | + */ | |
39463 | +struct netfront_accel_bufinfo *netfront_accel_init_bufs(spinlock_t *lock) | |
39464 | +{ | |
39465 | + struct netfront_accel_bufinfo *res = kmalloc(sizeof(*res), GFP_KERNEL); | |
39466 | + if (res != NULL) { | |
39467 | + res->npages = res->nused = 0; | |
39468 | + res->first_free = -1; | |
39469 | + | |
39470 | + if (lock == NULL) { | |
39471 | + res->lock = kmalloc(sizeof(*res->lock), GFP_KERNEL); | |
39472 | + if (res->lock == NULL) { | |
39473 | + kfree(res); | |
39474 | + return NULL; | |
39475 | + } | |
39476 | + spin_lock_init(res->lock); | |
39477 | + res->internally_locked = 1; | |
39478 | + } else { | |
39479 | + res->lock = lock; | |
39480 | + res->internally_locked = 0; | |
39481 | + } | |
39482 | + | |
39483 | + res->desc_blocks = NULL; | |
39484 | + } | |
39485 | + | |
39486 | + return res; | |
39487 | +} | |
39488 | + | |
39489 | + | |
39490 | +void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *bufs) | |
39491 | +{ | |
39492 | + if (bufs->internally_locked) | |
39493 | + kfree(bufs->lock); | |
39494 | + kfree(bufs); | |
39495 | +} | |
39496 | + | |
39497 | + | |
39498 | +int netfront_accel_buf_map_request(struct xenbus_device *dev, | |
39499 | + struct netfront_accel_bufpages *bufpages, | |
39500 | + struct net_accel_msg *msg, | |
39501 | + int pages, int offset) | |
39502 | +{ | |
39503 | + int i, mfn; | |
39504 | + int err; | |
39505 | + | |
39506 | + net_accel_msg_init(msg, NET_ACCEL_MSG_MAPBUF); | |
39507 | + | |
39508 | + BUG_ON(pages > NET_ACCEL_MSG_MAX_PAGE_REQ); | |
39509 | + | |
39510 | + msg->u.mapbufs.pages = pages; | |
39511 | + | |
39512 | + for (i = 0; i < msg->u.mapbufs.pages; i++) { | |
39513 | + /* | |
39514 | + * This can happen if we tried to send this message | |
39515 | + * earlier but the queue was full. | |
39516 | + */ | |
39517 | + if (bufpages->grant_list[offset+i] != 0) { | |
39518 | + msg->u.mapbufs.grants[i] = | |
39519 | + bufpages->grant_list[offset+i]; | |
39520 | + continue; | |
39521 | + } | |
39522 | + | |
39523 | + mfn = virt_to_mfn(bufpages->page_list[offset+i]); | |
39524 | + VPRINTK("%s: Granting page %d, mfn %08x\n", | |
39525 | + __FUNCTION__, i, mfn); | |
39526 | + | |
39527 | + bufpages->grant_list[offset+i] = | |
39528 | + net_accel_grant_page(dev, mfn, 0); | |
39529 | + msg->u.mapbufs.grants[i] = bufpages->grant_list[offset+i]; | |
39530 | + | |
39531 | + if (msg->u.mapbufs.grants[i] < 0) { | |
39532 | + EPRINTK("%s: Failed to grant buffer: %d\n", | |
39533 | + __FUNCTION__, msg->u.mapbufs.grants[i]); | |
39534 | + err = -EIO; | |
39535 | + goto error; | |
39536 | + } | |
39537 | + } | |
39538 | + | |
39539 | + /* This is interpreted on return as the offset in the the page_list */ | |
39540 | + msg->u.mapbufs.reqid = offset; | |
39541 | + | |
39542 | + return 0; | |
39543 | + | |
39544 | +error: | |
39545 | + /* Ungrant all the pages we've successfully granted. */ | |
39546 | + for (i--; i >= 0; i--) { | |
39547 | + net_accel_ungrant_page(bufpages->grant_list[offset+i]); | |
39548 | + bufpages->grant_list[offset+i] = 0; | |
39549 | + } | |
39550 | + return err; | |
39551 | +} | |
39552 | + | |
39553 | + | |
39554 | +/* Process a response to a buffer request. */ | |
39555 | +int netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages, | |
39556 | + struct netfront_accel_bufinfo *manager, | |
39557 | + struct net_accel_msg *msg) | |
39558 | +{ | |
39559 | + int msg_pages, page_offset, i, newtot; | |
39560 | + int old_block_count, new_block_count; | |
39561 | + u32 msg_buf; | |
39562 | + unsigned long flags; | |
39563 | + | |
39564 | + VPRINTK("%s: manager %p msg %p\n", __FUNCTION__, manager, msg); | |
39565 | + | |
39566 | + BUG_ON(msg->id != (NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY)); | |
39567 | + | |
39568 | + msg_pages = msg->u.mapbufs.pages; | |
39569 | + msg_buf = msg->u.mapbufs.buf; | |
39570 | + page_offset = msg->u.mapbufs.reqid; | |
39571 | + | |
39572 | + spin_lock_irqsave(manager->lock, flags); | |
39573 | + newtot = manager->npages + msg_pages; | |
39574 | + old_block_count = | |
39575 | + (manager->npages + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >> | |
39576 | + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT; | |
39577 | + new_block_count = | |
39578 | + (newtot + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >> | |
39579 | + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT; | |
39580 | + | |
39581 | + for (i = old_block_count; i < new_block_count; i++) { | |
39582 | + struct netfront_accel_pkt_desc *block; | |
39583 | + if (manager->desc_blocks[i] != NULL) { | |
39584 | + VPRINTK("Not needed\n"); | |
39585 | + continue; | |
39586 | + } | |
39587 | + block = kzalloc(NETFRONT_ACCEL_BUFS_PER_BLOCK * | |
39588 | + sizeof(netfront_accel_pkt_desc), GFP_ATOMIC); | |
39589 | + if (block == NULL) { | |
39590 | + spin_unlock_irqrestore(manager->lock, flags); | |
39591 | + return -ENOMEM; | |
39592 | + } | |
39593 | + manager->desc_blocks[i] = block; | |
39594 | + } | |
39595 | + for (i = manager->npages; i < newtot; i++) { | |
39596 | + int k, j = i - manager->npages; | |
39597 | + int block_num; | |
39598 | + int block_idx; | |
39599 | + struct netfront_accel_pkt_desc *pkt; | |
39600 | + | |
39601 | + block_num = i >> NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT; | |
39602 | + block_idx = (NETFRONT_ACCEL_BUFS_PER_PAGE*i) | |
39603 | + & (NETFRONT_ACCEL_BUFS_PER_BLOCK-1); | |
39604 | + | |
39605 | + pkt = manager->desc_blocks[block_num] + block_idx; | |
39606 | + | |
39607 | + for (k = 0; k < NETFRONT_ACCEL_BUFS_PER_PAGE; k++) { | |
39608 | + BUG_ON(page_offset + j >= bufpages->max_pages); | |
39609 | + | |
39610 | + pkt[k].buf_id = NETFRONT_ACCEL_BUFS_PER_PAGE * i + k; | |
39611 | + pkt[k].pkt_kva = bufpages->page_list[page_offset + j] + | |
39612 | + (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) * k; | |
39613 | + pkt[k].pkt_buff_addr = msg_buf + | |
39614 | + (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) * | |
39615 | + (NETFRONT_ACCEL_BUFS_PER_PAGE * j + k); | |
39616 | + pkt[k].next_free = manager->first_free; | |
39617 | + manager->first_free = pkt[k].buf_id; | |
39618 | + *(int*)(pkt[k].pkt_kva) = pkt[k].buf_id; | |
39619 | + | |
39620 | + VPRINTK("buf %d desc %p kva %p buffaddr %x\n", | |
39621 | + pkt[k].buf_id, &(pkt[k]), pkt[k].pkt_kva, | |
39622 | + pkt[k].pkt_buff_addr); | |
39623 | + } | |
39624 | + } | |
39625 | + manager->npages = newtot; | |
39626 | + spin_unlock_irqrestore(manager->lock, flags); | |
39627 | + VPRINTK("Added %d pages. Total is now %d\n", msg_pages, | |
39628 | + manager->npages); | |
39629 | + return 0; | |
39630 | +} | |
39631 | + | |
39632 | + | |
39633 | +netfront_accel_pkt_desc * | |
39634 | +netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id) | |
39635 | +{ | |
39636 | + netfront_accel_pkt_desc *pkt; | |
39637 | + int block_num = id >> NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT; | |
39638 | + int block_idx = id & (NETFRONT_ACCEL_BUFS_PER_BLOCK - 1); | |
39639 | + BUG_ON(id >= manager->npages * NETFRONT_ACCEL_BUFS_PER_PAGE); | |
39640 | + BUG_ON(block_idx >= NETFRONT_ACCEL_BUFS_PER_BLOCK); | |
39641 | + pkt = manager->desc_blocks[block_num] + block_idx; | |
39642 | + return pkt; | |
39643 | +} | |
39644 | + | |
39645 | + | |
39646 | +/* Allocate a buffer from the buffer manager */ | |
39647 | +netfront_accel_pkt_desc * | |
39648 | +netfront_accel_buf_get(struct netfront_accel_bufinfo *manager) | |
39649 | +{ | |
39650 | + int bufno = -1; | |
39651 | + netfront_accel_pkt_desc *buf = NULL; | |
39652 | + unsigned long flags = 0; | |
39653 | + | |
39654 | + /* Any spare? */ | |
39655 | + if (manager->first_free == -1) | |
39656 | + return NULL; | |
39657 | + /* Take lock */ | |
39658 | + if (manager->internally_locked) | |
39659 | + spin_lock_irqsave(manager->lock, flags); | |
39660 | + bufno = manager->first_free; | |
39661 | + if (bufno != -1) { | |
39662 | + buf = netfront_accel_buf_find(manager, bufno); | |
39663 | + manager->first_free = buf->next_free; | |
39664 | + manager->nused++; | |
39665 | + } | |
39666 | + /* Release lock */ | |
39667 | + if (manager->internally_locked) | |
39668 | + spin_unlock_irqrestore(manager->lock, flags); | |
39669 | + | |
39670 | + /* Tell the world */ | |
39671 | + VPRINTK("Allocated buffer %i, buffaddr %x\n", bufno, | |
39672 | + buf->pkt_buff_addr); | |
39673 | + | |
39674 | + return buf; | |
39675 | +} | |
39676 | + | |
39677 | + | |
39678 | +/* Release a buffer back to the buffer manager pool */ | |
39679 | +int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager, u16 id) | |
39680 | +{ | |
39681 | + netfront_accel_pkt_desc *buf = netfront_accel_buf_find(manager, id); | |
39682 | + unsigned long flags = 0; | |
39683 | + unsigned was_empty = 0; | |
39684 | + int bufno = id; | |
39685 | + | |
39686 | + VPRINTK("Freeing buffer %i\n", id); | |
39687 | + BUG_ON(id == (u16)-1); | |
39688 | + | |
39689 | + if (manager->internally_locked) | |
39690 | + spin_lock_irqsave(manager->lock, flags); | |
39691 | + | |
39692 | + if (manager->first_free == -1) | |
39693 | + was_empty = 1; | |
39694 | + | |
39695 | + buf->next_free = manager->first_free; | |
39696 | + manager->first_free = bufno; | |
39697 | + manager->nused--; | |
39698 | + | |
39699 | + if (manager->internally_locked) | |
39700 | + spin_unlock_irqrestore(manager->lock, flags); | |
39701 | + | |
39702 | + return was_empty; | |
39703 | +} | |
39704 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.h | |
39705 | =================================================================== | |
39706 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
39707 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_bufs.h 2008-02-20 09:32:49.000000000 +0100 | |
39708 | @@ -0,0 +1,181 @@ | |
39709 | +/**************************************************************************** | |
39710 | + * Solarflare driver for Xen network acceleration | |
39711 | + * | |
39712 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
39713 | + * 9501 Jeronimo Road, Suite 250, | |
39714 | + * Irvine, CA 92618, USA | |
39715 | + * | |
39716 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
39717 | + * | |
39718 | + * This program is free software; you can redistribute it and/or modify it | |
39719 | + * under the terms of the GNU General Public License version 2 as published | |
39720 | + * by the Free Software Foundation, incorporated herein by reference. | |
39721 | + * | |
39722 | + * This program is distributed in the hope that it will be useful, | |
39723 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
39724 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
39725 | + * GNU General Public License for more details. | |
39726 | + * | |
39727 | + * You should have received a copy of the GNU General Public License | |
39728 | + * along with this program; if not, write to the Free Software | |
39729 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
39730 | + **************************************************************************** | |
39731 | + */ | |
39732 | + | |
39733 | +#ifndef NETFRONT_ACCEL_BUFS_H | |
39734 | +#define NETFRONT_ACCEL_BUFS_H | |
39735 | + | |
39736 | +#include <linux/skbuff.h> | |
39737 | +#include <linux/spinlock.h> | |
39738 | +#include <xen/xenbus.h> | |
39739 | + | |
39740 | +#include "accel_msg_iface.h" | |
39741 | + | |
39742 | + | |
39743 | +/*! Buffer descriptor structure */ | |
39744 | +typedef struct netfront_accel_pkt_desc { | |
39745 | + int buf_id; | |
39746 | + u32 pkt_buff_addr; | |
39747 | + void *pkt_kva; | |
39748 | + /* This is the socket buffer currently married to this buffer */ | |
39749 | + struct sk_buff *skb; | |
39750 | + int next_free; | |
39751 | +} netfront_accel_pkt_desc; | |
39752 | + | |
39753 | + | |
39754 | +#define NETFRONT_ACCEL_DEFAULT_BUF_PAGES (384) | |
39755 | +#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT (4) | |
39756 | +#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK \ | |
39757 | + (1 << (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT)) | |
39758 | +#define NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT (1) | |
39759 | +#define NETFRONT_ACCEL_BUFS_PER_PAGE \ | |
39760 | + (1 << (NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT)) | |
39761 | +#define NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT \ | |
39762 | + (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT + \ | |
39763 | + NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT) | |
39764 | +#define NETFRONT_ACCEL_BUFS_PER_BLOCK \ | |
39765 | + (1 << NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT) | |
39766 | +#define NETFRONT_ACCEL_BUF_NUM_BLOCKS(max_pages) \ | |
39767 | + (((max_pages)+NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK-1) / \ | |
39768 | + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK) | |
39769 | + | |
39770 | +/*! Buffer management structure. */ | |
39771 | +struct netfront_accel_bufinfo { | |
39772 | + /* number added to this manager */ | |
39773 | + unsigned npages; | |
39774 | + /* number currently used from this manager */ | |
39775 | + unsigned nused; | |
39776 | + | |
39777 | + int first_free; | |
39778 | + | |
39779 | + int internally_locked; | |
39780 | + spinlock_t *lock; | |
39781 | + | |
39782 | + /* | |
39783 | + * array of pointers (length NETFRONT_ACCEL_BUF_NUM_BLOCKS) to | |
39784 | + * pkt descs | |
39785 | + */ | |
39786 | + struct netfront_accel_pkt_desc **desc_blocks; | |
39787 | +}; | |
39788 | + | |
39789 | + | |
39790 | +struct netfront_accel_bufpages { | |
39791 | + /* length of lists of pages/grants */ | |
39792 | + int max_pages; | |
39793 | + /* list of pages allocated for network buffers */ | |
39794 | + void **page_list; | |
39795 | + /* list of grants for the above pages */ | |
39796 | + grant_ref_t *grant_list; | |
39797 | + | |
39798 | + /* number of page requests that have been made */ | |
39799 | + unsigned page_reqs; | |
39800 | +}; | |
39801 | + | |
39802 | + | |
39803 | +/*! Allocate memory for the buffer manager, set up locks etc. | |
39804 | + * Optionally takes a lock to use, if not supplied it makes its own. | |
39805 | + * | |
39806 | + * \return pointer to netfront_accel_bufinfo structure that represents the | |
39807 | + * buffer manager | |
39808 | + */ | |
39809 | +extern struct netfront_accel_bufinfo * | |
39810 | +netfront_accel_init_bufs(spinlock_t *lock); | |
39811 | + | |
39812 | +/*! Allocate memory for the buffers | |
39813 | + */ | |
39814 | +extern int | |
39815 | +netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages, | |
39816 | + struct netfront_accel_bufinfo *rx_res, | |
39817 | + struct netfront_accel_bufinfo *tx_res, | |
39818 | + int pages); | |
39819 | +extern void | |
39820 | +netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages, | |
39821 | + struct netfront_accel_bufinfo *rx_res, | |
39822 | + struct netfront_accel_bufinfo *tx_res); | |
39823 | + | |
39824 | +/*! Release memory for the buffer manager, buffers, etc. | |
39825 | + * | |
39826 | + * \param manager pointer to netfront_accel_bufinfo structure that | |
39827 | + * represents the buffer manager | |
39828 | + */ | |
39829 | +extern void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *manager); | |
39830 | + | |
39831 | +/*! Release a buffer. | |
39832 | + * | |
39833 | + * \param manager The buffer manager which owns the buffer. | |
39834 | + * \param id The buffer identifier. | |
39835 | + */ | |
39836 | +extern int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager, | |
39837 | + u16 id); | |
39838 | + | |
39839 | +/*! Get the packet descriptor associated with a buffer id. | |
39840 | + * | |
39841 | + * \param manager The buffer manager which owns the buffer. | |
39842 | + * \param id The buffer identifier. | |
39843 | + * | |
39844 | + * The returned value is the packet descriptor for this buffer. | |
39845 | + */ | |
39846 | +extern netfront_accel_pkt_desc * | |
39847 | +netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id); | |
39848 | + | |
39849 | + | |
39850 | +/*! Fill out a message request for some buffers to be mapped by the | |
39851 | + * back end driver | |
39852 | + * | |
39853 | + * \param manager The buffer manager | |
39854 | + * \param msg Pointer to an ef_msg to complete. | |
39855 | + * \return 0 on success | |
39856 | + */ | |
39857 | +extern int | |
39858 | +netfront_accel_buf_map_request(struct xenbus_device *dev, | |
39859 | + struct netfront_accel_bufpages *bufpages, | |
39860 | + struct net_accel_msg *msg, | |
39861 | + int pages, int offset); | |
39862 | + | |
39863 | +/*! Process a response to a buffer request. | |
39864 | + * | |
39865 | + * Deal with a received message from the back end in response to our | |
39866 | + * request for buffers | |
39867 | + * | |
39868 | + * \param manager The buffer manager | |
39869 | + * \param msg The received message from the back end describing new | |
39870 | + * buffers | |
39871 | + * \return 0 on success | |
39872 | + */ | |
39873 | +extern int | |
39874 | +netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages, | |
39875 | + struct netfront_accel_bufinfo *manager, | |
39876 | + struct net_accel_msg *msg); | |
39877 | + | |
39878 | + | |
39879 | +/*! Allocate a buffer from the buffer manager | |
39880 | + * | |
39881 | + * \param manager The buffer manager data structure | |
39882 | + * \param id On exit, the id of the buffer allocated | |
39883 | + * \return Pointer to buffer descriptor. | |
39884 | + */ | |
39885 | +struct netfront_accel_pkt_desc * | |
39886 | +netfront_accel_buf_get(struct netfront_accel_bufinfo *manager); | |
39887 | + | |
39888 | +#endif /* NETFRONT_ACCEL_BUFS_H */ | |
39889 | + | |
39890 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_debugfs.c | |
39891 | =================================================================== | |
39892 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
39893 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_debugfs.c 2008-02-26 10:54:12.000000000 +0100 | |
39894 | @@ -0,0 +1,211 @@ | |
39895 | +/**************************************************************************** | |
39896 | + * Solarflare driver for Xen network acceleration | |
39897 | + * | |
39898 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
39899 | + * 9501 Jeronimo Road, Suite 250, | |
39900 | + * Irvine, CA 92618, USA | |
39901 | + * | |
39902 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
39903 | + * | |
39904 | + * This program is free software; you can redistribute it and/or modify it | |
39905 | + * under the terms of the GNU General Public License version 2 as published | |
39906 | + * by the Free Software Foundation, incorporated herein by reference. | |
39907 | + * | |
39908 | + * This program is distributed in the hope that it will be useful, | |
39909 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
39910 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
39911 | + * GNU General Public License for more details. | |
39912 | + * | |
39913 | + * You should have received a copy of the GNU General Public License | |
39914 | + * along with this program; if not, write to the Free Software | |
39915 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
39916 | + **************************************************************************** | |
39917 | + */ | |
39918 | + | |
39919 | +#include <linux/fs.h> | |
39920 | +#include <linux/debugfs.h> | |
39921 | + | |
39922 | +#include "accel.h" | |
39923 | + | |
39924 | +#if defined(CONFIG_DEBUG_FS) | |
39925 | +static struct dentry *sfc_debugfs_root = NULL; | |
39926 | +#endif | |
39927 | + | |
39928 | +void netfront_accel_debugfs_init(void) | |
39929 | +{ | |
39930 | +#if defined(CONFIG_DEBUG_FS) | |
39931 | + sfc_debugfs_root = debugfs_create_dir(frontend_name, NULL); | |
39932 | +#endif | |
39933 | +} | |
39934 | + | |
39935 | + | |
39936 | +void netfront_accel_debugfs_fini(void) | |
39937 | +{ | |
39938 | +#if defined(CONFIG_DEBUG_FS) | |
39939 | + if (sfc_debugfs_root) | |
39940 | + debugfs_remove(sfc_debugfs_root); | |
39941 | +#endif | |
39942 | +} | |
39943 | + | |
39944 | + | |
39945 | +int netfront_accel_debugfs_create(netfront_accel_vnic *vnic) | |
39946 | +{ | |
39947 | +#if defined(CONFIG_DEBUG_FS) | |
39948 | + if (sfc_debugfs_root == NULL) | |
39949 | + return -ENOENT; | |
39950 | + | |
39951 | + vnic->dbfs_dir = debugfs_create_dir(vnic->net_dev->name, | |
39952 | + sfc_debugfs_root); | |
39953 | + if (vnic->dbfs_dir == NULL) | |
39954 | + return -ENOMEM; | |
39955 | + | |
39956 | + vnic->netdev_dbfs.fastpath_rx_pkts = debugfs_create_u32 | |
39957 | + ("fastpath_rx_pkts", S_IRUSR | S_IRGRP | S_IROTH, | |
39958 | + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_pkts); | |
39959 | + vnic->netdev_dbfs.fastpath_rx_bytes = debugfs_create_u32 | |
39960 | + ("fastpath_rx_bytes", S_IRUSR | S_IRGRP | S_IROTH, | |
39961 | + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_bytes); | |
39962 | + vnic->netdev_dbfs.fastpath_rx_errors = debugfs_create_u32 | |
39963 | + ("fastpath_rx_errors", S_IRUSR | S_IRGRP | S_IROTH, | |
39964 | + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_errors); | |
39965 | + vnic->netdev_dbfs.fastpath_tx_pkts = debugfs_create_u32 | |
39966 | + ("fastpath_tx_pkts", S_IRUSR | S_IRGRP | S_IROTH, | |
39967 | + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_pkts); | |
39968 | + vnic->netdev_dbfs.fastpath_tx_bytes = debugfs_create_u32 | |
39969 | + ("fastpath_tx_bytes", S_IRUSR | S_IRGRP | S_IROTH, | |
39970 | + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_bytes); | |
39971 | + vnic->netdev_dbfs.fastpath_tx_errors = debugfs_create_u32 | |
39972 | + ("fastpath_tx_errors", S_IRUSR | S_IRGRP | S_IROTH, | |
39973 | + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_errors); | |
39974 | + | |
39975 | +#if NETFRONT_ACCEL_STATS | |
39976 | + vnic->dbfs.irq_count = debugfs_create_u64 | |
39977 | + ("irq_count", S_IRUSR | S_IRGRP | S_IROTH, | |
39978 | + vnic->dbfs_dir, &vnic->stats.irq_count); | |
39979 | + vnic->dbfs.useless_irq_count = debugfs_create_u64 | |
39980 | + ("useless_irq_count", S_IRUSR | S_IRGRP | S_IROTH, | |
39981 | + vnic->dbfs_dir, &vnic->stats.useless_irq_count); | |
39982 | + vnic->dbfs.poll_schedule_count = debugfs_create_u64 | |
39983 | + ("poll_schedule_count", S_IRUSR | S_IRGRP | S_IROTH, | |
39984 | + vnic->dbfs_dir, &vnic->stats.poll_schedule_count); | |
39985 | + vnic->dbfs.poll_call_count = debugfs_create_u64 | |
39986 | + ("poll_call_count", S_IRUSR | S_IRGRP | S_IROTH, | |
39987 | + vnic->dbfs_dir, &vnic->stats.poll_call_count); | |
39988 | + vnic->dbfs.poll_reschedule_count = debugfs_create_u64 | |
39989 | + ("poll_reschedule_count", S_IRUSR | S_IRGRP | S_IROTH, | |
39990 | + vnic->dbfs_dir, &vnic->stats.poll_reschedule_count); | |
39991 | + vnic->dbfs.queue_stops = debugfs_create_u64 | |
39992 | + ("queue_stops", S_IRUSR | S_IRGRP | S_IROTH, | |
39993 | + vnic->dbfs_dir, &vnic->stats.queue_stops); | |
39994 | + vnic->dbfs.queue_wakes = debugfs_create_u64 | |
39995 | + ("queue_wakes", S_IRUSR | S_IRGRP | S_IROTH, | |
39996 | + vnic->dbfs_dir, &vnic->stats.queue_wakes); | |
39997 | + vnic->dbfs.ssr_bursts = debugfs_create_u64 | |
39998 | + ("ssr_bursts", S_IRUSR | S_IRGRP | S_IROTH, | |
39999 | + vnic->dbfs_dir, &vnic->stats.ssr_bursts); | |
40000 | + vnic->dbfs.ssr_drop_stream = debugfs_create_u64 | |
40001 | + ("ssr_drop_stream", S_IRUSR | S_IRGRP | S_IROTH, | |
40002 | + vnic->dbfs_dir, &vnic->stats.ssr_drop_stream); | |
40003 | + vnic->dbfs.ssr_misorder = debugfs_create_u64 | |
40004 | + ("ssr_misorder", S_IRUSR | S_IRGRP | S_IROTH, | |
40005 | + vnic->dbfs_dir, &vnic->stats.ssr_misorder); | |
40006 | + vnic->dbfs.ssr_slow_start = debugfs_create_u64 | |
40007 | + ("ssr_slow_start", S_IRUSR | S_IRGRP | S_IROTH, | |
40008 | + vnic->dbfs_dir, &vnic->stats.ssr_slow_start); | |
40009 | + vnic->dbfs.ssr_merges = debugfs_create_u64 | |
40010 | + ("ssr_merges", S_IRUSR | S_IRGRP | S_IROTH, | |
40011 | + vnic->dbfs_dir, &vnic->stats.ssr_merges); | |
40012 | + vnic->dbfs.ssr_too_many = debugfs_create_u64 | |
40013 | + ("ssr_too_many", S_IRUSR | S_IRGRP | S_IROTH, | |
40014 | + vnic->dbfs_dir, &vnic->stats.ssr_too_many); | |
40015 | + vnic->dbfs.ssr_new_stream = debugfs_create_u64 | |
40016 | + ("ssr_new_stream", S_IRUSR | S_IRGRP | S_IROTH, | |
40017 | + vnic->dbfs_dir, &vnic->stats.ssr_new_stream); | |
40018 | + | |
40019 | + vnic->dbfs.fastpath_tx_busy = debugfs_create_u64 | |
40020 | + ("fastpath_tx_busy", S_IRUSR | S_IRGRP | S_IROTH, | |
40021 | + vnic->dbfs_dir, &vnic->stats.fastpath_tx_busy); | |
40022 | + vnic->dbfs.fastpath_tx_completions = debugfs_create_u64 | |
40023 | + ("fastpath_tx_completions", S_IRUSR | S_IRGRP | S_IROTH, | |
40024 | + vnic->dbfs_dir, &vnic->stats.fastpath_tx_completions); | |
40025 | + vnic->dbfs.fastpath_tx_pending_max = debugfs_create_u32 | |
40026 | + ("fastpath_tx_pending_max", S_IRUSR | S_IRGRP | S_IROTH, | |
40027 | + vnic->dbfs_dir, &vnic->stats.fastpath_tx_pending_max); | |
40028 | + vnic->dbfs.event_count = debugfs_create_u64 | |
40029 | + ("event_count", S_IRUSR | S_IRGRP | S_IROTH, | |
40030 | + vnic->dbfs_dir, &vnic->stats.event_count); | |
40031 | + vnic->dbfs.bad_event_count = debugfs_create_u64 | |
40032 | + ("bad_event_count", S_IRUSR | S_IRGRP | S_IROTH, | |
40033 | + vnic->dbfs_dir, &vnic->stats.bad_event_count); | |
40034 | + vnic->dbfs.event_count_since_irq = debugfs_create_u32 | |
40035 | + ("event_count_since_irq", S_IRUSR | S_IRGRP | S_IROTH, | |
40036 | + vnic->dbfs_dir, &vnic->stats.event_count_since_irq); | |
40037 | + vnic->dbfs.events_per_irq_max = debugfs_create_u32 | |
40038 | + ("events_per_irq_max", S_IRUSR | S_IRGRP | S_IROTH, | |
40039 | + vnic->dbfs_dir, &vnic->stats.events_per_irq_max); | |
40040 | + vnic->dbfs.fastpath_frm_trunc = debugfs_create_u64 | |
40041 | + ("fastpath_frm_trunc", S_IRUSR | S_IRGRP | S_IROTH, | |
40042 | + vnic->dbfs_dir, &vnic->stats.fastpath_frm_trunc); | |
40043 | + vnic->dbfs.rx_no_desc_trunc = debugfs_create_u64 | |
40044 | + ("rx_no_desc_trunc", S_IRUSR | S_IRGRP | S_IROTH, | |
40045 | + vnic->dbfs_dir, &vnic->stats.rx_no_desc_trunc); | |
40046 | + vnic->dbfs.events_per_poll_max = debugfs_create_u32 | |
40047 | + ("events_per_poll_max", S_IRUSR | S_IRGRP | S_IROTH, | |
40048 | + vnic->dbfs_dir, &vnic->stats.events_per_poll_max); | |
40049 | + vnic->dbfs.events_per_poll_rx_max = debugfs_create_u32 | |
40050 | + ("events_per_poll_rx_max", S_IRUSR | S_IRGRP | S_IROTH, | |
40051 | + vnic->dbfs_dir, &vnic->stats.events_per_poll_rx_max); | |
40052 | + vnic->dbfs.events_per_poll_tx_max = debugfs_create_u32 | |
40053 | + ("events_per_poll_tx_max", S_IRUSR | S_IRGRP | S_IROTH, | |
40054 | + vnic->dbfs_dir, &vnic->stats.events_per_poll_tx_max); | |
40055 | +#endif | |
40056 | +#endif | |
40057 | + return 0; | |
40058 | +} | |
40059 | + | |
40060 | + | |
40061 | +int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic) | |
40062 | +{ | |
40063 | +#if defined(CONFIG_DEBUG_FS) | |
40064 | + if (vnic->dbfs_dir != NULL) { | |
40065 | + debugfs_remove(vnic->netdev_dbfs.fastpath_rx_pkts); | |
40066 | + debugfs_remove(vnic->netdev_dbfs.fastpath_rx_bytes); | |
40067 | + debugfs_remove(vnic->netdev_dbfs.fastpath_rx_errors); | |
40068 | + debugfs_remove(vnic->netdev_dbfs.fastpath_tx_pkts); | |
40069 | + debugfs_remove(vnic->netdev_dbfs.fastpath_tx_bytes); | |
40070 | + debugfs_remove(vnic->netdev_dbfs.fastpath_tx_errors); | |
40071 | + | |
40072 | +#if NETFRONT_ACCEL_STATS | |
40073 | + debugfs_remove(vnic->dbfs.irq_count); | |
40074 | + debugfs_remove(vnic->dbfs.useless_irq_count); | |
40075 | + debugfs_remove(vnic->dbfs.poll_schedule_count); | |
40076 | + debugfs_remove(vnic->dbfs.poll_call_count); | |
40077 | + debugfs_remove(vnic->dbfs.poll_reschedule_count); | |
40078 | + debugfs_remove(vnic->dbfs.queue_stops); | |
40079 | + debugfs_remove(vnic->dbfs.queue_wakes); | |
40080 | + debugfs_remove(vnic->dbfs.ssr_bursts); | |
40081 | + debugfs_remove(vnic->dbfs.ssr_drop_stream); | |
40082 | + debugfs_remove(vnic->dbfs.ssr_misorder); | |
40083 | + debugfs_remove(vnic->dbfs.ssr_slow_start); | |
40084 | + debugfs_remove(vnic->dbfs.ssr_merges); | |
40085 | + debugfs_remove(vnic->dbfs.ssr_too_many); | |
40086 | + debugfs_remove(vnic->dbfs.ssr_new_stream); | |
40087 | + | |
40088 | + debugfs_remove(vnic->dbfs.fastpath_tx_busy); | |
40089 | + debugfs_remove(vnic->dbfs.fastpath_tx_completions); | |
40090 | + debugfs_remove(vnic->dbfs.fastpath_tx_pending_max); | |
40091 | + debugfs_remove(vnic->dbfs.event_count); | |
40092 | + debugfs_remove(vnic->dbfs.bad_event_count); | |
40093 | + debugfs_remove(vnic->dbfs.event_count_since_irq); | |
40094 | + debugfs_remove(vnic->dbfs.events_per_irq_max); | |
40095 | + debugfs_remove(vnic->dbfs.fastpath_frm_trunc); | |
40096 | + debugfs_remove(vnic->dbfs.rx_no_desc_trunc); | |
40097 | + debugfs_remove(vnic->dbfs.events_per_poll_max); | |
40098 | + debugfs_remove(vnic->dbfs.events_per_poll_rx_max); | |
40099 | + debugfs_remove(vnic->dbfs.events_per_poll_tx_max); | |
40100 | +#endif | |
40101 | + debugfs_remove(vnic->dbfs_dir); | |
40102 | + } | |
40103 | +#endif | |
40104 | + return 0; | |
40105 | +} | |
40106 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_msg.c | |
40107 | =================================================================== | |
40108 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
40109 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_msg.c 2008-02-26 10:54:12.000000000 +0100 | |
40110 | @@ -0,0 +1,566 @@ | |
40111 | +/**************************************************************************** | |
40112 | + * Solarflare driver for Xen network acceleration | |
40113 | + * | |
40114 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
40115 | + * 9501 Jeronimo Road, Suite 250, | |
40116 | + * Irvine, CA 92618, USA | |
40117 | + * | |
40118 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
40119 | + * | |
40120 | + * This program is free software; you can redistribute it and/or modify it | |
40121 | + * under the terms of the GNU General Public License version 2 as published | |
40122 | + * by the Free Software Foundation, incorporated herein by reference. | |
40123 | + * | |
40124 | + * This program is distributed in the hope that it will be useful, | |
40125 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
40126 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
40127 | + * GNU General Public License for more details. | |
40128 | + * | |
40129 | + * You should have received a copy of the GNU General Public License | |
40130 | + * along with this program; if not, write to the Free Software | |
40131 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
40132 | + **************************************************************************** | |
40133 | + */ | |
40134 | + | |
40135 | +#include <linux/stddef.h> | |
40136 | +#include <linux/errno.h> | |
40137 | + | |
40138 | +#include <xen/xenbus.h> | |
40139 | + | |
40140 | +#include "accel.h" | |
40141 | +#include "accel_msg_iface.h" | |
40142 | +#include "accel_util.h" | |
40143 | +#include "accel_bufs.h" | |
40144 | + | |
40145 | +#include "netfront.h" /* drivers/xen/netfront/netfront.h */ | |
40146 | + | |
40147 | +static void vnic_start_interrupts(netfront_accel_vnic *vnic) | |
40148 | +{ | |
40149 | + unsigned long flags; | |
40150 | + | |
40151 | + /* Prime our interrupt */ | |
40152 | + spin_lock_irqsave(&vnic->irq_enabled_lock, flags); | |
40153 | + if (!netfront_accel_vi_enable_interrupts(vnic)) { | |
40154 | + /* Cripes, that was quick, better pass it up */ | |
40155 | + netfront_accel_disable_net_interrupts(vnic); | |
40156 | + vnic->irq_enabled = 0; | |
40157 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++); | |
40158 | + netif_rx_schedule(vnic->net_dev); | |
40159 | + } else { | |
40160 | + /* | |
40161 | + * Nothing yet, make sure we get interrupts through | |
40162 | + * back end | |
40163 | + */ | |
40164 | + vnic->irq_enabled = 1; | |
40165 | + netfront_accel_enable_net_interrupts(vnic); | |
40166 | + } | |
40167 | + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); | |
40168 | +} | |
40169 | + | |
40170 | + | |
40171 | +static void vnic_stop_interrupts(netfront_accel_vnic *vnic) | |
40172 | +{ | |
40173 | + unsigned long flags; | |
40174 | + | |
40175 | + spin_lock_irqsave(&vnic->irq_enabled_lock, flags); | |
40176 | + netfront_accel_disable_net_interrupts(vnic); | |
40177 | + vnic->irq_enabled = 0; | |
40178 | + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); | |
40179 | +} | |
40180 | + | |
40181 | + | |
40182 | +static void vnic_start_fastpath(netfront_accel_vnic *vnic) | |
40183 | +{ | |
40184 | + struct net_device *net_dev = vnic->net_dev; | |
40185 | + unsigned long flags; | |
40186 | + | |
40187 | + DPRINTK("%s\n", __FUNCTION__); | |
40188 | + | |
40189 | + spin_lock_irqsave(&vnic->tx_lock, flags); | |
40190 | + vnic->tx_enabled = 1; | |
40191 | + spin_unlock_irqrestore(&vnic->tx_lock, flags); | |
40192 | + | |
40193 | + netif_poll_disable(net_dev); | |
40194 | + vnic->poll_enabled = 1; | |
40195 | + netif_poll_enable(net_dev); | |
40196 | + | |
40197 | + vnic_start_interrupts(vnic); | |
40198 | +} | |
40199 | + | |
40200 | + | |
40201 | +void vnic_stop_fastpath(netfront_accel_vnic *vnic) | |
40202 | +{ | |
40203 | + struct net_device *net_dev = vnic->net_dev; | |
40204 | + struct netfront_info *np = (struct netfront_info *)netdev_priv(net_dev); | |
40205 | + unsigned long flags1, flags2; | |
40206 | + | |
40207 | + DPRINTK("%s\n", __FUNCTION__); | |
40208 | + | |
40209 | + vnic_stop_interrupts(vnic); | |
40210 | + | |
40211 | + spin_lock_irqsave(&vnic->tx_lock, flags1); | |
40212 | + vnic->tx_enabled = 0; | |
40213 | + spin_lock_irqsave(&np->tx_lock, flags2); | |
40214 | + if (vnic->tx_skb != NULL) { | |
40215 | + dev_kfree_skb_any(vnic->tx_skb); | |
40216 | + vnic->tx_skb = NULL; | |
40217 | + if (netfront_check_queue_ready(net_dev)) { | |
40218 | + netif_wake_queue(net_dev); | |
40219 | + NETFRONT_ACCEL_STATS_OP | |
40220 | + (vnic->stats.queue_wakes++); | |
40221 | + } | |
40222 | + } | |
40223 | + spin_unlock_irqrestore(&np->tx_lock, flags2); | |
40224 | + spin_unlock_irqrestore(&vnic->tx_lock, flags1); | |
40225 | + | |
40226 | + /* Must prevent polls and hold lock to modify poll_enabled */ | |
40227 | + netif_poll_disable(net_dev); | |
40228 | + spin_lock_irqsave(&vnic->irq_enabled_lock, flags1); | |
40229 | + vnic->poll_enabled = 0; | |
40230 | + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags1); | |
40231 | + netif_poll_enable(net_dev); | |
40232 | +} | |
40233 | + | |
40234 | + | |
40235 | +static void netfront_accel_interface_up(netfront_accel_vnic *vnic) | |
40236 | +{ | |
40237 | + | |
40238 | + if (!vnic->backend_netdev_up) { | |
40239 | + vnic->backend_netdev_up = 1; | |
40240 | + | |
40241 | + if (vnic->frontend_ready) | |
40242 | + vnic_start_fastpath(vnic); | |
40243 | + } | |
40244 | +} | |
40245 | + | |
40246 | + | |
40247 | +static void netfront_accel_interface_down(netfront_accel_vnic *vnic) | |
40248 | +{ | |
40249 | + | |
40250 | + if (vnic->backend_netdev_up) { | |
40251 | + vnic->backend_netdev_up = 0; | |
40252 | + | |
40253 | + if (vnic->frontend_ready) | |
40254 | + vnic_stop_fastpath(vnic); | |
40255 | + } | |
40256 | +} | |
40257 | + | |
40258 | + | |
40259 | +static int vnic_add_bufs(netfront_accel_vnic *vnic, | |
40260 | + struct net_accel_msg *msg) | |
40261 | +{ | |
40262 | + int rc, offset; | |
40263 | + struct netfront_accel_bufinfo *bufinfo; | |
40264 | + | |
40265 | + BUG_ON(msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ); | |
40266 | + | |
40267 | + offset = msg->u.mapbufs.reqid; | |
40268 | + | |
40269 | + if (offset < vnic->bufpages.max_pages - | |
40270 | + (vnic->bufpages.max_pages / sfc_netfront_buffer_split)) { | |
40271 | + bufinfo = vnic->rx_bufs; | |
40272 | + } else | |
40273 | + bufinfo = vnic->tx_bufs; | |
40274 | + | |
40275 | + /* Queue up some Rx buffers to start things off. */ | |
40276 | + if ((rc = netfront_accel_add_bufs(&vnic->bufpages, bufinfo, msg)) == 0) { | |
40277 | + netfront_accel_vi_add_bufs(vnic, bufinfo == vnic->rx_bufs); | |
40278 | + | |
40279 | + if (offset + msg->u.mapbufs.pages == vnic->bufpages.max_pages) { | |
40280 | + VPRINTK("%s: got all buffers back\n", __FUNCTION__); | |
40281 | + vnic->frontend_ready = 1; | |
40282 | + if (vnic->backend_netdev_up) | |
40283 | + vnic_start_fastpath(vnic); | |
40284 | + } else { | |
40285 | + VPRINTK("%s: got buffers back %d %d\n", __FUNCTION__, | |
40286 | + offset, msg->u.mapbufs.pages); | |
40287 | + } | |
40288 | + } | |
40289 | + | |
40290 | + return rc; | |
40291 | +} | |
40292 | + | |
40293 | + | |
40294 | +/* The largest [o] such that (1u << o) <= n. Requires n > 0. */ | |
40295 | + | |
40296 | +inline unsigned log2_le(unsigned long n) { | |
40297 | + unsigned order = 1; | |
40298 | + while ((1ul << order) <= n) ++order; | |
40299 | + return (order - 1); | |
40300 | +} | |
40301 | + | |
40302 | +static int vnic_send_buffer_requests(netfront_accel_vnic *vnic, | |
40303 | + struct netfront_accel_bufpages *bufpages) | |
40304 | +{ | |
40305 | + int pages, offset, rc = 0, sent = 0; | |
40306 | + struct net_accel_msg msg; | |
40307 | + | |
40308 | + while (bufpages->page_reqs < bufpages->max_pages) { | |
40309 | + offset = bufpages->page_reqs; | |
40310 | + | |
40311 | + pages = pow2(log2_le(bufpages->max_pages - | |
40312 | + bufpages->page_reqs)); | |
40313 | + pages = pages < NET_ACCEL_MSG_MAX_PAGE_REQ ? | |
40314 | + pages : NET_ACCEL_MSG_MAX_PAGE_REQ; | |
40315 | + | |
40316 | + BUG_ON(offset < 0); | |
40317 | + BUG_ON(pages <= 0); | |
40318 | + | |
40319 | + rc = netfront_accel_buf_map_request(vnic->dev, bufpages, | |
40320 | + &msg, pages, offset); | |
40321 | + if (rc == 0) { | |
40322 | + rc = net_accel_msg_send(vnic->shared_page, | |
40323 | + &vnic->to_dom0, &msg); | |
40324 | + if (rc < 0) { | |
40325 | + VPRINTK("%s: queue full, stopping for now\n", | |
40326 | + __FUNCTION__); | |
40327 | + break; | |
40328 | + } | |
40329 | + sent++; | |
40330 | + } else { | |
40331 | + EPRINTK("%s: problem with grant, stopping for now\n", | |
40332 | + __FUNCTION__); | |
40333 | + break; | |
40334 | + } | |
40335 | + | |
40336 | + bufpages->page_reqs += pages; | |
40337 | + } | |
40338 | + | |
40339 | + if (sent) | |
40340 | + net_accel_msg_notify(vnic->msg_channel_irq); | |
40341 | + | |
40342 | + return rc; | |
40343 | +} | |
40344 | + | |
40345 | + | |
40346 | +/* | |
40347 | + * In response to dom0 saying "my queue is full", we reply with this | |
40348 | + * when it is no longer full | |
40349 | + */ | |
40350 | +inline void vnic_set_queue_not_full(netfront_accel_vnic *vnic) | |
40351 | +{ | |
40352 | + | |
40353 | + if (test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B, | |
40354 | + (unsigned long *)&vnic->shared_page->aflags)) | |
40355 | + notify_remote_via_irq(vnic->msg_channel_irq); | |
40356 | + else | |
40357 | + VPRINTK("queue not full bit already set, not signalling\n"); | |
40358 | +} | |
40359 | + | |
40360 | +/* | |
40361 | + * Notify dom0 that the queue we want to use is full, it should | |
40362 | + * respond by setting MSG_AFLAGS_QUEUEUNOTFULL in due course | |
40363 | + */ | |
40364 | +inline void vnic_set_queue_full(netfront_accel_vnic *vnic) | |
40365 | +{ | |
40366 | + | |
40367 | + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B, | |
40368 | + (unsigned long *)&vnic->shared_page->aflags)) | |
40369 | + notify_remote_via_irq(vnic->msg_channel_irq); | |
40370 | + else | |
40371 | + VPRINTK("queue full bit already set, not signalling\n"); | |
40372 | +} | |
40373 | + | |
40374 | + | |
40375 | +static int vnic_check_hello_version(unsigned version) | |
40376 | +{ | |
40377 | + if (version > NET_ACCEL_MSG_VERSION) { | |
40378 | + /* Newer protocol, we must refuse */ | |
40379 | + return -EPROTO; | |
40380 | + } | |
40381 | + | |
40382 | + if (version < NET_ACCEL_MSG_VERSION) { | |
40383 | + /* | |
40384 | + * We are newer, so have discretion to accept if we | |
40385 | + * wish. For now however, just reject | |
40386 | + */ | |
40387 | + return -EPROTO; | |
40388 | + } | |
40389 | + | |
40390 | + BUG_ON(version != NET_ACCEL_MSG_VERSION); | |
40391 | + return 0; | |
40392 | +} | |
40393 | + | |
40394 | + | |
40395 | +static int vnic_process_hello_msg(netfront_accel_vnic *vnic, | |
40396 | + struct net_accel_msg *msg) | |
40397 | +{ | |
40398 | + int err = 0; | |
40399 | + unsigned pages = sfc_netfront_max_pages; | |
40400 | + | |
40401 | + if (vnic_check_hello_version(msg->u.hello.version) < 0) { | |
40402 | + msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY | |
40403 | + | NET_ACCEL_MSG_ERROR; | |
40404 | + msg->u.hello.version = NET_ACCEL_MSG_VERSION; | |
40405 | + } else { | |
40406 | + vnic->backend_netdev_up | |
40407 | + = vnic->shared_page->net_dev_up; | |
40408 | + | |
40409 | + msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY; | |
40410 | + msg->u.hello.version = NET_ACCEL_MSG_VERSION; | |
40411 | + if (msg->u.hello.max_pages && | |
40412 | + msg->u.hello.max_pages < pages) | |
40413 | + pages = msg->u.hello.max_pages; | |
40414 | + msg->u.hello.max_pages = pages; | |
40415 | + | |
40416 | + /* Half of pages for rx, half for tx */ | |
40417 | + err = netfront_accel_alloc_buffer_mem(&vnic->bufpages, | |
40418 | + vnic->rx_bufs, | |
40419 | + vnic->tx_bufs, | |
40420 | + pages); | |
40421 | + if (err) | |
40422 | + msg->id |= NET_ACCEL_MSG_ERROR; | |
40423 | + } | |
40424 | + | |
40425 | + /* Send reply */ | |
40426 | + net_accel_msg_reply_notify(vnic->shared_page, vnic->msg_channel_irq, | |
40427 | + &vnic->to_dom0, msg); | |
40428 | + return err; | |
40429 | +} | |
40430 | + | |
40431 | + | |
40432 | +static int vnic_process_localmac_msg(netfront_accel_vnic *vnic, | |
40433 | + struct net_accel_msg *msg) | |
40434 | +{ | |
40435 | + unsigned long flags; | |
40436 | + cuckoo_hash_mac_key key; | |
40437 | + | |
40438 | + if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) { | |
40439 | + DPRINTK("MAC has moved, could be local: " MAC_FMT "\n", | |
40440 | + MAC_ARG(msg->u.localmac.mac)); | |
40441 | + key = cuckoo_mac_to_key(msg->u.localmac.mac); | |
40442 | + spin_lock_irqsave(&vnic->table_lock, flags); | |
40443 | + /* Try to remove it, not a big deal if not there */ | |
40444 | + cuckoo_hash_remove(&vnic->fastpath_table, | |
40445 | + (cuckoo_hash_key *)&key); | |
40446 | + spin_unlock_irqrestore(&vnic->table_lock, flags); | |
40447 | + } | |
40448 | + | |
40449 | + return 0; | |
40450 | +} | |
40451 | + | |
40452 | + | |
40453 | +static | |
40454 | +int vnic_process_rx_msg(netfront_accel_vnic *vnic, | |
40455 | + struct net_accel_msg *msg) | |
40456 | +{ | |
40457 | + int err; | |
40458 | + | |
40459 | + switch (msg->id) { | |
40460 | + case NET_ACCEL_MSG_HELLO: | |
40461 | + /* Hello, reply with Reply */ | |
40462 | + DPRINTK("got Hello, with version %.8x\n", | |
40463 | + msg->u.hello.version); | |
40464 | + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_NONE); | |
40465 | + err = vnic_process_hello_msg(vnic, msg); | |
40466 | + if (err == 0) | |
40467 | + vnic->msg_state = NETFRONT_ACCEL_MSG_HELLO; | |
40468 | + break; | |
40469 | + case NET_ACCEL_MSG_SETHW: | |
40470 | + /* Hardware info message */ | |
40471 | + DPRINTK("got H/W info\n"); | |
40472 | + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HELLO); | |
40473 | + err = netfront_accel_vi_init(vnic, &msg->u.hw); | |
40474 | + if (err == 0) | |
40475 | + vnic->msg_state = NETFRONT_ACCEL_MSG_HW; | |
40476 | + break; | |
40477 | + case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY: | |
40478 | + VPRINTK("Got mapped buffers back\n"); | |
40479 | + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW); | |
40480 | + err = vnic_add_bufs(vnic, msg); | |
40481 | + break; | |
40482 | + case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_ERROR: | |
40483 | + /* No buffers. Can't use the fast path. */ | |
40484 | + EPRINTK("Got mapped buffers error. Cannot accelerate.\n"); | |
40485 | + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW); | |
40486 | + err = -EIO; | |
40487 | + break; | |
40488 | + case NET_ACCEL_MSG_LOCALMAC: | |
40489 | + /* Should be add, remove not currently used */ | |
40490 | + EPRINTK_ON(!(msg->u.localmac.flags & NET_ACCEL_MSG_ADD)); | |
40491 | + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW); | |
40492 | + err = vnic_process_localmac_msg(vnic, msg); | |
40493 | + break; | |
40494 | + default: | |
40495 | + EPRINTK("Huh? Message code is 0x%x\n", msg->id); | |
40496 | + err = -EPROTO; | |
40497 | + break; | |
40498 | + } | |
40499 | + | |
40500 | + return err; | |
40501 | +} | |
40502 | + | |
40503 | + | |
40504 | +/* Process an IRQ received from back end driver */ | |
40505 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
40506 | +void netfront_accel_msg_from_bend(struct work_struct *context) | |
40507 | +#else | |
40508 | +void netfront_accel_msg_from_bend(void *context) | |
40509 | +#endif | |
40510 | +{ | |
40511 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
40512 | + netfront_accel_vnic *vnic = | |
40513 | + container_of(context, netfront_accel_vnic, msg_from_bend); | |
40514 | +#else | |
40515 | + netfront_accel_vnic *vnic = (netfront_accel_vnic *)context; | |
40516 | +#endif | |
40517 | + struct net_accel_msg msg; | |
40518 | + int err, queue_was_full = 0; | |
40519 | + | |
40520 | + mutex_lock(&vnic->vnic_mutex); | |
40521 | + | |
40522 | + /* | |
40523 | + * This happens when the shared pages have been unmapped but | |
40524 | + * the workqueue has yet to be flushed | |
40525 | + */ | |
40526 | + if (!vnic->dom0_state_is_setup) | |
40527 | + goto unlock_out; | |
40528 | + | |
40529 | + while ((vnic->shared_page->aflags & NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK) | |
40530 | + != 0) { | |
40531 | + if (vnic->shared_page->aflags & | |
40532 | + NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL) { | |
40533 | + /* We've been told there may now be space. */ | |
40534 | + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B, | |
40535 | + (unsigned long *)&vnic->shared_page->aflags); | |
40536 | + } | |
40537 | + | |
40538 | + if (vnic->shared_page->aflags & | |
40539 | + NET_ACCEL_MSG_AFLAGS_QUEUE0FULL) { | |
40540 | + /* | |
40541 | + * There will be space at the end of this | |
40542 | + * function if we can make any. | |
40543 | + */ | |
40544 | + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B, | |
40545 | + (unsigned long *)&vnic->shared_page->aflags); | |
40546 | + queue_was_full = 1; | |
40547 | + } | |
40548 | + | |
40549 | + if (vnic->shared_page->aflags & | |
40550 | + NET_ACCEL_MSG_AFLAGS_NETUPDOWN) { | |
40551 | + DPRINTK("%s: net interface change\n", __FUNCTION__); | |
40552 | + clear_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B, | |
40553 | + (unsigned long *)&vnic->shared_page->aflags); | |
40554 | + if (vnic->shared_page->net_dev_up) | |
40555 | + netfront_accel_interface_up(vnic); | |
40556 | + else | |
40557 | + netfront_accel_interface_down(vnic); | |
40558 | + } | |
40559 | + } | |
40560 | + | |
40561 | + /* Pull msg out of shared memory */ | |
40562 | + while ((err = net_accel_msg_recv(vnic->shared_page, &vnic->from_dom0, | |
40563 | + &msg)) == 0) { | |
40564 | + err = vnic_process_rx_msg(vnic, &msg); | |
40565 | + | |
40566 | + if (err != 0) | |
40567 | + goto done; | |
40568 | + } | |
40569 | + | |
40570 | + /* | |
40571 | + * Send any pending buffer map request messages that we can, | |
40572 | + * and mark domU->dom0 as full if necessary. | |
40573 | + */ | |
40574 | + if (vnic->msg_state == NETFRONT_ACCEL_MSG_HW && | |
40575 | + vnic->bufpages.page_reqs < vnic->bufpages.max_pages) { | |
40576 | + if (vnic_send_buffer_requests(vnic, &vnic->bufpages) == -ENOSPC) | |
40577 | + vnic_set_queue_full(vnic); | |
40578 | + } | |
40579 | + | |
40580 | + /* | |
40581 | + * If there are no messages then this is not an error. It | |
40582 | + * just means that we've finished processing the queue. | |
40583 | + */ | |
40584 | + if (err == -ENOENT) | |
40585 | + err = 0; | |
40586 | + done: | |
40587 | + /* We will now have made space in the dom0->domU queue if we can */ | |
40588 | + if (queue_was_full) | |
40589 | + vnic_set_queue_not_full(vnic); | |
40590 | + | |
40591 | + if (err != 0) { | |
40592 | + EPRINTK("%s returned %d\n", __FUNCTION__, err); | |
40593 | + netfront_accel_set_closing(vnic); | |
40594 | + } | |
40595 | + | |
40596 | + unlock_out: | |
40597 | + mutex_unlock(&vnic->vnic_mutex); | |
40598 | + | |
40599 | + return; | |
40600 | +} | |
40601 | + | |
40602 | + | |
40603 | +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context, | |
40604 | + struct pt_regs *unused) | |
40605 | +{ | |
40606 | + netfront_accel_vnic *vnic = (netfront_accel_vnic *)context; | |
40607 | + VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename); | |
40608 | + | |
40609 | + queue_work(netfront_accel_workqueue, &vnic->msg_from_bend); | |
40610 | + | |
40611 | + return IRQ_HANDLED; | |
40612 | +} | |
40613 | + | |
40614 | +/* Process an interrupt received from the NIC via backend */ | |
40615 | +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context, | |
40616 | + struct pt_regs *unused) | |
40617 | +{ | |
40618 | + netfront_accel_vnic *vnic = (netfront_accel_vnic *)context; | |
40619 | + struct net_device *net_dev = vnic->net_dev; | |
40620 | + unsigned long flags; | |
40621 | + | |
40622 | + VPRINTK("net irq %d from device %s\n", irq, vnic->dev->nodename); | |
40623 | + | |
40624 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.irq_count++); | |
40625 | + | |
40626 | + BUG_ON(net_dev==NULL); | |
40627 | + | |
40628 | + spin_lock_irqsave(&vnic->irq_enabled_lock, flags); | |
40629 | + if (vnic->irq_enabled) { | |
40630 | + netfront_accel_disable_net_interrupts(vnic); | |
40631 | + vnic->irq_enabled = 0; | |
40632 | + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); | |
40633 | + | |
40634 | +#if NETFRONT_ACCEL_STATS | |
40635 | + vnic->stats.poll_schedule_count++; | |
40636 | + if (vnic->stats.event_count_since_irq > | |
40637 | + vnic->stats.events_per_irq_max) | |
40638 | + vnic->stats.events_per_irq_max = | |
40639 | + vnic->stats.event_count_since_irq; | |
40640 | + vnic->stats.event_count_since_irq = 0; | |
40641 | +#endif | |
40642 | + netif_rx_schedule(net_dev); | |
40643 | + } | |
40644 | + else { | |
40645 | + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); | |
40646 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.useless_irq_count++); | |
40647 | + DPRINTK("%s: irq when disabled\n", __FUNCTION__); | |
40648 | + } | |
40649 | + | |
40650 | + return IRQ_HANDLED; | |
40651 | +} | |
40652 | + | |
40653 | + | |
40654 | +void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac, | |
40655 | + u32 ip, u16 port, u8 protocol) | |
40656 | +{ | |
40657 | + unsigned long lock_state; | |
40658 | + struct net_accel_msg *msg; | |
40659 | + | |
40660 | + msg = net_accel_msg_start_send(vnic->shared_page, &vnic->to_dom0, | |
40661 | + &lock_state); | |
40662 | + | |
40663 | + if (msg == NULL) | |
40664 | + return; | |
40665 | + | |
40666 | + net_accel_msg_init(msg, NET_ACCEL_MSG_FASTPATH); | |
40667 | + msg->u.fastpath.flags = NET_ACCEL_MSG_REMOVE; | |
40668 | + memcpy(msg->u.fastpath.mac, mac, ETH_ALEN); | |
40669 | + | |
40670 | + msg->u.fastpath.port = port; | |
40671 | + msg->u.fastpath.ip = ip; | |
40672 | + msg->u.fastpath.proto = protocol; | |
40673 | + | |
40674 | + net_accel_msg_complete_send_notify(vnic->shared_page, &vnic->to_dom0, | |
40675 | + &lock_state, vnic->msg_channel_irq); | |
40676 | +} | |
40677 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_netfront.c | |
40678 | =================================================================== | |
40679 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
40680 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_netfront.c 2008-02-26 10:54:12.000000000 +0100 | |
40681 | @@ -0,0 +1,319 @@ | |
40682 | +/**************************************************************************** | |
40683 | + * Solarflare driver for Xen network acceleration | |
40684 | + * | |
40685 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
40686 | + * 9501 Jeronimo Road, Suite 250, | |
40687 | + * Irvine, CA 92618, USA | |
40688 | + * | |
40689 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
40690 | + * | |
40691 | + * This program is free software; you can redistribute it and/or modify it | |
40692 | + * under the terms of the GNU General Public License version 2 as published | |
40693 | + * by the Free Software Foundation, incorporated herein by reference. | |
40694 | + * | |
40695 | + * This program is distributed in the hope that it will be useful, | |
40696 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
40697 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
40698 | + * GNU General Public License for more details. | |
40699 | + * | |
40700 | + * You should have received a copy of the GNU General Public License | |
40701 | + * along with this program; if not, write to the Free Software | |
40702 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
40703 | + **************************************************************************** | |
40704 | + */ | |
40705 | + | |
40706 | +#include <linux/skbuff.h> | |
40707 | +#include <linux/netdevice.h> | |
40708 | + | |
40709 | +/* drivers/xen/netfront/netfront.h */ | |
40710 | +#include "netfront.h" | |
40711 | + | |
40712 | +#include "accel.h" | |
40713 | +#include "accel_bufs.h" | |
40714 | +#include "accel_util.h" | |
40715 | +#include "accel_msg_iface.h" | |
40716 | +#include "accel_ssr.h" | |
40717 | + | |
40718 | +#ifdef EFX_GCOV | |
40719 | +#include "gcov.h" | |
40720 | +#endif | |
40721 | + | |
40722 | +#define NETFRONT_ACCEL_VNIC_FROM_NETDEV(_nd) \ | |
40723 | + ((netfront_accel_vnic *)((struct netfront_info *)netdev_priv(net_dev))->accel_priv) | |
40724 | + | |
40725 | +static int netfront_accel_netdev_start_xmit(struct sk_buff *skb, | |
40726 | + struct net_device *net_dev) | |
40727 | +{ | |
40728 | + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); | |
40729 | + struct netfront_info *np = | |
40730 | + (struct netfront_info *)netdev_priv(net_dev); | |
40731 | + int handled, rc; | |
40732 | + unsigned long flags1, flags2; | |
40733 | + | |
40734 | + BUG_ON(vnic == NULL); | |
40735 | + | |
40736 | + /* Take our tx lock and hold for the duration */ | |
40737 | + spin_lock_irqsave(&vnic->tx_lock, flags1); | |
40738 | + | |
40739 | + if (!vnic->tx_enabled) { | |
40740 | + rc = 0; | |
40741 | + goto unlock_out; | |
40742 | + } | |
40743 | + | |
40744 | + handled = netfront_accel_vi_tx_post(vnic, skb); | |
40745 | + if (handled == NETFRONT_ACCEL_STATUS_BUSY) { | |
40746 | + BUG_ON(vnic->net_dev != net_dev); | |
40747 | + DPRINTK("%s stopping queue\n", __FUNCTION__); | |
40748 | + | |
40749 | + /* Netfront's lock protects tx_skb */ | |
40750 | + spin_lock_irqsave(&np->tx_lock, flags2); | |
40751 | + BUG_ON(vnic->tx_skb != NULL); | |
40752 | + vnic->tx_skb = skb; | |
40753 | + netif_stop_queue(net_dev); | |
40754 | + spin_unlock_irqrestore(&np->tx_lock, flags2); | |
40755 | + | |
40756 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.queue_stops++); | |
40757 | + } | |
40758 | + | |
40759 | + if (handled == NETFRONT_ACCEL_STATUS_CANT) | |
40760 | + rc = 0; | |
40761 | + else | |
40762 | + rc = 1; | |
40763 | + | |
40764 | +unlock_out: | |
40765 | + spin_unlock_irqrestore(&vnic->tx_lock, flags1); | |
40766 | + | |
40767 | + return rc; | |
40768 | +} | |
40769 | + | |
40770 | + | |
40771 | +static int netfront_accel_netdev_poll(struct net_device *net_dev, int *budget) | |
40772 | +{ | |
40773 | + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); | |
40774 | + int rx_allowed = *budget, rx_done; | |
40775 | + | |
40776 | + BUG_ON(vnic == NULL); | |
40777 | + | |
40778 | + /* Can check this without lock as modifier excludes polls */ | |
40779 | + if (!vnic->poll_enabled) | |
40780 | + return 0; | |
40781 | + | |
40782 | + rx_done = netfront_accel_vi_poll(vnic, rx_allowed); | |
40783 | + *budget -= rx_done; | |
40784 | + | |
40785 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_call_count++); | |
40786 | + | |
40787 | + VPRINTK("%s: done %d allowed %d\n", | |
40788 | + __FUNCTION__, rx_done, rx_allowed); | |
40789 | + | |
40790 | + netfront_accel_ssr_end_of_burst(vnic, &vnic->ssr_state); | |
40791 | + | |
40792 | + if (rx_done < rx_allowed) { | |
40793 | + return 0; /* Done */ | |
40794 | + } | |
40795 | + | |
40796 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_reschedule_count++); | |
40797 | + | |
40798 | + return 1; /* More to do. */ | |
40799 | +} | |
40800 | + | |
40801 | + | |
40802 | +/* | |
40803 | + * Process request from netfront to start napi interrupt | |
40804 | + * mode. (i.e. enable interrupts as it's finished polling) | |
40805 | + */ | |
40806 | +static int netfront_accel_start_napi_interrupts(struct net_device *net_dev) | |
40807 | +{ | |
40808 | + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); | |
40809 | + unsigned long flags; | |
40810 | + | |
40811 | + BUG_ON(vnic == NULL); | |
40812 | + | |
40813 | + /* | |
40814 | + * Can check this without lock as writer excludes poll before | |
40815 | + * modifying | |
40816 | + */ | |
40817 | + if (!vnic->poll_enabled) | |
40818 | + return 0; | |
40819 | + | |
40820 | + if (!netfront_accel_vi_enable_interrupts(vnic)) { | |
40821 | + /* | |
40822 | + * There was something there, tell caller we had | |
40823 | + * something to do. | |
40824 | + */ | |
40825 | + return 1; | |
40826 | + } | |
40827 | + | |
40828 | + spin_lock_irqsave(&vnic->irq_enabled_lock, flags); | |
40829 | + vnic->irq_enabled = 1; | |
40830 | + netfront_accel_enable_net_interrupts(vnic); | |
40831 | + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); | |
40832 | + | |
40833 | + return 0; | |
40834 | +} | |
40835 | + | |
40836 | + | |
40837 | +/* | |
40838 | + * Process request from netfront to stop napi interrupt | |
40839 | + * mode. (i.e. disable interrupts as it's starting to poll | |
40840 | + */ | |
40841 | +static void netfront_accel_stop_napi_interrupts(struct net_device *net_dev) | |
40842 | +{ | |
40843 | + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); | |
40844 | + unsigned long flags; | |
40845 | + | |
40846 | + BUG_ON(vnic == NULL); | |
40847 | + | |
40848 | + spin_lock_irqsave(&vnic->irq_enabled_lock, flags); | |
40849 | + | |
40850 | + if (!vnic->poll_enabled) { | |
40851 | + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); | |
40852 | + return; | |
40853 | + } | |
40854 | + | |
40855 | + netfront_accel_disable_net_interrupts(vnic); | |
40856 | + vnic->irq_enabled = 0; | |
40857 | + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); | |
40858 | +} | |
40859 | + | |
40860 | + | |
40861 | +static int netfront_accel_check_ready(struct net_device *net_dev) | |
40862 | +{ | |
40863 | + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); | |
40864 | + | |
40865 | + BUG_ON(vnic == NULL); | |
40866 | + | |
40867 | + /* This is protected by netfront's lock */ | |
40868 | + return vnic->tx_skb == NULL; | |
40869 | +} | |
40870 | + | |
40871 | + | |
40872 | +static int netfront_accel_get_stats(struct net_device *net_dev, | |
40873 | + struct net_device_stats *stats) | |
40874 | +{ | |
40875 | + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); | |
40876 | + struct netfront_accel_netdev_stats now; | |
40877 | + | |
40878 | + BUG_ON(vnic == NULL); | |
40879 | + | |
40880 | + now.fastpath_rx_pkts = vnic->netdev_stats.fastpath_rx_pkts; | |
40881 | + now.fastpath_rx_bytes = vnic->netdev_stats.fastpath_rx_bytes; | |
40882 | + now.fastpath_rx_errors = vnic->netdev_stats.fastpath_rx_errors; | |
40883 | + now.fastpath_tx_pkts = vnic->netdev_stats.fastpath_tx_pkts; | |
40884 | + now.fastpath_tx_bytes = vnic->netdev_stats.fastpath_tx_bytes; | |
40885 | + now.fastpath_tx_errors = vnic->netdev_stats.fastpath_tx_errors; | |
40886 | + | |
40887 | + stats->rx_packets += (now.fastpath_rx_pkts - | |
40888 | + vnic->stats_last_read.fastpath_rx_pkts); | |
40889 | + stats->rx_bytes += (now.fastpath_rx_bytes - | |
40890 | + vnic->stats_last_read.fastpath_rx_bytes); | |
40891 | + stats->rx_errors += (now.fastpath_rx_errors - | |
40892 | + vnic->stats_last_read.fastpath_rx_errors); | |
40893 | + stats->tx_packets += (now.fastpath_tx_pkts - | |
40894 | + vnic->stats_last_read.fastpath_tx_pkts); | |
40895 | + stats->tx_bytes += (now.fastpath_tx_bytes - | |
40896 | + vnic->stats_last_read.fastpath_tx_bytes); | |
40897 | + stats->tx_errors += (now.fastpath_tx_errors - | |
40898 | + vnic->stats_last_read.fastpath_tx_errors); | |
40899 | + | |
40900 | + vnic->stats_last_read = now; | |
40901 | + | |
40902 | + return 0; | |
40903 | +} | |
40904 | + | |
40905 | + | |
40906 | +struct netfront_accel_hooks accel_hooks = { | |
40907 | + .new_device = &netfront_accel_probe, | |
40908 | + .remove = &netfront_accel_remove, | |
40909 | + .netdev_poll = &netfront_accel_netdev_poll, | |
40910 | + .start_xmit = &netfront_accel_netdev_start_xmit, | |
40911 | + .start_napi_irq = &netfront_accel_start_napi_interrupts, | |
40912 | + .stop_napi_irq = &netfront_accel_stop_napi_interrupts, | |
40913 | + .check_ready = &netfront_accel_check_ready, | |
40914 | + .get_stats = &netfront_accel_get_stats | |
40915 | +}; | |
40916 | + | |
40917 | + | |
40918 | +unsigned sfc_netfront_max_pages = NETFRONT_ACCEL_DEFAULT_BUF_PAGES; | |
40919 | +module_param_named (max_pages, sfc_netfront_max_pages, uint, 0644); | |
40920 | +MODULE_PARM_DESC(max_pages, "Number of buffer pages to request"); | |
40921 | + | |
40922 | +unsigned sfc_netfront_buffer_split = 2; | |
40923 | +module_param_named (buffer_split, sfc_netfront_buffer_split, uint, 0644); | |
40924 | +MODULE_PARM_DESC(buffer_split, | |
40925 | + "Fraction of buffers to use for TX, rest for RX"); | |
40926 | + | |
40927 | + | |
40928 | +const char *frontend_name = "sfc_netfront"; | |
40929 | + | |
40930 | +struct workqueue_struct *netfront_accel_workqueue; | |
40931 | + | |
40932 | +static int __init netfront_accel_init(void) | |
40933 | +{ | |
40934 | + int rc; | |
40935 | +#ifdef EFX_GCOV | |
40936 | + gcov_provider_init(THIS_MODULE); | |
40937 | +#endif | |
40938 | + | |
40939 | + /* | |
40940 | + * If we're running on dom0, netfront hasn't initialised | |
40941 | + * itself, so we need to keep away | |
40942 | + */ | |
40943 | + if (is_initial_xendomain()) | |
40944 | + return 0; | |
40945 | + | |
40946 | + if (!is_pow2(sizeof(struct net_accel_msg))) | |
40947 | + EPRINTK("%s: bad structure size\n", __FUNCTION__); | |
40948 | + | |
40949 | + netfront_accel_workqueue = create_workqueue(frontend_name); | |
40950 | + | |
40951 | + netfront_accel_debugfs_init(); | |
40952 | + | |
40953 | + rc = netfront_accelerator_loaded(NETFRONT_ACCEL_VERSION, | |
40954 | + frontend_name, &accel_hooks); | |
40955 | + | |
40956 | + if (rc < 0) { | |
40957 | + EPRINTK("Xen netfront accelerator version mismatch\n"); | |
40958 | + return -EINVAL; | |
40959 | + } | |
40960 | + | |
40961 | + if (rc > 0) { | |
40962 | + /* | |
40963 | + * In future may want to add backwards compatibility | |
40964 | + * and accept certain subsets of previous versions | |
40965 | + */ | |
40966 | + EPRINTK("Xen netfront accelerator version mismatch\n"); | |
40967 | + return -EINVAL; | |
40968 | + } | |
40969 | + | |
40970 | + return 0; | |
40971 | +} | |
40972 | +module_init(netfront_accel_init); | |
40973 | + | |
40974 | +static void __exit netfront_accel_exit(void) | |
40975 | +{ | |
40976 | + if (is_initial_xendomain()) | |
40977 | + return; | |
40978 | + | |
40979 | + DPRINTK("%s: unhooking\n", __FUNCTION__); | |
40980 | + | |
40981 | + /* Unhook from normal netfront */ | |
40982 | + netfront_accelerator_stop(frontend_name); | |
40983 | + | |
40984 | + DPRINTK("%s: done\n", __FUNCTION__); | |
40985 | + | |
40986 | + netfront_accel_debugfs_fini(); | |
40987 | + | |
40988 | + flush_workqueue(netfront_accel_workqueue); | |
40989 | + | |
40990 | + destroy_workqueue(netfront_accel_workqueue); | |
40991 | + | |
40992 | +#ifdef EFX_GCOV | |
40993 | + gcov_provider_fini(THIS_MODULE); | |
40994 | +#endif | |
40995 | + return; | |
40996 | +} | |
40997 | +module_exit(netfront_accel_exit); | |
40998 | + | |
40999 | +MODULE_LICENSE("GPL"); | |
41000 | + | |
41001 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.c | |
41002 | =================================================================== | |
41003 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
41004 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.c 2008-02-20 09:32:49.000000000 +0100 | |
41005 | @@ -0,0 +1,308 @@ | |
41006 | +/**************************************************************************** | |
41007 | + * Solarflare driver for Xen network acceleration | |
41008 | + * | |
41009 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
41010 | + * 9501 Jeronimo Road, Suite 250, | |
41011 | + * Irvine, CA 92618, USA | |
41012 | + * | |
41013 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
41014 | + * | |
41015 | + * This program is free software; you can redistribute it and/or modify it | |
41016 | + * under the terms of the GNU General Public License version 2 as published | |
41017 | + * by the Free Software Foundation, incorporated herein by reference. | |
41018 | + * | |
41019 | + * This program is distributed in the hope that it will be useful, | |
41020 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
41021 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
41022 | + * GNU General Public License for more details. | |
41023 | + * | |
41024 | + * You should have received a copy of the GNU General Public License | |
41025 | + * along with this program; if not, write to the Free Software | |
41026 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
41027 | + **************************************************************************** | |
41028 | + */ | |
41029 | + | |
41030 | +#include <linux/socket.h> | |
41031 | +#include <linux/in.h> | |
41032 | +#include <linux/ip.h> | |
41033 | +#include <linux/tcp.h> | |
41034 | +#include <linux/list.h> | |
41035 | +#include <net/ip.h> | |
41036 | +#include <net/checksum.h> | |
41037 | + | |
41038 | +#include "accel.h" | |
41039 | +#include "accel_util.h" | |
41040 | +#include "accel_bufs.h" | |
41041 | + | |
41042 | +#include "accel_ssr.h" | |
41043 | + | |
41044 | +static inline int list_valid(struct list_head *lh) { | |
41045 | + return(lh->next != NULL); | |
41046 | +} | |
41047 | + | |
41048 | +static void netfront_accel_ssr_deliver (struct netfront_accel_vnic *vnic, | |
41049 | + struct netfront_accel_ssr_state *st, | |
41050 | + struct netfront_accel_ssr_conn *c); | |
41051 | + | |
41052 | +/** Construct an efx_ssr_state. | |
41053 | + * | |
41054 | + * @v st The SSR state (per channel per port) | |
41055 | + * @v port The port. | |
41056 | + */ | |
41057 | +void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st) { | |
41058 | + unsigned i; | |
41059 | + | |
41060 | + INIT_LIST_HEAD(&st->conns); | |
41061 | + INIT_LIST_HEAD(&st->free_conns); | |
41062 | + for (i = 0; i < 8; ++i) { | |
41063 | + struct netfront_accel_ssr_conn *c = | |
41064 | + kmalloc(sizeof(*c), GFP_KERNEL); | |
41065 | + if (c == NULL) break; | |
41066 | + c->n_in_order_pkts = 0; | |
41067 | + c->skb = NULL; | |
41068 | + list_add(&c->link, &st->free_conns); | |
41069 | + } | |
41070 | + | |
41071 | +} | |
41072 | + | |
41073 | + | |
41074 | +/** Destructor for an efx_ssr_state. | |
41075 | + * | |
41076 | + * @v st The SSR state (per channel per port) | |
41077 | + */ | |
41078 | +void netfront_accel_ssr_fini(netfront_accel_vnic *vnic, | |
41079 | + struct netfront_accel_ssr_state *st) { | |
41080 | + struct netfront_accel_ssr_conn *c; | |
41081 | + | |
41082 | + /* Return cleanly if efx_ssr_init() not previously called */ | |
41083 | + BUG_ON(list_valid(&st->conns) != list_valid(&st->free_conns)); | |
41084 | + if (! list_valid(&st->conns)) | |
41085 | + return; | |
41086 | + | |
41087 | + while ( ! list_empty(&st->free_conns)) { | |
41088 | + c = list_entry(st->free_conns.prev, | |
41089 | + struct netfront_accel_ssr_conn, link); | |
41090 | + list_del(&c->link); | |
41091 | + BUG_ON(c->skb != NULL); | |
41092 | + kfree(c); | |
41093 | + } | |
41094 | + while ( ! list_empty(&st->conns)) { | |
41095 | + c = list_entry(st->conns.prev, | |
41096 | + struct netfront_accel_ssr_conn, link); | |
41097 | + list_del(&c->link); | |
41098 | + if (c->skb) | |
41099 | + netfront_accel_ssr_deliver(vnic, st, c); | |
41100 | + kfree(c); | |
41101 | + } | |
41102 | +} | |
41103 | + | |
41104 | + | |
41105 | +/** Calc IP checksum and deliver to the OS | |
41106 | + * | |
41107 | + * @v st The SSR state (per channel per port) | |
41108 | + * @v c The SSR connection state | |
41109 | + */ | |
41110 | +static void netfront_accel_ssr_deliver(netfront_accel_vnic *vnic, | |
41111 | + struct netfront_accel_ssr_state *st, | |
41112 | + struct netfront_accel_ssr_conn *c) { | |
41113 | + BUG_ON(c->skb == NULL); | |
41114 | + | |
41115 | + /* | |
41116 | + * If we've chained packets together, recalculate the IP | |
41117 | + * checksum. | |
41118 | + */ | |
41119 | + if (skb_shinfo(c->skb)->frag_list) { | |
41120 | + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_bursts); | |
41121 | + c->iph->check = 0; | |
41122 | + c->iph->check = ip_fast_csum((unsigned char *) c->iph, | |
41123 | + c->iph->ihl); | |
41124 | + } | |
41125 | + | |
41126 | + VPRINTK("%s: %d\n", __FUNCTION__, c->skb->len); | |
41127 | + | |
41128 | + netif_receive_skb(c->skb); | |
41129 | + c->skb = NULL; | |
41130 | +} | |
41131 | + | |
41132 | + | |
41133 | +/** Push held skbs down into network stack. | |
41134 | + * | |
41135 | + * @v st SSR state | |
41136 | + * | |
41137 | + * Only called if we are tracking one or more connections. | |
41138 | + */ | |
41139 | +void __netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic, | |
41140 | + struct netfront_accel_ssr_state *st) { | |
41141 | + struct netfront_accel_ssr_conn *c; | |
41142 | + | |
41143 | + BUG_ON(list_empty(&st->conns)); | |
41144 | + | |
41145 | + list_for_each_entry(c, &st->conns, link) | |
41146 | + if (c->skb) | |
41147 | + netfront_accel_ssr_deliver(vnic, st, c); | |
41148 | + | |
41149 | + /* Time-out connections that have received no traffic for 20ms. */ | |
41150 | + c = list_entry(st->conns.prev, struct netfront_accel_ssr_conn, | |
41151 | + link); | |
41152 | + if (jiffies - c->last_pkt_jiffies > (HZ / 50 + 1)) { | |
41153 | + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_drop_stream); | |
41154 | + list_del(&c->link); | |
41155 | + list_add(&c->link, &st->free_conns); | |
41156 | + } | |
41157 | +} | |
41158 | + | |
41159 | + | |
41160 | +/** Process SKB and decide whether to dispatch it to the stack now or | |
41161 | + * later. | |
41162 | + * | |
41163 | + * @v st SSR state | |
41164 | + * @v skb SKB to examine | |
41165 | + * @ret rc 0 => deliver SKB to kernel now, otherwise the SKB belongs | |
41166 | + * to us. | |
41167 | + */ | |
41168 | +int netfront_accel_ssr_skb(struct netfront_accel_vnic *vnic, | |
41169 | + struct netfront_accel_ssr_state *st, | |
41170 | + struct sk_buff *skb) { | |
41171 | + int data_length, dont_merge; | |
41172 | + struct netfront_accel_ssr_conn *c; | |
41173 | + struct iphdr *iph; | |
41174 | + struct tcphdr *th; | |
41175 | + unsigned th_seq; | |
41176 | + | |
41177 | + BUG_ON(skb_shinfo(skb)->frag_list != NULL); | |
41178 | + BUG_ON(skb->next != NULL); | |
41179 | + | |
41180 | + /* We're not interested if it isn't TCP over IPv4. */ | |
41181 | + iph = (struct iphdr *) skb->data; | |
41182 | + if (skb->protocol != htons(ETH_P_IP) || | |
41183 | + iph->protocol != IPPROTO_TCP) { | |
41184 | + return 0; | |
41185 | + } | |
41186 | + | |
41187 | + /* Ignore segments that fail csum or are fragmented. */ | |
41188 | + if (unlikely((skb->ip_summed - CHECKSUM_UNNECESSARY) | | |
41189 | + (iph->frag_off & htons(IP_MF | IP_OFFSET)))) { | |
41190 | + return 0; | |
41191 | + } | |
41192 | + | |
41193 | + th = (struct tcphdr*)(skb->data + iph->ihl * 4); | |
41194 | + data_length = ntohs(iph->tot_len) - iph->ihl * 4 - th->doff * 4; | |
41195 | + th_seq = ntohl(th->seq); | |
41196 | + dont_merge = (data_length == 0) | th->urg | th->syn | th->rst; | |
41197 | + | |
41198 | + list_for_each_entry(c, &st->conns, link) { | |
41199 | + if ((c->saddr - iph->saddr) | | |
41200 | + (c->daddr - iph->daddr) | | |
41201 | + (c->source - th->source) | | |
41202 | + (c->dest - th->dest )) | |
41203 | + continue; | |
41204 | + | |
41205 | + /* Re-insert at head of list to reduce lookup time. */ | |
41206 | + list_del(&c->link); | |
41207 | + list_add(&c->link, &st->conns); | |
41208 | + c->last_pkt_jiffies = jiffies; | |
41209 | + | |
41210 | + if (unlikely(th_seq - c->next_seq)) { | |
41211 | + /* Out-of-order, so start counting again. */ | |
41212 | + if (c->skb) | |
41213 | + netfront_accel_ssr_deliver(vnic, st, c); | |
41214 | + c->n_in_order_pkts = 0; | |
41215 | + c->next_seq = th_seq + data_length; | |
41216 | + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_misorder); | |
41217 | + return 0; | |
41218 | + } | |
41219 | + c->next_seq = th_seq + data_length; | |
41220 | + | |
41221 | + if (++c->n_in_order_pkts < 300) { | |
41222 | + /* May be in slow-start, so don't merge. */ | |
41223 | + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_slow_start); | |
41224 | + return 0; | |
41225 | + } | |
41226 | + | |
41227 | + if (unlikely(dont_merge)) { | |
41228 | + if (c->skb) | |
41229 | + netfront_accel_ssr_deliver(vnic, st, c); | |
41230 | + return 0; | |
41231 | + } | |
41232 | + | |
41233 | + if (c->skb) { | |
41234 | + c->iph->tot_len = ntohs(c->iph->tot_len); | |
41235 | + c->iph->tot_len += data_length; | |
41236 | + c->iph->tot_len = htons(c->iph->tot_len); | |
41237 | + c->th->ack_seq = th->ack_seq; | |
41238 | + c->th->fin |= th->fin; | |
41239 | + c->th->psh |= th->psh; | |
41240 | + c->th->window = th->window; | |
41241 | + | |
41242 | + /* Remove the headers from this skb. */ | |
41243 | + skb_pull(skb, skb->len - data_length); | |
41244 | + | |
41245 | + /* | |
41246 | + * Tack the new skb onto the head skb's frag_list. | |
41247 | + * This is exactly the format that fragmented IP | |
41248 | + * datagrams are reassembled into. | |
41249 | + */ | |
41250 | + BUG_ON(skb->next != 0); | |
41251 | + if ( ! skb_shinfo(c->skb)->frag_list) | |
41252 | + skb_shinfo(c->skb)->frag_list = skb; | |
41253 | + else | |
41254 | + c->skb_tail->next = skb; | |
41255 | + c->skb_tail = skb; | |
41256 | + c->skb->len += skb->len; | |
41257 | + c->skb->data_len += skb->len; | |
41258 | + c->skb->truesize += skb->truesize; | |
41259 | + | |
41260 | + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_merges); | |
41261 | + | |
41262 | + /* | |
41263 | + * If the next packet might push this super-packet | |
41264 | + * over the limit for an IP packet, deliver it now. | |
41265 | + * This is slightly conservative, but close enough. | |
41266 | + */ | |
41267 | + if (c->skb->len + | |
41268 | + (PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) | |
41269 | + > 16384) | |
41270 | + netfront_accel_ssr_deliver(vnic, st, c); | |
41271 | + | |
41272 | + return 1; | |
41273 | + } | |
41274 | + else { | |
41275 | + c->iph = iph; | |
41276 | + c->th = th; | |
41277 | + c->skb = skb; | |
41278 | + return 1; | |
41279 | + } | |
41280 | + } | |
41281 | + | |
41282 | + /* We're not yet tracking this connection. */ | |
41283 | + | |
41284 | + if (dont_merge) { | |
41285 | + return 0; | |
41286 | + } | |
41287 | + | |
41288 | + if (list_empty(&st->free_conns)) { | |
41289 | + c = list_entry(st->conns.prev, | |
41290 | + struct netfront_accel_ssr_conn, | |
41291 | + link); | |
41292 | + if (c->skb) { | |
41293 | + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_too_many); | |
41294 | + return 0; | |
41295 | + } | |
41296 | + } | |
41297 | + else { | |
41298 | + c = list_entry(st->free_conns.next, | |
41299 | + struct netfront_accel_ssr_conn, | |
41300 | + link); | |
41301 | + } | |
41302 | + list_del(&c->link); | |
41303 | + list_add(&c->link, &st->conns); | |
41304 | + c->saddr = iph->saddr; | |
41305 | + c->daddr = iph->daddr; | |
41306 | + c->source = th->source; | |
41307 | + c->dest = th->dest; | |
41308 | + c->next_seq = th_seq + data_length; | |
41309 | + c->n_in_order_pkts = 0; | |
41310 | + BUG_ON(c->skb != NULL); | |
41311 | + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_new_stream); | |
41312 | + return 0; | |
41313 | +} | |
41314 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.h | |
41315 | =================================================================== | |
41316 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
41317 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_ssr.h 2008-02-20 09:32:49.000000000 +0100 | |
41318 | @@ -0,0 +1,88 @@ | |
41319 | +/**************************************************************************** | |
41320 | + * Solarflare driver for Xen network acceleration | |
41321 | + * | |
41322 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
41323 | + * 9501 Jeronimo Road, Suite 250, | |
41324 | + * Irvine, CA 92618, USA | |
41325 | + * | |
41326 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
41327 | + * | |
41328 | + * This program is free software; you can redistribute it and/or modify it | |
41329 | + * under the terms of the GNU General Public License version 2 as published | |
41330 | + * by the Free Software Foundation, incorporated herein by reference. | |
41331 | + * | |
41332 | + * This program is distributed in the hope that it will be useful, | |
41333 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
41334 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
41335 | + * GNU General Public License for more details. | |
41336 | + * | |
41337 | + * You should have received a copy of the GNU General Public License | |
41338 | + * along with this program; if not, write to the Free Software | |
41339 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
41340 | + **************************************************************************** | |
41341 | + */ | |
41342 | + | |
41343 | +#ifndef NETFRONT_ACCEL_SSR_H | |
41344 | +#define NETFRONT_ACCEL_SSR_H | |
41345 | + | |
41346 | +#include <linux/skbuff.h> | |
41347 | +#include <linux/ip.h> | |
41348 | +#include <linux/tcp.h> | |
41349 | +#include <linux/list.h> | |
41350 | + | |
41351 | +#include "accel.h" | |
41352 | + | |
41353 | +/** State for Soft Segment Reassembly (SSR). */ | |
41354 | + | |
41355 | +struct netfront_accel_ssr_conn { | |
41356 | + struct list_head link; | |
41357 | + | |
41358 | + unsigned saddr, daddr; | |
41359 | + unsigned short source, dest; | |
41360 | + | |
41361 | + /** Number of in-order packets we've seen with payload. */ | |
41362 | + unsigned n_in_order_pkts; | |
41363 | + | |
41364 | + /** Next in-order sequence number. */ | |
41365 | + unsigned next_seq; | |
41366 | + | |
41367 | + /** Time we last saw a packet on this connection. */ | |
41368 | + unsigned long last_pkt_jiffies; | |
41369 | + | |
41370 | + /** The SKB we are currently holding. If NULL, then all following | |
41371 | + * fields are undefined. | |
41372 | + */ | |
41373 | + struct sk_buff *skb; | |
41374 | + | |
41375 | + /** The tail of the frag_list of SKBs we're holding. Only valid | |
41376 | + * after at least one merge. | |
41377 | + */ | |
41378 | + struct sk_buff *skb_tail; | |
41379 | + | |
41380 | + /** The IP header of the skb we are holding. */ | |
41381 | + struct iphdr *iph; | |
41382 | + | |
41383 | + /** The TCP header of the skb we are holding. */ | |
41384 | + struct tcphdr *th; | |
41385 | +}; | |
41386 | + | |
41387 | +extern void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st); | |
41388 | +extern void netfront_accel_ssr_fini(netfront_accel_vnic *vnic, | |
41389 | + struct netfront_accel_ssr_state *st); | |
41390 | + | |
41391 | +extern void | |
41392 | +__netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic, | |
41393 | + struct netfront_accel_ssr_state *st); | |
41394 | + | |
41395 | +extern int netfront_accel_ssr_skb(netfront_accel_vnic *vnic, | |
41396 | + struct netfront_accel_ssr_state *st, | |
41397 | + struct sk_buff *skb); | |
41398 | + | |
41399 | +static inline void | |
41400 | +netfront_accel_ssr_end_of_burst (netfront_accel_vnic *vnic, | |
41401 | + struct netfront_accel_ssr_state *st) { | |
41402 | + if ( ! list_empty(&st->conns) ) | |
41403 | + __netfront_accel_ssr_end_of_burst(vnic, st); | |
41404 | +} | |
41405 | + | |
41406 | +#endif /* NETFRONT_ACCEL_SSR_H */ | |
41407 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.c | |
41408 | =================================================================== | |
41409 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
41410 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.c 2008-02-26 10:54:12.000000000 +0100 | |
41411 | @@ -0,0 +1,511 @@ | |
41412 | +/**************************************************************************** | |
41413 | + * Solarflare driver for Xen network acceleration | |
41414 | + * | |
41415 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
41416 | + * 9501 Jeronimo Road, Suite 250, | |
41417 | + * Irvine, CA 92618, USA | |
41418 | + * | |
41419 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
41420 | + * | |
41421 | + * This program is free software; you can redistribute it and/or modify it | |
41422 | + * under the terms of the GNU General Public License version 2 as published | |
41423 | + * by the Free Software Foundation, incorporated herein by reference. | |
41424 | + * | |
41425 | + * This program is distributed in the hope that it will be useful, | |
41426 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
41427 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
41428 | + * GNU General Public License for more details. | |
41429 | + * | |
41430 | + * You should have received a copy of the GNU General Public License | |
41431 | + * along with this program; if not, write to the Free Software | |
41432 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
41433 | + **************************************************************************** | |
41434 | + */ | |
41435 | + | |
41436 | +#include <linux/pci.h> | |
41437 | +#include <linux/tcp.h> | |
41438 | +#include <linux/ip.h> | |
41439 | +#include <linux/in.h> | |
41440 | +#include <linux/if_ether.h> | |
41441 | + | |
41442 | +#include "accel.h" | |
41443 | +#include "accel_util.h" | |
41444 | + | |
41445 | +#include "accel_tso.h" | |
41446 | + | |
41447 | +#define PTR_DIFF(p1, p2) ((u8*)(p1) - (u8*)(p2)) | |
41448 | +#define ETH_HDR_LEN(skb) ((skb)->nh.raw - (skb)->data) | |
41449 | +#define SKB_TCP_OFF(skb) PTR_DIFF ((skb)->h.th, (skb)->data) | |
41450 | +#define SKB_IP_OFF(skb) PTR_DIFF ((skb)->nh.iph, (skb)->data) | |
41451 | + | |
41452 | +/* | |
41453 | + * Set a maximum number of buffers in each output packet to make life | |
41454 | + * a little simpler - if this is reached it will just move on to | |
41455 | + * another packet | |
41456 | + */ | |
41457 | +#define ACCEL_TSO_MAX_BUFFERS (6) | |
41458 | + | |
41459 | +/** TSO State. | |
41460 | + * | |
41461 | + * The state used during segmentation. It is put into this data structure | |
41462 | + * just to make it easy to pass into inline functions. | |
41463 | + */ | |
41464 | +struct netfront_accel_tso_state { | |
41465 | + /** bytes of data we've yet to segment */ | |
41466 | + unsigned remaining_len; | |
41467 | + | |
41468 | + /** current sequence number */ | |
41469 | + unsigned seqnum; | |
41470 | + | |
41471 | + /** remaining space in current packet */ | |
41472 | + unsigned packet_space; | |
41473 | + | |
41474 | + /** List of packets to be output, containing the buffers and | |
41475 | + * iovecs to describe each packet | |
41476 | + */ | |
41477 | + struct netfront_accel_tso_output_packet *output_packets; | |
41478 | + | |
41479 | + /** Total number of buffers in output_packets */ | |
41480 | + unsigned buffers; | |
41481 | + | |
41482 | + /** Total number of packets in output_packets */ | |
41483 | + unsigned packets; | |
41484 | + | |
41485 | + /** Input Fragment Cursor. | |
41486 | + * | |
41487 | + * Where we are in the current fragment of the incoming SKB. These | |
41488 | + * values get updated in place when we split a fragment over | |
41489 | + * multiple packets. | |
41490 | + */ | |
41491 | + struct { | |
41492 | + /** address of current position */ | |
41493 | + void *addr; | |
41494 | + /** remaining length */ | |
41495 | + unsigned int len; | |
41496 | + } ifc; /* == ifc Input Fragment Cursor */ | |
41497 | + | |
41498 | + /** Parameters. | |
41499 | + * | |
41500 | + * These values are set once at the start of the TSO send and do | |
41501 | + * not get changed as the routine progresses. | |
41502 | + */ | |
41503 | + struct { | |
41504 | + /* the number of bytes of header */ | |
41505 | + unsigned int header_length; | |
41506 | + | |
41507 | + /* The number of bytes to put in each outgoing segment. */ | |
41508 | + int full_packet_size; | |
41509 | + | |
41510 | + /* Current IP ID, host endian. */ | |
41511 | + unsigned ip_id; | |
41512 | + | |
41513 | + /* Max size of each output packet payload */ | |
41514 | + int gso_size; | |
41515 | + } p; | |
41516 | +}; | |
41517 | + | |
41518 | + | |
41519 | +/** | |
41520 | + * Verify that our various assumptions about sk_buffs and the conditions | |
41521 | + * under which TSO will be attempted hold true. | |
41522 | + * | |
41523 | + * @v skb The sk_buff to check. | |
41524 | + */ | |
41525 | +static inline void tso_check_safe(struct sk_buff *skb) { | |
41526 | + EPRINTK_ON(skb->protocol != htons (ETH_P_IP)); | |
41527 | + EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP)); | |
41528 | + EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP); | |
41529 | + EPRINTK_ON((SKB_TCP_OFF(skb) | |
41530 | + + (skb->h.th->doff << 2u)) > skb_headlen(skb)); | |
41531 | +} | |
41532 | + | |
41533 | + | |
41534 | + | |
41535 | +/** Parse the SKB header and initialise state. */ | |
41536 | +static inline void tso_start(struct netfront_accel_tso_state *st, | |
41537 | + struct sk_buff *skb) { | |
41538 | + | |
41539 | + /* | |
41540 | + * All ethernet/IP/TCP headers combined size is TCP header size | |
41541 | + * plus offset of TCP header relative to start of packet. | |
41542 | + */ | |
41543 | + st->p.header_length = (skb->h.th->doff << 2u) + SKB_TCP_OFF(skb); | |
41544 | + st->p.full_packet_size = (st->p.header_length | |
41545 | + + skb_shinfo(skb)->gso_size); | |
41546 | + st->p.gso_size = skb_shinfo(skb)->gso_size; | |
41547 | + | |
41548 | + st->p.ip_id = htons(skb->nh.iph->id); | |
41549 | + st->seqnum = ntohl(skb->h.th->seq); | |
41550 | + | |
41551 | + EPRINTK_ON(skb->h.th->urg); | |
41552 | + EPRINTK_ON(skb->h.th->syn); | |
41553 | + EPRINTK_ON(skb->h.th->rst); | |
41554 | + | |
41555 | + st->remaining_len = skb->len - st->p.header_length; | |
41556 | + | |
41557 | + st->output_packets = NULL; | |
41558 | + st->buffers = 0; | |
41559 | + st->packets = 0; | |
41560 | + | |
41561 | + VPRINTK("Starting new TSO: hl %d ps %d gso %d seq %x len %d\n", | |
41562 | + st->p.header_length, st->p.full_packet_size, st->p.gso_size, | |
41563 | + st->seqnum, skb->len); | |
41564 | +} | |
41565 | + | |
41566 | +/** | |
41567 | + * Add another NIC mapped buffer onto an output packet | |
41568 | + */ | |
41569 | +static inline int tso_start_new_buffer(netfront_accel_vnic *vnic, | |
41570 | + struct netfront_accel_tso_state *st, | |
41571 | + int first) | |
41572 | +{ | |
41573 | + struct netfront_accel_tso_buffer *tso_buf; | |
41574 | + struct netfront_accel_pkt_desc *buf; | |
41575 | + | |
41576 | + /* Get a mapped packet buffer */ | |
41577 | + buf = netfront_accel_buf_get(vnic->tx_bufs); | |
41578 | + if (buf == NULL) { | |
41579 | + DPRINTK("%s: No buffer for TX\n", __FUNCTION__); | |
41580 | + return -1; | |
41581 | + } | |
41582 | + | |
41583 | + /* Store a bit of meta-data at the end */ | |
41584 | + tso_buf =(struct netfront_accel_tso_buffer *) | |
41585 | + (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH | |
41586 | + + sizeof(struct netfront_accel_tso_output_packet)); | |
41587 | + | |
41588 | + tso_buf->buf = buf; | |
41589 | + | |
41590 | + tso_buf->length = 0; | |
41591 | + | |
41592 | + if (first) { | |
41593 | + struct netfront_accel_tso_output_packet *output_packet | |
41594 | + = (struct netfront_accel_tso_output_packet *) | |
41595 | + (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH); | |
41596 | + output_packet->next = st->output_packets; | |
41597 | + st->output_packets = output_packet; | |
41598 | + tso_buf->next = NULL; | |
41599 | + st->output_packets->tso_bufs = tso_buf; | |
41600 | + st->output_packets->tso_bufs_len = 1; | |
41601 | + } else { | |
41602 | + tso_buf->next = st->output_packets->tso_bufs; | |
41603 | + st->output_packets->tso_bufs = tso_buf; | |
41604 | + st->output_packets->tso_bufs_len ++; | |
41605 | + } | |
41606 | + | |
41607 | + BUG_ON(st->output_packets->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS); | |
41608 | + | |
41609 | + st->buffers ++; | |
41610 | + | |
41611 | + /* | |
41612 | + * Store the context, set to NULL, last packet buffer will get | |
41613 | + * non-NULL later | |
41614 | + */ | |
41615 | + tso_buf->buf->skb = NULL; | |
41616 | + | |
41617 | + return 0; | |
41618 | +} | |
41619 | + | |
41620 | + | |
41621 | +/* Generate a new header, and prepare for the new packet. | |
41622 | + * | |
41623 | + * @v vnic VNIC | |
41624 | + * @v skb Socket buffer | |
41625 | + * @v st TSO state | |
41626 | + * @ret rc 0 on success, or -1 if failed to alloc header | |
41627 | + */ | |
41628 | + | |
41629 | +static inline | |
41630 | +int tso_start_new_packet(netfront_accel_vnic *vnic, | |
41631 | + struct sk_buff *skb, | |
41632 | + struct netfront_accel_tso_state *st) | |
41633 | +{ | |
41634 | + struct netfront_accel_tso_buffer *tso_buf; | |
41635 | + struct iphdr *tsoh_iph; | |
41636 | + struct tcphdr *tsoh_th; | |
41637 | + unsigned ip_length; | |
41638 | + | |
41639 | + if (tso_start_new_buffer(vnic, st, 1) < 0) { | |
41640 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++); | |
41641 | + return -1; | |
41642 | + } | |
41643 | + | |
41644 | + /* This has been set up by tso_start_new_buffer() */ | |
41645 | + tso_buf = st->output_packets->tso_bufs; | |
41646 | + | |
41647 | + /* Copy in the header */ | |
41648 | + memcpy(tso_buf->buf->pkt_kva, skb->data, st->p.header_length); | |
41649 | + tso_buf->length = st->p.header_length; | |
41650 | + | |
41651 | + tsoh_th = (struct tcphdr*) | |
41652 | + (tso_buf->buf->pkt_kva + SKB_TCP_OFF(skb)); | |
41653 | + tsoh_iph = (struct iphdr*) | |
41654 | + (tso_buf->buf->pkt_kva + SKB_IP_OFF(skb)); | |
41655 | + | |
41656 | + /* Set to zero to encourage falcon to fill these in */ | |
41657 | + tsoh_th->check = 0; | |
41658 | + tsoh_iph->check = 0; | |
41659 | + | |
41660 | + tsoh_th->seq = htonl(st->seqnum); | |
41661 | + st->seqnum += st->p.gso_size; | |
41662 | + | |
41663 | + if (st->remaining_len > st->p.gso_size) { | |
41664 | + /* This packet will not finish the TSO burst. */ | |
41665 | + ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb); | |
41666 | + tsoh_th->fin = 0; | |
41667 | + tsoh_th->psh = 0; | |
41668 | + } else { | |
41669 | + /* This packet will be the last in the TSO burst. */ | |
41670 | + ip_length = (st->p.header_length - ETH_HDR_LEN(skb) | |
41671 | + + st->remaining_len); | |
41672 | + tsoh_th->fin = skb->h.th->fin; | |
41673 | + tsoh_th->psh = skb->h.th->psh; | |
41674 | + } | |
41675 | + | |
41676 | + tsoh_iph->tot_len = htons(ip_length); | |
41677 | + | |
41678 | + /* Linux leaves suitable gaps in the IP ID space for us to fill. */ | |
41679 | + tsoh_iph->id = st->p.ip_id++; | |
41680 | + tsoh_iph->id = htons(tsoh_iph->id); | |
41681 | + | |
41682 | + st->packet_space = st->p.gso_size; | |
41683 | + | |
41684 | + st->packets++; | |
41685 | + | |
41686 | + return 0; | |
41687 | +} | |
41688 | + | |
41689 | + | |
41690 | + | |
41691 | +static inline void tso_get_fragment(struct netfront_accel_tso_state *st, | |
41692 | + int len, void *addr) | |
41693 | +{ | |
41694 | + st->ifc.len = len; | |
41695 | + st->ifc.addr = addr; | |
41696 | + return; | |
41697 | +} | |
41698 | + | |
41699 | + | |
41700 | +static inline void tso_unwind(netfront_accel_vnic *vnic, | |
41701 | + struct netfront_accel_tso_state *st) | |
41702 | +{ | |
41703 | + struct netfront_accel_tso_buffer *tso_buf; | |
41704 | + struct netfront_accel_tso_output_packet *output_packet; | |
41705 | + | |
41706 | + DPRINTK("%s\n", __FUNCTION__); | |
41707 | + | |
41708 | + while (st->output_packets != NULL) { | |
41709 | + output_packet = st->output_packets; | |
41710 | + st->output_packets = output_packet->next; | |
41711 | + while (output_packet->tso_bufs != NULL) { | |
41712 | + tso_buf = output_packet->tso_bufs; | |
41713 | + output_packet->tso_bufs = tso_buf->next; | |
41714 | + | |
41715 | + st->buffers --; | |
41716 | + output_packet->tso_bufs_len --; | |
41717 | + | |
41718 | + netfront_accel_buf_put(vnic->tx_bufs, | |
41719 | + tso_buf->buf->buf_id); | |
41720 | + } | |
41721 | + } | |
41722 | + BUG_ON(st->buffers != 0); | |
41723 | +} | |
41724 | + | |
41725 | + | |
41726 | + | |
41727 | +static inline | |
41728 | +void tso_fill_packet_with_fragment(netfront_accel_vnic *vnic, | |
41729 | + struct netfront_accel_tso_state *st) | |
41730 | +{ | |
41731 | + struct netfront_accel_tso_buffer *tso_buf; | |
41732 | + int n, space; | |
41733 | + | |
41734 | + BUG_ON(st->output_packets == NULL); | |
41735 | + BUG_ON(st->output_packets->tso_bufs == NULL); | |
41736 | + | |
41737 | + tso_buf = st->output_packets->tso_bufs; | |
41738 | + | |
41739 | + if (st->ifc.len == 0) return; | |
41740 | + if (st->packet_space == 0) return; | |
41741 | + if (tso_buf->length == NETFRONT_ACCEL_TSO_BUF_LENGTH) return; | |
41742 | + | |
41743 | + n = min(st->ifc.len, st->packet_space); | |
41744 | + | |
41745 | + space = NETFRONT_ACCEL_TSO_BUF_LENGTH - tso_buf->length; | |
41746 | + n = min(n, space); | |
41747 | + | |
41748 | + st->packet_space -= n; | |
41749 | + st->remaining_len -= n; | |
41750 | + st->ifc.len -= n; | |
41751 | + | |
41752 | + memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n); | |
41753 | + | |
41754 | + tso_buf->length += n; | |
41755 | + | |
41756 | + BUG_ON(tso_buf->length > NETFRONT_ACCEL_TSO_BUF_LENGTH); | |
41757 | + | |
41758 | + st->ifc.addr += n; | |
41759 | + | |
41760 | + return; | |
41761 | +} | |
41762 | + | |
41763 | + | |
41764 | +int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic, | |
41765 | + struct sk_buff *skb) | |
41766 | +{ | |
41767 | + struct netfront_accel_tso_state state; | |
41768 | + struct netfront_accel_tso_buffer *tso_buf = NULL; | |
41769 | + struct netfront_accel_tso_output_packet *reversed_list = NULL; | |
41770 | + struct netfront_accel_tso_output_packet *tmp_pkt; | |
41771 | + ef_iovec iovecs[ACCEL_TSO_MAX_BUFFERS]; | |
41772 | + int frag_i, rc, dma_id; | |
41773 | + skb_frag_t *f; | |
41774 | + | |
41775 | + tso_check_safe(skb); | |
41776 | + | |
41777 | + if (skb->ip_summed != CHECKSUM_HW) | |
41778 | + EPRINTK("Trying to TSO send a packet without HW checksum\n"); | |
41779 | + | |
41780 | + tso_start(&state, skb); | |
41781 | + | |
41782 | + /* | |
41783 | + * Setup the first payload fragment. If the skb header area | |
41784 | + * contains exactly the headers and all payload is in the frag | |
41785 | + * list things are little simpler | |
41786 | + */ | |
41787 | + if (skb_headlen(skb) == state.p.header_length) { | |
41788 | + /* Grab the first payload fragment. */ | |
41789 | + BUG_ON(skb_shinfo(skb)->nr_frags < 1); | |
41790 | + frag_i = 0; | |
41791 | + f = &skb_shinfo(skb)->frags[frag_i]; | |
41792 | + tso_get_fragment(&state, f->size, | |
41793 | + page_address(f->page) + f->page_offset); | |
41794 | + } else { | |
41795 | + int hl = state.p.header_length; | |
41796 | + tso_get_fragment(&state, skb_headlen(skb) - hl, | |
41797 | + skb->data + hl); | |
41798 | + frag_i = -1; | |
41799 | + } | |
41800 | + | |
41801 | + if (tso_start_new_packet(vnic, skb, &state) < 0) { | |
41802 | + DPRINTK("%s: out of first start-packet memory\n", | |
41803 | + __FUNCTION__); | |
41804 | + goto unwind; | |
41805 | + } | |
41806 | + | |
41807 | + while (1) { | |
41808 | + tso_fill_packet_with_fragment(vnic, &state); | |
41809 | + | |
41810 | + /* Move onto the next fragment? */ | |
41811 | + if (state.ifc.len == 0) { | |
41812 | + if (++frag_i >= skb_shinfo(skb)->nr_frags) | |
41813 | + /* End of payload reached. */ | |
41814 | + break; | |
41815 | + f = &skb_shinfo(skb)->frags[frag_i]; | |
41816 | + tso_get_fragment(&state, f->size, | |
41817 | + page_address(f->page) + | |
41818 | + f->page_offset); | |
41819 | + } | |
41820 | + | |
41821 | + /* Start a new buffer? */ | |
41822 | + if ((state.output_packets->tso_bufs->length == | |
41823 | + NETFRONT_ACCEL_TSO_BUF_LENGTH) && | |
41824 | + tso_start_new_buffer(vnic, &state, 0)) { | |
41825 | + DPRINTK("%s: out of start-buffer memory\n", | |
41826 | + __FUNCTION__); | |
41827 | + goto unwind; | |
41828 | + } | |
41829 | + | |
41830 | + /* Start a new packet? */ | |
41831 | + if ((state.packet_space == 0 || | |
41832 | + ((state.output_packets->tso_bufs_len >= | |
41833 | + ACCEL_TSO_MAX_BUFFERS) && | |
41834 | + (state.output_packets->tso_bufs->length >= | |
41835 | + NETFRONT_ACCEL_TSO_BUF_LENGTH))) && | |
41836 | + tso_start_new_packet(vnic, skb, &state) < 0) { | |
41837 | + DPRINTK("%s: out of start-packet memory\n", | |
41838 | + __FUNCTION__); | |
41839 | + goto unwind; | |
41840 | + } | |
41841 | + | |
41842 | + } | |
41843 | + | |
41844 | + /* Check for space */ | |
41845 | + if (ef_vi_transmit_space(&vnic->vi) < state.buffers) { | |
41846 | + DPRINTK("%s: Not enough TX space (%d)\n", | |
41847 | + __FUNCTION__, state.buffers); | |
41848 | + goto unwind; | |
41849 | + } | |
41850 | + | |
41851 | + /* | |
41852 | + * Store the skb context in the most recent buffer (i.e. the | |
41853 | + * last buffer that will be sent) | |
41854 | + */ | |
41855 | + state.output_packets->tso_bufs->buf->skb = skb; | |
41856 | + | |
41857 | + /* Reverse the list of packets as we construct it on a stack */ | |
41858 | + while (state.output_packets != NULL) { | |
41859 | + tmp_pkt = state.output_packets; | |
41860 | + state.output_packets = tmp_pkt->next; | |
41861 | + tmp_pkt->next = reversed_list; | |
41862 | + reversed_list = tmp_pkt; | |
41863 | + } | |
41864 | + | |
41865 | + /* Pass off to hardware */ | |
41866 | + while (reversed_list != NULL) { | |
41867 | + tmp_pkt = reversed_list; | |
41868 | + reversed_list = tmp_pkt->next; | |
41869 | + | |
41870 | + BUG_ON(tmp_pkt->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS); | |
41871 | + BUG_ON(tmp_pkt->tso_bufs_len == 0); | |
41872 | + | |
41873 | + dma_id = tmp_pkt->tso_bufs->buf->buf_id; | |
41874 | + | |
41875 | + /* | |
41876 | + * Make an iovec of the buffers in the list, reversing | |
41877 | + * the buffers as we go as they are constructed on a | |
41878 | + * stack | |
41879 | + */ | |
41880 | + tso_buf = tmp_pkt->tso_bufs; | |
41881 | + for (frag_i = tmp_pkt->tso_bufs_len - 1; | |
41882 | + frag_i >= 0; | |
41883 | + frag_i--) { | |
41884 | + iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr; | |
41885 | + iovecs[frag_i].iov_len = tso_buf->length; | |
41886 | + tso_buf = tso_buf->next; | |
41887 | + } | |
41888 | + | |
41889 | + rc = ef_vi_transmitv(&vnic->vi, iovecs, tmp_pkt->tso_bufs_len, | |
41890 | + dma_id); | |
41891 | + /* | |
41892 | + * We checked for space already, so it really should | |
41893 | + * succeed | |
41894 | + */ | |
41895 | + BUG_ON(rc != 0); | |
41896 | + } | |
41897 | + | |
41898 | + /* Track number of tx fastpath stats */ | |
41899 | + vnic->netdev_stats.fastpath_tx_bytes += skb->len; | |
41900 | + vnic->netdev_stats.fastpath_tx_pkts += state.packets; | |
41901 | +#if NETFRONT_ACCEL_STATS | |
41902 | + { | |
41903 | + unsigned n; | |
41904 | + n = vnic->netdev_stats.fastpath_tx_pkts - | |
41905 | + vnic->stats.fastpath_tx_completions; | |
41906 | + if (n > vnic->stats.fastpath_tx_pending_max) | |
41907 | + vnic->stats.fastpath_tx_pending_max = n; | |
41908 | + } | |
41909 | +#endif | |
41910 | + | |
41911 | + return NETFRONT_ACCEL_STATUS_GOOD; | |
41912 | + | |
41913 | + unwind: | |
41914 | + tso_unwind(vnic, &state); | |
41915 | + | |
41916 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++); | |
41917 | + | |
41918 | + return NETFRONT_ACCEL_STATUS_BUSY; | |
41919 | +} | |
41920 | + | |
41921 | + | |
41922 | + | |
41923 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.h | |
41924 | =================================================================== | |
41925 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
41926 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_tso.h 2008-02-20 09:32:49.000000000 +0100 | |
41927 | @@ -0,0 +1,57 @@ | |
41928 | +/**************************************************************************** | |
41929 | + * Solarflare driver for Xen network acceleration | |
41930 | + * | |
41931 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
41932 | + * 9501 Jeronimo Road, Suite 250, | |
41933 | + * Irvine, CA 92618, USA | |
41934 | + * | |
41935 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
41936 | + * | |
41937 | + * This program is free software; you can redistribute it and/or modify it | |
41938 | + * under the terms of the GNU General Public License version 2 as published | |
41939 | + * by the Free Software Foundation, incorporated herein by reference. | |
41940 | + * | |
41941 | + * This program is distributed in the hope that it will be useful, | |
41942 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
41943 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
41944 | + * GNU General Public License for more details. | |
41945 | + * | |
41946 | + * You should have received a copy of the GNU General Public License | |
41947 | + * along with this program; if not, write to the Free Software | |
41948 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
41949 | + **************************************************************************** | |
41950 | + */ | |
41951 | + | |
41952 | +#ifndef NETFRONT_ACCEL_TSO_H | |
41953 | +#define NETFRONT_ACCEL_TSO_H | |
41954 | + | |
41955 | +#include "accel_bufs.h" | |
41956 | + | |
41957 | +/* Track the buffers used in each output packet */ | |
41958 | +struct netfront_accel_tso_buffer { | |
41959 | + struct netfront_accel_tso_buffer *next; | |
41960 | + struct netfront_accel_pkt_desc *buf; | |
41961 | + unsigned length; | |
41962 | +}; | |
41963 | + | |
41964 | +/* Track the output packets formed from each input packet */ | |
41965 | +struct netfront_accel_tso_output_packet { | |
41966 | + struct netfront_accel_tso_output_packet *next; | |
41967 | + struct netfront_accel_tso_buffer *tso_bufs; | |
41968 | + unsigned tso_bufs_len; | |
41969 | +}; | |
41970 | + | |
41971 | + | |
41972 | +/* | |
41973 | + * Max available space in a buffer for data once meta-data has taken | |
41974 | + * its place | |
41975 | + */ | |
41976 | +#define NETFRONT_ACCEL_TSO_BUF_LENGTH \ | |
41977 | + ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \ | |
41978 | + - sizeof(struct netfront_accel_tso_buffer) \ | |
41979 | + - sizeof(struct netfront_accel_tso_output_packet)) | |
41980 | + | |
41981 | +int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic, | |
41982 | + struct sk_buff *skb); | |
41983 | + | |
41984 | +#endif /* NETFRONT_ACCEL_TSO_H */ | |
41985 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_vi.c | |
41986 | =================================================================== | |
41987 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
41988 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_vi.c 2008-02-20 09:32:49.000000000 +0100 | |
41989 | @@ -0,0 +1,1194 @@ | |
41990 | +/**************************************************************************** | |
41991 | + * Solarflare driver for Xen network acceleration | |
41992 | + * | |
41993 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
41994 | + * 9501 Jeronimo Road, Suite 250, | |
41995 | + * Irvine, CA 92618, USA | |
41996 | + * | |
41997 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
41998 | + * | |
41999 | + * This program is free software; you can redistribute it and/or modify it | |
42000 | + * under the terms of the GNU General Public License version 2 as published | |
42001 | + * by the Free Software Foundation, incorporated herein by reference. | |
42002 | + * | |
42003 | + * This program is distributed in the hope that it will be useful, | |
42004 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
42005 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
42006 | + * GNU General Public License for more details. | |
42007 | + * | |
42008 | + * You should have received a copy of the GNU General Public License | |
42009 | + * along with this program; if not, write to the Free Software | |
42010 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
42011 | + **************************************************************************** | |
42012 | + */ | |
42013 | + | |
42014 | +#include <linux/if_ether.h> | |
42015 | +#include <linux/ip.h> | |
42016 | +#include <net/checksum.h> | |
42017 | +#include <asm/io.h> | |
42018 | + | |
42019 | +#include "accel.h" | |
42020 | +#include "accel_util.h" | |
42021 | +#include "accel_bufs.h" | |
42022 | +#include "accel_tso.h" | |
42023 | +#include "accel_ssr.h" | |
42024 | +#include "netfront.h" | |
42025 | + | |
42026 | +#include "etherfabric/ef_vi.h" | |
42027 | + | |
42028 | +/* | |
42029 | + * Max available space in a buffer for data once meta-data has taken | |
42030 | + * its place | |
42031 | + */ | |
42032 | +#define NETFRONT_ACCEL_TX_BUF_LENGTH \ | |
42033 | + ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \ | |
42034 | + - sizeof(struct netfront_accel_tso_buffer)) | |
42035 | + | |
42036 | +#define ACCEL_TX_MAX_BUFFERS (6) | |
42037 | +#define ACCEL_VI_POLL_EVENTS (8) | |
42038 | + | |
42039 | +static | |
42040 | +int netfront_accel_vi_init_fini(netfront_accel_vnic *vnic, | |
42041 | + struct net_accel_msg_hw *hw_msg) | |
42042 | +{ | |
42043 | + struct ef_vi_nic_type nic_type; | |
42044 | + struct net_accel_hw_falcon_b *hw_info; | |
42045 | + void *io_kva, *evq_base, *rx_dma_kva, *tx_dma_kva, *doorbell_kva; | |
42046 | + u32 *evq_gnts; | |
42047 | + u32 evq_order; | |
42048 | + int vi_state_size; | |
42049 | + u8 vi_data[VI_MAPPINGS_SIZE]; | |
42050 | + | |
42051 | + if (hw_msg == NULL) | |
42052 | + goto fini; | |
42053 | + | |
42054 | + /* And create the local macs table lock */ | |
42055 | + spin_lock_init(&vnic->table_lock); | |
42056 | + | |
42057 | + /* Create fastpath table, initial size 8, key length 8 */ | |
42058 | + if (cuckoo_hash_init(&vnic->fastpath_table, 3, 8)) { | |
42059 | + EPRINTK("failed to allocate fastpath table\n"); | |
42060 | + goto fail_cuckoo; | |
42061 | + } | |
42062 | + | |
42063 | + vnic->hw.falcon.type = hw_msg->type; | |
42064 | + | |
42065 | + switch (hw_msg->type) { | |
42066 | + case NET_ACCEL_MSG_HWTYPE_FALCON_A: | |
42067 | + hw_info = &hw_msg->resources.falcon_a.common; | |
42068 | + /* Need the extra rptr register page on A1 */ | |
42069 | + io_kva = net_accel_map_iomem_page | |
42070 | + (vnic->dev, hw_msg->resources.falcon_a.evq_rptr_gnt, | |
42071 | + &vnic->hw.falcon.evq_rptr_mapping); | |
42072 | + if (io_kva == NULL) { | |
42073 | + EPRINTK("%s: evq_rptr permission failed\n", __FUNCTION__); | |
42074 | + goto evq_rptr_fail; | |
42075 | + } | |
42076 | + | |
42077 | + vnic->hw.falcon.evq_rptr = io_kva + | |
42078 | + (hw_info->evq_rptr & (PAGE_SIZE - 1)); | |
42079 | + break; | |
42080 | + case NET_ACCEL_MSG_HWTYPE_FALCON_B: | |
42081 | + hw_info = &hw_msg->resources.falcon_b; | |
42082 | + break; | |
42083 | + default: | |
42084 | + goto bad_type; | |
42085 | + } | |
42086 | + | |
42087 | + /**** Event Queue ****/ | |
42088 | + | |
42089 | + /* Map the event queue pages */ | |
42090 | + evq_gnts = hw_info->evq_mem_gnts; | |
42091 | + evq_order = hw_info->evq_order; | |
42092 | + | |
42093 | + EPRINTK_ON(hw_info->evq_offs != 0); | |
42094 | + | |
42095 | + DPRINTK("Will map evq %d pages\n", 1 << evq_order); | |
42096 | + | |
42097 | + evq_base = | |
42098 | + net_accel_map_grants_contig(vnic->dev, evq_gnts, 1 << evq_order, | |
42099 | + &vnic->evq_mapping); | |
42100 | + if (evq_base == NULL) { | |
42101 | + EPRINTK("%s: evq_base failed\n", __FUNCTION__); | |
42102 | + goto evq_fail; | |
42103 | + } | |
42104 | + | |
42105 | + /**** Doorbells ****/ | |
42106 | + /* Set up the doorbell mappings. */ | |
42107 | + doorbell_kva = | |
42108 | + net_accel_map_iomem_page(vnic->dev, hw_info->doorbell_gnt, | |
42109 | + &vnic->hw.falcon.doorbell_mapping); | |
42110 | + if (doorbell_kva == NULL) { | |
42111 | + EPRINTK("%s: doorbell permission failed\n", __FUNCTION__); | |
42112 | + goto doorbell_fail; | |
42113 | + } | |
42114 | + vnic->hw.falcon.doorbell = doorbell_kva; | |
42115 | + | |
42116 | + /* On Falcon_B we get the rptr from the doorbell page */ | |
42117 | + if (hw_msg->type == NET_ACCEL_MSG_HWTYPE_FALCON_B) { | |
42118 | + vnic->hw.falcon.evq_rptr = | |
42119 | + (u32 *)((char *)vnic->hw.falcon.doorbell | |
42120 | + + hw_info->evq_rptr); | |
42121 | + } | |
42122 | + | |
42123 | + /**** DMA Queue ****/ | |
42124 | + | |
42125 | + /* Set up the DMA Queues from the message. */ | |
42126 | + tx_dma_kva = net_accel_map_grants_contig | |
42127 | + (vnic->dev, &(hw_info->txdmaq_gnt), 1, | |
42128 | + &vnic->hw.falcon.txdmaq_mapping); | |
42129 | + if (tx_dma_kva == NULL) { | |
42130 | + EPRINTK("%s: TX dma failed\n", __FUNCTION__); | |
42131 | + goto tx_dma_fail; | |
42132 | + } | |
42133 | + | |
42134 | + rx_dma_kva = net_accel_map_grants_contig | |
42135 | + (vnic->dev, &(hw_info->rxdmaq_gnt), 1, | |
42136 | + &vnic->hw.falcon.rxdmaq_mapping); | |
42137 | + if (rx_dma_kva == NULL) { | |
42138 | + EPRINTK("%s: RX dma failed\n", __FUNCTION__); | |
42139 | + goto rx_dma_fail; | |
42140 | + } | |
42141 | + | |
42142 | + /* Full confession */ | |
42143 | + DPRINTK("Mapped H/W" | |
42144 | + " Tx DMAQ grant %x -> %p\n" | |
42145 | + " Rx DMAQ grant %x -> %p\n" | |
42146 | + " EVQ grant %x -> %p\n", | |
42147 | + hw_info->txdmaq_gnt, tx_dma_kva, | |
42148 | + hw_info->rxdmaq_gnt, rx_dma_kva, | |
42149 | + evq_gnts[0], evq_base | |
42150 | + ); | |
42151 | + | |
42152 | + memset(vi_data, 0, sizeof(vi_data)); | |
42153 | + | |
42154 | + /* TODO BUG11305: convert efhw_arch to ef_vi_arch | |
42155 | + * e.g. | |
42156 | + * arch = ef_vi_arch_from_efhw_arch(hw_info->nic_arch); | |
42157 | + * assert(arch >= 0); | |
42158 | + * nic_type.arch = arch; | |
42159 | + */ | |
42160 | + nic_type.arch = (unsigned char)hw_info->nic_arch; | |
42161 | + nic_type.variant = (char)hw_info->nic_variant; | |
42162 | + nic_type.revision = (unsigned char)hw_info->nic_revision; | |
42163 | + | |
42164 | + ef_vi_init_mapping_evq(vi_data, nic_type, hw_info->instance, | |
42165 | + 1 << (evq_order + PAGE_SHIFT), evq_base, | |
42166 | + (void *)0xdeadbeef); | |
42167 | + | |
42168 | + ef_vi_init_mapping_vi(vi_data, nic_type, hw_info->rx_capacity, | |
42169 | + hw_info->tx_capacity, hw_info->instance, | |
42170 | + doorbell_kva, rx_dma_kva, tx_dma_kva, 0); | |
42171 | + | |
42172 | + vi_state_size = ef_vi_calc_state_bytes(hw_info->rx_capacity, | |
42173 | + hw_info->tx_capacity); | |
42174 | + vnic->vi_state = (ef_vi_state *)kmalloc(vi_state_size, GFP_KERNEL); | |
42175 | + if (vnic->vi_state == NULL) { | |
42176 | + EPRINTK("%s: kmalloc for VI state failed\n", __FUNCTION__); | |
42177 | + goto vi_state_fail; | |
42178 | + } | |
42179 | + ef_vi_init(&vnic->vi, vi_data, vnic->vi_state, &vnic->evq_state, 0); | |
42180 | + | |
42181 | + ef_eventq_state_init(&vnic->vi); | |
42182 | + | |
42183 | + ef_vi_state_init(&vnic->vi); | |
42184 | + | |
42185 | + return 0; | |
42186 | + | |
42187 | +fini: | |
42188 | + kfree(vnic->vi_state); | |
42189 | + vnic->vi_state = NULL; | |
42190 | +vi_state_fail: | |
42191 | + net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.rxdmaq_mapping); | |
42192 | +rx_dma_fail: | |
42193 | + net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.txdmaq_mapping); | |
42194 | +tx_dma_fail: | |
42195 | + net_accel_unmap_iomem_page(vnic->dev, vnic->hw.falcon.doorbell_mapping); | |
42196 | + vnic->hw.falcon.doorbell = NULL; | |
42197 | +doorbell_fail: | |
42198 | + net_accel_unmap_grants_contig(vnic->dev, vnic->evq_mapping); | |
42199 | +evq_fail: | |
42200 | + if (vnic->hw.falcon.type == NET_ACCEL_MSG_HWTYPE_FALCON_A) | |
42201 | + net_accel_unmap_iomem_page(vnic->dev, | |
42202 | + vnic->hw.falcon.evq_rptr_mapping); | |
42203 | + vnic->hw.falcon.evq_rptr = NULL; | |
42204 | +evq_rptr_fail: | |
42205 | +bad_type: | |
42206 | + cuckoo_hash_destroy(&vnic->fastpath_table); | |
42207 | +fail_cuckoo: | |
42208 | + return -EIO; | |
42209 | +} | |
42210 | + | |
42211 | + | |
42212 | +void netfront_accel_vi_ctor(netfront_accel_vnic *vnic) | |
42213 | +{ | |
42214 | + /* Just mark the VI as uninitialised. */ | |
42215 | + vnic->vi_state = NULL; | |
42216 | +} | |
42217 | + | |
42218 | + | |
42219 | +int netfront_accel_vi_init(netfront_accel_vnic *vnic, struct net_accel_msg_hw *hw_msg) | |
42220 | +{ | |
42221 | + BUG_ON(hw_msg == NULL); | |
42222 | + return netfront_accel_vi_init_fini(vnic, hw_msg); | |
42223 | +} | |
42224 | + | |
42225 | + | |
42226 | +void netfront_accel_vi_dtor(netfront_accel_vnic *vnic) | |
42227 | +{ | |
42228 | + if (vnic->vi_state != NULL) | |
42229 | + netfront_accel_vi_init_fini(vnic, NULL); | |
42230 | +} | |
42231 | + | |
42232 | + | |
42233 | +static | |
42234 | +void netfront_accel_vi_post_rx(netfront_accel_vnic *vnic, u16 id, | |
42235 | + netfront_accel_pkt_desc *buf) | |
42236 | +{ | |
42237 | + | |
42238 | + int idx = vnic->rx_dma_batched; | |
42239 | + | |
42240 | +#if 0 | |
42241 | + VPRINTK("Posting buffer %d (0x%08x) for rx at index %d, space is %d\n", | |
42242 | + id, buf->pkt_buff_addr, idx, ef_vi_receive_space(&vnic->vi)); | |
42243 | +#endif | |
42244 | + /* Set up a virtual buffer descriptor */ | |
42245 | + ef_vi_receive_init(&vnic->vi, buf->pkt_buff_addr, id, | |
42246 | + /*rx_bytes=max*/0); | |
42247 | + | |
42248 | + idx++; | |
42249 | + | |
42250 | + vnic->rx_dma_level++; | |
42251 | + | |
42252 | + /* | |
42253 | + * Only push the descriptor to the card if we've reached the | |
42254 | + * batch size. Otherwise, the descriptors can sit around for | |
42255 | + * a while. There will be plenty available. | |
42256 | + */ | |
42257 | + if (idx >= NETFRONT_ACCEL_RX_DESC_BATCH || | |
42258 | + vnic->rx_dma_level < NETFRONT_ACCEL_RX_DESC_BATCH) { | |
42259 | +#if 0 | |
42260 | + VPRINTK("Flushing %d rx descriptors.\n", idx); | |
42261 | +#endif | |
42262 | + | |
42263 | + /* Push buffer to hardware */ | |
42264 | + ef_vi_receive_push(&vnic->vi); | |
42265 | + | |
42266 | + idx = 0; | |
42267 | + } | |
42268 | + | |
42269 | + vnic->rx_dma_batched = idx; | |
42270 | +} | |
42271 | + | |
42272 | + | |
42273 | +inline | |
42274 | +void netfront_accel_vi_post_rx_or_free(netfront_accel_vnic *vnic, u16 id, | |
42275 | + netfront_accel_pkt_desc *buf) | |
42276 | +{ | |
42277 | + | |
42278 | + VPRINTK("%s: %d\n", __FUNCTION__, id); | |
42279 | + | |
42280 | + if (ef_vi_receive_space(&vnic->vi) <= vnic->rx_dma_batched) { | |
42281 | + VPRINTK("RX space is full\n"); | |
42282 | + netfront_accel_buf_put(vnic->rx_bufs, id); | |
42283 | + return; | |
42284 | + } | |
42285 | + | |
42286 | + VPRINTK("Completed buffer %d is reposted\n", id); | |
42287 | + netfront_accel_vi_post_rx(vnic, id, buf); | |
42288 | + | |
42289 | + /* | |
42290 | + * Let's see if there's any more to be pushed out to the NIC | |
42291 | + * while we're here | |
42292 | + */ | |
42293 | + while (ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) { | |
42294 | + /* Try to allocate a buffer. */ | |
42295 | + buf = netfront_accel_buf_get(vnic->rx_bufs); | |
42296 | + if (buf == NULL) | |
42297 | + break; | |
42298 | + | |
42299 | + /* Add it to the rx dma queue. */ | |
42300 | + netfront_accel_vi_post_rx(vnic, buf->buf_id, buf); | |
42301 | + } | |
42302 | +} | |
42303 | + | |
42304 | + | |
42305 | +void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx) | |
42306 | +{ | |
42307 | + | |
42308 | + while (is_rx && | |
42309 | + ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) { | |
42310 | + netfront_accel_pkt_desc *buf; | |
42311 | + | |
42312 | + VPRINTK("%s: %d\n", __FUNCTION__, vnic->rx_dma_level); | |
42313 | + | |
42314 | + /* Try to allocate a buffer. */ | |
42315 | + buf = netfront_accel_buf_get(vnic->rx_bufs); | |
42316 | + | |
42317 | + if (buf == NULL) | |
42318 | + break; | |
42319 | + | |
42320 | + /* Add it to the rx dma queue. */ | |
42321 | + netfront_accel_vi_post_rx(vnic, buf->buf_id, buf); | |
42322 | + } | |
42323 | + | |
42324 | + VPRINTK("%s: done\n", __FUNCTION__); | |
42325 | +} | |
42326 | + | |
42327 | + | |
42328 | +struct netfront_accel_multi_state { | |
42329 | + unsigned remaining_len; | |
42330 | + | |
42331 | + unsigned buffers; | |
42332 | + | |
42333 | + struct netfront_accel_tso_buffer *output_buffers; | |
42334 | + | |
42335 | + /* Where we are in the current fragment of the SKB. */ | |
42336 | + struct { | |
42337 | + /* address of current position */ | |
42338 | + void *addr; | |
42339 | + /* remaining length */ | |
42340 | + unsigned int len; | |
42341 | + } ifc; /* == Input Fragment Cursor */ | |
42342 | +}; | |
42343 | + | |
42344 | + | |
42345 | +static inline void multi_post_start(struct netfront_accel_multi_state *st, | |
42346 | + struct sk_buff *skb) | |
42347 | +{ | |
42348 | + st->remaining_len = skb->len; | |
42349 | + st->output_buffers = NULL; | |
42350 | + st->buffers = 0; | |
42351 | + st->ifc.len = skb_headlen(skb); | |
42352 | + st->ifc.addr = skb->data; | |
42353 | +} | |
42354 | + | |
42355 | +static int multi_post_start_new_buffer(netfront_accel_vnic *vnic, | |
42356 | + struct netfront_accel_multi_state *st) | |
42357 | +{ | |
42358 | + struct netfront_accel_tso_buffer *tso_buf; | |
42359 | + struct netfront_accel_pkt_desc *buf; | |
42360 | + | |
42361 | + /* Get a mapped packet buffer */ | |
42362 | + buf = netfront_accel_buf_get(vnic->tx_bufs); | |
42363 | + if (buf == NULL) { | |
42364 | + DPRINTK("%s: No buffer for TX\n", __FUNCTION__); | |
42365 | + return -1; | |
42366 | + } | |
42367 | + | |
42368 | + /* Store a bit of meta-data at the end */ | |
42369 | + tso_buf = (struct netfront_accel_tso_buffer *) | |
42370 | + (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH); | |
42371 | + | |
42372 | + tso_buf->buf = buf; | |
42373 | + | |
42374 | + tso_buf->length = 0; | |
42375 | + | |
42376 | + tso_buf->next = st->output_buffers; | |
42377 | + st->output_buffers = tso_buf; | |
42378 | + st->buffers++; | |
42379 | + | |
42380 | + BUG_ON(st->buffers >= ACCEL_TX_MAX_BUFFERS); | |
42381 | + | |
42382 | + /* | |
42383 | + * Store the context, set to NULL, last packet buffer will get | |
42384 | + * non-NULL later | |
42385 | + */ | |
42386 | + tso_buf->buf->skb = NULL; | |
42387 | + | |
42388 | + return 0; | |
42389 | +} | |
42390 | + | |
42391 | + | |
42392 | +static void | |
42393 | +multi_post_fill_buffer_with_fragment(netfront_accel_vnic *vnic, | |
42394 | + struct netfront_accel_multi_state *st) | |
42395 | +{ | |
42396 | + struct netfront_accel_tso_buffer *tso_buf; | |
42397 | + unsigned n, space; | |
42398 | + | |
42399 | + BUG_ON(st->output_buffers == NULL); | |
42400 | + tso_buf = st->output_buffers; | |
42401 | + | |
42402 | + if (st->ifc.len == 0) return; | |
42403 | + if (tso_buf->length == NETFRONT_ACCEL_TX_BUF_LENGTH) return; | |
42404 | + | |
42405 | + BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH); | |
42406 | + | |
42407 | + space = NETFRONT_ACCEL_TX_BUF_LENGTH - tso_buf->length; | |
42408 | + n = min(st->ifc.len, space); | |
42409 | + | |
42410 | + memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n); | |
42411 | + | |
42412 | + st->remaining_len -= n; | |
42413 | + st->ifc.len -= n; | |
42414 | + tso_buf->length += n; | |
42415 | + st->ifc.addr += n; | |
42416 | + | |
42417 | + BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH); | |
42418 | + | |
42419 | + return; | |
42420 | +} | |
42421 | + | |
42422 | + | |
42423 | +static inline void multi_post_unwind(netfront_accel_vnic *vnic, | |
42424 | + struct netfront_accel_multi_state *st) | |
42425 | +{ | |
42426 | + struct netfront_accel_tso_buffer *tso_buf; | |
42427 | + | |
42428 | + DPRINTK("%s\n", __FUNCTION__); | |
42429 | + | |
42430 | + while (st->output_buffers != NULL) { | |
42431 | + tso_buf = st->output_buffers; | |
42432 | + st->output_buffers = tso_buf->next; | |
42433 | + st->buffers--; | |
42434 | + netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id); | |
42435 | + } | |
42436 | + BUG_ON(st->buffers != 0); | |
42437 | +} | |
42438 | + | |
42439 | + | |
42440 | +static enum netfront_accel_post_status | |
42441 | +netfront_accel_enqueue_skb_multi(netfront_accel_vnic *vnic, struct sk_buff *skb) | |
42442 | +{ | |
42443 | + struct netfront_accel_tso_buffer *tso_buf; | |
42444 | + struct netfront_accel_multi_state state; | |
42445 | + ef_iovec iovecs[ACCEL_TX_MAX_BUFFERS]; | |
42446 | + skb_frag_t *f; | |
42447 | + int frag_i, rc, dma_id; | |
42448 | + | |
42449 | + multi_post_start(&state, skb); | |
42450 | + | |
42451 | + frag_i = -1; | |
42452 | + | |
42453 | + if (skb->ip_summed == CHECKSUM_HW) { | |
42454 | + /* Set to zero to encourage falcon to work it out for us */ | |
42455 | + *(u16*)(skb->h.raw + skb->csum) = 0; | |
42456 | + } | |
42457 | + | |
42458 | + if (multi_post_start_new_buffer(vnic, &state)) { | |
42459 | + DPRINTK("%s: out of buffers\n", __FUNCTION__); | |
42460 | + goto unwind; | |
42461 | + } | |
42462 | + | |
42463 | + while (1) { | |
42464 | + multi_post_fill_buffer_with_fragment(vnic, &state); | |
42465 | + | |
42466 | + /* Move onto the next fragment? */ | |
42467 | + if (state.ifc.len == 0) { | |
42468 | + if (++frag_i >= skb_shinfo(skb)->nr_frags) | |
42469 | + /* End of payload reached. */ | |
42470 | + break; | |
42471 | + f = &skb_shinfo(skb)->frags[frag_i]; | |
42472 | + state.ifc.len = f->size; | |
42473 | + state.ifc.addr = page_address(f->page) + f->page_offset; | |
42474 | + } | |
42475 | + | |
42476 | + /* Start a new buffer? */ | |
42477 | + if ((state.output_buffers->length == | |
42478 | + NETFRONT_ACCEL_TX_BUF_LENGTH) && | |
42479 | + multi_post_start_new_buffer(vnic, &state)) { | |
42480 | + DPRINTK("%s: out of buffers\n", __FUNCTION__); | |
42481 | + goto unwind; | |
42482 | + } | |
42483 | + } | |
42484 | + | |
42485 | + /* Check for space */ | |
42486 | + if (ef_vi_transmit_space(&vnic->vi) < state.buffers) { | |
42487 | + DPRINTK("%s: Not enough TX space (%d)\n", __FUNCTION__, state.buffers); | |
42488 | + goto unwind; | |
42489 | + } | |
42490 | + | |
42491 | + /* Store the skb in what will be the last buffer's context */ | |
42492 | + state.output_buffers->buf->skb = skb; | |
42493 | + /* Remember dma_id of what will be the last buffer */ | |
42494 | + dma_id = state.output_buffers->buf->buf_id; | |
42495 | + | |
42496 | + /* | |
42497 | + * Make an iovec of the buffers in the list, reversing the | |
42498 | + * buffers as we go as they are constructed on a stack | |
42499 | + */ | |
42500 | + tso_buf = state.output_buffers; | |
42501 | + for (frag_i = state.buffers-1; frag_i >= 0; frag_i--) { | |
42502 | + iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr; | |
42503 | + iovecs[frag_i].iov_len = tso_buf->length; | |
42504 | + tso_buf = tso_buf->next; | |
42505 | + } | |
42506 | + | |
42507 | + rc = ef_vi_transmitv(&vnic->vi, iovecs, state.buffers, dma_id); | |
42508 | + | |
42509 | + /* Track number of tx fastpath stats */ | |
42510 | + vnic->netdev_stats.fastpath_tx_bytes += skb->len; | |
42511 | + vnic->netdev_stats.fastpath_tx_pkts ++; | |
42512 | +#if NETFRONT_ACCEL_STATS | |
42513 | + { | |
42514 | + u32 n; | |
42515 | + n = vnic->netdev_stats.fastpath_tx_pkts - | |
42516 | + (u32)vnic->stats.fastpath_tx_completions; | |
42517 | + if (n > vnic->stats.fastpath_tx_pending_max) | |
42518 | + vnic->stats.fastpath_tx_pending_max = n; | |
42519 | + } | |
42520 | +#endif | |
42521 | + return NETFRONT_ACCEL_STATUS_GOOD; | |
42522 | + | |
42523 | +unwind: | |
42524 | + multi_post_unwind(vnic, &state); | |
42525 | + | |
42526 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++); | |
42527 | + | |
42528 | + return NETFRONT_ACCEL_STATUS_BUSY; | |
42529 | +} | |
42530 | + | |
42531 | + | |
42532 | +static enum netfront_accel_post_status | |
42533 | +netfront_accel_enqueue_skb_single(netfront_accel_vnic *vnic, struct sk_buff *skb) | |
42534 | +{ | |
42535 | + struct netfront_accel_tso_buffer *tso_buf; | |
42536 | + struct netfront_accel_pkt_desc *buf; | |
42537 | + u8 *kva; | |
42538 | + int rc; | |
42539 | + | |
42540 | + if (ef_vi_transmit_space(&vnic->vi) < 1) { | |
42541 | + DPRINTK("%s: No TX space\n", __FUNCTION__); | |
42542 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++); | |
42543 | + return NETFRONT_ACCEL_STATUS_BUSY; | |
42544 | + } | |
42545 | + | |
42546 | + buf = netfront_accel_buf_get(vnic->tx_bufs); | |
42547 | + if (buf == NULL) { | |
42548 | + DPRINTK("%s: No buffer for TX\n", __FUNCTION__); | |
42549 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++); | |
42550 | + return NETFRONT_ACCEL_STATUS_BUSY; | |
42551 | + } | |
42552 | + | |
42553 | + /* Track number of tx fastpath stats */ | |
42554 | + vnic->netdev_stats.fastpath_tx_pkts++; | |
42555 | + vnic->netdev_stats.fastpath_tx_bytes += skb->len; | |
42556 | + | |
42557 | +#if NETFRONT_ACCEL_STATS | |
42558 | + { | |
42559 | + u32 n; | |
42560 | + n = vnic->netdev_stats.fastpath_tx_pkts - | |
42561 | + (u32)vnic->stats.fastpath_tx_completions; | |
42562 | + if (n > vnic->stats.fastpath_tx_pending_max) | |
42563 | + vnic->stats.fastpath_tx_pending_max = n; | |
42564 | + } | |
42565 | +#endif | |
42566 | + | |
42567 | + /* Store the context */ | |
42568 | + buf->skb = skb; | |
42569 | + | |
42570 | + kva = buf->pkt_kva; | |
42571 | + | |
42572 | + if (skb->ip_summed == CHECKSUM_HW) { | |
42573 | + /* Set to zero to encourage falcon to work it out for us */ | |
42574 | + *(u16*)(skb->h.raw + skb->csum) = 0; | |
42575 | + } | |
42576 | + NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT | |
42577 | + (skb, idx, frag_data, frag_len, { | |
42578 | + /* Copy in payload */ | |
42579 | + VPRINTK("*** Copying %d bytes to %p\n", frag_len, kva); | |
42580 | + memcpy(kva, frag_data, frag_len); | |
42581 | + kva += frag_len; | |
42582 | + }); | |
42583 | + | |
42584 | + VPRINTK("%s: id %d pkt %p kva %p buff_addr 0x%08x\n", __FUNCTION__, | |
42585 | + buf->buf_id, buf, buf->pkt_kva, buf->pkt_buff_addr); | |
42586 | + | |
42587 | + | |
42588 | + /* Set up the TSO meta-data for a single buffer/packet */ | |
42589 | + tso_buf = (struct netfront_accel_tso_buffer *) | |
42590 | + (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH); | |
42591 | + tso_buf->next = NULL; | |
42592 | + tso_buf->buf = buf; | |
42593 | + tso_buf->length = skb->len; | |
42594 | + | |
42595 | + rc = ef_vi_transmit(&vnic->vi, buf->pkt_buff_addr, skb->len, | |
42596 | + buf->buf_id); | |
42597 | + /* We checked for space already, so it really should succeed */ | |
42598 | + BUG_ON(rc != 0); | |
42599 | + | |
42600 | + return NETFRONT_ACCEL_STATUS_GOOD; | |
42601 | +} | |
42602 | + | |
42603 | + | |
42604 | +enum netfront_accel_post_status | |
42605 | +netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, struct sk_buff *skb) | |
42606 | +{ | |
42607 | + struct ethhdr *pkt_eth_hdr; | |
42608 | + struct iphdr *pkt_ipv4_hdr; | |
42609 | + int value, try_fastpath; | |
42610 | + | |
42611 | + /* | |
42612 | + * This assumes that the data field points to the dest mac | |
42613 | + * address. | |
42614 | + */ | |
42615 | + cuckoo_hash_mac_key key = cuckoo_mac_to_key(skb->data); | |
42616 | + | |
42617 | + /* | |
42618 | + * NB very important that all things that could return "CANT" | |
42619 | + * are tested before things that return "BUSY" as if it it | |
42620 | + * returns "BUSY" it is assumed that it won't return "CANT" | |
42621 | + * next time it is tried | |
42622 | + */ | |
42623 | + | |
42624 | + /* | |
42625 | + * Do a fastpath send if fast path table lookup returns true. | |
42626 | + * We do this without the table lock and so may get the wrong | |
42627 | + * answer, but current opinion is that's not a big problem | |
42628 | + */ | |
42629 | + try_fastpath = cuckoo_hash_lookup(&vnic->fastpath_table, | |
42630 | + (cuckoo_hash_key *)(&key), &value); | |
42631 | + | |
42632 | + if (!try_fastpath) { | |
42633 | + VPRINTK("try fast path false for mac: " MAC_FMT "\n", | |
42634 | + MAC_ARG(skb->data)); | |
42635 | + | |
42636 | + return NETFRONT_ACCEL_STATUS_CANT; | |
42637 | + } | |
42638 | + | |
42639 | + /* Check to see if the packet can be sent. */ | |
42640 | + if (skb_headlen(skb) < sizeof(*pkt_eth_hdr) + sizeof(*pkt_ipv4_hdr)) { | |
42641 | + EPRINTK("%s: Packet header is too small\n", __FUNCTION__); | |
42642 | + return NETFRONT_ACCEL_STATUS_CANT; | |
42643 | + } | |
42644 | + | |
42645 | + pkt_eth_hdr = (void*)skb->data; | |
42646 | + pkt_ipv4_hdr = (void*)(pkt_eth_hdr+1); | |
42647 | + | |
42648 | + if (be16_to_cpu(pkt_eth_hdr->h_proto) != ETH_P_IP) { | |
42649 | + DPRINTK("%s: Packet is not IPV4 (ether_type=0x%04x)\n", __FUNCTION__, | |
42650 | + be16_to_cpu(pkt_eth_hdr->h_proto)); | |
42651 | + return NETFRONT_ACCEL_STATUS_CANT; | |
42652 | + } | |
42653 | + | |
42654 | + if (pkt_ipv4_hdr->protocol != IPPROTO_TCP && | |
42655 | + pkt_ipv4_hdr->protocol != IPPROTO_UDP) { | |
42656 | + DPRINTK("%s: Packet is not TCP/UDP (ip_protocol=0x%02x)\n", | |
42657 | + __FUNCTION__, pkt_ipv4_hdr->protocol); | |
42658 | + return NETFRONT_ACCEL_STATUS_CANT; | |
42659 | + } | |
42660 | + | |
42661 | + VPRINTK("%s: %d bytes, gso %d\n", __FUNCTION__, skb->len, | |
42662 | + skb_shinfo(skb)->gso_size); | |
42663 | + | |
42664 | + if (skb_shinfo(skb)->gso_size) { | |
42665 | + return netfront_accel_enqueue_skb_tso(vnic, skb); | |
42666 | + } | |
42667 | + | |
42668 | + if (skb->len <= NETFRONT_ACCEL_TX_BUF_LENGTH) { | |
42669 | + return netfront_accel_enqueue_skb_single(vnic, skb); | |
42670 | + } | |
42671 | + | |
42672 | + return netfront_accel_enqueue_skb_multi(vnic, skb); | |
42673 | +} | |
42674 | + | |
42675 | + | |
42676 | +/* | |
42677 | + * Copy the data to required end destination. NB. len is the total new | |
42678 | + * length of the socket buffer, not the amount of data to copy | |
42679 | + */ | |
42680 | +inline | |
42681 | +int ef_vnic_copy_to_skb(netfront_accel_vnic *vnic, struct sk_buff *skb, | |
42682 | + struct netfront_accel_pkt_desc *buf, int len) | |
42683 | +{ | |
42684 | + int i, extra = len - skb->len; | |
42685 | + char c; | |
42686 | + int pkt_stride = vnic->rx_pkt_stride; | |
42687 | + int skb_stride = vnic->rx_skb_stride; | |
42688 | + char *skb_start; | |
42689 | + | |
42690 | + /* | |
42691 | + * This pulls stuff into the cache - have seen performance | |
42692 | + * benefit in this, but disabled by default | |
42693 | + */ | |
42694 | + skb_start = skb->data; | |
42695 | + if (pkt_stride) { | |
42696 | + for (i = 0; i < len; i += pkt_stride) { | |
42697 | + c += ((volatile char*)(buf->pkt_kva))[i]; | |
42698 | + } | |
42699 | + } | |
42700 | + if (skb_stride) { | |
42701 | + for (i = skb->len; i < len ; i += skb_stride) { | |
42702 | + c += ((volatile char*)(skb_start))[i]; | |
42703 | + } | |
42704 | + } | |
42705 | + | |
42706 | + if (skb_tailroom(skb) >= extra) { | |
42707 | + memcpy(skb_put(skb, extra), buf->pkt_kva, extra); | |
42708 | + return 0; | |
42709 | + } | |
42710 | + | |
42711 | + return -ENOSPC; | |
42712 | +} | |
42713 | + | |
42714 | + | |
42715 | +static void discard_jumbo_state(netfront_accel_vnic *vnic) | |
42716 | +{ | |
42717 | + | |
42718 | + if (vnic->jumbo_state.skb != NULL) { | |
42719 | + dev_kfree_skb_any(vnic->jumbo_state.skb); | |
42720 | + | |
42721 | + vnic->jumbo_state.skb = NULL; | |
42722 | + } | |
42723 | + vnic->jumbo_state.in_progress = 0; | |
42724 | +} | |
42725 | + | |
42726 | + | |
42727 | +static void netfront_accel_vi_rx_complete(netfront_accel_vnic *vnic, | |
42728 | + struct sk_buff *skb) | |
42729 | +{ | |
42730 | + cuckoo_hash_mac_key key; | |
42731 | + unsigned long flags; | |
42732 | + int value; | |
42733 | + struct net_device *net_dev; | |
42734 | + | |
42735 | + | |
42736 | + key = cuckoo_mac_to_key(skb->data + ETH_ALEN); | |
42737 | + | |
42738 | + /* | |
42739 | + * If this is a MAC address that we want to do fast path TX | |
42740 | + * to, and we don't already, add it to the fastpath table. | |
42741 | + * The initial lookup is done without the table lock and so | |
42742 | + * may get the wrong answer, but current opinion is that's not | |
42743 | + * a big problem | |
42744 | + */ | |
42745 | + if (is_valid_ether_addr(skb->data + ETH_ALEN) && | |
42746 | + !cuckoo_hash_lookup(&vnic->fastpath_table, (cuckoo_hash_key *)&key, | |
42747 | + &value)) { | |
42748 | + spin_lock_irqsave(&vnic->table_lock, flags); | |
42749 | + | |
42750 | + cuckoo_hash_add_check(&vnic->fastpath_table, | |
42751 | + (cuckoo_hash_key *)&key, | |
42752 | + 1, 1); | |
42753 | + | |
42754 | + spin_unlock_irqrestore(&vnic->table_lock, flags); | |
42755 | + } | |
42756 | + | |
42757 | + if (compare_ether_addr(skb->data, vnic->mac)) { | |
42758 | + struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN); | |
42759 | + u16 port; | |
42760 | + | |
42761 | + DPRINTK("%s: saw wrong MAC address " MAC_FMT "\n", | |
42762 | + __FUNCTION__, MAC_ARG(skb->data)); | |
42763 | + | |
42764 | + if (ip->protocol == IPPROTO_TCP) { | |
42765 | + struct tcphdr *tcp = (struct tcphdr *) | |
42766 | + ((char *)ip + 4 * ip->ihl); | |
42767 | + port = tcp->dest; | |
42768 | + } else { | |
42769 | + struct udphdr *udp = (struct udphdr *) | |
42770 | + ((char *)ip + 4 * ip->ihl); | |
42771 | + EPRINTK_ON(ip->protocol != IPPROTO_UDP); | |
42772 | + port = udp->dest; | |
42773 | + } | |
42774 | + | |
42775 | + netfront_accel_msg_tx_fastpath(vnic, skb->data, | |
42776 | + ip->daddr, port, | |
42777 | + ip->protocol); | |
42778 | + } | |
42779 | + | |
42780 | + net_dev = vnic->net_dev; | |
42781 | + skb->dev = net_dev; | |
42782 | + skb->protocol = eth_type_trans(skb, net_dev); | |
42783 | + /* CHECKSUM_UNNECESSARY as hardware has done it already */ | |
42784 | + skb->ip_summed = CHECKSUM_UNNECESSARY; | |
42785 | + | |
42786 | + if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb)) | |
42787 | + netif_receive_skb(skb); | |
42788 | +} | |
42789 | + | |
42790 | + | |
42791 | +static int netfront_accel_vi_poll_process_rx(netfront_accel_vnic *vnic, | |
42792 | + ef_event *ev) | |
42793 | +{ | |
42794 | + struct netfront_accel_bufinfo *bufinfo = vnic->rx_bufs; | |
42795 | + struct netfront_accel_pkt_desc *buf = NULL; | |
42796 | + struct sk_buff *skb; | |
42797 | + int id, len, sop = 0, cont = 0; | |
42798 | + | |
42799 | + VPRINTK("Rx event.\n"); | |
42800 | + /* | |
42801 | + * Complete the receive operation, and get the request id of | |
42802 | + * the buffer | |
42803 | + */ | |
42804 | + id = ef_vi_receive_done(&vnic->vi, ev); | |
42805 | + | |
42806 | + if (id < 0 || id >= bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) { | |
42807 | + EPRINTK("Rx packet %d is invalid\n", id); | |
42808 | + /* Carry on round the loop if more events */ | |
42809 | + goto bad_packet; | |
42810 | + } | |
42811 | + /* Get our buffer descriptor */ | |
42812 | + buf = netfront_accel_buf_find(bufinfo, id); | |
42813 | + | |
42814 | + len = EF_EVENT_RX_BYTES(*ev); | |
42815 | + | |
42816 | + /* An RX buffer has been removed from the DMA ring. */ | |
42817 | + vnic->rx_dma_level--; | |
42818 | + | |
42819 | + if (EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX) { | |
42820 | + sop = EF_EVENT_RX_SOP(*ev); | |
42821 | + cont = EF_EVENT_RX_CONT(*ev); | |
42822 | + | |
42823 | + skb = vnic->jumbo_state.skb; | |
42824 | + | |
42825 | + VPRINTK("Rx packet %d: %d bytes so far; sop %d; cont %d\n", | |
42826 | + id, len, sop, cont); | |
42827 | + | |
42828 | + if (sop) { | |
42829 | + if (!vnic->jumbo_state.in_progress) { | |
42830 | + vnic->jumbo_state.in_progress = 1; | |
42831 | + BUG_ON(vnic->jumbo_state.skb != NULL); | |
42832 | + } else { | |
42833 | + /* | |
42834 | + * This fragment shows a missing tail in | |
42835 | + * previous one, but is itself possibly OK | |
42836 | + */ | |
42837 | + DPRINTK("sop and in_progress => no tail\n"); | |
42838 | + | |
42839 | + /* Release the socket buffer we already had */ | |
42840 | + discard_jumbo_state(vnic); | |
42841 | + | |
42842 | + /* Now start processing this fragment */ | |
42843 | + vnic->jumbo_state.in_progress = 1; | |
42844 | + skb = NULL; | |
42845 | + } | |
42846 | + } else if (!vnic->jumbo_state.in_progress) { | |
42847 | + DPRINTK("!sop and !in_progress => missing head\n"); | |
42848 | + goto missing_head; | |
42849 | + } | |
42850 | + | |
42851 | + if (!cont) { | |
42852 | + /* Update state for next time */ | |
42853 | + vnic->jumbo_state.in_progress = 0; | |
42854 | + vnic->jumbo_state.skb = NULL; | |
42855 | + } else if (!vnic->jumbo_state.in_progress) { | |
42856 | + DPRINTK("cont and !in_progress => missing head\n"); | |
42857 | + goto missing_head; | |
42858 | + } | |
42859 | + | |
42860 | + if (skb == NULL) { | |
42861 | + BUG_ON(!sop); | |
42862 | + | |
42863 | + if (!cont) | |
42864 | + skb = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC); | |
42865 | + else | |
42866 | + skb = alloc_skb(vnic->net_dev->mtu+NET_IP_ALIGN, | |
42867 | + GFP_ATOMIC); | |
42868 | + | |
42869 | + if (skb == NULL) { | |
42870 | + DPRINTK("%s: Couldn't get an rx skb.\n", | |
42871 | + __FUNCTION__); | |
42872 | + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf); | |
42873 | + /* | |
42874 | + * Dropping this fragment means we | |
42875 | + * should discard the rest too | |
42876 | + */ | |
42877 | + discard_jumbo_state(vnic); | |
42878 | + | |
42879 | + /* Carry on round the loop if more events */ | |
42880 | + return 0; | |
42881 | + } | |
42882 | + | |
42883 | + } | |
42884 | + | |
42885 | + /* Copy the data to required end destination */ | |
42886 | + if (ef_vnic_copy_to_skb(vnic, skb, buf, len) != 0) { | |
42887 | + /* | |
42888 | + * No space in the skb - suggests > MTU packet | |
42889 | + * received | |
42890 | + */ | |
42891 | + EPRINTK("%s: Rx packet too large (%d)\n", | |
42892 | + __FUNCTION__, len); | |
42893 | + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf); | |
42894 | + discard_jumbo_state(vnic); | |
42895 | + return 0; | |
42896 | + } | |
42897 | + | |
42898 | + /* Put the buffer back in the DMA queue. */ | |
42899 | + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf); | |
42900 | + | |
42901 | + if (cont) { | |
42902 | + vnic->jumbo_state.skb = skb; | |
42903 | + | |
42904 | + return 0; | |
42905 | + } else { | |
42906 | + /* Track number of rx fastpath packets */ | |
42907 | + vnic->netdev_stats.fastpath_rx_pkts++; | |
42908 | + vnic->netdev_stats.fastpath_rx_bytes += len; | |
42909 | + | |
42910 | + netfront_accel_vi_rx_complete(vnic, skb); | |
42911 | + | |
42912 | + return 1; | |
42913 | + } | |
42914 | + } else { | |
42915 | + BUG_ON(EF_EVENT_TYPE(*ev) != EF_EVENT_TYPE_RX_DISCARD); | |
42916 | + | |
42917 | + if (EF_EVENT_RX_DISCARD_TYPE(*ev) | |
42918 | + == EF_EVENT_RX_DISCARD_TRUNC) { | |
42919 | + DPRINTK("%s: " EF_EVENT_FMT | |
42920 | + " buffer %d FRM_TRUNC q_id %d\n", | |
42921 | + __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id, | |
42922 | + EF_EVENT_RX_DISCARD_Q_ID(*ev) ); | |
42923 | + NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_frm_trunc); | |
42924 | + } else if (EF_EVENT_RX_DISCARD_TYPE(*ev) | |
42925 | + == EF_EVENT_RX_DISCARD_OTHER) { | |
42926 | + DPRINTK("%s: " EF_EVENT_FMT | |
42927 | + " buffer %d RX_DISCARD_OTHER q_id %d\n", | |
42928 | + __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id, | |
42929 | + EF_EVENT_RX_DISCARD_Q_ID(*ev) ); | |
42930 | + /* | |
42931 | + * Probably tail of packet for which error has | |
42932 | + * already been logged, so don't count in | |
42933 | + * stats | |
42934 | + */ | |
42935 | + } else { | |
42936 | + EPRINTK("%s: " EF_EVENT_FMT | |
42937 | + " buffer %d rx discard type %d q_id %d\n", | |
42938 | + __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id, | |
42939 | + EF_EVENT_RX_DISCARD_TYPE(*ev), | |
42940 | + EF_EVENT_RX_DISCARD_Q_ID(*ev) ); | |
42941 | + NETFRONT_ACCEL_STATS_OP(++vnic->stats.bad_event_count); | |
42942 | + } | |
42943 | + } | |
42944 | + | |
42945 | + /* discard type drops through here */ | |
42946 | + | |
42947 | +bad_packet: | |
42948 | + /* Release the socket buffer we already had */ | |
42949 | + discard_jumbo_state(vnic); | |
42950 | + | |
42951 | +missing_head: | |
42952 | + BUG_ON(vnic->jumbo_state.in_progress != 0); | |
42953 | + BUG_ON(vnic->jumbo_state.skb != NULL); | |
42954 | + | |
42955 | + if (id >= 0 && id < bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) | |
42956 | + /* Put the buffer back in the DMA queue. */ | |
42957 | + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf); | |
42958 | + | |
42959 | + vnic->netdev_stats.fastpath_rx_errors++; | |
42960 | + | |
42961 | + DPRINTK("%s experienced bad packet/missing fragment error: %d \n", | |
42962 | + __FUNCTION__, ev->rx.flags); | |
42963 | + | |
42964 | + return 0; | |
42965 | +} | |
42966 | + | |
42967 | + | |
42968 | +static void netfront_accel_vi_not_busy(netfront_accel_vnic *vnic) | |
42969 | +{ | |
42970 | + struct netfront_info *np = ((struct netfront_info *) | |
42971 | + netdev_priv(vnic->net_dev)); | |
42972 | + struct sk_buff *skb; | |
42973 | + int handled; | |
42974 | + unsigned long flags; | |
42975 | + | |
42976 | + /* | |
42977 | + * TODO if we could safely check tx_skb == NULL and return | |
42978 | + * early without taking the lock, that would obviously help | |
42979 | + * performance | |
42980 | + */ | |
42981 | + | |
42982 | + /* Take the netfront lock which protects tx_skb. */ | |
42983 | + spin_lock_irqsave(&np->tx_lock, flags); | |
42984 | + if (vnic->tx_skb != NULL) { | |
42985 | + DPRINTK("%s trying to send spare buffer\n", __FUNCTION__); | |
42986 | + | |
42987 | + skb = vnic->tx_skb; | |
42988 | + vnic->tx_skb = NULL; | |
42989 | + | |
42990 | + spin_unlock_irqrestore(&np->tx_lock, flags); | |
42991 | + | |
42992 | + handled = netfront_accel_vi_tx_post(vnic, skb); | |
42993 | + | |
42994 | + spin_lock_irqsave(&np->tx_lock, flags); | |
42995 | + | |
42996 | + if (handled != NETFRONT_ACCEL_STATUS_BUSY) { | |
42997 | + DPRINTK("%s restarting tx\n", __FUNCTION__); | |
42998 | + if (netfront_check_queue_ready(vnic->net_dev)) { | |
42999 | + netif_wake_queue(vnic->net_dev); | |
43000 | + NETFRONT_ACCEL_STATS_OP | |
43001 | + (vnic->stats.queue_wakes++); | |
43002 | + } | |
43003 | + } else { | |
43004 | + vnic->tx_skb = skb; | |
43005 | + } | |
43006 | + | |
43007 | + /* | |
43008 | + * Should never get a CANT, as it checks that before | |
43009 | + * deciding it was BUSY first time round | |
43010 | + */ | |
43011 | + BUG_ON(handled == NETFRONT_ACCEL_STATUS_CANT); | |
43012 | + } | |
43013 | + spin_unlock_irqrestore(&np->tx_lock, flags); | |
43014 | +} | |
43015 | + | |
43016 | + | |
43017 | +static void netfront_accel_vi_tx_complete(netfront_accel_vnic *vnic, | |
43018 | + struct netfront_accel_tso_buffer *tso_buf, | |
43019 | + int is_last) | |
43020 | +{ | |
43021 | + struct netfront_accel_tso_buffer *next; | |
43022 | + | |
43023 | + /* | |
43024 | + * We get a single completion for every call to | |
43025 | + * ef_vi_transmitv so handle any other buffers which are part | |
43026 | + * of the same packet | |
43027 | + */ | |
43028 | + while (tso_buf != NULL) { | |
43029 | + if (tso_buf->buf->skb != NULL) { | |
43030 | + dev_kfree_skb_any(tso_buf->buf->skb); | |
43031 | + tso_buf->buf->skb = NULL; | |
43032 | + } | |
43033 | + | |
43034 | + next = tso_buf->next; | |
43035 | + | |
43036 | + netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id); | |
43037 | + | |
43038 | + tso_buf = next; | |
43039 | + } | |
43040 | + | |
43041 | + /* | |
43042 | + * If this was the last one in the batch, we try and send any | |
43043 | + * pending tx_skb. There should now be buffers and | |
43044 | + * descriptors | |
43045 | + */ | |
43046 | + if (is_last) | |
43047 | + netfront_accel_vi_not_busy(vnic); | |
43048 | +} | |
43049 | + | |
43050 | + | |
43051 | +static void netfront_accel_vi_poll_process_tx(netfront_accel_vnic *vnic, | |
43052 | + ef_event *ev) | |
43053 | +{ | |
43054 | + struct netfront_accel_pkt_desc *buf; | |
43055 | + struct netfront_accel_tso_buffer *tso_buf; | |
43056 | + ef_request_id ids[EF_VI_TRANSMIT_BATCH]; | |
43057 | + int i, n_ids; | |
43058 | + unsigned long flags; | |
43059 | + | |
43060 | + /* Get the request ids for this tx completion event. */ | |
43061 | + n_ids = ef_vi_transmit_unbundle(&vnic->vi, ev, ids); | |
43062 | + | |
43063 | + /* Take the tx buffer spin lock and hold for the duration */ | |
43064 | + spin_lock_irqsave(&vnic->tx_lock, flags); | |
43065 | + | |
43066 | + for (i = 0; i < n_ids; ++i) { | |
43067 | + VPRINTK("Tx packet %d complete\n", ids[i]); | |
43068 | + buf = netfront_accel_buf_find(vnic->tx_bufs, ids[i]); | |
43069 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_completions++); | |
43070 | + | |
43071 | + tso_buf = (struct netfront_accel_tso_buffer *) | |
43072 | + (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH); | |
43073 | + BUG_ON(tso_buf->buf != buf); | |
43074 | + | |
43075 | + netfront_accel_vi_tx_complete(vnic, tso_buf, i == (n_ids-1)); | |
43076 | + } | |
43077 | + | |
43078 | + spin_unlock_irqrestore(&vnic->tx_lock, flags); | |
43079 | +} | |
43080 | + | |
43081 | + | |
43082 | +int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets) | |
43083 | +{ | |
43084 | + ef_event ev[ACCEL_VI_POLL_EVENTS]; | |
43085 | + int rx_remain = rx_packets, rc, events, i; | |
43086 | +#if NETFRONT_ACCEL_STATS | |
43087 | + int n_evs_polled = 0, rx_evs_polled = 0, tx_evs_polled = 0; | |
43088 | +#endif | |
43089 | + BUG_ON(rx_packets <= 0); | |
43090 | + | |
43091 | + events = ef_eventq_poll(&vnic->vi, ev, | |
43092 | + min(rx_remain, ACCEL_VI_POLL_EVENTS)); | |
43093 | + i = 0; | |
43094 | + NETFRONT_ACCEL_STATS_OP(n_evs_polled += events); | |
43095 | + | |
43096 | + VPRINTK("%s: %d events\n", __FUNCTION__, events); | |
43097 | + | |
43098 | + /* Loop over each event */ | |
43099 | + while (events) { | |
43100 | + VPRINTK("%s: Event "EF_EVENT_FMT", index %lu\n", __FUNCTION__, | |
43101 | + EF_EVENT_PRI_ARG(ev[i]), | |
43102 | + (unsigned long)(vnic->vi.evq_state->evq_ptr)); | |
43103 | + | |
43104 | + if ((EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX) || | |
43105 | + (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_DISCARD)) { | |
43106 | + rc = netfront_accel_vi_poll_process_rx(vnic, &ev[i]); | |
43107 | + rx_remain -= rc; | |
43108 | + BUG_ON(rx_remain < 0); | |
43109 | + NETFRONT_ACCEL_STATS_OP(rx_evs_polled++); | |
43110 | + } else if (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX) { | |
43111 | + netfront_accel_vi_poll_process_tx(vnic, &ev[i]); | |
43112 | + NETFRONT_ACCEL_STATS_OP(tx_evs_polled++); | |
43113 | + } else if (EF_EVENT_TYPE(ev[i]) == | |
43114 | + EF_EVENT_TYPE_RX_NO_DESC_TRUNC) { | |
43115 | + DPRINTK("%s: RX_NO_DESC_TRUNC " EF_EVENT_FMT "\n", | |
43116 | + __FUNCTION__, EF_EVENT_PRI_ARG(ev[i])); | |
43117 | + discard_jumbo_state(vnic); | |
43118 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.rx_no_desc_trunc++); | |
43119 | + } else { | |
43120 | + EPRINTK("Unexpected event " EF_EVENT_FMT "\n", | |
43121 | + EF_EVENT_PRI_ARG(ev[i])); | |
43122 | + NETFRONT_ACCEL_STATS_OP(vnic->stats.bad_event_count++); | |
43123 | + } | |
43124 | + | |
43125 | + i++; | |
43126 | + | |
43127 | + /* Carry on round the loop if more events and more space */ | |
43128 | + if (i == events) { | |
43129 | + if (rx_remain == 0) | |
43130 | + break; | |
43131 | + | |
43132 | + events = ef_eventq_poll(&vnic->vi, ev, | |
43133 | + min(rx_remain, | |
43134 | + ACCEL_VI_POLL_EVENTS)); | |
43135 | + i = 0; | |
43136 | + NETFRONT_ACCEL_STATS_OP(n_evs_polled += events); | |
43137 | + } | |
43138 | + } | |
43139 | + | |
43140 | +#if NETFRONT_ACCEL_STATS | |
43141 | + vnic->stats.event_count += n_evs_polled; | |
43142 | + vnic->stats.event_count_since_irq += n_evs_polled; | |
43143 | + if (n_evs_polled > vnic->stats.events_per_poll_max) | |
43144 | + vnic->stats.events_per_poll_max = n_evs_polled; | |
43145 | + if (rx_evs_polled > vnic->stats.events_per_poll_rx_max) | |
43146 | + vnic->stats.events_per_poll_rx_max = rx_evs_polled; | |
43147 | + if (tx_evs_polled > vnic->stats.events_per_poll_tx_max) | |
43148 | + vnic->stats.events_per_poll_tx_max = tx_evs_polled; | |
43149 | +#endif | |
43150 | + | |
43151 | + return rx_packets - rx_remain; | |
43152 | +} | |
43153 | + | |
43154 | + | |
43155 | +int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic) | |
43156 | +{ | |
43157 | + u32 sw_evq_ptr; | |
43158 | + | |
43159 | + VPRINTK("%s: checking for event on %p\n", __FUNCTION__, &vnic->vi.evq_state); | |
43160 | + | |
43161 | + BUG_ON(vnic == NULL); | |
43162 | + BUG_ON(vnic->vi.evq_state == NULL); | |
43163 | + | |
43164 | + /* Do a quick check for an event. */ | |
43165 | + if (ef_eventq_has_event(&vnic->vi)) { | |
43166 | + VPRINTK("%s: found event\n", __FUNCTION__); | |
43167 | + return 0; | |
43168 | + } | |
43169 | + | |
43170 | + VPRINTK("evq_ptr=0x%08x evq_mask=0x%08x\n", | |
43171 | + vnic->evq_state.evq_ptr, vnic->vi.evq_mask); | |
43172 | + | |
43173 | + /* Request a wakeup from the hardware. */ | |
43174 | + sw_evq_ptr = vnic->evq_state.evq_ptr & vnic->vi.evq_mask; | |
43175 | + | |
43176 | + BUG_ON(vnic->hw.falcon.evq_rptr == NULL); | |
43177 | + | |
43178 | + VPRINTK("Requesting wakeup at 0x%08x, rptr %p\n", sw_evq_ptr, | |
43179 | + vnic->hw.falcon.evq_rptr); | |
43180 | + *(volatile u32 *)(vnic->hw.falcon.evq_rptr) = (sw_evq_ptr >> 3); | |
43181 | + | |
43182 | + return 1; | |
43183 | +} | |
43184 | Index: head-2008-11-25/drivers/xen/sfc_netfront/accel_xenbus.c | |
43185 | =================================================================== | |
43186 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
43187 | +++ head-2008-11-25/drivers/xen/sfc_netfront/accel_xenbus.c 2008-02-20 09:32:49.000000000 +0100 | |
43188 | @@ -0,0 +1,776 @@ | |
43189 | +/**************************************************************************** | |
43190 | + * Solarflare driver for Xen network acceleration | |
43191 | + * | |
43192 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
43193 | + * 9501 Jeronimo Road, Suite 250, | |
43194 | + * Irvine, CA 92618, USA | |
43195 | + * | |
43196 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
43197 | + * | |
43198 | + * This program is free software; you can redistribute it and/or modify it | |
43199 | + * under the terms of the GNU General Public License version 2 as published | |
43200 | + * by the Free Software Foundation, incorporated herein by reference. | |
43201 | + * | |
43202 | + * This program is distributed in the hope that it will be useful, | |
43203 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
43204 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
43205 | + * GNU General Public License for more details. | |
43206 | + * | |
43207 | + * You should have received a copy of the GNU General Public License | |
43208 | + * along with this program; if not, write to the Free Software | |
43209 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
43210 | + **************************************************************************** | |
43211 | + */ | |
43212 | + | |
43213 | +#include <linux/stddef.h> | |
43214 | +#include <linux/errno.h> | |
43215 | + | |
43216 | +#include <xen/xenbus.h> | |
43217 | +#include <xen/evtchn.h> | |
43218 | +#include <xen/gnttab.h> | |
43219 | + | |
43220 | +#include "accel.h" | |
43221 | +#include "accel_util.h" | |
43222 | +#include "accel_msg_iface.h" | |
43223 | +#include "accel_bufs.h" | |
43224 | +#include "accel_ssr.h" | |
43225 | +/* drivers/xen/netfront/netfront.h */ | |
43226 | +#include "netfront.h" | |
43227 | + | |
43228 | +void netfront_accel_set_closing(netfront_accel_vnic *vnic) | |
43229 | +{ | |
43230 | + | |
43231 | + vnic->frontend_state = XenbusStateClosing; | |
43232 | + net_accel_update_state(vnic->dev, XenbusStateClosing); | |
43233 | +} | |
43234 | + | |
43235 | + | |
43236 | +static void mac_address_change(struct xenbus_watch *watch, | |
43237 | + const char **vec, unsigned int len) | |
43238 | +{ | |
43239 | + netfront_accel_vnic *vnic; | |
43240 | + struct xenbus_device *dev; | |
43241 | + int rc; | |
43242 | + | |
43243 | + DPRINTK("%s\n", __FUNCTION__); | |
43244 | + | |
43245 | + vnic = container_of(watch, netfront_accel_vnic, | |
43246 | + mac_address_watch); | |
43247 | + dev = vnic->dev; | |
43248 | + | |
43249 | + rc = net_accel_xen_net_read_mac(dev, vnic->mac); | |
43250 | + | |
43251 | + if (rc != 0) | |
43252 | + EPRINTK("%s: failed to read mac (%d)\n", __FUNCTION__, rc); | |
43253 | +} | |
43254 | + | |
43255 | + | |
43256 | +static int setup_mac_address_watch(struct xenbus_device *dev, | |
43257 | + netfront_accel_vnic *vnic) | |
43258 | +{ | |
43259 | + int err; | |
43260 | + | |
43261 | + DPRINTK("Setting watch on %s/%s\n", dev->nodename, "mac"); | |
43262 | + | |
43263 | + err = xenbus_watch_path2(dev, dev->nodename, "mac", | |
43264 | + &vnic->mac_address_watch, | |
43265 | + mac_address_change); | |
43266 | + if (err) { | |
43267 | + EPRINTK("%s: Failed to register xenbus watch: %d\n", | |
43268 | + __FUNCTION__, err); | |
43269 | + goto fail; | |
43270 | + } | |
43271 | + | |
43272 | + return 0; | |
43273 | + fail: | |
43274 | + vnic->mac_address_watch.node = NULL; | |
43275 | + return err; | |
43276 | +} | |
43277 | + | |
43278 | + | |
43279 | +/* Grant access to some pages and publish through xenbus */ | |
43280 | +static int make_named_grant(struct xenbus_device *dev, void *page, | |
43281 | + const char *name, grant_ref_t *gnt_ref) | |
43282 | +{ | |
43283 | + struct xenbus_transaction tr; | |
43284 | + int err; | |
43285 | + grant_ref_t gnt; | |
43286 | + | |
43287 | + gnt = net_accel_grant_page(dev, virt_to_mfn(page), 0); | |
43288 | + if (gnt < 0) | |
43289 | + return gnt; | |
43290 | + | |
43291 | + do { | |
43292 | + err = xenbus_transaction_start(&tr); | |
43293 | + if (err != 0) { | |
43294 | + EPRINTK("%s: transaction start failed %d\n", | |
43295 | + __FUNCTION__, err); | |
43296 | + return err; | |
43297 | + } | |
43298 | + err = xenbus_printf(tr, dev->nodename, name, "%d", gnt); | |
43299 | + if (err != 0) { | |
43300 | + EPRINTK("%s: xenbus_printf failed %d\n", __FUNCTION__, | |
43301 | + err); | |
43302 | + xenbus_transaction_end(tr, 1); | |
43303 | + return err; | |
43304 | + } | |
43305 | + err = xenbus_transaction_end(tr, 0); | |
43306 | + } while (err == -EAGAIN); | |
43307 | + | |
43308 | + if (err != 0) { | |
43309 | + EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err); | |
43310 | + return err; | |
43311 | + } | |
43312 | + | |
43313 | + *gnt_ref = gnt; | |
43314 | + | |
43315 | + return 0; | |
43316 | +} | |
43317 | + | |
43318 | + | |
43319 | +static int remove_named_grant(struct xenbus_device *dev, | |
43320 | + const char *name, grant_ref_t gnt_ref) | |
43321 | +{ | |
43322 | + struct xenbus_transaction tr; | |
43323 | + int err; | |
43324 | + | |
43325 | + net_accel_ungrant_page(gnt_ref); | |
43326 | + | |
43327 | + do { | |
43328 | + err = xenbus_transaction_start(&tr); | |
43329 | + if (err != 0) { | |
43330 | + EPRINTK("%s: transaction start failed %d\n", | |
43331 | + __FUNCTION__, err); | |
43332 | + return err; | |
43333 | + } | |
43334 | + err = xenbus_rm(tr, dev->nodename, name); | |
43335 | + if (err != 0) { | |
43336 | + EPRINTK("%s: xenbus_rm failed %d\n", __FUNCTION__, | |
43337 | + err); | |
43338 | + xenbus_transaction_end(tr, 1); | |
43339 | + return err; | |
43340 | + } | |
43341 | + err = xenbus_transaction_end(tr, 0); | |
43342 | + } while (err == -EAGAIN); | |
43343 | + | |
43344 | + if (err != 0) { | |
43345 | + EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err); | |
43346 | + return err; | |
43347 | + } | |
43348 | + | |
43349 | + return 0; | |
43350 | +} | |
43351 | + | |
43352 | + | |
43353 | +static | |
43354 | +netfront_accel_vnic *netfront_accel_vnic_ctor(struct net_device *net_dev, | |
43355 | + struct xenbus_device *dev) | |
43356 | +{ | |
43357 | + struct netfront_info *np = | |
43358 | + (struct netfront_info *)netdev_priv(net_dev); | |
43359 | + netfront_accel_vnic *vnic; | |
43360 | + int err; | |
43361 | + | |
43362 | + /* | |
43363 | + * A bug in earlier versions of Xen accel plugin system meant | |
43364 | + * you could be probed twice for the same device on suspend | |
43365 | + * cancel. Be tolerant of that. | |
43366 | + */ | |
43367 | + if (np->accel_priv != NULL) | |
43368 | + return ERR_PTR(-EALREADY); | |
43369 | + | |
43370 | + /* Alloc mem for state */ | |
43371 | + vnic = kzalloc(sizeof(netfront_accel_vnic), GFP_KERNEL); | |
43372 | + if (vnic == NULL) { | |
43373 | + EPRINTK("%s: no memory for vnic state\n", __FUNCTION__); | |
43374 | + return ERR_PTR(-ENOMEM); | |
43375 | + } | |
43376 | + | |
43377 | + spin_lock_init(&vnic->tx_lock); | |
43378 | + | |
43379 | + mutex_init(&vnic->vnic_mutex); | |
43380 | + mutex_lock(&vnic->vnic_mutex); | |
43381 | + | |
43382 | + /* Store so state can be retrieved from device */ | |
43383 | + BUG_ON(np->accel_priv != NULL); | |
43384 | + np->accel_priv = vnic; | |
43385 | + vnic->dev = dev; | |
43386 | + vnic->net_dev = net_dev; | |
43387 | + spin_lock_init(&vnic->irq_enabled_lock); | |
43388 | + netfront_accel_ssr_init(&vnic->ssr_state); | |
43389 | + | |
43390 | + init_waitqueue_head(&vnic->state_wait_queue); | |
43391 | + vnic->backend_state = XenbusStateUnknown; | |
43392 | + vnic->frontend_state = XenbusStateClosed; | |
43393 | + vnic->removing = 0; | |
43394 | + vnic->domU_state_is_setup = 0; | |
43395 | + vnic->dom0_state_is_setup = 0; | |
43396 | + vnic->poll_enabled = 0; | |
43397 | + vnic->tx_enabled = 0; | |
43398 | + vnic->tx_skb = NULL; | |
43399 | + | |
43400 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
43401 | + INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend); | |
43402 | +#else | |
43403 | + INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend, vnic); | |
43404 | +#endif | |
43405 | + | |
43406 | + netfront_accel_debugfs_create(vnic); | |
43407 | + | |
43408 | + mutex_unlock(&vnic->vnic_mutex); | |
43409 | + | |
43410 | + err = net_accel_xen_net_read_mac(dev, vnic->mac); | |
43411 | + if (err) | |
43412 | + goto fail_mac; | |
43413 | + | |
43414 | + /* Setup a watch on the frontend's MAC address */ | |
43415 | + err = setup_mac_address_watch(dev, vnic); | |
43416 | + if (err) | |
43417 | + goto fail_mac; | |
43418 | + | |
43419 | + return vnic; | |
43420 | + | |
43421 | +fail_mac: | |
43422 | + | |
43423 | + mutex_lock(&vnic->vnic_mutex); | |
43424 | + | |
43425 | + netfront_accel_debugfs_remove(vnic); | |
43426 | + | |
43427 | + netfront_accel_ssr_fini(vnic, &vnic->ssr_state); | |
43428 | + | |
43429 | + EPRINTK_ON(vnic->tx_skb != NULL); | |
43430 | + | |
43431 | + vnic->frontend_state = XenbusStateUnknown; | |
43432 | + net_accel_update_state(dev, XenbusStateUnknown); | |
43433 | + | |
43434 | + mutex_unlock(&vnic->vnic_mutex); | |
43435 | + | |
43436 | + np->accel_priv = NULL; | |
43437 | + kfree(vnic); | |
43438 | + | |
43439 | + return ERR_PTR(err); | |
43440 | +} | |
43441 | + | |
43442 | + | |
43443 | +static void netfront_accel_vnic_dtor(netfront_accel_vnic *vnic) | |
43444 | +{ | |
43445 | + struct net_device *net_dev = vnic->net_dev; | |
43446 | + struct netfront_info *np = | |
43447 | + (struct netfront_info *)netdev_priv(net_dev); | |
43448 | + | |
43449 | + /* | |
43450 | + * Now we don't hold the lock any more it is safe to remove | |
43451 | + * this watch and synchonrise with the completion of | |
43452 | + * watches | |
43453 | + */ | |
43454 | + DPRINTK("%s: unregistering xenbus mac watch\n", __FUNCTION__); | |
43455 | + unregister_xenbus_watch(&vnic->mac_address_watch); | |
43456 | + kfree(vnic->mac_address_watch.node); | |
43457 | + | |
43458 | + flush_workqueue(netfront_accel_workqueue); | |
43459 | + | |
43460 | + mutex_lock(&vnic->vnic_mutex); | |
43461 | + | |
43462 | + netfront_accel_debugfs_remove(vnic); | |
43463 | + | |
43464 | + netfront_accel_ssr_fini(vnic, &vnic->ssr_state); | |
43465 | + | |
43466 | + EPRINTK_ON(vnic->tx_skb != NULL); | |
43467 | + | |
43468 | + vnic->frontend_state = XenbusStateUnknown; | |
43469 | + net_accel_update_state(vnic->dev, XenbusStateUnknown); | |
43470 | + | |
43471 | + mutex_unlock(&vnic->vnic_mutex); | |
43472 | + | |
43473 | + np->accel_priv = NULL; | |
43474 | + kfree(vnic); | |
43475 | +} | |
43476 | + | |
43477 | + | |
43478 | +static int vnic_setup_domU_shared_state(struct xenbus_device *dev, | |
43479 | + netfront_accel_vnic *vnic) | |
43480 | +{ | |
43481 | + struct xenbus_transaction tr; | |
43482 | + int err; | |
43483 | + int msgs_per_queue; | |
43484 | + | |
43485 | + | |
43486 | + DPRINTK("Setting up domU shared state.\n"); | |
43487 | + | |
43488 | + msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg); | |
43489 | + | |
43490 | + /* Allocate buffer state */ | |
43491 | + vnic->tx_bufs = netfront_accel_init_bufs(&vnic->tx_lock); | |
43492 | + if (vnic->tx_bufs == NULL) { | |
43493 | + err = -ENOMEM; | |
43494 | + EPRINTK("%s: Failed to allocate tx buffers\n", __FUNCTION__); | |
43495 | + goto fail_tx_bufs; | |
43496 | + } | |
43497 | + | |
43498 | + vnic->rx_bufs = netfront_accel_init_bufs(NULL); | |
43499 | + if (vnic->rx_bufs == NULL) { | |
43500 | + err = -ENOMEM; | |
43501 | + EPRINTK("%s: Failed to allocate rx buffers\n", __FUNCTION__); | |
43502 | + goto fail_rx_bufs; | |
43503 | + } | |
43504 | + | |
43505 | + /* | |
43506 | + * This allocates two pages, one for the shared page and one | |
43507 | + * for the message queue. | |
43508 | + */ | |
43509 | + vnic->shared_page = (struct net_accel_shared_page *) | |
43510 | + __get_free_pages(GFP_KERNEL, 1); | |
43511 | + if (vnic->shared_page == NULL) { | |
43512 | + EPRINTK("%s: no memory for shared pages\n", __FUNCTION__); | |
43513 | + err = -ENOMEM; | |
43514 | + goto fail_shared_page; | |
43515 | + } | |
43516 | + | |
43517 | + net_accel_msg_init_queue | |
43518 | + (&vnic->from_dom0, &vnic->shared_page->queue0, | |
43519 | + (struct net_accel_msg *)((u8*)vnic->shared_page + PAGE_SIZE), | |
43520 | + msgs_per_queue); | |
43521 | + | |
43522 | + net_accel_msg_init_queue | |
43523 | + (&vnic->to_dom0, &vnic->shared_page->queue1, | |
43524 | + (struct net_accel_msg *)((u8*)vnic->shared_page + | |
43525 | + (3 * PAGE_SIZE / 2)), | |
43526 | + msgs_per_queue); | |
43527 | + | |
43528 | + vnic->msg_state = NETFRONT_ACCEL_MSG_NONE; | |
43529 | + | |
43530 | + err = make_named_grant(dev, vnic->shared_page, "accel-ctrl-page", | |
43531 | + &vnic->ctrl_page_gnt); | |
43532 | + if (err) { | |
43533 | + EPRINTK("couldn't make ctrl-page named grant\n"); | |
43534 | + goto fail_ctrl_page_grant; | |
43535 | + } | |
43536 | + | |
43537 | + err = make_named_grant(dev, (u8*)vnic->shared_page + PAGE_SIZE, | |
43538 | + "accel-msg-page", &vnic->msg_page_gnt); | |
43539 | + if (err) { | |
43540 | + EPRINTK("couldn't make msg-page named grant\n"); | |
43541 | + goto fail_msg_page_grant; | |
43542 | + } | |
43543 | + | |
43544 | + /* Create xenbus msg event channel */ | |
43545 | + err = bind_listening_port_to_irqhandler | |
43546 | + (dev->otherend_id, netfront_accel_msg_channel_irq_from_bend, | |
43547 | + SA_SAMPLE_RANDOM, "vnicctrl", vnic); | |
43548 | + if (err < 0) { | |
43549 | + EPRINTK("Couldn't bind msg event channel\n"); | |
43550 | + goto fail_msg_irq; | |
43551 | + } | |
43552 | + vnic->msg_channel_irq = err; | |
43553 | + vnic->msg_channel = irq_to_evtchn_port(vnic->msg_channel_irq); | |
43554 | + | |
43555 | + /* Create xenbus net event channel */ | |
43556 | + err = bind_listening_port_to_irqhandler | |
43557 | + (dev->otherend_id, netfront_accel_net_channel_irq_from_bend, | |
43558 | + SA_SAMPLE_RANDOM, "vnicfront", vnic); | |
43559 | + if (err < 0) { | |
43560 | + EPRINTK("Couldn't bind net event channel\n"); | |
43561 | + goto fail_net_irq; | |
43562 | + } | |
43563 | + vnic->net_channel_irq = err; | |
43564 | + vnic->net_channel = irq_to_evtchn_port(vnic->net_channel_irq); | |
43565 | + /* Want to ensure we don't get interrupts before we're ready */ | |
43566 | + netfront_accel_disable_net_interrupts(vnic); | |
43567 | + | |
43568 | + DPRINTK("otherend %d has msg ch %u (%u) and net ch %u (%u)\n", | |
43569 | + dev->otherend_id, vnic->msg_channel, vnic->msg_channel_irq, | |
43570 | + vnic->net_channel, vnic->net_channel_irq); | |
43571 | + | |
43572 | + do { | |
43573 | + err = xenbus_transaction_start(&tr); | |
43574 | + if (err != 0) { | |
43575 | + EPRINTK("%s: Transaction start failed %d\n", | |
43576 | + __FUNCTION__, err); | |
43577 | + goto fail_transaction; | |
43578 | + } | |
43579 | + | |
43580 | + err = xenbus_printf(tr, dev->nodename, "accel-msg-channel", | |
43581 | + "%u", vnic->msg_channel); | |
43582 | + if (err != 0) { | |
43583 | + EPRINTK("%s: event channel xenbus write failed %d\n", | |
43584 | + __FUNCTION__, err); | |
43585 | + xenbus_transaction_end(tr, 1); | |
43586 | + goto fail_transaction; | |
43587 | + } | |
43588 | + | |
43589 | + err = xenbus_printf(tr, dev->nodename, "accel-net-channel", | |
43590 | + "%u", vnic->net_channel); | |
43591 | + if (err != 0) { | |
43592 | + EPRINTK("%s: net channel xenbus write failed %d\n", | |
43593 | + __FUNCTION__, err); | |
43594 | + xenbus_transaction_end(tr, 1); | |
43595 | + goto fail_transaction; | |
43596 | + } | |
43597 | + | |
43598 | + err = xenbus_transaction_end(tr, 0); | |
43599 | + } while (err == -EAGAIN); | |
43600 | + | |
43601 | + if (err != 0) { | |
43602 | + EPRINTK("%s: Transaction end failed %d\n", __FUNCTION__, err); | |
43603 | + goto fail_transaction; | |
43604 | + } | |
43605 | + | |
43606 | + DPRINTK("Completed setting up domU shared state\n"); | |
43607 | + | |
43608 | + return 0; | |
43609 | + | |
43610 | +fail_transaction: | |
43611 | + | |
43612 | + unbind_from_irqhandler(vnic->net_channel_irq, vnic); | |
43613 | +fail_net_irq: | |
43614 | + | |
43615 | + unbind_from_irqhandler(vnic->msg_channel_irq, vnic); | |
43616 | +fail_msg_irq: | |
43617 | + | |
43618 | + remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt); | |
43619 | +fail_msg_page_grant: | |
43620 | + | |
43621 | + remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt); | |
43622 | +fail_ctrl_page_grant: | |
43623 | + | |
43624 | + free_pages((unsigned long)vnic->shared_page, 1); | |
43625 | + vnic->shared_page = NULL; | |
43626 | +fail_shared_page: | |
43627 | + | |
43628 | + netfront_accel_fini_bufs(vnic->rx_bufs); | |
43629 | +fail_rx_bufs: | |
43630 | + | |
43631 | + netfront_accel_fini_bufs(vnic->tx_bufs); | |
43632 | +fail_tx_bufs: | |
43633 | + | |
43634 | + /* Undo the memory allocation created when we got the HELLO */ | |
43635 | + netfront_accel_free_buffer_mem(&vnic->bufpages, | |
43636 | + vnic->rx_bufs, | |
43637 | + vnic->tx_bufs); | |
43638 | + | |
43639 | + DPRINTK("Failed to setup domU shared state with code %d\n", err); | |
43640 | + | |
43641 | + return err; | |
43642 | +} | |
43643 | + | |
43644 | + | |
43645 | +static void vnic_remove_domU_shared_state(struct xenbus_device *dev, | |
43646 | + netfront_accel_vnic *vnic) | |
43647 | +{ | |
43648 | + struct xenbus_transaction tr; | |
43649 | + | |
43650 | + /* | |
43651 | + * Don't remove any watches because we currently hold the | |
43652 | + * mutex and the watches take the mutex. | |
43653 | + */ | |
43654 | + | |
43655 | + DPRINTK("%s: removing event channel irq handlers %d %d\n", | |
43656 | + __FUNCTION__, vnic->net_channel_irq, vnic->msg_channel_irq); | |
43657 | + do { | |
43658 | + if (xenbus_transaction_start(&tr) != 0) | |
43659 | + break; | |
43660 | + xenbus_rm(tr, dev->nodename, "accel-msg-channel"); | |
43661 | + xenbus_rm(tr, dev->nodename, "accel-net-channel"); | |
43662 | + } while (xenbus_transaction_end(tr, 0) == -EAGAIN); | |
43663 | + | |
43664 | + unbind_from_irqhandler(vnic->net_channel_irq, vnic); | |
43665 | + unbind_from_irqhandler(vnic->msg_channel_irq, vnic); | |
43666 | + | |
43667 | + /* ungrant pages for msg channel */ | |
43668 | + remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt); | |
43669 | + remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt); | |
43670 | + free_pages((unsigned long)vnic->shared_page, 1); | |
43671 | + vnic->shared_page = NULL; | |
43672 | + | |
43673 | + /* ungrant pages for buffers, and free buffer memory */ | |
43674 | + netfront_accel_free_buffer_mem(&vnic->bufpages, | |
43675 | + vnic->rx_bufs, | |
43676 | + vnic->tx_bufs); | |
43677 | + netfront_accel_fini_bufs(vnic->rx_bufs); | |
43678 | + netfront_accel_fini_bufs(vnic->tx_bufs); | |
43679 | +} | |
43680 | + | |
43681 | + | |
43682 | +static void vnic_setup_dom0_shared_state(struct xenbus_device *dev, | |
43683 | + netfront_accel_vnic *vnic) | |
43684 | +{ | |
43685 | + DPRINTK("Setting up dom0 shared state\n"); | |
43686 | + | |
43687 | + netfront_accel_vi_ctor(vnic); | |
43688 | + | |
43689 | + /* | |
43690 | + * Message processing will be enabled when this function | |
43691 | + * returns, but we might have missed an interrupt. Schedule a | |
43692 | + * check just in case. | |
43693 | + */ | |
43694 | + queue_work(netfront_accel_workqueue, &vnic->msg_from_bend); | |
43695 | +} | |
43696 | + | |
43697 | + | |
43698 | +static void vnic_remove_dom0_shared_state(struct xenbus_device *dev, | |
43699 | + netfront_accel_vnic *vnic) | |
43700 | +{ | |
43701 | + DPRINTK("Removing dom0 shared state\n"); | |
43702 | + | |
43703 | + vnic_stop_fastpath(vnic); | |
43704 | + | |
43705 | + netfront_accel_vi_dtor(vnic); | |
43706 | +} | |
43707 | + | |
43708 | + | |
43709 | +/*************************************************************************/ | |
43710 | + | |
43711 | +/* | |
43712 | + * The following code handles accelstate changes between the frontend | |
43713 | + * and the backend. In response to transitions, calls the following | |
43714 | + * functions in matching pairs: | |
43715 | + * | |
43716 | + * vnic_setup_domU_shared_state | |
43717 | + * vnic_remove_domU_shared_state | |
43718 | + * | |
43719 | + * vnic_setup_dom0_shared_state | |
43720 | + * vnic_remove_dom0_shared_state | |
43721 | + * | |
43722 | + * Valid state transitions for DomU are as follows: | |
43723 | + * | |
43724 | + * Closed->Init on probe or in response to Init from dom0 | |
43725 | + * | |
43726 | + * Init->Connected in response to Init from dom0 | |
43727 | + * Init->Closing on error providing dom0 is in Init | |
43728 | + * Init->Closed on remove or in response to Closing from dom0 | |
43729 | + * | |
43730 | + * Connected->Closing on error/remove | |
43731 | + * Connected->Closed in response to Closing from dom0 | |
43732 | + * | |
43733 | + * Closing->Closed in response to Closing from dom0 | |
43734 | + * | |
43735 | + */ | |
43736 | + | |
43737 | + | |
43738 | +/* Function to deal with Xenbus accel state change in backend */ | |
43739 | +static void netfront_accel_backend_accel_changed(netfront_accel_vnic *vnic, | |
43740 | + XenbusState backend_state) | |
43741 | +{ | |
43742 | + struct xenbus_device *dev = vnic->dev; | |
43743 | + XenbusState frontend_state; | |
43744 | + int state; | |
43745 | + | |
43746 | + DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n", | |
43747 | + __FUNCTION__, xenbus_strstate(vnic->backend_state), | |
43748 | + xenbus_strstate(backend_state), dev->nodename, dev->otherend); | |
43749 | + | |
43750 | + /* | |
43751 | + * Ignore duplicate state changes. This can happen if the | |
43752 | + * backend changes state twice in quick succession and the | |
43753 | + * first watch fires in the frontend after the second | |
43754 | + * transition has completed. | |
43755 | + */ | |
43756 | + if (vnic->backend_state == backend_state) | |
43757 | + return; | |
43758 | + | |
43759 | + vnic->backend_state = backend_state; | |
43760 | + frontend_state = vnic->frontend_state; | |
43761 | + | |
43762 | + switch (backend_state) { | |
43763 | + case XenbusStateInitialising: | |
43764 | + /* | |
43765 | + * It's possible for us to miss the closed state from | |
43766 | + * dom0, so do the work here. | |
43767 | + */ | |
43768 | + if (vnic->domU_state_is_setup) { | |
43769 | + vnic_remove_domU_shared_state(dev, vnic); | |
43770 | + vnic->domU_state_is_setup = 0; | |
43771 | + } | |
43772 | + | |
43773 | + if (frontend_state != XenbusStateInitialising) { | |
43774 | + /* Make sure the backend doesn't go away. */ | |
43775 | + frontend_state = XenbusStateInitialising; | |
43776 | + net_accel_update_state(dev, frontend_state); | |
43777 | + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state); | |
43778 | + backend_state = (XenbusState)state; | |
43779 | + if (backend_state != XenbusStateInitialising) | |
43780 | + break; | |
43781 | + } | |
43782 | + | |
43783 | + /* Start the new connection. */ | |
43784 | + if (!vnic->removing) { | |
43785 | + BUG_ON(vnic->domU_state_is_setup); | |
43786 | + if (vnic_setup_domU_shared_state(dev, vnic) == 0) { | |
43787 | + vnic->domU_state_is_setup = 1; | |
43788 | + frontend_state = XenbusStateConnected; | |
43789 | + } else | |
43790 | + frontend_state = XenbusStateClosing; | |
43791 | + } | |
43792 | + break; | |
43793 | + case XenbusStateConnected: | |
43794 | + if (vnic->domU_state_is_setup && | |
43795 | + !vnic->dom0_state_is_setup) { | |
43796 | + vnic_setup_dom0_shared_state(dev, vnic); | |
43797 | + vnic->dom0_state_is_setup = 1; | |
43798 | + } | |
43799 | + break; | |
43800 | + default: | |
43801 | + case XenbusStateClosing: | |
43802 | + if (vnic->dom0_state_is_setup) { | |
43803 | + vnic_remove_dom0_shared_state(dev, vnic); | |
43804 | + vnic->dom0_state_is_setup = 0; | |
43805 | + } | |
43806 | + frontend_state = XenbusStateClosed; | |
43807 | + break; | |
43808 | + case XenbusStateUnknown: | |
43809 | + case XenbusStateClosed: | |
43810 | + if (vnic->domU_state_is_setup) { | |
43811 | + vnic_remove_domU_shared_state(dev, vnic); | |
43812 | + vnic->domU_state_is_setup = 0; | |
43813 | + } | |
43814 | + break; | |
43815 | + } | |
43816 | + | |
43817 | + if (frontend_state != vnic->frontend_state) { | |
43818 | + DPRINTK("Switching from state %s (%d) to %s (%d)\n", | |
43819 | + xenbus_strstate(vnic->frontend_state), | |
43820 | + vnic->frontend_state, | |
43821 | + xenbus_strstate(frontend_state), frontend_state); | |
43822 | + vnic->frontend_state = frontend_state; | |
43823 | + net_accel_update_state(dev, frontend_state); | |
43824 | + } | |
43825 | + | |
43826 | + wake_up(&vnic->state_wait_queue); | |
43827 | +} | |
43828 | + | |
43829 | + | |
43830 | +static void backend_accel_state_change(struct xenbus_watch *watch, | |
43831 | + const char **vec, unsigned int len) | |
43832 | +{ | |
43833 | + int state; | |
43834 | + netfront_accel_vnic *vnic; | |
43835 | + struct xenbus_device *dev; | |
43836 | + | |
43837 | + DPRINTK("%s\n", __FUNCTION__); | |
43838 | + | |
43839 | + vnic = container_of(watch, struct netfront_accel_vnic, | |
43840 | + backend_accel_watch); | |
43841 | + | |
43842 | + mutex_lock(&vnic->vnic_mutex); | |
43843 | + | |
43844 | + dev = vnic->dev; | |
43845 | + | |
43846 | + state = (int)XenbusStateUnknown; | |
43847 | + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state); | |
43848 | + netfront_accel_backend_accel_changed(vnic, state); | |
43849 | + | |
43850 | + mutex_unlock(&vnic->vnic_mutex); | |
43851 | +} | |
43852 | + | |
43853 | + | |
43854 | +static int setup_dom0_accel_watch(struct xenbus_device *dev, | |
43855 | + netfront_accel_vnic *vnic) | |
43856 | +{ | |
43857 | + int err; | |
43858 | + | |
43859 | + DPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate"); | |
43860 | + | |
43861 | + err = xenbus_watch_path2(dev, dev->otherend, "accelstate", | |
43862 | + &vnic->backend_accel_watch, | |
43863 | + backend_accel_state_change); | |
43864 | + if (err) { | |
43865 | + EPRINTK("%s: Failed to register xenbus watch: %d\n", | |
43866 | + __FUNCTION__, err); | |
43867 | + goto fail; | |
43868 | + } | |
43869 | + return 0; | |
43870 | + fail: | |
43871 | + vnic->backend_accel_watch.node = NULL; | |
43872 | + return err; | |
43873 | +} | |
43874 | + | |
43875 | + | |
43876 | +int netfront_accel_probe(struct net_device *net_dev, struct xenbus_device *dev) | |
43877 | +{ | |
43878 | + netfront_accel_vnic *vnic; | |
43879 | + int err; | |
43880 | + | |
43881 | + DPRINTK("Probe passed device %s\n", dev->nodename); | |
43882 | + | |
43883 | + vnic = netfront_accel_vnic_ctor(net_dev, dev); | |
43884 | + if (IS_ERR(vnic)) | |
43885 | + return PTR_ERR(vnic); | |
43886 | + | |
43887 | + /* | |
43888 | + * Setup a watch on the backend accel state. This sets things | |
43889 | + * going. | |
43890 | + */ | |
43891 | + err = setup_dom0_accel_watch(dev, vnic); | |
43892 | + if (err) { | |
43893 | + netfront_accel_vnic_dtor(vnic); | |
43894 | + EPRINTK("%s: probe failed with code %d\n", __FUNCTION__, err); | |
43895 | + return err; | |
43896 | + } | |
43897 | + | |
43898 | + /* | |
43899 | + * Indicate to the other end that we're ready to start unless | |
43900 | + * the watch has already fired. | |
43901 | + */ | |
43902 | + mutex_lock(&vnic->vnic_mutex); | |
43903 | + VPRINTK("setup success, updating accelstate\n"); | |
43904 | + if (vnic->frontend_state == XenbusStateClosed) { | |
43905 | + vnic->frontend_state = XenbusStateInitialising; | |
43906 | + net_accel_update_state(dev, XenbusStateInitialising); | |
43907 | + } | |
43908 | + mutex_unlock(&vnic->vnic_mutex); | |
43909 | + | |
43910 | + DPRINTK("Probe done device %s\n", dev->nodename); | |
43911 | + | |
43912 | + return 0; | |
43913 | +} | |
43914 | + | |
43915 | + | |
43916 | +int netfront_accel_remove(struct xenbus_device *dev) | |
43917 | +{ | |
43918 | + struct netfront_info *np = | |
43919 | + (struct netfront_info *)dev->dev.driver_data; | |
43920 | + netfront_accel_vnic *vnic = (netfront_accel_vnic *)np->accel_priv; | |
43921 | + | |
43922 | + DPRINTK("%s %s\n", __FUNCTION__, dev->nodename); | |
43923 | + | |
43924 | + BUG_ON(vnic == NULL); | |
43925 | + | |
43926 | + mutex_lock(&vnic->vnic_mutex); | |
43927 | + | |
43928 | + /* Reject any attempts to connect. */ | |
43929 | + vnic->removing = 1; | |
43930 | + | |
43931 | + /* Close any existing connection. */ | |
43932 | + if (vnic->frontend_state == XenbusStateConnected) { | |
43933 | + vnic->frontend_state = XenbusStateClosing; | |
43934 | + net_accel_update_state(dev, XenbusStateClosing); | |
43935 | + } | |
43936 | + | |
43937 | + mutex_unlock(&vnic->vnic_mutex); | |
43938 | + | |
43939 | + DPRINTK("%s waiting for release of %s\n", __FUNCTION__, dev->nodename); | |
43940 | + | |
43941 | + /* | |
43942 | + * Wait for the xenbus watch to release the shared resources. | |
43943 | + * This indicates that dom0 has made the transition | |
43944 | + * Closing->Closed or that dom0 was in Closed or Init and no | |
43945 | + * resources were mapped. | |
43946 | + */ | |
43947 | + wait_event(vnic->state_wait_queue, | |
43948 | + !vnic->domU_state_is_setup); | |
43949 | + | |
43950 | + /* | |
43951 | + * Now we don't need this watch anymore it is safe to remove | |
43952 | + * it (and so synchronise with it completing if outstanding) | |
43953 | + */ | |
43954 | + DPRINTK("%s: unregistering xenbus accel watch\n", | |
43955 | + __FUNCTION__); | |
43956 | + unregister_xenbus_watch(&vnic->backend_accel_watch); | |
43957 | + kfree(vnic->backend_accel_watch.node); | |
43958 | + | |
43959 | + netfront_accel_vnic_dtor(vnic); | |
43960 | + | |
43961 | + DPRINTK("%s done %s\n", __FUNCTION__, dev->nodename); | |
43962 | + | |
43963 | + return 0; | |
43964 | +} | |
43965 | Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon.h | |
43966 | =================================================================== | |
43967 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
43968 | +++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon.h 2008-02-20 09:32:49.000000000 +0100 | |
43969 | @@ -0,0 +1,172 @@ | |
43970 | +/**************************************************************************** | |
43971 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
43972 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
43973 | + * 9501 Jeronimo Road, Suite 250, | |
43974 | + * Irvine, CA 92618, USA | |
43975 | + * | |
43976 | + * Maintained by Solarflare Communications | |
43977 | + * <linux-xen-drivers@solarflare.com> | |
43978 | + * <onload-dev@solarflare.com> | |
43979 | + * | |
43980 | + * This program is free software; you can redistribute it and/or modify it | |
43981 | + * under the terms of the GNU General Public License version 2 as published | |
43982 | + * by the Free Software Foundation, incorporated herein by reference. | |
43983 | + * | |
43984 | + * This program is distributed in the hope that it will be useful, | |
43985 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
43986 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
43987 | + * GNU General Public License for more details. | |
43988 | + * | |
43989 | + * You should have received a copy of the GNU General Public License | |
43990 | + * along with this program; if not, write to the Free Software | |
43991 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
43992 | + **************************************************************************** | |
43993 | + */ | |
43994 | + | |
43995 | +/* | |
43996 | + * \author slp | |
43997 | + * \brief Falcon specific definitions | |
43998 | + * \date 2004/08 | |
43999 | + */ | |
44000 | + | |
44001 | +#ifndef __EF_VI_FALCON_H__ | |
44002 | +#define __EF_VI_FALCON_H__ | |
44003 | + | |
44004 | +#define EFHW_4K 0x00001000u | |
44005 | +#define EFHW_8K 0x00002000u | |
44006 | + | |
44007 | +/* include the autogenerated register definitions */ | |
44008 | + | |
44009 | +#include "ef_vi_falcon_core.h" | |
44010 | +#include "ef_vi_falcon_desc.h" | |
44011 | +#include "ef_vi_falcon_event.h" | |
44012 | + | |
44013 | + | |
44014 | +/*---------------------------------------------------------------------------- | |
44015 | + * | |
44016 | + * Helpers to turn bit shifts into dword shifts and check that the bit fields | |
44017 | + * haven't overflown the dword etc. Aim is to preserve consistency with the | |
44018 | + * autogenerated headers - once stable we could hard code. | |
44019 | + * | |
44020 | + *---------------------------------------------------------------------------*/ | |
44021 | + | |
44022 | +/* mask constructors */ | |
44023 | +#define __FALCON_MASK(WIDTH,T) ((((T)1) << (WIDTH)) - 1) | |
44024 | +#define __EFVI_MASK32(WIDTH) __FALCON_MASK((WIDTH),uint32_t) | |
44025 | +#define __EFVI_MASK64(WIDTH) __FALCON_MASK((WIDTH),uint64_t) | |
44026 | + | |
44027 | +#define __EFVI_FALCON_MASKFIELD32(LBN, WIDTH) ((uint32_t) \ | |
44028 | + (__EFVI_MASK32(WIDTH) << (LBN))) | |
44029 | + | |
44030 | +/* constructors for fields which span the first and second dwords */ | |
44031 | +#define __LW(LBN) (32 - (LBN)) | |
44032 | +#define LOW(v, LBN, WIDTH) ((uint32_t) \ | |
44033 | + (((v) & __EFVI_MASK64(__LW((LBN)))) << (LBN))) | |
44034 | +#define HIGH(v, LBN, WIDTH) ((uint32_t)(((v) >> __LW((LBN))) & \ | |
44035 | + __EFVI_MASK64((WIDTH - __LW((LBN)))))) | |
44036 | +/* constructors for fields within the second dword */ | |
44037 | +#define __DW2(LBN) ((LBN) - 32) | |
44038 | + | |
44039 | +/* constructors for fields which span the second and third dwords */ | |
44040 | +#define __LW2(LBN) (64 - (LBN)) | |
44041 | +#define LOW2(v, LBN, WIDTH) ((uint32_t) \ | |
44042 | + (((v) & __EFVI_MASK64(__LW2((LBN)))) << ((LBN) - 32))) | |
44043 | +#define HIGH2(v, LBN, WIDTH) ((uint32_t) \ | |
44044 | + (((v) >> __LW2((LBN))) & __EFVI_MASK64((WIDTH - __LW2((LBN)))))) | |
44045 | + | |
44046 | +/* constructors for fields within the third dword */ | |
44047 | +#define __DW3(LBN) ((LBN) - 64) | |
44048 | + | |
44049 | + | |
44050 | +/* constructors for fields which span the third and fourth dwords */ | |
44051 | +#define __LW3(LBN) (96 - (LBN)) | |
44052 | +#define LOW3(v, LBN, WIDTH) ((uint32_t) \ | |
44053 | + (((v) & __EFVI_MASK64(__LW3((LBN)))) << ((LBN) - 64))) | |
44054 | +#define HIGH3(v, LBN, WIDTH) ((uint32_t) \ | |
44055 | + (((v) >> __LW3((LBN))) & __EFVI_MASK64((WIDTH - __LW3((LBN)))))) | |
44056 | + | |
44057 | +/* constructors for fields within the fourth dword */ | |
44058 | +#define __DW4(LBN) ((LBN) - 96) | |
44059 | + | |
44060 | +/* checks that the autogenerated headers are consistent with our model */ | |
44061 | +#define WIDTHCHCK(a, b) ef_assert((a) == (b)) | |
44062 | +#define RANGECHCK(v, WIDTH) \ | |
44063 | + ef_assert(((uint64_t)(v) & ~(__EFVI_MASK64((WIDTH)))) == 0) | |
44064 | + | |
44065 | +/* fields within the first dword */ | |
44066 | +#define DWCHCK(LBN, WIDTH) ef_assert(((LBN) >= 0) &&(((LBN)+(WIDTH)) <= 32)) | |
44067 | + | |
44068 | +/* fields which span the first and second dwords */ | |
44069 | +#define LWCHK(LBN, WIDTH) ef_assert(WIDTH >= __LW(LBN)) | |
44070 | + | |
44071 | +/*---------------------------------------------------------------------------- | |
44072 | + * | |
44073 | + * Buffer virtual addresses (4K buffers) | |
44074 | + * | |
44075 | + *---------------------------------------------------------------------------*/ | |
44076 | + | |
44077 | +/* Form a buffer virtual address from buffer ID and offset. If the offset | |
44078 | +** is larger than the buffer size, then the buffer indexed will be | |
44079 | +** calculated appropriately. It is the responsibility of the caller to | |
44080 | +** ensure that they have valid buffers programmed at that address. | |
44081 | +*/ | |
44082 | +#define EFVI_FALCON_VADDR_4K_S (12) | |
44083 | +#define EFVI_FALCON_VADDR_M 0xfffff /* post shift mask */ | |
44084 | + | |
44085 | + | |
44086 | +#define EFVI_FALCON_BUFFER_4K_ADDR(id,off) \ | |
44087 | + (((id) << EFVI_FALCON_VADDR_4K_S) + (off)) | |
44088 | + | |
44089 | +#define EFVI_FALCON_BUFFER_4K_PAGE(vaddr) \ | |
44090 | + (((vaddr) >> EFVI_FALCON_VADDR_4K_S) & EFVI_FALCON_VADDR_M) | |
44091 | + | |
44092 | +#define EFVI_FALCON_BUFFER_4K_OFF(vaddr) \ | |
44093 | + ((vaddr) & __EFVI_MASK32(EFVI_FALCON_VADDR_4K_S)) | |
44094 | + | |
44095 | + | |
44096 | +/*---------------------------------------------------------------------------- | |
44097 | + * | |
44098 | + * Masks | |
44099 | + * | |
44100 | + *---------------------------------------------------------------------------*/ | |
44101 | + | |
44102 | +#define EFVI_FALCON_CLOCK_ASIC_HZ (125000) | |
44103 | +#define EFVI_FALCON_CLOCK_FPGA_HZ (62500) | |
44104 | +#define EFVI_FALCON_CLOCK_HZ EFVI_FALCON_CLOCK_ASIC_HZ | |
44105 | + | |
44106 | + | |
44107 | +/*---------------------------------------------------------------------------- | |
44108 | + * | |
44109 | + * Timers | |
44110 | + * | |
44111 | + *---------------------------------------------------------------------------*/ | |
44112 | + | |
44113 | +/* Event-Queue Timer granularity - measured in us | |
44114 | + Given by: 4096 * 3 cycle * clock period */ | |
44115 | + | |
44116 | +#define EFVI_FALCON_EVQTIMER_PERIOD_US ((4096 * 3 * 1000) / EFVI_FALCON_CLOCK_HZ) | |
44117 | + | |
44118 | +/* mode bits */ | |
44119 | +#define EFVI_FALCON_TIMER_MODE_DIS 0 /* disabled */ | |
44120 | +#define EFVI_FALCON_TIMER_MODE_RUN 1 /* started counting right away */ | |
44121 | +#define EFVI_FALCON_TIMER_MODE_HOLD 2 /* trigger mode (user queues) */ | |
44122 | + | |
44123 | +#define EFVI_FALCON_EVQTIMER_HOLD (EFVI_FALCON_TIMER_MODE_HOLD << TIMER_MODE_LBN) | |
44124 | +#define EFVI_FALCON_EVQTIMER_RUN (EFVI_FALCON_TIMER_MODE_RUN << TIMER_MODE_LBN) | |
44125 | +#define EFVI_FALCON_EVQTIMER_DISABLE (EFVI_FALCON_TIMER_MODE_DIS << TIMER_MODE_LBN) | |
44126 | + | |
44127 | + | |
44128 | +/* ---- efhw_event_t helpers --- */ | |
44129 | + | |
44130 | +#define EFVI_FALCON_EVENT_CODE(evp) \ | |
44131 | + ((evp)->u64 & EFVI_FALCON_EVENT_CODE_MASK) | |
44132 | + | |
44133 | +#define EFVI_FALCON_EVENT_SW_DATA_MASK 0x0000ffff | |
44134 | + | |
44135 | +#define __EFVI_FALCON_OPEN_MASK(WIDTH) ((((uint64_t)1) << (WIDTH)) - 1) | |
44136 | + | |
44137 | +#define EFVI_FALCON_EVENT_CODE_MASK \ | |
44138 | + (__EFVI_FALCON_OPEN_MASK(EV_CODE_WIDTH) << EV_CODE_LBN) | |
44139 | + | |
44140 | + | |
44141 | +#endif /* __EF_VI_FALCON_H__ */ | |
44142 | Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_core.h | |
44143 | =================================================================== | |
44144 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
44145 | +++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_core.h 2008-02-20 09:32:49.000000000 +0100 | |
44146 | @@ -0,0 +1,1075 @@ | |
44147 | + | |
44148 | +#define EFVI_FALCON_EXTENDED_P_BAR 1 | |
44149 | + | |
44150 | +//////////////---- Bus Interface Unit Registers C Header ----////////////// | |
44151 | +#define IOM_IND_ADR_REG_OFST 0x0 // IO-mapped indirect access address register | |
44152 | + #define IOM_AUTO_ADR_INC_EN_LBN 16 | |
44153 | + #define IOM_AUTO_ADR_INC_EN_WIDTH 1 | |
44154 | + #define IOM_IND_ADR_LBN 0 | |
44155 | + #define IOM_IND_ADR_WIDTH 16 | |
44156 | +#define IOM_IND_DAT_REG_OFST 0x4 // IO-mapped indirect access data register | |
44157 | + #define IOM_IND_DAT_LBN 0 | |
44158 | + #define IOM_IND_DAT_WIDTH 32 | |
44159 | +#define ADR_REGION_REG_KER_OFST 0x0 // Address region register | |
44160 | +#define ADR_REGION_REG_OFST 0x0 // Address region register | |
44161 | + #define ADR_REGION3_LBN 96 | |
44162 | + #define ADR_REGION3_WIDTH 18 | |
44163 | + #define ADR_REGION2_LBN 64 | |
44164 | + #define ADR_REGION2_WIDTH 18 | |
44165 | + #define ADR_REGION1_LBN 32 | |
44166 | + #define ADR_REGION1_WIDTH 18 | |
44167 | + #define ADR_REGION0_LBN 0 | |
44168 | + #define ADR_REGION0_WIDTH 18 | |
44169 | +#define INT_EN_REG_KER_OFST 0x10 // Kernel driver Interrupt enable register | |
44170 | + #define KER_INT_CHAR_LBN 4 | |
44171 | + #define KER_INT_CHAR_WIDTH 1 | |
44172 | + #define KER_INT_KER_LBN 3 | |
44173 | + #define KER_INT_KER_WIDTH 1 | |
44174 | + #define ILL_ADR_ERR_INT_EN_KER_LBN 2 | |
44175 | + #define ILL_ADR_ERR_INT_EN_KER_WIDTH 1 | |
44176 | + #define SRM_PERR_INT_EN_KER_LBN 1 | |
44177 | + #define SRM_PERR_INT_EN_KER_WIDTH 1 | |
44178 | + #define DRV_INT_EN_KER_LBN 0 | |
44179 | + #define DRV_INT_EN_KER_WIDTH 1 | |
44180 | +#define INT_EN_REG_CHAR_OFST 0x20 // Char Driver interrupt enable register | |
44181 | + #define CHAR_INT_CHAR_LBN 4 | |
44182 | + #define CHAR_INT_CHAR_WIDTH 1 | |
44183 | + #define CHAR_INT_KER_LBN 3 | |
44184 | + #define CHAR_INT_KER_WIDTH 1 | |
44185 | + #define ILL_ADR_ERR_INT_EN_CHAR_LBN 2 | |
44186 | + #define ILL_ADR_ERR_INT_EN_CHAR_WIDTH 1 | |
44187 | + #define SRM_PERR_INT_EN_CHAR_LBN 1 | |
44188 | + #define SRM_PERR_INT_EN_CHAR_WIDTH 1 | |
44189 | + #define DRV_INT_EN_CHAR_LBN 0 | |
44190 | + #define DRV_INT_EN_CHAR_WIDTH 1 | |
44191 | +#define INT_ADR_REG_KER_OFST 0x30 // Interrupt host address for Kernel driver | |
44192 | + #define INT_ADR_KER_LBN 0 | |
44193 | + #define INT_ADR_KER_WIDTH 64 | |
44194 | + #define DRV_INT_KER_LBN 32 | |
44195 | + #define DRV_INT_KER_WIDTH 1 | |
44196 | + #define EV_FF_HALF_INT_KER_LBN 3 | |
44197 | + #define EV_FF_HALF_INT_KER_WIDTH 1 | |
44198 | + #define EV_FF_FULL_INT_KER_LBN 2 | |
44199 | + #define EV_FF_FULL_INT_KER_WIDTH 1 | |
44200 | + #define ILL_ADR_ERR_INT_KER_LBN 1 | |
44201 | + #define ILL_ADR_ERR_INT_KER_WIDTH 1 | |
44202 | + #define SRAM_PERR_INT_KER_LBN 0 | |
44203 | + #define SRAM_PERR_INT_KER_WIDTH 1 | |
44204 | +#define INT_ADR_REG_CHAR_OFST 0x40 // Interrupt host address for Char driver | |
44205 | + #define INT_ADR_CHAR_LBN 0 | |
44206 | + #define INT_ADR_CHAR_WIDTH 64 | |
44207 | + #define DRV_INT_CHAR_LBN 32 | |
44208 | + #define DRV_INT_CHAR_WIDTH 1 | |
44209 | + #define EV_FF_HALF_INT_CHAR_LBN 3 | |
44210 | + #define EV_FF_HALF_INT_CHAR_WIDTH 1 | |
44211 | + #define EV_FF_FULL_INT_CHAR_LBN 2 | |
44212 | + #define EV_FF_FULL_INT_CHAR_WIDTH 1 | |
44213 | + #define ILL_ADR_ERR_INT_CHAR_LBN 1 | |
44214 | + #define ILL_ADR_ERR_INT_CHAR_WIDTH 1 | |
44215 | + #define SRAM_PERR_INT_CHAR_LBN 0 | |
44216 | + #define SRAM_PERR_INT_CHAR_WIDTH 1 | |
44217 | +#define INT_ISR0_B0_OFST 0x90 // B0 only | |
44218 | +#define INT_ISR1_B0_OFST 0xA0 | |
44219 | +#define INT_ACK_REG_KER_A1_OFST 0x50 // Kernel interrupt acknowledge register | |
44220 | + #define RESERVED_LBN 0 | |
44221 | + #define RESERVED_WIDTH 32 | |
44222 | +#define INT_ACK_REG_CHAR_A1_OFST 0x60 // CHAR interrupt acknowledge register | |
44223 | + #define RESERVED_LBN 0 | |
44224 | + #define RESERVED_WIDTH 32 | |
44225 | +//////////////---- Global CSR Registers C Header ----////////////// | |
44226 | +#define STRAP_REG_KER_OFST 0x200 // ASIC strap status register | |
44227 | +#define STRAP_REG_OFST 0x200 // ASIC strap status register | |
44228 | + #define ONCHIP_SRAM_LBN 16 | |
44229 | + #define ONCHIP_SRAM_WIDTH 0 | |
44230 | + #define STRAP_ISCSI_EN_LBN 3 | |
44231 | + #define STRAP_ISCSI_EN_WIDTH 1 | |
44232 | + #define STRAP_PINS_LBN 0 | |
44233 | + #define STRAP_PINS_WIDTH 3 | |
44234 | +#define GPIO_CTL_REG_KER_OFST 0x210 // GPIO control register | |
44235 | +#define GPIO_CTL_REG_OFST 0x210 // GPIO control register | |
44236 | + #define GPIO_OEN_LBN 24 | |
44237 | + #define GPIO_OEN_WIDTH 4 | |
44238 | + #define GPIO_OUT_LBN 16 | |
44239 | + #define GPIO_OUT_WIDTH 4 | |
44240 | + #define GPIO_IN_LBN 8 | |
44241 | + #define GPIO_IN_WIDTH 4 | |
44242 | + #define GPIO_PWRUP_VALUE_LBN 0 | |
44243 | + #define GPIO_PWRUP_VALUE_WIDTH 4 | |
44244 | +#define GLB_CTL_REG_KER_OFST 0x220 // Global control register | |
44245 | +#define GLB_CTL_REG_OFST 0x220 // Global control register | |
44246 | + #define SWRST_LBN 0 | |
44247 | + #define SWRST_WIDTH 1 | |
44248 | +#define FATAL_INTR_REG_KER_OFST 0x230 // Fatal interrupt register for Kernel | |
44249 | + #define PCI_BUSERR_INT_KER_EN_LBN 43 | |
44250 | + #define PCI_BUSERR_INT_KER_EN_WIDTH 1 | |
44251 | + #define SRAM_OOB_INT_KER_EN_LBN 42 | |
44252 | + #define SRAM_OOB_INT_KER_EN_WIDTH 1 | |
44253 | + #define BUFID_OOB_INT_KER_EN_LBN 41 | |
44254 | + #define BUFID_OOB_INT_KER_EN_WIDTH 1 | |
44255 | + #define MEM_PERR_INT_KER_EN_LBN 40 | |
44256 | + #define MEM_PERR_INT_KER_EN_WIDTH 1 | |
44257 | + #define RBUF_OWN_INT_KER_EN_LBN 39 | |
44258 | + #define RBUF_OWN_INT_KER_EN_WIDTH 1 | |
44259 | + #define TBUF_OWN_INT_KER_EN_LBN 38 | |
44260 | + #define TBUF_OWN_INT_KER_EN_WIDTH 1 | |
44261 | + #define RDESCQ_OWN_INT_KER_EN_LBN 37 | |
44262 | + #define RDESCQ_OWN_INT_KER_EN_WIDTH 1 | |
44263 | + #define TDESCQ_OWN_INT_KER_EN_LBN 36 | |
44264 | + #define TDESCQ_OWN_INT_KER_EN_WIDTH 1 | |
44265 | + #define EVQ_OWN_INT_KER_EN_LBN 35 | |
44266 | + #define EVQ_OWN_INT_KER_EN_WIDTH 1 | |
44267 | + #define EVFF_OFLO_INT_KER_EN_LBN 34 | |
44268 | + #define EVFF_OFLO_INT_KER_EN_WIDTH 1 | |
44269 | + #define ILL_ADR_INT_KER_EN_LBN 33 | |
44270 | + #define ILL_ADR_INT_KER_EN_WIDTH 1 | |
44271 | + #define SRM_PERR_INT_KER_EN_LBN 32 | |
44272 | + #define SRM_PERR_INT_KER_EN_WIDTH 1 | |
44273 | + #define PCI_BUSERR_INT_KER_LBN 11 | |
44274 | + #define PCI_BUSERR_INT_KER_WIDTH 1 | |
44275 | + #define SRAM_OOB_INT_KER_LBN 10 | |
44276 | + #define SRAM_OOB_INT_KER_WIDTH 1 | |
44277 | + #define BUFID_OOB_INT_KER_LBN 9 | |
44278 | + #define BUFID_OOB_INT_KER_WIDTH 1 | |
44279 | + #define MEM_PERR_INT_KER_LBN 8 | |
44280 | + #define MEM_PERR_INT_KER_WIDTH 1 | |
44281 | + #define RBUF_OWN_INT_KER_LBN 7 | |
44282 | + #define RBUF_OWN_INT_KER_WIDTH 1 | |
44283 | + #define TBUF_OWN_INT_KER_LBN 6 | |
44284 | + #define TBUF_OWN_INT_KER_WIDTH 1 | |
44285 | + #define RDESCQ_OWN_INT_KER_LBN 5 | |
44286 | + #define RDESCQ_OWN_INT_KER_WIDTH 1 | |
44287 | + #define TDESCQ_OWN_INT_KER_LBN 4 | |
44288 | + #define TDESCQ_OWN_INT_KER_WIDTH 1 | |
44289 | + #define EVQ_OWN_INT_KER_LBN 3 | |
44290 | + #define EVQ_OWN_INT_KER_WIDTH 1 | |
44291 | + #define EVFF_OFLO_INT_KER_LBN 2 | |
44292 | + #define EVFF_OFLO_INT_KER_WIDTH 1 | |
44293 | + #define ILL_ADR_INT_KER_LBN 1 | |
44294 | + #define ILL_ADR_INT_KER_WIDTH 1 | |
44295 | + #define SRM_PERR_INT_KER_LBN 0 | |
44296 | + #define SRM_PERR_INT_KER_WIDTH 1 | |
44297 | +#define FATAL_INTR_REG_OFST 0x240 // Fatal interrupt register for Char | |
44298 | + #define PCI_BUSERR_INT_CHAR_EN_LBN 43 | |
44299 | + #define PCI_BUSERR_INT_CHAR_EN_WIDTH 1 | |
44300 | + #define SRAM_OOB_INT_CHAR_EN_LBN 42 | |
44301 | + #define SRAM_OOB_INT_CHAR_EN_WIDTH 1 | |
44302 | + #define BUFID_OOB_INT_CHAR_EN_LBN 41 | |
44303 | + #define BUFID_OOB_INT_CHAR_EN_WIDTH 1 | |
44304 | + #define MEM_PERR_INT_CHAR_EN_LBN 40 | |
44305 | + #define MEM_PERR_INT_CHAR_EN_WIDTH 1 | |
44306 | + #define RBUF_OWN_INT_CHAR_EN_LBN 39 | |
44307 | + #define RBUF_OWN_INT_CHAR_EN_WIDTH 1 | |
44308 | + #define TBUF_OWN_INT_CHAR_EN_LBN 38 | |
44309 | + #define TBUF_OWN_INT_CHAR_EN_WIDTH 1 | |
44310 | + #define RDESCQ_OWN_INT_CHAR_EN_LBN 37 | |
44311 | + #define RDESCQ_OWN_INT_CHAR_EN_WIDTH 1 | |
44312 | + #define TDESCQ_OWN_INT_CHAR_EN_LBN 36 | |
44313 | + #define TDESCQ_OWN_INT_CHAR_EN_WIDTH 1 | |
44314 | + #define EVQ_OWN_INT_CHAR_EN_LBN 35 | |
44315 | + #define EVQ_OWN_INT_CHAR_EN_WIDTH 1 | |
44316 | + #define EVFF_OFLO_INT_CHAR_EN_LBN 34 | |
44317 | + #define EVFF_OFLO_INT_CHAR_EN_WIDTH 1 | |
44318 | + #define ILL_ADR_INT_CHAR_EN_LBN 33 | |
44319 | + #define ILL_ADR_INT_CHAR_EN_WIDTH 1 | |
44320 | + #define SRM_PERR_INT_CHAR_EN_LBN 32 | |
44321 | + #define SRM_PERR_INT_CHAR_EN_WIDTH 1 | |
44322 | + #define FATAL_INTR_REG_EN_BITS 0xffffffffffffffffULL | |
44323 | + #define PCI_BUSERR_INT_CHAR_LBN 11 | |
44324 | + #define PCI_BUSERR_INT_CHAR_WIDTH 1 | |
44325 | + #define SRAM_OOB_INT_CHAR_LBN 10 | |
44326 | + #define SRAM_OOB_INT_CHAR_WIDTH 1 | |
44327 | + #define BUFID_OOB_INT_CHAR_LBN 9 | |
44328 | + #define BUFID_OOB_INT_CHAR_WIDTH 1 | |
44329 | + #define MEM_PERR_INT_CHAR_LBN 8 | |
44330 | + #define MEM_PERR_INT_CHAR_WIDTH 1 | |
44331 | + #define RBUF_OWN_INT_CHAR_LBN 7 | |
44332 | + #define RBUF_OWN_INT_CHAR_WIDTH 1 | |
44333 | + #define TBUF_OWN_INT_CHAR_LBN 6 | |
44334 | + #define TBUF_OWN_INT_CHAR_WIDTH 1 | |
44335 | + #define RDESCQ_OWN_INT_CHAR_LBN 5 | |
44336 | + #define RDESCQ_OWN_INT_CHAR_WIDTH 1 | |
44337 | + #define TDESCQ_OWN_INT_CHAR_LBN 4 | |
44338 | + #define TDESCQ_OWN_INT_CHAR_WIDTH 1 | |
44339 | + #define EVQ_OWN_INT_CHAR_LBN 3 | |
44340 | + #define EVQ_OWN_INT_CHAR_WIDTH 1 | |
44341 | + #define EVFF_OFLO_INT_CHAR_LBN 2 | |
44342 | + #define EVFF_OFLO_INT_CHAR_WIDTH 1 | |
44343 | + #define ILL_ADR_INT_CHAR_LBN 1 | |
44344 | + #define ILL_ADR_INT_CHAR_WIDTH 1 | |
44345 | + #define SRM_PERR_INT_CHAR_LBN 0 | |
44346 | + #define SRM_PERR_INT_CHAR_WIDTH 1 | |
44347 | +#define DP_CTRL_REG_OFST 0x250 // Datapath control register | |
44348 | + #define FLS_EVQ_ID_LBN 0 | |
44349 | + #define FLS_EVQ_ID_WIDTH 12 | |
44350 | +#define MEM_STAT_REG_KER_OFST 0x260 // Memory status register | |
44351 | +#define MEM_STAT_REG_OFST 0x260 // Memory status register | |
44352 | + #define MEM_PERR_VEC_LBN 53 | |
44353 | + #define MEM_PERR_VEC_WIDTH 38 | |
44354 | + #define MBIST_CORR_LBN 38 | |
44355 | + #define MBIST_CORR_WIDTH 15 | |
44356 | + #define MBIST_ERR_LBN 0 | |
44357 | + #define MBIST_ERR_WIDTH 38 | |
44358 | +#define DEBUG_REG_KER_OFST 0x270 // Debug register | |
44359 | +#define DEBUG_REG_OFST 0x270 // Debug register | |
44360 | + #define DEBUG_BLK_SEL2_LBN 47 | |
44361 | + #define DEBUG_BLK_SEL2_WIDTH 3 | |
44362 | + #define DEBUG_BLK_SEL1_LBN 44 | |
44363 | + #define DEBUG_BLK_SEL1_WIDTH 3 | |
44364 | + #define DEBUG_BLK_SEL0_LBN 41 | |
44365 | + #define DEBUG_BLK_SEL0_WIDTH 3 | |
44366 | + #define MISC_DEBUG_ADDR_LBN 36 | |
44367 | + #define MISC_DEBUG_ADDR_WIDTH 5 | |
44368 | + #define SERDES_DEBUG_ADDR_LBN 31 | |
44369 | + #define SERDES_DEBUG_ADDR_WIDTH 5 | |
44370 | + #define EM_DEBUG_ADDR_LBN 26 | |
44371 | + #define EM_DEBUG_ADDR_WIDTH 5 | |
44372 | + #define SR_DEBUG_ADDR_LBN 21 | |
44373 | + #define SR_DEBUG_ADDR_WIDTH 5 | |
44374 | + #define EV_DEBUG_ADDR_LBN 16 | |
44375 | + #define EV_DEBUG_ADDR_WIDTH 5 | |
44376 | + #define RX_DEBUG_ADDR_LBN 11 | |
44377 | + #define RX_DEBUG_ADDR_WIDTH 5 | |
44378 | + #define TX_DEBUG_ADDR_LBN 6 | |
44379 | + #define TX_DEBUG_ADDR_WIDTH 5 | |
44380 | + #define BIU_DEBUG_ADDR_LBN 1 | |
44381 | + #define BIU_DEBUG_ADDR_WIDTH 5 | |
44382 | + #define DEBUG_EN_LBN 0 | |
44383 | + #define DEBUG_EN_WIDTH 1 | |
44384 | +#define DRIVER_REG0_KER_OFST 0x280 // Driver scratch register 0 | |
44385 | +#define DRIVER_REG0_OFST 0x280 // Driver scratch register 0 | |
44386 | + #define DRIVER_DW0_LBN 0 | |
44387 | + #define DRIVER_DW0_WIDTH 32 | |
44388 | +#define DRIVER_REG1_KER_OFST 0x290 // Driver scratch register 1 | |
44389 | +#define DRIVER_REG1_OFST 0x290 // Driver scratch register 1 | |
44390 | + #define DRIVER_DW1_LBN 0 | |
44391 | + #define DRIVER_DW1_WIDTH 32 | |
44392 | +#define DRIVER_REG2_KER_OFST 0x2A0 // Driver scratch register 2 | |
44393 | +#define DRIVER_REG2_OFST 0x2A0 // Driver scratch register 2 | |
44394 | + #define DRIVER_DW2_LBN 0 | |
44395 | + #define DRIVER_DW2_WIDTH 32 | |
44396 | +#define DRIVER_REG3_KER_OFST 0x2B0 // Driver scratch register 3 | |
44397 | +#define DRIVER_REG3_OFST 0x2B0 // Driver scratch register 3 | |
44398 | + #define DRIVER_DW3_LBN 0 | |
44399 | + #define DRIVER_DW3_WIDTH 32 | |
44400 | +#define DRIVER_REG4_KER_OFST 0x2C0 // Driver scratch register 4 | |
44401 | +#define DRIVER_REG4_OFST 0x2C0 // Driver scratch register 4 | |
44402 | + #define DRIVER_DW4_LBN 0 | |
44403 | + #define DRIVER_DW4_WIDTH 32 | |
44404 | +#define DRIVER_REG5_KER_OFST 0x2D0 // Driver scratch register 5 | |
44405 | +#define DRIVER_REG5_OFST 0x2D0 // Driver scratch register 5 | |
44406 | + #define DRIVER_DW5_LBN 0 | |
44407 | + #define DRIVER_DW5_WIDTH 32 | |
44408 | +#define DRIVER_REG6_KER_OFST 0x2E0 // Driver scratch register 6 | |
44409 | +#define DRIVER_REG6_OFST 0x2E0 // Driver scratch register 6 | |
44410 | + #define DRIVER_DW6_LBN 0 | |
44411 | + #define DRIVER_DW6_WIDTH 32 | |
44412 | +#define DRIVER_REG7_KER_OFST 0x2F0 // Driver scratch register 7 | |
44413 | +#define DRIVER_REG7_OFST 0x2F0 // Driver scratch register 7 | |
44414 | + #define DRIVER_DW7_LBN 0 | |
44415 | + #define DRIVER_DW7_WIDTH 32 | |
44416 | +#define ALTERA_BUILD_REG_OFST 0x300 // Altera build register | |
44417 | +#define ALTERA_BUILD_REG_OFST 0x300 // Altera build register | |
44418 | + #define ALTERA_BUILD_VER_LBN 0 | |
44419 | + #define ALTERA_BUILD_VER_WIDTH 32 | |
44420 | + | |
44421 | +/* so called CSR spare register | |
44422 | + - contains separate parity enable bits for the various internal memory blocks */ | |
44423 | +#define MEM_PARITY_ERR_EN_REG_KER 0x310 | |
44424 | +#define MEM_PARITY_ALL_BLOCKS_EN_LBN 64 | |
44425 | +#define MEM_PARITY_ALL_BLOCKS_EN_WIDTH 38 | |
44426 | +#define MEM_PARITY_TX_DATA_EN_LBN 72 | |
44427 | +#define MEM_PARITY_TX_DATA_EN_WIDTH 2 | |
44428 | + | |
44429 | +//////////////---- Event & Timer Module Registers C Header ----////////////// | |
44430 | + | |
44431 | +#if EFVI_FALCON_EXTENDED_P_BAR | |
44432 | +#define EVQ_RPTR_REG_KER_OFST 0x11B00 // Event queue read pointer register | |
44433 | +#else | |
44434 | +#define EVQ_RPTR_REG_KER_OFST 0x1B00 // Event queue read pointer register | |
44435 | +#endif | |
44436 | + | |
44437 | +#define EVQ_RPTR_REG_OFST 0xFA0000 // Event queue read pointer register array. | |
44438 | + #define EVQ_RPTR_LBN 0 | |
44439 | + #define EVQ_RPTR_WIDTH 15 | |
44440 | + | |
44441 | +#if EFVI_FALCON_EXTENDED_P_BAR | |
44442 | +#define EVQ_PTR_TBL_KER_OFST 0x11A00 // Event queue pointer table for kernel access | |
44443 | +#else | |
44444 | +#define EVQ_PTR_TBL_KER_OFST 0x1A00 // Event queue pointer table for kernel access | |
44445 | +#endif | |
44446 | + | |
44447 | +#define EVQ_PTR_TBL_CHAR_OFST 0xF60000 // Event queue pointer table for char direct access | |
44448 | + #define EVQ_WKUP_OR_INT_EN_LBN 39 | |
44449 | + #define EVQ_WKUP_OR_INT_EN_WIDTH 1 | |
44450 | + #define EVQ_NXT_WPTR_LBN 24 | |
44451 | + #define EVQ_NXT_WPTR_WIDTH 15 | |
44452 | + #define EVQ_EN_LBN 23 | |
44453 | + #define EVQ_EN_WIDTH 1 | |
44454 | + #define EVQ_SIZE_LBN 20 | |
44455 | + #define EVQ_SIZE_WIDTH 3 | |
44456 | + #define EVQ_BUF_BASE_ID_LBN 0 | |
44457 | + #define EVQ_BUF_BASE_ID_WIDTH 20 | |
44458 | +#define TIMER_CMD_REG_KER_OFST 0x420 // Timer table for kernel access. Page-mapped | |
44459 | +#define TIMER_CMD_REG_PAGE4_OFST 0x8420 // Timer table for user-level access. Page-mapped. For lowest 1K queues. | |
44460 | +#define TIMER_CMD_REG_PAGE123K_OFST 0x1000420 // Timer table for user-level access. Page-mapped. For upper 3K queues. | |
44461 | +#define TIMER_TBL_OFST 0xF70000 // Timer table for char driver direct access | |
44462 | + #define TIMER_MODE_LBN 12 | |
44463 | + #define TIMER_MODE_WIDTH 2 | |
44464 | + #define TIMER_VAL_LBN 0 | |
44465 | + #define TIMER_VAL_WIDTH 12 | |
44466 | + #define TIMER_MODE_INT_HLDOFF 2 | |
44467 | + #define EVQ_BUF_SIZE_LBN 0 | |
44468 | + #define EVQ_BUF_SIZE_WIDTH 1 | |
44469 | +#define DRV_EV_REG_KER_OFST 0x440 // Driver generated event register | |
44470 | +#define DRV_EV_REG_OFST 0x440 // Driver generated event register | |
44471 | + #define DRV_EV_QID_LBN 64 | |
44472 | + #define DRV_EV_QID_WIDTH 12 | |
44473 | + #define DRV_EV_DATA_LBN 0 | |
44474 | + #define DRV_EV_DATA_WIDTH 64 | |
44475 | +#define EVQ_CTL_REG_KER_OFST 0x450 // Event queue control register | |
44476 | +#define EVQ_CTL_REG_OFST 0x450 // Event queue control register | |
44477 | + #define RX_EVQ_WAKEUP_MASK_B0_LBN 15 | |
44478 | + #define RX_EVQ_WAKEUP_MASK_B0_WIDTH 6 | |
44479 | + #define EVQ_OWNERR_CTL_LBN 14 | |
44480 | + #define EVQ_OWNERR_CTL_WIDTH 1 | |
44481 | + #define EVQ_FIFO_AF_TH_LBN 8 | |
44482 | + #define EVQ_FIFO_AF_TH_WIDTH 6 | |
44483 | + #define EVQ_FIFO_NOTAF_TH_LBN 0 | |
44484 | + #define EVQ_FIFO_NOTAF_TH_WIDTH 6 | |
44485 | +//////////////---- SRAM Module Registers C Header ----////////////// | |
44486 | +#define BUF_TBL_CFG_REG_KER_OFST 0x600 // Buffer table configuration register | |
44487 | +#define BUF_TBL_CFG_REG_OFST 0x600 // Buffer table configuration register | |
44488 | + #define BUF_TBL_MODE_LBN 3 | |
44489 | + #define BUF_TBL_MODE_WIDTH 1 | |
44490 | +#define SRM_RX_DC_CFG_REG_KER_OFST 0x610 // SRAM receive descriptor cache configuration register | |
44491 | +#define SRM_RX_DC_CFG_REG_OFST 0x610 // SRAM receive descriptor cache configuration register | |
44492 | + #define SRM_RX_DC_BASE_ADR_LBN 0 | |
44493 | + #define SRM_RX_DC_BASE_ADR_WIDTH 21 | |
44494 | +#define SRM_TX_DC_CFG_REG_KER_OFST 0x620 // SRAM transmit descriptor cache configuration register | |
44495 | +#define SRM_TX_DC_CFG_REG_OFST 0x620 // SRAM transmit descriptor cache configuration register | |
44496 | + #define SRM_TX_DC_BASE_ADR_LBN 0 | |
44497 | + #define SRM_TX_DC_BASE_ADR_WIDTH 21 | |
44498 | +#define SRM_CFG_REG_KER_OFST 0x630 // SRAM configuration register | |
44499 | +#define SRM_CFG_REG_OFST 0x630 // SRAM configuration register | |
44500 | + #define SRAM_OOB_ADR_INTEN_LBN 5 | |
44501 | + #define SRAM_OOB_ADR_INTEN_WIDTH 1 | |
44502 | + #define SRAM_OOB_BUF_INTEN_LBN 4 | |
44503 | + #define SRAM_OOB_BUF_INTEN_WIDTH 1 | |
44504 | + #define SRAM_BT_INIT_EN_LBN 3 | |
44505 | + #define SRAM_BT_INIT_EN_WIDTH 1 | |
44506 | + #define SRM_NUM_BANK_LBN 2 | |
44507 | + #define SRM_NUM_BANK_WIDTH 1 | |
44508 | + #define SRM_BANK_SIZE_LBN 0 | |
44509 | + #define SRM_BANK_SIZE_WIDTH 2 | |
44510 | +#define BUF_TBL_UPD_REG_KER_OFST 0x650 // Buffer table update register | |
44511 | +#define BUF_TBL_UPD_REG_OFST 0x650 // Buffer table update register | |
44512 | + #define BUF_UPD_CMD_LBN 63 | |
44513 | + #define BUF_UPD_CMD_WIDTH 1 | |
44514 | + #define BUF_CLR_CMD_LBN 62 | |
44515 | + #define BUF_CLR_CMD_WIDTH 1 | |
44516 | + #define BUF_CLR_END_ID_LBN 32 | |
44517 | + #define BUF_CLR_END_ID_WIDTH 20 | |
44518 | + #define BUF_CLR_START_ID_LBN 0 | |
44519 | + #define BUF_CLR_START_ID_WIDTH 20 | |
44520 | +#define SRM_UPD_EVQ_REG_KER_OFST 0x660 // Buffer table update register | |
44521 | +#define SRM_UPD_EVQ_REG_OFST 0x660 // Buffer table update register | |
44522 | + #define SRM_UPD_EVQ_ID_LBN 0 | |
44523 | + #define SRM_UPD_EVQ_ID_WIDTH 12 | |
44524 | +#define SRAM_PARITY_REG_KER_OFST 0x670 // SRAM parity register. | |
44525 | +#define SRAM_PARITY_REG_OFST 0x670 // SRAM parity register. | |
44526 | + #define FORCE_SRAM_PERR_LBN 0 | |
44527 | + #define FORCE_SRAM_PERR_WIDTH 1 | |
44528 | + | |
44529 | +#if EFVI_FALCON_EXTENDED_P_BAR | |
44530 | +#define BUF_HALF_TBL_KER_OFST 0x18000 // Buffer table in half buffer table mode direct access by kernel driver | |
44531 | +#else | |
44532 | +#define BUF_HALF_TBL_KER_OFST 0x8000 // Buffer table in half buffer table mode direct access by kernel driver | |
44533 | +#endif | |
44534 | + | |
44535 | + | |
44536 | +#define BUF_HALF_TBL_OFST 0x800000 // Buffer table in half buffer table mode direct access by char driver | |
44537 | + #define BUF_ADR_HBUF_ODD_LBN 44 | |
44538 | + #define BUF_ADR_HBUF_ODD_WIDTH 20 | |
44539 | + #define BUF_OWNER_ID_HBUF_ODD_LBN 32 | |
44540 | + #define BUF_OWNER_ID_HBUF_ODD_WIDTH 12 | |
44541 | + #define BUF_ADR_HBUF_EVEN_LBN 12 | |
44542 | + #define BUF_ADR_HBUF_EVEN_WIDTH 20 | |
44543 | + #define BUF_OWNER_ID_HBUF_EVEN_LBN 0 | |
44544 | + #define BUF_OWNER_ID_HBUF_EVEN_WIDTH 12 | |
44545 | + | |
44546 | + | |
44547 | +#if EFVI_FALCON_EXTENDED_P_BAR | |
44548 | +#define BUF_FULL_TBL_KER_OFST 0x18000 // Buffer table in full buffer table mode direct access by kernel driver | |
44549 | +#else | |
44550 | +#define BUF_FULL_TBL_KER_OFST 0x8000 // Buffer table in full buffer table mode direct access by kernel driver | |
44551 | +#endif | |
44552 | + | |
44553 | + | |
44554 | + | |
44555 | + | |
44556 | +#define BUF_FULL_TBL_OFST 0x800000 // Buffer table in full buffer table mode direct access by char driver | |
44557 | + #define IP_DAT_BUF_SIZE_LBN 50 | |
44558 | + #define IP_DAT_BUF_SIZE_WIDTH 1 | |
44559 | + #define BUF_ADR_REGION_LBN 48 | |
44560 | + #define BUF_ADR_REGION_WIDTH 2 | |
44561 | + #define BUF_ADR_FBUF_LBN 14 | |
44562 | + #define BUF_ADR_FBUF_WIDTH 34 | |
44563 | + #define BUF_OWNER_ID_FBUF_LBN 0 | |
44564 | + #define BUF_OWNER_ID_FBUF_WIDTH 14 | |
44565 | +#define SRM_DBG_REG_OFST 0x3000000 // SRAM debug access | |
44566 | + #define SRM_DBG_LBN 0 | |
44567 | + #define SRM_DBG_WIDTH 64 | |
44568 | +//////////////---- RX Datapath Registers C Header ----////////////// | |
44569 | + | |
44570 | +#define RX_CFG_REG_KER_OFST 0x800 // Receive configuration register | |
44571 | +#define RX_CFG_REG_OFST 0x800 // Receive configuration register | |
44572 | + | |
44573 | +#if !defined(FALCON_64K_RXFIFO) && !defined(FALCON_PRE_02020029) | |
44574 | +# if !defined(FALCON_128K_RXFIFO) | |
44575 | +# define FALCON_128K_RXFIFO | |
44576 | +# endif | |
44577 | +#endif | |
44578 | + | |
44579 | +#if defined(FALCON_128K_RXFIFO) | |
44580 | + | |
44581 | +/* new for B0 */ | |
44582 | + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 48 | |
44583 | + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1 | |
44584 | + #define RX_INGR_EN_B0_LBN 47 | |
44585 | + #define RX_INGR_EN_B0_WIDTH 1 | |
44586 | + #define RX_TOEP_IPV4_B0_LBN 46 | |
44587 | + #define RX_TOEP_IPV4_B0_WIDTH 1 | |
44588 | + #define RX_HASH_ALG_B0_LBN 45 | |
44589 | + #define RX_HASH_ALG_B0_WIDTH 1 | |
44590 | + #define RX_HASH_INSERT_HDR_B0_LBN 44 | |
44591 | + #define RX_HASH_INSERT_HDR_B0_WIDTH 1 | |
44592 | +/* moved for B0 */ | |
44593 | + #define RX_DESC_PUSH_EN_B0_LBN 43 | |
44594 | + #define RX_DESC_PUSH_EN_B0_WIDTH 1 | |
44595 | + #define RX_RDW_PATCH_EN_LBN 42 /* Non head of line blocking */ | |
44596 | + #define RX_RDW_PATCH_EN_WIDTH 1 | |
44597 | + #define RX_PCI_BURST_SIZE_B0_LBN 39 | |
44598 | + #define RX_PCI_BURST_SIZE_B0_WIDTH 3 | |
44599 | + #define RX_OWNERR_CTL_B0_LBN 38 | |
44600 | + #define RX_OWNERR_CTL_B0_WIDTH 1 | |
44601 | + #define RX_XON_TX_TH_B0_LBN 33 | |
44602 | + #define RX_XON_TX_TH_B0_WIDTH 5 | |
44603 | + #define RX_XOFF_TX_TH_B0_LBN 28 | |
44604 | + #define RX_XOFF_TX_TH_B0_WIDTH 5 | |
44605 | + #define RX_USR_BUF_SIZE_B0_LBN 19 | |
44606 | + #define RX_USR_BUF_SIZE_B0_WIDTH 9 | |
44607 | + #define RX_XON_MAC_TH_B0_LBN 10 | |
44608 | + #define RX_XON_MAC_TH_B0_WIDTH 9 | |
44609 | + #define RX_XOFF_MAC_TH_B0_LBN 1 | |
44610 | + #define RX_XOFF_MAC_TH_B0_WIDTH 9 | |
44611 | + #define RX_XOFF_MAC_EN_B0_LBN 0 | |
44612 | + #define RX_XOFF_MAC_EN_B0_WIDTH 1 | |
44613 | + | |
44614 | +#elif !defined(FALCON_PRE_02020029) | |
44615 | +/* new for B0 */ | |
44616 | + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 46 | |
44617 | + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1 | |
44618 | + #define RX_INGR_EN_B0_LBN 45 | |
44619 | + #define RX_INGR_EN_B0_WIDTH 1 | |
44620 | + #define RX_TOEP_IPV4_B0_LBN 44 | |
44621 | + #define RX_TOEP_IPV4_B0_WIDTH 1 | |
44622 | + #define RX_HASH_ALG_B0_LBN 43 | |
44623 | + #define RX_HASH_ALG_B0_WIDTH 41 | |
44624 | + #define RX_HASH_INSERT_HDR_B0_LBN 42 | |
44625 | + #define RX_HASH_INSERT_HDR_B0_WIDTH 1 | |
44626 | +/* moved for B0 */ | |
44627 | + #define RX_DESC_PUSH_EN_B0_LBN 41 | |
44628 | + #define RX_DESC_PUSH_EN_B0_WIDTH 1 | |
44629 | + #define RX_PCI_BURST_SIZE_B0_LBN 37 | |
44630 | + #define RX_PCI_BURST_SIZE_B0_WIDTH 3 | |
44631 | + #define RX_OWNERR_CTL_B0_LBN 36 | |
44632 | + #define RX_OWNERR_CTL_B0_WIDTH 1 | |
44633 | + #define RX_XON_TX_TH_B0_LBN 31 | |
44634 | + #define RX_XON_TX_TH_B0_WIDTH 5 | |
44635 | + #define RX_XOFF_TX_TH_B0_LBN 26 | |
44636 | + #define RX_XOFF_TX_TH_B0_WIDTH 5 | |
44637 | + #define RX_USR_BUF_SIZE_B0_LBN 17 | |
44638 | + #define RX_USR_BUF_SIZE_B0_WIDTH 9 | |
44639 | + #define RX_XON_MAC_TH_B0_LBN 9 | |
44640 | + #define RX_XON_MAC_TH_B0_WIDTH 8 | |
44641 | + #define RX_XOFF_MAC_TH_B0_LBN 1 | |
44642 | + #define RX_XOFF_MAC_TH_B0_WIDTH 8 | |
44643 | + #define RX_XOFF_MAC_EN_B0_LBN 0 | |
44644 | + #define RX_XOFF_MAC_EN_B0_WIDTH 1 | |
44645 | + | |
44646 | +#else | |
44647 | +/* new for B0 */ | |
44648 | + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 44 | |
44649 | + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1 | |
44650 | + #define RX_INGR_EN_B0_LBN 43 | |
44651 | + #define RX_INGR_EN_B0_WIDTH 1 | |
44652 | + #define RX_TOEP_IPV4_B0_LBN 42 | |
44653 | + #define RX_TOEP_IPV4_B0_WIDTH 1 | |
44654 | + #define RX_HASH_ALG_B0_LBN 41 | |
44655 | + #define RX_HASH_ALG_B0_WIDTH 41 | |
44656 | + #define RX_HASH_INSERT_HDR_B0_LBN 40 | |
44657 | + #define RX_HASH_INSERT_HDR_B0_WIDTH 1 | |
44658 | +/* moved for B0 */ | |
44659 | + #define RX_DESC_PUSH_EN_B0_LBN 35 | |
44660 | + #define RX_DESC_PUSH_EN_B0_WIDTH 1 | |
44661 | + #define RX_PCI_BURST_SIZE_B0_LBN 35 | |
44662 | + #define RX_PCI_BURST_SIZE_B0_WIDTH 2 | |
44663 | + #define RX_OWNERR_CTL_B0_LBN 34 | |
44664 | + #define RX_OWNERR_CTL_B0_WIDTH 1 | |
44665 | + #define RX_XON_TX_TH_B0_LBN 29 | |
44666 | + #define RX_XON_TX_TH_B0_WIDTH 5 | |
44667 | + #define RX_XOFF_TX_TH_B0_LBN 24 | |
44668 | + #define RX_XOFF_TX_TH_B0_WIDTH 5 | |
44669 | + #define RX_USR_BUF_SIZE_B0_LBN 15 | |
44670 | + #define RX_USR_BUF_SIZE_B0_WIDTH 9 | |
44671 | + #define RX_XON_MAC_TH_B0_LBN 8 | |
44672 | + #define RX_XON_MAC_TH_B0_WIDTH 7 | |
44673 | + #define RX_XOFF_MAC_TH_B0_LBN 1 | |
44674 | + #define RX_XOFF_MAC_TH_B0_WIDTH 7 | |
44675 | + #define RX_XOFF_MAC_EN_B0_LBN 0 | |
44676 | + #define RX_XOFF_MAC_EN_B0_WIDTH 1 | |
44677 | + | |
44678 | +#endif | |
44679 | + | |
44680 | +/* A0/A1 */ | |
44681 | + #define RX_PUSH_EN_A1_LBN 35 | |
44682 | + #define RX_PUSH_EN_A1_WIDTH 1 | |
44683 | + #define RX_PCI_BURST_SIZE_A1_LBN 31 | |
44684 | + #define RX_PCI_BURST_SIZE_A1_WIDTH 3 | |
44685 | + #define RX_OWNERR_CTL_A1_LBN 30 | |
44686 | + #define RX_OWNERR_CTL_A1_WIDTH 1 | |
44687 | + #define RX_XON_TX_TH_A1_LBN 25 | |
44688 | + #define RX_XON_TX_TH_A1_WIDTH 5 | |
44689 | + #define RX_XOFF_TX_TH_A1_LBN 20 | |
44690 | + #define RX_XOFF_TX_TH_A1_WIDTH 5 | |
44691 | + #define RX_USR_BUF_SIZE_A1_LBN 11 | |
44692 | + #define RX_USR_BUF_SIZE_A1_WIDTH 9 | |
44693 | + #define RX_XON_MAC_TH_A1_LBN 6 | |
44694 | + #define RX_XON_MAC_TH_A1_WIDTH 5 | |
44695 | + #define RX_XOFF_MAC_TH_A1_LBN 1 | |
44696 | + #define RX_XOFF_MAC_TH_A1_WIDTH 5 | |
44697 | + #define RX_XOFF_MAC_EN_A1_LBN 0 | |
44698 | + #define RX_XOFF_MAC_EN_A1_WIDTH 1 | |
44699 | + | |
44700 | +#define RX_FILTER_CTL_REG_OFST 0x810 // Receive filter control registers | |
44701 | + #define SCATTER_ENBL_NO_MATCH_Q_B0_LBN 40 | |
44702 | + #define SCATTER_ENBL_NO_MATCH_Q_B0_WIDTH 1 | |
44703 | + #define UDP_FULL_SRCH_LIMIT_LBN 32 | |
44704 | + #define UDP_FULL_SRCH_LIMIT_WIDTH 8 | |
44705 | + #define NUM_KER_LBN 24 | |
44706 | + #define NUM_KER_WIDTH 2 | |
44707 | + #define UDP_WILD_SRCH_LIMIT_LBN 16 | |
44708 | + #define UDP_WILD_SRCH_LIMIT_WIDTH 8 | |
44709 | + #define TCP_WILD_SRCH_LIMIT_LBN 8 | |
44710 | + #define TCP_WILD_SRCH_LIMIT_WIDTH 8 | |
44711 | + #define TCP_FULL_SRCH_LIMIT_LBN 0 | |
44712 | + #define TCP_FULL_SRCH_LIMIT_WIDTH 8 | |
44713 | +#define RX_FLUSH_DESCQ_REG_KER_OFST 0x820 // Receive flush descriptor queue register | |
44714 | +#define RX_FLUSH_DESCQ_REG_OFST 0x820 // Receive flush descriptor queue register | |
44715 | + #define RX_FLUSH_DESCQ_CMD_LBN 24 | |
44716 | + #define RX_FLUSH_DESCQ_CMD_WIDTH 1 | |
44717 | + #define RX_FLUSH_EVQ_ID_LBN 12 | |
44718 | + #define RX_FLUSH_EVQ_ID_WIDTH 12 | |
44719 | + #define RX_FLUSH_DESCQ_LBN 0 | |
44720 | + #define RX_FLUSH_DESCQ_WIDTH 12 | |
44721 | +#define RX_DESC_UPD_REG_KER_OFST 0x830 // Kernel receive descriptor update register. Page-mapped | |
44722 | +#define RX_DESC_UPD_REG_PAGE4_OFST 0x8830 // Char & user receive descriptor update register. Page-mapped. For lowest 1K queues. | |
44723 | +#define RX_DESC_UPD_REG_PAGE123K_OFST 0x1000830 // Char & user receive descriptor update register. Page-mapped. For upper 3K queues. | |
44724 | + #define RX_DESC_WPTR_LBN 96 | |
44725 | + #define RX_DESC_WPTR_WIDTH 12 | |
44726 | + #define RX_DESC_PUSH_CMD_LBN 95 | |
44727 | + #define RX_DESC_PUSH_CMD_WIDTH 1 | |
44728 | + #define RX_DESC_LBN 0 | |
44729 | + #define RX_DESC_WIDTH 64 | |
44730 | + #define RX_KER_DESC_LBN 0 | |
44731 | + #define RX_KER_DESC_WIDTH 64 | |
44732 | + #define RX_USR_DESC_LBN 0 | |
44733 | + #define RX_USR_DESC_WIDTH 32 | |
44734 | +#define RX_DC_CFG_REG_KER_OFST 0x840 // Receive descriptor cache configuration register | |
44735 | +#define RX_DC_CFG_REG_OFST 0x840 // Receive descriptor cache configuration register | |
44736 | + #define RX_DC_SIZE_LBN 0 | |
44737 | + #define RX_DC_SIZE_WIDTH 2 | |
44738 | +#define RX_DC_PF_WM_REG_KER_OFST 0x850 // Receive descriptor cache pre-fetch watermark register | |
44739 | +#define RX_DC_PF_WM_REG_OFST 0x850 // Receive descriptor cache pre-fetch watermark register | |
44740 | + #define RX_DC_PF_LWM_LO_LBN 0 | |
44741 | + #define RX_DC_PF_LWM_LO_WIDTH 6 | |
44742 | + | |
44743 | +#define RX_RSS_TKEY_B0_OFST 0x860 // RSS Toeplitz hash key (B0 only) | |
44744 | + | |
44745 | +#define RX_NODESC_DROP_REG 0x880 | |
44746 | + #define RX_NODESC_DROP_CNT_LBN 0 | |
44747 | + #define RX_NODESC_DROP_CNT_WIDTH 16 | |
44748 | + | |
44749 | +#define XM_TX_CFG_REG_OFST 0x1230 | |
44750 | + #define XM_AUTO_PAD_LBN 5 | |
44751 | + #define XM_AUTO_PAD_WIDTH 1 | |
44752 | + | |
44753 | +#define RX_FILTER_TBL0_OFST 0xF00000 // Receive filter table - even entries | |
44754 | + #define RSS_EN_0_B0_LBN 110 | |
44755 | + #define RSS_EN_0_B0_WIDTH 1 | |
44756 | + #define SCATTER_EN_0_B0_LBN 109 | |
44757 | + #define SCATTER_EN_0_B0_WIDTH 1 | |
44758 | + #define TCP_UDP_0_LBN 108 | |
44759 | + #define TCP_UDP_0_WIDTH 1 | |
44760 | + #define RXQ_ID_0_LBN 96 | |
44761 | + #define RXQ_ID_0_WIDTH 12 | |
44762 | + #define DEST_IP_0_LBN 64 | |
44763 | + #define DEST_IP_0_WIDTH 32 | |
44764 | + #define DEST_PORT_TCP_0_LBN 48 | |
44765 | + #define DEST_PORT_TCP_0_WIDTH 16 | |
44766 | + #define SRC_IP_0_LBN 16 | |
44767 | + #define SRC_IP_0_WIDTH 32 | |
44768 | + #define SRC_TCP_DEST_UDP_0_LBN 0 | |
44769 | + #define SRC_TCP_DEST_UDP_0_WIDTH 16 | |
44770 | +#define RX_FILTER_TBL1_OFST 0xF00010 // Receive filter table - odd entries | |
44771 | + #define RSS_EN_1_B0_LBN 110 | |
44772 | + #define RSS_EN_1_B0_WIDTH 1 | |
44773 | + #define SCATTER_EN_1_B0_LBN 109 | |
44774 | + #define SCATTER_EN_1_B0_WIDTH 1 | |
44775 | + #define TCP_UDP_1_LBN 108 | |
44776 | + #define TCP_UDP_1_WIDTH 1 | |
44777 | + #define RXQ_ID_1_LBN 96 | |
44778 | + #define RXQ_ID_1_WIDTH 12 | |
44779 | + #define DEST_IP_1_LBN 64 | |
44780 | + #define DEST_IP_1_WIDTH 32 | |
44781 | + #define DEST_PORT_TCP_1_LBN 48 | |
44782 | + #define DEST_PORT_TCP_1_WIDTH 16 | |
44783 | + #define SRC_IP_1_LBN 16 | |
44784 | + #define SRC_IP_1_WIDTH 32 | |
44785 | + #define SRC_TCP_DEST_UDP_1_LBN 0 | |
44786 | + #define SRC_TCP_DEST_UDP_1_WIDTH 16 | |
44787 | + | |
44788 | +#if EFVI_FALCON_EXTENDED_P_BAR | |
44789 | +#define RX_DESC_PTR_TBL_KER_OFST 0x11800 // Receive descriptor pointer kernel access | |
44790 | +#else | |
44791 | +#define RX_DESC_PTR_TBL_KER_OFST 0x1800 // Receive descriptor pointer kernel access | |
44792 | +#endif | |
44793 | + | |
44794 | + | |
44795 | +#define RX_DESC_PTR_TBL_OFST 0xF40000 // Receive descriptor pointer table | |
44796 | + #define RX_ISCSI_DDIG_EN_LBN 88 | |
44797 | + #define RX_ISCSI_DDIG_EN_WIDTH 1 | |
44798 | + #define RX_ISCSI_HDIG_EN_LBN 87 | |
44799 | + #define RX_ISCSI_HDIG_EN_WIDTH 1 | |
44800 | + #define RX_DESC_PREF_ACT_LBN 86 | |
44801 | + #define RX_DESC_PREF_ACT_WIDTH 1 | |
44802 | + #define RX_DC_HW_RPTR_LBN 80 | |
44803 | + #define RX_DC_HW_RPTR_WIDTH 6 | |
44804 | + #define RX_DESCQ_HW_RPTR_LBN 68 | |
44805 | + #define RX_DESCQ_HW_RPTR_WIDTH 12 | |
44806 | + #define RX_DESCQ_SW_WPTR_LBN 56 | |
44807 | + #define RX_DESCQ_SW_WPTR_WIDTH 12 | |
44808 | + #define RX_DESCQ_BUF_BASE_ID_LBN 36 | |
44809 | + #define RX_DESCQ_BUF_BASE_ID_WIDTH 20 | |
44810 | + #define RX_DESCQ_EVQ_ID_LBN 24 | |
44811 | + #define RX_DESCQ_EVQ_ID_WIDTH 12 | |
44812 | + #define RX_DESCQ_OWNER_ID_LBN 10 | |
44813 | + #define RX_DESCQ_OWNER_ID_WIDTH 14 | |
44814 | + #define RX_DESCQ_LABEL_LBN 5 | |
44815 | + #define RX_DESCQ_LABEL_WIDTH 5 | |
44816 | + #define RX_DESCQ_SIZE_LBN 3 | |
44817 | + #define RX_DESCQ_SIZE_WIDTH 2 | |
44818 | + #define RX_DESCQ_TYPE_LBN 2 | |
44819 | + #define RX_DESCQ_TYPE_WIDTH 1 | |
44820 | + #define RX_DESCQ_JUMBO_LBN 1 | |
44821 | + #define RX_DESCQ_JUMBO_WIDTH 1 | |
44822 | + #define RX_DESCQ_EN_LBN 0 | |
44823 | + #define RX_DESCQ_EN_WIDTH 1 | |
44824 | + | |
44825 | + | |
44826 | +#define RX_RSS_INDIR_TBL_B0_OFST 0xFB0000 // RSS indirection table (B0 only) | |
44827 | + #define RX_RSS_INDIR_ENT_B0_LBN 0 | |
44828 | + #define RX_RSS_INDIR_ENT_B0_WIDTH 6 | |
44829 | + | |
44830 | +//////////////---- TX Datapath Registers C Header ----////////////// | |
44831 | +#define TX_FLUSH_DESCQ_REG_KER_OFST 0xA00 // Transmit flush descriptor queue register | |
44832 | +#define TX_FLUSH_DESCQ_REG_OFST 0xA00 // Transmit flush descriptor queue register | |
44833 | + #define TX_FLUSH_DESCQ_CMD_LBN 12 | |
44834 | + #define TX_FLUSH_DESCQ_CMD_WIDTH 1 | |
44835 | + #define TX_FLUSH_DESCQ_LBN 0 | |
44836 | + #define TX_FLUSH_DESCQ_WIDTH 12 | |
44837 | +#define TX_DESC_UPD_REG_KER_OFST 0xA10 // Kernel transmit descriptor update register. Page-mapped | |
44838 | +#define TX_DESC_UPD_REG_PAGE4_OFST 0x8A10 // Char & user transmit descriptor update register. Page-mapped | |
44839 | +#define TX_DESC_UPD_REG_PAGE123K_OFST 0x1000A10 // Char & user transmit descriptor update register. Page-mapped | |
44840 | + #define TX_DESC_WPTR_LBN 96 | |
44841 | + #define TX_DESC_WPTR_WIDTH 12 | |
44842 | + #define TX_DESC_PUSH_CMD_LBN 95 | |
44843 | + #define TX_DESC_PUSH_CMD_WIDTH 1 | |
44844 | + #define TX_DESC_LBN 0 | |
44845 | + #define TX_DESC_WIDTH 95 | |
44846 | + #define TX_KER_DESC_LBN 0 | |
44847 | + #define TX_KER_DESC_WIDTH 64 | |
44848 | + #define TX_USR_DESC_LBN 0 | |
44849 | + #define TX_USR_DESC_WIDTH 64 | |
44850 | +#define TX_DC_CFG_REG_KER_OFST 0xA20 // Transmit descriptor cache configuration register | |
44851 | +#define TX_DC_CFG_REG_OFST 0xA20 // Transmit descriptor cache configuration register | |
44852 | + #define TX_DC_SIZE_LBN 0 | |
44853 | + #define TX_DC_SIZE_WIDTH 2 | |
44854 | + | |
44855 | +#if EFVI_FALCON_EXTENDED_P_BAR | |
44856 | +#define TX_DESC_PTR_TBL_KER_OFST 0x11900 // Transmit descriptor pointer. | |
44857 | +#else | |
44858 | +#define TX_DESC_PTR_TBL_KER_OFST 0x1900 // Transmit descriptor pointer. | |
44859 | +#endif | |
44860 | + | |
44861 | + | |
44862 | +#define TX_DESC_PTR_TBL_OFST 0xF50000 // Transmit descriptor pointer | |
44863 | + #define TX_NON_IP_DROP_DIS_B0_LBN 91 | |
44864 | + #define TX_NON_IP_DROP_DIS_B0_WIDTH 1 | |
44865 | + #define TX_IP_CHKSM_DIS_B0_LBN 90 | |
44866 | + #define TX_IP_CHKSM_DIS_B0_WIDTH 1 | |
44867 | + #define TX_TCP_CHKSM_DIS_B0_LBN 89 | |
44868 | + #define TX_TCP_CHKSM_DIS_B0_WIDTH 1 | |
44869 | + #define TX_DESCQ_EN_LBN 88 | |
44870 | + #define TX_DESCQ_EN_WIDTH 1 | |
44871 | + #define TX_ISCSI_DDIG_EN_LBN 87 | |
44872 | + #define TX_ISCSI_DDIG_EN_WIDTH 1 | |
44873 | + #define TX_ISCSI_HDIG_EN_LBN 86 | |
44874 | + #define TX_ISCSI_HDIG_EN_WIDTH 1 | |
44875 | + #define TX_DC_HW_RPTR_LBN 80 | |
44876 | + #define TX_DC_HW_RPTR_WIDTH 6 | |
44877 | + #define TX_DESCQ_HW_RPTR_LBN 68 | |
44878 | + #define TX_DESCQ_HW_RPTR_WIDTH 12 | |
44879 | + #define TX_DESCQ_SW_WPTR_LBN 56 | |
44880 | + #define TX_DESCQ_SW_WPTR_WIDTH 12 | |
44881 | + #define TX_DESCQ_BUF_BASE_ID_LBN 36 | |
44882 | + #define TX_DESCQ_BUF_BASE_ID_WIDTH 20 | |
44883 | + #define TX_DESCQ_EVQ_ID_LBN 24 | |
44884 | + #define TX_DESCQ_EVQ_ID_WIDTH 12 | |
44885 | + #define TX_DESCQ_OWNER_ID_LBN 10 | |
44886 | + #define TX_DESCQ_OWNER_ID_WIDTH 14 | |
44887 | + #define TX_DESCQ_LABEL_LBN 5 | |
44888 | + #define TX_DESCQ_LABEL_WIDTH 5 | |
44889 | + #define TX_DESCQ_SIZE_LBN 3 | |
44890 | + #define TX_DESCQ_SIZE_WIDTH 2 | |
44891 | + #define TX_DESCQ_TYPE_LBN 1 | |
44892 | + #define TX_DESCQ_TYPE_WIDTH 2 | |
44893 | + #define TX_DESCQ_FLUSH_LBN 0 | |
44894 | + #define TX_DESCQ_FLUSH_WIDTH 1 | |
44895 | +#define TX_CFG_REG_KER_OFST 0xA50 // Transmit configuration register | |
44896 | +#define TX_CFG_REG_OFST 0xA50 // Transmit configuration register | |
44897 | + #define TX_IP_ID_P1_OFS_LBN 32 | |
44898 | + #define TX_IP_ID_P1_OFS_WIDTH 15 | |
44899 | + #define TX_IP_ID_P0_OFS_LBN 16 | |
44900 | + #define TX_IP_ID_P0_OFS_WIDTH 15 | |
44901 | + #define TX_TURBO_EN_LBN 3 | |
44902 | + #define TX_TURBO_EN_WIDTH 1 | |
44903 | + #define TX_OWNERR_CTL_LBN 2 | |
44904 | + #define TX_OWNERR_CTL_WIDTH 2 | |
44905 | + #define TX_NON_IP_DROP_DIS_LBN 1 | |
44906 | + #define TX_NON_IP_DROP_DIS_WIDTH 1 | |
44907 | + #define TX_IP_ID_REP_EN_LBN 0 | |
44908 | + #define TX_IP_ID_REP_EN_WIDTH 1 | |
44909 | +#define TX_RESERVED_REG_KER_OFST 0xA80 // Transmit configuration register | |
44910 | +#define TX_RESERVED_REG_OFST 0xA80 // Transmit configuration register | |
44911 | + #define TX_CSR_PUSH_EN_LBN 89 | |
44912 | + #define TX_CSR_PUSH_EN_WIDTH 1 | |
44913 | + #define TX_RX_SPACER_LBN 64 | |
44914 | + #define TX_RX_SPACER_WIDTH 8 | |
44915 | + #define TX_SW_EV_EN_LBN 59 | |
44916 | + #define TX_SW_EV_EN_WIDTH 1 | |
44917 | + #define TX_RX_SPACER_EN_LBN 57 | |
44918 | + #define TX_RX_SPACER_EN_WIDTH 1 | |
44919 | + #define TX_CSR_PREF_WD_TMR_LBN 24 | |
44920 | + #define TX_CSR_PREF_WD_TMR_WIDTH 16 | |
44921 | + #define TX_CSR_ONLY1TAG_LBN 21 | |
44922 | + #define TX_CSR_ONLY1TAG_WIDTH 1 | |
44923 | + #define TX_PREF_THRESHOLD_LBN 19 | |
44924 | + #define TX_PREF_THRESHOLD_WIDTH 2 | |
44925 | + #define TX_ONE_PKT_PER_Q_LBN 18 | |
44926 | + #define TX_ONE_PKT_PER_Q_WIDTH 1 | |
44927 | + #define TX_DIS_NON_IP_EV_LBN 17 | |
44928 | + #define TX_DIS_NON_IP_EV_WIDTH 1 | |
44929 | + #define TX_DMA_SPACER_LBN 8 | |
44930 | + #define TX_DMA_SPACER_WIDTH 8 | |
44931 | + #define TX_FLUSH_MIN_LEN_EN_B0_LBN 7 | |
44932 | + #define TX_FLUSH_MIN_LEN_EN_B0_WIDTH 1 | |
44933 | + #define TX_TCP_DIS_A1_LBN 7 | |
44934 | + #define TX_TCP_DIS_A1_WIDTH 1 | |
44935 | + #define TX_IP_DIS_A1_LBN 6 | |
44936 | + #define TX_IP_DIS_A1_WIDTH 1 | |
44937 | + #define TX_MAX_CPL_LBN 2 | |
44938 | + #define TX_MAX_CPL_WIDTH 2 | |
44939 | + #define TX_MAX_PREF_LBN 0 | |
44940 | + #define TX_MAX_PREF_WIDTH 2 | |
44941 | +#define TX_VLAN_REG_OFST 0xAE0 // Transmit VLAN tag register | |
44942 | + #define TX_VLAN_EN_LBN 127 | |
44943 | + #define TX_VLAN_EN_WIDTH 1 | |
44944 | + #define TX_VLAN7_PORT1_EN_LBN 125 | |
44945 | + #define TX_VLAN7_PORT1_EN_WIDTH 1 | |
44946 | + #define TX_VLAN7_PORT0_EN_LBN 124 | |
44947 | + #define TX_VLAN7_PORT0_EN_WIDTH 1 | |
44948 | + #define TX_VLAN7_LBN 112 | |
44949 | + #define TX_VLAN7_WIDTH 12 | |
44950 | + #define TX_VLAN6_PORT1_EN_LBN 109 | |
44951 | + #define TX_VLAN6_PORT1_EN_WIDTH 1 | |
44952 | + #define TX_VLAN6_PORT0_EN_LBN 108 | |
44953 | + #define TX_VLAN6_PORT0_EN_WIDTH 1 | |
44954 | + #define TX_VLAN6_LBN 96 | |
44955 | + #define TX_VLAN6_WIDTH 12 | |
44956 | + #define TX_VLAN5_PORT1_EN_LBN 93 | |
44957 | + #define TX_VLAN5_PORT1_EN_WIDTH 1 | |
44958 | + #define TX_VLAN5_PORT0_EN_LBN 92 | |
44959 | + #define TX_VLAN5_PORT0_EN_WIDTH 1 | |
44960 | + #define TX_VLAN5_LBN 80 | |
44961 | + #define TX_VLAN5_WIDTH 12 | |
44962 | + #define TX_VLAN4_PORT1_EN_LBN 77 | |
44963 | + #define TX_VLAN4_PORT1_EN_WIDTH 1 | |
44964 | + #define TX_VLAN4_PORT0_EN_LBN 76 | |
44965 | + #define TX_VLAN4_PORT0_EN_WIDTH 1 | |
44966 | + #define TX_VLAN4_LBN 64 | |
44967 | + #define TX_VLAN4_WIDTH 12 | |
44968 | + #define TX_VLAN3_PORT1_EN_LBN 61 | |
44969 | + #define TX_VLAN3_PORT1_EN_WIDTH 1 | |
44970 | + #define TX_VLAN3_PORT0_EN_LBN 60 | |
44971 | + #define TX_VLAN3_PORT0_EN_WIDTH 1 | |
44972 | + #define TX_VLAN3_LBN 48 | |
44973 | + #define TX_VLAN3_WIDTH 12 | |
44974 | + #define TX_VLAN2_PORT1_EN_LBN 45 | |
44975 | + #define TX_VLAN2_PORT1_EN_WIDTH 1 | |
44976 | + #define TX_VLAN2_PORT0_EN_LBN 44 | |
44977 | + #define TX_VLAN2_PORT0_EN_WIDTH 1 | |
44978 | + #define TX_VLAN2_LBN 32 | |
44979 | + #define TX_VLAN2_WIDTH 12 | |
44980 | + #define TX_VLAN1_PORT1_EN_LBN 29 | |
44981 | + #define TX_VLAN1_PORT1_EN_WIDTH 1 | |
44982 | + #define TX_VLAN1_PORT0_EN_LBN 28 | |
44983 | + #define TX_VLAN1_PORT0_EN_WIDTH 1 | |
44984 | + #define TX_VLAN1_LBN 16 | |
44985 | + #define TX_VLAN1_WIDTH 12 | |
44986 | + #define TX_VLAN0_PORT1_EN_LBN 13 | |
44987 | + #define TX_VLAN0_PORT1_EN_WIDTH 1 | |
44988 | + #define TX_VLAN0_PORT0_EN_LBN 12 | |
44989 | + #define TX_VLAN0_PORT0_EN_WIDTH 1 | |
44990 | + #define TX_VLAN0_LBN 0 | |
44991 | + #define TX_VLAN0_WIDTH 12 | |
44992 | +#define TX_FIL_CTL_REG_OFST 0xAF0 // Transmit filter control register | |
44993 | + #define TX_MADR1_FIL_EN_LBN 65 | |
44994 | + #define TX_MADR1_FIL_EN_WIDTH 1 | |
44995 | + #define TX_MADR0_FIL_EN_LBN 64 | |
44996 | + #define TX_MADR0_FIL_EN_WIDTH 1 | |
44997 | + #define TX_IPFIL31_PORT1_EN_LBN 63 | |
44998 | + #define TX_IPFIL31_PORT1_EN_WIDTH 1 | |
44999 | + #define TX_IPFIL31_PORT0_EN_LBN 62 | |
45000 | + #define TX_IPFIL31_PORT0_EN_WIDTH 1 | |
45001 | + #define TX_IPFIL30_PORT1_EN_LBN 61 | |
45002 | + #define TX_IPFIL30_PORT1_EN_WIDTH 1 | |
45003 | + #define TX_IPFIL30_PORT0_EN_LBN 60 | |
45004 | + #define TX_IPFIL30_PORT0_EN_WIDTH 1 | |
45005 | + #define TX_IPFIL29_PORT1_EN_LBN 59 | |
45006 | + #define TX_IPFIL29_PORT1_EN_WIDTH 1 | |
45007 | + #define TX_IPFIL29_PORT0_EN_LBN 58 | |
45008 | + #define TX_IPFIL29_PORT0_EN_WIDTH 1 | |
45009 | + #define TX_IPFIL28_PORT1_EN_LBN 57 | |
45010 | + #define TX_IPFIL28_PORT1_EN_WIDTH 1 | |
45011 | + #define TX_IPFIL28_PORT0_EN_LBN 56 | |
45012 | + #define TX_IPFIL28_PORT0_EN_WIDTH 1 | |
45013 | + #define TX_IPFIL27_PORT1_EN_LBN 55 | |
45014 | + #define TX_IPFIL27_PORT1_EN_WIDTH 1 | |
45015 | + #define TX_IPFIL27_PORT0_EN_LBN 54 | |
45016 | + #define TX_IPFIL27_PORT0_EN_WIDTH 1 | |
45017 | + #define TX_IPFIL26_PORT1_EN_LBN 53 | |
45018 | + #define TX_IPFIL26_PORT1_EN_WIDTH 1 | |
45019 | + #define TX_IPFIL26_PORT0_EN_LBN 52 | |
45020 | + #define TX_IPFIL26_PORT0_EN_WIDTH 1 | |
45021 | + #define TX_IPFIL25_PORT1_EN_LBN 51 | |
45022 | + #define TX_IPFIL25_PORT1_EN_WIDTH 1 | |
45023 | + #define TX_IPFIL25_PORT0_EN_LBN 50 | |
45024 | + #define TX_IPFIL25_PORT0_EN_WIDTH 1 | |
45025 | + #define TX_IPFIL24_PORT1_EN_LBN 49 | |
45026 | + #define TX_IPFIL24_PORT1_EN_WIDTH 1 | |
45027 | + #define TX_IPFIL24_PORT0_EN_LBN 48 | |
45028 | + #define TX_IPFIL24_PORT0_EN_WIDTH 1 | |
45029 | + #define TX_IPFIL23_PORT1_EN_LBN 47 | |
45030 | + #define TX_IPFIL23_PORT1_EN_WIDTH 1 | |
45031 | + #define TX_IPFIL23_PORT0_EN_LBN 46 | |
45032 | + #define TX_IPFIL23_PORT0_EN_WIDTH 1 | |
45033 | + #define TX_IPFIL22_PORT1_EN_LBN 45 | |
45034 | + #define TX_IPFIL22_PORT1_EN_WIDTH 1 | |
45035 | + #define TX_IPFIL22_PORT0_EN_LBN 44 | |
45036 | + #define TX_IPFIL22_PORT0_EN_WIDTH 1 | |
45037 | + #define TX_IPFIL21_PORT1_EN_LBN 43 | |
45038 | + #define TX_IPFIL21_PORT1_EN_WIDTH 1 | |
45039 | + #define TX_IPFIL21_PORT0_EN_LBN 42 | |
45040 | + #define TX_IPFIL21_PORT0_EN_WIDTH 1 | |
45041 | + #define TX_IPFIL20_PORT1_EN_LBN 41 | |
45042 | + #define TX_IPFIL20_PORT1_EN_WIDTH 1 | |
45043 | + #define TX_IPFIL20_PORT0_EN_LBN 40 | |
45044 | + #define TX_IPFIL20_PORT0_EN_WIDTH 1 | |
45045 | + #define TX_IPFIL19_PORT1_EN_LBN 39 | |
45046 | + #define TX_IPFIL19_PORT1_EN_WIDTH 1 | |
45047 | + #define TX_IPFIL19_PORT0_EN_LBN 38 | |
45048 | + #define TX_IPFIL19_PORT0_EN_WIDTH 1 | |
45049 | + #define TX_IPFIL18_PORT1_EN_LBN 37 | |
45050 | + #define TX_IPFIL18_PORT1_EN_WIDTH 1 | |
45051 | + #define TX_IPFIL18_PORT0_EN_LBN 36 | |
45052 | + #define TX_IPFIL18_PORT0_EN_WIDTH 1 | |
45053 | + #define TX_IPFIL17_PORT1_EN_LBN 35 | |
45054 | + #define TX_IPFIL17_PORT1_EN_WIDTH 1 | |
45055 | + #define TX_IPFIL17_PORT0_EN_LBN 34 | |
45056 | + #define TX_IPFIL17_PORT0_EN_WIDTH 1 | |
45057 | + #define TX_IPFIL16_PORT1_EN_LBN 33 | |
45058 | + #define TX_IPFIL16_PORT1_EN_WIDTH 1 | |
45059 | + #define TX_IPFIL16_PORT0_EN_LBN 32 | |
45060 | + #define TX_IPFIL16_PORT0_EN_WIDTH 1 | |
45061 | + #define TX_IPFIL15_PORT1_EN_LBN 31 | |
45062 | + #define TX_IPFIL15_PORT1_EN_WIDTH 1 | |
45063 | + #define TX_IPFIL15_PORT0_EN_LBN 30 | |
45064 | + #define TX_IPFIL15_PORT0_EN_WIDTH 1 | |
45065 | + #define TX_IPFIL14_PORT1_EN_LBN 29 | |
45066 | + #define TX_IPFIL14_PORT1_EN_WIDTH 1 | |
45067 | + #define TX_IPFIL14_PORT0_EN_LBN 28 | |
45068 | + #define TX_IPFIL14_PORT0_EN_WIDTH 1 | |
45069 | + #define TX_IPFIL13_PORT1_EN_LBN 27 | |
45070 | + #define TX_IPFIL13_PORT1_EN_WIDTH 1 | |
45071 | + #define TX_IPFIL13_PORT0_EN_LBN 26 | |
45072 | + #define TX_IPFIL13_PORT0_EN_WIDTH 1 | |
45073 | + #define TX_IPFIL12_PORT1_EN_LBN 25 | |
45074 | + #define TX_IPFIL12_PORT1_EN_WIDTH 1 | |
45075 | + #define TX_IPFIL12_PORT0_EN_LBN 24 | |
45076 | + #define TX_IPFIL12_PORT0_EN_WIDTH 1 | |
45077 | + #define TX_IPFIL11_PORT1_EN_LBN 23 | |
45078 | + #define TX_IPFIL11_PORT1_EN_WIDTH 1 | |
45079 | + #define TX_IPFIL11_PORT0_EN_LBN 22 | |
45080 | + #define TX_IPFIL11_PORT0_EN_WIDTH 1 | |
45081 | + #define TX_IPFIL10_PORT1_EN_LBN 21 | |
45082 | + #define TX_IPFIL10_PORT1_EN_WIDTH 1 | |
45083 | + #define TX_IPFIL10_PORT0_EN_LBN 20 | |
45084 | + #define TX_IPFIL10_PORT0_EN_WIDTH 1 | |
45085 | + #define TX_IPFIL9_PORT1_EN_LBN 19 | |
45086 | + #define TX_IPFIL9_PORT1_EN_WIDTH 1 | |
45087 | + #define TX_IPFIL9_PORT0_EN_LBN 18 | |
45088 | + #define TX_IPFIL9_PORT0_EN_WIDTH 1 | |
45089 | + #define TX_IPFIL8_PORT1_EN_LBN 17 | |
45090 | + #define TX_IPFIL8_PORT1_EN_WIDTH 1 | |
45091 | + #define TX_IPFIL8_PORT0_EN_LBN 16 | |
45092 | + #define TX_IPFIL8_PORT0_EN_WIDTH 1 | |
45093 | + #define TX_IPFIL7_PORT1_EN_LBN 15 | |
45094 | + #define TX_IPFIL7_PORT1_EN_WIDTH 1 | |
45095 | + #define TX_IPFIL7_PORT0_EN_LBN 14 | |
45096 | + #define TX_IPFIL7_PORT0_EN_WIDTH 1 | |
45097 | + #define TX_IPFIL6_PORT1_EN_LBN 13 | |
45098 | + #define TX_IPFIL6_PORT1_EN_WIDTH 1 | |
45099 | + #define TX_IPFIL6_PORT0_EN_LBN 12 | |
45100 | + #define TX_IPFIL6_PORT0_EN_WIDTH 1 | |
45101 | + #define TX_IPFIL5_PORT1_EN_LBN 11 | |
45102 | + #define TX_IPFIL5_PORT1_EN_WIDTH 1 | |
45103 | + #define TX_IPFIL5_PORT0_EN_LBN 10 | |
45104 | + #define TX_IPFIL5_PORT0_EN_WIDTH 1 | |
45105 | + #define TX_IPFIL4_PORT1_EN_LBN 9 | |
45106 | + #define TX_IPFIL4_PORT1_EN_WIDTH 1 | |
45107 | + #define TX_IPFIL4_PORT0_EN_LBN 8 | |
45108 | + #define TX_IPFIL4_PORT0_EN_WIDTH 1 | |
45109 | + #define TX_IPFIL3_PORT1_EN_LBN 7 | |
45110 | + #define TX_IPFIL3_PORT1_EN_WIDTH 1 | |
45111 | + #define TX_IPFIL3_PORT0_EN_LBN 6 | |
45112 | + #define TX_IPFIL3_PORT0_EN_WIDTH 1 | |
45113 | + #define TX_IPFIL2_PORT1_EN_LBN 5 | |
45114 | + #define TX_IPFIL2_PORT1_EN_WIDTH 1 | |
45115 | + #define TX_IPFIL2_PORT0_EN_LBN 4 | |
45116 | + #define TX_IPFIL2_PORT0_EN_WIDTH 1 | |
45117 | + #define TX_IPFIL1_PORT1_EN_LBN 3 | |
45118 | + #define TX_IPFIL1_PORT1_EN_WIDTH 1 | |
45119 | + #define TX_IPFIL1_PORT0_EN_LBN 2 | |
45120 | + #define TX_IPFIL1_PORT0_EN_WIDTH 1 | |
45121 | + #define TX_IPFIL0_PORT1_EN_LBN 1 | |
45122 | + #define TX_IPFIL0_PORT1_EN_WIDTH 1 | |
45123 | + #define TX_IPFIL0_PORT0_EN_LBN 0 | |
45124 | + #define TX_IPFIL0_PORT0_EN_WIDTH 1 | |
45125 | +#define TX_IPFIL_TBL_OFST 0xB00 // Transmit IP source address filter table | |
45126 | + #define TX_IPFIL_MASK_LBN 32 | |
45127 | + #define TX_IPFIL_MASK_WIDTH 32 | |
45128 | + #define TX_IP_SRC_ADR_LBN 0 | |
45129 | + #define TX_IP_SRC_ADR_WIDTH 32 | |
45130 | +#define TX_PACE_REG_A1_OFST 0xF80000 // Transmit pace control register | |
45131 | +#define TX_PACE_REG_B0_OFST 0xA90 // Transmit pace control register | |
45132 | + #define TX_PACE_SB_AF_LBN 19 | |
45133 | + #define TX_PACE_SB_AF_WIDTH 10 | |
45134 | + #define TX_PACE_SB_NOTAF_LBN 9 | |
45135 | + #define TX_PACE_SB_NOTAF_WIDTH 10 | |
45136 | + #define TX_PACE_FB_BASE_LBN 5 | |
45137 | + #define TX_PACE_FB_BASE_WIDTH 4 | |
45138 | + #define TX_PACE_BIN_TH_LBN 0 | |
45139 | + #define TX_PACE_BIN_TH_WIDTH 5 | |
45140 | +#define TX_PACE_TBL_A1_OFST 0xF80040 // Transmit pacing table | |
45141 | +#define TX_PACE_TBL_FIRST_QUEUE_A1 4 | |
45142 | +#define TX_PACE_TBL_B0_OFST 0xF80000 // Transmit pacing table | |
45143 | +#define TX_PACE_TBL_FIRST_QUEUE_B0 0 | |
45144 | + #define TX_PACE_LBN 0 | |
45145 | + #define TX_PACE_WIDTH 5 | |
45146 | + | |
45147 | +//////////////---- EE/Flash Registers C Header ----////////////// | |
45148 | +#define EE_SPI_HCMD_REG_KER_OFST 0x100 // SPI host command register | |
45149 | +#define EE_SPI_HCMD_REG_OFST 0x100 // SPI host command register | |
45150 | + #define EE_SPI_HCMD_CMD_EN_LBN 31 | |
45151 | + #define EE_SPI_HCMD_CMD_EN_WIDTH 1 | |
45152 | + #define EE_WR_TIMER_ACTIVE_LBN 28 | |
45153 | + #define EE_WR_TIMER_ACTIVE_WIDTH 1 | |
45154 | + #define EE_SPI_HCMD_SF_SEL_LBN 24 | |
45155 | + #define EE_SPI_HCMD_SF_SEL_WIDTH 1 | |
45156 | + #define EE_SPI_HCMD_DABCNT_LBN 16 | |
45157 | + #define EE_SPI_HCMD_DABCNT_WIDTH 5 | |
45158 | + #define EE_SPI_HCMD_READ_LBN 15 | |
45159 | + #define EE_SPI_HCMD_READ_WIDTH 1 | |
45160 | + #define EE_SPI_HCMD_DUBCNT_LBN 12 | |
45161 | + #define EE_SPI_HCMD_DUBCNT_WIDTH 2 | |
45162 | + #define EE_SPI_HCMD_ADBCNT_LBN 8 | |
45163 | + #define EE_SPI_HCMD_ADBCNT_WIDTH 2 | |
45164 | + #define EE_SPI_HCMD_ENC_LBN 0 | |
45165 | + #define EE_SPI_HCMD_ENC_WIDTH 8 | |
45166 | +#define EE_SPI_HADR_REG_KER_OFST 0X110 // SPI host address register | |
45167 | +#define EE_SPI_HADR_REG_OFST 0X110 // SPI host address register | |
45168 | + #define EE_SPI_HADR_DUBYTE_LBN 24 | |
45169 | + #define EE_SPI_HADR_DUBYTE_WIDTH 8 | |
45170 | + #define EE_SPI_HADR_ADR_LBN 0 | |
45171 | + #define EE_SPI_HADR_ADR_WIDTH 24 | |
45172 | +#define EE_SPI_HDATA_REG_KER_OFST 0x120 // SPI host data register | |
45173 | +#define EE_SPI_HDATA_REG_OFST 0x120 // SPI host data register | |
45174 | + #define EE_SPI_HDATA3_LBN 96 | |
45175 | + #define EE_SPI_HDATA3_WIDTH 32 | |
45176 | + #define EE_SPI_HDATA2_LBN 64 | |
45177 | + #define EE_SPI_HDATA2_WIDTH 32 | |
45178 | + #define EE_SPI_HDATA1_LBN 32 | |
45179 | + #define EE_SPI_HDATA1_WIDTH 32 | |
45180 | + #define EE_SPI_HDATA0_LBN 0 | |
45181 | + #define EE_SPI_HDATA0_WIDTH 32 | |
45182 | +#define EE_BASE_PAGE_REG_KER_OFST 0x130 // Expansion ROM base mirror register | |
45183 | +#define EE_BASE_PAGE_REG_OFST 0x130 // Expansion ROM base mirror register | |
45184 | + #define EE_EXP_ROM_WINDOW_BASE_LBN 16 | |
45185 | + #define EE_EXP_ROM_WINDOW_BASE_WIDTH 13 | |
45186 | + #define EE_EXPROM_MASK_LBN 0 | |
45187 | + #define EE_EXPROM_MASK_WIDTH 13 | |
45188 | +#define EE_VPD_CFG0_REG_KER_OFST 0X140 // SPI/VPD configuration register | |
45189 | +#define EE_VPD_CFG0_REG_OFST 0X140 // SPI/VPD configuration register | |
45190 | + #define EE_SF_FASTRD_EN_LBN 127 | |
45191 | + #define EE_SF_FASTRD_EN_WIDTH 1 | |
45192 | + #define EE_SF_CLOCK_DIV_LBN 120 | |
45193 | + #define EE_SF_CLOCK_DIV_WIDTH 7 | |
45194 | + #define EE_VPD_WIP_POLL_LBN 119 | |
45195 | + #define EE_VPD_WIP_POLL_WIDTH 1 | |
45196 | + #define EE_VPDW_LENGTH_LBN 80 | |
45197 | + #define EE_VPDW_LENGTH_WIDTH 15 | |
45198 | + #define EE_VPDW_BASE_LBN 64 | |
45199 | + #define EE_VPDW_BASE_WIDTH 15 | |
45200 | + #define EE_VPD_WR_CMD_EN_LBN 56 | |
45201 | + #define EE_VPD_WR_CMD_EN_WIDTH 8 | |
45202 | + #define EE_VPD_BASE_LBN 32 | |
45203 | + #define EE_VPD_BASE_WIDTH 24 | |
45204 | + #define EE_VPD_LENGTH_LBN 16 | |
45205 | + #define EE_VPD_LENGTH_WIDTH 13 | |
45206 | + #define EE_VPD_AD_SIZE_LBN 8 | |
45207 | + #define EE_VPD_AD_SIZE_WIDTH 5 | |
45208 | + #define EE_VPD_ACCESS_ON_LBN 5 | |
45209 | + #define EE_VPD_ACCESS_ON_WIDTH 1 | |
45210 | +#define EE_VPD_SW_CNTL_REG_KER_OFST 0X150 // VPD access SW control register | |
45211 | +#define EE_VPD_SW_CNTL_REG_OFST 0X150 // VPD access SW control register | |
45212 | + #define EE_VPD_CYCLE_PENDING_LBN 31 | |
45213 | + #define EE_VPD_CYCLE_PENDING_WIDTH 1 | |
45214 | + #define EE_VPD_CYC_WRITE_LBN 28 | |
45215 | + #define EE_VPD_CYC_WRITE_WIDTH 1 | |
45216 | + #define EE_VPD_CYC_ADR_LBN 0 | |
45217 | + #define EE_VPD_CYC_ADR_WIDTH 15 | |
45218 | +#define EE_VPD_SW_DATA_REG_KER_OFST 0x160 // VPD access SW data register | |
45219 | +#define EE_VPD_SW_DATA_REG_OFST 0x160 // VPD access SW data register | |
45220 | + #define EE_VPD_CYC_DAT_LBN 0 | |
45221 | + #define EE_VPD_CYC_DAT_WIDTH 32 | |
45222 | Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_desc.h | |
45223 | =================================================================== | |
45224 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
45225 | +++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_desc.h 2008-02-20 09:32:49.000000000 +0100 | |
45226 | @@ -0,0 +1,43 @@ | |
45227 | +//////////////---- Descriptors C Headers ----////////////// | |
45228 | +// Receive Kernel IP Descriptor | |
45229 | + #define RX_KER_BUF_SIZE_LBN 48 | |
45230 | + #define RX_KER_BUF_SIZE_WIDTH 14 | |
45231 | + #define RX_KER_BUF_REGION_LBN 46 | |
45232 | + #define RX_KER_BUF_REGION_WIDTH 2 | |
45233 | + #define RX_KER_BUF_REGION0_DECODE 0 | |
45234 | + #define RX_KER_BUF_REGION1_DECODE 1 | |
45235 | + #define RX_KER_BUF_REGION2_DECODE 2 | |
45236 | + #define RX_KER_BUF_REGION3_DECODE 3 | |
45237 | + #define RX_KER_BUF_ADR_LBN 0 | |
45238 | + #define RX_KER_BUF_ADR_WIDTH 46 | |
45239 | +// Receive User IP Descriptor | |
45240 | + #define RX_USR_2BYTE_OFS_LBN 20 | |
45241 | + #define RX_USR_2BYTE_OFS_WIDTH 12 | |
45242 | + #define RX_USR_BUF_ID_LBN 0 | |
45243 | + #define RX_USR_BUF_ID_WIDTH 20 | |
45244 | +// Transmit Kernel IP Descriptor | |
45245 | + #define TX_KER_PORT_LBN 63 | |
45246 | + #define TX_KER_PORT_WIDTH 1 | |
45247 | + #define TX_KER_CONT_LBN 62 | |
45248 | + #define TX_KER_CONT_WIDTH 1 | |
45249 | + #define TX_KER_BYTE_CNT_LBN 48 | |
45250 | + #define TX_KER_BYTE_CNT_WIDTH 14 | |
45251 | + #define TX_KER_BUF_REGION_LBN 46 | |
45252 | + #define TX_KER_BUF_REGION_WIDTH 2 | |
45253 | + #define TX_KER_BUF_REGION0_DECODE 0 | |
45254 | + #define TX_KER_BUF_REGION1_DECODE 1 | |
45255 | + #define TX_KER_BUF_REGION2_DECODE 2 | |
45256 | + #define TX_KER_BUF_REGION3_DECODE 3 | |
45257 | + #define TX_KER_BUF_ADR_LBN 0 | |
45258 | + #define TX_KER_BUF_ADR_WIDTH 46 | |
45259 | +// Transmit User IP Descriptor | |
45260 | + #define TX_USR_PORT_LBN 47 | |
45261 | + #define TX_USR_PORT_WIDTH 1 | |
45262 | + #define TX_USR_CONT_LBN 46 | |
45263 | + #define TX_USR_CONT_WIDTH 1 | |
45264 | + #define TX_USR_BYTE_CNT_LBN 33 | |
45265 | + #define TX_USR_BYTE_CNT_WIDTH 13 | |
45266 | + #define TX_USR_BUF_ID_LBN 13 | |
45267 | + #define TX_USR_BUF_ID_WIDTH 20 | |
45268 | + #define TX_USR_BYTE_OFS_LBN 0 | |
45269 | + #define TX_USR_BYTE_OFS_WIDTH 13 | |
45270 | Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_event.h | |
45271 | =================================================================== | |
45272 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
45273 | +++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_falcon_event.h 2008-02-20 09:32:49.000000000 +0100 | |
45274 | @@ -0,0 +1,123 @@ | |
45275 | +//////////////---- Events Format C Header ----////////////// | |
45276 | +//////////////---- Event entry ----////////////// | |
45277 | + #define EV_CODE_LBN 60 | |
45278 | + #define EV_CODE_WIDTH 4 | |
45279 | + #define RX_IP_EV_DECODE 0 | |
45280 | + #define TX_IP_EV_DECODE 2 | |
45281 | + #define DRIVER_EV_DECODE 5 | |
45282 | + #define GLOBAL_EV_DECODE 6 | |
45283 | + #define DRV_GEN_EV_DECODE 7 | |
45284 | + #define EV_DATA_LBN 0 | |
45285 | + #define EV_DATA_WIDTH 60 | |
45286 | +//////////////---- Receive IP events for both Kernel & User event queues ----////////////// | |
45287 | + #define RX_EV_PKT_OK_LBN 56 | |
45288 | + #define RX_EV_PKT_OK_WIDTH 1 | |
45289 | + #define RX_EV_BUF_OWNER_ID_ERR_LBN 54 | |
45290 | + #define RX_EV_BUF_OWNER_ID_ERR_WIDTH 1 | |
45291 | + #define RX_EV_IP_HDR_CHKSUM_ERR_LBN 52 | |
45292 | + #define RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1 | |
45293 | + #define RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51 | |
45294 | + #define RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1 | |
45295 | + #define RX_EV_ETH_CRC_ERR_LBN 50 | |
45296 | + #define RX_EV_ETH_CRC_ERR_WIDTH 1 | |
45297 | + #define RX_EV_FRM_TRUNC_LBN 49 | |
45298 | + #define RX_EV_FRM_TRUNC_WIDTH 1 | |
45299 | + #define RX_EV_DRIB_NIB_LBN 48 | |
45300 | + #define RX_EV_DRIB_NIB_WIDTH 1 | |
45301 | + #define RX_EV_TOBE_DISC_LBN 47 | |
45302 | + #define RX_EV_TOBE_DISC_WIDTH 1 | |
45303 | + #define RX_EV_PKT_TYPE_LBN 44 | |
45304 | + #define RX_EV_PKT_TYPE_WIDTH 3 | |
45305 | + #define RX_EV_PKT_TYPE_ETH_DECODE 0 | |
45306 | + #define RX_EV_PKT_TYPE_LLC_DECODE 1 | |
45307 | + #define RX_EV_PKT_TYPE_JUMBO_DECODE 2 | |
45308 | + #define RX_EV_PKT_TYPE_VLAN_DECODE 3 | |
45309 | + #define RX_EV_PKT_TYPE_VLAN_LLC_DECODE 4 | |
45310 | + #define RX_EV_PKT_TYPE_VLAN_JUMBO_DECODE 5 | |
45311 | + #define RX_EV_HDR_TYPE_LBN 42 | |
45312 | + #define RX_EV_HDR_TYPE_WIDTH 2 | |
45313 | + #define RX_EV_HDR_TYPE_TCP_IPV4_DECODE 0 | |
45314 | + #define RX_EV_HDR_TYPE_UDP_IPV4_DECODE 1 | |
45315 | + #define RX_EV_HDR_TYPE_OTHER_IP_DECODE 2 | |
45316 | + #define RX_EV_HDR_TYPE_NON_IP_DECODE 3 | |
45317 | + #define RX_EV_DESC_Q_EMPTY_LBN 41 | |
45318 | + #define RX_EV_DESC_Q_EMPTY_WIDTH 1 | |
45319 | + #define RX_EV_MCAST_HASH_MATCH_LBN 40 | |
45320 | + #define RX_EV_MCAST_HASH_MATCH_WIDTH 1 | |
45321 | + #define RX_EV_MCAST_PKT_LBN 39 | |
45322 | + #define RX_EV_MCAST_PKT_WIDTH 1 | |
45323 | + #define RX_EV_Q_LABEL_LBN 32 | |
45324 | + #define RX_EV_Q_LABEL_WIDTH 5 | |
45325 | + #define RX_JUMBO_CONT_LBN 31 | |
45326 | + #define RX_JUMBO_CONT_WIDTH 1 | |
45327 | + #define RX_SOP_LBN 15 | |
45328 | + #define RX_SOP_WIDTH 1 | |
45329 | + #define RX_PORT_LBN 30 | |
45330 | + #define RX_PORT_WIDTH 1 | |
45331 | + #define RX_EV_BYTE_CNT_LBN 16 | |
45332 | + #define RX_EV_BYTE_CNT_WIDTH 14 | |
45333 | + #define RX_iSCSI_PKT_OK_LBN 14 | |
45334 | + #define RX_iSCSI_PKT_OK_WIDTH 1 | |
45335 | + #define RX_ISCSI_DDIG_ERR_LBN 13 | |
45336 | + #define RX_ISCSI_DDIG_ERR_WIDTH 1 | |
45337 | + #define RX_ISCSI_HDIG_ERR_LBN 12 | |
45338 | + #define RX_ISCSI_HDIG_ERR_WIDTH 1 | |
45339 | + #define RX_EV_DESC_PTR_LBN 0 | |
45340 | + #define RX_EV_DESC_PTR_WIDTH 12 | |
45341 | +//////////////---- Transmit IP events for both Kernel & User event queues ----////////////// | |
45342 | + #define TX_EV_PKT_ERR_LBN 38 | |
45343 | + #define TX_EV_PKT_ERR_WIDTH 1 | |
45344 | + #define TX_EV_PKT_TOO_BIG_LBN 37 | |
45345 | + #define TX_EV_PKT_TOO_BIG_WIDTH 1 | |
45346 | + #define TX_EV_Q_LABEL_LBN 32 | |
45347 | + #define TX_EV_Q_LABEL_WIDTH 5 | |
45348 | + #define TX_EV_PORT_LBN 16 | |
45349 | + #define TX_EV_PORT_WIDTH 1 | |
45350 | + #define TX_EV_WQ_FF_FULL_LBN 15 | |
45351 | + #define TX_EV_WQ_FF_FULL_WIDTH 1 | |
45352 | + #define TX_EV_BUF_OWNER_ID_ERR_LBN 14 | |
45353 | + #define TX_EV_BUF_OWNER_ID_ERR_WIDTH 1 | |
45354 | + #define TX_EV_COMP_LBN 12 | |
45355 | + #define TX_EV_COMP_WIDTH 1 | |
45356 | + #define TX_EV_DESC_PTR_LBN 0 | |
45357 | + #define TX_EV_DESC_PTR_WIDTH 12 | |
45358 | +//////////////---- Char or Kernel driver events ----////////////// | |
45359 | + #define DRIVER_EV_SUB_CODE_LBN 56 | |
45360 | + #define DRIVER_EV_SUB_CODE_WIDTH 4 | |
45361 | + #define TX_DESCQ_FLS_DONE_EV_DECODE 0x0 | |
45362 | + #define RX_DESCQ_FLS_DONE_EV_DECODE 0x1 | |
45363 | + #define EVQ_INIT_DONE_EV_DECODE 0x2 | |
45364 | + #define EVQ_NOT_EN_EV_DECODE 0x3 | |
45365 | + #define RX_DESCQ_FLSFF_OVFL_EV_DECODE 0x4 | |
45366 | + #define SRM_UPD_DONE_EV_DECODE 0x5 | |
45367 | + #define WAKE_UP_EV_DECODE 0x6 | |
45368 | + #define TX_PKT_NON_TCP_UDP_DECODE 0x9 | |
45369 | + #define TIMER_EV_DECODE 0xA | |
45370 | + #define RX_DSC_ERROR_EV_DECODE 0xE | |
45371 | + #define DRIVER_EV_TX_DESCQ_ID_LBN 0 | |
45372 | + #define DRIVER_EV_TX_DESCQ_ID_WIDTH 12 | |
45373 | + #define DRIVER_EV_RX_DESCQ_ID_LBN 0 | |
45374 | + #define DRIVER_EV_RX_DESCQ_ID_WIDTH 12 | |
45375 | + #define DRIVER_EV_EVQ_ID_LBN 0 | |
45376 | + #define DRIVER_EV_EVQ_ID_WIDTH 12 | |
45377 | + #define DRIVER_TMR_ID_LBN 0 | |
45378 | + #define DRIVER_TMR_ID_WIDTH 12 | |
45379 | + #define DRIVER_EV_SRM_UPD_LBN 0 | |
45380 | + #define DRIVER_EV_SRM_UPD_WIDTH 2 | |
45381 | + #define SRM_CLR_EV_DECODE 0 | |
45382 | + #define SRM_UPD_EV_DECODE 1 | |
45383 | + #define SRM_ILLCLR_EV_DECODE 2 | |
45384 | +//////////////---- Global events. Sent to both event queue 0 and 4. ----////////////// | |
45385 | + #define XFP_PHY_INTR_LBN 10 | |
45386 | + #define XFP_PHY_INTR_WIDTH 1 | |
45387 | + #define XG_PHY_INTR_LBN 9 | |
45388 | + #define XG_PHY_INTR_WIDTH 1 | |
45389 | + #define G_PHY1_INTR_LBN 8 | |
45390 | + #define G_PHY1_INTR_WIDTH 1 | |
45391 | + #define G_PHY0_INTR_LBN 7 | |
45392 | + #define G_PHY0_INTR_WIDTH 1 | |
45393 | +//////////////---- Driver generated events ----////////////// | |
45394 | + #define DRV_GEN_EV_CODE_LBN 60 | |
45395 | + #define DRV_GEN_EV_CODE_WIDTH 4 | |
45396 | + #define DRV_GEN_EV_DATA_LBN 0 | |
45397 | + #define DRV_GEN_EV_DATA_WIDTH 60 | |
45398 | Index: head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_internal.h | |
45399 | =================================================================== | |
45400 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
45401 | +++ head-2008-11-25/drivers/xen/sfc_netfront/ef_vi_internal.h 2008-02-20 09:32:49.000000000 +0100 | |
45402 | @@ -0,0 +1,256 @@ | |
45403 | +/**************************************************************************** | |
45404 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
45405 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
45406 | + * 9501 Jeronimo Road, Suite 250, | |
45407 | + * Irvine, CA 92618, USA | |
45408 | + * | |
45409 | + * Maintained by Solarflare Communications | |
45410 | + * <linux-xen-drivers@solarflare.com> | |
45411 | + * <onload-dev@solarflare.com> | |
45412 | + * | |
45413 | + * This program is free software; you can redistribute it and/or modify it | |
45414 | + * under the terms of the GNU General Public License version 2 as published | |
45415 | + * by the Free Software Foundation, incorporated herein by reference. | |
45416 | + * | |
45417 | + * This program is distributed in the hope that it will be useful, | |
45418 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
45419 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
45420 | + * GNU General Public License for more details. | |
45421 | + * | |
45422 | + * You should have received a copy of the GNU General Public License | |
45423 | + * along with this program; if not, write to the Free Software | |
45424 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
45425 | + **************************************************************************** | |
45426 | + */ | |
45427 | + | |
45428 | +/* | |
45429 | + * \author djr | |
45430 | + * \brief Really-and-truely-honestly internal stuff for libef. | |
45431 | + * \date 2004/06/13 | |
45432 | + */ | |
45433 | + | |
45434 | +/*! \cidoxg_include_ci_ul */ | |
45435 | +#ifndef __CI_EF_VI_INTERNAL_H__ | |
45436 | +#define __CI_EF_VI_INTERNAL_H__ | |
45437 | + | |
45438 | + | |
45439 | +/* These flags share space with enum ef_vi_flags. */ | |
45440 | +#define EF_VI_BUG5692_WORKAROUND 0x10000 | |
45441 | + | |
45442 | + | |
45443 | +/* *********************************************************************** | |
45444 | + * COMPILATION CONTROL FLAGS (see ef_vi.h for "workaround" controls) | |
45445 | + */ | |
45446 | + | |
45447 | +#define EF_VI_DO_MAGIC_CHECKS 1 | |
45448 | + | |
45449 | + | |
45450 | +/********************************************************************** | |
45451 | + * Headers | |
45452 | + */ | |
45453 | + | |
45454 | +#include <etherfabric/ef_vi.h> | |
45455 | +#include "sysdep.h" | |
45456 | +#include "ef_vi_falcon.h" | |
45457 | + | |
45458 | + | |
45459 | +/********************************************************************** | |
45460 | + * Debugging. | |
45461 | + */ | |
45462 | + | |
45463 | +#ifndef NDEBUG | |
45464 | + | |
45465 | +# define _ef_assert(exp, file, line) BUG_ON(!(exp)); | |
45466 | + | |
45467 | +# define _ef_assert2(exp, x, y, file, line) do { \ | |
45468 | + if (unlikely(!(exp))) \ | |
45469 | + BUG(); \ | |
45470 | + } while (0) | |
45471 | + | |
45472 | +#else | |
45473 | + | |
45474 | +# define _ef_assert(exp, file, line) | |
45475 | +# define _ef_assert2(e, x, y, file, line) | |
45476 | + | |
45477 | +#endif | |
45478 | + | |
45479 | +#define ef_assert(a) do{ _ef_assert((a),__FILE__,__LINE__); } while(0) | |
45480 | +#define ef_assert_equal(a,b) _ef_assert2((a)==(b),(a),(b),__FILE__,__LINE__) | |
45481 | +#define ef_assert_eq ef_assert_equal | |
45482 | +#define ef_assert_lt(a,b) _ef_assert2((a)<(b),(a),(b),__FILE__,__LINE__) | |
45483 | +#define ef_assert_le(a,b) _ef_assert2((a)<=(b),(a),(b),__FILE__,__LINE__) | |
45484 | +#define ef_assert_nequal(a,b) _ef_assert2((a)!=(b),(a),(b),__FILE__,__LINE__) | |
45485 | +#define ef_assert_ne ef_assert_nequal | |
45486 | +#define ef_assert_ge(a,b) _ef_assert2((a)>=(b),(a),(b),__FILE__,__LINE__) | |
45487 | +#define ef_assert_gt(a,b) _ef_assert2((a)>(b),(a),(b),__FILE__,__LINE__) | |
45488 | + | |
45489 | +/********************************************************************** | |
45490 | + * Debug checks. ****************************************************** | |
45491 | + **********************************************************************/ | |
45492 | + | |
45493 | +#ifdef NDEBUG | |
45494 | +# define EF_VI_MAGIC_SET(p, type) | |
45495 | +# define EF_VI_CHECK_VI(p) | |
45496 | +# define EF_VI_CHECK_EVENT_Q(p) | |
45497 | +# define EF_VI_CHECK_IOBUFSET(p) | |
45498 | +# define EF_VI_CHECK_FILTER(p) | |
45499 | +# define EF_VI_CHECK_SHMBUF(p) | |
45500 | +# define EF_VI_CHECK_PT_EP(p) | |
45501 | +#else | |
45502 | +# define EF_VI 0x3 | |
45503 | +# define EF_EPLOCK 0x6 | |
45504 | +# define EF_IOBUFSET 0x9 | |
45505 | +# define EF_FILTER 0xa | |
45506 | +# define EF_SHMBUF 0x11 | |
45507 | + | |
45508 | +# define EF_VI_MAGIC(p, type) \ | |
45509 | + (((unsigned)(type) << 28) | \ | |
45510 | + (((unsigned)(intptr_t)(p)) & 0x0fffffffu)) | |
45511 | + | |
45512 | +# if !EF_VI_DO_MAGIC_CHECKS | |
45513 | +# define EF_VI_MAGIC_SET(p, type) | |
45514 | +# define EF_VI_MAGIC_CHECK(p, type) | |
45515 | +# else | |
45516 | +# define EF_VI_MAGIC_SET(p, type) \ | |
45517 | + do { \ | |
45518 | + (p)->magic = EF_VI_MAGIC((p), (type)); \ | |
45519 | + } while (0) | |
45520 | + | |
45521 | +# define EF_VI_MAGIC_OKAY(p, type) \ | |
45522 | + ((p)->magic == EF_VI_MAGIC((p), (type))) | |
45523 | + | |
45524 | +# define EF_VI_MAGIC_CHECK(p, type) \ | |
45525 | + ef_assert(EF_VI_MAGIC_OKAY((p), (type))) | |
45526 | + | |
45527 | +#endif /* EF_VI_DO_MAGIC_CHECKS */ | |
45528 | + | |
45529 | +# define EF_VI_CHECK_VI(p) \ | |
45530 | + ef_assert(p); \ | |
45531 | + EF_VI_MAGIC_CHECK((p), EF_VI); | |
45532 | + | |
45533 | +# define EF_VI_CHECK_EVENT_Q(p) \ | |
45534 | + ef_assert(p); \ | |
45535 | + EF_VI_MAGIC_CHECK((p), EF_VI); \ | |
45536 | + ef_assert((p)->evq_base); \ | |
45537 | + ef_assert((p)->evq_mask); | |
45538 | + | |
45539 | +# define EF_VI_CHECK_PT_EP(p) \ | |
45540 | + ef_assert(p); \ | |
45541 | + EF_VI_MAGIC_CHECK((p), EF_VI); \ | |
45542 | + ef_assert((p)->ep_state); | |
45543 | + | |
45544 | +# define EF_VI_CHECK_IOBUFSET(p) \ | |
45545 | + ef_assert(p); \ | |
45546 | + EF_VI_MAGIC_CHECK((p), EF_IOBUFSET) | |
45547 | + | |
45548 | +# define EF_VI_CHECK_FILTER(p) \ | |
45549 | + ef_assert(p); \ | |
45550 | + EF_VI_MAGIC_CHECK((p), EF_FILTER); | |
45551 | + | |
45552 | +# define EF_VI_CHECK_SHMBUF(p) \ | |
45553 | + ef_assert(p); \ | |
45554 | + EF_VI_MAGIC_CHECK((p), EF_SHMBUF); | |
45555 | + | |
45556 | +#endif | |
45557 | + | |
45558 | +#ifndef NDEBUG | |
45559 | +# define EF_DRIVER_MAGIC 0x00f00ba4 | |
45560 | +# define EF_ASSERT_THIS_DRIVER_VALID(driver) \ | |
45561 | + do{ ef_assert(driver); \ | |
45562 | + EF_VI_MAGIC_CHECK((driver), EF_DRIVER_MAGIC); \ | |
45563 | + ef_assert((driver)->init); }while(0) | |
45564 | + | |
45565 | +# define EF_ASSERT_DRIVER_VALID() EF_ASSERT_THIS_DRIVER_VALID(&ci_driver) | |
45566 | +#else | |
45567 | +# define EF_ASSERT_THIS_DRIVER_VALID(driver) | |
45568 | +# define EF_ASSERT_DRIVER_VALID() | |
45569 | +#endif | |
45570 | + | |
45571 | + | |
45572 | +/* ************************************* | |
45573 | + * Power of 2 FIFO | |
45574 | + */ | |
45575 | + | |
45576 | +#define EF_VI_FIFO2_M(f, x) ((x) & ((f)->fifo_mask)) | |
45577 | +#define ef_vi_fifo2_valid(f) ((f) && (f)->fifo && (f)->fifo_mask > 0 && \ | |
45578 | + (f)->fifo_rd_i <= (f)->fifo_mask && \ | |
45579 | + (f)->fifo_wr_i <= (f)->fifo_mask && \ | |
45580 | + EF_VI_IS_POW2((f)->fifo_mask+1u)) | |
45581 | + | |
45582 | +#define ef_vi_fifo2_init(f, cap) \ | |
45583 | + do{ ef_assert(EF_VI_IS_POW2((cap) + 1)); \ | |
45584 | + (f)->fifo_rd_i = (f)->fifo_wr_i = 0u; \ | |
45585 | + (f)->fifo_mask = (cap); \ | |
45586 | + }while(0) | |
45587 | + | |
45588 | +#define ef_vi_fifo2_is_empty(f) ((f)->fifo_rd_i == (f)->fifo_wr_i) | |
45589 | +#define ef_vi_fifo2_capacity(f) ((f)->fifo_mask) | |
45590 | +#define ef_vi_fifo2_buf_size(f) ((f)->fifo_mask + 1u) | |
45591 | +#define ef_vi_fifo2_end(f) ((f)->fifo + ef_vi_fifo2_buf_size(f)) | |
45592 | +#define ef_vi_fifo2_peek(f) ((f)->fifo[(f)->fifo_rd_i]) | |
45593 | +#define ef_vi_fifo2_poke(f) ((f)->fifo[(f)->fifo_wr_i]) | |
45594 | +#define ef_vi_fifo2_num(f) EF_VI_FIFO2_M((f),(f)->fifo_wr_i-(f)->fifo_rd_i) | |
45595 | + | |
45596 | +#define ef_vi_fifo2_wr_prev(f) \ | |
45597 | + do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i - 1u); }while(0) | |
45598 | +#define ef_vi_fifo2_wr_next(f) \ | |
45599 | + do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i + 1u); }while(0) | |
45600 | +#define ef_vi_fifo2_rd_adv(f, n) \ | |
45601 | + do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + (n)); }while(0) | |
45602 | +#define ef_vi_fifo2_rd_prev(f) \ | |
45603 | + do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i - 1u); }while(0) | |
45604 | +#define ef_vi_fifo2_rd_next(f) \ | |
45605 | + do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + 1u); }while(0) | |
45606 | + | |
45607 | +#define ef_vi_fifo2_put(f, v) \ | |
45608 | + do{ ef_vi_fifo2_poke(f) = (v); ef_vi_fifo2_wr_next(f); }while(0) | |
45609 | +#define ef_vi_fifo2_get(f, pv) \ | |
45610 | + do{ *(pv) = ef_vi_fifo2_peek(f); ef_vi_fifo2_rd_next(f); }while(0) | |
45611 | + | |
45612 | + | |
45613 | +/* ********************************************************************* | |
45614 | + * Eventq handling | |
45615 | + */ | |
45616 | + | |
45617 | +typedef union { | |
45618 | + uint64_t u64; | |
45619 | + struct { | |
45620 | + uint32_t a; | |
45621 | + uint32_t b; | |
45622 | + } opaque; | |
45623 | +} ef_vi_event; | |
45624 | + | |
45625 | + | |
45626 | +#define EF_VI_EVENT_OFFSET(q, i) \ | |
45627 | + (((q)->evq_state->evq_ptr - (i) * sizeof(ef_vi_event)) & (q)->evq_mask) | |
45628 | + | |
45629 | +#define EF_VI_EVENT_PTR(q, i) \ | |
45630 | + ((ef_vi_event*) ((q)->evq_base + EF_VI_EVENT_OFFSET((q), (i)))) | |
45631 | + | |
45632 | +/* ********************************************************************* | |
45633 | + * Miscellaneous goodies | |
45634 | + */ | |
45635 | +#ifdef NDEBUG | |
45636 | +# define EF_VI_DEBUG(x) | |
45637 | +#else | |
45638 | +# define EF_VI_DEBUG(x) x | |
45639 | +#endif | |
45640 | + | |
45641 | +#define EF_VI_ROUND_UP(i, align) (((i)+(align)-1u) & ~((align)-1u)) | |
45642 | +#define EF_VI_ALIGN_FWD(p, align) (((p)+(align)-1u) & ~((align)-1u)) | |
45643 | +#define EF_VI_ALIGN_BACK(p, align) ((p) & ~((align)-1u)) | |
45644 | +#define EF_VI_PTR_ALIGN_BACK(p, align) \ | |
45645 | + ((char*)EF_VI_ALIGN_BACK(((intptr_t)(p)), ((intptr_t)(align)))) | |
45646 | +#define EF_VI_IS_POW2(x) ((x) && ! ((x) & ((x) - 1))) | |
45647 | + | |
45648 | + | |
45649 | +/* ******************************************************************** | |
45650 | + */ | |
45651 | + | |
45652 | +extern void falcon_vi_init(ef_vi*, void* vvis ) EF_VI_HF; | |
45653 | +extern void ef_eventq_state_init(ef_vi* evq) EF_VI_HF; | |
45654 | +extern void __ef_init(void) EF_VI_HF; | |
45655 | + | |
45656 | + | |
45657 | +#endif /* __CI_EF_VI_INTERNAL_H__ */ | |
45658 | + | |
45659 | Index: head-2008-11-25/drivers/xen/sfc_netfront/etherfabric/ef_vi.h | |
45660 | =================================================================== | |
45661 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
45662 | +++ head-2008-11-25/drivers/xen/sfc_netfront/etherfabric/ef_vi.h 2008-02-20 09:32:49.000000000 +0100 | |
45663 | @@ -0,0 +1,665 @@ | |
45664 | +/**************************************************************************** | |
45665 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
45666 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
45667 | + * 9501 Jeronimo Road, Suite 250, | |
45668 | + * Irvine, CA 92618, USA | |
45669 | + * | |
45670 | + * Maintained by Solarflare Communications | |
45671 | + * <linux-xen-drivers@solarflare.com> | |
45672 | + * <onload-dev@solarflare.com> | |
45673 | + * | |
45674 | + * This program is free software; you can redistribute it and/or modify it | |
45675 | + * under the terms of the GNU General Public License version 2 as published | |
45676 | + * by the Free Software Foundation, incorporated herein by reference. | |
45677 | + * | |
45678 | + * This program is distributed in the hope that it will be useful, | |
45679 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
45680 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
45681 | + * GNU General Public License for more details. | |
45682 | + * | |
45683 | + * You should have received a copy of the GNU General Public License | |
45684 | + * along with this program; if not, write to the Free Software | |
45685 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
45686 | + **************************************************************************** | |
45687 | + */ | |
45688 | + | |
45689 | +/* | |
45690 | + * \brief Virtual Interface | |
45691 | + * \date 2007/05/16 | |
45692 | + */ | |
45693 | + | |
45694 | +#ifndef __EFAB_EF_VI_H__ | |
45695 | +#define __EFAB_EF_VI_H__ | |
45696 | + | |
45697 | + | |
45698 | +/********************************************************************** | |
45699 | + * Primitive types **************************************************** | |
45700 | + **********************************************************************/ | |
45701 | + | |
45702 | +/* We standardise on the types from stdint.h and synthesise these types | |
45703 | + * for compilers/platforms that don't provide them */ | |
45704 | + | |
45705 | +# include <linux/types.h> | |
45706 | +# define EF_VI_ALIGN(x) __attribute__ ((aligned (x))) | |
45707 | +# define ef_vi_inline static inline | |
45708 | + | |
45709 | + | |
45710 | + | |
45711 | +/********************************************************************** | |
45712 | + * Types ************************************************************** | |
45713 | + **********************************************************************/ | |
45714 | + | |
45715 | +typedef uint32_t ef_eventq_ptr; | |
45716 | + | |
45717 | +typedef uint64_t ef_addr; | |
45718 | +typedef char* ef_vi_ioaddr_t; | |
45719 | + | |
45720 | +/********************************************************************** | |
45721 | + * ef_event *********************************************************** | |
45722 | + **********************************************************************/ | |
45723 | + | |
45724 | +/*! \i_ef_vi A DMA request identifier. | |
45725 | +** | |
45726 | +** This is an integer token specified by the transport and associated | |
45727 | +** with a DMA request. It is returned to the VI user with DMA completion | |
45728 | +** events. It is typically used to identify the buffer associated with | |
45729 | +** the transfer. | |
45730 | +*/ | |
45731 | +typedef int ef_request_id; | |
45732 | + | |
45733 | +typedef union { | |
45734 | + uint64_t u64[1]; | |
45735 | + uint32_t u32[2]; | |
45736 | +} ef_vi_qword; | |
45737 | + | |
45738 | +typedef ef_vi_qword ef_hw_event; | |
45739 | + | |
45740 | +#define EF_REQUEST_ID_BITS 16u | |
45741 | +#define EF_REQUEST_ID_MASK ((1u << EF_REQUEST_ID_BITS) - 1u) | |
45742 | + | |
45743 | +/*! \i_ef_event An [ef_event] is a token that identifies something that | |
45744 | +** has happened. Examples include packets received, packets transmitted | |
45745 | +** and errors. | |
45746 | +*/ | |
45747 | +typedef union { | |
45748 | + struct { | |
45749 | + ef_hw_event ev; | |
45750 | + unsigned type :16; | |
45751 | + } generic; | |
45752 | + struct { | |
45753 | + ef_hw_event ev; | |
45754 | + unsigned type :16; | |
45755 | + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/ | |
45756 | + unsigned q_id :16; | |
45757 | + unsigned len :16; | |
45758 | + unsigned flags :16; | |
45759 | + } rx; | |
45760 | + struct { /* This *must* have same layout as [rx]. */ | |
45761 | + ef_hw_event ev; | |
45762 | + unsigned type :16; | |
45763 | + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/ | |
45764 | + unsigned q_id :16; | |
45765 | + unsigned len :16; | |
45766 | + unsigned flags :16; | |
45767 | + unsigned subtype :16; | |
45768 | + } rx_discard; | |
45769 | + struct { | |
45770 | + ef_hw_event ev; | |
45771 | + unsigned type :16; | |
45772 | + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/ | |
45773 | + unsigned q_id :16; | |
45774 | + } tx; | |
45775 | + struct { | |
45776 | + ef_hw_event ev; | |
45777 | + unsigned type :16; | |
45778 | + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/ | |
45779 | + unsigned q_id :16; | |
45780 | + unsigned subtype :16; | |
45781 | + } tx_error; | |
45782 | + struct { | |
45783 | + ef_hw_event ev; | |
45784 | + unsigned type :16; | |
45785 | + unsigned q_id :16; | |
45786 | + } rx_no_desc_trunc; | |
45787 | + struct { | |
45788 | + ef_hw_event ev; | |
45789 | + unsigned type :16; | |
45790 | + unsigned data; | |
45791 | + } sw; | |
45792 | +} ef_event; | |
45793 | + | |
45794 | + | |
45795 | +#define EF_EVENT_TYPE(e) ((e).generic.type) | |
45796 | +enum { | |
45797 | + /** Good data was received. */ | |
45798 | + EF_EVENT_TYPE_RX, | |
45799 | + /** Packets have been sent. */ | |
45800 | + EF_EVENT_TYPE_TX, | |
45801 | + /** Data received and buffer consumed, but something is wrong. */ | |
45802 | + EF_EVENT_TYPE_RX_DISCARD, | |
45803 | + /** Transmit of packet failed. */ | |
45804 | + EF_EVENT_TYPE_TX_ERROR, | |
45805 | + /** Received packet was truncated due to lack of descriptors. */ | |
45806 | + EF_EVENT_TYPE_RX_NO_DESC_TRUNC, | |
45807 | + /** Software generated event. */ | |
45808 | + EF_EVENT_TYPE_SW, | |
45809 | + /** Event queue overflow. */ | |
45810 | + EF_EVENT_TYPE_OFLOW, | |
45811 | +}; | |
45812 | + | |
45813 | +#define EF_EVENT_RX_BYTES(e) ((e).rx.len) | |
45814 | +#define EF_EVENT_RX_Q_ID(e) ((e).rx.q_id) | |
45815 | +#define EF_EVENT_RX_CONT(e) ((e).rx.flags & EF_EVENT_FLAG_CONT) | |
45816 | +#define EF_EVENT_RX_SOP(e) ((e).rx.flags & EF_EVENT_FLAG_SOP) | |
45817 | +#define EF_EVENT_RX_ISCSI_OKAY(e) ((e).rx.flags & EF_EVENT_FLAG_ISCSI_OK) | |
45818 | +#define EF_EVENT_FLAG_SOP 0x1 | |
45819 | +#define EF_EVENT_FLAG_CONT 0x2 | |
45820 | +#define EF_EVENT_FLAG_ISCSI_OK 0x4 | |
45821 | + | |
45822 | +#define EF_EVENT_TX_Q_ID(e) ((e).tx.q_id) | |
45823 | + | |
45824 | +#define EF_EVENT_RX_DISCARD_Q_ID(e) ((e).rx_discard.q_id) | |
45825 | +#define EF_EVENT_RX_DISCARD_LEN(e) ((e).rx_discard.len) | |
45826 | +#define EF_EVENT_RX_DISCARD_TYPE(e) ((e).rx_discard.subtype) | |
45827 | +enum { | |
45828 | + EF_EVENT_RX_DISCARD_CSUM_BAD, | |
45829 | + EF_EVENT_RX_DISCARD_CRC_BAD, | |
45830 | + EF_EVENT_RX_DISCARD_TRUNC, | |
45831 | + EF_EVENT_RX_DISCARD_RIGHTS, | |
45832 | + EF_EVENT_RX_DISCARD_OTHER, | |
45833 | +}; | |
45834 | + | |
45835 | +#define EF_EVENT_TX_ERROR_Q_ID(e) ((e).tx_error.q_id) | |
45836 | +#define EF_EVENT_TX_ERROR_TYPE(e) ((e).tx_error.subtype) | |
45837 | +enum { | |
45838 | + EF_EVENT_TX_ERROR_RIGHTS, | |
45839 | + EF_EVENT_TX_ERROR_OFLOW, | |
45840 | + EF_EVENT_TX_ERROR_2BIG, | |
45841 | + EF_EVENT_TX_ERROR_BUS, | |
45842 | +}; | |
45843 | + | |
45844 | +#define EF_EVENT_RX_NO_DESC_TRUNC_Q_ID(e) ((e).rx_no_desc_trunc.q_id) | |
45845 | + | |
45846 | +#define EF_EVENT_SW_DATA_MASK 0xffff | |
45847 | +#define EF_EVENT_SW_DATA(e) ((e).sw.data) | |
45848 | + | |
45849 | +#define EF_EVENT_FMT "[ev:%x:%08x:%08x]" | |
45850 | +#define EF_EVENT_PRI_ARG(e) (unsigned) (e).generic.type, \ | |
45851 | + (unsigned) (e).generic.ev.u32[1], \ | |
45852 | + (unsigned) (e).generic.ev.u32[0] | |
45853 | + | |
45854 | +#define EF_GET_HW_EV(e) ((e).generic.ev) | |
45855 | +#define EF_GET_HW_EV_PTR(e) (&(e).generic.ev) | |
45856 | +#define EF_GET_HW_EV_U64(e) ((e).generic.ev.u64[0]) | |
45857 | + | |
45858 | + | |
45859 | +/* ***************** */ | |
45860 | + | |
45861 | +/*! Used by netif shared state. Must use types of explicit size. */ | |
45862 | +typedef struct { | |
45863 | + uint16_t rx_last_desc_ptr; /* for RX duplicates */ | |
45864 | + uint8_t bad_sop; /* bad SOP detected */ | |
45865 | + uint8_t frag_num; /* next fragment #, 0=>SOP */ | |
45866 | +} ef_rx_dup_state_t; | |
45867 | + | |
45868 | + | |
45869 | +/* Max number of ports on any SF NIC. */ | |
45870 | +#define EFAB_DMAQS_PER_EVQ_MAX 32 | |
45871 | + | |
45872 | +typedef struct { | |
45873 | + ef_eventq_ptr evq_ptr; | |
45874 | + int32_t trashed; | |
45875 | + ef_rx_dup_state_t rx_dup_state[EFAB_DMAQS_PER_EVQ_MAX]; | |
45876 | +} ef_eventq_state; | |
45877 | + | |
45878 | + | |
45879 | +/*! \i_ef_base [ef_iovec] is similar the standard [struct iovec]. An | |
45880 | +** array of these is used to designate a scatter/gather list of I/O | |
45881 | +** buffers. | |
45882 | +*/ | |
45883 | +typedef struct { | |
45884 | + ef_addr iov_base EF_VI_ALIGN(8); | |
45885 | + unsigned iov_len; | |
45886 | +} ef_iovec; | |
45887 | + | |
45888 | +/* Falcon constants */ | |
45889 | +#define TX_EV_DESC_PTR_LBN 0 | |
45890 | + | |
45891 | +/********************************************************************** | |
45892 | + * ef_iobufset ******************************************************** | |
45893 | + **********************************************************************/ | |
45894 | + | |
45895 | +/*! \i_ef_bufs An [ef_iobufset] is a collection of buffers to be used | |
45896 | +** with the NIC. | |
45897 | +*/ | |
45898 | +typedef struct ef_iobufset { | |
45899 | + unsigned magic; | |
45900 | + unsigned bufs_mmap_bytes; | |
45901 | + unsigned bufs_handle; | |
45902 | + int bufs_ptr_off; | |
45903 | + ef_addr bufs_addr; | |
45904 | + unsigned bufs_size; /* size rounded to pow2 */ | |
45905 | + int bufs_num; | |
45906 | + int faultonaccess; | |
45907 | +} ef_iobufset; | |
45908 | + | |
45909 | + | |
45910 | +/********************************************************************** | |
45911 | + * ef_vi ************************************************************** | |
45912 | + **********************************************************************/ | |
45913 | + | |
45914 | +enum ef_vi_flags { | |
45915 | + EF_VI_RX_SCATTER = 0x1, | |
45916 | + EF_VI_ISCSI_RX_HDIG = 0x2, | |
45917 | + EF_VI_ISCSI_TX_HDIG = 0x4, | |
45918 | + EF_VI_ISCSI_RX_DDIG = 0x8, | |
45919 | + EF_VI_ISCSI_TX_DDIG = 0x10, | |
45920 | + EF_VI_TX_PHYS_ADDR = 0x20, | |
45921 | + EF_VI_RX_PHYS_ADDR = 0x40, | |
45922 | + EF_VI_TX_IP_CSUM_DIS = 0x80, | |
45923 | + EF_VI_TX_TCPUDP_CSUM_DIS= 0x100, | |
45924 | + EF_VI_TX_TCPUDP_ONLY = 0x200, | |
45925 | + /* Flags in range 0xXXXX0000 are for internal use. */ | |
45926 | +}; | |
45927 | + | |
45928 | +typedef struct { | |
45929 | + uint32_t added; | |
45930 | + uint32_t removed; | |
45931 | +} ef_vi_txq_state; | |
45932 | + | |
45933 | +typedef struct { | |
45934 | + uint32_t added; | |
45935 | + uint32_t removed; | |
45936 | +} ef_vi_rxq_state; | |
45937 | + | |
45938 | +typedef struct { | |
45939 | + uint32_t mask; | |
45940 | + void* doorbell; | |
45941 | + void* descriptors; | |
45942 | + uint16_t* ids; | |
45943 | + unsigned misalign_mask; | |
45944 | +} ef_vi_txq; | |
45945 | + | |
45946 | +typedef struct { | |
45947 | + uint32_t mask; | |
45948 | + void* doorbell; | |
45949 | + void* descriptors; | |
45950 | + uint16_t* ids; | |
45951 | +} ef_vi_rxq; | |
45952 | + | |
45953 | +typedef struct { | |
45954 | + ef_eventq_state evq; | |
45955 | + ef_vi_txq_state txq; | |
45956 | + ef_vi_rxq_state rxq; | |
45957 | + /* Followed by request id fifos. */ | |
45958 | +} ef_vi_state; | |
45959 | + | |
45960 | +/*! \i_ef_vi A virtual interface. | |
45961 | +** | |
45962 | +** An [ef_vi] represents a virtual interface on a specific NIC. A | |
45963 | +** virtual interface is a collection of an event queue and two DMA queues | |
45964 | +** used to pass Ethernet frames between the transport implementation and | |
45965 | +** the network. | |
45966 | +*/ | |
45967 | +typedef struct ef_vi { | |
45968 | + unsigned magic; | |
45969 | + | |
45970 | + unsigned vi_resource_id; | |
45971 | + unsigned vi_resource_handle_hack; | |
45972 | + unsigned vi_i; | |
45973 | + | |
45974 | + char* vi_mem_mmap_ptr; | |
45975 | + int vi_mem_mmap_bytes; | |
45976 | + char* vi_io_mmap_ptr; | |
45977 | + int vi_io_mmap_bytes; | |
45978 | + | |
45979 | + ef_eventq_state* evq_state; | |
45980 | + char* evq_base; | |
45981 | + unsigned evq_mask; | |
45982 | + ef_vi_ioaddr_t evq_timer_reg; | |
45983 | + | |
45984 | + ef_vi_txq vi_txq; | |
45985 | + ef_vi_rxq vi_rxq; | |
45986 | + ef_vi_state* ep_state; | |
45987 | + enum ef_vi_flags vi_flags; | |
45988 | +} ef_vi; | |
45989 | + | |
45990 | + | |
45991 | +enum ef_vi_arch { | |
45992 | + EF_VI_ARCH_FALCON, | |
45993 | +}; | |
45994 | + | |
45995 | + | |
45996 | +struct ef_vi_nic_type { | |
45997 | + unsigned char arch; | |
45998 | + char variant; | |
45999 | + unsigned char revision; | |
46000 | +}; | |
46001 | + | |
46002 | + | |
46003 | +/* This structure is opaque to the client & used to pass mapping data | |
46004 | + * from the resource manager to the ef_vi lib. for ef_vi_init(). | |
46005 | + */ | |
46006 | +struct vi_mappings { | |
46007 | + uint32_t signature; | |
46008 | +# define VI_MAPPING_VERSION 0x02 /*Byte: Increment me if struct altered*/ | |
46009 | +# define VI_MAPPING_SIGNATURE (0xBA1150 + VI_MAPPING_VERSION) | |
46010 | + | |
46011 | + struct ef_vi_nic_type nic_type; | |
46012 | + | |
46013 | + int vi_instance; | |
46014 | + | |
46015 | + unsigned evq_bytes; | |
46016 | + char* evq_base; | |
46017 | + ef_vi_ioaddr_t evq_timer_reg; | |
46018 | + | |
46019 | + unsigned rx_queue_capacity; | |
46020 | + ef_vi_ioaddr_t rx_dma_ef1; | |
46021 | + char* rx_dma_falcon; | |
46022 | + ef_vi_ioaddr_t rx_bell; | |
46023 | + | |
46024 | + unsigned tx_queue_capacity; | |
46025 | + ef_vi_ioaddr_t tx_dma_ef1; | |
46026 | + char* tx_dma_falcon; | |
46027 | + ef_vi_ioaddr_t tx_bell; | |
46028 | +}; | |
46029 | +/* This is used by clients to allocate a suitably sized buffer for the | |
46030 | + * resource manager to fill & ef_vi_init() to use. */ | |
46031 | +#define VI_MAPPINGS_SIZE (sizeof(struct vi_mappings)) | |
46032 | + | |
46033 | + | |
46034 | +/********************************************************************** | |
46035 | + * ef_config ********************************************************** | |
46036 | + **********************************************************************/ | |
46037 | + | |
46038 | +struct ef_config_t { | |
46039 | + int log; /* debug logging level */ | |
46040 | +}; | |
46041 | + | |
46042 | +extern struct ef_config_t ef_config; | |
46043 | + | |
46044 | + | |
46045 | +/********************************************************************** | |
46046 | + * ef_vi ************************************************************** | |
46047 | + **********************************************************************/ | |
46048 | + | |
46049 | +/* Initialise [data_area] with information required to initialise an ef_vi. | |
46050 | + * In the following, an unused param should be set to NULL. Note the case | |
46051 | + * marked (*) of [iobuf_mmap] for falcon/driver; for normal driver this | |
46052 | + * must be NULL. | |
46053 | + * | |
46054 | + * \param data_area [in,out] required, must ref at least VI_MAPPINGS_SIZE | |
46055 | + * bytes | |
46056 | + * \param evq_capacity [in] number of events in event queue. Specify 0 for | |
46057 | + * no event queue. | |
46058 | + * \param rxq_capacity [in] number of descriptors in RX DMA queue. Specify | |
46059 | + * 0 for no RX queue. | |
46060 | + * \param txq_capacity [in] number of descriptors in TX DMA queue. Specify | |
46061 | + * 0 for no TX queue. | |
46062 | + * \param mmap_info [in] mem-map info for resource | |
46063 | + * \param io_mmap [in] ef1, required | |
46064 | + * falcon, required | |
46065 | + * \param iobuf_mmap [in] ef1, UL: unused | |
46066 | + * falcon, UL: required | |
46067 | + */ | |
46068 | +extern void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type, | |
46069 | + unsigned rxq_capacity, | |
46070 | + unsigned txq_capacity, int instance, | |
46071 | + void* io_mmap, void* iobuf_mmap_rx, | |
46072 | + void* iobuf_mmap_tx, enum ef_vi_flags); | |
46073 | + | |
46074 | + | |
46075 | +extern void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type, | |
46076 | + int instance, unsigned evq_bytes, | |
46077 | + void* base, void* timer_reg); | |
46078 | + | |
46079 | +ef_vi_inline unsigned ef_vi_resource_id(ef_vi* vi) | |
46080 | +{ | |
46081 | + return vi->vi_resource_id; | |
46082 | +} | |
46083 | + | |
46084 | +ef_vi_inline enum ef_vi_flags ef_vi_flags(ef_vi* vi) | |
46085 | +{ | |
46086 | + return vi->vi_flags; | |
46087 | +} | |
46088 | + | |
46089 | + | |
46090 | +/********************************************************************** | |
46091 | + * Receive interface ************************************************** | |
46092 | + **********************************************************************/ | |
46093 | + | |
46094 | +/*! \i_ef_vi Returns the amount of space in the RX descriptor ring. | |
46095 | +** | |
46096 | +** \return the amount of space in the queue. | |
46097 | +*/ | |
46098 | +ef_vi_inline int ef_vi_receive_space(ef_vi* vi) | |
46099 | +{ | |
46100 | + ef_vi_rxq_state* qs = &vi->ep_state->rxq; | |
46101 | + return vi->vi_rxq.mask - (qs->added - qs->removed); | |
46102 | +} | |
46103 | + | |
46104 | + | |
46105 | +/*! \i_ef_vi Returns the fill level of the RX descriptor ring. | |
46106 | +** | |
46107 | +** \return the fill level of the queue. | |
46108 | +*/ | |
46109 | +ef_vi_inline int ef_vi_receive_fill_level(ef_vi* vi) | |
46110 | +{ | |
46111 | + ef_vi_rxq_state* qs = &vi->ep_state->rxq; | |
46112 | + return qs->added - qs->removed; | |
46113 | +} | |
46114 | + | |
46115 | + | |
46116 | +ef_vi_inline int ef_vi_receive_capacity(ef_vi* vi) | |
46117 | +{ | |
46118 | + return vi->vi_rxq.mask; | |
46119 | +} | |
46120 | + | |
46121 | +/*! \i_ef_vi Complete a receive operation. | |
46122 | +** | |
46123 | +** When a receive completion event is received, it should be passed to | |
46124 | +** this function. The request-id for the buffer that the packet was | |
46125 | +** delivered to is returned. | |
46126 | +** | |
46127 | +** After this function returns, more space may be available in the | |
46128 | +** receive queue. | |
46129 | +*/ | |
46130 | +extern ef_request_id ef_vi_receive_done(const ef_vi*, const ef_event*); | |
46131 | + | |
46132 | +/*! \i_ef_vi Return request ID indicated by a receive event | |
46133 | + */ | |
46134 | +ef_vi_inline ef_request_id ef_vi_receive_request_id(const ef_vi* vi, | |
46135 | + const ef_event* ef_ev) | |
46136 | +{ | |
46137 | + const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev); | |
46138 | + return ev->u32[0] & vi->vi_rxq.mask; | |
46139 | +} | |
46140 | + | |
46141 | + | |
46142 | +/*! \i_ef_vi Form a receive descriptor. | |
46143 | +** | |
46144 | +** If \c initial_rx_bytes is zero use a reception size at least as large | |
46145 | +** as an MTU. | |
46146 | +*/ | |
46147 | +extern int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id, | |
46148 | + int intial_rx_bytes); | |
46149 | + | |
46150 | +/*! \i_ef_vi Submit initialised receive descriptors to the NIC. */ | |
46151 | +extern void ef_vi_receive_push(ef_vi* vi); | |
46152 | + | |
46153 | +/*! \i_ef_vi Post a buffer on the receive queue. | |
46154 | +** | |
46155 | +** \return 0 on success, or -EAGAIN if the receive queue is full | |
46156 | +*/ | |
46157 | +extern int ef_vi_receive_post(ef_vi*, ef_addr addr, | |
46158 | + ef_request_id dma_id); | |
46159 | + | |
46160 | +/********************************************************************** | |
46161 | + * Transmit interface ************************************************* | |
46162 | + **********************************************************************/ | |
46163 | + | |
46164 | +/*! \i_ef_vi Return the amount of space (in descriptors) in the transmit | |
46165 | +** queue. | |
46166 | +** | |
46167 | +** \return the amount of space in the queue (in descriptors) | |
46168 | +*/ | |
46169 | +ef_vi_inline int ef_vi_transmit_space(ef_vi* vi) | |
46170 | +{ | |
46171 | + ef_vi_txq_state* qs = &vi->ep_state->txq; | |
46172 | + return vi->vi_txq.mask - (qs->added - qs->removed); | |
46173 | +} | |
46174 | + | |
46175 | + | |
46176 | +/*! \i_ef_vi Returns the fill level of the TX descriptor ring. | |
46177 | +** | |
46178 | +** \return the fill level of the queue. | |
46179 | +*/ | |
46180 | +ef_vi_inline int ef_vi_transmit_fill_level(ef_vi* vi) | |
46181 | +{ | |
46182 | + ef_vi_txq_state* qs = &vi->ep_state->txq; | |
46183 | + return qs->added - qs->removed; | |
46184 | +} | |
46185 | + | |
46186 | + | |
46187 | +/*! \i_ef_vi Returns the total capacity of the TX descriptor ring. | |
46188 | +** | |
46189 | +** \return the capacity of the queue. | |
46190 | +*/ | |
46191 | +ef_vi_inline int ef_vi_transmit_capacity(ef_vi* vi) | |
46192 | +{ | |
46193 | + return vi->vi_txq.mask; | |
46194 | +} | |
46195 | + | |
46196 | + | |
46197 | +/*! \i_ef_vi Transmit a packet. | |
46198 | +** | |
46199 | +** \param bytes must be greater than ETH_ZLEN. | |
46200 | +** \return -EAGAIN if the transmit queue is full, or 0 on success | |
46201 | +*/ | |
46202 | +extern int ef_vi_transmit(ef_vi*, ef_addr, int bytes, ef_request_id dma_id); | |
46203 | + | |
46204 | +/*! \i_ef_vi Transmit a packet using a gather list. | |
46205 | +** | |
46206 | +** \param iov_len must be greater than zero | |
46207 | +** \param iov the first must be non-zero in length (but others need not) | |
46208 | +** | |
46209 | +** \return -EAGAIN if the queue is full, or 0 on success | |
46210 | +*/ | |
46211 | +extern int ef_vi_transmitv(ef_vi*, const ef_iovec* iov, int iov_len, | |
46212 | + ef_request_id dma_id); | |
46213 | + | |
46214 | +/*! \i_ef_vi Initialise a DMA request. | |
46215 | +** | |
46216 | +** \return -EAGAIN if the queue is full, or 0 on success | |
46217 | +*/ | |
46218 | +extern int ef_vi_transmit_init(ef_vi*, ef_addr, int bytes, | |
46219 | + ef_request_id dma_id); | |
46220 | + | |
46221 | +/*! \i_ef_vi Initialise a DMA request. | |
46222 | +** | |
46223 | +** \return -EAGAIN if the queue is full, or 0 on success | |
46224 | +*/ | |
46225 | +extern int ef_vi_transmitv_init(ef_vi*, const ef_iovec*, int iov_len, | |
46226 | + ef_request_id dma_id); | |
46227 | + | |
46228 | +/*! \i_ef_vi Submit DMA requests to the NIC. | |
46229 | +** | |
46230 | +** The DMA requests must have been initialised using | |
46231 | +** ef_vi_transmit_init() or ef_vi_transmitv_init(). | |
46232 | +*/ | |
46233 | +extern void ef_vi_transmit_push(ef_vi*); | |
46234 | + | |
46235 | + | |
46236 | +/*! \i_ef_vi Maximum number of transmit completions per transmit event. */ | |
46237 | +#define EF_VI_TRANSMIT_BATCH 64 | |
46238 | + | |
46239 | +/*! \i_ef_vi Determine the set of [ef_request_id]s for each DMA request | |
46240 | +** which has been completed by a given transmit completion | |
46241 | +** event. | |
46242 | +** | |
46243 | +** \param ids must point to an array of length EF_VI_TRANSMIT_BATCH | |
46244 | +** \return the number of valid [ef_request_id]s (can be zero) | |
46245 | +*/ | |
46246 | +extern int ef_vi_transmit_unbundle(ef_vi* ep, const ef_event*, | |
46247 | + ef_request_id* ids); | |
46248 | + | |
46249 | + | |
46250 | +/*! \i_ef_event Returns true if ef_eventq_poll() will return event(s). */ | |
46251 | +extern int ef_eventq_has_event(ef_vi* vi); | |
46252 | + | |
46253 | +/*! \i_ef_event Returns true if there are quite a few events in the event | |
46254 | +** queue. | |
46255 | +** | |
46256 | +** This looks ahead in the event queue, so has the property that it will | |
46257 | +** not ping-pong a cache-line when it is called concurrently with events | |
46258 | +** being delivered. | |
46259 | +*/ | |
46260 | +extern int ef_eventq_has_many_events(ef_vi* evq, int look_ahead); | |
46261 | + | |
46262 | +/*! Type of function to handle unknown events arriving on event queue | |
46263 | +** Return CI_TRUE iff the event has been handled. | |
46264 | +*/ | |
46265 | +typedef int/*bool*/ ef_event_handler_fn(void* priv, ef_vi* evq, ef_event* ev); | |
46266 | + | |
46267 | +/*! Standard poll exception routine */ | |
46268 | +extern int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq, | |
46269 | + ef_event* ev); | |
46270 | + | |
46271 | +/*! \i_ef_event Retrieve events from the event queue, handle RX/TX events | |
46272 | +** and pass any others to an exception handler function | |
46273 | +** | |
46274 | +** \return The number of events retrieved. | |
46275 | +*/ | |
46276 | +extern int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len, | |
46277 | + ef_event_handler_fn *exception, void *expt_priv); | |
46278 | + | |
46279 | +/*! \i_ef_event Retrieve events from the event queue. | |
46280 | +** | |
46281 | +** \return The number of events retrieved. | |
46282 | +*/ | |
46283 | +ef_vi_inline int ef_eventq_poll(ef_vi* evq, ef_event* evs, int evs_len) | |
46284 | +{ | |
46285 | + return ef_eventq_poll_evs(evq, evs, evs_len, | |
46286 | + &ef_eventq_poll_exception, (void*)0); | |
46287 | +} | |
46288 | + | |
46289 | +/*! \i_ef_event Returns the capacity of an event queue. */ | |
46290 | +ef_vi_inline int ef_eventq_capacity(ef_vi* vi) | |
46291 | +{ | |
46292 | + return (vi->evq_mask + 1u) / sizeof(ef_hw_event); | |
46293 | +} | |
46294 | + | |
46295 | +/* Returns the instance ID of [vi] */ | |
46296 | +ef_vi_inline unsigned ef_vi_instance(ef_vi* vi) | |
46297 | +{ return vi->vi_i; } | |
46298 | + | |
46299 | + | |
46300 | +/********************************************************************** | |
46301 | + * Initialisation ***************************************************** | |
46302 | + **********************************************************************/ | |
46303 | + | |
46304 | +/*! Return size of state buffer of an initialised VI. */ | |
46305 | +extern int ef_vi_state_bytes(ef_vi*); | |
46306 | + | |
46307 | +/*! Return size of buffer needed for VI state given sizes of RX and TX | |
46308 | +** DMA queues. Queue sizes must be legal sizes (power of 2), or 0 (no | |
46309 | +** queue). | |
46310 | +*/ | |
46311 | +extern int ef_vi_calc_state_bytes(int rxq_size, int txq_size); | |
46312 | + | |
46313 | +/*! Initialise [ef_vi] from the provided resources. [vvis] must have been | |
46314 | +** created by ef_make_vi_data() & remains owned by the caller. | |
46315 | +*/ | |
46316 | +extern void ef_vi_init(ef_vi*, void* vi_info, ef_vi_state* state, | |
46317 | + ef_eventq_state* evq_state, enum ef_vi_flags); | |
46318 | + | |
46319 | +extern void ef_vi_state_init(ef_vi*); | |
46320 | +extern void ef_eventq_state_init(ef_vi*); | |
46321 | + | |
46322 | +/*! Convert an efhw device arch to ef_vi_arch, or returns -1 if not | |
46323 | +** recognised. | |
46324 | +*/ | |
46325 | +extern int ef_vi_arch_from_efhw_arch(int efhw_arch); | |
46326 | + | |
46327 | + | |
46328 | +#endif /* __EFAB_EF_VI_H__ */ | |
46329 | Index: head-2008-11-25/drivers/xen/sfc_netfront/falcon_event.c | |
46330 | =================================================================== | |
46331 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
46332 | +++ head-2008-11-25/drivers/xen/sfc_netfront/falcon_event.c 2008-02-20 09:32:49.000000000 +0100 | |
46333 | @@ -0,0 +1,346 @@ | |
46334 | +/**************************************************************************** | |
46335 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
46336 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
46337 | + * 9501 Jeronimo Road, Suite 250, | |
46338 | + * Irvine, CA 92618, USA | |
46339 | + * | |
46340 | + * Maintained by Solarflare Communications | |
46341 | + * <linux-xen-drivers@solarflare.com> | |
46342 | + * <onload-dev@solarflare.com> | |
46343 | + * | |
46344 | + * This program is free software; you can redistribute it and/or modify it | |
46345 | + * under the terms of the GNU General Public License version 2 as published | |
46346 | + * by the Free Software Foundation, incorporated herein by reference. | |
46347 | + * | |
46348 | + * This program is distributed in the hope that it will be useful, | |
46349 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
46350 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
46351 | + * GNU General Public License for more details. | |
46352 | + * | |
46353 | + * You should have received a copy of the GNU General Public License | |
46354 | + * along with this program; if not, write to the Free Software | |
46355 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
46356 | + **************************************************************************** | |
46357 | + */ | |
46358 | + | |
46359 | +/* | |
46360 | + * \author djr | |
46361 | + * \brief Routine to poll event queues. | |
46362 | + * \date 2003/03/04 | |
46363 | + */ | |
46364 | + | |
46365 | +/*! \cidoxg_lib_ef */ | |
46366 | +#include "ef_vi_internal.h" | |
46367 | + | |
46368 | +/* Be worried about this on byteswapped machines */ | |
46369 | +/* Due to crazy chipsets, we see the event words being written in | |
46370 | +** arbitrary order (bug4539). So test for presence of event must ensure | |
46371 | +** that both halves have changed from the null. | |
46372 | +*/ | |
46373 | +# define EF_VI_IS_EVENT(evp) \ | |
46374 | + ( (((evp)->opaque.a != (uint32_t)-1) && \ | |
46375 | + ((evp)->opaque.b != (uint32_t)-1)) ) | |
46376 | + | |
46377 | + | |
46378 | +#ifdef NDEBUG | |
46379 | +# define IS_DEBUG 0 | |
46380 | +#else | |
46381 | +# define IS_DEBUG 1 | |
46382 | +#endif | |
46383 | + | |
46384 | + | |
46385 | +/*! Check for RX events with inconsistent SOP/CONT | |
46386 | +** | |
46387 | +** Returns true if this event should be discarded | |
46388 | +*/ | |
46389 | +ef_vi_inline int ef_eventq_is_rx_sop_cont_bad_efab(ef_vi* vi, | |
46390 | + const ef_vi_qword* ev) | |
46391 | +{ | |
46392 | + ef_rx_dup_state_t* rx_dup_state; | |
46393 | + uint8_t* bad_sop; | |
46394 | + | |
46395 | + unsigned label = QWORD_GET_U(RX_EV_Q_LABEL, *ev); | |
46396 | + unsigned sop = QWORD_TEST_BIT(RX_SOP, *ev); | |
46397 | + | |
46398 | + ef_assert(vi); | |
46399 | + ef_assert_lt(label, EFAB_DMAQS_PER_EVQ_MAX); | |
46400 | + | |
46401 | + rx_dup_state = &vi->evq_state->rx_dup_state[label]; | |
46402 | + bad_sop = &rx_dup_state->bad_sop; | |
46403 | + | |
46404 | + if( ! ((vi->vi_flags & EF_VI_BUG5692_WORKAROUND) || IS_DEBUG) ) { | |
46405 | + *bad_sop = (*bad_sop && !sop); | |
46406 | + } | |
46407 | + else { | |
46408 | + unsigned cont = QWORD_TEST_BIT(RX_JUMBO_CONT, *ev); | |
46409 | + uint8_t *frag_num = &rx_dup_state->frag_num; | |
46410 | + | |
46411 | + /* bad_sop should latch till the next sop */ | |
46412 | + *bad_sop = (*bad_sop && !sop) || ( !!sop != (*frag_num==0) ); | |
46413 | + | |
46414 | + /* we do not check the number of bytes relative to the | |
46415 | + * fragment number and size of the user rx buffer here | |
46416 | + * because we don't know the size of the user rx | |
46417 | + * buffer - we probably should perform this check in | |
46418 | + * the nearest code calling this though. | |
46419 | + */ | |
46420 | + *frag_num = cont ? (*frag_num + 1) : 0; | |
46421 | + } | |
46422 | + | |
46423 | + return *bad_sop; | |
46424 | +} | |
46425 | + | |
46426 | + | |
46427 | +ef_vi_inline int falcon_rx_check_dup(ef_vi* evq, ef_event* ev_out, | |
46428 | + const ef_vi_qword* ev) | |
46429 | +{ | |
46430 | + unsigned q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev); | |
46431 | + unsigned desc_ptr = QWORD_GET_U(RX_EV_DESC_PTR, *ev); | |
46432 | + ef_rx_dup_state_t* rx_dup_state = &evq->evq_state->rx_dup_state[q_id]; | |
46433 | + | |
46434 | + if(likely( desc_ptr != rx_dup_state->rx_last_desc_ptr )) { | |
46435 | + rx_dup_state->rx_last_desc_ptr = desc_ptr; | |
46436 | + return 0; | |
46437 | + } | |
46438 | + | |
46439 | + rx_dup_state->rx_last_desc_ptr = desc_ptr; | |
46440 | + rx_dup_state->bad_sop = 1; | |
46441 | +#ifndef NDEBUG | |
46442 | + rx_dup_state->frag_num = 0; | |
46443 | +#endif | |
46444 | + BUG_ON(!QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev)); | |
46445 | + BUG_ON( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev)); | |
46446 | + BUG_ON(!QWORD_GET_U(RX_EV_BYTE_CNT, *ev) == 0); | |
46447 | + ev_out->rx_no_desc_trunc.type = EF_EVENT_TYPE_RX_NO_DESC_TRUNC; | |
46448 | + ev_out->rx_no_desc_trunc.q_id = q_id; | |
46449 | + return 1; | |
46450 | +} | |
46451 | + | |
46452 | + | |
46453 | +ef_vi_inline void falcon_rx_event(ef_event* ev_out, const ef_vi_qword* ev) | |
46454 | +{ | |
46455 | + if(likely( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev) )) { | |
46456 | + ev_out->rx.type = EF_EVENT_TYPE_RX; | |
46457 | + ev_out->rx.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev); | |
46458 | + ev_out->rx.len = QWORD_GET_U(RX_EV_BYTE_CNT, *ev); | |
46459 | + if( QWORD_TEST_BIT(RX_SOP, *ev) ) | |
46460 | + ev_out->rx.flags = EF_EVENT_FLAG_SOP; | |
46461 | + else | |
46462 | + ev_out->rx.flags = 0; | |
46463 | + if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) ) | |
46464 | + ev_out->rx.flags |= EF_EVENT_FLAG_CONT; | |
46465 | + if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) ) | |
46466 | + ev_out->rx.flags |= EF_EVENT_FLAG_ISCSI_OK; | |
46467 | + } | |
46468 | + else { | |
46469 | + ev_out->rx_discard.type = EF_EVENT_TYPE_RX_DISCARD; | |
46470 | + ev_out->rx_discard.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev); | |
46471 | + ev_out->rx_discard.len = QWORD_GET_U(RX_EV_BYTE_CNT, *ev); | |
46472 | +#if 1 /* hack for ptloop compatability: ?? TODO purge */ | |
46473 | + if( QWORD_TEST_BIT(RX_SOP, *ev) ) | |
46474 | + ev_out->rx_discard.flags = EF_EVENT_FLAG_SOP; | |
46475 | + else | |
46476 | + ev_out->rx_discard.flags = 0; | |
46477 | + if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) ) | |
46478 | + ev_out->rx_discard.flags |= EF_EVENT_FLAG_CONT; | |
46479 | + if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) ) | |
46480 | + ev_out->rx_discard.flags |= EF_EVENT_FLAG_ISCSI_OK; | |
46481 | +#endif | |
46482 | + /* Order matters here: more fundamental errors first. */ | |
46483 | + if( QWORD_TEST_BIT(RX_EV_BUF_OWNER_ID_ERR, *ev) ) | |
46484 | + ev_out->rx_discard.subtype = | |
46485 | + EF_EVENT_RX_DISCARD_RIGHTS; | |
46486 | + else if( QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev) ) | |
46487 | + ev_out->rx_discard.subtype = | |
46488 | + EF_EVENT_RX_DISCARD_TRUNC; | |
46489 | + else if( QWORD_TEST_BIT(RX_EV_ETH_CRC_ERR, *ev) ) | |
46490 | + ev_out->rx_discard.subtype = | |
46491 | + EF_EVENT_RX_DISCARD_CRC_BAD; | |
46492 | + else if( QWORD_TEST_BIT(RX_EV_IP_HDR_CHKSUM_ERR, *ev) ) | |
46493 | + ev_out->rx_discard.subtype = | |
46494 | + EF_EVENT_RX_DISCARD_CSUM_BAD; | |
46495 | + else if( QWORD_TEST_BIT(RX_EV_TCP_UDP_CHKSUM_ERR, *ev) ) | |
46496 | + ev_out->rx_discard.subtype = | |
46497 | + EF_EVENT_RX_DISCARD_CSUM_BAD; | |
46498 | + else | |
46499 | + ev_out->rx_discard.subtype = | |
46500 | + EF_EVENT_RX_DISCARD_OTHER; | |
46501 | + } | |
46502 | +} | |
46503 | + | |
46504 | + | |
46505 | +ef_vi_inline void falcon_tx_event(ef_event* ev_out, const ef_vi_qword* ev) | |
46506 | +{ | |
46507 | + /* Danger danger! No matter what we ask for wrt batching, we | |
46508 | + ** will get a batched event every 16 descriptors, and we also | |
46509 | + ** get dma-queue-empty events. i.e. Duplicates are expected. | |
46510 | + ** | |
46511 | + ** In addition, if it's been requested in the descriptor, we | |
46512 | + ** get an event per descriptor. (We don't currently request | |
46513 | + ** this). | |
46514 | + */ | |
46515 | + if(likely( QWORD_TEST_BIT(TX_EV_COMP, *ev) )) { | |
46516 | + ev_out->tx.type = EF_EVENT_TYPE_TX; | |
46517 | + ev_out->tx.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev); | |
46518 | + } | |
46519 | + else { | |
46520 | + ev_out->tx_error.type = EF_EVENT_TYPE_TX_ERROR; | |
46521 | + ev_out->tx_error.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev); | |
46522 | + if(likely( QWORD_TEST_BIT(TX_EV_BUF_OWNER_ID_ERR, *ev) )) | |
46523 | + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_RIGHTS; | |
46524 | + else if(likely( QWORD_TEST_BIT(TX_EV_WQ_FF_FULL, *ev) )) | |
46525 | + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_OFLOW; | |
46526 | + else if(likely( QWORD_TEST_BIT(TX_EV_PKT_TOO_BIG, *ev) )) | |
46527 | + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_2BIG; | |
46528 | + else if(likely( QWORD_TEST_BIT(TX_EV_PKT_ERR, *ev) )) | |
46529 | + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_BUS; | |
46530 | + } | |
46531 | +} | |
46532 | + | |
46533 | + | |
46534 | +static void mark_bad(ef_event* ev) | |
46535 | +{ | |
46536 | + ev->generic.ev.u64[0] &=~ ((uint64_t) 1u << RX_EV_PKT_OK_LBN); | |
46537 | +} | |
46538 | + | |
46539 | + | |
46540 | +int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len, | |
46541 | + ef_event_handler_fn *exception, void *expt_priv) | |
46542 | +{ | |
46543 | + int evs_len_orig = evs_len; | |
46544 | + | |
46545 | + EF_VI_CHECK_EVENT_Q(evq); | |
46546 | + ef_assert(evs); | |
46547 | + ef_assert_gt(evs_len, 0); | |
46548 | + | |
46549 | + if(unlikely( EF_VI_IS_EVENT(EF_VI_EVENT_PTR(evq, 1)) )) | |
46550 | + goto overflow; | |
46551 | + | |
46552 | + do { | |
46553 | + { /* Read the event out of the ring, then fiddle with | |
46554 | + * copied version. Reason is that the ring is | |
46555 | + * likely to get pushed out of cache by another | |
46556 | + * event being delivered by hardware. */ | |
46557 | + ef_vi_event* ev = EF_VI_EVENT_PTR(evq, 0); | |
46558 | + if( ! EF_VI_IS_EVENT(ev) ) | |
46559 | + break; | |
46560 | + evs->generic.ev.u64[0] = cpu_to_le64 (ev->u64); | |
46561 | + evq->evq_state->evq_ptr += sizeof(ef_vi_event); | |
46562 | + ev->u64 = (uint64_t)(int64_t) -1; | |
46563 | + } | |
46564 | + | |
46565 | + /* Ugly: Exploit the fact that event code lies in top | |
46566 | + * bits of event. */ | |
46567 | + ef_assert_ge(EV_CODE_LBN, 32u); | |
46568 | + switch( evs->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) { | |
46569 | + case RX_IP_EV_DECODE: | |
46570 | + /* Look for duplicate desc_ptr: it signals | |
46571 | + * that a jumbo frame was truncated because we | |
46572 | + * ran out of descriptors. */ | |
46573 | + if(unlikely( falcon_rx_check_dup | |
46574 | + (evq, evs, &evs->generic.ev) )) { | |
46575 | + --evs_len; | |
46576 | + ++evs; | |
46577 | + break; | |
46578 | + } | |
46579 | + else { | |
46580 | + /* Cope with FalconA1 bugs where RX | |
46581 | + * gives inconsistent RX events Mark | |
46582 | + * events as bad until SOP becomes | |
46583 | + * consistent again | |
46584 | + * ef_eventq_is_rx_sop_cont_bad() has | |
46585 | + * side effects - order is important | |
46586 | + */ | |
46587 | + if(unlikely | |
46588 | + (ef_eventq_is_rx_sop_cont_bad_efab | |
46589 | + (evq, &evs->generic.ev) )) { | |
46590 | + mark_bad(evs); | |
46591 | + } | |
46592 | + } | |
46593 | + falcon_rx_event(evs, &evs->generic.ev); | |
46594 | + --evs_len; | |
46595 | + ++evs; | |
46596 | + break; | |
46597 | + | |
46598 | + case TX_IP_EV_DECODE: | |
46599 | + falcon_tx_event(evs, &evs->generic.ev); | |
46600 | + --evs_len; | |
46601 | + ++evs; | |
46602 | + break; | |
46603 | + | |
46604 | + default: | |
46605 | + break; | |
46606 | + } | |
46607 | + } while( evs_len ); | |
46608 | + | |
46609 | + return evs_len_orig - evs_len; | |
46610 | + | |
46611 | + | |
46612 | + overflow: | |
46613 | + evs->generic.type = EF_EVENT_TYPE_OFLOW; | |
46614 | + evs->generic.ev.u64[0] = (uint64_t)((int64_t)-1); | |
46615 | + return 1; | |
46616 | +} | |
46617 | + | |
46618 | + | |
46619 | +int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq, ef_event* ev) | |
46620 | +{ | |
46621 | + int /*bool*/ handled = 0; | |
46622 | + | |
46623 | + switch( ev->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) { | |
46624 | + case DRIVER_EV_DECODE: | |
46625 | + if( QWORD_GET_U(DRIVER_EV_SUB_CODE, ev->generic.ev) == | |
46626 | + EVQ_INIT_DONE_EV_DECODE ) | |
46627 | + /* EVQ initialised event: ignore. */ | |
46628 | + handled = 1; | |
46629 | + break; | |
46630 | + } | |
46631 | + return handled; | |
46632 | +} | |
46633 | + | |
46634 | + | |
46635 | +void ef_eventq_iterate(ef_vi* vi, | |
46636 | + void (*fn)(void* arg, ef_vi*, int rel_pos, | |
46637 | + int abs_pos, void* event), | |
46638 | + void* arg, int stop_at_end) | |
46639 | +{ | |
46640 | + int i, size_evs = (vi->evq_mask + 1) / sizeof(ef_vi_event); | |
46641 | + | |
46642 | + for( i = 0; i < size_evs; ++i ) { | |
46643 | + ef_vi_event* e = EF_VI_EVENT_PTR(vi, -i); | |
46644 | + if( EF_VI_IS_EVENT(e) ) | |
46645 | + fn(arg, vi, i, | |
46646 | + EF_VI_EVENT_OFFSET(vi, -i) / sizeof(ef_vi_event), | |
46647 | + e); | |
46648 | + else if( stop_at_end ) | |
46649 | + break; | |
46650 | + } | |
46651 | +} | |
46652 | + | |
46653 | + | |
46654 | +int ef_eventq_has_event(ef_vi* vi) | |
46655 | +{ | |
46656 | + return EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, 0)); | |
46657 | +} | |
46658 | + | |
46659 | + | |
46660 | +int ef_eventq_has_many_events(ef_vi* vi, int look_ahead) | |
46661 | +{ | |
46662 | + ef_assert_ge(look_ahead, 0); | |
46663 | + return EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, -look_ahead)); | |
46664 | +} | |
46665 | + | |
46666 | + | |
46667 | +int ef_eventq_has_rx_event(ef_vi* vi) | |
46668 | +{ | |
46669 | + ef_vi_event* ev; | |
46670 | + int i, n_evs = 0; | |
46671 | + | |
46672 | + for( i = 0; EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, i)); --i ) { | |
46673 | + ev = EF_VI_EVENT_PTR(vi, i); | |
46674 | + if( EFVI_FALCON_EVENT_CODE(ev) == EF_EVENT_TYPE_RX ) n_evs++; | |
46675 | + } | |
46676 | + return n_evs; | |
46677 | +} | |
46678 | + | |
46679 | +/*! \cidoxg_end */ | |
46680 | Index: head-2008-11-25/drivers/xen/sfc_netfront/falcon_vi.c | |
46681 | =================================================================== | |
46682 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
46683 | +++ head-2008-11-25/drivers/xen/sfc_netfront/falcon_vi.c 2008-02-20 09:32:49.000000000 +0100 | |
46684 | @@ -0,0 +1,465 @@ | |
46685 | +/**************************************************************************** | |
46686 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
46687 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
46688 | + * 9501 Jeronimo Road, Suite 250, | |
46689 | + * Irvine, CA 92618, USA | |
46690 | + * | |
46691 | + * Maintained by Solarflare Communications | |
46692 | + * <linux-xen-drivers@solarflare.com> | |
46693 | + * <onload-dev@solarflare.com> | |
46694 | + * | |
46695 | + * This program is free software; you can redistribute it and/or modify it | |
46696 | + * under the terms of the GNU General Public License version 2 as published | |
46697 | + * by the Free Software Foundation, incorporated herein by reference. | |
46698 | + * | |
46699 | + * This program is distributed in the hope that it will be useful, | |
46700 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
46701 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
46702 | + * GNU General Public License for more details. | |
46703 | + * | |
46704 | + * You should have received a copy of the GNU General Public License | |
46705 | + * along with this program; if not, write to the Free Software | |
46706 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
46707 | + **************************************************************************** | |
46708 | + */ | |
46709 | + | |
46710 | +/* | |
46711 | + * \author djr, stg | |
46712 | + * \brief Falcon-specific VI | |
46713 | + * \date 2006/11/30 | |
46714 | + */ | |
46715 | + | |
46716 | +#include "ef_vi_internal.h" | |
46717 | + | |
46718 | + | |
46719 | +#define EFVI_FALCON_DMA_TX_FRAG 1 | |
46720 | + | |
46721 | + | |
46722 | +/* TX descriptor for both physical and virtual packet transfers */ | |
46723 | +typedef union { | |
46724 | + uint32_t dword[2]; | |
46725 | +} ef_vi_falcon_dma_tx_buf_desc; | |
46726 | +typedef ef_vi_falcon_dma_tx_buf_desc ef_vi_falcon_dma_tx_phys_desc; | |
46727 | + | |
46728 | + | |
46729 | +/* RX descriptor for physical addressed transfers */ | |
46730 | +typedef union { | |
46731 | + uint32_t dword[2]; | |
46732 | +} ef_vi_falcon_dma_rx_phys_desc; | |
46733 | + | |
46734 | + | |
46735 | +/* RX descriptor for virtual packet transfers */ | |
46736 | +typedef struct { | |
46737 | + uint32_t dword[1]; | |
46738 | +} ef_vi_falcon_dma_rx_buf_desc; | |
46739 | + | |
46740 | +/* Buffer table index */ | |
46741 | +typedef uint32_t ef_vi_buffer_addr_t; | |
46742 | + | |
46743 | +ef_vi_inline int64_t dma_addr_to_u46(int64_t src_dma_addr) | |
46744 | +{ | |
46745 | + return (src_dma_addr & __FALCON_MASK(46, int64_t)); | |
46746 | +} | |
46747 | + | |
46748 | +/*! Setup a physical address based descriptor with a specified length */ | |
46749 | +ef_vi_inline void | |
46750 | +__falcon_dma_rx_calc_ip_phys(ef_vi_dma_addr_t dest_pa, | |
46751 | + ef_vi_falcon_dma_rx_phys_desc *desc, | |
46752 | + int bytes) | |
46753 | +{ | |
46754 | + int region = 0; /* TODO fixme */ | |
46755 | + int64_t dest = dma_addr_to_u46(dest_pa); /* lower 46 bits */ | |
46756 | + | |
46757 | + DWCHCK(__DW2(RX_KER_BUF_SIZE_LBN), RX_KER_BUF_SIZE_WIDTH); | |
46758 | + DWCHCK(__DW2(RX_KER_BUF_REGION_LBN),RX_KER_BUF_REGION_WIDTH); | |
46759 | + | |
46760 | + LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH); | |
46761 | + | |
46762 | + RANGECHCK(bytes, RX_KER_BUF_SIZE_WIDTH); | |
46763 | + RANGECHCK(region, RX_KER_BUF_REGION_WIDTH); | |
46764 | + | |
46765 | + ef_assert(desc); | |
46766 | + | |
46767 | + desc->dword[1] = ((bytes << __DW2(RX_KER_BUF_SIZE_LBN)) | | |
46768 | + (region << __DW2(RX_KER_BUF_REGION_LBN)) | | |
46769 | + (HIGH(dest, | |
46770 | + RX_KER_BUF_ADR_LBN, | |
46771 | + RX_KER_BUF_ADR_WIDTH))); | |
46772 | + | |
46773 | + desc->dword[0] = LOW(dest, | |
46774 | + RX_KER_BUF_ADR_LBN, | |
46775 | + RX_KER_BUF_ADR_WIDTH); | |
46776 | +} | |
46777 | + | |
46778 | +/*! Setup a virtual buffer descriptor for an IPMODE transfer */ | |
46779 | +ef_vi_inline void | |
46780 | +__falcon_dma_tx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs, unsigned bytes, | |
46781 | + int port, int frag, | |
46782 | + ef_vi_falcon_dma_tx_buf_desc *desc) | |
46783 | +{ | |
46784 | + DWCHCK(__DW2(TX_USR_PORT_LBN), TX_USR_PORT_WIDTH); | |
46785 | + DWCHCK(__DW2(TX_USR_CONT_LBN), TX_USR_CONT_WIDTH); | |
46786 | + DWCHCK(__DW2(TX_USR_BYTE_CNT_LBN), TX_USR_BYTE_CNT_WIDTH); | |
46787 | + LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH); | |
46788 | + DWCHCK(TX_USR_BYTE_OFS_LBN, TX_USR_BYTE_OFS_WIDTH); | |
46789 | + | |
46790 | + RANGECHCK(bytes, TX_USR_BYTE_CNT_WIDTH); | |
46791 | + RANGECHCK(port, TX_USR_PORT_WIDTH); | |
46792 | + RANGECHCK(frag, TX_USR_CONT_WIDTH); | |
46793 | + RANGECHCK(buf_id, TX_USR_BUF_ID_WIDTH); | |
46794 | + RANGECHCK(buf_ofs, TX_USR_BYTE_OFS_WIDTH); | |
46795 | + | |
46796 | + ef_assert(desc); | |
46797 | + | |
46798 | + desc->dword[1] = ((port << __DW2(TX_USR_PORT_LBN)) | | |
46799 | + (frag << __DW2(TX_USR_CONT_LBN)) | | |
46800 | + (bytes << __DW2(TX_USR_BYTE_CNT_LBN)) | | |
46801 | + (HIGH(buf_id, | |
46802 | + TX_USR_BUF_ID_LBN, | |
46803 | + TX_USR_BUF_ID_WIDTH))); | |
46804 | + | |
46805 | + desc->dword[0] = ((LOW(buf_id, | |
46806 | + TX_USR_BUF_ID_LBN, | |
46807 | + (TX_USR_BUF_ID_WIDTH))) | | |
46808 | + (buf_ofs << TX_USR_BYTE_OFS_LBN)); | |
46809 | +} | |
46810 | + | |
46811 | +ef_vi_inline void | |
46812 | +falcon_dma_tx_calc_ip_buf_4k(unsigned buf_vaddr, unsigned bytes, | |
46813 | + int port, int frag, | |
46814 | + ef_vi_falcon_dma_tx_buf_desc *desc) | |
46815 | +{ | |
46816 | + /* TODO FIXME [buf_vaddr] consists of the buffer index in the | |
46817 | + ** high bits, and an offset in the low bits. Assumptions | |
46818 | + ** permate the code that these can be rolled into one 32bit | |
46819 | + ** value, so this is currently preserved for Falcon. But we | |
46820 | + ** should change to support 8K pages | |
46821 | + */ | |
46822 | + unsigned buf_id = EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr); | |
46823 | + unsigned buf_ofs = EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr); | |
46824 | + | |
46825 | + __falcon_dma_tx_calc_ip_buf( buf_id, buf_ofs, bytes, port, frag, desc); | |
46826 | +} | |
46827 | + | |
46828 | +ef_vi_inline void | |
46829 | +falcon_dma_tx_calc_ip_buf(unsigned buf_vaddr, unsigned bytes, int port, | |
46830 | + int frag, ef_vi_falcon_dma_tx_buf_desc *desc) | |
46831 | +{ | |
46832 | + falcon_dma_tx_calc_ip_buf_4k(buf_vaddr, bytes, port, frag, desc); | |
46833 | +} | |
46834 | + | |
46835 | +/*! Setup a virtual buffer based descriptor */ | |
46836 | +ef_vi_inline void | |
46837 | +__falcon_dma_rx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs, | |
46838 | + ef_vi_falcon_dma_rx_buf_desc *desc) | |
46839 | +{ | |
46840 | + /* check alignment of buffer offset and pack */ | |
46841 | + ef_assert((buf_ofs & 0x1) == 0); | |
46842 | + | |
46843 | + buf_ofs >>= 1; | |
46844 | + | |
46845 | + DWCHCK(RX_USR_2BYTE_OFS_LBN, RX_USR_2BYTE_OFS_WIDTH); | |
46846 | + DWCHCK(RX_USR_BUF_ID_LBN, RX_USR_BUF_ID_WIDTH); | |
46847 | + | |
46848 | + RANGECHCK(buf_ofs, RX_USR_2BYTE_OFS_WIDTH); | |
46849 | + RANGECHCK(buf_id, RX_USR_BUF_ID_WIDTH); | |
46850 | + | |
46851 | + ef_assert(desc); | |
46852 | + | |
46853 | + desc->dword[0] = ((buf_ofs << RX_USR_2BYTE_OFS_LBN) | | |
46854 | + (buf_id << RX_USR_BUF_ID_LBN)); | |
46855 | +} | |
46856 | + | |
46857 | +ef_vi_inline void | |
46858 | +falcon_dma_rx_calc_ip_buf_4k(unsigned buf_vaddr, | |
46859 | + ef_vi_falcon_dma_rx_buf_desc *desc) | |
46860 | +{ | |
46861 | + /* TODO FIXME [buf_vaddr] consists of the buffer index in the | |
46862 | + ** high bits, and an offset in the low bits. Assumptions | |
46863 | + ** permeate the code that these can be rolled into one 32bit | |
46864 | + ** value, so this is currently preserved for Falcon. But we | |
46865 | + ** should change to support 8K pages | |
46866 | + */ | |
46867 | + unsigned buf_id = EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr); | |
46868 | + unsigned buf_ofs = EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr); | |
46869 | + | |
46870 | + __falcon_dma_rx_calc_ip_buf(buf_id, buf_ofs, desc); | |
46871 | +} | |
46872 | + | |
46873 | +ef_vi_inline void | |
46874 | +falcon_dma_rx_calc_ip_buf(unsigned buf_vaddr, | |
46875 | + ef_vi_falcon_dma_rx_buf_desc *desc) | |
46876 | +{ | |
46877 | + falcon_dma_rx_calc_ip_buf_4k(buf_vaddr, desc); | |
46878 | +} | |
46879 | + | |
46880 | + | |
46881 | +ef_vi_inline ef_vi_dma_addr_t ef_physaddr(ef_addr efaddr) | |
46882 | +{ | |
46883 | + return (ef_vi_dma_addr_t) efaddr; | |
46884 | +} | |
46885 | + | |
46886 | + | |
46887 | +/*! Convert between an ef_addr and a buffer table index | |
46888 | +** Assert that this was not a physical address | |
46889 | +*/ | |
46890 | +ef_vi_inline ef_vi_buffer_addr_t ef_bufaddr(ef_addr efaddr) | |
46891 | +{ | |
46892 | + ef_assert(efaddr < ((uint64_t)1 << 32) ); | |
46893 | + | |
46894 | + return (ef_vi_buffer_addr_t) efaddr; | |
46895 | +} | |
46896 | + | |
46897 | + | |
46898 | +/*! Setup an physical address based descriptor for an IPMODE transfer */ | |
46899 | +ef_vi_inline void | |
46900 | +falcon_dma_tx_calc_ip_phys(ef_vi_dma_addr_t src_dma_addr, unsigned bytes, | |
46901 | + int port, int frag, | |
46902 | + ef_vi_falcon_dma_tx_phys_desc *desc) | |
46903 | +{ | |
46904 | + | |
46905 | + int region = 0; /* FIXME */ | |
46906 | + int64_t src = dma_addr_to_u46(src_dma_addr); /* lower 46 bits */ | |
46907 | + | |
46908 | + DWCHCK(__DW2(TX_KER_PORT_LBN), TX_KER_PORT_WIDTH); | |
46909 | + DWCHCK(__DW2(TX_KER_CONT_LBN), TX_KER_CONT_WIDTH); | |
46910 | + DWCHCK(__DW2(TX_KER_BYTE_CNT_LBN), TX_KER_BYTE_CNT_WIDTH); | |
46911 | + DWCHCK(__DW2(TX_KER_BUF_REGION_LBN),TX_KER_BUF_REGION_WIDTH); | |
46912 | + | |
46913 | + LWCHK(TX_KER_BUF_ADR_LBN, TX_KER_BUF_ADR_WIDTH); | |
46914 | + | |
46915 | + RANGECHCK(port, TX_KER_PORT_WIDTH); | |
46916 | + RANGECHCK(frag, TX_KER_CONT_WIDTH); | |
46917 | + RANGECHCK(bytes, TX_KER_BYTE_CNT_WIDTH); | |
46918 | + RANGECHCK(region, TX_KER_BUF_REGION_WIDTH); | |
46919 | + | |
46920 | + desc->dword[1] = ((port << __DW2(TX_KER_PORT_LBN)) | | |
46921 | + (frag << __DW2(TX_KER_CONT_LBN)) | | |
46922 | + (bytes << __DW2(TX_KER_BYTE_CNT_LBN)) | | |
46923 | + (region << __DW2(TX_KER_BUF_REGION_LBN)) | | |
46924 | + (HIGH(src, | |
46925 | + TX_KER_BUF_ADR_LBN, | |
46926 | + TX_KER_BUF_ADR_WIDTH))); | |
46927 | + | |
46928 | + ef_assert_equal(TX_KER_BUF_ADR_LBN, 0); | |
46929 | + desc->dword[0] = (uint32_t) src_dma_addr; | |
46930 | +} | |
46931 | + | |
46932 | + | |
46933 | +void falcon_vi_init(ef_vi* vi, void* vvis) | |
46934 | +{ | |
46935 | + struct vi_mappings *vm = (struct vi_mappings*)vvis; | |
46936 | + uint16_t* ids; | |
46937 | + | |
46938 | + ef_assert(vi); | |
46939 | + ef_assert(vvis); | |
46940 | + ef_assert_equal(vm->signature, VI_MAPPING_SIGNATURE); | |
46941 | + ef_assert_equal(vm->nic_type.arch, EF_VI_ARCH_FALCON); | |
46942 | + | |
46943 | + /* Initialise masks to zero, so that ef_vi_state_init() will | |
46944 | + ** not do any harm when we don't have DMA queues. */ | |
46945 | + vi->vi_rxq.mask = vi->vi_txq.mask = 0; | |
46946 | + | |
46947 | + /* Used for BUG5391_WORKAROUND. */ | |
46948 | + vi->vi_txq.misalign_mask = 0; | |
46949 | + | |
46950 | + /* Initialise doorbell addresses to a distinctive small value | |
46951 | + ** which will cause a segfault, to trap doorbell pushes to VIs | |
46952 | + ** without DMA queues. */ | |
46953 | + vi->vi_rxq.doorbell = vi->vi_txq.doorbell = (ef_vi_ioaddr_t)0xdb; | |
46954 | + | |
46955 | + ids = (uint16_t*) (vi->ep_state + 1); | |
46956 | + | |
46957 | + if( vm->tx_queue_capacity ) { | |
46958 | + vi->vi_txq.mask = vm->tx_queue_capacity - 1; | |
46959 | + vi->vi_txq.doorbell = vm->tx_bell + 12; | |
46960 | + vi->vi_txq.descriptors = vm->tx_dma_falcon; | |
46961 | + vi->vi_txq.ids = ids; | |
46962 | + ids += vi->vi_txq.mask + 1; | |
46963 | + /* Check that the id fifo fits in the space allocated. */ | |
46964 | + ef_assert_le((char*) (vi->vi_txq.ids + vm->tx_queue_capacity), | |
46965 | + (char*) vi->ep_state | |
46966 | + + ef_vi_calc_state_bytes(vm->rx_queue_capacity, | |
46967 | + vm->tx_queue_capacity)); | |
46968 | + } | |
46969 | + if( vm->rx_queue_capacity ) { | |
46970 | + vi->vi_rxq.mask = vm->rx_queue_capacity - 1; | |
46971 | + vi->vi_rxq.doorbell = vm->rx_bell + 12; | |
46972 | + vi->vi_rxq.descriptors = vm->rx_dma_falcon; | |
46973 | + vi->vi_rxq.ids = ids; | |
46974 | + /* Check that the id fifo fits in the space allocated. */ | |
46975 | + ef_assert_le((char*) (vi->vi_rxq.ids + vm->rx_queue_capacity), | |
46976 | + (char*) vi->ep_state | |
46977 | + + ef_vi_calc_state_bytes(vm->rx_queue_capacity, | |
46978 | + vm->tx_queue_capacity)); | |
46979 | + } | |
46980 | + | |
46981 | + if( vm->nic_type.variant == 'A' ) { | |
46982 | + vi->vi_txq.misalign_mask = 15; /* BUG5391_WORKAROUND */ | |
46983 | + vi->vi_flags |= EF_VI_BUG5692_WORKAROUND; | |
46984 | + } | |
46985 | +} | |
46986 | + | |
46987 | + | |
46988 | +int ef_vi_transmitv_init(ef_vi* vi, const ef_iovec* iov, int iov_len, | |
46989 | + ef_request_id dma_id) | |
46990 | +{ | |
46991 | + ef_vi_txq* q = &vi->vi_txq; | |
46992 | + ef_vi_txq_state* qs = &vi->ep_state->txq; | |
46993 | + ef_vi_falcon_dma_tx_buf_desc* dp; | |
46994 | + unsigned len, dma_len, di; | |
46995 | + unsigned added_save = qs->added; | |
46996 | + ef_addr dma_addr; | |
46997 | + unsigned last_len = 0; | |
46998 | + | |
46999 | + ef_assert(iov_len > 0); | |
47000 | + ef_assert(iov); | |
47001 | + ef_assert_equal((dma_id & EF_REQUEST_ID_MASK), dma_id); | |
47002 | + ef_assert_nequal(dma_id, 0xffff); | |
47003 | + | |
47004 | + dma_addr = iov->iov_base; | |
47005 | + len = iov->iov_len; | |
47006 | + | |
47007 | + if( vi->vi_flags & EF_VI_ISCSI_TX_DDIG ) { | |
47008 | + /* Last 4 bytes of placeholder for digest must be | |
47009 | + * removed for h/w */ | |
47010 | + ef_assert(len > 4); | |
47011 | + last_len = iov[iov_len - 1].iov_len; | |
47012 | + if( last_len <= 4 ) { | |
47013 | + ef_assert(iov_len > 1); | |
47014 | + --iov_len; | |
47015 | + last_len = iov[iov_len - 1].iov_len - (4 - last_len); | |
47016 | + } | |
47017 | + else { | |
47018 | + last_len = iov[iov_len - 1].iov_len - 4; | |
47019 | + } | |
47020 | + if( iov_len == 1 ) | |
47021 | + len = last_len; | |
47022 | + } | |
47023 | + | |
47024 | + while( 1 ) { | |
47025 | + if( qs->added - qs->removed >= q->mask ) { | |
47026 | + qs->added = added_save; | |
47027 | + return -EAGAIN; | |
47028 | + } | |
47029 | + | |
47030 | + dma_len = (~((unsigned) dma_addr) & 0xfff) + 1; | |
47031 | + if( dma_len > len ) dma_len = len; | |
47032 | + { /* BUG5391_WORKAROUND */ | |
47033 | + unsigned misalign = | |
47034 | + (unsigned) dma_addr & q->misalign_mask; | |
47035 | + if( misalign && dma_len + misalign > 512 ) | |
47036 | + dma_len = 512 - misalign; | |
47037 | + } | |
47038 | + | |
47039 | + di = qs->added++ & q->mask; | |
47040 | + dp = (ef_vi_falcon_dma_tx_buf_desc*) q->descriptors + di; | |
47041 | + if( vi->vi_flags & EF_VI_TX_PHYS_ADDR ) | |
47042 | + falcon_dma_tx_calc_ip_phys | |
47043 | + (ef_physaddr(dma_addr), dma_len, /*port*/ 0, | |
47044 | + (iov_len == 1 && dma_len == len) ? 0 : | |
47045 | + EFVI_FALCON_DMA_TX_FRAG, dp); | |
47046 | + else | |
47047 | + falcon_dma_tx_calc_ip_buf | |
47048 | + (ef_bufaddr(dma_addr), dma_len, /*port*/ 0, | |
47049 | + (iov_len == 1 && dma_len == len) ? 0 : | |
47050 | + EFVI_FALCON_DMA_TX_FRAG, dp); | |
47051 | + | |
47052 | + dma_addr += dma_len; | |
47053 | + len -= dma_len; | |
47054 | + | |
47055 | + if( len == 0 ) { | |
47056 | + if( --iov_len == 0 ) break; | |
47057 | + ++iov; | |
47058 | + dma_addr = iov->iov_base; | |
47059 | + len = iov->iov_len; | |
47060 | + if( (vi->vi_flags & EF_VI_ISCSI_TX_DDIG) && | |
47061 | + (iov_len == 1) ) | |
47062 | + len = last_len; | |
47063 | + } | |
47064 | + } | |
47065 | + | |
47066 | + q->ids[di] = (uint16_t) dma_id; | |
47067 | + return 0; | |
47068 | +} | |
47069 | + | |
47070 | + | |
47071 | +void ef_vi_transmit_push(ef_vi* vi) | |
47072 | +{ | |
47073 | + ef_vi_wiob(); | |
47074 | + writel((vi->ep_state->txq.added & vi->vi_txq.mask) << | |
47075 | + __DW4(TX_DESC_WPTR_LBN), | |
47076 | + vi->vi_txq.doorbell); | |
47077 | +} | |
47078 | + | |
47079 | + | |
47080 | +/*! The value of initial_rx_bytes is used to set RX_KER_BUF_SIZE in an initial | |
47081 | +** receive descriptor here if physical addressing is being used. A value of | |
47082 | +** zero represents 16384 bytes. This is okay, because caller must provide a | |
47083 | +** buffer than is > MTU, and mac should filter anything bigger than that. | |
47084 | +*/ | |
47085 | +int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id, | |
47086 | + int initial_rx_bytes) | |
47087 | +{ | |
47088 | + ef_vi_rxq* q = &vi->vi_rxq; | |
47089 | + ef_vi_rxq_state* qs = &vi->ep_state->rxq; | |
47090 | + unsigned di; | |
47091 | + | |
47092 | + if( ef_vi_receive_space(vi) ) { | |
47093 | + di = qs->added++ & q->mask; | |
47094 | + ef_assert_equal(q->ids[di], 0xffff); | |
47095 | + q->ids[di] = (uint16_t) dma_id; | |
47096 | + | |
47097 | + if( ! (vi->vi_flags & EF_VI_RX_PHYS_ADDR) ) { | |
47098 | + ef_vi_falcon_dma_rx_buf_desc* dp; | |
47099 | + dp = (ef_vi_falcon_dma_rx_buf_desc*) | |
47100 | + q->descriptors + di; | |
47101 | + falcon_dma_rx_calc_ip_buf(ef_bufaddr(addr), dp); | |
47102 | + } | |
47103 | + else { | |
47104 | + ef_vi_falcon_dma_rx_phys_desc* dp; | |
47105 | + dp = (ef_vi_falcon_dma_rx_phys_desc*) | |
47106 | + q->descriptors + di; | |
47107 | + __falcon_dma_rx_calc_ip_phys(addr, dp, | |
47108 | + initial_rx_bytes); | |
47109 | + } | |
47110 | + | |
47111 | + return 0; | |
47112 | + } | |
47113 | + | |
47114 | + return -EAGAIN; | |
47115 | +} | |
47116 | + | |
47117 | + | |
47118 | +void ef_vi_receive_push(ef_vi* vi) | |
47119 | +{ | |
47120 | + ef_vi_wiob(); | |
47121 | + writel ((vi->ep_state->rxq.added & vi->vi_rxq.mask) << | |
47122 | + __DW4(RX_DESC_WPTR_LBN), | |
47123 | + vi->vi_rxq.doorbell); | |
47124 | +} | |
47125 | + | |
47126 | + | |
47127 | +ef_request_id ef_vi_receive_done(const ef_vi* vi, const ef_event* ef_ev) | |
47128 | +{ | |
47129 | + const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev); | |
47130 | + unsigned di = ev->u32[0] & vi->vi_rxq.mask; | |
47131 | + ef_request_id rq_id; | |
47132 | + | |
47133 | + ef_assert(EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX || | |
47134 | + EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX_DISCARD); | |
47135 | + | |
47136 | + /* Detect spurious / duplicate RX events. We may need to modify this | |
47137 | + ** code so that we are robust if they happen. */ | |
47138 | + ef_assert_equal(di, vi->ep_state->rxq.removed & vi->vi_rxq.mask); | |
47139 | + | |
47140 | + /* We only support 1 port: so events should be in order. */ | |
47141 | + ef_assert(vi->vi_rxq.ids[di] != 0xffff); | |
47142 | + | |
47143 | + rq_id = vi->vi_rxq.ids[di]; | |
47144 | + vi->vi_rxq.ids[di] = 0xffff; | |
47145 | + ++vi->ep_state->rxq.removed; | |
47146 | + return rq_id; | |
47147 | +} | |
47148 | + | |
47149 | +/*! \cidoxg_end */ | |
47150 | Index: head-2008-11-25/drivers/xen/sfc_netfront/pt_tx.c | |
47151 | =================================================================== | |
47152 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
47153 | +++ head-2008-11-25/drivers/xen/sfc_netfront/pt_tx.c 2008-02-20 09:32:49.000000000 +0100 | |
47154 | @@ -0,0 +1,91 @@ | |
47155 | +/**************************************************************************** | |
47156 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
47157 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
47158 | + * 9501 Jeronimo Road, Suite 250, | |
47159 | + * Irvine, CA 92618, USA | |
47160 | + * | |
47161 | + * Maintained by Solarflare Communications | |
47162 | + * <linux-xen-drivers@solarflare.com> | |
47163 | + * <onload-dev@solarflare.com> | |
47164 | + * | |
47165 | + * This program is free software; you can redistribute it and/or modify it | |
47166 | + * under the terms of the GNU General Public License version 2 as published | |
47167 | + * by the Free Software Foundation, incorporated herein by reference. | |
47168 | + * | |
47169 | + * This program is distributed in the hope that it will be useful, | |
47170 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
47171 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
47172 | + * GNU General Public License for more details. | |
47173 | + * | |
47174 | + * You should have received a copy of the GNU General Public License | |
47175 | + * along with this program; if not, write to the Free Software | |
47176 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
47177 | + **************************************************************************** | |
47178 | + */ | |
47179 | + | |
47180 | +/* | |
47181 | + * \author djr | |
47182 | + * \brief Packet-mode transmit interface. | |
47183 | + * \date 2003/04/02 | |
47184 | + */ | |
47185 | + | |
47186 | +/*! \cidoxg_lib_ef */ | |
47187 | +#include "ef_vi_internal.h" | |
47188 | + | |
47189 | + | |
47190 | +int ef_vi_transmit_init(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id) | |
47191 | +{ | |
47192 | + ef_iovec iov = { base, len }; | |
47193 | + return ef_vi_transmitv_init(vi, &iov, 1, dma_id); | |
47194 | +} | |
47195 | + | |
47196 | + | |
47197 | +int ef_vi_transmit(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id) | |
47198 | +{ | |
47199 | + ef_iovec iov = { base, len }; | |
47200 | + int rc = ef_vi_transmitv_init(vi, &iov, 1, dma_id); | |
47201 | + if( rc == 0 ) ef_vi_transmit_push(vi); | |
47202 | + return rc; | |
47203 | +} | |
47204 | + | |
47205 | + | |
47206 | +int ef_vi_transmitv(ef_vi* vi, const ef_iovec* iov, int iov_len, | |
47207 | + ef_request_id dma_id) | |
47208 | +{ | |
47209 | + int rc = ef_vi_transmitv_init(vi, iov, iov_len, dma_id); | |
47210 | + if( rc == 0 ) ef_vi_transmit_push(vi); | |
47211 | + return rc; | |
47212 | +} | |
47213 | + | |
47214 | + | |
47215 | +int ef_vi_transmit_unbundle(ef_vi* vi, const ef_event* __ev, | |
47216 | + ef_request_id* ids) | |
47217 | +{ | |
47218 | + ef_request_id* ids_in = ids; | |
47219 | + ef_vi_txq* q = &vi->vi_txq; | |
47220 | + ef_vi_txq_state* qs = &vi->ep_state->txq; | |
47221 | + const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*__ev); | |
47222 | + unsigned i, stop = (ev->u32[0] + 1) & q->mask; | |
47223 | + | |
47224 | + ef_assert(EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX || | |
47225 | + EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX_ERROR); | |
47226 | + | |
47227 | + /* Shouldn't be batching more than 64 descriptors, and should not go | |
47228 | + ** backwards. */ | |
47229 | + ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask), 64); | |
47230 | + /* Should not complete more than we've posted. */ | |
47231 | + ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask), | |
47232 | + qs->added - qs->removed); | |
47233 | + | |
47234 | + for( i = qs->removed & q->mask; i != stop; i = ++qs->removed & q->mask ) | |
47235 | + if( q->ids[i] != 0xffff ) { | |
47236 | + *ids++ = q->ids[i]; | |
47237 | + q->ids[i] = 0xffff; | |
47238 | + } | |
47239 | + | |
47240 | + ef_assert_le(ids - ids_in, EF_VI_TRANSMIT_BATCH); | |
47241 | + | |
47242 | + return (int) (ids - ids_in); | |
47243 | +} | |
47244 | + | |
47245 | +/*! \cidoxg_end */ | |
47246 | Index: head-2008-11-25/drivers/xen/sfc_netfront/sysdep.h | |
47247 | =================================================================== | |
47248 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
47249 | +++ head-2008-11-25/drivers/xen/sfc_netfront/sysdep.h 2008-02-20 09:32:49.000000000 +0100 | |
47250 | @@ -0,0 +1,184 @@ | |
47251 | +/**************************************************************************** | |
47252 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
47253 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
47254 | + * 9501 Jeronimo Road, Suite 250, | |
47255 | + * Irvine, CA 92618, USA | |
47256 | + * | |
47257 | + * Maintained by Solarflare Communications | |
47258 | + * <linux-xen-drivers@solarflare.com> | |
47259 | + * <onload-dev@solarflare.com> | |
47260 | + * | |
47261 | + * This program is free software; you can redistribute it and/or modify it | |
47262 | + * under the terms of the GNU General Public License version 2 as published | |
47263 | + * by the Free Software Foundation, incorporated herein by reference. | |
47264 | + * | |
47265 | + * This program is distributed in the hope that it will be useful, | |
47266 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
47267 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
47268 | + * GNU General Public License for more details. | |
47269 | + * | |
47270 | + * You should have received a copy of the GNU General Public License | |
47271 | + * along with this program; if not, write to the Free Software | |
47272 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
47273 | + **************************************************************************** | |
47274 | + */ | |
47275 | + | |
47276 | +/* | |
47277 | + * \author stg | |
47278 | + * \brief System dependent support for ef vi lib | |
47279 | + * \date 2007/05/10 | |
47280 | + */ | |
47281 | + | |
47282 | +/*! \cidoxg_include_ci_ul */ | |
47283 | +#ifndef __CI_CIUL_SYSDEP_LINUX_H__ | |
47284 | +#define __CI_CIUL_SYSDEP_LINUX_H__ | |
47285 | + | |
47286 | +/********************************************************************** | |
47287 | + * Kernel version compatability | |
47288 | + */ | |
47289 | + | |
47290 | +#if defined(__GNUC__) | |
47291 | + | |
47292 | +/* Linux kernel doesn't have stdint.h or [u]intptr_t. */ | |
47293 | +# if !defined(LINUX_VERSION_CODE) | |
47294 | +# include <linux/version.h> | |
47295 | +# endif | |
47296 | +# include <asm/io.h> | |
47297 | + | |
47298 | +/* In Linux 2.6.24, linux/types.h has uintptr_t */ | |
47299 | +# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) | |
47300 | +# if BITS_PER_LONG == 32 | |
47301 | + typedef __u32 uintptr_t; | |
47302 | +# else | |
47303 | + typedef __u64 uintptr_t; | |
47304 | +# endif | |
47305 | +# endif | |
47306 | + | |
47307 | +/* But even 2.6.24 doesn't define intptr_t */ | |
47308 | +# if BITS_PER_LONG == 32 | |
47309 | + typedef __s32 intptr_t; | |
47310 | +# else | |
47311 | + typedef __s64 intptr_t; | |
47312 | +# endif | |
47313 | + | |
47314 | +# if defined(__ia64__) | |
47315 | +# define EF_VI_PRIx64 "lx" | |
47316 | +# else | |
47317 | +# define EF_VI_PRIx64 "llx" | |
47318 | +# endif | |
47319 | + | |
47320 | +# define EF_VI_HF __attribute__((visibility("hidden"))) | |
47321 | +# define EF_VI_HV __attribute__((visibility("hidden"))) | |
47322 | + | |
47323 | +# if defined(__i386__) || defined(__x86_64__) /* GCC x86/x64 */ | |
47324 | + typedef unsigned long long ef_vi_dma_addr_t; | |
47325 | +# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) | |
47326 | +# define ef_vi_wiob() __asm__ __volatile__ ("sfence") | |
47327 | +# else | |
47328 | +# define ef_vi_wiob() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8") | |
47329 | +# endif | |
47330 | + | |
47331 | +# endif | |
47332 | +#endif | |
47333 | + | |
47334 | +#ifdef EFX_NOT_UPSTREAM | |
47335 | + | |
47336 | +/* Stuff for architectures/compilers not officially supported */ | |
47337 | + | |
47338 | +#if !defined(__GNUC__) | |
47339 | +# if defined(__PPC__) /* GCC, PPC */ | |
47340 | + typedef unsigned long ef_vi_dma_addr_t; | |
47341 | +# define ef_vi_wiob() wmb() | |
47342 | + | |
47343 | +# ifdef __powerpc64__ | |
47344 | +# ifdef CONFIG_SMP | |
47345 | +# define CI_SMP_SYNC "\n eieio \n" /* memory cache sync */ | |
47346 | +# define CI_SMP_ISYNC "\n isync \n" /* instr cache sync */ | |
47347 | +# else | |
47348 | +# define CI_SMP_SYNC | |
47349 | +# define CI_SMP_ISYNC | |
47350 | +# endif | |
47351 | +# else /* for ppc32 systems */ | |
47352 | +# ifdef CONFIG_SMP | |
47353 | +# define CI_SMP_SYNC "\n eieio \n" | |
47354 | +# define CI_SMP_ISYNC "\n sync \n" | |
47355 | +# else | |
47356 | +# define CI_SMP_SYNC | |
47357 | +# define CI_SMP_ISYNC | |
47358 | +# endif | |
47359 | +# endif | |
47360 | + | |
47361 | +# elif defined(__ia64__) /* GCC, IA64 */ | |
47362 | + typedef unsigned long ef_vi_dma_addr_t; | |
47363 | +# define ef_vi_wiob() __asm__ __volatile__("mf.a": : :"memory") | |
47364 | + | |
47365 | +# else | |
47366 | +# error Unknown processor - GNU C | |
47367 | +# endif | |
47368 | + | |
47369 | +#elif defined(__PGI) | |
47370 | +# error PGI not supported | |
47371 | + | |
47372 | +#elif defined(__INTEL_COMPILER) | |
47373 | + | |
47374 | +/* Intel compilers v7 claim to be very gcc compatible. */ | |
47375 | +# if __INTEL_COMPILER >= 700 | |
47376 | +# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91) | |
47377 | +# define EF_VI_LIKELY(t) __builtin_expect((t), 1) | |
47378 | +# define EF_VI_UNLIKELY(t) __builtin_expect((t), 0) | |
47379 | +# endif | |
47380 | + | |
47381 | +# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) | |
47382 | +# define ef_vi_wiob() __asm__ __volatile__ ("sfence") | |
47383 | +# else | |
47384 | +# define ef_vi_wiob() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8") | |
47385 | +# endif | |
47386 | + | |
47387 | +# else | |
47388 | +# error Old Intel compiler not supported. | |
47389 | +# endif | |
47390 | + | |
47391 | +#else | |
47392 | +# error Unknown compiler. | |
47393 | +#endif | |
47394 | + | |
47395 | +#endif | |
47396 | + | |
47397 | + | |
47398 | +# include <linux/errno.h> | |
47399 | + | |
47400 | + | |
47401 | +/********************************************************************** | |
47402 | + * Extracting bit fields. | |
47403 | + */ | |
47404 | + | |
47405 | +#define _QWORD_GET_LOW(f, v) \ | |
47406 | + (((v).u32[0] >> (f##_LBN)) & ((1u << f##_WIDTH) - 1u)) | |
47407 | +#define _QWORD_GET_HIGH(f, v) \ | |
47408 | + (((v).u32[1] >> (f##_LBN - 32u)) & ((1u << f##_WIDTH) - 1u)) | |
47409 | +#define _QWORD_GET_ANY(f, v) \ | |
47410 | + (((v).u64[0] >> f##_LBN) & (((uint64_t) 1u << f##_WIDTH) - 1u)) | |
47411 | + | |
47412 | +#define QWORD_GET(f, v) \ | |
47413 | + ((f##_LBN + f##_WIDTH) <= 32u \ | |
47414 | + ? _QWORD_GET_LOW(f, (v)) \ | |
47415 | + : ((f##_LBN >= 32u) ? _QWORD_GET_HIGH(f, (v)) : _QWORD_GET_ANY(f, (v)))) | |
47416 | + | |
47417 | +#define QWORD_GET_U(f, v) ((unsigned) QWORD_GET(f, (v))) | |
47418 | + | |
47419 | +#define _QWORD_TEST_BIT_LOW(f, v) ((v).u32[0] & (1u << (f##_LBN))) | |
47420 | +#define _QWORD_TEST_BIT_HIGH(f, v) ((v).u32[1] & (1u << (f##_LBN - 32u))) | |
47421 | + | |
47422 | +#define QWORD_TEST_BIT(f, v) \ | |
47423 | + (f##_LBN < 32 ? _QWORD_TEST_BIT_LOW(f, (v)) : _QWORD_TEST_BIT_HIGH(f, (v))) | |
47424 | + | |
47425 | + | |
47426 | + | |
47427 | + | |
47428 | +#ifndef DECLSPEC_NORETURN | |
47429 | +/* normally defined on Windows to expand to a declaration that the | |
47430 | + function will not return */ | |
47431 | +# define DECLSPEC_NORETURN | |
47432 | +#endif | |
47433 | + | |
47434 | +#endif /* __CI_CIUL_SYSDEP_LINUX_H__ */ | |
47435 | Index: head-2008-11-25/drivers/xen/sfc_netfront/vi_init.c | |
47436 | =================================================================== | |
47437 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
47438 | +++ head-2008-11-25/drivers/xen/sfc_netfront/vi_init.c 2008-02-20 09:32:49.000000000 +0100 | |
47439 | @@ -0,0 +1,183 @@ | |
47440 | +/**************************************************************************** | |
47441 | + * Copyright 2002-2005: Level 5 Networks Inc. | |
47442 | + * Copyright 2005-2008: Solarflare Communications Inc, | |
47443 | + * 9501 Jeronimo Road, Suite 250, | |
47444 | + * Irvine, CA 92618, USA | |
47445 | + * | |
47446 | + * Maintained by Solarflare Communications | |
47447 | + * <linux-xen-drivers@solarflare.com> | |
47448 | + * <onload-dev@solarflare.com> | |
47449 | + * | |
47450 | + * This program is free software; you can redistribute it and/or modify it | |
47451 | + * under the terms of the GNU General Public License version 2 as published | |
47452 | + * by the Free Software Foundation, incorporated herein by reference. | |
47453 | + * | |
47454 | + * This program is distributed in the hope that it will be useful, | |
47455 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
47456 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
47457 | + * GNU General Public License for more details. | |
47458 | + * | |
47459 | + * You should have received a copy of the GNU General Public License | |
47460 | + * along with this program; if not, write to the Free Software | |
47461 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
47462 | + **************************************************************************** | |
47463 | + */ | |
47464 | + | |
47465 | +/* | |
47466 | + * \author djr | |
47467 | + * \brief Initialisation of VIs. | |
47468 | + * \date 2007/06/08 | |
47469 | + */ | |
47470 | + | |
47471 | +#include "ef_vi_internal.h" | |
47472 | + | |
47473 | +#define EF_VI_STATE_BYTES(rxq_sz, txq_sz) \ | |
47474 | + (sizeof(ef_vi_state) + (rxq_sz) * sizeof(uint16_t) \ | |
47475 | + + (txq_sz) * sizeof(uint16_t)) | |
47476 | + | |
47477 | +int ef_vi_calc_state_bytes(int rxq_sz, int txq_sz) | |
47478 | +{ | |
47479 | + ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz)); | |
47480 | + ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz)); | |
47481 | + | |
47482 | + return EF_VI_STATE_BYTES(rxq_sz, txq_sz); | |
47483 | +} | |
47484 | + | |
47485 | + | |
47486 | +int ef_vi_state_bytes(ef_vi* vi) | |
47487 | +{ | |
47488 | + int rxq_sz = 0, txq_sz = 0; | |
47489 | + if( ef_vi_receive_capacity(vi) ) | |
47490 | + rxq_sz = ef_vi_receive_capacity(vi) + 1; | |
47491 | + if( ef_vi_transmit_capacity(vi) ) | |
47492 | + txq_sz = ef_vi_transmit_capacity(vi) + 1; | |
47493 | + | |
47494 | + ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz)); | |
47495 | + ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz)); | |
47496 | + | |
47497 | + return EF_VI_STATE_BYTES(rxq_sz, txq_sz); | |
47498 | +} | |
47499 | + | |
47500 | + | |
47501 | +void ef_eventq_state_init(ef_vi* evq) | |
47502 | +{ | |
47503 | + int j; | |
47504 | + | |
47505 | + for (j = 0; j<EFAB_DMAQS_PER_EVQ_MAX; j++) { | |
47506 | + ef_rx_dup_state_t *rx_dup_state = | |
47507 | + &evq->evq_state->rx_dup_state[j]; | |
47508 | + rx_dup_state->bad_sop = 0; | |
47509 | + rx_dup_state->rx_last_desc_ptr = -1; | |
47510 | + rx_dup_state->frag_num = 0; | |
47511 | + } | |
47512 | + | |
47513 | + evq->evq_state->evq_ptr = 0; | |
47514 | +} | |
47515 | + | |
47516 | + | |
47517 | +void ef_vi_state_init(ef_vi* vi) | |
47518 | +{ | |
47519 | + ef_vi_state* state = vi->ep_state; | |
47520 | + unsigned i; | |
47521 | + | |
47522 | + state->txq.added = state->txq.removed = 0; | |
47523 | + state->rxq.added = state->rxq.removed = 0; | |
47524 | + | |
47525 | + if( vi->vi_rxq.mask ) | |
47526 | + for( i = 0; i <= vi->vi_rxq.mask; ++i ) | |
47527 | + vi->vi_rxq.ids[i] = (uint16_t) -1; | |
47528 | + if( vi->vi_txq.mask ) | |
47529 | + for( i = 0; i <= vi->vi_txq.mask; ++i ) | |
47530 | + vi->vi_txq.ids[i] = (uint16_t) -1; | |
47531 | +} | |
47532 | + | |
47533 | + | |
47534 | +void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type nic_type, | |
47535 | + int instance, unsigned evq_bytes, void* base, | |
47536 | + void* timer_reg) | |
47537 | +{ | |
47538 | + struct vi_mappings* vm = (struct vi_mappings*) data_area; | |
47539 | + | |
47540 | + vm->signature = VI_MAPPING_SIGNATURE; | |
47541 | + vm->vi_instance = instance; | |
47542 | + vm->nic_type = nic_type; | |
47543 | + vm->evq_bytes = evq_bytes; | |
47544 | + vm->evq_base = base; | |
47545 | + vm->evq_timer_reg = timer_reg; | |
47546 | +} | |
47547 | + | |
47548 | + | |
47549 | +void ef_vi_init(ef_vi* vi, void* vvis, ef_vi_state* state, | |
47550 | + ef_eventq_state* evq_state, enum ef_vi_flags vi_flags) | |
47551 | +{ | |
47552 | + struct vi_mappings* vm = (struct vi_mappings*) vvis; | |
47553 | + | |
47554 | + vi->vi_i = vm->vi_instance; | |
47555 | + vi->ep_state = state; | |
47556 | + vi->vi_flags = vi_flags; | |
47557 | + | |
47558 | + switch( vm->nic_type.arch ) { | |
47559 | + case EF_VI_ARCH_FALCON: | |
47560 | + falcon_vi_init(vi, vvis); | |
47561 | + break; | |
47562 | + default: | |
47563 | + /* ?? TODO: We should return an error code. */ | |
47564 | + ef_assert(0); | |
47565 | + break; | |
47566 | + } | |
47567 | + | |
47568 | + if( vm->evq_bytes ) { | |
47569 | + vi->evq_state = evq_state; | |
47570 | + vi->evq_mask = vm->evq_bytes - 1u; | |
47571 | + vi->evq_base = vm->evq_base; | |
47572 | + vi->evq_timer_reg = vm->evq_timer_reg; | |
47573 | + } | |
47574 | + | |
47575 | + EF_VI_MAGIC_SET(vi, EF_VI); | |
47576 | +} | |
47577 | + | |
47578 | + | |
47579 | +/* Initialise [data_area] with information required to initialise an ef_vi. | |
47580 | + * In the following, an unused param should be set to NULL. Note the case | |
47581 | + * marked (*) of [iobuf_mmap] for falcon/driver; for the normal driver this | |
47582 | + * must be NULL. | |
47583 | + * | |
47584 | + * \param data_area [in,out] required, must ref at least VI_MAPPING_SIZE | |
47585 | + * bytes | |
47586 | + * \param io_mmap [in] ef1, required | |
47587 | + * falcon, required | |
47588 | + * \param iobuf_mmap [in] ef1, unused | |
47589 | + * falcon, required | |
47590 | + */ | |
47591 | +void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type nic_type, | |
47592 | + unsigned rxq_capacity, unsigned txq_capacity, | |
47593 | + int instance, void* io_mmap, | |
47594 | + void* iobuf_mmap_rx, void* iobuf_mmap_tx, | |
47595 | + enum ef_vi_flags vi_flags) | |
47596 | +{ | |
47597 | + struct vi_mappings* vm = (struct vi_mappings*) data_area; | |
47598 | + int rx_desc_bytes, rxq_bytes; | |
47599 | + | |
47600 | + ef_assert(rxq_capacity > 0 || txq_capacity > 0); | |
47601 | + ef_assert(vm); | |
47602 | + ef_assert(io_mmap); | |
47603 | + ef_assert(iobuf_mmap_rx || iobuf_mmap_tx); | |
47604 | + | |
47605 | + vm->signature = VI_MAPPING_SIGNATURE; | |
47606 | + vm->vi_instance = instance; | |
47607 | + vm->nic_type = nic_type; | |
47608 | + | |
47609 | + rx_desc_bytes = (vi_flags & EF_VI_RX_PHYS_ADDR) ? 8 : 4; | |
47610 | + rxq_bytes = rxq_capacity * rx_desc_bytes; | |
47611 | + rxq_bytes = (rxq_bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); | |
47612 | + | |
47613 | + if( iobuf_mmap_rx == iobuf_mmap_tx ) | |
47614 | + iobuf_mmap_tx = (char*) iobuf_mmap_rx + rxq_bytes; | |
47615 | + | |
47616 | + vm->rx_queue_capacity = rxq_capacity; | |
47617 | + vm->rx_dma_falcon = iobuf_mmap_rx; | |
47618 | + vm->rx_bell = (char*) io_mmap + (RX_DESC_UPD_REG_KER_OFST & 4095); | |
47619 | + vm->tx_queue_capacity = txq_capacity; | |
47620 | + vm->tx_dma_falcon = iobuf_mmap_tx; | |
47621 | + vm->tx_bell = (char*) io_mmap + (TX_DESC_UPD_REG_KER_OFST & 4095); | |
47622 | +} | |
47623 | Index: head-2008-11-25/drivers/xen/sfc_netutil/Makefile | |
47624 | =================================================================== | |
47625 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
47626 | +++ head-2008-11-25/drivers/xen/sfc_netutil/Makefile 2008-02-26 10:54:12.000000000 +0100 | |
47627 | @@ -0,0 +1,11 @@ | |
47628 | +EXTRA_CFLAGS += -Idrivers/xen/sfc_netutil | |
47629 | +EXTRA_CFLAGS += -Werror | |
47630 | + | |
47631 | +ifdef GGOV | |
47632 | +EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV | |
47633 | +endif | |
47634 | + | |
47635 | +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) := sfc_netutil.o | |
47636 | + | |
47637 | +sfc_netutil-objs := accel_cuckoo_hash.o accel_msg_iface.o accel_util.o | |
47638 | + | |
47639 | Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.c | |
47640 | =================================================================== | |
47641 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
47642 | +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.c 2008-02-20 09:32:49.000000000 +0100 | |
47643 | @@ -0,0 +1,651 @@ | |
47644 | +/**************************************************************************** | |
47645 | + * Solarflare driver for Xen network acceleration | |
47646 | + * | |
47647 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
47648 | + * 9501 Jeronimo Road, Suite 250, | |
47649 | + * Irvine, CA 92618, USA | |
47650 | + * | |
47651 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
47652 | + * | |
47653 | + * This program is free software; you can redistribute it and/or modify it | |
47654 | + * under the terms of the GNU General Public License version 2 as published | |
47655 | + * by the Free Software Foundation, incorporated herein by reference. | |
47656 | + * | |
47657 | + * This program is distributed in the hope that it will be useful, | |
47658 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
47659 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
47660 | + * GNU General Public License for more details. | |
47661 | + * | |
47662 | + * You should have received a copy of the GNU General Public License | |
47663 | + * along with this program; if not, write to the Free Software | |
47664 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
47665 | + **************************************************************************** | |
47666 | + */ | |
47667 | + | |
47668 | +#include <linux/types.h> /* needed for linux/random.h */ | |
47669 | +#include <linux/random.h> | |
47670 | + | |
47671 | +#include "accel_cuckoo_hash.h" | |
47672 | +#include "accel_util.h" | |
47673 | + | |
47674 | +static inline int cuckoo_hash_key_compare(cuckoo_hash_table *hashtab, | |
47675 | + cuckoo_hash_key *key1, | |
47676 | + cuckoo_hash_key *key2) | |
47677 | +{ | |
47678 | + return !memcmp(key1, key2, hashtab->key_length); | |
47679 | +} | |
47680 | + | |
47681 | + | |
47682 | +static inline void cuckoo_hash_key_set(cuckoo_hash_key *key1, | |
47683 | + cuckoo_hash_key *key2) | |
47684 | +{ | |
47685 | + *key1 = *key2; | |
47686 | +} | |
47687 | + | |
47688 | + | |
47689 | +/* | |
47690 | + * Sets hash function parameters. Chooses "a" to be odd, 0 < a < 2^w | |
47691 | + * where w is the length of the key | |
47692 | + */ | |
47693 | +static void set_hash_parameters(cuckoo_hash_table *hashtab) | |
47694 | +{ | |
47695 | + again: | |
47696 | + hashtab->a0 = hashtab->a1 = 0; | |
47697 | + | |
47698 | + /* Make sure random */ | |
47699 | + get_random_bytes(&hashtab->a0, hashtab->key_length); | |
47700 | + get_random_bytes(&hashtab->a1, hashtab->key_length); | |
47701 | + | |
47702 | + /* Make sure odd */ | |
47703 | + hashtab->a0 |= 1; | |
47704 | + hashtab->a1 |= 1; | |
47705 | + | |
47706 | + /* Being different is good */ | |
47707 | + if (hashtab->a0 != hashtab->a1) | |
47708 | + return; | |
47709 | + | |
47710 | + goto again; | |
47711 | +} | |
47712 | + | |
47713 | +int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits, | |
47714 | + unsigned key_length) | |
47715 | +{ | |
47716 | + char *table_mem; | |
47717 | + unsigned length = 1 << length_bits; | |
47718 | + | |
47719 | + BUG_ON(length_bits >= sizeof(unsigned) * 8); | |
47720 | + BUG_ON(key_length > sizeof(cuckoo_hash_key)); | |
47721 | + | |
47722 | + table_mem = kmalloc(sizeof(cuckoo_hash_entry) * 2 * length, GFP_KERNEL); | |
47723 | + | |
47724 | + if (table_mem == NULL) | |
47725 | + return -ENOMEM; | |
47726 | + | |
47727 | + hashtab->length = length; | |
47728 | + hashtab->length_bits = length_bits; | |
47729 | + hashtab->key_length = key_length; | |
47730 | + hashtab->entries = 0; | |
47731 | + | |
47732 | + hashtab->table0 = (cuckoo_hash_entry *)table_mem; | |
47733 | + hashtab->table1 = (cuckoo_hash_entry *) | |
47734 | + (table_mem + length * sizeof(cuckoo_hash_entry)); | |
47735 | + | |
47736 | + set_hash_parameters(hashtab); | |
47737 | + | |
47738 | + /* Zero the table */ | |
47739 | + memset(hashtab->table0, 0, length * 2 * sizeof(cuckoo_hash_entry)); | |
47740 | + | |
47741 | + return 0; | |
47742 | +} | |
47743 | +EXPORT_SYMBOL_GPL(cuckoo_hash_init); | |
47744 | + | |
47745 | +void cuckoo_hash_destroy(cuckoo_hash_table *hashtab) | |
47746 | +{ | |
47747 | + if (hashtab->table0 != NULL) | |
47748 | + kfree(hashtab->table0); | |
47749 | +} | |
47750 | + | |
47751 | +EXPORT_SYMBOL_GPL(cuckoo_hash_destroy); | |
47752 | + | |
47753 | +/* | |
47754 | + * This computes sizeof(cuckoo_hash) bits of hash, not all will be | |
47755 | + * necessarily used, but the hash function throws away any that | |
47756 | + * aren't | |
47757 | + */ | |
47758 | +static inline void cuckoo_compute_hash_helper(cuckoo_hash_table *hashtab, | |
47759 | + cuckoo_hash_key *a, | |
47760 | + cuckoo_hash_key *x, | |
47761 | + cuckoo_hash *result) | |
47762 | +{ | |
47763 | + u64 multiply_result = 0, a_temp, x_temp; | |
47764 | + u32 carry = 0; | |
47765 | + u32 *a_words; | |
47766 | + u32 *x_words; | |
47767 | + int i; | |
47768 | + | |
47769 | + /* | |
47770 | + * As the mod and div operations in the function effectively | |
47771 | + * reduce and shift the bits of the product down to just the | |
47772 | + * third word, we need only compute that and return it as a | |
47773 | + * result. | |
47774 | + * | |
47775 | + * Do enough long multiplication to get the word we need | |
47776 | + */ | |
47777 | + | |
47778 | + /* This assumes things about the sizes of the key and hash */ | |
47779 | + BUG_ON(hashtab->key_length % sizeof(u32) != 0); | |
47780 | + BUG_ON(sizeof(cuckoo_hash) != sizeof(u32)); | |
47781 | + | |
47782 | + a_words = (u32 *)a; | |
47783 | + x_words = (u32 *)x; | |
47784 | + | |
47785 | + for (i = 0; i < hashtab->key_length / sizeof(u32); i++) { | |
47786 | + a_temp = a_words[i]; | |
47787 | + x_temp = x_words[i]; | |
47788 | + | |
47789 | + multiply_result = (a_temp * x_temp) + carry; | |
47790 | + carry = (multiply_result >> 32) & 0xffffffff; | |
47791 | + } | |
47792 | + | |
47793 | + *result = multiply_result & 0xffffffff; | |
47794 | +} | |
47795 | + | |
47796 | + | |
47797 | +/* | |
47798 | + * Want to implement (ax mod 2^w) div 2^(w-q) for odd a, 0 < a < 2^w; | |
47799 | + * w is the length of the key, q is the length of the hash, I think. | |
47800 | + * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf | |
47801 | + */ | |
47802 | +static cuckoo_hash cuckoo_compute_hash(cuckoo_hash_table *hashtab, | |
47803 | + cuckoo_hash_key *key, | |
47804 | + cuckoo_hash_key *a) | |
47805 | +{ | |
47806 | + unsigned q = hashtab->length_bits; | |
47807 | + unsigned shift = 32 - q; | |
47808 | + unsigned mask = ((1 << q) - 1) << shift; | |
47809 | + cuckoo_hash hash; | |
47810 | + | |
47811 | + cuckoo_compute_hash_helper(hashtab, a, key, &hash); | |
47812 | + | |
47813 | + /* | |
47814 | + * Take the top few bits to get the right length for this | |
47815 | + * hash table | |
47816 | + */ | |
47817 | + hash = (hash & mask) >> shift; | |
47818 | + | |
47819 | + BUG_ON(hash >= hashtab->length); | |
47820 | + | |
47821 | + return hash; | |
47822 | +} | |
47823 | + | |
47824 | + | |
47825 | +static int cuckoo_hash_lookup0(cuckoo_hash_table *hashtab, | |
47826 | + cuckoo_hash_key *key, | |
47827 | + cuckoo_hash_value *value) | |
47828 | +{ | |
47829 | + cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0); | |
47830 | + | |
47831 | + if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) | |
47832 | + && cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key), | |
47833 | + key)) { | |
47834 | + *value = hashtab->table0[hash].value; | |
47835 | + return 1; | |
47836 | + } | |
47837 | + | |
47838 | + return 0; | |
47839 | +} | |
47840 | + | |
47841 | +static int cuckoo_hash_lookup1(cuckoo_hash_table *hashtab, | |
47842 | + cuckoo_hash_key *key, | |
47843 | + cuckoo_hash_value *value) | |
47844 | +{ | |
47845 | + cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1); | |
47846 | + | |
47847 | + if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) | |
47848 | + && cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key), | |
47849 | + key)) { | |
47850 | + *value = hashtab->table1[hash].value; | |
47851 | + return 1; | |
47852 | + } | |
47853 | + | |
47854 | + return 0; | |
47855 | +} | |
47856 | + | |
47857 | + | |
47858 | +int cuckoo_hash_lookup(cuckoo_hash_table *hashtab, cuckoo_hash_key *key, | |
47859 | + cuckoo_hash_value *value) | |
47860 | +{ | |
47861 | + return cuckoo_hash_lookup0(hashtab, key, value) | |
47862 | + || cuckoo_hash_lookup1(hashtab, key, value); | |
47863 | +} | |
47864 | +EXPORT_SYMBOL_GPL(cuckoo_hash_lookup); | |
47865 | + | |
47866 | + | |
47867 | +/* Transfer any active entries from "old_table" into hashtab */ | |
47868 | +static int cuckoo_hash_transfer_entries(cuckoo_hash_table *hashtab, | |
47869 | + cuckoo_hash_entry *old_table, | |
47870 | + unsigned capacity) | |
47871 | +{ | |
47872 | + int i, rc; | |
47873 | + cuckoo_hash_entry *entry; | |
47874 | + | |
47875 | + hashtab->entries = 0; | |
47876 | + | |
47877 | + for (i = 0; i < capacity; i++) { | |
47878 | + entry = &old_table[i]; | |
47879 | + if (entry->state == CUCKOO_HASH_STATE_OCCUPIED) { | |
47880 | + rc = cuckoo_hash_add(hashtab, &(entry->key), | |
47881 | + entry->value, 0); | |
47882 | + if (rc != 0) { | |
47883 | + return rc; | |
47884 | + } | |
47885 | + } | |
47886 | + } | |
47887 | + | |
47888 | + return 0; | |
47889 | +} | |
47890 | + | |
47891 | + | |
47892 | +int cuckoo_hash_rehash(cuckoo_hash_table *hashtab) | |
47893 | +{ | |
47894 | + cuckoo_hash_entry *new_table; | |
47895 | + cuckoo_hash_table old_hashtab; | |
47896 | + int resize = 0, rc, rehash_count; | |
47897 | + | |
47898 | + /* | |
47899 | + * Store old tables so we can access the existing values and | |
47900 | + * copy across | |
47901 | + */ | |
47902 | + memcpy(&old_hashtab, hashtab, sizeof(cuckoo_hash_table)); | |
47903 | + | |
47904 | + /* resize if hashtable is more than half full */ | |
47905 | + if (old_hashtab.entries > old_hashtab.length && | |
47906 | + old_hashtab.length_bits < 32) | |
47907 | + resize = 1; | |
47908 | + | |
47909 | + resize: | |
47910 | + if (resize) { | |
47911 | + new_table = kmalloc(sizeof(cuckoo_hash_entry) * 4 * hashtab->length, | |
47912 | + GFP_ATOMIC); | |
47913 | + if (new_table == NULL) { | |
47914 | + rc = -ENOMEM; | |
47915 | + goto err; | |
47916 | + } | |
47917 | + | |
47918 | + hashtab->length = 2 * hashtab->length; | |
47919 | + hashtab->length_bits++; | |
47920 | + } else { | |
47921 | + new_table = kmalloc(sizeof(cuckoo_hash_entry) * 2 * hashtab->length, | |
47922 | + GFP_ATOMIC); | |
47923 | + if (new_table == NULL) { | |
47924 | + rc = -ENOMEM; | |
47925 | + goto err; | |
47926 | + } | |
47927 | + } | |
47928 | + | |
47929 | + /* | |
47930 | + * Point hashtab to new memory region so we can try to | |
47931 | + * construct new table | |
47932 | + */ | |
47933 | + hashtab->table0 = new_table; | |
47934 | + hashtab->table1 = (cuckoo_hash_entry *) | |
47935 | + ((char *)new_table + hashtab->length * sizeof(cuckoo_hash_entry)); | |
47936 | + | |
47937 | + rehash_count = 0; | |
47938 | + | |
47939 | + again: | |
47940 | + /* Zero the new tables */ | |
47941 | + memset(new_table, 0, hashtab->length * 2 * sizeof(cuckoo_hash_entry)); | |
47942 | + | |
47943 | + /* Choose new parameters for the hash functions */ | |
47944 | + set_hash_parameters(hashtab); | |
47945 | + | |
47946 | + /* | |
47947 | + * Multiply old_table_length by 2 as the length refers to each | |
47948 | + * table, and there are two of them. This assumes that they | |
47949 | + * are arranged sequentially in memory, so assert it | |
47950 | + */ | |
47951 | + BUG_ON(((char *)old_hashtab.table1) != | |
47952 | + ((char *)old_hashtab.table0 + old_hashtab.length | |
47953 | + * sizeof(cuckoo_hash_entry))); | |
47954 | + rc = cuckoo_hash_transfer_entries(hashtab, old_hashtab.table0, | |
47955 | + old_hashtab.length * 2); | |
47956 | + if (rc < 0) { | |
47957 | + /* Problem */ | |
47958 | + if (rc == -ENOSPC) { | |
47959 | + ++rehash_count; | |
47960 | + if (rehash_count < CUCKOO_HASH_MAX_LOOP) { | |
47961 | + /* | |
47962 | + * Wanted to rehash, but rather than | |
47963 | + * recurse we can just do it here | |
47964 | + */ | |
47965 | + goto again; | |
47966 | + } else { | |
47967 | + /* | |
47968 | + * Didn't manage to rehash, so let's | |
47969 | + * go up a size (if we haven't already | |
47970 | + * and there's space) | |
47971 | + */ | |
47972 | + if (!resize && hashtab->length_bits < 32) { | |
47973 | + resize = 1; | |
47974 | + kfree(new_table); | |
47975 | + goto resize; | |
47976 | + } | |
47977 | + else | |
47978 | + goto err; | |
47979 | + } | |
47980 | + } | |
47981 | + else | |
47982 | + goto err; | |
47983 | + } | |
47984 | + | |
47985 | + /* Success, I think. Free up the old table */ | |
47986 | + kfree(old_hashtab.table0); | |
47987 | + | |
47988 | + /* We should have put all the entries from old table in the new one */ | |
47989 | + BUG_ON(hashtab->entries != old_hashtab.entries); | |
47990 | + | |
47991 | + return 0; | |
47992 | + err: | |
47993 | + EPRINTK("%s: Rehash failed, giving up\n", __FUNCTION__); | |
47994 | + /* Some other error, give up, at least restore table to how it was */ | |
47995 | + memcpy(hashtab, &old_hashtab, sizeof(cuckoo_hash_table)); | |
47996 | + if (new_table) | |
47997 | + kfree(new_table); | |
47998 | + return rc; | |
47999 | +} | |
48000 | +EXPORT_SYMBOL_GPL(cuckoo_hash_rehash); | |
48001 | + | |
48002 | + | |
48003 | +static int | |
48004 | +cuckoo_hash_insert_or_displace(cuckoo_hash_entry *table, unsigned hash, | |
48005 | + cuckoo_hash_key *key, | |
48006 | + cuckoo_hash_value value, | |
48007 | + cuckoo_hash_key *displaced_key, | |
48008 | + cuckoo_hash_value *displaced_value) | |
48009 | +{ | |
48010 | + if (table[hash].state == CUCKOO_HASH_STATE_VACANT) { | |
48011 | + cuckoo_hash_key_set(&(table[hash].key), key); | |
48012 | + table[hash].value = value; | |
48013 | + table[hash].state = CUCKOO_HASH_STATE_OCCUPIED; | |
48014 | + | |
48015 | + return 1; | |
48016 | + } else { | |
48017 | + cuckoo_hash_key_set(displaced_key, &(table[hash].key)); | |
48018 | + *displaced_value = table[hash].value; | |
48019 | + cuckoo_hash_key_set(&(table[hash].key), key); | |
48020 | + table[hash].value = value; | |
48021 | + | |
48022 | + return 0; | |
48023 | + } | |
48024 | +} | |
48025 | + | |
48026 | + | |
48027 | +int cuckoo_hash_add(cuckoo_hash_table *hashtab, cuckoo_hash_key *key, | |
48028 | + cuckoo_hash_value value, int can_rehash) | |
48029 | +{ | |
48030 | + cuckoo_hash hash0, hash1; | |
48031 | + int i, rc; | |
48032 | + cuckoo_hash_key key1, key2; | |
48033 | + | |
48034 | + cuckoo_hash_key_set(&key1, key); | |
48035 | + | |
48036 | + again: | |
48037 | + i = 0; | |
48038 | + do { | |
48039 | + hash0 = cuckoo_compute_hash(hashtab, &key1, &hashtab->a0); | |
48040 | + if (cuckoo_hash_insert_or_displace(hashtab->table0, hash0, | |
48041 | + &key1, value, &key2, | |
48042 | + &value)) { | |
48043 | + /* Success */ | |
48044 | + hashtab->entries++; | |
48045 | + return 0; | |
48046 | + } | |
48047 | + | |
48048 | + hash1 = cuckoo_compute_hash(hashtab, &key2, &hashtab->a1); | |
48049 | + if (cuckoo_hash_insert_or_displace(hashtab->table1, hash1, | |
48050 | + &key2, value, &key1, | |
48051 | + &value)) { | |
48052 | + /* Success */ | |
48053 | + hashtab->entries++; | |
48054 | + return 0; | |
48055 | + } | |
48056 | + } while (++i < CUCKOO_HASH_MAX_LOOP); | |
48057 | + | |
48058 | + if (can_rehash) { | |
48059 | + if ((rc = cuckoo_hash_rehash(hashtab)) < 0) { | |
48060 | + /* | |
48061 | + * Give up - this will drop whichever | |
48062 | + * key/value pair we have currently displaced | |
48063 | + * on the floor | |
48064 | + */ | |
48065 | + return rc; | |
48066 | + } | |
48067 | + goto again; | |
48068 | + } | |
48069 | + | |
48070 | + EPRINTK("%s: failed hash add\n", __FUNCTION__); | |
48071 | + /* | |
48072 | + * Couldn't do it - bad as we've now removed some random thing | |
48073 | + * from the table, and will just drop it on the floor. Better | |
48074 | + * would be to somehow revert the table to the state it was in | |
48075 | + * at the start | |
48076 | + */ | |
48077 | + return -ENOSPC; | |
48078 | +} | |
48079 | +EXPORT_SYMBOL_GPL(cuckoo_hash_add); | |
48080 | + | |
48081 | + | |
48082 | +int cuckoo_hash_add_check(cuckoo_hash_table *hashtab, | |
48083 | + cuckoo_hash_key *key, cuckoo_hash_value value, | |
48084 | + int can_rehash) | |
48085 | +{ | |
48086 | + int stored_value; | |
48087 | + | |
48088 | + if (cuckoo_hash_lookup(hashtab, key, &stored_value)) | |
48089 | + return -EBUSY; | |
48090 | + | |
48091 | + return cuckoo_hash_add(hashtab, key, value, can_rehash); | |
48092 | +} | |
48093 | +EXPORT_SYMBOL_GPL(cuckoo_hash_add_check); | |
48094 | + | |
48095 | + | |
48096 | +int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key) | |
48097 | +{ | |
48098 | + cuckoo_hash hash; | |
48099 | + | |
48100 | + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0); | |
48101 | + if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) && | |
48102 | + cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key), | |
48103 | + key)) { | |
48104 | + hashtab->table0[hash].state = CUCKOO_HASH_STATE_VACANT; | |
48105 | + hashtab->entries--; | |
48106 | + return 0; | |
48107 | + } | |
48108 | + | |
48109 | + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1); | |
48110 | + if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) && | |
48111 | + cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key), | |
48112 | + key)) { | |
48113 | + hashtab->table1[hash].state = CUCKOO_HASH_STATE_VACANT; | |
48114 | + hashtab->entries--; | |
48115 | + return 0; | |
48116 | + } | |
48117 | + | |
48118 | + return -EINVAL; | |
48119 | +} | |
48120 | +EXPORT_SYMBOL_GPL(cuckoo_hash_remove); | |
48121 | + | |
48122 | + | |
48123 | +int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key, | |
48124 | + cuckoo_hash_value value) | |
48125 | +{ | |
48126 | + cuckoo_hash hash; | |
48127 | + | |
48128 | + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0); | |
48129 | + if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) && | |
48130 | + cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key), | |
48131 | + key)) { | |
48132 | + hashtab->table0[hash].value = value; | |
48133 | + return 0; | |
48134 | + } | |
48135 | + | |
48136 | + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1); | |
48137 | + if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) && | |
48138 | + cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key), | |
48139 | + key)) { | |
48140 | + hashtab->table1[hash].value = value; | |
48141 | + return 0; | |
48142 | + } | |
48143 | + | |
48144 | + return -EINVAL; | |
48145 | +} | |
48146 | +EXPORT_SYMBOL_GPL(cuckoo_hash_update); | |
48147 | + | |
48148 | + | |
48149 | +void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab) | |
48150 | +{ | |
48151 | + hashtab->iterate_index = 0; | |
48152 | +} | |
48153 | +EXPORT_SYMBOL_GPL(cuckoo_hash_iterate_reset); | |
48154 | + | |
48155 | + | |
48156 | +int cuckoo_hash_iterate(cuckoo_hash_table *hashtab, | |
48157 | + cuckoo_hash_key *key, cuckoo_hash_value *value) | |
48158 | +{ | |
48159 | + unsigned index; | |
48160 | + | |
48161 | + while (hashtab->iterate_index < hashtab->length) { | |
48162 | + index = hashtab->iterate_index; | |
48163 | + ++hashtab->iterate_index; | |
48164 | + if (hashtab->table0[index].state == CUCKOO_HASH_STATE_OCCUPIED) { | |
48165 | + *key = hashtab->table0[index].key; | |
48166 | + *value = hashtab->table0[index].value; | |
48167 | + return 0; | |
48168 | + } | |
48169 | + } | |
48170 | + | |
48171 | + while (hashtab->iterate_index >= hashtab->length && | |
48172 | + hashtab->iterate_index < hashtab->length * 2) { | |
48173 | + index = hashtab->iterate_index - hashtab->length; | |
48174 | + ++hashtab->iterate_index; | |
48175 | + if (hashtab->table1[index].state == CUCKOO_HASH_STATE_OCCUPIED) { | |
48176 | + *key = hashtab->table1[index].key; | |
48177 | + *value = hashtab->table1[index].value; | |
48178 | + return 0; | |
48179 | + } | |
48180 | + } | |
48181 | + | |
48182 | + return -ENOSPC; | |
48183 | +} | |
48184 | +EXPORT_SYMBOL_GPL(cuckoo_hash_iterate); | |
48185 | + | |
48186 | + | |
48187 | +#if 0 | |
48188 | +void cuckoo_hash_valid(cuckoo_hash_table *hashtab) | |
48189 | +{ | |
48190 | + int i, entry_count = 0; | |
48191 | + | |
48192 | + for (i=0; i < hashtab->length; i++) { | |
48193 | + EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT && | |
48194 | + hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED); | |
48195 | + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED) | |
48196 | + entry_count++; | |
48197 | + EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT && | |
48198 | + hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED); | |
48199 | + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED) | |
48200 | + entry_count++; | |
48201 | + } | |
48202 | + | |
48203 | + if (entry_count != hashtab->entries) { | |
48204 | + EPRINTK("%s: bad count\n", __FUNCTION__); | |
48205 | + cuckoo_hash_dump(hashtab); | |
48206 | + return; | |
48207 | + } | |
48208 | + | |
48209 | + for (i=0; i< hashtab->length; i++) { | |
48210 | + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED) | |
48211 | + if (i != cuckoo_compute_hash(hashtab, | |
48212 | + &hashtab->table0[i].key, | |
48213 | + &hashtab->a0)) { | |
48214 | + EPRINTK("%s: Bad key table 0 index %d\n", | |
48215 | + __FUNCTION__, i); | |
48216 | + cuckoo_hash_dump(hashtab); | |
48217 | + return; | |
48218 | + } | |
48219 | + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED) | |
48220 | + if (i != cuckoo_compute_hash(hashtab, | |
48221 | + &hashtab->table1[i].key, | |
48222 | + &hashtab->a1)) { | |
48223 | + EPRINTK("%s: Bad key table 1 index %d\n", | |
48224 | + __FUNCTION__, i); | |
48225 | + cuckoo_hash_dump(hashtab); | |
48226 | + return; | |
48227 | + } | |
48228 | + } | |
48229 | + | |
48230 | +} | |
48231 | +EXPORT_SYMBOL_GPL(cuckoo_hash_valid); | |
48232 | + | |
48233 | + | |
48234 | +void cuckoo_hash_dump(cuckoo_hash_table *hashtab) | |
48235 | +{ | |
48236 | + int i, entry_count; | |
48237 | + | |
48238 | + entry_count = 0; | |
48239 | + for (i=0; i < hashtab->length; i++) { | |
48240 | + EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT && | |
48241 | + hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED); | |
48242 | + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED) | |
48243 | + entry_count++; | |
48244 | + EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT && | |
48245 | + hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED); | |
48246 | + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED) | |
48247 | + entry_count++; | |
48248 | + } | |
48249 | + | |
48250 | + EPRINTK("======================\n"); | |
48251 | + EPRINTK("Cuckoo hash table dump\n"); | |
48252 | + EPRINTK("======================\n"); | |
48253 | + EPRINTK("length: %d; length_bits: %d; key_length: %d\n", hashtab->length, | |
48254 | + hashtab->length_bits, hashtab->key_length); | |
48255 | + EPRINTK("Recorded entries: %d\n", hashtab->entries); | |
48256 | + EPRINTK("Counted entries: %d\n", entry_count); | |
48257 | + EPRINTK("a0: %llx; a1: %llx\n", hashtab->a0, hashtab->a1); | |
48258 | + EPRINTK("-----------------------------------------\n"); | |
48259 | + EPRINTK("Index Occupied Key Value Index0 Index1\n"); | |
48260 | + EPRINTK("-----------------------------------------\n"); | |
48261 | + for (i=0; i< hashtab->length; i++) { | |
48262 | + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED) | |
48263 | + EPRINTK("%d %d %llx %d %d %d\n", i, | |
48264 | + hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED, | |
48265 | + hashtab->table0[i].key, hashtab->table0[i].value, | |
48266 | + cuckoo_compute_hash(hashtab, &hashtab->table0[i].key, | |
48267 | + &hashtab->a0), | |
48268 | + cuckoo_compute_hash(hashtab, &hashtab->table0[i].key, | |
48269 | + &hashtab->a1)); | |
48270 | + else | |
48271 | + EPRINTK("%d %d - - - -\n", i, | |
48272 | + hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED); | |
48273 | + | |
48274 | + } | |
48275 | + EPRINTK("-----------------------------------------\n"); | |
48276 | + EPRINTK("Index Occupied Key Value Index0 Index1\n"); | |
48277 | + EPRINTK("-----------------------------------------\n"); | |
48278 | + for (i=0; i< hashtab->length; i++) { | |
48279 | + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED) | |
48280 | + EPRINTK("%d %d %llx %d %d %d\n", i, | |
48281 | + hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED, | |
48282 | + hashtab->table1[i].key, hashtab->table1[i].value, | |
48283 | + cuckoo_compute_hash(hashtab, &hashtab->table1[i].key, | |
48284 | + &hashtab->a0), | |
48285 | + cuckoo_compute_hash(hashtab, &hashtab->table1[i].key, | |
48286 | + &hashtab->a1)); | |
48287 | + else | |
48288 | + EPRINTK("%d %d - - - -\n", i, | |
48289 | + hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED); | |
48290 | + } | |
48291 | + EPRINTK("======================\n"); | |
48292 | +} | |
48293 | +EXPORT_SYMBOL_GPL(cuckoo_hash_dump); | |
48294 | +#endif | |
48295 | Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.h | |
48296 | =================================================================== | |
48297 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
48298 | +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.h 2008-02-20 09:32:49.000000000 +0100 | |
48299 | @@ -0,0 +1,227 @@ | |
48300 | +/**************************************************************************** | |
48301 | + * Solarflare driver for Xen network acceleration | |
48302 | + * | |
48303 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
48304 | + * 9501 Jeronimo Road, Suite 250, | |
48305 | + * Irvine, CA 92618, USA | |
48306 | + * | |
48307 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
48308 | + * | |
48309 | + * This program is free software; you can redistribute it and/or modify it | |
48310 | + * under the terms of the GNU General Public License version 2 as published | |
48311 | + * by the Free Software Foundation, incorporated herein by reference. | |
48312 | + * | |
48313 | + * This program is distributed in the hope that it will be useful, | |
48314 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
48315 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
48316 | + * GNU General Public License for more details. | |
48317 | + * | |
48318 | + * You should have received a copy of the GNU General Public License | |
48319 | + * along with this program; if not, write to the Free Software | |
48320 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
48321 | + **************************************************************************** | |
48322 | + */ | |
48323 | + | |
48324 | +/* | |
48325 | + * A cuckoo hash table consists of two sub tables. Each entry can | |
48326 | + * hash to a position in each table. If, on entry, its position is | |
48327 | + * found to be occupied, the existing element is moved to it's other | |
48328 | + * location. This recurses until success or a loop is found. If a | |
48329 | + * loop is found the table is rehashed. | |
48330 | + * | |
48331 | + * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf | |
48332 | + */ | |
48333 | + | |
48334 | +#ifndef NET_ACCEL_CUCKOO_HASH_H | |
48335 | +#define NET_ACCEL_CUCKOO_HASH_H | |
48336 | + | |
48337 | +/*! Type used for hash table keys of ip pairs */ | |
48338 | +typedef struct { | |
48339 | + u32 local_ip; | |
48340 | + //u32 remote_ip; | |
48341 | + u16 local_port; | |
48342 | + //u16 remote_port; | |
48343 | + /* Technically only 1 bit, but use 16 to make key a round | |
48344 | + number size */ | |
48345 | + u16 proto; | |
48346 | +} cuckoo_hash_ip_key; | |
48347 | + | |
48348 | +/*! Type used for hash table keys of mac addresses */ | |
48349 | +typedef u64 cuckoo_hash_mac_key; | |
48350 | + | |
48351 | +/*! This type is designed to be large enough to hold all supported key | |
48352 | + * sizes to avoid having to malloc storage for them. | |
48353 | + */ | |
48354 | +typedef u64 cuckoo_hash_key; | |
48355 | + | |
48356 | +/*! Type used for the values stored in the hash table */ | |
48357 | +typedef int cuckoo_hash_value; | |
48358 | + | |
48359 | +/*! Type used for the hash used to index the table */ | |
48360 | +typedef u32 cuckoo_hash; | |
48361 | + | |
48362 | +/*! How long to spend displacing values when adding before giving up | |
48363 | + * and rehashing */ | |
48364 | +#define CUCKOO_HASH_MAX_LOOP (hashtab->length) | |
48365 | + | |
48366 | +/*! State of hash table entry */ | |
48367 | +typedef enum { | |
48368 | + CUCKOO_HASH_STATE_VACANT = 0, | |
48369 | + CUCKOO_HASH_STATE_OCCUPIED | |
48370 | +} cuckoo_hash_state; | |
48371 | + | |
48372 | +/*! An entry in the hash table */ | |
48373 | +typedef struct { | |
48374 | + cuckoo_hash_state state; | |
48375 | + cuckoo_hash_key key; | |
48376 | + cuckoo_hash_value value; | |
48377 | +} cuckoo_hash_entry; | |
48378 | + | |
48379 | +/*! A cuckoo hash table */ | |
48380 | +typedef struct { | |
48381 | + /*! The length of each table (NB. there are two tables of this | |
48382 | + * length) */ | |
48383 | + unsigned length; | |
48384 | + /*! The length of each table in bits */ | |
48385 | + unsigned length_bits; | |
48386 | + /*! The length of the key in bytes */ | |
48387 | + unsigned key_length; | |
48388 | + /*! The number of entries currently stored in the table */ | |
48389 | + unsigned entries; | |
48390 | + /*! Index into table used by cuckoo_hash_iterate */ | |
48391 | + unsigned iterate_index; | |
48392 | + | |
48393 | + /* parameter of hash functions */ | |
48394 | + /*! The "a" parameter of the first hash function */ | |
48395 | + cuckoo_hash_key a0; | |
48396 | + /*! The "a" parameter of the second hash function */ | |
48397 | + cuckoo_hash_key a1; | |
48398 | + | |
48399 | + /*! The first table */ | |
48400 | + cuckoo_hash_entry *table0; | |
48401 | + /*! The second table */ | |
48402 | + cuckoo_hash_entry *table1; | |
48403 | +} cuckoo_hash_table; | |
48404 | + | |
48405 | +/*! Initialise the cuckoo has table | |
48406 | + * | |
48407 | + * \param hashtab A pointer to an unitialised hash table structure | |
48408 | + * \param length_bits The number of elements in each table equals | |
48409 | + * 2**length_bits | |
48410 | + * \param key_length The length of the key in bytes | |
48411 | + * | |
48412 | + * \return 0 on success, -ENOMEM if it couldn't allocate the tables | |
48413 | + */ | |
48414 | +extern | |
48415 | +int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits, | |
48416 | + unsigned key_length); | |
48417 | + | |
48418 | + | |
48419 | +/*! Destroy a hash table | |
48420 | + * | |
48421 | + * \param hashtab A hash table that has previously been passed to a | |
48422 | + * successful call of cuckoo_hash_init() | |
48423 | + */ | |
48424 | +extern | |
48425 | +void cuckoo_hash_destroy(cuckoo_hash_table *hashtab); | |
48426 | + | |
48427 | + | |
48428 | +/*! Lookup an entry in the hash table | |
48429 | + * | |
48430 | + * \param hashtab The hash table in which to look. | |
48431 | + * \param key Pointer to a mac address to use as the key | |
48432 | + * \param value On exit set to the value stored if key was present | |
48433 | + * | |
48434 | + * \return 0 if not present in the table, non-zero if it is (and value | |
48435 | + * is set accordingly) | |
48436 | + */ | |
48437 | +extern | |
48438 | +int cuckoo_hash_lookup(cuckoo_hash_table *hashtab, | |
48439 | + cuckoo_hash_key *key, | |
48440 | + cuckoo_hash_value *value); | |
48441 | + | |
48442 | +/*! Add an entry to the hash table. Key must not be a duplicate of | |
48443 | + * anything already in the table. If this is a risk, see | |
48444 | + * cuckoo_hash_add_check | |
48445 | + * | |
48446 | + * \param hashtab The hash table to add the entry to | |
48447 | + * \param key Pointer to a mac address to use as a key | |
48448 | + * \param value The value to store | |
48449 | + * \param can_rehash Flag to allow the add function to rehash the | |
48450 | + * table if necessary | |
48451 | + * | |
48452 | + * \return 0 on success, non-zero on failure. -ENOSPC means it just | |
48453 | + * couldn't find anywhere to put it - this is bad and probably means | |
48454 | + * an entry has been dropped on the floor (but the entry you just | |
48455 | + * tried to add may now be included) | |
48456 | + */ | |
48457 | +extern | |
48458 | +int cuckoo_hash_add(cuckoo_hash_table *hashtab, | |
48459 | + cuckoo_hash_key *key, | |
48460 | + cuckoo_hash_value value, | |
48461 | + int can_rehash); | |
48462 | + | |
48463 | +/*! Same as cuckoo_hash_add but first checks to ensure entry is not | |
48464 | + * already there | |
48465 | + * \return -EBUSY if already there | |
48466 | + */ | |
48467 | + | |
48468 | +extern | |
48469 | +int cuckoo_hash_add_check(cuckoo_hash_table *hashtab, | |
48470 | + cuckoo_hash_key *key, | |
48471 | + cuckoo_hash_value value, | |
48472 | + int can_rehash); | |
48473 | +/*! Remove an entry from the table | |
48474 | + * | |
48475 | + * \param hashtab The hash table to remove the entry from | |
48476 | + * \param key The key that was used to previously add the entry | |
48477 | + * | |
48478 | + * \return 0 on success, -EINVAL if the entry couldn't be found | |
48479 | + */ | |
48480 | +extern | |
48481 | +int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key); | |
48482 | + | |
48483 | + | |
48484 | +/*! Helper for those using mac addresses to convert to a key for the | |
48485 | + * hash table | |
48486 | + */ | |
48487 | +static inline cuckoo_hash_mac_key cuckoo_mac_to_key(const u8 *mac) | |
48488 | +{ | |
48489 | + return (cuckoo_hash_mac_key)(mac[0]) | |
48490 | + | (cuckoo_hash_mac_key)(mac[1]) << 8 | |
48491 | + | (cuckoo_hash_mac_key)(mac[2]) << 16 | |
48492 | + | (cuckoo_hash_mac_key)(mac[3]) << 24 | |
48493 | + | (cuckoo_hash_mac_key)(mac[4]) << 32 | |
48494 | + | (cuckoo_hash_mac_key)(mac[5]) << 40; | |
48495 | +} | |
48496 | + | |
48497 | + | |
48498 | +/*! Update an entry already in the hash table to take a new value | |
48499 | + * | |
48500 | + * \param hashtab The hash table to add the entry to | |
48501 | + * \param key Pointer to a mac address to use as a key | |
48502 | + * \param value The value to store | |
48503 | + * | |
48504 | + * \return 0 on success, non-zero on failure. | |
48505 | + */ | |
48506 | +int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key, | |
48507 | + cuckoo_hash_value value); | |
48508 | + | |
48509 | + | |
48510 | +/*! Go through the hash table and return all used entries (one per call) | |
48511 | + * | |
48512 | + * \param hashtab The hash table to iterate over | |
48513 | + * \param key Pointer to a key to take the returned key | |
48514 | + * \param value Pointer to a value to take the returned value | |
48515 | + * | |
48516 | + * \return 0 on success (key, value set), non-zero on failure. | |
48517 | + */ | |
48518 | +int cuckoo_hash_iterate(cuckoo_hash_table *hashtab, | |
48519 | + cuckoo_hash_key *key, cuckoo_hash_value *value); | |
48520 | +void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab); | |
48521 | + | |
48522 | +/* debug, not compiled by default */ | |
48523 | +void cuckoo_hash_valid(cuckoo_hash_table *hashtab); | |
48524 | +void cuckoo_hash_dump(cuckoo_hash_table *hashtab); | |
48525 | + | |
48526 | +#endif /* NET_ACCEL_CUCKOO_HASH_H */ | |
48527 | Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.c | |
48528 | =================================================================== | |
48529 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
48530 | +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.c 2008-02-20 09:32:49.000000000 +0100 | |
48531 | @@ -0,0 +1,301 @@ | |
48532 | +/**************************************************************************** | |
48533 | + * Solarflare driver for Xen network acceleration | |
48534 | + * | |
48535 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
48536 | + * 9501 Jeronimo Road, Suite 250, | |
48537 | + * Irvine, CA 92618, USA | |
48538 | + * | |
48539 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
48540 | + * | |
48541 | + * This program is free software; you can redistribute it and/or modify it | |
48542 | + * under the terms of the GNU General Public License version 2 as published | |
48543 | + * by the Free Software Foundation, incorporated herein by reference. | |
48544 | + * | |
48545 | + * This program is distributed in the hope that it will be useful, | |
48546 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
48547 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
48548 | + * GNU General Public License for more details. | |
48549 | + * | |
48550 | + * You should have received a copy of the GNU General Public License | |
48551 | + * along with this program; if not, write to the Free Software | |
48552 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
48553 | + **************************************************************************** | |
48554 | + */ | |
48555 | + | |
48556 | +#include <xen/evtchn.h> | |
48557 | + | |
48558 | +#include "accel_util.h" | |
48559 | +#include "accel_msg_iface.h" | |
48560 | + | |
48561 | +#define NET_ACCEL_MSG_Q_SIZE (1024) | |
48562 | +#define NET_ACCEL_MSG_Q_MASK (NET_ACCEL_MSG_Q_SIZE - 1) | |
48563 | + | |
48564 | +#ifdef NDEBUG | |
48565 | +#define NET_ACCEL_CHECK_MAGIC(_p, _errval) | |
48566 | +#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id) | |
48567 | +#else | |
48568 | +#define NET_ACCEL_CHECK_MAGIC(_p, _errval) \ | |
48569 | + if (_p->magic != NET_ACCEL_MSG_MAGIC) { \ | |
48570 | + printk(KERN_ERR "%s: passed invalid shared page %p!\n", \ | |
48571 | + __FUNCTION__, _p); \ | |
48572 | + return _errval; \ | |
48573 | + } | |
48574 | +#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id) \ | |
48575 | + printk(_t ": queue %d write %x read %x base %x limit %x\n", \ | |
48576 | + _id, _q->write, _q->read, _q->base, _q->limit); | |
48577 | +#endif | |
48578 | + | |
48579 | +/* | |
48580 | + * We've been passed at least 2 pages. 1 control page and 1 or more | |
48581 | + * data pages. | |
48582 | + */ | |
48583 | +int net_accel_msg_init_page(void *mem, int len, int up) | |
48584 | +{ | |
48585 | + struct net_accel_shared_page *shared_page = | |
48586 | + (struct net_accel_shared_page*)mem; | |
48587 | + | |
48588 | + if ((unsigned long)shared_page & NET_ACCEL_MSG_Q_MASK) | |
48589 | + return -EINVAL; | |
48590 | + | |
48591 | + shared_page->magic = NET_ACCEL_MSG_MAGIC; | |
48592 | + | |
48593 | + shared_page->aflags = 0; | |
48594 | + | |
48595 | + shared_page->net_dev_up = up; | |
48596 | + | |
48597 | + return 0; | |
48598 | +} | |
48599 | +EXPORT_SYMBOL_GPL(net_accel_msg_init_page); | |
48600 | + | |
48601 | + | |
48602 | +void net_accel_msg_init_queue(sh_msg_fifo2 *queue, | |
48603 | + struct net_accel_msg_queue *indices, | |
48604 | + struct net_accel_msg *base, int size) | |
48605 | +{ | |
48606 | + queue->fifo = base; | |
48607 | + spin_lock_init(&queue->lock); | |
48608 | + sh_fifo2_init(queue, size-1, &indices->read, &indices->write); | |
48609 | +} | |
48610 | +EXPORT_SYMBOL_GPL(net_accel_msg_init_queue); | |
48611 | + | |
48612 | + | |
48613 | +static inline int _net_accel_msg_send(struct net_accel_shared_page *sp, | |
48614 | + sh_msg_fifo2 *queue, | |
48615 | + struct net_accel_msg *msg, | |
48616 | + int is_reply) | |
48617 | +{ | |
48618 | + int rc = 0; | |
48619 | + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL); | |
48620 | + rmb(); | |
48621 | + if (is_reply) { | |
48622 | + EPRINTK_ON(sh_fifo2_is_full(queue)); | |
48623 | + sh_fifo2_put(queue, *msg); | |
48624 | + } else { | |
48625 | + if (sh_fifo2_not_half_full(queue)) { | |
48626 | + sh_fifo2_put(queue, *msg); | |
48627 | + } else { | |
48628 | + rc = -ENOSPC; | |
48629 | + } | |
48630 | + } | |
48631 | + wmb(); | |
48632 | + return rc; | |
48633 | +} | |
48634 | + | |
48635 | +/* Notify after a batch of messages have been sent */ | |
48636 | +void net_accel_msg_notify(int irq) | |
48637 | +{ | |
48638 | + notify_remote_via_irq(irq); | |
48639 | +} | |
48640 | +EXPORT_SYMBOL_GPL(net_accel_msg_notify); | |
48641 | + | |
48642 | +/* | |
48643 | + * Send a message on the specified FIFO. Returns 0 on success, -errno | |
48644 | + * on failure. The message in msg is copied to the current slot of the | |
48645 | + * FIFO. | |
48646 | + */ | |
48647 | +int net_accel_msg_send(struct net_accel_shared_page *sp, sh_msg_fifo2 *q, | |
48648 | + struct net_accel_msg *msg) | |
48649 | +{ | |
48650 | + unsigned long flags; | |
48651 | + int rc; | |
48652 | + net_accel_msg_lock_queue(q, &flags); | |
48653 | + rc = _net_accel_msg_send(sp, q, msg, 0); | |
48654 | + net_accel_msg_unlock_queue(q, &flags); | |
48655 | + return rc; | |
48656 | +} | |
48657 | +EXPORT_SYMBOL_GPL(net_accel_msg_send); | |
48658 | + | |
48659 | + | |
48660 | +/* As net_accel_msg_send but also posts a notification to the far end. */ | |
48661 | +int net_accel_msg_send_notify(struct net_accel_shared_page *sp, int irq, | |
48662 | + sh_msg_fifo2 *q, struct net_accel_msg *msg) | |
48663 | +{ | |
48664 | + unsigned long flags; | |
48665 | + int rc; | |
48666 | + net_accel_msg_lock_queue(q, &flags); | |
48667 | + rc = _net_accel_msg_send(sp, q, msg, 0); | |
48668 | + net_accel_msg_unlock_queue(q, &flags); | |
48669 | + if (rc >= 0) | |
48670 | + notify_remote_via_irq(irq); | |
48671 | + return rc; | |
48672 | +} | |
48673 | +EXPORT_SYMBOL_GPL(net_accel_msg_send_notify); | |
48674 | + | |
48675 | + | |
48676 | +int net_accel_msg_reply(struct net_accel_shared_page *sp, sh_msg_fifo2 *q, | |
48677 | + struct net_accel_msg *msg) | |
48678 | +{ | |
48679 | + unsigned long flags; | |
48680 | + int rc; | |
48681 | + net_accel_msg_lock_queue(q, &flags); | |
48682 | + rc = _net_accel_msg_send(sp, q, msg, 1); | |
48683 | + net_accel_msg_unlock_queue(q, &flags); | |
48684 | + return rc; | |
48685 | +} | |
48686 | +EXPORT_SYMBOL_GPL(net_accel_msg_reply); | |
48687 | + | |
48688 | + | |
48689 | +/* As net_accel_msg_send but also posts a notification to the far end. */ | |
48690 | +int net_accel_msg_reply_notify(struct net_accel_shared_page *sp, int irq, | |
48691 | + sh_msg_fifo2 *q, struct net_accel_msg *msg) | |
48692 | +{ | |
48693 | + unsigned long flags; | |
48694 | + int rc; | |
48695 | + net_accel_msg_lock_queue(q, &flags); | |
48696 | + rc = _net_accel_msg_send(sp, q, msg, 1); | |
48697 | + net_accel_msg_unlock_queue(q, &flags); | |
48698 | + if (rc >= 0) | |
48699 | + notify_remote_via_irq(irq); | |
48700 | + return rc; | |
48701 | +} | |
48702 | +EXPORT_SYMBOL_GPL(net_accel_msg_reply_notify); | |
48703 | + | |
48704 | + | |
48705 | +/* | |
48706 | + * Look at a received message, if any, so a decision can be made about | |
48707 | + * whether to read it now or not. Cookie is a bit of debug which is | |
48708 | + * set here and checked when passed to net_accel_msg_recv_next() | |
48709 | + */ | |
48710 | +int net_accel_msg_peek(struct net_accel_shared_page *sp, | |
48711 | + sh_msg_fifo2 *queue, | |
48712 | + struct net_accel_msg *msg, int *cookie) | |
48713 | +{ | |
48714 | + unsigned long flags; | |
48715 | + int rc = 0; | |
48716 | + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL); | |
48717 | + net_accel_msg_lock_queue(queue, &flags); | |
48718 | + rmb(); | |
48719 | + if (sh_fifo2_is_empty(queue)) { | |
48720 | + rc = -ENOENT; | |
48721 | + } else { | |
48722 | + *msg = sh_fifo2_peek(queue); | |
48723 | + *cookie = *(queue->fifo_rd_i); | |
48724 | + } | |
48725 | + net_accel_msg_unlock_queue(queue, &flags); | |
48726 | + return rc; | |
48727 | +} | |
48728 | +EXPORT_SYMBOL_GPL(net_accel_msg_peek); | |
48729 | + | |
48730 | + | |
48731 | +/* | |
48732 | + * Move the queue onto the next element, used after finished with a | |
48733 | + * peeked msg | |
48734 | + */ | |
48735 | +int net_accel_msg_recv_next(struct net_accel_shared_page *sp, | |
48736 | + sh_msg_fifo2 *queue, int cookie) | |
48737 | +{ | |
48738 | + unsigned long flags; | |
48739 | + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL); | |
48740 | + net_accel_msg_lock_queue(queue, &flags); | |
48741 | + rmb(); | |
48742 | + /* Mustn't be empty */ | |
48743 | + BUG_ON(sh_fifo2_is_empty(queue)); | |
48744 | + /* | |
48745 | + * Check cookie matches, i.e. we're advancing over the same message | |
48746 | + * as was got using peek | |
48747 | + */ | |
48748 | + BUG_ON(cookie != *(queue->fifo_rd_i)); | |
48749 | + sh_fifo2_rd_next(queue); | |
48750 | + wmb(); | |
48751 | + net_accel_msg_unlock_queue(queue, &flags); | |
48752 | + return 0; | |
48753 | +} | |
48754 | +EXPORT_SYMBOL_GPL(net_accel_msg_recv_next); | |
48755 | + | |
48756 | + | |
48757 | +/* | |
48758 | + * Receive a message on the specified FIFO. Returns 0 on success, | |
48759 | + * -errno on failure. | |
48760 | + */ | |
48761 | +int net_accel_msg_recv(struct net_accel_shared_page *sp, sh_msg_fifo2 *queue, | |
48762 | + struct net_accel_msg *msg) | |
48763 | +{ | |
48764 | + unsigned long flags; | |
48765 | + int rc = 0; | |
48766 | + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL); | |
48767 | + net_accel_msg_lock_queue(queue, &flags); | |
48768 | + rmb(); | |
48769 | + if (sh_fifo2_is_empty(queue)) { | |
48770 | + rc = -ENOENT; | |
48771 | + } else { | |
48772 | + sh_fifo2_get(queue, msg); | |
48773 | + } | |
48774 | + wmb(); | |
48775 | + net_accel_msg_unlock_queue(queue, &flags); | |
48776 | + return rc; | |
48777 | +} | |
48778 | +EXPORT_SYMBOL_GPL(net_accel_msg_recv); | |
48779 | + | |
48780 | + | |
48781 | +/* | |
48782 | + * Start sending a message without copying. returns a pointer to a message | |
48783 | + * that will be filled out in place. The queue is locked until the message | |
48784 | + * is sent. | |
48785 | + */ | |
48786 | +struct net_accel_msg *net_accel_msg_start_send(struct net_accel_shared_page *sp, | |
48787 | + sh_msg_fifo2 *queue, unsigned long *flags) | |
48788 | +{ | |
48789 | + struct net_accel_msg *msg; | |
48790 | + NET_ACCEL_CHECK_MAGIC(sp, NULL); | |
48791 | + net_accel_msg_lock_queue(queue, flags); | |
48792 | + rmb(); | |
48793 | + if (sh_fifo2_not_half_full(queue)) { | |
48794 | + msg = sh_fifo2_pokep(queue); | |
48795 | + } else { | |
48796 | + net_accel_msg_unlock_queue(queue, flags); | |
48797 | + msg = NULL; | |
48798 | + } | |
48799 | + return msg; | |
48800 | +} | |
48801 | +EXPORT_SYMBOL_GPL(net_accel_msg_start_send); | |
48802 | + | |
48803 | + | |
48804 | +static inline void _msg_complete(struct net_accel_shared_page *sp, | |
48805 | + sh_msg_fifo2 *queue, | |
48806 | + unsigned long *flags) | |
48807 | +{ | |
48808 | + sh_fifo2_wr_next(queue); | |
48809 | + net_accel_msg_unlock_queue(queue, flags); | |
48810 | +} | |
48811 | + | |
48812 | +/* | |
48813 | + * Complete the sending of a message started with net_accel_msg_start_send. The | |
48814 | + * message is implicit since the queue was locked by _start | |
48815 | + */ | |
48816 | +void net_accel_msg_complete_send(struct net_accel_shared_page *sp, | |
48817 | + sh_msg_fifo2 *queue, | |
48818 | + unsigned long *flags) | |
48819 | +{ | |
48820 | + _msg_complete(sp, queue, flags); | |
48821 | +} | |
48822 | +EXPORT_SYMBOL_GPL(net_accel_msg_complete_send); | |
48823 | + | |
48824 | +/* As net_accel_msg_complete_send but does the notify. */ | |
48825 | +void net_accel_msg_complete_send_notify(struct net_accel_shared_page *sp, | |
48826 | + sh_msg_fifo2 *queue, | |
48827 | + unsigned long *flags, int irq) | |
48828 | +{ | |
48829 | + _msg_complete(sp, queue, flags); | |
48830 | + notify_remote_via_irq(irq); | |
48831 | +} | |
48832 | +EXPORT_SYMBOL_GPL(net_accel_msg_complete_send_notify); | |
48833 | Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.h | |
48834 | =================================================================== | |
48835 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
48836 | +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_msg_iface.h 2008-02-20 09:32:49.000000000 +0100 | |
48837 | @@ -0,0 +1,414 @@ | |
48838 | +/**************************************************************************** | |
48839 | + * Solarflare driver for Xen network acceleration | |
48840 | + * | |
48841 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
48842 | + * 9501 Jeronimo Road, Suite 250, | |
48843 | + * Irvine, CA 92618, USA | |
48844 | + * | |
48845 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
48846 | + * | |
48847 | + * This program is free software; you can redistribute it and/or modify it | |
48848 | + * under the terms of the GNU General Public License version 2 as published | |
48849 | + * by the Free Software Foundation, incorporated herein by reference. | |
48850 | + * | |
48851 | + * This program is distributed in the hope that it will be useful, | |
48852 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
48853 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
48854 | + * GNU General Public License for more details. | |
48855 | + * | |
48856 | + * You should have received a copy of the GNU General Public License | |
48857 | + * along with this program; if not, write to the Free Software | |
48858 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
48859 | + **************************************************************************** | |
48860 | + */ | |
48861 | + | |
48862 | +#ifndef NET_ACCEL_MSG_IFACE_H | |
48863 | +#define NET_ACCEL_MSG_IFACE_H | |
48864 | + | |
48865 | +#include <linux/ip.h> | |
48866 | +#include <linux/tcp.h> | |
48867 | +#include <linux/udp.h> | |
48868 | +#include <linux/in.h> | |
48869 | +#include <linux/netdevice.h> | |
48870 | +#include <linux/etherdevice.h> | |
48871 | + | |
48872 | +#include "accel_shared_fifo.h" | |
48873 | + | |
48874 | +#define NET_ACCEL_MSG_MAGIC (0x85465479) | |
48875 | + | |
48876 | +/*! We talk version 0.010 of the interdomain protocol */ | |
48877 | +#define NET_ACCEL_MSG_VERSION (0x00001000) | |
48878 | + | |
48879 | +/*! Shared memory portion of inter-domain FIFO */ | |
48880 | +struct net_accel_msg_queue { | |
48881 | + u32 read; | |
48882 | + u32 write; | |
48883 | +}; | |
48884 | + | |
48885 | + | |
48886 | +/* | |
48887 | + * The aflags in the following structure is used as follows: | |
48888 | + * | |
48889 | + * - each bit is set when one of the corresponding variables is | |
48890 | + * changed by either end. | |
48891 | + * | |
48892 | + * - the end that has made the change then forwards an IRQ to the | |
48893 | + * other | |
48894 | + * | |
48895 | + * - the IRQ handler deals with these bits either on the fast path, or | |
48896 | + * for less common changes, by jumping onto the slow path. | |
48897 | + * | |
48898 | + * - once it has seen a change, it clears the relevant bit. | |
48899 | + * | |
48900 | + * aflags is accessed atomically using clear_bit, test_bit, | |
48901 | + * test_and_set_bit etc | |
48902 | + */ | |
48903 | + | |
48904 | +/* | |
48905 | + * The following used to signify to the other domain when the queue | |
48906 | + * they want to use is full, and when it is no longer full. Could be | |
48907 | + * compressed to use fewer bits but done this way for simplicity and | |
48908 | + * clarity | |
48909 | + */ | |
48910 | + | |
48911 | +/* "dom0->domU queue" is full */ | |
48912 | +#define NET_ACCEL_MSG_AFLAGS_QUEUE0FULL 0x1 | |
48913 | +#define NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B 0 | |
48914 | +/* "dom0->domU queue" is not full */ | |
48915 | +#define NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL 0x2 | |
48916 | +#define NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B 1 | |
48917 | +/* "domU->dom0 queue" is full */ | |
48918 | +#define NET_ACCEL_MSG_AFLAGS_QUEUEUFULL 0x4 | |
48919 | +#define NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B 2 | |
48920 | +/* "domU->dom0 queue" is not full */ | |
48921 | +#define NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL 0x8 | |
48922 | +#define NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B 3 | |
48923 | +/* dom0 -> domU net_dev up/down events */ | |
48924 | +#define NET_ACCEL_MSG_AFLAGS_NETUPDOWN 0x10 | |
48925 | +#define NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B 4 | |
48926 | + | |
48927 | +/* | |
48928 | + * Masks used to test if there are any messages for domU and dom0 | |
48929 | + * respectively | |
48930 | + */ | |
48931 | +#define NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK \ | |
48932 | + (NET_ACCEL_MSG_AFLAGS_QUEUE0FULL | \ | |
48933 | + NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL | \ | |
48934 | + NET_ACCEL_MSG_AFLAGS_NETUPDOWN) | |
48935 | +#define NET_ACCEL_MSG_AFLAGS_TO_DOM0_MASK \ | |
48936 | + (NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL | \ | |
48937 | + NET_ACCEL_MSG_AFLAGS_QUEUEUFULL) | |
48938 | + | |
48939 | +/*! The shared data structure used for inter-VM communication. */ | |
48940 | +struct net_accel_shared_page { | |
48941 | + /*! Sanity check */ | |
48942 | + u32 magic; | |
48943 | + /*! Used by host/Dom0 */ | |
48944 | + struct net_accel_msg_queue queue0; | |
48945 | + /*! Used by guest/DomU */ | |
48946 | + struct net_accel_msg_queue queue1; | |
48947 | + /*! Atomic flags, used to communicate simple state changes */ | |
48948 | + u32 aflags; | |
48949 | + /*! State of net_dev used for acceleration */ | |
48950 | + u32 net_dev_up; | |
48951 | +}; | |
48952 | + | |
48953 | + | |
48954 | +enum net_accel_hw_type { | |
48955 | + /*! Not a virtualisable NIC: use slow path. */ | |
48956 | + NET_ACCEL_MSG_HWTYPE_NONE = 0, | |
48957 | + /*! NIC is Falcon-based */ | |
48958 | + NET_ACCEL_MSG_HWTYPE_FALCON_A = 1, | |
48959 | + NET_ACCEL_MSG_HWTYPE_FALCON_B = 2, | |
48960 | +}; | |
48961 | + | |
48962 | +/*! The maximum number of pages used by an event queue. */ | |
48963 | +#define EF_HW_FALCON_EVQ_PAGES 8 | |
48964 | + | |
48965 | +struct net_accel_hw_falcon_b { | |
48966 | + /* VI */ | |
48967 | + /*! Grant for Tx DMA Q */ | |
48968 | + u32 txdmaq_gnt; | |
48969 | + /*! Grant for Rx DMA Q */ | |
48970 | + u32 rxdmaq_gnt; | |
48971 | + /*! Machine frame number for Tx/Rx doorbell page */ | |
48972 | + u32 doorbell_mfn; | |
48973 | + /*! Grant for Tx/Rx doorbell page */ | |
48974 | + u32 doorbell_gnt; | |
48975 | + | |
48976 | + /* Event Q */ | |
48977 | + /*! Grants for the pages of the EVQ */ | |
48978 | + u32 evq_mem_gnts[EF_HW_FALCON_EVQ_PAGES]; | |
48979 | + u32 evq_offs; | |
48980 | + /*! log2(pages in event Q) */ | |
48981 | + u32 evq_order; | |
48982 | + /*! Capacity in events */ | |
48983 | + u32 evq_capacity; | |
48984 | + /*! Eventq pointer register physical address */ | |
48985 | + u32 evq_rptr; | |
48986 | + /*! Interface instance */ | |
48987 | + u32 instance; | |
48988 | + /*! Capacity of RX queue */ | |
48989 | + u32 rx_capacity; | |
48990 | + /*! Capacity of TX queue */ | |
48991 | + u32 tx_capacity; | |
48992 | + | |
48993 | + /* NIC */ | |
48994 | + s32 nic_arch; | |
48995 | + s32 nic_revision; | |
48996 | + u8 nic_variant; | |
48997 | +}; | |
48998 | + | |
48999 | +struct net_accel_hw_falcon_a { | |
49000 | + struct net_accel_hw_falcon_b common; | |
49001 | + u32 evq_rptr_gnt; | |
49002 | +}; | |
49003 | + | |
49004 | + | |
49005 | +/*! Description of the hardware that the DomU is being given. */ | |
49006 | +struct net_accel_msg_hw { | |
49007 | + u32 type; /*!< Hardware type */ | |
49008 | + union { | |
49009 | + struct net_accel_hw_falcon_a falcon_a; | |
49010 | + struct net_accel_hw_falcon_b falcon_b; | |
49011 | + } resources; | |
49012 | +}; | |
49013 | + | |
49014 | +/*! Start-of-day handshake message. Dom0 fills in its version and | |
49015 | + * sends, DomU checks, inserts its version and replies | |
49016 | + */ | |
49017 | +struct net_accel_msg_hello { | |
49018 | + /*! Sender's version (set by each side in turn) */ | |
49019 | + u32 version; | |
49020 | + /*! max pages allocated/allowed for buffers */ | |
49021 | + u32 max_pages; | |
49022 | +}; | |
49023 | + | |
49024 | +/*! Maximum number of page requests that can fit in a message. */ | |
49025 | +#define NET_ACCEL_MSG_MAX_PAGE_REQ (8) | |
49026 | + | |
49027 | +/*! Request for NIC buffers. DomU fils out pages and grants (and | |
49028 | + * optionally) reqid, dom0 fills out buf and sends reply | |
49029 | + */ | |
49030 | +struct net_accel_msg_map_buffers { | |
49031 | + u32 reqid; /*!< Optional request ID */ | |
49032 | + u32 pages; /*!< Number of pages to map */ | |
49033 | + u32 grants[NET_ACCEL_MSG_MAX_PAGE_REQ]; /*!< Grant ids to map */ | |
49034 | + u32 buf; /*!< NIC buffer address of pages obtained */ | |
49035 | +}; | |
49036 | + | |
49037 | +/*! Notification of a change to local mac address, used to filter | |
49038 | + locally destined packets off the fast path */ | |
49039 | +struct net_accel_msg_localmac { | |
49040 | + u32 flags; /*!< Should this be added or removed? */ | |
49041 | + u8 mac[ETH_ALEN]; /*!< The mac address to filter onto slow path */ | |
49042 | +}; | |
49043 | + | |
49044 | +struct net_accel_msg_fastpath { | |
49045 | + u32 flags; /*!< Should this be added or removed? */ | |
49046 | + u8 mac[ETH_ALEN];/*!< The mac address to filter onto fast path */ | |
49047 | + u16 port; /*!< The port of the connection */ | |
49048 | + u32 ip; /*!< The IP address of the connection */ | |
49049 | + u8 proto; /*!< The protocol of connection (TCP/UDP) */ | |
49050 | +}; | |
49051 | + | |
49052 | +/*! Values for struct ef_msg_localmac/fastpath.flags */ | |
49053 | +#define NET_ACCEL_MSG_ADD 0x1 | |
49054 | +#define NET_ACCEL_MSG_REMOVE 0x2 | |
49055 | + | |
49056 | +/*! Overall message structure */ | |
49057 | +struct net_accel_msg { | |
49058 | + /*! ID specifying type of messge */ | |
49059 | + u32 id; | |
49060 | + union { | |
49061 | + /*! handshake */ | |
49062 | + struct net_accel_msg_hello hello; | |
49063 | + /*! hardware description */ | |
49064 | + struct net_accel_msg_hw hw; | |
49065 | + /*! buffer map request */ | |
49066 | + struct net_accel_msg_map_buffers mapbufs; | |
49067 | + /*! mac address of a local interface */ | |
49068 | + struct net_accel_msg_localmac localmac; | |
49069 | + /*! address of a new fastpath connection */ | |
49070 | + struct net_accel_msg_fastpath fastpath; | |
49071 | + /*! make the message a fixed size */ | |
49072 | + u8 pad[128 - sizeof(u32)]; | |
49073 | + } u; | |
49074 | +}; | |
49075 | + | |
49076 | + | |
49077 | +#define NET_ACCEL_MSG_HW_TO_MSG(_u) container_of(_u, struct net_accel_msg, u.hw) | |
49078 | + | |
49079 | +/*! Inter-domain message FIFO */ | |
49080 | +typedef struct { | |
49081 | + struct net_accel_msg *fifo; | |
49082 | + u32 fifo_mask; | |
49083 | + u32 *fifo_rd_i; | |
49084 | + u32 *fifo_wr_i; | |
49085 | + spinlock_t lock; | |
49086 | + u32 is_locked; /* Debug flag */ | |
49087 | +} sh_msg_fifo2; | |
49088 | + | |
49089 | + | |
49090 | +#define NET_ACCEL_MSG_OFFSET_MASK PAGE_MASK | |
49091 | + | |
49092 | +/* Modifiers */ | |
49093 | +#define NET_ACCEL_MSG_REPLY (0x80000000) | |
49094 | +#define NET_ACCEL_MSG_ERROR (0x40000000) | |
49095 | + | |
49096 | +/* Dom0 -> DomU and reply. Handshake/version check. */ | |
49097 | +#define NET_ACCEL_MSG_HELLO (0x00000001) | |
49098 | +/* Dom0 -> DomU : hardware setup (VI info.) */ | |
49099 | +#define NET_ACCEL_MSG_SETHW (0x00000002) | |
49100 | +/* | |
49101 | + * Dom0 -> DomU. Notification of a local mac to add/remove from slow | |
49102 | + * path filter | |
49103 | + */ | |
49104 | +#define NET_ACCEL_MSG_LOCALMAC (0x00000003) | |
49105 | +/* | |
49106 | + * DomU -> Dom0 and reply. Request for buffer table entries for | |
49107 | + * preallocated pages. | |
49108 | + */ | |
49109 | +#define NET_ACCEL_MSG_MAPBUF (0x00000004) | |
49110 | +/* | |
49111 | + * Dom0 -> DomU. Notification of a local mac to add/remove from fast | |
49112 | + * path filter | |
49113 | + */ | |
49114 | +#define NET_ACCEL_MSG_FASTPATH (0x00000005) | |
49115 | + | |
49116 | +/*! Initialise a message and set the type | |
49117 | + * \param message : the message | |
49118 | + * \param code : the message type | |
49119 | + */ | |
49120 | +static inline void net_accel_msg_init(struct net_accel_msg *msg, int code) { | |
49121 | + msg->id = (u32)code; | |
49122 | +} | |
49123 | + | |
49124 | +/*! initialise a shared page structure | |
49125 | + * \param shared_page : mapped memory in which the structure resides | |
49126 | + * \param len : size of the message FIFO area that follows | |
49127 | + * \param up : initial up/down state of netdev | |
49128 | + * \return 0 or an error code | |
49129 | + */ | |
49130 | +extern int net_accel_msg_init_page(void *shared_page, int len, int up); | |
49131 | + | |
49132 | +/*! initialise a message queue | |
49133 | + * \param queue : the message FIFO to initialise | |
49134 | + * \param indices : the read and write indices in shared memory | |
49135 | + * \param base : the start of the memory area for the FIFO | |
49136 | + * \param size : the size of the FIFO in bytes | |
49137 | + */ | |
49138 | +extern void net_accel_msg_init_queue(sh_msg_fifo2 *queue, | |
49139 | + struct net_accel_msg_queue *indices, | |
49140 | + struct net_accel_msg *base, int size); | |
49141 | + | |
49142 | +/* Notify after a batch of messages have been sent */ | |
49143 | +extern void net_accel_msg_notify(int irq); | |
49144 | + | |
49145 | +/*! Send a message on the specified FIFO. The message is copied to the | |
49146 | + * current slot of the FIFO. | |
49147 | + * \param sp : pointer to shared page | |
49148 | + * \param q : pointer to message FIFO to use | |
49149 | + * \param msg : pointer to message | |
49150 | + * \return 0 on success, -errno on | |
49151 | + */ | |
49152 | +extern int net_accel_msg_send(struct net_accel_shared_page *sp, | |
49153 | + sh_msg_fifo2 *q, | |
49154 | + struct net_accel_msg *msg); | |
49155 | +extern int net_accel_msg_reply(struct net_accel_shared_page *sp, | |
49156 | + sh_msg_fifo2 *q, | |
49157 | + struct net_accel_msg *msg); | |
49158 | + | |
49159 | +/*! As net_accel_msg_send but also posts a notification to the far end. */ | |
49160 | +extern int net_accel_msg_send_notify(struct net_accel_shared_page *sp, | |
49161 | + int irq, sh_msg_fifo2 *q, | |
49162 | + struct net_accel_msg *msg); | |
49163 | +/*! As net_accel_msg_send but also posts a notification to the far end. */ | |
49164 | +extern int net_accel_msg_reply_notify(struct net_accel_shared_page *sp, | |
49165 | + int irq, sh_msg_fifo2 *q, | |
49166 | + struct net_accel_msg *msg); | |
49167 | + | |
49168 | +/*! Receive a message on the specified FIFO. Returns 0 on success, | |
49169 | + * -errno on failure. | |
49170 | + */ | |
49171 | +extern int net_accel_msg_recv(struct net_accel_shared_page *sp, | |
49172 | + sh_msg_fifo2 *q, | |
49173 | + struct net_accel_msg *msg); | |
49174 | + | |
49175 | +/*! Look at a received message, if any, so a decision can be made | |
49176 | + * about whether to read it now or not. Cookie is a bit of debug | |
49177 | + * which is set here and checked when passed to | |
49178 | + * net_accel_msg_recv_next() | |
49179 | + */ | |
49180 | +extern int net_accel_msg_peek(struct net_accel_shared_page *sp, | |
49181 | + sh_msg_fifo2 *queue, | |
49182 | + struct net_accel_msg *msg, int *cookie); | |
49183 | +/*! Move the queue onto the next element, used after finished with a | |
49184 | + * peeked msg | |
49185 | + */ | |
49186 | +extern int net_accel_msg_recv_next(struct net_accel_shared_page *sp, | |
49187 | + sh_msg_fifo2 *queue, int cookie); | |
49188 | + | |
49189 | +/*! Start sending a message without copying. returns a pointer to a | |
49190 | + * message that will be filled out in place. The queue is locked | |
49191 | + * until the message is sent. | |
49192 | + */ | |
49193 | +extern | |
49194 | +struct net_accel_msg *net_accel_msg_start_send(struct net_accel_shared_page *sp, | |
49195 | + sh_msg_fifo2 *queue, | |
49196 | + unsigned long *flags); | |
49197 | + | |
49198 | + | |
49199 | +/*! Complete the sending of a message started with | |
49200 | + * net_accel_msg_start_send. The message is implicit since the queue | |
49201 | + * was locked by _start | |
49202 | + */ | |
49203 | +extern void net_accel_msg_complete_send(struct net_accel_shared_page *sp, | |
49204 | + sh_msg_fifo2 *queue, | |
49205 | + unsigned long *flags); | |
49206 | + | |
49207 | +/*! As net_accel_msg_complete_send but does the notify. */ | |
49208 | +extern void net_accel_msg_complete_send_notify(struct net_accel_shared_page *sp, | |
49209 | + sh_msg_fifo2 *queue, | |
49210 | + unsigned long *flags, int irq); | |
49211 | + | |
49212 | +/*! Lock the queue so that multiple "_locked" functions can be called | |
49213 | + * without the queue being modified by others | |
49214 | + */ | |
49215 | +static inline | |
49216 | +void net_accel_msg_lock_queue(sh_msg_fifo2 *queue, unsigned long *flags) | |
49217 | +{ | |
49218 | + spin_lock_irqsave(&queue->lock, (*flags)); | |
49219 | + rmb(); | |
49220 | + BUG_ON(queue->is_locked); | |
49221 | + queue->is_locked = 1; | |
49222 | +} | |
49223 | + | |
49224 | +/*! Unlock the queue */ | |
49225 | +static inline | |
49226 | +void net_accel_msg_unlock_queue(sh_msg_fifo2 *queue, unsigned long *flags) | |
49227 | +{ | |
49228 | + BUG_ON(!queue->is_locked); | |
49229 | + queue->is_locked = 0; | |
49230 | + wmb(); | |
49231 | + spin_unlock_irqrestore(&queue->lock, (*flags)); | |
49232 | +} | |
49233 | + | |
49234 | +/*! Give up without sending a message that was started with | |
49235 | + * net_accel_msg_start_send() | |
49236 | + */ | |
49237 | +static inline | |
49238 | +void net_accel_msg_abort_send(struct net_accel_shared_page *sp, | |
49239 | + sh_msg_fifo2 *queue, unsigned long *flags) | |
49240 | +{ | |
49241 | + net_accel_msg_unlock_queue(queue, flags); | |
49242 | +} | |
49243 | + | |
49244 | +/*! Test the queue to ensure there is sufficient space */ | |
49245 | +static inline | |
49246 | +int net_accel_msg_check_space(sh_msg_fifo2 *queue, unsigned space) | |
49247 | +{ | |
49248 | + return sh_fifo2_space(queue) >= space; | |
49249 | +} | |
49250 | + | |
49251 | +#endif /* NET_ACCEL_MSG_IFACE_H */ | |
49252 | Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_shared_fifo.h | |
49253 | =================================================================== | |
49254 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
49255 | +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_shared_fifo.h 2008-02-20 09:32:49.000000000 +0100 | |
49256 | @@ -0,0 +1,127 @@ | |
49257 | +/**************************************************************************** | |
49258 | + * Solarflare driver for Xen network acceleration | |
49259 | + * | |
49260 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
49261 | + * 9501 Jeronimo Road, Suite 250, | |
49262 | + * Irvine, CA 92618, USA | |
49263 | + * | |
49264 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
49265 | + * | |
49266 | + * This program is free software; you can redistribute it and/or modify it | |
49267 | + * under the terms of the GNU General Public License version 2 as published | |
49268 | + * by the Free Software Foundation, incorporated herein by reference. | |
49269 | + * | |
49270 | + * This program is distributed in the hope that it will be useful, | |
49271 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
49272 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
49273 | + * GNU General Public License for more details. | |
49274 | + * | |
49275 | + * You should have received a copy of the GNU General Public License | |
49276 | + * along with this program; if not, write to the Free Software | |
49277 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
49278 | + **************************************************************************** | |
49279 | + */ | |
49280 | + | |
49281 | +#ifndef NET_ACCEL_SHARED_FIFO_H | |
49282 | +#define NET_ACCEL_SHARED_FIFO_H | |
49283 | + | |
49284 | +/* | |
49285 | + * This is based on fifo.h, but handles sharing between address spaces | |
49286 | + * that don't trust each other, by splitting out the read and write | |
49287 | + * indices. This costs at least one pointer indirection more than the | |
49288 | + * vanilla version per access. | |
49289 | + */ | |
49290 | + | |
49291 | +typedef struct { | |
49292 | + char* fifo; | |
49293 | + unsigned fifo_mask; | |
49294 | + unsigned *fifo_rd_i; | |
49295 | + unsigned *fifo_wr_i; | |
49296 | +} sh_byte_fifo2; | |
49297 | + | |
49298 | +#define SH_FIFO2_M(f, x) ((x) & ((f)->fifo_mask)) | |
49299 | + | |
49300 | +static inline unsigned log2_ge(unsigned long n, unsigned min_order) { | |
49301 | + unsigned order = min_order; | |
49302 | + while((1ul << order) < n) ++order; | |
49303 | + return order; | |
49304 | +} | |
49305 | + | |
49306 | +static inline unsigned long pow2(unsigned order) { | |
49307 | + return (1ul << order); | |
49308 | +} | |
49309 | + | |
49310 | +#define is_pow2(x) (pow2(log2_ge((x), 0)) == (x)) | |
49311 | + | |
49312 | +#define sh_fifo2_valid(f) ((f) && (f)->fifo && (f)->fifo_mask > 0 && \ | |
49313 | + is_pow2((f)->fifo_mask+1u)) | |
49314 | + | |
49315 | +#define sh_fifo2_init(f, cap, _rptr, _wptr) \ | |
49316 | + do { \ | |
49317 | + BUG_ON(!is_pow2((cap) + 1)); \ | |
49318 | + (f)->fifo_rd_i = _rptr; \ | |
49319 | + (f)->fifo_wr_i = _wptr; \ | |
49320 | + *(f)->fifo_rd_i = *(f)->fifo_wr_i = 0u; \ | |
49321 | + (f)->fifo_mask = (cap); \ | |
49322 | + } while(0) | |
49323 | + | |
49324 | +#define sh_fifo2_num(f) SH_FIFO2_M((f),*(f)->fifo_wr_i - *(f)->fifo_rd_i) | |
49325 | +#define sh_fifo2_space(f) SH_FIFO2_M((f),*(f)->fifo_rd_i - *(f)->fifo_wr_i-1u) | |
49326 | +#define sh_fifo2_is_empty(f) (sh_fifo2_num(f)==0) | |
49327 | +#define sh_fifo2_not_empty(f) (sh_fifo2_num(f)!=0) | |
49328 | +#define sh_fifo2_is_full(f) (sh_fifo2_space(f)==0u) | |
49329 | +#define sh_fifo2_not_full(f) (sh_fifo2_space(f)!=0u) | |
49330 | +#define sh_fifo2_buf_size(f) ((f)->fifo_mask + 1u) | |
49331 | +#define sh_fifo2_capacity(f) ((f)->fifo_mask) | |
49332 | +#define sh_fifo2_end(f) ((f)->fifo + sh_fifo2_buf_size(f)) | |
49333 | +#define sh_fifo2_not_half_full(f) (sh_fifo2_space(f) > (sh_fifo2_capacity(f) >> 1)) | |
49334 | + | |
49335 | +#define sh_fifo2_peek(f) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_rd_i)]) | |
49336 | +#define sh_fifo2_peekp(f) ((f)->fifo + SH_FIFO2_M((f), *(f)->fifo_rd_i)) | |
49337 | +#define sh_fifo2_poke(f) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_wr_i)]) | |
49338 | +#define sh_fifo2_pokep(f) ((f)->fifo + SH_FIFO2_M((f), *(f)->fifo_wr_i)) | |
49339 | +#define sh_fifo2_peek_i(f,i) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_rd_i+(i))]) | |
49340 | +#define sh_fifo2_poke_i(f,i) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_wr_i+(i))]) | |
49341 | + | |
49342 | +#define sh_fifo2_rd_next(f) \ | |
49343 | + do {*(f)->fifo_rd_i = *(f)->fifo_rd_i + 1u;} while(0) | |
49344 | +#define sh_fifo2_wr_next(f) \ | |
49345 | + do {*(f)->fifo_wr_i = *(f)->fifo_wr_i + 1u;} while(0) | |
49346 | +#define sh_fifo2_rd_adv(f, n) \ | |
49347 | + do {*(f)->fifo_rd_i = *(f)->fifo_rd_i + (n);} while(0) | |
49348 | +#define sh_fifo2_wr_adv(f, n) \ | |
49349 | + do {*(f)->fifo_wr_i = *(f)->fifo_wr_i + (n);} while(0) | |
49350 | + | |
49351 | +#define sh_fifo2_put(f, v) \ | |
49352 | + do {sh_fifo2_poke(f) = (v); wmb(); sh_fifo2_wr_next(f);} while(0) | |
49353 | + | |
49354 | +#define sh_fifo2_get(f, pv) \ | |
49355 | + do {*(pv) = sh_fifo2_peek(f); mb(); sh_fifo2_rd_next(f);} while(0) | |
49356 | + | |
49357 | +static inline unsigned sh_fifo2_contig_num(sh_byte_fifo2 *f) | |
49358 | +{ | |
49359 | + unsigned fifo_wr_i = SH_FIFO2_M(f, *f->fifo_wr_i); | |
49360 | + unsigned fifo_rd_i = SH_FIFO2_M(f, *f->fifo_rd_i); | |
49361 | + | |
49362 | + return (fifo_wr_i >= fifo_rd_i) | |
49363 | + ? fifo_wr_i - fifo_rd_i | |
49364 | + : f->fifo_mask + 1u - *(f)->fifo_rd_i; | |
49365 | +} | |
49366 | + | |
49367 | +static inline unsigned sh_fifo2_contig_space(sh_byte_fifo2 *f) | |
49368 | +{ | |
49369 | + unsigned fifo_wr_i = SH_FIFO2_M(f, *f->fifo_wr_i); | |
49370 | + unsigned fifo_rd_i = SH_FIFO2_M(f, *f->fifo_rd_i); | |
49371 | + | |
49372 | + return (fifo_rd_i > fifo_wr_i) | |
49373 | + ? fifo_rd_i - fifo_wr_i - 1 | |
49374 | + : (f->fifo_mask + 1u - fifo_wr_i | |
49375 | + /* | |
49376 | + * The last byte can't be used if the read pointer | |
49377 | + * is at zero. | |
49378 | + */ | |
49379 | + - (fifo_rd_i==0)); | |
49380 | +} | |
49381 | + | |
49382 | + | |
49383 | +#endif /* NET_ACCEL_SHARED_FIFO_H */ | |
49384 | Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_util.c | |
49385 | =================================================================== | |
49386 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
49387 | +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_util.c 2008-02-20 09:32:49.000000000 +0100 | |
49388 | @@ -0,0 +1,333 @@ | |
49389 | +/**************************************************************************** | |
49390 | + * Solarflare driver for Xen network acceleration | |
49391 | + * | |
49392 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
49393 | + * 9501 Jeronimo Road, Suite 250, | |
49394 | + * Irvine, CA 92618, USA | |
49395 | + * | |
49396 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
49397 | + * | |
49398 | + * This program is free software; you can redistribute it and/or modify it | |
49399 | + * under the terms of the GNU General Public License version 2 as published | |
49400 | + * by the Free Software Foundation, incorporated herein by reference. | |
49401 | + * | |
49402 | + * This program is distributed in the hope that it will be useful, | |
49403 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
49404 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
49405 | + * GNU General Public License for more details. | |
49406 | + * | |
49407 | + * You should have received a copy of the GNU General Public License | |
49408 | + * along with this program; if not, write to the Free Software | |
49409 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
49410 | + **************************************************************************** | |
49411 | + */ | |
49412 | + | |
49413 | +#include <linux/if_ether.h> | |
49414 | +#include <asm/io.h> | |
49415 | +#include <asm/pgtable.h> | |
49416 | +#include <asm/hypercall.h> | |
49417 | +#include <xen/xenbus.h> | |
49418 | +#include <xen/driver_util.h> | |
49419 | +#include <xen/gnttab.h> | |
49420 | + | |
49421 | +#include "accel_util.h" | |
49422 | + | |
49423 | +#ifdef EFX_GCOV | |
49424 | +#include "gcov.h" | |
49425 | + | |
49426 | +static int __init net_accel_init(void) | |
49427 | +{ | |
49428 | + gcov_provider_init(THIS_MODULE); | |
49429 | + return 0; | |
49430 | +} | |
49431 | +module_init(net_accel_init); | |
49432 | + | |
49433 | +static void __exit net_accel_exit(void) | |
49434 | +{ | |
49435 | + gcov_provider_fini(THIS_MODULE); | |
49436 | +} | |
49437 | +module_exit(net_accel_exit); | |
49438 | +#endif | |
49439 | + | |
49440 | +/* Shutdown remote domain that is misbehaving */ | |
49441 | +int net_accel_shutdown_remote(int domain) | |
49442 | +{ | |
49443 | + struct sched_remote_shutdown sched_shutdown = { | |
49444 | + .domain_id = domain, | |
49445 | + .reason = SHUTDOWN_crash | |
49446 | + }; | |
49447 | + | |
49448 | + EPRINTK("Crashing domain %d\n", domain); | |
49449 | + | |
49450 | + return HYPERVISOR_sched_op(SCHEDOP_remote_shutdown, &sched_shutdown); | |
49451 | +} | |
49452 | +EXPORT_SYMBOL(net_accel_shutdown_remote); | |
49453 | + | |
49454 | + | |
49455 | +/* Based on xenbus_backend_client.c:xenbus_map_ring() */ | |
49456 | +static int net_accel_map_grant(struct xenbus_device *dev, int gnt_ref, | |
49457 | + grant_handle_t *handle, void *vaddr, | |
49458 | + u64 *dev_bus_addr, unsigned flags) | |
49459 | +{ | |
49460 | + struct gnttab_map_grant_ref op; | |
49461 | + | |
49462 | + gnttab_set_map_op(&op, (unsigned long)vaddr, flags, | |
49463 | + gnt_ref, dev->otherend_id); | |
49464 | + | |
49465 | + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); | |
49466 | + | |
49467 | + if (op.status != GNTST_okay) { | |
49468 | + xenbus_dev_error | |
49469 | + (dev, op.status, | |
49470 | + "failed mapping in shared page %d from domain %d\n", | |
49471 | + gnt_ref, dev->otherend_id); | |
49472 | + } else { | |
49473 | + *handle = op.handle; | |
49474 | + if (dev_bus_addr) | |
49475 | + *dev_bus_addr = op.dev_bus_addr; | |
49476 | + } | |
49477 | + | |
49478 | + return op.status; | |
49479 | +} | |
49480 | + | |
49481 | + | |
49482 | +/* Based on xenbus_backend_client.c:xenbus_unmap_ring() */ | |
49483 | +static int net_accel_unmap_grant(struct xenbus_device *dev, | |
49484 | + grant_handle_t handle, | |
49485 | + void *vaddr, u64 dev_bus_addr, | |
49486 | + unsigned flags) | |
49487 | +{ | |
49488 | + struct gnttab_unmap_grant_ref op; | |
49489 | + | |
49490 | + gnttab_set_unmap_op(&op, (unsigned long)vaddr, flags, handle); | |
49491 | + | |
49492 | + if (dev_bus_addr) | |
49493 | + op.dev_bus_addr = dev_bus_addr; | |
49494 | + | |
49495 | + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); | |
49496 | + | |
49497 | + if (op.status != GNTST_okay) | |
49498 | + xenbus_dev_error(dev, op.status, | |
49499 | + "failed unmapping page at handle %d error %d\n", | |
49500 | + handle, op.status); | |
49501 | + | |
49502 | + return op.status; | |
49503 | +} | |
49504 | + | |
49505 | + | |
49506 | +int net_accel_map_device_page(struct xenbus_device *dev, | |
49507 | + int gnt_ref, grant_handle_t *handle, | |
49508 | + u64 *dev_bus_addr) | |
49509 | +{ | |
49510 | + return net_accel_map_grant(dev, gnt_ref, handle, 0, dev_bus_addr, | |
49511 | + GNTMAP_device_map); | |
49512 | +} | |
49513 | +EXPORT_SYMBOL_GPL(net_accel_map_device_page); | |
49514 | + | |
49515 | + | |
49516 | +int net_accel_unmap_device_page(struct xenbus_device *dev, | |
49517 | + grant_handle_t handle, u64 dev_bus_addr) | |
49518 | +{ | |
49519 | + return net_accel_unmap_grant(dev, handle, 0, dev_bus_addr, | |
49520 | + GNTMAP_device_map); | |
49521 | +} | |
49522 | +EXPORT_SYMBOL_GPL(net_accel_unmap_device_page); | |
49523 | + | |
49524 | + | |
49525 | +struct net_accel_valloc_grant_mapping { | |
49526 | + struct vm_struct *vm; | |
49527 | + int pages; | |
49528 | + grant_handle_t grant_handles[0]; | |
49529 | +}; | |
49530 | + | |
49531 | +/* Map a series of grants into a contiguous virtual area */ | |
49532 | +static void *net_accel_map_grants_valloc(struct xenbus_device *dev, | |
49533 | + unsigned *grants, int npages, | |
49534 | + unsigned flags, void **priv) | |
49535 | +{ | |
49536 | + struct net_accel_valloc_grant_mapping *map; | |
49537 | + struct vm_struct *vm; | |
49538 | + void *addr; | |
49539 | + int i, j, rc; | |
49540 | + | |
49541 | + vm = alloc_vm_area(PAGE_SIZE * npages); | |
49542 | + if (vm == NULL) { | |
49543 | + EPRINTK("No memory from alloc_vm_area.\n"); | |
49544 | + return NULL; | |
49545 | + } | |
49546 | + /* | |
49547 | + * Get a structure in which we will record all the info needed | |
49548 | + * to undo the mapping. | |
49549 | + */ | |
49550 | + map = kzalloc(sizeof(struct net_accel_valloc_grant_mapping) + | |
49551 | + npages * sizeof(grant_handle_t), GFP_KERNEL); | |
49552 | + if (map == NULL) { | |
49553 | + EPRINTK("No memory for net_accel_valloc_grant_mapping\n"); | |
49554 | + free_vm_area(vm); | |
49555 | + return NULL; | |
49556 | + } | |
49557 | + map->vm = vm; | |
49558 | + map->pages = npages; | |
49559 | + | |
49560 | + /* Do the actual mapping */ | |
49561 | + addr = vm->addr; | |
49562 | + for (i = 0; i < npages; i++) { | |
49563 | + rc = net_accel_map_grant(dev, grants[i], map->grant_handles + i, | |
49564 | + addr, NULL, flags); | |
49565 | + if (rc != 0) | |
49566 | + goto undo; | |
49567 | + addr = (void*)((unsigned long)addr + PAGE_SIZE); | |
49568 | + } | |
49569 | + | |
49570 | + if (priv) | |
49571 | + *priv = (void *)map; | |
49572 | + else | |
49573 | + kfree(map); | |
49574 | + | |
49575 | + return vm->addr; | |
49576 | + | |
49577 | + undo: | |
49578 | + EPRINTK("Aborting contig map due to single map failure %d (%d of %d)\n", | |
49579 | + rc, i+1, npages); | |
49580 | + for (j = 0; j < i; j++) { | |
49581 | + addr = (void*)((unsigned long)vm->addr + (j * PAGE_SIZE)); | |
49582 | + net_accel_unmap_grant(dev, map->grant_handles[j], addr, 0, | |
49583 | + flags); | |
49584 | + } | |
49585 | + free_vm_area(vm); | |
49586 | + kfree(map); | |
49587 | + return NULL; | |
49588 | +} | |
49589 | + | |
49590 | +/* Undo the result of the mapping */ | |
49591 | +static void net_accel_unmap_grants_vfree(struct xenbus_device *dev, | |
49592 | + unsigned flags, void *priv) | |
49593 | +{ | |
49594 | + struct net_accel_valloc_grant_mapping *map = | |
49595 | + (struct net_accel_valloc_grant_mapping *)priv; | |
49596 | + | |
49597 | + void *addr = map->vm->addr; | |
49598 | + int npages = map->pages; | |
49599 | + int i; | |
49600 | + | |
49601 | + for (i = 0; i < npages; i++) { | |
49602 | + net_accel_unmap_grant(dev, map->grant_handles[i], addr, 0, | |
49603 | + flags); | |
49604 | + addr = (void*)((unsigned long)addr + PAGE_SIZE); | |
49605 | + } | |
49606 | + free_vm_area(map->vm); | |
49607 | + kfree(map); | |
49608 | +} | |
49609 | + | |
49610 | + | |
49611 | +void *net_accel_map_grants_contig(struct xenbus_device *dev, | |
49612 | + unsigned *grants, int npages, | |
49613 | + void **priv) | |
49614 | +{ | |
49615 | + return net_accel_map_grants_valloc(dev, grants, npages, | |
49616 | + GNTMAP_host_map, priv); | |
49617 | +} | |
49618 | +EXPORT_SYMBOL(net_accel_map_grants_contig); | |
49619 | + | |
49620 | + | |
49621 | +void net_accel_unmap_grants_contig(struct xenbus_device *dev, | |
49622 | + void *priv) | |
49623 | +{ | |
49624 | + net_accel_unmap_grants_vfree(dev, GNTMAP_host_map, priv); | |
49625 | +} | |
49626 | +EXPORT_SYMBOL(net_accel_unmap_grants_contig); | |
49627 | + | |
49628 | + | |
49629 | +void *net_accel_map_iomem_page(struct xenbus_device *dev, int gnt_ref, | |
49630 | + void **priv) | |
49631 | +{ | |
49632 | + return net_accel_map_grants_valloc(dev, &gnt_ref, 1, | |
49633 | + GNTMAP_host_map, priv); | |
49634 | +} | |
49635 | +EXPORT_SYMBOL(net_accel_map_iomem_page); | |
49636 | + | |
49637 | + | |
49638 | +void net_accel_unmap_iomem_page(struct xenbus_device *dev, void *priv) | |
49639 | +{ | |
49640 | + net_accel_unmap_grants_vfree(dev, GNTMAP_host_map, priv); | |
49641 | +} | |
49642 | +EXPORT_SYMBOL(net_accel_unmap_iomem_page); | |
49643 | + | |
49644 | + | |
49645 | +int net_accel_grant_page(struct xenbus_device *dev, unsigned long mfn, | |
49646 | + int is_iomem) | |
49647 | +{ | |
49648 | + int err = gnttab_grant_foreign_access(dev->otherend_id, mfn, | |
49649 | + is_iomem ? GTF_PCD : 0); | |
49650 | + if (err < 0) | |
49651 | + xenbus_dev_error(dev, err, "failed granting access to page\n"); | |
49652 | + return err; | |
49653 | +} | |
49654 | +EXPORT_SYMBOL_GPL(net_accel_grant_page); | |
49655 | + | |
49656 | + | |
49657 | +int net_accel_ungrant_page(grant_ref_t gntref) | |
49658 | +{ | |
49659 | + if (unlikely(gnttab_query_foreign_access(gntref) != 0)) { | |
49660 | + EPRINTK("%s: remote domain still using grant %d\n", __FUNCTION__, | |
49661 | + gntref); | |
49662 | + return -EBUSY; | |
49663 | + } | |
49664 | + | |
49665 | + gnttab_end_foreign_access(gntref, 0); | |
49666 | + return 0; | |
49667 | +} | |
49668 | +EXPORT_SYMBOL_GPL(net_accel_ungrant_page); | |
49669 | + | |
49670 | + | |
49671 | +int net_accel_xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) | |
49672 | +{ | |
49673 | + char *s, *e, *macstr; | |
49674 | + int i; | |
49675 | + | |
49676 | + macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); | |
49677 | + if (IS_ERR(macstr)) | |
49678 | + return PTR_ERR(macstr); | |
49679 | + | |
49680 | + for (i = 0; i < ETH_ALEN; i++) { | |
49681 | + mac[i] = simple_strtoul(s, &e, 16); | |
49682 | + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { | |
49683 | + kfree(macstr); | |
49684 | + return -ENOENT; | |
49685 | + } | |
49686 | + s = e+1; | |
49687 | + } | |
49688 | + | |
49689 | + kfree(macstr); | |
49690 | + return 0; | |
49691 | +} | |
49692 | +EXPORT_SYMBOL_GPL(net_accel_xen_net_read_mac); | |
49693 | + | |
49694 | + | |
49695 | +void net_accel_update_state(struct xenbus_device *dev, int state) | |
49696 | +{ | |
49697 | + struct xenbus_transaction tr; | |
49698 | + int err; | |
49699 | + | |
49700 | + DPRINTK("%s: setting accelstate to %s\n", __FUNCTION__, | |
49701 | + xenbus_strstate(state)); | |
49702 | + | |
49703 | + if (xenbus_exists(XBT_NIL, dev->nodename, "")) { | |
49704 | + VPRINTK("%s: nodename %s\n", __FUNCTION__, dev->nodename); | |
49705 | + again: | |
49706 | + err = xenbus_transaction_start(&tr); | |
49707 | + if (err == 0) | |
49708 | + err = xenbus_printf(tr, dev->nodename, "accelstate", | |
49709 | + "%d", state); | |
49710 | + if (err != 0) { | |
49711 | + xenbus_transaction_end(tr, 1); | |
49712 | + } else { | |
49713 | + err = xenbus_transaction_end(tr, 0); | |
49714 | + if (err == -EAGAIN) | |
49715 | + goto again; | |
49716 | + } | |
49717 | + } | |
49718 | +} | |
49719 | +EXPORT_SYMBOL_GPL(net_accel_update_state); | |
49720 | + | |
49721 | +MODULE_LICENSE("GPL"); | |
49722 | Index: head-2008-11-25/drivers/xen/sfc_netutil/accel_util.h | |
49723 | =================================================================== | |
49724 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
49725 | +++ head-2008-11-25/drivers/xen/sfc_netutil/accel_util.h 2008-02-20 09:32:49.000000000 +0100 | |
49726 | @@ -0,0 +1,127 @@ | |
49727 | +/**************************************************************************** | |
49728 | + * Solarflare driver for Xen network acceleration | |
49729 | + * | |
49730 | + * Copyright 2006-2008: Solarflare Communications Inc, | |
49731 | + * 9501 Jeronimo Road, Suite 250, | |
49732 | + * Irvine, CA 92618, USA | |
49733 | + * | |
49734 | + * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com> | |
49735 | + * | |
49736 | + * This program is free software; you can redistribute it and/or modify it | |
49737 | + * under the terms of the GNU General Public License version 2 as published | |
49738 | + * by the Free Software Foundation, incorporated herein by reference. | |
49739 | + * | |
49740 | + * This program is distributed in the hope that it will be useful, | |
49741 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
49742 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
49743 | + * GNU General Public License for more details. | |
49744 | + * | |
49745 | + * You should have received a copy of the GNU General Public License | |
49746 | + * along with this program; if not, write to the Free Software | |
49747 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
49748 | + **************************************************************************** | |
49749 | + */ | |
49750 | + | |
49751 | +#ifndef NETBACK_ACCEL_UTIL_H | |
49752 | +#define NETBACK_ACCEL_UTIL_H | |
49753 | + | |
49754 | +#ifdef DPRINTK | |
49755 | +#undef DPRINTK | |
49756 | +#endif | |
49757 | + | |
49758 | +#define FILE_LEAF strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__ | |
49759 | + | |
49760 | +#if 1 | |
49761 | +#define VPRINTK(_f, _a...) | |
49762 | +#else | |
49763 | +#define VPRINTK(_f, _a...) \ | |
49764 | + printk("(file=%s, line=%d) " _f, \ | |
49765 | + FILE_LEAF , __LINE__ , ## _a ) | |
49766 | +#endif | |
49767 | + | |
49768 | +#if 1 | |
49769 | +#define DPRINTK(_f, _a...) | |
49770 | +#else | |
49771 | +#define DPRINTK(_f, _a...) \ | |
49772 | + printk("(file=%s, line=%d) " _f, \ | |
49773 | + FILE_LEAF , __LINE__ , ## _a ) | |
49774 | +#endif | |
49775 | + | |
49776 | +#define EPRINTK(_f, _a...) \ | |
49777 | + printk("(file=%s, line=%d) " _f, \ | |
49778 | + FILE_LEAF , __LINE__ , ## _a ) | |
49779 | + | |
49780 | +#define EPRINTK_ON(exp) \ | |
49781 | + do { \ | |
49782 | + if (exp) \ | |
49783 | + EPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \ | |
49784 | + } while(0) | |
49785 | + | |
49786 | +#define DPRINTK_ON(exp) \ | |
49787 | + do { \ | |
49788 | + if (exp) \ | |
49789 | + DPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \ | |
49790 | + } while(0) | |
49791 | + | |
49792 | +#define MAC_FMT "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x" | |
49793 | +#define MAC_ARG(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5] | |
49794 | + | |
49795 | +#include <xen/xenbus.h> | |
49796 | + | |
49797 | +/*! Map a set of pages from another domain | |
49798 | + * \param dev The xenbus device context | |
49799 | + * \param priv The private data returned by the mapping function | |
49800 | + */ | |
49801 | +extern | |
49802 | +void *net_accel_map_grants_contig(struct xenbus_device *dev, | |
49803 | + unsigned *grants, int npages, | |
49804 | + void **priv); | |
49805 | + | |
49806 | +/*! Unmap a set of pages mapped using net_accel_map_grants_contig. | |
49807 | + * \param dev The xenbus device context | |
49808 | + * \param priv The private data returned by the mapping function | |
49809 | + */ | |
49810 | +extern | |
49811 | +void net_accel_unmap_grants_contig(struct xenbus_device *dev, void *priv); | |
49812 | + | |
49813 | +/*! Read the MAC address of a device from xenstore */ | |
49814 | +extern | |
49815 | +int net_accel_xen_net_read_mac(struct xenbus_device *dev, u8 mac[]); | |
49816 | + | |
49817 | +/*! Update the accelstate field for a device in xenstore */ | |
49818 | +extern | |
49819 | +void net_accel_update_state(struct xenbus_device *dev, int state); | |
49820 | + | |
49821 | +/* These four map/unmap functions are based on | |
49822 | + * xenbus_backend_client.c:xenbus_map_ring(). However, they are not | |
49823 | + * used for ring buffers, instead just to map pages between domains, | |
49824 | + * or to map a page so that it is accessible by a device | |
49825 | + */ | |
49826 | +extern | |
49827 | +int net_accel_map_device_page(struct xenbus_device *dev, | |
49828 | + int gnt_ref, grant_handle_t *handle, | |
49829 | + u64 *dev_bus_addr); | |
49830 | +extern | |
49831 | +int net_accel_unmap_device_page(struct xenbus_device *dev, | |
49832 | + grant_handle_t handle, u64 dev_bus_addr); | |
49833 | +extern | |
49834 | +void *net_accel_map_iomem_page(struct xenbus_device *dev, int gnt_ref, | |
49835 | + void **priv); | |
49836 | +extern | |
49837 | +void net_accel_unmap_iomem_page(struct xenbus_device *dev, void *priv); | |
49838 | + | |
49839 | +/*! Grant a page to remote domain */ | |
49840 | +extern | |
49841 | +int net_accel_grant_page(struct xenbus_device *dev, unsigned long mfn, | |
49842 | + int is_iomem); | |
49843 | +/*! Undo a net_accel_grant_page */ | |
49844 | +extern | |
49845 | +int net_accel_ungrant_page(grant_ref_t gntref); | |
49846 | + | |
49847 | + | |
49848 | +/*! Shutdown remote domain that is misbehaving */ | |
49849 | +extern | |
49850 | +int net_accel_shutdown_remote(int domain); | |
49851 | + | |
49852 | + | |
49853 | +#endif | |
49854 | Index: head-2008-11-25/drivers/xen/tpmback/Makefile | |
49855 | =================================================================== | |
49856 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
49857 | +++ head-2008-11-25/drivers/xen/tpmback/Makefile 2007-06-12 13:13:45.000000000 +0200 | |
49858 | @@ -0,0 +1,4 @@ | |
49859 | + | |
49860 | +obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmbk.o | |
49861 | + | |
49862 | +tpmbk-y += tpmback.o interface.o xenbus.o | |
49863 | Index: head-2008-11-25/drivers/xen/tpmback/common.h | |
49864 | =================================================================== | |
49865 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
49866 | +++ head-2008-11-25/drivers/xen/tpmback/common.h 2007-06-12 13:13:45.000000000 +0200 | |
49867 | @@ -0,0 +1,85 @@ | |
49868 | +/****************************************************************************** | |
49869 | + * drivers/xen/tpmback/common.h | |
49870 | + */ | |
49871 | + | |
49872 | +#ifndef __TPM__BACKEND__COMMON_H__ | |
49873 | +#define __TPM__BACKEND__COMMON_H__ | |
49874 | + | |
49875 | +#include <linux/version.h> | |
49876 | +#include <linux/module.h> | |
49877 | +#include <linux/interrupt.h> | |
49878 | +#include <linux/slab.h> | |
49879 | +#include <xen/evtchn.h> | |
49880 | +#include <xen/driver_util.h> | |
49881 | +#include <xen/interface/grant_table.h> | |
49882 | +#include <xen/interface/io/tpmif.h> | |
49883 | +#include <asm/io.h> | |
49884 | +#include <asm/pgalloc.h> | |
49885 | + | |
49886 | +#define DPRINTK(_f, _a...) \ | |
49887 | + pr_debug("(file=%s, line=%d) " _f, \ | |
49888 | + __FILE__ , __LINE__ , ## _a ) | |
49889 | + | |
49890 | +struct backend_info; | |
49891 | + | |
49892 | +typedef struct tpmif_st { | |
49893 | + struct list_head tpmif_list; | |
49894 | + /* Unique identifier for this interface. */ | |
49895 | + domid_t domid; | |
49896 | + unsigned int handle; | |
49897 | + | |
49898 | + /* Physical parameters of the comms window. */ | |
49899 | + unsigned int irq; | |
49900 | + | |
49901 | + /* The shared rings and indexes. */ | |
49902 | + tpmif_tx_interface_t *tx; | |
49903 | + struct vm_struct *tx_area; | |
49904 | + | |
49905 | + /* Miscellaneous private stuff. */ | |
49906 | + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; | |
49907 | + int active; | |
49908 | + | |
49909 | + struct tpmif_st *hash_next; | |
49910 | + struct list_head list; /* scheduling list */ | |
49911 | + atomic_t refcnt; | |
49912 | + | |
49913 | + struct backend_info *bi; | |
49914 | + | |
49915 | + grant_handle_t shmem_handle; | |
49916 | + grant_ref_t shmem_ref; | |
49917 | + struct page **mmap_pages; | |
49918 | + | |
49919 | + char devname[20]; | |
49920 | +} tpmif_t; | |
49921 | + | |
49922 | +void tpmif_disconnect_complete(tpmif_t * tpmif); | |
49923 | +tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi); | |
49924 | +void tpmif_interface_init(void); | |
49925 | +void tpmif_interface_exit(void); | |
49926 | +void tpmif_schedule_work(tpmif_t * tpmif); | |
49927 | +void tpmif_deschedule_work(tpmif_t * tpmif); | |
49928 | +void tpmif_xenbus_init(void); | |
49929 | +void tpmif_xenbus_exit(void); | |
49930 | +int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn); | |
49931 | +irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs); | |
49932 | + | |
49933 | +long int tpmback_get_instance(struct backend_info *bi); | |
49934 | + | |
49935 | +int vtpm_release_packets(tpmif_t * tpmif, int send_msgs); | |
49936 | + | |
49937 | + | |
49938 | +#define tpmif_get(_b) (atomic_inc(&(_b)->refcnt)) | |
49939 | +#define tpmif_put(_b) \ | |
49940 | + do { \ | |
49941 | + if (atomic_dec_and_test(&(_b)->refcnt)) \ | |
49942 | + tpmif_disconnect_complete(_b); \ | |
49943 | + } while (0) | |
49944 | + | |
49945 | +extern int num_frontends; | |
49946 | + | |
49947 | +static inline unsigned long idx_to_kaddr(tpmif_t *t, unsigned int idx) | |
49948 | +{ | |
49949 | + return (unsigned long)pfn_to_kaddr(page_to_pfn(t->mmap_pages[idx])); | |
49950 | +} | |
49951 | + | |
49952 | +#endif /* __TPM__BACKEND__COMMON_H__ */ | |
49953 | Index: head-2008-11-25/drivers/xen/tpmback/interface.c | |
49954 | =================================================================== | |
49955 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
49956 | +++ head-2008-11-25/drivers/xen/tpmback/interface.c 2008-01-21 11:15:26.000000000 +0100 | |
49957 | @@ -0,0 +1,168 @@ | |
49958 | + /***************************************************************************** | |
49959 | + * drivers/xen/tpmback/interface.c | |
49960 | + * | |
49961 | + * Virtual TPM interface management. | |
49962 | + * | |
49963 | + * Copyright (c) 2005, IBM Corporation | |
49964 | + * | |
49965 | + * Author: Stefan Berger, stefanb@us.ibm.com | |
49966 | + * | |
49967 | + * This code has been derived from drivers/xen/netback/interface.c | |
49968 | + * Copyright (c) 2004, Keir Fraser | |
49969 | + */ | |
49970 | + | |
49971 | +#include "common.h" | |
49972 | +#include <xen/balloon.h> | |
49973 | +#include <xen/gnttab.h> | |
49974 | + | |
49975 | +static kmem_cache_t *tpmif_cachep; | |
49976 | +int num_frontends = 0; | |
49977 | + | |
49978 | +LIST_HEAD(tpmif_list); | |
49979 | + | |
49980 | +static tpmif_t *alloc_tpmif(domid_t domid, struct backend_info *bi) | |
49981 | +{ | |
49982 | + tpmif_t *tpmif; | |
49983 | + | |
49984 | + tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL); | |
49985 | + if (tpmif == NULL) | |
49986 | + goto out_of_memory; | |
49987 | + | |
49988 | + memset(tpmif, 0, sizeof (*tpmif)); | |
49989 | + tpmif->domid = domid; | |
49990 | + tpmif->status = DISCONNECTED; | |
49991 | + tpmif->bi = bi; | |
49992 | + snprintf(tpmif->devname, sizeof(tpmif->devname), "tpmif%d", domid); | |
49993 | + atomic_set(&tpmif->refcnt, 1); | |
49994 | + | |
49995 | + tpmif->mmap_pages = alloc_empty_pages_and_pagevec(TPMIF_TX_RING_SIZE); | |
49996 | + if (tpmif->mmap_pages == NULL) | |
49997 | + goto out_of_memory; | |
49998 | + | |
49999 | + list_add(&tpmif->tpmif_list, &tpmif_list); | |
50000 | + num_frontends++; | |
50001 | + | |
50002 | + return tpmif; | |
50003 | + | |
50004 | + out_of_memory: | |
50005 | + if (tpmif != NULL) | |
50006 | + kmem_cache_free(tpmif_cachep, tpmif); | |
50007 | + printk("%s: out of memory\n", __FUNCTION__); | |
50008 | + return ERR_PTR(-ENOMEM); | |
50009 | +} | |
50010 | + | |
50011 | +static void free_tpmif(tpmif_t * tpmif) | |
50012 | +{ | |
50013 | + num_frontends--; | |
50014 | + list_del(&tpmif->tpmif_list); | |
50015 | + free_empty_pages_and_pagevec(tpmif->mmap_pages, TPMIF_TX_RING_SIZE); | |
50016 | + kmem_cache_free(tpmif_cachep, tpmif); | |
50017 | +} | |
50018 | + | |
50019 | +tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi) | |
50020 | +{ | |
50021 | + tpmif_t *tpmif; | |
50022 | + | |
50023 | + list_for_each_entry(tpmif, &tpmif_list, tpmif_list) { | |
50024 | + if (tpmif->bi == bi) { | |
50025 | + if (tpmif->domid == domid) { | |
50026 | + tpmif_get(tpmif); | |
50027 | + return tpmif; | |
50028 | + } else { | |
50029 | + return ERR_PTR(-EEXIST); | |
50030 | + } | |
50031 | + } | |
50032 | + } | |
50033 | + | |
50034 | + return alloc_tpmif(domid, bi); | |
50035 | +} | |
50036 | + | |
50037 | +static int map_frontend_page(tpmif_t *tpmif, unsigned long shared_page) | |
50038 | +{ | |
50039 | + struct gnttab_map_grant_ref op; | |
50040 | + | |
50041 | + gnttab_set_map_op(&op, (unsigned long)tpmif->tx_area->addr, | |
50042 | + GNTMAP_host_map, shared_page, tpmif->domid); | |
50043 | + | |
50044 | + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | |
50045 | + BUG(); | |
50046 | + | |
50047 | + if (op.status) { | |
50048 | + DPRINTK(" Grant table operation failure !\n"); | |
50049 | + return op.status; | |
50050 | + } | |
50051 | + | |
50052 | + tpmif->shmem_ref = shared_page; | |
50053 | + tpmif->shmem_handle = op.handle; | |
50054 | + | |
50055 | + return 0; | |
50056 | +} | |
50057 | + | |
50058 | +static void unmap_frontend_page(tpmif_t *tpmif) | |
50059 | +{ | |
50060 | + struct gnttab_unmap_grant_ref op; | |
50061 | + | |
50062 | + gnttab_set_unmap_op(&op, (unsigned long)tpmif->tx_area->addr, | |
50063 | + GNTMAP_host_map, tpmif->shmem_handle); | |
50064 | + | |
50065 | + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | |
50066 | + BUG(); | |
50067 | +} | |
50068 | + | |
50069 | +int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn) | |
50070 | +{ | |
50071 | + int err; | |
50072 | + | |
50073 | + if (tpmif->irq) | |
50074 | + return 0; | |
50075 | + | |
50076 | + if ((tpmif->tx_area = alloc_vm_area(PAGE_SIZE)) == NULL) | |
50077 | + return -ENOMEM; | |
50078 | + | |
50079 | + err = map_frontend_page(tpmif, shared_page); | |
50080 | + if (err) { | |
50081 | + free_vm_area(tpmif->tx_area); | |
50082 | + return err; | |
50083 | + } | |
50084 | + | |
50085 | + tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr; | |
50086 | + memset(tpmif->tx, 0, PAGE_SIZE); | |
50087 | + | |
50088 | + err = bind_interdomain_evtchn_to_irqhandler( | |
50089 | + tpmif->domid, evtchn, tpmif_be_int, 0, tpmif->devname, tpmif); | |
50090 | + if (err < 0) { | |
50091 | + unmap_frontend_page(tpmif); | |
50092 | + free_vm_area(tpmif->tx_area); | |
50093 | + return err; | |
50094 | + } | |
50095 | + tpmif->irq = err; | |
50096 | + | |
50097 | + tpmif->shmem_ref = shared_page; | |
50098 | + tpmif->active = 1; | |
50099 | + | |
50100 | + return 0; | |
50101 | +} | |
50102 | + | |
50103 | +void tpmif_disconnect_complete(tpmif_t *tpmif) | |
50104 | +{ | |
50105 | + if (tpmif->irq) | |
50106 | + unbind_from_irqhandler(tpmif->irq, tpmif); | |
50107 | + | |
50108 | + if (tpmif->tx) { | |
50109 | + unmap_frontend_page(tpmif); | |
50110 | + free_vm_area(tpmif->tx_area); | |
50111 | + } | |
50112 | + | |
50113 | + free_tpmif(tpmif); | |
50114 | +} | |
50115 | + | |
50116 | +void __init tpmif_interface_init(void) | |
50117 | +{ | |
50118 | + tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t), | |
50119 | + 0, 0, NULL, NULL); | |
50120 | +} | |
50121 | + | |
50122 | +void __exit tpmif_interface_exit(void) | |
50123 | +{ | |
50124 | + kmem_cache_destroy(tpmif_cachep); | |
50125 | +} | |
50126 | Index: head-2008-11-25/drivers/xen/tpmback/tpmback.c | |
50127 | =================================================================== | |
50128 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
50129 | +++ head-2008-11-25/drivers/xen/tpmback/tpmback.c 2007-06-12 13:13:45.000000000 +0200 | |
50130 | @@ -0,0 +1,944 @@ | |
50131 | +/****************************************************************************** | |
50132 | + * drivers/xen/tpmback/tpmback.c | |
50133 | + * | |
50134 | + * Copyright (c) 2005, IBM Corporation | |
50135 | + * | |
50136 | + * Author: Stefan Berger, stefanb@us.ibm.com | |
50137 | + * Grant table support: Mahadevan Gomathisankaran | |
50138 | + * | |
50139 | + * This code has been derived from drivers/xen/netback/netback.c | |
50140 | + * Copyright (c) 2002-2004, K A Fraser | |
50141 | + * | |
50142 | + */ | |
50143 | + | |
50144 | +#include "common.h" | |
50145 | +#include <xen/evtchn.h> | |
50146 | + | |
50147 | +#include <linux/types.h> | |
50148 | +#include <linux/list.h> | |
50149 | +#include <linux/miscdevice.h> | |
50150 | +#include <linux/poll.h> | |
50151 | +#include <asm/uaccess.h> | |
50152 | +#include <xen/xenbus.h> | |
50153 | +#include <xen/interface/grant_table.h> | |
50154 | +#include <xen/gnttab.h> | |
50155 | + | |
50156 | +/* local data structures */ | |
50157 | +struct data_exchange { | |
50158 | + struct list_head pending_pak; | |
50159 | + struct list_head current_pak; | |
50160 | + unsigned int copied_so_far; | |
50161 | + u8 has_opener:1; | |
50162 | + u8 aborted:1; | |
50163 | + rwlock_t pak_lock; // protects all of the previous fields | |
50164 | + wait_queue_head_t wait_queue; | |
50165 | +}; | |
50166 | + | |
50167 | +struct vtpm_resp_hdr { | |
50168 | + uint32_t instance_no; | |
50169 | + uint16_t tag_no; | |
50170 | + uint32_t len_no; | |
50171 | + uint32_t ordinal_no; | |
50172 | +} __attribute__ ((packed)); | |
50173 | + | |
50174 | +struct packet { | |
50175 | + struct list_head next; | |
50176 | + unsigned int data_len; | |
50177 | + u8 *data_buffer; | |
50178 | + tpmif_t *tpmif; | |
50179 | + u32 tpm_instance; | |
50180 | + u8 req_tag; | |
50181 | + u32 last_read; | |
50182 | + u8 flags; | |
50183 | + struct timer_list processing_timer; | |
50184 | +}; | |
50185 | + | |
50186 | +enum { | |
50187 | + PACKET_FLAG_DISCARD_RESPONSE = 1, | |
50188 | +}; | |
50189 | + | |
50190 | +/* local variables */ | |
50191 | +static struct data_exchange dataex; | |
50192 | + | |
50193 | +/* local function prototypes */ | |
50194 | +static int _packet_write(struct packet *pak, | |
50195 | + const char *data, size_t size, int userbuffer); | |
50196 | +static void processing_timeout(unsigned long ptr); | |
50197 | +static int packet_read_shmem(struct packet *pak, | |
50198 | + tpmif_t * tpmif, | |
50199 | + u32 offset, | |
50200 | + char *buffer, int isuserbuffer, u32 left); | |
50201 | +static int vtpm_queue_packet(struct packet *pak); | |
50202 | + | |
50203 | +/*************************************************************** | |
50204 | + Buffer copying for user and kernel space buffers. | |
50205 | +***************************************************************/ | |
50206 | +static inline int copy_from_buffer(void *to, | |
50207 | + const void *from, unsigned long size, | |
50208 | + int isuserbuffer) | |
50209 | +{ | |
50210 | + if (isuserbuffer) { | |
50211 | + if (copy_from_user(to, (void __user *)from, size)) | |
50212 | + return -EFAULT; | |
50213 | + } else { | |
50214 | + memcpy(to, from, size); | |
50215 | + } | |
50216 | + return 0; | |
50217 | +} | |
50218 | + | |
50219 | +static inline int copy_to_buffer(void *to, | |
50220 | + const void *from, unsigned long size, | |
50221 | + int isuserbuffer) | |
50222 | +{ | |
50223 | + if (isuserbuffer) { | |
50224 | + if (copy_to_user((void __user *)to, from, size)) | |
50225 | + return -EFAULT; | |
50226 | + } else { | |
50227 | + memcpy(to, from, size); | |
50228 | + } | |
50229 | + return 0; | |
50230 | +} | |
50231 | + | |
50232 | + | |
50233 | +static void dataex_init(struct data_exchange *dataex) | |
50234 | +{ | |
50235 | + INIT_LIST_HEAD(&dataex->pending_pak); | |
50236 | + INIT_LIST_HEAD(&dataex->current_pak); | |
50237 | + dataex->has_opener = 0; | |
50238 | + rwlock_init(&dataex->pak_lock); | |
50239 | + init_waitqueue_head(&dataex->wait_queue); | |
50240 | +} | |
50241 | + | |
50242 | +/*************************************************************** | |
50243 | + Packet-related functions | |
50244 | +***************************************************************/ | |
50245 | + | |
50246 | +static struct packet *packet_find_instance(struct list_head *head, | |
50247 | + u32 tpm_instance) | |
50248 | +{ | |
50249 | + struct packet *pak; | |
50250 | + struct list_head *p; | |
50251 | + | |
50252 | + /* | |
50253 | + * traverse the list of packets and return the first | |
50254 | + * one with the given instance number | |
50255 | + */ | |
50256 | + list_for_each(p, head) { | |
50257 | + pak = list_entry(p, struct packet, next); | |
50258 | + | |
50259 | + if (pak->tpm_instance == tpm_instance) { | |
50260 | + return pak; | |
50261 | + } | |
50262 | + } | |
50263 | + return NULL; | |
50264 | +} | |
50265 | + | |
50266 | +static struct packet *packet_find_packet(struct list_head *head, void *packet) | |
50267 | +{ | |
50268 | + struct packet *pak; | |
50269 | + struct list_head *p; | |
50270 | + | |
50271 | + /* | |
50272 | + * traverse the list of packets and return the first | |
50273 | + * one that matches the given packet | |
50274 | + */ | |
50275 | + list_for_each(p, head) { | |
50276 | + pak = list_entry(p, struct packet, next); | |
50277 | + | |
50278 | + if (pak == packet) { | |
50279 | + return pak; | |
50280 | + } | |
50281 | + } | |
50282 | + return NULL; | |
50283 | +} | |
50284 | + | |
50285 | +static struct packet *packet_alloc(tpmif_t * tpmif, | |
50286 | + u32 size, u8 req_tag, u8 flags) | |
50287 | +{ | |
50288 | + struct packet *pak = NULL; | |
50289 | + pak = kzalloc(sizeof (struct packet), GFP_ATOMIC); | |
50290 | + if (NULL != pak) { | |
50291 | + if (tpmif) { | |
50292 | + pak->tpmif = tpmif; | |
50293 | + pak->tpm_instance = tpmback_get_instance(tpmif->bi); | |
50294 | + tpmif_get(tpmif); | |
50295 | + } | |
50296 | + pak->data_len = size; | |
50297 | + pak->req_tag = req_tag; | |
50298 | + pak->last_read = 0; | |
50299 | + pak->flags = flags; | |
50300 | + | |
50301 | + /* | |
50302 | + * cannot do tpmif_get(tpmif); bad things happen | |
50303 | + * on the last tpmif_put() | |
50304 | + */ | |
50305 | + init_timer(&pak->processing_timer); | |
50306 | + pak->processing_timer.function = processing_timeout; | |
50307 | + pak->processing_timer.data = (unsigned long)pak; | |
50308 | + } | |
50309 | + return pak; | |
50310 | +} | |
50311 | + | |
50312 | +static void inline packet_reset(struct packet *pak) | |
50313 | +{ | |
50314 | + pak->last_read = 0; | |
50315 | +} | |
50316 | + | |
50317 | +static void packet_free(struct packet *pak) | |
50318 | +{ | |
50319 | + if (timer_pending(&pak->processing_timer)) { | |
50320 | + BUG(); | |
50321 | + } | |
50322 | + | |
50323 | + if (pak->tpmif) | |
50324 | + tpmif_put(pak->tpmif); | |
50325 | + kfree(pak->data_buffer); | |
50326 | + /* | |
50327 | + * cannot do tpmif_put(pak->tpmif); bad things happen | |
50328 | + * on the last tpmif_put() | |
50329 | + */ | |
50330 | + kfree(pak); | |
50331 | +} | |
50332 | + | |
50333 | + | |
50334 | +/* | |
50335 | + * Write data to the shared memory and send it to the FE. | |
50336 | + */ | |
50337 | +static int packet_write(struct packet *pak, | |
50338 | + const char *data, size_t size, int isuserbuffer) | |
50339 | +{ | |
50340 | + int rc = 0; | |
50341 | + | |
50342 | + if (0 != (pak->flags & PACKET_FLAG_DISCARD_RESPONSE)) { | |
50343 | + /* Don't send a response to this packet. Just acknowledge it. */ | |
50344 | + rc = size; | |
50345 | + } else { | |
50346 | + rc = _packet_write(pak, data, size, isuserbuffer); | |
50347 | + } | |
50348 | + | |
50349 | + return rc; | |
50350 | +} | |
50351 | + | |
50352 | +int _packet_write(struct packet *pak, | |
50353 | + const char *data, size_t size, int isuserbuffer) | |
50354 | +{ | |
50355 | + /* | |
50356 | + * Write into the shared memory pages directly | |
50357 | + * and send it to the front end. | |
50358 | + */ | |
50359 | + tpmif_t *tpmif = pak->tpmif; | |
50360 | + grant_handle_t handle; | |
50361 | + int rc = 0; | |
50362 | + unsigned int i = 0; | |
50363 | + unsigned int offset = 0; | |
50364 | + | |
50365 | + if (tpmif == NULL) { | |
50366 | + return -EFAULT; | |
50367 | + } | |
50368 | + | |
50369 | + if (tpmif->status == DISCONNECTED) { | |
50370 | + return size; | |
50371 | + } | |
50372 | + | |
50373 | + while (offset < size && i < TPMIF_TX_RING_SIZE) { | |
50374 | + unsigned int tocopy; | |
50375 | + struct gnttab_map_grant_ref map_op; | |
50376 | + struct gnttab_unmap_grant_ref unmap_op; | |
50377 | + tpmif_tx_request_t *tx; | |
50378 | + | |
50379 | + tx = &tpmif->tx->ring[i].req; | |
50380 | + | |
50381 | + if (0 == tx->addr) { | |
50382 | + DPRINTK("ERROR: Buffer for outgoing packet NULL?! i=%d\n", i); | |
50383 | + return 0; | |
50384 | + } | |
50385 | + | |
50386 | + gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i), | |
50387 | + GNTMAP_host_map, tx->ref, tpmif->domid); | |
50388 | + | |
50389 | + if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, | |
50390 | + &map_op, 1))) { | |
50391 | + BUG(); | |
50392 | + } | |
50393 | + | |
50394 | + handle = map_op.handle; | |
50395 | + | |
50396 | + if (map_op.status) { | |
50397 | + DPRINTK(" Grant table operation failure !\n"); | |
50398 | + return 0; | |
50399 | + } | |
50400 | + | |
50401 | + tocopy = min_t(size_t, size - offset, PAGE_SIZE); | |
50402 | + | |
50403 | + if (copy_from_buffer((void *)(idx_to_kaddr(tpmif, i) | | |
50404 | + (tx->addr & ~PAGE_MASK)), | |
50405 | + &data[offset], tocopy, isuserbuffer)) { | |
50406 | + tpmif_put(tpmif); | |
50407 | + return -EFAULT; | |
50408 | + } | |
50409 | + tx->size = tocopy; | |
50410 | + | |
50411 | + gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i), | |
50412 | + GNTMAP_host_map, handle); | |
50413 | + | |
50414 | + if (unlikely | |
50415 | + (HYPERVISOR_grant_table_op | |
50416 | + (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) { | |
50417 | + BUG(); | |
50418 | + } | |
50419 | + | |
50420 | + offset += tocopy; | |
50421 | + i++; | |
50422 | + } | |
50423 | + | |
50424 | + rc = offset; | |
50425 | + DPRINTK("Notifying frontend via irq %d\n", tpmif->irq); | |
50426 | + notify_remote_via_irq(tpmif->irq); | |
50427 | + | |
50428 | + return rc; | |
50429 | +} | |
50430 | + | |
50431 | +/* | |
50432 | + * Read data from the shared memory and copy it directly into the | |
50433 | + * provided buffer. Advance the last_read indicator which tells | |
50434 | + * how many bytes have already been read. | |
50435 | + */ | |
50436 | +static int packet_read(struct packet *pak, size_t numbytes, | |
50437 | + char *buffer, size_t buffersize, int isuserbuffer) | |
50438 | +{ | |
50439 | + tpmif_t *tpmif = pak->tpmif; | |
50440 | + | |
50441 | + /* | |
50442 | + * Read 'numbytes' of data from the buffer. The first 4 | |
50443 | + * bytes are the instance number in network byte order, | |
50444 | + * after that come the data from the shared memory buffer. | |
50445 | + */ | |
50446 | + u32 to_copy; | |
50447 | + u32 offset = 0; | |
50448 | + u32 room_left = buffersize; | |
50449 | + | |
50450 | + if (pak->last_read < 4) { | |
50451 | + /* | |
50452 | + * copy the instance number into the buffer | |
50453 | + */ | |
50454 | + u32 instance_no = htonl(pak->tpm_instance); | |
50455 | + u32 last_read = pak->last_read; | |
50456 | + | |
50457 | + to_copy = min_t(size_t, 4 - last_read, numbytes); | |
50458 | + | |
50459 | + if (copy_to_buffer(&buffer[0], | |
50460 | + &(((u8 *) & instance_no)[last_read]), | |
50461 | + to_copy, isuserbuffer)) { | |
50462 | + return -EFAULT; | |
50463 | + } | |
50464 | + | |
50465 | + pak->last_read += to_copy; | |
50466 | + offset += to_copy; | |
50467 | + room_left -= to_copy; | |
50468 | + } | |
50469 | + | |
50470 | + /* | |
50471 | + * If the packet has a data buffer appended, read from it... | |
50472 | + */ | |
50473 | + | |
50474 | + if (room_left > 0) { | |
50475 | + if (pak->data_buffer) { | |
50476 | + u32 to_copy = min_t(u32, pak->data_len - offset, room_left); | |
50477 | + u32 last_read = pak->last_read - 4; | |
50478 | + | |
50479 | + if (copy_to_buffer(&buffer[offset], | |
50480 | + &pak->data_buffer[last_read], | |
50481 | + to_copy, isuserbuffer)) { | |
50482 | + return -EFAULT; | |
50483 | + } | |
50484 | + pak->last_read += to_copy; | |
50485 | + offset += to_copy; | |
50486 | + } else { | |
50487 | + offset = packet_read_shmem(pak, | |
50488 | + tpmif, | |
50489 | + offset, | |
50490 | + buffer, | |
50491 | + isuserbuffer, room_left); | |
50492 | + } | |
50493 | + } | |
50494 | + return offset; | |
50495 | +} | |
50496 | + | |
50497 | +static int packet_read_shmem(struct packet *pak, | |
50498 | + tpmif_t * tpmif, | |
50499 | + u32 offset, char *buffer, int isuserbuffer, | |
50500 | + u32 room_left) | |
50501 | +{ | |
50502 | + u32 last_read = pak->last_read - 4; | |
50503 | + u32 i = (last_read / PAGE_SIZE); | |
50504 | + u32 pg_offset = last_read & (PAGE_SIZE - 1); | |
50505 | + u32 to_copy; | |
50506 | + grant_handle_t handle; | |
50507 | + | |
50508 | + tpmif_tx_request_t *tx; | |
50509 | + | |
50510 | + tx = &tpmif->tx->ring[0].req; | |
50511 | + /* | |
50512 | + * Start copying data at the page with index 'index' | |
50513 | + * and within that page at offset 'offset'. | |
50514 | + * Copy a maximum of 'room_left' bytes. | |
50515 | + */ | |
50516 | + to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left); | |
50517 | + while (to_copy > 0) { | |
50518 | + void *src; | |
50519 | + struct gnttab_map_grant_ref map_op; | |
50520 | + struct gnttab_unmap_grant_ref unmap_op; | |
50521 | + | |
50522 | + tx = &tpmif->tx->ring[i].req; | |
50523 | + | |
50524 | + gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i), | |
50525 | + GNTMAP_host_map, tx->ref, tpmif->domid); | |
50526 | + | |
50527 | + if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, | |
50528 | + &map_op, 1))) { | |
50529 | + BUG(); | |
50530 | + } | |
50531 | + | |
50532 | + if (map_op.status) { | |
50533 | + DPRINTK(" Grant table operation failure !\n"); | |
50534 | + return -EFAULT; | |
50535 | + } | |
50536 | + | |
50537 | + handle = map_op.handle; | |
50538 | + | |
50539 | + if (to_copy > tx->size) { | |
50540 | + /* | |
50541 | + * User requests more than what's available | |
50542 | + */ | |
50543 | + to_copy = min_t(u32, tx->size, to_copy); | |
50544 | + } | |
50545 | + | |
50546 | + DPRINTK("Copying from mapped memory at %08lx\n", | |
50547 | + (unsigned long)(idx_to_kaddr(tpmif, i) | | |
50548 | + (tx->addr & ~PAGE_MASK))); | |
50549 | + | |
50550 | + src = (void *)(idx_to_kaddr(tpmif, i) | | |
50551 | + ((tx->addr & ~PAGE_MASK) + pg_offset)); | |
50552 | + if (copy_to_buffer(&buffer[offset], | |
50553 | + src, to_copy, isuserbuffer)) { | |
50554 | + return -EFAULT; | |
50555 | + } | |
50556 | + | |
50557 | + DPRINTK("Data from TPM-FE of domain %d are %d %d %d %d\n", | |
50558 | + tpmif->domid, buffer[offset], buffer[offset + 1], | |
50559 | + buffer[offset + 2], buffer[offset + 3]); | |
50560 | + | |
50561 | + gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i), | |
50562 | + GNTMAP_host_map, handle); | |
50563 | + | |
50564 | + if (unlikely | |
50565 | + (HYPERVISOR_grant_table_op | |
50566 | + (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) { | |
50567 | + BUG(); | |
50568 | + } | |
50569 | + | |
50570 | + offset += to_copy; | |
50571 | + pg_offset = 0; | |
50572 | + last_read += to_copy; | |
50573 | + room_left -= to_copy; | |
50574 | + | |
50575 | + to_copy = min_t(u32, PAGE_SIZE, room_left); | |
50576 | + i++; | |
50577 | + } /* while (to_copy > 0) */ | |
50578 | + /* | |
50579 | + * Adjust the last_read pointer | |
50580 | + */ | |
50581 | + pak->last_read = last_read + 4; | |
50582 | + return offset; | |
50583 | +} | |
50584 | + | |
50585 | +/* ============================================================ | |
50586 | + * The file layer for reading data from this device | |
50587 | + * ============================================================ | |
50588 | + */ | |
50589 | +static int vtpm_op_open(struct inode *inode, struct file *f) | |
50590 | +{ | |
50591 | + int rc = 0; | |
50592 | + unsigned long flags; | |
50593 | + | |
50594 | + write_lock_irqsave(&dataex.pak_lock, flags); | |
50595 | + if (dataex.has_opener == 0) { | |
50596 | + dataex.has_opener = 1; | |
50597 | + } else { | |
50598 | + rc = -EPERM; | |
50599 | + } | |
50600 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50601 | + return rc; | |
50602 | +} | |
50603 | + | |
50604 | +static ssize_t vtpm_op_read(struct file *file, | |
50605 | + char __user * data, size_t size, loff_t * offset) | |
50606 | +{ | |
50607 | + int ret_size = -ENODATA; | |
50608 | + struct packet *pak = NULL; | |
50609 | + unsigned long flags; | |
50610 | + | |
50611 | + write_lock_irqsave(&dataex.pak_lock, flags); | |
50612 | + if (dataex.aborted) { | |
50613 | + dataex.aborted = 0; | |
50614 | + dataex.copied_so_far = 0; | |
50615 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50616 | + return -EIO; | |
50617 | + } | |
50618 | + | |
50619 | + if (list_empty(&dataex.pending_pak)) { | |
50620 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50621 | + wait_event_interruptible(dataex.wait_queue, | |
50622 | + !list_empty(&dataex.pending_pak)); | |
50623 | + write_lock_irqsave(&dataex.pak_lock, flags); | |
50624 | + dataex.copied_so_far = 0; | |
50625 | + } | |
50626 | + | |
50627 | + if (!list_empty(&dataex.pending_pak)) { | |
50628 | + unsigned int left; | |
50629 | + | |
50630 | + pak = list_entry(dataex.pending_pak.next, struct packet, next); | |
50631 | + left = pak->data_len - dataex.copied_so_far; | |
50632 | + list_del(&pak->next); | |
50633 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50634 | + | |
50635 | + DPRINTK("size given by app: %d, available: %d\n", size, left); | |
50636 | + | |
50637 | + ret_size = min_t(size_t, size, left); | |
50638 | + | |
50639 | + ret_size = packet_read(pak, ret_size, data, size, 1); | |
50640 | + | |
50641 | + write_lock_irqsave(&dataex.pak_lock, flags); | |
50642 | + | |
50643 | + if (ret_size < 0) { | |
50644 | + del_singleshot_timer_sync(&pak->processing_timer); | |
50645 | + packet_free(pak); | |
50646 | + dataex.copied_so_far = 0; | |
50647 | + } else { | |
50648 | + DPRINTK("Copied %d bytes to user buffer\n", ret_size); | |
50649 | + | |
50650 | + dataex.copied_so_far += ret_size; | |
50651 | + if (dataex.copied_so_far >= pak->data_len + 4) { | |
50652 | + DPRINTK("All data from this packet given to app.\n"); | |
50653 | + /* All data given to app */ | |
50654 | + | |
50655 | + del_singleshot_timer_sync(&pak-> | |
50656 | + processing_timer); | |
50657 | + list_add_tail(&pak->next, &dataex.current_pak); | |
50658 | + /* | |
50659 | + * The more frontends that are handled at the same time, | |
50660 | + * the more time we give the TPM to process the request. | |
50661 | + */ | |
50662 | + mod_timer(&pak->processing_timer, | |
50663 | + jiffies + (num_frontends * 60 * HZ)); | |
50664 | + dataex.copied_so_far = 0; | |
50665 | + } else { | |
50666 | + list_add(&pak->next, &dataex.pending_pak); | |
50667 | + } | |
50668 | + } | |
50669 | + } | |
50670 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50671 | + | |
50672 | + DPRINTK("Returning result from read to app: %d\n", ret_size); | |
50673 | + | |
50674 | + return ret_size; | |
50675 | +} | |
50676 | + | |
50677 | +/* | |
50678 | + * Write operation - only works after a previous read operation! | |
50679 | + */ | |
50680 | +static ssize_t vtpm_op_write(struct file *file, | |
50681 | + const char __user * data, size_t size, | |
50682 | + loff_t * offset) | |
50683 | +{ | |
50684 | + struct packet *pak; | |
50685 | + int rc = 0; | |
50686 | + unsigned int off = 4; | |
50687 | + unsigned long flags; | |
50688 | + struct vtpm_resp_hdr vrh; | |
50689 | + | |
50690 | + /* | |
50691 | + * Minimum required packet size is: | |
50692 | + * 4 bytes for instance number | |
50693 | + * 2 bytes for tag | |
50694 | + * 4 bytes for paramSize | |
50695 | + * 4 bytes for the ordinal | |
50696 | + * sum: 14 bytes | |
50697 | + */ | |
50698 | + if (size < sizeof (vrh)) | |
50699 | + return -EFAULT; | |
50700 | + | |
50701 | + if (copy_from_user(&vrh, data, sizeof (vrh))) | |
50702 | + return -EFAULT; | |
50703 | + | |
50704 | + /* malformed packet? */ | |
50705 | + if ((off + ntohl(vrh.len_no)) != size) | |
50706 | + return -EFAULT; | |
50707 | + | |
50708 | + write_lock_irqsave(&dataex.pak_lock, flags); | |
50709 | + pak = packet_find_instance(&dataex.current_pak, | |
50710 | + ntohl(vrh.instance_no)); | |
50711 | + | |
50712 | + if (pak == NULL) { | |
50713 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50714 | + DPRINTK(KERN_ALERT "No associated packet! (inst=%d)\n", | |
50715 | + ntohl(vrh.instance_no)); | |
50716 | + return -EFAULT; | |
50717 | + } | |
50718 | + | |
50719 | + del_singleshot_timer_sync(&pak->processing_timer); | |
50720 | + list_del(&pak->next); | |
50721 | + | |
50722 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50723 | + | |
50724 | + /* | |
50725 | + * The first 'offset' bytes must be the instance number - skip them. | |
50726 | + */ | |
50727 | + size -= off; | |
50728 | + | |
50729 | + rc = packet_write(pak, &data[off], size, 1); | |
50730 | + | |
50731 | + if (rc > 0) { | |
50732 | + /* I neglected the first 4 bytes */ | |
50733 | + rc += off; | |
50734 | + } | |
50735 | + packet_free(pak); | |
50736 | + return rc; | |
50737 | +} | |
50738 | + | |
50739 | +static int vtpm_op_release(struct inode *inode, struct file *file) | |
50740 | +{ | |
50741 | + unsigned long flags; | |
50742 | + | |
50743 | + vtpm_release_packets(NULL, 1); | |
50744 | + write_lock_irqsave(&dataex.pak_lock, flags); | |
50745 | + dataex.has_opener = 0; | |
50746 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50747 | + return 0; | |
50748 | +} | |
50749 | + | |
50750 | +static unsigned int vtpm_op_poll(struct file *file, | |
50751 | + struct poll_table_struct *pts) | |
50752 | +{ | |
50753 | + unsigned int flags = POLLOUT | POLLWRNORM; | |
50754 | + | |
50755 | + poll_wait(file, &dataex.wait_queue, pts); | |
50756 | + if (!list_empty(&dataex.pending_pak)) { | |
50757 | + flags |= POLLIN | POLLRDNORM; | |
50758 | + } | |
50759 | + return flags; | |
50760 | +} | |
50761 | + | |
50762 | +static const struct file_operations vtpm_ops = { | |
50763 | + .owner = THIS_MODULE, | |
50764 | + .llseek = no_llseek, | |
50765 | + .open = vtpm_op_open, | |
50766 | + .read = vtpm_op_read, | |
50767 | + .write = vtpm_op_write, | |
50768 | + .release = vtpm_op_release, | |
50769 | + .poll = vtpm_op_poll, | |
50770 | +}; | |
50771 | + | |
50772 | +static struct miscdevice vtpms_miscdevice = { | |
50773 | + .minor = 225, | |
50774 | + .name = "vtpm", | |
50775 | + .fops = &vtpm_ops, | |
50776 | +}; | |
50777 | + | |
50778 | +/*************************************************************** | |
50779 | + Utility functions | |
50780 | +***************************************************************/ | |
50781 | + | |
50782 | +static int tpm_send_fail_message(struct packet *pak, u8 req_tag) | |
50783 | +{ | |
50784 | + int rc; | |
50785 | + static const unsigned char tpm_error_message_fail[] = { | |
50786 | + 0x00, 0x00, | |
50787 | + 0x00, 0x00, 0x00, 0x0a, | |
50788 | + 0x00, 0x00, 0x00, 0x09 /* TPM_FAIL */ | |
50789 | + }; | |
50790 | + unsigned char buffer[sizeof (tpm_error_message_fail)]; | |
50791 | + | |
50792 | + memcpy(buffer, tpm_error_message_fail, | |
50793 | + sizeof (tpm_error_message_fail)); | |
50794 | + /* | |
50795 | + * Insert the right response tag depending on the given tag | |
50796 | + * All response tags are '+3' to the request tag. | |
50797 | + */ | |
50798 | + buffer[1] = req_tag + 3; | |
50799 | + | |
50800 | + /* | |
50801 | + * Write the data to shared memory and notify the front-end | |
50802 | + */ | |
50803 | + rc = packet_write(pak, buffer, sizeof (buffer), 0); | |
50804 | + | |
50805 | + return rc; | |
50806 | +} | |
50807 | + | |
50808 | +static int _vtpm_release_packets(struct list_head *head, | |
50809 | + tpmif_t * tpmif, int send_msgs) | |
50810 | +{ | |
50811 | + int aborted = 0; | |
50812 | + int c = 0; | |
50813 | + struct packet *pak; | |
50814 | + struct list_head *pos, *tmp; | |
50815 | + | |
50816 | + list_for_each_safe(pos, tmp, head) { | |
50817 | + pak = list_entry(pos, struct packet, next); | |
50818 | + c += 1; | |
50819 | + | |
50820 | + if (tpmif == NULL || pak->tpmif == tpmif) { | |
50821 | + int can_send = 0; | |
50822 | + | |
50823 | + del_singleshot_timer_sync(&pak->processing_timer); | |
50824 | + list_del(&pak->next); | |
50825 | + | |
50826 | + if (pak->tpmif && pak->tpmif->status == CONNECTED) { | |
50827 | + can_send = 1; | |
50828 | + } | |
50829 | + | |
50830 | + if (send_msgs && can_send) { | |
50831 | + tpm_send_fail_message(pak, pak->req_tag); | |
50832 | + } | |
50833 | + packet_free(pak); | |
50834 | + if (c == 1) | |
50835 | + aborted = 1; | |
50836 | + } | |
50837 | + } | |
50838 | + return aborted; | |
50839 | +} | |
50840 | + | |
50841 | +int vtpm_release_packets(tpmif_t * tpmif, int send_msgs) | |
50842 | +{ | |
50843 | + unsigned long flags; | |
50844 | + | |
50845 | + write_lock_irqsave(&dataex.pak_lock, flags); | |
50846 | + | |
50847 | + dataex.aborted = _vtpm_release_packets(&dataex.pending_pak, | |
50848 | + tpmif, | |
50849 | + send_msgs); | |
50850 | + _vtpm_release_packets(&dataex.current_pak, tpmif, send_msgs); | |
50851 | + | |
50852 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50853 | + return 0; | |
50854 | +} | |
50855 | + | |
50856 | +static int vtpm_queue_packet(struct packet *pak) | |
50857 | +{ | |
50858 | + int rc = 0; | |
50859 | + | |
50860 | + if (dataex.has_opener) { | |
50861 | + unsigned long flags; | |
50862 | + | |
50863 | + write_lock_irqsave(&dataex.pak_lock, flags); | |
50864 | + list_add_tail(&pak->next, &dataex.pending_pak); | |
50865 | + /* give the TPM some time to pick up the request */ | |
50866 | + mod_timer(&pak->processing_timer, jiffies + (30 * HZ)); | |
50867 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50868 | + | |
50869 | + wake_up_interruptible(&dataex.wait_queue); | |
50870 | + } else { | |
50871 | + rc = -EFAULT; | |
50872 | + } | |
50873 | + return rc; | |
50874 | +} | |
50875 | + | |
50876 | +static int vtpm_receive(tpmif_t * tpmif, u32 size) | |
50877 | +{ | |
50878 | + int rc = 0; | |
50879 | + unsigned char buffer[10]; | |
50880 | + __be32 *native_size; | |
50881 | + struct packet *pak = packet_alloc(tpmif, size, 0, 0); | |
50882 | + | |
50883 | + if (!pak) | |
50884 | + return -ENOMEM; | |
50885 | + /* | |
50886 | + * Read 10 bytes from the received buffer to test its | |
50887 | + * content for validity. | |
50888 | + */ | |
50889 | + if (sizeof (buffer) != packet_read(pak, | |
50890 | + sizeof (buffer), buffer, | |
50891 | + sizeof (buffer), 0)) { | |
50892 | + goto failexit; | |
50893 | + } | |
50894 | + /* | |
50895 | + * Reset the packet read pointer so we can read all its | |
50896 | + * contents again. | |
50897 | + */ | |
50898 | + packet_reset(pak); | |
50899 | + | |
50900 | + native_size = (__force __be32 *) (&buffer[4 + 2]); | |
50901 | + /* | |
50902 | + * Verify that the size of the packet is correct | |
50903 | + * as indicated and that there's actually someone reading packets. | |
50904 | + * The minimum size of the packet is '10' for tag, size indicator | |
50905 | + * and ordinal. | |
50906 | + */ | |
50907 | + if (size < 10 || | |
50908 | + be32_to_cpu(*native_size) != size || | |
50909 | + 0 == dataex.has_opener || tpmif->status != CONNECTED) { | |
50910 | + rc = -EINVAL; | |
50911 | + goto failexit; | |
50912 | + } else { | |
50913 | + rc = vtpm_queue_packet(pak); | |
50914 | + if (rc < 0) | |
50915 | + goto failexit; | |
50916 | + } | |
50917 | + return 0; | |
50918 | + | |
50919 | + failexit: | |
50920 | + if (pak) { | |
50921 | + tpm_send_fail_message(pak, buffer[4 + 1]); | |
50922 | + packet_free(pak); | |
50923 | + } | |
50924 | + return rc; | |
50925 | +} | |
50926 | + | |
50927 | +/* | |
50928 | + * Timeout function that gets invoked when a packet has not been processed | |
50929 | + * during the timeout period. | |
50930 | + * The packet must be on a list when this function is invoked. This | |
50931 | + * also means that once it's taken off a list, the timer must be | |
50932 | + * destroyed as well. | |
50933 | + */ | |
50934 | +static void processing_timeout(unsigned long ptr) | |
50935 | +{ | |
50936 | + struct packet *pak = (struct packet *)ptr; | |
50937 | + unsigned long flags; | |
50938 | + | |
50939 | + write_lock_irqsave(&dataex.pak_lock, flags); | |
50940 | + /* | |
50941 | + * The packet needs to be searched whether it | |
50942 | + * is still on the list. | |
50943 | + */ | |
50944 | + if (pak == packet_find_packet(&dataex.pending_pak, pak) || | |
50945 | + pak == packet_find_packet(&dataex.current_pak, pak)) { | |
50946 | + if ((pak->flags & PACKET_FLAG_DISCARD_RESPONSE) == 0) { | |
50947 | + tpm_send_fail_message(pak, pak->req_tag); | |
50948 | + } | |
50949 | + /* discard future responses */ | |
50950 | + pak->flags |= PACKET_FLAG_DISCARD_RESPONSE; | |
50951 | + } | |
50952 | + | |
50953 | + write_unlock_irqrestore(&dataex.pak_lock, flags); | |
50954 | +} | |
50955 | + | |
50956 | +static void tpm_tx_action(unsigned long unused); | |
50957 | +static DECLARE_TASKLET(tpm_tx_tasklet, tpm_tx_action, 0); | |
50958 | + | |
50959 | +static struct list_head tpm_schedule_list; | |
50960 | +static spinlock_t tpm_schedule_list_lock; | |
50961 | + | |
50962 | +static inline void maybe_schedule_tx_action(void) | |
50963 | +{ | |
50964 | + smp_mb(); | |
50965 | + tasklet_schedule(&tpm_tx_tasklet); | |
50966 | +} | |
50967 | + | |
50968 | +static inline int __on_tpm_schedule_list(tpmif_t * tpmif) | |
50969 | +{ | |
50970 | + return tpmif->list.next != NULL; | |
50971 | +} | |
50972 | + | |
50973 | +static void remove_from_tpm_schedule_list(tpmif_t * tpmif) | |
50974 | +{ | |
50975 | + spin_lock_irq(&tpm_schedule_list_lock); | |
50976 | + if (likely(__on_tpm_schedule_list(tpmif))) { | |
50977 | + list_del(&tpmif->list); | |
50978 | + tpmif->list.next = NULL; | |
50979 | + tpmif_put(tpmif); | |
50980 | + } | |
50981 | + spin_unlock_irq(&tpm_schedule_list_lock); | |
50982 | +} | |
50983 | + | |
50984 | +static void add_to_tpm_schedule_list_tail(tpmif_t * tpmif) | |
50985 | +{ | |
50986 | + if (__on_tpm_schedule_list(tpmif)) | |
50987 | + return; | |
50988 | + | |
50989 | + spin_lock_irq(&tpm_schedule_list_lock); | |
50990 | + if (!__on_tpm_schedule_list(tpmif) && tpmif->active) { | |
50991 | + list_add_tail(&tpmif->list, &tpm_schedule_list); | |
50992 | + tpmif_get(tpmif); | |
50993 | + } | |
50994 | + spin_unlock_irq(&tpm_schedule_list_lock); | |
50995 | +} | |
50996 | + | |
50997 | +void tpmif_schedule_work(tpmif_t * tpmif) | |
50998 | +{ | |
50999 | + add_to_tpm_schedule_list_tail(tpmif); | |
51000 | + maybe_schedule_tx_action(); | |
51001 | +} | |
51002 | + | |
51003 | +void tpmif_deschedule_work(tpmif_t * tpmif) | |
51004 | +{ | |
51005 | + remove_from_tpm_schedule_list(tpmif); | |
51006 | +} | |
51007 | + | |
51008 | +static void tpm_tx_action(unsigned long unused) | |
51009 | +{ | |
51010 | + struct list_head *ent; | |
51011 | + tpmif_t *tpmif; | |
51012 | + tpmif_tx_request_t *tx; | |
51013 | + | |
51014 | + DPRINTK("%s: Getting data from front-end(s)!\n", __FUNCTION__); | |
51015 | + | |
51016 | + while (!list_empty(&tpm_schedule_list)) { | |
51017 | + /* Get a tpmif from the list with work to do. */ | |
51018 | + ent = tpm_schedule_list.next; | |
51019 | + tpmif = list_entry(ent, tpmif_t, list); | |
51020 | + tpmif_get(tpmif); | |
51021 | + remove_from_tpm_schedule_list(tpmif); | |
51022 | + | |
51023 | + tx = &tpmif->tx->ring[0].req; | |
51024 | + | |
51025 | + /* pass it up */ | |
51026 | + vtpm_receive(tpmif, tx->size); | |
51027 | + | |
51028 | + tpmif_put(tpmif); | |
51029 | + } | |
51030 | +} | |
51031 | + | |
51032 | +irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs) | |
51033 | +{ | |
51034 | + tpmif_t *tpmif = (tpmif_t *) dev_id; | |
51035 | + | |
51036 | + add_to_tpm_schedule_list_tail(tpmif); | |
51037 | + maybe_schedule_tx_action(); | |
51038 | + return IRQ_HANDLED; | |
51039 | +} | |
51040 | + | |
51041 | +static int __init tpmback_init(void) | |
51042 | +{ | |
51043 | + int rc; | |
51044 | + | |
51045 | + if ((rc = misc_register(&vtpms_miscdevice)) != 0) { | |
51046 | + printk(KERN_ALERT | |
51047 | + "Could not register misc device for TPM BE.\n"); | |
51048 | + return rc; | |
51049 | + } | |
51050 | + | |
51051 | + dataex_init(&dataex); | |
51052 | + | |
51053 | + spin_lock_init(&tpm_schedule_list_lock); | |
51054 | + INIT_LIST_HEAD(&tpm_schedule_list); | |
51055 | + | |
51056 | + tpmif_interface_init(); | |
51057 | + tpmif_xenbus_init(); | |
51058 | + | |
51059 | + printk(KERN_ALERT "Successfully initialized TPM backend driver.\n"); | |
51060 | + | |
51061 | + return 0; | |
51062 | +} | |
51063 | + | |
51064 | +module_init(tpmback_init); | |
51065 | + | |
51066 | +void __exit tpmback_exit(void) | |
51067 | +{ | |
51068 | + vtpm_release_packets(NULL, 0); | |
51069 | + tpmif_xenbus_exit(); | |
51070 | + tpmif_interface_exit(); | |
51071 | + misc_deregister(&vtpms_miscdevice); | |
51072 | +} | |
51073 | + | |
51074 | +MODULE_LICENSE("Dual BSD/GPL"); | |
51075 | Index: head-2008-11-25/drivers/xen/tpmback/xenbus.c | |
51076 | =================================================================== | |
51077 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
51078 | +++ head-2008-11-25/drivers/xen/tpmback/xenbus.c 2008-03-06 08:54:32.000000000 +0100 | |
51079 | @@ -0,0 +1,289 @@ | |
51080 | +/* Xenbus code for tpmif backend | |
51081 | + Copyright (C) 2005 IBM Corporation | |
51082 | + Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> | |
51083 | + | |
51084 | + This program is free software; you can redistribute it and/or modify | |
51085 | + it under the terms of the GNU General Public License as published by | |
51086 | + the Free Software Foundation; either version 2 of the License, or | |
51087 | + (at your option) any later version. | |
51088 | + | |
51089 | + This program is distributed in the hope that it will be useful, | |
51090 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
51091 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
51092 | + GNU General Public License for more details. | |
51093 | + | |
51094 | + You should have received a copy of the GNU General Public License | |
51095 | + along with this program; if not, write to the Free Software | |
51096 | + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
51097 | +*/ | |
51098 | +#include <stdarg.h> | |
51099 | +#include <linux/module.h> | |
51100 | +#include <xen/xenbus.h> | |
51101 | +#include "common.h" | |
51102 | + | |
51103 | +struct backend_info | |
51104 | +{ | |
51105 | + struct xenbus_device *dev; | |
51106 | + | |
51107 | + /* our communications channel */ | |
51108 | + tpmif_t *tpmif; | |
51109 | + | |
51110 | + long int frontend_id; | |
51111 | + long int instance; // instance of TPM | |
51112 | + u8 is_instance_set;// whether instance number has been set | |
51113 | + | |
51114 | + /* watch front end for changes */ | |
51115 | + struct xenbus_watch backend_watch; | |
51116 | +}; | |
51117 | + | |
51118 | +static void maybe_connect(struct backend_info *be); | |
51119 | +static void connect(struct backend_info *be); | |
51120 | +static int connect_ring(struct backend_info *be); | |
51121 | +static void backend_changed(struct xenbus_watch *watch, | |
51122 | + const char **vec, unsigned int len); | |
51123 | +static void frontend_changed(struct xenbus_device *dev, | |
51124 | + enum xenbus_state frontend_state); | |
51125 | + | |
51126 | +long int tpmback_get_instance(struct backend_info *bi) | |
51127 | +{ | |
51128 | + long int res = -1; | |
51129 | + if (bi && bi->is_instance_set) | |
51130 | + res = bi->instance; | |
51131 | + return res; | |
51132 | +} | |
51133 | + | |
51134 | +static int tpmback_remove(struct xenbus_device *dev) | |
51135 | +{ | |
51136 | + struct backend_info *be = dev->dev.driver_data; | |
51137 | + | |
51138 | + if (!be) return 0; | |
51139 | + | |
51140 | + if (be->backend_watch.node) { | |
51141 | + unregister_xenbus_watch(&be->backend_watch); | |
51142 | + kfree(be->backend_watch.node); | |
51143 | + be->backend_watch.node = NULL; | |
51144 | + } | |
51145 | + if (be->tpmif) { | |
51146 | + be->tpmif->bi = NULL; | |
51147 | + vtpm_release_packets(be->tpmif, 0); | |
51148 | + tpmif_put(be->tpmif); | |
51149 | + be->tpmif = NULL; | |
51150 | + } | |
51151 | + kfree(be); | |
51152 | + dev->dev.driver_data = NULL; | |
51153 | + return 0; | |
51154 | +} | |
51155 | + | |
51156 | +static int tpmback_probe(struct xenbus_device *dev, | |
51157 | + const struct xenbus_device_id *id) | |
51158 | +{ | |
51159 | + int err; | |
51160 | + struct backend_info *be = kzalloc(sizeof(struct backend_info), | |
51161 | + GFP_KERNEL); | |
51162 | + | |
51163 | + if (!be) { | |
51164 | + xenbus_dev_fatal(dev, -ENOMEM, | |
51165 | + "allocating backend structure"); | |
51166 | + return -ENOMEM; | |
51167 | + } | |
51168 | + | |
51169 | + be->is_instance_set = 0; | |
51170 | + be->dev = dev; | |
51171 | + dev->dev.driver_data = be; | |
51172 | + | |
51173 | + err = xenbus_watch_path2(dev, dev->nodename, | |
51174 | + "instance", &be->backend_watch, | |
51175 | + backend_changed); | |
51176 | + if (err) { | |
51177 | + goto fail; | |
51178 | + } | |
51179 | + | |
51180 | + err = xenbus_switch_state(dev, XenbusStateInitWait); | |
51181 | + if (err) { | |
51182 | + goto fail; | |
51183 | + } | |
51184 | + return 0; | |
51185 | +fail: | |
51186 | + tpmback_remove(dev); | |
51187 | + return err; | |
51188 | +} | |
51189 | + | |
51190 | + | |
51191 | +static void backend_changed(struct xenbus_watch *watch, | |
51192 | + const char **vec, unsigned int len) | |
51193 | +{ | |
51194 | + int err; | |
51195 | + long instance; | |
51196 | + struct backend_info *be | |
51197 | + = container_of(watch, struct backend_info, backend_watch); | |
51198 | + struct xenbus_device *dev = be->dev; | |
51199 | + | |
51200 | + err = xenbus_scanf(XBT_NIL, dev->nodename, | |
51201 | + "instance","%li", &instance); | |
51202 | + if (XENBUS_EXIST_ERR(err)) { | |
51203 | + return; | |
51204 | + } | |
51205 | + | |
51206 | + if (err != 1) { | |
51207 | + xenbus_dev_fatal(dev, err, "reading instance"); | |
51208 | + return; | |
51209 | + } | |
51210 | + | |
51211 | + if (be->is_instance_set == 0) { | |
51212 | + be->instance = instance; | |
51213 | + be->is_instance_set = 1; | |
51214 | + } | |
51215 | +} | |
51216 | + | |
51217 | + | |
51218 | +static void frontend_changed(struct xenbus_device *dev, | |
51219 | + enum xenbus_state frontend_state) | |
51220 | +{ | |
51221 | + struct backend_info *be = dev->dev.driver_data; | |
51222 | + int err; | |
51223 | + | |
51224 | + switch (frontend_state) { | |
51225 | + case XenbusStateInitialising: | |
51226 | + case XenbusStateInitialised: | |
51227 | + break; | |
51228 | + | |
51229 | + case XenbusStateConnected: | |
51230 | + err = connect_ring(be); | |
51231 | + if (err) { | |
51232 | + return; | |
51233 | + } | |
51234 | + maybe_connect(be); | |
51235 | + break; | |
51236 | + | |
51237 | + case XenbusStateClosing: | |
51238 | + be->instance = -1; | |
51239 | + xenbus_switch_state(dev, XenbusStateClosing); | |
51240 | + break; | |
51241 | + | |
51242 | + case XenbusStateUnknown: /* keep it here */ | |
51243 | + case XenbusStateClosed: | |
51244 | + xenbus_switch_state(dev, XenbusStateClosed); | |
51245 | + device_unregister(&be->dev->dev); | |
51246 | + tpmback_remove(dev); | |
51247 | + break; | |
51248 | + | |
51249 | + default: | |
51250 | + xenbus_dev_fatal(dev, -EINVAL, | |
51251 | + "saw state %d at frontend", | |
51252 | + frontend_state); | |
51253 | + break; | |
51254 | + } | |
51255 | +} | |
51256 | + | |
51257 | + | |
51258 | + | |
51259 | +static void maybe_connect(struct backend_info *be) | |
51260 | +{ | |
51261 | + if (be->tpmif == NULL || be->tpmif->status == CONNECTED) | |
51262 | + return; | |
51263 | + | |
51264 | + connect(be); | |
51265 | +} | |
51266 | + | |
51267 | + | |
51268 | +static void connect(struct backend_info *be) | |
51269 | +{ | |
51270 | + struct xenbus_transaction xbt; | |
51271 | + int err; | |
51272 | + struct xenbus_device *dev = be->dev; | |
51273 | + unsigned long ready = 1; | |
51274 | + | |
51275 | +again: | |
51276 | + err = xenbus_transaction_start(&xbt); | |
51277 | + if (err) { | |
51278 | + xenbus_dev_fatal(be->dev, err, "starting transaction"); | |
51279 | + return; | |
51280 | + } | |
51281 | + | |
51282 | + err = xenbus_printf(xbt, be->dev->nodename, | |
51283 | + "ready", "%lu", ready); | |
51284 | + if (err) { | |
51285 | + xenbus_dev_fatal(be->dev, err, "writing 'ready'"); | |
51286 | + goto abort; | |
51287 | + } | |
51288 | + | |
51289 | + err = xenbus_transaction_end(xbt, 0); | |
51290 | + if (err == -EAGAIN) | |
51291 | + goto again; | |
51292 | + if (err) | |
51293 | + xenbus_dev_fatal(be->dev, err, "end of transaction"); | |
51294 | + | |
51295 | + err = xenbus_switch_state(dev, XenbusStateConnected); | |
51296 | + if (!err) | |
51297 | + be->tpmif->status = CONNECTED; | |
51298 | + return; | |
51299 | +abort: | |
51300 | + xenbus_transaction_end(xbt, 1); | |
51301 | +} | |
51302 | + | |
51303 | + | |
51304 | +static int connect_ring(struct backend_info *be) | |
51305 | +{ | |
51306 | + struct xenbus_device *dev = be->dev; | |
51307 | + unsigned long ring_ref; | |
51308 | + unsigned int evtchn; | |
51309 | + int err; | |
51310 | + | |
51311 | + err = xenbus_gather(XBT_NIL, dev->otherend, | |
51312 | + "ring-ref", "%lu", &ring_ref, | |
51313 | + "event-channel", "%u", &evtchn, NULL); | |
51314 | + if (err) { | |
51315 | + xenbus_dev_error(dev, err, | |
51316 | + "reading %s/ring-ref and event-channel", | |
51317 | + dev->otherend); | |
51318 | + return err; | |
51319 | + } | |
51320 | + | |
51321 | + if (!be->tpmif) { | |
51322 | + be->tpmif = tpmif_find(dev->otherend_id, be); | |
51323 | + if (IS_ERR(be->tpmif)) { | |
51324 | + err = PTR_ERR(be->tpmif); | |
51325 | + be->tpmif = NULL; | |
51326 | + xenbus_dev_fatal(dev,err,"creating vtpm interface"); | |
51327 | + return err; | |
51328 | + } | |
51329 | + } | |
51330 | + | |
51331 | + if (be->tpmif != NULL) { | |
51332 | + err = tpmif_map(be->tpmif, ring_ref, evtchn); | |
51333 | + if (err) { | |
51334 | + xenbus_dev_error(dev, err, | |
51335 | + "mapping shared-frame %lu port %u", | |
51336 | + ring_ref, evtchn); | |
51337 | + return err; | |
51338 | + } | |
51339 | + } | |
51340 | + return 0; | |
51341 | +} | |
51342 | + | |
51343 | + | |
51344 | +static const struct xenbus_device_id tpmback_ids[] = { | |
51345 | + { "vtpm" }, | |
51346 | + { "" } | |
51347 | +}; | |
51348 | + | |
51349 | + | |
51350 | +static struct xenbus_driver tpmback = { | |
51351 | + .name = "vtpm", | |
51352 | + .owner = THIS_MODULE, | |
51353 | + .ids = tpmback_ids, | |
51354 | + .probe = tpmback_probe, | |
51355 | + .remove = tpmback_remove, | |
51356 | + .otherend_changed = frontend_changed, | |
51357 | +}; | |
51358 | + | |
51359 | + | |
51360 | +void tpmif_xenbus_init(void) | |
51361 | +{ | |
51362 | + xenbus_register_backend(&tpmback); | |
51363 | +} | |
51364 | + | |
51365 | +void tpmif_xenbus_exit(void) | |
51366 | +{ | |
51367 | + xenbus_unregister_driver(&tpmback); | |
51368 | +} | |
51369 | Index: head-2008-11-25/drivers/xen/util.c | |
51370 | =================================================================== | |
51371 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
51372 | +++ head-2008-11-25/drivers/xen/util.c 2007-07-10 09:42:30.000000000 +0200 | |
51373 | @@ -0,0 +1,65 @@ | |
51374 | +#include <linux/mm.h> | |
51375 | +#include <linux/module.h> | |
51376 | +#include <linux/slab.h> | |
51377 | +#include <linux/vmalloc.h> | |
51378 | +#include <asm/uaccess.h> | |
51379 | +#include <xen/driver_util.h> | |
51380 | + | |
51381 | +struct class *get_xen_class(void) | |
51382 | +{ | |
51383 | + static struct class *xen_class; | |
51384 | + | |
51385 | + if (xen_class) | |
51386 | + return xen_class; | |
51387 | + | |
51388 | + xen_class = class_create(THIS_MODULE, "xen"); | |
51389 | + if (IS_ERR(xen_class)) { | |
51390 | + printk("Failed to create xen sysfs class.\n"); | |
51391 | + xen_class = NULL; | |
51392 | + } | |
51393 | + | |
51394 | + return xen_class; | |
51395 | +} | |
51396 | +EXPORT_SYMBOL_GPL(get_xen_class); | |
51397 | + | |
51398 | +#ifdef CONFIG_X86 | |
51399 | +static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | |
51400 | +{ | |
51401 | + /* apply_to_page_range() does all the hard work. */ | |
51402 | + return 0; | |
51403 | +} | |
51404 | + | |
51405 | +struct vm_struct *alloc_vm_area(unsigned long size) | |
51406 | +{ | |
51407 | + struct vm_struct *area; | |
51408 | + | |
51409 | + area = get_vm_area(size, VM_IOREMAP); | |
51410 | + if (area == NULL) | |
51411 | + return NULL; | |
51412 | + | |
51413 | + /* | |
51414 | + * This ensures that page tables are constructed for this region | |
51415 | + * of kernel virtual address space and mapped into init_mm. | |
51416 | + */ | |
51417 | + if (apply_to_page_range(&init_mm, (unsigned long)area->addr, | |
51418 | + area->size, f, NULL)) { | |
51419 | + free_vm_area(area); | |
51420 | + return NULL; | |
51421 | + } | |
51422 | + | |
51423 | + /* Map page directories into every address space. */ | |
51424 | + vmalloc_sync_all(); | |
51425 | + | |
51426 | + return area; | |
51427 | +} | |
51428 | +EXPORT_SYMBOL_GPL(alloc_vm_area); | |
51429 | + | |
51430 | +void free_vm_area(struct vm_struct *area) | |
51431 | +{ | |
51432 | + struct vm_struct *ret; | |
51433 | + ret = remove_vm_area(area->addr); | |
51434 | + BUG_ON(ret != area); | |
51435 | + kfree(area); | |
51436 | +} | |
51437 | +EXPORT_SYMBOL_GPL(free_vm_area); | |
51438 | +#endif /* CONFIG_X86 */ | |
51439 | Index: head-2008-11-25/drivers/xen/xenbus/xenbus_backend_client.c | |
51440 | =================================================================== | |
51441 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
51442 | +++ head-2008-11-25/drivers/xen/xenbus/xenbus_backend_client.c 2007-06-12 13:13:45.000000000 +0200 | |
51443 | @@ -0,0 +1,147 @@ | |
51444 | +/****************************************************************************** | |
51445 | + * Backend-client-facing interface for the Xenbus driver. In other words, the | |
51446 | + * interface between the Xenbus and the device-specific code in the backend | |
51447 | + * driver. | |
51448 | + * | |
51449 | + * Copyright (C) 2005-2006 XenSource Ltd | |
51450 | + * | |
51451 | + * This program is free software; you can redistribute it and/or | |
51452 | + * modify it under the terms of the GNU General Public License version 2 | |
51453 | + * as published by the Free Software Foundation; or, when distributed | |
51454 | + * separately from the Linux kernel or incorporated into other | |
51455 | + * software packages, subject to the following license: | |
51456 | + * | |
51457 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
51458 | + * of this source file (the "Software"), to deal in the Software without | |
51459 | + * restriction, including without limitation the rights to use, copy, modify, | |
51460 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
51461 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
51462 | + * the following conditions: | |
51463 | + * | |
51464 | + * The above copyright notice and this permission notice shall be included in | |
51465 | + * all copies or substantial portions of the Software. | |
51466 | + * | |
51467 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
51468 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
51469 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
51470 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
51471 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
51472 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
51473 | + * IN THE SOFTWARE. | |
51474 | + */ | |
51475 | + | |
51476 | +#include <linux/err.h> | |
51477 | +#include <xen/gnttab.h> | |
51478 | +#include <xen/xenbus.h> | |
51479 | +#include <xen/driver_util.h> | |
51480 | + | |
51481 | +/* Based on Rusty Russell's skeleton driver's map_page */ | |
51482 | +struct vm_struct *xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref) | |
51483 | +{ | |
51484 | + struct gnttab_map_grant_ref op; | |
51485 | + struct vm_struct *area; | |
51486 | + | |
51487 | + area = alloc_vm_area(PAGE_SIZE); | |
51488 | + if (!area) | |
51489 | + return ERR_PTR(-ENOMEM); | |
51490 | + | |
51491 | + gnttab_set_map_op(&op, (unsigned long)area->addr, GNTMAP_host_map, | |
51492 | + gnt_ref, dev->otherend_id); | |
51493 | + | |
51494 | + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | |
51495 | + BUG(); | |
51496 | + | |
51497 | + if (op.status != GNTST_okay) { | |
51498 | + free_vm_area(area); | |
51499 | + xenbus_dev_fatal(dev, op.status, | |
51500 | + "mapping in shared page %d from domain %d", | |
51501 | + gnt_ref, dev->otherend_id); | |
51502 | + BUG_ON(!IS_ERR(ERR_PTR(op.status))); | |
51503 | + return ERR_PTR(op.status); | |
51504 | + } | |
51505 | + | |
51506 | + /* Stuff the handle in an unused field */ | |
51507 | + area->phys_addr = (unsigned long)op.handle; | |
51508 | + | |
51509 | + return area; | |
51510 | +} | |
51511 | +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); | |
51512 | + | |
51513 | + | |
51514 | +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, | |
51515 | + grant_handle_t *handle, void *vaddr) | |
51516 | +{ | |
51517 | + struct gnttab_map_grant_ref op; | |
51518 | + | |
51519 | + gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, | |
51520 | + gnt_ref, dev->otherend_id); | |
51521 | + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | |
51522 | + BUG(); | |
51523 | + | |
51524 | + if (op.status != GNTST_okay) { | |
51525 | + xenbus_dev_fatal(dev, op.status, | |
51526 | + "mapping in shared page %d from domain %d", | |
51527 | + gnt_ref, dev->otherend_id); | |
51528 | + } else | |
51529 | + *handle = op.handle; | |
51530 | + | |
51531 | + return op.status; | |
51532 | +} | |
51533 | +EXPORT_SYMBOL_GPL(xenbus_map_ring); | |
51534 | + | |
51535 | + | |
51536 | +/* Based on Rusty Russell's skeleton driver's unmap_page */ | |
51537 | +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, struct vm_struct *area) | |
51538 | +{ | |
51539 | + struct gnttab_unmap_grant_ref op; | |
51540 | + | |
51541 | + gnttab_set_unmap_op(&op, (unsigned long)area->addr, GNTMAP_host_map, | |
51542 | + (grant_handle_t)area->phys_addr); | |
51543 | + | |
51544 | + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | |
51545 | + BUG(); | |
51546 | + | |
51547 | + if (op.status == GNTST_okay) | |
51548 | + free_vm_area(area); | |
51549 | + else | |
51550 | + xenbus_dev_error(dev, op.status, | |
51551 | + "unmapping page at handle %d error %d", | |
51552 | + (int16_t)area->phys_addr, op.status); | |
51553 | + | |
51554 | + return op.status; | |
51555 | +} | |
51556 | +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); | |
51557 | + | |
51558 | + | |
51559 | +int xenbus_unmap_ring(struct xenbus_device *dev, | |
51560 | + grant_handle_t handle, void *vaddr) | |
51561 | +{ | |
51562 | + struct gnttab_unmap_grant_ref op; | |
51563 | + | |
51564 | + gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, | |
51565 | + handle); | |
51566 | + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | |
51567 | + BUG(); | |
51568 | + | |
51569 | + if (op.status != GNTST_okay) | |
51570 | + xenbus_dev_error(dev, op.status, | |
51571 | + "unmapping page at handle %d error %d", | |
51572 | + handle, op.status); | |
51573 | + | |
51574 | + return op.status; | |
51575 | +} | |
51576 | +EXPORT_SYMBOL_GPL(xenbus_unmap_ring); | |
51577 | + | |
51578 | +int xenbus_dev_is_online(struct xenbus_device *dev) | |
51579 | +{ | |
51580 | + int rc, val; | |
51581 | + | |
51582 | + rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); | |
51583 | + if (rc != 1) | |
51584 | + val = 0; /* no online node present */ | |
51585 | + | |
51586 | + return val; | |
51587 | +} | |
51588 | +EXPORT_SYMBOL_GPL(xenbus_dev_is_online); | |
51589 | + | |
51590 | +MODULE_LICENSE("Dual BSD/GPL"); | |
51591 | Index: head-2008-11-25/drivers/xen/xenbus/xenbus_dev.c | |
51592 | =================================================================== | |
51593 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
51594 | +++ head-2008-11-25/drivers/xen/xenbus/xenbus_dev.c 2008-07-21 11:00:33.000000000 +0200 | |
51595 | @@ -0,0 +1,408 @@ | |
51596 | +/* | |
51597 | + * xenbus_dev.c | |
51598 | + * | |
51599 | + * Driver giving user-space access to the kernel's xenbus connection | |
51600 | + * to xenstore. | |
51601 | + * | |
51602 | + * Copyright (c) 2005, Christian Limpach | |
51603 | + * Copyright (c) 2005, Rusty Russell, IBM Corporation | |
51604 | + * | |
51605 | + * This program is free software; you can redistribute it and/or | |
51606 | + * modify it under the terms of the GNU General Public License version 2 | |
51607 | + * as published by the Free Software Foundation; or, when distributed | |
51608 | + * separately from the Linux kernel or incorporated into other | |
51609 | + * software packages, subject to the following license: | |
51610 | + * | |
51611 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
51612 | + * of this source file (the "Software"), to deal in the Software without | |
51613 | + * restriction, including without limitation the rights to use, copy, modify, | |
51614 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
51615 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
51616 | + * the following conditions: | |
51617 | + * | |
51618 | + * The above copyright notice and this permission notice shall be included in | |
51619 | + * all copies or substantial portions of the Software. | |
51620 | + * | |
51621 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
51622 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
51623 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
51624 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
51625 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
51626 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
51627 | + * IN THE SOFTWARE. | |
51628 | + */ | |
51629 | + | |
51630 | +#include <linux/kernel.h> | |
51631 | +#include <linux/errno.h> | |
51632 | +#include <linux/uio.h> | |
51633 | +#include <linux/notifier.h> | |
51634 | +#include <linux/wait.h> | |
51635 | +#include <linux/fs.h> | |
51636 | +#include <linux/poll.h> | |
51637 | +#include <linux/mutex.h> | |
51638 | + | |
51639 | +#include "xenbus_comms.h" | |
51640 | + | |
51641 | +#include <asm/uaccess.h> | |
51642 | +#include <asm/hypervisor.h> | |
51643 | +#include <xen/xenbus.h> | |
51644 | +#include <xen/xen_proc.h> | |
51645 | +#include <asm/hypervisor.h> | |
51646 | + | |
51647 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
51648 | +#include <xen/platform-compat.h> | |
51649 | +#endif | |
51650 | + | |
51651 | +struct xenbus_dev_transaction { | |
51652 | + struct list_head list; | |
51653 | + struct xenbus_transaction handle; | |
51654 | +}; | |
51655 | + | |
51656 | +struct read_buffer { | |
51657 | + struct list_head list; | |
51658 | + unsigned int cons; | |
51659 | + unsigned int len; | |
51660 | + char msg[]; | |
51661 | +}; | |
51662 | + | |
51663 | +struct xenbus_dev_data { | |
51664 | + /* In-progress transaction. */ | |
51665 | + struct list_head transactions; | |
51666 | + | |
51667 | + /* Active watches. */ | |
51668 | + struct list_head watches; | |
51669 | + | |
51670 | + /* Partial request. */ | |
51671 | + unsigned int len; | |
51672 | + union { | |
51673 | + struct xsd_sockmsg msg; | |
51674 | + char buffer[PAGE_SIZE]; | |
51675 | + } u; | |
51676 | + | |
51677 | + /* Response queue. */ | |
51678 | + struct list_head read_buffers; | |
51679 | + wait_queue_head_t read_waitq; | |
51680 | + | |
51681 | + struct mutex reply_mutex; | |
51682 | +}; | |
51683 | + | |
51684 | +static struct proc_dir_entry *xenbus_dev_intf; | |
51685 | + | |
51686 | +static ssize_t xenbus_dev_read(struct file *filp, | |
51687 | + char __user *ubuf, | |
51688 | + size_t len, loff_t *ppos) | |
51689 | +{ | |
51690 | + struct xenbus_dev_data *u = filp->private_data; | |
51691 | + struct read_buffer *rb; | |
51692 | + int i, ret; | |
51693 | + | |
51694 | + mutex_lock(&u->reply_mutex); | |
51695 | + while (list_empty(&u->read_buffers)) { | |
51696 | + mutex_unlock(&u->reply_mutex); | |
51697 | + ret = wait_event_interruptible(u->read_waitq, | |
51698 | + !list_empty(&u->read_buffers)); | |
51699 | + if (ret) | |
51700 | + return ret; | |
51701 | + mutex_lock(&u->reply_mutex); | |
51702 | + } | |
51703 | + | |
51704 | + rb = list_entry(u->read_buffers.next, struct read_buffer, list); | |
51705 | + for (i = 0; i < len;) { | |
51706 | + put_user(rb->msg[rb->cons], ubuf + i); | |
51707 | + i++; | |
51708 | + rb->cons++; | |
51709 | + if (rb->cons == rb->len) { | |
51710 | + list_del(&rb->list); | |
51711 | + kfree(rb); | |
51712 | + if (list_empty(&u->read_buffers)) | |
51713 | + break; | |
51714 | + rb = list_entry(u->read_buffers.next, | |
51715 | + struct read_buffer, list); | |
51716 | + } | |
51717 | + } | |
51718 | + mutex_unlock(&u->reply_mutex); | |
51719 | + | |
51720 | + return i; | |
51721 | +} | |
51722 | + | |
51723 | +static void queue_reply(struct xenbus_dev_data *u, | |
51724 | + char *data, unsigned int len) | |
51725 | +{ | |
51726 | + struct read_buffer *rb; | |
51727 | + | |
51728 | + if (len == 0) | |
51729 | + return; | |
51730 | + | |
51731 | + rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); | |
51732 | + BUG_ON(rb == NULL); | |
51733 | + | |
51734 | + rb->cons = 0; | |
51735 | + rb->len = len; | |
51736 | + | |
51737 | + memcpy(rb->msg, data, len); | |
51738 | + | |
51739 | + list_add_tail(&rb->list, &u->read_buffers); | |
51740 | + | |
51741 | + wake_up(&u->read_waitq); | |
51742 | +} | |
51743 | + | |
51744 | +struct watch_adapter | |
51745 | +{ | |
51746 | + struct list_head list; | |
51747 | + struct xenbus_watch watch; | |
51748 | + struct xenbus_dev_data *dev_data; | |
51749 | + char *token; | |
51750 | +}; | |
51751 | + | |
51752 | +static void free_watch_adapter (struct watch_adapter *watch) | |
51753 | +{ | |
51754 | + kfree(watch->watch.node); | |
51755 | + kfree(watch->token); | |
51756 | + kfree(watch); | |
51757 | +} | |
51758 | + | |
51759 | +static void watch_fired(struct xenbus_watch *watch, | |
51760 | + const char **vec, | |
51761 | + unsigned int len) | |
51762 | +{ | |
51763 | + struct watch_adapter *adap = | |
51764 | + container_of(watch, struct watch_adapter, watch); | |
51765 | + struct xsd_sockmsg hdr; | |
51766 | + const char *path, *token; | |
51767 | + int path_len, tok_len, body_len, data_len = 0; | |
51768 | + | |
51769 | + path = vec[XS_WATCH_PATH]; | |
51770 | + token = adap->token; | |
51771 | + | |
51772 | + path_len = strlen(path) + 1; | |
51773 | + tok_len = strlen(token) + 1; | |
51774 | + if (len > 2) | |
51775 | + data_len = vec[len] - vec[2] + 1; | |
51776 | + body_len = path_len + tok_len + data_len; | |
51777 | + | |
51778 | + hdr.type = XS_WATCH_EVENT; | |
51779 | + hdr.len = body_len; | |
51780 | + | |
51781 | + mutex_lock(&adap->dev_data->reply_mutex); | |
51782 | + queue_reply(adap->dev_data, (char *)&hdr, sizeof(hdr)); | |
51783 | + queue_reply(adap->dev_data, (char *)path, path_len); | |
51784 | + queue_reply(adap->dev_data, (char *)token, tok_len); | |
51785 | + if (len > 2) | |
51786 | + queue_reply(adap->dev_data, (char *)vec[2], data_len); | |
51787 | + mutex_unlock(&adap->dev_data->reply_mutex); | |
51788 | +} | |
51789 | + | |
51790 | +static LIST_HEAD(watch_list); | |
51791 | + | |
51792 | +static ssize_t xenbus_dev_write(struct file *filp, | |
51793 | + const char __user *ubuf, | |
51794 | + size_t len, loff_t *ppos) | |
51795 | +{ | |
51796 | + struct xenbus_dev_data *u = filp->private_data; | |
51797 | + struct xenbus_dev_transaction *trans = NULL; | |
51798 | + uint32_t msg_type; | |
51799 | + void *reply; | |
51800 | + char *path, *token; | |
51801 | + struct watch_adapter *watch, *tmp_watch; | |
51802 | + int err, rc = len; | |
51803 | + | |
51804 | + if ((len + u->len) > sizeof(u->u.buffer)) { | |
51805 | + rc = -EINVAL; | |
51806 | + goto out; | |
51807 | + } | |
51808 | + | |
51809 | + if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) { | |
51810 | + rc = -EFAULT; | |
51811 | + goto out; | |
51812 | + } | |
51813 | + | |
51814 | + u->len += len; | |
51815 | + if ((u->len < sizeof(u->u.msg)) || | |
51816 | + (u->len < (sizeof(u->u.msg) + u->u.msg.len))) | |
51817 | + return rc; | |
51818 | + | |
51819 | + msg_type = u->u.msg.type; | |
51820 | + | |
51821 | + switch (msg_type) { | |
51822 | + case XS_TRANSACTION_START: | |
51823 | + case XS_TRANSACTION_END: | |
51824 | + case XS_DIRECTORY: | |
51825 | + case XS_READ: | |
51826 | + case XS_GET_PERMS: | |
51827 | + case XS_RELEASE: | |
51828 | + case XS_GET_DOMAIN_PATH: | |
51829 | + case XS_WRITE: | |
51830 | + case XS_MKDIR: | |
51831 | + case XS_RM: | |
51832 | + case XS_SET_PERMS: | |
51833 | + if (msg_type == XS_TRANSACTION_START) { | |
51834 | + trans = kmalloc(sizeof(*trans), GFP_KERNEL); | |
51835 | + if (!trans) { | |
51836 | + rc = -ENOMEM; | |
51837 | + goto out; | |
51838 | + } | |
51839 | + } | |
51840 | + | |
51841 | + reply = xenbus_dev_request_and_reply(&u->u.msg); | |
51842 | + if (IS_ERR(reply)) { | |
51843 | + kfree(trans); | |
51844 | + rc = PTR_ERR(reply); | |
51845 | + goto out; | |
51846 | + } | |
51847 | + | |
51848 | + if (msg_type == XS_TRANSACTION_START) { | |
51849 | + trans->handle.id = simple_strtoul(reply, NULL, 0); | |
51850 | + list_add(&trans->list, &u->transactions); | |
51851 | + } else if (msg_type == XS_TRANSACTION_END) { | |
51852 | + list_for_each_entry(trans, &u->transactions, list) | |
51853 | + if (trans->handle.id == u->u.msg.tx_id) | |
51854 | + break; | |
51855 | + BUG_ON(&trans->list == &u->transactions); | |
51856 | + list_del(&trans->list); | |
51857 | + kfree(trans); | |
51858 | + } | |
51859 | + mutex_lock(&u->reply_mutex); | |
51860 | + queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); | |
51861 | + queue_reply(u, (char *)reply, u->u.msg.len); | |
51862 | + mutex_unlock(&u->reply_mutex); | |
51863 | + kfree(reply); | |
51864 | + break; | |
51865 | + | |
51866 | + case XS_WATCH: | |
51867 | + case XS_UNWATCH: { | |
51868 | + static const char *XS_RESP = "OK"; | |
51869 | + struct xsd_sockmsg hdr; | |
51870 | + | |
51871 | + path = u->u.buffer + sizeof(u->u.msg); | |
51872 | + token = memchr(path, 0, u->u.msg.len); | |
51873 | + if (token == NULL) { | |
51874 | + rc = -EILSEQ; | |
51875 | + goto out; | |
51876 | + } | |
51877 | + token++; | |
51878 | + | |
51879 | + if (msg_type == XS_WATCH) { | |
51880 | + watch = kzalloc(sizeof(*watch), GFP_KERNEL); | |
51881 | + watch->watch.node = kmalloc(strlen(path)+1, | |
51882 | + GFP_KERNEL); | |
51883 | + strcpy((char *)watch->watch.node, path); | |
51884 | + watch->watch.callback = watch_fired; | |
51885 | + watch->token = kmalloc(strlen(token)+1, GFP_KERNEL); | |
51886 | + strcpy(watch->token, token); | |
51887 | + watch->dev_data = u; | |
51888 | + | |
51889 | + err = register_xenbus_watch(&watch->watch); | |
51890 | + if (err) { | |
51891 | + free_watch_adapter(watch); | |
51892 | + rc = err; | |
51893 | + goto out; | |
51894 | + } | |
51895 | + | |
51896 | + list_add(&watch->list, &u->watches); | |
51897 | + } else { | |
51898 | + list_for_each_entry_safe(watch, tmp_watch, | |
51899 | + &u->watches, list) { | |
51900 | + if (!strcmp(watch->token, token) && | |
51901 | + !strcmp(watch->watch.node, path)) | |
51902 | + { | |
51903 | + unregister_xenbus_watch(&watch->watch); | |
51904 | + list_del(&watch->list); | |
51905 | + free_watch_adapter(watch); | |
51906 | + break; | |
51907 | + } | |
51908 | + } | |
51909 | + } | |
51910 | + | |
51911 | + hdr.type = msg_type; | |
51912 | + hdr.len = strlen(XS_RESP) + 1; | |
51913 | + mutex_lock(&u->reply_mutex); | |
51914 | + queue_reply(u, (char *)&hdr, sizeof(hdr)); | |
51915 | + queue_reply(u, (char *)XS_RESP, hdr.len); | |
51916 | + mutex_unlock(&u->reply_mutex); | |
51917 | + break; | |
51918 | + } | |
51919 | + | |
51920 | + default: | |
51921 | + rc = -EINVAL; | |
51922 | + break; | |
51923 | + } | |
51924 | + | |
51925 | + out: | |
51926 | + u->len = 0; | |
51927 | + return rc; | |
51928 | +} | |
51929 | + | |
51930 | +static int xenbus_dev_open(struct inode *inode, struct file *filp) | |
51931 | +{ | |
51932 | + struct xenbus_dev_data *u; | |
51933 | + | |
51934 | + if (xen_store_evtchn == 0) | |
51935 | + return -ENOENT; | |
51936 | + | |
51937 | + nonseekable_open(inode, filp); | |
51938 | + | |
51939 | + u = kzalloc(sizeof(*u), GFP_KERNEL); | |
51940 | + if (u == NULL) | |
51941 | + return -ENOMEM; | |
51942 | + | |
51943 | + INIT_LIST_HEAD(&u->transactions); | |
51944 | + INIT_LIST_HEAD(&u->watches); | |
51945 | + INIT_LIST_HEAD(&u->read_buffers); | |
51946 | + init_waitqueue_head(&u->read_waitq); | |
51947 | + | |
51948 | + mutex_init(&u->reply_mutex); | |
51949 | + | |
51950 | + filp->private_data = u; | |
51951 | + | |
51952 | + return 0; | |
51953 | +} | |
51954 | + | |
51955 | +static int xenbus_dev_release(struct inode *inode, struct file *filp) | |
51956 | +{ | |
51957 | + struct xenbus_dev_data *u = filp->private_data; | |
51958 | + struct xenbus_dev_transaction *trans, *tmp; | |
51959 | + struct watch_adapter *watch, *tmp_watch; | |
51960 | + | |
51961 | + list_for_each_entry_safe(trans, tmp, &u->transactions, list) { | |
51962 | + xenbus_transaction_end(trans->handle, 1); | |
51963 | + list_del(&trans->list); | |
51964 | + kfree(trans); | |
51965 | + } | |
51966 | + | |
51967 | + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { | |
51968 | + unregister_xenbus_watch(&watch->watch); | |
51969 | + list_del(&watch->list); | |
51970 | + free_watch_adapter(watch); | |
51971 | + } | |
51972 | + | |
51973 | + kfree(u); | |
51974 | + | |
51975 | + return 0; | |
51976 | +} | |
51977 | + | |
51978 | +static unsigned int xenbus_dev_poll(struct file *file, poll_table *wait) | |
51979 | +{ | |
51980 | + struct xenbus_dev_data *u = file->private_data; | |
51981 | + | |
51982 | + poll_wait(file, &u->read_waitq, wait); | |
51983 | + if (!list_empty(&u->read_buffers)) | |
51984 | + return POLLIN | POLLRDNORM; | |
51985 | + return 0; | |
51986 | +} | |
51987 | + | |
51988 | +static const struct file_operations xenbus_dev_file_ops = { | |
51989 | + .read = xenbus_dev_read, | |
51990 | + .write = xenbus_dev_write, | |
51991 | + .open = xenbus_dev_open, | |
51992 | + .release = xenbus_dev_release, | |
51993 | + .poll = xenbus_dev_poll, | |
51994 | +}; | |
51995 | + | |
51996 | +int xenbus_dev_init(void) | |
51997 | +{ | |
51998 | + xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400); | |
51999 | + if (xenbus_dev_intf) | |
52000 | + xenbus_dev_intf->proc_fops = &xenbus_dev_file_ops; | |
52001 | + | |
52002 | + return 0; | |
52003 | +} | |
52004 | Index: head-2008-11-25/drivers/xen/xenbus/xenbus_probe_backend.c | |
52005 | =================================================================== | |
52006 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
52007 | +++ head-2008-11-25/drivers/xen/xenbus/xenbus_probe_backend.c 2008-01-21 11:15:26.000000000 +0100 | |
52008 | @@ -0,0 +1,292 @@ | |
52009 | +/****************************************************************************** | |
52010 | + * Talks to Xen Store to figure out what devices we have (backend half). | |
52011 | + * | |
52012 | + * Copyright (C) 2005 Rusty Russell, IBM Corporation | |
52013 | + * Copyright (C) 2005 Mike Wray, Hewlett-Packard | |
52014 | + * Copyright (C) 2005, 2006 XenSource Ltd | |
52015 | + * Copyright (C) 2007 Solarflare Communications, Inc. | |
52016 | + * | |
52017 | + * This program is free software; you can redistribute it and/or | |
52018 | + * modify it under the terms of the GNU General Public License version 2 | |
52019 | + * as published by the Free Software Foundation; or, when distributed | |
52020 | + * separately from the Linux kernel or incorporated into other | |
52021 | + * software packages, subject to the following license: | |
52022 | + * | |
52023 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
52024 | + * of this source file (the "Software"), to deal in the Software without | |
52025 | + * restriction, including without limitation the rights to use, copy, modify, | |
52026 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
52027 | + * and to permit persons to whom the Software is furnished to do so, subject to | |
52028 | + * the following conditions: | |
52029 | + * | |
52030 | + * The above copyright notice and this permission notice shall be included in | |
52031 | + * all copies or substantial portions of the Software. | |
52032 | + * | |
52033 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
52034 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
52035 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
52036 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
52037 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
52038 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
52039 | + * IN THE SOFTWARE. | |
52040 | + */ | |
52041 | + | |
52042 | +#define DPRINTK(fmt, args...) \ | |
52043 | + pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ | |
52044 | + __FUNCTION__, __LINE__, ##args) | |
52045 | + | |
52046 | +#include <linux/kernel.h> | |
52047 | +#include <linux/err.h> | |
52048 | +#include <linux/string.h> | |
52049 | +#include <linux/ctype.h> | |
52050 | +#include <linux/fcntl.h> | |
52051 | +#include <linux/mm.h> | |
52052 | +#include <linux/notifier.h> | |
52053 | + | |
52054 | +#include <asm/io.h> | |
52055 | +#include <asm/page.h> | |
52056 | +#include <asm/maddr.h> | |
52057 | +#include <asm/pgtable.h> | |
52058 | +#include <asm/hypervisor.h> | |
52059 | +#include <xen/xenbus.h> | |
52060 | +#include <xen/xen_proc.h> | |
52061 | +#include <xen/evtchn.h> | |
52062 | +#include <xen/features.h> | |
52063 | + | |
52064 | +#include "xenbus_comms.h" | |
52065 | +#include "xenbus_probe.h" | |
52066 | + | |
52067 | +#ifdef HAVE_XEN_PLATFORM_COMPAT_H | |
52068 | +#include <xen/platform-compat.h> | |
52069 | +#endif | |
52070 | + | |
52071 | +static int xenbus_uevent_backend(struct device *dev, char **envp, | |
52072 | + int num_envp, char *buffer, int buffer_size); | |
52073 | +static int xenbus_probe_backend(const char *type, const char *domid); | |
52074 | + | |
52075 | +extern int read_otherend_details(struct xenbus_device *xendev, | |
52076 | + char *id_node, char *path_node); | |
52077 | + | |
52078 | +static int read_frontend_details(struct xenbus_device *xendev) | |
52079 | +{ | |
52080 | + return read_otherend_details(xendev, "frontend-id", "frontend"); | |
52081 | +} | |
52082 | + | |
52083 | +/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */ | |
52084 | +static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) | |
52085 | +{ | |
52086 | + int domid, err; | |
52087 | + const char *devid, *type, *frontend; | |
52088 | + unsigned int typelen; | |
52089 | + | |
52090 | + type = strchr(nodename, '/'); | |
52091 | + if (!type) | |
52092 | + return -EINVAL; | |
52093 | + type++; | |
52094 | + typelen = strcspn(type, "/"); | |
52095 | + if (!typelen || type[typelen] != '/') | |
52096 | + return -EINVAL; | |
52097 | + | |
52098 | + devid = strrchr(nodename, '/') + 1; | |
52099 | + | |
52100 | + err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid, | |
52101 | + "frontend", NULL, &frontend, | |
52102 | + NULL); | |
52103 | + if (err) | |
52104 | + return err; | |
52105 | + if (strlen(frontend) == 0) | |
52106 | + err = -ERANGE; | |
52107 | + if (!err && !xenbus_exists(XBT_NIL, frontend, "")) | |
52108 | + err = -ENOENT; | |
52109 | + kfree(frontend); | |
52110 | + | |
52111 | + if (err) | |
52112 | + return err; | |
52113 | + | |
52114 | + if (snprintf(bus_id, BUS_ID_SIZE, | |
52115 | + "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE) | |
52116 | + return -ENOSPC; | |
52117 | + return 0; | |
52118 | +} | |
52119 | + | |
52120 | +static struct xen_bus_type xenbus_backend = { | |
52121 | + .root = "backend", | |
52122 | + .levels = 3, /* backend/type/<frontend>/<id> */ | |
52123 | + .get_bus_id = backend_bus_id, | |
52124 | + .probe = xenbus_probe_backend, | |
52125 | + .error = -ENODEV, | |
52126 | + .bus = { | |
52127 | + .name = "xen-backend", | |
52128 | + .match = xenbus_match, | |
52129 | + .probe = xenbus_dev_probe, | |
52130 | + .remove = xenbus_dev_remove, | |
52131 | +// .shutdown = xenbus_dev_shutdown, | |
52132 | + .uevent = xenbus_uevent_backend, | |
52133 | + }, | |
52134 | + .dev = { | |
52135 | + .bus_id = "xen-backend", | |
52136 | + }, | |
52137 | +}; | |
52138 | + | |
52139 | +static int xenbus_uevent_backend(struct device *dev, char **envp, | |
52140 | + int num_envp, char *buffer, int buffer_size) | |
52141 | +{ | |
52142 | + struct xenbus_device *xdev; | |
52143 | + struct xenbus_driver *drv; | |
52144 | + int i = 0; | |
52145 | + int length = 0; | |
52146 | + | |
52147 | + DPRINTK(""); | |
52148 | + | |
52149 | + if (dev == NULL) | |
52150 | + return -ENODEV; | |
52151 | + | |
52152 | + xdev = to_xenbus_device(dev); | |
52153 | + if (xdev == NULL) | |
52154 | + return -ENODEV; | |
52155 | + | |
52156 | + /* stuff we want to pass to /sbin/hotplug */ | |
52157 | + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, | |
52158 | + "XENBUS_TYPE=%s", xdev->devicetype); | |
52159 | + | |
52160 | + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, | |
52161 | + "XENBUS_PATH=%s", xdev->nodename); | |
52162 | + | |
52163 | + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, | |
52164 | + "XENBUS_BASE_PATH=%s", xenbus_backend.root); | |
52165 | + | |
52166 | + /* terminate, set to next free slot, shrink available space */ | |
52167 | + envp[i] = NULL; | |
52168 | + envp = &envp[i]; | |
52169 | + num_envp -= i; | |
52170 | + buffer = &buffer[length]; | |
52171 | + buffer_size -= length; | |
52172 | + | |
52173 | + if (dev->driver) { | |
52174 | + drv = to_xenbus_driver(dev->driver); | |
52175 | + if (drv && drv->uevent) | |
52176 | + return drv->uevent(xdev, envp, num_envp, buffer, | |
52177 | + buffer_size); | |
52178 | + } | |
52179 | + | |
52180 | + return 0; | |
52181 | +} | |
52182 | + | |
52183 | +int xenbus_register_backend(struct xenbus_driver *drv) | |
52184 | +{ | |
52185 | + drv->read_otherend_details = read_frontend_details; | |
52186 | + | |
52187 | + return xenbus_register_driver_common(drv, &xenbus_backend); | |
52188 | +} | |
52189 | +EXPORT_SYMBOL_GPL(xenbus_register_backend); | |
52190 | + | |
52191 | +/* backend/<typename>/<frontend-uuid>/<name> */ | |
52192 | +static int xenbus_probe_backend_unit(const char *dir, | |
52193 | + const char *type, | |
52194 | + const char *name) | |
52195 | +{ | |
52196 | + char *nodename; | |
52197 | + int err; | |
52198 | + | |
52199 | + nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name); | |
52200 | + if (!nodename) | |
52201 | + return -ENOMEM; | |
52202 | + | |
52203 | + DPRINTK("%s\n", nodename); | |
52204 | + | |
52205 | + err = xenbus_probe_node(&xenbus_backend, type, nodename); | |
52206 | + kfree(nodename); | |
52207 | + return err; | |
52208 | +} | |
52209 | + | |
52210 | +/* backend/<typename>/<frontend-domid> */ | |
52211 | +static int xenbus_probe_backend(const char *type, const char *domid) | |
52212 | +{ | |
52213 | + char *nodename; | |
52214 | + int err = 0; | |
52215 | + char **dir; | |
52216 | + unsigned int i, dir_n = 0; | |
52217 | + | |
52218 | + DPRINTK(""); | |
52219 | + | |
52220 | + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_backend.root, type, domid); | |
52221 | + if (!nodename) | |
52222 | + return -ENOMEM; | |
52223 | + | |
52224 | + dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n); | |
52225 | + if (IS_ERR(dir)) { | |
52226 | + kfree(nodename); | |
52227 | + return PTR_ERR(dir); | |
52228 | + } | |
52229 | + | |
52230 | + for (i = 0; i < dir_n; i++) { | |
52231 | + err = xenbus_probe_backend_unit(nodename, type, dir[i]); | |
52232 | + if (err) | |
52233 | + break; | |
52234 | + } | |
52235 | + kfree(dir); | |
52236 | + kfree(nodename); | |
52237 | + return err; | |
52238 | +} | |
52239 | + | |
52240 | +static void backend_changed(struct xenbus_watch *watch, | |
52241 | + const char **vec, unsigned int len) | |
52242 | +{ | |
52243 | + DPRINTK(""); | |
52244 | + | |
52245 | + dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); | |
52246 | +} | |
52247 | + | |
52248 | +static struct xenbus_watch be_watch = { | |
52249 | + .node = "backend", | |
52250 | + .callback = backend_changed, | |
52251 | +}; | |
52252 | + | |
52253 | +void xenbus_backend_suspend(int (*fn)(struct device *, void *)) | |
52254 | +{ | |
52255 | + DPRINTK(""); | |
52256 | + if (!xenbus_backend.error) | |
52257 | + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); | |
52258 | +} | |
52259 | + | |
52260 | +void xenbus_backend_resume(int (*fn)(struct device *, void *)) | |
52261 | +{ | |
52262 | + DPRINTK(""); | |
52263 | + if (!xenbus_backend.error) | |
52264 | + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); | |
52265 | +} | |
52266 | + | |
52267 | +void xenbus_backend_probe_and_watch(void) | |
52268 | +{ | |
52269 | + xenbus_probe_devices(&xenbus_backend); | |
52270 | + register_xenbus_watch(&be_watch); | |
52271 | +} | |
52272 | + | |
52273 | +void xenbus_backend_bus_register(void) | |
52274 | +{ | |
52275 | + xenbus_backend.error = bus_register(&xenbus_backend.bus); | |
52276 | + if (xenbus_backend.error) | |
52277 | + printk(KERN_WARNING | |
52278 | + "XENBUS: Error registering backend bus: %i\n", | |
52279 | + xenbus_backend.error); | |
52280 | +} | |
52281 | + | |
52282 | +void xenbus_backend_device_register(void) | |
52283 | +{ | |
52284 | + if (xenbus_backend.error) | |
52285 | + return; | |
52286 | + | |
52287 | + xenbus_backend.error = device_register(&xenbus_backend.dev); | |
52288 | + if (xenbus_backend.error) { | |
52289 | + bus_unregister(&xenbus_backend.bus); | |
52290 | + printk(KERN_WARNING | |
52291 | + "XENBUS: Error registering backend device: %i\n", | |
52292 | + xenbus_backend.error); | |
52293 | + } | |
52294 | +} | |
52295 | + | |
52296 | +int xenbus_for_each_backend(void *arg, int (*fn)(struct device *, void *)) | |
52297 | +{ | |
52298 | + return bus_for_each_dev(&xenbus_backend.bus, NULL, arg, fn); | |
52299 | +} | |
52300 | +EXPORT_SYMBOL_GPL(xenbus_for_each_backend); | |
52301 | Index: head-2008-11-25/drivers/xen/xenoprof/xenoprofile.c | |
52302 | =================================================================== | |
52303 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | |
52304 | +++ head-2008-11-25/drivers/xen/xenoprof/xenoprofile.c 2008-09-15 13:40:15.000000000 +0200 | |
52305 | @@ -0,0 +1,545 @@ | |
52306 | +/** | |
52307 | + * @file xenoprofile.c | |
52308 | + * | |
52309 | + * @remark Copyright 2002 OProfile authors | |
52310 | + * @remark Read the file COPYING | |
52311 | + * | |
52312 | + * @author John Levon <levon@movementarian.org> | |
52313 | + * | |
52314 | + * Modified by Aravind Menon and Jose Renato Santos for Xen | |
52315 | + * These modifications are: | |
52316 | + * Copyright (C) 2005 Hewlett-Packard Co. | |
52317 | + * | |
52318 | + * Separated out arch-generic part | |
52319 | + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> | |
52320 | + * VA Linux Systems Japan K.K. | |
52321 | + */ | |
52322 | + | |
52323 | +#include <linux/init.h> | |
52324 | +#include <linux/notifier.h> | |
52325 | +#include <linux/smp.h> | |
52326 | +#include <linux/oprofile.h> | |
52327 | +#include <linux/sysdev.h> | |
52328 | +#include <linux/slab.h> | |
52329 | +#include <linux/interrupt.h> | |
52330 | +#include <linux/vmalloc.h> | |
52331 | +#include <asm/pgtable.h> | |
52332 | +#include <xen/evtchn.h> | |
52333 | +#include <xen/xenoprof.h> | |
52334 | +#include <xen/driver_util.h> | |
52335 | +#include <xen/interface/xen.h> | |
52336 | +#include <xen/interface/xenoprof.h> | |
52337 | +#include "../../../drivers/oprofile/cpu_buffer.h" | |
52338 | +#include "../../../drivers/oprofile/event_buffer.h" | |
52339 | + | |
52340 | +#define MAX_XENOPROF_SAMPLES 16 | |
52341 | + | |
52342 | +/* sample buffers shared with Xen */ | |
52343 | +static xenoprof_buf_t *xenoprof_buf[MAX_VIRT_CPUS]; | |
52344 | +/* Shared buffer area */ | |
52345 | +static struct xenoprof_shared_buffer shared_buffer; | |
52346 | + | |
52347 | +/* Passive sample buffers shared with Xen */ | |
52348 | +static xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS]; | |
52349 | +/* Passive shared buffer area */ | |
52350 | +static struct xenoprof_shared_buffer p_shared_buffer[MAX_OPROF_DOMAINS]; | |
52351 | + | |
52352 | +static int xenoprof_start(void); | |
52353 | +static void xenoprof_stop(void); | |
52354 | + | |
52355 | +static int xenoprof_enabled = 0; | |
52356 | +static int xenoprof_is_primary = 0; | |
52357 | +static int active_defined; | |
52358 | + | |
52359 | +extern unsigned long backtrace_depth; | |
52360 | + | |
52361 | +/* Number of buffers in shared area (one per VCPU) */ | |
52362 | +static int nbuf; | |
52363 | +/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */ | |
52364 | +static int ovf_irq[NR_CPUS]; | |
52365 | +/* cpu model type string - copied from Xen on XENOPROF_init command */ | |
52366 | +static char cpu_type[XENOPROF_CPU_TYPE_SIZE]; | |
52367 | + | |
52368 | +#ifdef CONFIG_PM | |
52369 | + | |
52370 | +static int xenoprof_suspend(struct sys_device * dev, pm_message_t state) | |
52371 | +{ | |
52372 | + if (xenoprof_enabled == 1) | |
52373 | + xenoprof_stop(); | |
52374 | + return 0; | |
52375 | +} | |
52376 | + | |
52377 | + | |
52378 | +static int xenoprof_resume(struct sys_device * dev) | |
52379 | +{ | |
52380 | + if (xenoprof_enabled == 1) | |
52381 | + xenoprof_start(); | |
52382 | + return 0; | |
52383 | +} | |
52384 | + | |
52385 | + | |
52386 | +static struct sysdev_class oprofile_sysclass = { | |
52387 | + set_kset_name("oprofile"), | |
52388 | + .resume = xenoprof_resume, | |
52389 | + .suspend = xenoprof_suspend | |
52390 | +}; | |
52391 | + | |
52392 | + | |
52393 | +static struct sys_device device_oprofile = { | |
52394 | + .id = 0, | |
52395 | + .cls = &oprofile_sysclass, | |
52396 | +}; | |
52397 | + | |
52398 | + | |
52399 | +static int __init init_driverfs(void) | |
52400 | +{ | |
52401 | + int error; | |
52402 | + if (!(error = sysdev_class_register(&oprofile_sysclass))) | |
52403 | + error = sysdev_register(&device_oprofile); | |
52404 | + return error; | |
52405 | +} | |
52406 | + | |
52407 | + | |
52408 | +static void exit_driverfs(void) | |
52409 | +{ | |
52410 | + sysdev_unregister(&device_oprofile); | |
52411 | + sysdev_class_unregister(&oprofile_sysclass); | |
52412 | +} | |
52413 | + | |
52414 | +#else | |
52415 | +#define init_driverfs() do { } while (0) | |
52416 | +#define exit_driverfs() do { } while (0) | |
52417 | +#endif /* CONFIG_PM */ | |
52418 | + | |
52419 | +static unsigned long long oprofile_samples; | |
52420 | +static unsigned long long p_oprofile_samples; | |
52421 | + | |
52422 | +static unsigned int pdomains; | |
52423 | +static struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS]; | |
52424 | + | |
52425 | +/* Check whether the given entry is an escape code */ | |
52426 | +static int xenoprof_is_escape(xenoprof_buf_t * buf, int tail) | |
52427 | +{ | |
52428 | + return (buf->event_log[tail].eip == XENOPROF_ESCAPE_CODE); | |
52429 | +} | |
52430 | + | |
52431 | +/* Get the event at the given entry */ | |
52432 | +static uint8_t xenoprof_get_event(xenoprof_buf_t * buf, int tail) | |
52433 | +{ | |
52434 | + return (buf->event_log[tail].event); | |
52435 | +} | |
52436 | + | |
52437 | +static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive) | |
52438 | +{ | |
52439 | + int head, tail, size; | |
52440 | + int tracing = 0; | |
52441 | + | |
52442 | + head = buf->event_head; | |
52443 | + tail = buf->event_tail; | |
52444 | + size = buf->event_size; | |
52445 | + | |
52446 | + while (tail != head) { | |
52447 | + if (xenoprof_is_escape(buf, tail) && | |
52448 | + xenoprof_get_event(buf, tail) == XENOPROF_TRACE_BEGIN) { | |
52449 | + tracing=1; | |
52450 | + oprofile_add_pc(ESCAPE_CODE, buf->event_log[tail].mode, | |
52451 | + CPU_TRACE_BEGIN); | |
52452 | + if (!is_passive) | |
52453 | + oprofile_samples++; | |
52454 | + else | |
52455 | + p_oprofile_samples++; | |
52456 | + | |
52457 | + } else { | |
52458 | + oprofile_add_pc(buf->event_log[tail].eip, | |
52459 | + buf->event_log[tail].mode, | |
52460 | + buf->event_log[tail].event); | |
52461 | + if (!tracing) { | |
52462 | + if (!is_passive) | |
52463 | + oprofile_samples++; | |
52464 | + else | |
52465 | + p_oprofile_samples++; | |
52466 | + } | |
52467 | + | |
52468 | + } | |
52469 | + tail++; | |
52470 | + if(tail==size) | |
52471 | + tail=0; | |
52472 | + } | |
52473 | + buf->event_tail = tail; | |
52474 | +} | |
52475 | + | |
52476 | +static void xenoprof_handle_passive(void) | |
52477 | +{ | |
52478 | + int i, j; | |
52479 | + int flag_domain, flag_switch = 0; | |
52480 | + | |
52481 | + for (i = 0; i < pdomains; i++) { | |
52482 | + flag_domain = 0; | |
52483 | + for (j = 0; j < passive_domains[i].nbuf; j++) { | |
52484 | + xenoprof_buf_t *buf = p_xenoprof_buf[i][j]; | |
52485 | + if (buf->event_head == buf->event_tail) | |
52486 | + continue; | |
52487 | + if (!flag_domain) { | |
52488 | + if (!oprofile_add_domain_switch( | |
52489 | + passive_domains[i].domain_id)) | |
52490 | + goto done; | |
52491 | + flag_domain = 1; | |
52492 | + } | |
52493 | + xenoprof_add_pc(buf, 1); | |
52494 | + flag_switch = 1; | |
52495 | + } | |
52496 | + } | |
52497 | +done: | |
52498 | + if (flag_switch) | |
52499 | + oprofile_add_domain_switch(COORDINATOR_DOMAIN); | |
52500 | +} | |
52501 | + | |
52502 | +static irqreturn_t | |
52503 | +xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs) | |
52504 | +{ | |
52505 | + struct xenoprof_buf * buf; | |
52506 | + static unsigned long flag; | |
52507 | + | |
52508 | + buf = xenoprof_buf[smp_processor_id()]; | |
52509 | + | |
52510 | + xenoprof_add_pc(buf, 0); | |
52511 | + | |
52512 | + if (xenoprof_is_primary && !test_and_set_bit(0, &flag)) { | |
52513 | + xenoprof_handle_passive(); | |
52514 | + smp_mb__before_clear_bit(); | |
52515 | + clear_bit(0, &flag); | |
52516 | + } | |
52517 | + | |
52518 | + return IRQ_HANDLED; | |
52519 | +} | |
52520 | + | |
52521 | + | |
52522 | +static void unbind_virq(void) | |
52523 | +{ | |
52524 | + unsigned int i; | |
52525 | + | |
52526 | + for_each_online_cpu(i) { | |
52527 | + if (ovf_irq[i] >= 0) { | |
52528 | + unbind_from_irqhandler(ovf_irq[i], NULL); | |
52529 | + ovf_irq[i] = -1; | |
52530 | + } | |
52531 | + } | |
52532 | +} | |
52533 | + | |
52534 | + | |
52535 | +static int bind_virq(void) | |
52536 | +{ | |
52537 | + unsigned int i; | |
52538 | + int result; | |
52539 | + | |
52540 | + for_each_online_cpu(i) { | |
52541 | + result = bind_virq_to_irqhandler(VIRQ_XENOPROF, | |
52542 | + i, | |
52543 | + xenoprof_ovf_interrupt, | |
52544 | + SA_INTERRUPT, | |
52545 | + "xenoprof", | |
52546 | + NULL); | |
52547 | + | |
52548 | + if (result < 0) { | |
52549 | + unbind_virq(); | |
52550 | + return result; | |
52551 | + } | |
52552 | + | |
52553 | + ovf_irq[i] = result; | |
52554 | + } | |
52555 | + | |
52556 | + return 0; | |
52557 | +} | |
52558 | + | |
52559 | + | |
52560 | +static void unmap_passive_list(void) | |
52561 | +{ | |
52562 | + int i; | |
52563 | + for (i = 0; i < pdomains; i++) | |
52564 | + xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]); | |
52565 | + pdomains = 0; | |
52566 | +} | |
52567 | + | |
52568 | + | |
52569 | +static int map_xenoprof_buffer(int max_samples) | |
52570 | +{ | |
52571 | + struct xenoprof_get_buffer get_buffer; | |
52572 | + struct xenoprof_buf *buf; | |
52573 | + int ret, i; | |
52574 | + | |
52575 | + if ( shared_buffer.buffer ) | |
52576 | + return 0; | |
52577 | + | |
52578 | + get_buffer.max_samples = max_samples; | |
52579 | + ret = xenoprof_arch_map_shared_buffer(&get_buffer, &shared_buffer); | |
52580 | + if (ret) | |
52581 | + return ret; | |
52582 | + nbuf = get_buffer.nbuf; | |
52583 | + | |
52584 | + for (i=0; i< nbuf; i++) { | |
52585 | + buf = (struct xenoprof_buf*) | |
52586 | + &shared_buffer.buffer[i * get_buffer.bufsize]; | |
52587 | + BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); | |
52588 | + xenoprof_buf[buf->vcpu_id] = buf; | |
52589 | + } | |
52590 | + | |
52591 | + return 0; | |
52592 | +} | |
52593 | + | |
52594 | + | |
52595 | +static int xenoprof_setup(void) | |
52596 | +{ | |
52597 | + int ret; | |
52598 | + | |
52599 | + if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) ) | |
52600 | + return ret; | |
52601 | + | |
52602 | + if ( (ret = bind_virq()) ) | |
52603 | + return ret; | |
52604 | + | |
52605 | + if (xenoprof_is_primary) { | |
52606 | + /* Define dom0 as an active domain if not done yet */ | |
52607 | + if (!active_defined) { | |
52608 | + domid_t domid; | |
52609 | + ret = HYPERVISOR_xenoprof_op( | |
52610 | + XENOPROF_reset_active_list, NULL); | |
52611 | + if (ret) | |
52612 | + goto err; | |
52613 | + domid = 0; | |
52614 | + ret = HYPERVISOR_xenoprof_op( | |
52615 | + XENOPROF_set_active, &domid); | |
52616 | + if (ret) | |
52617 | + goto err; | |
52618 | + active_defined = 1; | |
52619 | + } | |
52620 | + | |
52621 | + if (backtrace_depth > 0) { | |
52622 | + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_backtrace, | |
52623 | + &backtrace_depth); | |
52624 | + if (ret) | |
52625 | + backtrace_depth = 0; | |
52626 | + } | |
52627 | + | |
52628 | + ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL); | |
52629 | + if (ret) | |
52630 | + goto err; | |
52631 | + | |
52632 | + xenoprof_arch_counter(); | |
52633 | + ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL); | |
52634 | + if (ret) | |
52635 | + goto err; | |
52636 | + } | |
52637 | + | |
52638 | + ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL); | |
52639 | + if (ret) | |
52640 | + goto err; | |
52641 | + | |
52642 | + xenoprof_enabled = 1; | |
52643 | + return 0; | |
52644 | + err: | |
52645 | + unbind_virq(); | |
52646 | + return ret; | |
52647 | +} | |
52648 | + | |
52649 | + | |
52650 | +static void xenoprof_shutdown(void) | |
52651 | +{ | |
52652 | + xenoprof_enabled = 0; | |
52653 | + | |
52654 | + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL)); | |
52655 | + | |
52656 | + if (xenoprof_is_primary) { | |
52657 | + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_release_counters, | |
52658 | + NULL)); | |
52659 | + active_defined = 0; | |
52660 | + } | |
52661 | + | |
52662 | + unbind_virq(); | |
52663 | + | |
52664 | + xenoprof_arch_unmap_shared_buffer(&shared_buffer); | |
52665 | + if (xenoprof_is_primary) | |
52666 | + unmap_passive_list(); | |
52667 | +} | |
52668 | + | |
52669 | + | |
52670 | +static int xenoprof_start(void) | |
52671 | +{ | |
52672 | + int ret = 0; | |
52673 | + | |
52674 | + if (xenoprof_is_primary) | |
52675 | + ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL); | |
52676 | + if (!ret) | |
52677 | + xenoprof_arch_start(); | |
52678 | + return ret; | |
52679 | +} | |
52680 | + | |
52681 | + | |
52682 | +static void xenoprof_stop(void) | |
52683 | +{ | |
52684 | + if (xenoprof_is_primary) | |
52685 | + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL)); | |
52686 | + xenoprof_arch_stop(); | |
52687 | +} | |
52688 | + | |
52689 | + | |
52690 | +static int xenoprof_set_active(int * active_domains, | |
52691 | + unsigned int adomains) | |
52692 | +{ | |
52693 | + int ret = 0; | |
52694 | + int i; | |
52695 | + int set_dom0 = 0; | |
52696 | + domid_t domid; | |
52697 | + | |
52698 | + if (!xenoprof_is_primary) | |
52699 | + return 0; | |
52700 | + | |
52701 | + if (adomains > MAX_OPROF_DOMAINS) | |
52702 | + return -E2BIG; | |
52703 | + | |
52704 | + ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL); | |
52705 | + if (ret) | |
52706 | + return ret; | |
52707 | + | |
52708 | + for (i=0; i<adomains; i++) { | |
52709 | + domid = active_domains[i]; | |
52710 | + if (domid != active_domains[i]) { | |
52711 | + ret = -EINVAL; | |
52712 | + goto out; | |
52713 | + } | |
52714 | + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid); | |
52715 | + if (ret) | |
52716 | + goto out; | |
52717 | + if (active_domains[i] == 0) | |
52718 | + set_dom0 = 1; | |
52719 | + } | |
52720 | + /* dom0 must always be active but may not be in the list */ | |
52721 | + if (!set_dom0) { | |
52722 | + domid = 0; | |
52723 | + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid); | |
52724 | + } | |
52725 | + | |
52726 | +out: | |
52727 | + if (ret) | |
52728 | + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, | |
52729 | + NULL)); | |
52730 | + active_defined = !ret; | |
52731 | + return ret; | |
52732 | +} | |
52733 | + | |
52734 | +static int xenoprof_set_passive(int * p_domains, | |
52735 | + unsigned int pdoms) | |
52736 | +{ | |
52737 | + int ret; | |
52738 | + unsigned int i, j; | |
52739 | + struct xenoprof_buf *buf; | |
52740 | + | |
52741 | + if (!xenoprof_is_primary) | |
52742 | + return 0; | |
52743 | + | |
52744 | + if (pdoms > MAX_OPROF_DOMAINS) | |
52745 | + return -E2BIG; | |
52746 | + | |
52747 | + ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL); | |
52748 | + if (ret) | |
52749 | + return ret; | |
52750 | + unmap_passive_list(); | |
52751 | + | |
52752 | + for (i = 0; i < pdoms; i++) { | |
52753 | + passive_domains[i].domain_id = p_domains[i]; | |
52754 | + passive_domains[i].max_samples = 2048; | |
52755 | + ret = xenoprof_arch_set_passive(&passive_domains[i], | |
52756 | + &p_shared_buffer[i]); | |
52757 | + if (ret) | |
52758 | + goto out; | |
52759 | + for (j = 0; j < passive_domains[i].nbuf; j++) { | |
52760 | + buf = (struct xenoprof_buf *) | |
52761 | + &p_shared_buffer[i].buffer[ | |
52762 | + j * passive_domains[i].bufsize]; | |
52763 | + BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); | |
52764 | + p_xenoprof_buf[i][buf->vcpu_id] = buf; | |
52765 | + } | |
52766 | + } | |
52767 | + | |
52768 | + pdomains = pdoms; | |
52769 | + return 0; | |
52770 | + | |
52771 | +out: | |
52772 | + for (j = 0; j < i; j++) | |
52773 | + xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]); | |
52774 | + | |
52775 | + return ret; | |
52776 | +} | |
52777 | + | |
52778 | + | |
52779 | +/* The dummy backtrace function to keep oprofile happy | |
52780 | + * The real backtrace is done in xen | |
52781 | + */ | |
52782 | +static void xenoprof_dummy_backtrace(struct pt_regs * const regs, | |
52783 | + unsigned int depth) | |
52784 | +{ | |
52785 | + /* this should never be called */ | |
52786 | + BUG(); | |
52787 | + return; | |
52788 | +} | |
52789 | + | |
52790 | + | |
52791 | +static struct oprofile_operations xenoprof_ops = { | |
52792 | +#ifdef HAVE_XENOPROF_CREATE_FILES | |
52793 | + .create_files = xenoprof_create_files, | |
52794 | +#endif | |
52795 | + .set_active = xenoprof_set_active, | |
52796 | + .set_passive = xenoprof_set_passive, | |
52797 | + .setup = xenoprof_setup, | |
52798 | + .shutdown = xenoprof_shutdown, | |
52799 | + .start = xenoprof_start, | |
52800 | + .stop = xenoprof_stop, | |
52801 | + .backtrace = xenoprof_dummy_backtrace | |
52802 | +}; | |
52803 | + | |
52804 | + | |
52805 | +/* in order to get driverfs right */ | |
52806 | +static int using_xenoprof; | |
52807 | + | |
52808 | +int __init xenoprofile_init(struct oprofile_operations * ops) | |
52809 | +{ | |
52810 | + struct xenoprof_init init; | |
52811 | + unsigned int i; | |
52812 | + int ret; | |
52813 | + | |
52814 | + ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init); | |
52815 | + if (!ret) { | |
52816 | + xenoprof_arch_init_counter(&init); | |
52817 | + xenoprof_is_primary = init.is_primary; | |
52818 | + | |
52819 | + /* cpu_type is detected by Xen */ | |
52820 | + cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0; | |
52821 | + strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1); | |
52822 | + xenoprof_ops.cpu_type = cpu_type; | |
52823 | + | |
52824 | + init_driverfs(); | |
52825 | + using_xenoprof = 1; | |
52826 | + *ops = xenoprof_ops; | |
52827 | + | |
52828 | + for (i=0; i<NR_CPUS; i++) | |
52829 | + ovf_irq[i] = -1; | |
52830 | + | |
52831 | + active_defined = 0; | |
52832 | + } | |
52833 | + | |
52834 | + printk(KERN_INFO "%s: ret %d, events %d, xenoprof_is_primary %d\n", | |
52835 | + __func__, ret, init.num_events, xenoprof_is_primary); | |
52836 | + return ret; | |
52837 | +} | |
52838 | + | |
52839 | + | |
52840 | +void xenoprofile_exit(void) | |
52841 | +{ | |
52842 | + if (using_xenoprof) | |
52843 | + exit_driverfs(); | |
52844 | + | |
52845 | + xenoprof_arch_unmap_shared_buffer(&shared_buffer); | |
52846 | + if (xenoprof_is_primary) { | |
52847 | + unmap_passive_list(); | |
52848 | + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL)); | |
52849 | + } | |
52850 | +} |