From: Greg Kroah-Hartman
Date: Tue, 18 Feb 2014 22:43:55 +0000 (-0800)
Subject: 3.4-stable patches
X-Git-Tag: v3.4.81~2
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0490963f1ee2839e919e250694eb5d1da922d055;p=thirdparty%2Fkernel%2Fstable-queue.git

3.4-stable patches

added patches:
	input-synaptics-handle-out-of-bounds-values-from-the-hardware.patch
	kvm-fix-buffer-overflow-in-kvm_set_irq.patch
	lib-vsprintf.c-kptr_restrict-fix-pk-error-in-sysrq-show-all-timers-q.patch
	nfs-tear-down-caches-in-nfs_init_writepagecache-when-allocation-fails.patch
	pm-hibernate-hibernate-thaw-fixes-improvements.patch
	target-file-re-enable-optional-fd_buffered_io-1-operation.patch
	target-file-use-o_dsync-by-default-for-fileio-backends.patch
	virtio-blk-use-block-layer-provided-spinlock.patch
---

diff --git a/queue-3.4/input-synaptics-handle-out-of-bounds-values-from-the-hardware.patch b/queue-3.4/input-synaptics-handle-out-of-bounds-values-from-the-hardware.patch
new file mode 100644
index 00000000000..bbffe96f3e7
--- /dev/null
+++ b/queue-3.4/input-synaptics-handle-out-of-bounds-values-from-the-hardware.patch
@@ -0,0 +1,88 @@
+From 2028a493b8ff0cfb853be5839759e9522a6da64a Mon Sep 17 00:00:00 2001
+From: Seth Forshee
+Date: Tue, 24 Jul 2012 23:54:11 -0700
+Subject: Input: synaptics - handle out of bounds values from the hardware
+
+From: Seth Forshee
+
+commit c0394506e69b37c47d391c2a7bbea3ea236d8ec8 upstream.
+
+The touchpad on the Acer Aspire One D250 will report out of range values
+in the extreme lower portion of the touchpad. These appear as abrupt
+changes in the values reported by the hardware from very low values to
+very high values, which can cause unexpected vertical jumps in the
+position of the mouse pointer.
+
+What seems to be happening is that the value is wrapping to a two's
+complement negative value of higher resolution than the 13-bit value
+reported by the hardware, with the high-order bits being truncated. This
+patch adds handling for these values by converting them to the
+appropriate negative values.
+
+The only tricky part about this is deciding when to treat a number as
+negative. It stands to reason that if out of range values can be
+reported on the low end then it could also happen on the high end, so
+not all out of range values should be treated as negative. The approach
+taken here is to split the difference between the maximum legitimate
+value for the axis and the maximum possible value that the hardware can
+report, treating values greater than this number as negative and all
+other values as positive. This can be tweaked later if hardware is found
+that operates outside of these parameters.
+
+BugLink: http://bugs.launchpad.net/bugs/1001251
+Signed-off-by: Seth Forshee
+Reviewed-by: Daniel Kurtz
+Signed-off-by: Dmitry Torokhov
+[bwh: Backported to 3.2: adjust context]
+Signed-off-by: Ben Hutchings
+Cc: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/input/mouse/synaptics.c | 23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/drivers/input/mouse/synaptics.c
++++ b/drivers/input/mouse/synaptics.c
+@@ -40,11 +40,28 @@
+  * Note that newer firmware allows querying device for maximum useable
+  * coordinates.
+  */
++#define XMIN 0
++#define XMAX 6143
++#define YMIN 0
++#define YMAX 6143
+ #define XMIN_NOMINAL 1472
+ #define XMAX_NOMINAL 5472
+ #define YMIN_NOMINAL 1408
+ #define YMAX_NOMINAL 4448
+ 
++/* Size in bits of absolute position values reported by the hardware */
++#define ABS_POS_BITS 13
++
++/*
++ * Any position values from the hardware above the following limits are
++ * treated as "wrapped around negative" values that have been truncated to
++ * the 13-bit reporting range of the hardware. These are just reasonable
++ * guesses and can be adjusted if hardware is found that operates outside
++ * of these parameters.
++ */
++#define X_MAX_POSITIVE (((1 << ABS_POS_BITS) + XMAX) / 2)
++#define Y_MAX_POSITIVE (((1 << ABS_POS_BITS) + YMAX) / 2)
++
+ /*
+  * Synaptics touchpads report the y coordinate from bottom to top, which is
+  * opposite from what userspace expects.
+@@ -555,6 +572,12 @@ static int synaptics_parse_hw_state(cons
+ 		hw->right = (buf[0] & 0x02) ? 1 : 0;
+ 	}
+ 
++	/* Convert wrap-around values to negative */
++	if (hw->x > X_MAX_POSITIVE)
++		hw->x -= 1 << ABS_POS_BITS;
++	if (hw->y > Y_MAX_POSITIVE)
++		hw->y -= 1 << ABS_POS_BITS;
++
+ 	return 0;
+ }
+ 
diff --git a/queue-3.4/kvm-fix-buffer-overflow-in-kvm_set_irq.patch b/queue-3.4/kvm-fix-buffer-overflow-in-kvm_set_irq.patch
new file mode 100644
index 00000000000..ddf9b4c3cff
--- /dev/null
+++ b/queue-3.4/kvm-fix-buffer-overflow-in-kvm_set_irq.patch
@@ -0,0 +1,35 @@
+From 0f3cbc35d2097d2c655789dd4996e7b87bdb5d34 Mon Sep 17 00:00:00 2001
+From: Avi Kivity
+Date: Sun, 22 Apr 2012 17:02:11 +0300
+Subject: KVM: Fix buffer overflow in kvm_set_irq()
+
+From: Avi Kivity
+
+commit f2ebd422f71cda9c791f76f85d2ca102ae34a1ed upstream.
+
+kvm_set_irq() has an internal buffer of three irq routing entries, allowing
+connecting a GSI to three IRQ chips or one MSI. However setup_routing_entry()
+does not properly enforce this, allowing three irqchip routes followed by
+an MSI route to overflow the buffer.
+
+Fix by ensuring that an MSI entry is added to an empty list.
+
+Signed-off-by: Avi Kivity
+Signed-off-by: Ben Hutchings
+Cc: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ virt/kvm/irq_comm.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/virt/kvm/irq_comm.c
++++ b/virt/kvm/irq_comm.c
+@@ -318,6 +318,7 @@ static int setup_routing_entry(struct kv
+ 	 */
+ 	hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
+ 		if (ei->type == KVM_IRQ_ROUTING_MSI ||
++		    ue->type == KVM_IRQ_ROUTING_MSI ||
+ 		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
+ 			return r;
+ 
diff --git a/queue-3.4/lib-vsprintf.c-kptr_restrict-fix-pk-error-in-sysrq-show-all-timers-q.patch b/queue-3.4/lib-vsprintf.c-kptr_restrict-fix-pk-error-in-sysrq-show-all-timers-q.patch
new file mode 100644
index 00000000000..dbe70412bfb
--- /dev/null
+++ b/queue-3.4/lib-vsprintf.c-kptr_restrict-fix-pk-error-in-sysrq-show-all-timers-q.patch
@@ -0,0 +1,47 @@
+From 1b772a147a183c09462f0d23e041b077f158fa0f Mon Sep 17 00:00:00 2001
+From: Dan Rosenberg
+Date: Mon, 30 Jul 2012 14:40:26 -0700
+Subject: lib/vsprintf.c: kptr_restrict: fix pK-error in SysRq show-all-timers(Q)
+
+From: Dan Rosenberg
+
+commit 3715c5309f6d175c3053672b73fd4f73be16fd07 upstream.
+
+When using ALT+SysRq+Q all the pointers are replaced with "pK-error" like
+this:
+
+ [23153.208033] .base: pK-error
+
+with echo h > /proc/sysrq-trigger it works:
+
+ [23107.776363] .base: ffff88023e60d540
+
+The intent behind this behavior was to return "pK-error" in cases where
+the %pK format specifier was used in interrupt context, because the
+CAP_SYSLOG check wouldn't be meaningful. Clearly this should only apply
+when kptr_restrict is actually enabled though.
+
+Reported-by: Stevie Trujillo
+Signed-off-by: Dan Rosenberg
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Ben Hutchings
+Cc: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ lib/vsprintf.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/lib/vsprintf.c
++++ b/lib/vsprintf.c
+@@ -926,7 +926,8 @@ char *pointer(const char *fmt, char *buf
+ 	 * %pK cannot be used in IRQ context because its test
+ 	 * for CAP_SYSLOG would be meaningless.
+ 	 */
+-	if (in_irq() || in_serving_softirq() || in_nmi()) {
++	if (kptr_restrict && (in_irq() || in_serving_softirq() ||
++	    in_nmi())) {
+ 		if (spec.field_width == -1)
+ 			spec.field_width = 2 * sizeof(void *);
+ 		return string(buf, end, "pK-error", spec);
diff --git a/queue-3.4/nfs-tear-down-caches-in-nfs_init_writepagecache-when-allocation-fails.patch b/queue-3.4/nfs-tear-down-caches-in-nfs_init_writepagecache-when-allocation-fails.patch
new file mode 100644
index 00000000000..ff07c590e61
--- /dev/null
+++ b/queue-3.4/nfs-tear-down-caches-in-nfs_init_writepagecache-when-allocation-fails.patch
@@ -0,0 +1,54 @@
+From c52f6cc5d3e4dcb9167448614338a25a31573484 Mon Sep 17 00:00:00 2001
+From: Jeff Layton
+Date: Thu, 2 Aug 2012 14:30:56 -0400
+Subject: nfs: tear down caches in nfs_init_writepagecache when allocation fails
+
+From: Jeff Layton
+
+commit 3dd4765fce04c0b4af1e0bc4c0b10f906f95fabc upstream.
+
+...and ensure that we tear down the nfs_commit_data cache too when
+unloading the module.
+
+Cc: Bryan Schumaker
+Signed-off-by: Jeff Layton
+Signed-off-by: Trond Myklebust
+[bwh: Backported to 3.2: drop the nfs_cdata_cachep cleanup; it doesn't exist]
+Signed-off-by: Ben Hutchings
+Cc: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/nfs/write.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -1751,12 +1751,12 @@ int __init nfs_init_writepagecache(void)
+ 	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
+ 						     nfs_wdata_cachep);
+ 	if (nfs_wdata_mempool == NULL)
+-		return -ENOMEM;
++		goto out_destroy_write_cache;
+ 
+ 	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
+ 						      nfs_wdata_cachep);
+ 	if (nfs_commit_mempool == NULL)
+-		return -ENOMEM;
++		goto out_destroy_write_mempool;
+ 
+ 	/*
+ 	 * NFS congestion size, scale with available memory.
+@@ -1779,6 +1779,12 @@ int __init nfs_init_writepagecache(void)
+ 		nfs_congestion_kb = 256*1024;
+ 
+ 	return 0;
++
++out_destroy_write_mempool:
++	mempool_destroy(nfs_wdata_mempool);
++out_destroy_write_cache:
++	kmem_cache_destroy(nfs_wdata_cachep);
++	return -ENOMEM;
+ }
+ 
+ void nfs_destroy_writepagecache(void)
diff --git a/queue-3.4/pm-hibernate-hibernate-thaw-fixes-improvements.patch b/queue-3.4/pm-hibernate-hibernate-thaw-fixes-improvements.patch
new file mode 100644
index 00000000000..2afc1b1c8e9
--- /dev/null
+++ b/queue-3.4/pm-hibernate-hibernate-thaw-fixes-improvements.patch
@@ -0,0 +1,178 @@
+From 6ed6791a1697afcb1615b4252d0c304a743b5f4d Mon Sep 17 00:00:00 2001
+From: Bojan Smojver
+Date: Sun, 29 Apr 2012 22:42:06 +0200
+Subject: PM / Hibernate: Hibernate/thaw fixes/improvements
+
+From: Bojan Smojver
+
+commit 5a21d489fd9541a4a66b9a500659abaca1b19a51 upstream.
+
+ 1. Do not allocate memory for buffers from emergency pools, unless
+    absolutely required. Do not warn about and do not retry non-essential
+    failed allocations.
+
+ 2. Do not check the amount of free pages left on every single page
+    write, but wait until one map is completely populated and then check.
+
+ 3. Set maximum number of pages for read buffering consistently, instead
+    of inadvertently depending on the size of the sector type.
+
+ 4. Fix copyright line, which I missed when I submitted the hibernation
+    threading patch.
+
+ 5. Dispense with bit shifting arithmetic to improve readability.
+
+ 6. Really recalculate the number of pages required to be free after all
+    allocations have been done.
+
+ 7. Fix calculation of pages required for read buffering. Only count in
+    pages that do not belong to high memory.
+
+Signed-off-by: Bojan Smojver
+Signed-off-by: Rafael J. Wysocki
+Signed-off-by: Ben Hutchings
+Cc: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/power/swap.c | 62 ++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 39 insertions(+), 23 deletions(-)
+
+--- a/kernel/power/swap.c
++++ b/kernel/power/swap.c
+@@ -6,7 +6,7 @@
+  *
+  * Copyright (C) 1998,2001-2005 Pavel Machek
+  * Copyright (C) 2006 Rafael J. Wysocki
+- * Copyright (C) 2010 Bojan Smojver
++ * Copyright (C) 2010-2012 Bojan Smojver
+  *
+  * This file is released under the GPLv2.
+  *
+@@ -282,14 +282,17 @@ static int write_page(void *buf, sector_
+ 		return -ENOSPC;
+ 
+ 	if (bio_chain) {
+-		src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
++		src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
++					      __GFP_NORETRY);
+ 		if (src) {
+ 			copy_page(src, buf);
+ 		} else {
+ 			ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
+ 			if (ret)
+ 				return ret;
++			src = (void *)__get_free_page(__GFP_WAIT |
++						      __GFP_NOWARN |
++						      __GFP_NORETRY);
+ 			if (src) {
+ 				copy_page(src, buf);
+ 			} else {
+@@ -367,12 +370,17 @@ static int swap_write_page(struct swap_m
+ 		clear_page(handle->cur);
+ 		handle->cur_swap = offset;
+ 		handle->k = 0;
+-	}
+-	if (bio_chain && low_free_pages() <= handle->reqd_free_pages) {
+-		error = hib_wait_on_bio_chain(bio_chain);
+-		if (error)
+-			goto out;
+-		handle->reqd_free_pages = reqd_free_pages();
++
++		if (bio_chain && low_free_pages() <= handle->reqd_free_pages) {
++			error = hib_wait_on_bio_chain(bio_chain);
++			if (error)
++				goto out;
++			/*
++			 * Recalculate the number of required free pages, to
++			 * make sure we never take more than half.
++			 */
++			handle->reqd_free_pages = reqd_free_pages();
++		}
+ 	}
+ out:
+ 	return error;
+@@ -419,8 +427,9 @@ static int swap_writer_finish(struct swa
+ /* Maximum number of threads for compression/decompression. */
+ #define LZO_THREADS 3
+ 
+-/* Maximum number of pages for read buffering. */
+-#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8)
++/* Minimum/maximum number of pages for read buffering. */
++#define LZO_MIN_RD_PAGES 1024
++#define LZO_MAX_RD_PAGES 8192
+ 
+ 
+ /**
+@@ -631,12 +640,6 @@ static int save_image_lzo(struct swap_ma
+ 	}
+ 
+ 	/*
+-	 * Adjust number of free pages after all allocations have been done.
+-	 * We don't want to run out of pages when writing.
+-	 */
+-	handle->reqd_free_pages = reqd_free_pages();
+-
+-	/*
+ 	 * Start the CRC32 thread.
+ 	 */
+ 	init_waitqueue_head(&crc->go);
+@@ -657,6 +660,12 @@ static int save_image_lzo(struct swap_ma
+ 		goto out_clean;
+ 	}
+ 
++	/*
++	 * Adjust the number of required free pages after all allocations have
++	 * been done. We don't want to run out of pages when writing.
++	 */
++	handle->reqd_free_pages = reqd_free_pages();
++
+ 	printk(KERN_INFO
+ 		"PM: Using %u thread(s) for compression.\n"
+ 		"PM: Compressing and saving image data (%u pages) ... ",
+@@ -1067,7 +1076,7 @@ static int load_image_lzo(struct swap_ma
+ 	unsigned i, thr, run_threads, nr_threads;
+ 	unsigned ring = 0, pg = 0, ring_size = 0,
+ 		have = 0, want, need, asked = 0;
+-	unsigned long read_pages;
++	unsigned long read_pages = 0;
+ 	unsigned char **page = NULL;
+ 	struct dec_data *data = NULL;
+ 	struct crc_data *crc = NULL;
+@@ -1079,7 +1088,7 @@ static int load_image_lzo(struct swap_ma
+ 	nr_threads = num_online_cpus() - 1;
+ 	nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
+ 
+-	page = vmalloc(sizeof(*page) * LZO_READ_PAGES);
++	page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES);
+ 	if (!page) {
+ 		printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ 		ret = -ENOMEM;
+@@ -1144,15 +1153,22 @@ static int load_image_lzo(struct swap_ma
+ 	}
+ 
+ 	/*
+-	 * Adjust number of pages for read buffering, in case we are short.
++	 * Set the number of pages for read buffering.
++	 * This is complete guesswork, because we'll only know the real
++	 * picture once prepare_image() is called, which is much later on
++	 * during the image load phase. We'll assume the worst case and
++	 * say that none of the image pages are from high memory.
+ 	 */
+-	read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1;
+-	read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES);
++	if (low_free_pages() > snapshot_get_image_size())
++		read_pages = (low_free_pages() - snapshot_get_image_size()) / 2;
++	read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES);
+ 
+ 	for (i = 0; i < read_pages; i++) {
+ 		page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
+ 						  __GFP_WAIT | __GFP_HIGH :
+-						  __GFP_WAIT);
++						  __GFP_WAIT | __GFP_NOWARN |
++						  __GFP_NORETRY);
++
+ 		if (!page[i]) {
+ 			if (i < LZO_CMP_PAGES) {
+ 				ring_size = i;
diff --git a/queue-3.4/series b/queue-3.4/series
index b49f32ea2b6..a03af3ef448 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -14,3 +14,11 @@ ftrace-have-function-graph-only-trace-based-on-global_ops-filters.patch
 sched-nohz-fix-rq-cpu_load-calculations.patch
 sched-nohz-fix-rq-cpu_load-calculations-some-more.patch
 ib-qib-convert-qib_user_sdma_pin_pages-to-use-get_user_pages_fast.patch
+target-file-use-o_dsync-by-default-for-fileio-backends.patch
+target-file-re-enable-optional-fd_buffered_io-1-operation.patch
+kvm-fix-buffer-overflow-in-kvm_set_irq.patch
+pm-hibernate-hibernate-thaw-fixes-improvements.patch
+input-synaptics-handle-out-of-bounds-values-from-the-hardware.patch
+virtio-blk-use-block-layer-provided-spinlock.patch
+lib-vsprintf.c-kptr_restrict-fix-pk-error-in-sysrq-show-all-timers-q.patch
+nfs-tear-down-caches-in-nfs_init_writepagecache-when-allocation-fails.patch
diff --git a/queue-3.4/target-file-re-enable-optional-fd_buffered_io-1-operation.patch b/queue-3.4/target-file-re-enable-optional-fd_buffered_io-1-operation.patch
new file mode 100644
index 00000000000..a775c8b62c2
--- /dev/null
+++ b/queue-3.4/target-file-re-enable-optional-fd_buffered_io-1-operation.patch
@@ -0,0 +1,135 @@
+From 14b557c19dbe677059243246522da15800ca94f0 Mon Sep 17 00:00:00 2001
+From: Nicholas Bellinger
+Date: Sat, 29 Sep 2012 17:15:37 -0700
+Subject: target/file: Re-enable optional fd_buffered_io=1 operation
+
+From: Nicholas Bellinger
+
+commit b32f4c7ed85c5cee2a21a55c9f59ebc9d57a2463 upstream.
+
+This patch re-adds the ability to optionally run in buffered FILEIO mode
+(eg: w/o O_DSYNC) for device backends in order to once again use the
+Linux buffered cache as a write-back storage mechanism.
+
+This logic was originally dropped with mainline v3.5-rc commit:
+
+commit a4dff3043c231d57f982af635c9d2192ee40e5ae
+Author: Nicholas Bellinger
+Date: Wed May 30 16:25:41 2012 -0700
+
+    target/file: Use O_DSYNC by default for FILEIO backends
+
+The difference with this patch is that fd_create_virtdevice() now
+forces the explicit setting of emulate_write_cache=1 when buffered FILEIO
+operation has been enabled.
+
+(v2: Switch to FDBD_HAS_BUFFERED_IO_WCE + add more detailed
+     comment as requested by hch)
+
+Reported-by: Ferry
+Cc: Christoph Hellwig
+Signed-off-by: Nicholas Bellinger
+Signed-off-by: Ben Hutchings
+Cc: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/target/target_core_file.c | 41 +++++++++++++++++++++++++++++++++++---
+ drivers/target/target_core_file.h | 1 +
+ 2 files changed, 39 insertions(+), 3 deletions(-)
+
+--- a/drivers/target/target_core_file.c
++++ b/drivers/target/target_core_file.c
+@@ -138,6 +138,19 @@ static struct se_device *fd_create_virtd
+ 	 * of pure timestamp updates.
+ 	 */
+ 	flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC;
++	/*
++	 * Optionally allow fd_buffered_io=1 to be enabled for people
++	 * who want to use the fs buffer cache as a WriteCache mechanism.
++	 *
++	 * This means that in the event of a hard failure, there is a risk
++	 * of silent data-loss if the SCSI client has *not* performed a
++	 * forced unit access (FUA) write, or issued SYNCHRONIZE_CACHE
++	 * to write-out the entire device cache.
++	 */
++	if (fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) {
++		pr_debug("FILEIO: Disabling O_DSYNC, using buffered FILEIO\n");
++		flags &= ~O_DSYNC;
++	}
+ 
+ 	file = filp_open(dev_p, flags, 0600);
+ 	if (IS_ERR(file)) {
+@@ -205,6 +218,12 @@ static struct se_device *fd_create_virtd
+ 	if (!dev)
+ 		goto fail;
+ 
++	if (fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) {
++		pr_debug("FILEIO: Forcing setting of emulate_write_cache=1"
++			" with FDBD_HAS_BUFFERED_IO_WCE\n");
++		dev->se_sub_dev->se_dev_attrib.emulate_write_cache = 1;
++	}
++
+ 	fd_dev->fd_dev_id = fd_host->fd_host_dev_id_count++;
+ 	fd_dev->fd_queue_depth = dev->queue_depth;
+ 
+@@ -449,6 +468,7 @@ enum {
+ static match_table_t tokens = {
+ 	{Opt_fd_dev_name, "fd_dev_name=%s"},
+ 	{Opt_fd_dev_size, "fd_dev_size=%s"},
++	{Opt_fd_buffered_io, "fd_buffered_io=%d"},
+ 	{Opt_err, NULL}
+ };
+ 
+@@ -460,7 +480,7 @@ static ssize_t fd_set_configfs_dev_param
+ 	struct fd_dev *fd_dev = se_dev->se_dev_su_ptr;
+ 	char *orig, *ptr, *arg_p, *opts;
+ 	substring_t args[MAX_OPT_ARGS];
+-	int ret = 0, token;
++	int ret = 0, arg, token;
+ 
+ 	opts = kstrdup(page, GFP_KERNEL);
+ 	if (!opts)
+@@ -504,6 +524,19 @@ static ssize_t fd_set_configfs_dev_param
+ 			" bytes\n", fd_dev->fd_dev_size);
+ 		fd_dev->fbd_flags |= FBDF_HAS_SIZE;
+ 		break;
++	case Opt_fd_buffered_io:
++		match_int(args, &arg);
++		if (arg != 1) {
++			pr_err("bogus fd_buffered_io=%d value\n", arg);
++			ret = -EINVAL;
++			goto out;
++		}
++
++		pr_debug("FILEIO: Using buffered I/O"
++			" operations for struct fd_dev\n");
++
++		fd_dev->fbd_flags |= FDBD_HAS_BUFFERED_IO_WCE;
++		break;
+ 	default:
+ 		break;
+ 	}
+@@ -535,8 +568,10 @@ static ssize_t fd_show_configfs_dev_para
+ 	ssize_t bl = 0;
+ 
+ 	bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id);
+-	bl += sprintf(b + bl, " File: %s Size: %llu Mode: O_DSYNC\n",
+-		fd_dev->fd_dev_name, fd_dev->fd_dev_size);
++	bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s\n",
++		fd_dev->fd_dev_name, fd_dev->fd_dev_size,
++		(fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) ?
++		"Buffered-WCE" : "O_DSYNC");
+ 	return bl;
+ }
+ 
+--- a/drivers/target/target_core_file.h
++++ b/drivers/target/target_core_file.h
+@@ -18,6 +18,7 @@ struct fd_request {
+ 
+ #define FBDF_HAS_PATH 0x01
+ #define FBDF_HAS_SIZE 0x02
++#define FDBD_HAS_BUFFERED_IO_WCE 0x04
+ 
+ struct fd_dev {
+ 	u32 fbd_flags;
diff --git a/queue-3.4/target-file-use-o_dsync-by-default-for-fileio-backends.patch b/queue-3.4/target-file-use-o_dsync-by-default-for-fileio-backends.patch
new file mode 100644
index 00000000000..b8951690639
--- /dev/null
+++ b/queue-3.4/target-file-use-o_dsync-by-default-for-fileio-backends.patch
@@ -0,0 +1,185 @@
+From 2492cd118dc5221953054e58f3fd81927b8beda9 Mon Sep 17 00:00:00 2001
+From: Nicholas Bellinger
+Date: Wed, 30 May 2012 16:25:41 -0700
+Subject: target/file: Use O_DSYNC by default for FILEIO backends
+
+From: Nicholas Bellinger
+
+commit a4dff3043c231d57f982af635c9d2192ee40e5ae upstream.
+
+Convert to use O_DSYNC for all cases at FILEIO backend creation time to
+avoid the extra syncing of pure timestamp updates with legacy O_SYNC during
+default operation as recommended by hch. Continue to do this independently of
+Write Cache Enable (WCE) bit, as WCE=0 is currently the default for all backend
+devices and enabled by user on per device basis via attrib/emulate_write_cache.
+
+This patch drops the now unnecessary fd_buffered_io= token usage that was
+originally signalling when to explicitly disable O_SYNC at backend creation
+time for buffered I/O operation. This can end up being dangerous for a number
+of reasons during physical node failure, so go ahead and drop this option
+for now when O_DSYNC is used as the default.
+
+Also allow explicit FUA WRITEs -> vfs_fsync_range() call to function in
+fd_execute_cmd() independently of WCE bit setting.
+
+Reported-by: Christoph Hellwig
+Cc: Linus Torvalds
+Signed-off-by: Nicholas Bellinger
+Signed-off-by: Ben Hutchings
+[bwh: Backported to 3.2:
+ - We have fd_do_task() and not fd_execute_cmd()
+ - Various fields are in struct se_task rather than struct se_cmd
+ - fd_create_virtdevice() flags initialisation hasn't been cleaned up]
+Cc: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/target/target_core_file.c | 78 ++++++++------------------------------
+ drivers/target/target_core_file.h | 1 -
+ 2 files changed, 17 insertions(+), 62 deletions(-)
+
+--- a/drivers/target/target_core_file.c
++++ b/drivers/target/target_core_file.c
+@@ -133,21 +133,11 @@ static struct se_device *fd_create_virtd
+ 		ret = PTR_ERR(dev_p);
+ 		goto fail;
+ 	}
+-#if 0
+-	if (di->no_create_file)
+-		flags = O_RDWR | O_LARGEFILE;
+-	else
+-		flags = O_RDWR | O_CREAT | O_LARGEFILE;
+-#else
+-	flags = O_RDWR | O_CREAT | O_LARGEFILE;
+-#endif
+-/* flags |= O_DIRECT; */
+ 	/*
+-	 * If fd_buffered_io=1 has not been set explicitly (the default),
+-	 * use O_SYNC to force FILEIO writes to disk.
++	 * Use O_DSYNC by default instead of O_SYNC to forgo syncing
++	 * of pure timestamp updates.
+ 	 */
+-	if (!(fd_dev->fbd_flags & FDBD_USE_BUFFERED_IO))
+-		flags |= O_SYNC;
++	flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC;
+ 
+ 	file = filp_open(dev_p, flags, 0600);
+ 	if (IS_ERR(file)) {
+@@ -399,26 +389,6 @@ static void fd_emulate_sync_cache(struct
+ 	transport_complete_sync_cache(cmd, ret == 0);
+ }
+ 
+-/*
+- * WRITE Force Unit Access (FUA) emulation on a per struct se_task
+- * LBA range basis..
+- */
+-static void fd_emulate_write_fua(struct se_cmd *cmd, struct se_task *task)
+-{
+-	struct se_device *dev = cmd->se_dev;
+-	struct fd_dev *fd_dev = dev->dev_ptr;
+-	loff_t start = task->task_lba * dev->se_sub_dev->se_dev_attrib.block_size;
+-	loff_t end = start + task->task_size;
+-	int ret;
+-
+-	pr_debug("FILEIO: FUA WRITE LBA: %llu, bytes: %u\n",
+-		task->task_lba, task->task_size);
+-
+-	ret = vfs_fsync_range(fd_dev->fd_file, start, end, 1);
+-	if (ret != 0)
+-		pr_err("FILEIO: vfs_fsync_range() failed: %d\n", ret);
+-}
+-
+ static int fd_do_task(struct se_task *task)
+ {
+ 	struct se_cmd *cmd = task->task_se_cmd;
+@@ -433,19 +403,21 @@ static int fd_do_task(struct se_task *ta
+ 		ret = fd_do_readv(task);
+ 	} else {
+ 		ret = fd_do_writev(task);
+-
++		/*
++		 * Perform implicit vfs_fsync_range() for fd_do_writev() ops
++		 * for SCSI WRITEs with Forced Unit Access (FUA) set.
++		 * Allow this to happen independent of WCE=0 setting.
++		 */
+ 		if (ret > 0 &&
+-		    dev->se_sub_dev->se_dev_attrib.emulate_write_cache > 0 &&
+ 		    dev->se_sub_dev->se_dev_attrib.emulate_fua_write > 0 &&
+ 		    (cmd->se_cmd_flags & SCF_FUA)) {
+-			/*
+-			 * We might need to be a bit smarter here
+-			 * and return some sense data to let the initiator
+-			 * know the FUA WRITE cache sync failed..?
+-			 */
+-			fd_emulate_write_fua(cmd, task);
+-		}
++			struct fd_dev *fd_dev = dev->dev_ptr;
++			loff_t start = task->task_lba *
++				dev->se_sub_dev->se_dev_attrib.block_size;
++			loff_t end = start + task->task_size;
+ 
++			vfs_fsync_range(fd_dev->fd_file, start, end, 1);
++		}
+ 	}
+ 
+ 	if (ret < 0) {
+@@ -477,7 +449,6 @@ enum {
+ static match_table_t tokens = {
+ 	{Opt_fd_dev_name, "fd_dev_name=%s"},
+ 	{Opt_fd_dev_size, "fd_dev_size=%s"},
+-	{Opt_fd_buffered_io, "fd_buffered_io=%d"},
+ 	{Opt_err, NULL}
+ };
+ 
+@@ -489,7 +460,7 @@ static ssize_t fd_set_configfs_dev_param
+ 	struct fd_dev *fd_dev = se_dev->se_dev_su_ptr;
+ 	char *orig, *ptr, *arg_p, *opts;
+ 	substring_t args[MAX_OPT_ARGS];
+-	int ret = 0, arg, token;
++	int ret = 0, token;
+ 
+ 	opts = kstrdup(page, GFP_KERNEL);
+ 	if (!opts)
+@@ -533,19 +504,6 @@ static ssize_t fd_set_configfs_dev_param
+ 			" bytes\n", fd_dev->fd_dev_size);
+ 		fd_dev->fbd_flags |= FBDF_HAS_SIZE;
+ 		break;
+-	case Opt_fd_buffered_io:
+-		match_int(args, &arg);
+-		if (arg != 1) {
+-			pr_err("bogus fd_buffered_io=%d value\n", arg);
+-			ret = -EINVAL;
+-			goto out;
+-		}
+-
+-		pr_debug("FILEIO: Using buffered I/O"
+-			" operations for struct fd_dev\n");
+-
+-		fd_dev->fbd_flags |= FDBD_USE_BUFFERED_IO;
+-		break;
+ 	default:
+ 		break;
+ 	}
+@@ -577,10 +535,8 @@ static ssize_t fd_show_configfs_dev_para
+ 	ssize_t bl = 0;
+ 
+ 	bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id);
+-	bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s\n",
+-		fd_dev->fd_dev_name, fd_dev->fd_dev_size,
+-		(fd_dev->fbd_flags & FDBD_USE_BUFFERED_IO) ?
+-		"Buffered" : "Synchronous");
++	bl += sprintf(b + bl, " File: %s Size: %llu Mode: O_DSYNC\n",
++		fd_dev->fd_dev_name, fd_dev->fd_dev_size);
+ 	return bl;
+ }
+ 
+--- a/drivers/target/target_core_file.h
++++ b/drivers/target/target_core_file.h
+@@ -18,7 +18,6 @@ struct fd_request {
+ 
+ #define FBDF_HAS_PATH 0x01
+ #define FBDF_HAS_SIZE 0x02
+-#define FDBD_USE_BUFFERED_IO 0x04
+ 
+ struct fd_dev {
+ 	u32 fbd_flags;
diff --git a/queue-3.4/virtio-blk-use-block-layer-provided-spinlock.patch b/queue-3.4/virtio-blk-use-block-layer-provided-spinlock.patch
new file mode 100644
index 00000000000..f2559bdeee9
--- /dev/null
+++ b/queue-3.4/virtio-blk-use-block-layer-provided-spinlock.patch
@@ -0,0 +1,103 @@
+From e551857183b52b1af787e35b25a4e556fa0c0c75 Mon Sep 17 00:00:00 2001
+From: Asias He
+Date: Fri, 25 May 2012 16:03:27 +0800
+Subject: virtio-blk: Use block layer provided spinlock
+
+From: Asias He
+
+commit 2c95a3290919541b846bee3e0fbaa75860929f53 upstream.
+
+Block layer will allocate a spinlock for the queue if the driver does
+not provide one in blk_init_queue().
+
+The reason to use the internal spinlock is that blk_cleanup_queue() will
+switch to use the internal spinlock in the cleanup code path.
+
+	if (q->queue_lock != &q->__queue_lock)
+		q->queue_lock = &q->__queue_lock;
+
+However, processes which are in D state might have taken the driver
+provided spinlock; when the processes wake up, they would release the
+block provided spinlock.
+
+=====================================
+[ BUG: bad unlock balance detected! ]
+3.4.0-rc7+ #238 Not tainted
+-------------------------------------
+fio/3587 is trying to release lock (&(&q->__queue_lock)->rlock) at:
+[] blk_queue_bio+0x2a2/0x380
+but there are no more locks to release!
+
+other info that might help us debug this:
+1 lock held by fio/3587:
+ #0: (&(&vblk->lock)->rlock){......}, at:
+[] get_request_wait+0x19a/0x250
+
+Other drivers use block layer provided spinlock as well, e.g. SCSI.
+
+Switching to the block layer provided spinlock saves a bit of memory and
+does not increase lock contention. Performance testing shows no real
+difference before and after this patch.
+
+Changes in v2: Improve commit log as Michael suggested.
+
+Cc: virtualization@lists.linux-foundation.org
+Cc: kvm@vger.kernel.org
+Signed-off-by: Asias He
+Acked-by: Michael S. Tsirkin
+Signed-off-by: Rusty Russell
+[bwh: Backported to 3.2: adjust context]
+Signed-off-by: Ben Hutchings
+Cc: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/block/virtio_blk.c | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+--- a/drivers/block/virtio_blk.c
++++ b/drivers/block/virtio_blk.c
+@@ -21,8 +21,6 @@ struct workqueue_struct *virtblk_wq;
+ 
+ struct virtio_blk
+ {
+-	spinlock_t lock;
+-
+ 	struct virtio_device *vdev;
+ 	struct virtqueue *vq;
+ 
+@@ -69,7 +67,7 @@ static void blk_done(struct virtqueue *v
+ 	unsigned int len;
+ 	unsigned long flags;
+ 
+-	spin_lock_irqsave(&vblk->lock, flags);
++	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
+ 	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
+ 		int error;
+ 
+@@ -104,7 +102,7 @@ static void blk_done(struct virtqueue *v
+ 	}
+ 	/* In case queue is stopped waiting for more buffers. */
+ 	blk_start_queue(vblk->disk->queue);
+-	spin_unlock_irqrestore(&vblk->lock, flags);
++	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
+ }
+ 
+ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
+@@ -438,7 +436,6 @@ static int __devinit virtblk_probe(struc
+ 	}
+ 
+ 	INIT_LIST_HEAD(&vblk->reqs);
+-	spin_lock_init(&vblk->lock);
+ 	vblk->vdev = vdev;
+ 	vblk->sg_elems = sg_elems;
+ 	sg_init_table(vblk->sg, vblk->sg_elems);
+@@ -463,7 +460,7 @@ static int __devinit virtblk_probe(struc
+ 		goto out_mempool;
+ 	}
+ 
+-	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
++	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
+ 	if (!q) {
+ 		err = -ENOMEM;
+ 		goto out_put_disk;