From: Greg Kroah-Hartman
Date: Tue, 24 Jun 2014 15:47:29 +0000 (-0400)
Subject: 3.15-stable patches
X-Git-Tag: v3.4.95~1
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=2c685889f878c6fa132230c30d0e289c1e1797f3;p=thirdparty%2Fkernel%2Fstable-queue.git

3.15-stable patches

added patches:
	slab-fix-oops-when-reading-proc-slab_allocators.patch
	tmpfs-zero_range-and-collapse_range-not-currently-supported.patch
---
diff --git a/queue-3.15/series b/queue-3.15/series
index 358fc164bdb..272bb73de3f 100644
--- a/queue-3.15/series
+++ b/queue-3.15/series
@@ -57,3 +57,5 @@ alsa-control-fix-replacing-user-controls.patch
 alsa-control-don-t-access-controls-outside-of-protected-regions.patch
 alsa-control-handle-numid-overflow.patch
 alsa-control-make-sure-that-id-index-does-not-overflow.patch
+tmpfs-zero_range-and-collapse_range-not-currently-supported.patch
+slab-fix-oops-when-reading-proc-slab_allocators.patch
diff --git a/queue-3.15/slab-fix-oops-when-reading-proc-slab_allocators.patch b/queue-3.15/slab-fix-oops-when-reading-proc-slab_allocators.patch
new file mode 100644
index 00000000000..eea7ab32fcd
--- /dev/null
+++ b/queue-3.15/slab-fix-oops-when-reading-proc-slab_allocators.patch
@@ -0,0 +1,282 @@
+From 03787301420376ae41fbaf4267f4a6253d152ac5 Mon Sep 17 00:00:00 2001
+From: Joonsoo Kim
+Date: Mon, 23 Jun 2014 13:22:06 -0700
+Subject: slab: fix oops when reading /proc/slab_allocators
+
+From: Joonsoo Kim
+
+commit 03787301420376ae41fbaf4267f4a6253d152ac5 upstream.
+
+Commit b1cb0982bdd6 ("change the management method of free objects of
+the slab") introduced a bug in the slab leak detector
+('/proc/slab_allocators').  The detector works as follows:
+
+ 1. traverse all objects on all the slabs.
+ 2. determine whether each object is active or not.
+ 3. if active, print who allocated the object.
+
+That commit changed how free objects are managed, so the logic that
+determines whether an object is active also changed.  Before, objects
+in the cpu caches were regarded as inactive; with that commit they are
+mistakenly regarded as active.
+
+This introduces a kernel oops when DEBUG_PAGEALLOC is enabled.  With
+DEBUG_PAGEALLOC, kernel_map_pages() is used to detect who corrupts free
+memory in the slab: the page table mapping of an object is removed
+while the object is free and restored while it is active.  When the
+slab leak detector checks an object in the cpu caches, it mistakenly
+thinks the object is active and tries to access the object's memory to
+retrieve the caller of the allocation.  At this point no page table
+mapping for the object exists, so an oops occurs.
+
+Following is the oops message reported by Dave.
+
+It blew up when something tried to read /proc/slab_allocators
+(Just cat it, and you should see the oops below)
+
+  Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
+  Modules linked in:
+  [snip...]
+  CPU: 1 PID: 9386 Comm: trinity-c33 Not tainted 3.14.0-rc5+ #131
+  task: ffff8801aa46e890 ti: ffff880076924000 task.ti: ffff880076924000
+  RIP: 0010:[] [] handle_slab+0x8a/0x180
+  RSP: 0018:ffff880076925de0 EFLAGS: 00010002
+  RAX: 0000000000001000 RBX: 0000000000000000 RCX: 000000005ce85ce7
+  RDX: ffffea00079be100 RSI: 0000000000001000 RDI: ffff880107458000
+  RBP: ffff880076925e18 R08: 0000000000000001 R09: 0000000000000000
+  R10: 0000000000000000 R11: 000000000000000f R12: ffff8801e6f84000
+  R13: ffffea00079be100 R14: ffff880107458000 R15: ffff88022bb8d2c0
+  FS: 00007fb769e45740(0000) GS:ffff88024d040000(0000) knlGS:0000000000000000
+  CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: ffff8801e6f84ff8 CR3: 00000000a22db000 CR4: 00000000001407e0
+  DR0: 0000000002695000 DR1: 0000000002695000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000070602
+  Call Trace:
+    leaks_show+0xce/0x240
+    seq_read+0x28e/0x490
+    proc_reg_read+0x3d/0x80
+    vfs_read+0x9b/0x160
+    SyS_read+0x58/0xb0
+    tracesys+0xd4/0xd9
+  Code: f5 00 00 00 0f 1f 44 00 00 48 63 c8 44 3b 0c 8a 0f 84 e3 00 00 00 83 c0 01 44 39 c0 72 eb 41 f6 47 1a 01 0f 84 e9 00 00 00 89 f0 <4d> 8b 4c 04 f8 4d 85 c9 0f 84 88 00 00 00 49 8b 7e 08 4d 8d 46
+  RIP handle_slab+0x8a/0x180
+
+To fix the problem, I introduce an object status buffer on each slab.
+With this we can track object status precisely, so the slab leak
+detector no longer accesses unmapped free objects and no kernel oops
+occurs.  The memory overhead of this fix is only imposed on
+CONFIG_DEBUG_SLAB_LEAK, which is mainly used for debugging, so it is
+not a big problem.
+
+Signed-off-by: Joonsoo Kim
+Reported-by: Dave Jones
+Reported-by: Tetsuo Handa
+Reviewed-by: Vladimir Davydov
+Cc: Christoph Lameter
+Cc: Pekka Enberg
+Cc: David Rientjes
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/slab.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 71 insertions(+), 19 deletions(-)
+
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -386,6 +386,39 @@ static void **dbg_userword(struct kmem_c
+
+ #endif
+
++#define OBJECT_FREE (0)
++#define OBJECT_ACTIVE (1)
++
++#ifdef CONFIG_DEBUG_SLAB_LEAK
++
++static void set_obj_status(struct page *page, int idx, int val)
++{
++	int freelist_size;
++	char *status;
++	struct kmem_cache *cachep = page->slab_cache;
++
++	freelist_size = cachep->num * sizeof(freelist_idx_t);
++	status = (char *)page->freelist + freelist_size;
++	status[idx] = val;
++}
++
++static inline unsigned int get_obj_status(struct page *page, int idx)
++{
++	int freelist_size;
++	char *status;
++	struct kmem_cache *cachep = page->slab_cache;
++
++	freelist_size = cachep->num * sizeof(freelist_idx_t);
++	status = (char *)page->freelist + freelist_size;
++
++	return status[idx];
++}
++
++#else
++static inline void set_obj_status(struct page *page, int idx, int val) {}
++
++#endif
++
+ /*
+  * Do not go above this order unless 0 objects fit into the slab or
+  * overridden on the command line.
+@@ -576,12 +609,30 @@ static inline struct array_cache *cpu_ca
+ 	return cachep->array[smp_processor_id()];
+ }
+
++static size_t calculate_freelist_size(int nr_objs, size_t align)
++{
++	size_t freelist_size;
++
++	freelist_size = nr_objs * sizeof(freelist_idx_t);
++	if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
++		freelist_size += nr_objs * sizeof(char);
++
++	if (align)
++		freelist_size = ALIGN(freelist_size, align);
++
++	return freelist_size;
++}
++
+ static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
+ 				size_t idx_size, size_t align)
+ {
+ 	int nr_objs;
++	size_t remained_size;
+ 	size_t freelist_size;
++	int extra_space = 0;
+
++	if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
++		extra_space = sizeof(char);
+ 	/*
+ 	 * Ignore padding for the initial guess. The padding
+ 	 * is at most @align-1 bytes, and @buffer_size is at
+@@ -590,14 +641,15 @@ static int calculate_nr_objs(size_t slab
+ 	 * into the memory allocation when taking the padding
+ 	 * into account.
+ 	 */
+-	nr_objs = slab_size / (buffer_size + idx_size);
++	nr_objs = slab_size / (buffer_size + idx_size + extra_space);
+
+ 	/*
+ 	 * This calculated number will be either the right
+ 	 * amount, or one greater than what we want.
+ 	 */
+-	freelist_size = slab_size - nr_objs * buffer_size;
+-	if (freelist_size < ALIGN(nr_objs * idx_size, align))
++	remained_size = slab_size - nr_objs * buffer_size;
++	freelist_size = calculate_freelist_size(nr_objs, align);
++	if (remained_size < freelist_size)
+ 		nr_objs--;
+
+ 	return nr_objs;
+@@ -635,7 +687,7 @@ static void cache_estimate(unsigned long
+ 	} else {
+ 		nr_objs = calculate_nr_objs(slab_size, buffer_size,
+ 					sizeof(freelist_idx_t), align);
+-		mgmt_size = ALIGN(nr_objs * sizeof(freelist_idx_t), align);
++		mgmt_size = calculate_freelist_size(nr_objs, align);
+ 	}
+ 	*num = nr_objs;
+ 	*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
+@@ -2032,13 +2084,16 @@ static size_t calculate_slab_order(struc
+ 			break;
+
+ 		if (flags & CFLGS_OFF_SLAB) {
++			size_t freelist_size_per_obj = sizeof(freelist_idx_t);
+ 			/*
+ 			 * Max number of objs-per-slab for caches which
+ 			 * use off-slab slabs. Needed to avoid a possible
+ 			 * looping condition in cache_grow().
+ 			 */
++			if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
++				freelist_size_per_obj += sizeof(char);
+ 			offslab_limit = size;
+-			offslab_limit /= sizeof(freelist_idx_t);
++			offslab_limit /= freelist_size_per_obj;
+
+ 			if (num > offslab_limit)
+ 				break;
+@@ -2285,8 +2340,7 @@ __kmem_cache_create (struct kmem_cache *
+ 	if (!cachep->num)
+ 		return -E2BIG;
+
+-	freelist_size =
+-		ALIGN(cachep->num * sizeof(freelist_idx_t), cachep->align);
++	freelist_size = calculate_freelist_size(cachep->num, cachep->align);
+
+ 	/*
+ 	 * If the slab has been placed off-slab, and we have enough space then
+@@ -2299,7 +2353,7 @@ __kmem_cache_create (struct kmem_cache *
+
+ 	if (flags & CFLGS_OFF_SLAB) {
+ 		/* really off slab. No need for manual alignment */
+-		freelist_size = cachep->num * sizeof(freelist_idx_t);
++		freelist_size = calculate_freelist_size(cachep->num, 0);
+
+ #ifdef CONFIG_PAGE_POISONING
+ 		/* If we're going to use the generic kernel_map_pages()
+@@ -2625,6 +2679,7 @@ static void cache_init_objs(struct kmem_
+ 		if (cachep->ctor)
+ 			cachep->ctor(objp);
+ #endif
++		set_obj_status(page, i, OBJECT_FREE);
+ 		set_free_obj(page, i, i);
+ 	}
+ }
+@@ -2833,6 +2888,7 @@ static void *cache_free_debugcheck(struc
+ 	BUG_ON(objnr >= cachep->num);
+ 	BUG_ON(objp != index_to_obj(cachep, page, objnr));
+
++	set_obj_status(page, objnr, OBJECT_FREE);
+ 	if (cachep->flags & SLAB_POISON) {
+ #ifdef CONFIG_DEBUG_PAGEALLOC
+ 		if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
+@@ -2966,6 +3022,8 @@ static inline void cache_alloc_debugchec
+ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
+ 				gfp_t flags, void *objp, unsigned long caller)
+ {
++	struct page *page;
++
+ 	if (!objp)
+ 		return objp;
+ 	if (cachep->flags & SLAB_POISON) {
+@@ -2996,6 +3054,9 @@ static void *cache_alloc_debugcheck_afte
+ 		*dbg_redzone1(cachep, objp) = RED_ACTIVE;
+ 		*dbg_redzone2(cachep, objp) = RED_ACTIVE;
+ 	}
++
++	page = virt_to_head_page(objp);
++	set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE);
+ 	objp += obj_offset(cachep);
+ 	if (cachep->ctor && cachep->flags & SLAB_POISON)
+ 		cachep->ctor(objp);
+@@ -4232,21 +4293,12 @@ static void handle_slab(unsigned long *n
+ 						struct page *page)
+ {
+ 	void *p;
+-	int i, j;
++	int i;
+
+ 	if (n[0] == n[1])
+ 		return;
+ 	for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
+-		bool active = true;
+-
+-		for (j = page->active; j < c->num; j++) {
+-			/* Skip freed item */
+-			if (get_free_obj(page, j) == i) {
+-				active = false;
+-				break;
+-			}
+-		}
+-		if (!active)
++		if (get_obj_status(page, i) != OBJECT_ACTIVE)
+ 			continue;
+
+ 		if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
diff --git a/queue-3.15/tmpfs-zero_range-and-collapse_range-not-currently-supported.patch b/queue-3.15/tmpfs-zero_range-and-collapse_range-not-currently-supported.patch
new file mode 100644
index 00000000000..8b34a485a05
--- /dev/null
+++ b/queue-3.15/tmpfs-zero_range-and-collapse_range-not-currently-supported.patch
@@ -0,0 +1,36 @@
+From 13ace4d0d9db40e10ecd66dfda14e297571be813 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Mon, 23 Jun 2014 13:22:03 -0700
+Subject: tmpfs: ZERO_RANGE and COLLAPSE_RANGE not currently supported
+
+From: Hugh Dickins
+
+commit 13ace4d0d9db40e10ecd66dfda14e297571be813 upstream.
+
+I was well aware of FALLOC_FL_ZERO_RANGE and FALLOC_FL_COLLAPSE_RANGE
+support being added to fallocate(); but didn't realize until now that I
+had been too stupid to future-proof shmem_fallocate() against new
+additions.  Return -EOPNOTSUPP instead of going on to ordinary
+fallocation.
+
+Signed-off-by: Hugh Dickins
+Reviewed-by: Lukas Czerner
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/shmem.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -1728,6 +1728,9 @@ static long shmem_fallocate(struct file
+ 	pgoff_t start, index, end;
+ 	int error;
+
++	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
++		return -EOPNOTSUPP;
++
+ 	mutex_lock(&inode->i_mutex);
+
+ 	if (mode & FALLOC_FL_PUNCH_HOLE) {
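
As a quick sanity check of the tmpfs change above, a small userspace program
along the lines of the sketch below can be used; it is illustrative only and
not part of either patch.  It assumes /dev/shm is a tmpfs mount and defines
FALLOC_FL_ZERO_RANGE locally in case the installed libc headers predate it.
On a kernel carrying this patch the call is expected to fail with EOPNOTSUPP
rather than fall through to ordinary fallocation.

/*
 * Illustrative only, not part of either patch: check from userspace that
 * tmpfs now rejects FALLOC_FL_ZERO_RANGE with EOPNOTSUPP.  Assumes
 * /dev/shm is a tmpfs mount.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifndef FALLOC_FL_ZERO_RANGE
#define FALLOC_FL_ZERO_RANGE 0x10	/* value from linux/falloc.h */
#endif

int main(void)
{
	char path[] = "/dev/shm/shmem-fallocate-XXXXXX";
	int fd = mkstemp(path);		/* temporary file on tmpfs */

	if (fd < 0) {
		perror("mkstemp");
		return 1;
	}
	unlink(path);			/* removed once the fd is closed */

	/* On a patched kernel this must not fall through to real fallocation. */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, 4096) < 0)
		printf("fallocate(ZERO_RANGE): %s (EOPNOTSUPP expected)\n",
		       strerror(errno));
	else
		printf("fallocate(ZERO_RANGE) unexpectedly succeeded\n");

	close(fd);
	return 0;
}

Build it with gcc and run it on the target kernel; a tmpfs without the patch
instead treats the request as an ordinary allocation and the call succeeds.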