From a95a4f76d4107424dfc4adc98ff9cddb33ca59c4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 24 May 2010 14:10:59 -0700 Subject: [PATCH] .32 patches --- ...rovide-stack-information-for-threads.patch | 203 ++++++++++++++++++ ...on-for-threads-and-its-fixup-commits.patch | 179 +++++++++++++++ queue-2.6.32/series | 2 + 3 files changed, 384 insertions(+) create mode 100644 queue-2.6.32/proc-partially-revert-procfs-provide-stack-information-for-threads.patch create mode 100644 queue-2.6.32/revert-procfs-provide-stack-information-for-threads-and-its-fixup-commits.patch diff --git a/queue-2.6.32/proc-partially-revert-procfs-provide-stack-information-for-threads.patch b/queue-2.6.32/proc-partially-revert-procfs-provide-stack-information-for-threads.patch new file mode 100644 index 00000000000..83665b8807c --- /dev/null +++ b/queue-2.6.32/proc-partially-revert-procfs-provide-stack-information-for-threads.patch @@ -0,0 +1,203 @@ +From 1306d603fcf1f6682f8575d1ff23631a24184b21 Mon Sep 17 00:00:00 2001 +From: KOSAKI Motohiro +Date: Fri, 8 Jan 2010 14:42:56 -0800 +Subject: proc: partially revert "procfs: provide stack information for threads" + +From: KOSAKI Motohiro + +commit 1306d603fcf1f6682f8575d1ff23631a24184b21 upstream. + +Commit d899bf7b (procfs: provide stack information for threads) introduced +to show stack information in /proc/{pid}/status. But it cause large +performance regression. Unfortunately /proc/{pid}/status is used ps +command too and ps is one of most important component. Because both to +take mmap_sem and page table walk are heavily operation. 
+ +If many processes run, the ps performance is, + +[before d899bf7b] + +% perf stat ps >/dev/null + + Performance counter stats for 'ps': + + 4090.435806 task-clock-msecs # 0.032 CPUs + 229 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 234 page-faults # 0.000 M/sec + 8587565207 cycles # 2099.425 M/sec + 9866662403 instructions # 1.149 IPC + 3789415411 cache-references # 926.409 M/sec + 30419509 cache-misses # 7.437 M/sec + + 128.859521955 seconds time elapsed + +[after d899bf7b] + +% perf stat ps > /dev/null + + Performance counter stats for 'ps': + + 4305.081146 task-clock-msecs # 0.028 CPUs + 480 context-switches # 0.000 M/sec + 2 CPU-migrations # 0.000 M/sec + 237 page-faults # 0.000 M/sec + 9021211334 cycles # 2095.480 M/sec + 10605887536 instructions # 1.176 IPC + 3612650999 cache-references # 839.160 M/sec + 23917502 cache-misses # 5.556 M/sec + + 152.277819582 seconds time elapsed + +Thus, this patch reverts it. Fortunately /proc/{pid}/task/{tid}/smaps +provides almost the same information, so we can use it instead. + +Commit d899bf7b introduced two features: + + 1) Add the annotation of [thread stack: xxxx] mark to + /proc/{pid}/task/{tid}/maps. + 2) Add StackUsage field to /proc/{pid}/status. + +I only revert (2), because I haven't seen (1) cause a regression. + +Signed-off-by: KOSAKI Motohiro +Cc: Stefani Seibold +Cc: Ingo Molnar +Cc: Peter Zijlstra +Cc: Alexey Dobriyan +Cc: "Eric W. 
Biederman" +Cc: Randy Dunlap +Cc: Andrew Morton +Cc: Andi Kleen +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/filesystems/proc.txt | 2 + fs/proc/array.c | 89 ------------------------------------- + 2 files changed, 91 deletions(-) + +--- a/Documentation/filesystems/proc.txt ++++ b/Documentation/filesystems/proc.txt +@@ -176,7 +176,6 @@ read the file /proc/PID/status: + CapBnd: ffffffffffffffff + voluntary_ctxt_switches: 0 + nonvoluntary_ctxt_switches: 1 +- Stack usage: 12 kB + + This shows you nearly the same information you would get if you viewed it with + the ps command. In fact, ps uses the proc file system to obtain its +@@ -230,7 +229,6 @@ Table 1-2: Contents of the statm files ( + Mems_allowed_list Same as previous, but in "list format" + voluntary_ctxt_switches number of voluntary context switches + nonvoluntary_ctxt_switches number of non voluntary context switches +- Stack usage: stack usage high water mark (round up to page size) + .............................................................................. 
+ + Table 1-3: Contents of the statm files (as of 2.6.8-rc3) +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -322,94 +322,6 @@ static inline void task_context_switch_c + p->nivcsw); + } + +-#ifdef CONFIG_MMU +- +-struct stack_stats { +- struct vm_area_struct *vma; +- unsigned long startpage; +- unsigned long usage; +-}; +- +-static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr, +- unsigned long end, struct mm_walk *walk) +-{ +- struct stack_stats *ss = walk->private; +- struct vm_area_struct *vma = ss->vma; +- pte_t *pte, ptent; +- spinlock_t *ptl; +- int ret = 0; +- +- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); +- for (; addr != end; pte++, addr += PAGE_SIZE) { +- ptent = *pte; +- +-#ifdef CONFIG_STACK_GROWSUP +- if (pte_present(ptent) || is_swap_pte(ptent)) +- ss->usage = addr - ss->startpage + PAGE_SIZE; +-#else +- if (pte_present(ptent) || is_swap_pte(ptent)) { +- ss->usage = ss->startpage - addr + PAGE_SIZE; +- pte++; +- ret = 1; +- break; +- } +-#endif +- } +- pte_unmap_unlock(pte - 1, ptl); +- cond_resched(); +- return ret; +-} +- +-static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma, +- struct task_struct *task) +-{ +- struct stack_stats ss; +- struct mm_walk stack_walk = { +- .pmd_entry = stack_usage_pte_range, +- .mm = vma->vm_mm, +- .private = &ss, +- }; +- +- if (!vma->vm_mm || is_vm_hugetlb_page(vma)) +- return 0; +- +- ss.vma = vma; +- ss.startpage = task->stack_start & PAGE_MASK; +- ss.usage = 0; +- +-#ifdef CONFIG_STACK_GROWSUP +- walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end, +- &stack_walk); +-#else +- walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE, +- &stack_walk); +-#endif +- return ss.usage; +-} +- +-static inline void task_show_stack_usage(struct seq_file *m, +- struct task_struct *task) +-{ +- struct vm_area_struct *vma; +- struct mm_struct *mm = get_task_mm(task); +- +- if (mm) { +- down_read(&mm->mmap_sem); +- vma = find_vma(mm, task->stack_start); +- 
if (vma) +- seq_printf(m, "Stack usage:\t%lu kB\n", +- get_stack_usage_in_bytes(vma, task) >> 10); +- +- up_read(&mm->mmap_sem); +- mmput(mm); +- } +-} +-#else +-static void task_show_stack_usage(struct seq_file *m, struct task_struct *task) +-{ +-} +-#endif /* CONFIG_MMU */ +- + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) + { +@@ -429,7 +341,6 @@ int proc_pid_status(struct seq_file *m, + task_show_regs(m, task); + #endif + task_context_switch_counts(m, task); +- task_show_stack_usage(m, task); + return 0; + } + diff --git a/queue-2.6.32/revert-procfs-provide-stack-information-for-threads-and-its-fixup-commits.patch b/queue-2.6.32/revert-procfs-provide-stack-information-for-threads-and-its-fixup-commits.patch new file mode 100644 index 00000000000..252ce7b9f5c --- /dev/null +++ b/queue-2.6.32/revert-procfs-provide-stack-information-for-threads-and-its-fixup-commits.patch @@ -0,0 +1,179 @@ +From 34441427aab4bdb3069a4ffcda69a99357abcb2e Mon Sep 17 00:00:00 2001 +From: Robin Holt +Date: Tue, 11 May 2010 14:06:46 -0700 +Subject: revert "procfs: provide stack information for threads" and its fixup commits + +From: Robin Holt + +commit 34441427aab4bdb3069a4ffcda69a99357abcb2e upstream. + +Originally, commit d899bf7b ("procfs: provide stack information for +threads") attempted to introduce a new feature for showing where the +threadstack was located and how many pages are being utilized by the +stack. + +Commit c44972f1 ("procfs: disable per-task stack usage on NOMMU") was +applied to fix the NO_MMU case. + +Commit 89240ba0 ("x86, fs: Fix x86 procfs stack information for threads on +64-bit") was applied to fix a bug in ia32 executables being loaded. + +Commit 9ebd4eba7 ("procfs: fix /proc/<pid>/stat stack pointer for kernel +threads") was applied to fix a bug which had kernel threads printing a +userland stack address. 
+ +Commit 1306d603f ('proc: partially revert "procfs: provide stack +information for threads"') was then applied to revert the stack pages +being used to solve a significant performance regression. + +This patch nearly undoes the effect of all these patches. + +The reason for reverting these is that they provide an unusable value in +field 28. For x86_64, a fork will result in the task->stack_start +value being updated to the current user top of stack and not the stack +start address. This unpredictability of the stack_start value makes +it worthless. That includes the intended use of showing how much stack +space a thread has. + +Other architectures will get different values. As an example, ia64 +gets 0. The do_fork() and copy_process() functions appear to treat the +stack_start and stack_size parameters as architecture specific. + +I only partially reverted c44972f1 ("procfs: disable per-task stack usage +on NOMMU"). If I had completely reverted it, I would have had to change +mm/Makefile to only build pagewalk.o when CONFIG_PROC_PAGE_MONITOR is +configured. Since I could not test the builds without significant effort, +I decided to not change mm/Makefile. + +I only partially reverted 89240ba0 ("x86, fs: Fix x86 procfs stack +information for threads on 64-bit"). I left the KSTK_ESP() change in +place as that seemed worthwhile. 
+ +Signed-off-by: Robin Holt +Cc: Stefani Seibold +Cc: KOSAKI Motohiro +Cc: Michal Simek +Cc: Ingo Molnar +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/filesystems/proc.txt | 3 +-- + fs/compat.c | 2 -- + fs/exec.c | 2 -- + fs/proc/array.c | 3 +-- + fs/proc/task_mmu.c | 19 ------------------- + include/linux/sched.h | 1 - + kernel/fork.c | 2 -- + 7 files changed, 2 insertions(+), 30 deletions(-) + +--- a/Documentation/filesystems/proc.txt ++++ b/Documentation/filesystems/proc.txt +@@ -307,7 +307,7 @@ address perms offset dev in + 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test + 0804a000-0806b000 rw-p 00000000 00:00 0 [heap] + a7cb1000-a7cb2000 ---p 00000000 00:00 0 +-a7cb2000-a7eb2000 rw-p 00000000 00:00 0 [threadstack:001ff4b4] ++a7cb2000-a7eb2000 rw-p 00000000 00:00 0 + a7eb2000-a7eb3000 ---p 00000000 00:00 0 + a7eb3000-a7ed5000 rw-p 00000000 00:00 0 + a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 +@@ -343,7 +343,6 @@ is not associated with a file: + [stack] = the stack of the main process + [vdso] = the "virtual dynamic shared object", + the kernel system call handler +- [threadstack:xxxxxxxx] = the stack of the thread, xxxxxxxx is the stack size + + or if empty, the mapping is anonymous. 
+ +--- a/fs/compat.c ++++ b/fs/compat.c +@@ -1532,8 +1532,6 @@ int compat_do_execve(char * filename, + if (retval < 0) + goto out; + +- current->stack_start = current->mm->start_stack; +- + /* execve succeeded */ + current->fs->in_exec = 0; + current->in_execve = 0; +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1379,8 +1379,6 @@ int do_execve(char * filename, + if (retval < 0) + goto out; + +- current->stack_start = current->mm->start_stack; +- + /* execve succeeded */ + current->fs->in_exec = 0; + current->in_execve = 0; +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -82,7 +82,6 @@ + #include + #include + #include +-#include + + #include + #include +@@ -482,7 +481,7 @@ static int do_task_stat(struct seq_file + rsslim, + mm ? mm->start_code : 0, + mm ? mm->end_code : 0, +- (permitted && mm) ? task->stack_start : 0, ++ (permitted && mm) ? mm->start_stack : 0, + esp, + eip, + /* The signal information here is obsolete. +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -243,25 +243,6 @@ static void show_map_vma(struct seq_file + } else if (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack) { + name = "[stack]"; +- } else { +- unsigned long stack_start; +- struct proc_maps_private *pmp; +- +- pmp = m->private; +- stack_start = pmp->task->stack_start; +- +- if (vma->vm_start <= stack_start && +- vma->vm_end >= stack_start) { +- pad_len_spaces(m, len); +- seq_printf(m, +- "[threadstack:%08lx]", +-#ifdef CONFIG_STACK_GROWSUP +- vma->vm_end - stack_start +-#else +- stack_start - vma->vm_start +-#endif +- ); +- } + } + } else { + name = "[vdso]"; +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1541,7 +1541,6 @@ struct task_struct { + /* bitmask of trace recursion */ + unsigned long trace_recursion; + #endif /* CONFIG_TRACING */ +- unsigned long stack_start; + }; + + /* Future-safe accessor for struct task_struct's cpus_allowed. 
*/ +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1123,8 +1123,6 @@ static struct task_struct *copy_process( + + p->bts = NULL; + +- p->stack_start = stack_start; +- + /* Perform scheduler related setup. Assign this task to a CPU. */ + sched_fork(p, clone_flags); + diff --git a/queue-2.6.32/series b/queue-2.6.32/series index 860a17a6921..4e9a7878f84 100644 --- a/queue-2.6.32/series +++ b/queue-2.6.32/series @@ -21,3 +21,5 @@ x86-cacheinfo-turn-off-l3-cache-index-disable-feature-in-virtualized-environment x86-amd-check-x86_feature_osvw-bit-before-accessing-osvw-msrs.patch btrfs-check-for-read-permission-on-src-file-in-the-clone-ioctl.patch alsa-hda-new-intel-hda-controller.patch +proc-partially-revert-procfs-provide-stack-information-for-threads.patch +revert-procfs-provide-stack-information-for-threads-and-its-fixup-commits.patch -- 2.47.3