From: Greg Kroah-Hartman
Date: Fri, 2 Aug 2019 15:48:30 +0000 (+0200)
Subject: drop 4.14 sched patch as it didn't apply.
X-Git-Tag: v5.2.6~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a63868bc4762f449b8ca5ac3566922338cfc0af8;p=thirdparty%2Fkernel%2Fstable-queue.git

drop 4.14 sched patch as it didn't apply.
---

diff --git a/queue-4.14/sched-fair-use-rcu-accessors-consistently-for-numa_group.patch b/queue-4.14/sched-fair-use-rcu-accessors-consistently-for-numa_group.patch
deleted file mode 100644
index 83e2528474b..00000000000
--- a/queue-4.14/sched-fair-use-rcu-accessors-consistently-for-numa_group.patch
+++ /dev/null
@@ -1,385 +0,0 @@
-From cb361d8cdef69990f6b4504dc1fd9a594d983c97 Mon Sep 17 00:00:00 2001
-From: Jann Horn
-Date: Tue, 16 Jul 2019 17:20:47 +0200
-Subject: sched/fair: Use RCU accessors consistently for ->numa_group
-
-From: Jann Horn
-
-commit cb361d8cdef69990f6b4504dc1fd9a594d983c97 upstream.
-
-The old code used RCU annotations and accessors inconsistently for
-->numa_group, which can lead to use-after-frees and NULL dereferences.
-
-Let all accesses to ->numa_group use proper RCU helpers to prevent such
-issues.
-
-Signed-off-by: Jann Horn
-Signed-off-by: Peter Zijlstra (Intel)
-Cc: Linus Torvalds
-Cc: Peter Zijlstra
-Cc: Petr Mladek
-Cc: Sergey Senozhatsky
-Cc: Thomas Gleixner
-Cc: Will Deacon
-Fixes: 8c8a743c5087 ("sched/numa: Use {cpu, pid} to create task groups for shared faults")
-Link: https://lkml.kernel.org/r/20190716152047.14424-3-jannh@google.com
-Signed-off-by: Ingo Molnar
-Signed-off-by: Greg Kroah-Hartman
-
-diff --git a/include/linux/sched.h b/include/linux/sched.h
-index 8dc1811487f5..9f51932bd543 100644
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -1092,7 +1092,15 @@ struct task_struct {
- 	u64 last_sum_exec_runtime;
- 	struct callback_head numa_work;
- 
--	struct numa_group *numa_group;
-+	/*
-+	 * This pointer is only modified for current in syscall and
-+	 * pagefault context (and for tasks being destroyed), so it can be read
-+	 * from any of the following contexts:
-+	 *  - RCU read-side critical section
-+	 *  - current->numa_group from everywhere
-+	 *  - task's runqueue locked, task not running
-+	 */
-+	struct numa_group __rcu *numa_group;
- 
- 	/*
- 	 * numa_faults is an array split into four regions:
-diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index 6adb0e0f5feb..bc9cfeaac8bd 100644
---- a/kernel/sched/fair.c
-+++ b/kernel/sched/fair.c
-@@ -1086,6 +1086,21 @@ struct numa_group {
- 	unsigned long faults[0];
- };
- 
-+/*
-+ * For functions that can be called in multiple contexts that permit reading
-+ * ->numa_group (see struct task_struct for locking rules).
-+ */
-+static struct numa_group *deref_task_numa_group(struct task_struct *p)
-+{
-+	return rcu_dereference_check(p->numa_group, p == current ||
-+		(lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
-+}
-+
-+static struct numa_group *deref_curr_numa_group(struct task_struct *p)
-+{
-+	return rcu_dereference_protected(p->numa_group, p == current);
-+}
-+
- static inline unsigned long group_faults_priv(struct numa_group *ng);
- static inline unsigned long group_faults_shared(struct numa_group *ng);
- 
-@@ -1129,10 +1144,12 @@ static unsigned int task_scan_start(struct task_struct *p)
- {
- 	unsigned long smin = task_scan_min(p);
- 	unsigned long period = smin;
-+	struct numa_group *ng;
- 
- 	/* Scale the maximum scan period with the amount of shared memory. */
--	if (p->numa_group) {
--		struct numa_group *ng = p->numa_group;
-+	rcu_read_lock();
-+	ng = rcu_dereference(p->numa_group);
-+	if (ng) {
- 		unsigned long shared = group_faults_shared(ng);
- 		unsigned long private = group_faults_priv(ng);
- 
-@@ -1140,6 +1157,7 @@ static unsigned int task_scan_start(struct task_struct *p)
- 		period *= shared + 1;
- 		period /= private + shared + 1;
- 	}
-+	rcu_read_unlock();
- 
- 	return max(smin, period);
- }
-@@ -1148,13 +1166,14 @@ static unsigned int task_scan_max(struct task_struct *p)
- {
- 	unsigned long smin = task_scan_min(p);
- 	unsigned long smax;
-+	struct numa_group *ng;
- 
- 	/* Watch for min being lower than max due to floor calculations */
- 	smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p);
- 
- 	/* Scale the maximum scan period with the amount of shared memory. */
--	if (p->numa_group) {
--		struct numa_group *ng = p->numa_group;
-+	ng = deref_curr_numa_group(p);
-+	if (ng) {
- 		unsigned long shared = group_faults_shared(ng);
- 		unsigned long private = group_faults_priv(ng);
- 		unsigned long period = smax;
-@@ -1186,7 +1205,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
- 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
- 	p->numa_work.next = &p->numa_work;
- 	p->numa_faults = NULL;
--	p->numa_group = NULL;
-+	RCU_INIT_POINTER(p->numa_group, NULL);
- 	p->last_task_numa_placement = 0;
- 	p->last_sum_exec_runtime = 0;
- 
-@@ -1233,7 +1252,16 @@ static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
- 
- pid_t task_numa_group_id(struct task_struct *p)
- {
--	return p->numa_group ? p->numa_group->gid : 0;
-+	struct numa_group *ng;
-+	pid_t gid = 0;
-+
-+	rcu_read_lock();
-+	ng = rcu_dereference(p->numa_group);
-+	if (ng)
-+		gid = ng->gid;
-+	rcu_read_unlock();
-+
-+	return gid;
- }
- 
- /*
-@@ -1258,11 +1286,13 @@ static inline unsigned long task_faults(struct task_struct *p, int nid)
- 
- static inline unsigned long group_faults(struct task_struct *p, int nid)
- {
--	if (!p->numa_group)
-+	struct numa_group *ng = deref_task_numa_group(p);
-+
-+	if (!ng)
- 		return 0;
- 
--	return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
--		p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)];
-+	return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
-+		ng->faults[task_faults_idx(NUMA_MEM, nid, 1)];
- }
- 
- static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
-@@ -1400,12 +1430,13 @@ static inline unsigned long task_weight(struct task_struct *p, int nid,
- static inline unsigned long group_weight(struct task_struct *p, int nid,
- 					 int dist)
- {
-+	struct numa_group *ng = deref_task_numa_group(p);
- 	unsigned long faults, total_faults;
- 
--	if (!p->numa_group)
-+	if (!ng)
- 		return 0;
- 
--	total_faults = p->numa_group->total_faults;
-+	total_faults = ng->total_faults;
- 
- 	if (!total_faults)
- 		return 0;
-@@ -1419,7 +1450,7 @@ static inline unsigned long group_weight(struct task_struct *p, int nid,
- bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
- 				int src_nid, int dst_cpu)
- {
--	struct numa_group *ng = p->numa_group;
-+	struct numa_group *ng = deref_curr_numa_group(p);
- 	int dst_nid = cpu_to_node(dst_cpu);
- 	int last_cpupid, this_cpupid;
- 
-@@ -1600,13 +1631,14 @@ static bool load_too_imbalanced(long src_load, long dst_load,
- static void task_numa_compare(struct task_numa_env *env,
- 			      long taskimp, long groupimp, bool maymove)
- {
-+	struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p);
- 	struct rq *dst_rq = cpu_rq(env->dst_cpu);
-+	long imp = p_ng ? groupimp : taskimp;
- 	struct task_struct *cur;
- 	long src_load, dst_load;
--	long load;
--	long imp = env->p->numa_group ? groupimp : taskimp;
--	long moveimp = imp;
- 	int dist = env->dist;
-+	long moveimp = imp;
-+	long load;
- 
- 	if (READ_ONCE(dst_rq->numa_migrate_on))
- 		return;
-@@ -1645,21 +1677,22 @@ static void task_numa_compare(struct task_numa_env *env,
- 	 * If dst and source tasks are in the same NUMA group, or not
- 	 * in any group then look only at task weights.
- 	 */
--	if (cur->numa_group == env->p->numa_group) {
-+	cur_ng = rcu_dereference(cur->numa_group);
-+	if (cur_ng == p_ng) {
- 		imp = taskimp + task_weight(cur, env->src_nid, dist) -
- 		      task_weight(cur, env->dst_nid, dist);
- 		/*
- 		 * Add some hysteresis to prevent swapping the
- 		 * tasks within a group over tiny differences.
- 		 */
--		if (cur->numa_group)
-+		if (cur_ng)
- 			imp -= imp / 16;
- 	} else {
- 		/*
- 		 * Compare the group weights. If a task is all by itself
- 		 * (not part of a group), use the task weight instead.
- 		 */
--		if (cur->numa_group && env->p->numa_group)
-+		if (cur_ng && p_ng)
- 			imp += group_weight(cur, env->src_nid, dist) -
- 			       group_weight(cur, env->dst_nid, dist);
- 		else
-@@ -1757,11 +1790,12 @@ static int task_numa_migrate(struct task_struct *p)
- 		.best_imp = 0,
- 		.best_cpu = -1,
- 	};
-+	unsigned long taskweight, groupweight;
- 	struct sched_domain *sd;
-+	long taskimp, groupimp;
-+	struct numa_group *ng;
- 	struct rq *best_rq;
--	unsigned long taskweight, groupweight;
- 	int nid, ret, dist;
--	long taskimp, groupimp;
- 
- 	/*
- 	 * Pick the lowest SD_NUMA domain, as that would have the smallest
-@@ -1807,7 +1841,8 @@ static int task_numa_migrate(struct task_struct *p)
- 	 * multiple NUMA nodes; in order to better consolidate the group,
- 	 * we need to check other locations.
- 	 */
--	if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
-+	ng = deref_curr_numa_group(p);
-+	if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
- 		for_each_online_node(nid) {
- 			if (nid == env.src_nid || nid == p->numa_preferred_nid)
- 				continue;
-@@ -1840,7 +1875,7 @@ static int task_numa_migrate(struct task_struct *p)
- 	 * A task that migrated to a second choice node will be better off
- 	 * trying for a better one later. Do not set the preferred node here.
- 	 */
--	if (p->numa_group) {
-+	if (ng) {
- 		if (env.best_cpu == -1)
- 			nid = env.src_nid;
- 		else
-@@ -2135,6 +2170,7 @@ static void task_numa_placement(struct task_struct *p)
- 	unsigned long total_faults;
- 	u64 runtime, period;
- 	spinlock_t *group_lock = NULL;
-+	struct numa_group *ng;
- 
- 	/*
- 	 * The p->mm->numa_scan_seq field gets updated without
-@@ -2152,8 +2188,9 @@ static void task_numa_placement(struct task_struct *p)
- 	runtime = numa_get_avg_runtime(p, &period);
- 
- 	/* If the task is part of a group prevent parallel updates to group stats */
--	if (p->numa_group) {
--		group_lock = &p->numa_group->lock;
-+	ng = deref_curr_numa_group(p);
-+	if (ng) {
-+		group_lock = &ng->lock;
- 		spin_lock_irq(group_lock);
- 	}
- 
-@@ -2194,7 +2231,7 @@ static void task_numa_placement(struct task_struct *p)
- 			p->numa_faults[cpu_idx] += f_diff;
- 			faults += p->numa_faults[mem_idx];
- 			p->total_numa_faults += diff;
--			if (p->numa_group) {
-+			if (ng) {
- 				/*
- 				 * safe because we can only change our own group
- 				 *
- 				 * nid and priv in a specific region because it
- 				 * is at the beginning of the numa_faults array.
- 				 */
--				p->numa_group->faults[mem_idx] += diff;
--				p->numa_group->faults_cpu[mem_idx] += f_diff;
--				p->numa_group->total_faults += diff;
--				group_faults += p->numa_group->faults[mem_idx];
-+				ng->faults[mem_idx] += diff;
-+				ng->faults_cpu[mem_idx] += f_diff;
-+				ng->total_faults += diff;
-+				group_faults += ng->faults[mem_idx];
- 			}
- 		}
- 
--		if (!p->numa_group) {
-+		if (!ng) {
- 			if (faults > max_faults) {
- 				max_faults = faults;
- 				max_nid = nid;
-@@ -2220,8 +2257,8 @@ static void task_numa_placement(struct task_struct *p)
- 		}
- 	}
- 
--	if (p->numa_group) {
--		numa_group_count_active_nodes(p->numa_group);
-+	if (ng) {
-+		numa_group_count_active_nodes(ng);
- 		spin_unlock_irq(group_lock);
- 		max_nid = preferred_group_nid(p, max_nid);
- 	}
-@@ -2255,7 +2292,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
- 	int cpu = cpupid_to_cpu(cpupid);
- 	int i;
- 
--	if (unlikely(!p->numa_group)) {
-+	if (unlikely(!deref_curr_numa_group(p))) {
- 		unsigned int size = sizeof(struct numa_group) +
- 				    4*nr_node_ids*sizeof(unsigned long);
- 
-@@ -2291,7 +2328,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
- 	if (!grp)
- 		goto no_join;
- 
--	my_grp = p->numa_group;
-+	my_grp = deref_curr_numa_group(p);
- 	if (grp == my_grp)
- 		goto no_join;
- 
-@@ -2362,7 +2399,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
-  */
- void task_numa_free(struct task_struct *p, bool final)
- {
--	struct numa_group *grp = p->numa_group;
-+	/* safe: p either is current or is being freed by current */
-+	struct numa_group *grp = rcu_dereference_raw(p->numa_group);
- 	unsigned long *numa_faults = p->numa_faults;
- 	unsigned long flags;
- 	int i;
-@@ -2442,7 +2480,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
- 	 * actively using should be counted as local. This allows the
- 	 * scan rate to slow down when a workload has settled down.
- 	 */
--	ng = p->numa_group;
-+	ng = deref_curr_numa_group(p);
- 	if (!priv && !local && ng && ng->active_nodes > 1 &&
- 				numa_is_active_node(cpu_node, ng) &&
- 				numa_is_active_node(mem_node, ng))
-@@ -10460,18 +10498,22 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
- {
- 	int node;
- 	unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
-+	struct numa_group *ng;
- 
-+	rcu_read_lock();
-+	ng = rcu_dereference(p->numa_group);
- 	for_each_online_node(node) {
- 		if (p->numa_faults) {
- 			tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
- 			tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
- 		}
--		if (p->numa_group) {
--			gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
--			gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
-+		if (ng) {
-+			gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)],
-+			gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)];
- 		}
- 		print_numa_stats(m, node, tsf, tpf, gsf, gpf);
- 	}
-+	rcu_read_unlock();
- }
- #endif /* CONFIG_NUMA_BALANCING */
- #endif /* CONFIG_SCHED_DEBUG */
diff --git a/queue-4.14/series b/queue-4.14/series
index 387cb752357..59be9bb9b13 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -23,4 +23,3 @@ drivers-pps-pps.c-clear-offset-flags-in-pps_setparams-ioctl.patch
 fix-allyesconfig-output.patch
 ceph-hold-i_ceph_lock-when-removing-caps-for-freeing-inode.patch
 ip_tunnel-allow-not-to-count-pkts-on-tstats-by-setting-skb-s-dev-to-null.patch
-sched-fair-use-rcu-accessors-consistently-for-numa_group.patch
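For readers unfamiliar with the pattern the dropped patch introduces, the sketch below is a minimal user-space analogy (not part of the commit or the kernel): it shows the general idea of funnelling every access to a shared pointer through small accessor helpers so the publish/consume discipline lives in one place, the way deref_task_numa_group()/deref_curr_numa_group() wrap ->numa_group. It uses plain C11 atomics as a stand-in for rcu_assign_pointer()/rcu_dereference(), and the type and helper names (struct group, publish_group, read_group) are hypothetical.

/*
 * Illustrative sketch only: C11 atomics stand in for the kernel's RCU
 * primitives; all names here are made up for the example.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct group {
	int gid;
};

/* The shared pointer is only ever touched through the helpers below. */
static _Atomic(struct group *) shared_group;

/* Writer side: publish a fully initialised object (cf. rcu_assign_pointer()). */
static void publish_group(struct group *g)
{
	atomic_store_explicit(&shared_group, g, memory_order_release);
}

/*
 * Reader side: the one place that encodes the required memory ordering
 * (cf. rcu_dereference() hidden behind deref_task_numa_group()).
 */
static struct group *read_group(void)
{
	return atomic_load_explicit(&shared_group, memory_order_acquire);
}

int main(void)
{
	struct group *g = malloc(sizeof(*g));

	if (!g)
		return 1;
	g->gid = 42;
	publish_group(g);

	struct group *seen = read_group();

	printf("gid=%d\n", seen ? seen->gid : 0);

	/*
	 * Single-threaded here, so freeing immediately is safe; with real
	 * concurrent readers the kernel defers the free via synchronize_rcu()
	 * or call_rcu(), which is why ->numa_group needs consistent RCU
	 * accessors in the first place.
	 */
	publish_group(NULL);
	free(g);
	return 0;
}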