1 From 5b77c7b8a855880bc956e31197262cc3c78391f6 Mon Sep 17 00:00:00 2001
2 From: Daniel Jordan <daniel.m.jordan@oracle.com>
3 Date: Tue, 5 Mar 2019 15:48:19 -0800
4 Subject: mm, swap: bounds check swap_info array accesses to avoid NULL derefs
6 [ Upstream commit c10d38cc8d3e43f946b6c2bf4602c86791587f30 ]
8 Dan Carpenter reports a potential NULL dereference in
9 get_swap_page_of_type:
11 Smatch complains that the NULL checks on "si" aren't consistent. This
12 seems like a real bug because we have not ensured that the type is
13 valid and so "si" can be NULL.
15 Add the missing check for NULL, taking care to use a read barrier to
16 ensure CPU1 observes CPU0's updates in the correct order:
18      CPU0                           CPU1
19      alloc_swap_info()              if (type >= nr_swapfiles)
20        swap_info[type] = p              /* handle invalid entry */
21        smp_wmb()                    smp_rmb()
22        ++nr_swapfiles               p = swap_info[type]
24 Without smp_rmb, CPU1 might observe CPU0's write to nr_swapfiles before
25 CPU0's write to swap_info[type] and read NULL from swap_info[type].
27 Ying Huang noticed other places in swapfile.c don't order these reads
28 properly. Introduce swap_type_to_swap_info to encourage correct usage.
30 Use READ_ONCE and WRITE_ONCE to follow the Linux Kernel Memory Model
31 (see tools/memory-model/Documentation/explanation.txt).
33 This ordering need not be enforced in places where swap_lock is held
34 (e.g. si_swapinfo) because swap_lock serializes updates to nr_swapfiles
35 and the swap_info array.
37 Link: http://lkml.kernel.org/r/20190131024410.29859-1-daniel.m.jordan@oracle.com
38 Fixes: ec8acf20afb8 ("swap: add per-partition lock for swapfile")
39 Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
40 Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
41 Suggested-by: "Huang, Ying" <ying.huang@intel.com>
42 Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
43 Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
44 Cc: Alan Stern <stern@rowland.harvard.edu>
45 Cc: Andi Kleen <ak@linux.intel.com>
46 Cc: Dave Hansen <dave.hansen@linux.intel.com>
47 Cc: Omar Sandoval <osandov@fb.com>
48 Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
49 Cc: Shaohua Li <shli@kernel.org>
50 Cc: Stephen Rothwell <sfr@canb.auug.org.au>
51 Cc: Tejun Heo <tj@kernel.org>
52 Cc: Will Deacon <will.deacon@arm.com>
53 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
54 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
55 Signed-off-by: Sasha Levin <sashal@kernel.org>
57 mm/swapfile.c | 51 +++++++++++++++++++++++++++++----------------------
58 1 file changed, 29 insertions(+), 22 deletions(-)
60 diff --git a/mm/swapfile.c b/mm/swapfile.c
61 index 340ef3177686..0047dcaf9369 100644
64 @@ -98,6 +98,15 @@ static atomic_t proc_poll_event = ATOMIC_INIT(0);
66 atomic_t nr_rotate_swap = ATOMIC_INIT(0);
68 +static struct swap_info_struct *swap_type_to_swap_info(int type)
70 + if (type >= READ_ONCE(nr_swapfiles))
73 + smp_rmb(); /* Pairs with smp_wmb in alloc_swap_info. */
74 + return READ_ONCE(swap_info[type]);
77 static inline unsigned char swap_count(unsigned char ent)
79 return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */
80 @@ -1030,12 +1039,14 @@ noswap:
81 /* The only caller of this function is now suspend routine */
82 swp_entry_t get_swap_page_of_type(int type)
84 - struct swap_info_struct *si;
85 + struct swap_info_struct *si = swap_type_to_swap_info(type);
88 - si = swap_info[type];
93 - if (si && (si->flags & SWP_WRITEOK)) {
94 + if (si->flags & SWP_WRITEOK) {
95 atomic_long_dec(&nr_swap_pages);
96 /* This is called for allocating swap entry, not cache */
97 offset = scan_swap_map(si, 1);
98 @@ -1046,6 +1057,7 @@ swp_entry_t get_swap_page_of_type(int type)
99 atomic_long_inc(&nr_swap_pages);
101 spin_unlock(&si->lock);
103 return (swp_entry_t) {0};
106 @@ -1057,9 +1069,9 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
109 type = swp_type(entry);
110 - if (type >= nr_swapfiles)
111 + p = swap_type_to_swap_info(type);
114 - p = swap_info[type];
115 if (!(p->flags & SWP_USED))
117 offset = swp_offset(entry);
118 @@ -1708,10 +1720,9 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
119 sector_t swapdev_block(int type, pgoff_t offset)
121 struct block_device *bdev;
122 + struct swap_info_struct *si = swap_type_to_swap_info(type);
124 - if ((unsigned int)type >= nr_swapfiles)
126 - if (!(swap_info[type]->flags & SWP_WRITEOK))
127 + if (!si || !(si->flags & SWP_WRITEOK))
129 return map_swap_entry(swp_entry(type, offset), &bdev);
131 @@ -2269,7 +2280,7 @@ static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
132 struct swap_extent *se;
135 - sis = swap_info[swp_type(entry)];
136 + sis = swp_swap_info(entry);
139 offset = swp_offset(entry);
140 @@ -2707,9 +2718,7 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
142 return SEQ_START_TOKEN;
144 - for (type = 0; type < nr_swapfiles; type++) {
145 - smp_rmb(); /* read nr_swapfiles before swap_info[type] */
146 - si = swap_info[type];
147 + for (type = 0; (si = swap_type_to_swap_info(type)); type++) {
148 if (!(si->flags & SWP_USED) || !si->swap_map)
151 @@ -2729,9 +2738,7 @@ static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
155 - for (; type < nr_swapfiles; type++) {
156 - smp_rmb(); /* read nr_swapfiles before swap_info[type] */
157 - si = swap_info[type];
158 + for (; (si = swap_type_to_swap_info(type)); type++) {
159 if (!(si->flags & SWP_USED) || !si->swap_map)
162 @@ -2838,14 +2845,14 @@ static struct swap_info_struct *alloc_swap_info(void)
164 if (type >= nr_swapfiles) {
166 - swap_info[type] = p;
167 + WRITE_ONCE(swap_info[type], p);
169 * Write swap_info[type] before nr_swapfiles, in case a
170 * racing procfs swap_start() or swap_next() is reading them.
171 * (We never shrink nr_swapfiles, we never free this entry.)
175 + WRITE_ONCE(nr_swapfiles, nr_swapfiles + 1);
179 @@ -3365,7 +3372,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
181 struct swap_info_struct *p;
182 struct swap_cluster_info *ci;
183 - unsigned long offset, type;
184 + unsigned long offset;
186 unsigned char has_cache;
188 @@ -3373,10 +3380,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
189 if (non_swap_entry(entry))
192 - type = swp_type(entry);
193 - if (type >= nr_swapfiles)
194 + p = swp_swap_info(entry);
197 - p = swap_info[type];
199 offset = swp_offset(entry);
200 if (unlikely(offset >= p->max))
202 @@ -3473,7 +3480,7 @@ int swapcache_prepare(swp_entry_t entry)
204 struct swap_info_struct *swp_swap_info(swp_entry_t entry)
206 - return swap_info[swp_type(entry)];
207 + return swap_type_to_swap_info(swp_type(entry));
210 struct swap_info_struct *page_swap_info(struct page *page)