]>
Commit | Line | Data |
---|---|---|
d7b4be12 GKH |
1 | From tytso@mit.edu Mon Apr 19 10:19:40 2010 |
2 | From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | |
3 | Date: Mon, 15 Mar 2010 20:25:55 -0400 | |
4 | Subject: ext4: invalidate pages if delalloc block allocation fails. | |
5 | To: stable@kernel.org | |
6 | Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> | |
7 | Message-ID: <1268699165-17461-2-git-send-email-tytso@mit.edu> | |
8 | ||
9 | ||
10 | From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | |
11 | ||
12 | commit c4a0c46ec92c194c873232b88debce4e1a448483 upstream. | |
13 | ||
14 | We are a bit aggressive in invalidating all the pages. But | |
15 | it is ok because we really don't know why the block allocation | |
16 | failed and it is better to come off the writeback path | |
17 | so that user can look for more info. | |
18 | ||
19 | Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | |
20 | Signed-off-by: Jayson R. King <dev@jaysonking.com> | |
21 | Signed-off-by: Theodore Ts'o <tytso@mit.edu> | |
22 | Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> | |
23 | ||
24 | --- | |
25 | fs/ext4/inode.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++-------- | |
26 | 1 file changed, 73 insertions(+), 12 deletions(-) | |
27 | ||
28 | --- a/fs/ext4/inode.c | |
29 | +++ b/fs/ext4/inode.c | |
30 | @@ -1821,6 +1821,39 @@ static inline void __unmap_underlying_bl | |
31 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | |
32 | } | |
33 | ||
34 | +static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | |
35 | + sector_t logical, long blk_cnt) | |
36 | +{ | |
37 | + int nr_pages, i; | |
38 | + pgoff_t index, end; | |
39 | + struct pagevec pvec; | |
40 | + struct inode *inode = mpd->inode; | |
41 | + struct address_space *mapping = inode->i_mapping; | |
42 | + | |
43 | + index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | |
44 | + end = (logical + blk_cnt - 1) >> | |
45 | + (PAGE_CACHE_SHIFT - inode->i_blkbits); | |
46 | + while (index <= end) { | |
47 | + nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | |
48 | + if (nr_pages == 0) | |
49 | + break; | |
50 | + for (i = 0; i < nr_pages; i++) { | |
51 | + struct page *page = pvec.pages[i]; | |
52 | + index = page->index; | |
53 | + if (index > end) | |
54 | + break; | |
55 | + index++; | |
56 | + | |
57 | + BUG_ON(!PageLocked(page)); | |
58 | + BUG_ON(PageWriteback(page)); | |
59 | + block_invalidatepage(page, 0); | |
60 | + ClearPageUptodate(page); | |
61 | + unlock_page(page); | |
62 | + } | |
63 | + } | |
64 | + return; | |
65 | +} | |
66 | + | |
67 | /* | |
68 | * mpage_da_map_blocks - go through given space | |
69 | * | |
70 | @@ -1830,7 +1863,7 @@ static inline void __unmap_underlying_bl | |
71 | * The function skips space we know is already mapped to disk blocks. | |
72 | * | |
73 | */ | |
74 | -static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |
75 | +static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |
76 | { | |
77 | int err = 0; | |
78 | struct buffer_head *lbh = &mpd->lbh; | |
79 | @@ -1841,7 +1874,7 @@ static void mpage_da_map_blocks(struct m | |
80 | * We consider only non-mapped and non-allocated blocks | |
81 | */ | |
82 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | |
83 | - return; | |
84 | + return 0; | |
85 | ||
86 | new.b_state = lbh->b_state; | |
87 | new.b_blocknr = 0; | |
88 | @@ -1852,10 +1885,38 @@ static void mpage_da_map_blocks(struct m | |
89 | * to write simply return | |
90 | */ | |
91 | if (!new.b_size) | |
92 | - return; | |
93 | + return 0; | |
94 | err = mpd->get_block(mpd->inode, next, &new, 1); | |
95 | - if (err) | |
96 | - return; | |
97 | + if (err) { | |
98 | + | |
99 | + /* If get block returns with error | |
100 | + * we simply return. Later writepage | |
101 | + * will redirty the page and writepages | |
102 | + * will find the dirty page again | |
103 | + */ | |
104 | + if (err == -EAGAIN) | |
105 | + return 0; | |
106 | + /* | |
107 | + * get block failure will cause us | |
108 | + * to loop in writepages. Because | |
109 | + * a_ops->writepage won't be able to | |
110 | + * make progress. The page will be redirtied | |
111 | + * by writepage and writepages will again | |
112 | + * try to write the same. | |
113 | + */ | |
114 | + printk(KERN_EMERG "%s block allocation failed for inode %lu " | |
115 | + "at logical offset %llu with max blocks " | |
116 | + "%zd with error %d\n", | |
117 | + __func__, mpd->inode->i_ino, | |
118 | + (unsigned long long)next, | |
119 | + lbh->b_size >> mpd->inode->i_blkbits, err); | |
120 | + printk(KERN_EMERG "This should not happen.!! " | |
121 | + "Data will be lost\n"); | |
122 | + /* invalidate all the pages */ | |
123 | + ext4_da_block_invalidatepages(mpd, next, | |
124 | + lbh->b_size >> mpd->inode->i_blkbits); | |
125 | + return err; | |
126 | + } | |
127 | BUG_ON(new.b_size == 0); | |
128 | ||
129 | if (buffer_new(&new)) | |
130 | @@ -1868,7 +1929,7 @@ static void mpage_da_map_blocks(struct m | |
131 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) | |
132 | mpage_put_bnr_to_bhs(mpd, next, &new); | |
133 | ||
134 | - return; | |
135 | + return 0; | |
136 | } | |
137 | ||
138 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | |
139 | @@ -1937,8 +1998,8 @@ flush_it: | |
140 | * We couldn't merge the block to our extent, so we | |
141 | * need to flush current extent and start new one | |
142 | */ | |
143 | - mpage_da_map_blocks(mpd); | |
144 | - mpage_da_submit_io(mpd); | |
145 | + if (mpage_da_map_blocks(mpd) == 0) | |
146 | + mpage_da_submit_io(mpd); | |
147 | mpd->io_done = 1; | |
148 | return; | |
149 | } | |
150 | @@ -1980,8 +2041,8 @@ static int __mpage_da_writepage(struct p | |
151 | * and start IO on them using writepage() | |
152 | */ | |
153 | if (mpd->next_page != mpd->first_page) { | |
154 | - mpage_da_map_blocks(mpd); | |
155 | - mpage_da_submit_io(mpd); | |
156 | + if (mpage_da_map_blocks(mpd) == 0) | |
157 | + mpage_da_submit_io(mpd); | |
158 | /* | |
159 | * skip rest of the page in the page_vec | |
160 | */ | |
161 | @@ -2102,8 +2163,8 @@ static int mpage_da_writepages(struct ad | |
162 | * Handle last extent of pages | |
163 | */ | |
164 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | |
165 | - mpage_da_map_blocks(&mpd); | |
166 | - mpage_da_submit_io(&mpd); | |
167 | + if (mpage_da_map_blocks(&mpd) == 0) | |
168 | + mpage_da_submit_io(&mpd); | |
169 | } | |
170 | ||
171 | wbc->nr_to_write = to_write - mpd.pages_written; |