--- /dev/null
+From c11f1df5003d534fd067f0168bfad7befffb3b5c Mon Sep 17 00:00:00 2001
+From: Sachin Prabhu <sprabhu@redhat.com>
+Date: Tue, 11 Mar 2014 16:11:47 +0000
+Subject: cifs: Wait for writebacks to complete before attempting write.
+
+From: Sachin Prabhu <sprabhu@redhat.com>
+
+commit c11f1df5003d534fd067f0168bfad7befffb3b5c upstream.
+
+Problem reported in Red Hat bz 1040329 for strict writes where we cache
+only when we hold oplock and write direct to the server when we don't.
+
+When we receive an oplock break, we first change the oplock value for
+the inode in cifsInodeInfo->oplock to indicate that we no longer hold
+the oplock before we enqueue a task to flush changes to the backing
+device. Once we have completed flushing the changes, we return the
+oplock to the server.
+
+There are 2 ways here where we can have data corruption
+1) While we flush changes to the backing device as part of the oplock
+break, we can have processes write to the file. These writes check for
+the oplock, find none and attempt to write directly to the server.
+These direct writes made while we are flushing from cache could be
+overwritten by data being flushed from the cache causing data
+corruption.
+2) While a thread runs in cifs_strict_writev, the machine could receive
+and process an oplock break after the thread has checked the oplock and
+found that it allows us to cache and before we have made changes to the
+cache. In that case, we end up with a dirty page in cache when we
+shouldn't have any. This will be flushed later and will overwrite all
+subsequent writes to the part of the file represented by this page.
+
+Before making any writes to the server, we need to confirm that we are
+not in the process of flushing data to the server and if we are, we
+should wait until the process is complete before we attempt the write.
+We should also wait for existing writes to complete before we process
+an oplock break request which changes oplock values.
+
+We add a version specific downgrade_oplock() operation to allow for
+differences in the oplock values set for the different smb versions.
+
+Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@redhat.com>
+Reviewed-by: Pavel Shilovsky <piastry@etersoft.ru>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifsfs.c | 14 +++++++++
+ fs/cifs/cifsglob.h | 8 +++++
+ fs/cifs/cifsproto.h | 3 ++
+ fs/cifs/file.c | 31 +++++++++++++++++++--
+ fs/cifs/misc.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++--
+ fs/cifs/smb1ops.c | 11 +++++++
+ fs/cifs/smb2misc.c | 18 ++++++++++--
+ fs/cifs/smb2ops.c | 14 +++++++++
+ 8 files changed, 164 insertions(+), 9 deletions(-)
+
+--- a/fs/cifs/cifsfs.c
++++ b/fs/cifs/cifsfs.c
+@@ -253,6 +253,11 @@ cifs_alloc_inode(struct super_block *sb)
+ cifs_set_oplock_level(cifs_inode, 0);
+ cifs_inode->delete_pending = false;
+ cifs_inode->invalid_mapping = false;
++ clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cifs_inode->flags);
++ clear_bit(CIFS_INODE_PENDING_WRITERS, &cifs_inode->flags);
++ clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cifs_inode->flags);
++ spin_lock_init(&cifs_inode->writers_lock);
++ cifs_inode->writers = 0;
+ cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
+ cifs_inode->server_eof = 0;
+ cifs_inode->uniqueid = 0;
+@@ -731,19 +736,26 @@ static ssize_t cifs_file_aio_write(struc
+ unsigned long nr_segs, loff_t pos)
+ {
+ struct inode *inode = file_inode(iocb->ki_filp);
++ struct cifsInodeInfo *cinode = CIFS_I(inode);
+ ssize_t written;
+ int rc;
+
++ written = cifs_get_writer(cinode);
++ if (written)
++ return written;
++
+ written = generic_file_aio_write(iocb, iov, nr_segs, pos);
+
+ if (CIFS_CACHE_WRITE(CIFS_I(inode)))
+- return written;
++ goto out;
+
+ rc = filemap_fdatawrite(inode->i_mapping);
+ if (rc)
+ cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n",
+ rc, inode);
+
++out:
++ cifs_put_writer(cinode);
+ return written;
+ }
+
+--- a/fs/cifs/cifsglob.h
++++ b/fs/cifs/cifsglob.h
+@@ -228,6 +228,8 @@ struct smb_version_operations {
+ /* verify the message */
+ int (*check_message)(char *, unsigned int);
+ bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
++ void (*downgrade_oplock)(struct TCP_Server_Info *,
++ struct cifsInodeInfo *, bool);
+ /* process transaction2 response */
+ bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *,
+ char *, int);
+@@ -1113,6 +1115,12 @@ struct cifsInodeInfo {
+ unsigned int epoch; /* used to track lease state changes */
+ bool delete_pending; /* DELETE_ON_CLOSE is set */
+ bool invalid_mapping; /* pagecache is invalid */
++ unsigned long flags;
++#define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */
++#define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */
++#define CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2 (2) /* Downgrade oplock to L2 */
++ spinlock_t writers_lock;
++ unsigned int writers; /* Number of writers on this inode */
+ unsigned long time; /* jiffies of last update of inode */
+ u64 server_eof; /* current file size on server -- protected by i_lock */
+ u64 uniqueid; /* server inode number */
+--- a/fs/cifs/cifsproto.h
++++ b/fs/cifs/cifsproto.h
+@@ -127,6 +127,9 @@ extern u64 cifs_UnixTimeToNT(struct time
+ extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
+ int offset);
+ extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
++extern int cifs_get_writer(struct cifsInodeInfo *cinode);
++extern void cifs_put_writer(struct cifsInodeInfo *cinode);
++extern void cifs_done_oplock_break(struct cifsInodeInfo *cinode);
+ extern int cifs_unlock_range(struct cifsFileInfo *cfile,
+ struct file_lock *flock, const unsigned int xid);
+ extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);
+--- a/fs/cifs/file.c
++++ b/fs/cifs/file.c
+@@ -2621,12 +2621,20 @@ cifs_strict_writev(struct kiocb *iocb, c
+ struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+ ssize_t written;
+
++ written = cifs_get_writer(cinode);
++ if (written)
++ return written;
++
+ if (CIFS_CACHE_WRITE(cinode)) {
+ if (cap_unix(tcon->ses) &&
+ (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
+- && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+- return generic_file_aio_write(iocb, iov, nr_segs, pos);
+- return cifs_writev(iocb, iov, nr_segs, pos);
++ && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
++ written = generic_file_aio_write(
++ iocb, iov, nr_segs, pos);
++ goto out;
++ }
++ written = cifs_writev(iocb, iov, nr_segs, pos);
++ goto out;
+ }
+ /*
+ * For non-oplocked files in strict cache mode we need to write the data
+@@ -2646,6 +2654,8 @@ cifs_strict_writev(struct kiocb *iocb, c
+ inode);
+ cinode->oplock = 0;
+ }
++out:
++ cifs_put_writer(cinode);
+ return written;
+ }
+
+@@ -3657,6 +3667,13 @@ static int cifs_launder_page(struct page
+ return rc;
+ }
+
++static int
++cifs_pending_writers_wait(void *unused)
++{
++ schedule();
++ return 0;
++}
++
+ void cifs_oplock_break(struct work_struct *work)
+ {
+ struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
+@@ -3664,8 +3681,15 @@ void cifs_oplock_break(struct work_struc
+ struct inode *inode = cfile->dentry->d_inode;
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
+ struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
++ struct TCP_Server_Info *server = tcon->ses->server;
+ int rc = 0;
+
++ wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
++ cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
++
++ server->ops->downgrade_oplock(server, cinode,
++ test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
++
+ if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
+ cifs_has_mand_locks(cinode)) {
+ cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
+@@ -3702,6 +3726,7 @@ void cifs_oplock_break(struct work_struc
+ cinode);
+ cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
+ }
++ cifs_done_oplock_break(cinode);
+ }
+
+ /*
+--- a/fs/cifs/misc.c
++++ b/fs/cifs/misc.c
+@@ -466,8 +466,22 @@ is_valid_oplock_break(char *buffer, stru
+ cifs_dbg(FYI, "file id match, oplock break\n");
+ pCifsInode = CIFS_I(netfile->dentry->d_inode);
+
+- cifs_set_oplock_level(pCifsInode,
+- pSMB->OplockLevel ? OPLOCK_READ : 0);
++ set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
++ &pCifsInode->flags);
++
++ /*
++ * Set flag if the server downgrades the oplock
++ * to L2 else clear.
++ */
++ if (pSMB->OplockLevel)
++ set_bit(
++ CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
++ &pCifsInode->flags);
++ else
++ clear_bit(
++ CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
++ &pCifsInode->flags);
++
+ queue_work(cifsiod_wq,
+ &netfile->oplock_break);
+ netfile->oplock_break_cancelled = false;
+@@ -551,6 +565,62 @@ void cifs_set_oplock_level(struct cifsIn
+ cinode->oplock = 0;
+ }
+
++static int
++cifs_oplock_break_wait(void *unused)
++{
++ schedule();
++ return signal_pending(current) ? -ERESTARTSYS : 0;
++}
++
++/*
++ * We wait for oplock breaks to be processed before we attempt to perform
++ * writes.
++ */
++int cifs_get_writer(struct cifsInodeInfo *cinode)
++{
++ int rc;
++
++start:
++ rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
++ cifs_oplock_break_wait, TASK_KILLABLE);
++ if (rc)
++ return rc;
++
++ spin_lock(&cinode->writers_lock);
++ if (!cinode->writers)
++ set_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
++ cinode->writers++;
++ /* Check to see if we have started servicing an oplock break */
++ if (test_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags)) {
++ cinode->writers--;
++ if (cinode->writers == 0) {
++ clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
++ wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS);
++ }
++ spin_unlock(&cinode->writers_lock);
++ goto start;
++ }
++ spin_unlock(&cinode->writers_lock);
++ return 0;
++}
++
++void cifs_put_writer(struct cifsInodeInfo *cinode)
++{
++ spin_lock(&cinode->writers_lock);
++ cinode->writers--;
++ if (cinode->writers == 0) {
++ clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
++ wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS);
++ }
++ spin_unlock(&cinode->writers_lock);
++}
++
++void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
++{
++ clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
++ wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK);
++}
++
+ bool
+ backup_cred(struct cifs_sb_info *cifs_sb)
+ {
+--- a/fs/cifs/smb1ops.c
++++ b/fs/cifs/smb1ops.c
+@@ -372,6 +372,16 @@ coalesce_t2(char *second_buf, struct smb
+ return 0;
+ }
+
++static void
++cifs_downgrade_oplock(struct TCP_Server_Info *server,
++ struct cifsInodeInfo *cinode, bool set_level2)
++{
++ if (set_level2)
++ cifs_set_oplock_level(cinode, OPLOCK_READ);
++ else
++ cifs_set_oplock_level(cinode, 0);
++}
++
+ static bool
+ cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server,
+ char *buf, int malformed)
+@@ -1019,6 +1029,7 @@ struct smb_version_operations smb1_opera
+ .clear_stats = cifs_clear_stats,
+ .print_stats = cifs_print_stats,
+ .is_oplock_break = is_valid_oplock_break,
++ .downgrade_oplock = cifs_downgrade_oplock,
+ .check_trans2 = cifs_check_trans2,
+ .need_neg = cifs_need_neg,
+ .negotiate = cifs_negotiate,
+--- a/fs/cifs/smb2misc.c
++++ b/fs/cifs/smb2misc.c
+@@ -575,9 +575,21 @@ smb2_is_valid_oplock_break(char *buffer,
+ else
+ cfile->oplock_break_cancelled = false;
+
+- server->ops->set_oplock_level(cinode,
+- rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0,
+- 0, NULL);
++ set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
++ &cinode->flags);
++
++ /*
++ * Set flag if the server downgrades the oplock
++ * to L2 else clear.
++ */
++ if (rsp->OplockLevel)
++ set_bit(
++ CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
++ &cinode->flags);
++ else
++ clear_bit(
++ CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
++ &cinode->flags);
+
+ queue_work(cifsiod_wq, &cfile->oplock_break);
+
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -905,6 +905,17 @@ smb2_query_symlink(const unsigned int xi
+ }
+
+ static void
++smb2_downgrade_oplock(struct TCP_Server_Info *server,
++ struct cifsInodeInfo *cinode, bool set_level2)
++{
++ if (set_level2)
++ server->ops->set_oplock_level(cinode, SMB2_OPLOCK_LEVEL_II,
++ 0, NULL);
++ else
++ server->ops->set_oplock_level(cinode, 0, 0, NULL);
++}
++
++static void
+ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
+ unsigned int epoch, bool *purge_cache)
+ {
+@@ -1110,6 +1121,7 @@ struct smb_version_operations smb20_oper
+ .clear_stats = smb2_clear_stats,
+ .print_stats = smb2_print_stats,
+ .is_oplock_break = smb2_is_valid_oplock_break,
++ .downgrade_oplock = smb2_downgrade_oplock,
+ .need_neg = smb2_need_neg,
+ .negotiate = smb2_negotiate,
+ .negotiate_wsize = smb2_negotiate_wsize,
+@@ -1184,6 +1196,7 @@ struct smb_version_operations smb21_oper
+ .clear_stats = smb2_clear_stats,
+ .print_stats = smb2_print_stats,
+ .is_oplock_break = smb2_is_valid_oplock_break,
++ .downgrade_oplock = smb2_downgrade_oplock,
+ .need_neg = smb2_need_neg,
+ .negotiate = smb2_negotiate,
+ .negotiate_wsize = smb2_negotiate_wsize,
+@@ -1259,6 +1272,7 @@ struct smb_version_operations smb30_oper
+ .print_stats = smb2_print_stats,
+ .dump_share_caps = smb2_dump_share_caps,
+ .is_oplock_break = smb2_is_valid_oplock_break,
++ .downgrade_oplock = smb2_downgrade_oplock,
+ .need_neg = smb2_need_neg,
+ .negotiate = smb2_negotiate,
+ .negotiate_wsize = smb2_negotiate_wsize,
--- /dev/null
+From 05a812ac474d0d6aef6d54b66bb08b81abde79c6 Mon Sep 17 00:00:00 2001
+From: Vladimir Murzin <murzin.v@gmail.com>
+Date: Sun, 27 Apr 2014 10:09:12 +0100
+Subject: xen/events/fifo: correctly align bitops
+
+From: Vladimir Murzin <murzin.v@gmail.com>
+
+commit 05a812ac474d0d6aef6d54b66bb08b81abde79c6 upstream.
+
+FIFO event channels require bitops on 32-bit aligned values (the event
+words). Linux's bitops require unsigned long alignment which may be
+64-bits.
+
+On arm64 an incorrectly unaligned access will fault.
+
+Fix this by aligning the bitops along with an adjustment for bit
+position and using an unsigned long for the local copy of the ready
+word.
+
+Signed-off-by: Vladimir Murzin <murzin.v@gmail.com>
+Tested-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
+Reviewed-by: Ian Campbell <ian.campbell@citrix.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/xen/events/events_fifo.c | 41 ++++++++++++++++++++++++++++-----------
+ 1 file changed, 30 insertions(+), 11 deletions(-)
+
+--- a/drivers/xen/events/events_fifo.c
++++ b/drivers/xen/events/events_fifo.c
+@@ -66,7 +66,22 @@ static DEFINE_PER_CPU(struct evtchn_fifo
+ static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES] __read_mostly;
+ static unsigned event_array_pages __read_mostly;
+
++/*
++ * sync_set_bit() and friends must be unsigned long aligned on non-x86
++ * platforms.
++ */
++#if !defined(CONFIG_X86) && BITS_PER_LONG > 32
++
++#define BM(w) (unsigned long *)((unsigned long)w & ~0x7UL)
++#define EVTCHN_FIFO_BIT(b, w) \
++ (((unsigned long)w & 0x4UL) ? (EVTCHN_FIFO_ ##b + 32) : EVTCHN_FIFO_ ##b)
++
++#else
++
+ #define BM(w) ((unsigned long *)(w))
++#define EVTCHN_FIFO_BIT(b, w) EVTCHN_FIFO_ ##b
++
++#endif
+
+ static inline event_word_t *event_word_from_port(unsigned port)
+ {
+@@ -161,33 +176,38 @@ static void evtchn_fifo_bind_to_cpu(stru
+ static void evtchn_fifo_clear_pending(unsigned port)
+ {
+ event_word_t *word = event_word_from_port(port);
+- sync_clear_bit(EVTCHN_FIFO_PENDING, BM(word));
++ sync_clear_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
+ }
+
+ static void evtchn_fifo_set_pending(unsigned port)
+ {
+ event_word_t *word = event_word_from_port(port);
+- sync_set_bit(EVTCHN_FIFO_PENDING, BM(word));
++ sync_set_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
+ }
+
+ static bool evtchn_fifo_is_pending(unsigned port)
+ {
+ event_word_t *word = event_word_from_port(port);
+- return sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
++ return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
+ }
+
+ static bool evtchn_fifo_test_and_set_mask(unsigned port)
+ {
+ event_word_t *word = event_word_from_port(port);
+- return sync_test_and_set_bit(EVTCHN_FIFO_MASKED, BM(word));
++ return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
+ }
+
+ static void evtchn_fifo_mask(unsigned port)
+ {
+ event_word_t *word = event_word_from_port(port);
+- sync_set_bit(EVTCHN_FIFO_MASKED, BM(word));
++ sync_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
+ }
+
++static bool evtchn_fifo_is_masked(unsigned port)
++{
++ event_word_t *word = event_word_from_port(port);
++ return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
++}
+ /*
+ * Clear MASKED, spinning if BUSY is set.
+ */
+@@ -211,7 +231,7 @@ static void evtchn_fifo_unmask(unsigned
+ BUG_ON(!irqs_disabled());
+
+ clear_masked(word);
+- if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))) {
++ if (evtchn_fifo_is_pending(port)) {
+ struct evtchn_unmask unmask = { .port = port };
+ (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+ }
+@@ -247,7 +267,7 @@ static void handle_irq_for_port(unsigned
+
+ static void consume_one_event(unsigned cpu,
+ struct evtchn_fifo_control_block *control_block,
+- unsigned priority, uint32_t *ready)
++ unsigned priority, unsigned long *ready)
+ {
+ struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
+ uint32_t head;
+@@ -277,10 +297,9 @@ static void consume_one_event(unsigned c
+ * copy of the ready word.
+ */
+ if (head == 0)
+- clear_bit(priority, BM(ready));
++ clear_bit(priority, ready);
+
+- if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))
+- && !sync_test_bit(EVTCHN_FIFO_MASKED, BM(word)))
++ if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port))
+ handle_irq_for_port(port);
+
+ q->head[priority] = head;
+@@ -289,7 +308,7 @@ static void consume_one_event(unsigned c
+ static void evtchn_fifo_handle_events(unsigned cpu)
+ {
+ struct evtchn_fifo_control_block *control_block;
+- uint32_t ready;
++ unsigned long ready;
+ unsigned q;
+
+ control_block = per_cpu(cpu_control_block, cpu);