--- /dev/null
+From af5040da01ef980670b3741b3e10733ee3e33566 Mon Sep 17 00:00:00 2001
+From: Roman Pen <r.peniaev@gmail.com>
+Date: Tue, 4 Mar 2014 23:13:10 +0900
+Subject: blktrace: fix accounting of partially completed requests
+
+From: Roman Pen <r.peniaev@gmail.com>
+
+commit af5040da01ef980670b3741b3e10733ee3e33566 upstream.
+
+trace_block_rq_complete does not take into account that a request can
+be partially completed, so we can get the following incorrect output
+of blkparser:
+
+ C R 232 + 240 [0]
+ C R 240 + 232 [0]
+ C R 248 + 224 [0]
+ C R 256 + 216 [0]
+
+but should be:
+
+ C R 232 + 8 [0]
+ C R 240 + 8 [0]
+ C R 248 + 8 [0]
+ C R 256 + 8 [0]
+
+Also, the whole output summary statistics of completed requests and
+final throughput will be incorrect.
+
+This patch takes into account real completion size of the request and
+fixes wrong completion accounting.
+
+Signed-off-by: Roman Pen <r.peniaev@gmail.com>
+CC: Steven Rostedt <rostedt@goodmis.org>
+CC: Frederic Weisbecker <fweisbec@gmail.com>
+CC: Ingo Molnar <mingo@redhat.com>
+CC: linux-kernel@vger.kernel.org
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-core.c | 2 +-
+ include/trace/events/block.h | 33 ++++++++++++++++++++++++++++++---
+ kernel/trace/blktrace.c | 20 +++++++++++---------
+ 3 files changed, 42 insertions(+), 13 deletions(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -2353,7 +2353,7 @@ bool blk_update_request(struct request *
+ if (!req->bio)
+ return false;
+
+- trace_block_rq_complete(req->q, req);
++ trace_block_rq_complete(req->q, req, nr_bytes);
+
+ /*
+ * For fs requests, rq is just carrier of independent bio's
+--- a/include/trace/events/block.h
++++ b/include/trace/events/block.h
+@@ -132,6 +132,7 @@ DEFINE_EVENT(block_rq_with_error, block_
+ * block_rq_complete - block IO operation completed by device driver
+ * @q: queue containing the block operation request
+ * @rq: block operations request
++ * @nr_bytes: number of completed bytes
+ *
+ * The block_rq_complete tracepoint event indicates that some portion
+ * of operation request has been completed by the device driver. If
+@@ -139,11 +140,37 @@ DEFINE_EVENT(block_rq_with_error, block_
+ * do for the request. If @rq->bio is non-NULL then there is
+ * additional work required to complete the request.
+ */
+-DEFINE_EVENT(block_rq_with_error, block_rq_complete,
++TRACE_EVENT(block_rq_complete,
+
+- TP_PROTO(struct request_queue *q, struct request *rq),
++ TP_PROTO(struct request_queue *q, struct request *rq,
++ unsigned int nr_bytes),
+
+- TP_ARGS(q, rq)
++ TP_ARGS(q, rq, nr_bytes),
++
++ TP_STRUCT__entry(
++ __field( dev_t, dev )
++ __field( sector_t, sector )
++ __field( unsigned int, nr_sector )
++ __field( int, errors )
++ __array( char, rwbs, RWBS_LEN )
++ __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
++ ),
++
++ TP_fast_assign(
++ __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
++ __entry->sector = blk_rq_pos(rq);
++ __entry->nr_sector = nr_bytes >> 9;
++ __entry->errors = rq->errors;
++
++ blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
++ blk_dump_cmd(__get_str(cmd), rq);
++ ),
++
++ TP_printk("%d,%d %s (%s) %llu + %u [%d]",
++ MAJOR(__entry->dev), MINOR(__entry->dev),
++ __entry->rwbs, __get_str(cmd),
++ (unsigned long long)__entry->sector,
++ __entry->nr_sector, __entry->errors)
+ );
+
+ DECLARE_EVENT_CLASS(block_rq,
+--- a/kernel/trace/blktrace.c
++++ b/kernel/trace/blktrace.c
+@@ -702,6 +702,7 @@ void blk_trace_shutdown(struct request_q
+ * blk_add_trace_rq - Add a trace for a request oriented action
+ * @q: queue the io is for
+ * @rq: the source request
++ * @nr_bytes: number of completed bytes
+ * @what: the action
+ *
+ * Description:
+@@ -709,7 +710,7 @@ void blk_trace_shutdown(struct request_q
+ *
+ **/
+ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+- u32 what)
++ unsigned int nr_bytes, u32 what)
+ {
+ struct blk_trace *bt = q->blk_trace;
+
+@@ -718,11 +719,11 @@ static void blk_add_trace_rq(struct requ
+
+ if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
+ what |= BLK_TC_ACT(BLK_TC_PC);
+- __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
++ __blk_add_trace(bt, 0, nr_bytes, rq->cmd_flags,
+ what, rq->errors, rq->cmd_len, rq->cmd);
+ } else {
+ what |= BLK_TC_ACT(BLK_TC_FS);
+- __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
++ __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes,
+ rq->cmd_flags, what, rq->errors, 0, NULL);
+ }
+ }
+@@ -730,33 +731,34 @@ static void blk_add_trace_rq(struct requ
+ static void blk_add_trace_rq_abort(void *ignore,
+ struct request_queue *q, struct request *rq)
+ {
+- blk_add_trace_rq(q, rq, BLK_TA_ABORT);
++ blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT);
+ }
+
+ static void blk_add_trace_rq_insert(void *ignore,
+ struct request_queue *q, struct request *rq)
+ {
+- blk_add_trace_rq(q, rq, BLK_TA_INSERT);
++ blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT);
+ }
+
+ static void blk_add_trace_rq_issue(void *ignore,
+ struct request_queue *q, struct request *rq)
+ {
+- blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
++ blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ISSUE);
+ }
+
+ static void blk_add_trace_rq_requeue(void *ignore,
+ struct request_queue *q,
+ struct request *rq)
+ {
+- blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
++ blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE);
+ }
+
+ static void blk_add_trace_rq_complete(void *ignore,
+ struct request_queue *q,
+- struct request *rq)
++ struct request *rq,
++ unsigned int nr_bytes)
+ {
+- blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
++ blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE);
+ }
+
+ /**
--- /dev/null
+From b855d416dc17061ebb271ea7ef1201d100531770 Mon Sep 17 00:00:00 2001
+From: Patrick McHardy <kaber@trash.net>
+Date: Sat, 12 Apr 2014 13:17:57 +0200
+Subject: netfilter: nf_tables: fix nft_cmp_fast failure on big endian for size < 4
+
+From: Patrick McHardy <kaber@trash.net>
+
+commit b855d416dc17061ebb271ea7ef1201d100531770 upstream.
+
+nft_cmp_fast is used for equality comparisons of size <= 4. For
+comparisons of size < 4 bytes a mask is calculated that is applied to
+both the data from userspace (during initialization) and the register
+value (during runtime). Both values are stored using (in effect) memcpy
+to a memory area that is then interpreted as u32 by nft_cmp_fast.
+
+This works fine on little endian since smaller types have the same base
+address, however on big endian this is not true and the smaller types
+are interpreted as a big number with trailing zero bytes.
+
+The mask therefore must not include the lower bytes, but the higher bytes
+on big endian. Add a helper function that does a cpu_to_le32 to switch
+the bytes on big endian. Since we're dealing with a mask of just consecutive
+bits, this works out fine.
+
+Signed-off-by: Patrick McHardy <kaber@trash.net>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/net/netfilter/nf_tables_core.h | 10 ++++++++++
+ net/netfilter/nf_tables_core.c | 3 +--
+ net/netfilter/nft_cmp.c | 2 +-
+ 3 files changed, 12 insertions(+), 3 deletions(-)
+
+--- a/include/net/netfilter/nf_tables_core.h
++++ b/include/net/netfilter/nf_tables_core.h
+@@ -13,6 +13,16 @@ struct nft_cmp_fast_expr {
+ u8 len;
+ };
+
++/* Calculate the mask for the nft_cmp_fast expression. On big endian the
++ * mask needs to include the *upper* bytes when interpreting that data as
++ * something smaller than the full u32, therefore a cpu_to_le32 is done.
++ */
++static inline u32 nft_cmp_fast_mask(unsigned int len)
++{
++ return cpu_to_le32(~0U >> (FIELD_SIZEOF(struct nft_cmp_fast_expr,
++ data) * BITS_PER_BYTE - len));
++}
++
+ extern const struct nft_expr_ops nft_cmp_fast_ops;
+
+ int nft_cmp_module_init(void);
+--- a/net/netfilter/nf_tables_core.c
++++ b/net/netfilter/nf_tables_core.c
+@@ -25,9 +25,8 @@ static void nft_cmp_fast_eval(const stru
+ struct nft_data data[NFT_REG_MAX + 1])
+ {
+ const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+- u32 mask;
++ u32 mask = nft_cmp_fast_mask(priv->len);
+
+- mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len);
+ if ((data[priv->sreg].data[0] & mask) == priv->data)
+ return;
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+--- a/net/netfilter/nft_cmp.c
++++ b/net/netfilter/nft_cmp.c
+@@ -128,7 +128,7 @@ static int nft_cmp_fast_init(const struc
+ BUG_ON(err < 0);
+ desc.len *= BITS_PER_BYTE;
+
+- mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len);
++ mask = nft_cmp_fast_mask(desc.len);
+ priv->data = data.data[0] & mask;
+ priv->len = desc.len;
+ return 0;