--- /dev/null
+Subject: cxgb3i - fixes over-run of skb MAX_SKB_FRAGS
+From: Karen Xie <kxie@chelsio.com>
+References: bnc#468314
+
+This patch fixes the over-run of skb's MAX_SKB_FRAGS between the cxgb3i and
+cxgb3 drivers on PPC64 systems.
+
+Signed-off-by: Karen Xie <kxie@chelsio.com>
+Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
+Acked-by: Hannes Reinecke <hare@suse.de>
+---
+
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i.h
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i.h 2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i.h 2009-01-26 22:00:17.000000000 -0800
+@@ -36,6 +36,12 @@
+ #define CXGB3I_MAX_TARGET CXGB3I_MAX_CONN
+ #define CXGB3I_MAX_LUN 512
+ #define ISCSI_PDU_HEADER_MAX (56 + 256) /* bhs + digests + ahs */
++#define ULP2_MAX_PKT_SIZE 16224
++#define ISCSI_PDU_NONPAYLOAD_MAX \
++ (sizeof(struct iscsi_hdr) + ISCSI_MAX_AHS_SIZE + 2*ISCSI_DIGEST_SIZE)
++#define ULP2_MAX_PDU_PAYLOAD \
++ (ULP2_MAX_PKT_SIZE - ISCSI_PDU_NONPAYLOAD_MAX)
++
+
+ struct cxgb3i_adapter;
+ struct cxgb3i_hba;
+@@ -53,12 +59,11 @@
+ *
+ */
+ struct cxgb3i_tag_format {
+- unsigned char idx_bits;
+- unsigned char age_bits;
++ unsigned char sw_bits;
+ unsigned char rsvd_bits;
+ unsigned char rsvd_shift;
++ unsigned char filler[1];
+ u32 rsvd_mask;
+- u32 rsvd_tag_mask;
+ };
+
+ /**
+@@ -95,11 +100,137 @@
+ unsigned int ulimit;
+ unsigned int nppods;
+ unsigned int idx_last;
++ unsigned char idx_bits;
++ unsigned char filler[3];
++ u32 idx_mask;
++ u32 rsvd_tag_mask;
+ spinlock_t map_lock;
+ struct cxgb3i_gather_list **gl_map;
+ struct sk_buff **gl_skb;
+ };
+
++/*
++ * cxgb3i ddp tag are 32 bits, it consists of reserved bits used by h/w and
++ * non-reserved bits that can be used by the iscsi s/w.
++ * The reserved bits are identified by the rsvd_bits and rsvd_shift fields
++ * in struct cxgb3i_tag_format.
++ *
++ * The upper most reserved bit can be used to check if a tag is ddp tag or not:
++ * if the bit is 0, the tag is a valid ddp tag
++ */
++
++/**
++ * cxgb3i_is_ddp_tag - check if a given tag is a hw/ddp tag
++ * @tformat: tag format information
++ * @tag: tag to be checked
++ *
++ * return true if the tag is a ddp tag, false otherwise.
++ */
++static inline int cxgb3i_is_ddp_tag(struct cxgb3i_tag_format *tformat, u32 tag)
++{
++ return !(tag & (1 << (tformat->rsvd_bits + tformat->rsvd_shift - 1)));
++}
++
++/**
++ * cxgb3i_sw_tag_usable - check if a given s/w tag has enough bits left for
++ * the reserved/hw bits
++ * @tformat: tag format information
++ * @sw_tag: s/w tag to be checked
++ *
++ * return true if the tag is a ddp tag, false otherwise.
++ */
++static inline int cxgb3i_sw_tag_usable(struct cxgb3i_tag_format *tformat,
++ u32 sw_tag)
++{
++ sw_tag >>= (32 - tformat->rsvd_bits);
++ return !sw_tag;
++}
++
++/**
++ * cxgb3i_set_non_ddp_tag - mark a given s/w tag as an invalid ddp tag
++ * @tformat: tag format information
++ * @sw_tag: s/w tag to be checked
++ *
++ * insert 1 at the upper most reserved bit to mark it as an invalid ddp tag.
++ */
++static inline u32 cxgb3i_set_non_ddp_tag(struct cxgb3i_tag_format *tformat,
++ u32 sw_tag)
++{
++ unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1;
++ u32 mask = (1 << shift) - 1;
++
++ if (sw_tag && (sw_tag & ~mask)) {
++ u32 v1 = sw_tag & ((1 << shift) - 1);
++ u32 v2 = (sw_tag >> (shift - 1)) << shift;
++
++ return v2 | v1 | 1 << shift;
++ }
++ return sw_tag | 1 << shift;
++}
++
++/**
++ * cxgb3i_ddp_tag_base - shift the s/w tag bits so that reserved bits are not
++ * used.
++ * @tformat: tag format information
++ * @sw_tag: s/w tag to be checked
++ */
++static inline u32 cxgb3i_ddp_tag_base(struct cxgb3i_tag_format *tformat,
++ u32 sw_tag)
++{
++ u32 mask = (1 << tformat->rsvd_shift) - 1;
++
++ if (sw_tag && (sw_tag & ~mask)) {
++ u32 v1 = sw_tag & mask;
++ u32 v2 = sw_tag >> tformat->rsvd_shift;
++
++ v2 <<= tformat->rsvd_shift + tformat->rsvd_bits;
++ return v2 | v1;
++ }
++ return sw_tag;
++}
++
++/**
++ * cxgb3i_tag_rsvd_bits - get the reserved bits used by the h/w
++ * @tformat: tag format information
++ * @tag: tag to be checked
++ *
++ * return the reserved bits in the tag
++ */
++static inline u32 cxgb3i_tag_rsvd_bits(struct cxgb3i_tag_format *tformat,
++ u32 tag)
++{
++ if (cxgb3i_is_ddp_tag(tformat, tag))
++ return (tag >> tformat->rsvd_shift) & tformat->rsvd_mask;
++ return 0;
++}
++
++/**
++ * cxgb3i_tag_nonrsvd_bits - get the non-reserved bits used by the s/w
++ * @tformat: tag format information
++ * @tag: tag to be checked
++ *
++ * return the non-reserved bits in the tag.
++ */
++static inline u32 cxgb3i_tag_nonrsvd_bits(struct cxgb3i_tag_format *tformat,
++ u32 tag)
++{
++ unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1;
++ u32 v1, v2;
++
++ if (cxgb3i_is_ddp_tag(tformat, tag)) {
++ v1 = tag & ((1 << tformat->rsvd_shift) - 1);
++ v2 = (tag >> (shift + 1)) << tformat->rsvd_shift;
++ } else {
++ u32 mask = (1 << shift) - 1;
++
++ tag &= ~(1 << shift);
++ v1 = tag & mask;
++ v2 = (tag >> 1) & ~mask;
++ }
++ return v1 | v2;
++}
++
++
+ /**
+ * struct cxgb3i_hba - cxgb3i iscsi structure (per port)
+ *
+@@ -146,16 +277,22 @@
+ * struct cxgb3i_conn - cxgb3i iscsi connection
+ *
+ * @tcp_conn: pointer to iscsi_tcp_conn structure
+- * @listhead: list head to link elements
++ * @list_head: list head to link elements
++ * @cep: pointer to iscsi_endpoint structure
+ * @conn: pointer to iscsi_conn structure
+ * @hba: pointer to the hba this conn. is going through
++ * @task_idx_bits: # of bits needed for session->cmds_max
++ * @frags: temp. holding area for tx coalesced sg list pages.
+ */
++#define TX_PDU_PAGES_MAX (16384/512 + 1)
+ struct cxgb3i_conn {
+ struct iscsi_tcp_conn tcp_conn;
+ struct list_head list_head;
+ struct cxgb3i_endpoint *cep;
+ struct iscsi_conn *conn;
+ struct cxgb3i_hba *hba;
++ unsigned int task_idx_bits;
++ skb_frag_t frags[TX_PDU_PAGES_MAX];
+ };
+
+ /**
+@@ -190,8 +327,7 @@
+ int cxgb3i_ulp2_init(void);
+ void cxgb3i_ulp2_cleanup(void);
+ int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *, int, int);
+-void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32,
+- struct scatterlist *, unsigned int);
++void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32);
+ u32 cxgb3i_ddp_tag_reserve(struct cxgb3i_adapter *, unsigned int,
+ u32, unsigned int, struct scatterlist *,
+ unsigned int, int);
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_init.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_init.c
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_init.c 2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_init.c 2009-01-26 22:00:17.000000000 -0800
+@@ -11,8 +11,8 @@
+
+ #include "cxgb3i.h"
+
+-#define DRV_MODULE_NAME "cxgb3i"
+-#define DRV_MODULE_VERSION "0.1.0"
++#define DRV_MODULE_NAME "cxgb3i"
++#define DRV_MODULE_VERSION "0.9.0"
+ #define DRV_MODULE_RELDATE "Jun. 1, 2008"
+
+ static char version[] =
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_iscsi.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_iscsi.c 2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_iscsi.c 2009-01-26 22:00:17.000000000 -0800
+@@ -36,10 +36,10 @@
+ #define cxgb3i_api_debug(fmt...)
+ #endif
+
+-#define align_to_4k_boundary(n) \
+- do { \
+- n = (n) & ~((1 << 12) - 1); \
+- } while(0)
++/*
++ * align pdu size to multiple of 512 for better performance
++ */
++#define align_pdu_size(n) do { n = (n) & (~511); } while (0)
+
+ static struct scsi_transport_template *cxgb3i_scsi_transport;
+ static struct scsi_host_template cxgb3i_host_template;
+@@ -102,7 +102,7 @@
+ struct cxgb3i_adapter *snic;
+
+ /* remove from the list */
+- read_lock(&cxgb3i_snic_rwlock);
++ write_lock(&cxgb3i_snic_rwlock);
+ list_for_each_entry(snic, &cxgb3i_snic_list, list_head) {
+ if (snic->tdev == t3dev) {
+ list_del(&snic->list_head);
+@@ -295,6 +295,8 @@
+ * stop the xmit path so the xmit_segment function is
+ * not being called
+ */
++ iscsi_suspend_tx(cconn->conn);
++
+ write_lock_bh(&cep->c3cn->callback_lock);
+ set_bit(ISCSI_SUSPEND_BIT, &cconn->conn->suspend_rx);
+ cep->c3cn->user_data = NULL;
+@@ -391,20 +393,17 @@
+ static inline int cxgb3i_conn_max_xmit_dlength(struct iscsi_conn *conn)
+ {
+ struct cxgb3i_conn *cconn = conn->dd_data;
+- unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_SIZE,
+- cconn->hba->snic->tx_max_size -
+- ISCSI_PDU_HEADER_MAX);
+-
+- cxgb3i_api_debug("conn 0x%p, max xmit %u.\n",
+- conn, conn->max_xmit_dlength);
++ unsigned int max = min_t(unsigned int,
++ ULP2_MAX_PDU_PAYLOAD,
++ cconn->hba->snic->tx_max_size -
++ ISCSI_PDU_NONPAYLOAD_MAX);
+
+ if (conn->max_xmit_dlength)
+ conn->max_xmit_dlength = min_t(unsigned int,
+- conn->max_xmit_dlength, max);
++ conn->max_xmit_dlength, max);
+ else
+ conn->max_xmit_dlength = max;
+-
+- align_to_4k_boundary(conn->max_xmit_dlength);
++ align_pdu_size(conn->max_xmit_dlength);
+
+ cxgb3i_api_debug("conn 0x%p, set max xmit %u.\n",
+ conn, conn->max_xmit_dlength);
+@@ -415,14 +414,10 @@
+ static inline int cxgb3i_conn_max_recv_dlength(struct iscsi_conn *conn)
+ {
+ struct cxgb3i_conn *cconn = conn->dd_data;
+- unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_SIZE,
+- cconn->hba->snic->rx_max_size -
+- ISCSI_PDU_HEADER_MAX);
+-
+- cxgb3i_api_debug("conn 0x%p, max recv %u.\n",
+- conn, conn->max_recv_dlength);
+-
+- align_to_4k_boundary(max);
++ unsigned int max = min_t(unsigned int,
++ ULP2_MAX_PDU_PAYLOAD,
++ cconn->hba->snic->tx_max_size -
++ ISCSI_PDU_NONPAYLOAD_MAX);
+
+ if (conn->max_recv_dlength) {
+ if (conn->max_recv_dlength > max) {
+@@ -433,9 +428,9 @@
+ }
+ conn->max_recv_dlength = min_t(unsigned int,
+ conn->max_recv_dlength, max);
+- align_to_4k_boundary(conn->max_recv_dlength);
+ } else
+ conn->max_recv_dlength = max;
++ align_pdu_size(conn->max_recv_dlength);
+
+ cxgb3i_api_debug("conn 0x%p, set max recv %u.\n",
+ conn, conn->max_recv_dlength);
+@@ -516,12 +511,14 @@
+
+ cep = ep->dd_data;
+ c3cn = cep->c3cn;
++ /* calculate the tag idx bits needed for this conn based on cmds_max */
++ cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1;
+
+- read_lock(&c3cn->callback_lock);
++ write_lock(&c3cn->callback_lock);
+ /* mnc: TODO don't abuse iscsi_tcp fields */
+ tcp_conn->sock = (struct socket *)c3cn;
+ c3cn->user_data = conn;
+- read_unlock(&c3cn->callback_lock);
++ write_unlock(&c3cn->callback_lock);
+
+ cconn->hba = cep->hba;
+ cconn->cep = cep;
+@@ -609,11 +606,13 @@
+ return -ENOMEM;
+ case ISCSI_PARAM_MAX_RECV_DLENGTH:
+ err = iscsi_set_param(cls_conn, param, buf, buflen);
+- err = cxgb3i_conn_max_recv_dlength(conn);
++ if (!err)
++ err = cxgb3i_conn_max_recv_dlength(conn);
+ break;
+ case ISCSI_PARAM_MAX_XMIT_DLENGTH:
+ err = iscsi_set_param(cls_conn, param, buf, buflen);
+- err = cxgb3i_conn_max_xmit_dlength(conn);
++ if (!err)
++ err = cxgb3i_conn_max_xmit_dlength(conn);
+ break;
+ default:
+ return iscsi_set_param(cls_conn, param, buf, buflen);
+@@ -718,49 +717,23 @@
+ stats->custom[0].value = conn->eh_abort_cnt;
+ }
+
+-static inline u32 tag_base(struct cxgb3i_tag_format *format,
+- unsigned int idx, unsigned int age)
+-{
+- u32 sw_bits = idx | (age << format->idx_bits);
+- u32 tag = sw_bits >> format->rsvd_shift;
+-
+- tag <<= format->rsvd_bits + format->rsvd_shift;
+- tag |= sw_bits & ((1 << format->rsvd_shift) - 1);
+- return tag;
+-}
+-
+-static inline void cxgb3i_parse_tag(struct cxgb3i_tag_format *format,
+- u32 tag, u32 *rsvd_bits, u32 *sw_bits)
+-{
+- if (rsvd_bits)
+- *rsvd_bits = (tag >> format->rsvd_shift) & format->rsvd_mask;
+- if (sw_bits) {
+- *sw_bits = (tag >> (format->rsvd_shift + format->rsvd_bits))
+- << format->rsvd_shift;
+- *sw_bits |= tag & ((1 << format->rsvd_shift) - 1);
+- }
+-
+- cxgb3i_tag_debug("parse tag 0x%x, rsvd 0x%x, sw 0x%x.\n",
+- tag, rsvd_bits ? *rsvd_bits : 0xFFFFFFFF,
+- sw_bits ? *sw_bits : 0xFFFFFFFF);
+-}
+-
+-
+ static void cxgb3i_parse_itt(struct iscsi_conn *conn, itt_t itt,
+ int *idx, int *age)
+ {
+ struct cxgb3i_conn *cconn = conn->dd_data;
+ struct cxgb3i_adapter *snic = cconn->hba->snic;
++ u32 tag = itt;
+ u32 sw_bits;
+
+- cxgb3i_parse_tag(&snic->tag_format, itt, NULL, &sw_bits);
++ sw_bits = cxgb3i_tag_nonrsvd_bits(&snic->tag_format, tag);
+ if (idx)
+- *idx = sw_bits & ISCSI_ITT_MASK;
++ *idx = sw_bits & ((1 << cconn->task_idx_bits) - 1);
+ if (age)
+- *age = (sw_bits >> snic->tag_format.idx_bits) & ISCSI_AGE_MASK;
++ *age = (sw_bits >> cconn->task_idx_bits) & ISCSI_AGE_MASK;
+
+- cxgb3i_tag_debug("parse itt 0x%x, idx 0x%x, age 0x%x.\n",
+- itt, idx ? *idx : 0xFFFFF, age ? *age : 0xFF);
++ cxgb3i_tag_debug("parse tag 0x%x/0x%x, sw 0x%x, idx 0x%x, age 0x%x.\n",
++ tag, itt, sw_bits, idx ? *idx : 0xFFFFF,
++ age ? *age : 0xFF);
+ }
+
+ static int cxgb3i_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
+@@ -771,26 +744,40 @@
+ struct cxgb3i_conn *cconn = conn->dd_data;
+ struct iscsi_tcp_conn *tcp_conn = &cconn->tcp_conn;
+ struct cxgb3i_adapter *snic = cconn->hba->snic;
+- u32 sw_tag = tag_base(&snic->tag_format, task->itt, sess->age);
++ struct cxgb3i_tag_format *tformat = &snic->tag_format;
++ u32 sw_tag = (sess->age << cconn->task_idx_bits) | task->itt;
+ u32 tag = RESERVED_ITT;
+
+- if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE)) {
++ if (sc && (scsi_bidi_cmnd(sc) ||
++ sc->sc_data_direction == DMA_FROM_DEVICE) &&
++ cxgb3i_sw_tag_usable(tformat, sw_tag)) {
+ struct s3_conn *c3cn = (struct s3_conn *)(tcp_conn->sock);
+- tag =
+- cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
++ tag = cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
+ scsi_in(sc)->length,
+ scsi_in(sc)->table.sgl,
+ scsi_in(sc)->table.nents,
+ GFP_ATOMIC);
+ }
+ if (tag == RESERVED_ITT)
+- tag = sw_tag | (snic->tag_format.rsvd_mask <<
+- snic->tag_format.rsvd_shift);
++ tag = cxgb3i_set_non_ddp_tag(tformat, sw_tag);
++ /* the itt need to sent in big-endian order */
+ *hdr_itt = htonl(tag);
+
+- cxgb3i_tag_debug("new tag 0x%x/0x%x (itt 0x%x, age 0x%x).\n",
+- tag, *hdr_itt, task->itt, sess->age);
+-
++ if (sc) {
++ if (sc->sc_data_direction == DMA_FROM_DEVICE)
++ cxgb3i_tag_debug("read, len %u, tag 0x%x/0x%x "
++ "(itt 0x%x, age 0x%x, sw 0x%x).\n",
++ scsi_in(sc)->length, tag, *hdr_itt,
++ task->itt, sess->age, sw_tag);
++ else
++ cxgb3i_tag_debug("write, len %u, tag 0x%x/0x%x "
++ "(itt 0x%x, age 0x%x, sw 0x%x).\n",
++ scsi_out(sc)->length, tag, *hdr_itt,
++ task->itt, sess->age, sw_tag);
++ } else
++ cxgb3i_tag_debug("ctrl, tag 0x%x/0x%x (itt 0x%x, age 0x%x, "
++ "sw 0x%x).\n",
++ tag, *hdr_itt, task->itt, sess->age, sw_tag);
+ return 0;
+ }
+
+@@ -800,14 +787,15 @@
+ struct iscsi_conn *conn = task->conn;
+ struct cxgb3i_conn *cconn = conn->dd_data;
+ struct cxgb3i_adapter *snic = cconn->hba->snic;
++ struct cxgb3i_tag_format *tformat = &snic->tag_format;
+ u32 tag = ntohl(hdr_itt);
+
+- cxgb3i_tag_debug("release tag 0x%x.\n", tag);
++ cxgb3i_tag_debug("release %s tag 0x%x.\n", sc ? "scsi" : "ctrl", tag);
+
+- if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE))
+- cxgb3i_ddp_tag_release(snic, tag,
+- scsi_in(sc)->table.sgl,
+- scsi_in(sc)->table.nents);
++ if (sc && (scsi_bidi_cmnd(sc) ||
++ sc->sc_data_direction == DMA_FROM_DEVICE) &&
++ cxgb3i_is_ddp_tag(tformat, tag))
++ cxgb3i_ddp_tag_release(snic, tag);
+ }
+
+ /**
+@@ -820,7 +808,7 @@
+ .proc_name = "cxgb3i",
+ .queuecommand = iscsi_queuecommand,
+ .change_queue_depth = iscsi_change_queue_depth,
+- .can_queue = 128 * (ISCSI_DEF_XMIT_CMDS_MAX - 1),
++ .can_queue = CXGB3I_SCSI_QDEPTH_DFLT - 1,
+ .sg_tablesize = SG_ALL,
+ .max_sectors = 0xFFFF,
+ .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN,
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.c
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.c 2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.c 2009-01-26 22:00:17.000000000 -0800
+@@ -22,19 +22,19 @@
+ #include "cxgb3i_ulp2.h"
+
+ #ifdef __DEBUG_C3CN_CONN__
+-#define c3cn_conn_debug cxgb3i_log_debug
++#define c3cn_conn_debug cxgb3i_log_debug
+ #else
+ #define c3cn_conn_debug(fmt...)
+ #endif
+
+ #ifdef __DEBUG_C3CN_TX__
+-#define c3cn_tx_debug cxgb3i_log_debug
++#define c3cn_tx_debug cxgb3i_log_debug
+ #else
+ #define c3cn_tx_debug(fmt...)
+ #endif
+
+ #ifdef __DEBUG_C3CN_RX__
+-#define c3cn_rx_debug cxgb3i_log_debug
++#define c3cn_rx_debug cxgb3i_log_debug
+ #else
+ #define c3cn_rx_debug(fmt...)
+ #endif
+@@ -42,9 +42,9 @@
+ /*
+ * module parameters releated to offloaded iscsi connection
+ */
+-static int cxgb3_rcv_win = 256 * 1024;
++static int cxgb3_rcv_win = 128 * 1024;
+ module_param(cxgb3_rcv_win, int, 0644);
+-MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=256KB)");
++MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=128KB)");
+
+ static int cxgb3_snd_win = 64 * 1024;
+ module_param(cxgb3_snd_win, int, 0644);
+@@ -456,12 +456,9 @@
+ * The number of WRs needed for an skb depends on the number of fragments
+ * in the skb and whether it has any payload in its main body. This maps the
+ * length of the gather list represented by an skb into the # of necessary WRs.
+- *
+- * The max. length of an skb is controlled by the max pdu size which is ~16K.
+- * Also, assume the min. fragment length is the sector size (512), then add
+- * extra fragment counts for iscsi bhs and payload padding.
++ * The extra two fragments are for iscsi bhs and payload padding.
+ */
+-#define SKB_WR_LIST_SIZE (16384/512 + 3)
++#define SKB_WR_LIST_SIZE (MAX_SKB_FRAGS + 2)
+ static unsigned int skb_wrs[SKB_WR_LIST_SIZE] __read_mostly;
+
+ static void s3_init_wr_tab(unsigned int wr_len)
+@@ -484,7 +481,7 @@
+
+ static inline void reset_wr_list(struct s3_conn *c3cn)
+ {
+- c3cn->wr_pending_head = NULL;
++ c3cn->wr_pending_head = c3cn->wr_pending_tail = NULL;
+ }
+
+ /*
+@@ -495,7 +492,7 @@
+ static inline void enqueue_wr(struct s3_conn *c3cn,
+ struct sk_buff *skb)
+ {
+- skb->sp = NULL;
++ skb_wr_next(skb) = NULL;
+
+ /*
+ * We want to take an extra reference since both us and the driver
+@@ -508,10 +505,22 @@
+ if (!c3cn->wr_pending_head)
+ c3cn->wr_pending_head = skb;
+ else
+- c3cn->wr_pending_tail->sp = (void *)skb;
++ skb_wr_next(c3cn->wr_pending_tail) = skb;
+ c3cn->wr_pending_tail = skb;
+ }
+
++static int count_pending_wrs(struct s3_conn *c3cn)
++{
++ int n = 0;
++ const struct sk_buff *skb = c3cn->wr_pending_head;
++
++ while (skb) {
++ n += skb->csum;
++ skb = skb_wr_next(skb);
++ }
++ return n;
++}
++
+ static inline struct sk_buff *peek_wr(const struct s3_conn *c3cn)
+ {
+ return c3cn->wr_pending_head;
+@@ -528,8 +537,8 @@
+
+ if (likely(skb)) {
+ /* Don't bother clearing the tail */
+- c3cn->wr_pending_head = (struct sk_buff *)skb->sp;
+- skb->sp = NULL;
++ c3cn->wr_pending_head = skb_wr_next(skb);
++ skb_wr_next(skb) = NULL;
+ }
+ return skb;
+ }
+@@ -542,13 +551,15 @@
+ }
+
+ static inline void make_tx_data_wr(struct s3_conn *c3cn, struct sk_buff *skb,
+- int len)
++ int len, int req_completion)
+ {
+ struct tx_data_wr *req;
+
+ skb_reset_transport_header(skb);
+ req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req));
+- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
++
++ req->wr_hi = htonl((V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)) |
++ (req_completion ? F_WR_COMPL : 0));
+ req->wr_lo = htonl(V_WR_TID(c3cn->tid));
+ req->sndseq = htonl(c3cn->snd_nxt);
+ /* len includes the length of any HW ULP additions */
+@@ -556,11 +567,11 @@
+ req->param = htonl(V_TX_PORT(c3cn->l2t->smt_idx));
+ /* V_TX_ULP_SUBMODE sets both the mode and submode */
+ req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(skb)) |
+- V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1)));
++ V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1)));
+
+ if (!c3cn_flag(c3cn, C3CN_TX_DATA_SENT)) {
+- req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
+- V_TX_CPU_IDX(c3cn->qset));
++ req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
++ V_TX_CPU_IDX(c3cn->qset));
+ /* Sendbuffer is in units of 32KB. */
+ req->param |= htonl(V_TX_SNDBUF(cxgb3_snd_win >> 15));
+ c3cn_set_flag(c3cn, C3CN_TX_DATA_SENT);
+@@ -591,7 +602,7 @@
+
+ if (unlikely(c3cn->state == C3CN_STATE_CONNECTING ||
+ c3cn->state == C3CN_STATE_CLOSE_WAIT_1 ||
+- c3cn->state == C3CN_STATE_ABORTING)) {
++ c3cn->state >= C3CN_STATE_ABORTING)) {
+ c3cn_tx_debug("c3cn 0x%p, in closing state %u.\n",
+ c3cn, c3cn->state);
+ return 0;
+@@ -626,19 +637,22 @@
+ c3cn->wr_unacked += wrs_needed;
+ enqueue_wr(c3cn, skb);
+
+- if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
+- len += ulp_extra_len(skb);
+- make_tx_data_wr(c3cn, skb, len);
+- c3cn->snd_nxt += len;
+- if ((req_completion
+- && c3cn->wr_unacked == wrs_needed)
+- || (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL)
+- || c3cn->wr_unacked >= c3cn->wr_max / 2) {
+- struct work_request_hdr *wr = cplhdr(skb);
++ c3cn_tx_debug("c3cn 0x%p, enqueue, skb len %u/%u, frag %u, "
++ "wr %d, left %u, unack %u.\n",
++ c3cn, skb->len, skb->data_len, frags,
++ wrs_needed, c3cn->wr_avail, c3cn->wr_unacked);
+
+- wr->wr_hi |= htonl(F_WR_COMPL);
++ if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
++ if ((req_completion &&
++ c3cn->wr_unacked == wrs_needed) ||
++ (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL) ||
++ c3cn->wr_unacked >= c3cn->wr_max / 2) {
++ req_completion = 1;
+ c3cn->wr_unacked = 0;
+ }
++ len += ulp_extra_len(skb);
++ make_tx_data_wr(c3cn, skb, len, req_completion);
++ c3cn->snd_nxt += len;
+ CXGB3_SKB_CB(skb)->flags &= ~C3CB_FLAG_NEED_HDR;
+ }
+
+@@ -1153,12 +1167,28 @@
+ * Process an acknowledgment of WR completion. Advance snd_una and send the
+ * next batch of work requests from the write queue.
+ */
++
++static void check_wr_invariants(struct s3_conn *c3cn)
++{
++ int pending = count_pending_wrs(c3cn);
++
++ if (unlikely(c3cn->wr_avail + pending != c3cn->wr_max))
++ cxgb3i_log_error("TID %u: credit imbalance: avail %u, "
++ "pending %u, total should be %u\n",
++ c3cn->tid, c3cn->wr_avail, pending,
++ c3cn->wr_max);
++}
++
+ static void process_wr_ack(struct s3_conn *c3cn, struct sk_buff *skb)
+ {
+ struct cpl_wr_ack *hdr = cplhdr(skb);
+ unsigned int credits = ntohs(hdr->credits);
+ u32 snd_una = ntohl(hdr->snd_una);
+
++ c3cn_tx_debug("%u WR credits, avail %u, unack %u, TID %u, state %u.\n",
++ credits, c3cn->wr_avail, c3cn->wr_unacked,
++ c3cn->tid, c3cn->state);
++
+ c3cn->wr_avail += credits;
+ if (c3cn->wr_unacked > c3cn->wr_max - c3cn->wr_avail)
+ c3cn->wr_unacked = c3cn->wr_max - c3cn->wr_avail;
+@@ -1173,6 +1203,17 @@
+ break;
+ }
+ if (unlikely(credits < p->csum)) {
++ struct tx_data_wr *w = cplhdr(p);
++ cxgb3i_log_error("TID %u got %u WR credits need %u, "
++ "len %u, main body %u, frags %u, "
++ "seq # %u, ACK una %u, ACK nxt %u, "
++ "WR_AVAIL %u, WRs pending %u\n",
++ c3cn->tid, credits, p->csum, p->len,
++ p->len - p->data_len,
++ skb_shinfo(p)->nr_frags,
++ ntohl(w->sndseq), snd_una,
++ ntohl(hdr->snd_nxt), c3cn->wr_avail,
++ count_pending_wrs(c3cn) - credits);
+ p->csum -= credits;
+ break;
+ } else {
+@@ -1182,8 +1223,14 @@
+ }
+ }
+
+- if (unlikely(before(snd_una, c3cn->snd_una)))
++ check_wr_invariants(c3cn);
++
++ if (unlikely(before(snd_una, c3cn->snd_una))) {
++ cxgb3i_log_error("TID %u, unexpected sequence # %u in WR_ACK "
++ "snd_una %u\n",
++ c3cn->tid, snd_una, c3cn->snd_una);
+ goto out_free;
++ }
+
+ if (c3cn->snd_una != snd_una) {
+ c3cn->snd_una = snd_una;
+@@ -1454,11 +1501,14 @@
+ struct dst_entry *dst)
+ {
+ BUG_ON(c3cn->cdev != cdev);
+- c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs;
++ c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs - 1;
+ c3cn->wr_unacked = 0;
+ c3cn->mss_idx = select_mss(c3cn, dst_mtu(dst));
+
+ reset_wr_list(c3cn);
++
++ c3cn_conn_debug("c3cn 0x%p, wr max %u, avail %u.\n",
++ c3cn, c3cn->wr_max, c3cn->wr_avail);
+ }
+
+ static int initiate_act_open(struct s3_conn *c3cn, struct net_device *dev)
+@@ -1673,9 +1723,17 @@
+ goto out_err;
+ }
+
+- err = -EPIPE;
+ if (c3cn->err) {
+ c3cn_tx_debug("c3cn 0x%p, err %d.\n", c3cn, c3cn->err);
++ err = -EPIPE;
++ goto out_err;
++ }
++
++ if (c3cn->write_seq - c3cn->snd_una >= cxgb3_snd_win) {
++ c3cn_tx_debug("c3cn 0x%p, snd %u - %u > %u.\n",
++ c3cn, c3cn->write_seq, c3cn->snd_una,
++ cxgb3_snd_win);
++ err = -EAGAIN;
+ goto out_err;
+ }
+
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.h
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.h 2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.h 2009-01-26 22:00:17.000000000 -0800
+@@ -180,7 +180,7 @@
+ * @seq: tcp sequence number
+ * @ddigest: pdu data digest
+ * @pdulen: recovered pdu length
+- * @ulp_data: scratch area for ULP
++ * @wr_next: scratch area for tx wr
+ */
+ struct cxgb3_skb_cb {
+ __u8 flags;
+@@ -188,7 +188,7 @@
+ __u32 seq;
+ __u32 ddigest;
+ __u32 pdulen;
+- __u8 ulp_data[16];
++ struct sk_buff *wr_next;
+ };
+
+ #define CXGB3_SKB_CB(skb) ((struct cxgb3_skb_cb *)&((skb)->cb[0]))
+@@ -196,7 +196,7 @@
+ #define skb_ulp_mode(skb) (CXGB3_SKB_CB(skb)->ulp_mode)
+ #define skb_ulp_ddigest(skb) (CXGB3_SKB_CB(skb)->ddigest)
+ #define skb_ulp_pdulen(skb) (CXGB3_SKB_CB(skb)->pdulen)
+-#define skb_ulp_data(skb) (CXGB3_SKB_CB(skb)->ulp_data)
++#define skb_wr_next(skb) (CXGB3_SKB_CB(skb)->wr_next)
+
+ enum c3cb_flags {
+ C3CB_FLAG_NEED_HDR = 1 << 0, /* packet needs a TX_DATA_WR header */
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.c
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.c 2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.c 2009-01-26 22:00:17.000000000 -0800
+@@ -51,6 +51,7 @@
+ static unsigned char sw_tag_idx_bits;
+ static unsigned char sw_tag_age_bits;
+ static unsigned char page_idx = ULP2_PGIDX_MAX;
++static unsigned int skb_copymax = SKB_MAX_HEAD(TX_HEADER_LEN);
+
+ static void cxgb3i_ddp_page_init(void)
+ {
+@@ -59,6 +60,10 @@
+ sw_tag_idx_bits = (__ilog2_u32(ISCSI_ITT_MASK)) + 1;
+ sw_tag_age_bits = (__ilog2_u32(ISCSI_AGE_MASK)) + 1;
+
++ cxgb3i_log_info("tag itt 0x%x, %u bits, age 0x%x, %u bits.\n",
++ ISCSI_ITT_MASK, sw_tag_idx_bits,
++ ISCSI_AGE_MASK, sw_tag_age_bits);
++
+ for (i = 0; i < ULP2_PGIDX_MAX; i++) {
+ if (PAGE_SIZE == (1UL << ddp_page_shift[i])) {
+ page_idx = i;
+@@ -312,7 +317,6 @@
+ page_idx, sgcnt, xferlen, ULP2_DDP_THRESHOLD);
+ return RESERVED_ITT;
+ }
+- return RESERVED_ITT;
+
+ gl = ddp_make_gl(xferlen, sgl, sgcnt, gfp);
+ if (!gl) {
+@@ -322,9 +326,9 @@
+ }
+
+ npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
+- idx_max = ddp->nppods - npods + 1;
++ idx_max = ddp->nppods - npods;
+
+- if (ddp->idx_last == ddp->nppods)
++ if (ddp->idx_last >= idx_max)
+ idx = ddp_find_unused_entries(ddp, 0, idx_max, npods, gl);
+ else {
+ idx = ddp_find_unused_entries(ddp, ddp->idx_last + 1, idx_max,
+@@ -345,12 +349,13 @@
+
+ if (ddp_gl_map(snic->pdev, gl) < 0)
+ goto unmap_sgl;
+-
+- tag = sw_tag | (idx << snic->tag_format.rsvd_shift);
++
++ tag = cxgb3i_ddp_tag_base(&snic->tag_format, sw_tag);
++ tag |= idx << PPOD_IDX_SHIFT;
+
+ hdr.rsvd = 0;
+ hdr.vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
+- hdr.pgsz_tag_clr = htonl(tag & snic->tag_format.rsvd_tag_mask);
++ hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask);
+ hdr.maxoffset = htonl(xferlen);
+ hdr.pgoffset = htonl(gl->offset);
+
+@@ -372,30 +377,35 @@
+ return RESERVED_ITT;
+ }
+
+-void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *snic, u32 tag,
+- struct scatterlist *sgl, unsigned int sgcnt)
++void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *snic, u32 tag)
+ {
+- u32 idx = (tag >> snic->tag_format.rsvd_shift) &
+- snic->tag_format.rsvd_mask;
++ struct cxgb3i_ddp_info *ddp = snic->ddp;
++ u32 idx;
+
+- if (idx < snic->tag_format.rsvd_mask) {
+- struct cxgb3i_ddp_info *ddp = snic->ddp;
++ if (!ddp) {
++ cxgb3i_log_error("release ddp tag 0x%x, ddp NULL.\n", tag);
++ return;
++ }
++
++ idx = (tag >> PPOD_IDX_SHIFT) & ddp->idx_mask;
++ if (idx < ddp->nppods) {
+ struct cxgb3i_gather_list *gl = ddp->gl_map[idx];
+ unsigned int npods;
+
+ if (!gl || !gl->nelem) {
+- cxgb3i_log_warn("release tag 0x%x, idx 0x%x, no gl.\n",
+- tag, idx);
++ cxgb3i_log_error("release tag 0x%x, idx 0x%x, no gl.\n",
++ tag, idx);
+ return;
+ }
+ npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
+-
+ cxgb3i_tag_debug("ddp tag 0x%x, release idx 0x%x, npods %u.\n",
+ tag, idx, npods);
+ clear_ddp_map(snic, idx, npods);
+ ddp_unmark_entries(ddp, idx, npods);
+ ddp_gl_unmap(snic->pdev, gl);
+- }
++ } else
++ cxgb3i_log_error("ddp tag 0x%x, idx 0x%x > max 0x%x.\n",
++ tag, idx, ddp->nppods);
+ }
+
+ int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *cconn, int hcrc, int dcrc)
+@@ -403,12 +413,18 @@
+ struct iscsi_tcp_conn *tcp_conn = cconn->conn->dd_data;
+ struct s3_conn *c3cn = (struct s3_conn *)(tcp_conn->sock);
+ struct sk_buff *skb = alloc_skb(sizeof(struct cpl_set_tcb_field),
+- GFP_KERNEL | __GFP_NOFAIL);
++ GFP_KERNEL);
+ struct cpl_set_tcb_field *req;
+ u64 val = (hcrc ? 1 : 0) | (dcrc ? 2 : 0);
+
++ if (!skb)
++ return -ENOMEM;
++
+ if (page_idx < ULP2_PGIDX_MAX)
+ val |= page_idx << 4;
++ else
++ cxgb3i_log_warn("TID 0x%x, host page 0x%lx default to 4K.\n",
++ c3cn->tid, PAGE_SIZE);
+
+ /* set up ulp submode and page size */
+ req = (struct cpl_set_tcb_field *)skb_put(skb, sizeof(*req));
+@@ -476,14 +492,14 @@
+ (skb_ulp_mode(skb) & ULP2_FLAG_DCRC_ERROR)) ?
+ ISCSI_SEGMENT_DGST_ERR : 0;
+ if (skb_ulp_mode(skb) & ULP2_FLAG_DATA_DDPED) {
+- cxgb3i_ddp_debug("skb 0x%p, opcode 0x%x, data %u, "
+- "ddp'ed, itt 0x%x.\n",
+- skb, hdr->opcode & ISCSI_OPCODE_MASK,
+- tcp_conn->in.datalen, hdr->itt);
++ cxgb3i_rx_debug("skb 0x%p, opcode 0x%x, data %u, "
++ "ddp'ed, itt 0x%x.\n",
++ skb, hdr->opcode & ISCSI_OPCODE_MASK,
++ tcp_conn->in.datalen, hdr->itt);
+ segment->total_copied = segment->total_size;
+ } else {
+- cxgb3i_ddp_debug("skb 0x%p, opcode 0x%x, data %u, "
+- "not ddp'ed, itt 0x%x.\n",
++ cxgb3i_rx_debug("skb 0x%p, opcode 0x%x, data %u, "
++ "not ddp'ed, itt 0x%x.\n",
+ skb, hdr->opcode & ISCSI_OPCODE_MASK,
+ tcp_conn->in.datalen, hdr->itt);
+ offset += sizeof(struct cpl_iscsi_hdr_norss);
+@@ -520,24 +536,141 @@
+ skb_ulp_mode(skb) = (ULP_MODE_ISCSI << 4) | submode;
+ }
+
++static int sg_page_coalesce(struct scatterlist *sg, unsigned int offset,
++ unsigned int dlen, skb_frag_t *frags, int frag_max)
++{
++ unsigned int sglen = sg->length - offset;
++ struct page *page = sg_page(sg);
++ unsigned int datalen = dlen, copy;
++ int i;
++
++ i = 0;
++ do {
++ if (!sglen) {
++ sg = sg_next(sg);
++ offset = 0;
++ sglen = sg->length;
++ page = sg_page(sg);
++ }
++ copy = min(datalen, sglen);
++ if (i && page == frags[i - 1].page &&
++ offset + sg->offset ==
++ frags[i - 1].page_offset + frags[i - 1].size) {
++ frags[i - 1].size += copy;
++ } else {
++ if (i >= frag_max) {
++ cxgb3i_log_error("%s, too many pages > %u, "
++ "dlen %u.\n", __func__,
++ frag_max, dlen);
++ return -EINVAL;
++ }
++
++ frags[i].page = page;
++ frags[i].page_offset = sg->offset + offset;
++ frags[i].size = copy;
++ i++;
++ }
++ datalen -= copy;
++ offset += copy;
++ sglen -= copy;
++ } while (datalen);
++
++ return i;
++}
++
++static int copy_frags_to_skb_pages(struct sk_buff *skb, skb_frag_t *frags,
++ int frag_cnt, unsigned int datalen)
++{
++ struct page *page = NULL;
++ unsigned char *dp;
++ unsigned int pg_left = 0;
++ unsigned int copy_total = 0;
++ int i;
++
++ for (i = 0; i < frag_cnt; i++, frags++) {
++ while (frags->size) {
++ unsigned char *sp = page_address(frags->page);
++ unsigned int copy;
++
++ if (!pg_left) {
++ int cnt = skb_shinfo(skb)->nr_frags;
++
++ if (cnt >= MAX_SKB_FRAGS) {
++ cxgb3i_log_error("%s: pdu data %u.\n",
++ __func__, datalen);
++ return -EINVAL;
++ }
++ page = alloc_page(GFP_ATOMIC);
++ if (!page)
++ return -ENOMEM;
++ dp = page_address(page);
++ pg_left = PAGE_SIZE;
++
++ copy = min(pg_left, datalen);
++ skb_fill_page_desc(skb, cnt, page, 0, copy);
++
++ skb->len += copy;
++ skb->data_len += copy;
++ skb->truesize += copy;
++ datalen -= copy;
++ }
++ copy = min(pg_left, frags->size);
++ memcpy(dp, sp + frags->page_offset, copy);
++
++ frags->size -= copy;
++ frags->page_offset += copy;
++ dp += copy;
++ pg_left -= copy;
++ copy_total += copy;
++ }
++ }
++
++ return copy_total;
++}
++
+ int cxgb3i_conn_ulp2_xmit(struct iscsi_conn *conn)
+ {
+- struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
++ struct cxgb3i_conn *cconn = conn->dd_data;
++ struct iscsi_tcp_conn *tcp_conn = &cconn->tcp_conn;
+ struct iscsi_segment *hdr_seg = &tcp_conn->out.segment;
+ struct iscsi_segment *data_seg = &tcp_conn->out.data_segment;
+ unsigned int hdrlen = hdr_seg->total_size;
+ unsigned int datalen = data_seg->total_size;
+ unsigned int padlen = iscsi_padding(datalen);
+- unsigned int copymax = SKB_MAX_HEAD(TX_HEADER_LEN);
+- unsigned int copylen;
++ unsigned int copylen = hdrlen;
++ unsigned int copy_dlen = 0;
+ struct sk_buff *skb;
+ unsigned char *dst;
++ int i, frag_cnt = 0;
+ int err = -EAGAIN;
+
+- if (data_seg->data && ((datalen + padlen) < copymax))
+- copylen = hdrlen + datalen + padlen;
+- else
+- copylen = hdrlen;
++ /*
++ * the whole pdu needs to fit into one skb, make sure we don't overrun
++ * the skb's frag_list. If there are more sg pages than MAX_SKB_FRAGS,
++ * we have to copy the data either to the head or newly allocated
++ * whole new page(s). This could happen if the sg contains a lot of
++ * fragmented data chunks (pages).
++ */
++ if (datalen) {
++ if (!data_seg->data) {
++ err = sg_page_coalesce(data_seg->sg,
++ data_seg->sg_offset,
++ data_seg->total_size,
++ cconn->frags,
++ TX_PDU_PAGES_MAX);
++ if (err < 0)
++ return err;
++ frag_cnt = err;
++
++ if (frag_cnt > MAX_SKB_FRAGS ||
++ (padlen && frag_cnt + 1 > MAX_SKB_FRAGS))
++ copy_dlen = datalen + padlen;
++ } else
++ copy_dlen += datalen + padlen;
++ }
++
++ if (copylen + copy_dlen < skb_copymax)
++ copylen += copy_dlen;
+
+ /* supports max. 16K pdus, so one skb is enough to hold all the data */
+ skb = alloc_skb(TX_HEADER_LEN + copylen, GFP_ATOMIC);
+@@ -575,70 +708,84 @@
+ skb->data_len += datalen;
+ skb->truesize += datalen;
+ }
+- } else {
+- struct scatterlist *sg = data_seg->sg;
+- unsigned int offset = data_seg->sg_offset;
+- struct page *page = sg_page(sg);
+- unsigned int sglen = sg->length - offset;
+-
+- do {
+- int i = skb_shinfo(skb)->nr_frags;
+- unsigned int copy;
++ } else if (copy_dlen) {
++ /* need to copy the page fragments */
++ if (copylen > hdrlen) {
++ skb_frag_t *frag = cconn->frags;
+
+- if (!sglen) {
+- sg = sg_next(sg);
+- page = sg_page(sg);
+- offset = 0;
+- sglen = sg->length;
++ /* data fits in the skb's headroom */
++ for (i = 0; i < frag_cnt; i++, frag++) {
++ memcpy(dst,
++ page_address(frag->page) +
++ frag->page_offset,
++ frag->size);
++ dst += frag->size;
+ }
+- copy = min(sglen, datalen);
+-
+- if (i && skb_can_coalesce(skb, i, page,
+- sg->offset + offset)) {
+- skb_shinfo(skb)->frags[i - 1].size += copy;
+- } else {
+- get_page(page);
+- skb_fill_page_desc(skb, i, page,
+- sg->offset + offset, copy);
++ if (padlen)
++ memset(dst, 0, padlen);
++ } else {
++ /* allocate pages to hold the data */
++ err = copy_frags_to_skb_pages(skb, cconn->frags,
++ frag_cnt, datalen);
++ if (err < 0) {
++ err = -EAGAIN;
++ goto free_skb;
+ }
+- skb->len += copy;
+- skb->data_len += copy;
+- skb->truesize += copy;
+- offset += copy;
+- sglen -= copy;
+- datalen -= copy;
+- } while (datalen);
+- }
+-
+- if (padlen && skb_shinfo(skb)->nr_frags) {
+- int idx = skb_shinfo(skb)->nr_frags;
+- get_page(pad_page);
+- skb_fill_page_desc(skb, idx, pad_page, 0, padlen);
+- skb->data_len += padlen;
+- skb->truesize += padlen;
+- skb->len += padlen;
++ WARN_ON(err != datalen);
++ if (padlen) {
++ skb_frag_t *frag;
++
++ i = skb_shinfo(skb)->nr_frags;
++ frag = &skb_shinfo(skb)->frags[i];
++ dst = page_address(frag->page);
++
++ memset(dst + frag->page_offset + frag->size,
++ 0, padlen);
++ frag->size += padlen;
++ }
++ }
++ } else {
++ /* sg pages fit into frag_list */
++ for (i = 0; i < frag_cnt; i++)
++ get_page(cconn->frags[i].page);
++ memcpy(skb_shinfo(skb)->frags, cconn->frags,
++ sizeof(skb_frag_t) * frag_cnt);
++ skb_shinfo(skb)->nr_frags = frag_cnt;
++ skb->len += datalen;
++ skb->data_len += datalen;
++ skb->truesize += datalen;
++
++ if (padlen) {
++ i = skb_shinfo(skb)->nr_frags;
++ get_page(pad_page);
++ skb_fill_page_desc(skb, i, pad_page, 0, padlen);
++ skb->len += padlen;
++ skb->data_len += padlen;
++ skb->truesize += padlen;
++ }
+ }
+
+ send_pdu:
+ err = cxgb3i_c3cn_send_pdus((struct s3_conn *)tcp_conn->sock, skb);
+-
+ if (err > 0) {
+ int pdulen = hdrlen + datalen + padlen;
++
+ if (conn->hdrdgst_en)
+ pdulen += ISCSI_DIGEST_SIZE;
+ if (datalen && conn->datadgst_en)
+ pdulen += ISCSI_DIGEST_SIZE;
+
+ hdr_seg->total_copied = hdr_seg->total_size;
+- if (datalen)
+- data_seg->total_copied = data_seg->total_size;
++ data_seg->total_copied = data_seg->total_size;
+ conn->txdata_octets += pdulen;
+ return pdulen;
+ }
+
++free_skb:
+ kfree_skb(skb);
+ if (err < 0 && err != -EAGAIN) {
+- cxgb3i_log_error("conn 0x%p, xmit err %d.\n", conn, err);
++ cxgb3i_log_error("conn 0x%p, xmit err %d, pdu len %u/%u.\n",
++ conn, err, copylen, datalen);
+ iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+ return err;
+ }
+@@ -652,6 +799,9 @@
+ return -ENOMEM;
+ memset(page_address(pad_page), 0, PAGE_SIZE);
+ cxgb3i_ddp_page_init();
++ cxgb3i_log_info("skb max. frag %u, head %u.\n",
++ (unsigned int)MAX_SKB_FRAGS,
++ (unsigned int)skb_copymax);
+ return 0;
+ }
+
+@@ -720,7 +870,7 @@
+
+ read_lock(&c3cn->callback_lock);
+ conn = c3cn->user_data;
+- if (conn && c3cn->state != C3CN_STATE_ESTABLISHED)
++ if (conn)
+ iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+ read_unlock(&c3cn->callback_lock);
+ }
+@@ -730,7 +880,7 @@
+ struct t3cdev *tdev = snic->tdev;
+ struct cxgb3i_ddp_info *ddp;
+ struct ulp_iscsi_info uinfo;
+- unsigned int ppmax, bits, max_bits;
++ unsigned int ppmax, bits;
+ int i, err;
+
+ err = tdev->ctl(tdev, ULP_ISCSI_GET_PARAMS, &uinfo);
+@@ -740,26 +890,21 @@
+ return err;
+ }
+
+- ppmax = (uinfo.ulimit - uinfo.llimit + 1) >> PPOD_SIZE_SHIFT;
+- max_bits = min(PPOD_IDX_MAX_SIZE,
+- (32 - sw_tag_idx_bits - sw_tag_age_bits));
+- bits = __ilog2_u32(ppmax) + 1;
+- if (bits > max_bits)
+- bits = max_bits;
+- ppmax = (1 << bits) - 1;
+-
+ snic->tx_max_size = min_t(unsigned int,
+ uinfo.max_txsz, ULP2_MAX_PKT_SIZE);
+ snic->rx_max_size = min_t(unsigned int,
+ uinfo.max_rxsz, ULP2_MAX_PKT_SIZE);
++ cxgb3i_log_info("ddp max pkt size: %u/%u,%u, %u/%u,%u.\n",
++ snic->tx_max_size, uinfo.max_txsz, ULP2_MAX_PKT_SIZE,
++ snic->rx_max_size, uinfo.max_rxsz, ULP2_MAX_PKT_SIZE);
+
+- snic->tag_format.idx_bits = sw_tag_idx_bits;
+- snic->tag_format.age_bits = sw_tag_age_bits;
+- snic->tag_format.rsvd_bits = bits;
+- snic->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
+- snic->tag_format.rsvd_mask = (1 << snic->tag_format.rsvd_bits) - 1;
+- snic->tag_format.rsvd_tag_mask =
+- (1 << (snic->tag_format.rsvd_bits + PPOD_IDX_SHIFT)) - 1;
++ snic->tag_format.sw_bits = sw_tag_idx_bits + sw_tag_age_bits;
++
++ ppmax = (uinfo.ulimit - uinfo.llimit + 1) >> PPOD_SIZE_SHIFT;
++ bits = __ilog2_u32(ppmax) + 1;
++ if (bits > PPOD_IDX_MAX_SIZE)
++ bits = PPOD_IDX_MAX_SIZE;
++ ppmax = (1 << (bits - 1)) - 1;
+
+ ddp = cxgb3i_alloc_big_mem(sizeof(struct cxgb3i_ddp_info) +
+ ppmax *
+@@ -779,12 +924,15 @@
+ spin_lock_init(&ddp->map_lock);
+ ddp->llimit = uinfo.llimit;
+ ddp->ulimit = uinfo.ulimit;
++ ddp->nppods = ppmax;
++ ddp->idx_last = ppmax;
++ ddp->idx_bits = bits;
++ ddp->idx_mask = (1 << bits) - 1;
++ ddp->rsvd_tag_mask = (1 << (bits + PPOD_IDX_SHIFT)) - 1;
+
+- uinfo.tagmask =
+- snic->tag_format.rsvd_mask << snic->tag_format.rsvd_shift;
++ uinfo.tagmask = ddp->idx_mask << PPOD_IDX_SHIFT;
+ for (i = 0; i < ULP2_PGIDX_MAX; i++)
+ uinfo.pgsz_factor[i] = ddp_page_order[i];
+-
+ uinfo.ulimit = uinfo.llimit + (ppmax << PPOD_SIZE_SHIFT);
+
+ err = tdev->ctl(tdev, ULP_ISCSI_SET_PARAMS, &uinfo);
+@@ -794,19 +942,20 @@
+ goto free_ppod_map;
+ }
+
+- ddp->nppods = ppmax;
+- ddp->idx_last = ppmax;
+-
+ tdev->ulp_iscsi = snic->ddp = ddp;
+
+- cxgb3i_log_info("snic nppods %u (0x%x ~ 0x%x), rsvd shift %u, "
+- "bits %u, mask 0x%x, 0x%x, pkt %u,%u.\n",
+- ppmax, ddp->llimit, ddp->ulimit,
+- snic->tag_format.rsvd_shift,
+- snic->tag_format.rsvd_bits,
+- snic->tag_format.rsvd_mask, uinfo.tagmask,
+- snic->tx_max_size, snic->rx_max_size);
++ cxgb3i_log_info("nppods %u (0x%x ~ 0x%x), bits %u, mask 0x%x,0x%x.\n",
++ ppmax, ddp->llimit, ddp->ulimit, ddp->idx_bits,
++ ddp->idx_mask, ddp->rsvd_tag_mask);
+
++ snic->tag_format.rsvd_bits = ddp->idx_bits;
++ snic->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
++ snic->tag_format.rsvd_mask = (1 << snic->tag_format.rsvd_bits) - 1;
++
++ cxgb3i_log_info("tag format: sw %u, rsvd %u,%u, mask 0x%x.\n",
++ snic->tag_format.sw_bits, snic->tag_format.rsvd_bits,
++ snic->tag_format.rsvd_shift,
++ snic->tag_format.rsvd_mask);
+ return 0;
+
+ free_ppod_map:
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.h
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.h 2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.h 2009-01-26 22:00:17.000000000 -0800
+@@ -13,7 +13,6 @@
+ #ifndef __CXGB3I_ULP2_H__
+ #define __CXGB3I_ULP2_H__
+
+-#define ULP2_PDU_PAYLOAD_DFLT (16224 - ISCSI_PDU_HEADER_MAX)
+ #define PPOD_PAGES_MAX 4
+ #define PPOD_PAGES_SHIFT 2 /* 4 pages per pod */
+
+@@ -100,9 +99,6 @@
+ #define ULP2_FLAG_DCRC_ERROR 0x20
+ #define ULP2_FLAG_PAD_ERROR 0x40
+
+-#define ULP2_MAX_PKT_SIZE 16224
+-#define ULP2_MAX_PDU_SIZE 8192
+-
+ void cxgb3i_conn_closing(struct s3_conn *);
+ void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn);
+ void cxgb3i_conn_tx_open(struct s3_conn *c3cn);