]> git.ipfire.org Git - ipfire-2.x.git/blobdiff - src/patches/suse-2.6.27.31/patches.drivers/cxgb3i-fix-skb-overrun
Move xen patchset to new version's subdir.
[ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.drivers / cxgb3i-fix-skb-overrun
diff --git a/src/patches/suse-2.6.27.31/patches.drivers/cxgb3i-fix-skb-overrun b/src/patches/suse-2.6.27.31/patches.drivers/cxgb3i-fix-skb-overrun
new file mode 100644 (file)
index 0000000..84498c5
--- /dev/null
@@ -0,0 +1,1360 @@
+Subject: cxgb3i - fixes over-run of skb MAX_SKB_FRAGS
+From: Karen Xie <kxie@chelsio.com>
+References: bnc#468314
+
+This patch fixes the over-run of the skb's MAX_SKB_FRAGS limit between the
+cxgb3i and cxgb3 drivers on PPC64 systems.
+
+Signed-off-by: Karen Xie <kxie@chelsio.com>
+Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
+Acked-by: Hannes Reinecke <hare@suse.de>
+---
+
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i.h
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i.h        2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i.h        2009-01-26 22:00:17.000000000 -0800
+@@ -36,6 +36,12 @@
+ #define CXGB3I_MAX_TARGET     CXGB3I_MAX_CONN
+ #define CXGB3I_MAX_LUN                512
+ #define ISCSI_PDU_HEADER_MAX  (56 + 256) /* bhs + digests + ahs */
++#define ULP2_MAX_PKT_SIZE             16224
++#define ISCSI_PDU_NONPAYLOAD_MAX  \
++      (sizeof(struct iscsi_hdr) + ISCSI_MAX_AHS_SIZE + 2*ISCSI_DIGEST_SIZE)
++#define ULP2_MAX_PDU_PAYLOAD \
++      (ULP2_MAX_PKT_SIZE - ISCSI_PDU_NONPAYLOAD_MAX)
++
+ struct cxgb3i_adapter;
+ struct cxgb3i_hba;
+@@ -53,12 +59,11 @@
+  *
+  */
+ struct cxgb3i_tag_format {
+-      unsigned char idx_bits;
+-      unsigned char age_bits;
++      unsigned char sw_bits;
+       unsigned char rsvd_bits;
+       unsigned char rsvd_shift;
++      unsigned char filler[1];
+       u32 rsvd_mask;
+-      u32 rsvd_tag_mask;
+ };
+ /**
+@@ -95,11 +100,137 @@
+       unsigned int ulimit;
+       unsigned int nppods;
+       unsigned int idx_last;
++      unsigned char idx_bits;
++      unsigned char filler[3];
++      u32 idx_mask;
++      u32 rsvd_tag_mask;
+       spinlock_t map_lock;
+       struct cxgb3i_gather_list **gl_map;
+       struct sk_buff **gl_skb;
+ };
++/*
++ * A cxgb3i ddp tag is 32 bits wide; it consists of reserved bits used by the
++ * h/w and non-reserved bits that can be used by the iscsi s/w.
++ * The reserved bits are identified by the rsvd_bits and rsvd_shift fields
++ * in struct cxgb3i_tag_format.
++ *
++ * The upper most reserved bit can be used to check if a tag is a ddp tag:
++ *    if the bit is 0, the tag is a valid ddp tag
++ */
++
++/**
++ * cxgb3i_is_ddp_tag - check if a given tag is a hw/ddp tag
++ * @tformat: tag format information
++ * @tag: tag to be checked
++ *
++ * return non-zero if the upper most reserved bit is clear (ddp tag), else 0.
++ */
++static inline int cxgb3i_is_ddp_tag(struct cxgb3i_tag_format *tformat, u32 tag)
++{
++      return !(tag & (1U << (tformat->rsvd_bits + tformat->rsvd_shift - 1)));
++}
++
++/**
++ * cxgb3i_sw_tag_usable - check if a given s/w tag has enough bits left for
++ *                      the reserved/hw bits
++ * @tformat: tag format information
++ * @sw_tag: s/w tag to be checked
++ *
++ * return true if the top rsvd_bits bits of sw_tag are clear, false otherwise.
++ */
++static inline int cxgb3i_sw_tag_usable(struct cxgb3i_tag_format *tformat,
++                                      u32 sw_tag)
++{
++      /* rsvd_bits == 0 reserves nothing; a 32-bit shift would be undefined */
++      return !tformat->rsvd_bits || !(sw_tag >> (32 - tformat->rsvd_bits));
++}
++
++/**
++ * cxgb3i_set_non_ddp_tag - mark a given s/w tag as an invalid ddp tag
++ * @tformat: tag format information
++ * @sw_tag: s/w tag to be marked
++ *
++ * insert 1 at the upper most reserved bit to mark it as an invalid ddp tag.
++ */
++static inline u32 cxgb3i_set_non_ddp_tag(struct cxgb3i_tag_format *tformat,
++                                       u32 sw_tag)
++{
++      unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1;
++      u32 marker = 1U << shift;
++      u32 mask = marker - 1;
++
++      if (!(sw_tag & ~mask))
++              return sw_tag | marker;
++
++      /* move the upper part up by one so the marker bit clobbers nothing */
++      sw_tag = ((sw_tag >> (shift - 1)) << shift) | (sw_tag & mask);
++      return sw_tag | marker;
++}
++
++/**
++ * cxgb3i_ddp_tag_base - spread the s/w tag bits around the reserved field
++ *                     so that the reserved bits stay unused.
++ * @tformat: tag format information
++ * @sw_tag: s/w tag to be converted
++ */
++static inline u32 cxgb3i_ddp_tag_base(struct cxgb3i_tag_format *tformat,
++                                    u32 sw_tag)
++{
++      u32 low_mask = (1 << tformat->rsvd_shift) - 1;
++      u32 high = sw_tag >> tformat->rsvd_shift;
++
++      /* nothing at or above rsvd_shift: the tag is already in place */
++      if (!high)
++              return sw_tag;
++
++      /* open a rsvd_bits-wide gap between the low and the high part */
++      high <<= tformat->rsvd_shift + tformat->rsvd_bits;
++      return high | (sw_tag & low_mask);
++}
++
++/**
++ * cxgb3i_tag_rsvd_bits - extract the h/w reserved field from a tag
++ * @tformat: tag format information
++ * @tag: tag to be examined
++ *
++ * return the reserved field of a ddp tag, or 0 if the tag is not a ddp tag.
++ */
++static inline u32 cxgb3i_tag_rsvd_bits(struct cxgb3i_tag_format *tformat,
++                                     u32 tag)
++{
++      if (!cxgb3i_is_ddp_tag(tformat, tag))
++              return 0;
++      return (tag >> tformat->rsvd_shift) & tformat->rsvd_mask;
++}
++
++/**
++ * cxgb3i_tag_nonrsvd_bits - recover the s/w bits stored in a tag
++ * @tformat: tag format information
++ * @tag: ddp or non-ddp tag to be decoded
++ *
++ * return the non-reserved (s/w) bits packed back into a contiguous value.
++ */
++static inline u32 cxgb3i_tag_nonrsvd_bits(struct cxgb3i_tag_format *tformat,
++                                        u32 tag)
++{
++      unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1;
++      u32 mask;
++
++      if (cxgb3i_is_ddp_tag(tformat, tag)) {
++              /* ddp tag: close the gap left by the reserved bits */
++              mask = (1 << tformat->rsvd_shift) - 1;
++              return (tag & mask) |
++                      ((tag >> (shift + 1)) << tformat->rsvd_shift);
++      }
++
++      /* non-ddp tag: clear the marker bit and shift the upper part down */
++      mask = (1 << shift) - 1;
++      tag &= ~(1 << shift);
++      return (tag & mask) | ((tag >> 1) & ~mask);
++}
++
++
+ /**
+  * struct cxgb3i_hba - cxgb3i iscsi structure (per port)
+  *
+@@ -146,16 +277,22 @@
+  * struct cxgb3i_conn - cxgb3i iscsi connection
+  *
+  * @tcp_conn: pointer to iscsi_tcp_conn structure
+- * @listhead: list head to link elements
++ * @list_head:        list head to link elements
++ * @cep:      pointer to iscsi_endpoint structure
+  * @conn:     pointer to iscsi_conn structure
+  * @hba:      pointer to the hba this conn. is going through
++ * @task_idx_bits: # of bits needed for session->cmds_max
++ * @frags:    temp. holding area for tx coalesced sg list pages.
+  */
++#define TX_PDU_PAGES_MAX   (16384/512 + 1)
+ struct cxgb3i_conn {
+       struct iscsi_tcp_conn tcp_conn;
+       struct list_head list_head;
+       struct cxgb3i_endpoint *cep;
+       struct iscsi_conn *conn;
+       struct cxgb3i_hba *hba;
++      unsigned int task_idx_bits;
++      skb_frag_t frags[TX_PDU_PAGES_MAX];
+ };
+ /**
+@@ -190,8 +327,7 @@
+ int cxgb3i_ulp2_init(void);
+ void cxgb3i_ulp2_cleanup(void);
+ int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *, int, int);
+-void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32,
+-                          struct scatterlist *, unsigned int);
++void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32);
+ u32 cxgb3i_ddp_tag_reserve(struct cxgb3i_adapter *, unsigned int,
+                          u32, unsigned int, struct scatterlist *,
+                          unsigned int, int);
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_init.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_init.c
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_init.c   2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_init.c   2009-01-26 22:00:17.000000000 -0800
+@@ -11,8 +11,8 @@
+ #include "cxgb3i.h"
+-#define DRV_MODULE_NAME         "cxgb3i"
+-#define DRV_MODULE_VERSION    "0.1.0"
++#define DRV_MODULE_NAME               "cxgb3i"
++#define DRV_MODULE_VERSION    "0.9.0"
+ #define DRV_MODULE_RELDATE    "Jun. 1, 2008"
+ static char version[] =
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_iscsi.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_iscsi.c  2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_iscsi.c  2009-01-26 22:00:17.000000000 -0800
+@@ -36,10 +36,10 @@
+ #define cxgb3i_api_debug(fmt...)
+ #endif
+-#define align_to_4k_boundary(n)       \
+-      do { \
+-              n = (n) & ~((1 << 12) - 1); \
+-      } while(0)
++/*
++ * round the pdu size down to a multiple of 512 for better performance
++ */
++#define align_pdu_size(n) do { n = (n) & (~511); } while (0)
+ static struct scsi_transport_template *cxgb3i_scsi_transport;
+ static struct scsi_host_template cxgb3i_host_template;
+@@ -102,7 +102,7 @@
+       struct cxgb3i_adapter *snic;
+       /* remove from the list */
+-      read_lock(&cxgb3i_snic_rwlock);
++      write_lock(&cxgb3i_snic_rwlock);
+       list_for_each_entry(snic, &cxgb3i_snic_list, list_head) {
+               if (snic->tdev == t3dev) {
+                       list_del(&snic->list_head);
+@@ -295,6 +295,8 @@
+                * stop the xmit path so the xmit_segment function is
+                * not being called
+                */
++              iscsi_suspend_tx(cconn->conn);
++
+               write_lock_bh(&cep->c3cn->callback_lock);
+               set_bit(ISCSI_SUSPEND_BIT, &cconn->conn->suspend_rx);
+               cep->c3cn->user_data = NULL;
+@@ -391,20 +393,17 @@
+ static inline int cxgb3i_conn_max_xmit_dlength(struct iscsi_conn *conn)
+ {
+       struct cxgb3i_conn *cconn = conn->dd_data;
+-      unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_SIZE,
+-                                         cconn->hba->snic->tx_max_size -
+-                                         ISCSI_PDU_HEADER_MAX);
+-
+-      cxgb3i_api_debug("conn 0x%p, max xmit %u.\n",
+-                       conn, conn->max_xmit_dlength);
++      unsigned int max = min_t(unsigned int,
++                              ULP2_MAX_PDU_PAYLOAD,
++                              cconn->hba->snic->tx_max_size -
++                                      ISCSI_PDU_NONPAYLOAD_MAX);
+       if (conn->max_xmit_dlength)
+               conn->max_xmit_dlength = min_t(unsigned int,
+-                                              conn->max_xmit_dlength, max);
++                                      conn->max_xmit_dlength, max);
+       else
+               conn->max_xmit_dlength = max;
+-
+-      align_to_4k_boundary(conn->max_xmit_dlength);
++      align_pdu_size(conn->max_xmit_dlength);
+       cxgb3i_api_debug("conn 0x%p, set max xmit %u.\n",
+                        conn, conn->max_xmit_dlength);
+@@ -415,14 +414,10 @@
+ static inline int cxgb3i_conn_max_recv_dlength(struct iscsi_conn *conn)
+ {
+       struct cxgb3i_conn *cconn = conn->dd_data;
+-      unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_SIZE,
+-                                         cconn->hba->snic->rx_max_size -
+-                                         ISCSI_PDU_HEADER_MAX);
+-
+-      cxgb3i_api_debug("conn 0x%p, max recv %u.\n",
+-                       conn, conn->max_recv_dlength);
+-
+-      align_to_4k_boundary(max);
++      /* receive side: bound by the adapter's rx limit, not the tx one */
++      unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_PAYLOAD,
++                              cconn->hba->snic->rx_max_size -
++                                      ISCSI_PDU_NONPAYLOAD_MAX);
+       if (conn->max_recv_dlength) {
+               if (conn->max_recv_dlength > max) {
+@@ -433,9 +428,9 @@
+               }
+               conn->max_recv_dlength = min_t(unsigned int,
+                                               conn->max_recv_dlength, max);
+-              align_to_4k_boundary(conn->max_recv_dlength);
+       } else
+               conn->max_recv_dlength = max;
++      align_pdu_size(conn->max_recv_dlength);
+       cxgb3i_api_debug("conn 0x%p, set max recv %u.\n",
+                        conn, conn->max_recv_dlength);
+@@ -516,12 +511,14 @@
+       cep = ep->dd_data;
+       c3cn = cep->c3cn;
++      /* calculate the tag idx bits needed for this conn based on cmds_max */
++      cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1;
+-      read_lock(&c3cn->callback_lock);
++      write_lock(&c3cn->callback_lock);
+       /* mnc: TODO don't abuse iscsi_tcp fields */
+       tcp_conn->sock = (struct socket *)c3cn;
+       c3cn->user_data = conn;
+-      read_unlock(&c3cn->callback_lock);
++      write_unlock(&c3cn->callback_lock);
+       cconn->hba = cep->hba;
+       cconn->cep = cep;
+@@ -609,11 +606,13 @@
+                       return -ENOMEM;
+       case ISCSI_PARAM_MAX_RECV_DLENGTH:
+               err = iscsi_set_param(cls_conn, param, buf, buflen);
+-              err = cxgb3i_conn_max_recv_dlength(conn);
++              if (!err)
++                      err = cxgb3i_conn_max_recv_dlength(conn);
+               break;
+       case ISCSI_PARAM_MAX_XMIT_DLENGTH:
+               err = iscsi_set_param(cls_conn, param, buf, buflen);
+-              err = cxgb3i_conn_max_xmit_dlength(conn);
++              if (!err)
++                      err = cxgb3i_conn_max_xmit_dlength(conn);
+               break;
+       default:
+               return iscsi_set_param(cls_conn, param, buf, buflen);
+@@ -718,49 +717,23 @@
+       stats->custom[0].value = conn->eh_abort_cnt;
+ }
+-static inline u32 tag_base(struct cxgb3i_tag_format *format,
+-                         unsigned int idx, unsigned int age)
+-{
+-      u32 sw_bits = idx | (age << format->idx_bits);
+-      u32 tag = sw_bits >> format->rsvd_shift;
+-
+-      tag <<= format->rsvd_bits + format->rsvd_shift;
+-      tag |= sw_bits & ((1 << format->rsvd_shift) - 1);
+-      return tag;
+-}
+-
+-static inline void cxgb3i_parse_tag(struct cxgb3i_tag_format *format,
+-                                  u32 tag, u32 *rsvd_bits, u32 *sw_bits)
+-{
+-      if (rsvd_bits)
+-              *rsvd_bits = (tag >> format->rsvd_shift) & format->rsvd_mask;
+-      if (sw_bits) {
+-              *sw_bits = (tag >> (format->rsvd_shift + format->rsvd_bits))
+-                          << format->rsvd_shift;
+-              *sw_bits |= tag & ((1 << format->rsvd_shift) - 1);
+-      }
+-
+-      cxgb3i_tag_debug("parse tag 0x%x, rsvd 0x%x, sw  0x%x.\n",
+-                       tag, rsvd_bits ? *rsvd_bits : 0xFFFFFFFF,
+-                       sw_bits ? *sw_bits : 0xFFFFFFFF);
+-}
+-
+-
+ static void cxgb3i_parse_itt(struct iscsi_conn *conn, itt_t itt,
+                            int *idx, int *age)
+ {
+       struct cxgb3i_conn *cconn = conn->dd_data;
+       struct cxgb3i_adapter *snic = cconn->hba->snic;
++      u32 tag = itt;
+       u32 sw_bits;
+-      cxgb3i_parse_tag(&snic->tag_format, itt, NULL, &sw_bits);
++      sw_bits = cxgb3i_tag_nonrsvd_bits(&snic->tag_format, tag);
+       if (idx)
+-              *idx = sw_bits & ISCSI_ITT_MASK;
++              *idx = sw_bits & ((1 << cconn->task_idx_bits) - 1);
+       if (age)
+-              *age = (sw_bits >> snic->tag_format.idx_bits) & ISCSI_AGE_MASK;
++              *age = (sw_bits >> cconn->task_idx_bits) & ISCSI_AGE_MASK;
+-      cxgb3i_tag_debug("parse itt 0x%x, idx 0x%x, age 0x%x.\n",
+-                       itt,  idx ? *idx : 0xFFFFF, age ? *age : 0xFF);
++      cxgb3i_tag_debug("parse tag 0x%x/0x%x, sw 0x%x, idx 0x%x, age 0x%x.\n",
++                      tag, itt, sw_bits, idx ? *idx : 0xFFFFF,
++                      age ? *age : 0xFF);
+ }
+ static int cxgb3i_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
+@@ -771,26 +744,40 @@
+       struct cxgb3i_conn *cconn = conn->dd_data;
+       struct iscsi_tcp_conn *tcp_conn = &cconn->tcp_conn;
+       struct cxgb3i_adapter *snic = cconn->hba->snic;
+-      u32 sw_tag = tag_base(&snic->tag_format, task->itt, sess->age);
++      struct cxgb3i_tag_format *tformat = &snic->tag_format;
++      u32 sw_tag = (sess->age << cconn->task_idx_bits) | task->itt;
+       u32 tag = RESERVED_ITT;
+-      if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE)) {
++      if (sc && (scsi_bidi_cmnd(sc) ||
++              sc->sc_data_direction == DMA_FROM_DEVICE) &&
++              cxgb3i_sw_tag_usable(tformat, sw_tag)) {
+               struct s3_conn *c3cn = (struct s3_conn *)(tcp_conn->sock);
+-              tag =
+-                  cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
++              tag = cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
+                                          scsi_in(sc)->length,
+                                          scsi_in(sc)->table.sgl,
+                                          scsi_in(sc)->table.nents,
+                                          GFP_ATOMIC);
+       }
+       if (tag == RESERVED_ITT)
+-              tag = sw_tag | (snic->tag_format.rsvd_mask <<
+-                              snic->tag_format.rsvd_shift);
++              tag = cxgb3i_set_non_ddp_tag(tformat, sw_tag);
++      /* the itt needs to be sent in big-endian order */
+       *hdr_itt = htonl(tag);
+-      cxgb3i_tag_debug("new tag 0x%x/0x%x (itt 0x%x, age 0x%x).\n",
+-                       tag, *hdr_itt, task->itt, sess->age);
+-
++      if (sc) {
++              if (sc->sc_data_direction == DMA_FROM_DEVICE)
++                      cxgb3i_tag_debug("read, len %u, tag 0x%x/0x%x "
++                                       "(itt 0x%x, age 0x%x, sw 0x%x).\n",
++                                       scsi_in(sc)->length, tag, *hdr_itt,
++                                       task->itt, sess->age, sw_tag);
++              else
++                      cxgb3i_tag_debug("write, len %u, tag 0x%x/0x%x "
++                                       "(itt 0x%x, age 0x%x, sw 0x%x).\n",
++                                       scsi_out(sc)->length, tag, *hdr_itt,
++                                       task->itt, sess->age, sw_tag);
++      } else
++              cxgb3i_tag_debug("ctrl, tag 0x%x/0x%x (itt 0x%x, age 0x%x, "
++                               "sw 0x%x).\n",
++                               tag, *hdr_itt, task->itt, sess->age, sw_tag);
+       return 0;
+ }
+@@ -800,14 +787,15 @@
+       struct iscsi_conn *conn = task->conn;
+       struct cxgb3i_conn *cconn = conn->dd_data;
+       struct cxgb3i_adapter *snic = cconn->hba->snic;
++      struct cxgb3i_tag_format *tformat = &snic->tag_format;
+       u32 tag = ntohl(hdr_itt);
+-      cxgb3i_tag_debug("release tag 0x%x.\n", tag);
++      cxgb3i_tag_debug("release %s tag 0x%x.\n", sc ? "scsi" : "ctrl", tag);
+-      if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE))
+-              cxgb3i_ddp_tag_release(snic, tag,
+-                                     scsi_in(sc)->table.sgl,
+-                                     scsi_in(sc)->table.nents);
++      if (sc && (scsi_bidi_cmnd(sc) ||
++              sc->sc_data_direction == DMA_FROM_DEVICE) &&
++              cxgb3i_is_ddp_tag(tformat, tag))
++              cxgb3i_ddp_tag_release(snic, tag);
+ }
+ /**
+@@ -820,7 +808,7 @@
+       .proc_name = "cxgb3i",
+       .queuecommand = iscsi_queuecommand,
+       .change_queue_depth = iscsi_change_queue_depth,
+-      .can_queue = 128 * (ISCSI_DEF_XMIT_CMDS_MAX - 1),
++      .can_queue = CXGB3I_SCSI_QDEPTH_DFLT - 1,
+       .sg_tablesize = SG_ALL,
+       .max_sectors = 0xFFFF,
+       .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN,
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.c
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.c        2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.c        2009-01-26 22:00:17.000000000 -0800
+@@ -22,19 +22,19 @@
+ #include "cxgb3i_ulp2.h"
+ #ifdef __DEBUG_C3CN_CONN__
+-#define c3cn_conn_debug         cxgb3i_log_debug
++#define c3cn_conn_debug               cxgb3i_log_debug
+ #else
+ #define c3cn_conn_debug(fmt...)
+ #endif
+ #ifdef __DEBUG_C3CN_TX__
+-#define c3cn_tx_debug         cxgb3i_log_debug
++#define c3cn_tx_debug         cxgb3i_log_debug
+ #else
+ #define c3cn_tx_debug(fmt...)
+ #endif
+ #ifdef __DEBUG_C3CN_RX__
+-#define c3cn_rx_debug         cxgb3i_log_debug
++#define c3cn_rx_debug         cxgb3i_log_debug
+ #else
+ #define c3cn_rx_debug(fmt...)
+ #endif
+@@ -42,9 +42,9 @@
+ /*
+  * module parameters releated to offloaded iscsi connection
+  */
+-static int cxgb3_rcv_win = 256 * 1024;
++static int cxgb3_rcv_win = 128 * 1024;
+ module_param(cxgb3_rcv_win, int, 0644);
+-MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=256KB)");
++MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=128KB)");
+ static int cxgb3_snd_win = 64 * 1024;
+ module_param(cxgb3_snd_win, int, 0644);
+@@ -456,12 +456,9 @@
+  * The number of WRs needed for an skb depends on the number of fragments
+  * in the skb and whether it has any payload in its main body.  This maps the
+  * length of the gather list represented by an skb into the # of necessary WRs.
+- *
+- * The max. length of an skb is controlled by the max pdu size which is ~16K.
+- * Also, assume the min. fragment length is the sector size (512), then add
+- * extra fragment counts for iscsi bhs and payload padding.
++ * The extra two fragments are for iscsi bhs and payload padding.
+  */
+-#define SKB_WR_LIST_SIZE      (16384/512 + 3)
++#define SKB_WR_LIST_SIZE      (MAX_SKB_FRAGS + 2)
+ static unsigned int skb_wrs[SKB_WR_LIST_SIZE] __read_mostly;
+ static void s3_init_wr_tab(unsigned int wr_len)
+@@ -484,7 +481,7 @@
+ static inline void reset_wr_list(struct s3_conn *c3cn)
+ {
+-      c3cn->wr_pending_head = NULL;
++      c3cn->wr_pending_head = c3cn->wr_pending_tail = NULL;
+ }
+ /*
+@@ -495,7 +492,7 @@
+ static inline void enqueue_wr(struct s3_conn *c3cn,
+                             struct sk_buff *skb)
+ {
+-      skb->sp = NULL;
++      skb_wr_next(skb) = NULL;
+       /*
+        * We want to take an extra reference since both us and the driver
+@@ -508,10 +505,22 @@
+       if (!c3cn->wr_pending_head)
+               c3cn->wr_pending_head = skb;
+       else
+-              c3cn->wr_pending_tail->sp = (void *)skb;
++              skb_wr_next(c3cn->wr_pending_tail) = skb;
+       c3cn->wr_pending_tail = skb;
+ }
++static int count_pending_wrs(struct s3_conn *c3cn)
++{
++      const struct sk_buff *skb;
++      int n = 0;
++
++      /* walk the pending WR list, summing each skb's csum as its WR count */
++      for (skb = c3cn->wr_pending_head; skb; skb = skb_wr_next(skb))
++              n += skb->csum;
++
++      return n;
++}
++
+ static inline struct sk_buff *peek_wr(const struct s3_conn *c3cn)
+ {
+       return c3cn->wr_pending_head;
+@@ -528,8 +537,8 @@
+       if (likely(skb)) {
+               /* Don't bother clearing the tail */
+-              c3cn->wr_pending_head = (struct sk_buff *)skb->sp;
+-              skb->sp = NULL;
++              c3cn->wr_pending_head = skb_wr_next(skb);
++              skb_wr_next(skb) = NULL;
+       }
+       return skb;
+ }
+@@ -542,13 +551,15 @@
+ }
+ static inline void make_tx_data_wr(struct s3_conn *c3cn, struct sk_buff *skb,
+-                                 int len)
++                                      int len, int req_completion)
+ {
+       struct tx_data_wr *req;
+       skb_reset_transport_header(skb);
+       req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req));
+-      req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
++
++      req->wr_hi = htonl((V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)) |
++                              (req_completion ? F_WR_COMPL : 0));
+       req->wr_lo = htonl(V_WR_TID(c3cn->tid));
+       req->sndseq = htonl(c3cn->snd_nxt);
+       /* len includes the length of any HW ULP additions */
+@@ -556,11 +567,11 @@
+       req->param = htonl(V_TX_PORT(c3cn->l2t->smt_idx));
+       /* V_TX_ULP_SUBMODE sets both the mode and submode */
+       req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(skb)) |
+-                         V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1)));
++                      V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1)));
+       if (!c3cn_flag(c3cn, C3CN_TX_DATA_SENT)) {
+-              req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
+-                                  V_TX_CPU_IDX(c3cn->qset));
++                      req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
++                                      V_TX_CPU_IDX(c3cn->qset));
+               /* Sendbuffer is in units of 32KB. */
+               req->param |= htonl(V_TX_SNDBUF(cxgb3_snd_win >> 15));
+               c3cn_set_flag(c3cn, C3CN_TX_DATA_SENT);
+@@ -591,7 +602,7 @@
+       if (unlikely(c3cn->state == C3CN_STATE_CONNECTING ||
+                    c3cn->state == C3CN_STATE_CLOSE_WAIT_1 ||
+-                   c3cn->state == C3CN_STATE_ABORTING)) {
++                   c3cn->state >= C3CN_STATE_ABORTING)) {
+               c3cn_tx_debug("c3cn 0x%p, in closing state %u.\n",
+                             c3cn, c3cn->state);
+               return 0;
+@@ -626,19 +637,22 @@
+               c3cn->wr_unacked += wrs_needed;
+               enqueue_wr(c3cn, skb);
+-              if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
+-                      len += ulp_extra_len(skb);
+-                      make_tx_data_wr(c3cn, skb, len);
+-                      c3cn->snd_nxt += len;
+-                      if ((req_completion
+-                           && c3cn->wr_unacked == wrs_needed)
+-                          || (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL)
+-                          || c3cn->wr_unacked >= c3cn->wr_max / 2) {
+-                              struct work_request_hdr *wr = cplhdr(skb);
++              c3cn_tx_debug("c3cn 0x%p, enqueue, skb len %u/%u, frag %u, "
++                              "wr %d, left %u, unack %u.\n",
++                              c3cn, skb->len, skb->data_len, frags,
++                              wrs_needed, c3cn->wr_avail, c3cn->wr_unacked);
+-                              wr->wr_hi |= htonl(F_WR_COMPL);
++              if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
++                      if ((req_completion &&
++                              c3cn->wr_unacked == wrs_needed) ||
++                              (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL) ||
++                              c3cn->wr_unacked >= c3cn->wr_max / 2) {
++                              req_completion = 1;
+                               c3cn->wr_unacked = 0;
+                       }
++                      len += ulp_extra_len(skb);
++                      make_tx_data_wr(c3cn, skb, len, req_completion);
++                      c3cn->snd_nxt += len;
+                       CXGB3_SKB_CB(skb)->flags &= ~C3CB_FLAG_NEED_HDR;
+               }
+@@ -1153,12 +1167,28 @@
+  * Process an acknowledgment of WR completion.  Advance snd_una and send the
+  * next batch of work requests from the write queue.
+  */
++
++static void check_wr_invariants(struct s3_conn *c3cn)
++{
++      int pending = count_pending_wrs(c3cn);
++
++      if (likely(c3cn->wr_avail + pending == c3cn->wr_max))
++              return;
++      cxgb3i_log_error("TID %u: credit imbalance: avail %u, "
++                       "pending %u, total should be %u\n",
++                       c3cn->tid, c3cn->wr_avail, pending, c3cn->wr_max);
++}
++
+ static void process_wr_ack(struct s3_conn *c3cn, struct sk_buff *skb)
+ {
+       struct cpl_wr_ack *hdr = cplhdr(skb);
+       unsigned int credits = ntohs(hdr->credits);
+       u32 snd_una = ntohl(hdr->snd_una);
++      c3cn_tx_debug("%u WR credits, avail %u, unack %u, TID %u, state %u.\n",
++                      credits, c3cn->wr_avail, c3cn->wr_unacked,
++                      c3cn->tid, c3cn->state);
++
+       c3cn->wr_avail += credits;
+       if (c3cn->wr_unacked > c3cn->wr_max - c3cn->wr_avail)
+               c3cn->wr_unacked = c3cn->wr_max - c3cn->wr_avail;
+@@ -1173,6 +1203,17 @@
+                       break;
+               }
+               if (unlikely(credits < p->csum)) {
++                      struct tx_data_wr *w = cplhdr(p);
++                      cxgb3i_log_error("TID %u got %u WR credits need %u, "
++                                      "len %u, main body %u, frags %u, "
++                                      "seq # %u, ACK una %u, ACK nxt %u, "
++                                      "WR_AVAIL %u, WRs pending %u\n",
++                                      c3cn->tid, credits, p->csum, p->len,
++                                      p->len - p->data_len,
++                                      skb_shinfo(p)->nr_frags,
++                                      ntohl(w->sndseq), snd_una,
++                                      ntohl(hdr->snd_nxt), c3cn->wr_avail,
++                                      count_pending_wrs(c3cn) - credits);
+                       p->csum -= credits;
+                       break;
+               } else {
+@@ -1182,8 +1223,14 @@
+               }
+       }
+-      if (unlikely(before(snd_una, c3cn->snd_una)))
++      check_wr_invariants(c3cn);
++
++      if (unlikely(before(snd_una, c3cn->snd_una))) {
++              cxgb3i_log_error("TID %u, unexpected sequence # %u in WR_ACK "
++                              "snd_una %u\n",
++                              c3cn->tid, snd_una, c3cn->snd_una);
+               goto out_free;
++      }
+       if (c3cn->snd_una != snd_una) {
+               c3cn->snd_una = snd_una;
+@@ -1454,11 +1501,14 @@
+                             struct dst_entry *dst)
+ {
+       BUG_ON(c3cn->cdev != cdev);
+-      c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs;
++      c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs - 1;
+       c3cn->wr_unacked = 0;
+       c3cn->mss_idx = select_mss(c3cn, dst_mtu(dst));
+       reset_wr_list(c3cn);
++
++      c3cn_conn_debug("c3cn 0x%p, wr max %u, avail %u.\n",
++                                      c3cn, c3cn->wr_max, c3cn->wr_avail);
+ }
+ static int initiate_act_open(struct s3_conn *c3cn, struct net_device *dev)
+@@ -1673,9 +1723,17 @@
+               goto out_err;
+       }
+-      err = -EPIPE;
+       if (c3cn->err) {
+               c3cn_tx_debug("c3cn 0x%p, err %d.\n", c3cn, c3cn->err);
++              err = -EPIPE;
++              goto out_err;
++      }
++
++      if (c3cn->write_seq - c3cn->snd_una >= cxgb3_snd_win) {
++              c3cn_tx_debug("c3cn 0x%p, snd %u - %u > %u.\n",
++                              c3cn, c3cn->write_seq, c3cn->snd_una,
++                              cxgb3_snd_win);
++              err = -EAGAIN;
+               goto out_err;
+       }
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.h
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.h        2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.h        2009-01-26 22:00:17.000000000 -0800
+@@ -180,7 +180,7 @@
+  * @seq:      tcp sequence number
+  * @ddigest:  pdu data digest
+  * @pdulen:   recovered pdu length
+- * @ulp_data: scratch area for ULP
++ * @wr_next:  scratch area for tx wr
+  */
+ struct cxgb3_skb_cb {
+       __u8 flags;
+@@ -188,7 +188,7 @@
+       __u32 seq;
+       __u32 ddigest;
+       __u32 pdulen;
+-      __u8 ulp_data[16];
++      struct sk_buff *wr_next;
+ };
+ #define CXGB3_SKB_CB(skb)     ((struct cxgb3_skb_cb *)&((skb)->cb[0]))
+@@ -196,7 +196,7 @@
+ #define skb_ulp_mode(skb)     (CXGB3_SKB_CB(skb)->ulp_mode)
+ #define skb_ulp_ddigest(skb)  (CXGB3_SKB_CB(skb)->ddigest)
+ #define skb_ulp_pdulen(skb)   (CXGB3_SKB_CB(skb)->pdulen)
+-#define skb_ulp_data(skb)     (CXGB3_SKB_CB(skb)->ulp_data)
++#define skb_wr_next(skb)      (CXGB3_SKB_CB(skb)->wr_next)
+ enum c3cb_flags {
+       C3CB_FLAG_NEED_HDR = 1 << 0,    /* packet needs a TX_DATA_WR header */
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.c
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.c   2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.c   2009-01-26 22:00:17.000000000 -0800
+@@ -51,6 +51,7 @@
+ static unsigned char sw_tag_idx_bits;
+ static unsigned char sw_tag_age_bits;
+ static unsigned char page_idx = ULP2_PGIDX_MAX;
++static unsigned int skb_copymax = SKB_MAX_HEAD(TX_HEADER_LEN);
+ static void cxgb3i_ddp_page_init(void)
+ {
+@@ -59,6 +60,10 @@
+       sw_tag_idx_bits = (__ilog2_u32(ISCSI_ITT_MASK)) + 1;
+       sw_tag_age_bits = (__ilog2_u32(ISCSI_AGE_MASK)) + 1;
++      cxgb3i_log_info("tag itt 0x%x, %u bits, age 0x%x, %u bits.\n",
++                                      ISCSI_ITT_MASK, sw_tag_idx_bits,
++                                      ISCSI_AGE_MASK, sw_tag_age_bits);
++
+       for (i = 0; i < ULP2_PGIDX_MAX; i++) {
+               if (PAGE_SIZE == (1UL << ddp_page_shift[i])) {
+                       page_idx = i;
+@@ -312,7 +317,6 @@
+                                page_idx, sgcnt, xferlen, ULP2_DDP_THRESHOLD);
+               return RESERVED_ITT;
+       }
+-      return RESERVED_ITT;
+       gl = ddp_make_gl(xferlen, sgl, sgcnt, gfp);
+       if (!gl) {
+@@ -322,9 +326,9 @@
+       }
+       npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
+-      idx_max = ddp->nppods - npods + 1;
++      idx_max = ddp->nppods - npods;
+-      if (ddp->idx_last == ddp->nppods)
++      if (ddp->idx_last >= idx_max)
+               idx = ddp_find_unused_entries(ddp, 0, idx_max, npods, gl);
+       else {
+               idx = ddp_find_unused_entries(ddp, ddp->idx_last + 1, idx_max,
+@@ -345,12 +349,13 @@
+       if (ddp_gl_map(snic->pdev, gl) < 0)
+               goto unmap_sgl;
+-      
+-      tag = sw_tag | (idx << snic->tag_format.rsvd_shift);
++
++      tag = cxgb3i_ddp_tag_base(&snic->tag_format, sw_tag);
++      tag |= idx << PPOD_IDX_SHIFT;
+       hdr.rsvd = 0;
+       hdr.vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
+-      hdr.pgsz_tag_clr = htonl(tag & snic->tag_format.rsvd_tag_mask);
++      hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask);
+       hdr.maxoffset = htonl(xferlen);
+       hdr.pgoffset = htonl(gl->offset);
+@@ -372,30 +377,35 @@
+       return RESERVED_ITT;
+ }
+-void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *snic, u32 tag,
+-                          struct scatterlist *sgl, unsigned int sgcnt)
++void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *snic, u32 tag)
+ {
+-      u32 idx = (tag >> snic->tag_format.rsvd_shift) &
+-                snic->tag_format.rsvd_mask;
++      struct cxgb3i_ddp_info *ddp = snic->ddp;
++      u32 idx;
+-      if (idx < snic->tag_format.rsvd_mask) {
+-              struct cxgb3i_ddp_info *ddp = snic->ddp;
++      if (!ddp) {
++              cxgb3i_log_error("release ddp tag 0x%x, ddp NULL.\n", tag);
++              return;
++      }
++
++      idx = (tag >> PPOD_IDX_SHIFT) & ddp->idx_mask;
++      if (idx < ddp->nppods) {
+               struct cxgb3i_gather_list *gl = ddp->gl_map[idx];
+               unsigned int npods;
+               if (!gl || !gl->nelem) {
+-                      cxgb3i_log_warn("release tag 0x%x, idx 0x%x, no gl.\n",
+-                                      tag, idx);
++                      cxgb3i_log_error("release tag 0x%x, idx 0x%x, no gl.\n",
++                                       tag, idx);
+                       return;
+               }
+               npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
+-
+               cxgb3i_tag_debug("ddp tag 0x%x, release idx 0x%x, npods %u.\n",
+                                tag, idx, npods);
+               clear_ddp_map(snic, idx, npods);
+               ddp_unmark_entries(ddp, idx, npods);
+               ddp_gl_unmap(snic->pdev, gl);
+-      }
++      } else
++               cxgb3i_log_error("ddp tag 0x%x, idx 0x%x > max 0x%x.\n",
++                                                tag, idx, ddp->nppods);
+ }
+ int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *cconn, int hcrc, int dcrc)
+@@ -403,12 +413,18 @@
+       struct iscsi_tcp_conn *tcp_conn = cconn->conn->dd_data;
+       struct s3_conn *c3cn = (struct s3_conn *)(tcp_conn->sock);
+       struct sk_buff *skb = alloc_skb(sizeof(struct cpl_set_tcb_field),
+-                                      GFP_KERNEL | __GFP_NOFAIL);
++                                      GFP_KERNEL);
+       struct cpl_set_tcb_field *req;
+       u64 val = (hcrc ? 1 : 0) | (dcrc ? 2 : 0);
++      if (!skb)
++              return -ENOMEM;
++
+       if (page_idx < ULP2_PGIDX_MAX)
+               val |= page_idx << 4;
++      else
++              cxgb3i_log_warn("TID 0x%x, host page 0x%lx default to 4K.\n",
++                              c3cn->tid, PAGE_SIZE);
+       /* set up ulp submode and page size */
+       req = (struct cpl_set_tcb_field *)skb_put(skb, sizeof(*req));
+@@ -476,14 +492,14 @@
+                                  (skb_ulp_mode(skb) & ULP2_FLAG_DCRC_ERROR)) ?
+                   ISCSI_SEGMENT_DGST_ERR : 0;
+               if (skb_ulp_mode(skb) & ULP2_FLAG_DATA_DDPED) {
+-                      cxgb3i_ddp_debug("skb 0x%p, opcode 0x%x, data %u, "
+-                                       "ddp'ed, itt 0x%x.\n",
+-                                       skb, hdr->opcode & ISCSI_OPCODE_MASK,
+-                                       tcp_conn->in.datalen, hdr->itt);
++                      cxgb3i_rx_debug("skb 0x%p, opcode 0x%x, data %u, "
++                                      "ddp'ed, itt 0x%x.\n",
++                                      skb, hdr->opcode & ISCSI_OPCODE_MASK,
++                                      tcp_conn->in.datalen, hdr->itt);
+                       segment->total_copied = segment->total_size;
+               } else {
+-                      cxgb3i_ddp_debug("skb 0x%p, opcode 0x%x, data %u, "
+-                                       "not ddp'ed, itt 0x%x.\n",
++                      cxgb3i_rx_debug("skb 0x%p, opcode 0x%x, data %u, "
++                                      "not ddp'ed, itt 0x%x.\n",
+                                        skb, hdr->opcode & ISCSI_OPCODE_MASK,
+                                        tcp_conn->in.datalen, hdr->itt);
+                       offset += sizeof(struct cpl_iscsi_hdr_norss);
+@@ -520,24 +536,168 @@
+       skb_ulp_mode(skb) = (ULP_MODE_ISCSI << 4) | submode;
+ }
++/*
++ * sg_page_coalesce - describe @dlen bytes of an sg list as page fragments
++ * @sg:       scatterlist entry in which the data starts
++ * @offset:   byte offset of the data inside @sg
++ * @dlen:     number of data bytes to describe
++ * @frags:    output array of page fragments
++ * @frag_max: number of entries available in @frags
++ *
++ * A chunk that starts right where the previous fragment ends, in the same
++ * page, is merged into that fragment. Returns the number of fragments used,
++ * or -EINVAL if @frags is too small or the sg list ends before @dlen bytes.
++ */
++static int sg_page_coalesce(struct scatterlist *sg, unsigned int offset,
++                          unsigned int dlen, skb_frag_t *frags, int frag_max)
++{
++      unsigned int sglen = sg->length - offset;
++      struct page *page = sg_page(sg);
++      unsigned int datalen = dlen, copy;
++      int i;
++
++      i = 0;
++      do {
++              if (!sglen) {
++                      sg = sg_next(sg);
++                      if (!sg) {
++                              cxgb3i_log_error("%s, sg NULL, len %u/%u.\n",
++                                               __func__, datalen, dlen);
++                              return -EINVAL;
++                      }
++                      offset = 0;
++                      sglen = sg->length;
++                      page = sg_page(sg);
++              }
++              copy = min(datalen, sglen);
++              if (i && page == frags[i - 1].page &&
++                  offset + sg->offset ==
++                      frags[i - 1].page_offset + frags[i - 1].size) {
++                      frags[i - 1].size += copy;
++              } else {
++                      if (i >= frag_max) {
++                              cxgb3i_log_error("%s, too many pages > %u, "
++                                               "dlen %u.\n", __func__,
++                                               frag_max, dlen);
++                              return -EINVAL;
++                      }
++
++                      frags[i].page = page;
++                      frags[i].page_offset = sg->offset + offset;
++                      frags[i].size = copy;
++                      i++;
++              }
++              datalen -= copy;
++              offset += copy;
++              sglen -= copy;
++      } while (datalen);
++
++      return i;
++}
++
++/*
++ * copy_frags_to_skb_pages - copy fragment data into freshly allocated pages
++ * @skb:      skb receiving the new pages as page fragments
++ * @frags:    source fragments; size/page_offset are consumed while copying
++ * @frag_cnt: number of entries in @frags
++ * @datalen:  total number of data bytes to be carried by the new pages
++ *
++ * Returns the number of bytes copied, -EINVAL if the skb has no free
++ * fragment slot left, or -ENOMEM if a page cannot be allocated.
++ */
++static int copy_frags_to_skb_pages(struct sk_buff *skb, skb_frag_t *frags,
++                                 int frag_cnt, unsigned int datalen)
++{
++      struct page *page = NULL;
++      unsigned char *dp;
++      unsigned int pg_left = 0;
++      unsigned int copy_total = 0;
++      int i;
++
++      for (i = 0; i < frag_cnt; i++, frags++) {
++              while (frags->size) {
++                      unsigned char *sp = page_address(frags->page);
++                      unsigned int copy;
++
++                      if (!pg_left) {
++                              int cnt = skb_shinfo(skb)->nr_frags;
++
++                              if (cnt >= MAX_SKB_FRAGS) {
++                                      cxgb3i_log_error("%s: pdu data %u.\n",
++                                                       __func__, datalen);
++                                      return -EINVAL;
++                              }
++                              page = alloc_page(GFP_ATOMIC);
++                              if (!page)
++                                      return -ENOMEM;
++                              dp = page_address(page);
++                              pg_left = PAGE_SIZE;
++
++                              copy = min(pg_left, datalen);
++                              skb_fill_page_desc(skb, cnt, page, 0, copy);
++
++                              skb->len += copy;
++                              skb->data_len += copy;
++                              skb->truesize += copy;
++                              datalen -= copy;
++                      }
++                      copy = min(pg_left, frags->size);
++                      memcpy(dp, sp + frags->page_offset, copy);
++
++                      frags->size -= copy;
++                      frags->page_offset += copy;
++                      dp += copy;
++                      pg_left -= copy;
++                      copy_total += copy;
++              }
++      }
++
++      return copy_total;
++}
++
+ int cxgb3i_conn_ulp2_xmit(struct iscsi_conn *conn)
+ {
+-      struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
++      struct cxgb3i_conn *cconn = conn->dd_data;
++      struct iscsi_tcp_conn *tcp_conn = &cconn->tcp_conn;
+       struct iscsi_segment *hdr_seg = &tcp_conn->out.segment;
+       struct iscsi_segment *data_seg = &tcp_conn->out.data_segment;
+       unsigned int hdrlen = hdr_seg->total_size;
+       unsigned int datalen = data_seg->total_size;
+       unsigned int padlen = iscsi_padding(datalen);
+-      unsigned int copymax = SKB_MAX_HEAD(TX_HEADER_LEN);
+-      unsigned int copylen;
++      unsigned int copylen = hdrlen;
++      unsigned int copy_dlen = 0;
+       struct sk_buff *skb;
+       unsigned char *dst;
++      int i, frag_cnt = 0;
+       int err = -EAGAIN;
+-      if (data_seg->data && ((datalen + padlen) < copymax))
+-              copylen = hdrlen + datalen + padlen;
+-      else
+-              copylen = hdrlen;
++      /*
++       * the whole pdu needs to fit into one skb, make sure we don't overrun
++       * the skb's frags[] array. If there are more sg pages than
++       * MAX_SKB_FRAGS, we have to copy the data either to the head or to
++       * newly allocated page(s). This could happen if the sg contains a lot
++       * of fragmented data chunks (pages).
++       */
++      if (datalen) {
++              if (!data_seg->data) {
++                      err = sg_page_coalesce(data_seg->sg,
++                                              data_seg->sg_offset,
++                                              data_seg->total_size,
++                                              cconn->frags,
++                                              TX_PDU_PAGES_MAX);
++                      if (err < 0)
++                              return err;
++                      frag_cnt = err;
++
++                      if (frag_cnt > MAX_SKB_FRAGS ||
++                          (padlen && frag_cnt + 1 > MAX_SKB_FRAGS))
++                              copy_dlen = datalen + padlen;
++              } else
++                      copy_dlen += datalen + padlen;
++      }
++
++      if (copylen + copy_dlen < skb_copymax)
++              copylen += copy_dlen;
+       /* supports max. 16K pdus, so one skb is enough to hold all the data */
+       skb = alloc_skb(TX_HEADER_LEN + copylen, GFP_ATOMIC);
+@@ -575,70 +708,94 @@
+                       skb->data_len += datalen;
+                       skb->truesize += datalen;
+               }
+-      } else {
+-              struct scatterlist *sg = data_seg->sg;
+-              unsigned int offset = data_seg->sg_offset;
+-              struct page *page = sg_page(sg);
+-              unsigned int sglen = sg->length - offset;
+-
+-              do {
+-                      int i = skb_shinfo(skb)->nr_frags;
+-                      unsigned int copy;
++      } else if (copy_dlen) {
++              /* need to copy the page fragments */
++              if (copylen > hdrlen) {
++                      skb_frag_t *frag = cconn->frags;
+-                      if (!sglen) {
+-                              sg = sg_next(sg);
+-                              page = sg_page(sg);
+-                              offset = 0;
+-                              sglen = sg->length;
++                      /* data fits in the skb's headroom */
++                      for (i = 0; i < frag_cnt; i++, frag++) {
++                              memcpy(dst,
++                                      page_address(frag->page) +
++                                              frag->page_offset,
++                                      frag->size);
++                              dst += frag->size;
+                       }
+-                      copy = min(sglen, datalen);
+-
+-                      if (i && skb_can_coalesce(skb, i, page,
+-                                                sg->offset + offset)) {
+-                              skb_shinfo(skb)->frags[i - 1].size += copy;
+-                      } else {
+-                              get_page(page);
+-                              skb_fill_page_desc(skb, i, page,
+-                                                 sg->offset + offset, copy);
++                      if (padlen)
++                              memset(dst, 0, padlen);
++              } else {
++                      /* allocate pages to hold the data */
++                      err = copy_frags_to_skb_pages(skb, cconn->frags,
++                                                    frag_cnt, datalen);
++                      if (err < 0) {
++                              err = -EAGAIN;
++                              goto free_skb;
+                       }
+-                      skb->len += copy;
+-                      skb->data_len += copy;
+-                      skb->truesize += copy;
+-                      offset += copy;
+-                      sglen -= copy;
+-                      datalen -= copy;
+-              } while (datalen);
+-      }
+-
+-      if (padlen && skb_shinfo(skb)->nr_frags) {
+-              int idx = skb_shinfo(skb)->nr_frags;
+-              get_page(pad_page);
+-              skb_fill_page_desc(skb, idx, pad_page, 0, padlen);
+-              skb->data_len += padlen;
+-              skb->truesize += padlen;
+-              skb->len += padlen;
++                      WARN_ON(err != datalen);
++                      if (padlen) {
++                              skb_frag_t *frag;
++
++                              /*
++                               * zero-pad the last page frag filled above;
++                               * frags[nr_frags] is the first unused slot.
++                               */
++                              i = skb_shinfo(skb)->nr_frags;
++                              frag = &skb_shinfo(skb)->frags[i - 1];
++                              dst = page_address(frag->page);
++
++                              memset(dst + frag->page_offset + frag->size,
++                                     0, padlen);
++                              frag->size += padlen;
++                              /* the pad bytes are part of the pdu data */
++                              skb->len += padlen;
++                              skb->data_len += padlen;
++                              skb->truesize += padlen;
++                      }
++              }
++      } else {
++              /* sg pages fit into the skb's frags[] */
++              for (i = 0; i < frag_cnt; i++)
++                      get_page(cconn->frags[i].page);
++              memcpy(skb_shinfo(skb)->frags, cconn->frags,
++                      sizeof(skb_frag_t) * frag_cnt);
++              skb_shinfo(skb)->nr_frags = frag_cnt;
++              skb->len += datalen;
++              skb->data_len += datalen;
++              skb->truesize += datalen;
++
++              if (padlen) {
++                      i = skb_shinfo(skb)->nr_frags;
++                      get_page(pad_page);
++                      skb_fill_page_desc(skb, i, pad_page, 0, padlen);
++                      skb->len += padlen;
++                      skb->data_len += padlen;
++                      skb->truesize += padlen;
++              }
+       }
+ send_pdu:
+       err = cxgb3i_c3cn_send_pdus((struct s3_conn *)tcp_conn->sock, skb);
+-
+       if (err > 0) {
+               int pdulen = hdrlen + datalen + padlen;
++
+               if (conn->hdrdgst_en)
+                       pdulen += ISCSI_DIGEST_SIZE;
+               if (datalen && conn->datadgst_en)
+                       pdulen += ISCSI_DIGEST_SIZE;
+               hdr_seg->total_copied = hdr_seg->total_size;
+-              if (datalen)
+-                      data_seg->total_copied = data_seg->total_size;
++              data_seg->total_copied = data_seg->total_size;
+               conn->txdata_octets += pdulen;
+               return pdulen;
+       }
++free_skb:
++      /* report the skb lengths before the skb is freed below */
++      if (err < 0 && err != -EAGAIN)
++              cxgb3i_log_error("conn 0x%p, xmit err %d, skb len %u/%u.\n",
++                               conn, err, skb->len, skb->data_len);
+       kfree_skb(skb);
+       if (err < 0 && err != -EAGAIN) {
+-              cxgb3i_log_error("conn 0x%p, xmit err %d.\n", conn, err);
+               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+               return err;
+       }
+@@ -652,6 +799,9 @@
+               return -ENOMEM;
+       memset(page_address(pad_page), 0, PAGE_SIZE);
+       cxgb3i_ddp_page_init();
++      cxgb3i_log_info("skb max. frag %u, head %u.\n",
++                      (unsigned int)MAX_SKB_FRAGS,
++                      (unsigned int)skb_copymax);
+       return 0;
+ }
+@@ -720,7 +870,7 @@
+       read_lock(&c3cn->callback_lock);
+       conn = c3cn->user_data;
+-      if (conn && c3cn->state != C3CN_STATE_ESTABLISHED)
++      if (conn)
+               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+       read_unlock(&c3cn->callback_lock);
+ }
+@@ -730,7 +880,7 @@
+       struct t3cdev *tdev = snic->tdev;
+       struct cxgb3i_ddp_info *ddp;
+       struct ulp_iscsi_info uinfo;
+-      unsigned int ppmax, bits, max_bits;
++      unsigned int ppmax, bits;
+       int i, err;
+       err = tdev->ctl(tdev, ULP_ISCSI_GET_PARAMS, &uinfo);
+@@ -740,26 +890,21 @@
+               return err;
+       }
+-      ppmax = (uinfo.ulimit - uinfo.llimit + 1) >> PPOD_SIZE_SHIFT;
+-      max_bits = min(PPOD_IDX_MAX_SIZE,
+-                     (32 - sw_tag_idx_bits - sw_tag_age_bits));
+-      bits = __ilog2_u32(ppmax) + 1;
+-      if (bits > max_bits)
+-              bits = max_bits;
+-      ppmax = (1 << bits) - 1;
+-
+       snic->tx_max_size = min_t(unsigned int,
+                                 uinfo.max_txsz, ULP2_MAX_PKT_SIZE);
+       snic->rx_max_size = min_t(unsigned int,
+                                 uinfo.max_rxsz, ULP2_MAX_PKT_SIZE);
++      cxgb3i_log_info("ddp max pkt size: %u/%u,%u, %u/%u,%u.\n",
++                      snic->tx_max_size, uinfo.max_txsz, ULP2_MAX_PKT_SIZE,
++                      snic->rx_max_size, uinfo.max_rxsz, ULP2_MAX_PKT_SIZE);
+-      snic->tag_format.idx_bits = sw_tag_idx_bits;
+-      snic->tag_format.age_bits = sw_tag_age_bits;
+-      snic->tag_format.rsvd_bits = bits;
+-      snic->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
+-      snic->tag_format.rsvd_mask = (1 << snic->tag_format.rsvd_bits) - 1;
+-      snic->tag_format.rsvd_tag_mask =
+-              (1 << (snic->tag_format.rsvd_bits + PPOD_IDX_SHIFT)) - 1;
++      snic->tag_format.sw_bits = sw_tag_idx_bits + sw_tag_age_bits;
++
++      ppmax = (uinfo.ulimit - uinfo.llimit + 1) >> PPOD_SIZE_SHIFT;
++      bits = __ilog2_u32(ppmax) + 1;
++      if (bits > PPOD_IDX_MAX_SIZE)
++              bits = PPOD_IDX_MAX_SIZE;
++      ppmax = (1 << (bits - 1)) - 1;
+       ddp = cxgb3i_alloc_big_mem(sizeof(struct cxgb3i_ddp_info) +
+                                  ppmax *
+@@ -779,12 +924,15 @@
+       spin_lock_init(&ddp->map_lock);
+       ddp->llimit = uinfo.llimit;
+       ddp->ulimit = uinfo.ulimit;
++      ddp->nppods = ppmax;
++      ddp->idx_last = ppmax;
++      ddp->idx_bits = bits;
++      ddp->idx_mask = (1 << bits) - 1;
++      ddp->rsvd_tag_mask = (1 << (bits + PPOD_IDX_SHIFT)) - 1;
+-      uinfo.tagmask =
+-          snic->tag_format.rsvd_mask << snic->tag_format.rsvd_shift;
++      uinfo.tagmask = ddp->idx_mask << PPOD_IDX_SHIFT;
+       for (i = 0; i < ULP2_PGIDX_MAX; i++)
+               uinfo.pgsz_factor[i] = ddp_page_order[i];
+-
+       uinfo.ulimit = uinfo.llimit + (ppmax << PPOD_SIZE_SHIFT);
+       err = tdev->ctl(tdev, ULP_ISCSI_SET_PARAMS, &uinfo);
+@@ -794,19 +942,20 @@
+               goto free_ppod_map;
+       }
+-      ddp->nppods = ppmax;
+-      ddp->idx_last = ppmax;
+-
+       tdev->ulp_iscsi = snic->ddp = ddp;
+-      cxgb3i_log_info("snic nppods %u (0x%x ~ 0x%x), rsvd shift %u, "
+-                      "bits %u, mask 0x%x, 0x%x, pkt %u,%u.\n",
+-                      ppmax, ddp->llimit, ddp->ulimit,
+-                      snic->tag_format.rsvd_shift,
+-                      snic->tag_format.rsvd_bits,
+-                      snic->tag_format.rsvd_mask, uinfo.tagmask,
+-                      snic->tx_max_size, snic->rx_max_size);
++      cxgb3i_log_info("nppods %u (0x%x ~ 0x%x), bits %u, mask 0x%x,0x%x.\n",
++                      ppmax, ddp->llimit, ddp->ulimit, ddp->idx_bits,
++                      ddp->idx_mask, ddp->rsvd_tag_mask);
++      snic->tag_format.rsvd_bits = ddp->idx_bits;
++      snic->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
++      snic->tag_format.rsvd_mask = (1 << snic->tag_format.rsvd_bits) - 1;
++
++      cxgb3i_log_info("tag format: sw %u, rsvd %u,%u, mask 0x%x.\n",
++                      snic->tag_format.sw_bits, snic->tag_format.rsvd_bits,
++                      snic->tag_format.rsvd_shift,
++                      snic->tag_format.rsvd_mask);
+       return 0;
+ free_ppod_map:
+diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.h
+--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.h   2009-01-14 15:17:57.000000000 -0800
++++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.h   2009-01-26 22:00:17.000000000 -0800
+@@ -13,7 +13,6 @@
+ #ifndef __CXGB3I_ULP2_H__
+ #define __CXGB3I_ULP2_H__
+-#define ULP2_PDU_PAYLOAD_DFLT (16224 - ISCSI_PDU_HEADER_MAX)
+ #define PPOD_PAGES_MAX                4
+ #define PPOD_PAGES_SHIFT      2       /* 4 pages per pod */
+@@ -100,9 +99,6 @@
+ #define ULP2_FLAG_DCRC_ERROR          0x20
+ #define ULP2_FLAG_PAD_ERROR           0x40
+-#define ULP2_MAX_PKT_SIZE             16224
+-#define ULP2_MAX_PDU_SIZE             8192
+-
+ void cxgb3i_conn_closing(struct s3_conn *);
+ void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn);
+ void cxgb3i_conn_tx_open(struct s3_conn *c3cn);