]> git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blame - src/patches/suse-2.6.27.31/patches.drivers/cxgb3i-fix-skb-overrun
Move xen patchset to new version's subdir.
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.drivers / cxgb3i-fix-skb-overrun
CommitLineData
00e5a55c
BS
1Subject: cxgb3i - fixes over-run of skb MAX_SKB_FRAGS
2From: Karen Xie <kxie@chelsio.com>
3References: bnc#468314
4
5This patch fixes the over-run of skb's MAX_SKB_FRAGS between the cxgb3i and
6cxgb3 drivers on PPC64 systems.
7
8Signed-off-by: Karen Xie <kxie@chelsio.com>
9Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
10Acked-by: Hannes Reinecke <hare@suse.de>
11---
12
13diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i.h
14--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i.h 2009-01-14 15:17:57.000000000 -0800
15+++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i.h 2009-01-26 22:00:17.000000000 -0800
16@@ -36,6 +36,12 @@
17 #define CXGB3I_MAX_TARGET CXGB3I_MAX_CONN
18 #define CXGB3I_MAX_LUN 512
19 #define ISCSI_PDU_HEADER_MAX (56 + 256) /* bhs + digests + ahs */
20+#define ULP2_MAX_PKT_SIZE 16224
21+#define ISCSI_PDU_NONPAYLOAD_MAX \
22+ (sizeof(struct iscsi_hdr) + ISCSI_MAX_AHS_SIZE + 2*ISCSI_DIGEST_SIZE)
23+#define ULP2_MAX_PDU_PAYLOAD \
24+ (ULP2_MAX_PKT_SIZE - ISCSI_PDU_NONPAYLOAD_MAX)
25+
26
27 struct cxgb3i_adapter;
28 struct cxgb3i_hba;
29@@ -53,12 +59,11 @@
30 *
31 */
32 struct cxgb3i_tag_format {
33- unsigned char idx_bits;
34- unsigned char age_bits;
35+ unsigned char sw_bits;
36 unsigned char rsvd_bits;
37 unsigned char rsvd_shift;
38+ unsigned char filler[1];
39 u32 rsvd_mask;
40- u32 rsvd_tag_mask;
41 };
42
43 /**
44@@ -95,11 +100,137 @@
45 unsigned int ulimit;
46 unsigned int nppods;
47 unsigned int idx_last;
48+ unsigned char idx_bits;
49+ unsigned char filler[3];
50+ u32 idx_mask;
51+ u32 rsvd_tag_mask;
52 spinlock_t map_lock;
53 struct cxgb3i_gather_list **gl_map;
54 struct sk_buff **gl_skb;
55 };
56
57+/*
58+ * A cxgb3i ddp tag is 32 bits wide; it consists of reserved bits used by h/w
59+ * and non-reserved bits that can be used by the iscsi s/w.
60+ * The reserved bits are identified by the rsvd_bits and rsvd_shift fields
61+ * in struct cxgb3i_tag_format.
62+ *
63+ * The uppermost reserved bit can be used to check if a tag is a ddp tag or not:
64+ * if the bit is 0, the tag is a valid ddp tag; if it is 1, it is not.
65+ */
66+
67+/**
68+ * cxgb3i_is_ddp_tag - check if a given tag is a hw/ddp tag
69+ * @tformat: tag format information
70+ * @tag: tag to be checked
71+ *
72+ * return true if the tag is a ddp tag, false otherwise.
73+ */
74+static inline int cxgb3i_is_ddp_tag(struct cxgb3i_tag_format *tformat, u32 tag)
75+{
76+ return !(tag & (1 << (tformat->rsvd_bits + tformat->rsvd_shift - 1)));
77+}
78+
79+/**
80+ * cxgb3i_sw_tag_usable - check if a given s/w tag has enough bits left for
81+ * the reserved/hw bits
82+ * @tformat: tag format information
83+ * @sw_tag: s/w tag to be checked
84+ *
85+ * return true if the top rsvd_bits bits of sw_tag are clear, false otherwise.
86+ */
87+static inline int cxgb3i_sw_tag_usable(struct cxgb3i_tag_format *tformat,
88+ u32 sw_tag)
89+{
90+ sw_tag >>= (32 - tformat->rsvd_bits);
91+ return !sw_tag;
92+}
93+
94+/**
95+ * cxgb3i_set_non_ddp_tag - mark a given s/w tag as an invalid ddp tag
96+ * @tformat: tag format information
97+ * @sw_tag: s/w tag to be checked
98+ *
99+ * insert 1 at the uppermost reserved bit to mark it as an invalid ddp tag.
100+ */
101+static inline u32 cxgb3i_set_non_ddp_tag(struct cxgb3i_tag_format *tformat,
102+ u32 sw_tag)
103+{
104+ unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1;
105+ u32 mask = (1 << shift) - 1;
106+
107+ if (sw_tag && (sw_tag & ~mask)) {
108+ u32 v1 = sw_tag & ((1 << shift) - 1);
109+ u32 v2 = (sw_tag >> (shift - 1)) << shift;
110+
111+ return v2 | v1 | 1 << shift;
112+ }
113+ return sw_tag | 1 << shift;
114+}
115+
116+/**
117+ * cxgb3i_ddp_tag_base - shift the s/w tag bits so that reserved bits are not
118+ * used.
119+ * @tformat: tag format information
120+ * @sw_tag: s/w tag to be checked
121+ */
122+static inline u32 cxgb3i_ddp_tag_base(struct cxgb3i_tag_format *tformat,
123+ u32 sw_tag)
124+{
125+ u32 mask = (1 << tformat->rsvd_shift) - 1;
126+
127+ if (sw_tag && (sw_tag & ~mask)) {
128+ u32 v1 = sw_tag & mask;
129+ u32 v2 = sw_tag >> tformat->rsvd_shift;
130+
131+ v2 <<= tformat->rsvd_shift + tformat->rsvd_bits;
132+ return v2 | v1;
133+ }
134+ return sw_tag;
135+}
136+
137+/**
138+ * cxgb3i_tag_rsvd_bits - get the reserved bits used by the h/w
139+ * @tformat: tag format information
140+ * @tag: tag to be checked
141+ *
142+ * return the reserved bits in the tag
143+ */
144+static inline u32 cxgb3i_tag_rsvd_bits(struct cxgb3i_tag_format *tformat,
145+ u32 tag)
146+{
147+ if (cxgb3i_is_ddp_tag(tformat, tag))
148+ return (tag >> tformat->rsvd_shift) & tformat->rsvd_mask;
149+ return 0;
150+}
151+
152+/**
153+ * cxgb3i_tag_nonrsvd_bits - get the non-reserved bits used by the s/w
154+ * @tformat: tag format information
155+ * @tag: tag to be checked
156+ *
157+ * return the non-reserved bits in the tag.
158+ */
159+static inline u32 cxgb3i_tag_nonrsvd_bits(struct cxgb3i_tag_format *tformat,
160+ u32 tag)
161+{
162+ unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1;
163+ u32 v1, v2;
164+
165+ if (cxgb3i_is_ddp_tag(tformat, tag)) {
166+ v1 = tag & ((1 << tformat->rsvd_shift) - 1);
167+ v2 = (tag >> (shift + 1)) << tformat->rsvd_shift;
168+ } else {
169+ u32 mask = (1 << shift) - 1;
170+
171+ tag &= ~(1 << shift);
172+ v1 = tag & mask;
173+ v2 = (tag >> 1) & ~mask;
174+ }
175+ return v1 | v2;
176+}
177+
178+
179 /**
180 * struct cxgb3i_hba - cxgb3i iscsi structure (per port)
181 *
182@@ -146,16 +277,22 @@
183 * struct cxgb3i_conn - cxgb3i iscsi connection
184 *
185 * @tcp_conn: pointer to iscsi_tcp_conn structure
186- * @listhead: list head to link elements
187+ * @list_head: list head to link elements
188+ * @cep: pointer to iscsi_endpoint structure
189 * @conn: pointer to iscsi_conn structure
190 * @hba: pointer to the hba this conn. is going through
191+ * @task_idx_bits: # of bits needed for session->cmds_max
192+ * @frags: temp. holding area for tx coalesced sg list pages.
193 */
194+#define TX_PDU_PAGES_MAX (16384/512 + 1)
195 struct cxgb3i_conn {
196 struct iscsi_tcp_conn tcp_conn;
197 struct list_head list_head;
198 struct cxgb3i_endpoint *cep;
199 struct iscsi_conn *conn;
200 struct cxgb3i_hba *hba;
201+ unsigned int task_idx_bits;
202+ skb_frag_t frags[TX_PDU_PAGES_MAX];
203 };
204
205 /**
206@@ -190,8 +327,7 @@
207 int cxgb3i_ulp2_init(void);
208 void cxgb3i_ulp2_cleanup(void);
209 int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *, int, int);
210-void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32,
211- struct scatterlist *, unsigned int);
212+void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32);
213 u32 cxgb3i_ddp_tag_reserve(struct cxgb3i_adapter *, unsigned int,
214 u32, unsigned int, struct scatterlist *,
215 unsigned int, int);
216diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_init.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_init.c
217--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_init.c 2009-01-14 15:17:57.000000000 -0800
218+++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_init.c 2009-01-26 22:00:17.000000000 -0800
219@@ -11,8 +11,8 @@
220
221 #include "cxgb3i.h"
222
223-#define DRV_MODULE_NAME "cxgb3i"
224-#define DRV_MODULE_VERSION "0.1.0"
225+#define DRV_MODULE_NAME "cxgb3i"
226+#define DRV_MODULE_VERSION "0.9.0"
227 #define DRV_MODULE_RELDATE "Jun. 1, 2008"
228
229 static char version[] =
230diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_iscsi.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
231--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_iscsi.c 2009-01-14 15:17:57.000000000 -0800
232+++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_iscsi.c 2009-01-26 22:00:17.000000000 -0800
233@@ -36,10 +36,10 @@
234 #define cxgb3i_api_debug(fmt...)
235 #endif
236
237-#define align_to_4k_boundary(n) \
238- do { \
239- n = (n) & ~((1 << 12) - 1); \
240- } while(0)
241+/*
242+ * round the pdu size down to a multiple of 512 for better performance
243+ */
244+#define align_pdu_size(n) do { n = (n) & (~511); } while (0)
245
246 static struct scsi_transport_template *cxgb3i_scsi_transport;
247 static struct scsi_host_template cxgb3i_host_template;
248@@ -102,7 +102,7 @@
249 struct cxgb3i_adapter *snic;
250
251 /* remove from the list */
252- read_lock(&cxgb3i_snic_rwlock);
253+ write_lock(&cxgb3i_snic_rwlock);
254 list_for_each_entry(snic, &cxgb3i_snic_list, list_head) {
255 if (snic->tdev == t3dev) {
256 list_del(&snic->list_head);
257@@ -295,6 +295,8 @@
258 * stop the xmit path so the xmit_segment function is
259 * not being called
260 */
261+ iscsi_suspend_tx(cconn->conn);
262+
263 write_lock_bh(&cep->c3cn->callback_lock);
264 set_bit(ISCSI_SUSPEND_BIT, &cconn->conn->suspend_rx);
265 cep->c3cn->user_data = NULL;
266@@ -391,20 +393,17 @@
267 static inline int cxgb3i_conn_max_xmit_dlength(struct iscsi_conn *conn)
268 {
269 struct cxgb3i_conn *cconn = conn->dd_data;
270- unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_SIZE,
271- cconn->hba->snic->tx_max_size -
272- ISCSI_PDU_HEADER_MAX);
273-
274- cxgb3i_api_debug("conn 0x%p, max xmit %u.\n",
275- conn, conn->max_xmit_dlength);
276+ unsigned int max = min_t(unsigned int,
277+ ULP2_MAX_PDU_PAYLOAD,
278+ cconn->hba->snic->tx_max_size -
279+ ISCSI_PDU_NONPAYLOAD_MAX);
280
281 if (conn->max_xmit_dlength)
282 conn->max_xmit_dlength = min_t(unsigned int,
283- conn->max_xmit_dlength, max);
284+ conn->max_xmit_dlength, max);
285 else
286 conn->max_xmit_dlength = max;
287-
288- align_to_4k_boundary(conn->max_xmit_dlength);
289+ align_pdu_size(conn->max_xmit_dlength);
290
291 cxgb3i_api_debug("conn 0x%p, set max xmit %u.\n",
292 conn, conn->max_xmit_dlength);
293@@ -415,14 +414,10 @@
294 static inline int cxgb3i_conn_max_recv_dlength(struct iscsi_conn *conn)
295 {
296 struct cxgb3i_conn *cconn = conn->dd_data;
297- unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_SIZE,
298- cconn->hba->snic->rx_max_size -
299- ISCSI_PDU_HEADER_MAX);
300-
301- cxgb3i_api_debug("conn 0x%p, max recv %u.\n",
302- conn, conn->max_recv_dlength);
303-
304- align_to_4k_boundary(max);
305+ unsigned int max = min_t(unsigned int,
306+ ULP2_MAX_PDU_PAYLOAD,
307+ cconn->hba->snic->tx_max_size -
308+ ISCSI_PDU_NONPAYLOAD_MAX);
309
310 if (conn->max_recv_dlength) {
311 if (conn->max_recv_dlength > max) {
312@@ -433,9 +428,9 @@
313 }
314 conn->max_recv_dlength = min_t(unsigned int,
315 conn->max_recv_dlength, max);
316- align_to_4k_boundary(conn->max_recv_dlength);
317 } else
318 conn->max_recv_dlength = max;
319+ align_pdu_size(conn->max_recv_dlength);
320
321 cxgb3i_api_debug("conn 0x%p, set max recv %u.\n",
322 conn, conn->max_recv_dlength);
323@@ -516,12 +511,14 @@
324
325 cep = ep->dd_data;
326 c3cn = cep->c3cn;
327+ /* calculate the tag idx bits needed for this conn based on cmds_max */
328+ cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1;
329
330- read_lock(&c3cn->callback_lock);
331+ write_lock(&c3cn->callback_lock);
332 /* mnc: TODO don't abuse iscsi_tcp fields */
333 tcp_conn->sock = (struct socket *)c3cn;
334 c3cn->user_data = conn;
335- read_unlock(&c3cn->callback_lock);
336+ write_unlock(&c3cn->callback_lock);
337
338 cconn->hba = cep->hba;
339 cconn->cep = cep;
340@@ -609,11 +606,13 @@
341 return -ENOMEM;
342 case ISCSI_PARAM_MAX_RECV_DLENGTH:
343 err = iscsi_set_param(cls_conn, param, buf, buflen);
344- err = cxgb3i_conn_max_recv_dlength(conn);
345+ if (!err)
346+ err = cxgb3i_conn_max_recv_dlength(conn);
347 break;
348 case ISCSI_PARAM_MAX_XMIT_DLENGTH:
349 err = iscsi_set_param(cls_conn, param, buf, buflen);
350- err = cxgb3i_conn_max_xmit_dlength(conn);
351+ if (!err)
352+ err = cxgb3i_conn_max_xmit_dlength(conn);
353 break;
354 default:
355 return iscsi_set_param(cls_conn, param, buf, buflen);
356@@ -718,49 +717,23 @@
357 stats->custom[0].value = conn->eh_abort_cnt;
358 }
359
360-static inline u32 tag_base(struct cxgb3i_tag_format *format,
361- unsigned int idx, unsigned int age)
362-{
363- u32 sw_bits = idx | (age << format->idx_bits);
364- u32 tag = sw_bits >> format->rsvd_shift;
365-
366- tag <<= format->rsvd_bits + format->rsvd_shift;
367- tag |= sw_bits & ((1 << format->rsvd_shift) - 1);
368- return tag;
369-}
370-
371-static inline void cxgb3i_parse_tag(struct cxgb3i_tag_format *format,
372- u32 tag, u32 *rsvd_bits, u32 *sw_bits)
373-{
374- if (rsvd_bits)
375- *rsvd_bits = (tag >> format->rsvd_shift) & format->rsvd_mask;
376- if (sw_bits) {
377- *sw_bits = (tag >> (format->rsvd_shift + format->rsvd_bits))
378- << format->rsvd_shift;
379- *sw_bits |= tag & ((1 << format->rsvd_shift) - 1);
380- }
381-
382- cxgb3i_tag_debug("parse tag 0x%x, rsvd 0x%x, sw 0x%x.\n",
383- tag, rsvd_bits ? *rsvd_bits : 0xFFFFFFFF,
384- sw_bits ? *sw_bits : 0xFFFFFFFF);
385-}
386-
387-
388 static void cxgb3i_parse_itt(struct iscsi_conn *conn, itt_t itt,
389 int *idx, int *age)
390 {
391 struct cxgb3i_conn *cconn = conn->dd_data;
392 struct cxgb3i_adapter *snic = cconn->hba->snic;
393+ u32 tag = itt;
394 u32 sw_bits;
395
396- cxgb3i_parse_tag(&snic->tag_format, itt, NULL, &sw_bits);
397+ sw_bits = cxgb3i_tag_nonrsvd_bits(&snic->tag_format, tag);
398 if (idx)
399- *idx = sw_bits & ISCSI_ITT_MASK;
400+ *idx = sw_bits & ((1 << cconn->task_idx_bits) - 1);
401 if (age)
402- *age = (sw_bits >> snic->tag_format.idx_bits) & ISCSI_AGE_MASK;
403+ *age = (sw_bits >> cconn->task_idx_bits) & ISCSI_AGE_MASK;
404
405- cxgb3i_tag_debug("parse itt 0x%x, idx 0x%x, age 0x%x.\n",
406- itt, idx ? *idx : 0xFFFFF, age ? *age : 0xFF);
407+ cxgb3i_tag_debug("parse tag 0x%x/0x%x, sw 0x%x, idx 0x%x, age 0x%x.\n",
408+ tag, itt, sw_bits, idx ? *idx : 0xFFFFF,
409+ age ? *age : 0xFF);
410 }
411
412 static int cxgb3i_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
413@@ -771,26 +744,40 @@
414 struct cxgb3i_conn *cconn = conn->dd_data;
415 struct iscsi_tcp_conn *tcp_conn = &cconn->tcp_conn;
416 struct cxgb3i_adapter *snic = cconn->hba->snic;
417- u32 sw_tag = tag_base(&snic->tag_format, task->itt, sess->age);
418+ struct cxgb3i_tag_format *tformat = &snic->tag_format;
419+ u32 sw_tag = (sess->age << cconn->task_idx_bits) | task->itt;
420 u32 tag = RESERVED_ITT;
421
422- if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE)) {
423+ if (sc && (scsi_bidi_cmnd(sc) ||
424+ sc->sc_data_direction == DMA_FROM_DEVICE) &&
425+ cxgb3i_sw_tag_usable(tformat, sw_tag)) {
426 struct s3_conn *c3cn = (struct s3_conn *)(tcp_conn->sock);
427- tag =
428- cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
429+ tag = cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
430 scsi_in(sc)->length,
431 scsi_in(sc)->table.sgl,
432 scsi_in(sc)->table.nents,
433 GFP_ATOMIC);
434 }
435 if (tag == RESERVED_ITT)
436- tag = sw_tag | (snic->tag_format.rsvd_mask <<
437- snic->tag_format.rsvd_shift);
438+ tag = cxgb3i_set_non_ddp_tag(tformat, sw_tag);
439+ /* the itt needs to be sent in big-endian order */
440 *hdr_itt = htonl(tag);
441
442- cxgb3i_tag_debug("new tag 0x%x/0x%x (itt 0x%x, age 0x%x).\n",
443- tag, *hdr_itt, task->itt, sess->age);
444-
445+ if (sc) {
446+ if (sc->sc_data_direction == DMA_FROM_DEVICE)
447+ cxgb3i_tag_debug("read, len %u, tag 0x%x/0x%x "
448+ "(itt 0x%x, age 0x%x, sw 0x%x).\n",
449+ scsi_in(sc)->length, tag, *hdr_itt,
450+ task->itt, sess->age, sw_tag);
451+ else
452+ cxgb3i_tag_debug("write, len %u, tag 0x%x/0x%x "
453+ "(itt 0x%x, age 0x%x, sw 0x%x).\n",
454+ scsi_out(sc)->length, tag, *hdr_itt,
455+ task->itt, sess->age, sw_tag);
456+ } else
457+ cxgb3i_tag_debug("ctrl, tag 0x%x/0x%x (itt 0x%x, age 0x%x, "
458+ "sw 0x%x).\n",
459+ tag, *hdr_itt, task->itt, sess->age, sw_tag);
460 return 0;
461 }
462
463@@ -800,14 +787,15 @@
464 struct iscsi_conn *conn = task->conn;
465 struct cxgb3i_conn *cconn = conn->dd_data;
466 struct cxgb3i_adapter *snic = cconn->hba->snic;
467+ struct cxgb3i_tag_format *tformat = &snic->tag_format;
468 u32 tag = ntohl(hdr_itt);
469
470- cxgb3i_tag_debug("release tag 0x%x.\n", tag);
471+ cxgb3i_tag_debug("release %s tag 0x%x.\n", sc ? "scsi" : "ctrl", tag);
472
473- if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE))
474- cxgb3i_ddp_tag_release(snic, tag,
475- scsi_in(sc)->table.sgl,
476- scsi_in(sc)->table.nents);
477+ if (sc && (scsi_bidi_cmnd(sc) ||
478+ sc->sc_data_direction == DMA_FROM_DEVICE) &&
479+ cxgb3i_is_ddp_tag(tformat, tag))
480+ cxgb3i_ddp_tag_release(snic, tag);
481 }
482
483 /**
484@@ -820,7 +808,7 @@
485 .proc_name = "cxgb3i",
486 .queuecommand = iscsi_queuecommand,
487 .change_queue_depth = iscsi_change_queue_depth,
488- .can_queue = 128 * (ISCSI_DEF_XMIT_CMDS_MAX - 1),
489+ .can_queue = CXGB3I_SCSI_QDEPTH_DFLT - 1,
490 .sg_tablesize = SG_ALL,
491 .max_sectors = 0xFFFF,
492 .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN,
493diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.c
494--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.c 2009-01-14 15:17:57.000000000 -0800
495+++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.c 2009-01-26 22:00:17.000000000 -0800
496@@ -22,19 +22,19 @@
497 #include "cxgb3i_ulp2.h"
498
499 #ifdef __DEBUG_C3CN_CONN__
500-#define c3cn_conn_debug cxgb3i_log_debug
501+#define c3cn_conn_debug cxgb3i_log_debug
502 #else
503 #define c3cn_conn_debug(fmt...)
504 #endif
505
506 #ifdef __DEBUG_C3CN_TX__
507-#define c3cn_tx_debug cxgb3i_log_debug
508+#define c3cn_tx_debug cxgb3i_log_debug
509 #else
510 #define c3cn_tx_debug(fmt...)
511 #endif
512
513 #ifdef __DEBUG_C3CN_RX__
514-#define c3cn_rx_debug cxgb3i_log_debug
515+#define c3cn_rx_debug cxgb3i_log_debug
516 #else
517 #define c3cn_rx_debug(fmt...)
518 #endif
519@@ -42,9 +42,9 @@
520 /*
521 * module parameters releated to offloaded iscsi connection
522 */
523-static int cxgb3_rcv_win = 256 * 1024;
524+static int cxgb3_rcv_win = 128 * 1024;
525 module_param(cxgb3_rcv_win, int, 0644);
526-MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=256KB)");
527+MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=128KB)");
528
529 static int cxgb3_snd_win = 64 * 1024;
530 module_param(cxgb3_snd_win, int, 0644);
531@@ -456,12 +456,9 @@
532 * The number of WRs needed for an skb depends on the number of fragments
533 * in the skb and whether it has any payload in its main body. This maps the
534 * length of the gather list represented by an skb into the # of necessary WRs.
535- *
536- * The max. length of an skb is controlled by the max pdu size which is ~16K.
537- * Also, assume the min. fragment length is the sector size (512), then add
538- * extra fragment counts for iscsi bhs and payload padding.
539+ * The extra two fragments are for iscsi bhs and payload padding.
540 */
541-#define SKB_WR_LIST_SIZE (16384/512 + 3)
542+#define SKB_WR_LIST_SIZE (MAX_SKB_FRAGS + 2)
543 static unsigned int skb_wrs[SKB_WR_LIST_SIZE] __read_mostly;
544
545 static void s3_init_wr_tab(unsigned int wr_len)
546@@ -484,7 +481,7 @@
547
548 static inline void reset_wr_list(struct s3_conn *c3cn)
549 {
550- c3cn->wr_pending_head = NULL;
551+ c3cn->wr_pending_head = c3cn->wr_pending_tail = NULL;
552 }
553
554 /*
555@@ -495,7 +492,7 @@
556 static inline void enqueue_wr(struct s3_conn *c3cn,
557 struct sk_buff *skb)
558 {
559- skb->sp = NULL;
560+ skb_wr_next(skb) = NULL;
561
562 /*
563 * We want to take an extra reference since both us and the driver
564@@ -508,10 +505,22 @@
565 if (!c3cn->wr_pending_head)
566 c3cn->wr_pending_head = skb;
567 else
568- c3cn->wr_pending_tail->sp = (void *)skb;
569+ skb_wr_next(c3cn->wr_pending_tail) = skb;
570 c3cn->wr_pending_tail = skb;
571 }
572
573+static int count_pending_wrs(struct s3_conn *c3cn)
574+{
575+ int n = 0;
576+ const struct sk_buff *skb = c3cn->wr_pending_head;
577+
578+ while (skb) {
579+ n += skb->csum;
580+ skb = skb_wr_next(skb);
581+ }
582+ return n;
583+}
584+
585 static inline struct sk_buff *peek_wr(const struct s3_conn *c3cn)
586 {
587 return c3cn->wr_pending_head;
588@@ -528,8 +537,8 @@
589
590 if (likely(skb)) {
591 /* Don't bother clearing the tail */
592- c3cn->wr_pending_head = (struct sk_buff *)skb->sp;
593- skb->sp = NULL;
594+ c3cn->wr_pending_head = skb_wr_next(skb);
595+ skb_wr_next(skb) = NULL;
596 }
597 return skb;
598 }
599@@ -542,13 +551,15 @@
600 }
601
602 static inline void make_tx_data_wr(struct s3_conn *c3cn, struct sk_buff *skb,
603- int len)
604+ int len, int req_completion)
605 {
606 struct tx_data_wr *req;
607
608 skb_reset_transport_header(skb);
609 req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req));
610- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
611+
612+ req->wr_hi = htonl((V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)) |
613+ (req_completion ? F_WR_COMPL : 0));
614 req->wr_lo = htonl(V_WR_TID(c3cn->tid));
615 req->sndseq = htonl(c3cn->snd_nxt);
616 /* len includes the length of any HW ULP additions */
617@@ -556,11 +567,11 @@
618 req->param = htonl(V_TX_PORT(c3cn->l2t->smt_idx));
619 /* V_TX_ULP_SUBMODE sets both the mode and submode */
620 req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(skb)) |
621- V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1)));
622+ V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1)));
623
624 if (!c3cn_flag(c3cn, C3CN_TX_DATA_SENT)) {
625- req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
626- V_TX_CPU_IDX(c3cn->qset));
627+ req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
628+ V_TX_CPU_IDX(c3cn->qset));
629 /* Sendbuffer is in units of 32KB. */
630 req->param |= htonl(V_TX_SNDBUF(cxgb3_snd_win >> 15));
631 c3cn_set_flag(c3cn, C3CN_TX_DATA_SENT);
632@@ -591,7 +602,7 @@
633
634 if (unlikely(c3cn->state == C3CN_STATE_CONNECTING ||
635 c3cn->state == C3CN_STATE_CLOSE_WAIT_1 ||
636- c3cn->state == C3CN_STATE_ABORTING)) {
637+ c3cn->state >= C3CN_STATE_ABORTING)) {
638 c3cn_tx_debug("c3cn 0x%p, in closing state %u.\n",
639 c3cn, c3cn->state);
640 return 0;
641@@ -626,19 +637,22 @@
642 c3cn->wr_unacked += wrs_needed;
643 enqueue_wr(c3cn, skb);
644
645- if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
646- len += ulp_extra_len(skb);
647- make_tx_data_wr(c3cn, skb, len);
648- c3cn->snd_nxt += len;
649- if ((req_completion
650- && c3cn->wr_unacked == wrs_needed)
651- || (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL)
652- || c3cn->wr_unacked >= c3cn->wr_max / 2) {
653- struct work_request_hdr *wr = cplhdr(skb);
654+ c3cn_tx_debug("c3cn 0x%p, enqueue, skb len %u/%u, frag %u, "
655+ "wr %d, left %u, unack %u.\n",
656+ c3cn, skb->len, skb->data_len, frags,
657+ wrs_needed, c3cn->wr_avail, c3cn->wr_unacked);
658
659- wr->wr_hi |= htonl(F_WR_COMPL);
660+ if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
661+ if ((req_completion &&
662+ c3cn->wr_unacked == wrs_needed) ||
663+ (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL) ||
664+ c3cn->wr_unacked >= c3cn->wr_max / 2) {
665+ req_completion = 1;
666 c3cn->wr_unacked = 0;
667 }
668+ len += ulp_extra_len(skb);
669+ make_tx_data_wr(c3cn, skb, len, req_completion);
670+ c3cn->snd_nxt += len;
671 CXGB3_SKB_CB(skb)->flags &= ~C3CB_FLAG_NEED_HDR;
672 }
673
674@@ -1153,12 +1167,28 @@
675 * Process an acknowledgment of WR completion. Advance snd_una and send the
676 * next batch of work requests from the write queue.
677 */
678+
679+static void check_wr_invariants(struct s3_conn *c3cn)
680+{
681+ int pending = count_pending_wrs(c3cn);
682+
683+ if (unlikely(c3cn->wr_avail + pending != c3cn->wr_max))
684+ cxgb3i_log_error("TID %u: credit imbalance: avail %u, "
685+ "pending %u, total should be %u\n",
686+ c3cn->tid, c3cn->wr_avail, pending,
687+ c3cn->wr_max);
688+}
689+
690 static void process_wr_ack(struct s3_conn *c3cn, struct sk_buff *skb)
691 {
692 struct cpl_wr_ack *hdr = cplhdr(skb);
693 unsigned int credits = ntohs(hdr->credits);
694 u32 snd_una = ntohl(hdr->snd_una);
695
696+ c3cn_tx_debug("%u WR credits, avail %u, unack %u, TID %u, state %u.\n",
697+ credits, c3cn->wr_avail, c3cn->wr_unacked,
698+ c3cn->tid, c3cn->state);
699+
700 c3cn->wr_avail += credits;
701 if (c3cn->wr_unacked > c3cn->wr_max - c3cn->wr_avail)
702 c3cn->wr_unacked = c3cn->wr_max - c3cn->wr_avail;
703@@ -1173,6 +1203,17 @@
704 break;
705 }
706 if (unlikely(credits < p->csum)) {
707+ struct tx_data_wr *w = cplhdr(p);
708+ cxgb3i_log_error("TID %u got %u WR credits need %u, "
709+ "len %u, main body %u, frags %u, "
710+ "seq # %u, ACK una %u, ACK nxt %u, "
711+ "WR_AVAIL %u, WRs pending %u\n",
712+ c3cn->tid, credits, p->csum, p->len,
713+ p->len - p->data_len,
714+ skb_shinfo(p)->nr_frags,
715+ ntohl(w->sndseq), snd_una,
716+ ntohl(hdr->snd_nxt), c3cn->wr_avail,
717+ count_pending_wrs(c3cn) - credits);
718 p->csum -= credits;
719 break;
720 } else {
721@@ -1182,8 +1223,14 @@
722 }
723 }
724
725- if (unlikely(before(snd_una, c3cn->snd_una)))
726+ check_wr_invariants(c3cn);
727+
728+ if (unlikely(before(snd_una, c3cn->snd_una))) {
729+ cxgb3i_log_error("TID %u, unexpected sequence # %u in WR_ACK "
730+ "snd_una %u\n",
731+ c3cn->tid, snd_una, c3cn->snd_una);
732 goto out_free;
733+ }
734
735 if (c3cn->snd_una != snd_una) {
736 c3cn->snd_una = snd_una;
737@@ -1454,11 +1501,14 @@
738 struct dst_entry *dst)
739 {
740 BUG_ON(c3cn->cdev != cdev);
741- c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs;
742+ c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs - 1;
743 c3cn->wr_unacked = 0;
744 c3cn->mss_idx = select_mss(c3cn, dst_mtu(dst));
745
746 reset_wr_list(c3cn);
747+
748+ c3cn_conn_debug("c3cn 0x%p, wr max %u, avail %u.\n",
749+ c3cn, c3cn->wr_max, c3cn->wr_avail);
750 }
751
752 static int initiate_act_open(struct s3_conn *c3cn, struct net_device *dev)
753@@ -1673,9 +1723,17 @@
754 goto out_err;
755 }
756
757- err = -EPIPE;
758 if (c3cn->err) {
759 c3cn_tx_debug("c3cn 0x%p, err %d.\n", c3cn, c3cn->err);
760+ err = -EPIPE;
761+ goto out_err;
762+ }
763+
764+ if (c3cn->write_seq - c3cn->snd_una >= cxgb3_snd_win) {
765+ c3cn_tx_debug("c3cn 0x%p, snd %u - %u > %u.\n",
766+ c3cn, c3cn->write_seq, c3cn->snd_una,
767+ cxgb3_snd_win);
768+ err = -EAGAIN;
769 goto out_err;
770 }
771
772diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.h
773--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.h 2009-01-14 15:17:57.000000000 -0800
774+++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.h 2009-01-26 22:00:17.000000000 -0800
775@@ -180,7 +180,7 @@
776 * @seq: tcp sequence number
777 * @ddigest: pdu data digest
778 * @pdulen: recovered pdu length
779- * @ulp_data: scratch area for ULP
780+ * @wr_next: scratch area for tx wr
781 */
782 struct cxgb3_skb_cb {
783 __u8 flags;
784@@ -188,7 +188,7 @@
785 __u32 seq;
786 __u32 ddigest;
787 __u32 pdulen;
788- __u8 ulp_data[16];
789+ struct sk_buff *wr_next;
790 };
791
792 #define CXGB3_SKB_CB(skb) ((struct cxgb3_skb_cb *)&((skb)->cb[0]))
793@@ -196,7 +196,7 @@
794 #define skb_ulp_mode(skb) (CXGB3_SKB_CB(skb)->ulp_mode)
795 #define skb_ulp_ddigest(skb) (CXGB3_SKB_CB(skb)->ddigest)
796 #define skb_ulp_pdulen(skb) (CXGB3_SKB_CB(skb)->pdulen)
797-#define skb_ulp_data(skb) (CXGB3_SKB_CB(skb)->ulp_data)
798+#define skb_wr_next(skb) (CXGB3_SKB_CB(skb)->wr_next)
799
800 enum c3cb_flags {
801 C3CB_FLAG_NEED_HDR = 1 << 0, /* packet needs a TX_DATA_WR header */
802diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.c
803--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.c 2009-01-14 15:17:57.000000000 -0800
804+++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.c 2009-01-26 22:00:17.000000000 -0800
805@@ -51,6 +51,7 @@
806 static unsigned char sw_tag_idx_bits;
807 static unsigned char sw_tag_age_bits;
808 static unsigned char page_idx = ULP2_PGIDX_MAX;
809+static unsigned int skb_copymax = SKB_MAX_HEAD(TX_HEADER_LEN);
810
811 static void cxgb3i_ddp_page_init(void)
812 {
813@@ -59,6 +60,10 @@
814 sw_tag_idx_bits = (__ilog2_u32(ISCSI_ITT_MASK)) + 1;
815 sw_tag_age_bits = (__ilog2_u32(ISCSI_AGE_MASK)) + 1;
816
817+ cxgb3i_log_info("tag itt 0x%x, %u bits, age 0x%x, %u bits.\n",
818+ ISCSI_ITT_MASK, sw_tag_idx_bits,
819+ ISCSI_AGE_MASK, sw_tag_age_bits);
820+
821 for (i = 0; i < ULP2_PGIDX_MAX; i++) {
822 if (PAGE_SIZE == (1UL << ddp_page_shift[i])) {
823 page_idx = i;
824@@ -312,7 +317,6 @@
825 page_idx, sgcnt, xferlen, ULP2_DDP_THRESHOLD);
826 return RESERVED_ITT;
827 }
828- return RESERVED_ITT;
829
830 gl = ddp_make_gl(xferlen, sgl, sgcnt, gfp);
831 if (!gl) {
832@@ -322,9 +326,9 @@
833 }
834
835 npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
836- idx_max = ddp->nppods - npods + 1;
837+ idx_max = ddp->nppods - npods;
838
839- if (ddp->idx_last == ddp->nppods)
840+ if (ddp->idx_last >= idx_max)
841 idx = ddp_find_unused_entries(ddp, 0, idx_max, npods, gl);
842 else {
843 idx = ddp_find_unused_entries(ddp, ddp->idx_last + 1, idx_max,
844@@ -345,12 +349,13 @@
845
846 if (ddp_gl_map(snic->pdev, gl) < 0)
847 goto unmap_sgl;
848-
849- tag = sw_tag | (idx << snic->tag_format.rsvd_shift);
850+
851+ tag = cxgb3i_ddp_tag_base(&snic->tag_format, sw_tag);
852+ tag |= idx << PPOD_IDX_SHIFT;
853
854 hdr.rsvd = 0;
855 hdr.vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
856- hdr.pgsz_tag_clr = htonl(tag & snic->tag_format.rsvd_tag_mask);
857+ hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask);
858 hdr.maxoffset = htonl(xferlen);
859 hdr.pgoffset = htonl(gl->offset);
860
861@@ -372,30 +377,35 @@
862 return RESERVED_ITT;
863 }
864
865-void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *snic, u32 tag,
866- struct scatterlist *sgl, unsigned int sgcnt)
867+void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *snic, u32 tag)
868 {
869- u32 idx = (tag >> snic->tag_format.rsvd_shift) &
870- snic->tag_format.rsvd_mask;
871+ struct cxgb3i_ddp_info *ddp = snic->ddp;
872+ u32 idx;
873
874- if (idx < snic->tag_format.rsvd_mask) {
875- struct cxgb3i_ddp_info *ddp = snic->ddp;
876+ if (!ddp) {
877+ cxgb3i_log_error("release ddp tag 0x%x, ddp NULL.\n", tag);
878+ return;
879+ }
880+
881+ idx = (tag >> PPOD_IDX_SHIFT) & ddp->idx_mask;
882+ if (idx < ddp->nppods) {
883 struct cxgb3i_gather_list *gl = ddp->gl_map[idx];
884 unsigned int npods;
885
886 if (!gl || !gl->nelem) {
887- cxgb3i_log_warn("release tag 0x%x, idx 0x%x, no gl.\n",
888- tag, idx);
889+ cxgb3i_log_error("release tag 0x%x, idx 0x%x, no gl.\n",
890+ tag, idx);
891 return;
892 }
893 npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
894-
895 cxgb3i_tag_debug("ddp tag 0x%x, release idx 0x%x, npods %u.\n",
896 tag, idx, npods);
897 clear_ddp_map(snic, idx, npods);
898 ddp_unmark_entries(ddp, idx, npods);
899 ddp_gl_unmap(snic->pdev, gl);
900- }
901+ } else
902+ cxgb3i_log_error("ddp tag 0x%x, idx 0x%x > max 0x%x.\n",
903+ tag, idx, ddp->nppods);
904 }
905
906 int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *cconn, int hcrc, int dcrc)
907@@ -403,12 +413,18 @@
908 struct iscsi_tcp_conn *tcp_conn = cconn->conn->dd_data;
909 struct s3_conn *c3cn = (struct s3_conn *)(tcp_conn->sock);
910 struct sk_buff *skb = alloc_skb(sizeof(struct cpl_set_tcb_field),
911- GFP_KERNEL | __GFP_NOFAIL);
912+ GFP_KERNEL);
913 struct cpl_set_tcb_field *req;
914 u64 val = (hcrc ? 1 : 0) | (dcrc ? 2 : 0);
915
916+ if (!skb)
917+ return -ENOMEM;
918+
919 if (page_idx < ULP2_PGIDX_MAX)
920 val |= page_idx << 4;
921+ else
922+ cxgb3i_log_warn("TID 0x%x, host page 0x%lx default to 4K.\n",
923+ c3cn->tid, PAGE_SIZE);
924
925 /* set up ulp submode and page size */
926 req = (struct cpl_set_tcb_field *)skb_put(skb, sizeof(*req));
927@@ -476,14 +492,14 @@
928 (skb_ulp_mode(skb) & ULP2_FLAG_DCRC_ERROR)) ?
929 ISCSI_SEGMENT_DGST_ERR : 0;
930 if (skb_ulp_mode(skb) & ULP2_FLAG_DATA_DDPED) {
931- cxgb3i_ddp_debug("skb 0x%p, opcode 0x%x, data %u, "
932- "ddp'ed, itt 0x%x.\n",
933- skb, hdr->opcode & ISCSI_OPCODE_MASK,
934- tcp_conn->in.datalen, hdr->itt);
935+ cxgb3i_rx_debug("skb 0x%p, opcode 0x%x, data %u, "
936+ "ddp'ed, itt 0x%x.\n",
937+ skb, hdr->opcode & ISCSI_OPCODE_MASK,
938+ tcp_conn->in.datalen, hdr->itt);
939 segment->total_copied = segment->total_size;
940 } else {
941- cxgb3i_ddp_debug("skb 0x%p, opcode 0x%x, data %u, "
942- "not ddp'ed, itt 0x%x.\n",
943+ cxgb3i_rx_debug("skb 0x%p, opcode 0x%x, data %u, "
944+ "not ddp'ed, itt 0x%x.\n",
945 skb, hdr->opcode & ISCSI_OPCODE_MASK,
946 tcp_conn->in.datalen, hdr->itt);
947 offset += sizeof(struct cpl_iscsi_hdr_norss);
948@@ -520,24 +536,141 @@
949 skb_ulp_mode(skb) = (ULP_MODE_ISCSI << 4) | submode;
950 }
951
952+static int sg_page_coalesce(struct scatterlist *sg, unsigned int offset,
953+ unsigned int dlen, skb_frag_t *frags, int frag_max)
954+{
955+ unsigned int sglen = sg->length - offset;
956+ struct page *page = sg_page(sg);
957+ unsigned int datalen = dlen, copy;
958+ int i;
959+
960+ i = 0;
961+ do {
962+ if (!sglen) {
963+ sg = sg_next(sg);
964+ offset = 0;
965+ sglen = sg->length;
966+ page = sg_page(sg);
967+ }
968+ copy = min(datalen, sglen);
969+ if (i && page == frags[i - 1].page &&
970+ offset + sg->offset ==
971+ frags[i - 1].page_offset + frags[i - 1].size) {
972+ frags[i - 1].size += copy;
973+ } else {
974+ if (i >= frag_max) {
975+ cxgb3i_log_error("%s, too many pages > %u, "
976+ "dlen %u.\n", __func__,
977+ frag_max, dlen);
978+ return -EINVAL;
979+ }
980+
981+ frags[i].page = page;
982+ frags[i].page_offset = sg->offset + offset;
983+ frags[i].size = copy;
984+ i++;
985+ }
986+ datalen -= copy;
987+ offset += copy;
988+ sglen -= copy;
989+ } while (datalen);
990+
991+ return i;
992+}
993+
994+static int copy_frags_to_skb_pages(struct sk_buff *skb, skb_frag_t *frags,
995+ int frag_cnt, unsigned int datalen)
996+{
997+ struct page *page = NULL;
998+ unsigned char *dp;
999+ unsigned int pg_left = 0;
1000+ unsigned int copy_total = 0;
1001+ int i;
1002+
1003+ for (i = 0; i < frag_cnt; i++, frags++) {
1004+ while (frags->size) {
1005+ unsigned char *sp = page_address(frags->page);
1006+ unsigned int copy;
1007+
1008+ if (!pg_left) {
1009+ int cnt = skb_shinfo(skb)->nr_frags;
1010+
1011+ if (cnt >= MAX_SKB_FRAGS) {
1012+ cxgb3i_log_error("%s: pdu data %u.\n",
1013+ __func__, datalen);
1014+ return -EINVAL;
1015+ }
1016+ page = alloc_page(GFP_ATOMIC);
1017+ if (!page)
1018+ return -ENOMEM;
1019+ dp = page_address(page);
1020+ pg_left = PAGE_SIZE;
1021+
1022+ copy = min(pg_left, datalen);
1023+ skb_fill_page_desc(skb, cnt, page, 0, copy);
1024+
1025+ skb->len += copy;
1026+ skb->data_len += copy;
1027+ skb->truesize += copy;
1028+ datalen -= copy;
1029+ }
1030+ copy = min(pg_left, frags->size);
1031+ memcpy(dp, sp + frags->page_offset, copy);
1032+
1033+ frags->size -= copy;
1034+ frags->page_offset += copy;
1035+ dp += copy;
1036+ pg_left -= copy;
1037+ copy_total += copy;
1038+ }
1039+ }
1040+
1041+ return copy_total;
1042+}
1043+
1044 int cxgb3i_conn_ulp2_xmit(struct iscsi_conn *conn)
1045 {
1046- struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1047+ struct cxgb3i_conn *cconn = conn->dd_data;
1048+ struct iscsi_tcp_conn *tcp_conn = &cconn->tcp_conn;
1049 struct iscsi_segment *hdr_seg = &tcp_conn->out.segment;
1050 struct iscsi_segment *data_seg = &tcp_conn->out.data_segment;
1051 unsigned int hdrlen = hdr_seg->total_size;
1052 unsigned int datalen = data_seg->total_size;
1053 unsigned int padlen = iscsi_padding(datalen);
1054- unsigned int copymax = SKB_MAX_HEAD(TX_HEADER_LEN);
1055- unsigned int copylen;
1056+ unsigned int copylen = hdrlen;
1057+ unsigned int copy_dlen = 0;
1058 struct sk_buff *skb;
1059 unsigned char *dst;
1060+ int i, frag_cnt = 0;
1061 int err = -EAGAIN;
1062
1063- if (data_seg->data && ((datalen + padlen) < copymax))
1064- copylen = hdrlen + datalen + padlen;
1065- else
1066- copylen = hdrlen;
1067+ /*
1068+ * The whole pdu needs to fit into one skb, so make sure we don't overrun
1069+ * the skb's frags[] array. If there are more sg pages than MAX_SKB_FRAGS,
1070+ * we have to copy the data either into the skb head or into newly
1071+ * allocated page(s). This can happen if the sg contains a lot of
1072+ * fragmented data chunks (pages).
1073+ */
1074+ if (datalen) {
1075+ if (!data_seg->data) {
1076+ err = sg_page_coalesce(data_seg->sg,
1077+ data_seg->sg_offset,
1078+ data_seg->total_size,
1079+ cconn->frags,
1080+ TX_PDU_PAGES_MAX);
1081+ if (err < 0)
1082+ return err;
1083+ frag_cnt = err;
1084+
1085+ if (frag_cnt > MAX_SKB_FRAGS ||
1086+ (padlen && frag_cnt + 1 > MAX_SKB_FRAGS))
1087+ copy_dlen = datalen + padlen;
1088+ } else
1089+ copy_dlen += datalen + padlen;
1090+ }
1091+
1092+ if (copylen + copy_dlen < skb_copymax)
1093+ copylen += copy_dlen;
1094
1095 /* supports max. 16K pdus, so one skb is enough to hold all the data */
1096 skb = alloc_skb(TX_HEADER_LEN + copylen, GFP_ATOMIC);
1097@@ -575,70 +708,84 @@
1098 skb->data_len += datalen;
1099 skb->truesize += datalen;
1100 }
1101- } else {
1102- struct scatterlist *sg = data_seg->sg;
1103- unsigned int offset = data_seg->sg_offset;
1104- struct page *page = sg_page(sg);
1105- unsigned int sglen = sg->length - offset;
1106-
1107- do {
1108- int i = skb_shinfo(skb)->nr_frags;
1109- unsigned int copy;
1110+ } else if (copy_dlen) {
1111+ /* need to copy the page fragments */
1112+ if (copylen > hdrlen) {
1113+ skb_frag_t *frag = cconn->frags;
1114
1115- if (!sglen) {
1116- sg = sg_next(sg);
1117- page = sg_page(sg);
1118- offset = 0;
1119- sglen = sg->length;
1120+ /* data fits in the skb's headroom */
1121+ for (i = 0; i < frag_cnt; i++, frag++) {
1122+ memcpy(dst,
1123+ page_address(frag->page) +
1124+ frag->page_offset,
1125+ frag->size);
1126+ dst += frag->size;
1127 }
1128- copy = min(sglen, datalen);
1129-
1130- if (i && skb_can_coalesce(skb, i, page,
1131- sg->offset + offset)) {
1132- skb_shinfo(skb)->frags[i - 1].size += copy;
1133- } else {
1134- get_page(page);
1135- skb_fill_page_desc(skb, i, page,
1136- sg->offset + offset, copy);
1137+ if (padlen)
1138+ memset(dst, 0, padlen);
1139+ } else {
1140+ /* allocate pages to hold the data */
1141+ err = copy_frags_to_skb_pages(skb, cconn->frags,
1142+ frag_cnt, datalen);
1143+ if (err < 0) {
1144+ err = -EAGAIN;
1145+ goto free_skb;
1146 }
1147- skb->len += copy;
1148- skb->data_len += copy;
1149- skb->truesize += copy;
1150- offset += copy;
1151- sglen -= copy;
1152- datalen -= copy;
1153- } while (datalen);
1154- }
1155-
1156- if (padlen && skb_shinfo(skb)->nr_frags) {
1157- int idx = skb_shinfo(skb)->nr_frags;
1158- get_page(pad_page);
1159- skb_fill_page_desc(skb, idx, pad_page, 0, padlen);
1160- skb->data_len += padlen;
1161- skb->truesize += padlen;
1162- skb->len += padlen;
1163+ WARN_ON(err != datalen);
1164+ if (padlen) {
1165+ skb_frag_t *frag;
1166+ /* pad the tail of the last filled frag: index nr_frags - 1 */
1167+ frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
1168+ dst = page_address(frag->page);
1169+ memset(dst + frag->page_offset + frag->size, 0, padlen);
1170+ frag->size += padlen;
1171+ skb->len += padlen; /* keep skb length in sync with frags */
1172+ skb->data_len += padlen;
1173+ skb->truesize += padlen;
1174+ }
1175+ }
1176+ } else {
1177+ /* sg pages fit into the skb's frags[] array */
1178+ for (i = 0; i < frag_cnt; i++)
1179+ get_page(cconn->frags[i].page);
1180+ memcpy(skb_shinfo(skb)->frags, cconn->frags,
1181+ sizeof(skb_frag_t) * frag_cnt);
1182+ skb_shinfo(skb)->nr_frags = frag_cnt;
1183+ skb->len += datalen;
1184+ skb->data_len += datalen;
1185+ skb->truesize += datalen;
1186+
1187+ if (padlen) {
1188+ i = skb_shinfo(skb)->nr_frags;
1189+ get_page(pad_page);
1190+ skb_fill_page_desc(skb, i, pad_page, 0, padlen);
1191+ skb->len += padlen;
1192+ skb->data_len += padlen;
1193+ skb->truesize += padlen;
1194+ }
1195 }
1196
1197 send_pdu:
1198 err = cxgb3i_c3cn_send_pdus((struct s3_conn *)tcp_conn->sock, skb);
1199-
1200 if (err > 0) {
1201 int pdulen = hdrlen + datalen + padlen;
1202+
1203 if (conn->hdrdgst_en)
1204 pdulen += ISCSI_DIGEST_SIZE;
1205 if (datalen && conn->datadgst_en)
1206 pdulen += ISCSI_DIGEST_SIZE;
1207
1208 hdr_seg->total_copied = hdr_seg->total_size;
1209- if (datalen)
1210- data_seg->total_copied = data_seg->total_size;
1211+ data_seg->total_copied = data_seg->total_size;
1212 conn->txdata_octets += pdulen;
1213 return pdulen;
1214 }
1215
1216+free_skb:
1217 kfree_skb(skb);
1218 if (err < 0 && err != -EAGAIN) {
1219- cxgb3i_log_error("conn 0x%p, xmit err %d.\n", conn, err);
1220+ cxgb3i_log_error("conn 0x%p, xmit err %d, pdu len %u/%u.\n",
1221+ conn, err, hdrlen, datalen); /* skb is freed: log locals only */
1222 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
1223 return err;
1224 }
1225@@ -652,6 +799,9 @@
1226 return -ENOMEM;
1227 memset(page_address(pad_page), 0, PAGE_SIZE);
1228 cxgb3i_ddp_page_init();
1229+ cxgb3i_log_info("skb max. frag %u, head %u.\n",
1230+ (unsigned int)MAX_SKB_FRAGS,
1231+ (unsigned int)skb_copymax);
1232 return 0;
1233 }
1234
1235@@ -720,7 +870,7 @@
1236
1237 read_lock(&c3cn->callback_lock);
1238 conn = c3cn->user_data;
1239- if (conn && c3cn->state != C3CN_STATE_ESTABLISHED)
1240+ if (conn)
1241 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
1242 read_unlock(&c3cn->callback_lock);
1243 }
1244@@ -730,7 +880,7 @@
1245 struct t3cdev *tdev = snic->tdev;
1246 struct cxgb3i_ddp_info *ddp;
1247 struct ulp_iscsi_info uinfo;
1248- unsigned int ppmax, bits, max_bits;
1249+ unsigned int ppmax, bits;
1250 int i, err;
1251
1252 err = tdev->ctl(tdev, ULP_ISCSI_GET_PARAMS, &uinfo);
1253@@ -740,26 +890,21 @@
1254 return err;
1255 }
1256
1257- ppmax = (uinfo.ulimit - uinfo.llimit + 1) >> PPOD_SIZE_SHIFT;
1258- max_bits = min(PPOD_IDX_MAX_SIZE,
1259- (32 - sw_tag_idx_bits - sw_tag_age_bits));
1260- bits = __ilog2_u32(ppmax) + 1;
1261- if (bits > max_bits)
1262- bits = max_bits;
1263- ppmax = (1 << bits) - 1;
1264-
1265 snic->tx_max_size = min_t(unsigned int,
1266 uinfo.max_txsz, ULP2_MAX_PKT_SIZE);
1267 snic->rx_max_size = min_t(unsigned int,
1268 uinfo.max_rxsz, ULP2_MAX_PKT_SIZE);
1269+ cxgb3i_log_info("ddp max pkt size: %u/%u,%u, %u/%u,%u.\n",
1270+ snic->tx_max_size, uinfo.max_txsz, ULP2_MAX_PKT_SIZE,
1271+ snic->rx_max_size, uinfo.max_rxsz, ULP2_MAX_PKT_SIZE);
1272
1273- snic->tag_format.idx_bits = sw_tag_idx_bits;
1274- snic->tag_format.age_bits = sw_tag_age_bits;
1275- snic->tag_format.rsvd_bits = bits;
1276- snic->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
1277- snic->tag_format.rsvd_mask = (1 << snic->tag_format.rsvd_bits) - 1;
1278- snic->tag_format.rsvd_tag_mask =
1279- (1 << (snic->tag_format.rsvd_bits + PPOD_IDX_SHIFT)) - 1;
1280+ snic->tag_format.sw_bits = sw_tag_idx_bits + sw_tag_age_bits;
1281+
1282+ ppmax = (uinfo.ulimit - uinfo.llimit + 1) >> PPOD_SIZE_SHIFT;
1283+ bits = __ilog2_u32(ppmax) + 1;
1284+ if (bits > PPOD_IDX_MAX_SIZE)
1285+ bits = PPOD_IDX_MAX_SIZE;
1286+ ppmax = (1 << (bits - 1)) - 1;
1287
1288 ddp = cxgb3i_alloc_big_mem(sizeof(struct cxgb3i_ddp_info) +
1289 ppmax *
1290@@ -779,12 +924,15 @@
1291 spin_lock_init(&ddp->map_lock);
1292 ddp->llimit = uinfo.llimit;
1293 ddp->ulimit = uinfo.ulimit;
1294+ ddp->nppods = ppmax;
1295+ ddp->idx_last = ppmax;
1296+ ddp->idx_bits = bits;
1297+ ddp->idx_mask = (1 << bits) - 1;
1298+ ddp->rsvd_tag_mask = (1 << (bits + PPOD_IDX_SHIFT)) - 1;
1299
1300- uinfo.tagmask =
1301- snic->tag_format.rsvd_mask << snic->tag_format.rsvd_shift;
1302+ uinfo.tagmask = ddp->idx_mask << PPOD_IDX_SHIFT;
1303 for (i = 0; i < ULP2_PGIDX_MAX; i++)
1304 uinfo.pgsz_factor[i] = ddp_page_order[i];
1305-
1306 uinfo.ulimit = uinfo.llimit + (ppmax << PPOD_SIZE_SHIFT);
1307
1308 err = tdev->ctl(tdev, ULP_ISCSI_SET_PARAMS, &uinfo);
1309@@ -794,19 +942,20 @@
1310 goto free_ppod_map;
1311 }
1312
1313- ddp->nppods = ppmax;
1314- ddp->idx_last = ppmax;
1315-
1316 tdev->ulp_iscsi = snic->ddp = ddp;
1317
1318- cxgb3i_log_info("snic nppods %u (0x%x ~ 0x%x), rsvd shift %u, "
1319- "bits %u, mask 0x%x, 0x%x, pkt %u,%u.\n",
1320- ppmax, ddp->llimit, ddp->ulimit,
1321- snic->tag_format.rsvd_shift,
1322- snic->tag_format.rsvd_bits,
1323- snic->tag_format.rsvd_mask, uinfo.tagmask,
1324- snic->tx_max_size, snic->rx_max_size);
1325+ cxgb3i_log_info("nppods %u (0x%x ~ 0x%x), bits %u, mask 0x%x,0x%x.\n",
1326+ ppmax, ddp->llimit, ddp->ulimit, ddp->idx_bits,
1327+ ddp->idx_mask, ddp->rsvd_tag_mask);
1328
1329+ snic->tag_format.rsvd_bits = ddp->idx_bits;
1330+ snic->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
1331+ snic->tag_format.rsvd_mask = (1 << snic->tag_format.rsvd_bits) - 1;
1332+
1333+ cxgb3i_log_info("tag format: sw %u, rsvd %u,%u, mask 0x%x.\n",
1334+ snic->tag_format.sw_bits, snic->tag_format.rsvd_bits,
1335+ snic->tag_format.rsvd_shift,
1336+ snic->tag_format.rsvd_mask);
1337 return 0;
1338
1339 free_ppod_map:
1340diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.h
1341--- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.h 2009-01-14 15:17:57.000000000 -0800
1342+++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.h 2009-01-26 22:00:17.000000000 -0800
1343@@ -13,7 +13,6 @@
1344 #ifndef __CXGB3I_ULP2_H__
1345 #define __CXGB3I_ULP2_H__
1346
1347-#define ULP2_PDU_PAYLOAD_DFLT (16224 - ISCSI_PDU_HEADER_MAX)
1348 #define PPOD_PAGES_MAX 4
1349 #define PPOD_PAGES_SHIFT 2 /* 4 pages per pod */
1350
1351@@ -100,9 +99,6 @@
1352 #define ULP2_FLAG_DCRC_ERROR 0x20
1353 #define ULP2_FLAG_PAD_ERROR 0x40
1354
1355-#define ULP2_MAX_PKT_SIZE 16224
1356-#define ULP2_MAX_PDU_SIZE 8192
1357-
1358 void cxgb3i_conn_closing(struct s3_conn *);
1359 void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn);
1360 void cxgb3i_conn_tx_open(struct s3_conn *c3cn);